-
Re: C++ Help Part Two
Small update. It's not pretty, but it gets the job done as far as parsing the HTML for text goes. Eliminated anything not whitespace or alphabetic characters, and changed all the upper case ones to lower case.
Code:
//
//
//
// Mid-Term Exam Program
//
// The purpose of this program is to parse an HTML file and count the number of
// unique key words that appear in the file.
#include<iostream> // Required for cin, cout, and cerr.
#include<fstream> // Required for ifstream and ofstream.
#include<string> // Required for string.
#include<cctype> // Required for tolower, isalpha, isupper.
using namespace std;
// Define constants and declare function prototypes.
int main()
{
// Declare Objects.
char character;
bool text_state = true;
string infile;
string outfile1;
string outfile2;
ifstream html;
ofstream htmltext;
ifstream itext;
ofstream otext;
// Prompt user for name of input file.
cout << "Enter the name of the input file:";
cin >> infile;
// Prompt user for name of output file.
cout << "Enter name of the output file:";
cin >> outfile1;
// Open files.
html.open(infile.c_str());
if(html.fail())
{
cerr << "Error opening the input file\n";
exit(1);
}
htmltext.open(outfile1.c_str());
//Read first character from html file.
html.get(character);
while(!html.eof())
{
//Check state
if(text_state)
{
if(character == '<') // Beginning of a tag.
{
text_state=false; // Change states.
}
else
{
htmltext << character; // Still text, write to file.
}
}
else
{
// Command state, no output required.
if(character == '>') // End of a tag.
text_state=true; // Change states.
}
// Read next character from html file.
html.get(character);
}
html.close();
htmltext.close();
//Input file for refinements same as last output file.
//cin >> outfile1;
// Get name of output file.
cout << "Enter name of final output file: ";
cin >> outfile2;
// Open files.
itext.open(outfile1.c_str());
if(itext.fail())
{
cerr << "Error opening the input file\n";
exit(1);
}
otext.open(outfile2.c_str());
// Read first character.
itext.get(character);
cout << "Hi!\n"; // Execution stage indicator.
while(!itext.eof())
{
if (isupper(character))
{
character=tolower(character);
putchar(character);
text_state=true;
otext << character;
}
else
{
if (isalpha(character)||isspace(character))
{
text_state=true;
otext << character;
}
else
{
text_state=false;
}
}
// Get next character.
itext.get(character);
}
itext.close();
otext.close();
return 0;
}
So, someone told me I should use a multiset for this...good idea or bad? If good, how would I take what I have in my text file and put it into said multiset?
-
Re: C++ Help Part Two
I hate to bump an old thread, but it's not worth creating a new one just for this. I can't get it to output to the very last file at the end of the program. Halp.
Code:
/*----------------------------------------------------------------------------*/
//
// Mid-Term Exam Program
//
// The purpose of this program is to parse an HTML file and count the number of
// unique key words that appear in the file.
//
/*----------------------------------------------------------------------------*/
#include <iostream> // Required for cin, cout, and cerr.
#include <fstream> // Required for ifstream and ofstream.
#include <string> // Required for string.
#include <cctype> // Required for tolower, isalpha, isupper.
#include <vector> // Required for vector <>.
using namespace std;
int main()
{
// Declare Objects.
char character;
bool text_state = true;
string infile;
string storage;
ifstream input;
ofstream store;
// Prompt user for name of the input file.
cout << "Enter the name of the input file:";
cin >> infile;
// Prompt user for name of the storage file.
cout << "Enter the name of the storage file:";
cin >> storage;
// Open files.
input.open(infile.c_str());
if(input.fail())
{
cerr << "Error opening the input file\n";
exit(1);
}
store.open(storage.c_str());
//Read first character from html file.
input.get(character);
while(!input.eof())
{
//Check state
if(text_state)
{
if(character == '<') // Beginning of a tag.
{
text_state=false; // Change states.
}
else
{
store << character; // Still text, write to file.
}
}
else
{
// Command state, no output required.
if(character == '>') // End of a tag.
text_state=true; // Change states.
}
// Read next character from html file.
input.get(character);
}
input.close();
store.close();
/*----------------------------------------------------------------------------*/
//
// This section removes non-alphabetic characters and converts uppercase letters
// to lowercase form. It also preserves whitespace.
//
/*----------------------------------------------------------------------------*/
// Declare objects.
string outfile;
ifstream input2;
ofstream output;
// Prompt users for the name of the final output file.
cout << "Enter the name of the final output file:";
cin >> outfile;
// Open fthe storage file.
input2.open(storage.c_str());
if(input2.fail())
{
cerr << "Error opening the input file\n";
exit(1);
}
output.open(outfile.c_str());
// Read first character.
input2.get(character);
// cout << "Hi!\n" << endl; // Execution stage indicator.
while(!input2.eof())
{
if (isupper(character))
{
character=tolower(character);
putchar(character);
text_state=true;
output << character;
}
else
{
if (isalpha(character)||isspace(character))
{
text_state=true;
output << character;
}
else
{
text_state=false;
}
} // End of "else".
// Get next character.
input2.get(character);
} // End of "while".
store.close();
output.close();
/*----------------------------------------------------------------------------*/
//
// This next segment will parse words from the output file and list them in
// descending order of frequency in a new file.
//
/*----------------------------------------------------------------------------*/
// Declare objects.
char wordchar; // characters that will make up each word
bool word_state;
string keywords;
string tempWord;
string testWord;
vector<string> WordList;
vector<int> WordCount;
ifstream input3;
ofstream outfinal;
// Prompt for filenames and open the files.
cout << "Enter name of file for the final list of keywords: ";
cin >> keywords;
// This next line is the previous output file.
input3.open(outfile.c_str());
if(input3.fail())
{
cout << "Error opening input file";
exit(1);
}
outfinal.open(keywords.c_str());
// Get the first character from the input file.
input3.get(wordchar);
// Parse for unique words.
do
{
if(isspace(wordchar))
{
for(int i=0; i<WordList.size(); i++)
{
if(WordList[i].compare(testWord) == 0)
{
WordCount[i] += 1;
break;
}
else
{
WordList.push_back(testWord);
break;
}
}
}
else
{
testWord += wordchar; // continues building current word.
}
}
while (!input3.eof());
// Close input file.
input3.close();
//Declarations for bubble sort algorithm.
bool flag = true;
int tempCount;
// Bubble sorter.
//for(int i=1; (i<=WordList.size()) && flag; i++)
while(flag = false)
{
//flag = false;
for(int j=0; j<((WordList.size())-2); j++)
{
if(WordCount[j+1] < WordCount[j])
{
tempCount = WordCount[j];
WordCount[j] = WordCount[j+1];
WordCount[j+1] = tempCount;
tempWord = WordList[j];
WordList[j] = WordList[j+1];
WordList[j+1] = tempWord;
flag = true;
}
}
}
for(int i=0; i<WordList.size(); i++)
{
outfinal << WordList[i] << " occurs " << WordCount[i] << "\n" << endl;
}
outfinal.close();
return 0;
}
-
Re: C++ Help Part Two
I take it you mean the :
for(int i=0; i<WordList.size(); i++)
{
outfinal << WordList[i] << " occurs " << WordCount[i] << "\n" << endl;
}
Personally I access my vectors using WorldList.at(i).
-
Re: C++ Help Part Two
That could actually possibly be the problem. I feel like a moron now, I had just read about that function too. Let me replace every instance with that function where appropriate, I'll update with results.
-
Re: C++ Help Part Two
It might but I doubt it. Vectors are basically dynamic arrays, so they can be accessed in same way.
-
Re: C++ Help Part Two
That's what I figured, but it never hurts to try. It didn't work, though. It creates the keywords file that I told it to, but it doesn't actually put anything in it. The program also never actually finishes, it just sits there in the console. I'm not getting any debugging errors through the IDE and I'm not getting any compiler or linker errors, so I have no idea what's going on.
-
Re: C++ Help Part Two
Have you stepped through it to see what its doing?
-
Re: C++ Help Part Two
Define "stepped through it." You mean put breaks in, run it to that point, and if it works move the break up? If so, then yes.
-
Re: C++ Help Part Two
Well yeah, debugged it and gone through and checked the variables along the way.
-
Re: C++ Help Part Two
Ok, I've got it fixed and outputting now. Just a few formatting errors needing fixed in the output. Thanks for the tips.