String Parser
String Parser
A string parser is an application that takes a block of text and extracts the string residing between 2 user defined tags. If these tags occur multiple times in the text, then the parser extracts multiple strings.
For instance assume TestData.txt contains the following data;
Tove
bonney
Jani
Reminder
Don’t forget me this weekend!
And my Start and End Tags are and. The StringParser library should find
Tove
bonney
A string parser design should be flexible in that it should be able to use any Start and End Tag.
Library
The string parser is so versatile that it should be packaged in a library so that it can be easily integrated into other applications. This approach has the following advantages:
1. Easy to integrate into multiple projects.
2. Easy to maintain. Any changes made to the parser library are made in a
single codebase regardless of the number of applications using it.
3. It makes it easier to divvy up projects in a team. The library is defined by its
interface, in this case a header file that describes the functionality it provides. The header file is a contract describing library functions, how they are accessed, what is returned, and what is expected. It completely defines all communication possibilities between the library and its clients.
For these 3 reasons alone, much of the code produced in the world is provided as a library.
What you get
On the course website, in the projects section, you will find 3 incomplete projects. The
following shows the solution as it appears in Eclipse. The includes directory should be in a top-level stand-alone folder, but we are running up against a limitation of Eclipse's 'Workspace' concept, so it is located in the 327_proj3_test folder. Note that FileIO.h and StringParserClass.h, as well as constants.h, are located there.
Assignment
This project links 327_proj3_fileIO Library correctly to the 327_proj3_Test application. Please implement the 327_proj3_StringParser library and link it to the 327_proj3_Test application.
Please fill in all required content in;
• FileIO.cpp
• StringParserClass.cpp
• 327_proj3_test.cpp
Note that 327_proj3_test.cpp requires command line parameters to be passed in when the program is invoked, there should be 4 of them;
• the first is the filename to read data from
• the second is the first tag to search for
• the third is the second tag to search for
• and the fourth is the output file to write all the found data to
see below for sample run
FileIO.cpp
#include
#include
#include "../327_proj3_test/includes/FileIO.h"
#include "../327_proj3_test/includes/constants.h"
using namespace std;
intKP_FileIO::getFileContents(conststd::string &filename, std::string &contents)
{
// TODO fill in
}
intKP_FileIO::writeVectortoFile(conststd::string filename,std::vector&myEntryVector)
{
// TODO fill in
}
StringParserClass.cpp
#include
#include
// TODO Fill in
327_proj3_test.cpp
#include
#include
#include
#include
#include
#include
#include "../includes/constants.h"
#include "Stringtopointer.h"
#include "../includes/StringParserClass.h"
#include "../includes/FileIO.h"
#include "test.h"
using namespace std;
// two utility functions for looping through vector and printing
// its contents to std::out (the Console)
// Prints a single entry followed by a line break.
// NOTE(review): the original statement was damaged in extraction; the text
// after "std::cout<" is reconstructed - confirm against the real file.
void outputvectorrow(std::string i) {
    std::cout << i << std::endl;
}

// Prints every entry of myVector to the console, one per line.
// NOTE(review): this function's header was lost in extraction; the name comes
// from the commented-out call in typicalrun and the by-value parameter from
// the surviving "myVector)" - confirm against the real file.
void dumpVecToStdOut(std::vector<std::string> myVector) {
    std::for_each(myVector.begin(), myVector.end(), outputvectorrow);
}
inttypicalrun(string &inputfile,string&startTag,string&endTag,string&outputfile) {
vectormyStrings;
intiret = SUCCESS;
// open file, if not there ask for a different file or exit
std::string filecontents;
iret = KP_FileIO::getFileContents(inputfile.c_str(),filecontents );
if (iret != SUCCESS)
returniret;
// we cant manipulate String contents easily, so make a copy in a dynamically allocated array
// incidently this class shows the principles of RAII, allocated memory is automatically
// deallocated in the destructor
String_to_pointerstp(filecontents);
char* pChar = stp.getPointerToStringBegginning();
// create an instance of the stringparser
KP_StringParserClass::StringParserClassmyClass;
// what tags are we searching for?
iret = myClass.setTags(startTag.c_str(),endTag.c_str());
if (iret != SUCCESS)
returniret;
// pull out the data
iret = myClass.getDataBetweenTags(pChar, myStrings);
if (iret != SUCCESS)
returniret;
// serialize to file
iret = KP_FileIO::writeVectortoFile(outputfile,myStrings);
if (iret != SUCCESS)
returniret;
// dump to standard output
// dumpVecToStdOut(myStrings);
}
int main(intargc, char *argv[]) {
vectormyStrings;
intiret = SUCCESS;
// TODO verify that correct number of params are entered
// TODO otherwise return output WRONG_NUMB_ARGS and return FAIL_WRONG_NUMBER_ARGS
// harvest all the user info
stringinputfile = argv[1];
stringstartTag = argv[2];
stringendTag = argv[3];
stringoutputfile = argv[4];
// lets test the inputs
testfileIO(inputfile, outputfile);
// now lets test the string parsing
testStringParser(startTag,endTag);
cout<<"Score is:"<
Stringtopointer.cpp
#include
#include "Stringtopointer.h"
// extra char reserved for the terminating '\0' when sizing the buffer
const int SPACE_FOR_TERMINATING_NULL_CHAR = 1;
// keeps its own std::string copy of myString; pChar starts out null and the
// char buffer is only allocated when getPointerToStringBegginning() is called
String_to_pointer::String_to_pointer(const std::string &myString) :
        pChar(0), aString(myString) {
}
// releases the char buffer, if one is currently allocated
String_to_pointer::~String_to_pointer()
{
    clear();
}
// very dangerous to expose private data!
char* String_to_pointer::getPointerToStringBegginning() {
resetPointer();
returnpChar;
}
voidString_to_pointer::clear() {
if(pChar)
delete [] pChar;
pChar = 0;
}
voidString_to_pointer::resetPointer() {
clear();
// how many chars (+1 for the null!)
intlen = strlen(aString.c_str())+SPACE_FOR_TERMINATING_NULL_CHAR;
if(len>SPACE_FOR_TERMINATING_NULL_CHAR) {
pChar = new char[len];
strncpy(pChar,aString.c_str(),len);
*(pChar+len)=0;
}
}
voidString_to_pointer::changeString(conststd::string &myString) {
aString = myString;
clear();
}
Stringtopointer.h
#ifndef STRINGTOPOINTER_H_
#define STRINGTOPOINTER_H_
#include
/**
 * Helper class to convert from string to dynamically allocated array
 * memory is automatically deleted in the destructor
 *
 * The object owns the raw buffer behind pChar, so copying is disabled: an
 * implicit member-wise copy would leave two objects deleting the same buffer.
 */
class String_to_pointer {
public:
    // stores a copy of myString
    String_to_pointer(const std::string &myString);

    // not copyable, see class comment
    String_to_pointer(const String_to_pointer &) = delete;
    String_to_pointer &operator=(const String_to_pointer &) = delete;

    // calls clear
    virtual ~String_to_pointer();

    // very dangerous to expose private data!
    char* getPointerToStringBegginning();

    // replaces the stored string
    void changeString(const std::string &myString);

private:
    void clear();        // deallocates memory
    void resetPointer();

    char* pChar;
    std::string aString;
};
#endif /* STRINGTOPOINTER_H_ */
test.cpp
#include "test.h"
#include
#include
#include
#include
#include
#include "../includes/constants.h"
#include "Stringtopointer.h"
#include "../includes/StringParserClass.h"
#include "../includes/FileIO.h"
using namespace std;
// point values awarded per check
const int ZERO_POINTS = 0; // for retests
const int ONE_POINTS = 1;
const int TWO_POINTS = 2;
const int THREE_POINTS = 3;
const int FIVE_POINTS = 5;
const int TEN_POINTS = 10;
const int FIFTEEN_POINTS = 15;

// tracks how many points you will get out of 100
// yeah, yeah its a global
int total_points = 0;
// this is a template class, its mostly here as a helper for me
// the T and U are generic params, I can substitute any type for them
// they must be comparable with ==, templates are extremely hard to get right BTW
template
bool EXPECT_EQ(T expectedVal, U actualVal,stringtestnumb = "", intpts=ONE_POINTS) {
bool bout = (expectedVal == actualVal);
if (bout) {
cout<<"SUCCESS "+testnumb;
total_points+=pts;
}
else
cout<<"FAIL "+ testnumb<< " Expected:"< mv;
mv.push_back("A");
mv.push_back("small");
mv.push_back("lizard");
// test read from user supplied input file
EXPECT_EQ (SUCCESS, KP_FileIO::getFileContents(userEnteredInputFile, contents),"25",FIVE_POINTS);
// TODO probably should confirm that it also has right contents
// test read and write to a bogus file
filename = TEST_DATA_NON_EXISTANT;
EXPECT_EQ (COULD_NOT_OPEN_FILE_TO_READ, KP_FileIO::getFileContents(filename, contents),"1",FIVE_POINTS);
EXPECT_EQ (COULD_NOT_OPEN_FILE_TO_WRITE, KP_FileIO::writeVectortoFile(filename, mv),"2",FIVE_POINTS);
// test write real
EXPECT_EQ (SUCCESS, KP_FileIO::writeVectortoFile(userEnteredOutputFile, mv),"3",FIVE_POINTS);
// test read real
contents.clear();
EXPECT_EQ (SUCCESS, KP_FileIO::getFileContents(userEnteredOutputFile, contents),"4",FIVE_POINTS);
EXPECT_EQ (true, TEST_DATA_SMALL_OUT_VALUE == contents,"5",FIVE_POINTS);
}
// values referenced by the string parser checks below
const std::string TS_FIRST_NONCE = "Twilight Sparkle";
const std::string TS_SECOND_NONCE = "Pinkie Pie";
const std::string BOGUS_TAG = "——-";
voidtestStringParser(conststd::string &startTag,conststd::string &endTag) {
vector mv;
KP_StringParserClass::StringParserClasssp;
// verify correct null behaviour
char* pChar =0;
EXPECT_EQ (ERROR_TAGS_NULL, sp.getDataBetweenTags(pChar, mv),"6",FIVE_POINTS);
// verify correct null tag behaviour
EXPECT_EQ (ERROR_TAGS_NULL, sp.setTags(0, 0),"7",FIVE_POINTS);
// set tags
String_to_pointerstp_start(startTag);
String_to_pointerstp_end(endTag);
EXPECT_EQ (SUCCESS, sp.setTags(stp_start.getPointerToStringBegginning(), stp_end.getPointerToStringBegginning()),"8",FIVE_POINTS);
// now that tags are set verify that it fails if there is no data
EXPECT_EQ (ERROR_DATA_NULL, sp.getDataBetweenTags(pChar, mv),"9",FIVE_POINTS);
// we cant manipulate String contents easily, so make a copy in a dynamically allocated array
// incidently this class shows the principles of RAII, allocated memory is automatically
// deallocated in the destructor
String_to_pointerstp(TEST_STRING);
pChar = stp.getPointerToStringBegginning();
EXPECT_EQ (SUCCESS, sp.getDataBetweenTags(pChar, mv),"10",FIVE_POINTS);
EXPECT_EQ (2, mv.size(),"11",FIVE_POINTS);
EXPECT_EQ (TS_FIRST_NONCE, mv[0],"12",TWO_POINTS);
EXPECT_EQ (TS_SECOND_NONCE, mv[1],"13",THREE_POINTS);
// make sure they clear the vector and not append to it
pChar = stp.getPointerToStringBegginning();
EXPECT_EQ (SUCCESS, sp.getDataBetweenTags(pChar, mv),"14",ZERO_POINTS);
// already tested
EXPECT_EQ (2, mv.size(),"15",FIVE_POINTS);
// if one then it was cleared if 2 not
// try with no end tag
stp.changeString(TEST_STRING_NO_END_TAG);
pChar = stp.getPointerToStringBegginning();
EXPECT_EQ (SUCCESS, sp.getDataBetweenTags(pChar, mv),"16",FIVE_POINTS);
EXPECT_EQ (0, mv.size(),"17",FIVE_POINTS);
// try with no start tag
stp.changeString(TEST_STRING_NO_START_TAG);
pChar = stp.getPointerToStringBegginning();
EXPECT_EQ (SUCCESS, sp.getDataBetweenTags(pChar, mv),"18",FIVE_POINTS);
EXPECT_EQ (0, mv.size(),"19",FIVE_POINTS);
// make sure they made a deep copy of the tags
// the following 2 lines should not affect start and end tags in sp
stp_start.changeString(BOGUS_TAG);
stp_end.changeString(BOGUS_TAG);
// try with no start tag
stp.changeString(TEST_STRING);
pChar = stp.getPointerToStringBegginning();
mv.clear();
EXPECT_EQ (SUCCESS, sp.getDataBetweenTags(pChar, mv),"20",ZERO_POINTS);
// already tested
EXPECT_EQ (2, mv.size(),"21",TEN_POINTS);
}
intgetScore(){
returntotal_points;
}
test.h
#ifndef TEST_H_
#define TEST_H_
#include
// runs the file I/O checks against the two user-supplied file names
void testfileIO(const std::string &userEnteredInputFile, const std::string &userEnteredOutputFile);

// runs the string parser checks using the two user-supplied tags
void testStringParser(const std::string &startTag, const std::string &endTag);

// returns the points accumulated by the checks
int getScore();
#endif /* TEST_H_ */
Solution
327_proj3_test.cpp
327_proj3_test.cpp
#include
#include
#include
#include
#include
#include
#include "../includes/constants.h"
#include "Stringtopointer.h"
#include "../includes/StringParserClass.h"
#include "../includes/FileIO.h"
#include "test.h"
using namespace std;
// two utility functions for looping through vector and printing
// its contents to std::out (the Console)
// Prints a single entry followed by a line break.
// NOTE(review): the original statement was damaged in extraction; the text
// after "std::cout<" is reconstructed - confirm against the real file.
void outputvectorrow(std::string i) {
    std::cout << i << std::endl;
}

// Prints every entry of myVector to the console, one per line.
// NOTE(review): this function's header was lost in extraction; the name comes
// from the commented-out call in typicalrun and the by-value parameter from
// the surviving "myVector)" - confirm against the real file.
void dumpVecToStdOut(std::vector<std::string> myVector) {
    std::for_each(myVector.begin(), myVector.end(), outputvectorrow);
}
inttypicalrun(string &inputfile,string&startTag,string&endTag,string&outputfile) {
vectormyStrings;
intiret = SUCCESS;
// open file, if not there ask for a different file or exit
std::string filecontents;
iret = KP_FileIO::getFileContents(inputfile.c_str(),filecontents );
if (iret != SUCCESS)
returniret;
// we cant manipulate String contents easily, so make a copy
in a dynamically allocated array
// incidently this class shows the principles of RAII,
allocated memory is automatically
// deallocated in the destructor
String_to_pointerstp(filecontents);
char* pChar = stp.getPointerToStringBegginning();
// create an instance of the stringparser
KP_StringParserClass::StringParserClassmyClass;
// what tags are we searching for?
iret = myClass.setTags(startTag.c_str(),endTag.c_str());
if (iret != SUCCESS)
returniret;
// pull out the data
iret = myClass.getDataBetweenTags(pChar, myStrings);
if (iret != SUCCESS)
returniret;
// serialize to file
iret = KP_FileIO::writeVectortoFile(outputfile,myStrings);
if (iret != SUCCESS)
returniret;
// dump to standard output
// dumpVecToStdOut(myStrings);
}
int main(intargc, char *argv[]) {
vectormyStrings;
intiret = SUCCESS;
// TODO verify that correct number of params are entered
// TODO otherwise return output WRONG_NUMB_ARGS and return
FAIL_WRONG_NUMBER_ARGS
if (argc< 5)
{
return FAIL_WRONG_NUMBER_ARGS;
}
// harvest all the user info
stringinputfile = argv[1];
stringstartTag = argv[2];
stringendTag = argv[3];
stringoutputfile = argv[4];
// lets test the inputs
testfileIO(inputfile, outputfile);
// now lets test the string parsing
testStringParser(startTag,endTag);
cout<<"Score is:"<
FileIO.cpp
#include >
#include >
#include >
#include "../327_proj3_test-master/includes/FileIO.h"
#include "../327_proj3_test-master/includes/constants.h"
using namespace std;
intKP_FileIO::getFileContents(conststd::string &filename, std::string &contents) {
ifstreaminFile;
inFile.open(filename); //open the input file
if (!inFile.is_open())
{
return COULD_NOT_OPEN_FILE_TO_READ;
}
stringlineStr;
while (getline(inFile, lineStr))
{
contents += lineStr;
}
inFile.close();
return SUCCESS;
}
intKP_FileIO::writeVectortoFile(conststd::string filename,std::vector&myEntryVector)
{
ofstream out(filename);
if (!out.is_open())
{
return COULD_NOT_OPEN_FILE_TO_WRITE;
}
for (std::vector::iterator it = myEntryVector.begin(); it != myEntryVector.end(); it++)
{
cout<< *it<
StringParserClass.cpp
#include
#include
#include "../327_proj3_test-master/includes/StringParserClass.h"
#include "../327_proj3_test-master/includes/constants.h"
// Starts with no tags: both tag pointers are null and areTagsSet is false.
KP_StringParserClass::StringParserClass::StringParserClass()
    : pStartTag(0),
      pEndTag(0),
      areTagsSet(false)
{
}
// Frees the tag copies held by this object.
KP_StringParserClass::StringParserClass::~StringParserClass()
{
    cleanup();
}
intKP_StringParserClass::StringParserClass::setTags(const char* pStart, const char* pEnd) {
if (pStart == 0 || pEnd == 0) {
return ERROR_TAGS_NULL;
}
cleanup();
intlen = strlen(pStart) + 1;
pStartTag = new char[len];
strncpy_s(pStartTag, len, pStart, len);
*(pStartTag + len) = 0;
len = strlen(pEnd) + 1;
pEndTag = new char[len];
strncpy_s(pEndTag, len, pEnd, len);
*(pEndTag + len) = 0;
return SUCCESS;
}
/**
 * Collects every string that lies between a start tag and the next end tag
 * in pDataToSearchThru.
 *
 * pDataToSearchThru: null-terminated text to search.
 * myVector: cleared on entry, then filled with the strings found, in order.
 *
 * Returns ERROR_TAGS_NULL if no tags have been stored, ERROR_DATA_NULL if
 * pDataToSearchThru is null, otherwise SUCCESS (also when nothing is found).
 */
int KP_StringParserClass::StringParserClass::getDataBetweenTags(char* pDataToSearchThru, std::vector<std::string> &myVector) {
    myVector.clear();

    // the original tested pStartTag twice and never looked at pEndTag
    if (pStartTag == 0 || pEndTag == 0) {
        return ERROR_TAGS_NULL;
    }
    if (pDataToSearchThru == 0) {
        return ERROR_DATA_NULL;
    }

    const size_t startTagLen = strlen(pStartTag);
    const size_t endTagLen = strlen(pEndTag);
    char* const endOfInput = pDataToSearchThru + strlen(pDataToSearchThru);

    char* cursor = pDataToSearchThru;
    char* start = 0;
    char* end = 0;
    while (findTag(cursor, start, end) == SUCCESS) {
        char* const dataBegin = start + startTagLen;

        // an end tag located before the data cannot close this start tag;
        // look for the first end tag that follows the data instead
        if (end < dataBegin) {
            end = strstr(dataBegin, pEndTag);
            if (end == 0) {
                break;
            }
        }

        myVector.push_back(std::string(dataBegin, end));

        char* const next = end + endTagLen;
        if (next == cursor) {
            // only possible when both tags are empty; stop rather than loop forever
            break;
        }
        cursor = next;
        if (cursor >= endOfInput) {
            break;
        }
    }
    return SUCCESS;
}
voidKP_StringParserClass::StringParserClass::cleanup() {
if (pStartTag != 0) {
delete[]pStartTag;
pStartTag = 0;
}
if (pEndTag != 0) {
delete[]pEndTag;
pEndTag = 0;
}
}
intKP_StringParserClass::StringParserClass::findTag(char *pTagToLookFor, char *&pStart, char *&pEnd) {
if (pStartTag == 0 || pStartTag == 0) {
return ERROR_TAGS_NULL;
}
pStart = strstr(pTagToLookFor, pStartTag);
pEnd = strstr(pTagToLookFor, pEndTag);
if (pStart == 0 || pEnd == 0) {
return FAIL;
}
return SUCCESS;
}