diff options
author | sbrandt <sbrandt@17b73243-c579-4c4c-a9d2-2d5706c11dac> | 2014-02-18 17:56:10 +0000 |
---|---|---|
committer | sbrandt <sbrandt@17b73243-c579-4c4c-a9d2-2d5706c11dac> | 2014-02-18 17:56:10 +0000 |
commit | 375a3181eca67ade911daab563ff02cef15ddda4 (patch) | |
tree | 516108ec7b9072b790a9bbb66d25ec711827d321 | |
parent | 7bab271c2b988a8feb0e2aed226fbe563bdc674f (diff) |
Make it possible to parse very large parameter files and to do it quickly.
git-svn-id: http://svn.cactuscode.org/flesh/trunk@5086 17b73243-c579-4c4c-a9d2-2d5706c11dac
-rw-r--r-- | src/piraha/Call.cc | 6 | ||||
-rw-r--r-- | src/piraha/Generic.cc | 2 | ||||
-rw-r--r-- | src/piraha/Group.cc | 26 | ||||
-rw-r--r-- | src/piraha/Lookup.cc | 6 | ||||
-rw-r--r-- | src/piraha/Matcher.cc | 3 | ||||
-rw-r--r-- | src/piraha/Multi.cc | 6 | ||||
-rw-r--r-- | src/piraha/Or.cc | 5 | ||||
-rw-r--r-- | src/piraha/Piraha.hpp | 25 | ||||
-rw-r--r-- | src/piraha/smart_ptr.hpp | 3 |
9 files changed, 50 insertions, 32 deletions
diff --git a/src/piraha/Call.cc b/src/piraha/Call.cc index 99f8f005..76cd647a 100644 --- a/src/piraha/Call.cc +++ b/src/piraha/Call.cc @@ -869,7 +869,11 @@ extern "C" int cctk_PirahaParser(const char *buffer,unsigned long buffersize,int std::string active; smart_ptr<Matcher> m2 = new Matcher(par_file_grammar,"file",buffer,buffersize); - if(m2->matches()) { + std::clock_t st = std::clock(); + bool b = m2->matches(); + std::clock_t en = std::clock(); + std::cout << "PARSE TIME = " << ((en-st)/CLOCKS_PER_SEC) << std::endl; + if(b) { int line = -1; for(int i=0;i<m2->groupCount();i++) { smart_ptr<Group> gr = m2->group(i); diff --git a/src/piraha/Generic.cc b/src/piraha/Generic.cc index d9b3f84b..118701b5 100644 --- a/src/piraha/Generic.cc +++ b/src/piraha/Generic.cc @@ -103,7 +103,7 @@ int main(int argc,char **argv) { std::cout << "writing file: " << outFile << std::endl; smart_ptr<Group> src_file = new Group("annot:src_file",inputArg.c_str()); - mg->children.push_back(src_file); + mg->children->push_back(src_file); if(perlFlag) { mg->dumpPerl(o); } else if(pythonFlag) { diff --git a/src/piraha/Group.cc b/src/piraha/Group.cc index c236bf4c..0396285c 100644 --- a/src/piraha/Group.cc +++ b/src/piraha/Group.cc @@ -12,15 +12,15 @@ void Group::dump(int n,std::ostream& o,int indent) { o << "[" << n << "] "; } o << pattern << ": "; - if(children.size()==0) { + if(children->size()==0) { for(int i=start_;i<end_;i++) o << input[i]; } o << std::endl; typedef vector<smart_ptr<Group> >::iterator group_iter; int nn = 0; - for(group_iter gi = children.begin(); - gi != children.end(); + for(group_iter gi = children->begin(); + gi != children->end(); ++gi) { (*gi)->dump(nn++,o,indent+2); } @@ -38,7 +38,7 @@ void Group::dumpPerl(std::ostream &o,int indent) { for(int i=0;i<indent;i++) o << ' '; o << "name=> \"" << getPatternName() << "\"," << std::endl; - if(children.size()==0) { + if(children->size()==0) { for(int i=0;i<indent;i++) o << ' '; o << "children=>[]," << std::endl; @@ -53,8 
+53,8 @@ void Group::dumpPerl(std::ostream &o,int indent) { o << ' '; o << "children=>[" << std::endl; typedef vector<smart_ptr<Group> >::iterator group_iter; - for(group_iter gi = children.begin(); - gi != children.end(); + for(group_iter gi = children->begin(); + gi != children->end(); ++gi) { (*gi)->dumpPerl(o,indent+2); for(int i=0;i<indent;i++) @@ -92,7 +92,7 @@ void Group::dumpPython(std::ostream &o,int indent) { for(int i=0;i<indent;i++) o << ' '; o << "'name' : \"" << getPatternName() << "\"," << std::endl; - if(children.size()==0) { + if(children->size()==0) { for(int i=0;i<indent;i++) o << ' '; o << "'children' : []," << std::endl; @@ -107,8 +107,8 @@ void Group::dumpPython(std::ostream &o,int indent) { o << ' '; o << "'children' : [" << std::endl; typedef vector<smart_ptr<Group> >::iterator group_iter; - for(group_iter gi = children.begin(); - gi != children.end(); + for(group_iter gi = children->begin(); + gi != children->end(); ++gi) { (*gi)->dumpPython(o,indent+2); for(int i=0;i<indent;i++) @@ -154,3 +154,11 @@ int Group::line() { } return line; } + +int Group::childCount() { + return children->size(); +} + +smart_ptr<Group> Group::child(int n) { + return (*children)[n]; +} diff --git a/src/piraha/Lookup.cc b/src/piraha/Lookup.cc index a1cc457c..24f56871 100644 --- a/src/piraha/Lookup.cc +++ b/src/piraha/Lookup.cc @@ -13,8 +13,8 @@ bool Lookup::match(Matcher *m) { smart_ptr<Pattern> p = gram->patterns.get(name); if(!p.valid()) std::cout << "Lookup of pattern [" << name << "] failed. 
Jmap = " << gram->patterns << std::endl; assert(p.valid()); - vector<smart_ptr<Group> > chSave = m->children; - m->children.clear(); + smart_ptr<vector<smart_ptr<Group> > > chSave = m->children; + m->children = new vector<smart_ptr<Group> >(); int s = m->pos; std::string save_name = m->inrule; m->inrule += "::"; @@ -33,7 +33,7 @@ bool Lookup::match(Matcher *m) { */ smart_ptr<Group> g = new Group(name,m->input,s,e,m->children); if(capture) - chSave.push_back(g); + chSave->push_back(g); } m->children = chSave; return b; diff --git a/src/piraha/Matcher.cc b/src/piraha/Matcher.cc index fd1b2355..71fb0550 100644 --- a/src/piraha/Matcher.cc +++ b/src/piraha/Matcher.cc @@ -27,11 +27,12 @@ bool Matcher::matchesTo(int match_to_) { std::cout << g->patterns << std::endl; } assert(p.valid()); + //packrat.clear(); pos = 0; max_pos = -1; match_to = match_to_; err_pos = -1; - children.clear(); + children->clear(); bool b = p->match(this); end_ = pos; return b; diff --git a/src/piraha/Multi.cc b/src/piraha/Multi.cc index ae795645..f66466eb 100644 --- a/src/piraha/Multi.cc +++ b/src/piraha/Multi.cc @@ -4,13 +4,13 @@ using namespace cctki_piraha; bool Multi::match(Matcher *m) { - vector<smart_ptr<Group> > chSave; + int chSize; for(int i=0;i<maxv;i++) { int save = m->pos; - chSave = m->children; + chSize = m->children->size(); if(!pattern->match(m) || m->pos == save) { m->pos = save; - m->children = chSave; + m->children->resize(chSize); return i >= minv; } } diff --git a/src/piraha/Or.cc b/src/piraha/Or.cc index d9633e3e..6195958a 100644 --- a/src/piraha/Or.cc +++ b/src/piraha/Or.cc @@ -22,11 +22,10 @@ Or::Or(Pattern *p,...) 
: patterns() { bool Or::match(Matcher *m) { typedef vector<smart_ptr<Pattern> >::iterator pattern_iter; int save = m->pos; - vector<smart_ptr<Group> > chSave; - chSave = m->children; + int chSave = m->children->size(); for(pattern_iter p = patterns.begin();p != patterns.end();++p) { m->pos = save; - m->children = chSave; + m->children->resize(chSave); if((*p)->match(m)) return true; } diff --git a/src/piraha/Piraha.hpp b/src/piraha/Piraha.hpp index 2e7dff90..deac2047 100644 --- a/src/piraha/Piraha.hpp +++ b/src/piraha/Piraha.hpp @@ -6,10 +6,12 @@ #include <vector> #include <iostream> #include <smart_ptr.hpp> +#include <climits> +#include <string.h> namespace cctki_piraha { -const int max_int = 10000; +const int max_int = INT_MAX-1; using std::map; using std::vector; @@ -33,16 +35,15 @@ public: std::string pattern; const char *input; int start_,end_; - vector<smart_ptr<Group> > children; + smart_ptr<vector<smart_ptr<Group> > > children; Group(const char *p,const char *value) - : pattern(p), input(value), start_(0) { - for(end_=0;value[end_] != '\0';end_++); + : pattern(p), input(value), start_(0), end_(strlen(value)), children(new vector<smart_ptr<Group> >()) { } Group(std::string p,const char *input_) - : pattern(p), input(input_), start_(0), end_(0), children() {} + : pattern(p), input(input_), start_(0), end_(0), children(new vector<smart_ptr<Group> >()) {} Group(std::string p,const char *input_,int s,int e, - vector<smart_ptr<Group> > ch) + smart_ptr<vector<smart_ptr<Group> > > ch) : pattern(p), input(input_), start_(s), end_(e), children(ch) {} virtual ~Group() {} @@ -59,13 +60,13 @@ public: void dumpPerl(std::ostream&o,int indent); void dumpPython(std::ostream&o=std::cout); void dumpPython(std::ostream&o,int indent); - int groupCount() { return children.size(); } - smart_ptr<Group> group(int i) { return children[i]; } + int groupCount() { return children->size(); } + smart_ptr<Group> group(int i) { return (*children)[i]; } smart_ptr<Group> group(const char 
*nm,int ix=0) { - for(unsigned int i=0;i<children.size();i++) { - if(children[i]->getPatternName() == nm) { + for(unsigned int i=0;i<children->size();i++) { + if((*children)[i]->getPatternName() == nm) { if(ix == 0) { - return children[i]; + return (*children)[i]; } ix--; } @@ -350,6 +351,8 @@ class Matcher : public Group { public: Matcher(smart_ptr<Grammar> g,const char *pat_,const char *input_,int input_size=-1); virtual ~Matcher() {} + + //std::map<std::string,std::vector<smar_ptr<Group> > > packrat; const char *input; smart_ptr<Grammar> g; int input_size; diff --git a/src/piraha/smart_ptr.hpp b/src/piraha/smart_ptr.hpp index 71e3b4d6..38c34b42 100644 --- a/src/piraha/smart_ptr.hpp +++ b/src/piraha/smart_ptr.hpp @@ -8,6 +8,8 @@ #define NULL ((void*)0) #endif +#define NDEBUG + namespace cctki_piraha { // This global debug variable is used to detect the case @@ -22,6 +24,7 @@ extern std::set<void*> *ptrs; #ifndef NDEBUG inline void add(void *t) { + abort(); if(t == NULL) return; if(ptrs == 0) |