summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorsbrandt <sbrandt@17b73243-c579-4c4c-a9d2-2d5706c11dac>2014-02-18 17:56:10 +0000
committersbrandt <sbrandt@17b73243-c579-4c4c-a9d2-2d5706c11dac>2014-02-18 17:56:10 +0000
commit375a3181eca67ade911daab563ff02cef15ddda4 (patch)
tree516108ec7b9072b790a9bbb66d25ec711827d321
parent7bab271c2b988a8feb0e2aed226fbe563bdc674f (diff)
Make it possible to parse very large parameter files and to do it quickly.
git-svn-id: http://svn.cactuscode.org/flesh/trunk@5086 17b73243-c579-4c4c-a9d2-2d5706c11dac
-rw-r--r--src/piraha/Call.cc6
-rw-r--r--src/piraha/Generic.cc2
-rw-r--r--src/piraha/Group.cc26
-rw-r--r--src/piraha/Lookup.cc6
-rw-r--r--src/piraha/Matcher.cc3
-rw-r--r--src/piraha/Multi.cc6
-rw-r--r--src/piraha/Or.cc5
-rw-r--r--src/piraha/Piraha.hpp25
-rw-r--r--src/piraha/smart_ptr.hpp3
9 files changed, 50 insertions, 32 deletions
diff --git a/src/piraha/Call.cc b/src/piraha/Call.cc
index 99f8f005..76cd647a 100644
--- a/src/piraha/Call.cc
+++ b/src/piraha/Call.cc
@@ -869,7 +869,11 @@ extern "C" int cctk_PirahaParser(const char *buffer,unsigned long buffersize,int
std::string active;
smart_ptr<Matcher> m2 = new Matcher(par_file_grammar,"file",buffer,buffersize);
- if(m2->matches()) {
+ std::clock_t st = std::clock();
+ bool b = m2->matches();
+ std::clock_t en = std::clock();
+ std::cout << "PARSE TIME = " << ((en-st)/CLOCKS_PER_SEC) << std::endl;
+ if(b) {
int line = -1;
for(int i=0;i<m2->groupCount();i++) {
smart_ptr<Group> gr = m2->group(i);
diff --git a/src/piraha/Generic.cc b/src/piraha/Generic.cc
index d9b3f84b..118701b5 100644
--- a/src/piraha/Generic.cc
+++ b/src/piraha/Generic.cc
@@ -103,7 +103,7 @@ int main(int argc,char **argv) {
std::cout << "writing file: " << outFile << std::endl;
smart_ptr<Group> src_file =
new Group("annot:src_file",inputArg.c_str());
- mg->children.push_back(src_file);
+ mg->children->push_back(src_file);
if(perlFlag) {
mg->dumpPerl(o);
} else if(pythonFlag) {
diff --git a/src/piraha/Group.cc b/src/piraha/Group.cc
index c236bf4c..0396285c 100644
--- a/src/piraha/Group.cc
+++ b/src/piraha/Group.cc
@@ -12,15 +12,15 @@ void Group::dump(int n,std::ostream& o,int indent) {
o << "[" << n << "] ";
}
o << pattern << ": ";
- if(children.size()==0) {
+ if(children->size()==0) {
for(int i=start_;i<end_;i++)
o << input[i];
}
o << std::endl;
typedef vector<smart_ptr<Group> >::iterator group_iter;
int nn = 0;
- for(group_iter gi = children.begin();
- gi != children.end();
+ for(group_iter gi = children->begin();
+ gi != children->end();
++gi) {
(*gi)->dump(nn++,o,indent+2);
}
@@ -38,7 +38,7 @@ void Group::dumpPerl(std::ostream &o,int indent) {
for(int i=0;i<indent;i++)
o << ' ';
o << "name=> \"" << getPatternName() << "\"," << std::endl;
- if(children.size()==0) {
+ if(children->size()==0) {
for(int i=0;i<indent;i++)
o << ' ';
o << "children=>[]," << std::endl;
@@ -53,8 +53,8 @@ void Group::dumpPerl(std::ostream &o,int indent) {
o << ' ';
o << "children=>[" << std::endl;
typedef vector<smart_ptr<Group> >::iterator group_iter;
- for(group_iter gi = children.begin();
- gi != children.end();
+ for(group_iter gi = children->begin();
+ gi != children->end();
++gi) {
(*gi)->dumpPerl(o,indent+2);
for(int i=0;i<indent;i++)
@@ -92,7 +92,7 @@ void Group::dumpPython(std::ostream &o,int indent) {
for(int i=0;i<indent;i++)
o << ' ';
o << "'name' : \"" << getPatternName() << "\"," << std::endl;
- if(children.size()==0) {
+ if(children->size()==0) {
for(int i=0;i<indent;i++)
o << ' ';
o << "'children' : []," << std::endl;
@@ -107,8 +107,8 @@ void Group::dumpPython(std::ostream &o,int indent) {
o << ' ';
o << "'children' : [" << std::endl;
typedef vector<smart_ptr<Group> >::iterator group_iter;
- for(group_iter gi = children.begin();
- gi != children.end();
+ for(group_iter gi = children->begin();
+ gi != children->end();
++gi) {
(*gi)->dumpPython(o,indent+2);
for(int i=0;i<indent;i++)
@@ -154,3 +154,11 @@ int Group::line() {
}
return line;
}
+
+int Group::childCount() {
+ return children->size();
+}
+
+smart_ptr<Group> Group::child(int n) {
+ return (*children)[n];
+}
diff --git a/src/piraha/Lookup.cc b/src/piraha/Lookup.cc
index a1cc457c..24f56871 100644
--- a/src/piraha/Lookup.cc
+++ b/src/piraha/Lookup.cc
@@ -13,8 +13,8 @@ bool Lookup::match(Matcher *m) {
smart_ptr<Pattern> p = gram->patterns.get(name);
if(!p.valid()) std::cout << "Lookup of pattern [" << name << "] failed. Jmap = " << gram->patterns << std::endl;
assert(p.valid());
- vector<smart_ptr<Group> > chSave = m->children;
- m->children.clear();
+ smart_ptr<vector<smart_ptr<Group> > > chSave = m->children;
+ m->children = new vector<smart_ptr<Group> >();
int s = m->pos;
std::string save_name = m->inrule;
m->inrule += "::";
@@ -33,7 +33,7 @@ bool Lookup::match(Matcher *m) {
*/
smart_ptr<Group> g = new Group(name,m->input,s,e,m->children);
if(capture)
- chSave.push_back(g);
+ chSave->push_back(g);
}
m->children = chSave;
return b;
diff --git a/src/piraha/Matcher.cc b/src/piraha/Matcher.cc
index fd1b2355..71fb0550 100644
--- a/src/piraha/Matcher.cc
+++ b/src/piraha/Matcher.cc
@@ -27,11 +27,12 @@ bool Matcher::matchesTo(int match_to_) {
std::cout << g->patterns << std::endl;
}
assert(p.valid());
+ //packrat.clear();
pos = 0;
max_pos = -1;
match_to = match_to_;
err_pos = -1;
- children.clear();
+ children->clear();
bool b = p->match(this);
end_ = pos;
return b;
diff --git a/src/piraha/Multi.cc b/src/piraha/Multi.cc
index ae795645..f66466eb 100644
--- a/src/piraha/Multi.cc
+++ b/src/piraha/Multi.cc
@@ -4,13 +4,13 @@
using namespace cctki_piraha;
bool Multi::match(Matcher *m) {
- vector<smart_ptr<Group> > chSave;
+ int chSize;
for(int i=0;i<maxv;i++) {
int save = m->pos;
- chSave = m->children;
+ chSize = m->children->size();
if(!pattern->match(m) || m->pos == save) {
m->pos = save;
- m->children = chSave;
+ m->children->resize(chSize);
return i >= minv;
}
}
diff --git a/src/piraha/Or.cc b/src/piraha/Or.cc
index d9633e3e..6195958a 100644
--- a/src/piraha/Or.cc
+++ b/src/piraha/Or.cc
@@ -22,11 +22,10 @@ Or::Or(Pattern *p,...) : patterns() {
bool Or::match(Matcher *m) {
typedef vector<smart_ptr<Pattern> >::iterator pattern_iter;
int save = m->pos;
- vector<smart_ptr<Group> > chSave;
- chSave = m->children;
+ int chSave = m->children->size();
for(pattern_iter p = patterns.begin();p != patterns.end();++p) {
m->pos = save;
- m->children = chSave;
+ m->children->resize(chSave);
if((*p)->match(m))
return true;
}
diff --git a/src/piraha/Piraha.hpp b/src/piraha/Piraha.hpp
index 2e7dff90..deac2047 100644
--- a/src/piraha/Piraha.hpp
+++ b/src/piraha/Piraha.hpp
@@ -6,10 +6,12 @@
#include <vector>
#include <iostream>
#include <smart_ptr.hpp>
+#include <climits>
+#include <string.h>
namespace cctki_piraha {
-const int max_int = 10000;
+const int max_int = INT_MAX-1;
using std::map;
using std::vector;
@@ -33,16 +35,15 @@ public:
std::string pattern;
const char *input;
int start_,end_;
- vector<smart_ptr<Group> > children;
+ smart_ptr<vector<smart_ptr<Group> > > children;
Group(const char *p,const char *value)
- : pattern(p), input(value), start_(0) {
- for(end_=0;value[end_] != '\0';end_++);
+ : pattern(p), input(value), start_(0), end_(strlen(value)), children(new vector<smart_ptr<Group> >()) {
}
Group(std::string p,const char *input_)
- : pattern(p), input(input_), start_(0), end_(0), children() {}
+ : pattern(p), input(input_), start_(0), end_(0), children(new vector<smart_ptr<Group> >()) {}
Group(std::string p,const char *input_,int s,int e,
- vector<smart_ptr<Group> > ch)
+ smart_ptr<vector<smart_ptr<Group> > > ch)
: pattern(p), input(input_), start_(s), end_(e), children(ch) {}
virtual ~Group() {}
@@ -59,13 +60,13 @@ public:
void dumpPerl(std::ostream&o,int indent);
void dumpPython(std::ostream&o=std::cout);
void dumpPython(std::ostream&o,int indent);
- int groupCount() { return children.size(); }
- smart_ptr<Group> group(int i) { return children[i]; }
+ int groupCount() { return children->size(); }
+ smart_ptr<Group> group(int i) { return (*children)[i]; }
smart_ptr<Group> group(const char *nm,int ix=0) {
- for(unsigned int i=0;i<children.size();i++) {
- if(children[i]->getPatternName() == nm) {
+ for(unsigned int i=0;i<children->size();i++) {
+ if((*children)[i]->getPatternName() == nm) {
if(ix == 0) {
- return children[i];
+ return (*children)[i];
}
ix--;
}
@@ -350,6 +351,8 @@ class Matcher : public Group {
public:
Matcher(smart_ptr<Grammar> g,const char *pat_,const char *input_,int input_size=-1);
virtual ~Matcher() {}
+
+ //std::map<std::string,std::vector<smar_ptr<Group> > > packrat;
const char *input;
smart_ptr<Grammar> g;
int input_size;
diff --git a/src/piraha/smart_ptr.hpp b/src/piraha/smart_ptr.hpp
index 71e3b4d6..38c34b42 100644
--- a/src/piraha/smart_ptr.hpp
+++ b/src/piraha/smart_ptr.hpp
@@ -8,6 +8,8 @@
#define NULL ((void*)0)
#endif
+#define NDEBUG
+
namespace cctki_piraha {
// This global debug variable is used to detect the case
@@ -22,6 +24,7 @@ extern std::set<void*> *ptrs;
#ifndef NDEBUG
inline void add(void *t) {
+ abort();
if(t == NULL)
return;
if(ptrs == 0)