summaryrefslogtreecommitdiff
path: root/src/piraha/ReParse.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/piraha/ReParse.cc')
-rw-r--r--src/piraha/ReParse.cc190
1 files changed, 190 insertions, 0 deletions
diff --git a/src/piraha/ReParse.cc b/src/piraha/ReParse.cc
new file mode 100644
index 00000000..634e1f10
--- /dev/null
+++ b/src/piraha/ReParse.cc
@@ -0,0 +1,190 @@
+#include "Piraha.hpp"
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * Decode the single character denoted by a parsed literal group.
+ *
+ * - If the group has exactly one child, the child's text is read as a run of
+ *   hexadecimal digits and the resulting code is returned, truncated to char.
+ *   Characters that are not hex digits are skipped.
+ * - Otherwise, if the group's text is exactly two characters long, the second
+ *   character selects the result: n, r, t and b map to newline, carriage
+ *   return, tab and backspace; any other character stands for itself.
+ *   (Presumably the first character is a backslash; it is not checked here.)
+ * - Otherwise the first character of the group's text is returned.
+ */
+char getChar(smart_ptr<Group> gr) {
+    if(gr->groupCount()==1) {
+        std::string sub = gr->group(0)->substring();
+        int n = 0;
+        for(unsigned int i=0;i<sub.size();i++) {
+            char c = sub[i];
+            if(c >= '0' && c <= '9')
+                n = n*16+c-'0';
+            else if(c >= 'a' && c <= 'f')
+                n = n*16+c-'a'+10;
+            else if(c >= 'A' && c <= 'F')
+                n = n*16+c-'A'+10;
+        }
+        // Previously n was computed and then dropped, so control fell through
+        // to the escape handling below and returned a character taken from the
+        // raw text instead of the decoded value.
+        return static_cast<char>(n);
+    }
+    std::string gs = gr->substring();
+    if(gs.size()==2) {
+        char c = gs[1];
+        if(c == 'n')
+            return '\n';
+        else if(c == 'r')
+            return '\r';
+        else if(c == 't')
+            return '\t';
+        else if(c == 'b')
+            return '\b';
+        else
+            return c;
+    } else {
+        return gs[0];
+    }
+}
+/**
+ * Build the Multi pattern described by a quantifier group.
+ *
+ * The number of child groups selects the form:
+ *   - none: the group's text is "*", "+" or "?" giving the bounds
+ *           (0,max_int), (1,max_int) or (0,1);
+ *   - one:  the child holds a number used as both bounds;
+ *   - two:  the first child holds the lower bound; the upper bound comes from
+ *           the second child's first sub-group, or is max_int when the second
+ *           child has no sub-groups.
+ *
+ * Anything else is dumped via g->dump() and NULL is returned.
+ */
+smart_ptr<Pattern> mkMulti(smart_ptr<Group> g) {
+    const int kids = g->groupCount();
+
+    if(kids == 1) {
+        int exact = atol(g->group(0)->substring().c_str());
+        return new Multi(exact,exact);
+    }
+
+    if(kids == 2) {
+        int lower = atol(g->group(0)->substring().c_str());
+        bool hasUpper = g->group(1)->groupCount() > 0;
+        if(!hasUpper)
+            return new Multi(lower,max_int);
+        int upper = atol(g->group(1)->group(0)->substring().c_str());
+        return new Multi(lower,upper);
+    }
+
+    if(kids == 0) {
+        std::string sym = g->substring();
+        if(sym == "*")
+            return new Multi(0,max_int);
+        if(sym == "+")
+            return new Multi(1,max_int);
+        if(sym == "?")
+            return new Multi(0,1);
+    }
+
+    // Unrecognised quantifier: show the offending group and signal failure.
+    g->dump();
+    return NULL;
+}
+
+
+/**
+ * Parse a grammar definition held in memory and add its rules to g.
+ *
+ * The text is matched against the "file" rule of the grammar produced by
+ * AutoGrammar::fileParserGenerator(). For every top-level group of the match,
+ * child 0 supplies the rule name and child 1 is compiled into the rule's
+ * pattern, which is stored in g->patterns. g->default_rule is overwritten on
+ * every iteration, so it ends up naming the last rule processed.
+ *
+ * @param g           grammar that receives the compiled rules
+ * @param buffer      grammar text; must be NUL-terminated when buffersize < 0
+ * @param buffersize  number of bytes in buffer, or a negative value to have the
+ *                    length taken with strlen()
+ *
+ * On a parse failure the matcher's error is shown and the function asserts.
+ */
+void compileFile(smart_ptr<Grammar> g,const char *buffer,signed long buffersize) {
+    if(buffersize < 0)
+        buffersize = strlen(buffer);
+    smart_ptr<Grammar> grammar = AutoGrammar::fileParserGenerator();
+    smart_ptr<Matcher> m = new Matcher(grammar,"file",buffer,buffersize);
+    bool b = m->matches();
+    if(!b) {
+        m->showError();
+        assert(false);
+        // assert() compiles to nothing when NDEBUG is defined; without this
+        // return the loop below would compile groups from a failed match.
+        return;
+    }
+
+    for(int i=0;i<m->groupCount();i++) {
+        smart_ptr<Group> rule = m->group(i);
+        smart_ptr<Pattern> ptmp = ::compile(rule->group(1), false, g);
+        std::string nm = rule->group(0)->substring();
+        g->patterns.put(nm,ptmp);
+        g->default_rule = nm;
+    }
+}
+
+/**
+ * Translate one node of a parsed pattern expression into a Pattern object.
+ *
+ * Dispatches on the node's pattern name and recurses into its child groups.
+ *
+ * @param g        parse-tree node to translate
+ * @param ignCase  case-insensitivity flag: selects ILiteral over Literal and is
+ *                 forwarded to Bracket::addRange and BackRef
+ * @param gram     grammar handed to the Lookup patterns created for named
+ *                 references and for the "-skipper" rule
+ * @return the compiled pattern, or NULL when the pattern name is not
+ *         recognised or a quantifier could not be interpreted
+ */
+smart_ptr<Pattern> compile(smart_ptr<Group> g,bool ignCase,smart_ptr<Grammar> gram) {
+    std::string pn = g->getPatternName();
+    if("literal" == pn) {
+        char c = getChar(g);
+        if(ignCase)
+            return new ILiteral(c);
+        else
+            return new Literal(c);
+    } else if("pattern" == pn) {
+        if(g->groupCount()==0)
+            return new Nothing();
+        return compile(g->group(0),ignCase,gram);
+    } else if("pelem" == pn) {
+        // Two children: the element (child 0) followed by its quantifier (child 1).
+        if(g->groupCount()==2) {
+            smart_ptr<Pattern> pm = mkMulti(g->group(1));
+            Multi *m = (Multi *)pm.ptr();
+            // mkMulti returns NULL for a quantifier it cannot interpret.
+            // Report failure the same way unknown pattern names are reported
+            // below instead of writing through a null pointer.
+            if(m == NULL)
+                return NULL;
+            m->pattern = compile(g->group(0),ignCase,gram);
+            return pm;
+        }
+        return compile(g->group(0),ignCase,gram);
+    } else if("pelems" == pn||"pelems_top" == pn||"pelems_next" == pn) {
+        // A sequence of elements; a single element is returned unwrapped.
+        vector<smart_ptr<Pattern> > li;
+        for(int i=0;i<g->groupCount();i++) {
+            li.push_back(compile(g->group(i),ignCase,gram));
+        }
+        if(li.size()==1)
+            return li[0];
+        return new Seq(li,false,false);
+    } else if("group_inside" == pn||"group_top" == pn) {
+        // Alternatives; a single alternative is returned unwrapped.
+        if(g->groupCount()==1)
+            return compile(g->group(0),ignCase,gram);
+        vector<smart_ptr<Pattern> > li;
+        for(int i=0;i<g->groupCount();i++) {
+            li.push_back(compile(g->group(i),ignCase,gram));
+        }
+        Or *or_ = new Or(false,false);
+        or_->patterns = li;
+        smart_ptr<Pattern> orp = or_;
+        return orp;
+    } else if("group" == pn) {
+        Or *or_ = new Or(false,false);
+        // orp_ holds or_ from here on, including across the early returns below.
+        smart_ptr<Pattern> orp_ = or_;
+        bool ignC = ignCase;
+        smart_ptr<Group> inside = NULL;
+        if(g->groupCount()==2) {
+            // Child 0 is a modifier (case switch or look-ahead), child 1 the body.
+            ignC = or_->igcShow = true;
+            std::string ps = g->group(0)->getPatternName();
+            if(ps == "ign_on") {
+                ignC = or_->ignCase = true;
+            } else if(ps == "ign_off") {
+                ignC = or_->ignCase = false;
+            } else if(ps == "neglookahead") {
+                return new NegLookAhead(compile(g->group(1),ignCase,gram));
+            } else if(ps == "lookahead") {
+                return new LookAhead(compile(g->group(1),ignCase,gram));
+            }
+            inside = g->group(1);
+        } else {
+            inside = g->group(0);
+        }
+        for(int i=0;i<inside->groupCount();i++) {
+            or_->patterns.push_back(compile(inside->group(i),ignC,gram));
+        }
+        // Without a case modifier, a lone alternative needs no Or wrapper.
+        if(or_->igcShow == false && or_->patterns.size()==1)
+            return or_->patterns[0];
+        return orp_;
+    } else if("start" == pn) {
+        return new Start();
+    } else if("end" == pn) {
+        return new End();
+    } else if("boundary" == pn) {
+        return new Boundary();
+    } else if("charclass" == pn) {
+        Bracket *br = new Bracket();
+        smart_ptr<Pattern> brp = br;
+        int i=0;
+        // A leading "neg" child negates the class and is not itself a member.
+        if(g->groupCount()>0 && g->group(0)->getPatternName() == "neg") {
+            i++;
+            br->neg = true;
+        }
+        for(;i < g->groupCount();i++) {
+            std::string gn = g->group(i)->getPatternName();
+            if("range"==gn) {
+                char c0 = getChar(g->group(i)->group(0));
+                char c1 = getChar(g->group(i)->group(1));
+                br->addRange(c0, c1, ignCase);
+            } else {
+                // A single character is added as a one-element range.
+                char c = getChar(g->group(i));
+                br->addRange(c,c, ignCase);
+            }
+        }
+        return brp;
+    } else if("named" == pn) {
+        std::string lookup = g->group(0)->substring();
+        // The name "brk" yields a Break pattern rather than a rule lookup.
+        if("brk" == lookup)
+            return new Break();
+        return new Lookup(lookup, gram);
+    } else if("nothing" == pn) {
+        return new Nothing();
+    } else if("s" == pn||"s0" == pn) {
+        return new Lookup("-skipper", gram);
+    } else if("dot" == pn) {
+        return new Dot();
+    } else if("backref" == pn) {
+        // The second character of the text is taken as the decimal group number.
+        return new BackRef(g->substring()[1]-'0', ignCase);
+    }
+    return NULL;
+}