summaryrefslogtreecommitdiff
path: root/src/piraha/Piraha.hpp
blob: 004a3ba0a5f34767f4fecbb475e32153fbf4dcde (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
#ifndef PIRAHA_HPP
#define PIRAHA_HPP
#include <assert.h>
#include <map>
#include <string>
#include <vector>
#include <iostream>
#include <smart_ptr.hpp>

namespace cctki_piraha {

// Namespace-scope integer constant; it is not referenced anywhere else in this header.
// NOTE(review): presumably the stand-in for an "unbounded" repetition count
// (see Multi's max_ argument) -- confirm against the users of max_int.
const int max_int = 10000;

using std::map;
using std::vector;

// Upper-cases a single ASCII letter.
// Any character outside 'a'..'z' is handed back unchanged; no locale is consulted.
inline char uc_(char a) {
    const bool is_lower = ('a' <= a) && (a <= 'z');
    if(!is_lower)
        return a;
    return a + 'A' - 'a';
}

// Lower-cases a single ASCII letter.
// Any character outside 'A'..'Z' is handed back unchanged; no locale is consulted.
inline char lc_(char a) {
    const bool is_upper = ('A' <= a) && (a <= 'Z');
    if(!is_upper)
        return a;
    return a + 'a' - 'A';
}

// A node of a match result: the name of the pattern that produced it, the
// input buffer it refers to, a pair of integer offsets (start_, end_) and an
// ordered list of child groups.
// The input pointer is stored as-is; the buffer is not copied by this class.
class Group {
public:
    std::string pattern;
    const char *input;
    int start_;
    int end_;
    vector<smart_ptr<Group> > children;

    // Builds a childless group over a NUL-terminated string: start_ is 0 and
    // end_ is the number of characters before the terminating '\0'.
    Group(const char *p,const char *value)
        : pattern(p), input(value), start_(0), end_(0) {
        while(value[end_] != '\0')
            end_++;
    }

    // Builds an empty group (both offsets 0, no children).
    Group(std::string p,const char *input_)
        : pattern(p), input(input_), start_(0), end_(0), children() {}

    // Builds a group with explicit offsets s/e and a copy of the child list ch.
    Group(std::string p,const char *input_,int s,int e,
        vector<smart_ptr<Group> > ch)
        : pattern(p), input(input_), start_(s), end_(e), children(ch) {}

    virtual ~Group() {}

    // Plain accessors for the stored offsets.
    int start() {
        return start_;
    }
    int end() {
        return end_;
    }

    // The following members are only declared here; their definitions live elsewhere.
    int childCount();
    int line();
    std::string getPatternName();
    std::string substring();
    smart_ptr<Group> child(int i);
    void dump(std::ostream& o=std::cout);
    void dump(int n,std::ostream& o,int indent=0);
    void dumpPerl(std::ostream&o=std::cout);
    void dumpPerl(std::ostream&o,int indent);

    // Number of direct children (size of the children vector converted to int).
    int groupCount() {
        return children.size();
    }

    // Direct child by position; the index is not range-checked.
    smart_ptr<Group> group(int i) {
        return children[i];
    }

    // Returns the ix-th (0-based) direct child whose pattern name equals nm,
    // or a default-constructed smart_ptr when there are not enough matches.
    smart_ptr<Group> group(const char *nm,int ix=0) {
        const unsigned int total = children.size();
        for(unsigned int pos = 0; pos < total; pos++) {
            if(children[pos]->getPatternName() != nm)
                continue;
            // Skip matching children until the requested occurrence is reached.
            if(ix == 0)
                return children[pos];
            ix--;
        }
        smart_ptr<Group> none;
        return none;
    }
};

class Grammar;

class Matcher;

// Abstract base of every pattern element.
// Subclasses must implement match(); fmt() and insert() have placeholder
// defaults ("blank" and "{?}") for subclasses that do not override them.
class Pattern {
public:
    // Attempts to match against the state held by m; defined by each subclass.
    virtual bool match(Matcher *m)=0;

    Pattern() {}
    virtual ~Pattern() {}

    // Short textual description of the pattern.
    virtual std::string fmt() {
        return "blank";
    }

    // Writes a textual form of the pattern to o.
    virtual void insert(std::ostream& o) {
        o << "{?}";
    }
};

// Stream insertion for patterns: forwards to the virtual Pattern::insert so
// the most-derived textual form is written, then returns the stream to
// allow chaining.
inline std::ostream& operator<<(std::ostream& o,Pattern& p)
{
    p.insert(o);
    return o;
}

// Name -> pattern dictionary backed by a std::map.
// Stored pointers are asserted to be valid both when inserted and when read back.
class JMap {
    map<std::string,smart_ptr<Pattern> > m;
public:
    JMap() : m() {}

    // Looks up key. Returns NULL (converted to smart_ptr<Pattern>) when the
    // key is absent; otherwise returns the stored pointer.
    smart_ptr<Pattern> get(std::string key) {
        typedef map<std::string,smart_ptr<Pattern> >::iterator Iter;
        Iter found = m.find(key);
        if(found != m.end()) {
            smart_ptr<Pattern> res = found->second;
            assert(res.valid());
            return res;
        }
        return NULL;
    }

    // Stores p under key, replacing any previous entry. p must be valid.
    void put(std::string key,smart_ptr<Pattern> p) {
        assert(p.valid());
        m[key] = p;
    }

    // The stream operator prints the key set and needs access to m.
    friend std::ostream& operator<<(std::ostream&,JMap&);
};
inline std::ostream& operator<<(std::ostream& o,JMap& jmap) {
    typedef map<std::string,smart_ptr<Pattern> >::iterator mit;
    mit mb = jmap.m.begin();
    mit me = jmap.m.end();
    o << "{";
	for(mit i = mb; i != me;++i) {
		o << "[" << i->first << "]";
	}
	o << "}";
	return o;
}


// A grammar: a table of named patterns plus the name of a default rule.
// NOTE(review): default_rule is presumably the rule used when a caller does
// not name one -- confirm where it is read.
class Grammar {
public:
    Grammar() {}
    virtual ~Grammar() {}

    // Named patterns registered with this grammar.
    JMap patterns;

    // Name of the default rule (empty until assigned).
    std::string default_rule;
};

class Seq : public Pattern {
    vector<smart_ptr<Pattern> > patterns;
public:
    Seq(Pattern *p,...);
    Seq(vector<smart_ptr<Pattern> > patterns,bool ign,bool show);
    virtual ~Seq() {}
    bool match(Matcher *m);
    virtual void insert(std::ostream& o) {
        for(unsigned int i=0;i<patterns.size();i++)
            o << *patterns[i];
    }
};

class Or : public Pattern {
public:
    vector<smart_ptr<Pattern> > patterns;
    bool ignCase, igcShow;
    Or(bool ign,bool show) : ignCase(ign), igcShow(show) {}
    Or(Pattern *p,...);
    virtual ~Or() {}
    bool match(Matcher *m);
    virtual void insert(std::ostream& o) {
        o << "(";
        for(unsigned int i=0;i<patterns.size();i++) {
            if(i > 0) o << "|";
            o << *patterns[i];
        }
        o << ")";
    }
};

// A pattern holding exactly one character.
class Literal : public Pattern {
public:
    const char c;
    Literal(char b) : c(b) {}
    bool match(Matcher *m);

    // Returns "literal(<c>)".
    std::string fmt() {
        std::string s = "literal(";
        // Append the character and the closing parenthesis separately.
        // Writing `c + ")"` would add the char's integer value to the address
        // of the string literal and read outside it.
        s += c;
        s += ")";
        return s;
    }

    // Writes the character in pattern-source form: newline, carriage return,
    // tab and backspace as two-character escapes, ASCII letters and digits
    // verbatim, and anything else prefixed with a backslash.
    virtual void insert(std::ostream& o) {
        if(c == '\n')
            o << "\\n";
        else if(c == '\r')
            o << "\\r";
        else if(c == '\t')
            o << "\\t";
        else if(c == '\b')
            o << "\\b";
        else if(c >= 'a' && c <= 'z')
            o << c;
        else if(c >= 'A' && c <= 'Z')
            o << c;
        else if(c >= '0' && c <= '9')
            o << c;
        else
            o << "\\" << c;
    }
};

// A pattern holding a pair of characters, lc and uc.
// NOTE(review): the constructor is defined elsewhere; presumably lc/uc are the
// lower- and upper-case forms of b -- confirm.
class ILiteral : public Pattern {
public:
    const char lc,uc;
    ILiteral(char b);
    bool match(Matcher *m);

    // Returns "Iliteral(<lc>,<uc>)".
    std::string fmt() {
        std::string s("Iliteral(");
        s.push_back(lc);
        s.append(",");
        s.push_back(uc);
        s.append(")");
        return s;
    }

    // Writes "[<lc><uc>]" when the two characters differ. Otherwise writes
    // the single character: newline, carriage return, tab and backspace as
    // escapes, ASCII letters and digits verbatim, anything else prefixed
    // with a backslash.
    virtual void insert(std::ostream& o) {
        if(lc != uc) {
            o << "[" << lc << uc << "]";
            return;
        }
        const char c = lc;
        switch(c) {
        case '\n': o << "\\n"; return;
        case '\r': o << "\\r"; return;
        case '\t': o << "\\t"; return;
        case '\b': o << "\\b"; return;
        default: break;
        }
        const bool plain = (c >= 'a' && c <= 'z')
                        || (c >= 'A' && c <= 'Z')
                        || (c >= '0' && c <= '9');
        if(plain)
            o << c;
        else
            o << "\\" << c;
    }
};

// A reference to a named rule, holding the rule name and a grammar handle.
// NOTE(review): the constructor and match() are defined elsewhere; presumably
// the name is resolved in gram at match time -- confirm.
class Lookup : public Pattern {
    smart_ptr<Grammar> gram;
    std::string name;
    bool capture;
public:
    Lookup(std::string s,smart_ptr<Grammar> g);
    virtual ~Lookup() {}
    bool match(Matcher *m);

    // Returns "Literal:<name>".
    // NOTE(review): the "Literal:" prefix looks copied from another class;
    // it is kept unchanged here because callers may rely on the exact text.
    std::string fmt() {
        std::string text("Literal:");
        text += name;
        return text;
    }

    // Writes the rule reference as "{<name>}".
    virtual void insert(std::ostream& o) {
        o << "{";
        o << name;
        o << "}";
    }
};

// A pattern that always succeeds; match() ignores the matcher entirely.
// insert()/fmt() are inherited from Pattern.
class Nothing : public Pattern {
public:
    Nothing() {}

    bool match(Matcher *m) {
        return true;
    }
};

// Pattern printed as "^".
// NOTE(review): match() is defined elsewhere; presumably it anchors at the
// start of the input -- confirm.
class Start : public Pattern {
public:
    Start() {}
    bool match(Matcher *m);

    virtual void insert(std::ostream& o) {
        o << "^";
    }
};

// Pattern printed as "$".
// NOTE(review): match() is defined elsewhere; presumably it anchors at the
// end of the input -- confirm.
class End : public Pattern {
public:
    End() {}
    bool match(Matcher *m);

    virtual void insert(std::ostream& o) {
        o << "$";
    }
};

// Pattern printed as ".".
// NOTE(review): match() is defined elsewhere; presumably it accepts any single
// character -- confirm which characters (e.g. newline) are excluded.
class Dot : public Pattern {
public:
    Dot() {}
    bool match(Matcher *m);

    virtual void insert(std::ostream& o) {
        o << ".";
    }
};

// A sub-pattern together with a pair of integer bounds (minv, maxv), printed
// as "<pattern>{minv,maxv}".
// NOTE(review): match() is defined elsewhere; presumably the bounds are the
// minimum and maximum repetition counts -- confirm.
class Multi : public Pattern {
    const int minv,maxv;
public:
    smart_ptr<Pattern> pattern;

    // Leaves pattern NULL; it has to be assigned before insert() is called,
    // because insert() dereferences it unconditionally.
    Multi(int min_,int max_)
        : minv(min_), maxv(max_), pattern(NULL) {}

    // Wraps the raw pointer p in the smart_ptr member.
    Multi(Pattern *p,int min_,int max_)
        : minv(min_), maxv(max_), pattern(p) {}

    virtual ~Multi() {}
    bool match(Matcher *m);

    virtual void insert(std::ostream& o) {
        o << *pattern;
        o << "{" << minv;
        o << "," << maxv;
        o << "}";
    }
};

// A pair of characters (lo, hi).
// NOTE(review): match() is defined elsewhere; presumably it accepts one
// character c with lo <= c <= hi -- confirm whether the bounds are inclusive.
// insert() is not overridden, so a Range prints as Pattern's default "{?}".
class Range : public Pattern {
public:
    char lo;
    char hi;
    bool match(Matcher *m);

    Range(char lo_,char hi_)
        : lo(lo_), hi(hi_) {}
};

class Bracket : public Pattern {
public:
    bool neg;
    vector<smart_ptr<Range> > ranges;
    Bracket() : neg(false) {}
    virtual ~Bracket() {}
    Bracket(bool b);
    Bracket *addRange(char lo,char hi);
    Bracket *addRange(char lo,char hi,bool ign);
    bool match(Matcher *m);
    virtual void insert(std::ostream& o);
};

// Wraps a sub-pattern for negative look-ahead.
// NOTE(review): match() is defined elsewhere; presumably it succeeds only when
// the wrapped pattern fails, without consuming input -- confirm.
// insert() is not overridden, so this prints as Pattern's default "{?}".
class NegLookAhead : public Pattern {
public:
    smart_ptr<Pattern> pattern;

    NegLookAhead(smart_ptr<Pattern> p)
        : pattern(p) {}

    virtual ~NegLookAhead() {}
    bool match(Matcher *m);
};

// Wraps a sub-pattern for (positive) look-ahead. Matching is not implemented yet.
class LookAhead : public Pattern {
public:
    smart_ptr<Pattern> pattern;
    LookAhead(smart_ptr<Pattern> p) : pattern(p) {}
    virtual ~LookAhead() {}

    // Unimplemented: aborts via assert in debug builds.
    // The explicit return keeps the function well-defined when NDEBUG
    // removes the assert; it then reports "no match".
    bool match(Matcher *m) {
        assert(false);//TODO: Fill in
        return false;
    }
};

// Pattern whose match() is defined elsewhere.
// NOTE(review): presumably a word-boundary test -- confirm in the implementation.
// match() is declared without an access specifier, so it is private in this
// class and can only be reached through the public virtual Pattern::match.
class Boundary : public Pattern {
    virtual bool match(Matcher *m);
};

// Placeholder pattern; matching is not implemented yet.
// match() stays private as in the original declaration, so it is only
// reachable through the public virtual Pattern::match.
class Break : public Pattern {
    // Unimplemented: aborts via assert in debug builds.
    // The explicit return keeps the function well-defined when NDEBUG
    // removes the assert; it then reports "no match".
    virtual bool match(Matcher *m) {
        assert(false);//TODO: Fill in
        return false;
    }
};

// Back-reference pattern holding an integer index and a case-insensitivity
// flag. Matching is not implemented yet.
class BackRef : public Pattern {
public:
    int index;
    bool ignCase;
    BackRef(int in,bool ign) : index(in), ignCase(ign) {}

    // Unimplemented: aborts via assert in debug builds.
    // The explicit return keeps the function well-defined when NDEBUG
    // removes the assert; it then reports "no match".
    virtual bool match(Matcher *m) {
        assert(false);//TODO: Fill in
        return false;
    }
};

// Holder for two static factory functions that each return a Grammar.
// Both are defined elsewhere.
// NOTE(review): judging by the names, one builds the grammar for the pattern
// language itself and the other the grammar for grammar files -- confirm.
class AutoGrammar {
public:
    static smart_ptr<Grammar> reparserGenerator();
    static smart_ptr<Grammar> fileParserGenerator();
};

// Matching state for one grammar/input pair. A Matcher is itself a Group, so
// it also carries Group's pattern name, offsets and children.
// All member functions are defined elsewhere; the notes below about their
// purpose are assumptions drawn from names and should be confirmed there.
class Matcher : public Group {
public:
    // NOTE(review): input_size defaults to -1 -- presumably "determine the
    // length from the NUL terminator"; confirm in the definition.
    Matcher(smart_ptr<Grammar> g,const char *pat_,const char *input_,int input_size=-1);
    virtual ~Matcher() {}
    // Hides the member of the same name inherited from Group; code that goes
    // through a Group pointer/reference sees Group::input, not this one.
    const char *input;
    smart_ptr<Grammar> g;
    int input_size;
    // NOTE(review): presumably the current offset into input.
    int pos;
    // NOTE(review): presumably the furthest offset reached, for error reporting.
    int max_pos;
    const char *pat;
    bool matches();
    // NOTE(review): presumably accumulates the characters that would have been
    // accepted at the failure position.
    Bracket expected;
    void showError();
    std::string inrule;
    std::string inrule_max;
    int err_pos;
    // Two ways to report a failure: with a whole Bracket or a single lo..hi range.
    void fail(Bracket *ex);
    void fail(char lo,char hi);
};

// Free declarations; all definitions live elsewhere.

// A namespace-scope Grammar handle.
// NOTE(review): presumably the grammar used to parse pattern source -- confirm.
extern smart_ptr<Grammar> pegGrammar;
// Overload returning a Pattern built from a Group, a case flag and a grammar.
extern smart_ptr<Pattern> compile(smart_ptr<Group> g,bool ignCase,smart_ptr<Grammar> gram);
// NOTE(review): buffersize defaults to -1 -- presumably "buffer is
// NUL-terminated, compute its length"; confirm in the definition.
extern void compileFile(smart_ptr<Grammar> g,const char *buffer,signed long buffersize=-1);
// Overloads taking a grammar and a rule name; the pattern is given either as
// text or as a Group. Function declarations have external linkage by default,
// so the missing `extern` makes no difference.
void compile(smart_ptr<Grammar> thisg,std::string name,std::string pattern);
void compile(smart_ptr<Grammar> thisg,std::string name,smart_ptr<Group> pattern);
// NOTE(review): presumably writes c to o in escaped pattern-source form -- confirm.
void insertc(std::ostream& o,char c);

}

#endif