001/**
002 * Copyright (c) 2001, Sergey A. Samokhodkin
003 * All rights reserved.
004 * <p>
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 * <p>
008 * - Redistributions of source code must retain the above copyright notice,
009 * this list of conditions and the following disclaimer.
010 * - Redistributions in binary form
011 * must reproduce the above copyright notice, this list of conditions and the following
012 * disclaimer in the documentation and/or other materials provided with the distribution.
013 * - Neither the name of jregex nor the names of its contributors may be used
014 * to endorse or promote products derived from this software without specific prior
015 * written permission.
016 * <p>
017 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
018 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
019 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
020 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
021 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
022 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
023 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
024 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
025 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
026 *
027 * @version 1.2_01
028 */
029
030package regexodus;
031
032import regexodus.ds.IntBitSet;
033
034import java.util.ArrayList;
035import java.util.Arrays;
036import java.util.HashMap;
037
038public class Term implements REFlags {
039
    // Runtime Term types: the values stored in Term.type.
    // The numbering is not contiguous: value 3 is not assigned in this list.
    static final int CHAR = 0;
    static final int BITSET = 1;
    static final int BITSET2 = 2;
    static final int ANY_CHAR = 4;
    static final int ANY_CHAR_NE = 5;

    static final int REG = 6;
    static final int REG_I = 7;
    static final int FIND = 8;
    static final int FINDREG = 9;
    static final int SUCCESS = 10;

    /* Optimization-transparent types: the range FIRST_TRANSPARENT..LAST_TRANSPARENT (11..31). */
    static final int BOUNDARY = 11;
    static final int DIRECTION = 12;
    static final int UBOUNDARY = 13;
    static final int UDIRECTION = 14;

    static final int GROUP_IN = 15;
    static final int GROUP_OUT = 16;
    static final int VOID = 17; // default value of Term.type

    static final int START = 18;
    static final int END = 19;
    static final int END_EOL = 20;
    static final int LINE_START = 21;
    static final int LINE_END = 22;
    static final int LAST_MATCH_END = 23;

    static final int CNT_SET_0 = 24;
    static final int CNT_INC = 25;
    static final int CNT_GT_EQ = 26;
    static final int READ_CNT_LT = 27;

    static final int CRSTORE_CRINC = 28; // store on 'actual' search entry
    static final int CR_SET_0 = 29;
    static final int CR_LT = 30;
    static final int CR_GT_EQ = 31;


    // Literal-section markers. Their values (60, 61) lie outside the transparent range
    // above; the text-parsing append(...) uses them only to switch LITERAL_FLAG on and
    // off and does not add such terms to the graph.
    static final int LITERAL_START = 60;
    static final int LITERAL_END = 61;

    /* Optimization-nontransparent types. */
    static final int BRANCH = 32;
    static final int BRANCH_STORE_CNT = 33;
    static final int BRANCH_STORE_CNT_AUX1 = 34;

    static final int PLOOKAHEAD_IN = 35;
    static final int PLOOKAHEAD_OUT = 36;
    static final int NLOOKAHEAD_IN = 37;
    static final int NLOOKAHEAD_OUT = 38;
    static final int PLOOKBEHIND_IN = 39;
    static final int PLOOKBEHIND_OUT = 40;
    static final int NLOOKBEHIND_IN = 41;
    static final int NLOOKBEHIND_OUT = 42;
    static final int INDEPENDENT_IN = 43; // functionally the same as NLOOKAHEAD_IN
    static final int INDEPENDENT_OUT = 44;

    static final int REPEAT_0_INF = 45;
    static final int REPEAT_MIN_INF = 46;
    static final int REPEAT_MIN_MAX = 47;
    static final int REPEAT_REG_MIN_INF = 48;
    static final int REPEAT_REG_MIN_MAX = 49;

    static final int BACKTRACK_0 = 50;
    static final int BACKTRACK_MIN = 51;
    static final int BACKTRACK_FIND_MIN = 52;
    static final int BACKTRACK_FINDREG_MIN = 53;
    static final int BACKTRACK_REG_MIN = 54;

    static final int MEMREG_CONDITION = 55;
    static final int LOOKAHEAD_CONDITION_IN = 56;
    static final int LOOKAHEAD_CONDITION_OUT = 57;
    static final int LOOKBEHIND_CONDITION_IN = 58;
    static final int LOOKBEHIND_CONDITION_OUT = 59;

    // Optimization: inclusive bounds of the "transparent" type range declared above.
    static final int FIRST_TRANSPARENT = BOUNDARY;
    static final int LAST_TRANSPARENT = CR_GT_EQ;
121
    // Compile-time: required length of the vars[] state array (checked in makeTree()).
    private static final int VARS_LENGTH = 4;

    // Compile-time variable indices into vars[]:
    private static final int MEMREG_COUNT = 0;    // next free memreg (capturing group) index
    private static final int CNTREG_COUNT = 1;   // number of counters allocated so far
    private static final int DEPTH = 2;      // current group nesting depth: (((depth=3)))
    private static final int LOOKAHEAD_COUNT = 3;    // next free lookahead/lookbehind/independent-group id

    // Layout of the limits[] array used for {m,n} quantifiers: [0] = min, [1] = max
    // (initialised to -1 before parsing), [LIMITS_PARSE_RESULT_INDEX] = parse outcome.
    private static final int LIMITS_LENGTH = 3;
    private static final int LIMITS_PARSE_RESULT_INDEX = 2;
    private static final int LIMITS_OK = 1;
    private static final int LIMITS_FAILURE = 2;

    // Internal bit OR-ed into the local 'flags' while parsing a literal section
    // (between LITERAL_START and LITERAL_END terms).
    // NOTE(review): assumed not to collide with any REFlags bit -- confirm against REFlags.
    private static final int LITERAL_FLAG = 64;
137
138    //static CustomParser[] customParsers=new CustomParser[256];
139
    // **** CONTROL FLOW ****

    // next-to-execute and next-if-failed commands
    Term next, failNext;

    // **** TYPES ****

    // one of the runtime type constants declared above; VOID until set
    int type = VOID;
    boolean inverse;

    // used with type=CHAR
    char c;

    // used with type=FIND
    int distance;
    boolean eat;

    // used with type=BITSET(2)
    IntBitSet bitset;
    IntBitSet[] bitset2;
    private boolean[] categoryBitset;  // types (unicode categories)

    // used with type=BALANCE
    // NOTE(review): no BALANCE type constant is declared in the visible part of this class.
    private char[] brackets;

    // used for optimization with type=BITSET,BITSET2
    int weight;

    // **** MEMORISATION ****

    // memory slot, used with type=REG,GROUP_IN,GROUP_OUT; -1 until assigned
    int memreg = -1;


    // **** COUNTERS ****

    // max|min number of iterations
    // used with CNT_GT_EQ, REPEAT_* etc.
    int minCount, maxCount;

    // used with REPEAT_*,REPEAT_REG_*
    Term target;

    // a counter slot to increment & compare with maxCount (CNT_INC etc.)
    int cntreg = 0;

    // lookahead group id
    int lookaheadId;

    // **** COMPILE HELPERS ****

    // predecessor in the chain being built; set by link() and append(Term)
    Term prev;
    // entry term of this construct; the constructor points it at the term itself
    Term in;
    // exit terms: linkd() sets out.next and out1.next (when non-null) to the successor
    Term out;
    Term out1;
    protected Term first;
    // the sub-term most recently appended to (or substituted in) this term
    Term current;

    // new: term whose failNext linkd() sets to the successor (when non-null)
    Term branchOut;

    //protected  boolean newBranch=false,closed=false;
    //protected  boolean newBranch=false;

    // for debugging: running instance counter (reset by makeTree(String, ...)) and this term's number
    private static int instances;
    private int instanceNum;
207
208    Term() {
209        //for debugging
210        instanceNum = instances;
211        instances++;
212        in = out = this;
213    }
214
215    Term(int type) {
216        this();
217        this.type = type;
218    }
219
220    static void makeTree(String s, int flags, Pattern re) throws PatternSyntaxException {
221        instances = 0;
222        char[] data = s.toCharArray();
223        makeTree(data, 0, data.length, flags, re);
224    }
225
226    private static void makeTree(char[] data, int offset, int end,
227                                 int flags, Pattern re) throws PatternSyntaxException {
228        // memreg,counter,depth,lookahead
229        int[] vars = {1, 0, 0, 0}; //don't use counters[0]
230
231        //collect iterators for subsequent optimization
232        ArrayList<TermIterator> iterators = new ArrayList<TermIterator>();
233        HashMap<String, Integer> groupNames = new HashMap<String, Integer>();
234
235        Pretokenizer t = new Pretokenizer(data, offset, end);
236        Term term = makeTree(t, data, vars, flags, new Group(), iterators, groupNames);
237
238        // convert closing outer bracket into success term
239        term.out.type = SUCCESS;
240
241        //throw out opening bracket
242        Term first = term.next;
243
244        // Optimisation:
245        //Term optimized = first;
246        //Optimizer opt = Optimizer.find(first);
247        //if (opt != null) optimized = opt.makeFirst(first);
248
249        for (TermIterator i : iterators) {
250            i.optimize();
251        }
252
253        //re.root = optimized;
254        re.root = first;
255        re.root0 = first;
256        re.memregs = vars[MEMREG_COUNT];
257        re.counters = vars[CNTREG_COUNT];
258        re.lookaheads = vars[LOOKAHEAD_COUNT];
259        re.namedGroupMap = groupNames;
260    }
261
    /**
     * Builds the contents of one group: reads tokens from {@code t} until the group
     * is closed, attaching everything found to {@code term}, and returns {@code term}.
     * <p>
     * On every iteration the text span reported with the token
     * ({@code data[t.tOffset..t.tOutside)}, apparently the plain text leading up to it)
     * is parsed by the text-level {@code append(...)}; the token type then decides
     * what happens next. Group-opening tokens recurse with a fresh container term and
     * the result is appended; {@code '|'} starts a new alternative; {@code ')'} or the
     * end of input closes {@code term} and returns.
     *
     * @param t          token source, positioned before the first token of this group
     * @param data       pattern characters the token offsets refer to
     * @param vars       shared compile-time state, indexed by MEMREG_COUNT, CNTREG_COUNT,
     *                   DEPTH and LOOKAHEAD_COUNT; must have length VARS_LENGTH
     * @param flags      flags in effect at the start of this group (may be changed
     *                   locally by FLAGS tokens)
     * @param term       container (group, lookaround, conditional, ...) being filled
     * @param iterators  collects iterators created while parsing, for later optimization
     * @param groupNames map from group name to memreg index; extended by new named groups
     * @return {@code term}, closed
     * @throws PatternSyntaxException   on unbalanced parentheses, group redeclaration,
     *                                  malformed conditions or unknown token types
     * @throws IllegalArgumentException if {@code vars} has the wrong length
     */
    private static Term makeTree(Pretokenizer t, char[] data, int[] vars,
                                 int flags, Term term, ArrayList<TermIterator> iterators, HashMap<String, Integer> groupNames) throws PatternSyntaxException {
        if (vars.length != VARS_LENGTH)
            throw new IllegalArgumentException("vars.length should be " + VARS_LENGTH + ", not " + vars.length);
        //Term term=new Term(isMemReg? vars[MEMREG_COUNT]: -1);
        // use memreg 0 as insignificant
        //Term term=new Group(isMemReg? vars[MEMREG_COUNT]: 0);
        while (true) {
            t.next();
            // first consume the text span that comes with this token
            term.append(t.tOffset, t.tOutside, data, vars, flags, iterators, groupNames);
            switch (t.ttype) {
                case Pretokenizer.FLAGS:
                    // inline flag change: affects the rest of this group only ('flags' is local)
                    flags = t.flags(flags);
                    continue;
                case Pretokenizer.CLASS_GROUP:
                    // character class: the following token's span is handed to the class parser
                    t.next();
                    Term clg = new Term();
                    CharacterClass.parseGroup(data, t.tOffset, t.tOutside, clg,
                            (flags & IGNORE_CASE) > 0, (flags & IGNORE_SPACES) > 0,
                            (flags & UNICODE) > 0, (flags & XML_SCHEMA) > 0);
                    term.append(clg);
                    continue;
                case Pretokenizer.PLAIN_GROUP:
                    // group created with the no-argument Group constructor; flags adjusted by the token
                    vars[DEPTH]++;
                    term.append(makeTree(t, data, vars, t.flags(flags), new Group(), iterators, groupNames));
                    break;
                case Pretokenizer.NAMED_GROUP:
                    String gname = t.groupName;
                    int id;
                    // NOTE(review): charAt(0) assumes the pretokenizer never yields an empty name -- confirm
                    if (Character.isDigit(gname.charAt(0))) {
                        // numeric "name": the number itself is the memreg index
                        try {
                            id = Integer.parseInt(gname);
                        } catch (NumberFormatException e) {
                            throw new PatternSyntaxException("group name starts with digit but is not a number");
                        }
                        if (groupNames.containsValue(id)) {
                            if (t.groupDeclared)
                                throw new PatternSyntaxException("group redeclaration: " + gname + "; use ({=id}...) for multiple group assignments");
                        }
                        // keep the next free memreg index above any explicitly numbered group
                        if (vars[MEMREG_COUNT] <= id) vars[MEMREG_COUNT] = id + 1;
                    } else {
                        Integer no = groupNames.get(gname);
                        if (no == null) {
                            // first occurrence of this name: allocate a new memreg and record it
                            id = vars[MEMREG_COUNT]++;
                            groupNames.put(t.groupName, id);
                        } else {
                            // known name: only allowed when the token does not declare the group anew
                            if (t.groupDeclared)
                                throw new PatternSyntaxException("group redeclaration " + gname + "; use ({=name}...) for group reassignments");
                            id = no;
                        }
                    }
                    vars[DEPTH]++;
                    term.append(makeTree(t, data, vars, flags, new Group(id), iterators, groupNames));
                    break;
                case '(':
                    // ordinary capturing group: takes the next free memreg index
                    vars[DEPTH]++;
                    term.append(makeTree(t, data, vars, flags, new Group(vars[MEMREG_COUNT]++), iterators, groupNames));
                    break;
                // lookarounds and independent groups all draw their id from LOOKAHEAD_COUNT
                case Pretokenizer.POS_LOOKAHEAD:
                    vars[DEPTH]++;
                    term.append(makeTree(t, data, vars, flags, new Lookahead(vars[LOOKAHEAD_COUNT]++, true), iterators, groupNames));
                    break;
                case Pretokenizer.NEG_LOOKAHEAD:
                    vars[DEPTH]++;
                    term.append(makeTree(t, data, vars, flags, new Lookahead(vars[LOOKAHEAD_COUNT]++, false), iterators, groupNames));
                    break;
                case Pretokenizer.POS_LOOKBEHIND:
                    vars[DEPTH]++;
                    term.append(makeTree(t, data, vars, flags, new Lookbehind(vars[LOOKAHEAD_COUNT]++, true), iterators, groupNames));
                    break;
                case Pretokenizer.NEG_LOOKBEHIND:
                    vars[DEPTH]++;
                    term.append(makeTree(t, data, vars, flags, new Lookbehind(vars[LOOKAHEAD_COUNT]++, false), iterators, groupNames));
                    break;
                case Pretokenizer.INDEPENDENT_REGEX:
                    vars[DEPTH]++;
                    term.append(makeTree(t, data, vars, flags, new IndependentGroup(vars[LOOKAHEAD_COUNT]++), iterators, groupNames));
                    break;
                case Pretokenizer.CONDITIONAL_GROUP:
                    // depth for the conditional group itself; the condition is read from the next token
                    vars[DEPTH]++;
                    t.next();
                    Term fork;
                    boolean positive = true;
                    switch (t.ttype) {
                        case Pretokenizer.NEG_LOOKAHEAD:
                            positive = false;
                            // deliberate fall-through: shares the lookahead handling below
                        case Pretokenizer.POS_LOOKAHEAD:
                            // the condition is a nested group of its own, hence a second depth increment;
                            // the recursive call decrements it again when it reaches the matching ')'
                            vars[DEPTH]++;
                            Lookahead la = new Lookahead(vars[LOOKAHEAD_COUNT]++, positive);
                            makeTree(t, data, vars, flags, la, iterators, groupNames);
                            fork = new ConditionalExpr(la);
                            break;
                        case Pretokenizer.NEG_LOOKBEHIND:
                            positive = false;
                            // deliberate fall-through: shares the lookbehind handling below
                        case Pretokenizer.POS_LOOKBEHIND:
                            vars[DEPTH]++;
                            Lookbehind lb = new Lookbehind(vars[LOOKAHEAD_COUNT]++, positive);
                            makeTree(t, data, vars, flags, lb, iterators, groupNames);
                            fork = new ConditionalExpr(lb);
                            break;
                        case '(':
                            // "(id)" or "(name)" condition: the group reference is the span before the ')'
                            t.next();
                            if (t.ttype != ')') throw new PatternSyntaxException("malformed condition");
                            int memregNo;
                            // NOTE(review): makeNumber returns -1 when a non-digit follows the leading
                            // digit; that value is passed on to ConditionalExpr unchecked
                            if (Character.isDigit(data[t.tOffset])) memregNo = makeNumber(t.tOffset, t.tOutside, data);
                            else {
                                String gn = new String(data, t.tOffset, t.tOutside - t.tOffset);
                                Integer gno = groupNames.get(gn);
                                if (gno == null)
                                    throw new PatternSyntaxException("unknown group name in conditional expr.: " + gn);
                                memregNo = gno;
                            }
                            fork = new ConditionalExpr(memregNo);
                            break;
                        default:
                            throw new PatternSyntaxException("malformed conditional expression: " + t.ttype + " '" + (char) t.ttype + "'");
                    }
                    // the rest of the conditional group is parsed into the fork
                    term.append(makeTree(t, data, vars, flags, fork, iterators, groupNames));
                    break;
                case '|':
                    term.newBranch();
                    break;
                case Pretokenizer.END:
                    // end of input is only legal at the outermost level
                    if (vars[DEPTH] > 0) throw new PatternSyntaxException("unbalanced parenthesis");
                    term.close();
                    return term;
                case ')':
                    // a closing bracket is only legal inside a group
                    if (vars[DEPTH] <= 0) throw new PatternSyntaxException("unbalanced parenthesis");
                    term.close();
                    vars[DEPTH]--;
                    return term;
                case Pretokenizer.COMMENT:
                    // skip tokens up to the ')' that ends the comment
                    // NOTE(review): relies on the pretokenizer eventually producing ')' (or throwing) -- confirm
                    while (t.ttype != ')') t.next();
                    continue;
                default:
                    throw new PatternSyntaxException("unknown token type: " + t.ttype);
            }
        }
    }
401
402    private static int makeNumber(int off, int out, char[] data) {
403        int n = 0;
404        for (int i = off; i < out; i++) {
405            int d = data[i] - '0';
406            if (d < 0 || d > 9) return -1;
407            n *= 10;
408            n += d;
409        }
410        return n;
411    }
412
413    private void append(int offset, int end, char[] data,
414                        int[] vars, int flags, ArrayList<TermIterator> iterators, HashMap<String, Integer> gmap) throws PatternSyntaxException {
415        int[] limits = new int[3];
416        int i = offset;
417        Term tmp, current = this.current;
418        while (i < end) {
419            char c = data[i];
420            boolean greedy = true;
421            if((flags & LITERAL_FLAG) != LITERAL_FLAG) {
422                switch (c) {
423                    //operations
424                    case '*':
425                        if (current == null) throw new PatternSyntaxException("missing term before *");
426                        i++;
427                        if (i < end && data[i] == '?') {
428                            greedy = false;
429                            i++;
430                        }
431                        tmp = greedy ? makeGreedyStar(vars, current, iterators) :
432                                makeLazyStar(vars, current);
433                        current = replaceCurrent(tmp);
434                        break;
435
436                    case '+':
437                        if (current == null) throw new PatternSyntaxException("missing term before +");
438                        i++;
439                        if (i < end && data[i] == '?') {
440                            greedy = false;
441                            i++;
442                        }
443                        tmp = greedy ? makeGreedyPlus(vars, current, iterators) :
444                                makeLazyPlus(vars, current);
445                        current = replaceCurrent(tmp);
446                        break;
447
448                    case '?':
449                        if (current == null) throw new PatternSyntaxException("missing term before ?");
450                        i++;
451                        if (i < end && data[i] == '?') {
452                            greedy = false;
453                            i++;
454                        }
455
456                        tmp = greedy ? makeGreedyQMark(vars, current) :
457                                makeLazyQMark(vars, current);
458                        current = replaceCurrent(tmp);
459                        break;
460
461                    case '{':
462                        limits[0] = 0;
463                        limits[1] = -1;
464                        int le = parseLimits(i + 1, end, data, limits);
465                        if (limits[LIMITS_PARSE_RESULT_INDEX] == LIMITS_OK) { //parse ok
466                            if (current == null) throw new PatternSyntaxException("missing term before {}");
467                            i = le;
468                            if (i < end && data[i] == '?') {
469                                greedy = false;
470                                i++;
471                            }
472                            tmp = greedy ? makeGreedyLimits(vars, current, limits, iterators) :
473                                    makeLazyLimits(vars, current, limits);
474                            current = replaceCurrent(tmp);
475                            break;
476                        } else { //unicode class or named backreference
477                            if (data[i + 1] == '\\') { //'{\name}' - backreference
478                                int p = i + 2;
479                                if (p == end) throw new PatternSyntaxException("'group_id' expected");
480                                while (Category.Z.contains(data[p])) {
481                                    p++;
482                                    if (p == end) throw new PatternSyntaxException("'group_id' expected");
483                                }
484                                BackReference br = new BackReference(-1, (flags & IGNORE_CASE) > 0);
485                                i = parseGroupId(data, p, end, br, gmap);
486                                current = append(br);
487                                continue;
488                            } else {
489                                Term t = new Term();
490                                i = CharacterClass.parseName(data, i, end, t, false, (flags & IGNORE_SPACES) > 0);
491                                current = append(t);
492                                continue;
493                            }
494                        }
495
496                    case ' ':
497                    case '\t':
498                    case '\r':
499                    case '\n':
500                        if ((flags & IGNORE_SPACES) > 0) {
501                            i++;
502                            continue;
503                        }
504                        //else go on as default
505
506                        //symbolic items
507                    default:
508                        tmp = new Term();
509                        i = parseTerm(data, i, end, tmp, flags);
510
511                        if (tmp.type == LITERAL_START) {
512                            flags |= LITERAL_FLAG;
513                            break;
514                        } else if (tmp.type == LITERAL_END) {
515                            flags &= ~LITERAL_FLAG;
516                            break;
517                        }
518
519                        if (tmp.type == END && i < end) {
520                            throw new PatternSyntaxException("'$' is not a last term in the group: <" + new String(data, offset, end - offset) + ">");
521                        }
522                        //"\A"
523                        //if(tmp.type==START && i>(offset+1)){
524                        //   throw new PatternSyntaxException("'^' is not a first term in the group: <"+new String(data,offset,end-offset)+">");
525                        //}
526
527                        current = append(tmp);
528                        break;
529                }
530            }
531            else {
532                tmp = new Term();
533                i = parseTerm(data, i, end, tmp, flags);
534
535                if (tmp.type == LITERAL_START) {
536                    flags |= LITERAL_FLAG;
537                    break;
538                } else if (tmp.type == LITERAL_END) {
539                    flags &= ~LITERAL_FLAG;
540                    break;
541                }
542
543                if (tmp.type == END && i < end) {
544                    throw new PatternSyntaxException("'$' is not a last term in the group: <" + new String(data, offset, end - offset) + ">");
545                }
546
547                current = append(tmp);
548            }
549        }
550    }
551
552    /*
553    static boolean isIdentifierPart()
554    {
555
556    }*/
557
558
559    private static int parseGroupId(char[] data, int i, int end, Term term, HashMap<String, Integer> gmap) throws PatternSyntaxException {
560        int id;
561        int nstart = i;
562        if (Character.isDigit(data[i])) {
563            while (Character.isDigit(data[i])) {
564                i++;
565                if (i == end) throw new PatternSyntaxException("group_id expected");
566            }
567            id = makeNumber(nstart, i, data);
568        } else {
569            while (Category.IdentifierPart.contains(data[i])) {
570                i++;
571                if (i == end) throw new PatternSyntaxException("group_id expected");
572            }
573            String s = new String(data, nstart, i - nstart);
574            Integer no = gmap.get(s);
575            if (no == null) throw new PatternSyntaxException("backreference to unknown group: " + s);
576            id = no;
577        }
578        while (Category.Z.contains(data[i])) {
579            i++;
580            if (i == end) throw new PatternSyntaxException("'}' expected");
581        }
582
583        int c = data[i++];
584
585        if (c != '}') throw new PatternSyntaxException("'}' expected");
586
587        term.memreg = id;
588        return i;
589    }
590
591    Term append(Term term) throws PatternSyntaxException {
592        //Term prev=this.prev;
593        Term current = this.current;
594        if (current == null) {
595            in.next = term;
596            term.prev = in;
597            this.current = term;
598            return term;
599        }
600        link(current, term);
601        //this.prev=current;
602        this.current = term;
603        return term;
604    }
605
606    Term replaceCurrent(Term term) throws PatternSyntaxException {
607        //Term prev=this.prev;
608        Term prev = current.prev;
609        if (prev != null) {
610            Term in = this.in;
611            if (prev == in) {
612                //in.next=term;
613                //term.prev=in;
614                in.next = term.in;
615                term.in.prev = in;
616            } else link(prev, term);
617        }
618        this.current = term;
619        return term;
620    }
621
622
623    private void newBranch() throws PatternSyntaxException {
624        close();
625        startNewBranch();
626    }
627
628
629    void close() throws PatternSyntaxException {
630      /*
631      Term prev=this.prev;
632      if(prev!=null){
633         Term current=this.current;
634         if(current!=null){
635            link(prev,current);
636            prev=current;
637            this.current=null;
638         }
639         link(prev,out);
640         this.prev=null;
641      }
642      */
643        Term current = this.current;
644        if (current != null) linkd(current, out);
645        else in.next = out;
646    }
647
648    private static void link(Term term, Term next) {
649        linkd(term, next.in);
650        next.prev = term;
651    }
652
653    private static void linkd(Term term, Term next) {
654        Term prev_out = term.out;
655        if (prev_out != null) {
656            prev_out.next = next;
657        }
658        Term prev_out1 = term.out1;
659        if (prev_out1 != null) {
660            prev_out1.next = next;
661        }
662        Term prev_branch = term.branchOut;
663        if (prev_branch != null) {
664            prev_branch.failNext = next;
665        }
666    }
667
668    void startNewBranch() throws PatternSyntaxException {
669        Term tmp = in.next;
670        Term b = new Branch();
671        in.next = b;
672        b.next = tmp;
673        b.in = null;
674        b.out = null;
675        b.out1 = null;
676        b.branchOut = b;
677        current = b;
678    }
679
680    private static Term makeGreedyStar(int[] vars, Term term, ArrayList<TermIterator> iterators) throws PatternSyntaxException {
681        //vars[STACK_SIZE]++;
682        switch (term.type) {
683            case GROUP_IN: {
684                Term b = new Branch();
685                b.next = term.in;
686                term.out.next = b;
687
688                b.in = b;
689                b.out = null;
690                b.out1 = null;
691                b.branchOut = b;
692
693                return b;
694            }
695            default: {
696                return new TermIterator(term, 0, -1, iterators);
697            }
698        }
699    }
700
701    private static Term makeLazyStar(int[] vars, Term term) {
702        //vars[STACK_SIZE]++;
703        switch (term.type) {
704            case GROUP_IN: {
705                Term b = new Branch();
706                b.failNext = term.in;
707                term.out.next = b;
708
709                b.in = b;
710                b.out = b;
711                b.out1 = null;
712                b.branchOut = null;
713
714                return b;
715            }
716            default: {
717                Term b = new Branch();
718                b.failNext = term;
719                term.next = b;
720
721                b.in = b;
722                b.out = b;
723                b.out1 = null;
724                b.branchOut = null;
725
726                return b;
727            }
728        }
729    }
730
731    private static Term makeGreedyPlus(int[] vars, Term term, ArrayList<TermIterator> iterators) throws PatternSyntaxException {
732        //vars[STACK_SIZE]++;
733        switch (term.type) {
734            case INDEPENDENT_IN://?
735            case GROUP_IN: {
736                Term b = new Branch();
737                b.next = term.in;
738                term.out.next = b;
739
740                b.in = term.in;
741                b.out = null;
742                b.out1 = null;
743                b.branchOut = b;
744
745
746                return b;
747            }
748            default: {
749                return new TermIterator(term, 1, -1, iterators);
750            }
751        }
752    }
753
754    private static Term makeLazyPlus(int[] vars, Term term) {
755        //vars[STACK_SIZE]++;
756        switch (term.type) {
757            case GROUP_IN: {
758                Term b = new Branch();
759                term.out.next = b;
760                b.failNext = term.in;
761
762                b.in = term.in;
763                b.out = b;
764                b.out1 = null;
765                b.branchOut = null;
766
767                return b;
768            }
769            case REG:
770            default: {
771                Term b = new Branch();
772                term.next = b;
773                b.failNext = term;
774
775                b.in = term;
776                b.out = b;
777                b.out1 = null;
778                b.branchOut = null;
779
780                return b;
781            }
782        }
783    }
784
785    private static Term makeGreedyQMark(int[] vars, Term term) {
786        //vars[STACK_SIZE]++;
787        switch (term.type) {
788            case GROUP_IN: {
789                Term b = new Branch();
790                b.next = term.in;
791
792                b.in = b;
793                b.out = term.out;
794                b.out1 = null;
795                b.branchOut = b;
796
797                return b;
798            }
799            case REG:
800            default: {
801                Term b = new Branch();
802                b.next = term;
803
804                b.in = b;
805                b.out = term;
806                b.out1 = null;
807                b.branchOut = b;
808
809                return b;
810            }
811        }
812    }
813
814    private static Term makeLazyQMark(int[] vars, Term term) {
815        //vars[STACK_SIZE]++;
816        switch (term.type) {
817            case GROUP_IN: {
818                Term b = new Branch();
819                b.failNext = term.in;
820
821                b.in = b;
822                b.out = b;
823                b.out1 = term.out;
824                b.branchOut = null;
825
826                return b;
827            }
828            case REG:
829            default: {
830                Term b = new Branch();
831                b.failNext = term;
832
833                b.in = b;
834                b.out = b;
835                b.out1 = term;
836                b.branchOut = null;
837
838                return b;
839            }
840        }
841    }
842
843    private static Term makeGreedyLimits(int[] vars, Term term, int[] limits, ArrayList<TermIterator> iterators) throws PatternSyntaxException {
844        //vars[STACK_SIZE]++;
845        int m = limits[0];
846        int n = limits[1];
847        switch (term.type) {
848            case GROUP_IN: {
849                int cntreg = vars[CNTREG_COUNT]++;
850                Term reset = new Term(CR_SET_0);
851                reset.cntreg = cntreg;
852                Term b = new Term(BRANCH);
853
854                Term inc = new Term(CRSTORE_CRINC);
855                inc.cntreg = cntreg;
856
857                reset.next = b;
858
859                if (n >= 0) {
860                    Term lt = new Term(CR_LT);
861                    lt.cntreg = cntreg;
862                    lt.maxCount = n;
863                    b.next = lt;
864                    lt.next = term.in;
865                } else {
866                    b.next = term.in;
867                }
868                term.out.next = inc;
869                inc.next = b;
870
871                if (m >= 0) {
872                    Term gt = new Term(CR_GT_EQ);
873                    gt.cntreg = cntreg;
874                    gt.maxCount = m;
875                    b.failNext = gt;
876
877                    reset.in = reset;
878                    reset.out = gt;
879                    reset.out1 = null;
880                    reset.branchOut = null;
881                } else {
882                    reset.in = reset;
883                    reset.out = null;
884                    reset.out1 = null;
885                    reset.branchOut = b;
886                }
887                return reset;
888            }
889            default: {
890                return new TermIterator(term, limits[0], limits[1], iterators);
891            }
892        }
893    }
894
895    private static Term makeLazyLimits(int[] vars, Term term, int[] limits) {
896        //vars[STACK_SIZE]++;
897        int m = limits[0];
898        int n = limits[1];
899        switch (term.type) {
900            case GROUP_IN: {
901                int cntreg = vars[CNTREG_COUNT]++;
902                Term reset = new Term(CR_SET_0);
903                reset.cntreg = cntreg;
904                Term b = new Term(BRANCH);
905                Term inc = new Term(CRSTORE_CRINC);
906                inc.cntreg = cntreg;
907
908                reset.next = b;
909
910                if (n >= 0) {
911                    Term lt = new Term(CR_LT);
912                    lt.cntreg = cntreg;
913                    lt.maxCount = n;
914                    b.failNext = lt;
915                    lt.next = term.in;
916                } else {
917                    b.failNext = term.in;
918                }
919                term.out.next = inc;
920                inc.next = b;
921
922                if (m >= 0) {
923                    Term gt = new Term(CR_GT_EQ);
924                    gt.cntreg = cntreg;
925                    gt.maxCount = m;
926                    b.next = gt;
927
928                    reset.in = reset;
929                    reset.out = gt;
930                    reset.out1 = null;
931                    reset.branchOut = null;
932
933                    return reset;
934                } else {
935                    reset.in = reset;
936                    reset.out = b;
937                    reset.out1 = null;
938                    reset.branchOut = null;
939
940                    return reset;
941                }
942            }
943            case REG:
944            default: {
945                Term reset = new Term(CNT_SET_0);
946                Term b = new Branch(BRANCH_STORE_CNT);
947                Term inc = new Term(CNT_INC);
948
949                reset.next = b;
950
951                if (n >= 0) {
952                    Term lt = new Term(READ_CNT_LT);
953                    lt.maxCount = n;
954                    b.failNext = lt;
955                    lt.next = term;
956                    term.next = inc;
957                    inc.next = b;
958                } else {
959                    b.next = term;
960                    term.next = inc;
961                    inc.next = term;
962                }
963
964                if (m >= 0) {
965                    Term gt = new Term(CNT_GT_EQ);
966                    gt.maxCount = m;
967                    b.next = gt;
968
969                    reset.in = reset;
970                    reset.out = gt;
971                    reset.out1 = null;
972                    reset.branchOut = null;
973
974                    return reset;
975                } else {
976                    reset.in = reset;
977                    reset.out = b;
978                    reset.out1 = null;
979                    reset.branchOut = null;
980
981                    return reset;
982                }
983            }
984        }
985    }
986
987
988    private int parseTerm(char[] data, int i, int out, Term term,
989                          int flags) throws PatternSyntaxException {
990        char c = data[i++];
991        boolean inv = false;
992        if((flags & LITERAL_FLAG) == LITERAL_FLAG)
993        {
994            switch (c)
995            {
996                case '\\':
997                    if(i < out + 1 && data[i] == 'E')
998                    {
999                        term.type = LITERAL_END;
1000                        return i + 1;
1001                    }
1002                default:
1003                    term.type = CHAR;
1004                    if ((flags & IGNORE_CASE) == 0) {
1005                        term.c = c;
1006                    } else {
1007                        term.c = Category.caseFold(c);
1008                    }
1009                    return i;
1010            }
1011        }
1012        switch (c) {
1013            case '[':
1014                return CharacterClass.parseClass(data, i, out, term, (flags & IGNORE_CASE) > 0, (flags & IGNORE_SPACES) > 0, (flags & UNICODE) > 0, (flags & XML_SCHEMA) > 0);
1015
1016            case '.':
1017                term.type = (flags & DOTALL) > 0 ? ANY_CHAR : ANY_CHAR_NE;
1018                break;
1019
1020            case '$':
1021                //term.type=mods[MULTILINE_IND]? LINE_END: END; //??
1022                term.type = (flags & MULTILINE) > 0 ? LINE_END : END_EOL;
1023                break;
1024
1025            case '^':
1026                term.type = (flags & MULTILINE) > 0 ? LINE_START : START;
1027                break;
1028
1029            case '\\':
1030                if (i >= out) throw new PatternSyntaxException("Escape without a character");
1031                c = data[i++];
1032                switch (c) {
1033                    case 'f':
1034                        c = '\f'; // form feed
1035                        break;
1036
1037                    case 'n':
1038                        c = '\n'; // new line
1039                        break;
1040
1041                    case 'r':
1042                        c = '\r'; // carriage return
1043                        break;
1044
1045
1046
1047                    case 't':
1048                        c = '\t'; // tab
1049                        break;
1050
1051                    case 'u':
1052                        if(i < out - 3)
1053                            c = (char) ((CharacterClass.toHexDigit(data[i++]) << 12) +
1054                                (CharacterClass.toHexDigit(data[i++]) << 8) +
1055                                (CharacterClass.toHexDigit(data[i++]) << 4) +
1056                                CharacterClass.toHexDigit(data[i++]));
1057                        else {
1058                            c = '\0';
1059                            i = out;
1060                        }
1061                        break;
1062
1063                    case 'x': {   // hex 2-digit number -> char
1064                        int hex = 0;
1065                        char d;
1066                        if ((d = data[i++]) == '{') {
1067                            while (i < out && (d = data[i++]) != '}') {
1068                                hex = (hex << 4) + CharacterClass.toHexDigit(d);
1069                                if (hex > 0xffff || i == out)
1070                                    throw new PatternSyntaxException("\\x{<out of range or incomplete>}");
1071                            }
1072                        } else {
1073                            hex = (CharacterClass.toHexDigit(d) << 4) +
1074                                    CharacterClass.toHexDigit(data[i++]);
1075                        }
1076                        c = (char) hex;
1077                        break;
1078                    }
1079                    case '0':
1080                    case 'o':   // oct 2- or 3-digit number -> char
1081                        int oct = 0;
1082                        for (; i < out; ) {
1083                            char d = data[i++];
1084                            if (d >= '0' && d <= '7') {
1085                                oct *= 8;
1086                                oct += d - '0';
1087                                if (oct > 0xffff) {
1088                                    oct -= d - '0';
1089                                    oct /= 8;
1090                                    break;
1091                                }
1092                            } else break;
1093                        }
1094                        c = (char) oct;
1095                        break;
1096
1097                    case 'm':   // decimal number -> char
1098                        int dec = 0;
1099                        for (; i < out; ) {
1100                            char d = data[i++];
1101                            if (d >= '0' && d <= '9') {
1102                                dec *= 10;
1103                                dec += d - '0';
1104                                if (dec > 0xffff){
1105                                    dec -= d - '0';
1106                                    dec /= 10;
1107                                    break;
1108                                }
1109                            } else break;
1110                        }
1111                        c = (char) dec;
1112                        break;
1113
1114                    case 'c':   // ctrl-char
1115                        c = (char) (data[i++] & 0x1f);
1116                        break;
1117
1118                    case 'D':   // non-digit
1119                        inv = true;
1120                        // go on
1121                    case 'd':   // digit
1122                        CharacterClass.makeDigit(term, inv, (flags & UNICODE) > 0);
1123                        return i;
1124
1125                    case 'S':   // non-space
1126                        inv = true;
1127                        // go on
1128                    case 's':   // space
1129                        CharacterClass.makeSpace(term, inv, (flags & UNICODE) > 0);
1130                        return i;
1131
1132                    case 'W':   // non-letter
1133                        inv = true;
1134                        // go on
1135                    case 'w':   // letter
1136                        CharacterClass.makeWordChar(term, inv, (flags & UNICODE) > 0);
1137                        return i;
1138
1139                    case 'B':   // non-(word boundary)
1140                        inv = true;
1141                        // go on
1142                    case 'b':   // word boundary
1143                        CharacterClass.makeWordBoundary(term, inv, (flags & UNICODE) > 0);
1144                        return i;
1145
1146                    case '<':   // word start
1147                        CharacterClass.makeWordStart(term, (flags & UNICODE) > 0);
1148                        return i;
1149
1150                    case '>':   // word end
1151                        CharacterClass.makeWordEnd(term, (flags & UNICODE) > 0);
1152                        return i;
1153
1154                    case 'A':   // text beginning
1155                        term.type = START;
1156                        return i;
1157
1158                    case 'Z':   // text end
1159                        term.type = END_EOL;
1160                        return i;
1161
1162                    case 'z':   // text end
1163                        term.type = END;
1164                        return i;
1165
1166                    case 'G':   // end of last match
1167                        term.type = LAST_MATCH_END;
1168                        return i;
1169
1170                    case 'P':   // \\P{..}
1171                        inv = true;
1172                    case 'p':   // \\p{..}
1173                        i = CharacterClass.parseName(data, i, out, term, inv, (flags & IGNORE_SPACES) > 0);
1174                        return i;
1175                    case 'Q':
1176                        term.type = LITERAL_START;
1177                        return i;
1178
1179
1180                    default:
1181                        if (c >= '1' && c <= '9') {
1182                            int n = c - '0';
1183                            while ((i < out) && (c = data[i]) >= '0' && c <= '9') {
1184                                n = (n * 10) + c - '0';
1185                                i++;
1186                            }
1187                            term.type = (flags & IGNORE_CASE) > 0 ? REG_I : REG;
1188                            term.memreg = n;
1189                            return i;
1190                        }
1191                  /*
1192                  if(c<256){
1193                     CustomParser termp=customParsers[c];
1194                     if(termp!=null){
1195                        i=termp.parse(i,data,term);
1196                        return i;
1197                     }
1198                  }
1199                  */
1200                }
1201                term.type = CHAR;
1202                term.c = c;
1203                break;
1204
1205            default:
1206                if ((flags & IGNORE_CASE) == 0) {
1207                    term.type = CHAR;
1208                    term.c = c;
1209                } else {
1210                    term.type = CHAR;
1211                    term.c = Category.caseFold(c);
1212                    //CharacterClass.makeICase(term, c);
1213                }
1214                break;
1215        }
1216        return i;
1217    }
1218
1219
1220    // one of {n},{n,},{,n},{n1,n2}
1221    private static int parseLimits(int i, int end, char[] data, int[] limits) throws PatternSyntaxException {
1222        if (limits.length != LIMITS_LENGTH)
1223            throw new IllegalArgumentException("limits.length=" + limits.length + ", should be " + LIMITS_LENGTH);
1224        limits[LIMITS_PARSE_RESULT_INDEX] = LIMITS_OK;
1225        int ind = 0;
1226        int v = 0;
1227        char c;
1228        while (i < end) {
1229            c = data[i++];
1230            switch (c) {
1231                case ' ':
1232                    continue;
1233
1234                case ',':
1235                    if (ind > 0) throw new PatternSyntaxException("illegal construction: {.. , , ..}");
1236                    limits[ind++] = v;
1237                    v = -1;
1238                    continue;
1239
1240                case '}':
1241                    limits[ind] = v;
1242                    if (ind == 0) limits[1] = v;
1243                    return i;
1244
1245                default:
1246                    if (c > '9' || c < '0') {
1247                        //throw new PatternSyntaxException("illegal symbol in iterator: '{"+c+"}'");
1248                        limits[LIMITS_PARSE_RESULT_INDEX] = LIMITS_FAILURE;
1249                        return i;
1250                    }
1251                    if (v < 0) v = 0;
1252                    v = v * 10 + (c - '0');
1253            }
1254        }
1255        throw new PatternSyntaxException("malformed quantifier");
1256    }
1257    static String termLookup(int t)
1258    {
1259        switch (t)
1260        {
1261            case CHAR: return "CHAR";
1262            case BITSET: return "BITSET";
1263            case BITSET2: return "BITSET2";
1264            case ANY_CHAR: return "ANY_CHAR";
1265            case ANY_CHAR_NE: return "ANY_CHAR_NE";
1266            case REG: return "REG";
1267            case REG_I: return "REG_I";
1268            case FIND: return "FIND";
1269            case FINDREG: return "FINDREG";
1270            case SUCCESS: return "SUCCESS";
1271            case BOUNDARY: return "BOUNDARY";
1272            case DIRECTION: return "DIRECTION";
1273            case UBOUNDARY: return "UBOUNDARY";
1274            case UDIRECTION: return "UDIRECTION";
1275            case GROUP_IN: return "GROUP_IN";
1276            case GROUP_OUT: return "GROUP_OUT";
1277            case VOID: return "VOID";
1278            case START: return "START";
1279            case END: return "END";
1280            case END_EOL: return "END_EOL";
1281            case LINE_START: return "LINE_START";
1282            case LINE_END: return "LINE_END";
1283            case LAST_MATCH_END: return "LAST_MATCH_END";
1284            case CNT_SET_0: return "CNT_SET_0";
1285            case CNT_INC: return "CNT_INC";
1286            case CNT_GT_EQ: return "CNT_GT_EQ";
1287            case READ_CNT_LT: return "READ_CNT_LT";
1288            case CRSTORE_CRINC: return "CRSTORE_CRINC";
1289            case CR_SET_0: return "CR_SET_0";
1290            case CR_LT: return "CR_LT";
1291            case CR_GT_EQ: return "CR_GT_EQ";
1292            case BRANCH: return "BRANCH";
1293            case BRANCH_STORE_CNT: return "BRANCH_STORE_CNT";
1294            case BRANCH_STORE_CNT_AUX1: return "BRANCH_STORE_CNT_AUX1";
1295            case PLOOKAHEAD_IN: return "PLOOKAHEAD_IN";
1296            case PLOOKAHEAD_OUT: return "PLOOKAHEAD_OUT";
1297            case NLOOKAHEAD_IN: return "NLOOKAHEAD_IN";
1298            case NLOOKAHEAD_OUT: return "NLOOKAHEAD_OUT";
1299            case PLOOKBEHIND_IN: return "PLOOKBEHIND_IN";
1300            case PLOOKBEHIND_OUT: return "PLOOKBEHIND_OUT";
1301            case NLOOKBEHIND_IN: return "NLOOKBEHIND_IN";
1302            case NLOOKBEHIND_OUT: return "NLOOKBEHIND_OUT";
1303            case INDEPENDENT_IN: return "INDEPENDENT_IN";
1304            case INDEPENDENT_OUT: return "INDEPENDENT_OUT";
1305            case REPEAT_0_INF: return "REPEAT_0_INF";
1306            case REPEAT_MIN_INF: return "REPEAT_MIN_INF";
1307            case REPEAT_MIN_MAX: return "REPEAT_MIN_MAX";
1308            case REPEAT_REG_MIN_INF: return "REPEAT_REG_MIN_INF";
1309            case REPEAT_REG_MIN_MAX: return "REPEAT_REG_MIN_MAX";
1310            case BACKTRACK_0: return "BACKTRACK_0";
1311            case BACKTRACK_MIN: return "BACKTRACK_MIN";
1312            case BACKTRACK_FIND_MIN: return "BACKTRACK_FIND_MIN";
1313            case BACKTRACK_FINDREG_MIN: return "BACKTRACK_FINDREG_MIN";
1314            case BACKTRACK_REG_MIN: return "BACKTRACK_REG_MIN";
1315            case MEMREG_CONDITION: return "MEMREG_CONDITION";
1316            case LOOKAHEAD_CONDITION_IN: return "LOOKAHEAD_CONDITION_IN";
1317            case LOOKAHEAD_CONDITION_OUT: return "LOOKAHEAD_CONDITION_OUT";
1318            case LOOKBEHIND_CONDITION_IN: return "LOOKBEHIND_CONDITION_IN";
1319            case LOOKBEHIND_CONDITION_OUT: return "LOOKBEHIND_CONDITION_OUT";
1320            default: return "UNKNOWN_TERM";
1321        }
1322    }
1323    public String toString() {
1324        StringBuilder b = new StringBuilder(100);
1325        //b.append(hashCode());
1326        b.append(instanceNum);
1327        b.append(' ');
1328        b.append(termLookup(type));
1329        b.append(": ");
1330        if (inverse) b.append('^');
1331        switch (type) {
1332            case VOID:
1333                b.append("[]");
1334                b.append(" , ");
1335                break;
1336            case CHAR:
1337                b.append(CharacterClass.stringValue(c));
1338                b.append(" , ");
1339                break;
1340            case ANY_CHAR:
1341                b.append("dotall, ");
1342                break;
1343            case ANY_CHAR_NE:
1344                b.append("dot-eols, ");
1345                break;
1346            case BITSET:
1347                b.append('[');
1348                b.append(CharacterClass.stringValue0(bitset));
1349                b.append(']');
1350                b.append(" , weight=");
1351                b.append(weight);
1352                b.append(" , ");
1353                break;
1354            case BITSET2:
1355                b.append('[');
1356                b.append(CharacterClass.stringValue2(bitset2));
1357                b.append(']');
1358                b.append(" , weight2=");
1359                b.append(weight);
1360                b.append(" , ");
1361                break;
1362            case START:
1363                b.append("abs.start");
1364                break;
1365            case END:
1366                b.append("abs.end");
1367                break;
1368            case END_EOL:
1369                b.append("abs.end-eol");
1370                break;
1371            case LINE_START:
1372                b.append("line start");
1373                break;
1374            case LINE_END:
1375                b.append("line end");
1376                break;
1377            case LAST_MATCH_END:
1378                if (inverse) b.append("non-");
1379                b.append("BOUNDARY");
1380                break;
1381            case BOUNDARY:
1382                if (inverse) b.append("non-");
1383                b.append("BOUNDARY");
1384                break;
1385            case UBOUNDARY:
1386                if (inverse) b.append("non-");
1387                b.append("UBOUNDARY");
1388                break;
1389            case DIRECTION:
1390                b.append("DIRECTION");
1391                break;
1392            case UDIRECTION:
1393                b.append("UDIRECTION");
1394                break;
1395            case FINDREG:
1396                b.append('%');
1397            case FIND:
1398                b.append(">>>{");
1399                b.append(target);
1400                b.append("}, <<");
1401                b.append(distance);
1402                if (eat) {
1403                    b.append(",eat");
1404                }
1405                b.append(", ");
1406                break;
1407            case REPEAT_0_INF:
1408                b.append("rpt{");
1409                b.append(target);
1410                b.append(",0,inf}");
1411                if (failNext != null) {
1412                    b.append(", =>");
1413                    b.append(failNext.instanceNum);
1414                    b.append(", ");
1415                }
1416                break;
1417            case REPEAT_MIN_INF:
1418                b.append("rpt{");
1419                b.append(target);
1420                b.append(",");
1421                b.append(minCount);
1422                b.append(",inf}");
1423                if (failNext != null) {
1424                    b.append(", =>");
1425                    b.append(failNext.instanceNum);
1426                    b.append(", ");
1427                }
1428                break;
1429            case REPEAT_MIN_MAX:
1430                b.append("rpt{");
1431                b.append(target);
1432                b.append(",");
1433                b.append(minCount);
1434                b.append(",");
1435                b.append(maxCount);
1436                b.append("}");
1437                if (failNext != null) {
1438                    b.append(", =>");
1439                    b.append(failNext.instanceNum);
1440                    b.append(", ");
1441                }
1442                break;
1443            case REPEAT_REG_MIN_INF:
1444                b.append("rpt{$");
1445                b.append(memreg);
1446                b.append(',');
1447                b.append(minCount);
1448                b.append(",inf}");
1449                if (failNext != null) {
1450                    b.append(", =>");
1451                    b.append(failNext.instanceNum);
1452                    b.append(", ");
1453                }
1454                break;
1455            case REPEAT_REG_MIN_MAX:
1456                b.append("rpt{$");
1457                b.append(memreg);
1458                b.append(',');
1459                b.append(minCount);
1460                b.append(',');
1461                b.append(maxCount);
1462                b.append("}");
1463                if (failNext != null) {
1464                    b.append(", =>");
1465                    b.append(failNext.instanceNum);
1466                    b.append(", ");
1467                }
1468                break;
1469            case BACKTRACK_0:
1470                b.append("back(0)");
1471                break;
1472            case BACKTRACK_MIN:
1473                b.append("back(");
1474                b.append(minCount);
1475                b.append(")");
1476                break;
1477            case BACKTRACK_REG_MIN:
1478                b.append("back");
1479                b.append("_$");
1480                b.append(memreg);
1481                b.append("(");
1482                b.append(minCount);
1483                b.append(")");
1484                break;
1485            case GROUP_IN:
1486                b.append('(');
1487                if (memreg > 0) b.append(memreg);
1488                b.append('-');
1489                b.append(" , ");
1490                break;
1491            case GROUP_OUT:
1492                b.append('-');
1493                if (memreg > 0) b.append(memreg);
1494                b.append(')');
1495                b.append(" , ");
1496                break;
1497            case PLOOKAHEAD_IN:
1498                b.append('(');
1499                b.append("=");
1500                b.append(lookaheadId);
1501                b.append(" , ");
1502                break;
1503            case PLOOKAHEAD_OUT:
1504                b.append('=');
1505                b.append(lookaheadId);
1506                b.append(')');
1507                b.append(" , ");
1508                break;
1509            case NLOOKAHEAD_IN:
1510                b.append("(!");
1511                b.append(lookaheadId);
1512                b.append(" , ");
1513                if (failNext != null) {
1514                    b.append(", =>");
1515                    b.append(failNext.instanceNum);
1516                    b.append(", ");
1517                }
1518                break;
1519            case NLOOKAHEAD_OUT:
1520                b.append('!');
1521                b.append(lookaheadId);
1522                b.append(')');
1523                b.append(" , ");
1524                break;
1525            case PLOOKBEHIND_IN:
1526                b.append('(');
1527                b.append("<=");
1528                b.append(lookaheadId);
1529                b.append(" , dist=");
1530                b.append(distance);
1531                b.append(" , ");
1532                break;
1533            case PLOOKBEHIND_OUT:
1534                b.append("<=");
1535                b.append(lookaheadId);
1536                b.append(')');
1537                b.append(" , ");
1538                break;
1539            case NLOOKBEHIND_IN:
1540                b.append("(<!");
1541                b.append(lookaheadId);
1542                b.append(" , dist=");
1543                b.append(distance);
1544                b.append(" , ");
1545                if (failNext != null) {
1546                    b.append(", =>");
1547                    b.append(failNext.instanceNum);
1548                    b.append(", ");
1549                }
1550                break;
1551            case NLOOKBEHIND_OUT:
1552                b.append("<!");
1553                b.append(lookaheadId);
1554                b.append(')');
1555                b.append(" , ");
1556                break;
1557            case MEMREG_CONDITION:
1558                b.append("(reg");
1559                b.append(memreg);
1560                b.append("?)");
1561                if (failNext != null) {
1562                    b.append(", =>");
1563                    b.append(failNext.instanceNum);
1564                    b.append(", ");
1565                }
1566                break;
1567            case LOOKAHEAD_CONDITION_IN:
1568                b.append("(cond");
1569                b.append(lookaheadId);
1570                b.append(((Lookahead) this).isPositive ? '=' : '!');
1571                b.append(" , ");
1572                if (failNext != null) {
1573                    b.append(", =>");
1574                    b.append(failNext.instanceNum);
1575                    b.append(", ");
1576                }
1577                break;
1578            case LOOKAHEAD_CONDITION_OUT:
1579                b.append("cond");
1580                b.append(lookaheadId);
1581                b.append(")");
1582                if (failNext != null) {
1583                    b.append(", =>");
1584                    b.append(failNext.instanceNum);
1585                    b.append(", ");
1586                }
1587                break;
1588            case REG:
1589                b.append("$");
1590                b.append(memreg);
1591                b.append(", ");
1592                break;
1593            case SUCCESS:
1594                b.append("END");
1595                break;
1596            case BRANCH_STORE_CNT_AUX1:
1597                b.append("(aux1)");
1598            case BRANCH_STORE_CNT:
1599                b.append("(cnt)");
1600            case BRANCH:
1601                b.append("=>");
1602                if (failNext != null) b.append(failNext.instanceNum);
1603                else b.append("null");
1604                b.append(" , ");
1605                break;
1606            default:
1607                b.append('[');
1608                switch (type) {
1609                    case CNT_SET_0:
1610                        b.append("cnt=0");
1611                        break;
1612                    case CNT_INC:
1613                        b.append("cnt++");
1614                        break;
1615                    case CNT_GT_EQ:
1616                        b.append("cnt>=").append(maxCount);
1617                        break;
1618                    case READ_CNT_LT:
1619                        b.append("->cnt<").append(maxCount);
1620                        break;
1621                    case CRSTORE_CRINC:
1622                        b.append("M(").append(memreg).append(")->,Cr(").append(cntreg).append(")->,Cr(").append(cntreg).append(")++");
1623                        break;
1624                    case CR_SET_0:
1625                        b.append("Cr(").append(cntreg).append(")=0");
1626                        break;
1627                    case CR_LT:
1628                        b.append("Cr(").append(cntreg).append(")<").append(maxCount);
1629                        break;
1630                    case CR_GT_EQ:
1631                        b.append("Cr(").append(cntreg).append(")>=").append(maxCount);
1632                        break;
1633                    default:
1634                        b.append("unknown type: ").append(type);
1635                }
1636                b.append("] , ");
1637        }
1638        if (next != null) {
1639            b.append("->");
1640            b.append(next.instanceNum);
1641            b.append(", ");
1642        }
1643        //b.append("\r\n");
1644        return b.toString();
1645    }
1646
1647    public String toStringAll() {
1648        return toStringAll(new ArrayList<Integer>());
1649    }
1650
1651    private String toStringAll(ArrayList<Integer> v) {
1652        v.add(instanceNum);
1653        String s = toString();
1654        if (next != null) {
1655            if (!v.contains(next.instanceNum)) {
1656                s += "\r\n";
1657                s += next.toStringAll(v);
1658            }
1659        }
1660        if (failNext != null) {
1661            if (!v.contains(failNext.instanceNum)) {
1662                s += "\r\n";
1663                s += failNext.toStringAll(v);
1664            }
1665        }
1666        return s;
1667    }
1668
1669    @Override
1670    public boolean equals(Object o) {
1671        if (this == o) return true;
1672        if (o == null || getClass() != o.getClass()) return false;
1673
1674        Term term = (Term) o;
1675
1676        if (type != term.type) return false;
1677        if (inverse != term.inverse) return false;
1678        if (c != term.c) return false;
1679        if (distance != term.distance) return false;
1680        if (eat != term.eat) return false;
1681        if (weight != term.weight) return false;
1682        if (memreg != term.memreg) return false;
1683        if (minCount != term.minCount) return false;
1684        if (maxCount != term.maxCount) return false;
1685        if (cntreg != term.cntreg) return false;
1686        if (lookaheadId != term.lookaheadId) return false;
1687        if (next != null ? !next.equals(term.next) : term.next != null) return false;
1688        if (bitset != null ? !bitset.equals(term.bitset) : term.bitset != null) return false;
1689        // Probably incorrect - comparing Object[] arrays with Arrays.equals
1690        return Arrays.equals(bitset2, term.bitset2) && Arrays.equals(categoryBitset, term.categoryBitset) && Arrays.equals(brackets, term.brackets);
1691//if (!Arrays.equals(brackets, term.brackets)) return false;
1692        /*
1693        if (failNext != null ? !failNext.equals(term.failNext) : term.failNext != null) return false;
1694        if (target != null ? !target.equals(term.target) : term.target != null) return false;
1695        if (prev != null ? !prev.equals(term.prev) : term.prev != null) return false;
1696        if (in != null ? !in.equals(term.in) : term.in != null) return false;
1697        if (out != null ? !out.equals(term.out) : term.out != null) return false;
1698        if (out1 != null ? !out1.equals(term.out1) : term.out1 != null) return false;
1699        if (first != null ? !first.equals(term.first) : term.first != null) return false;
1700        if (current != null ? !current.equals(term.current) : term.current != null) return false;
1701        return branchOut != null ? branchOut.equals(term.branchOut) : term.branchOut == null;
1702        */
1703    }
1704
1705    @Override
1706    public int hashCode() {
1707        int result = next != null ? next.hashCode() : 0;
1708        result = 31 * result + type;
1709        result = 31 * result + (inverse ? 1 : 0);
1710        result = 31 * result + (int) c;
1711        result = 31 * result + distance;
1712        result = 31 * result + (eat ? 1 : 0);
1713        result = 31 * result + (bitset != null ? bitset.hashCode() : 0);
1714        result = 31 * result + Arrays.hashCode(bitset2);
1715        result = 31 * result + Arrays.hashCode(categoryBitset);
1716        result = 31 * result + Arrays.hashCode(brackets);
1717        result = 31 * result + weight;
1718        result = 31 * result + memreg;
1719        result = 31 * result + minCount;
1720        result = 31 * result + maxCount;
1721        result = 31 * result + cntreg;
1722        result = 31 * result + lookaheadId;
1723        /*
1724        result = 31 * result + (failNext != null ? failNext.hashCode() : 0);
1725        result = 31 * result + (target != null ? (this == target ? 73 : target.hashCode()) : 0);
1726        result = 31 * result + (prev != null ? (this == prev ? 73 : prev.hashCode()) : 0);
1727        result = 31 * result + (in != null ? (this == in ? 73 : in.hashCode()) : 0);
1728        result = 31 * result + (out != null ? (this == out ? 73 : out.hashCode()) : 0);
1729        result = 31 * result + (out1 != null ? (this == out1 ? 73 : out1.hashCode()) : 0);
1730        result = 31 * result + (first != null ? (this == first ? 73 : first.hashCode()) : 0);
1731        result = 31 * result + (current != null ? (this == current ? 73 : current.hashCode()) : 0);
1732        result = 31 * result + (branchOut != null ? (this == branchOut ? 73 : branchOut.hashCode()) : 0);
1733        */
1734        return result;
1735    }
1736}
1737
1738class Pretokenizer {
1739    private static final int START = 1;
1740    static final int END = 2;
1741    static final int PLAIN_GROUP = 3;
1742    static final int POS_LOOKAHEAD = 4;
1743    static final int NEG_LOOKAHEAD = 5;
1744    static final int POS_LOOKBEHIND = 6;
1745    static final int NEG_LOOKBEHIND = 7;
1746    static final int INDEPENDENT_REGEX = 8;
1747    static final int COMMENT = 9;
1748    static final int CONDITIONAL_GROUP = 10;
1749    static final int FLAGS = 11;
1750    static final int CLASS_GROUP = 12;
1751    static final int NAMED_GROUP = 13;
1752
1753    int tOffset;
1754    int tOutside;
1755    private int skip;
1756    private int offset;
1757    private int end;
1758    int c;
1759
1760    int ttype = START;
1761
1762    private char[] data;
1763
1764    //results
1765    private int flags;
1766    private boolean flagsChanged;
1767
1768    char[] brackets;
1769    String groupName;
1770    boolean groupDeclared;
1771
1772    Pretokenizer(char[] data, int offset, int end) {
1773        if (offset < 0 || end > data.length)
1774            throw new IndexOutOfBoundsException("offset=" + offset + ", end=" + end + ", length=" + data.length);
1775        this.offset = offset;
1776        this.end = end;
1777
1778        this.tOffset = offset;
1779        this.tOutside = offset;
1780
1781        this.data = data;
1782    }
1783
1784    int flags(int def) {
1785        return flagsChanged ? flags : def;
1786    }
1787
1788    void next() throws PatternSyntaxException {
1789        int tOffset = this.tOutside;
1790        int skip = this.skip;
1791
1792        tOffset += skip;
1793        flagsChanged = false;
1794
1795        int end = this.end;
1796        char[] data = this.data;
1797        boolean esc = false;
1798        for (int i = tOffset; i < end; i++) {
1799            char c = data[i];
1800            if (esc) {
1801                if(c == 'Q')
1802                {
1803
1804                    for (; i < end; i++) {
1805                        char c1 = data[i];
1806                        if(c1 == '\\') {
1807                            if (i + 1 < end && data[i + 1] == 'E') {
1808                                i++;
1809                                esc = false;
1810                                break;
1811                            }
1812                        }
1813                    }
1814                }
1815                else {
1816                    esc = false;
1817                }
1818                continue;
1819            }
1820            switch (c) {
1821                case '\\':
1822                    esc = true;
1823                    continue;
1824                case '|':
1825                case ')':
1826                    ttype = c;
1827                    this.tOffset = tOffset;
1828                    this.tOutside = i;
1829                    this.skip = 1;
1830                    return;
1831                case '(':
1832                    if (((i + 2) < end) && (data[i + 1] == '?')) {
1833                        char c1 = data[i + 2];
1834                        switch (c1) {
1835                            case ':':
1836                                ttype = PLAIN_GROUP;
1837                                skip = 3; // "(?:" - skip 3 chars
1838                                break;
1839                            case '=':
1840                                ttype = POS_LOOKAHEAD;
1841                                skip = 3;  // "(?="
1842                                break;
1843                            case '!':
1844                                ttype = NEG_LOOKAHEAD;
1845                                skip = 3;  // "(?!"
1846                                break;
1847                            case '<':
1848                                switch (c1 = data[i + 3]) {
1849                                    case '=':
1850                                        ttype = POS_LOOKBEHIND;
1851                                        skip = 4; // "(?<="
1852                                        break;
1853                                    case '!':
1854                                        ttype = NEG_LOOKBEHIND;
1855                                        skip = 4; // "(?<!"
1856                                        break;
1857                                    default:
1858                                        throw new PatternSyntaxException("invalid character after '(?<' : " + c1);
1859                                }
1860                                break;
1861                            case '>':
1862                                ttype = INDEPENDENT_REGEX;
1863                                skip = 3;  // "(?>"
1864                                break;
1865                            case '#':
1866                                ttype = COMMENT;
1867                                skip = 3; // ="(?#".length, the makeTree() skips the rest by itself
1868                                break;
1869                            case '(':
1870                                ttype = CONDITIONAL_GROUP;
1871                                skip = 2; //"(?"+"(..." - skip "(?" (2 chars) and parse condition as a group
1872                                break;
1873                            case '[':
1874                                ttype = CLASS_GROUP;
1875                                skip = 2; // "(?"+"[..]+...-...&...)" - skip 2 chars and parse a class group
1876                                break;
1877                            default:
1878                                int mOff, mLen;
1879                                mLoop:
1880                                for (int p = i + 2; p < end; p++) {
1881                                    char c2 = data[p];
1882                                    switch (c2) {
1883                                        case '-':
1884                                        case 'i':
1885                                        case 'm':
1886                                        case 's':
1887                                        case 'x':
1888                                        case 'u':
1889                                        case 'X':
1890                                            continue mLoop;
1891
1892                                        case ':':
1893                                            mOff = i + 2;
1894                                            mLen = p - mOff;
1895                                            if (mLen > 0) {
1896                                                flags = Pattern.parseFlags(data, mOff, mLen);
1897                                                flagsChanged = true;
1898                                            }
1899                                            ttype = PLAIN_GROUP;
1900                                            skip = mLen + 3; // "(?imsx:" mLen=4; skip= "(?".len + ":".len + mLen = 2+1+4=7
1901                                            break mLoop;
1902                                        case ')':
1903                                            flags = Pattern.parseFlags(data, mOff = (i + 2), mLen = (p - mOff));
1904                                            flagsChanged = true;
1905                                            ttype = FLAGS;
1906                                            skip = mLen + 3; // "(?imsx)" mLen=4, skip="(?".len+")".len+mLen=2+1+4=7
1907                                            break mLoop;
1908                                        default:
1909                                            throw new PatternSyntaxException("wrong char after \"(?\": " + c2);
1910                                    }
1911                                }
1912                                break;
1913                        }
1914                    } else if (((i + 2) < end) && (data[i + 1] == '{')) { //parse named group: ({name}....),({=name}....)
1915                        int p = i + 2;
1916                        skip = 3; //'({' + '}'
1917                        int nstart, nend;
1918                        boolean isDecl;
1919                        c = data[p];
1920                        while (Category.Z.contains(c)) {
1921                            c = data[++p];
1922                            skip++;
1923                            if (p == end) throw new PatternSyntaxException("malformed named group");
1924                        }
1925
1926                        if (c == '=') {
1927                            isDecl = false;
1928                            c = data[++p];
1929                            skip++;
1930                            if (p == end) throw new PatternSyntaxException("malformed named group");
1931                        } else isDecl = true;
1932
1933                        nstart = p;
1934                        while (Category.IdentifierPart.contains(c)) {
1935                            c = data[++p];
1936                            skip++;
1937                            if (p == end) throw new PatternSyntaxException("malformed named group");
1938                        }
1939                        nend = p;
1940                        while (Category.Z.contains(c)) {
1941                            c = data[++p];
1942                            skip++;
1943                            if (p == end) throw new PatternSyntaxException("malformed named group");
1944                        }
1945                        if (c != '}')
1946                            throw new PatternSyntaxException("'}' expected at " + (p - i) + " in " + new String(data, i, end - i));
1947
1948                        this.groupName = new String(data, nstart, nend - nstart);
1949                        this.groupDeclared = isDecl;
1950                        ttype = NAMED_GROUP;
1951                    } else {
1952                        ttype = '(';
1953                        skip = 1;
1954                    }
1955                    this.tOffset = tOffset;
1956                    this.tOutside = i;
1957                    this.skip = skip;
1958                    return;
1959                case '[':
1960                    loop:
1961                    for (; ; i++) {
1962                        if (i == end) throw new PatternSyntaxException("malformed character class");
1963                        char c1 = data[i];
1964                        switch (c1) {
1965                            case '\\':
1966                                i++;
1967                                continue;
1968                            case ']':
1969                                break loop;
1970                        }
1971                    }
1972            }
1973        }
1974        ttype = END;
1975        this.tOffset = tOffset;
1976        this.tOutside = end;
1977    }
1978
1979}
1980
1981class Branch extends Term {
1982    Branch() {
1983        type = BRANCH;
1984    }
1985
1986    Branch(int type) {
1987        switch (type) {
1988            case BRANCH:
1989            case BRANCH_STORE_CNT:
1990            case BRANCH_STORE_CNT_AUX1:
1991                this.type = type;
1992                break;
1993            default:
1994                throw new IllegalArgumentException("not a branch type: " + type);
1995        }
1996    }
1997}
1998
1999class BackReference extends Term {
2000    BackReference(int no, boolean icase) {
2001        super(icase ? REG_I : REG);
2002        memreg = no;
2003    }
2004}
2005
2006class Group extends Term {
2007    Group() {
2008        this(0);
2009    }
2010
2011    Group(int memreg) {
2012        type = GROUP_IN;
2013        this.memreg = memreg;
2014
2015        //used in append()
2016        current = null;
2017        in = this;
2018        prev = null;
2019
2020        out = new Term();
2021        out.type = GROUP_OUT;
2022        out.memreg = memreg;
2023    }
2024}
2025
2026class ConditionalExpr extends Group {
2027    private Term node;
2028    private boolean newBranchStarted = false;
2029    private boolean linkAsBranch = true;
2030
2031    ConditionalExpr(Lookahead la) {
2032        super(0);
2033      /*
2034      * This all is rather tricky.
2035      * See how this types are handled in Matcher.
2036      * The shortcoming is that we strongly rely upon
2037      * the internal structure of Lookahead.
2038      */
2039        la.in.type = LOOKAHEAD_CONDITION_IN;
2040        la.out.type = LOOKAHEAD_CONDITION_OUT;
2041        if (la.isPositive) {
2042            node = la.in;
2043            linkAsBranch = true;
2044
2045            //empty 2'nd branch
2046            node.failNext = out;
2047        } else {
2048            node = la.out;
2049            linkAsBranch = false;
2050
2051            //empty 2'nd branch
2052            node.next = out;
2053        }
2054
2055        //node.prev=in;
2056        //in.next=node;
2057
2058        la.prev = in;
2059        in.next = la;
2060
2061        current = la;
2062        //current=node;
2063    }
2064
2065    ConditionalExpr(Lookbehind lb) {
2066        super(0);
2067      /*
2068      * This all is rather tricky.
2069      * See how this types are handled in Matcher.
2070      * The shortcoming is that we strongly rely upon
2071      * the internal structure of Lookahead.
2072      */
2073        lb.in.type = LOOKBEHIND_CONDITION_IN;
2074        lb.out.type = LOOKBEHIND_CONDITION_OUT;
2075        if (lb.isPositive) {
2076            node = lb.in;
2077            linkAsBranch = true;
2078
2079            //empty 2'nd branch
2080            node.failNext = out;
2081        } else {
2082            node = lb.out;
2083            linkAsBranch = false;
2084
2085            //empty 2'nd branch
2086            node.next = out;
2087        }
2088
2089        lb.prev = in;
2090        in.next = lb;
2091
2092        current = lb;
2093        //current=node;
2094    }
2095
2096    ConditionalExpr(int memreg) {
2097        super(0);
2098        Term condition = new Term(MEMREG_CONDITION);
2099        condition.memreg = memreg;
2100        condition.out = condition;
2101        condition.out1 = null;
2102        condition.branchOut = null;
2103
2104        //default branch
2105        condition.failNext = out;
2106
2107        node = current = condition;
2108        linkAsBranch = true;
2109
2110        condition.prev = in;
2111        in.next = condition;
2112
2113        current = condition;
2114    }
2115
2116    protected void startNewBranch() throws PatternSyntaxException {
2117        if (newBranchStarted) throw new PatternSyntaxException("attempt to set a 3'd choice in a conditional expr.");
2118        Term node = this.node;
2119        node.out1 = null;
2120        if (linkAsBranch) {
2121            node.out = null;
2122            node.branchOut = node;
2123        } else {
2124            node.out = node;
2125            node.branchOut = null;
2126        }
2127        newBranchStarted = true;
2128        current = node;
2129    }
2130
2131    @Override
2132    public boolean equals(Object o) {
2133        if (this == o) return true;
2134        if (o == null || getClass() != o.getClass()) return false;
2135        if (!super.equals(o)) return false;
2136
2137        ConditionalExpr that = (ConditionalExpr) o;
2138
2139        return newBranchStarted == that.newBranchStarted && linkAsBranch == that.linkAsBranch && (node != null ? node.equals(that.node) : that.node == null);
2140
2141    }
2142
2143    @Override
2144    public int hashCode() {
2145        int result = super.hashCode();
2146        result = 31 * result + (node != null ? node.hashCode() : 0);
2147        result = 31 * result + (newBranchStarted ? 1 : 0);
2148        result = 31 * result + (linkAsBranch ? 1 : 0);
2149        return result;
2150    }
2151}
2152
2153class IndependentGroup extends Term {
2154    IndependentGroup(int id) {
2155        super(0);
2156        in = this;
2157        out = new Term();
2158        type = INDEPENDENT_IN;
2159        out.type = INDEPENDENT_OUT;
2160        lookaheadId = out.lookaheadId = id;
2161    }
2162}
2163
2164class Lookahead extends Term {
2165    final boolean isPositive;
2166
2167    Lookahead(int id, boolean isPositive) {
2168        this.isPositive = isPositive;
2169        in = this;
2170        out = new Term();
2171        if (isPositive) {
2172            type = PLOOKAHEAD_IN;
2173            out.type = PLOOKAHEAD_OUT;
2174        } else {
2175            type = NLOOKAHEAD_IN;
2176            out.type = NLOOKAHEAD_OUT;
2177            branchOut = this;
2178        }
2179        lookaheadId = id;
2180        out.lookaheadId = id;
2181    }
2182
2183    @Override
2184    public boolean equals(Object o) {
2185        if (this == o) return true;
2186        if (o == null || getClass() != o.getClass()) return false;
2187        if (!super.equals(o)) return false;
2188
2189        Lookahead lookahead = (Lookahead) o;
2190
2191        return isPositive == lookahead.isPositive;
2192
2193    }
2194
2195    @Override
2196    public int hashCode() {
2197        int result = super.hashCode();
2198        result = 31 * result + (isPositive ? 1 : 0);
2199        return result;
2200    }
2201}
2202
2203class Lookbehind extends Term {
2204    final boolean isPositive;
2205    private int prevDistance = -1;
2206
2207    Lookbehind(int id, boolean isPositive) {
2208        distance = 0;
2209        this.isPositive = isPositive;
2210        in = this;
2211        out = new Term();
2212        if (isPositive) {
2213            type = PLOOKBEHIND_IN;
2214            out.type = PLOOKBEHIND_OUT;
2215        } else {
2216            type = NLOOKBEHIND_IN;
2217            out.type = NLOOKBEHIND_OUT;
2218            branchOut = this;
2219        }
2220        lookaheadId = id;
2221        out.lookaheadId = id;
2222    }
2223
2224    protected Term append(Term t) throws PatternSyntaxException {
2225        distance += length(t);
2226        return super.append(t);
2227    }
2228
2229    protected Term replaceCurrent(Term t) throws PatternSyntaxException {
2230        distance += length(t) - length(current);
2231        return super.replaceCurrent(t);
2232    }
2233
2234    private static int length(Term t) throws PatternSyntaxException {
2235        int type = t.type;
2236        switch (type) {
2237            case CHAR:
2238            case BITSET:
2239            case BITSET2:
2240            case ANY_CHAR:
2241            case ANY_CHAR_NE:
2242                return 1;
2243            case BOUNDARY:
2244            case DIRECTION:
2245            case UBOUNDARY:
2246            case UDIRECTION:
2247                return 0;
2248            default:
2249                if (type >= FIRST_TRANSPARENT && type <= LAST_TRANSPARENT) return 0;
2250                throw new PatternSyntaxException("variable length element within a lookbehind assertion");
2251        }
2252    }
2253
2254    protected void startNewBranch() throws PatternSyntaxException {
2255        prevDistance = distance;
2256        distance = 0;
2257        super.startNewBranch();
2258    }
2259
2260    protected void close() throws PatternSyntaxException {
2261        int pd = prevDistance;
2262        if (pd >= 0) {
2263            if (distance != pd)
2264                throw new PatternSyntaxException("non-equal branch lengths within a lookbehind assertion");
2265        }
2266        super.close();
2267    }
2268
2269    @Override
2270    public boolean equals(Object o) {
2271        if (this == o) return true;
2272        if (o == null || getClass() != o.getClass()) return false;
2273        if (!super.equals(o)) return false;
2274
2275        Lookbehind that = (Lookbehind) o;
2276
2277        return isPositive == that.isPositive && prevDistance == that.prevDistance;
2278
2279    }
2280
2281    @Override
2282    public int hashCode() {
2283        int result = super.hashCode();
2284        result = 31 * result + (isPositive ? 1 : 0);
2285        result = 31 * result + prevDistance;
2286        return result;
2287    }
2288}
2289
2290class TermIterator extends Term {
2291
2292    TermIterator(Term term, int min, int max, ArrayList<TermIterator> collection) throws PatternSyntaxException {
2293        collection.add(this);
2294        switch (term.type) {
2295            case CHAR:
2296            case ANY_CHAR:
2297            case ANY_CHAR_NE:
2298            case BITSET:
2299            case BITSET2: {
2300                target = term;
2301                Term back = new Term();
2302                if (min <= 0 && max < 0) {
2303                    type = REPEAT_0_INF;
2304                    back.type = BACKTRACK_0;
2305                } else if (min > 0 && max < 0) {
2306                    type = REPEAT_MIN_INF;
2307                    back.type = BACKTRACK_MIN;
2308                    minCount = back.minCount = min;
2309                } else {
2310                    type = REPEAT_MIN_MAX;
2311                    back.type = BACKTRACK_MIN;
2312                    minCount = back.minCount = min;
2313                    maxCount = max;
2314                }
2315
2316                failNext = back;
2317
2318                in = this;
2319                out = this;
2320                out1 = back;
2321                branchOut = null;
2322                return;
2323            }
2324            case REG: {
2325                target = term;
2326                memreg = term.memreg;
2327                Term back = new Term();
2328                if (max < 0) {
2329                    type = REPEAT_REG_MIN_INF;
2330                    back.type = BACKTRACK_REG_MIN;
2331                    minCount = back.minCount = min;
2332                } else {
2333                    type = REPEAT_REG_MIN_MAX;
2334                    back.type = BACKTRACK_REG_MIN;
2335                    minCount = back.minCount = min;
2336                    maxCount = max;
2337                }
2338
2339                failNext = back;
2340
2341                in = this;
2342                out = this;
2343                out1 = back;
2344                branchOut = null;
2345                return;
2346            }
2347            default:
2348                throw new PatternSyntaxException("can't iterate this type: " + term.type);
2349        }
2350    }
2351
2352    void optimize() {
2353//BACKTRACK_MIN_REG_FIND
2354        Term back = failNext;
2355        Optimizer opt = Optimizer.find(back.next);
2356        if (opt == null) return;
2357        failNext = opt.makeBacktrack(back);
2358    }
2359
2360}