001/** 002 * Copyright (c) 2001, Sergey A. Samokhodkin 003 * All rights reserved. 004 * <p> 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * <p> 008 * - Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * - Redistributions in binary form 011 * must reproduce the above copyright notice, this list of conditions and the following 012 * disclaimer in the documentation and/or other materials provided with the distribution. 013 * - Neither the name of jregex nor the names of its contributors may be used 014 * to endorse or promote products derived from this software without specific prior 015 * written permission. 016 * <p> 017 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 018 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 019 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 020 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 021 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 022 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 023 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 024 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 025 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
026 * 027 * @version 1.2_01 028 */ 029 030package regexodus; 031 032import regexodus.ds.IntBitSet; 033 034import java.util.ArrayList; 035import java.util.Arrays; 036import java.util.HashMap; 037 038public class Term implements REFlags { 039 040 //runtime Term types 041 static final int CHAR = 0; 042 static final int BITSET = 1; 043 static final int BITSET2 = 2; 044 static final int ANY_CHAR = 4; 045 static final int ANY_CHAR_NE = 5; 046 047 static final int REG = 6; 048 static final int REG_I = 7; 049 static final int FIND = 8; 050 static final int FINDREG = 9; 051 static final int SUCCESS = 10; 052 053 /*optimization-transparent types*/ 054 static final int BOUNDARY = 11; 055 static final int DIRECTION = 12; 056 static final int UBOUNDARY = 13; 057 static final int UDIRECTION = 14; 058 059 static final int GROUP_IN = 15; 060 static final int GROUP_OUT = 16; 061 static final int VOID = 17; 062 063 static final int START = 18; 064 static final int END = 19; 065 static final int END_EOL = 20; 066 static final int LINE_START = 21; 067 static final int LINE_END = 22; 068 static final int LAST_MATCH_END = 23; 069 070 static final int CNT_SET_0 = 24; 071 static final int CNT_INC = 25; 072 static final int CNT_GT_EQ = 26; 073 static final int READ_CNT_LT = 27; 074 075 static final int CRSTORE_CRINC = 28; //store on 'actual' search entry 076 static final int CR_SET_0 = 29; 077 static final int CR_LT = 30; 078 static final int CR_GT_EQ = 31; 079 080 081 static final int LITERAL_START = 60; 082 static final int LITERAL_END = 61; 083 084 /*optimization-nontransparent types*/ 085 static final int BRANCH = 32; 086 static final int BRANCH_STORE_CNT = 33; 087 static final int BRANCH_STORE_CNT_AUX1 = 34; 088 089 static final int PLOOKAHEAD_IN = 35; 090 static final int PLOOKAHEAD_OUT = 36; 091 static final int NLOOKAHEAD_IN = 37; 092 static final int NLOOKAHEAD_OUT = 38; 093 static final int PLOOKBEHIND_IN = 39; 094 static final int PLOOKBEHIND_OUT = 40; 095 static final int 
NLOOKBEHIND_IN = 41; 096 static final int NLOOKBEHIND_OUT = 42; 097 static final int INDEPENDENT_IN = 43; //functionally the same as NLOOKAHEAD_IN 098 static final int INDEPENDENT_OUT = 44; 099 100 static final int REPEAT_0_INF = 45; 101 static final int REPEAT_MIN_INF = 46; 102 static final int REPEAT_MIN_MAX = 47; 103 static final int REPEAT_REG_MIN_INF = 48; 104 static final int REPEAT_REG_MIN_MAX = 49; 105 106 static final int BACKTRACK_0 = 50; 107 static final int BACKTRACK_MIN = 51; 108 static final int BACKTRACK_FIND_MIN = 52; 109 static final int BACKTRACK_FINDREG_MIN = 53; 110 static final int BACKTRACK_REG_MIN = 54; 111 112 static final int MEMREG_CONDITION = 55; 113 static final int LOOKAHEAD_CONDITION_IN = 56; 114 static final int LOOKAHEAD_CONDITION_OUT = 57; 115 static final int LOOKBEHIND_CONDITION_IN = 58; 116 static final int LOOKBEHIND_CONDITION_OUT = 59; 117 118 //optimization 119 static final int FIRST_TRANSPARENT = BOUNDARY; 120 static final int LAST_TRANSPARENT = CR_GT_EQ; 121 122 // compile-time: length of vars[] (see makeTree()) 123 private static final int VARS_LENGTH = 4; 124 125 // compile-time variable indices: 126 private static final int MEMREG_COUNT = 0; //refers current memreg index 127 private static final int CNTREG_COUNT = 1; //refers current counters number 128 private static final int DEPTH = 2; //refers current depth: (((depth=3))) 129 private static final int LOOKAHEAD_COUNT = 3; //refers current memreg index 130 131 private static final int LIMITS_LENGTH = 3; 132 private static final int LIMITS_PARSE_RESULT_INDEX = 2; 133 private static final int LIMITS_OK = 1; 134 private static final int LIMITS_FAILURE = 2; 135 136 private static final int LITERAL_FLAG = 64; 137 138 //static CustomParser[] customParsers=new CustomParser[256]; 139 140 // **** CONTROL FLOW **** 141 142 // next-to-execute and next-if-failed commands; 143 Term next, failNext; 144 145 // **** TYPES **** 146 147 int type = VOID; 148 boolean inverse; 149 150 // 
used with type=CHAR 151 char c; 152 153 // used with type=FIND 154 int distance; 155 boolean eat; 156 157 // used with type=BITSET(2); 158 IntBitSet bitset; 159 IntBitSet[] bitset2; 160 private boolean[] categoryBitset; //types(unicode categories) 161 162 // used with type=BALANCE; 163 private char[] brackets; 164 165 // used for optimization with type=BITSET,BITSET2 166 int weight; 167 168 // **** MEMORISATION **** 169 170 // memory slot, used with type=REG,GROUP_IN,GROUP_OUT 171 int memreg = -1; 172 173 174 // **** COUNTERS **** 175 176 // max|min number of iterations 177 // used with CNT_GT_EQ ,REPEAT_* etc.; 178 int minCount, maxCount; 179 180 // used with REPEAT_*,REPEAT_REG_*; 181 Term target; 182 183 // a counter slot to increment & compare with maxCount (CNT_INC etc.); 184 int cntreg = 0; 185 186 // lookahead group id; 187 int lookaheadId; 188 189 // **** COMPILE HELPERS **** 190 191 Term prev; 192 Term in; 193 Term out; 194 Term out1; 195 protected Term first; 196 Term current; 197 198 //new!! 
199 Term branchOut; 200 201 //protected boolean newBranch=false,closed=false; 202 //protected boolean newBranch=false; 203 204 //for debugging 205 private static int instances; 206 private int instanceNum; 207 208 Term() { 209 //for debugging 210 instanceNum = instances; 211 instances++; 212 in = out = this; 213 } 214 215 Term(int type) { 216 this(); 217 this.type = type; 218 } 219 220 static void makeTree(String s, int flags, Pattern re) throws PatternSyntaxException { 221 instances = 0; 222 char[] data = s.toCharArray(); 223 makeTree(data, 0, data.length, flags, re); 224 } 225 226 private static void makeTree(char[] data, int offset, int end, 227 int flags, Pattern re) throws PatternSyntaxException { 228 // memreg,counter,depth,lookahead 229 int[] vars = {1, 0, 0, 0}; //don't use counters[0] 230 231 //collect iterators for subsequent optimization 232 ArrayList<TermIterator> iterators = new ArrayList<TermIterator>(); 233 HashMap<String, Integer> groupNames = new HashMap<String, Integer>(); 234 235 Pretokenizer t = new Pretokenizer(data, offset, end); 236 Term term = makeTree(t, data, vars, flags, new Group(), iterators, groupNames); 237 238 // convert closing outer bracket into success term 239 term.out.type = SUCCESS; 240 241 //throw out opening bracket 242 Term first = term.next; 243 244 // Optimisation: 245 //Term optimized = first; 246 //Optimizer opt = Optimizer.find(first); 247 //if (opt != null) optimized = opt.makeFirst(first); 248 249 for (TermIterator i : iterators) { 250 i.optimize(); 251 } 252 253 //re.root = optimized; 254 re.root = first; 255 re.root0 = first; 256 re.memregs = vars[MEMREG_COUNT]; 257 re.counters = vars[CNTREG_COUNT]; 258 re.lookaheads = vars[LOOKAHEAD_COUNT]; 259 re.namedGroupMap = groupNames; 260 } 261 262 private static Term makeTree(Pretokenizer t, char[] data, int[] vars, 263 int flags, Term term, ArrayList<TermIterator> iterators, HashMap<String, Integer> groupNames) throws PatternSyntaxException { 264 if (vars.length != 
VARS_LENGTH) 265 throw new IllegalArgumentException("vars.length should be " + VARS_LENGTH + ", not " + vars.length); 266 //Term term=new Term(isMemReg? vars[MEMREG_COUNT]: -1); 267 // use memreg 0 as insignificant 268 //Term term=new Group(isMemReg? vars[MEMREG_COUNT]: 0); 269 while (true) { 270 t.next(); 271 term.append(t.tOffset, t.tOutside, data, vars, flags, iterators, groupNames); 272 switch (t.ttype) { 273 case Pretokenizer.FLAGS: 274 flags = t.flags(flags); 275 continue; 276 case Pretokenizer.CLASS_GROUP: 277 t.next(); 278 Term clg = new Term(); 279 CharacterClass.parseGroup(data, t.tOffset, t.tOutside, clg, 280 (flags & IGNORE_CASE) > 0, (flags & IGNORE_SPACES) > 0, 281 (flags & UNICODE) > 0, (flags & XML_SCHEMA) > 0); 282 term.append(clg); 283 continue; 284 case Pretokenizer.PLAIN_GROUP: 285 vars[DEPTH]++; 286 term.append(makeTree(t, data, vars, t.flags(flags), new Group(), iterators, groupNames)); 287 break; 288 case Pretokenizer.NAMED_GROUP: 289 String gname = t.groupName; 290 int id; 291 if (Character.isDigit(gname.charAt(0))) { 292 try { 293 id = Integer.parseInt(gname); 294 } catch (NumberFormatException e) { 295 throw new PatternSyntaxException("group name starts with digit but is not a number"); 296 } 297 if (groupNames.containsValue(id)) { 298 if (t.groupDeclared) 299 throw new PatternSyntaxException("group redeclaration: " + gname + "; use ({=id}...) for multiple group assignments"); 300 } 301 if (vars[MEMREG_COUNT] <= id) vars[MEMREG_COUNT] = id + 1; 302 } else { 303 Integer no = groupNames.get(gname); 304 if (no == null) { 305 id = vars[MEMREG_COUNT]++; 306 groupNames.put(t.groupName, id); 307 } else { 308 if (t.groupDeclared) 309 throw new PatternSyntaxException("group redeclaration " + gname + "; use ({=name}...) 
for group reassignments"); 310 id = no; 311 } 312 } 313 vars[DEPTH]++; 314 term.append(makeTree(t, data, vars, flags, new Group(id), iterators, groupNames)); 315 break; 316 case '(': 317 vars[DEPTH]++; 318 term.append(makeTree(t, data, vars, flags, new Group(vars[MEMREG_COUNT]++), iterators, groupNames)); 319 break; 320 case Pretokenizer.POS_LOOKAHEAD: 321 vars[DEPTH]++; 322 term.append(makeTree(t, data, vars, flags, new Lookahead(vars[LOOKAHEAD_COUNT]++, true), iterators, groupNames)); 323 break; 324 case Pretokenizer.NEG_LOOKAHEAD: 325 vars[DEPTH]++; 326 term.append(makeTree(t, data, vars, flags, new Lookahead(vars[LOOKAHEAD_COUNT]++, false), iterators, groupNames)); 327 break; 328 case Pretokenizer.POS_LOOKBEHIND: 329 vars[DEPTH]++; 330 term.append(makeTree(t, data, vars, flags, new Lookbehind(vars[LOOKAHEAD_COUNT]++, true), iterators, groupNames)); 331 break; 332 case Pretokenizer.NEG_LOOKBEHIND: 333 vars[DEPTH]++; 334 term.append(makeTree(t, data, vars, flags, new Lookbehind(vars[LOOKAHEAD_COUNT]++, false), iterators, groupNames)); 335 break; 336 case Pretokenizer.INDEPENDENT_REGEX: 337 vars[DEPTH]++; 338 term.append(makeTree(t, data, vars, flags, new IndependentGroup(vars[LOOKAHEAD_COUNT]++), iterators, groupNames)); 339 break; 340 case Pretokenizer.CONDITIONAL_GROUP: 341 vars[DEPTH]++; 342 t.next(); 343 Term fork; 344 boolean positive = true; 345 switch (t.ttype) { 346 case Pretokenizer.NEG_LOOKAHEAD: 347 positive = false; 348 case Pretokenizer.POS_LOOKAHEAD: 349 vars[DEPTH]++; 350 Lookahead la = new Lookahead(vars[LOOKAHEAD_COUNT]++, positive); 351 makeTree(t, data, vars, flags, la, iterators, groupNames); 352 fork = new ConditionalExpr(la); 353 break; 354 case Pretokenizer.NEG_LOOKBEHIND: 355 positive = false; 356 case Pretokenizer.POS_LOOKBEHIND: 357 vars[DEPTH]++; 358 Lookbehind lb = new Lookbehind(vars[LOOKAHEAD_COUNT]++, positive); 359 makeTree(t, data, vars, flags, lb, iterators, groupNames); 360 fork = new ConditionalExpr(lb); 361 break; 362 case 
'(': 363 t.next(); 364 if (t.ttype != ')') throw new PatternSyntaxException("malformed condition"); 365 int memregNo; 366 if (Character.isDigit(data[t.tOffset])) memregNo = makeNumber(t.tOffset, t.tOutside, data); 367 else { 368 String gn = new String(data, t.tOffset, t.tOutside - t.tOffset); 369 Integer gno = groupNames.get(gn); 370 if (gno == null) 371 throw new PatternSyntaxException("unknown group name in conditional expr.: " + gn); 372 memregNo = gno; 373 } 374 fork = new ConditionalExpr(memregNo); 375 break; 376 default: 377 throw new PatternSyntaxException("malformed conditional expression: " + t.ttype + " '" + (char) t.ttype + "'"); 378 } 379 term.append(makeTree(t, data, vars, flags, fork, iterators, groupNames)); 380 break; 381 case '|': 382 term.newBranch(); 383 break; 384 case Pretokenizer.END: 385 if (vars[DEPTH] > 0) throw new PatternSyntaxException("unbalanced parenthesis"); 386 term.close(); 387 return term; 388 case ')': 389 if (vars[DEPTH] <= 0) throw new PatternSyntaxException("unbalanced parenthesis"); 390 term.close(); 391 vars[DEPTH]--; 392 return term; 393 case Pretokenizer.COMMENT: 394 while (t.ttype != ')') t.next(); 395 continue; 396 default: 397 throw new PatternSyntaxException("unknown token type: " + t.ttype); 398 } 399 } 400 } 401 402 private static int makeNumber(int off, int out, char[] data) { 403 int n = 0; 404 for (int i = off; i < out; i++) { 405 int d = data[i] - '0'; 406 if (d < 0 || d > 9) return -1; 407 n *= 10; 408 n += d; 409 } 410 return n; 411 } 412 413 private void append(int offset, int end, char[] data, 414 int[] vars, int flags, ArrayList<TermIterator> iterators, HashMap<String, Integer> gmap) throws PatternSyntaxException { 415 int[] limits = new int[3]; 416 int i = offset; 417 Term tmp, current = this.current; 418 while (i < end) { 419 char c = data[i]; 420 boolean greedy = true; 421 if((flags & LITERAL_FLAG) != LITERAL_FLAG) { 422 switch (c) { 423 //operations 424 case '*': 425 if (current == null) throw new 
PatternSyntaxException("missing term before *"); 426 i++; 427 if (i < end && data[i] == '?') { 428 greedy = false; 429 i++; 430 } 431 tmp = greedy ? makeGreedyStar(vars, current, iterators) : 432 makeLazyStar(vars, current); 433 current = replaceCurrent(tmp); 434 break; 435 436 case '+': 437 if (current == null) throw new PatternSyntaxException("missing term before +"); 438 i++; 439 if (i < end && data[i] == '?') { 440 greedy = false; 441 i++; 442 } 443 tmp = greedy ? makeGreedyPlus(vars, current, iterators) : 444 makeLazyPlus(vars, current); 445 current = replaceCurrent(tmp); 446 break; 447 448 case '?': 449 if (current == null) throw new PatternSyntaxException("missing term before ?"); 450 i++; 451 if (i < end && data[i] == '?') { 452 greedy = false; 453 i++; 454 } 455 456 tmp = greedy ? makeGreedyQMark(vars, current) : 457 makeLazyQMark(vars, current); 458 current = replaceCurrent(tmp); 459 break; 460 461 case '{': 462 limits[0] = 0; 463 limits[1] = -1; 464 int le = parseLimits(i + 1, end, data, limits); 465 if (limits[LIMITS_PARSE_RESULT_INDEX] == LIMITS_OK) { //parse ok 466 if (current == null) throw new PatternSyntaxException("missing term before {}"); 467 i = le; 468 if (i < end && data[i] == '?') { 469 greedy = false; 470 i++; 471 } 472 tmp = greedy ? 
makeGreedyLimits(vars, current, limits, iterators) : 473 makeLazyLimits(vars, current, limits); 474 current = replaceCurrent(tmp); 475 break; 476 } else { //unicode class or named backreference 477 if (data[i + 1] == '\\') { //'{\name}' - backreference 478 int p = i + 2; 479 if (p == end) throw new PatternSyntaxException("'group_id' expected"); 480 while (Category.Z.contains(data[p])) { 481 p++; 482 if (p == end) throw new PatternSyntaxException("'group_id' expected"); 483 } 484 BackReference br = new BackReference(-1, (flags & IGNORE_CASE) > 0); 485 i = parseGroupId(data, p, end, br, gmap); 486 current = append(br); 487 continue; 488 } else { 489 Term t = new Term(); 490 i = CharacterClass.parseName(data, i, end, t, false, (flags & IGNORE_SPACES) > 0); 491 current = append(t); 492 continue; 493 } 494 } 495 496 case ' ': 497 case '\t': 498 case '\r': 499 case '\n': 500 if ((flags & IGNORE_SPACES) > 0) { 501 i++; 502 continue; 503 } 504 //else go on as default 505 506 //symbolic items 507 default: 508 tmp = new Term(); 509 i = parseTerm(data, i, end, tmp, flags); 510 511 if (tmp.type == LITERAL_START) { 512 flags |= LITERAL_FLAG; 513 break; 514 } else if (tmp.type == LITERAL_END) { 515 flags &= ~LITERAL_FLAG; 516 break; 517 } 518 519 if (tmp.type == END && i < end) { 520 throw new PatternSyntaxException("'$' is not a last term in the group: <" + new String(data, offset, end - offset) + ">"); 521 } 522 //"\A" 523 //if(tmp.type==START && i>(offset+1)){ 524 // throw new PatternSyntaxException("'^' is not a first term in the group: <"+new String(data,offset,end-offset)+">"); 525 //} 526 527 current = append(tmp); 528 break; 529 } 530 } 531 else { 532 tmp = new Term(); 533 i = parseTerm(data, i, end, tmp, flags); 534 535 if (tmp.type == LITERAL_START) { 536 flags |= LITERAL_FLAG; 537 break; 538 } else if (tmp.type == LITERAL_END) { 539 flags &= ~LITERAL_FLAG; 540 break; 541 } 542 543 if (tmp.type == END && i < end) { 544 throw new PatternSyntaxException("'$' is not a 
last term in the group: <" + new String(data, offset, end - offset) + ">"); 545 } 546 547 current = append(tmp); 548 } 549 } 550 } 551 552 /* 553 static boolean isIdentifierPart() 554 { 555 556 }*/ 557 558 559 private static int parseGroupId(char[] data, int i, int end, Term term, HashMap<String, Integer> gmap) throws PatternSyntaxException { 560 int id; 561 int nstart = i; 562 if (Character.isDigit(data[i])) { 563 while (Character.isDigit(data[i])) { 564 i++; 565 if (i == end) throw new PatternSyntaxException("group_id expected"); 566 } 567 id = makeNumber(nstart, i, data); 568 } else { 569 while (Category.IdentifierPart.contains(data[i])) { 570 i++; 571 if (i == end) throw new PatternSyntaxException("group_id expected"); 572 } 573 String s = new String(data, nstart, i - nstart); 574 Integer no = gmap.get(s); 575 if (no == null) throw new PatternSyntaxException("backreference to unknown group: " + s); 576 id = no; 577 } 578 while (Category.Z.contains(data[i])) { 579 i++; 580 if (i == end) throw new PatternSyntaxException("'}' expected"); 581 } 582 583 int c = data[i++]; 584 585 if (c != '}') throw new PatternSyntaxException("'}' expected"); 586 587 term.memreg = id; 588 return i; 589 } 590 591 Term append(Term term) throws PatternSyntaxException { 592 //Term prev=this.prev; 593 Term current = this.current; 594 if (current == null) { 595 in.next = term; 596 term.prev = in; 597 this.current = term; 598 return term; 599 } 600 link(current, term); 601 //this.prev=current; 602 this.current = term; 603 return term; 604 } 605 606 Term replaceCurrent(Term term) throws PatternSyntaxException { 607 //Term prev=this.prev; 608 Term prev = current.prev; 609 if (prev != null) { 610 Term in = this.in; 611 if (prev == in) { 612 //in.next=term; 613 //term.prev=in; 614 in.next = term.in; 615 term.in.prev = in; 616 } else link(prev, term); 617 } 618 this.current = term; 619 return term; 620 } 621 622 623 private void newBranch() throws PatternSyntaxException { 624 close(); 625 
startNewBranch(); 626 } 627 628 629 void close() throws PatternSyntaxException { 630 /* 631 Term prev=this.prev; 632 if(prev!=null){ 633 Term current=this.current; 634 if(current!=null){ 635 link(prev,current); 636 prev=current; 637 this.current=null; 638 } 639 link(prev,out); 640 this.prev=null; 641 } 642 */ 643 Term current = this.current; 644 if (current != null) linkd(current, out); 645 else in.next = out; 646 } 647 648 private static void link(Term term, Term next) { 649 linkd(term, next.in); 650 next.prev = term; 651 } 652 653 private static void linkd(Term term, Term next) { 654 Term prev_out = term.out; 655 if (prev_out != null) { 656 prev_out.next = next; 657 } 658 Term prev_out1 = term.out1; 659 if (prev_out1 != null) { 660 prev_out1.next = next; 661 } 662 Term prev_branch = term.branchOut; 663 if (prev_branch != null) { 664 prev_branch.failNext = next; 665 } 666 } 667 668 void startNewBranch() throws PatternSyntaxException { 669 Term tmp = in.next; 670 Term b = new Branch(); 671 in.next = b; 672 b.next = tmp; 673 b.in = null; 674 b.out = null; 675 b.out1 = null; 676 b.branchOut = b; 677 current = b; 678 } 679 680 private static Term makeGreedyStar(int[] vars, Term term, ArrayList<TermIterator> iterators) throws PatternSyntaxException { 681 //vars[STACK_SIZE]++; 682 switch (term.type) { 683 case GROUP_IN: { 684 Term b = new Branch(); 685 b.next = term.in; 686 term.out.next = b; 687 688 b.in = b; 689 b.out = null; 690 b.out1 = null; 691 b.branchOut = b; 692 693 return b; 694 } 695 default: { 696 return new TermIterator(term, 0, -1, iterators); 697 } 698 } 699 } 700 701 private static Term makeLazyStar(int[] vars, Term term) { 702 //vars[STACK_SIZE]++; 703 switch (term.type) { 704 case GROUP_IN: { 705 Term b = new Branch(); 706 b.failNext = term.in; 707 term.out.next = b; 708 709 b.in = b; 710 b.out = b; 711 b.out1 = null; 712 b.branchOut = null; 713 714 return b; 715 } 716 default: { 717 Term b = new Branch(); 718 b.failNext = term; 719 term.next = b; 720 
721 b.in = b; 722 b.out = b; 723 b.out1 = null; 724 b.branchOut = null; 725 726 return b; 727 } 728 } 729 } 730 731 private static Term makeGreedyPlus(int[] vars, Term term, ArrayList<TermIterator> iterators) throws PatternSyntaxException { 732 //vars[STACK_SIZE]++; 733 switch (term.type) { 734 case INDEPENDENT_IN://? 735 case GROUP_IN: { 736 Term b = new Branch(); 737 b.next = term.in; 738 term.out.next = b; 739 740 b.in = term.in; 741 b.out = null; 742 b.out1 = null; 743 b.branchOut = b; 744 745 746 return b; 747 } 748 default: { 749 return new TermIterator(term, 1, -1, iterators); 750 } 751 } 752 } 753 754 private static Term makeLazyPlus(int[] vars, Term term) { 755 //vars[STACK_SIZE]++; 756 switch (term.type) { 757 case GROUP_IN: { 758 Term b = new Branch(); 759 term.out.next = b; 760 b.failNext = term.in; 761 762 b.in = term.in; 763 b.out = b; 764 b.out1 = null; 765 b.branchOut = null; 766 767 return b; 768 } 769 case REG: 770 default: { 771 Term b = new Branch(); 772 term.next = b; 773 b.failNext = term; 774 775 b.in = term; 776 b.out = b; 777 b.out1 = null; 778 b.branchOut = null; 779 780 return b; 781 } 782 } 783 } 784 785 private static Term makeGreedyQMark(int[] vars, Term term) { 786 //vars[STACK_SIZE]++; 787 switch (term.type) { 788 case GROUP_IN: { 789 Term b = new Branch(); 790 b.next = term.in; 791 792 b.in = b; 793 b.out = term.out; 794 b.out1 = null; 795 b.branchOut = b; 796 797 return b; 798 } 799 case REG: 800 default: { 801 Term b = new Branch(); 802 b.next = term; 803 804 b.in = b; 805 b.out = term; 806 b.out1 = null; 807 b.branchOut = b; 808 809 return b; 810 } 811 } 812 } 813 814 private static Term makeLazyQMark(int[] vars, Term term) { 815 //vars[STACK_SIZE]++; 816 switch (term.type) { 817 case GROUP_IN: { 818 Term b = new Branch(); 819 b.failNext = term.in; 820 821 b.in = b; 822 b.out = b; 823 b.out1 = term.out; 824 b.branchOut = null; 825 826 return b; 827 } 828 case REG: 829 default: { 830 Term b = new Branch(); 831 b.failNext = term; 
832 833 b.in = b; 834 b.out = b; 835 b.out1 = term; 836 b.branchOut = null; 837 838 return b; 839 } 840 } 841 } 842 843 private static Term makeGreedyLimits(int[] vars, Term term, int[] limits, ArrayList<TermIterator> iterators) throws PatternSyntaxException { 844 //vars[STACK_SIZE]++; 845 int m = limits[0]; 846 int n = limits[1]; 847 switch (term.type) { 848 case GROUP_IN: { 849 int cntreg = vars[CNTREG_COUNT]++; 850 Term reset = new Term(CR_SET_0); 851 reset.cntreg = cntreg; 852 Term b = new Term(BRANCH); 853 854 Term inc = new Term(CRSTORE_CRINC); 855 inc.cntreg = cntreg; 856 857 reset.next = b; 858 859 if (n >= 0) { 860 Term lt = new Term(CR_LT); 861 lt.cntreg = cntreg; 862 lt.maxCount = n; 863 b.next = lt; 864 lt.next = term.in; 865 } else { 866 b.next = term.in; 867 } 868 term.out.next = inc; 869 inc.next = b; 870 871 if (m >= 0) { 872 Term gt = new Term(CR_GT_EQ); 873 gt.cntreg = cntreg; 874 gt.maxCount = m; 875 b.failNext = gt; 876 877 reset.in = reset; 878 reset.out = gt; 879 reset.out1 = null; 880 reset.branchOut = null; 881 } else { 882 reset.in = reset; 883 reset.out = null; 884 reset.out1 = null; 885 reset.branchOut = b; 886 } 887 return reset; 888 } 889 default: { 890 return new TermIterator(term, limits[0], limits[1], iterators); 891 } 892 } 893 } 894 895 private static Term makeLazyLimits(int[] vars, Term term, int[] limits) { 896 //vars[STACK_SIZE]++; 897 int m = limits[0]; 898 int n = limits[1]; 899 switch (term.type) { 900 case GROUP_IN: { 901 int cntreg = vars[CNTREG_COUNT]++; 902 Term reset = new Term(CR_SET_0); 903 reset.cntreg = cntreg; 904 Term b = new Term(BRANCH); 905 Term inc = new Term(CRSTORE_CRINC); 906 inc.cntreg = cntreg; 907 908 reset.next = b; 909 910 if (n >= 0) { 911 Term lt = new Term(CR_LT); 912 lt.cntreg = cntreg; 913 lt.maxCount = n; 914 b.failNext = lt; 915 lt.next = term.in; 916 } else { 917 b.failNext = term.in; 918 } 919 term.out.next = inc; 920 inc.next = b; 921 922 if (m >= 0) { 923 Term gt = new Term(CR_GT_EQ); 924 
gt.cntreg = cntreg; 925 gt.maxCount = m; 926 b.next = gt; 927 928 reset.in = reset; 929 reset.out = gt; 930 reset.out1 = null; 931 reset.branchOut = null; 932 933 return reset; 934 } else { 935 reset.in = reset; 936 reset.out = b; 937 reset.out1 = null; 938 reset.branchOut = null; 939 940 return reset; 941 } 942 } 943 case REG: 944 default: { 945 Term reset = new Term(CNT_SET_0); 946 Term b = new Branch(BRANCH_STORE_CNT); 947 Term inc = new Term(CNT_INC); 948 949 reset.next = b; 950 951 if (n >= 0) { 952 Term lt = new Term(READ_CNT_LT); 953 lt.maxCount = n; 954 b.failNext = lt; 955 lt.next = term; 956 term.next = inc; 957 inc.next = b; 958 } else { 959 b.next = term; 960 term.next = inc; 961 inc.next = term; 962 } 963 964 if (m >= 0) { 965 Term gt = new Term(CNT_GT_EQ); 966 gt.maxCount = m; 967 b.next = gt; 968 969 reset.in = reset; 970 reset.out = gt; 971 reset.out1 = null; 972 reset.branchOut = null; 973 974 return reset; 975 } else { 976 reset.in = reset; 977 reset.out = b; 978 reset.out1 = null; 979 reset.branchOut = null; 980 981 return reset; 982 } 983 } 984 } 985 } 986 987 988 private int parseTerm(char[] data, int i, int out, Term term, 989 int flags) throws PatternSyntaxException { 990 char c = data[i++]; 991 boolean inv = false; 992 if((flags & LITERAL_FLAG) == LITERAL_FLAG) 993 { 994 switch (c) 995 { 996 case '\\': 997 if(i < out + 1 && data[i] == 'E') 998 { 999 term.type = LITERAL_END; 1000 return i + 1; 1001 } 1002 default: 1003 term.type = CHAR; 1004 if ((flags & IGNORE_CASE) == 0) { 1005 term.c = c; 1006 } else { 1007 term.c = Category.caseFold(c); 1008 } 1009 return i; 1010 } 1011 } 1012 switch (c) { 1013 case '[': 1014 return CharacterClass.parseClass(data, i, out, term, (flags & IGNORE_CASE) > 0, (flags & IGNORE_SPACES) > 0, (flags & UNICODE) > 0, (flags & XML_SCHEMA) > 0); 1015 1016 case '.': 1017 term.type = (flags & DOTALL) > 0 ? ANY_CHAR : ANY_CHAR_NE; 1018 break; 1019 1020 case '$': 1021 //term.type=mods[MULTILINE_IND]? LINE_END: END; //?? 
1022 term.type = (flags & MULTILINE) > 0 ? LINE_END : END_EOL; 1023 break; 1024 1025 case '^': 1026 term.type = (flags & MULTILINE) > 0 ? LINE_START : START; 1027 break; 1028 1029 case '\\': 1030 if (i >= out) throw new PatternSyntaxException("Escape without a character"); 1031 c = data[i++]; 1032 switch (c) { 1033 case 'f': 1034 c = '\f'; // form feed 1035 break; 1036 1037 case 'n': 1038 c = '\n'; // new line 1039 break; 1040 1041 case 'r': 1042 c = '\r'; // carriage return 1043 break; 1044 1045 1046 1047 case 't': 1048 c = '\t'; // tab 1049 break; 1050 1051 case 'u': 1052 if(i < out - 3) 1053 c = (char) ((CharacterClass.toHexDigit(data[i++]) << 12) + 1054 (CharacterClass.toHexDigit(data[i++]) << 8) + 1055 (CharacterClass.toHexDigit(data[i++]) << 4) + 1056 CharacterClass.toHexDigit(data[i++])); 1057 else { 1058 c = '\0'; 1059 i = out; 1060 } 1061 break; 1062 1063 case 'x': { // hex 2-digit number -> char 1064 int hex = 0; 1065 char d; 1066 if ((d = data[i++]) == '{') { 1067 while (i < out && (d = data[i++]) != '}') { 1068 hex = (hex << 4) + CharacterClass.toHexDigit(d); 1069 if (hex > 0xffff || i == out) 1070 throw new PatternSyntaxException("\\x{<out of range or incomplete>}"); 1071 } 1072 } else { 1073 hex = (CharacterClass.toHexDigit(d) << 4) + 1074 CharacterClass.toHexDigit(data[i++]); 1075 } 1076 c = (char) hex; 1077 break; 1078 } 1079 case '0': 1080 case 'o': // oct 2- or 3-digit number -> char 1081 int oct = 0; 1082 for (; i < out; ) { 1083 char d = data[i++]; 1084 if (d >= '0' && d <= '7') { 1085 oct *= 8; 1086 oct += d - '0'; 1087 if (oct > 0xffff) { 1088 oct -= d - '0'; 1089 oct /= 8; 1090 break; 1091 } 1092 } else break; 1093 } 1094 c = (char) oct; 1095 break; 1096 1097 case 'm': // decimal number -> char 1098 int dec = 0; 1099 for (; i < out; ) { 1100 char d = data[i++]; 1101 if (d >= '0' && d <= '9') { 1102 dec *= 10; 1103 dec += d - '0'; 1104 if (dec > 0xffff){ 1105 dec -= d - '0'; 1106 dec /= 10; 1107 break; 1108 } 1109 } else break; 1110 } 1111 c = 
(char) dec; 1112 break; 1113 1114 case 'c': // ctrl-char 1115 c = (char) (data[i++] & 0x1f); 1116 break; 1117 1118 case 'D': // non-digit 1119 inv = true; 1120 // go on 1121 case 'd': // digit 1122 CharacterClass.makeDigit(term, inv, (flags & UNICODE) > 0); 1123 return i; 1124 1125 case 'S': // non-space 1126 inv = true; 1127 // go on 1128 case 's': // space 1129 CharacterClass.makeSpace(term, inv, (flags & UNICODE) > 0); 1130 return i; 1131 1132 case 'W': // non-letter 1133 inv = true; 1134 // go on 1135 case 'w': // letter 1136 CharacterClass.makeWordChar(term, inv, (flags & UNICODE) > 0); 1137 return i; 1138 1139 case 'B': // non-(word boundary) 1140 inv = true; 1141 // go on 1142 case 'b': // word boundary 1143 CharacterClass.makeWordBoundary(term, inv, (flags & UNICODE) > 0); 1144 return i; 1145 1146 case '<': // word start 1147 CharacterClass.makeWordStart(term, (flags & UNICODE) > 0); 1148 return i; 1149 1150 case '>': // word end 1151 CharacterClass.makeWordEnd(term, (flags & UNICODE) > 0); 1152 return i; 1153 1154 case 'A': // text beginning 1155 term.type = START; 1156 return i; 1157 1158 case 'Z': // text end 1159 term.type = END_EOL; 1160 return i; 1161 1162 case 'z': // text end 1163 term.type = END; 1164 return i; 1165 1166 case 'G': // end of last match 1167 term.type = LAST_MATCH_END; 1168 return i; 1169 1170 case 'P': // \\P{..} 1171 inv = true; 1172 case 'p': // \\p{..} 1173 i = CharacterClass.parseName(data, i, out, term, inv, (flags & IGNORE_SPACES) > 0); 1174 return i; 1175 case 'Q': 1176 term.type = LITERAL_START; 1177 return i; 1178 1179 1180 default: 1181 if (c >= '1' && c <= '9') { 1182 int n = c - '0'; 1183 while ((i < out) && (c = data[i]) >= '0' && c <= '9') { 1184 n = (n * 10) + c - '0'; 1185 i++; 1186 } 1187 term.type = (flags & IGNORE_CASE) > 0 ? 
REG_I : REG; 1188 term.memreg = n; 1189 return i; 1190 } 1191 /* 1192 if(c<256){ 1193 CustomParser termp=customParsers[c]; 1194 if(termp!=null){ 1195 i=termp.parse(i,data,term); 1196 return i; 1197 } 1198 } 1199 */ 1200 } 1201 term.type = CHAR; 1202 term.c = c; 1203 break; 1204 1205 default: 1206 if ((flags & IGNORE_CASE) == 0) { 1207 term.type = CHAR; 1208 term.c = c; 1209 } else { 1210 term.type = CHAR; 1211 term.c = Category.caseFold(c); 1212 //CharacterClass.makeICase(term, c); 1213 } 1214 break; 1215 } 1216 return i; 1217 } 1218 1219 1220 // one of {n},{n,},{,n},{n1,n2} 1221 private static int parseLimits(int i, int end, char[] data, int[] limits) throws PatternSyntaxException { 1222 if (limits.length != LIMITS_LENGTH) 1223 throw new IllegalArgumentException("limits.length=" + limits.length + ", should be " + LIMITS_LENGTH); 1224 limits[LIMITS_PARSE_RESULT_INDEX] = LIMITS_OK; 1225 int ind = 0; 1226 int v = 0; 1227 char c; 1228 while (i < end) { 1229 c = data[i++]; 1230 switch (c) { 1231 case ' ': 1232 continue; 1233 1234 case ',': 1235 if (ind > 0) throw new PatternSyntaxException("illegal construction: {.. 
, , ..}"); 1236 limits[ind++] = v; 1237 v = -1; 1238 continue; 1239 1240 case '}': 1241 limits[ind] = v; 1242 if (ind == 0) limits[1] = v; 1243 return i; 1244 1245 default: 1246 if (c > '9' || c < '0') { 1247 //throw new PatternSyntaxException("illegal symbol in iterator: '{"+c+"}'"); 1248 limits[LIMITS_PARSE_RESULT_INDEX] = LIMITS_FAILURE; 1249 return i; 1250 } 1251 if (v < 0) v = 0; 1252 v = v * 10 + (c - '0'); 1253 } 1254 } 1255 throw new PatternSyntaxException("malformed quantifier"); 1256 } 1257 static String termLookup(int t) 1258 { 1259 switch (t) 1260 { 1261 case CHAR: return "CHAR"; 1262 case BITSET: return "BITSET"; 1263 case BITSET2: return "BITSET2"; 1264 case ANY_CHAR: return "ANY_CHAR"; 1265 case ANY_CHAR_NE: return "ANY_CHAR_NE"; 1266 case REG: return "REG"; 1267 case REG_I: return "REG_I"; 1268 case FIND: return "FIND"; 1269 case FINDREG: return "FINDREG"; 1270 case SUCCESS: return "SUCCESS"; 1271 case BOUNDARY: return "BOUNDARY"; 1272 case DIRECTION: return "DIRECTION"; 1273 case UBOUNDARY: return "UBOUNDARY"; 1274 case UDIRECTION: return "UDIRECTION"; 1275 case GROUP_IN: return "GROUP_IN"; 1276 case GROUP_OUT: return "GROUP_OUT"; 1277 case VOID: return "VOID"; 1278 case START: return "START"; 1279 case END: return "END"; 1280 case END_EOL: return "END_EOL"; 1281 case LINE_START: return "LINE_START"; 1282 case LINE_END: return "LINE_END"; 1283 case LAST_MATCH_END: return "LAST_MATCH_END"; 1284 case CNT_SET_0: return "CNT_SET_0"; 1285 case CNT_INC: return "CNT_INC"; 1286 case CNT_GT_EQ: return "CNT_GT_EQ"; 1287 case READ_CNT_LT: return "READ_CNT_LT"; 1288 case CRSTORE_CRINC: return "CRSTORE_CRINC"; 1289 case CR_SET_0: return "CR_SET_0"; 1290 case CR_LT: return "CR_LT"; 1291 case CR_GT_EQ: return "CR_GT_EQ"; 1292 case BRANCH: return "BRANCH"; 1293 case BRANCH_STORE_CNT: return "BRANCH_STORE_CNT"; 1294 case BRANCH_STORE_CNT_AUX1: return "BRANCH_STORE_CNT_AUX1"; 1295 case PLOOKAHEAD_IN: return "PLOOKAHEAD_IN"; 1296 case PLOOKAHEAD_OUT: return 
"PLOOKAHEAD_OUT"; 1297 case NLOOKAHEAD_IN: return "NLOOKAHEAD_IN"; 1298 case NLOOKAHEAD_OUT: return "NLOOKAHEAD_OUT"; 1299 case PLOOKBEHIND_IN: return "PLOOKBEHIND_IN"; 1300 case PLOOKBEHIND_OUT: return "PLOOKBEHIND_OUT"; 1301 case NLOOKBEHIND_IN: return "NLOOKBEHIND_IN"; 1302 case NLOOKBEHIND_OUT: return "NLOOKBEHIND_OUT"; 1303 case INDEPENDENT_IN: return "INDEPENDENT_IN"; 1304 case INDEPENDENT_OUT: return "INDEPENDENT_OUT"; 1305 case REPEAT_0_INF: return "REPEAT_0_INF"; 1306 case REPEAT_MIN_INF: return "REPEAT_MIN_INF"; 1307 case REPEAT_MIN_MAX: return "REPEAT_MIN_MAX"; 1308 case REPEAT_REG_MIN_INF: return "REPEAT_REG_MIN_INF"; 1309 case REPEAT_REG_MIN_MAX: return "REPEAT_REG_MIN_MAX"; 1310 case BACKTRACK_0: return "BACKTRACK_0"; 1311 case BACKTRACK_MIN: return "BACKTRACK_MIN"; 1312 case BACKTRACK_FIND_MIN: return "BACKTRACK_FIND_MIN"; 1313 case BACKTRACK_FINDREG_MIN: return "BACKTRACK_FINDREG_MIN"; 1314 case BACKTRACK_REG_MIN: return "BACKTRACK_REG_MIN"; 1315 case MEMREG_CONDITION: return "MEMREG_CONDITION"; 1316 case LOOKAHEAD_CONDITION_IN: return "LOOKAHEAD_CONDITION_IN"; 1317 case LOOKAHEAD_CONDITION_OUT: return "LOOKAHEAD_CONDITION_OUT"; 1318 case LOOKBEHIND_CONDITION_IN: return "LOOKBEHIND_CONDITION_IN"; 1319 case LOOKBEHIND_CONDITION_OUT: return "LOOKBEHIND_CONDITION_OUT"; 1320 default: return "UNKNOWN_TERM"; 1321 } 1322 } 1323 public String toString() { 1324 StringBuilder b = new StringBuilder(100); 1325 //b.append(hashCode()); 1326 b.append(instanceNum); 1327 b.append(' '); 1328 b.append(termLookup(type)); 1329 b.append(": "); 1330 if (inverse) b.append('^'); 1331 switch (type) { 1332 case VOID: 1333 b.append("[]"); 1334 b.append(" , "); 1335 break; 1336 case CHAR: 1337 b.append(CharacterClass.stringValue(c)); 1338 b.append(" , "); 1339 break; 1340 case ANY_CHAR: 1341 b.append("dotall, "); 1342 break; 1343 case ANY_CHAR_NE: 1344 b.append("dot-eols, "); 1345 break; 1346 case BITSET: 1347 b.append('['); 1348 b.append(CharacterClass.stringValue0(bitset)); 
1349 b.append(']'); 1350 b.append(" , weight="); 1351 b.append(weight); 1352 b.append(" , "); 1353 break; 1354 case BITSET2: 1355 b.append('['); 1356 b.append(CharacterClass.stringValue2(bitset2)); 1357 b.append(']'); 1358 b.append(" , weight2="); 1359 b.append(weight); 1360 b.append(" , "); 1361 break; 1362 case START: 1363 b.append("abs.start"); 1364 break; 1365 case END: 1366 b.append("abs.end"); 1367 break; 1368 case END_EOL: 1369 b.append("abs.end-eol"); 1370 break; 1371 case LINE_START: 1372 b.append("line start"); 1373 break; 1374 case LINE_END: 1375 b.append("line end"); 1376 break; 1377 case LAST_MATCH_END: 1378 if (inverse) b.append("non-"); 1379 b.append("BOUNDARY"); 1380 break; 1381 case BOUNDARY: 1382 if (inverse) b.append("non-"); 1383 b.append("BOUNDARY"); 1384 break; 1385 case UBOUNDARY: 1386 if (inverse) b.append("non-"); 1387 b.append("UBOUNDARY"); 1388 break; 1389 case DIRECTION: 1390 b.append("DIRECTION"); 1391 break; 1392 case UDIRECTION: 1393 b.append("UDIRECTION"); 1394 break; 1395 case FINDREG: 1396 b.append('%'); 1397 case FIND: 1398 b.append(">>>{"); 1399 b.append(target); 1400 b.append("}, <<"); 1401 b.append(distance); 1402 if (eat) { 1403 b.append(",eat"); 1404 } 1405 b.append(", "); 1406 break; 1407 case REPEAT_0_INF: 1408 b.append("rpt{"); 1409 b.append(target); 1410 b.append(",0,inf}"); 1411 if (failNext != null) { 1412 b.append(", =>"); 1413 b.append(failNext.instanceNum); 1414 b.append(", "); 1415 } 1416 break; 1417 case REPEAT_MIN_INF: 1418 b.append("rpt{"); 1419 b.append(target); 1420 b.append(","); 1421 b.append(minCount); 1422 b.append(",inf}"); 1423 if (failNext != null) { 1424 b.append(", =>"); 1425 b.append(failNext.instanceNum); 1426 b.append(", "); 1427 } 1428 break; 1429 case REPEAT_MIN_MAX: 1430 b.append("rpt{"); 1431 b.append(target); 1432 b.append(","); 1433 b.append(minCount); 1434 b.append(","); 1435 b.append(maxCount); 1436 b.append("}"); 1437 if (failNext != null) { 1438 b.append(", =>"); 1439 
b.append(failNext.instanceNum); 1440 b.append(", "); 1441 } 1442 break; 1443 case REPEAT_REG_MIN_INF: 1444 b.append("rpt{$"); 1445 b.append(memreg); 1446 b.append(','); 1447 b.append(minCount); 1448 b.append(",inf}"); 1449 if (failNext != null) { 1450 b.append(", =>"); 1451 b.append(failNext.instanceNum); 1452 b.append(", "); 1453 } 1454 break; 1455 case REPEAT_REG_MIN_MAX: 1456 b.append("rpt{$"); 1457 b.append(memreg); 1458 b.append(','); 1459 b.append(minCount); 1460 b.append(','); 1461 b.append(maxCount); 1462 b.append("}"); 1463 if (failNext != null) { 1464 b.append(", =>"); 1465 b.append(failNext.instanceNum); 1466 b.append(", "); 1467 } 1468 break; 1469 case BACKTRACK_0: 1470 b.append("back(0)"); 1471 break; 1472 case BACKTRACK_MIN: 1473 b.append("back("); 1474 b.append(minCount); 1475 b.append(")"); 1476 break; 1477 case BACKTRACK_REG_MIN: 1478 b.append("back"); 1479 b.append("_$"); 1480 b.append(memreg); 1481 b.append("("); 1482 b.append(minCount); 1483 b.append(")"); 1484 break; 1485 case GROUP_IN: 1486 b.append('('); 1487 if (memreg > 0) b.append(memreg); 1488 b.append('-'); 1489 b.append(" , "); 1490 break; 1491 case GROUP_OUT: 1492 b.append('-'); 1493 if (memreg > 0) b.append(memreg); 1494 b.append(')'); 1495 b.append(" , "); 1496 break; 1497 case PLOOKAHEAD_IN: 1498 b.append('('); 1499 b.append("="); 1500 b.append(lookaheadId); 1501 b.append(" , "); 1502 break; 1503 case PLOOKAHEAD_OUT: 1504 b.append('='); 1505 b.append(lookaheadId); 1506 b.append(')'); 1507 b.append(" , "); 1508 break; 1509 case NLOOKAHEAD_IN: 1510 b.append("(!"); 1511 b.append(lookaheadId); 1512 b.append(" , "); 1513 if (failNext != null) { 1514 b.append(", =>"); 1515 b.append(failNext.instanceNum); 1516 b.append(", "); 1517 } 1518 break; 1519 case NLOOKAHEAD_OUT: 1520 b.append('!'); 1521 b.append(lookaheadId); 1522 b.append(')'); 1523 b.append(" , "); 1524 break; 1525 case PLOOKBEHIND_IN: 1526 b.append('('); 1527 b.append("<="); 1528 b.append(lookaheadId); 1529 b.append(" , dist="); 
1530 b.append(distance); 1531 b.append(" , "); 1532 break; 1533 case PLOOKBEHIND_OUT: 1534 b.append("<="); 1535 b.append(lookaheadId); 1536 b.append(')'); 1537 b.append(" , "); 1538 break; 1539 case NLOOKBEHIND_IN: 1540 b.append("(<!"); 1541 b.append(lookaheadId); 1542 b.append(" , dist="); 1543 b.append(distance); 1544 b.append(" , "); 1545 if (failNext != null) { 1546 b.append(", =>"); 1547 b.append(failNext.instanceNum); 1548 b.append(", "); 1549 } 1550 break; 1551 case NLOOKBEHIND_OUT: 1552 b.append("<!"); 1553 b.append(lookaheadId); 1554 b.append(')'); 1555 b.append(" , "); 1556 break; 1557 case MEMREG_CONDITION: 1558 b.append("(reg"); 1559 b.append(memreg); 1560 b.append("?)"); 1561 if (failNext != null) { 1562 b.append(", =>"); 1563 b.append(failNext.instanceNum); 1564 b.append(", "); 1565 } 1566 break; 1567 case LOOKAHEAD_CONDITION_IN: 1568 b.append("(cond"); 1569 b.append(lookaheadId); 1570 b.append(((Lookahead) this).isPositive ? '=' : '!'); 1571 b.append(" , "); 1572 if (failNext != null) { 1573 b.append(", =>"); 1574 b.append(failNext.instanceNum); 1575 b.append(", "); 1576 } 1577 break; 1578 case LOOKAHEAD_CONDITION_OUT: 1579 b.append("cond"); 1580 b.append(lookaheadId); 1581 b.append(")"); 1582 if (failNext != null) { 1583 b.append(", =>"); 1584 b.append(failNext.instanceNum); 1585 b.append(", "); 1586 } 1587 break; 1588 case REG: 1589 b.append("$"); 1590 b.append(memreg); 1591 b.append(", "); 1592 break; 1593 case SUCCESS: 1594 b.append("END"); 1595 break; 1596 case BRANCH_STORE_CNT_AUX1: 1597 b.append("(aux1)"); 1598 case BRANCH_STORE_CNT: 1599 b.append("(cnt)"); 1600 case BRANCH: 1601 b.append("=>"); 1602 if (failNext != null) b.append(failNext.instanceNum); 1603 else b.append("null"); 1604 b.append(" , "); 1605 break; 1606 default: 1607 b.append('['); 1608 switch (type) { 1609 case CNT_SET_0: 1610 b.append("cnt=0"); 1611 break; 1612 case CNT_INC: 1613 b.append("cnt++"); 1614 break; 1615 case CNT_GT_EQ: 1616 b.append("cnt>=").append(maxCount); 1617 
break; 1618 case READ_CNT_LT: 1619 b.append("->cnt<").append(maxCount); 1620 break; 1621 case CRSTORE_CRINC: 1622 b.append("M(").append(memreg).append(")->,Cr(").append(cntreg).append(")->,Cr(").append(cntreg).append(")++"); 1623 break; 1624 case CR_SET_0: 1625 b.append("Cr(").append(cntreg).append(")=0"); 1626 break; 1627 case CR_LT: 1628 b.append("Cr(").append(cntreg).append(")<").append(maxCount); 1629 break; 1630 case CR_GT_EQ: 1631 b.append("Cr(").append(cntreg).append(")>=").append(maxCount); 1632 break; 1633 default: 1634 b.append("unknown type: ").append(type); 1635 } 1636 b.append("] , "); 1637 } 1638 if (next != null) { 1639 b.append("->"); 1640 b.append(next.instanceNum); 1641 b.append(", "); 1642 } 1643 //b.append("\r\n"); 1644 return b.toString(); 1645 } 1646 1647 public String toStringAll() { 1648 return toStringAll(new ArrayList<Integer>()); 1649 } 1650 1651 private String toStringAll(ArrayList<Integer> v) { 1652 v.add(instanceNum); 1653 String s = toString(); 1654 if (next != null) { 1655 if (!v.contains(next.instanceNum)) { 1656 s += "\r\n"; 1657 s += next.toStringAll(v); 1658 } 1659 } 1660 if (failNext != null) { 1661 if (!v.contains(failNext.instanceNum)) { 1662 s += "\r\n"; 1663 s += failNext.toStringAll(v); 1664 } 1665 } 1666 return s; 1667 } 1668 1669 @Override 1670 public boolean equals(Object o) { 1671 if (this == o) return true; 1672 if (o == null || getClass() != o.getClass()) return false; 1673 1674 Term term = (Term) o; 1675 1676 if (type != term.type) return false; 1677 if (inverse != term.inverse) return false; 1678 if (c != term.c) return false; 1679 if (distance != term.distance) return false; 1680 if (eat != term.eat) return false; 1681 if (weight != term.weight) return false; 1682 if (memreg != term.memreg) return false; 1683 if (minCount != term.minCount) return false; 1684 if (maxCount != term.maxCount) return false; 1685 if (cntreg != term.cntreg) return false; 1686 if (lookaheadId != term.lookaheadId) return false; 1687 if 
(next != null ? !next.equals(term.next) : term.next != null) return false; 1688 if (bitset != null ? !bitset.equals(term.bitset) : term.bitset != null) return false; 1689 // Probably incorrect - comparing Object[] arrays with Arrays.equals 1690 return Arrays.equals(bitset2, term.bitset2) && Arrays.equals(categoryBitset, term.categoryBitset) && Arrays.equals(brackets, term.brackets); 1691//if (!Arrays.equals(brackets, term.brackets)) return false; 1692 /* 1693 if (failNext != null ? !failNext.equals(term.failNext) : term.failNext != null) return false; 1694 if (target != null ? !target.equals(term.target) : term.target != null) return false; 1695 if (prev != null ? !prev.equals(term.prev) : term.prev != null) return false; 1696 if (in != null ? !in.equals(term.in) : term.in != null) return false; 1697 if (out != null ? !out.equals(term.out) : term.out != null) return false; 1698 if (out1 != null ? !out1.equals(term.out1) : term.out1 != null) return false; 1699 if (first != null ? !first.equals(term.first) : term.first != null) return false; 1700 if (current != null ? !current.equals(term.current) : term.current != null) return false; 1701 return branchOut != null ? branchOut.equals(term.branchOut) : term.branchOut == null; 1702 */ 1703 } 1704 1705 @Override 1706 public int hashCode() { 1707 int result = next != null ? next.hashCode() : 0; 1708 result = 31 * result + type; 1709 result = 31 * result + (inverse ? 1 : 0); 1710 result = 31 * result + (int) c; 1711 result = 31 * result + distance; 1712 result = 31 * result + (eat ? 1 : 0); 1713 result = 31 * result + (bitset != null ? 
bitset.hashCode() : 0); 1714 result = 31 * result + Arrays.hashCode(bitset2); 1715 result = 31 * result + Arrays.hashCode(categoryBitset); 1716 result = 31 * result + Arrays.hashCode(brackets); 1717 result = 31 * result + weight; 1718 result = 31 * result + memreg; 1719 result = 31 * result + minCount; 1720 result = 31 * result + maxCount; 1721 result = 31 * result + cntreg; 1722 result = 31 * result + lookaheadId; 1723 /* 1724 result = 31 * result + (failNext != null ? failNext.hashCode() : 0); 1725 result = 31 * result + (target != null ? (this == target ? 73 : target.hashCode()) : 0); 1726 result = 31 * result + (prev != null ? (this == prev ? 73 : prev.hashCode()) : 0); 1727 result = 31 * result + (in != null ? (this == in ? 73 : in.hashCode()) : 0); 1728 result = 31 * result + (out != null ? (this == out ? 73 : out.hashCode()) : 0); 1729 result = 31 * result + (out1 != null ? (this == out1 ? 73 : out1.hashCode()) : 0); 1730 result = 31 * result + (first != null ? (this == first ? 73 : first.hashCode()) : 0); 1731 result = 31 * result + (current != null ? (this == current ? 73 : current.hashCode()) : 0); 1732 result = 31 * result + (branchOut != null ? (this == branchOut ? 
73 : branchOut.hashCode()) : 0); 1733 */ 1734 return result; 1735 } 1736} 1737 1738class Pretokenizer { 1739 private static final int START = 1; 1740 static final int END = 2; 1741 static final int PLAIN_GROUP = 3; 1742 static final int POS_LOOKAHEAD = 4; 1743 static final int NEG_LOOKAHEAD = 5; 1744 static final int POS_LOOKBEHIND = 6; 1745 static final int NEG_LOOKBEHIND = 7; 1746 static final int INDEPENDENT_REGEX = 8; 1747 static final int COMMENT = 9; 1748 static final int CONDITIONAL_GROUP = 10; 1749 static final int FLAGS = 11; 1750 static final int CLASS_GROUP = 12; 1751 static final int NAMED_GROUP = 13; 1752 1753 int tOffset; 1754 int tOutside; 1755 private int skip; 1756 private int offset; 1757 private int end; 1758 int c; 1759 1760 int ttype = START; 1761 1762 private char[] data; 1763 1764 //results 1765 private int flags; 1766 private boolean flagsChanged; 1767 1768 char[] brackets; 1769 String groupName; 1770 boolean groupDeclared; 1771 1772 Pretokenizer(char[] data, int offset, int end) { 1773 if (offset < 0 || end > data.length) 1774 throw new IndexOutOfBoundsException("offset=" + offset + ", end=" + end + ", length=" + data.length); 1775 this.offset = offset; 1776 this.end = end; 1777 1778 this.tOffset = offset; 1779 this.tOutside = offset; 1780 1781 this.data = data; 1782 } 1783 1784 int flags(int def) { 1785 return flagsChanged ? 
flags : def; 1786 } 1787 1788 void next() throws PatternSyntaxException { 1789 int tOffset = this.tOutside; 1790 int skip = this.skip; 1791 1792 tOffset += skip; 1793 flagsChanged = false; 1794 1795 int end = this.end; 1796 char[] data = this.data; 1797 boolean esc = false; 1798 for (int i = tOffset; i < end; i++) { 1799 char c = data[i]; 1800 if (esc) { 1801 if(c == 'Q') 1802 { 1803 1804 for (; i < end; i++) { 1805 char c1 = data[i]; 1806 if(c1 == '\\') { 1807 if (i + 1 < end && data[i + 1] == 'E') { 1808 i++; 1809 esc = false; 1810 break; 1811 } 1812 } 1813 } 1814 } 1815 else { 1816 esc = false; 1817 } 1818 continue; 1819 } 1820 switch (c) { 1821 case '\\': 1822 esc = true; 1823 continue; 1824 case '|': 1825 case ')': 1826 ttype = c; 1827 this.tOffset = tOffset; 1828 this.tOutside = i; 1829 this.skip = 1; 1830 return; 1831 case '(': 1832 if (((i + 2) < end) && (data[i + 1] == '?')) { 1833 char c1 = data[i + 2]; 1834 switch (c1) { 1835 case ':': 1836 ttype = PLAIN_GROUP; 1837 skip = 3; // "(?:" - skip 3 chars 1838 break; 1839 case '=': 1840 ttype = POS_LOOKAHEAD; 1841 skip = 3; // "(?=" 1842 break; 1843 case '!': 1844 ttype = NEG_LOOKAHEAD; 1845 skip = 3; // "(?!" 1846 break; 1847 case '<': 1848 switch (c1 = data[i + 3]) { 1849 case '=': 1850 ttype = POS_LOOKBEHIND; 1851 skip = 4; // "(?<=" 1852 break; 1853 case '!': 1854 ttype = NEG_LOOKBEHIND; 1855 skip = 4; // "(?<!" 1856 break; 1857 default: 1858 throw new PatternSyntaxException("invalid character after '(?<' : " + c1); 1859 } 1860 break; 1861 case '>': 1862 ttype = INDEPENDENT_REGEX; 1863 skip = 3; // "(?>" 1864 break; 1865 case '#': 1866 ttype = COMMENT; 1867 skip = 3; // ="(?#".length, the makeTree() skips the rest by itself 1868 break; 1869 case '(': 1870 ttype = CONDITIONAL_GROUP; 1871 skip = 2; //"(?"+"(..." - skip "(?" 
(2 chars) and parse condition as a group 1872 break; 1873 case '[': 1874 ttype = CLASS_GROUP; 1875 skip = 2; // "(?"+"[..]+...-...&...)" - skip 2 chars and parse a class group 1876 break; 1877 default: 1878 int mOff, mLen; 1879 mLoop: 1880 for (int p = i + 2; p < end; p++) { 1881 char c2 = data[p]; 1882 switch (c2) { 1883 case '-': 1884 case 'i': 1885 case 'm': 1886 case 's': 1887 case 'x': 1888 case 'u': 1889 case 'X': 1890 continue mLoop; 1891 1892 case ':': 1893 mOff = i + 2; 1894 mLen = p - mOff; 1895 if (mLen > 0) { 1896 flags = Pattern.parseFlags(data, mOff, mLen); 1897 flagsChanged = true; 1898 } 1899 ttype = PLAIN_GROUP; 1900 skip = mLen + 3; // "(?imsx:" mLen=4; skip= "(?".len + ":".len + mLen = 2+1+4=7 1901 break mLoop; 1902 case ')': 1903 flags = Pattern.parseFlags(data, mOff = (i + 2), mLen = (p - mOff)); 1904 flagsChanged = true; 1905 ttype = FLAGS; 1906 skip = mLen + 3; // "(?imsx)" mLen=4, skip="(?".len+")".len+mLen=2+1+4=7 1907 break mLoop; 1908 default: 1909 throw new PatternSyntaxException("wrong char after \"(?\": " + c2); 1910 } 1911 } 1912 break; 1913 } 1914 } else if (((i + 2) < end) && (data[i + 1] == '{')) { //parse named group: ({name}....),({=name}....) 
1915 int p = i + 2; 1916 skip = 3; //'({' + '}' 1917 int nstart, nend; 1918 boolean isDecl; 1919 c = data[p]; 1920 while (Category.Z.contains(c)) { 1921 c = data[++p]; 1922 skip++; 1923 if (p == end) throw new PatternSyntaxException("malformed named group"); 1924 } 1925 1926 if (c == '=') { 1927 isDecl = false; 1928 c = data[++p]; 1929 skip++; 1930 if (p == end) throw new PatternSyntaxException("malformed named group"); 1931 } else isDecl = true; 1932 1933 nstart = p; 1934 while (Category.IdentifierPart.contains(c)) { 1935 c = data[++p]; 1936 skip++; 1937 if (p == end) throw new PatternSyntaxException("malformed named group"); 1938 } 1939 nend = p; 1940 while (Category.Z.contains(c)) { 1941 c = data[++p]; 1942 skip++; 1943 if (p == end) throw new PatternSyntaxException("malformed named group"); 1944 } 1945 if (c != '}') 1946 throw new PatternSyntaxException("'}' expected at " + (p - i) + " in " + new String(data, i, end - i)); 1947 1948 this.groupName = new String(data, nstart, nend - nstart); 1949 this.groupDeclared = isDecl; 1950 ttype = NAMED_GROUP; 1951 } else { 1952 ttype = '('; 1953 skip = 1; 1954 } 1955 this.tOffset = tOffset; 1956 this.tOutside = i; 1957 this.skip = skip; 1958 return; 1959 case '[': 1960 loop: 1961 for (; ; i++) { 1962 if (i == end) throw new PatternSyntaxException("malformed character class"); 1963 char c1 = data[i]; 1964 switch (c1) { 1965 case '\\': 1966 i++; 1967 continue; 1968 case ']': 1969 break loop; 1970 } 1971 } 1972 } 1973 } 1974 ttype = END; 1975 this.tOffset = tOffset; 1976 this.tOutside = end; 1977 } 1978 1979} 1980 1981class Branch extends Term { 1982 Branch() { 1983 type = BRANCH; 1984 } 1985 1986 Branch(int type) { 1987 switch (type) { 1988 case BRANCH: 1989 case BRANCH_STORE_CNT: 1990 case BRANCH_STORE_CNT_AUX1: 1991 this.type = type; 1992 break; 1993 default: 1994 throw new IllegalArgumentException("not a branch type: " + type); 1995 } 1996 } 1997} 1998 1999class BackReference extends Term { 2000 BackReference(int no, 
boolean icase) { 2001 super(icase ? REG_I : REG); 2002 memreg = no; 2003 } 2004} 2005 2006class Group extends Term { 2007 Group() { 2008 this(0); 2009 } 2010 2011 Group(int memreg) { 2012 type = GROUP_IN; 2013 this.memreg = memreg; 2014 2015 //used in append() 2016 current = null; 2017 in = this; 2018 prev = null; 2019 2020 out = new Term(); 2021 out.type = GROUP_OUT; 2022 out.memreg = memreg; 2023 } 2024} 2025 2026class ConditionalExpr extends Group { 2027 private Term node; 2028 private boolean newBranchStarted = false; 2029 private boolean linkAsBranch = true; 2030 2031 ConditionalExpr(Lookahead la) { 2032 super(0); 2033 /* 2034 * This all is rather tricky. 2035 * See how this types are handled in Matcher. 2036 * The shortcoming is that we strongly rely upon 2037 * the internal structure of Lookahead. 2038 */ 2039 la.in.type = LOOKAHEAD_CONDITION_IN; 2040 la.out.type = LOOKAHEAD_CONDITION_OUT; 2041 if (la.isPositive) { 2042 node = la.in; 2043 linkAsBranch = true; 2044 2045 //empty 2'nd branch 2046 node.failNext = out; 2047 } else { 2048 node = la.out; 2049 linkAsBranch = false; 2050 2051 //empty 2'nd branch 2052 node.next = out; 2053 } 2054 2055 //node.prev=in; 2056 //in.next=node; 2057 2058 la.prev = in; 2059 in.next = la; 2060 2061 current = la; 2062 //current=node; 2063 } 2064 2065 ConditionalExpr(Lookbehind lb) { 2066 super(0); 2067 /* 2068 * This all is rather tricky. 2069 * See how this types are handled in Matcher. 2070 * The shortcoming is that we strongly rely upon 2071 * the internal structure of Lookahead. 
2072 */ 2073 lb.in.type = LOOKBEHIND_CONDITION_IN; 2074 lb.out.type = LOOKBEHIND_CONDITION_OUT; 2075 if (lb.isPositive) { 2076 node = lb.in; 2077 linkAsBranch = true; 2078 2079 //empty 2'nd branch 2080 node.failNext = out; 2081 } else { 2082 node = lb.out; 2083 linkAsBranch = false; 2084 2085 //empty 2'nd branch 2086 node.next = out; 2087 } 2088 2089 lb.prev = in; 2090 in.next = lb; 2091 2092 current = lb; 2093 //current=node; 2094 } 2095 2096 ConditionalExpr(int memreg) { 2097 super(0); 2098 Term condition = new Term(MEMREG_CONDITION); 2099 condition.memreg = memreg; 2100 condition.out = condition; 2101 condition.out1 = null; 2102 condition.branchOut = null; 2103 2104 //default branch 2105 condition.failNext = out; 2106 2107 node = current = condition; 2108 linkAsBranch = true; 2109 2110 condition.prev = in; 2111 in.next = condition; 2112 2113 current = condition; 2114 } 2115 2116 protected void startNewBranch() throws PatternSyntaxException { 2117 if (newBranchStarted) throw new PatternSyntaxException("attempt to set a 3'd choice in a conditional expr."); 2118 Term node = this.node; 2119 node.out1 = null; 2120 if (linkAsBranch) { 2121 node.out = null; 2122 node.branchOut = node; 2123 } else { 2124 node.out = node; 2125 node.branchOut = null; 2126 } 2127 newBranchStarted = true; 2128 current = node; 2129 } 2130 2131 @Override 2132 public boolean equals(Object o) { 2133 if (this == o) return true; 2134 if (o == null || getClass() != o.getClass()) return false; 2135 if (!super.equals(o)) return false; 2136 2137 ConditionalExpr that = (ConditionalExpr) o; 2138 2139 return newBranchStarted == that.newBranchStarted && linkAsBranch == that.linkAsBranch && (node != null ? node.equals(that.node) : that.node == null); 2140 2141 } 2142 2143 @Override 2144 public int hashCode() { 2145 int result = super.hashCode(); 2146 result = 31 * result + (node != null ? node.hashCode() : 0); 2147 result = 31 * result + (newBranchStarted ? 
1 : 0); 2148 result = 31 * result + (linkAsBranch ? 1 : 0); 2149 return result; 2150 } 2151} 2152 2153class IndependentGroup extends Term { 2154 IndependentGroup(int id) { 2155 super(0); 2156 in = this; 2157 out = new Term(); 2158 type = INDEPENDENT_IN; 2159 out.type = INDEPENDENT_OUT; 2160 lookaheadId = out.lookaheadId = id; 2161 } 2162} 2163 2164class Lookahead extends Term { 2165 final boolean isPositive; 2166 2167 Lookahead(int id, boolean isPositive) { 2168 this.isPositive = isPositive; 2169 in = this; 2170 out = new Term(); 2171 if (isPositive) { 2172 type = PLOOKAHEAD_IN; 2173 out.type = PLOOKAHEAD_OUT; 2174 } else { 2175 type = NLOOKAHEAD_IN; 2176 out.type = NLOOKAHEAD_OUT; 2177 branchOut = this; 2178 } 2179 lookaheadId = id; 2180 out.lookaheadId = id; 2181 } 2182 2183 @Override 2184 public boolean equals(Object o) { 2185 if (this == o) return true; 2186 if (o == null || getClass() != o.getClass()) return false; 2187 if (!super.equals(o)) return false; 2188 2189 Lookahead lookahead = (Lookahead) o; 2190 2191 return isPositive == lookahead.isPositive; 2192 2193 } 2194 2195 @Override 2196 public int hashCode() { 2197 int result = super.hashCode(); 2198 result = 31 * result + (isPositive ? 
1 : 0);
        return result;
    }
}

/**
 * Entry term of a lookbehind assertion. Accumulates in {@code distance} the fixed
 * length of the terms added to it and rejects anything of variable length.
 */
class Lookbehind extends Term {
    final boolean isPositive;
    // length of the previous alternative; -1 until a second alternative is started
    private int prevDistance = -1;

    Lookbehind(int id, boolean isPositive) {
        distance = 0;
        this.isPositive = isPositive;
        in = this;
        out = new Term();
        if (!isPositive) {
            type = NLOOKBEHIND_IN;
            out.type = NLOOKBEHIND_OUT;
            branchOut = this;
        } else {
            type = PLOOKBEHIND_IN;
            out.type = PLOOKBEHIND_OUT;
        }
        lookaheadId = id;
        out.lookaheadId = id;
    }

    /** Adds the length of {@code t} to the running distance, then delegates to the superclass. */
    protected Term append(Term t) throws PatternSyntaxException {
        distance += length(t);
        return super.append(t);
    }

    /** Adjusts the running distance by the length difference, then delegates to the superclass. */
    protected Term replaceCurrent(Term t) throws PatternSyntaxException {
        distance += length(t) - length(current);
        return super.replaceCurrent(t);
    }

    /**
     * Number of characters a term consumes: 1 for single-character terms, 0 for
     * boundary/direction terms and for types in the FIRST_TRANSPARENT..LAST_TRANSPARENT range.
     *
     * @throws PatternSyntaxException for any other term type
     */
    private static int length(Term t) throws PatternSyntaxException {
        final int kind = t.type;
        if (kind == CHAR || kind == BITSET || kind == BITSET2
                || kind == ANY_CHAR || kind == ANY_CHAR_NE) {
            return 1;
        }
        if (kind == BOUNDARY || kind == DIRECTION
                || kind == UBOUNDARY || kind == UDIRECTION) {
            return 0;
        }
        if (kind >= FIRST_TRANSPARENT && kind <= LAST_TRANSPARENT) {
            return 0;
        }
        throw new PatternSyntaxException("variable length element within a lookbehind assertion");
    }

    /** Remembers the length of the alternative just finished and starts counting afresh. */
    protected void startNewBranch() throws PatternSyntaxException {
        prevDistance = distance;
        distance = 0;
        super.startNewBranch();
    }

    /**
     * @throws PatternSyntaxException when a previous alternative was recorded and its
     *                                length differs from the current one
     */
    protected void close() throws PatternSyntaxException {
        if (prevDistance >= 0 && distance != prevDistance) {
            throw new PatternSyntaxException("non-equal branch lengths within a lookbehind assertion");
        }
        super.close();
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        if (!super.equals(o)) return false;

        Lookbehind that = (Lookbehind) o;
        if (isPositive != that.isPositive) return false;
        return prevDistance == that.prevDistance;
    }

    @Override
    public int hashCode() {
        int hash = super.hashCode();
        hash = 31 * hash + (isPositive ? 1 : 0);
        hash = 31 * hash + prevDistance;
        return hash;
    }
}

/**
 * A repetition of a single-character term or of a back-reference, together with
 * the backtracking term that is taken when the repetition fails.
 */
class TermIterator extends Term {

    /**
     * @param term       the term to repeat; must be CHAR, ANY_CHAR, ANY_CHAR_NE, BITSET,
     *                   BITSET2 or REG
     * @param min        minimum repeat count
     * @param max        maximum repeat count; a negative value selects the unbounded variants
     * @param collection list this iterator registers itself in
     * @throws PatternSyntaxException when {@code term} is of any other type
     */
    TermIterator(Term term, int min, int max, ArrayList<TermIterator> collection) throws PatternSyntaxException {
        collection.add(this);
        switch (term.type) {
            case CHAR:
            case ANY_CHAR:
            case ANY_CHAR_NE:
            case BITSET:
            case BITSET2: {
                target = term;
                Term back = new Term();
                if (max < 0) {
                    if (min <= 0) {
                        type = REPEAT_0_INF;
                        back.type = BACKTRACK_0;
                    } else {
                        type = REPEAT_MIN_INF;
                        back.type = BACKTRACK_MIN;
                        back.minCount = min;
                        minCount = min;
                    }
                } else {
                    type = REPEAT_MIN_MAX;
                    back.type = BACKTRACK_MIN;
                    back.minCount = min;
                    minCount = min;
                    maxCount = max;
                }
                wire(back);
                return;
            }
            case REG: {
                target = term;
                memreg = term.memreg;
                Term back = new Term();
                back.type = BACKTRACK_REG_MIN;
                back.minCount = min;
                minCount = min;
                if (max < 0) {
                    type = REPEAT_REG_MIN_INF;
                } else {
                    type = REPEAT_REG_MIN_MAX;
                    maxCount = max;
                }
                wire(back);
                return;
            }
            default:
                throw new PatternSyntaxException("can't iterate this type: " + term.type);
        }
    }

    /** Makes {@code back} the failure successor and second exit; this term is its own entry and exit. */
    private void wire(Term back) {
        failNext = back;

        in = this;
        out = this;
        out1 = back;
        branchOut = null;
    }

    /** Replaces the backtracking term with an optimized one when an Optimizer exists for what follows it. */
    void optimize() {
//BACKTRACK_MIN_REG_FIND
        final Term back = failNext;
        final Optimizer opt = Optimizer.find(back.next);
        if (opt != null) {
            failNext = opt.makeBacktrack(back);
        }
    }

}