001/** 002 * Copyright (c) 2001, Sergey A. Samokhodkin 003 * All rights reserved. 004 * <p> 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * <p> 008 * - Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * - Redistributions in binary form 011 * must reproduce the above copyright notice, this list of conditions and the following 012 * disclaimer in the documentation and/or other materials provided with the distribution. 013 * - Neither the name of jregex nor the names of its contributors may be used 014 * to endorse or promote products derived from this software without specific prior 015 * written permission. 016 * <p> 017 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 018 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 019 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 020 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 021 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 022 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 023 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 024 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 025 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
026 * 027 * @version 1.2_01 028 */ 029 030package regexodus; 031 032import regexodus.ds.IntBitSet; 033 034import java.io.IOException; 035import java.io.Reader; 036import java.util.ArrayList; 037import java.util.Arrays; 038import java.util.NoSuchElementException; 039 040import static regexodus.Replacer.wrap; 041 042/** 043 * Matcher is an automaton that actually performs matching. It provides the following methods: 044 * <ul> 045 * <li> searching for a matching sub-strings : matcher.find() or matcher.findAll();</li> 046 * <li> testing whether a text matches a whole pattern : matcher.matches();</li> 047 * <li> testing whether the text matches the beginning of a pattern : matcher.matchesPrefix();</li> 048 * <li> searching with custom options : matcher.find(int options)</li> 049 * </ul> 050 * <p> 051 * <b>Obtaining results</b> 052 * <br> 053 * After the search succeeded, i.e. if one of above methods returned <code>true</code> 054 * one may obtain an information on the match: 055 * <ul> 056 * <li> may check whether some group is captured : matcher.isCaptured(int);</li> 057 * <li> may obtain start and end positions of the match and its length : matcher.start(int),matcher.end(int),matcher.length(int);</li> 058 * <li> may obtain match contents as String : matcher.group(int).</li> 059 * </ul> 060 * <br> 061 * The same way can be obtained the match prefix and suffix information. 062 * The appropriate methods are grouped in MatchResult interface, which the Matcher class implements. 063 * <br> 064 * You typically obtain a Matcher through a Pattern instance's matcher() method. See the Pattern documentation for the 065 * normal ways to create a Pattern; if you are already familiar with java.util.regex.Pattern, constructing a regexodus 066 * Pattern should be no different. 067 * <br> 068 * Matcher (and Pattern) objects are not thread-safe, so only one thread may use a matcher instance at a time. 
*/

public class Matcher implements MatchResult {
    /* Matching options*/
    /**
     * The same effect as "^" without REFlags.MULTILINE.
     *
     * @see Matcher#find(int)
     */
    private static final int ANCHOR_START = 1;

    /**
     * The same effect as "\\G".
     *
     * @see Matcher#find(int)
     */
    private static final int ANCHOR_LASTMATCH = 2;

    /**
     * The same effect as "$" without REFlags.MULTILINE.
     *
     * @see Matcher#find(int)
     */
    private static final int ANCHOR_END = 4;

    /**
     * Experimental option: if the text ends before the end of the pattern, report a match.
     *
     * @see Matcher#find(int)
     */
    private static final int ACCEPT_INCOMPLETE = 8;

    // Synthetic root term used by search() when ANCHOR_START is requested.
    private static Term startAnchor = new Term(Term.START);

    // Synthetic root term used by search() when ANCHOR_LASTMATCH is requested.
    private static Term lastMatchAnchor = new Term(Term.LAST_MATCH_END);

    // The pattern this matcher was created for; returned by pattern().
    private Pattern re;
    // Per-pattern working storage, allocated in the constructor and sized from the pattern.
    // Each array is left null when the pattern reports a count of zero for it.
    private int[] counters;
    private MemReg[] memregs;
    private LAEntry[] lookaheads;
    // Sizes copied from the pattern in the constructor.
    private int counterCount;
    private int memregCount;
    private int lookaheadCount;

    // Target buffer; only the slice [offset, end) of it is the text being matched.
    private char[] data;
    // offset/end delimit the target inside data; wOffset/wEnd delimit the current match window
    // (start() and end() report them relative to offset). wEnd is reset to -1 by init()/setPosition().
    private int offset, end, wOffset, wEnd;
    // When true, data must not be overwritten: the setTarget(...) overloads that copy text
    // allocate a fresh buffer instead of reusing it.
    private boolean shared;

    private SearchEntry top; //stack entry
    private SearchEntry first; //object pool entry
    private SearchEntry defaultEntry; //called when moving the window

    // Set by search(); cleared by init(), setPosition() and flush().
    private boolean called;

    // Size hint handed to first.reset() in flush(); derived from the pattern's string length.
    private int minQueueLength;

    // CharSequence form of the target, used by getString(); null when the target was given as char[].
    private CharSequence cache;

    //cache may be longer than the actual data
    //and contrariwise; so cacheOffset may have both signs.
    //cacheOffset is actually -(data offset).
132 private int cacheOffset, cacheLength; 133 134 private MemReg prefixBounds, suffixBounds, targetBounds; 135 136 public Matcher(Pattern regex) { 137 this.re = regex; 138 //int memregCount=(memregs=new MemReg[regex.memregs]).length; 139 //for(int i=0;i<memregCount;i++){ 140 // this.memregs[i]=new MemReg(-1); //unlikely to SearchEntry, in this case we know memreg indices by definition 141 //} 142 //counters=new int[regex.counters]; 143 //int lookaheadCount=(lookaheads=new LAEntry[regex.lookaheads]).length; 144 //for(int i=0;i<lookaheadCount;i++){ 145 // this.lookaheads[i]=new LAEntry(); 146 //} 147 148 int memregCount, counterCount, lookaheadCount; 149 if ((memregCount = regex.memregs) > 0) { 150 MemReg[] memregs = new MemReg[memregCount]; 151 for (int i = 0; i < memregCount; i++) { 152 memregs[i] = new MemReg(-1); //unlikely to SearchEntry, in this case we know memreg indices by definition 153 } 154 this.memregs = memregs; 155 } 156 157 if ((counterCount = regex.counters) > 0) counters = new int[counterCount]; 158 159 if ((lookaheadCount = regex.lookaheads) > 0) { 160 LAEntry[] lookaheads = new LAEntry[lookaheadCount]; 161 for (int i = 0; i < lookaheadCount; i++) { 162 lookaheads[i] = new LAEntry(); 163 } 164 this.lookaheads = lookaheads; 165 } 166 167 this.memregCount = memregCount; 168 this.counterCount = counterCount; 169 this.lookaheadCount = lookaheadCount; 170 171 first = new SearchEntry(); 172 defaultEntry = new SearchEntry(); 173 minQueueLength = regex.stringRepr.length() / 2; // just evaluation!!! 174 } 175 176 /** 177 * This method allows to efficiently pass data between matchers. 178 * Note that a matcher may pass data to itself:<pre> 179 * Matcher m=new Pattern("\\w+").matcher(myString); 180 * if(m.find())m.setTarget(m,m.SUFFIX); //forget all that is not a suffix 181 * </pre> 182 * Resets current search position to zero. 
183 * 184 * @param m - a matcher that is a source of data 185 * @param groupId - which group to take data from 186 * @see Matcher#setTarget(java.lang.CharSequence) 187 * @see Matcher#setTarget(java.lang.CharSequence, int, int) 188 * @see Matcher#setTarget(char[], int, int) 189 * @see Matcher#setTarget(java.io.Reader, int) 190 */ 191 public final void setTarget(Matcher m, int groupId) { 192 MemReg mr = m.bounds(groupId); 193 if (mr == null) throw new IllegalArgumentException("group #" + groupId + " is not assigned"); 194 data = m.data; 195 offset = mr.in; 196 end = mr.out; 197 cache = m.cache; 198 cacheLength = m.cacheLength; 199 cacheOffset = m.cacheOffset; 200 if (m != this) { 201 shared = true; 202 m.shared = true; 203 } 204 init(); 205 } 206 207 208 /** 209 * Supplies a text to search in/match with. 210 * Resets current search position to zero. 211 * 212 * @param text - a data 213 * @see Matcher#setTarget(regexodus.Matcher, int) 214 * @see Matcher#setTarget(java.lang.CharSequence, int, int) 215 * @see Matcher#setTarget(char[], int, int) 216 * @see Matcher#setTarget(java.io.Reader, int) 217 */ 218 public void setTarget(CharSequence text) { 219 setTarget(text, 0, text.length()); 220 } 221 222 223 /** 224 * Supplies a text to search in/match with, as a part of String. 225 * Resets current search position to zero. 
226 * 227 * @param text - a data source 228 * @param start - where the target starts 229 * @param len - how long is the target 230 * @see Matcher#setTarget(regexodus.Matcher, int) 231 * @see Matcher#setTarget(java.lang.CharSequence) 232 * @see Matcher#setTarget(char[], int, int) 233 * @see Matcher#setTarget(java.io.Reader, int) 234 */ 235 public void setTarget(CharSequence text, int start, int len) { 236 char[] mychars = data; 237 if (mychars == null || shared || mychars.length < len) { 238 data = mychars = new char[(int) (1.7f * len)]; 239 shared = false; 240 } 241 for (int i = start, p = 0; i < len; i++, p++) { 242 mychars[p] = text.charAt(i); 243 } 244 //text.getChars(start, len, mychars, 0); //(srcBegin,srcEnd,dst[],dstBegin) 245 offset = 0; 246 end = len; 247 248 cache = text; 249 cacheOffset = -start; 250 cacheLength = text.length(); 251 252 init(); 253 } 254 255 /** 256 * Supplies a text to search in/match with, as a part of char array. 257 * Resets current search position to zero. 258 * 259 * @param text - a data source 260 * @param start - where the target starts 261 * @param len - how long is the target 262 * @see Matcher#setTarget(regexodus.Matcher, int) 263 * @see Matcher#setTarget(java.lang.CharSequence) 264 * @see Matcher#setTarget(java.lang.CharSequence, int, int) 265 * @see Matcher#setTarget(java.io.Reader, int) 266 */ 267 public void setTarget(char[] text, int start, int len) { 268 setTarget(text, start, len, true); 269 } 270 271 /** 272 * To be used with much care. 273 * Supplies a text to search in/match with, as a part of a char array, as above, but also allows to permit 274 * to use the array as internal buffer for subsequent inputs. 
That is, if we call it with <code>shared=false</code>:<pre> 275 * myMatcher.setTarget(myCharArray,x,y,<b>false</b>); //we declare that array contents is NEITHER shared NOR will be used later, so may modifications on it are permitted 276 * </pre> 277 * then we should expect the array contents to be changed on subsequent setTarget(..) operations. 278 * Such method may yield some increase in perfomance in the case of multiple setTarget() calls. 279 * Resets current search position to zero. 280 * 281 * @param text - a data source 282 * @param start - where the target starts 283 * @param len - how long is the target 284 * @param shared - if <code>true</code>: data are shared or used later, <b>don't</b> modify it; if <code>false</code>: possible modifications of the text on subsequent <code>setTarget()</code> calls are perceived and allowed. 285 * @see Matcher#setTarget(regexodus.Matcher, int) 286 * @see Matcher#setTarget(java.lang.CharSequence) 287 * @see Matcher#setTarget(java.lang.CharSequence, int, int) 288 * @see Matcher#setTarget(char[], int, int) 289 * @see Matcher#setTarget(java.io.Reader, int) 290 */ 291 public void setTarget(char[] text, int start, int len, boolean shared) { 292 cache = null; 293 data = text; 294 offset = start; 295 end = start + len; 296 this.shared = shared; 297 init(); 298 } 299 300 301 /** 302 * Supplies a text to search in/match with through a stream. 303 * Resets current search position to zero. 304 * 305 * @param in - a data stream; 306 * @param len - how much characters should be read; if len is -1, read the entire stream. 
307 * @see Matcher#setTarget(regexodus.Matcher, int) 308 * @see Matcher#setTarget(java.lang.CharSequence) 309 * @see Matcher#setTarget(java.lang.CharSequence, int, int) 310 * @see Matcher#setTarget(char[], int, int) 311 */ 312 @GwtIncompatible 313 public void setTarget(Reader in, int len) throws IOException { 314 if (len < 0) { 315 setAll(in); 316 return; 317 } 318 char[] mychars = data; 319 boolean shared = this.shared; 320 if (mychars == null || shared || mychars.length < len) { 321 mychars = new char[len]; 322 shared = false; 323 } 324 int count = 0; 325 int c; 326 while ((c = in.read(mychars, count, len)) >= 0) { 327 len -= c; 328 count += c; 329 if (len == 0) break; 330 } 331 setTarget(mychars, 0, count, shared); 332 } 333 334 @GwtIncompatible 335 public void setAll(Reader in) throws IOException { 336 char[] mychars = data; 337 int free; 338 if (mychars == null || shared) { 339 mychars = new char[free = 1024]; 340 } else free = mychars.length; 341 int count = 0; 342 int c; 343 while ((c = in.read(mychars, count, free)) >= 0) { 344 free -= c; 345 count += c; 346 if (free == 0) { 347 int newsize = count * 3; 348 char[] newchars = new char[newsize]; 349 System.arraycopy(mychars, 0, newchars, 0, count); 350 mychars = newchars; 351 free = newsize - count; 352 } 353 } 354 setTarget(mychars, 0, count, false); 355 } 356 357 public String getString(int start, int end) { 358 /*if(end < 0) 359 { 360 return "<<<Incomplete Match>>> " + cache; 361 }*/ 362 if (cache != null) { 363 int co = cacheOffset; 364 return cache.toString().substring(start - co, end - co); 365 } 366 CharSequence src; 367 int tOffset = this.offset, tLen = this.end - tOffset; 368 char[] data = this.data; 369 if ((end - start) >= (tLen / 3)) { 370 //it makes sense to make a cache 371 cache = src = new String(data, tOffset, tLen); 372 cacheOffset = tOffset; 373 cacheLength = tLen; 374 return src.toString().substring(start - tOffset, end - tOffset); 375 } 376 return new String(data, start, end - start); 377 
} 378 379 /* Matching */ 380 381 /** 382 * Tells whether the entire target matches the beginning of the pattern. 383 * The whole pattern is also regarded as its beginning.<br> 384 * This feature allows to find a mismatch by examining only a beginning part of 385 * the target (as if the beginning of the target doesn't match the beginning of the pattern, then the entire target 386 * also couldn't match).<br> 387 * For example the following assertions yield <code>true</code>:<pre> 388 * Pattern p=new Pattern("abcd"); 389 * p.matcher("").matchesPrefix(); 390 * p.matcher("a").matchesPrefix(); 391 * p.matcher("ab").matchesPrefix(); 392 * p.matcher("abc").matchesPrefix(); 393 * p.matcher("abcd").matchesPrefix(); 394 * </pre> 395 * and the following yield <code>false</code>:<pre> 396 * p.matcher("b").isPrefix(); 397 * p.matcher("abcdef").isPrefix(); 398 * p.matcher("x").isPrefix(); 399 * </pre> 400 * 401 * @return true if the entire target matches the beginning of the pattern 402 */ 403 public final boolean matchesPrefix() { 404 setPosition(0); 405 return search(ANCHOR_START | ACCEPT_INCOMPLETE | ANCHOR_END); 406 } 407 408 /** 409 * Just an old name for isPrefix().<br> 410 * Retained for backwards compatibility. 411 * 412 * @deprecated Replaced by isPrefix() 413 */ 414 public final boolean isStart() { 415 return matchesPrefix(); 416 } 417 418 /** 419 * Tells whether a current target matches the whole pattern. 420 * For example the following yields the <code>true</code>:<pre> 421 * Pattern p=new Pattern("\\w+"); 422 * p.matcher("a").matches(); 423 * p.matcher("ab").matches(); 424 * p.matcher("abc").matches(); 425 * </pre> 426 * and the following yields the <code>false</code>:<pre> 427 * p.matcher("abc def").matches(); 428 * p.matcher("bcd ").matches(); 429 * p.matcher(" bcd").matches(); 430 * p.matcher("#xyz#").matches(); 431 * </pre> 432 * 433 * @return whether a current target matches the whole pattern. 
434 */ 435 public final boolean matches() { 436 if (called) setPosition(0); 437 return search(ANCHOR_START | ANCHOR_END); 438 } 439 440 /** 441 * Just a combination of setTarget(String) and matches(). 442 * 443 * @param s the target string; 444 * @return whether the specified string matches the whole pattern. 445 */ 446 public final boolean matches(String s) { 447 setTarget(s); 448 return search(ANCHOR_START | ANCHOR_END); 449 } 450 451 /** 452 * Allows to set a position the subsequent find()/find(int) will start from. 453 * 454 * @param pos the position to start from; 455 * @see Matcher#find() 456 * @see Matcher#find(int) 457 */ 458 public void setPosition(int pos) { 459 wOffset = offset + pos; 460 wEnd = -1; 461 called = false; 462 flush(); 463 } 464 465 /** 466 * Searches through a target for a matching substring, starting from just after the end of last match. 467 * If there wasn't any search performed, starts from zero. 468 * 469 * @return <code>true</code> if a match found. 470 */ 471 public final boolean find() { 472 if (called) skip(); 473 return search(0); 474 } 475 476 /** 477 * Searches through a target for a matching substring, starting from just after the end of last match. 478 * If there wasn't any search performed, starts from zero. 479 * 480 * @param anchors a zero or a combination(bitwise OR) of ANCHOR_START,ANCHOR_END,ANCHOR_LASTMATCH,ACCEPT_INCOMPLETE 481 * @return <code>true</code> if a match found. 482 */ 483 public boolean find(int anchors) { 484 if (called) skip(); 485 return search(anchors); 486 } 487 488 489 /** 490 * The same as findAll(int), but with default behaviour; 491 */ 492 public MatchIterator findAll() { 493 return findAll(0); 494 } 495 496 /** 497 * Returns an iterator over the matches found by subsequently calling find(options), the search starts from the zero position. 
498 */ 499 public MatchIterator findAll(final int options) { 500 //setPosition(0); 501 return new MatchIterator() { 502 private boolean checked = false; 503 private boolean hasMore = false; 504 505 public boolean hasMore() { 506 if (!checked) check(); 507 return hasMore; 508 } 509 510 public MatchResult nextMatch() { 511 if (!checked) check(); 512 if (!hasMore) throw new NoSuchElementException(); 513 checked = false; 514 return Matcher.this; 515 } 516 517 private void check() { 518 hasMore = find(options); 519 checked = true; 520 } 521 522 public int count() { 523 if (!checked) check(); 524 if (!hasMore) return 0; 525 int c = 1; 526 while (find(options)) c++; 527 checked = false; 528 return c; 529 } 530 }; 531 } 532 533 /** 534 * Continues to search from where the last search left off. 535 * The same as proceed(0). 536 * 537 * @see Matcher#proceed(int) 538 */ 539 public final boolean proceed() { 540 return proceed(0); 541 } 542 543 /** 544 * Continues to search from where the last search left off using specified options:<pre> 545 * Matcher m=new Pattern("\\w+").matcher("abc"); 546 * while(m.proceed(0)){ 547 * System.out.println(m.group(0)); 548 * } 549 * </pre> 550 * Output:<pre> 551 * abc 552 * ab 553 * a 554 * bc 555 * b 556 * c 557 * </pre> 558 * For example, let's find all odd numbers occurring in a text:<pre> 559 * Matcher m=new Pattern("\\d+").matcher("123"); 560 * while(m.proceed(0)){ 561 * String match=m.group(0); 562 * if(isOdd(Integer.parseInt(match))) System.out.println(match); 563 * } 564 * 565 * static boolean isOdd(int i){ 566 * return (i&1)>0; 567 * } 568 * </pre> 569 * This outputs:<pre> 570 * 123 571 * 1 572 * 23 573 * 3 574 * </pre> 575 * Note that using <code>find()</code> method we would find '123' only. 576 * 577 * @param options search options, some of ANCHOR_START|ANCHOR_END|ANCHOR_LASTMATCH|ACCEPT_INCOMPLETE; zero value(default) stands for usual search for substring. 
578 */ 579 public boolean proceed(int options) { 580 if (called) { 581 if (top == null) { 582 wOffset++; 583 } 584 } 585 return search(0); 586 } 587 588 /** 589 * Sets the current search position just after the end of last match. 590 */ 591 public void skip() { 592 int we = wEnd; 593 if (wOffset == we) { //requires special handling 594 //if no variants at 'wOutside',advance pointer and clear 595 if (top == null) { 596 wOffset++; 597 flush(); 598 } 599 //otherwise, if there exist a variant, 600 //don't clear(), i.e. allow it to match 601 return; 602 } else { 603 if (we < 0) wOffset = 0; 604 else wOffset = we; 605 } 606 //rflush(); //rflush() works faster on simple regexes (with a small group/branch number) 607 flush(); 608 } 609 610 private void init() { 611 //wOffset=-1; 612 wOffset = offset; 613 wEnd = -1; 614 called = false; 615 flush(); 616 } 617 618 /** 619 * Resets the internal state. 620 */ 621 public void flush() { 622 top = null; 623 defaultEntry.reset(0); 624 first.reset(minQueueLength); 625 626 for (int i = memregs.length - 1; i > 0; i--) { 627 MemReg mr = memregs[i]; 628 mr.in = mr.out = -1; 629 } 630 for (int i = memregs.length - 1; i > 0; i--) { 631 MemReg mr = memregs[i]; 632 mr.in = mr.out = -1; 633 } 634 called = false; 635 } 636 637 //reverse flush 638 //may work significantly faster, 639 //need testing 640 private void rflush() { 641 SearchEntry entry = top; 642 top = null; 643 MemReg[] memregs = this.memregs; 644 int[] counters = this.counters; 645 while (entry != null) { 646 SearchEntry next = entry.sub; 647 SearchEntry.popState(entry, memregs, counters); 648 entry = next; 649 } 650 SearchEntry.popState(defaultEntry, memregs, counters); 651 } 652 653 /** 654 */ 655 public String toString() { 656 return toString_d(); 657 //return getString(wOffset, wEnd); 658 } 659 660 public Pattern pattern() { 661 return re; 662 } 663 664 public String target() { 665 return getString(offset, end); 666 } 667 668 /** 669 */ 670 public char[] targetChars() { 671 
shared = true; 672 return data; 673 } 674 675 /** 676 */ 677 public int targetStart() { 678 return offset; 679 } 680 681 /** 682 */ 683 public int targetEnd() { 684 return end; 685 } 686 687 public char charAt(int i) { 688 int in = this.wOffset; 689 int out = this.wEnd; 690 if (in < 0 || out < in) throw new IllegalStateException("unassigned"); 691 return data[in + i]; 692 } 693 694 public char charAt(int i, int groupId) { 695 MemReg mr = bounds(groupId); 696 if (mr == null) throw new IllegalStateException("group #" + groupId + " is not assigned"); 697 int in = mr.in; 698 if (i < 0 || i > (mr.out - in)) throw new StringIndexOutOfBoundsException("" + i); 699 return data[in + i]; 700 } 701 702 public final int length() { 703 return wEnd - wOffset; 704 } 705 706 /** 707 */ 708 public final int start() { 709 return wOffset - offset; 710 } 711 712 /** 713 */ 714 public final int end() { 715 return wEnd - offset; 716 } 717 718 /** 719 */ 720 public String prefix() { 721 return getString(offset, wOffset); 722 } 723 724 /** 725 */ 726 public String suffix() { 727 return getString(wEnd, end); 728 } 729 730 /** 731 */ 732 public int groupCount() { 733 return memregs.length; 734 } 735 736 /** 737 */ 738 public String group(int n) { 739 MemReg mr = bounds(n); 740 if (mr == null) return null; 741 return getString(mr.in, mr.out); 742 } 743 744 /** 745 */ 746 public String group(String name) { 747 Integer id = re.groupId(name); 748 if (id == null) throw new IllegalArgumentException("<" + name + "> isn't defined"); 749 return group(id); 750 } 751 752 /** 753 */ 754 public boolean getGroup(int n, TextBuffer tb) { 755 MemReg mr = bounds(n); 756 if (mr == null) return false; 757 int in; 758 tb.append(data, in = mr.in, mr.out - in); 759 return true; 760 } 761 762 /** 763 */ 764 public boolean getGroup(String name, TextBuffer tb) { 765 Integer id = re.groupId(name); 766 if (id == null) throw new IllegalArgumentException("unknown group: \"" + name + "\""); 767 return getGroup(id, tb); 
768 } 769 770 /** 771 */ 772 public boolean getGroup(int n, StringBuilder sb) { 773 MemReg mr = bounds(n); 774 if (mr == null) return false; 775 int in; 776 sb.append(data, in = mr.in, mr.out - in); 777 return true; 778 } 779 780 /** 781 */ 782 public boolean getGroup(String name, StringBuilder sb) { 783 Integer id = re.groupId(name); 784 if (id == null) throw new IllegalArgumentException("unknown group: \"" + name + "\""); 785 return getGroup(id, sb); 786 } 787 788 /** 789 */ 790 public String[] groups() { 791 MemReg[] memregs = this.memregs; 792 String[] groups = new String[memregs.length]; 793 int in, out; 794 MemReg mr; 795 for (int i = 0; i < memregs.length; i++) { 796 mr = memregs[i]; 797 out = mr.out; 798 if ((in = mr.in) < 0 || mr.out < in) continue; 799 groups[i] = getString(in, out); 800 } 801 return groups; 802 } 803 804 /** 805 */ 806 public ArrayList<String> groupv() { 807 MemReg[] memregs = this.memregs; 808 ArrayList<String> v = new ArrayList<String>(); 809 MemReg mr; 810 for (int i = 0; i < memregs.length; i++) { 811 mr = bounds(i); 812 if (mr == null) { 813 v.add("empty"); 814 continue; 815 } 816 String s = getString(mr.in, mr.out); 817 v.add(s); 818 } 819 return v; 820 } 821 822 private MemReg bounds(int id) { 823 MemReg mr; 824 if (id >= 0) { 825 mr = memregs[id]; 826 } else switch (id) { 827 case PREFIX: 828 mr = prefixBounds; 829 if (mr == null) prefixBounds = mr = new MemReg(PREFIX); 830 mr.in = offset; 831 mr.out = wOffset; 832 break; 833 case SUFFIX: 834 mr = suffixBounds; 835 if (mr == null) suffixBounds = mr = new MemReg(SUFFIX); 836 mr.in = wEnd; 837 mr.out = end; 838 break; 839 case TARGET: 840 mr = targetBounds; 841 if (mr == null) targetBounds = mr = new MemReg(TARGET); 842 mr.in = offset; 843 mr.out = end; 844 break; 845 default: 846 throw new IllegalArgumentException("illegal group id: " + id + "; must either nonnegative int, or MatchResult.PREFIX, or MatchResult.SUFFIX"); 847 } 848 int in; 849 if ((in = mr.in) < 0 || mr.out < in) 
return null; 850 return mr; 851 } 852 853 /** 854 */ 855 public final boolean isCaptured() { 856 return wOffset >= 0 && wEnd >= wOffset; 857 } 858 859 /** 860 */ 861 public final boolean isCaptured(int id) { 862 return bounds(id) != null; 863 } 864 865 /** 866 */ 867 public final boolean isCaptured(String groupName) { 868 Integer id = re.groupId(groupName); 869 if (id == null) throw new IllegalArgumentException("unknown group: \"" + groupName + "\""); 870 return isCaptured(id); 871 } 872 873 /** 874 */ 875 public final int length(int id) { 876 MemReg mr = bounds(id); 877 if(mr != null) 878 return mr.out - mr.in; 879 return 0; 880 } 881 882 /** 883 */ 884 public final int start(int id) { 885 return bounds(id).in - offset; 886 } 887 888 /** 889 */ 890 public final int end(int id) { 891 return bounds(id).out - offset; 892 } 893 894 public boolean search(int anchors) { 895 called = true; 896 final int end = this.end; 897 int offset = this.offset; 898 char[] data = this.data; 899 int wOffset = this.wOffset; 900 int wEnd = this.wEnd; 901 902 MemReg[] memregs = this.memregs; 903 int[] counters = this.counters; 904 LAEntry[] lookaheads = this.lookaheads; 905 906 //int memregCount=memregs.length; 907 //int cntCount=counters.length; 908 909 SearchEntry defaultEntry = this.defaultEntry; 910 SearchEntry first = this.first; 911 SearchEntry top = this.top; 912 SearchEntry actual; 913 int cnt, regLen; 914 int i; 915 916 final boolean matchEnd = (anchors & ANCHOR_END) > 0; 917 final boolean allowIncomplete = (anchors & ACCEPT_INCOMPLETE) > 0; 918 919 Pattern re = this.re; 920 Term root = re.root; 921 Term term; 922 if (top == null) { 923 if ((anchors & ANCHOR_START) > 0) { 924 term = re.root0; //raw root 925 root = startAnchor; 926 } else if ((anchors & ANCHOR_LASTMATCH) > 0) { 927 term = re.root0; //raw root 928 root = lastMatchAnchor; 929 } else { 930 term = root; //optimized root 931 } 932 i = wOffset; 933 actual = first; 934 SearchEntry.popState(defaultEntry, memregs, 
counters); 935 } else { 936 top = (actual = top).sub; 937 term = actual.term; 938 i = actual.index; 939 SearchEntry.popState(actual, memregs, counters); 940 } 941 cnt = actual.cnt; 942 regLen = actual.regLen; 943 944 main: 945 while (wOffset <= end) { 946 matchHere: 947 for (; ; ) { 948 int memreg, cntreg; 949 char c; 950 switch (term.type) { 951 case Term.FIND: { 952 int jump = find(data, i + term.distance, end, term.target); //don't eat the last match 953 if (jump < 0) break main; //return false 954 i += jump; 955 wOffset = i; //force window to move 956 if (term.eat) { 957 if (i == end) break; 958 i++; 959 } 960 term = term.next; 961 continue matchHere; 962 } 963 case Term.FINDREG: { 964 MemReg mr = memregs[term.target.memreg]; 965 int sampleOff = mr.in; 966 int sampleLen = mr.out - sampleOff; 967 //if(sampleOff<0 || sampleLen<0) throw new Error("backreference used before definition: \\"+term.memreg); 968 /*@since 1.2*/ 969 if (sampleOff < 0 || sampleLen < 0) { 970 break; 971 } else if (sampleLen == 0) { 972 term = term.next; 973 continue matchHere; 974 } 975 int jump = findReg(data, i + term.distance, sampleOff, sampleLen, term.target, end); //don't eat the last match 976 if (jump < 0) break main; //return false 977 i += jump; 978 wOffset = i; //force window to move 979 if (term.eat) { 980 i += sampleLen; 981 if (i > end) break; 982 } 983 term = term.next; 984 continue matchHere; 985 } 986 case Term.VOID: 987 term = term.next; 988 continue matchHere; 989 990 case Term.CHAR: 991 //can only be 1-char-wide 992 // \/ 993 if (i >= end || (re.caseless ? 
Category.caseFold(data[i]) : data[i]) != term.c) 994 break; 995 i++; 996 term = term.next; 997 continue matchHere; 998 999 case Term.ANY_CHAR: 1000 //can only be 1-char-wide 1001 // \/ 1002 if (i >= end) break; 1003 i++; 1004 term = term.next; 1005 continue matchHere; 1006 1007 case Term.ANY_CHAR_NE: 1008 //can only be 1-char-wide 1009 // \/ 1010 if (i >= end || (c = data[i]) == '\r' || c == '\n') break; 1011 i++; 1012 term = term.next; 1013 continue matchHere; 1014 1015 case Term.END: 1016 if (i >= end) { //meets 1017 term = term.next; 1018 continue matchHere; 1019 } 1020 break; 1021 1022 case Term.END_EOL: //perl's $ 1023 if (i >= end) { //meets 1024 term = term.next; 1025 continue matchHere; 1026 } else { 1027 boolean matches = 1028 i >= end | 1029 ((i + 1) == end && data[i] == '\n') | 1030 ((i + 2) == end && data[i] == '\r' && data[i + 1] == '\n'); 1031 1032 if (matches) { 1033 term = term.next; 1034 continue matchHere; 1035 } else break; 1036 } 1037 1038 case Term.LINE_END: 1039 if (i >= end) { //meets 1040 term = term.next; 1041 continue matchHere; 1042 } else { 1043 /* 1044 if(((c=data[i])=='\r' || c=='\n') && 1045 (c=data[i-1])!='\r' && c!='\n'){ 1046 term=term.next; 1047 continue matchHere; 1048 } 1049 */ 1050 //5 aug 2001 1051 if ((c = data[i]) == '\r' || c == '\n') { 1052 term = term.next; 1053 continue matchHere; 1054 } 1055 } 1056 break; 1057 1058 case Term.START: //Perl's "^" 1059 if (i == offset) { //meets 1060 term = term.next; 1061 continue matchHere; 1062 } 1063 //break; 1064 1065 //changed on 27-04-2002 1066 //due to a side effect: if ALLOW_INCOMPLETE is enabled, 1067 //the anchorStart moves up to the end and succeeds 1068 //(see comments at the last lines of matchHere, ~line 1830) 1069 //Solution: if there are some entries on the stack ("^a|b$"), 1070 //try them; otherwise it's a final 'no' 1071 //if(top!=null) break; 1072 //else break main; 1073 1074 //changed on 25-05-2002 1075 //rationale: if the term is startAnchor, 1076 //it's the root term 
by definition, 1077 //so if it doesn't match, the entire pattern 1078 //couldn't match too; 1079 //otherwise we could have the following problem: 1080 //"c|^a" against "abc" finds only "a" 1081 if (top != null) break; 1082 if (term != startAnchor) break; 1083 else break main; 1084 1085 case Term.LAST_MATCH_END: 1086 if (i == wEnd) { //meets 1087 term = term.next; 1088 continue matchHere; 1089 } 1090 break main; //return false 1091 1092 case Term.LINE_START: 1093 if (i == offset) { //meets 1094 term = term.next; 1095 continue matchHere; 1096 } else if (i < end) { 1097 /* 1098 if(((c=data[i-1])=='\r' || c=='\n') && 1099 (c=data[i])!='\r' && c!='\n'){ 1100 term=term.next; 1101 continue matchHere; 1102 } 1103 */ 1104 //5 aug 2001 1105 //if((c=data[i-1])=='\r' || c=='\n'){ ?? 1106 if ((c = data[i - 1]) == '\n' || ((c == '\r') && (data[i] != '\n'))) { 1107 term = term.next; 1108 continue matchHere; 1109 } 1110 } 1111 break; 1112 1113 case Term.BITSET: { 1114 //can only be 1-char-wide 1115 // \/ 1116 if (i >= end) break; 1117 c = re.caseless ? Category.caseFold(data[i]) : data[i]; 1118 if (!(c <= 255 && term.bitset.get(c)) ^ term.inverse) break; 1119 i++; 1120 term = term.next; 1121 continue matchHere; 1122 } 1123 case Term.BITSET2: { 1124 //can only be 1-char-wide 1125 // \/ 1126 if (i >= end) break; 1127 c = re.caseless ? Category.caseFold(data[i]) : data[i]; 1128 IntBitSet arr = term.bitset2[c >> 8]; 1129 if (arr == null || !arr.get(c & 255) ^ term.inverse) break; 1130 i++; 1131 term = term.next; 1132 continue matchHere; 1133 } 1134 case Term.BOUNDARY: { 1135 boolean ch1Meets = false, ch2Meets = false; 1136 IntBitSet bitset = term.bitset; 1137 test1: 1138 { 1139 int j = i - 1; 1140 //if(j<offset || j>=end) break test1; 1141 if (j < offset) break test1; 1142 c = re.caseless ? 
Category.caseFold(data[j]) : data[j]; 1143 ch1Meets = (c < 256 && bitset.get(c)); 1144 } 1145 test2: 1146 { 1147 //if(i<offset || i>=end) break test2; 1148 if (i >= end) break test2; 1149 c = re.caseless ? Category.caseFold(data[i]) : data[i]; 1150 ch2Meets = (c < 256 && bitset.get(c)); 1151 } 1152 if (ch1Meets ^ ch2Meets ^ term.inverse) { //meets 1153 term = term.next; 1154 continue matchHere; 1155 } else break; 1156 } 1157 case Term.UBOUNDARY: { 1158 boolean ch1Meets = false, ch2Meets = false; 1159 IntBitSet[] bitset2 = term.bitset2; 1160 test1: 1161 { 1162 int j = i - 1; 1163 //if(j<offset || j>=end) break test1; 1164 if (j < offset) break test1; 1165 c = re.caseless ? Category.caseFold(data[j]) : data[j]; 1166 IntBitSet bits = bitset2[c >> 8]; 1167 ch1Meets = bits != null && bits.get(c & 0xff); 1168 } 1169 test2: 1170 { 1171 //if(i<offset || i>=end) break test2; 1172 if (i >= end) break test2; 1173 c = re.caseless ? Category.caseFold(data[i]) : data[i]; 1174 IntBitSet bits = bitset2[c >> 8]; 1175 ch2Meets = bits != null && bits.get(c & 0xff); 1176 } 1177 if (ch1Meets ^ ch2Meets ^ term.inverse) { //is boundary ^ inv 1178 term = term.next; 1179 continue matchHere; 1180 } else break; 1181 } 1182 case Term.DIRECTION: { 1183 boolean ch1Meets = false, ch2Meets = false; 1184 IntBitSet bitset = term.bitset; 1185 boolean inv = term.inverse; 1186 int j = i - 1; 1187 //if(j>=offset && j<end){ 1188 if (j >= offset) { 1189 c = re.caseless ? Category.caseFold(data[j]) : data[j]; 1190 ch1Meets = c < 256 && bitset.get(c); 1191 } 1192 if (ch1Meets ^ inv) break; 1193 1194 //if(i>=offset && i<end){ 1195 if (i < end) { 1196 c = re.caseless ? 
Category.caseFold(data[i]) : data[i]; 1197 ch2Meets = c < 256 && bitset.get(c); 1198 } 1199 if (!ch2Meets ^ inv) break; 1200 1201 1202 term = term.next; 1203 continue matchHere; 1204 } 1205 case Term.UDIRECTION: { 1206 boolean ch1Meets = false, ch2Meets = false; 1207 IntBitSet[] bitset2 = term.bitset2; 1208 boolean inv = term.inverse; 1209 int j = i - 1; 1210 1211 //if(j>=offset && j<end){ 1212 if (j >= offset) { 1213 c = re.caseless ? Category.caseFold(data[j]) : data[j]; 1214 IntBitSet bits = bitset2[c >> 8]; 1215 ch1Meets = bits != null && bits.get(c & 0xff); 1216 } 1217 if (ch1Meets ^ inv) break; 1218 1219 //if(i>=offset && i<end){ 1220 if (i < end) { 1221 c = re.caseless ? Category.caseFold(data[i]) : data[i]; 1222 IntBitSet bits = bitset2[c >> 8]; 1223 ch2Meets = bits != null && bits.get(c & 0xff); 1224 } 1225 if (!ch2Meets ^ inv) break; 1226 1227 term = term.next; 1228 continue matchHere; 1229 } 1230 case Term.REG: { 1231 MemReg mr = memregs[term.memreg]; 1232 int sampleOffset = mr.in; 1233 int sampleOutside = mr.out; 1234 int rLen; 1235 if (sampleOffset < 0 || (rLen = sampleOutside - sampleOffset) < 0) { 1236 break; 1237 } else if (rLen == 0) { 1238 term = term.next; 1239 continue matchHere; 1240 } 1241 1242 // don't prevent us from reaching the 'end' 1243 if ((i + rLen) > end) break; 1244 1245 if (compareRegions(data, sampleOffset, i, rLen, end)) { 1246 i += rLen; 1247 term = term.next; 1248 continue matchHere; 1249 } 1250 break; 1251 } 1252 case Term.REG_I: { 1253 MemReg mr = memregs[term.memreg]; 1254 int sampleOffset = mr.in; 1255 int sampleOutside = mr.out; 1256 int rLen; 1257 if (sampleOffset < 0 || (rLen = sampleOutside - sampleOffset) < 0) { 1258 break; 1259 } else if (rLen == 0) { 1260 term = term.next; 1261 continue matchHere; 1262 } 1263 1264 // don't prevent us from reaching the 'end' 1265 if ((i + rLen) > end) break; 1266 1267 if (compareRegionsI(data, sampleOffset, i, rLen, end)) { 1268 i += rLen; 1269 term = term.next; 1270 continue 
matchHere; 1271 } 1272 break; 1273 } 1274 case Term.REPEAT_0_INF: { 1275 //i+=(cnt=repeat(data,i,end,term.target)); 1276 if ((cnt = repeat(data, i, end, term.target)) <= 0) { 1277 term = term.next; 1278 continue; 1279 } 1280 i += cnt; 1281 1282 //branch out the backtracker (that is term.failNext, see Term.make*()) 1283 actual.cnt = cnt; 1284 actual.term = term.failNext; 1285 actual.index = i; 1286 actual = (top = actual).on; 1287 if (actual == null) { 1288 actual = new SearchEntry(); 1289 top.on = actual; 1290 actual.sub = top; 1291 } 1292 term = term.next; 1293 continue; 1294 } 1295 case Term.REPEAT_MIN_INF: { 1296 cnt = repeat(data, i, end, term.target); 1297 if (cnt < term.minCount) break; 1298 i += cnt; 1299 1300 //branch out the backtracker (that is term.failNext, see Term.make*()) 1301 actual.cnt = cnt; 1302 actual.term = term.failNext; 1303 actual.index = i; 1304 actual = (top = actual).on; 1305 if (actual == null) { 1306 actual = new SearchEntry(); 1307 top.on = actual; 1308 actual.sub = top; 1309 } 1310 term = term.next; 1311 continue; 1312 } 1313 case Term.REPEAT_MIN_MAX: { 1314 int out2 = i + term.maxCount; 1315 cnt = repeat(data, i, end < out2 ? 
end : out2, term.target); 1316 if (cnt < term.minCount) break; 1317 i += cnt; 1318 1319 //branch out the backtracker (that is term.failNext, see Term.make*()) 1320 actual.cnt = cnt; 1321 actual.term = term.failNext; 1322 actual.index = i; 1323 actual = (top = actual).on; 1324 if (actual == null) { 1325 actual = new SearchEntry(); 1326 top.on = actual; 1327 actual.sub = top; 1328 } 1329 term = term.next; 1330 continue; 1331 } 1332 case Term.REPEAT_REG_MIN_INF: { 1333 MemReg mr = memregs[term.memreg]; 1334 int sampleOffset = mr.in; 1335 int sampleOutside = mr.out; 1336 /*@since 1.2*/ 1337 int bitset; 1338 if (sampleOffset < 0 || (bitset = sampleOutside - sampleOffset) < 0) { 1339 break; 1340 } else if (bitset == 0) { 1341 term = term.next; 1342 continue matchHere; 1343 } 1344 1345 cnt = 0; 1346 1347 while (compareRegions(data, i, sampleOffset, bitset, end)) { 1348 cnt++; 1349 i += bitset; 1350 } 1351 1352 if (cnt < term.minCount) break; 1353 1354 actual.cnt = cnt; 1355 actual.term = term.failNext; 1356 actual.index = i; 1357 actual.regLen = bitset; 1358 actual = (top = actual).on; 1359 if (actual == null) { 1360 actual = new SearchEntry(); 1361 top.on = actual; 1362 actual.sub = top; 1363 } 1364 term = term.next; 1365 continue; 1366 } 1367 case Term.REPEAT_REG_MIN_MAX: { 1368 MemReg mr = memregs[term.memreg]; 1369 int sampleOffset = mr.in; 1370 int sampleOutside = mr.out; 1371 /*@since 1.2*/ 1372 int bitset; 1373 if (sampleOffset < 0 || (bitset = sampleOutside - sampleOffset) < 0) { 1374 break; 1375 } else if (bitset == 0) { 1376 term = term.next; 1377 continue matchHere; 1378 } 1379 1380 cnt = 0; 1381 int countBack = term.maxCount; 1382 while (countBack > 0 && compareRegions(data, i, sampleOffset, bitset, end)) { 1383 cnt++; 1384 i += bitset; 1385 countBack--; 1386 } 1387 1388 if (cnt < term.minCount) break; 1389 1390 actual.cnt = cnt; 1391 actual.term = term.failNext; 1392 actual.index = i; 1393 actual.regLen = bitset; 1394 actual = (top = actual).on; 1395 if 
(actual == null) { 1396 actual = new SearchEntry(); 1397 top.on = actual; 1398 actual.sub = top; 1399 } 1400 term = term.next; 1401 continue; 1402 } 1403 case Term.BACKTRACK_0: 1404 cnt = actual.cnt; 1405 if (cnt > 0) { 1406 cnt--; 1407 i--; 1408 actual.cnt = cnt; 1409 actual.index = i; 1410 actual.term = term; 1411 actual = (top = actual).on; 1412 if (actual == null) { 1413 actual = new SearchEntry(); 1414 top.on = actual; 1415 actual.sub = top; 1416 } 1417 term = term.next; 1418 continue; 1419 } else break; 1420 1421 case Term.BACKTRACK_MIN: 1422 cnt = actual.cnt; 1423 if (cnt > term.minCount) { 1424 cnt--; 1425 i--; 1426 actual.cnt = cnt; 1427 actual.index = i; 1428 actual.term = term; 1429 actual = (top = actual).on; 1430 if (actual == null) { 1431 actual = new SearchEntry(); 1432 top.on = actual; 1433 actual.sub = top; 1434 } 1435 term = term.next; 1436 continue; 1437 } else break; 1438 1439 case Term.BACKTRACK_FIND_MIN: { 1440 cnt = actual.cnt; 1441 int minCnt; 1442 if (cnt > (minCnt = term.minCount)) { 1443 int start = i + term.distance; 1444 if (start > end) { 1445 int exceed = start - end; 1446 cnt -= exceed; 1447 if (cnt <= minCnt) break; 1448 i -= exceed; 1449 } 1450 int back = findBack(data, i + term.distance, cnt - minCnt, term.target); 1451 if (back < 0) break; 1452 1453 //cnt-=back; 1454 //i-=back; 1455 if ((cnt -= back) <= minCnt) { 1456 i -= back; 1457 if (term.eat) i++; 1458 term = term.next; 1459 continue; 1460 } 1461 i -= back; 1462 1463 actual.cnt = cnt; 1464 actual.index = i; 1465 1466 if (term.eat) i++; 1467 1468 actual.term = term; 1469 actual = (top = actual).on; 1470 if (actual == null) { 1471 actual = new SearchEntry(); 1472 top.on = actual; 1473 actual.sub = top; 1474 } 1475 term = term.next; 1476 continue; 1477 } else break; 1478 } 1479 1480 case Term.BACKTRACK_FINDREG_MIN: { 1481 cnt = actual.cnt; 1482 int minCnt; 1483 if (cnt > (minCnt = term.minCount)) { 1484 int start = i + term.distance; 1485 if (start > end) { 1486 int exceed = 
start - end; 1487 cnt -= exceed; 1488 if (cnt <= minCnt) break; 1489 i -= exceed; 1490 } 1491 MemReg mr = memregs[term.target.memreg]; 1492 int sampleOff = mr.in; 1493 int sampleLen = mr.out - sampleOff; 1494 /*@since 1.2*/ 1495 int back; 1496 if (sampleOff < 0 || sampleLen < 0) { 1497 //the group is not def., as in the case of '(\w+)\1' 1498 //treat as usual BACKTRACK_MIN 1499 cnt--; 1500 i--; 1501 actual.cnt = cnt; 1502 actual.index = i; 1503 actual.term = term; 1504 actual = (top = actual).on; 1505 if (actual == null) { 1506 actual = new SearchEntry(); 1507 top.on = actual; 1508 actual.sub = top; 1509 } 1510 term = term.next; 1511 continue; 1512 } else if (sampleLen == 0) { 1513 back = -1; 1514 } else { 1515 back = findBackReg(data, i + term.distance, sampleOff, sampleLen, cnt - minCnt, term.target, end); 1516 if (back < 0) break; 1517 } 1518 cnt -= back; 1519 i -= back; 1520 actual.cnt = cnt; 1521 actual.index = i; 1522 1523 if (term.eat) i += sampleLen; 1524 1525 actual.term = term; 1526 actual = (top = actual).on; 1527 if (actual == null) { 1528 actual = new SearchEntry(); 1529 top.on = actual; 1530 actual.sub = top; 1531 } 1532 term = term.next; 1533 continue; 1534 } else break; 1535 } 1536 1537 case Term.BACKTRACK_REG_MIN: 1538 cnt = actual.cnt; 1539 if (cnt > term.minCount) { 1540 regLen = actual.regLen; 1541 cnt--; 1542 i -= regLen; 1543 actual.cnt = cnt; 1544 actual.index = i; 1545 actual.term = term; 1546 //actual.regLen=regLen; 1547 actual = (top = actual).on; 1548 if (actual == null) { 1549 actual = new SearchEntry(); 1550 top.on = actual; 1551 actual.sub = top; 1552 } 1553 term = term.next; 1554 continue; 1555 } else break; 1556 1557 case Term.GROUP_IN: { 1558 memreg = term.memreg; 1559 //memreg=0 is a regex itself; we don't need to handle it 1560 //because regex bounds already are in wOffset and wEnd 1561 if (memreg > 0) { 1562 memregs[memreg].tmp = i; //assume 1563 } 1564 term = term.next; 1565 continue; 1566 } 1567 case Term.GROUP_OUT: 1568 memreg 
= term.memreg; 1569 //see above 1570 if (memreg > 0) { 1571 MemReg mr = memregs[memreg]; 1572 SearchEntry.saveMemregState((top != null) ? top : defaultEntry, memreg, mr); 1573 mr.in = mr.tmp; //commit 1574 mr.out = i; 1575 } 1576 term = term.next; 1577 continue; 1578 1579 case Term.PLOOKBEHIND_IN: { 1580 int tmp = i - term.distance; 1581 if (tmp < offset) break; 1582 LAEntry le = lookaheads[term.lookaheadId]; 1583 le.index = i; 1584 i = tmp; 1585 le.actual = actual; 1586 le.top = top; 1587 term = term.next; 1588 continue; 1589 } 1590 case Term.INDEPENDENT_IN: 1591 case Term.PLOOKAHEAD_IN: { 1592 LAEntry le = lookaheads[term.lookaheadId]; 1593 le.index = i; 1594 le.actual = actual; 1595 le.top = top; 1596 term = term.next; 1597 continue; 1598 } 1599 case Term.LOOKBEHIND_CONDITION_OUT: 1600 case Term.LOOKAHEAD_CONDITION_OUT: 1601 case Term.PLOOKAHEAD_OUT: 1602 case Term.PLOOKBEHIND_OUT: { 1603 LAEntry le = lookaheads[term.lookaheadId]; 1604 i = le.index; 1605 actual = le.actual; 1606 top = le.top; 1607 term = term.next; 1608 continue; 1609 } 1610 case Term.INDEPENDENT_OUT: { 1611 LAEntry le = lookaheads[term.lookaheadId]; 1612 actual = le.actual; 1613 top = le.top; 1614 term = term.next; 1615 continue; 1616 } 1617 case Term.NLOOKBEHIND_IN: { 1618 int tmp = i - term.distance; 1619 if (tmp < offset) { 1620 term = term.failNext; 1621 continue; 1622 } 1623 LAEntry le = lookaheads[term.lookaheadId]; 1624 le.actual = actual; 1625 le.top = top; 1626 1627 actual.term = term.failNext; 1628 actual.index = i; 1629 i = tmp; 1630 actual = (top = actual).on; 1631 if (actual == null) { 1632 actual = new SearchEntry(); 1633 top.on = actual; 1634 actual.sub = top; 1635 } 1636 term = term.next; 1637 continue; 1638 } 1639 case Term.NLOOKAHEAD_IN: { 1640 LAEntry le = lookaheads[term.lookaheadId]; 1641 le.actual = actual; 1642 le.top = top; 1643 1644 actual.term = term.failNext; 1645 actual.index = i; 1646 actual = (top = actual).on; 1647 if (actual == null) { 1648 actual = new 
SearchEntry(); 1649 top.on = actual; 1650 actual.sub = top; 1651 } 1652 1653 term = term.next; 1654 continue; 1655 } 1656 case Term.NLOOKBEHIND_OUT: 1657 case Term.NLOOKAHEAD_OUT: { 1658 LAEntry le = lookaheads[term.lookaheadId]; 1659 actual = le.actual; 1660 top = le.top; 1661 break; 1662 } 1663 case Term.LOOKBEHIND_CONDITION_IN: { 1664 int tmp = i - term.distance; 1665 if (tmp < offset) { 1666 term = term.failNext; 1667 continue; 1668 } 1669 LAEntry le = lookaheads[term.lookaheadId]; 1670 le.index = i; 1671 le.actual = actual; 1672 le.top = top; 1673 1674 actual.term = term.failNext; 1675 actual.index = i; 1676 actual = (top = actual).on; 1677 if (actual == null) { 1678 actual = new SearchEntry(); 1679 top.on = actual; 1680 actual.sub = top; 1681 } 1682 1683 i = tmp; 1684 1685 term = term.next; 1686 continue; 1687 } 1688 case Term.LOOKAHEAD_CONDITION_IN: { 1689 LAEntry le = lookaheads[term.lookaheadId]; 1690 le.index = i; 1691 le.actual = actual; 1692 le.top = top; 1693 1694 actual.term = term.failNext; 1695 actual.index = i; 1696 actual = (top = actual).on; 1697 if (actual == null) { 1698 actual = new SearchEntry(); 1699 top.on = actual; 1700 actual.sub = top; 1701 } 1702 1703 term = term.next; 1704 continue; 1705 } 1706 case Term.MEMREG_CONDITION: { 1707 MemReg mr = memregs[term.memreg]; 1708 int sampleOffset = mr.in; 1709 int sampleOutside = mr.out; 1710 if (sampleOffset >= 0 && sampleOutside >= 0 && sampleOutside >= sampleOffset) { 1711 term = term.next; 1712 } else { 1713 term = term.failNext; 1714 } 1715 continue; 1716 } 1717 case Term.BRANCH_STORE_CNT_AUX1: 1718 actual.regLen = regLen; 1719 case Term.BRANCH_STORE_CNT: 1720 actual.cnt = cnt; 1721 case Term.BRANCH: 1722 actual.term = term.failNext; 1723 actual.index = i; 1724 actual = (top = actual).on; 1725 if (actual == null) { 1726 actual = new SearchEntry(); 1727 top.on = actual; 1728 actual.sub = top; 1729 } 1730 term = term.next; 1731 continue; 1732 1733 case Term.SUCCESS: 1734 if (!matchEnd || i == 
end) { 1735 this.wOffset = memregs[0].in = wOffset; 1736 this.wEnd = memregs[0].out = i; 1737 this.top = top; 1738 return true; 1739 } else break; 1740 1741 case Term.CNT_SET_0: 1742 cnt = 0; 1743 term = term.next; 1744 continue; 1745 1746 case Term.CNT_INC: 1747 cnt++; 1748 term = term.next; 1749 continue; 1750 1751 case Term.CNT_GT_EQ: 1752 if (cnt >= term.maxCount) { 1753 term = term.next; 1754 continue; 1755 } else break; 1756 1757 case Term.READ_CNT_LT: 1758 cnt = actual.cnt; 1759 if (cnt < term.maxCount) { 1760 term = term.next; 1761 continue; 1762 } else break; 1763 1764 case Term.CRSTORE_CRINC: { 1765 int cntvalue = counters[cntreg = term.cntreg]; 1766 SearchEntry.saveCntState((top != null) ? top : defaultEntry, cntreg, cntvalue); 1767 counters[cntreg] = ++cntvalue; 1768 term = term.next; 1769 continue; 1770 } 1771 case Term.CR_SET_0: 1772 counters[term.cntreg] = 0; 1773 1774 term = term.next; 1775 continue; 1776 1777 case Term.CR_LT: 1778 if (counters[term.cntreg] < term.maxCount) { 1779 term = term.next; 1780 continue; 1781 } else break; 1782 1783 case Term.CR_GT_EQ: 1784 if (counters[term.cntreg] >= term.maxCount) { 1785 term = term.next; 1786 continue; 1787 } else break; 1788 1789 default: 1790 throw new Error("unknown term type: " + term.type); 1791 } 1792 1793 if (allowIncomplete && i == end) { 1794 //an attempt to implement matchesPrefix() 1795 //not sure it's a good way 1796 //27-04-2002: just as expected, 1797 //the side effect was found (and POSSIBLY fixed); 1798 //see the case Term.START 1799 return true; 1800 } 1801 if (top == null) { 1802 break; 1803 } 1804 1805 //pop the stack 1806 top = (actual = top).sub; 1807 term = actual.term; 1808 i = actual.index; 1809 if (actual.isState) { 1810 SearchEntry.popState(actual, memregs, counters); 1811 } 1812 } 1813 1814 if (defaultEntry.isState) SearchEntry.popState(defaultEntry, memregs, counters); 1815 1816 term = root; 1817 //wOffset++; 1818 //i=wOffset; 1819 i = ++wOffset; 1820 } 1821 this.wOffset = 
wOffset; 1822 this.top = top; 1823 1824 return false; 1825 } 1826 1827 private static boolean compareRegions(char[] arr, int off1, int off2, int len, int out) { 1828 int p1 = off1 + len - 1; 1829 int p2 = off2 + len - 1; 1830 if (p1 >= out || p2 >= out) { 1831 return false; 1832 } 1833 for (int c = len; c > 0; c--, p1--, p2--) { 1834 if (arr[p1] != arr[p2]) { 1835 return false; 1836 } 1837 } 1838 return true; 1839 } 1840 1841 private static boolean compareRegionsI(char[] arr, int off1, int off2, int len, int out) { 1842 int p1 = off1 + len - 1; 1843 int p2 = off2 + len - 1; 1844 if (p1 >= out || p2 >= out) { 1845 return false; 1846 } 1847 for (int c = len; c > 0; c--, p1--, p2--) { 1848 if(Category.caseFold(arr[p1]) != Category.caseFold(arr[p2])) return false; 1849 /* 1850 if ((c1 = arr[p1]) != Character.toLowerCase(c2 = arr[p2]) && 1851 c1 != Character.toUpperCase(c2) && 1852 c1 != Character.toTitleCase(c2)) return false; 1853 */ 1854 } 1855 return true; 1856 } 1857 1858 //repeat while matches 1859 private static int repeat(char[] data, int off, int out, Term term) { 1860 switch (term.type) { 1861 case Term.CHAR: { 1862 char c = term.c; 1863 int i = off; 1864 while (i < out) { 1865 if (data[i] != c) break; 1866 i++; 1867 } 1868 return i - off; 1869 } 1870 case Term.ANY_CHAR: { 1871 return out - off; 1872 } 1873 case Term.ANY_CHAR_NE: { 1874 int i = off; 1875 char c; 1876 while (i < out) { 1877 if ((c = data[i]) == '\r' || c == '\n') break; 1878 i++; 1879 } 1880 return i - off; 1881 } 1882 case Term.BITSET: { 1883 IntBitSet arr = term.bitset; 1884 int i = off; 1885 char c; 1886 if (term.inverse) while (i < out) { 1887 if ((c = data[i]) <= 255 && arr.get(c)) break; 1888 else i++; 1889 } 1890 else while (i < out) { 1891 if ((c = data[i]) <= 255 && arr.get(c)) i++; 1892 else break; 1893 } 1894 return i - off; 1895 } 1896 case Term.BITSET2: { 1897 int i = off; 1898 IntBitSet[] bitset2 = term.bitset2; 1899 char c; 1900 if (term.inverse) while (i < out) { 1901 IntBitSet 
arr = bitset2[(c = data[i]) >> 8]; 1902 if (arr != null && arr.get(c & 0xff)) break; 1903 else i++; 1904 } 1905 else while (i < out) { 1906 IntBitSet arr = bitset2[(c = data[i]) >> 8]; 1907 if (arr != null && arr.get(c & 0xff)) i++; 1908 else break; 1909 } 1910 return i - off; 1911 } 1912 } 1913 throw new Error("this kind of term can't be quantified:" + term.type); 1914 } 1915 1916 //repeat while doesn't match 1917 private static int find(char[] data, int off, int out, Term term) { 1918 if (off >= out) return -1; 1919 switch (term.type) { 1920 case Term.CHAR: { 1921 char c = term.c; 1922 int i = off; 1923 while (i < out) { 1924 if (data[i] == c) break; 1925 i++; 1926 } 1927 return i - off; 1928 } 1929 case Term.BITSET: { 1930 IntBitSet arr = term.bitset; 1931 int i = off; 1932 char c; 1933 if (!term.inverse) while (i < out) { 1934 if ((c = data[i]) <= 255 && arr.get(c)) break; 1935 else i++; 1936 } 1937 else while (i < out) { 1938 if ((c = data[i]) <= 255 && arr.get(c)) i++; 1939 else break; 1940 } 1941 return i - off; 1942 } 1943 case Term.BITSET2: { 1944 int i = off; 1945 IntBitSet[] bitset2 = term.bitset2; 1946 char c; 1947 if (!term.inverse) while (i < out) { 1948 IntBitSet arr = bitset2[(c = data[i]) >> 8]; 1949 if (arr != null && arr.get(c & 0xff)) break; 1950 else i++; 1951 } 1952 else while (i < out) { 1953 IntBitSet arr = bitset2[(c = data[i]) >> 8]; 1954 if (arr != null && arr.get(c & 0xff)) i++; 1955 else break; 1956 } 1957 return i - off; 1958 } 1959 } 1960 throw new IllegalArgumentException("can't seek this kind of term:" + term.type); 1961 } 1962 1963 1964 private static int findReg(char[] data, int off, int regOff, int regLen, Term term, int out) { 1965 if (off >= out) return -1; 1966 int i = off; 1967 if (term.type == Term.REG) { 1968 while (i < out) { 1969 if (compareRegions(data, i, regOff, regLen, out)) break; 1970 i++; 1971 } 1972 } else if (term.type == Term.REG_I) { 1973 while (i < out) { 1974 if (compareRegionsI(data, i, regOff, regLen, out)) 
break; 1975 i++; 1976 } 1977 } else throw new IllegalArgumentException("wrong findReg() target:" + term.type); 1978 return off - i; 1979 } 1980 1981 private static int findBack(char[] data, int off, int maxCount, Term term) { 1982 switch (term.type) { 1983 case Term.CHAR: { 1984 char c = term.c; 1985 int i = off; 1986 int iMin = off - maxCount; 1987 for (; ; ) { 1988 if (data[--i] == c) break; 1989 if (i <= iMin) return -1; 1990 } 1991 return off - i; 1992 } 1993 case Term.BITSET: { 1994 IntBitSet arr = term.bitset; 1995 int i = off; 1996 char c; 1997 int iMin = off - maxCount; 1998 if (!term.inverse) for (; ; ) { 1999 if ((c = data[--i]) <= 255 && arr.get(c)) break; 2000 if (i <= iMin) return -1; 2001 } 2002 else for (; ; ) { 2003 if ((c = data[--i]) > 255 || !arr.get(c)) break; 2004 if (i <= iMin) return -1; 2005 } 2006 return off - i; 2007 } 2008 case Term.BITSET2: { 2009 IntBitSet[] bitset2 = term.bitset2; 2010 int i = off; 2011 char c; 2012 int iMin = off - maxCount; 2013 if (!term.inverse) for (; ; ) { 2014 IntBitSet arr = bitset2[(c = data[--i]) >> 8]; 2015 if (arr != null && arr.get(c & 0xff)) break; 2016 if (i <= iMin) return -1; 2017 } 2018 else for (; ; ) { 2019 IntBitSet arr = bitset2[(c = data[--i]) >> 8]; 2020 if (arr == null || arr.get(c & 0xff)) break; 2021 if (i <= iMin) return -1; 2022 } 2023 return off - i; 2024 } 2025 } 2026 throw new IllegalArgumentException("can't find this kind of term:" + term.type); 2027 } 2028 2029 private static int findBackReg(char[] data, int off, int regOff, int regLen, int maxCount, Term term, int out) { 2030 //assume that the cases when regLen==0 or maxCount==0 are handled by caller 2031 int i = off; 2032 int iMin = off - maxCount; 2033 if (term.type == Term.REG) { 2034 /*@since 1.2*/ 2035 char first = data[regOff]; 2036 regOff++; 2037 regLen--; 2038 for (; ; ) { 2039 i--; 2040 if (data[i] == first && compareRegions(data, i + 1, regOff, regLen, out)) break; 2041 if (i <= iMin) return -1; 2042 } 2043 } else if 
(term.type == Term.REG_I) { 2044 /*@since 1.2*/ 2045 /* 2046 char c = data[regOff]; 2047 char firstLower = Character.toLowerCase(c); 2048 char firstUpper = Character.toUpperCase(c); 2049 char firstTitle = Character.toTitleCase(c); 2050 */ 2051 char c, firstChar = Category.caseFold(data[regOff]); 2052 regOff++; 2053 regLen--; 2054 for (; ; ) { 2055 i--; 2056 if (((c = Category.caseFold(data[i])) == firstChar) && compareRegionsI(data, i + 1, regOff, regLen, out)) 2057 break; 2058 if (i <= iMin) return -1; 2059 } 2060 return off - i; 2061 } else throw new IllegalArgumentException("wrong findBackReg() target type :" + term.type); 2062 return off - i; 2063 } 2064 2065 private String toString_d() { 2066 StringBuilder s = new StringBuilder(); 2067 s.append("counters: "); 2068 s.append(counters == null ? 0 : counters.length); 2069 2070 s.append("\r\nmemregs: "); 2071 s.append(memregs.length); 2072 for (int i = 0; i < memregs.length; i++) { 2073 if(memregs[i].in < 0 || memregs[i].out < 0) 2074 s.append("\r\n #").append(i).append(": [INVALID]"); 2075 else 2076 s.append("\r\n #").append(i).append(": [").append(memregs[i].in).append(",").append(memregs[i].out).append("](\"").append(getString(memregs[i].in, memregs[i].out)).append("\")"); 2077 } 2078 s.append("\r\ndata: "); 2079 if (data != null) s.append(data.length); 2080 else s.append("[none]"); 2081 2082 s.append("\r\noffset: "); 2083 s.append(offset); 2084 2085 s.append("\r\nend: "); 2086 s.append(end); 2087 2088 s.append("\r\nwOffset: "); 2089 s.append(wOffset); 2090 2091 s.append("\r\nwEnd: "); 2092 s.append(wEnd); 2093 2094 s.append("\r\nregex: "); 2095 s.append(re); 2096 return s.toString(); 2097 } 2098 2099 @Override 2100 public boolean equals(Object o) { 2101 if (this == o) return true; 2102 if (o == null || getClass() != o.getClass()) return false; 2103 2104 Matcher matcher = (Matcher) o; 2105 2106 if (counterCount != matcher.counterCount) return false; 2107 if (memregCount != matcher.memregCount) return false; 2108 
if (lookaheadCount != matcher.lookaheadCount) return false; 2109 if (offset != matcher.offset) return false; 2110 if (end != matcher.end) return false; 2111 if (wOffset != matcher.wOffset) return false; 2112 if (wEnd != matcher.wEnd) return false; 2113 if (shared != matcher.shared) return false; 2114 if (called != matcher.called) return false; 2115 if (minQueueLength != matcher.minQueueLength) return false; 2116 if (cacheOffset != matcher.cacheOffset) return false; 2117 if (cacheLength != matcher.cacheLength) return false; 2118 if (re != null ? !re.equals(matcher.re) : matcher.re != null) return false; 2119 if (!Arrays.equals(counters, matcher.counters)) return false; 2120 // Probably incorrect - comparing Object[] arrays with Arrays.equals 2121 if (!Arrays.equals(memregs, matcher.memregs)) return false; 2122 // Probably incorrect - comparing Object[] arrays with Arrays.equals 2123 if (!Arrays.equals(lookaheads, matcher.lookaheads)) return false; 2124 if (!Arrays.equals(data, matcher.data)) return false; 2125 if (top != null ? !top.equals(matcher.top) : matcher.top != null) return false; 2126 if (first != null ? !first.equals(matcher.first) : matcher.first != null) return false; 2127 if (defaultEntry != null ? !defaultEntry.equals(matcher.defaultEntry) : matcher.defaultEntry != null) 2128 return false; 2129 if (cache != null ? !cache.equals(matcher.cache) : matcher.cache != null) return false; 2130 return prefixBounds != null ? prefixBounds.equals(matcher.prefixBounds) : matcher.prefixBounds == null && (suffixBounds != null ? suffixBounds.equals(matcher.suffixBounds) : matcher.suffixBounds == null && (targetBounds != null ? targetBounds.equals(matcher.targetBounds) : matcher.targetBounds == null)); 2131 2132 } 2133 2134 @Override 2135 public int hashCode() { 2136 int result = re != null ? 
re.hashCode() : 0; 2137 result = 31 * result + Arrays.hashCode(counters); 2138 result = 31 * result + Arrays.hashCode(memregs); 2139 result = 31 * result + Arrays.hashCode(lookaheads); 2140 result = 31 * result + counterCount; 2141 result = 31 * result + memregCount; 2142 result = 31 * result + lookaheadCount; 2143 result = 31 * result + Arrays.hashCode(data); 2144 result = 31 * result + offset; 2145 result = 31 * result + end; 2146 result = 31 * result + wOffset; 2147 result = 31 * result + wEnd; 2148 result = 31 * result + (shared ? 1 : 0); 2149 result = 31 * result + (top != null ? top.hashCode() : 0); 2150 result = 31 * result + (first != null ? first.hashCode() : 0); 2151 result = 31 * result + (defaultEntry != null ? defaultEntry.hashCode() : 0); 2152 result = 31 * result + (called ? 1 : 0); 2153 result = 31 * result + minQueueLength; 2154 result = 31 * result + (cache != null ? cache.hashCode() : 0); 2155 result = 31 * result + cacheOffset; 2156 result = 31 * result + cacheLength; 2157 result = 31 * result + (prefixBounds != null ? prefixBounds.hashCode() : 0); 2158 result = 31 * result + (suffixBounds != null ? suffixBounds.hashCode() : 0); 2159 result = 31 * result + (targetBounds != null ? 
targetBounds.hashCode() : 0); 2160 return result; 2161 } 2162 2163 public String replaceFirst(String replacement) 2164 { 2165 TextBuffer tb = wrap(new StringBuilder(data.length)); 2166 Replacer.replace(this, new PerlSubstitution(replacement), tb, 1); 2167 return tb.toString(); 2168 } 2169 2170 public String replaceAmount(String replacement, int amount) 2171 { 2172 TextBuffer tb = wrap(new StringBuilder(data.length)); 2173 Replacer.replace(this, new PerlSubstitution(replacement), tb, amount); 2174 return tb.toString(); 2175 } 2176 2177 public String replaceAll(String replacement) 2178 { 2179 TextBuffer tb = wrap(new StringBuilder(data.length)); 2180 Replacer.replace(this, new PerlSubstitution(replacement), tb); 2181 return tb.toString(); 2182 } 2183 2184} 2185 2186class SearchEntry { 2187 Term term; 2188 int index; 2189 int cnt; 2190 int regLen; 2191 2192 boolean isState; 2193 2194 SearchEntry sub, on; 2195 2196 private static class MState { 2197 int index, in, out; 2198 MState next, prev; 2199 } 2200 2201 private static class CState { 2202 int index, value; 2203 CState next, prev; 2204 } 2205 2206 private MState mHead, mCurrent; 2207 private CState cHead, cCurrent; 2208 2209 static void saveMemregState(SearchEntry entry, int memreg, MemReg mr) { 2210 entry.isState = true; 2211 MState current = entry.mCurrent; 2212 if (current == null) { 2213 MState head = entry.mHead; 2214 if (head == null) entry.mHead = entry.mCurrent = current = new MState(); 2215 else current = head; 2216 } else { 2217 MState next = current.next; 2218 if (next == null) { 2219 current.next = next = new MState(); 2220 next.prev = current; 2221 } 2222 current = next; 2223 } 2224 current.index = memreg; 2225 current.in = mr.in; 2226 current.out = mr.out; 2227 entry.mCurrent = current; 2228 } 2229 2230 static void saveCntState(SearchEntry entry, int cntreg, int value) { 2231 entry.isState = true; 2232 CState current = entry.cCurrent; 2233 if (current == null) { 2234 CState head = entry.cHead; 2235 if 
(head == null) entry.cHead = entry.cCurrent = current = new CState(); 2236 else current = head; 2237 } else { 2238 CState next = current.next; 2239 if (next == null) { 2240 current.next = next = new CState(); 2241 next.prev = current; 2242 } 2243 current = next; 2244 } 2245 current.index = cntreg; 2246 current.value = value; 2247 entry.cCurrent = current; 2248 } 2249 2250 static void popState(SearchEntry entry, MemReg[] memregs, int[] counters) { 2251 MState ms = entry.mCurrent; 2252 while (ms != null) { 2253 MemReg mr = memregs[ms.index]; 2254 mr.in = ms.in; 2255 mr.out = ms.out; 2256 ms = ms.prev; 2257 } 2258 CState cs = entry.cCurrent; 2259 while (cs != null) { 2260 counters[cs.index] = cs.value; 2261 cs = cs.prev; 2262 } 2263 entry.mCurrent = null; 2264 entry.cCurrent = null; 2265 entry.isState = false; 2266 } 2267 2268 final void reset(int restQueue) { 2269 term = null; 2270 index = cnt = regLen = 0; 2271 2272 mCurrent = null; 2273 cCurrent = null; 2274 isState = false; 2275 2276 SearchEntry on = this.on; 2277 if (on != null) { 2278 if (restQueue > 0) on.reset(restQueue - 1); 2279 else { 2280 this.on = null; 2281 on.sub = null; 2282 } 2283 } 2284 } 2285 2286 @Override 2287 public boolean equals(Object o) { 2288 if (this == o) return true; 2289 if (o == null || getClass() != o.getClass()) return false; 2290 2291 SearchEntry that = (SearchEntry) o; 2292 2293 if (index != that.index) return false; 2294 if (cnt != that.cnt) return false; 2295 if (regLen != that.regLen) return false; 2296 if (isState != that.isState) return false; 2297 if (term != null ? !term.equals(that.term) : that.term != null) return false; 2298 if (sub != null ? !sub.equals(that.sub) : that.sub != null) return false; 2299 if (on != null ? !on.equals(that.on) : that.on != null) return false; 2300 if (mHead != null ? !mHead.equals(that.mHead) : that.mHead != null) return false; 2301 return mCurrent != null ? mCurrent.equals(that.mCurrent) : that.mCurrent == null && (cHead != null ? 
cHead.equals(that.cHead) : that.cHead == null && (cCurrent != null ? cCurrent.equals(that.cCurrent) : that.cCurrent == null)); 2302 2303 } 2304 2305 @Override 2306 public int hashCode() { 2307 int result = term != null ? term.hashCode() : 0; 2308 result = 31 * result + index; 2309 result = 31 * result + cnt; 2310 result = 31 * result + regLen; 2311 result = 31 * result + (isState ? 1 : 0); 2312 result = 31 * result + (sub != null ? sub.hashCode() : 0); 2313 result = 31 * result + (on != null ? on.hashCode() : 0); 2314 result = 31 * result + (mHead != null ? mHead.hashCode() : 0); 2315 result = 31 * result + (mCurrent != null ? mCurrent.hashCode() : 0); 2316 result = 31 * result + (cHead != null ? cHead.hashCode() : 0); 2317 result = 31 * result + (cCurrent != null ? cCurrent.hashCode() : 0); 2318 return result; 2319 } 2320 2321 @Override 2322 public String toString() { 2323 return "SearchEntry{" + 2324 "term=" + term + 2325 ", index=" + index + 2326 ", cnt=" + cnt + 2327 ", regLen=" + regLen + 2328 ", isState=" + isState + 2329 ", sub=" + sub + 2330 ", on=" + on + 2331 ", mHead=" + mHead + 2332 ", mCurrent=" + mCurrent + 2333 ", cHead=" + cHead + 2334 ", cCurrent=" + cCurrent + 2335 '}'; 2336 } 2337} 2338 2339class MemReg { 2340 private int index; 2341 2342 int in = -1, out = -1; 2343 int tmp = -1; //for assuming at GROUP_IN 2344 2345 MemReg(int index) { 2346 this.index = index; 2347 } 2348 2349 void reset() { 2350 in = out = -1; 2351 } 2352 2353 @Override 2354 public boolean equals(Object o) { 2355 if (this == o) return true; 2356 if (o == null || getClass() != o.getClass()) return false; 2357 2358 MemReg memReg = (MemReg) o; 2359 2360 if (index != memReg.index) return false; 2361 return in == memReg.in && out == memReg.out && tmp == memReg.tmp; 2362 2363 } 2364 2365 @Override 2366 public int hashCode() { 2367 int result = index; 2368 result = 31 * result + in; 2369 result = 31 * result + out; 2370 result = 31 * result + tmp; 2371 return result; 2372 } 2373 2374 
@Override 2375 public String toString() { 2376 return "MemReg{" + 2377 "index=" + index + 2378 ", in=" + in + 2379 ", out=" + out + 2380 ", tmp=" + tmp + 2381 '}'; 2382 } 2383} 2384 2385class LAEntry { 2386 int index; 2387 SearchEntry top, actual; 2388 2389 @Override 2390 public boolean equals(Object o) { 2391 if (this == o) return true; 2392 if (o == null || getClass() != o.getClass()) return false; 2393 2394 LAEntry laEntry = (LAEntry) o; 2395 2396 return index == laEntry.index && (top != null ? top.equals(laEntry.top) : laEntry.top == null && (actual != null ? actual.equals(laEntry.actual) : laEntry.actual == null)); 2397 2398 } 2399 2400 @Override 2401 public int hashCode() { 2402 int result = index; 2403 result = 31 * result + (top != null ? top.hashCode() : 0); 2404 result = 31 * result + (actual != null ? actual.hashCode() : 0); 2405 return result; 2406 } 2407 2408 @Override 2409 public String toString() { 2410 return "LAEntry{" + 2411 "index=" + index + 2412 ", top=" + top + 2413 ", actual=" + actual + 2414 '}'; 2415 } 2416}