001/* 002 * Copyright 2023 the original author or authors. 003 * <p> 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * <p> 008 * https://www.apache.org/licenses/LICENSE-2.0 009 * <p> 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package de.cuioss.tools.io; 017 018import java.io.File; 019import java.util.ArrayDeque; 020import java.util.ArrayList; 021import java.util.Collection; 022import java.util.Deque; 023 024import lombok.experimental.UtilityClass; 025 026/** 027 * Copied from commons.io:org.apache.commons.io.FilenameUtils 028 * <p> 029 * General filename and filepath manipulation utilities. 030 * <p> 031 * When dealing with filenames you can hit problems when moving from a Windows 032 * based development machine to a Unix based production machine. This class aims 033 * to help avoid those problems. 034 * <p> 035 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by 036 * using JDK {@link java.io.File File} objects and the two argument constructor 037 * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}. 038 * <p> 039 * Most methods on this class are designed to work the same on both Unix and 040 * Windows. Those that don't include 'System', 'Unix' or 'Windows' in their 041 * name. 042 * <p> 043 * Most methods recognise both separators (forward and back), and both sets of 044 * prefixes. See the javadoc of each method for details. 045 * <p> 046 * This class defines six components within a filename (example 047 * C:\dev\project\file.txt): 048 * <ul> 049 * <li>the prefix - C:\</li> 050 * <li>the path - dev\project\</li> 051 * <li>the full path - C:\dev\project\</li> 052 * <li>the name - file.txt</li> 053 * <li>the base name - file</li> 054 * <li>the extension - txt</li> 055 * </ul> 056 * Note that this class works best if directory filenames end with a separator. 057 * If you omit the last separator, it is impossible to determine if the filename 058 * corresponds to a file or a directory. As a result, we have chosen to say it 059 * corresponds to a file. 060 * <p> 061 * This class only supports Unix and Windows style names. Prefixes are matched 062 * as follows: 063 * 064 * <pre> 065 * Windows: 066 * a\b\c.txt --> "" --> relative 067 * \a\b\c.txt --> "\" --> current drive absolute 068 * C:a\b\c.txt --> "C:" --> drive relative 069 * C:\a\b\c.txt --> "C:\" --> absolute 070 * \\server\a\b\c.txt --> "\\server\" --> UNC 071 * 072 * Unix: 073 * a/b/c.txt --> "" --> relative 074 * /a/b/c.txt --> "/" --> absolute 075 * ~/a/b/c.txt --> "~/" --> current user 076 * ~ --> "~/" --> current user (slash added) 077 * ~user/a/b/c.txt --> "~user/" --> named user 078 * ~user --> "~user/" --> named user (slash added) 079 * </pre> 080 * 081 * Both prefix styles are matched always, irrespective of the machine that you 082 * are currently running on. 083 * <p> 084 * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils. 085 * </p> 086 * 087 */ 088@UtilityClass 089public class FilenameUtils { 090 091 private static final int NOT_FOUND = -1; 092 093 /** 094 * The extension separator character. 095 * 096 */ 097 public static final char EXTENSION_SEPARATOR = '.'; 098 099 /** 100 * The extension separator String. 101 * 102 */ 103 public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR); 104 105 /** 106 * The Unix separator character. 107 */ 108 private static final char UNIX_SEPARATOR = '/'; 109 110 /** 111 * The Windows separator character. 112 */ 113 private static final char WINDOWS_SEPARATOR = '\\'; 114 115 /** 116 * The system separator character. 117 */ 118 private static final char SYSTEM_SEPARATOR = File.separatorChar; 119 120 /** 121 * The separator character that is the opposite of the system separator. 122 */ 123 private static final char OTHER_SEPARATOR; 124 static { 125 if (isSystemWindows()) { 126 OTHER_SEPARATOR = UNIX_SEPARATOR; 127 } else { 128 OTHER_SEPARATOR = WINDOWS_SEPARATOR; 129 } 130 } 131 132 // ----------------------------------------------------------------------- 133 /** 134 * Determines if Windows file system is in use. 135 * 136 * @return true if the system is Windows 137 */ 138 static boolean isSystemWindows() { 139 return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR; 140 } 141 142 // ----------------------------------------------------------------------- 143 /** 144 * Checks if the character is a separator. 145 * 146 * @param ch the character to check 147 * @return true if it is a separator character 148 */ 149 private static boolean isSeparator(final char ch) { 150 return ch == UNIX_SEPARATOR || ch == WINDOWS_SEPARATOR; 151 } 152 153 // ----------------------------------------------------------------------- 154 /** 155 * Normalizes a path, removing double and single dot path steps. 156 * <p> 157 * This method normalizes a path to a standard format. The input may contain 158 * separators in either Unix or Windows format. The output will contain 159 * separators in the format of the system. 160 * <p> 161 * A trailing slash will be retained. A double slash will be merged to a single 162 * slash (but UNC names are handled). A single dot path segment will be removed. 163 * A double dot will cause that path segment and the one before to be removed. 164 * If the double dot has no parent path segment to work with, {@code null} is 165 * returned. 166 * <p> 167 * The output will be the same on both Unix and Windows except for the separator 168 * character. 169 * 170 * <pre> 171 * /foo// --> /foo/ 172 * /foo/./ --> /foo/ 173 * /foo/../bar --> /bar 174 * /foo/../bar/ --> /bar/ 175 * /foo/../bar/../baz --> /baz 176 * //foo//./bar --> /foo/bar 177 * /../ --> null 178 * ../foo --> null 179 * foo/bar/.. --> foo/ 180 * foo/../../bar --> null 181 * foo/../bar --> bar 182 * //server/foo/../bar --> //server/bar 183 * //server/../bar --> null 184 * C:\foo\..\bar --> C:\bar 185 * C:\..\bar --> null 186 * ~/foo/../bar/ --> ~/bar/ 187 * ~/../bar --> null 188 * </pre> 189 * 190 * (Note the file separator returned will be correct for Windows/Unix) 191 * 192 * @param filename the filename to normalize, null returns null 193 * @return the normalized filename, or null if invalid. Null bytes inside string 194 * will be removed 195 */ 196 public static String normalize(final String filename) { 197 return doNormalize(filename, SYSTEM_SEPARATOR, true); 198 } 199 200 /** 201 * Normalizes a path, removing double and single dot path steps. 202 * <p> 203 * This method normalizes a path to a standard format. The input may contain 204 * separators in either Unix or Windows format. The output will contain 205 * separators in the format specified. 206 * <p> 207 * A trailing slash will be retained. A double slash will be merged to a single 208 * slash (but UNC names are handled). A single dot path segment will be removed. 209 * A double dot will cause that path segment and the one before to be removed. 210 * If the double dot has no parent path segment to work with, {@code null} is 211 * returned. 212 * <p> 213 * The output will be the same on both Unix and Windows except for the separator 214 * character. 215 * 216 * <pre> 217 * /foo// --> /foo/ 218 * /foo/./ --> /foo/ 219 * /foo/../bar --> /bar 220 * /foo/../bar/ --> /bar/ 221 * /foo/../bar/../baz --> /baz 222 * //foo//./bar --> /foo/bar 223 * /../ --> null 224 * ../foo --> null 225 * foo/bar/.. --> foo/ 226 * foo/../../bar --> null 227 * foo/../bar --> bar 228 * //server/foo/../bar --> //server/bar 229 * //server/../bar --> null 230 * C:\foo\..\bar --> C:\bar 231 * C:\..\bar --> null 232 * ~/foo/../bar/ --> ~/bar/ 233 * ~/../bar --> null 234 * </pre> 235 * 236 * The output will be the same on both Unix and Windows including the separator 237 * character. 238 * 239 * @param filename the filename to normalize, null returns null 240 * @param unixSeparator {@code true} if a unix separator should be used or 241 * {@code false} if a windows separator should be used. 242 * @return the normalized filename, or null if invalid. Null bytes inside string 243 * will be removed 244 */ 245 public static String normalize(final String filename, final boolean unixSeparator) { 246 final var separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR; 247 return doNormalize(filename, separator, true); 248 } 249 250 // ----------------------------------------------------------------------- 251 /** 252 * Normalizes a path, removing double and single dot path steps, and removing 253 * any final directory separator. 254 * <p> 255 * This method normalizes a path to a standard format. The input may contain 256 * separators in either Unix or Windows format. The output will contain 257 * separators in the format of the system. 258 * <p> 259 * A trailing slash will be removed. A double slash will be merged to a single 260 * slash (but UNC names are handled). A single dot path segment will be removed. 261 * A double dot will cause that path segment and the one before to be removed. 262 * If the double dot has no parent path segment to work with, {@code null} is 263 * returned. 264 * <p> 265 * The output will be the same on both Unix and Windows except for the separator 266 * character. 267 * 268 * <pre> 269 * /foo// --> /foo 270 * /foo/./ --> /foo 271 * /foo/../bar --> /bar 272 * /foo/../bar/ --> /bar 273 * /foo/../bar/../baz --> /baz 274 * //foo//./bar --> /foo/bar 275 * /../ --> null 276 * ../foo --> null 277 * foo/bar/.. --> foo 278 * foo/../../bar --> null 279 * foo/../bar --> bar 280 * //server/foo/../bar --> //server/bar 281 * //server/../bar --> null 282 * C:\foo\..\bar --> C:\bar 283 * C:\..\bar --> null 284 * ~/foo/../bar/ --> ~/bar 285 * ~/../bar --> null 286 * </pre> 287 * 288 * (Note the file separator returned will be correct for Windows/Unix) 289 * 290 * @param filename the filename to normalize, null returns null 291 * @return the normalized filename, or null if invalid. Null bytes inside string 292 * will be removed 293 */ 294 public static String normalizeNoEndSeparator(final String filename) { 295 return doNormalize(filename, SYSTEM_SEPARATOR, false); 296 } 297 298 /** 299 * Normalizes a path, removing double and single dot path steps, and removing 300 * any final directory separator. 301 * <p> 302 * This method normalizes a path to a standard format. The input may contain 303 * separators in either Unix or Windows format. The output will contain 304 * separators in the format specified. 305 * <p> 306 * A trailing slash will be removed. A double slash will be merged to a single 307 * slash (but UNC names are handled). A single dot path segment will be removed. 308 * A double dot will cause that path segment and the one before to be removed. 309 * If the double dot has no parent path segment to work with, {@code null} is 310 * returned. 311 * <p> 312 * The output will be the same on both Unix and Windows including the separator 313 * character. 314 * 315 * <pre> 316 * /foo// --> /foo 317 * /foo/./ --> /foo 318 * /foo/../bar --> /bar 319 * /foo/../bar/ --> /bar 320 * /foo/../bar/../baz --> /baz 321 * //foo//./bar --> /foo/bar 322 * /../ --> null 323 * ../foo --> null 324 * foo/bar/.. --> foo 325 * foo/../../bar --> null 326 * foo/../bar --> bar 327 * //server/foo/../bar --> //server/bar 328 * //server/../bar --> null 329 * C:\foo\..\bar --> C:\bar 330 * C:\..\bar --> null 331 * ~/foo/../bar/ --> ~/bar 332 * ~/../bar --> null 333 * </pre> 334 * 335 * @param filename the filename to normalize, null returns null 336 * @param unixSeparator {@code true} if a unix separator should be used or 337 * {@code false} if a windows separator should be used. 338 * @return the normalized filename, or null if invalid. Null bytes inside string 339 * will be removed 340 */ 341 public static String normalizeNoEndSeparator(final String filename, final boolean unixSeparator) { 342 final var separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR; 343 return doNormalize(filename, separator, false); 344 } 345 346 /** 347 * Internal method to perform the normalization. 348 * 349 * @param filename the filename 350 * @param separator The separator character to use 351 * @param keepSeparator true to keep the final separator 352 * @return the normalized filename. Null bytes inside string will be removed. 353 */ 354 @SuppressWarnings({ "squid:S3776", "squid:LabelsShouldNotBeUsedCheck", "squid:ForLoopCounterChangedCheck", 355 "java:S6541" }) // owolff: original code 356 private static String doNormalize(final String filename, final char separator, final boolean keepSeparator) { 357 if (filename == null) { 358 return null; 359 } 360 361 failIfNullBytePresent(filename); 362 363 var size = filename.length(); 364 if (size == 0) { 365 return filename; 366 } 367 final var prefix = getPrefixLength(filename); 368 if (prefix < 0) { 369 return null; 370 } 371 372 final var array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy 373 filename.getChars(0, filename.length(), array, 0); 374 375 // fix separators throughout 376 final var otherSeparator = separator == SYSTEM_SEPARATOR ? OTHER_SEPARATOR : SYSTEM_SEPARATOR; 377 for (var i = 0; i < array.length; i++) { 378 if (array[i] == otherSeparator) { 379 array[i] = separator; 380 } 381 } 382 383 // add extra separator on the end to simplify code below 384 var lastIsDirectory = true; 385 if (array[size - 1] != separator) { 386 array[size] = separator; 387 size++; 388 lastIsDirectory = false; 389 } 390 391 // adjoining slashes 392 for (var i = prefix + 1; i < size; i++) { 393 if (array[i] == separator && array[i - 1] == separator) { 394 System.arraycopy(array, i, array, i - 1, size - i); 395 size--; 396 i--; 397 } 398 } 399 400 // dot slash 401 for (var i = prefix + 1; i < size; i++) { 402 if (array[i] == separator && array[i - 1] == '.' && (i == prefix + 1 || array[i - 2] == separator)) { 403 if (i == size - 1) { 404 lastIsDirectory = true; 405 } 406 System.arraycopy(array, i + 1, array, i - 1, size - i); 407 size -= 2; 408 i--; 409 } 410 } 411 412 // double dot slash 413 outer: for (var i = prefix + 2; i < size; i++) { 414 if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' 415 && (i == prefix + 2 || array[i - 3] == separator)) { 416 if (i == prefix + 2) { 417 return null; 418 } 419 if (i == size - 1) { 420 lastIsDirectory = true; 421 } 422 int j; 423 for (j = i - 4; j >= prefix; j--) { 424 if (array[j] == separator) { 425 // remove b/../ from a/b/../c 426 System.arraycopy(array, i + 1, array, j + 1, size - i); 427 size -= i - j; 428 i = j + 1; 429 continue outer; 430 } 431 } 432 // remove a/../ from a/../c 433 System.arraycopy(array, i + 1, array, prefix, size - i); 434 size -= i + 1 - prefix; 435 i = prefix + 1; 436 } 437 } 438 439 if (size <= 0) { // should never be less than 0 440 return ""; 441 } 442 if (size <= prefix || lastIsDirectory && keepSeparator) { 443 return new String(array, 0, size); // keep trailing separator 444 } 445 return new String(array, 0, size - 1); // lose trailing separator 446 } 447 448 // ----------------------------------------------------------------------- 449 /** 450 * Concatenates a filename to a base path using normal command line style rules. 451 * <p> 452 * The effect is equivalent to resultant directory after changing directory to 453 * the first argument, followed by changing directory to the second argument. 454 * <p> 455 * The first argument is the base path, the second is the path to concatenate. 456 * The returned path is always normalized via {@link #normalize(String)}, thus 457 * <code>..</code> is handled. 458 * <p> 459 * If <code>pathToAdd</code> is absolute (has an absolute prefix), then it will 460 * be normalized and returned. Otherwise, the paths will be joined, normalized 461 * and returned. 462 * <p> 463 * The output will be the same on both Unix and Windows except for the separator 464 * character. 465 * 466 * <pre> 467 * /foo/ + bar --> /foo/bar 468 * /foo + bar --> /foo/bar 469 * /foo + /bar --> /bar 470 * /foo + C:/bar --> C:/bar 471 * /foo + C:bar --> C:bar (*) 472 * /foo/a/ + ../bar --> foo/bar 473 * /foo/ + ../../bar --> null 474 * /foo/ + /bar --> /bar 475 * /foo/.. + /bar --> /bar 476 * /foo + bar/c.txt --> /foo/bar/c.txt 477 * /foo/c.txt + bar --> /foo/c.txt/bar (!) 478 * </pre> 479 * 480 * (*) Note that the Windows relative drive prefix is unreliable when used with 481 * this method. (!) Note that the first parameter must be a path. If it ends 482 * with a name, then the name will be built into the concatenated path. If this 483 * might be a problem, use {@link #getFullPath(String)} on the base path 484 * argument. 485 * 486 * @param basePath the base path to attach to, always treated as a path 487 * @param fullFilenameToAdd the filename (or path) to attach to the base 488 * @return the concatenated path, or null if invalid. Null bytes inside string 489 * will be removed 490 */ 491 public static String concat(final String basePath, final String fullFilenameToAdd) { 492 final var prefix = getPrefixLength(fullFilenameToAdd); 493 if (prefix < 0) { 494 return null; 495 } 496 if (prefix > 0) { 497 return normalize(fullFilenameToAdd); 498 } 499 if (basePath == null) { 500 return null; 501 } 502 final var len = basePath.length(); 503 if (len == 0) { 504 return normalize(fullFilenameToAdd); 505 } 506 final var ch = basePath.charAt(len - 1); 507 if (isSeparator(ch)) { 508 return normalize(basePath + fullFilenameToAdd); 509 } 510 return normalize(basePath + '/' + fullFilenameToAdd); 511 } 512 513 /** 514 * Determines whether the {@code parent} directory contains the {@code child} 515 * element (a file or directory). 516 * <p> 517 * The files names are expected to be normalized. 518 * </p> 519 * 520 * Edge cases: 521 * <ul> 522 * <li>A {@code directory} must not be null: if null, throw 523 * IllegalArgumentException</li> 524 * <li>A directory does not contain itself: return false</li> 525 * <li>A null child file is not contained in any parent: return false</li> 526 * </ul> 527 * 528 * @param canonicalParent the file to consider as the parent. 529 * @param canonicalChild the file to consider as the child. 530 * @return true is the candidate leaf is under by the specified composite. False 531 * otherwise. 532 */ 533 public static boolean directoryContains(final String canonicalParent, final String canonicalChild) { 534 535 // Fail fast against NullPointerException 536 if (canonicalParent == null) { 537 throw new IllegalArgumentException("Directory must not be null"); 538 } 539 540 if (canonicalChild == null || IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) { 541 return false; 542 } 543 544 return IOCase.SYSTEM.checkStartsWith(canonicalChild, canonicalParent); 545 } 546 547 // ----------------------------------------------------------------------- 548 /** 549 * Converts all separators to the Unix separator of forward slash. 550 * 551 * @param path the path to be changed, null ignored 552 * @return the updated path 553 */ 554 public static String separatorsToUnix(final String path) { 555 if (path == null || path.indexOf(WINDOWS_SEPARATOR) == NOT_FOUND) { 556 return path; 557 } 558 return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR); 559 } 560 561 /** 562 * Converts all separators to the Windows separator of backslash. 563 * 564 * @param path the path to be changed, null ignored 565 * @return the updated path 566 */ 567 public static String separatorsToWindows(final String path) { 568 if (path == null || path.indexOf(UNIX_SEPARATOR) == NOT_FOUND) { 569 return path; 570 } 571 return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR); 572 } 573 574 /** 575 * Converts all separators to the system separator. 576 * 577 * @param path the path to be changed, null ignored 578 * @return the updated path 579 */ 580 public static String separatorsToSystem(final String path) { 581 if (path == null) { 582 return null; 583 } 584 if (isSystemWindows()) { 585 return separatorsToWindows(path); 586 } 587 return separatorsToUnix(path); 588 } 589 590 // ----------------------------------------------------------------------- 591 /** 592 * Returns the length of the filename prefix, such as <code>C:/</code> or 593 * <code>~/</code>. 594 * <p> 595 * This method will handle a file in either Unix or Windows format. 596 * <p> 597 * The prefix length includes the first slash in the full filename if 598 * applicable. Thus, it is possible that the length returned is greater than the 599 * length of the input string. 600 * 601 * <pre> 602 * Windows: 603 * a\b\c.txt --> "" --> relative 604 * \a\b\c.txt --> "\" --> current drive absolute 605 * C:a\b\c.txt --> "C:" --> drive relative 606 * C:\a\b\c.txt --> "C:\" --> absolute 607 * \\server\a\b\c.txt --> "\\server\" --> UNC 608 * \\\a\b\c.txt --> error, length = -1 609 * 610 * Unix: 611 * a/b/c.txt --> "" --> relative 612 * /a/b/c.txt --> "/" --> absolute 613 * ~/a/b/c.txt --> "~/" --> current user 614 * ~ --> "~/" --> current user (slash added) 615 * ~user/a/b/c.txt --> "~user/" --> named user 616 * ~user --> "~user/" --> named user (slash added) 617 * //server/a/b/c.txt --> "//server/" 618 * ///a/b/c.txt --> error, length = -1 619 * </pre> 620 * <p> 621 * The output will be the same irrespective of the machine that the code is 622 * running on. i.e. both Unix and Windows prefixes are matched regardless. 623 * <p> 624 * Note that a leading // (or \\) is used to indicate a UNC name on Windows. 625 * These must be followed by a server name, so double-slashes are not collapsed 626 * to a single slash at the start of the filename. 627 * 628 * @param filename the filename to find the prefix in, null returns -1 629 * @return the length of the prefix, -1 if invalid or null 630 */ 631 @SuppressWarnings({ "squid:S3776" }) // owolff: original code 632 public static int getPrefixLength(final String filename) { 633 if (filename == null) { 634 return NOT_FOUND; 635 } 636 final var len = filename.length(); 637 if (len == 0) { 638 return 0; 639 } 640 var ch0 = filename.charAt(0); 641 if (ch0 == ':') { 642 return NOT_FOUND; 643 } 644 if (len == 1) { 645 if (ch0 == '~') { 646 return 2; // return a length greater than the input 647 } 648 return isSeparator(ch0) ? 1 : 0; 649 } 650 if (ch0 == '~') { 651 var posUnix = filename.indexOf(UNIX_SEPARATOR, 1); 652 var posWin = filename.indexOf(WINDOWS_SEPARATOR, 1); 653 if (posUnix == NOT_FOUND && posWin == NOT_FOUND) { 654 return len + 1; // return a length greater than the input 655 } 656 posUnix = posUnix == NOT_FOUND ? posWin : posUnix; 657 posWin = posWin == NOT_FOUND ? posUnix : posWin; 658 return Math.min(posUnix, posWin) + 1; 659 } 660 final var ch1 = filename.charAt(1); 661 if (ch1 == ':') { 662 ch0 = Character.toUpperCase(ch0); 663 if (ch0 >= 'A' && ch0 <= 'Z') { 664 if (len == 2 || !isSeparator(filename.charAt(2))) { 665 return 2; 666 } 667 return 3; 668 } 669 if (ch0 == UNIX_SEPARATOR) { 670 return 1; 671 } 672 return NOT_FOUND; 673 674 } 675 if (!isSeparator(ch0) || !isSeparator(ch1)) { 676 return isSeparator(ch0) ? 1 : 0; 677 } 678 var posUnix = filename.indexOf(UNIX_SEPARATOR, 2); 679 var posWin = filename.indexOf(WINDOWS_SEPARATOR, 2); 680 if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) { 681 return NOT_FOUND; 682 } 683 posUnix = posUnix == NOT_FOUND ? posWin : posUnix; 684 posWin = posWin == NOT_FOUND ? posUnix : posWin; 685 return Math.min(posUnix, posWin) + 1; 686 } 687 688 /** 689 * Returns the index of the last directory separator character. 690 * <p> 691 * This method will handle a file in either Unix or Windows format. The position 692 * of the last forward or backslash is returned. 693 * <p> 694 * The output will be the same irrespective of the machine that the code is 695 * running on. 696 * 697 * @param filename the filename to find the last path separator in, null returns 698 * -1 699 * @return the index of the last separator character, or -1 if there is no such 700 * character 701 */ 702 public static int indexOfLastSeparator(final String filename) { 703 if (filename == null) { 704 return NOT_FOUND; 705 } 706 final var lastUnixPos = filename.lastIndexOf(UNIX_SEPARATOR); 707 final var lastWindowsPos = filename.lastIndexOf(WINDOWS_SEPARATOR); 708 return Math.max(lastUnixPos, lastWindowsPos); 709 } 710 711 /** 712 * Returns the index of the last extension separator character, which is a dot. 713 * <p> 714 * This method also checks that there is no directory separator after the last 715 * dot. To do this it uses {@link #indexOfLastSeparator(String)} which will 716 * handle a file in either Unix or Windows format. 717 * </p> 718 * <p> 719 * The output will be the same irrespective of the machine that the code is 720 * running on. 721 * </p> 722 * 723 * @param filename the filename to find the last extension separator in, null 724 * returns -1 725 * @return the index of the last extension separator character, or -1 if there 726 * is no such character 727 */ 728 public static int indexOfExtension(final String filename) { 729 if (filename == null) { 730 return NOT_FOUND; 731 } 732 final var extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR); 733 final var lastSeparator = indexOfLastSeparator(filename); 734 return lastSeparator > extensionPos ? NOT_FOUND : extensionPos; 735 } 736 737 // ----------------------------------------------------------------------- 738 /** 739 * Gets the prefix from a full filename, such as <code>C:/</code> or 740 * <code>~/</code>. 741 * <p> 742 * This method will handle a file in either Unix or Windows format. The prefix 743 * includes the first slash in the full filename where applicable. 744 * 745 * <pre> 746 * Windows: 747 * a\b\c.txt --> "" --> relative 748 * \a\b\c.txt --> "\" --> current drive absolute 749 * C:a\b\c.txt --> "C:" --> drive relative 750 * C:\a\b\c.txt --> "C:\" --> absolute 751 * \\server\a\b\c.txt --> "\\server\" --> UNC 752 * 753 * Unix: 754 * a/b/c.txt --> "" --> relative 755 * /a/b/c.txt --> "/" --> absolute 756 * ~/a/b/c.txt --> "~/" --> current user 757 * ~ --> "~/" --> current user (slash added) 758 * ~user/a/b/c.txt --> "~user/" --> named user 759 * ~user --> "~user/" --> named user (slash added) 760 * </pre> 761 * <p> 762 * The output will be the same irrespective of the machine that the code is 763 * running on. i.e. both Unix and Windows prefixes are matched regardless. 764 * 765 * @param filename the filename to query, null returns null 766 * @return the prefix of the file, null if invalid. Null bytes inside string 767 * will be removed 768 */ 769 public static String getPrefix(final String filename) { 770 if (filename == null) { 771 return null; 772 } 773 final var len = getPrefixLength(filename); 774 if (len < 0) { 775 return null; 776 } 777 if (len > filename.length()) { 778 failIfNullBytePresent(filename + UNIX_SEPARATOR); 779 return filename + UNIX_SEPARATOR; 780 } 781 final var path = filename.substring(0, len); 782 failIfNullBytePresent(path); 783 return path; 784 } 785 786 /** 787 * Gets the path from a full filename, which excludes the prefix. 788 * <p> 789 * This method will handle a file in either Unix or Windows format. The method 790 * is entirely text based, and returns the text before and including the last 791 * forward or backslash. 792 * 793 * <pre> 794 * C:\a\b\c.txt --> a\b\ 795 * ~/a/b/c.txt --> a/b/ 796 * a.txt --> "" 797 * a/b/c --> a/b/ 798 * a/b/c/ --> a/b/c/ 799 * </pre> 800 * <p> 801 * The output will be the same irrespective of the machine that the code is 802 * running on. 803 * <p> 804 * This method drops the prefix from the result. See 805 * {@link #getFullPath(String)} for the method that retains the prefix. 806 * 807 * @param filename the filename to query, null returns null 808 * @return the path of the file, an empty string if none exists, null if 809 * invalid. Null bytes inside string will be removed 810 */ 811 public static String getPath(final String filename) { 812 return doGetPath(filename, 1); 813 } 814 815 /** 816 * Gets the path from a full filename, which excludes the prefix, and also 817 * excluding the final directory separator. 818 * <p> 819 * This method will handle a file in either Unix or Windows format. The method 820 * is entirely text based, and returns the text before the last forward or 821 * backslash. 822 * 823 * <pre> 824 * C:\a\b\c.txt --> a\b 825 * ~/a/b/c.txt --> a/b 826 * a.txt --> "" 827 * a/b/c --> a/b 828 * a/b/c/ --> a/b/c 829 * </pre> 830 * <p> 831 * The output will be the same irrespective of the machine that the code is 832 * running on. 833 * <p> 834 * This method drops the prefix from the result. See 835 * {@link #getFullPathNoEndSeparator(String)} for the method that retains the 836 * prefix. 837 * 838 * @param filename the filename to query, null returns null 839 * @return the path of the file, an empty string if none exists, null if 840 * invalid. Null bytes inside string will be removed 841 */ 842 public static String getPathNoEndSeparator(final String filename) { 843 return doGetPath(filename, 0); 844 } 845 846 /** 847 * Does the work of getting the path. 848 * 849 * @param filename the filename 850 * @param separatorAdd 0 to omit the end separator, 1 to return it 851 * @return the path. Null bytes inside string will be removed 852 */ 853 private static String doGetPath(final String filename, final int separatorAdd) { 854 if (filename == null) { 855 return null; 856 } 857 final var prefix = getPrefixLength(filename); 858 if (prefix < 0) { 859 return null; 860 } 861 final var index = indexOfLastSeparator(filename); 862 final var endIndex = index + separatorAdd; 863 if (prefix >= filename.length() || index < 0 || prefix >= endIndex) { 864 return ""; 865 } 866 final var path = filename.substring(prefix, endIndex); 867 failIfNullBytePresent(path); 868 return path; 869 } 870 871 /** 872 * Gets the full path from a full filename, which is the prefix + path. 873 * <p> 874 * This method will handle a file in either Unix or Windows format. The method 875 * is entirely text based, and returns the text before and including the last 876 * forward or backslash. 877 * 878 * <pre> 879 * C:\a\b\c.txt --> C:\a\b\ 880 * ~/a/b/c.txt --> ~/a/b/ 881 * a.txt --> "" 882 * a/b/c --> a/b/ 883 * a/b/c/ --> a/b/c/ 884 * C: --> C: 885 * C:\ --> C:\ 886 * ~ --> ~/ 887 * ~/ --> ~/ 888 * ~user --> ~user/ 889 * ~user/ --> ~user/ 890 * </pre> 891 * <p> 892 * The output will be the same irrespective of the machine that the code is 893 * running on. 894 * 895 * @param filename the filename to query, null returns null 896 * @return the path of the file, an empty string if none exists, null if invalid 897 */ 898 public static String getFullPath(final String filename) { 899 return doGetFullPath(filename, true); 900 } 901 902 /** 903 * Gets the full path from a full filename, which is the prefix + path, and also 904 * excluding the final directory separator. 905 * <p> 906 * This method will handle a file in either Unix or Windows format. The method 907 * is entirely text based, and returns the text before the last forward or 908 * backslash. 909 * 910 * <pre> 911 * C:\a\b\c.txt --> C:\a\b 912 * ~/a/b/c.txt --> ~/a/b 913 * a.txt --> "" 914 * a/b/c --> a/b 915 * a/b/c/ --> a/b/c 916 * C: --> C: 917 * C:\ --> C:\ 918 * ~ --> ~ 919 * ~/ --> ~ 920 * ~user --> ~user 921 * ~user/ --> ~user 922 * </pre> 923 * <p> 924 * The output will be the same irrespective of the machine that the code is 925 * running on. 926 * 927 * @param filename the filename to query, null returns null 928 * @return the path of the file, an empty string if none exists, null if invalid 929 */ 930 public static String getFullPathNoEndSeparator(final String filename) { 931 return doGetFullPath(filename, false); 932 } 933 934 /** 935 * Does the work of getting the path. 936 * 937 * @param filename the filename 938 * @param includeSeparator true to include the end separator 939 * @return the path 940 */ 941 private static String doGetFullPath(final String filename, final boolean includeSeparator) { 942 if (filename == null) { 943 return null; 944 } 945 final var prefix = getPrefixLength(filename); 946 if (prefix < 0) { 947 return null; 948 } 949 if (prefix >= filename.length()) { 950 if (includeSeparator) { 951 return getPrefix(filename); // add end slash if necessary 952 } 953 return filename; 954 } 955 final var index = indexOfLastSeparator(filename); 956 if (index < 0) { 957 return filename.substring(0, prefix); 958 } 959 var end = index + (includeSeparator ? 1 : 0); 960 if (end == 0) { 961 end++; 962 } 963 return filename.substring(0, end); 964 } 965 966 /** 967 * Gets the name minus the path from a full filename. 968 * <p> 969 * This method will handle a file in either Unix or Windows format. The text 970 * after the last forward or backslash is returned. 971 * 972 * <pre> 973 * a/b/c.txt --> c.txt 974 * a.txt --> a.txt 975 * a/b/c --> c 976 * a/b/c/ --> "" 977 * </pre> 978 * <p> 979 * The output will be the same irrespective of the machine that the code is 980 * running on. 981 * 982 * @param filename the filename to query, null returns null 983 * @return the name of the file without the path, or an empty string if none 984 * exists. Null bytes inside string will be removed 985 */ 986 public static String getName(final String filename) { 987 if (filename == null) { 988 return null; 989 } 990 failIfNullBytePresent(filename); 991 final var index = indexOfLastSeparator(filename); 992 return filename.substring(index + 1); 993 } 994 995 /** 996 * Check the input for null bytes, a sign of unsanitized data being passed to 997 * file level functions. 998 * <p> 999 * This may be used for poison byte attacks. 1000 * 1001 * @param path the path to check 1002 */ 1003 private static void failIfNullBytePresent(final String path) { 1004 final var len = path.length(); 1005 for (var i = 0; i < len; i++) { 1006 if (path.charAt(i) == 0) { 1007 throw new IllegalArgumentException(""" 1008 Null byte present in file/path name. There are no \ 1009 known legitimate use cases for such data, but several injection attacks may use it\ 1010 """); 1011 } 1012 } 1013 } 1014 1015 /** 1016 * Gets the base name, minus the full path and extension, from a full filename. 1017 * <p> 1018 * This method will handle a file in either Unix or Windows format. The text 1019 * after the last forward or backslash and before the last dot is returned. 1020 * 1021 * <pre> 1022 * a/b/c.txt --> c 1023 * a.txt --> a 1024 * a/b/c --> c 1025 * a/b/c/ --> "" 1026 * </pre> 1027 * <p> 1028 * The output will be the same irrespective of the machine that the code is 1029 * running on. 1030 * 1031 * @param filename the filename to query, null returns null 1032 * @return the name of the file without the path, or an empty string if none 1033 * exists. Null bytes inside string will be removed 1034 */ 1035 public static String getBaseName(final String filename) { 1036 return removeExtension(getName(filename)); 1037 } 1038 1039 /** 1040 * Gets the extension of a filename. 1041 * <p> 1042 * This method returns the textual part of the filename after the last dot. 1043 * There must be no directory separator after the dot. 1044 * 1045 * <pre> 1046 * foo.txt --> "txt" 1047 * a/b/c.jpg --> "jpg" 1048 * a/b.txt/c --> "" 1049 * a/b/c --> "" 1050 * </pre> 1051 * <p> 1052 * The output will be the same irrespective of the machine that the code is 1053 * running on. 1054 * 1055 * @param filename the filename to retrieve the extension of. 1056 * @return the extension of the file or an empty string if none exists or 1057 * {@code null} if the filename is {@code null}. 1058 */ 1059 public static String getExtension(final String filename) { 1060 if (filename == null) { 1061 return null; 1062 } 1063 final var index = indexOfExtension(filename); 1064 if (index == NOT_FOUND) { 1065 return ""; 1066 } 1067 return filename.substring(index + 1); 1068 } 1069 1070 // ----------------------------------------------------------------------- 1071 /** 1072 * Removes the extension from a filename. 1073 * <p> 1074 * This method returns the textual part of the filename before the last dot. 1075 * There must be no directory separator after the dot. 1076 * 1077 * <pre> 1078 * foo.txt --> foo 1079 * a\b\c.jpg --> a\b\c 1080 * a\b\c --> a\b\c 1081 * a.b\c --> a.b\c 1082 * </pre> 1083 * <p> 1084 * The output will be the same irrespective of the machine that the code is 1085 * running on. 1086 * 1087 * @param filename the filename to query, null returns null 1088 * @return the filename minus the extension 1089 */ 1090 public static String removeExtension(final String filename) { 1091 if (filename == null) { 1092 return null; 1093 } 1094 failIfNullBytePresent(filename); 1095 1096 final var index = indexOfExtension(filename); 1097 if (index == NOT_FOUND) { 1098 return filename; 1099 } 1100 return filename.substring(0, index); 1101 } 1102 1103 // ----------------------------------------------------------------------- 1104 /** 1105 * Checks whether two filenames are equal exactly. 1106 * <p> 1107 * No processing is performed on the filenames other than comparison, thus this 1108 * is merely a null-safe case-sensitive equals. 1109 * 1110 * @param filename1 the first filename to query, may be null 1111 * @param filename2 the second filename to query, may be null 1112 * @return true if the filenames are equal, null equals null 1113 */ 1114 public static boolean equals(final String filename1, final String filename2) { 1115 return equals(filename1, filename2, false, IOCase.SENSITIVE); 1116 } 1117 1118 /** 1119 * Checks whether two filenames are equal using the case rules of the system. 1120 * <p> 1121 * No processing is performed on the filenames other than comparison. The check 1122 * is case-sensitive on Unix and case-insensitive on Windows. 1123 * 1124 * @param filename1 the first filename to query, may be null 1125 * @param filename2 the second filename to query, may be null 1126 * @return true if the filenames are equal, null equals null 1127 */ 1128 public static boolean equalsOnSystem(final String filename1, final String filename2) { 1129 return equals(filename1, filename2, false, IOCase.SYSTEM); 1130 } 1131 1132 // ----------------------------------------------------------------------- 1133 /** 1134 * Checks whether two filenames are equal after both have been normalized. 1135 * <p> 1136 * Both filenames are first passed to {@link #normalize(String)}. The check is 1137 * then performed in a case-sensitive manner. 1138 * 1139 * @param filename1 the first filename to query, may be null 1140 * @param filename2 the second filename to query, may be null 1141 * @return true if the filenames are equal, null equals null 1142 */ 1143 public static boolean equalsNormalized(final String filename1, final String filename2) { 1144 return equals(filename1, filename2, true, IOCase.SENSITIVE); 1145 } 1146 1147 /** 1148 * Checks whether two filenames are equal after both have been normalized and 1149 * using the case rules of the system. 1150 * <p> 1151 * Both filenames are first passed to {@link #normalize(String)}. The check is 1152 * then performed case-sensitive on Unix and case-insensitive on Windows. 1153 * 1154 * @param filename1 the first filename to query, may be null 1155 * @param filename2 the second filename to query, may be null 1156 * @return true if the filenames are equal, null equals null 1157 */ 1158 public static boolean equalsNormalizedOnSystem(final String filename1, final String filename2) { 1159 return equals(filename1, filename2, true, IOCase.SYSTEM); 1160 } 1161 1162 /** 1163 * Checks whether two filenames are equal, optionally normalizing and providing 1164 * control over the case-sensitivity. 1165 * 1166 * @param filename1 the first filename to query, may be null 1167 * @param filename2 the second filename to query, may be null 1168 * @param normalized whether to normalize the filenames 1169 * @param caseSensitivity what case sensitivity rule to use, null means 1170 * case-sensitive 1171 * @return true if the filenames are equal, null equals null 1172 */ 1173 public static boolean equals(String filename1, String filename2, final boolean normalized, IOCase caseSensitivity) { 1174 1175 if (filename1 == null || filename2 == null) { 1176 return filename1 == null && filename2 == null; 1177 } 1178 if (normalized) { 1179 filename1 = normalize(filename1); 1180 filename2 = normalize(filename2); 1181 if (filename1 == null || filename2 == null) { 1182 throw new NullPointerException("Error normalizing one or both of the file names"); 1183 } 1184 } 1185 if (caseSensitivity == null) { 1186 caseSensitivity = IOCase.SENSITIVE; 1187 } 1188 return caseSensitivity.checkEquals(filename1, filename2); 1189 } 1190 1191 // ----------------------------------------------------------------------- 1192 /** 1193 * Checks whether the extension of the filename is that specified. 1194 * <p> 1195 * This method obtains the extension as the textual part of the filename after 1196 * the last dot. There must be no directory separator after the dot. The 1197 * extension check is case-sensitive on all platforms. 1198 * 1199 * @param filename the filename to query, null returns false 1200 * @param extension the extension to check for, null or empty checks for no 1201 * extension 1202 * @return true if the filename has the specified extension 1203 * @throws java.lang.IllegalArgumentException if the supplied filename contains 1204 * null bytes 1205 */ 1206 public static boolean isExtension(final String filename, final String extension) { 1207 if (filename == null) { 1208 return false; 1209 } 1210 failIfNullBytePresent(filename); 1211 1212 if (extension == null || extension.isEmpty()) { 1213 return indexOfExtension(filename) == NOT_FOUND; 1214 } 1215 final var fileExt = getExtension(filename); 1216 return fileExt.equals(extension); 1217 } 1218 1219 /** 1220 * Checks whether the extension of the filename is one of those specified. 1221 * <p> 1222 * This method obtains the extension as the textual part of the filename after 1223 * the last dot. There must be no directory separator after the dot. The 1224 * extension check is case-sensitive on all platforms. 1225 * 1226 * @param filename the filename to query, null returns false 1227 * @param extensions the extensions to check for, null checks for no extension 1228 * @return true if the filename is one of the extensions 1229 * @throws java.lang.IllegalArgumentException if the supplied filename contains 1230 * null bytes 1231 */ 1232 public static boolean isExtension(final String filename, final String... extensions) { 1233 if (filename == null) { 1234 return false; 1235 } 1236 failIfNullBytePresent(filename); 1237 1238 if (extensions == null || extensions.length == 0) { 1239 return indexOfExtension(filename) == NOT_FOUND; 1240 } 1241 final var fileExt = getExtension(filename); 1242 for (final String extension : extensions) { 1243 if (fileExt.equals(extension)) { 1244 return true; 1245 } 1246 } 1247 return false; 1248 } 1249 1250 /** 1251 * Checks whether the extension of the filename is one of those specified. 1252 * <p> 1253 * This method obtains the extension as the textual part of the filename after 1254 * the last dot. There must be no directory separator after the dot. The 1255 * extension check is case-sensitive on all platforms. 1256 * 1257 * @param filename the filename to query, null returns false 1258 * @param extensions the extensions to check for, null checks for no extension 1259 * @return true if the filename is one of the extensions 1260 * @throws java.lang.IllegalArgumentException if the supplied filename contains 1261 * null bytes 1262 */ 1263 public static boolean isExtension(final String filename, final Collection<String> extensions) { 1264 if (filename == null) { 1265 return false; 1266 } 1267 failIfNullBytePresent(filename); 1268 1269 if (extensions == null || extensions.isEmpty()) { 1270 return indexOfExtension(filename) == NOT_FOUND; 1271 } 1272 final var fileExt = getExtension(filename); 1273 for (final String extension : extensions) { 1274 if (fileExt.equals(extension)) { 1275 return true; 1276 } 1277 } 1278 return false; 1279 } 1280 1281 // ----------------------------------------------------------------------- 1282 /** 1283 * Checks a filename to see if it matches the specified wildcard matcher, always 1284 * testing case-sensitive. 1285 * <p> 1286 * The wildcard matcher uses the characters '?' and '*' to represent a single or 1287 * multiple (zero or more) wildcard characters. This is the same as often found 1288 * on Dos/Unix command lines. The check is case-sensitive always. 1289 * 1290 * <pre> 1291 * wildcardMatch("c.txt", "*.txt") --> true 1292 * wildcardMatch("c.txt", "*.jpg") --> false 1293 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1294 * wildcardMatch("c.txt", "*.???") --> true 1295 * wildcardMatch("c.txt", "*.????") --> false 1296 * </pre> 1297 * 1298 * N.B. the sequence "*?" does not work properly at present in match strings. 1299 * 1300 * @param filename the filename to match on 1301 * @param wildcardMatcher the wildcard string to match against 1302 * @return true if the filename matches the wildcard string 1303 */ 1304 public static boolean wildcardMatch(final String filename, final String wildcardMatcher) { 1305 return wildcardMatch(filename, wildcardMatcher, IOCase.SENSITIVE); 1306 } 1307 1308 /** 1309 * Checks a filename to see if it matches the specified wildcard matcher using 1310 * the case rules of the system. 1311 * <p> 1312 * The wildcard matcher uses the characters '?' and '*' to represent a single or 1313 * multiple (zero or more) wildcard characters. This is the same as often found 1314 * on Dos/Unix command lines. The check is case-sensitive on Unix and 1315 * case-insensitive on Windows. 1316 * 1317 * <pre> 1318 * wildcardMatch("c.txt", "*.txt") --> true 1319 * wildcardMatch("c.txt", "*.jpg") --> false 1320 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1321 * wildcardMatch("c.txt", "*.???") --> true 1322 * wildcardMatch("c.txt", "*.????") --> false 1323 * </pre> 1324 * 1325 * N.B. the sequence "*?" does not work properly at present in match strings. 1326 * 1327 * @param filename the filename to match on 1328 * @param wildcardMatcher the wildcard string to match against 1329 * @return true if the filename matches the wildcard string 1330 */ 1331 public static boolean wildcardMatchOnSystem(final String filename, final String wildcardMatcher) { 1332 return wildcardMatch(filename, wildcardMatcher, IOCase.SYSTEM); 1333 } 1334 1335 /** 1336 * Checks a filename to see if it matches the specified wildcard matcher 1337 * allowing control over case-sensitivity. 1338 * <p> 1339 * The wildcard matcher uses the characters '?' and '*' to represent a single or 1340 * multiple (zero or more) wildcard characters. N.B. the sequence "*?" does not 1341 * work properly at present in match strings. 1342 * 1343 * @param filename the filename to match on 1344 * @param wildcardMatcher the wildcard string to match against 1345 * @param caseSensitivity what case sensitivity rule to use, null means 1346 * case-sensitive 1347 * @return true if the filename matches the wildcard string 1348 */ 1349 @SuppressWarnings({ "squid:S3776", "squid:S135" }) // owolff: original code 1350 public static boolean wildcardMatch(final String filename, final String wildcardMatcher, IOCase caseSensitivity) { 1351 if (filename == null && wildcardMatcher == null) { 1352 return true; 1353 } 1354 if (filename == null || wildcardMatcher == null) { 1355 return false; 1356 } 1357 if (caseSensitivity == null) { 1358 caseSensitivity = IOCase.SENSITIVE; 1359 } 1360 final var wcs = splitOnTokens(wildcardMatcher); 1361 var anyChars = false; 1362 var textIdx = 0; 1363 var wcsIdx = 0; 1364 final Deque<int[]> backtrack = new ArrayDeque<>(); 1365 1366 // loop around a backtrack stack, to handle complex * matching 1367 do { 1368 if (!backtrack.isEmpty()) { 1369 final var array = backtrack.pop(); 1370 wcsIdx = array[0]; 1371 textIdx = array[1]; 1372 anyChars = true; 1373 } 1374 1375 // loop whilst tokens and text left to process 1376 while (wcsIdx < wcs.length) { 1377 1378 if (wcs[wcsIdx].equals("?")) { 1379 // ? so move to next text char 1380 textIdx++; 1381 if (textIdx > filename.length()) { 1382 break; 1383 } 1384 anyChars = false; 1385 1386 } else if (wcs[wcsIdx].equals("*")) { 1387 // set any chars status 1388 anyChars = true; 1389 if (wcsIdx == wcs.length - 1) { 1390 textIdx = filename.length(); 1391 } 1392 1393 } else { 1394 // matching text token 1395 if (anyChars) { 1396 // any chars then try to locate text token 1397 textIdx = caseSensitivity.checkIndexOf(filename, textIdx, wcs[wcsIdx]); 1398 if (textIdx == NOT_FOUND) { 1399 // token not found 1400 break; 1401 } 1402 final var repeat = caseSensitivity.checkIndexOf(filename, textIdx + 1, wcs[wcsIdx]); 1403 if (repeat >= 0) { 1404 backtrack.push(new int[] { wcsIdx, repeat }); 1405 } 1406 } else // matching from current position 1407 if (!caseSensitivity.checkRegionMatches(filename, textIdx, wcs[wcsIdx])) { 1408 // couldn't match token 1409 break; 1410 } 1411 1412 // matched text token, move text index to end of matched token 1413 textIdx += wcs[wcsIdx].length(); 1414 anyChars = false; 1415 } 1416 1417 wcsIdx++; 1418 } 1419 1420 // full match 1421 if (wcsIdx == wcs.length && textIdx == filename.length()) { 1422 return true; 1423 } 1424 1425 } while (!backtrack.isEmpty()); 1426 1427 return false; 1428 } 1429 1430 /** 1431 * Splits a string into a number of tokens. The text is split by '?' and '*'. 1432 * Where multiple '*' occur consecutively they are collapsed into a single '*'. 1433 * 1434 * @param text the text to split 1435 * @return the array of tokens, never null 1436 */ 1437 static String[] splitOnTokens(final String text) { 1438 // used by wildcardMatch 1439 // package level so a unit test may run on this 1440 1441 if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) { 1442 return new String[] { text }; 1443 } 1444 1445 final var array = text.toCharArray(); 1446 final var list = new ArrayList<String>(); 1447 final var buffer = new StringBuilder(); 1448 char prevChar = 0; 1449 for (final char ch : array) { 1450 if (ch == '?' || ch == '*') { 1451 if (buffer.length() != 0) { 1452 list.add(buffer.toString()); 1453 buffer.setLength(0); 1454 } 1455 if (ch == '?') { 1456 list.add("?"); 1457 } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*' 1458 list.add("*"); 1459 } 1460 } else { 1461 buffer.append(ch); 1462 } 1463 prevChar = ch; 1464 } 1465 if (buffer.length() != 0) { 1466 list.add(buffer.toString()); 1467 } 1468 1469 return list.toArray(new String[list.size()]); 1470 } 1471 1472}