001/*
002 * Copyright 2023 the original author or authors.
003 * <p>
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 * <p>
008 * https://www.apache.org/licenses/LICENSE-2.0
009 * <p>
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package de.cuioss.tools.io;
017
018import java.io.File;
019import java.util.ArrayDeque;
020import java.util.ArrayList;
021import java.util.Collection;
022import java.util.Deque;
023
024import lombok.experimental.UtilityClass;
025
026/**
027 * Copied from commons.io:org.apache.commons.io.FilenameUtils
028 * <p>
029 * General filename and filepath manipulation utilities.
030 * <p>
031 * When dealing with filenames you can hit problems when moving from a Windows
032 * based development machine to a Unix based production machine. This class aims
033 * to help avoid those problems.
034 * <p>
035 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by
036 * using JDK {@link java.io.File File} objects and the two argument constructor
037 * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}.
038 * <p>
 * Most methods on this class are designed to work the same on both Unix and
 * Windows. Methods that behave differently depending on the platform carry
 * 'System', 'Unix' or 'Windows' in their name.
042 * <p>
043 * Most methods recognise both separators (forward and back), and both sets of
044 * prefixes. See the javadoc of each method for details.
045 * <p>
046 * This class defines six components within a filename (example
047 * C:\dev\project\file.txt):
048 * <ul>
049 * <li>the prefix - C:\</li>
050 * <li>the path - dev\project\</li>
051 * <li>the full path - C:\dev\project\</li>
052 * <li>the name - file.txt</li>
053 * <li>the base name - file</li>
054 * <li>the extension - txt</li>
055 * </ul>
056 * Note that this class works best if directory filenames end with a separator.
057 * If you omit the last separator, it is impossible to determine if the filename
058 * corresponds to a file or a directory. As a result, we have chosen to say it
059 * corresponds to a file.
060 * <p>
061 * This class only supports Unix and Windows style names. Prefixes are matched
062 * as follows:
063 *
064 * <pre>
065 * Windows:
066 * a\b\c.txt           --&gt; ""          --&gt; relative
067 * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
068 * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
069 * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
070 * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
071 *
072 * Unix:
073 * a/b/c.txt           --&gt; ""          --&gt; relative
074 * /a/b/c.txt          --&gt; "/"         --&gt; absolute
075 * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
076 * ~                   --&gt; "~/"        --&gt; current user (slash added)
077 * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
078 * ~user               --&gt; "~user/"    --&gt; named user (slash added)
079 * </pre>
080 *
081 * Both prefix styles are matched always, irrespective of the machine that you
082 * are currently running on.
083 * <p>
084 * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils.
085 * </p>
086 *
087 */
088@UtilityClass
089public class FilenameUtils {
090
    /**
     * Sentinel value ({@code -1}) returned by the lookup methods of this class
     * when nothing was found or the input is invalid. It is also the value the
     * {@code String.indexOf} results are compared against in this class.
     */
    private static final int NOT_FOUND = -1;

    /**
     * The extension separator character.
     */
    public static final char EXTENSION_SEPARATOR = '.';

    /**
     * The extension separator String, i.e. {@link #EXTENSION_SEPARATOR} as a
     * one-character String.
     */
    public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);

    /**
     * The Unix separator character.
     */
    private static final char UNIX_SEPARATOR = '/';

    /**
     * The Windows separator character.
     */
    private static final char WINDOWS_SEPARATOR = '\\';

    /**
     * The system separator character, taken from {@link File#separatorChar}.
     */
    private static final char SYSTEM_SEPARATOR = File.separatorChar;
119
120    /**
121     * The separator character that is the opposite of the system separator.
122     */
123    private static final char OTHER_SEPARATOR;
124    static {
125        if (isSystemWindows()) {
126            OTHER_SEPARATOR = UNIX_SEPARATOR;
127        } else {
128            OTHER_SEPARATOR = WINDOWS_SEPARATOR;
129        }
130    }
131
132    // -----------------------------------------------------------------------
133    /**
134     * Determines if Windows file system is in use.
135     *
136     * @return true if the system is Windows
137     */
138    static boolean isSystemWindows() {
139        return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR;
140    }
141
142    // -----------------------------------------------------------------------
143    /**
144     * Checks if the character is a separator.
145     *
146     * @param ch the character to check
147     * @return true if it is a separator character
148     */
149    private static boolean isSeparator(final char ch) {
150        return ch == UNIX_SEPARATOR || ch == WINDOWS_SEPARATOR;
151    }
152
153    // -----------------------------------------------------------------------
154    /**
155     * Normalizes a path, removing double and single dot path steps.
156     * <p>
157     * This method normalizes a path to a standard format. The input may contain
158     * separators in either Unix or Windows format. The output will contain
159     * separators in the format of the system.
160     * <p>
161     * A trailing slash will be retained. A double slash will be merged to a single
162     * slash (but UNC names are handled). A single dot path segment will be removed.
163     * A double dot will cause that path segment and the one before to be removed.
164     * If the double dot has no parent path segment to work with, {@code null} is
165     * returned.
166     * <p>
167     * The output will be the same on both Unix and Windows except for the separator
168     * character.
169     *
170     * <pre>
171     * /foo//               --&gt;   /foo/
172     * /foo/./              --&gt;   /foo/
173     * /foo/../bar          --&gt;   /bar
174     * /foo/../bar/         --&gt;   /bar/
175     * /foo/../bar/../baz   --&gt;   /baz
176     * //foo//./bar         --&gt;   /foo/bar
177     * /../                 --&gt;   null
178     * ../foo               --&gt;   null
179     * foo/bar/..           --&gt;   foo/
180     * foo/../../bar        --&gt;   null
181     * foo/../bar           --&gt;   bar
182     * //server/foo/../bar  --&gt;   //server/bar
183     * //server/../bar      --&gt;   null
184     * C:\foo\..\bar        --&gt;   C:\bar
185     * C:\..\bar            --&gt;   null
186     * ~/foo/../bar/        --&gt;   ~/bar/
187     * ~/../bar             --&gt;   null
188     * </pre>
189     *
190     * (Note the file separator returned will be correct for Windows/Unix)
191     *
192     * @param filename the filename to normalize, null returns null
193     * @return the normalized filename, or null if invalid. Null bytes inside string
194     *         will be removed
195     */
196    public static String normalize(final String filename) {
197        return doNormalize(filename, SYSTEM_SEPARATOR, true);
198    }
199
200    /**
201     * Normalizes a path, removing double and single dot path steps.
202     * <p>
203     * This method normalizes a path to a standard format. The input may contain
204     * separators in either Unix or Windows format. The output will contain
205     * separators in the format specified.
206     * <p>
207     * A trailing slash will be retained. A double slash will be merged to a single
208     * slash (but UNC names are handled). A single dot path segment will be removed.
209     * A double dot will cause that path segment and the one before to be removed.
210     * If the double dot has no parent path segment to work with, {@code null} is
211     * returned.
212     * <p>
     * The output will be the same on both Unix and Windows including the separator
     * character, because the separator is chosen by the caller.
215     *
216     * <pre>
217     * /foo//               --&gt;   /foo/
218     * /foo/./              --&gt;   /foo/
219     * /foo/../bar          --&gt;   /bar
220     * /foo/../bar/         --&gt;   /bar/
221     * /foo/../bar/../baz   --&gt;   /baz
222     * //foo//./bar         --&gt;   /foo/bar
223     * /../                 --&gt;   null
224     * ../foo               --&gt;   null
225     * foo/bar/..           --&gt;   foo/
226     * foo/../../bar        --&gt;   null
227     * foo/../bar           --&gt;   bar
228     * //server/foo/../bar  --&gt;   //server/bar
229     * //server/../bar      --&gt;   null
230     * C:\foo\..\bar        --&gt;   C:\bar
231     * C:\..\bar            --&gt;   null
232     * ~/foo/../bar/        --&gt;   ~/bar/
233     * ~/../bar             --&gt;   null
234     * </pre>
235     *
236     * The output will be the same on both Unix and Windows including the separator
237     * character.
238     *
239     * @param filename      the filename to normalize, null returns null
240     * @param unixSeparator {@code true} if a unix separator should be used or
241     *                      {@code false} if a windows separator should be used.
242     * @return the normalized filename, or null if invalid. Null bytes inside string
243     *         will be removed
244     */
245    public static String normalize(final String filename, final boolean unixSeparator) {
246        final var separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR;
247        return doNormalize(filename, separator, true);
248    }
249
250    // -----------------------------------------------------------------------
251    /**
252     * Normalizes a path, removing double and single dot path steps, and removing
253     * any final directory separator.
254     * <p>
255     * This method normalizes a path to a standard format. The input may contain
256     * separators in either Unix or Windows format. The output will contain
257     * separators in the format of the system.
258     * <p>
259     * A trailing slash will be removed. A double slash will be merged to a single
260     * slash (but UNC names are handled). A single dot path segment will be removed.
261     * A double dot will cause that path segment and the one before to be removed.
262     * If the double dot has no parent path segment to work with, {@code null} is
263     * returned.
264     * <p>
265     * The output will be the same on both Unix and Windows except for the separator
266     * character.
267     *
268     * <pre>
269     * /foo//               --&gt;   /foo
270     * /foo/./              --&gt;   /foo
271     * /foo/../bar          --&gt;   /bar
272     * /foo/../bar/         --&gt;   /bar
273     * /foo/../bar/../baz   --&gt;   /baz
274     * //foo//./bar         --&gt;   /foo/bar
275     * /../                 --&gt;   null
276     * ../foo               --&gt;   null
277     * foo/bar/..           --&gt;   foo
278     * foo/../../bar        --&gt;   null
279     * foo/../bar           --&gt;   bar
280     * //server/foo/../bar  --&gt;   //server/bar
281     * //server/../bar      --&gt;   null
282     * C:\foo\..\bar        --&gt;   C:\bar
283     * C:\..\bar            --&gt;   null
284     * ~/foo/../bar/        --&gt;   ~/bar
285     * ~/../bar             --&gt;   null
286     * </pre>
287     *
288     * (Note the file separator returned will be correct for Windows/Unix)
289     *
290     * @param filename the filename to normalize, null returns null
291     * @return the normalized filename, or null if invalid. Null bytes inside string
292     *         will be removed
293     */
294    public static String normalizeNoEndSeparator(final String filename) {
295        return doNormalize(filename, SYSTEM_SEPARATOR, false);
296    }
297
298    /**
299     * Normalizes a path, removing double and single dot path steps, and removing
300     * any final directory separator.
301     * <p>
302     * This method normalizes a path to a standard format. The input may contain
303     * separators in either Unix or Windows format. The output will contain
304     * separators in the format specified.
305     * <p>
306     * A trailing slash will be removed. A double slash will be merged to a single
307     * slash (but UNC names are handled). A single dot path segment will be removed.
308     * A double dot will cause that path segment and the one before to be removed.
309     * If the double dot has no parent path segment to work with, {@code null} is
310     * returned.
311     * <p>
312     * The output will be the same on both Unix and Windows including the separator
313     * character.
314     *
315     * <pre>
316     * /foo//               --&gt;   /foo
317     * /foo/./              --&gt;   /foo
318     * /foo/../bar          --&gt;   /bar
319     * /foo/../bar/         --&gt;   /bar
320     * /foo/../bar/../baz   --&gt;   /baz
321     * //foo//./bar         --&gt;   /foo/bar
322     * /../                 --&gt;   null
323     * ../foo               --&gt;   null
324     * foo/bar/..           --&gt;   foo
325     * foo/../../bar        --&gt;   null
326     * foo/../bar           --&gt;   bar
327     * //server/foo/../bar  --&gt;   //server/bar
328     * //server/../bar      --&gt;   null
329     * C:\foo\..\bar        --&gt;   C:\bar
330     * C:\..\bar            --&gt;   null
331     * ~/foo/../bar/        --&gt;   ~/bar
332     * ~/../bar             --&gt;   null
333     * </pre>
334     *
335     * @param filename      the filename to normalize, null returns null
336     * @param unixSeparator {@code true} if a unix separator should be used or
337     *                      {@code false} if a windows separator should be used.
338     * @return the normalized filename, or null if invalid. Null bytes inside string
339     *         will be removed
340     */
341    public static String normalizeNoEndSeparator(final String filename, final boolean unixSeparator) {
342        final var separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR;
343        return doNormalize(filename, separator, false);
344    }
345
    /**
     * Internal method to perform the normalization.
     * <p>
     * The filename is copied into a char buffer which is then edited in place in
     * four passes: unify the separators, collapse adjoining separators, drop
     * single-dot segments and finally resolve double-dot segments against their
     * parent segment. The characters that belong to the prefix (as reported by
     * {@link #getPrefixLength(String)}) are never removed.
     * <p>
     * NOTE(review): the input is handed to {@code failIfNullBytePresent} before
     * any processing. That helper is defined outside the part of the file
     * reviewed here, so how null bytes are treated should be confirmed there.
     *
     * @param filename      the filename, {@code null} returns {@code null}
     * @param separator     The separator character to use
     * @param keepSeparator true to keep the final separator
     * @return the normalized filename; the unchanged input if it is empty;
     *         {@code null} if the filename is {@code null}, has an invalid prefix
     *         or contains a double dot segment without a parent segment
     */
    @SuppressWarnings({ "squid:S3776", "squid:LabelsShouldNotBeUsedCheck", "squid:ForLoopCounterChangedCheck",
            "java:S6541" }) // owolff: original code
    private static String doNormalize(final String filename, final char separator, final boolean keepSeparator) {
        if (filename == null) {
            return null;
        }

        // helper defined elsewhere in this class, see the review note in the Javadoc
        failIfNullBytePresent(filename);

        // 'size' tracks the number of valid characters in the working buffer
        var size = filename.length();
        if (size == 0) {
            return filename;
        }
        final var prefix = getPrefixLength(filename);
        if (prefix < 0) {
            // getPrefixLength signals an invalid prefix with a negative value
            return null;
        }

        final var array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy
        filename.getChars(0, filename.length(), array, 0);

        // fix separators throughout: every occurrence of the separator that was NOT
        // requested is replaced by the requested one
        final var otherSeparator = separator == SYSTEM_SEPARATOR ? OTHER_SEPARATOR : SYSTEM_SEPARATOR;
        for (var i = 0; i < array.length; i++) {
            if (array[i] == otherSeparator) {
                array[i] = separator;
            }
        }

        // add extra separator on the end to simplify code below;
        // lastIsDirectory remembers whether the input itself ended with a separator
        var lastIsDirectory = true;
        if (array[size - 1] != separator) {
            array[size] = separator;
            size++;
            lastIsDirectory = false;
        }

        // adjoining slashes: starting behind the prefix, so separators inside the
        // prefix (e.g. the leading double slash of a UNC name) are left alone
        for (var i = prefix + 1; i < size; i++) {
            if (array[i] == separator && array[i - 1] == separator) {
                // shift the tail one position to the left, overwriting the duplicate
                System.arraycopy(array, i, array, i - 1, size - i);
                size--;
                i--;
            }
        }

        // dot slash: a segment consisting of exactly "." (preceded by a separator or
        // located directly behind the prefix) is removed together with its separator
        for (var i = prefix + 1; i < size; i++) {
            if (array[i] == separator && array[i - 1] == '.' && (i == prefix + 1 || array[i - 2] == separator)) {
                if (i == size - 1) {
                    // the "." was the last segment: the result denotes a directory
                    lastIsDirectory = true;
                }
                System.arraycopy(array, i + 1, array, i - 1, size - i);
                size -= 2;
                i--;
            }
        }

        // double dot slash: 'i' is the separator terminating a segment that is
        // exactly ".."
        outer: for (var i = prefix + 2; i < size; i++) {
            if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.'
                    && (i == prefix + 2 || array[i - 3] == separator)) {
                if (i == prefix + 2) {
                    // ".." directly behind the prefix: there is no parent to step back to
                    return null;
                }
                if (i == size - 1) {
                    // the ".." was the last segment: the result denotes a directory
                    lastIsDirectory = true;
                }
                // search backwards for the separator in front of the parent segment;
                // array[i - 3] is the separator in front of "..", hence the start at i - 4
                int j;
                for (j = i - 4; j >= prefix; j--) {
                    if (array[j] == separator) {
                        // remove b/../ from a/b/../c
                        System.arraycopy(array, i + 1, array, j + 1, size - i);
                        size -= i - j;
                        i = j + 1;
                        continue outer;
                    }
                }
                // no separator found: the parent segment starts right behind the prefix
                // remove a/../ from a/../c
                System.arraycopy(array, i + 1, array, prefix, size - i);
                size -= i + 1 - prefix;
                i = prefix + 1;
            }
        }

        if (size <= 0) { // should never be less than 0
            return "";
        }
        // when nothing but the prefix is left, it is returned completely, regardless
        // of keepSeparator
        if (size <= prefix || lastIsDirectory && keepSeparator) {
            return new String(array, 0, size); // keep trailing separator
        }
        return new String(array, 0, size - 1); // lose trailing separator
    }
447
448    // -----------------------------------------------------------------------
449    /**
450     * Concatenates a filename to a base path using normal command line style rules.
451     * <p>
452     * The effect is equivalent to resultant directory after changing directory to
453     * the first argument, followed by changing directory to the second argument.
454     * <p>
455     * The first argument is the base path, the second is the path to concatenate.
456     * The returned path is always normalized via {@link #normalize(String)}, thus
457     * <code>..</code> is handled.
458     * <p>
459     * If <code>pathToAdd</code> is absolute (has an absolute prefix), then it will
460     * be normalized and returned. Otherwise, the paths will be joined, normalized
461     * and returned.
462     * <p>
463     * The output will be the same on both Unix and Windows except for the separator
464     * character.
465     *
466     * <pre>
467     * /foo/ + bar          --&gt;   /foo/bar
468     * /foo + bar           --&gt;   /foo/bar
469     * /foo + /bar          --&gt;   /bar
470     * /foo + C:/bar        --&gt;   C:/bar
471     * /foo + C:bar         --&gt;   C:bar (*)
     * /foo/a/ + ../bar     --&gt;   /foo/bar
473     * /foo/ + ../../bar    --&gt;   null
474     * /foo/ + /bar         --&gt;   /bar
475     * /foo/.. + /bar       --&gt;   /bar
476     * /foo + bar/c.txt     --&gt;   /foo/bar/c.txt
477     * /foo/c.txt + bar     --&gt;   /foo/c.txt/bar (!)
478     * </pre>
479     *
480     * (*) Note that the Windows relative drive prefix is unreliable when used with
481     * this method. (!) Note that the first parameter must be a path. If it ends
482     * with a name, then the name will be built into the concatenated path. If this
483     * might be a problem, use {@link #getFullPath(String)} on the base path
484     * argument.
485     *
486     * @param basePath          the base path to attach to, always treated as a path
487     * @param fullFilenameToAdd the filename (or path) to attach to the base
488     * @return the concatenated path, or null if invalid. Null bytes inside string
489     *         will be removed
490     */
491    public static String concat(final String basePath, final String fullFilenameToAdd) {
492        final var prefix = getPrefixLength(fullFilenameToAdd);
493        if (prefix < 0) {
494            return null;
495        }
496        if (prefix > 0) {
497            return normalize(fullFilenameToAdd);
498        }
499        if (basePath == null) {
500            return null;
501        }
502        final var len = basePath.length();
503        if (len == 0) {
504            return normalize(fullFilenameToAdd);
505        }
506        final var ch = basePath.charAt(len - 1);
507        if (isSeparator(ch)) {
508            return normalize(basePath + fullFilenameToAdd);
509        }
510        return normalize(basePath + '/' + fullFilenameToAdd);
511    }
512
513    /**
514     * Determines whether the {@code parent} directory contains the {@code child}
515     * element (a file or directory).
516     * <p>
517     * The files names are expected to be normalized.
518     * </p>
519     *
520     * Edge cases:
521     * <ul>
522     * <li>A {@code directory} must not be null: if null, throw
523     * IllegalArgumentException</li>
524     * <li>A directory does not contain itself: return false</li>
525     * <li>A null child file is not contained in any parent: return false</li>
526     * </ul>
527     *
528     * @param canonicalParent the file to consider as the parent.
529     * @param canonicalChild  the file to consider as the child.
530     * @return true is the candidate leaf is under by the specified composite. False
531     *         otherwise.
532     */
533    public static boolean directoryContains(final String canonicalParent, final String canonicalChild) {
534
535        // Fail fast against NullPointerException
536        if (canonicalParent == null) {
537            throw new IllegalArgumentException("Directory must not be null");
538        }
539
540        if (canonicalChild == null || IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) {
541            return false;
542        }
543
544        return IOCase.SYSTEM.checkStartsWith(canonicalChild, canonicalParent);
545    }
546
547    // -----------------------------------------------------------------------
548    /**
549     * Converts all separators to the Unix separator of forward slash.
550     *
551     * @param path the path to be changed, null ignored
552     * @return the updated path
553     */
554    public static String separatorsToUnix(final String path) {
555        if (path == null || path.indexOf(WINDOWS_SEPARATOR) == NOT_FOUND) {
556            return path;
557        }
558        return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR);
559    }
560
561    /**
562     * Converts all separators to the Windows separator of backslash.
563     *
564     * @param path the path to be changed, null ignored
565     * @return the updated path
566     */
567    public static String separatorsToWindows(final String path) {
568        if (path == null || path.indexOf(UNIX_SEPARATOR) == NOT_FOUND) {
569            return path;
570        }
571        return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR);
572    }
573
574    /**
575     * Converts all separators to the system separator.
576     *
577     * @param path the path to be changed, null ignored
578     * @return the updated path
579     */
580    public static String separatorsToSystem(final String path) {
581        if (path == null) {
582            return null;
583        }
584        if (isSystemWindows()) {
585            return separatorsToWindows(path);
586        }
587        return separatorsToUnix(path);
588    }
589
590    // -----------------------------------------------------------------------
591    /**
592     * Returns the length of the filename prefix, such as <code>C:/</code> or
593     * <code>~/</code>.
594     * <p>
595     * This method will handle a file in either Unix or Windows format.
596     * <p>
597     * The prefix length includes the first slash in the full filename if
598     * applicable. Thus, it is possible that the length returned is greater than the
599     * length of the input string.
600     *
601     * <pre>
602     * Windows:
603     * a\b\c.txt           --&gt; ""          --&gt; relative
604     * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
605     * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
606     * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
607     * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
608     * \\\a\b\c.txt        --&gt;  error, length = -1
609     *
610     * Unix:
611     * a/b/c.txt           --&gt; ""          --&gt; relative
612     * /a/b/c.txt          --&gt; "/"         --&gt; absolute
613     * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
614     * ~                   --&gt; "~/"        --&gt; current user (slash added)
615     * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
616     * ~user               --&gt; "~user/"    --&gt; named user (slash added)
617     * //server/a/b/c.txt  --&gt; "//server/"
618     * ///a/b/c.txt        --&gt; error, length = -1
619     * </pre>
620     * <p>
621     * The output will be the same irrespective of the machine that the code is
622     * running on. i.e. both Unix and Windows prefixes are matched regardless.
623     * <p>
624     * Note that a leading // (or \\) is used to indicate a UNC name on Windows.
625     * These must be followed by a server name, so double-slashes are not collapsed
626     * to a single slash at the start of the filename.
627     *
628     * @param filename the filename to find the prefix in, null returns -1
629     * @return the length of the prefix, -1 if invalid or null
630     */
631    @SuppressWarnings({ "squid:S3776" }) // owolff: original code
632    public static int getPrefixLength(final String filename) {
633        if (filename == null) {
634            return NOT_FOUND;
635        }
636        final var len = filename.length();
637        if (len == 0) {
638            return 0;
639        }
640        var ch0 = filename.charAt(0);
641        if (ch0 == ':') {
642            return NOT_FOUND;
643        }
644        if (len == 1) {
645            if (ch0 == '~') {
646                return 2; // return a length greater than the input
647            }
648            return isSeparator(ch0) ? 1 : 0;
649        }
650        if (ch0 == '~') {
651            var posUnix = filename.indexOf(UNIX_SEPARATOR, 1);
652            var posWin = filename.indexOf(WINDOWS_SEPARATOR, 1);
653            if (posUnix == NOT_FOUND && posWin == NOT_FOUND) {
654                return len + 1; // return a length greater than the input
655            }
656            posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
657            posWin = posWin == NOT_FOUND ? posUnix : posWin;
658            return Math.min(posUnix, posWin) + 1;
659        }
660        final var ch1 = filename.charAt(1);
661        if (ch1 == ':') {
662            ch0 = Character.toUpperCase(ch0);
663            if (ch0 >= 'A' && ch0 <= 'Z') {
664                if (len == 2 || !isSeparator(filename.charAt(2))) {
665                    return 2;
666                }
667                return 3;
668            }
669            if (ch0 == UNIX_SEPARATOR) {
670                return 1;
671            }
672            return NOT_FOUND;
673
674        }
675        if (!isSeparator(ch0) || !isSeparator(ch1)) {
676            return isSeparator(ch0) ? 1 : 0;
677        }
678        var posUnix = filename.indexOf(UNIX_SEPARATOR, 2);
679        var posWin = filename.indexOf(WINDOWS_SEPARATOR, 2);
680        if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) {
681            return NOT_FOUND;
682        }
683        posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
684        posWin = posWin == NOT_FOUND ? posUnix : posWin;
685        return Math.min(posUnix, posWin) + 1;
686    }
687
688    /**
689     * Returns the index of the last directory separator character.
690     * <p>
691     * This method will handle a file in either Unix or Windows format. The position
692     * of the last forward or backslash is returned.
693     * <p>
694     * The output will be the same irrespective of the machine that the code is
695     * running on.
696     *
697     * @param filename the filename to find the last path separator in, null returns
698     *                 -1
699     * @return the index of the last separator character, or -1 if there is no such
700     *         character
701     */
702    public static int indexOfLastSeparator(final String filename) {
703        if (filename == null) {
704            return NOT_FOUND;
705        }
706        final var lastUnixPos = filename.lastIndexOf(UNIX_SEPARATOR);
707        final var lastWindowsPos = filename.lastIndexOf(WINDOWS_SEPARATOR);
708        return Math.max(lastUnixPos, lastWindowsPos);
709    }
710
711    /**
712     * Returns the index of the last extension separator character, which is a dot.
713     * <p>
714     * This method also checks that there is no directory separator after the last
715     * dot. To do this it uses {@link #indexOfLastSeparator(String)} which will
716     * handle a file in either Unix or Windows format.
717     * </p>
718     * <p>
719     * The output will be the same irrespective of the machine that the code is
720     * running on.
721     * </p>
722     *
723     * @param filename the filename to find the last extension separator in, null
724     *                 returns -1
725     * @return the index of the last extension separator character, or -1 if there
726     *         is no such character
727     */
728    public static int indexOfExtension(final String filename) {
729        if (filename == null) {
730            return NOT_FOUND;
731        }
732        final var extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR);
733        final var lastSeparator = indexOfLastSeparator(filename);
734        return lastSeparator > extensionPos ? NOT_FOUND : extensionPos;
735    }
736
737    // -----------------------------------------------------------------------
738    /**
739     * Gets the prefix from a full filename, such as <code>C:/</code> or
740     * <code>~/</code>.
741     * <p>
742     * This method will handle a file in either Unix or Windows format. The prefix
743     * includes the first slash in the full filename where applicable.
744     *
745     * <pre>
746     * Windows:
747     * a\b\c.txt           --&gt; ""          --&gt; relative
748     * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
749     * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
750     * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
751     * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
752     *
753     * Unix:
754     * a/b/c.txt           --&gt; ""          --&gt; relative
755     * /a/b/c.txt          --&gt; "/"         --&gt; absolute
756     * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
757     * ~                   --&gt; "~/"        --&gt; current user (slash added)
758     * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
759     * ~user               --&gt; "~user/"    --&gt; named user (slash added)
760     * </pre>
761     * <p>
762     * The output will be the same irrespective of the machine that the code is
763     * running on. i.e. both Unix and Windows prefixes are matched regardless.
764     *
765     * @param filename the filename to query, null returns null
766     * @return the prefix of the file, null if invalid. Null bytes inside string
767     *         will be removed
768     */
769    public static String getPrefix(final String filename) {
770        if (filename == null) {
771            return null;
772        }
773        final var len = getPrefixLength(filename);
774        if (len < 0) {
775            return null;
776        }
777        if (len > filename.length()) {
778            failIfNullBytePresent(filename + UNIX_SEPARATOR);
779            return filename + UNIX_SEPARATOR;
780        }
781        final var path = filename.substring(0, len);
782        failIfNullBytePresent(path);
783        return path;
784    }
785
786    /**
787     * Gets the path from a full filename, which excludes the prefix.
788     * <p>
789     * This method will handle a file in either Unix or Windows format. The method
790     * is entirely text based, and returns the text before and including the last
791     * forward or backslash.
792     *
793     * <pre>
794     * C:\a\b\c.txt --&gt; a\b\
795     * ~/a/b/c.txt  --&gt; a/b/
796     * a.txt        --&gt; ""
797     * a/b/c        --&gt; a/b/
798     * a/b/c/       --&gt; a/b/c/
799     * </pre>
800     * <p>
801     * The output will be the same irrespective of the machine that the code is
802     * running on.
803     * <p>
804     * This method drops the prefix from the result. See
805     * {@link #getFullPath(String)} for the method that retains the prefix.
806     *
807     * @param filename the filename to query, null returns null
808     * @return the path of the file, an empty string if none exists, null if
809     *         invalid. Null bytes inside string will be removed
810     */
811    public static String getPath(final String filename) {
812        return doGetPath(filename, 1);
813    }
814
815    /**
816     * Gets the path from a full filename, which excludes the prefix, and also
817     * excluding the final directory separator.
818     * <p>
819     * This method will handle a file in either Unix or Windows format. The method
820     * is entirely text based, and returns the text before the last forward or
821     * backslash.
822     *
823     * <pre>
824     * C:\a\b\c.txt --&gt; a\b
825     * ~/a/b/c.txt  --&gt; a/b
826     * a.txt        --&gt; ""
827     * a/b/c        --&gt; a/b
828     * a/b/c/       --&gt; a/b/c
829     * </pre>
830     * <p>
831     * The output will be the same irrespective of the machine that the code is
832     * running on.
833     * <p>
834     * This method drops the prefix from the result. See
835     * {@link #getFullPathNoEndSeparator(String)} for the method that retains the
836     * prefix.
837     *
838     * @param filename the filename to query, null returns null
839     * @return the path of the file, an empty string if none exists, null if
840     *         invalid. Null bytes inside string will be removed
841     */
842    public static String getPathNoEndSeparator(final String filename) {
843        return doGetPath(filename, 0);
844    }
845
846    /**
847     * Does the work of getting the path.
848     *
849     * @param filename     the filename
850     * @param separatorAdd 0 to omit the end separator, 1 to return it
851     * @return the path. Null bytes inside string will be removed
852     */
853    private static String doGetPath(final String filename, final int separatorAdd) {
854        if (filename == null) {
855            return null;
856        }
857        final var prefix = getPrefixLength(filename);
858        if (prefix < 0) {
859            return null;
860        }
861        final var index = indexOfLastSeparator(filename);
862        final var endIndex = index + separatorAdd;
863        if (prefix >= filename.length() || index < 0 || prefix >= endIndex) {
864            return "";
865        }
866        final var path = filename.substring(prefix, endIndex);
867        failIfNullBytePresent(path);
868        return path;
869    }
870
871    /**
872     * Gets the full path from a full filename, which is the prefix + path.
873     * <p>
874     * This method will handle a file in either Unix or Windows format. The method
875     * is entirely text based, and returns the text before and including the last
876     * forward or backslash.
877     *
878     * <pre>
879     * C:\a\b\c.txt --&gt; C:\a\b\
880     * ~/a/b/c.txt  --&gt; ~/a/b/
881     * a.txt        --&gt; ""
882     * a/b/c        --&gt; a/b/
883     * a/b/c/       --&gt; a/b/c/
884     * C:           --&gt; C:
885     * C:\          --&gt; C:\
886     * ~            --&gt; ~/
887     * ~/           --&gt; ~/
888     * ~user        --&gt; ~user/
889     * ~user/       --&gt; ~user/
890     * </pre>
891     * <p>
892     * The output will be the same irrespective of the machine that the code is
893     * running on.
894     *
895     * @param filename the filename to query, null returns null
896     * @return the path of the file, an empty string if none exists, null if invalid
897     */
898    public static String getFullPath(final String filename) {
899        return doGetFullPath(filename, true);
900    }
901
902    /**
903     * Gets the full path from a full filename, which is the prefix + path, and also
904     * excluding the final directory separator.
905     * <p>
906     * This method will handle a file in either Unix or Windows format. The method
907     * is entirely text based, and returns the text before the last forward or
908     * backslash.
909     *
910     * <pre>
911     * C:\a\b\c.txt --&gt; C:\a\b
912     * ~/a/b/c.txt  --&gt; ~/a/b
913     * a.txt        --&gt; ""
914     * a/b/c        --&gt; a/b
915     * a/b/c/       --&gt; a/b/c
916     * C:           --&gt; C:
917     * C:\          --&gt; C:\
918     * ~            --&gt; ~
919     * ~/           --&gt; ~
920     * ~user        --&gt; ~user
921     * ~user/       --&gt; ~user
922     * </pre>
923     * <p>
924     * The output will be the same irrespective of the machine that the code is
925     * running on.
926     *
927     * @param filename the filename to query, null returns null
928     * @return the path of the file, an empty string if none exists, null if invalid
929     */
930    public static String getFullPathNoEndSeparator(final String filename) {
931        return doGetFullPath(filename, false);
932    }
933
934    /**
935     * Does the work of getting the path.
936     *
937     * @param filename         the filename
938     * @param includeSeparator true to include the end separator
939     * @return the path
940     */
941    private static String doGetFullPath(final String filename, final boolean includeSeparator) {
942        if (filename == null) {
943            return null;
944        }
945        final var prefix = getPrefixLength(filename);
946        if (prefix < 0) {
947            return null;
948        }
949        if (prefix >= filename.length()) {
950            if (includeSeparator) {
951                return getPrefix(filename); // add end slash if necessary
952            }
953            return filename;
954        }
955        final var index = indexOfLastSeparator(filename);
956        if (index < 0) {
957            return filename.substring(0, prefix);
958        }
959        var end = index + (includeSeparator ? 1 : 0);
960        if (end == 0) {
961            end++;
962        }
963        return filename.substring(0, end);
964    }
965
966    /**
967     * Gets the name minus the path from a full filename.
968     * <p>
969     * This method will handle a file in either Unix or Windows format. The text
970     * after the last forward or backslash is returned.
971     *
972     * <pre>
973     * a/b/c.txt --&gt; c.txt
974     * a.txt     --&gt; a.txt
975     * a/b/c     --&gt; c
976     * a/b/c/    --&gt; ""
977     * </pre>
978     * <p>
979     * The output will be the same irrespective of the machine that the code is
980     * running on.
981     *
982     * @param filename the filename to query, null returns null
983     * @return the name of the file without the path, or an empty string if none
984     *         exists. Null bytes inside string will be removed
985     */
986    public static String getName(final String filename) {
987        if (filename == null) {
988            return null;
989        }
990        failIfNullBytePresent(filename);
991        final var index = indexOfLastSeparator(filename);
992        return filename.substring(index + 1);
993    }
994
995    /**
996     * Check the input for null bytes, a sign of unsanitized data being passed to
997     * file level functions.
998     * <p>
999     * This may be used for poison byte attacks.
1000     *
1001     * @param path the path to check
1002     */
1003    private static void failIfNullBytePresent(final String path) {
1004        final var len = path.length();
1005        for (var i = 0; i < len; i++) {
1006            if (path.charAt(i) == 0) {
1007                throw new IllegalArgumentException("""
1008                        Null byte present in file/path name. There are no \
1009                        known legitimate use cases for such data, but several injection attacks may use it\
1010                        """);
1011            }
1012        }
1013    }
1014
1015    /**
1016     * Gets the base name, minus the full path and extension, from a full filename.
1017     * <p>
1018     * This method will handle a file in either Unix or Windows format. The text
1019     * after the last forward or backslash and before the last dot is returned.
1020     *
1021     * <pre>
1022     * a/b/c.txt --&gt; c
1023     * a.txt     --&gt; a
1024     * a/b/c     --&gt; c
1025     * a/b/c/    --&gt; ""
1026     * </pre>
1027     * <p>
1028     * The output will be the same irrespective of the machine that the code is
1029     * running on.
1030     *
1031     * @param filename the filename to query, null returns null
1032     * @return the name of the file without the path, or an empty string if none
1033     *         exists. Null bytes inside string will be removed
1034     */
1035    public static String getBaseName(final String filename) {
1036        return removeExtension(getName(filename));
1037    }
1038
1039    /**
1040     * Gets the extension of a filename.
1041     * <p>
1042     * This method returns the textual part of the filename after the last dot.
1043     * There must be no directory separator after the dot.
1044     *
1045     * <pre>
1046     * foo.txt      --&gt; "txt"
1047     * a/b/c.jpg    --&gt; "jpg"
1048     * a/b.txt/c    --&gt; ""
1049     * a/b/c        --&gt; ""
1050     * </pre>
1051     * <p>
1052     * The output will be the same irrespective of the machine that the code is
1053     * running on.
1054     *
1055     * @param filename the filename to retrieve the extension of.
1056     * @return the extension of the file or an empty string if none exists or
1057     *         {@code null} if the filename is {@code null}.
1058     */
1059    public static String getExtension(final String filename) {
1060        if (filename == null) {
1061            return null;
1062        }
1063        final var index = indexOfExtension(filename);
1064        if (index == NOT_FOUND) {
1065            return "";
1066        }
1067        return filename.substring(index + 1);
1068    }
1069
1070    // -----------------------------------------------------------------------
1071    /**
1072     * Removes the extension from a filename.
1073     * <p>
1074     * This method returns the textual part of the filename before the last dot.
1075     * There must be no directory separator after the dot.
1076     *
1077     * <pre>
1078     * foo.txt    --&gt; foo
1079     * a\b\c.jpg  --&gt; a\b\c
1080     * a\b\c      --&gt; a\b\c
1081     * a.b\c      --&gt; a.b\c
1082     * </pre>
1083     * <p>
1084     * The output will be the same irrespective of the machine that the code is
1085     * running on.
1086     *
1087     * @param filename the filename to query, null returns null
1088     * @return the filename minus the extension
1089     */
1090    public static String removeExtension(final String filename) {
1091        if (filename == null) {
1092            return null;
1093        }
1094        failIfNullBytePresent(filename);
1095
1096        final var index = indexOfExtension(filename);
1097        if (index == NOT_FOUND) {
1098            return filename;
1099        }
1100        return filename.substring(0, index);
1101    }
1102
1103    // -----------------------------------------------------------------------
1104    /**
1105     * Checks whether two filenames are equal exactly.
1106     * <p>
1107     * No processing is performed on the filenames other than comparison, thus this
1108     * is merely a null-safe case-sensitive equals.
1109     *
1110     * @param filename1 the first filename to query, may be null
1111     * @param filename2 the second filename to query, may be null
1112     * @return true if the filenames are equal, null equals null
1113     */
1114    public static boolean equals(final String filename1, final String filename2) {
1115        return equals(filename1, filename2, false, IOCase.SENSITIVE);
1116    }
1117
1118    /**
1119     * Checks whether two filenames are equal using the case rules of the system.
1120     * <p>
1121     * No processing is performed on the filenames other than comparison. The check
1122     * is case-sensitive on Unix and case-insensitive on Windows.
1123     *
1124     * @param filename1 the first filename to query, may be null
1125     * @param filename2 the second filename to query, may be null
1126     * @return true if the filenames are equal, null equals null
1127     */
1128    public static boolean equalsOnSystem(final String filename1, final String filename2) {
1129        return equals(filename1, filename2, false, IOCase.SYSTEM);
1130    }
1131
1132    // -----------------------------------------------------------------------
1133    /**
1134     * Checks whether two filenames are equal after both have been normalized.
1135     * <p>
1136     * Both filenames are first passed to {@link #normalize(String)}. The check is
1137     * then performed in a case-sensitive manner.
1138     *
1139     * @param filename1 the first filename to query, may be null
1140     * @param filename2 the second filename to query, may be null
1141     * @return true if the filenames are equal, null equals null
1142     */
1143    public static boolean equalsNormalized(final String filename1, final String filename2) {
1144        return equals(filename1, filename2, true, IOCase.SENSITIVE);
1145    }
1146
1147    /**
1148     * Checks whether two filenames are equal after both have been normalized and
1149     * using the case rules of the system.
1150     * <p>
1151     * Both filenames are first passed to {@link #normalize(String)}. The check is
1152     * then performed case-sensitive on Unix and case-insensitive on Windows.
1153     *
1154     * @param filename1 the first filename to query, may be null
1155     * @param filename2 the second filename to query, may be null
1156     * @return true if the filenames are equal, null equals null
1157     */
1158    public static boolean equalsNormalizedOnSystem(final String filename1, final String filename2) {
1159        return equals(filename1, filename2, true, IOCase.SYSTEM);
1160    }
1161
1162    /**
1163     * Checks whether two filenames are equal, optionally normalizing and providing
1164     * control over the case-sensitivity.
1165     *
1166     * @param filename1       the first filename to query, may be null
1167     * @param filename2       the second filename to query, may be null
1168     * @param normalized      whether to normalize the filenames
1169     * @param caseSensitivity what case sensitivity rule to use, null means
1170     *                        case-sensitive
1171     * @return true if the filenames are equal, null equals null
1172     */
1173    public static boolean equals(String filename1, String filename2, final boolean normalized, IOCase caseSensitivity) {
1174
1175        if (filename1 == null || filename2 == null) {
1176            return filename1 == null && filename2 == null;
1177        }
1178        if (normalized) {
1179            filename1 = normalize(filename1);
1180            filename2 = normalize(filename2);
1181            if (filename1 == null || filename2 == null) {
1182                throw new NullPointerException("Error normalizing one or both of the file names");
1183            }
1184        }
1185        if (caseSensitivity == null) {
1186            caseSensitivity = IOCase.SENSITIVE;
1187        }
1188        return caseSensitivity.checkEquals(filename1, filename2);
1189    }
1190
1191    // -----------------------------------------------------------------------
1192    /**
1193     * Checks whether the extension of the filename is that specified.
1194     * <p>
1195     * This method obtains the extension as the textual part of the filename after
1196     * the last dot. There must be no directory separator after the dot. The
1197     * extension check is case-sensitive on all platforms.
1198     *
1199     * @param filename  the filename to query, null returns false
1200     * @param extension the extension to check for, null or empty checks for no
1201     *                  extension
1202     * @return true if the filename has the specified extension
1203     * @throws java.lang.IllegalArgumentException if the supplied filename contains
1204     *                                            null bytes
1205     */
1206    public static boolean isExtension(final String filename, final String extension) {
1207        if (filename == null) {
1208            return false;
1209        }
1210        failIfNullBytePresent(filename);
1211
1212        if (extension == null || extension.isEmpty()) {
1213            return indexOfExtension(filename) == NOT_FOUND;
1214        }
1215        final var fileExt = getExtension(filename);
1216        return fileExt.equals(extension);
1217    }
1218
1219    /**
1220     * Checks whether the extension of the filename is one of those specified.
1221     * <p>
1222     * This method obtains the extension as the textual part of the filename after
1223     * the last dot. There must be no directory separator after the dot. The
1224     * extension check is case-sensitive on all platforms.
1225     *
1226     * @param filename   the filename to query, null returns false
1227     * @param extensions the extensions to check for, null checks for no extension
1228     * @return true if the filename is one of the extensions
1229     * @throws java.lang.IllegalArgumentException if the supplied filename contains
1230     *                                            null bytes
1231     */
1232    public static boolean isExtension(final String filename, final String... extensions) {
1233        if (filename == null) {
1234            return false;
1235        }
1236        failIfNullBytePresent(filename);
1237
1238        if (extensions == null || extensions.length == 0) {
1239            return indexOfExtension(filename) == NOT_FOUND;
1240        }
1241        final var fileExt = getExtension(filename);
1242        for (final String extension : extensions) {
1243            if (fileExt.equals(extension)) {
1244                return true;
1245            }
1246        }
1247        return false;
1248    }
1249
1250    /**
1251     * Checks whether the extension of the filename is one of those specified.
1252     * <p>
1253     * This method obtains the extension as the textual part of the filename after
1254     * the last dot. There must be no directory separator after the dot. The
1255     * extension check is case-sensitive on all platforms.
1256     *
1257     * @param filename   the filename to query, null returns false
1258     * @param extensions the extensions to check for, null checks for no extension
1259     * @return true if the filename is one of the extensions
1260     * @throws java.lang.IllegalArgumentException if the supplied filename contains
1261     *                                            null bytes
1262     */
1263    public static boolean isExtension(final String filename, final Collection<String> extensions) {
1264        if (filename == null) {
1265            return false;
1266        }
1267        failIfNullBytePresent(filename);
1268
1269        if (extensions == null || extensions.isEmpty()) {
1270            return indexOfExtension(filename) == NOT_FOUND;
1271        }
1272        final var fileExt = getExtension(filename);
1273        for (final String extension : extensions) {
1274            if (fileExt.equals(extension)) {
1275                return true;
1276            }
1277        }
1278        return false;
1279    }
1280
1281    // -----------------------------------------------------------------------
1282    /**
1283     * Checks a filename to see if it matches the specified wildcard matcher, always
1284     * testing case-sensitive.
1285     * <p>
1286     * The wildcard matcher uses the characters '?' and '*' to represent a single or
1287     * multiple (zero or more) wildcard characters. This is the same as often found
1288     * on Dos/Unix command lines. The check is case-sensitive always.
1289     *
1290     * <pre>
1291     * wildcardMatch("c.txt", "*.txt")      --&gt; true
1292     * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1293     * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1294     * wildcardMatch("c.txt", "*.???")      --&gt; true
1295     * wildcardMatch("c.txt", "*.????")     --&gt; false
1296     * </pre>
1297     *
1298     * N.B. the sequence "*?" does not work properly at present in match strings.
1299     *
1300     * @param filename        the filename to match on
1301     * @param wildcardMatcher the wildcard string to match against
1302     * @return true if the filename matches the wildcard string
1303     */
1304    public static boolean wildcardMatch(final String filename, final String wildcardMatcher) {
1305        return wildcardMatch(filename, wildcardMatcher, IOCase.SENSITIVE);
1306    }
1307
1308    /**
1309     * Checks a filename to see if it matches the specified wildcard matcher using
1310     * the case rules of the system.
1311     * <p>
1312     * The wildcard matcher uses the characters '?' and '*' to represent a single or
1313     * multiple (zero or more) wildcard characters. This is the same as often found
1314     * on Dos/Unix command lines. The check is case-sensitive on Unix and
1315     * case-insensitive on Windows.
1316     *
1317     * <pre>
1318     * wildcardMatch("c.txt", "*.txt")      --&gt; true
1319     * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1320     * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1321     * wildcardMatch("c.txt", "*.???")      --&gt; true
1322     * wildcardMatch("c.txt", "*.????")     --&gt; false
1323     * </pre>
1324     *
1325     * N.B. the sequence "*?" does not work properly at present in match strings.
1326     *
1327     * @param filename        the filename to match on
1328     * @param wildcardMatcher the wildcard string to match against
1329     * @return true if the filename matches the wildcard string
1330     */
1331    public static boolean wildcardMatchOnSystem(final String filename, final String wildcardMatcher) {
1332        return wildcardMatch(filename, wildcardMatcher, IOCase.SYSTEM);
1333    }
1334
1335    /**
1336     * Checks a filename to see if it matches the specified wildcard matcher
1337     * allowing control over case-sensitivity.
1338     * <p>
1339     * The wildcard matcher uses the characters '?' and '*' to represent a single or
1340     * multiple (zero or more) wildcard characters. N.B. the sequence "*?" does not
1341     * work properly at present in match strings.
1342     *
1343     * @param filename        the filename to match on
1344     * @param wildcardMatcher the wildcard string to match against
1345     * @param caseSensitivity what case sensitivity rule to use, null means
1346     *                        case-sensitive
1347     * @return true if the filename matches the wildcard string
1348     */
1349    @SuppressWarnings({ "squid:S3776", "squid:S135" }) // owolff: original code
1350    public static boolean wildcardMatch(final String filename, final String wildcardMatcher, IOCase caseSensitivity) {
1351        if (filename == null && wildcardMatcher == null) {
1352            return true;
1353        }
1354        if (filename == null || wildcardMatcher == null) {
1355            return false;
1356        }
1357        if (caseSensitivity == null) {
1358            caseSensitivity = IOCase.SENSITIVE;
1359        }
1360        final var wcs = splitOnTokens(wildcardMatcher);
1361        var anyChars = false;
1362        var textIdx = 0;
1363        var wcsIdx = 0;
1364        final Deque<int[]> backtrack = new ArrayDeque<>();
1365
1366        // loop around a backtrack stack, to handle complex * matching
1367        do {
1368            if (!backtrack.isEmpty()) {
1369                final var array = backtrack.pop();
1370                wcsIdx = array[0];
1371                textIdx = array[1];
1372                anyChars = true;
1373            }
1374
1375            // loop whilst tokens and text left to process
1376            while (wcsIdx < wcs.length) {
1377
1378                if (wcs[wcsIdx].equals("?")) {
1379                    // ? so move to next text char
1380                    textIdx++;
1381                    if (textIdx > filename.length()) {
1382                        break;
1383                    }
1384                    anyChars = false;
1385
1386                } else if (wcs[wcsIdx].equals("*")) {
1387                    // set any chars status
1388                    anyChars = true;
1389                    if (wcsIdx == wcs.length - 1) {
1390                        textIdx = filename.length();
1391                    }
1392
1393                } else {
1394                    // matching text token
1395                    if (anyChars) {
1396                        // any chars then try to locate text token
1397                        textIdx = caseSensitivity.checkIndexOf(filename, textIdx, wcs[wcsIdx]);
1398                        if (textIdx == NOT_FOUND) {
1399                            // token not found
1400                            break;
1401                        }
1402                        final var repeat = caseSensitivity.checkIndexOf(filename, textIdx + 1, wcs[wcsIdx]);
1403                        if (repeat >= 0) {
1404                            backtrack.push(new int[] { wcsIdx, repeat });
1405                        }
1406                    } else // matching from current position
1407                    if (!caseSensitivity.checkRegionMatches(filename, textIdx, wcs[wcsIdx])) {
1408                        // couldn't match token
1409                        break;
1410                    }
1411
1412                    // matched text token, move text index to end of matched token
1413                    textIdx += wcs[wcsIdx].length();
1414                    anyChars = false;
1415                }
1416
1417                wcsIdx++;
1418            }
1419
1420            // full match
1421            if (wcsIdx == wcs.length && textIdx == filename.length()) {
1422                return true;
1423            }
1424
1425        } while (!backtrack.isEmpty());
1426
1427        return false;
1428    }
1429
1430    /**
1431     * Splits a string into a number of tokens. The text is split by '?' and '*'.
1432     * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1433     *
1434     * @param text the text to split
1435     * @return the array of tokens, never null
1436     */
1437    static String[] splitOnTokens(final String text) {
1438        // used by wildcardMatch
1439        // package level so a unit test may run on this
1440
1441        if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) {
1442            return new String[] { text };
1443        }
1444
1445        final var array = text.toCharArray();
1446        final var list = new ArrayList<String>();
1447        final var buffer = new StringBuilder();
1448        char prevChar = 0;
1449        for (final char ch : array) {
1450            if (ch == '?' || ch == '*') {
1451                if (buffer.length() != 0) {
1452                    list.add(buffer.toString());
1453                    buffer.setLength(0);
1454                }
1455                if (ch == '?') {
1456                    list.add("?");
1457                } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*'
1458                    list.add("*");
1459                }
1460            } else {
1461                buffer.append(ch);
1462            }
1463            prevChar = ch;
1464        }
1465        if (buffer.length() != 0) {
1466            list.add(buffer.toString());
1467        }
1468
1469        return list.toArray(new String[list.size()]);
1470    }
1471
1472}