001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io;
018
019import java.io.File;
020import java.io.IOException;
021import java.util.ArrayDeque;
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.Collection;
025import java.util.Deque;
026import java.util.List;
027import java.util.Objects;
028import java.util.regex.Matcher;
029import java.util.regex.Pattern;
030
031/**
032 * General file name and file path manipulation utilities.
033 * <p>
034 * When dealing with file names you can hit problems when moving from a Windows
035 * based development machine to a Unix based production machine.
036 * This class aims to help avoid those problems.
037 * <p>
038 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by
039 * using JDK {@link java.io.File File} objects and the two argument constructor
040 * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}.
041 * <p>
042 * Most methods on this class are designed to work the same on both Unix and Windows.
043 * Those that don't include 'System', 'Unix' or 'Windows' in their name.
044 * <p>
045 * Most methods recognize both separators (forward and back), and both
046 * sets of prefixes. See the Javadoc of each method for details.
047 * <p>
048 * This class defines six components within a file name
049 * (example C:\dev\project\file.txt):
050 * <ul>
051 * <li>the prefix - C:\</li>
052 * <li>the path - dev\project\</li>
053 * <li>the full path - C:\dev\project\</li>
054 * <li>the name - file.txt</li>
055 * <li>the base name - file</li>
056 * <li>the extension - txt</li>
057 * </ul>
058 * Note that this class works best if directory file names end with a separator.
059 * If you omit the last separator, it is impossible to determine if the file name
060 * corresponds to a file or a directory. As a result, we have chosen to say
061 * it corresponds to a file.
062 * <p>
063 * This class only supports Unix and Windows style names.
064 * Prefixes are matched as follows:
065 * <pre>
066 * Windows:
067 * a\b\c.txt           --&gt; ""          --&gt; relative
068 * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
069 * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
070 * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
071 * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
072 *
073 * Unix:
074 * a/b/c.txt           --&gt; ""          --&gt; relative
075 * /a/b/c.txt          --&gt; "/"         --&gt; absolute
076 * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
077 * ~                   --&gt; "~/"        --&gt; current user (slash added)
078 * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
079 * ~user               --&gt; "~user/"    --&gt; named user (slash added)
080 * </pre>
081 * Both prefix styles are matched always, irrespective of the machine that you are
082 * currently running on.
083 * <p>
084 * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils.
085 *
086 * @since 1.1
087 */
088public class FilenameUtils {
089
090    private static final String[] EMPTY_STRING_ARRAY = {};
091
092    private static final String EMPTY_STRING = "";
093
094    private static final int NOT_FOUND = -1;
095
096    /**
097     * The extension separator character.
098     * @since 1.4
099     */
100    public static final char EXTENSION_SEPARATOR = '.';
101
102    /**
103     * The extension separator String.
104     * @since 1.4
105     */
106    public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);
107
108    /**
109     * The Unix separator character.
110     */
111    private static final char UNIX_SEPARATOR = '/';
112
113    /**
114     * The Windows separator character.
115     */
116    private static final char WINDOWS_SEPARATOR = '\\';
117
118    /**
119     * The system separator character.
120     */
121    private static final char SYSTEM_SEPARATOR = File.separatorChar;
122
123    /**
124     * The separator character that is the opposite of the system separator.
125     */
126    private static final char OTHER_SEPARATOR;
127    static {
128        if (isSystemWindows()) {
129            OTHER_SEPARATOR = UNIX_SEPARATOR;
130        } else {
131            OTHER_SEPARATOR = WINDOWS_SEPARATOR;
132        }
133    }
134
    /**
     * Instances should NOT be constructed in standard programming.
     * <p>
     * The constructor performs no work; it only exists as a public, no-argument entry point.
     * </p>
     */
    public FilenameUtils() {
        // intentionally empty
    }
140
141    //-----------------------------------------------------------------------
142    /**
143     * Determines if Windows file system is in use.
144     *
145     * @return true if the system is Windows
146     */
147    static boolean isSystemWindows() {
148        return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR;
149    }
150
151    //-----------------------------------------------------------------------
152    /**
153     * Checks if the character is a separator.
154     *
155     * @param ch  the character to check
156     * @return true if it is a separator character
157     */
158    private static boolean isSeparator(final char ch) {
159        return ch == UNIX_SEPARATOR || ch == WINDOWS_SEPARATOR;
160    }
161
162    //-----------------------------------------------------------------------
163    /**
164     * Normalizes a path, removing double and single dot path steps.
165     * <p>
166     * This method normalizes a path to a standard format.
167     * The input may contain separators in either Unix or Windows format.
168     * The output will contain separators in the format of the system.
169     * <p>
170     * A trailing slash will be retained.
171     * A double slash will be merged to a single slash (but UNC names are handled).
172     * A single dot path segment will be removed.
173     * A double dot will cause that path segment and the one before to be removed.
174     * If the double dot has no parent path segment to work with, {@code null}
175     * is returned.
176     * <p>
177     * The output will be the same on both Unix and Windows except
178     * for the separator character.
179     * <pre>
180     * /foo//               --&gt;   /foo/
181     * /foo/./              --&gt;   /foo/
182     * /foo/../bar          --&gt;   /bar
183     * /foo/../bar/         --&gt;   /bar/
184     * /foo/../bar/../baz   --&gt;   /baz
185     * //foo//./bar         --&gt;   /foo/bar
186     * /../                 --&gt;   null
187     * ../foo               --&gt;   null
188     * foo/bar/..           --&gt;   foo/
189     * foo/../../bar        --&gt;   null
190     * foo/../bar           --&gt;   bar
191     * //server/foo/../bar  --&gt;   //server/bar
192     * //server/../bar      --&gt;   null
193     * C:\foo\..\bar        --&gt;   C:\bar
194     * C:\..\bar            --&gt;   null
195     * ~/foo/../bar/        --&gt;   ~/bar/
196     * ~/../bar             --&gt;   null
197     * </pre>
198     * (Note the file separator returned will be correct for Windows/Unix)
199     *
200     * @param fileName  the fileName to normalize, null returns null
201     * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed
202     */
203    public static String normalize(final String fileName) {
204        return doNormalize(fileName, SYSTEM_SEPARATOR, true);
205    }
206    /**
207     * Normalizes a path, removing double and single dot path steps.
208     * <p>
209     * This method normalizes a path to a standard format.
210     * The input may contain separators in either Unix or Windows format.
211     * The output will contain separators in the format specified.
212     * <p>
213     * A trailing slash will be retained.
214     * A double slash will be merged to a single slash (but UNC names are handled).
215     * A single dot path segment will be removed.
216     * A double dot will cause that path segment and the one before to be removed.
217     * If the double dot has no parent path segment to work with, {@code null}
218     * is returned.
219     * <p>
     * The output will be the same on both Unix and Windows, including
     * the separator character, because the separator is chosen by the caller.
222     * <pre>
223     * /foo//               --&gt;   /foo/
224     * /foo/./              --&gt;   /foo/
225     * /foo/../bar          --&gt;   /bar
226     * /foo/../bar/         --&gt;   /bar/
227     * /foo/../bar/../baz   --&gt;   /baz
228     * //foo//./bar         --&gt;   /foo/bar
229     * /../                 --&gt;   null
230     * ../foo               --&gt;   null
231     * foo/bar/..           --&gt;   foo/
232     * foo/../../bar        --&gt;   null
233     * foo/../bar           --&gt;   bar
234     * //server/foo/../bar  --&gt;   //server/bar
235     * //server/../bar      --&gt;   null
236     * C:\foo\..\bar        --&gt;   C:\bar
237     * C:\..\bar            --&gt;   null
238     * ~/foo/../bar/        --&gt;   ~/bar/
239     * ~/../bar             --&gt;   null
240     * </pre>
241     * The output will be the same on both Unix and Windows including
242     * the separator character.
243     *
244     * @param fileName  the fileName to normalize, null returns null
245     * @param unixSeparator {@code true} if a unix separator should
246     * be used or {@code false} if a windows separator should be used.
247     * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed
248     * @since 2.0
249     */
250    public static String normalize(final String fileName, final boolean unixSeparator) {
251        final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR;
252        return doNormalize(fileName, separator, true);
253    }
254
255    //-----------------------------------------------------------------------
256    /**
257     * Normalizes a path, removing double and single dot path steps,
258     * and removing any final directory separator.
259     * <p>
260     * This method normalizes a path to a standard format.
261     * The input may contain separators in either Unix or Windows format.
262     * The output will contain separators in the format of the system.
263     * <p>
264     * A trailing slash will be removed.
265     * A double slash will be merged to a single slash (but UNC names are handled).
266     * A single dot path segment will be removed.
267     * A double dot will cause that path segment and the one before to be removed.
268     * If the double dot has no parent path segment to work with, {@code null}
269     * is returned.
270     * <p>
271     * The output will be the same on both Unix and Windows except
272     * for the separator character.
273     * <pre>
274     * /foo//               --&gt;   /foo
275     * /foo/./              --&gt;   /foo
276     * /foo/../bar          --&gt;   /bar
277     * /foo/../bar/         --&gt;   /bar
278     * /foo/../bar/../baz   --&gt;   /baz
279     * //foo//./bar         --&gt;   /foo/bar
280     * /../                 --&gt;   null
281     * ../foo               --&gt;   null
282     * foo/bar/..           --&gt;   foo
283     * foo/../../bar        --&gt;   null
284     * foo/../bar           --&gt;   bar
285     * //server/foo/../bar  --&gt;   //server/bar
286     * //server/../bar      --&gt;   null
287     * C:\foo\..\bar        --&gt;   C:\bar
288     * C:\..\bar            --&gt;   null
289     * ~/foo/../bar/        --&gt;   ~/bar
290     * ~/../bar             --&gt;   null
291     * </pre>
292     * (Note the file separator returned will be correct for Windows/Unix)
293     *
294     * @param fileName  the fileName to normalize, null returns null
295     * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed
296     */
297    public static String normalizeNoEndSeparator(final String fileName) {
298        return doNormalize(fileName, SYSTEM_SEPARATOR, false);
299    }
300
301    /**
302     * Normalizes a path, removing double and single dot path steps,
303     * and removing any final directory separator.
304     * <p>
305     * This method normalizes a path to a standard format.
306     * The input may contain separators in either Unix or Windows format.
307     * The output will contain separators in the format specified.
308     * <p>
309     * A trailing slash will be removed.
310     * A double slash will be merged to a single slash (but UNC names are handled).
311     * A single dot path segment will be removed.
312     * A double dot will cause that path segment and the one before to be removed.
313     * If the double dot has no parent path segment to work with, {@code null}
314     * is returned.
315     * <p>
316     * The output will be the same on both Unix and Windows including
317     * the separator character.
318     * <pre>
319     * /foo//               --&gt;   /foo
320     * /foo/./              --&gt;   /foo
321     * /foo/../bar          --&gt;   /bar
322     * /foo/../bar/         --&gt;   /bar
323     * /foo/../bar/../baz   --&gt;   /baz
324     * //foo//./bar         --&gt;   /foo/bar
325     * /../                 --&gt;   null
326     * ../foo               --&gt;   null
327     * foo/bar/..           --&gt;   foo
328     * foo/../../bar        --&gt;   null
329     * foo/../bar           --&gt;   bar
330     * //server/foo/../bar  --&gt;   //server/bar
331     * //server/../bar      --&gt;   null
332     * C:\foo\..\bar        --&gt;   C:\bar
333     * C:\..\bar            --&gt;   null
334     * ~/foo/../bar/        --&gt;   ~/bar
335     * ~/../bar             --&gt;   null
336     * </pre>
337     *
338     * @param fileName  the fileName to normalize, null returns null
339     * @param unixSeparator {@code true} if a unix separator should
340     * be used or {@code false} if a windows separator should be used.
341     * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed
342     * @since 2.0
343     */
344    public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) {
345         final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR;
346        return doNormalize(fileName, separator, false);
347    }
348
349    /**
350     * Internal method to perform the normalization.
351     *
352     * @param fileName  the fileName
353     * @param separator The separator character to use
354     * @param keepSeparator  true to keep the final separator
355     * @return the normalized fileName. Null bytes inside string will be removed.
356     */
357    private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) {
358        if (fileName == null) {
359            return null;
360        }
361
362        requireNonNullChars(fileName);
363
364        int size = fileName.length();
365        if (size == 0) {
366            return fileName;
367        }
368        final int prefix = getPrefixLength(fileName);
369        if (prefix < 0) {
370            return null;
371        }
372
373        final char[] array = new char[size + 2];  // +1 for possible extra slash, +2 for arraycopy
374        fileName.getChars(0, fileName.length(), array, 0);
375
376        // fix separators throughout
377        final char otherSeparator = separator == SYSTEM_SEPARATOR ? OTHER_SEPARATOR : SYSTEM_SEPARATOR;
378        for (int i = 0; i < array.length; i++) {
379            if (array[i] == otherSeparator) {
380                array[i] = separator;
381            }
382        }
383
384        // add extra separator on the end to simplify code below
385        boolean lastIsDirectory = true;
386        if (array[size - 1] != separator) {
387            array[size++] = separator;
388            lastIsDirectory = false;
389        }
390
391        // adjoining slashes
392        for (int i = prefix + 1; i < size; i++) {
393            if (array[i] == separator && array[i - 1] == separator) {
394                System.arraycopy(array, i, array, i - 1, size - i);
395                size--;
396                i--;
397            }
398        }
399
400        // dot slash
401        for (int i = prefix + 1; i < size; i++) {
402            if (array[i] == separator && array[i - 1] == '.' &&
403                    (i == prefix + 1 || array[i - 2] == separator)) {
404                if (i == size - 1) {
405                    lastIsDirectory = true;
406                }
407                System.arraycopy(array, i + 1, array, i - 1, size - i);
408                size -=2;
409                i--;
410            }
411        }
412
413        // double dot slash
414        outer:
415        for (int i = prefix + 2; i < size; i++) {
416            if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' &&
417                    (i == prefix + 2 || array[i - 3] == separator)) {
418                if (i == prefix + 2) {
419                    return null;
420                }
421                if (i == size - 1) {
422                    lastIsDirectory = true;
423                }
424                int j;
425                for (j = i - 4 ; j >= prefix; j--) {
426                    if (array[j] == separator) {
427                        // remove b/../ from a/b/../c
428                        System.arraycopy(array, i + 1, array, j + 1, size - i);
429                        size -= i - j;
430                        i = j + 1;
431                        continue outer;
432                    }
433                }
434                // remove a/../ from a/../c
435                System.arraycopy(array, i + 1, array, prefix, size - i);
436                size -= i + 1 - prefix;
437                i = prefix + 1;
438            }
439        }
440
441        if (size <= 0) {  // should never be less than 0
442            return EMPTY_STRING;
443        }
444        if (size <= prefix) {  // should never be less than prefix
445            return new String(array, 0, size);
446        }
447        if (lastIsDirectory && keepSeparator) {
448            return new String(array, 0, size);  // keep trailing separator
449        }
450        return new String(array, 0, size - 1);  // lose trailing separator
451    }
452
453    //-----------------------------------------------------------------------
454    /**
455     * Concatenates a fileName to a base path using normal command line style rules.
456     * <p>
457     * The effect is equivalent to resultant directory after changing
458     * directory to the first argument, followed by changing directory to
459     * the second argument.
460     * <p>
461     * The first argument is the base path, the second is the path to concatenate.
462     * The returned path is always normalized via {@link #normalize(String)},
463     * thus {@code ..} is handled.
464     * <p>
465     * If {@code pathToAdd} is absolute (has an absolute prefix), then
466     * it will be normalized and returned.
467     * Otherwise, the paths will be joined, normalized and returned.
468     * <p>
469     * The output will be the same on both Unix and Windows except
470     * for the separator character.
471     * <pre>
472     * /foo/      + bar        --&gt;  /foo/bar
473     * /foo       + bar        --&gt;  /foo/bar
474     * /foo       + /bar       --&gt;  /bar
475     * /foo       + C:/bar     --&gt;  C:/bar
476     * /foo       + C:bar      --&gt;  C:bar (*)
477     * /foo/a/    + ../bar     --&gt;  /foo/bar
478     * /foo/      + ../../bar  --&gt;  null
479     * /foo/      + /bar       --&gt;  /bar
480     * /foo/..    + /bar       --&gt;  /bar
481     * /foo       + bar/c.txt  --&gt;  /foo/bar/c.txt
482     * /foo/c.txt + bar        --&gt;  /foo/c.txt/bar (!)
483     * </pre>
484     * (*) Note that the Windows relative drive prefix is unreliable when
485     * used with this method.
486     * (!) Note that the first parameter must be a path. If it ends with a name, then
487     * the name will be built into the concatenated path. If this might be a problem,
488     * use {@link #getFullPath(String)} on the base path argument.
489     *
490     * @param basePath  the base path to attach to, always treated as a path
491     * @param fullFileNameToAdd  the fileName (or path) to attach to the base
492     * @return the concatenated path, or null if invalid.  Null bytes inside string will be removed
493     */
494    public static String concat(final String basePath, final String fullFileNameToAdd) {
495        final int prefix = getPrefixLength(fullFileNameToAdd);
496        if (prefix < 0) {
497            return null;
498        }
499        if (prefix > 0) {
500            return normalize(fullFileNameToAdd);
501        }
502        if (basePath == null) {
503            return null;
504        }
505        final int len = basePath.length();
506        if (len == 0) {
507            return normalize(fullFileNameToAdd);
508        }
509        final char ch = basePath.charAt(len - 1);
510        if (isSeparator(ch)) {
511            return normalize(basePath + fullFileNameToAdd);
512        }
513        return normalize(basePath + '/' + fullFileNameToAdd);
514    }
515
516    /**
517     * Determines whether the {@code parent} directory contains the {@code child} element (a file or directory).
518     * <p>
519     * The files names are expected to be normalized.
520     * </p>
521     *
522     * Edge cases:
523     * <ul>
524     * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li>
525     * <li>A directory does not contain itself: return false</li>
526     * <li>A null child file is not contained in any parent: return false</li>
527     * </ul>
528     *
529     * @param canonicalParent
530     *            the file to consider as the parent.
531     * @param canonicalChild
532     *            the file to consider as the child.
533     * @return true is the candidate leaf is under by the specified composite. False otherwise.
534     * @throws IOException Never thrown.
535     * @since 2.2
536     * @see FileUtils#directoryContains(File, File)
537     */
538    public static boolean directoryContains(final String canonicalParent, final String canonicalChild)
539            throws IOException {
540        Objects.requireNonNull(canonicalParent, "canonicalParent");
541
542        if (canonicalChild == null) {
543            return false;
544        }
545
546        if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) {
547            return false;
548        }
549
550        return IOCase.SYSTEM.checkStartsWith(canonicalChild, canonicalParent);
551    }
552
553    /**
554     * Converts all separators to the Unix separator of forward slash.
555     *
556     * @param path  the path to be changed, null ignored
557     * @return the updated path
558     */
559    public static String separatorsToUnix(final String path) {
560        if (path == null || path.indexOf(WINDOWS_SEPARATOR) == NOT_FOUND) {
561            return path;
562        }
563        return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR);
564    }
565
566    /**
567     * Converts all separators to the Windows separator of backslash.
568     *
569     * @param path  the path to be changed, null ignored
570     * @return the updated path
571     */
572    public static String separatorsToWindows(final String path) {
573        if (path == null || path.indexOf(UNIX_SEPARATOR) == NOT_FOUND) {
574            return path;
575        }
576        return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR);
577    }
578
579    /**
580     * Converts all separators to the system separator.
581     *
582     * @param path  the path to be changed, null ignored
583     * @return the updated path
584     */
585    public static String separatorsToSystem(final String path) {
586        if (path == null) {
587            return null;
588        }
589        return isSystemWindows() ? separatorsToWindows(path) : separatorsToUnix(path);
590    }
591
592    /**
593     * Returns the length of the fileName prefix, such as {@code C:/} or {@code ~/}.
594     * <p>
595     * This method will handle a file in either Unix or Windows format.
596     * <p>
597     * The prefix length includes the first slash in the full fileName
598     * if applicable. Thus, it is possible that the length returned is greater
599     * than the length of the input string.
600     * <pre>
601     * Windows:
602     * a\b\c.txt           --&gt; 0           --&gt; relative
603     * \a\b\c.txt          --&gt; 1           --&gt; current drive absolute
604     * C:a\b\c.txt         --&gt; 2           --&gt; drive relative
605     * C:\a\b\c.txt        --&gt; 3           --&gt; absolute
606     * \\server\a\b\c.txt  --&gt; 9           --&gt; UNC
607     * \\\a\b\c.txt        --&gt; -1          --&gt; error
608     *
609     * Unix:
610     * a/b/c.txt           --&gt; 0           --&gt; relative
611     * /a/b/c.txt          --&gt; 1           --&gt; absolute
612     * ~/a/b/c.txt         --&gt; 2           --&gt; current user
613     * ~                   --&gt; 2           --&gt; current user (slash added)
614     * ~user/a/b/c.txt     --&gt; 6           --&gt; named user
615     * ~user               --&gt; 6           --&gt; named user (slash added)
616     * //server/a/b/c.txt  --&gt; 9
617     * ///a/b/c.txt        --&gt; -1          --&gt; error
618     * C:                  --&gt; 0           --&gt; valid filename as only null byte and / are reserved characters
619     * </pre>
620     * <p>
621     * The output will be the same irrespective of the machine that the code is running on.
622     * ie. both Unix and Windows prefixes are matched regardless.
623     *
624     * Note that a leading // (or \\) is used to indicate a UNC name on Windows.
625     * These must be followed by a server name, so double-slashes are not collapsed
626     * to a single slash at the start of the fileName.
627     *
628     * @param fileName  the fileName to find the prefix in, null returns -1
629     * @return the length of the prefix, -1 if invalid or null
630     */
631    public static int getPrefixLength(final String fileName) {
632        if (fileName == null) {
633            return NOT_FOUND;
634        }
635        final int len = fileName.length();
636        if (len == 0) {
637            return 0;
638        }
639        char ch0 = fileName.charAt(0);
640        if (ch0 == ':') {
641            return NOT_FOUND;
642        }
643        if (len == 1) {
644            if (ch0 == '~') {
645                return 2;  // return a length greater than the input
646            }
647            return isSeparator(ch0) ? 1 : 0;
648        }
649        if (ch0 == '~') {
650            int posUnix = fileName.indexOf(UNIX_SEPARATOR, 1);
651            int posWin = fileName.indexOf(WINDOWS_SEPARATOR, 1);
652            if (posUnix == NOT_FOUND && posWin == NOT_FOUND) {
653                return len + 1;  // return a length greater than the input
654            }
655            posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
656            posWin = posWin == NOT_FOUND ? posUnix : posWin;
657            return Math.min(posUnix, posWin) + 1;
658        }
659        final char ch1 = fileName.charAt(1);
660        if (ch1 == ':') {
661            ch0 = Character.toUpperCase(ch0);
662            if (ch0 >= 'A' && ch0 <= 'Z') {
663                if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) {
664                    return 0;
665                }
666                if (len == 2 || !isSeparator(fileName.charAt(2))) {
667                    return 2;
668                }
669                return 3;
670            }
671            if (ch0 == UNIX_SEPARATOR) {
672                return 1;
673            }
674            return NOT_FOUND;
675
676        }
677        if (!isSeparator(ch0) || !isSeparator(ch1)) {
678            return isSeparator(ch0) ? 1 : 0;
679        }
680        int posUnix = fileName.indexOf(UNIX_SEPARATOR, 2);
681        int posWin = fileName.indexOf(WINDOWS_SEPARATOR, 2);
682        if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) {
683            return NOT_FOUND;
684        }
685        posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
686        posWin = posWin == NOT_FOUND ? posUnix : posWin;
687        final int pos = Math.min(posUnix, posWin) + 1;
688        final String hostnamePart = fileName.substring(2, pos - 1);
689        return isValidHostName(hostnamePart) ? pos : NOT_FOUND;
690    }
691
692    /**
693     * Returns the index of the last directory separator character.
694     * <p>
695     * This method will handle a file in either Unix or Windows format.
696     * The position of the last forward or backslash is returned.
697     * <p>
698     * The output will be the same irrespective of the machine that the code is running on.
699     *
700     * @param fileName  the fileName to find the last path separator in, null returns -1
701     * @return the index of the last separator character, or -1 if there
702     * is no such character
703     */
704    public static int indexOfLastSeparator(final String fileName) {
705        if (fileName == null) {
706            return NOT_FOUND;
707        }
708        final int lastUnixPos = fileName.lastIndexOf(UNIX_SEPARATOR);
709        final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_SEPARATOR);
710        return Math.max(lastUnixPos, lastWindowsPos);
711    }
712
713    /**
714     * Returns the index of the last extension separator character, which is a dot.
715     * <p>
716     * This method also checks that there is no directory separator after the last dot. To do this it uses
717     * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format.
718     * </p>
719     * <p>
720     * The output will be the same irrespective of the machine that the code is running on, with the
721     * exception of a possible {@link IllegalArgumentException} on Windows (see below).
722     * </p>
723     * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt".
724     * In this case, the name wouldn't be the name of a file, but the identifier of an
725     * alternate data stream (bar.txt) on the file foo.exe. The method used to return
726     * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing
727     * an {@link IllegalArgumentException} for names like this.
728     *
729     * @param fileName
730     *            the fileName to find the last extension separator in, null returns -1
731     * @return the index of the last extension separator character, or -1 if there is no such character
732     * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact,
733     * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
734     */
735    public static int indexOfExtension(final String fileName) throws IllegalArgumentException {
736        if (fileName == null) {
737            return NOT_FOUND;
738        }
739        if (isSystemWindows()) {
740            // Special handling for NTFS ADS: Don't accept colon in the fileName.
741            final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName));
742            if (offset != -1) {
743                throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden.");
744            }
745        }
746        final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
747        final int lastSeparator = indexOfLastSeparator(fileName);
748        return lastSeparator > extensionPos ? NOT_FOUND : extensionPos;
749    }
750
751    //-----------------------------------------------------------------------
752    /**
753     * Gets the prefix from a full fileName, such as {@code C:/}
754     * or {@code ~/}.
755     * <p>
756     * This method will handle a file in either Unix or Windows format.
757     * The prefix includes the first slash in the full fileName where applicable.
758     * <pre>
759     * Windows:
760     * a\b\c.txt           --&gt; ""          --&gt; relative
761     * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
762     * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
763     * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
764     * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
765     *
766     * Unix:
767     * a/b/c.txt           --&gt; ""          --&gt; relative
768     * /a/b/c.txt          --&gt; "/"         --&gt; absolute
769     * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
770     * ~                   --&gt; "~/"        --&gt; current user (slash added)
771     * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
772     * ~user               --&gt; "~user/"    --&gt; named user (slash added)
773     * </pre>
774     * <p>
775     * The output will be the same irrespective of the machine that the code is running on.
776     * ie. both Unix and Windows prefixes are matched regardless.
777     *
778     * @param fileName  the fileName to query, null returns null
779     * @return the prefix of the file, null if invalid. Null bytes inside string will be removed
780     */
781    public static String getPrefix(final String fileName) {
782        if (fileName == null) {
783            return null;
784        }
785        final int len = getPrefixLength(fileName);
786        if (len < 0) {
787            return null;
788        }
789        if (len > fileName.length()) {
790            requireNonNullChars(fileName + UNIX_SEPARATOR);
791            return fileName + UNIX_SEPARATOR;
792        }
793        final String path = fileName.substring(0, len);
794        requireNonNullChars(path);
795        return path;
796    }
797
798    /**
799     * Gets the path from a full fileName, which excludes the prefix.
800     * <p>
801     * This method will handle a file in either Unix or Windows format.
802     * The method is entirely text based, and returns the text before and
803     * including the last forward or backslash.
804     * <pre>
805     * C:\a\b\c.txt --&gt; a\b\
806     * ~/a/b/c.txt  --&gt; a/b/
807     * a.txt        --&gt; ""
808     * a/b/c        --&gt; a/b/
809     * a/b/c/       --&gt; a/b/c/
810     * </pre>
811     * <p>
812     * The output will be the same irrespective of the machine that the code is running on.
813     * <p>
814     * This method drops the prefix from the result.
815     * See {@link #getFullPath(String)} for the method that retains the prefix.
816     *
817     * @param fileName  the fileName to query, null returns null
818     * @return the path of the file, an empty string if none exists, null if invalid.
819     * Null bytes inside string will be removed
820     */
821    public static String getPath(final String fileName) {
822        return doGetPath(fileName, 1);
823    }
824
825    /**
826     * Gets the path from a full fileName, which excludes the prefix, and
827     * also excluding the final directory separator.
828     * <p>
829     * This method will handle a file in either Unix or Windows format.
830     * The method is entirely text based, and returns the text before the
831     * last forward or backslash.
832     * <pre>
833     * C:\a\b\c.txt --&gt; a\b
834     * ~/a/b/c.txt  --&gt; a/b
835     * a.txt        --&gt; ""
836     * a/b/c        --&gt; a/b
837     * a/b/c/       --&gt; a/b/c
838     * </pre>
839     * <p>
840     * The output will be the same irrespective of the machine that the code is running on.
841     * <p>
842     * This method drops the prefix from the result.
843     * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
844     *
845     * @param fileName  the fileName to query, null returns null
846     * @return the path of the file, an empty string if none exists, null if invalid.
847     * Null bytes inside string will be removed
848     */
849    public static String getPathNoEndSeparator(final String fileName) {
850        return doGetPath(fileName, 0);
851    }
852
853    /**
854     * Does the work of getting the path.
855     *
856     * @param fileName  the fileName
857     * @param separatorAdd  0 to omit the end separator, 1 to return it
858     * @return the path. Null bytes inside string will be removed
859     */
860    private static String doGetPath(final String fileName, final int separatorAdd) {
861        if (fileName == null) {
862            return null;
863        }
864        final int prefix = getPrefixLength(fileName);
865        if (prefix < 0) {
866            return null;
867        }
868        final int index = indexOfLastSeparator(fileName);
869        final int endIndex = index+separatorAdd;
870        if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) {
871            return EMPTY_STRING;
872        }
873        final String path = fileName.substring(prefix, endIndex);
874        requireNonNullChars(path);
875        return path;
876    }
877
878    /**
879     * Gets the full path from a full fileName, which is the prefix + path.
880     * <p>
881     * This method will handle a file in either Unix or Windows format.
882     * The method is entirely text based, and returns the text before and
883     * including the last forward or backslash.
884     * <pre>
885     * C:\a\b\c.txt --&gt; C:\a\b\
886     * ~/a/b/c.txt  --&gt; ~/a/b/
887     * a.txt        --&gt; ""
888     * a/b/c        --&gt; a/b/
889     * a/b/c/       --&gt; a/b/c/
890     * C:           --&gt; C:
891     * C:\          --&gt; C:\
892     * ~            --&gt; ~/
893     * ~/           --&gt; ~/
894     * ~user        --&gt; ~user/
895     * ~user/       --&gt; ~user/
896     * </pre>
897     * <p>
898     * The output will be the same irrespective of the machine that the code is running on.
899     *
900     * @param fileName  the fileName to query, null returns null
901     * @return the path of the file, an empty string if none exists, null if invalid
902     */
903    public static String getFullPath(final String fileName) {
904        return doGetFullPath(fileName, true);
905    }
906
907    /**
908     * Gets the full path from a full fileName, which is the prefix + path,
909     * and also excluding the final directory separator.
910     * <p>
911     * This method will handle a file in either Unix or Windows format.
912     * The method is entirely text based, and returns the text before the
913     * last forward or backslash.
914     * <pre>
915     * C:\a\b\c.txt --&gt; C:\a\b
916     * ~/a/b/c.txt  --&gt; ~/a/b
917     * a.txt        --&gt; ""
918     * a/b/c        --&gt; a/b
919     * a/b/c/       --&gt; a/b/c
920     * C:           --&gt; C:
921     * C:\          --&gt; C:\
922     * ~            --&gt; ~
923     * ~/           --&gt; ~
924     * ~user        --&gt; ~user
925     * ~user/       --&gt; ~user
926     * </pre>
927     * <p>
928     * The output will be the same irrespective of the machine that the code is running on.
929     *
930     * @param fileName  the fileName to query, null returns null
931     * @return the path of the file, an empty string if none exists, null if invalid
932     */
933    public static String getFullPathNoEndSeparator(final String fileName) {
934        return doGetFullPath(fileName, false);
935    }
936
937    /**
938     * Does the work of getting the path.
939     *
940     * @param fileName  the fileName
941     * @param includeSeparator  true to include the end separator
942     * @return the path
943     */
944    private static String doGetFullPath(final String fileName, final boolean includeSeparator) {
945        if (fileName == null) {
946            return null;
947        }
948        final int prefix = getPrefixLength(fileName);
949        if (prefix < 0) {
950            return null;
951        }
952        if (prefix >= fileName.length()) {
953            if (includeSeparator) {
954                return getPrefix(fileName);  // add end slash if necessary
955            }
956            return fileName;
957        }
958        final int index = indexOfLastSeparator(fileName);
959        if (index < 0) {
960            return fileName.substring(0, prefix);
961        }
962        int end = index + (includeSeparator ?  1 : 0);
963        if (end == 0) {
964            end++;
965        }
966        return fileName.substring(0, end);
967    }
968
969    /**
970     * Gets the name minus the path from a full fileName.
971     * <p>
972     * This method will handle a file in either Unix or Windows format.
973     * The text after the last forward or backslash is returned.
974     * <pre>
975     * a/b/c.txt --&gt; c.txt
976     * a.txt     --&gt; a.txt
977     * a/b/c     --&gt; c
978     * a/b/c/    --&gt; ""
979     * </pre>
980     * <p>
981     * The output will be the same irrespective of the machine that the code is running on.
982     *
983     * @param fileName  the fileName to query, null returns null
984     * @return the name of the file without the path, or an empty string if none exists.
985     * Null bytes inside string will be removed
986     */
987    public static String getName(final String fileName) {
988        if (fileName == null) {
989            return null;
990        }
991        requireNonNullChars(fileName);
992        final int index = indexOfLastSeparator(fileName);
993        return fileName.substring(index + 1);
994    }
995
996    /**
997     * Checks the input for null bytes, a sign of unsanitized data being passed to to file level functions.
998     *
999     * This may be used for poison byte attacks.
1000     *
1001     * @param path the path to check
1002     */
1003    private static void requireNonNullChars(final String path) {
1004        if (path.indexOf(0) >= 0) {
1005            throw new IllegalArgumentException("Null byte present in file/path name. There are no "
1006                + "known legitimate use cases for such data, but several injection attacks may use it");
1007        }
1008    }
1009
1010    /**
1011     * Gets the base name, minus the full path and extension, from a full fileName.
1012     * <p>
1013     * This method will handle a file in either Unix or Windows format.
1014     * The text after the last forward or backslash and before the last dot is returned.
1015     * <pre>
1016     * a/b/c.txt --&gt; c
1017     * a.txt     --&gt; a
1018     * a/b/c     --&gt; c
1019     * a/b/c/    --&gt; ""
1020     * </pre>
1021     * <p>
1022     * The output will be the same irrespective of the machine that the code is running on.
1023     *
1024     * @param fileName  the fileName to query, null returns null
1025     * @return the name of the file without the path, or an empty string if none exists. Null bytes inside string
1026     * will be removed
1027     */
1028    public static String getBaseName(final String fileName) {
1029        return removeExtension(getName(fileName));
1030    }
1031
1032    /**
1033     * Gets the extension of a fileName.
1034     * <p>
1035     * This method returns the textual part of the fileName after the last dot.
1036     * There must be no directory separator after the dot.
1037     * <pre>
1038     * foo.txt      --&gt; "txt"
1039     * a/b/c.jpg    --&gt; "jpg"
1040     * a/b.txt/c    --&gt; ""
1041     * a/b/c        --&gt; ""
1042     * </pre>
1043     * <p>
1044     * The output will be the same irrespective of the machine that the code is running on, with the
1045     * exception of a possible {@link IllegalArgumentException} on Windows (see below).
1046     * </p>
1047     * <p>
1048     * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt".
1049     * In this case, the name wouldn't be the name of a file, but the identifier of an
1050     * alternate data stream (bar.txt) on the file foo.exe. The method used to return
1051     * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing
1052     * an {@link IllegalArgumentException} for names like this.
1053     *
1054     * @param fileName the fileName to retrieve the extension of.
1055     * @return the extension of the file or an empty string if none exists or {@code null}
1056     * if the fileName is {@code null}.
1057     * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact,
1058     * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
1059     */
1060    public static String getExtension(final String fileName) throws IllegalArgumentException {
1061        if (fileName == null) {
1062            return null;
1063        }
1064        final int index = indexOfExtension(fileName);
1065        if (index == NOT_FOUND) {
1066            return EMPTY_STRING;
1067        }
1068        return fileName.substring(index + 1);
1069    }
1070
1071    /**
1072     * Special handling for NTFS ADS: Don't accept colon in the fileName.
1073     *
1074     * @param fileName a file name
1075     * @return ADS offsets.
1076     */
1077    private static int getAdsCriticalOffset(final String fileName) {
1078        // Step 1: Remove leading path segments.
1079        final int offset1 = fileName.lastIndexOf(SYSTEM_SEPARATOR);
1080        final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR);
1081        if (offset1 == -1) {
1082            if (offset2 == -1) {
1083                return 0;
1084            }
1085            return offset2 + 1;
1086        }
1087        if (offset2 == -1) {
1088            return offset1 + 1;
1089        }
1090        return Math.max(offset1, offset2) + 1;
1091    }
1092
1093    //-----------------------------------------------------------------------
1094    /**
1095     * Removes the extension from a fileName.
1096     * <p>
1097     * This method returns the textual part of the fileName before the last dot.
1098     * There must be no directory separator after the dot.
1099     * <pre>
1100     * foo.txt    --&gt; foo
1101     * a\b\c.jpg  --&gt; a\b\c
1102     * a\b\c      --&gt; a\b\c
1103     * a.b\c      --&gt; a.b\c
1104     * </pre>
1105     * <p>
1106     * The output will be the same irrespective of the machine that the code is running on.
1107     *
1108     * @param fileName  the fileName to query, null returns null
1109     * @return the fileName minus the extension
1110     */
1111    public static String removeExtension(final String fileName) {
1112        if (fileName == null) {
1113            return null;
1114        }
1115        requireNonNullChars(fileName);
1116
1117        final int index = indexOfExtension(fileName);
1118        if (index == NOT_FOUND) {
1119            return fileName;
1120        }
1121        return fileName.substring(0, index);
1122    }
1123
1124    //-----------------------------------------------------------------------
1125    /**
1126     * Checks whether two fileNames are equal exactly.
1127     * <p>
1128     * No processing is performed on the fileNames other than comparison,
1129     * thus this is merely a null-safe case-sensitive equals.
1130     *
1131     * @param fileName1  the first fileName to query, may be null
1132     * @param fileName2  the second fileName to query, may be null
1133     * @return true if the fileNames are equal, null equals null
1134     * @see IOCase#SENSITIVE
1135     */
1136    public static boolean equals(final String fileName1, final String fileName2) {
1137        return equals(fileName1, fileName2, false, IOCase.SENSITIVE);
1138    }
1139
1140    /**
1141     * Checks whether two fileNames are equal using the case rules of the system.
1142     * <p>
1143     * No processing is performed on the fileNames other than comparison.
1144     * The check is case-sensitive on Unix and case-insensitive on Windows.
1145     *
1146     * @param fileName1  the first fileName to query, may be null
1147     * @param fileName2  the second fileName to query, may be null
1148     * @return true if the fileNames are equal, null equals null
1149     * @see IOCase#SYSTEM
1150     */
1151    public static boolean equalsOnSystem(final String fileName1, final String fileName2) {
1152        return equals(fileName1, fileName2, false, IOCase.SYSTEM);
1153    }
1154
1155    //-----------------------------------------------------------------------
1156    /**
1157     * Checks whether two fileNames are equal after both have been normalized.
1158     * <p>
1159     * Both fileNames are first passed to {@link #normalize(String)}.
1160     * The check is then performed in a case-sensitive manner.
1161     *
1162     * @param fileName1  the first fileName to query, may be null
1163     * @param fileName2  the second fileName to query, may be null
1164     * @return true if the fileNames are equal, null equals null
1165     * @see IOCase#SENSITIVE
1166     */
1167    public static boolean equalsNormalized(final String fileName1, final String fileName2) {
1168        return equals(fileName1, fileName2, true, IOCase.SENSITIVE);
1169    }
1170
1171    /**
1172     * Checks whether two fileNames are equal after both have been normalized
1173     * and using the case rules of the system.
1174     * <p>
1175     * Both fileNames are first passed to {@link #normalize(String)}.
1176     * The check is then performed case-sensitive on Unix and
1177     * case-insensitive on Windows.
1178     *
1179     * @param fileName1  the first fileName to query, may be null
1180     * @param fileName2  the second fileName to query, may be null
1181     * @return true if the fileNames are equal, null equals null
1182     * @see IOCase#SYSTEM
1183     */
1184    public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) {
1185        return equals(fileName1, fileName2, true, IOCase.SYSTEM);
1186    }
1187
1188    /**
1189     * Checks whether two fileNames are equal, optionally normalizing and providing
1190     * control over the case-sensitivity.
1191     *
1192     * @param fileName1  the first fileName to query, may be null
1193     * @param fileName2  the second fileName to query, may be null
1194     * @param normalized  whether to normalize the fileNames
1195     * @param caseSensitivity  what case sensitivity rule to use, null means case-sensitive
1196     * @return true if the fileNames are equal, null equals null
1197     * @since 1.3
1198     */
1199    public static boolean equals(
1200            String fileName1, String fileName2,
1201            final boolean normalized, IOCase caseSensitivity) {
1202
1203        if (fileName1 == null || fileName2 == null) {
1204            return fileName1 == null && fileName2 == null;
1205        }
1206        if (normalized) {
1207            fileName1 = normalize(fileName1);
1208            if (fileName1 == null) {
1209                return false;
1210            }
1211            fileName2 = normalize(fileName2);
1212            if (fileName2 == null) {
1213                return false;
1214            }
1215        }
1216        if (caseSensitivity == null) {
1217            caseSensitivity = IOCase.SENSITIVE;
1218        }
1219        return caseSensitivity.checkEquals(fileName1, fileName2);
1220    }
1221
1222    //-----------------------------------------------------------------------
1223    /**
1224     * Checks whether the extension of the fileName is that specified.
1225     * <p>
1226     * This method obtains the extension as the textual part of the fileName
1227     * after the last dot. There must be no directory separator after the dot.
1228     * The extension check is case-sensitive on all platforms.
1229     *
1230     * @param fileName  the fileName to query, null returns false
1231     * @param extension  the extension to check for, null or empty checks for no extension
1232     * @return true if the fileName has the specified extension
1233     * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes
1234     */
1235    public static boolean isExtension(final String fileName, final String extension) {
1236        if (fileName == null) {
1237            return false;
1238        }
1239        requireNonNullChars(fileName);
1240
1241        if (extension == null || extension.isEmpty()) {
1242            return indexOfExtension(fileName) == NOT_FOUND;
1243        }
1244        final String fileExt = getExtension(fileName);
1245        return fileExt.equals(extension);
1246    }
1247
1248    /**
1249     * Checks whether the extension of the fileName is one of those specified.
1250     * <p>
1251     * This method obtains the extension as the textual part of the fileName
1252     * after the last dot. There must be no directory separator after the dot.
1253     * The extension check is case-sensitive on all platforms.
1254     *
1255     * @param fileName  the fileName to query, null returns false
1256     * @param extensions  the extensions to check for, null checks for no extension
1257     * @return true if the fileName is one of the extensions
1258     * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes
1259     */
1260    public static boolean isExtension(final String fileName, final String... extensions) {
1261        if (fileName == null) {
1262            return false;
1263        }
1264        requireNonNullChars(fileName);
1265
1266        if (extensions == null || extensions.length == 0) {
1267            return indexOfExtension(fileName) == NOT_FOUND;
1268        }
1269        final String fileExt = getExtension(fileName);
1270        for (final String extension : extensions) {
1271            if (fileExt.equals(extension)) {
1272                return true;
1273            }
1274        }
1275        return false;
1276    }
1277
1278    /**
1279     * Checks whether the extension of the fileName is one of those specified.
1280     * <p>
1281     * This method obtains the extension as the textual part of the fileName
1282     * after the last dot. There must be no directory separator after the dot.
1283     * The extension check is case-sensitive on all platforms.
1284     *
1285     * @param fileName  the fileName to query, null returns false
1286     * @param extensions  the extensions to check for, null checks for no extension
1287     * @return true if the fileName is one of the extensions
1288     * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes
1289     */
1290    public static boolean isExtension(final String fileName, final Collection<String> extensions) {
1291        if (fileName == null) {
1292            return false;
1293        }
1294        requireNonNullChars(fileName);
1295
1296        if (extensions == null || extensions.isEmpty()) {
1297            return indexOfExtension(fileName) == NOT_FOUND;
1298        }
1299        final String fileExt = getExtension(fileName);
1300        for (final String extension : extensions) {
1301            if (fileExt.equals(extension)) {
1302                return true;
1303            }
1304        }
1305        return false;
1306    }
1307
1308    //-----------------------------------------------------------------------
1309    /**
1310     * Checks a fileName to see if it matches the specified wildcard matcher,
1311     * always testing case-sensitive.
1312     * <p>
1313     * The wildcard matcher uses the characters '?' and '*' to represent a
1314     * single or multiple (zero or more) wildcard characters.
1315     * This is the same as often found on Dos/Unix command lines.
1316     * The check is case-sensitive always.
1317     * <pre>
1318     * wildcardMatch("c.txt", "*.txt")      --&gt; true
1319     * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1320     * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1321     * wildcardMatch("c.txt", "*.???")      --&gt; true
1322     * wildcardMatch("c.txt", "*.????")     --&gt; false
1323     * </pre>
1324     * N.B. the sequence "*?" does not work properly at present in match strings.
1325     *
1326     * @param fileName  the fileName to match on
1327     * @param wildcardMatcher  the wildcard string to match against
1328     * @return true if the fileName matches the wildcard string
1329     * @see IOCase#SENSITIVE
1330     */
1331    public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) {
1332        return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE);
1333    }
1334
1335    /**
1336     * Checks a fileName to see if it matches the specified wildcard matcher
1337     * using the case rules of the system.
1338     * <p>
1339     * The wildcard matcher uses the characters '?' and '*' to represent a
1340     * single or multiple (zero or more) wildcard characters.
1341     * This is the same as often found on Dos/Unix command lines.
1342     * The check is case-sensitive on Unix and case-insensitive on Windows.
1343     * <pre>
1344     * wildcardMatch("c.txt", "*.txt")      --&gt; true
1345     * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1346     * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1347     * wildcardMatch("c.txt", "*.???")      --&gt; true
1348     * wildcardMatch("c.txt", "*.????")     --&gt; false
1349     * </pre>
1350     * N.B. the sequence "*?" does not work properly at present in match strings.
1351     *
1352     * @param fileName  the fileName to match on
1353     * @param wildcardMatcher  the wildcard string to match against
1354     * @return true if the fileName matches the wildcard string
1355     * @see IOCase#SYSTEM
1356     */
1357    public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) {
1358        return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM);
1359    }
1360
1361    /**
1362     * Checks a fileName to see if it matches the specified wildcard matcher
1363     * allowing control over case-sensitivity.
1364     * <p>
1365     * The wildcard matcher uses the characters '?' and '*' to represent a
1366     * single or multiple (zero or more) wildcard characters.
1367     * N.B. the sequence "*?" does not work properly at present in match strings.
1368     *
1369     * @param fileName  the fileName to match on
1370     * @param wildcardMatcher  the wildcard string to match against
1371     * @param caseSensitivity  what case sensitivity rule to use, null means case-sensitive
1372     * @return true if the fileName matches the wildcard string
1373     * @since 1.3
1374     */
1375    public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase caseSensitivity) {
1376        if (fileName == null && wildcardMatcher == null) {
1377            return true;
1378        }
1379        if (fileName == null || wildcardMatcher == null) {
1380            return false;
1381        }
1382        if (caseSensitivity == null) {
1383            caseSensitivity = IOCase.SENSITIVE;
1384        }
1385        final String[] wcs = splitOnTokens(wildcardMatcher);
1386        boolean anyChars = false;
1387        int textIdx = 0;
1388        int wcsIdx = 0;
1389        final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length);
1390
1391        // loop around a backtrack stack, to handle complex * matching
1392        do {
1393            if (!backtrack.isEmpty()) {
1394                final int[] array = backtrack.pop();
1395                wcsIdx = array[0];
1396                textIdx = array[1];
1397                anyChars = true;
1398            }
1399
1400            // loop whilst tokens and text left to process
1401            while (wcsIdx < wcs.length) {
1402
1403                if (wcs[wcsIdx].equals("?")) {
1404                    // ? so move to next text char
1405                    textIdx++;
1406                    if (textIdx > fileName.length()) {
1407                        break;
1408                    }
1409                    anyChars = false;
1410
1411                } else if (wcs[wcsIdx].equals("*")) {
1412                    // set any chars status
1413                    anyChars = true;
1414                    if (wcsIdx == wcs.length - 1) {
1415                        textIdx = fileName.length();
1416                    }
1417
1418                } else {
1419                    // matching text token
1420                    if (anyChars) {
1421                        // any chars then try to locate text token
1422                        textIdx = caseSensitivity.checkIndexOf(fileName, textIdx, wcs[wcsIdx]);
1423                        if (textIdx == NOT_FOUND) {
1424                            // token not found
1425                            break;
1426                        }
1427                        final int repeat = caseSensitivity.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]);
1428                        if (repeat >= 0) {
1429                            backtrack.push(new int[] {wcsIdx, repeat});
1430                        }
1431                    } else if (!caseSensitivity.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) {
1432                        // matching from current position
1433                        // couldn't match token
1434                        break;
1435                    }
1436
1437                    // matched text token, move text index to end of matched token
1438                    textIdx += wcs[wcsIdx].length();
1439                    anyChars = false;
1440                }
1441
1442                wcsIdx++;
1443            }
1444
1445            // full match
1446            if (wcsIdx == wcs.length && textIdx == fileName.length()) {
1447                return true;
1448            }
1449
1450        } while (!backtrack.isEmpty());
1451
1452        return false;
1453    }
1454
1455    /**
1456     * Splits a string into a number of tokens.
1457     * The text is split by '?' and '*'.
1458     * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1459     *
1460     * @param text  the text to split
1461     * @return the array of tokens, never null
1462     */
1463    static String[] splitOnTokens(final String text) {
1464        // used by wildcardMatch
1465        // package level so a unit test may run on this
1466
1467        if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) {
1468            return new String[] { text };
1469        }
1470
1471        final char[] array = text.toCharArray();
1472        final ArrayList<String> list = new ArrayList<>();
1473        final StringBuilder buffer = new StringBuilder();
1474        char prevChar = 0;
1475        for (final char ch : array) {
1476            if (ch == '?' || ch == '*') {
1477                if (buffer.length() != 0) {
1478                    list.add(buffer.toString());
1479                    buffer.setLength(0);
1480                }
1481                if (ch == '?') {
1482                    list.add("?");
1483                } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*'
1484                    list.add("*");
1485                }
1486            } else {
1487                buffer.append(ch);
1488            }
1489            prevChar = ch;
1490        }
1491        if (buffer.length() != 0) {
1492            list.add(buffer.toString());
1493        }
1494
1495        return list.toArray(EMPTY_STRING_ARRAY);
1496    }
1497
1498    /**
1499     * Checks whether a given string is a valid host name according to
1500     * RFC 3986.
1501     *
1502     * <p>Accepted are IP addresses (v4 and v6) as well as what the
1503     * RFC calls a "reg-name". Percent encoded names don't seem to be
1504     * valid names in UNC paths.</p>
1505     *
1506     * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1507     * @param name the hostname to validate
1508     * @return true if the given name is a valid host name
1509     */
1510    private static boolean isValidHostName(final String name) {
1511        return isIPv6Address(name) || isRFC3986HostName(name);
1512    }
1513
1514    private static final Pattern IPV4_PATTERN =
1515        Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$");
1516    private static final int IPV4_MAX_OCTET_VALUE = 255;
1517
1518    /**
1519     * Checks whether a given string represents a valid IPv4 address.
1520     *
1521     * @param name the name to validate
1522     * @return true if the given name is a valid IPv4 address
1523     */
1524    // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address
1525    private static boolean isIPv4Address(final String name) {
1526        final Matcher m = IPV4_PATTERN.matcher(name);
1527        if (!m.matches() || m.groupCount() != 4) {
1528            return false;
1529        }
1530
1531        // verify that address subgroups are legal
1532        for (int i = 1; i <= 4; i++) {
1533            final String ipSegment = m.group(i);
1534            final int iIpSegment = Integer.parseInt(ipSegment);
1535            if (iIpSegment > IPV4_MAX_OCTET_VALUE) {
1536                return false;
1537            }
1538
1539            if (ipSegment.length() > 1 && ipSegment.startsWith("0")) {
1540                return false;
1541            }
1542
1543        }
1544
1545        return true;
1546    }
1547
1548    private static final int IPV6_MAX_HEX_GROUPS = 8;
1549    private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4;
1550    private static final int MAX_UNSIGNED_SHORT = 0xffff;
1551    private static final int BASE_16 = 16;
1552
1553    // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address
1554    /**
1555     * Checks whether a given string represents a valid IPv6 address.
1556     *
1557     * @param inet6Address the name to validate
1558     * @return true if the given name is a valid IPv6 address
1559     */
1560    private static boolean isIPv6Address(final String inet6Address) {
1561        final boolean containsCompressedZeroes = inet6Address.contains("::");
1562        if (containsCompressedZeroes && (inet6Address.indexOf("::") != inet6Address.lastIndexOf("::"))) {
1563            return false;
1564        }
1565        if ((inet6Address.startsWith(":") && !inet6Address.startsWith("::"))
1566                || (inet6Address.endsWith(":") && !inet6Address.endsWith("::"))) {
1567            return false;
1568        }
1569        String[] octets = inet6Address.split(":");
1570        if (containsCompressedZeroes) {
1571            final List<String> octetList = new ArrayList<>(Arrays.asList(octets));
1572            if (inet6Address.endsWith("::")) {
1573                // String.split() drops ending empty segments
1574                octetList.add("");
1575            } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) {
1576                octetList.remove(0);
1577            }
1578            octets = octetList.toArray(EMPTY_STRING_ARRAY);
1579        }
1580        if (octets.length > IPV6_MAX_HEX_GROUPS) {
1581            return false;
1582        }
1583        int validOctets = 0;
1584        int emptyOctets = 0; // consecutive empty chunks
1585        for (int index = 0; index < octets.length; index++) {
1586            final String octet = octets[index];
1587            if (octet.isEmpty()) {
1588                emptyOctets++;
1589                if (emptyOctets > 1) {
1590                    return false;
1591                }
1592            } else {
1593                emptyOctets = 0;
1594                // Is last chunk an IPv4 address?
1595                if (index == octets.length - 1 && octet.contains(".")) {
1596                    if (!isIPv4Address(octet)) {
1597                        return false;
1598                    }
1599                    validOctets += 2;
1600                    continue;
1601                }
1602                if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) {
1603                    return false;
1604                }
1605                int octetInt = 0;
1606                try {
1607                    octetInt = Integer.parseInt(octet, BASE_16);
1608                } catch (final NumberFormatException e) {
1609                    return false;
1610                }
1611                if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) {
1612                    return false;
1613                }
1614            }
1615            validOctets++;
1616        }
1617        return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes);
1618    }
1619
1620    private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$");
1621
1622    /**
1623     * Checks whether a given string is a valid host name according to
1624     * RFC 3986 - not accepting IP addresses.
1625     *
1626     * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1627     * @param name the hostname to validate
1628     * @return true if the given name is a valid host name
1629     */
1630    private static boolean isRFC3986HostName(final String name) {
1631        final String[] parts = name.split("\\.", -1);
1632        for (int i = 0; i < parts.length; i++) {
1633            if (parts[i].isEmpty()) {
1634                // trailing dot is legal, otherwise we've hit a .. sequence
1635                return i == parts.length - 1;
1636            }
1637            if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) {
1638                return false;
1639            }
1640        }
1641        return true;
1642    }
1643}