
/*
 * de.unkrig.commons - A general-purpose Java class library
 *
 * Copyright (c) 2014, Arno Unkrig
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
 * following conditions are met:
 *
 *    1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
 *       following disclaimer.
 *    2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
 *       following disclaimer in the documentation and/or other materials provided with the distribution.
 *    3. The name of the author may not be used to endorse or promote products derived from this software without
 *       specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

package de.unkrig.commons.text.pattern;

import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.nio.CharBuffer;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import de.unkrig.commons.nullanalysis.Nullable;

/** {@link Pattern}-related utility methods. */
public final
class PatternUtil {

    private static final Logger LOGGER = Logger.getLogger(PatternUtil.class.getName());

    /** Buffer capacity used by {@link #replaceAll(String, Reader, Pattern, String, Writer)}. */
    private static final int DEFAULT_BUFFER_CAPACITY = 8192;

    private PatternUtil() {}

    /**
     * Reads text from {@code in}, replaces all occurrences of {@code pattern} with {@code replacement}, and
     * writes the result to {@code out}.
     * <p>
     * The pattern search is stream-oriented, not line-oriented, i.e. matches are found even across line boundaries.
     * Thus the {@code pattern} should have been compiled with the {@link Pattern#MULTILINE} flag.
     * <p>
     * Neither {@code in} nor {@code out} is closed or flushed by this method.
     *
     * @param context     Only for logging; e.g. the name of the file being transformed
     * @param replacement Interpreted as described in {@link Matcher#appendReplacement(StringBuffer, String)}, i.e.
     *                    may contain group references
     * @throws IOException Reading from {@code in} or writing to {@code out} failed
     */
    public static void
    replaceAll(String context, Reader in, Pattern pattern, String replacement, Writer out) throws IOException {
        PatternUtil.replaceAll(
            context,
            in,
            pattern,
            PatternUtil.constantReplacer(context, replacement),
            out,
            PatternUtil.DEFAULT_BUFFER_CAPACITY
        );
    }

    /**
     * Decides whether and how a match is to be replaced.
     */
    public
    interface Replacer {

        /**
         * @param context Only for logging; e.g. the name of the file being transformed
         * @param match   The text that was matched by the pattern
         * @return        The {@link Matcher#appendReplacement(StringBuffer, String) replacement} for this match, or
         *                {@code null} to indicate that this match should not be replaced
         * @see           Matcher#appendReplacement(StringBuffer, String)
         */
        @Nullable String
        getReplacement(String context, CharSequence match);
    }

    /**
     * @param context Not used by the returned {@link Replacer}
     * @return        A {@link Replacer} which constantly produces the given {@code replacement}
     */
    public static Replacer
    constantReplacer(String context, final String replacement) {
        return new Replacer() {
            @Override public String getReplacement(String context, CharSequence match) { return replacement; }
        };
    }

    /**
     * Reads text from {@code in}, finds all occurrences of {@code pattern}, replaces each with the result of {@link
     * Replacer#getReplacement(String, CharSequence)}, and writes the result to {@code out}.
     * <p>
     * The pattern search is stream-oriented, not line-oriented, i.e. matches are found even across line boundaries.
     * Thus the {@code pattern} should have been compiled with the {@link Pattern#MULTILINE} flag.
     * <p>
     * Details:
     * <ul>
     *   <li>
     *     If the {@code replacer} returns {@code null} for a match, then the matched text is copied to {@code out}
     *     unchanged, and the match is not counted as a substitution.
     *   </li>
     *   <li>
     *     A zero-length match at the very beginning of the currently buffered text is never passed to the {@code
     *     replacer}; instead, one character is copied to {@code out} and the search continues after it.
     *   </li>
     *   <li>
     *     For as long as the matcher reports {@link Matcher#hitEnd()} and there is more input, the internal buffer
     *     is enlarged (doubled), so memory consumption can exceed {@code initialBufferCapacity}.
     *   </li>
     *   <li>Neither {@code in} nor {@code out} is closed or flushed by this method.</li>
     * </ul>
     *
     * @param context               Only for logging; e.g. the name of the file being transformed
     * @param initialBufferCapacity The initial capacity of the internal character buffer, which is enlarged on
     *                              demand; zero is treated like one
     * @return                      The number of substitutions that were executed
     * @throws IOException          Reading from {@code in} or writing to {@code out} failed
     */
    public static int
    replaceAll(String context, Reader in, Pattern pattern, Replacer replacer, Writer out, int initialBufferCapacity)
    throws IOException {

        PatternUtil.LOGGER.log(Level.FINE, "{0}: Replace all matches of ''{1}''", new Object[] { context, pattern });

        // A zero-capacity buffer could never receive any input, and the text would silently be dropped. Since the
        // buffer grows on demand anyway, start with the smallest capacity that works. (Negative values are still
        // rejected by "CharBuffer.allocate()".)
        final int initialCapacity = initialBufferCapacity == 0 ? 1 : initialBufferCapacity;

        int        substitutionCount = 0;
        CharBuffer cb                = CharBuffer.allocate(initialCapacity);

        for (;;) {

            // Fill the buffer as far as possible (limited by the buffer capacity or by EOI).
            PatternUtil.fill(in, cb);
            if (cb.position() == 0) break; // End-of-input.
            cb.flip();

            // Find the next match.
            Matcher m     = pattern.matcher(cb);
            boolean found = m.find();

            // As long as the matcher hits the end of the buffered text, more input could change the result, so
            // read more data and match again.
            while (m.hitEnd()) {

                boolean enlarged = cb.limit() == cb.capacity();
                if (enlarged) {

                    // The buffer is completely filled; replace it with one of double capacity.
                    PatternUtil.LOGGER.finest("Increasing buffer size");
                    cb = CharBuffer.allocate(cb.capacity() * 2).append(cb);
                } else {

                    // There's room left in the CharBuffer; switch it back to "fill mode" (the position is zero, so
                    // the content does not move).
                    cb.compact();
                }

                boolean endOfInput = in.read(cb) == -1;
                if (!endOfInput) PatternUtil.fill(in, cb);
                cb.flip();

                // At end-of-input the buffered text is unchanged. If the buffer object is also still the same, then
                // the existing matcher and its result remain valid.
                if (endOfInput && !enlarged) break;

                m     = pattern.matcher(cb);
                found = m.find();

                if (endOfInput) break;
            }

            if (!found) {
                out.append(cb);
                cb.clear();
                continue;
            }

            if (m.end() == 0) {

                // Zero-length match at the start of the buffered text: It consumes nothing, so copy one character
                // through to guarantee progress. (The buffer is never empty at this point.)
                out.append(cb.get());
                cb.compact();
                continue;
            }

            // Take an immutable snapshot of the match; a view on the buffer would be overwritten by the "compact()"
            // below if the replacer (or a log handler) held on to it.
            String match       = m.group();
            String replacement = replacer.getReplacement(context, match);
            if (replacement == null) {

                // Not to be replaced; copy the text preceding the match and the match itself.
                out.append(cb, 0, m.end());
            } else {

                if (PatternUtil.LOGGER.isLoggable(Level.CONFIG)) {
                    PatternUtil.LOGGER.log(
                        Level.CONFIG,
                        "{0}: Replacing ''{1}'' with ''{2}''",
                        new Object[] { context, match, replacement }
                    );
                }

                // "appendReplacement()" yields the text preceding the match, followed by the expanded replacement.
                StringBuffer sb = new StringBuffer();
                m.appendReplacement(sb, replacement);
                substitutionCount++;
                out.append(sb);
            }

            // Discard everything up to the end of the match, and keep the rest for the next iteration.
            cb.position(m.end());
            cb.compact();

            // If the CharBuffer was enlarged (due to 'hitEnd()'), shrink it to its initial size (if possible).
            if (cb.capacity() > initialCapacity && cb.position() <= initialCapacity) {
                PatternUtil.LOGGER.finest("Restoring initial buffer size");
                cb.flip();
                cb = CharBuffer.allocate(initialCapacity).append(cb);
            }
        }

        PatternUtil.LOGGER.log(
            Level.FINE,
            "{0}: Replaced {1,choice,0#no matches|1#one match|1<{1} matches} of ''{2}''",
            new Object[] { context, substitutionCount, pattern }
        );

        return substitutionCount;
    }

    /** Reads from {@code in} into {@code cb} until either {@code cb} is full or {@code in} is at end-of-input. */
    private static void
    fill(Reader in, CharBuffer cb) throws IOException {
        while (cb.hasRemaining()) {
            if (in.read(cb) == -1) return;
        }
    }
}
