package net.aihelp.core.util.elva.text;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import net.aihelp.core.util.elva.util.ParseUtil;

import static java.util.regex.Pattern.CASE_INSENSITIVE;
import static java.util.regex.Pattern.UNICODE_CASE;

/**
 * Normalizes raw request text before it is matched: the text is split into
 * sentences, configured "correction" substitutions are applied, the result is
 * upper-cased and every run of characters outside the accepted ranges is
 * replaced by a single space.
 *
 * <p>For {@link Sentence} objects a {@link Mapper} additionally records, for
 * each space of the processed text, the index of the corresponding space in
 * the sentence's stored original text (or {@code null} for spaces that were
 * introduced by a replacement).
 */
public class Transformations {

    /** Typed empty array handed to {@link List#toArray(Object[])}. */
    private static final Integer[] INTEGER_ARRAY = new Integer[0];

    /**
     * Matches every run of characters that is NOT in one of the accepted ranges
     * (A-Z, 0-9 and a fixed list of BMP ranges such as U+4E00-U+9FA5 and
     * U+0400-U+04FF). Note that the space character itself is not accepted, so
     * runs of spaces are matched (and replaced by one space) as well.
     */
    private static final Pattern FITTING = Pattern.compile("[^A-Z0-9\\u4E00-\\u9FA5\\u3040-\\u31FE\\u0400-\\u04FF\\u0E00-\\u0E7F\\uAC00-\\uD7AF\\u0600-\\u06FF\\u0080-\\u00FF\\u0100-\\u017F\\u0B80-\\u0BFF\\u0370-\\u03FF\\u1000-\\u109F\\u1780-\\u17FF]+");

    /**
     * Matches one of {@code , ; :} (group 1) directly followed by either an
     * ASCII letter or a run of at least two whitespace characters (group 2).
     */
    private static final Pattern WORD_BREAKERS = Pattern.compile("([,;:])([A-Za-z]|[ \\s]{2,})");

    /** Matches two or more consecutive space characters. */
    private static final Pattern MULTI_SPACE = Pattern.compile("[ ]{2,}");

    private final Tokenizer tokenizer;
    private final SentenceSplitter splitter;

    /** Regex -> replacement pairs taken from the "correction" entry; may be {@code null} if that entry is absent. */
    private final Map<String, String> correction;

    // The following fields are declared but never assigned or read in this class.
    private Map<String, String> protection;
    private List<Substitution> person;
    private List<Substitution> person2;
    private List<Substitution> gender;

    /**
     * @param splitters     sentence delimiters, passed to the {@link SentenceSplitter}
     * @param substitutions named substitution maps; the "protection" entry is passed to the
     *                      {@link SentenceSplitter} and the "correction" entry is used by the
     *                      normalization methods of this class
     * @param tokenizer     tokenizer used by the token-based substitution helpers
     */
    public Transformations(List<String> splitters,
                           Map<String, Map<String, String>> substitutions,
                           Tokenizer tokenizer) {
        this.tokenizer = tokenizer;
        this.splitter = new SentenceSplitter(substitutions.get("protection"), splitters);
        this.correction = substitutions.get("correction");
    }

    /**
     * Tracks how the spaces of a text move while replacements are applied to it.
     *
     * <p>The mapping list starts with the index of every space of the initial
     * input. For each replacement, {@link #update(int)} inserts {@code null}
     * entries when the replacement has more spaces than the "find" text and
     * removes entries when it has fewer.
     */
    private static class Mapper {

        /** Position in {@link #input} up to which spaces have already been counted. */
        private int charIndex;
        /** Current position in {@link #mappings}. */
        private int listIndex;
        /** Number of spaces in the current "find" text. */
        private int spaceCount;
        private final List<Integer> mappings = new LinkedList<>();
        private String input;
        private String find;
        private String replace;

        /** Records the index of every space character of {@code input}. */
        public Mapper(String input) {
            char[] chars = input.toCharArray();
            for (int i = 0, n = chars.length; i < n; i++) {
                if (chars[i] == ' ') {
                    mappings.add(i);
                }
            }
        }

        private int spaceCount(String string) {
            return spaceCount(string, 0, string.length());
        }

        /**
         * Counts the spaces of {@code string} in {@code [beginIndex, endIndex)}.
         * The end index is clamped to the string length: callers pass
         * {@code matcher.start() + 1}, which lies one past the end for an empty
         * match at the very end of the input.
         */
        private int spaceCount(String string, int beginIndex, int endIndex) {
            int spaces = 0;
            char[] chars = string.toCharArray();
            int limit = Math.min(endIndex, chars.length);
            for (int i = beginIndex; i < limit; i++) {
                if (chars[i] == ' ') {
                    spaces++;
                }
            }
            return spaces;
        }

        /**
         * Starts a new replacement pass and resets both running indices.
         *
         * @param input   text the upcoming matches refer to
         * @param find    text whose spaces are counted as "spaces before replacement"
         * @param replace replacement text
         */
        public void prepare(String input, String find, String replace) {
            this.input = input;
            this.find = find;
            this.replace = replace;
            spaceCount = spaceCount(find);
            listIndex = 0;
            charIndex = 0;
        }

        /**
         * Adjusts the mapping list for one replacement located at {@code beginIndex}
         * of the prepared input.
         */
        public void update(int beginIndex) {
            // Advance the list position by the spaces skipped since the previous call.
            listIndex += spaceCount(input, charIndex, beginIndex);
            charIndex = beginIndex;

            int n = spaceCount;
            for (int j = 0, m = replace.length(); j < m; j++) {
                // n is decremented once per space of the replacement; once it drops
                // below zero the replacement has more spaces than the find text.
                if (replace.charAt(j) == ' ' && --n < 0) {
                    if (mappings.size() >= listIndex) {
                        mappings.add(listIndex++, null);
                    }
                }
            }

            // Whatever is left in n are spaces that disappeared with the replacement.
            while (n-- > 0) {
                if (mappings.size() > listIndex) {
                    mappings.remove(listIndex); // int argument: removes by index
                }
            }
        }

        public Integer[] toArray() {
            return mappings.toArray(INTEGER_ARRAY);
        }
    }

    /** Builds one {@link Substitution} per entry of {@code inputs}, in the map's iteration order. */
    private List<Substitution> newSubstitutionList(Map<String, String> inputs) {
        List<Substitution> substitutions = new ArrayList<>(inputs.size());
        for (Entry<String, String> entry : inputs.entrySet()) {
            substitutions.add(new Substitution(entry.getKey(), entry.getValue(), tokenizer));
        }
        return substitutions;
    }

    /**
     * Makes sure a {@code , ; :} is followed by a space: a space is inserted
     * before a directly following letter, and a following whitespace run that
     * starts with a space is reduced to one space.
     */
    private String breakWords(String input) {
        Matcher matcher = WORD_BREAKERS.matcher(input);
        StringBuffer buffer = new StringBuffer();
        while (matcher.find()) {
            // group(0) is the whole match, group(1) is the separator (one of , ; :)
            String separator = matcher.group(1);
            String follower = matcher.group(2);
            String replacement;
            if (follower.charAt(0) != ' ') {
                replacement = separator + ' ' + follower;
            } else {
                replacement = separator + ' ';
            }
            // Neither group can contain '$' or '\', so no quoting is required here.
            matcher.appendReplacement(buffer, replacement);
        }
        matcher.appendTail(buffer);
        return buffer.toString();
    }

    /**
     * Upper-cases {@code input} and replaces every run of non-accepted characters by one space.
     */
    private String fit(String input) {
        // NOTE(review): toUpperCase() uses the default locale - confirm this is intended.
        return FITTING.matcher(input.toUpperCase()).replaceAll(" ");
    }

    /**
     * Same as {@link #fit(String)}, additionally reporting every replacement to {@code mapper}.
     */
    private String fit(String input, Mapper mapper) {
        // NOTE(review): toUpperCase() uses the default locale - confirm this is intended.
        String upper = input.toUpperCase();
        Matcher matcher = FITTING.matcher(upper);

        StringBuffer buffer = new StringBuffer();
        while (matcher.find()) {
            // NOTE(review): prepare() resets the mapper's running indices on every
            // match, so update() recounts spaces from the start of the text each
            // time - verify this is the intended bookkeeping.
            mapper.prepare(upper, matcher.group(), " ");
            mapper.update(matcher.start());
            matcher.appendReplacement(buffer, " ");
        }

        matcher.appendTail(buffer);
        return buffer.toString();
    }

    /**
     * Applies every correction (key = regex, value = replacement) to {@code input},
     * case-insensitively. Returns {@code input} unchanged when no correction map
     * was configured.
     */
    private String substitute(String input) {
        if (correction == null) {
            return input;
        }
        String result = input;
        for (Entry<String, String> entry : correction.entrySet()) {
            Pattern pattern = Pattern.compile(entry.getKey(), CASE_INSENSITIVE | UNICODE_CASE);
            result = pattern.matcher(result).replaceAll(entry.getValue());
        }
        return result;
    }

    /**
     * Same as {@link #substitute(String)}, additionally reporting every replacement
     * to {@code mapper}.
     *
     * <p>All matches of each pattern are replaced. The loop deliberately relies on
     * {@link Matcher#find()} alone: {@link Matcher#hitEnd()} only tells whether the
     * last search touched the end of the input, not whether further matches exist,
     * so using it as a loop guard could leave later occurrences unreplaced.
     */
    private String substitute(String input, Mapper mapper) {
        if (correction == null) {
            return input;
        }
        String result = input;
        StringBuffer buffer = new StringBuffer();
        for (Entry<String, String> entry : correction.entrySet()) {
            String find = entry.getKey();
            String replace = entry.getValue();
            Matcher matcher = Pattern.compile(find, CASE_INSENSITIVE | UNICODE_CASE).matcher(result);
            mapper.prepare(result, find, replace);
            while (matcher.find()) {
                // +1 moves the reported position past the first matched character.
                mapper.update(matcher.start() + 1);
                matcher.appendReplacement(buffer, replace);
            }
            matcher.appendTail(buffer);
            result = buffer.toString();
            buffer.setLength(0);
        }
        return result;
    }

    /**
     * Tokenizes {@code input} and, at each token position, tries the given
     * substitutions in order; the first one that advances the position wins and
     * scanning continues from the position it returned.
     */
    private String transform(String input, List<Substitution> substitutions) {
        List<String> tokens = tokenizer.tokenize(input);
        outer:
        for (int i = 0; i < tokens.size(); ) {
            int offset = i;
            for (final Substitution substitution : substitutions) {
                i = substitution.substitute(offset, tokens);
                if (i > offset) {
                    continue outer;
                }
            }
            i++;
        }
        return tokenizer.toString(tokens);
    }

    /** Surrounds {@code text} with one space on each side and collapses runs of spaces into one. */
    private static String pad(String text) {
        return MULTI_SPACE.matcher(' ' + text + ' ').replaceAll(" ");
    }

    /**
     * Pads the request's original text, splits it into sentences, normalizes each
     * sentence and stores both the padded text and the sentences back on the request.
     */
    public void normalization(Request request) {
        String original = pad(request.getOriginal());
        String[] input = splitter.split(original);
        Sentence[] sentences = new Sentence[input.length];
        for (int i = 0, n = input.length; i < n; i++) {
            sentences[i] = new Sentence(input[i]);
            normalization(sentences[i]);
        }
        request.setOriginal(original);
        request.setSentences(sentences);
    }

    /**
     * Normalizes one sentence in place: its original text is replaced by the
     * word-broken, padded form, and its space mappings and normalized text are set.
     */
    public void normalization(Sentence sentence) {
        String input = breakWords(sentence.getOriginal());
        input = ParseUtil.blankSplit(input);
        input = pad(input);
        sentence.setOriginal(input);
        Mapper mapper = new Mapper(input);
        input = substitute(input, mapper);
        input = fit(input, mapper);
        sentence.setMappings(mapper.toArray());
        sentence.setNormalized(input);
    }

    /**
     * Normalizes a plain string: pads it, applies the corrections, upper-cases it
     * and replaces every run of non-accepted characters by one space.
     *
     * @return the normalized text
     */
    public String normalization(String input) {
        String padded = pad(input);
        return fit(substitute(padded));
    }

}