/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.utils.corpuslinguistics.tokenizer;

import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.corpuslinguistics.tokenizer.DefaultWordTokenizer;
import edu.northwestern.at.utils.corpuslinguistics.tokenizer.WordTokenizer;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Word tokenizer that overrides {@link #preprocessToken} to clean up the
 * special characters {@code |}, {@code +} and a leading {@code _} in a token,
 * and to split tokens of the form <em>digits</em>.<em>letters</em>.
 *
 * <p>{@code preprocessToken} builds its own {@link Matcher} instances on each
 * call, so it does not share regex matching state between tokenizer instances
 * or threads.</p>
 */
public class EEBOWordTokenizer
extends DefaultWordTokenizer
implements WordTokenizer {
    /**
     * Matches one or more digits, a period, then one or more letters.
     * Group 1 is the digit run; group 2 is the complete letter run.
     * (The quantifier is inside group 2: a repeated capturing group such as
     * {@code (\p{L})+} would retain only the last letter matched.)
     */
    protected static final Pattern numberDotSpellingPattern = Pattern.compile("(\\d+)\\.(\\p{L}+)");

    /**
     * Shared matcher for {@link #numberDotSpellingPattern}. Kept only so
     * existing subclasses that reference it still compile; it is no longer
     * used by {@link #preprocessToken} because a {@code Matcher} is not safe
     * for use by multiple concurrent threads.
     */
    protected static final Matcher numberDotSpellingMatcher = numberDotSpellingPattern.matcher("");

    /** Matches a leading underscore followed by two ASCII capital letters. */
    protected static Pattern underlineCapCapPattern = Pattern.compile("^_([ABCDEFGHIJKLMNOPQRSTUVWXYZ])([ABCDEFGHIJKLMNOPQRSTUVWXYZ])");

    /**
     * Shared matcher for {@link #underlineCapCapPattern}. Kept only so
     * existing subclasses that reference it still compile; it is no longer
     * used by {@link #preprocessToken}.
     */
    protected static final Matcher underlineCapCapMatcher = underlineCapCapPattern.matcher("");

    /**
     * Cleans up a token before it is returned to the caller.
     *
     * <p>Steps, applied in order:</p>
     * <ol>
     *   <li>Unless the token is exactly {@code "|"}, it is passed through
     *       {@code StringUtils.replaceAll(token, "|", "")} (presumably a
     *       literal removal of every {@code |} -- confirm against
     *       {@code StringUtils}).</li>
     *   <li>If the result is longer than one character and starts with
     *       {@code +}: unless {@code CharUtils.isPunctuationOrSymbol} reports
     *       true for it, the {@code +} is dropped and, when
     *       {@code tokenList} is not empty, the last list entry is removed
     *       and prepended to the result. If it does not start with
     *       {@code +}, the result is passed through
     *       {@code StringUtils.replaceAll(result, "+", "")}.</li>
     *   <li>If the result starts with {@code _} followed by two capital
     *       letters, the underscore is dropped and the second capital is
     *       lower-cased.</li>
     *   <li>If the whole result is <em>digits</em>.<em>letters</em>, the
     *       digits plus the period are appended to {@code tokenList} and the
     *       letters become the result.</li>
     * </ol>
     *
     * @param token     the token to preprocess; must not be {@code null}
     * @param tokenList the tokens produced so far; may be modified (its last
     *                  entry removed, or a new entry appended)
     * @return the preprocessed token
     */
    @Override
    public String preprocessToken(String token, List<String> tokenList) {
        String result = token;

        // A lone "|" is left as-is; otherwise "|" characters are stripped.
        if (!result.equals("|")) {
            result = StringUtils.replaceAll(token, "|", "");
        }

        if (result.length() > 1) {
            if (result.charAt(0) == '+') {
                if (!CharUtils.isPunctuationOrSymbol(result)) {
                    // Leading "+" joins this token onto the previous one.
                    result = result.substring(1);
                    if (!tokenList.isEmpty()) {
                        // List.remove(int) returns the element it removed.
                        result = tokenList.remove(tokenList.size() - 1) + result;
                    }
                }
            } else {
                result = StringUtils.replaceAll(result, "+", "");
            }
        }

        if (result.length() > 1 && result.charAt(0) == '_') {
            Matcher capCap = underlineCapCapPattern.matcher(result);
            if (capCap.find()) {
                // Drop the underscore, keep the first capital, lower-case the
                // second; substring(3) is "" when nothing follows.
                result = "" + result.charAt(1)
                    + Character.toLowerCase(result.charAt(2))
                    + result.substring(3);
            }
        }

        if (result.length() > 2 && result.indexOf('.') > 0) {
            Matcher numberDot = numberDotSpellingPattern.matcher(result);
            if (numberDot.matches()) {
                // Emit "<digits>." as its own token; continue with the letters.
                tokenList.add(numberDot.group(1) + ".");
                result = numberDot.group(2);
            }
        }

        return result;
    }
}

