/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.ae.acronymtagger.main;

import de.julielab.jcore.ae.acronymtagger.entries.AcronymEntry;
import de.julielab.jcore.ae.acronymtagger.entries.FullformEntry;
import de.julielab.jcore.ae.acronymtagger.main.ConsistencyAnnotator;
import de.julielab.jcore.ae.acronymtagger.main.Postprocessing;
import de.julielab.jcore.types.Abbreviation;
import de.julielab.jcore.types.AbbreviationLongform;
import de.julielab.jcore.types.Sentence;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorContextException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceProcessException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class AcronymAnnotator
extends JCasAnnotator_ImplBase {
    /** Component id written to every annotation this annotator creates. */
    private static final String COMPONENT_ID = "de.julielab.jcore.ae.acronymtagger.AcronymAnnotator";
    /** Name of the configuration parameter holding the location of the acronym list. */
    public static final String PARAM_ACROLIST = "AcroList";
    /** Name of the configuration parameter that switches the consistency annotation step on or off. */
    public static final String PARAM_CONSISTENCY_ANNO = "ConsistencyAnno";
    /** Name of the configuration parameter that switches the postprocessing step on or off. */
    // NOTE(review): private while the sibling parameter names are public - confirm whether that is intended.
    private static final String PARAM_POSTPROCESSING = "Postprocessing";
    /** Name of the configuration parameter that sets {@link #MAXLENGTHFACTOR}. */
    public static final String PARAM_MAXLENGTH_FACTOR = "MaxLength";
    /**
     * Factor multiplied with the acronym length in {@code getPotFullformStart} to
     * bound how many tokens before the acronym are considered for the full form.
     */
    @ConfigurationParameter(name="MaxLength", defaultValue={"5"})
    int MAXLENGTHFACTOR;
    /**
     * Lower-case stop words that are not counted as tokens when the search window
     * for a full form is determined in {@code getPotFullformStart}.
     * The array is looked up with {@link Arrays#binarySearch(Object[], Object)} and
     * therefore MUST stay sorted in natural {@link String} order; insert new entries
     * at their sorted position.
     */
    private static final String[] STOPWORDS = new String[]{"a", "about", "above", "across", "after", "afterwards", "again", "against", "all", "almost", "alone", "along", "already", "also", "although", "always", "am", "among", "amongst", "amoungst", "amount", "an", "and", "another", "any", "anyhow", "anyone", "anything", "anyway", "anywhere", "are", "around", "as", "at", "back", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "bill", "both", "bottom", "but", "by", "call", "can", "cannot", "cant", "co", "computer", "con", "could", "couldnt", "cry", "de", "describe", "detail", "do", "done", "down", "due", "during", "each", "eg", "eight", "either", "eleven", "else", "elsewhere", "empty", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", "fifteen", "fify", "fill", "find", "fire", "first", "five", "for", "former", "formerly", "forty", "found", "four", "from", "front", "full", "further", "get", "give", "go", "had", "has", "hasnt", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "hundred", "i", "ie", "if", "in", "inc", "indeed", "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter", "latterly", "least", "less", "ltd", "made", "many", "may", "me", "meanwhile", "might", "mill", "mine", "more", "moreover", "most", "mostly", "move", "much", "must", "my", "myself", "name", "namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own", "part", "per", "perhaps", "please", "put", "rather", "re", "same", "see", "seem", "seemed", "seeming", "seems", "serious", "several", "she", "should", 
"show", "side", "since", "sincere", "six", "sixty", "so", "some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "system", "take", "ten", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein", "thereupon", "these", "they", "thick", "thin", "third", "this", "those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "top", "toward", "towards", "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", "whatever", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who", "whoever", "whole", "whom", "whose", "why", "will", "with", "within", "without", "would", "yet", "you", "your", "yours", "yourself", "yourselves"};
    /**
     * Regex for a bracketed acronym candidate: an opening '(' or '[', a run of word
     * characters/hyphens without blanks that contains at least one upper-case
     * letter A-Z next to another word character, and a closing ')' or ']'.
     */
    private static final String ABBREVIATION = "[\\(\\[][-\\w]*?([A-Z]-?\\w|\\w-?[A-Z])[-\\w]*?[\\)\\]]";
    /** Compiled form of {@link #ABBREVIATION}; {@link Pattern} is immutable, so one shared instance suffices. */
    private static final Pattern ABBR_PATTERN = Pattern.compile(ABBREVIATION);
    /**
     * Variant of {@link #ABBREVIATION} for candidates that start with one or more
     * lower-case letters a-z directly after the opening bracket.
     */
    private static final String EMBEDDED_ABBR = "[\\(\\[][a-z]+?([A-Z]-?\\w|\\w-?[A-Z])[-\\w]*?[\\)\\]]";
    /** Compiled form of {@link #EMBEDDED_ABBR}. */
    private static final Pattern EMBEDDED_ABBR_PATTERN = Pattern.compile(EMBEDDED_ABBR);
    /**
     * Regex for a multi-word expression after an opening bracket: a word, a blank,
     * then one or more words each followed by a blank or ')'.
     */
    private static final String LONG_FORM_IN_PARENTHESIS = "[\\(\\[]\\w+ (\\w+[ \\)])+";
    /** Compiled form of {@link #LONG_FORM_IN_PARENTHESIS}. */
    private static final Pattern LONG_FORM_IN_PARENTHESIS_PATTERN = Pattern.compile(LONG_FORM_IN_PARENTHESIS);
    /**
     * When true, {@code process} runs a {@link ConsistencyAnnotator} over the CAS
     * after the sentence-wise acronym detection. The value is read from the UIMA
     * context in {@code initialize}.
     */
    @ConfigurationParameter(name="ConsistencyAnno", defaultValue={"true"})
    private boolean consistencyAnno = false;
    /**
     * When true, {@code process} finishes with {@link Postprocessing#doPostprocessing}.
     * The value is read from the UIMA context in {@code initialize}.
     */
    @ConfigurationParameter(name="Postprocessing", defaultValue={"true"})
    private boolean postprocessing = false;
    /**
     * Acronym to lower-cased full form, filled from the acronym list in
     * {@code setAcroList} and consulted first in {@code findFullformStart}.
     */
    private HashMap<String, String> acro2fullForm;
    /**
     * Location of the optional acronym list; {@code setAcroList} tries it as a file
     * path first and as a classpath resource otherwise.
     */
    @ConfigurationParameter(name="AcroList", mandatory=false)
    private String acroList;
    /** Class-wide SLF4J logger. */
    private static final Logger LOGGER = LoggerFactory.getLogger(AcronymAnnotator.class);

    /**
     * Initializes the annotator from the UIMA context: loads the optional acronym
     * list and reads the {@code ConsistencyAnno}, {@code Postprocessing} and
     * {@code MaxLength} parameters.
     * <p>
     * A parameter that is absent from the context falls back to the default declared
     * on its {@code @ConfigurationParameter} annotation (true, true and 5) instead of
     * failing with a {@link NullPointerException} during unboxing.
     *
     * @param aContext the UIMA context providing the configuration parameters
     * @throws ResourceInitializationException if the acronym list cannot be located,
     *         read or parsed; the original exception is attached as the cause
     */
    @Override
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        // Let the base class store the context so that getContext() works later on.
        super.initialize(aContext);
        LOGGER.info("[JACRO] initializing AcronymAnnotator...");
        try {
            this.setAcroList(aContext);

            Object consistencyValue = aContext.getConfigParameterValue(PARAM_CONSISTENCY_ANNO);
            this.consistencyAnno = consistencyValue == null || (Boolean) consistencyValue;

            Object postprocessingValue = aContext.getConfigParameterValue(PARAM_POSTPROCESSING);
            this.postprocessing = postprocessingValue == null || (Boolean) postprocessingValue;

            Object maxLengthValue = aContext.getConfigParameterValue(PARAM_MAXLENGTH_FACTOR);
            this.MAXLENGTHFACTOR = maxLengthValue == null ? 5 : (Integer) maxLengthValue;

            LOGGER.info(" done");
        }
        catch (AnnotatorContextException | AnnotatorConfigurationException | ResourceProcessException e) {
            // Keep the root cause; it is the only hint at what went wrong with the list.
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Loads the optional acronym dictionary named by the {@code AcroList} parameter
     * into {@code acro2fullForm}.
     * <p>
     * The parameter value is tried as a file system path first; if no such file
     * exists it is looked up as a classpath resource (a leading "/" is added when
     * missing). Each line must consist of exactly two tab-separated columns,
     * acronym and full form; the full form is stored lower-cased. When the
     * parameter is not set, the dictionary simply stays empty.
     * <p>
     * The list is decoded as UTF-8 so that the result does not depend on the
     * platform default charset.
     *
     * @param aContext the UIMA context providing the {@code AcroList} parameter
     * @throws ResourceInitializationException if the list is found neither as file nor on the classpath
     * @throws ResourceProcessException if a line does not have exactly two columns
     * @throws AnnotatorConfigurationException if reading the list fails with an I/O error
     * @throws AnnotatorContextException declared for callers; not thrown by this implementation
     */
    private void setAcroList(UimaContext aContext) throws AnnotatorConfigurationException, AnnotatorContextException, ResourceProcessException, ResourceInitializationException {
        this.acro2fullForm = new HashMap<>();
        this.acroList = (String) aContext.getConfigParameterValue(PARAM_ACROLIST);
        if (this.acroList == null) {
            return;
        }

        File listFile = new File(this.acroList);
        InputStream acroListInputStream;
        if (listFile.exists()) {
            LOGGER.debug("Acronym file at {} exists and will be used.", (Object) this.acroList);
            try {
                acroListInputStream = new FileInputStream(listFile);
            }
            catch (FileNotFoundException e) {
                throw new ResourceInitializationException(e);
            }
        } else {
            String cpResource = this.acroList.startsWith("/") ? this.acroList : "/" + this.acroList;
            LOGGER.debug("Acronym file at {} does not exist. Searching in the classpath for resource {}", (Object) this.acroList, (Object) cpResource);
            acroListInputStream = this.getClass().getResourceAsStream(cpResource);
        }
        if (acroListInputStream == null) {
            throw new ResourceInitializationException("could_not_access_data", new Object[]{this.acroList});
        }

        // Closing the reader also closes the underlying stream.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(acroListInputStream, java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] pair = line.split("\t");
                if (pair.length != 2) {
                    throw new ResourceProcessException("resource_data_not_valid", new Object[]{"faulty line in acroList: " + line});
                }
                this.acro2fullForm.put(pair[0], pair[1].toLowerCase());
            }
        }
        catch (IOException e) {
            LOGGER.error("setAcroList() - specified acroList file cannot be read: " + e.getMessage(), e);
            throw new AnnotatorConfigurationException(e);
        }
        LOGGER.debug("setAcroList() - using acronym list: " + listFile);
    }

    /**
     * Annotates acronyms and their full forms in the given CAS.
     * <p>
     * Every {@link Sentence} annotation is handed to {@code annotate} together with
     * its begin offset. Afterwards the consistency annotation and the
     * postprocessing step are run if they are enabled.
     * A {@link StringIndexOutOfBoundsException} raised on the way is logged
     * (including its stack trace) and otherwise ignored, so the document is not
     * rejected because of it.
     *
     * @param aJCas the CAS to process; must contain sentence annotations for anything to be found
     */
    @Override
    public void process(JCas aJCas) {
        LOGGER.debug("[JACRO] processing document...");
        try {
            JFSIndexRepository indexes = aJCas.getJFSIndexRepository();
            for (Sentence sentence : indexes.getAnnotationIndex(Sentence.type)) {
                String sentenceText = sentence.getCoveredText();
                this.annotate(sentenceText, aJCas, sentence.getBegin());
            }
            if (this.consistencyAnno) {
                ConsistencyAnnotator ca = new ConsistencyAnnotator();
                ca.consistencyAnnotate(aJCas);
            }
            if (this.postprocessing) {
                Postprocessing.doPostprocessing(aJCas);
            }
        }
        catch (StringIndexOutOfBoundsException e) {
            // Best effort by design, but keep the stack trace so the cause can be found.
            LOGGER.error("typical Error in AcronymAnnotator.process() : StringIndexOutOfBounds", e);
        }
    }

    /**
     * Runs the three detection passes on a single sentence: plain bracketed
     * acronyms, acronyms embedded behind lower-case letters, and long forms that
     * themselves stand in parentheses. Any exception is logged and swallowed so
     * that one sentence cannot abort the whole document.
     *
     * @param sentence  text of the sentence
     * @param aJCas     CAS that receives the annotations
     * @param beginSent document offset of the sentence start
     */
    private void annotate(String sentence, JCas aJCas, int beginSent) {
        try {
            Matcher plainMatcher = this.ABBR_PATTERN.matcher(sentence);
            this.processAllMatches(plainMatcher, aJCas, sentence, beginSent, false);

            Matcher embeddedMatcher = this.EMBEDDED_ABBR_PATTERN.matcher(sentence);
            this.processAllMatches(embeddedMatcher, aJCas, sentence, beginSent, true);

            Matcher longFormMatcher = this.LONG_FORM_IN_PARENTHESIS_PATTERN.matcher(sentence);
            this.processLongFormInParantheses(longFormMatcher, sentence, aJCas, beginSent);
        }
        catch (Exception e) {
            LOGGER.error("annotate(String sentence, JCas aJCas, int offset)", (Throwable) e);
        }
    }

    /**
     * Handles the reversed notation "ACRO (long form in parentheses)": for every
     * match of the long-form pattern the initial letters of its words are joined
     * and compared, ignoring case, with the token directly before the bracket.
     * On equality an {@link Abbreviation} for that token and an
     * {@link AbbreviationLongform} for the bracketed text are added to the CAS.
     *
     * @param matcher   matcher of the long-form pattern on {@code sentence}
     * @param sentence  text of the sentence
     * @param aJCas     CAS that receives the annotations
     * @param beginSent document offset of the sentence start
     */
    private void processLongFormInParantheses(Matcher matcher, String sentence, JCas aJCas, int beginSent) {
        int pos = 0;
        while (matcher.find(pos)) {
            // Strip the opening bracket and the trailing blank or ')' of the match.
            int fullformBegin = matcher.start() + 1;
            int fullformEnd = matcher.end() - 1;
            pos = fullformEnd;

            // Blank that terminates the token in front of the bracket (0 if there is none).
            int previousTokenEnd = this.getNextToken(sentence, fullformBegin);
            int boundary = this.getNextToken(sentence, previousTokenEnd - 1);
            // getNextToken yields 0 both for a blank at index 0 and for "reached the
            // sentence start". Only in the first case does the token begin at 1;
            // otherwise the first character of a sentence-initial acronym would be lost.
            int previousTokenStart = boundary == 0 && sentence.charAt(0) != ' ' ? 0 : boundary + 1;
            String previousToken = sentence.substring(previousTokenStart, previousTokenEnd);

            String fullform = matcher.group();
            fullform = fullform.substring(1, fullform.length() - 1);

            // Derive the acronym from the first letter of every blank-separated word.
            StringBuilder acronymBuilder = new StringBuilder();
            for (int i = 0; i < fullform.length(); ++i) {
                char currentChar = fullform.charAt(i);
                boolean wordStart = i == 0 || (fullform.charAt(i - 1) == ' ' && currentChar != ' ');
                if (wordStart) {
                    acronymBuilder.append(currentChar);
                }
            }
            if (!acronymBuilder.toString().equalsIgnoreCase(previousToken)) {
                continue;
            }

            LOGGER.debug("identified full form: " + fullform + " for abbreviation: " + previousToken);
            Abbreviation a = new Abbreviation(aJCas, previousTokenStart + beginSent, previousTokenEnd + beginSent);
            a.setExpan(fullform);
            a.setDefinedHere(true);
            AbbreviationLongform anno = new AbbreviationLongform(aJCas, beginSent + fullformBegin, beginSent + fullformEnd);
            anno.setComponentId(COMPONENT_ID);
            anno.addToIndexes();
            a.setTextReference(anno);
            a.setComponentId(COMPONENT_ID);
            a.addToIndexes();
        }
    }

    /**
     * Tells whether the given string contains at least two upper-case letters.
     * Counted are A-Z and the Latin-1 capitals U+00C0 to U+00DE except the
     * multiplication sign U+00D7.
     *
     * @param acronym the acronym candidate
     * @return true if more than one upper-case letter was counted
     */
    private boolean hasMoreThanOneUpperCase(String acronym) {
        int upperCount = 0;
        for (int i = 0; i < acronym.length(); ++i) {
            char c = acronym.charAt(i);
            boolean asciiUpper = c >= 'A' && c <= 'Z';
            boolean latin1Upper = (c >= '\u00c0' && c <= '\u00d6') || (c >= '\u00d8' && c <= '\u00de');
            if (asciiUpper || latin1Upper) {
                ++upperCount;
            }
        }
        return upperCount > 1;
    }

    /**
     * Walks over all bracketed acronym candidates found by {@code matcher} and, for
     * each candidate with more than one upper-case letter, looks for its full form
     * in the text in front of the bracket. If one is found, an
     * {@link Abbreviation} and the referenced {@link AbbreviationLongform} are
     * added to the CAS.
     *
     * @param matcher   matcher of one of the acronym patterns on {@code sentence}
     * @param aJCas     CAS that receives the annotations
     * @param sentence  text of the sentence
     * @param beginSent document offset of the sentence start
     * @param embedded  true if the acronym starts only at the first upper-case
     *                  letter behind the bracket rather than directly after it
     */
    private void processAllMatches(Matcher matcher, JCas aJCas, String sentence, int beginSent, boolean embedded) {
        int searchFrom = 0;
        while (matcher.find(searchFrom)) {
            int acroStart;
            if (embedded) {
                acroStart = this.getEmbeddedAcroStart(sentence, matcher.start() + 2);
            } else {
                acroStart = matcher.start() + 1;
            }
            int acroEnd = matcher.end() - 1;
            String acronym = sentence.substring(acroStart, acroEnd);

            if (this.hasMoreThanOneUpperCase(acronym)) {
                int searchTextBegin = this.getPotFullformStart(sentence, acroStart, acronym.length());
                String searchText = sentence.substring(searchTextBegin, acroStart);
                if (!searchText.isEmpty()) {
                    // The leading blank lets the very first word count as a word start.
                    int searchResult = this.findFullformStart(" " + searchText, acronym);
                    if (searchResult != -1) {
                        int ffStart = searchTextBegin + searchResult;
                        int ffEnd = this.getFfEnd(sentence, matcher.start() + 1);
                        String fullform = sentence.substring(ffStart, ffEnd);
                        LOGGER.debug("processAllMatches() - identified full form: " + fullform + " for abbreviation: " + acronym);

                        Abbreviation abbreviation = new Abbreviation(aJCas, acroStart + beginSent, acroEnd + beginSent);
                        abbreviation.setExpan(fullform);
                        abbreviation.setDefinedHere(true);

                        AbbreviationLongform longform = new AbbreviationLongform(aJCas, beginSent + ffStart, beginSent + ffEnd);
                        longform.setComponentId(COMPONENT_ID);
                        longform.addToIndexes();

                        abbreviation.setTextReference(longform);
                        abbreviation.setComponentId(COMPONENT_ID);
                        abbreviation.addToIndexes();
                    }
                }
            }

            // Continue behind the current match; stop when that leaves the sentence.
            searchFrom = matcher.end() + 1;
            if (searchFrom >= sentence.length() || searchFrom < 0) {
                break;
            }
        }
    }

    /**
     * Finds the first upper-case letter A-Z at or after the given position.
     *
     * @param sentence  text of the sentence
     * @param acroStart position at which the search starts
     * @return index of the first upper-case letter, or {@code acroStart} itself
     *         if there is none up to the end of the sentence
     */
    private int getEmbeddedAcroStart(String sentence, int acroStart) {
        for (int i = acroStart; i < sentence.length(); i++) {
            char candidate = sentence.charAt(i);
            if (candidate >= 'A' && candidate <= 'Z') {
                return i;
            }
        }
        return acroStart;
    }

    /**
     * Computes the end offset of the full form in front of a bracketed acronym.
     *
     * @param sentence  text of the sentence
     * @param acroStart index of the first character behind the opening bracket
     * @return {@code acroStart - 2} if the character in front of the bracket is
     *         whitespace, otherwise {@code acroStart - 1}
     */
    private int getFfEnd(String sentence, int acroStart) {
        int beforeBracket = acroStart - 2;
        boolean blankBeforeBracket = Character.isWhitespace(sentence.charAt(beforeBracket));
        return blankBeforeBracket ? beforeBracket : acroStart - 1;
    }

    /**
     * Determines where the full form of {@code acro} starts inside {@code potFF}.
     * <p>
     * If the acronym dictionary has an entry for {@code acro} and its (lower-cased)
     * full form occurs in the lower-cased {@code potFF}, the position of that
     * occurrence minus one is returned. Otherwise both strings are scanned from
     * right to left, matching every acronym character (case-insensitively) against
     * a character of {@code potFF}; whitespace, digits, '-' and '+' in the acronym
     * are skipped.
     * <p>
     * The caller passes {@code potFF} with one blank prepended and adds the result
     * to the begin offset of the un-prefixed search text.
     *
     * @param potFF text in which the full form is searched
     * @param acro  the acronym
     * @return the index one to the left of the located start within {@code potFF},
     *         or -1 if no full form was found
     */
    private int findFullformStart(String potFF, String acro) {
        int shortIndex = acro.length() - 1;
        int longIndex = potFF.length() - 1;
        String fullForm = "";
        LOGGER.debug("findFullformStart() -- acro: " + acro);
        LOGGER.debug("findFullformStart() -- potential FF: " + potFF);
        // Dictionary shortcut: a known full form wins over the character matching below.
        if (this.acro2fullForm.containsKey(acro)) {
            fullForm = this.acro2fullForm.get(acro);
            int start = potFF.toLowerCase().indexOf(fullForm);
            if (start != -1) {
                // Note: an occurrence at index 0 yields -1, which callers read as "not found".
                return --start;
            }
        }
        // Match the acronym characters from last to first against potFF, also right to left.
        while (shortIndex >= 0) {
            char curCharShort = acro.charAt(shortIndex);
            char curCharLong = potFF.charAt(longIndex);
            if (Character.isLetter(curCharShort)) {
                curCharShort = Character.toLowerCase(curCharShort);
            }
            if (Character.isLetter(curCharLong)) {
                curCharLong = Character.toLowerCase(curCharLong);
            }
            // These acronym characters need no counterpart in the full form.
            if (Character.isWhitespace(curCharShort) || Character.isDigit(curCharShort) || curCharShort == '-' || curCharShort == '+') {
                --shortIndex;
                continue;
            }
            // Move left in potFF until the characters are equal. For the first acronym
            // character (shortIndex == 0) the match must additionally be preceded by
            // whitespace, '-', ')', '/' or '"', unless it sits at index 0.
            while (longIndex >= 0 && curCharShort != curCharLong || longIndex > 0 && shortIndex == 0 && !Character.isWhitespace(potFF.charAt(longIndex - 1)) && potFF.charAt(longIndex - 1) != '-' && potFF.charAt(longIndex - 1) != ')' && potFF.charAt(longIndex - 1) != '/' && potFF.charAt(longIndex - 1) != '\"') {
                if (--longIndex < 0 || !Character.isLetter(curCharLong = potFF.charAt(longIndex))) continue;
                curCharLong = Character.toLowerCase(curCharLong);
            }
            // potFF is used up (index 0 or below) while this acronym character was still open.
            if (longIndex <= 0 && shortIndex >= 0) {
                return -1;
            }
            --longIndex;
            --shortIndex;
        }
        return longIndex;
    }

    /**
     * Determines where the text that may contain the full form starts, by stepping
     * backwards token by token from the acronym.
     * <p>
     * Tokens found in {@code STOPWORDS} are not counted. The walk stops when the
     * sentence start is reached, when the count equals
     * {@code MAXLENGTHFACTOR * acroLength}, or when it reaches
     * {@code acroLength + 2}. The count starts at -1 instead of 0 if the character
     * two positions in front of the acronym is a blank.
     *
     * @param sentence   text of the sentence
     * @param acroStart  index of the first acronym character
     * @param acroLength length of the acronym
     * @return index in {@code sentence} at which the search text begins
     */
    private int getPotFullformStart(String sentence, int acroStart, int acroLength) {
        String prefix = sentence.substring(0, acroStart);
        int tokenEnd = prefix.length() - 1;
        int boundary = this.getNextToken(prefix, tokenEnd);

        int counted = 0;
        if (acroStart >= 2 && prefix.charAt(acroStart - 2) == ' ') {
            counted = -1;
        }

        while (boundary != -1 && counted != this.MAXLENGTHFACTOR * acroLength) {
            String token;
            if (boundary == 0) {
                token = prefix.substring(0, tokenEnd);
            } else {
                token = prefix.substring(boundary + 1, tokenEnd);
            }
            // Stop words leave the count untouched; every other token raises it by one.
            boolean stopword = Arrays.binarySearch(STOPWORDS, token) >= 0;
            if (!stopword) {
                ++counted;
            }
            if (counted == acroLength + 2) {
                break;
            }
            tokenEnd = boundary;
            boundary = this.getNextToken(prefix, tokenEnd - 1);
        }

        return boundary == 0 ? 0 : boundary + 1;
    }

    /**
     * Scans backwards from {@code index} for the closest blank.
     *
     * @param s     the text to scan
     * @param index position at which the backward scan starts
     * @return -1 if {@code index} is 0 or -1; otherwise the index of the first
     *         blank found at or before {@code index}, or 0 if the start of the
     *         text is reached without finding one
     */
    int getNextToken(String s, int index) {
        if (index == 0 || index == -1) {
            return -1;
        }
        int cursor = index;
        while (cursor != 0 && s.charAt(cursor) != ' ') {
            cursor--;
        }
        return cursor;
    }

    /**
     * Picks the full form with the highest count from an acronym entry.
     *
     * @param ae the acronym entry whose full forms are inspected
     * @return the key of the full form with the largest positive count; the first
     *         one encountered wins on ties, and the empty string is returned if no
     *         full form has a count above zero
     */
    private String getBestFullformFromDict(AcronymEntry ae) {
        String bestName = "";
        int bestCount = 0;
        for (Map.Entry<String, FullformEntry> candidate : ae.getAllFullforms()) {
            FullformEntry fullformEntry = candidate.getValue();
            if (fullformEntry.count > bestCount) {
                bestCount = fullformEntry.count;
                bestName = candidate.getKey();
            }
        }
        return bestName;
    }
}

