/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.geneexpbase.resources;

import com.lahodiuk.ahocorasick.AhoCorasickOptimized;
import de.julielab.geneexpbase.AhoCorasickLongestMatchCallback;
import de.julielab.geneexpbase.TermNormalizer;
import de.julielab.java.utilities.FileUtilities;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Pattern;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.apache.commons.lang3.Range;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command line tool that scans a SPECIALIST Lexicon XML file and writes a line-based dictionary of
 * single-token terms in which a match of the automaton returned by
 * {@link TermNormalizer#getGreekHighLowKinaseAC()} is embedded, i.e. directly attached to a
 * neighbouring character that is neither whitespace, punctuation nor an ASCII digit.
 * <p>
 * NOTE(review): judging by its name the automaton matches Greek letter names (and possibly a few
 * other tokens) - confirm against {@code TermNormalizer}.
 */
public class SpecialistLexiconGreekDictCreator {
    private static final Logger log = LoggerFactory.getLogger(SpecialistLexiconGreekDictCreator.class);

    /** Local name of the element whose end tag terminates one lexicon record. */
    private static final String LEX_RECORD_ELEMENT = "lexRecord";

    /** Same expression the term filter has always used to reject terms containing whitespace. */
    private static final Pattern CONTAINS_WHITESPACE = Pattern.compile(".*\\s.*");

    /** A single punctuation character or ASCII digit; together with whitespace these separate a match. */
    private static final Pattern PUNCTUATION_OR_DIGIT = Pattern.compile("\\p{P}|[0-9]");

    /**
     * Entry point. Expects exactly two arguments; otherwise a usage message is printed to standard
     * error and nothing else happens.
     *
     * @param args the SPECIALIST Lexicon XML file and the output dictionary destination
     * @throws IOException        if reading the lexicon or writing the dictionary fails
     * @throws XMLStreamException if the lexicon cannot be parsed
     */
    public static void main(String[] args) throws IOException, XMLStreamException {
        if (args.length != 2) {
            System.err.println("Usage: " + SpecialistLexiconGreekDictCreator.class.getCanonicalName() + " <SPECIALIST Lexicon XML file> <output dictionary destination>");
            return;
        }
        new SpecialistLexiconGreekDictCreator().createDict(new File(args[0]), new File(args[1]));
    }

    /**
     * Streams through the lexicon and writes the dictionary.
     * <p>
     * The output starts with a {@code # Created <date>} header line, followed by one term per line.
     * Within each {@code lexRecord}, the texts of the {@code base}, {@code spellingVars},
     * {@code inflVars}, {@code acronyms} and {@code abbreviations} elements are inspected (for the
     * latter two only the part before the first {@code |}). A text is collected when it contains an
     * embedded match and no whitespace. As soon as a text has matches but is not collected, the whole
     * record is discarded and its remaining elements are skipped. Collected terms are written when the
     * record ends; a term that was already written for an earlier record is not written again.
     *
     * @param specialistXmlLexicon  the SPECIALIST Lexicon XML file to read
     * @param dictionaryDestination the file the dictionary is written to
     * @throws IOException        if reading the lexicon or writing the dictionary fails
     * @throws XMLStreamException if the lexicon cannot be parsed
     */
    public void createDict(File specialistXmlLexicon, File dictionaryDestination) throws IOException, XMLStreamException {
        log.info("Reading SPECIALIST Lexicon from {} and writing dictionary to {}.", specialistXmlLexicon, dictionaryDestination);
        TermNormalizer termNormalizer = new TermNormalizer();
        AhoCorasickLongestMatchCallback callback = new AhoCorasickLongestMatchCallback();
        // Terms already emitted for earlier records; populated at write time so that the
        // dictionary contains every term at most once.
        Set<String> alreadyWritten = new HashSet<>();
        try (BufferedReader br = FileUtilities.getReaderFromFile(specialistXmlLexicon);
             BufferedWriter bw = FileUtilities.getWriterToFile(dictionaryDestination)) {
            bw.write("# Created " + new Date());
            bw.newLine();
            XMLStreamReader reader = XMLInputFactory.newFactory().createXMLStreamReader(br);
            try {
                // Terms collected for the record currently being read.
                Set<String> toWrite = new HashSet<>();
                boolean recordHasNonEmbeddedGreek = false;
                while (reader.hasNext()) {
                    int eventType = reader.next();
                    if (eventType == XMLStreamConstants.START_ELEMENT) {
                        if (recordHasNonEmbeddedGreek) {
                            // The record is already rejected; skip ahead to its end tag.
                            continue;
                        }
                        String elementText = readTermText(reader);
                        if (elementText == null) {
                            continue;
                        }
                        boolean hasEmbeddedMatch = checkTextForGreek(elementText, termNormalizer.getGreekHighLowKinaseAC(), callback);
                        if (hasEmbeddedMatch && !CONTAINS_WHITESPACE.matcher(elementText).matches()) {
                            toWrite.add(elementText);
                        } else if (!callback.getLongestMatches().isEmpty()) {
                            // There are matches, but the text does not qualify: reject the whole record.
                            recordHasNonEmbeddedGreek = true;
                            toWrite.clear();
                        }
                    } else if (eventType == XMLStreamConstants.END_ELEMENT
                            && LEX_RECORD_ELEMENT.equalsIgnoreCase(reader.getName().getLocalPart())) {
                        if (!recordHasNonEmbeddedGreek) {
                            for (String entry : toWrite) {
                                // add() returns false for terms an earlier record already produced.
                                if (alreadyWritten.add(entry)) {
                                    bw.write(entry);
                                    bw.newLine();
                                }
                            }
                        }
                        toWrite.clear();
                        recordHasNonEmbeddedGreek = false;
                    }
                }
            } finally {
                // Releases the parser's resources; the underlying reader is closed by try-with-resources.
                reader.close();
            }
        }
        log.info("Done.");
    }

    /**
     * Reads the text of the element the reader is currently positioned on if it is one of the
     * term-bearing elements.
     *
     * @param reader a stream reader positioned on a start element
     * @return the element text ({@code acronyms}/{@code abbreviations}: only the part before the first
     *         {@code |}), or {@code null} if the element is not a term-bearing element, in which case
     *         the reader is not advanced
     * @throws XMLStreamException if the element text cannot be read
     */
    private static String readTermText(XMLStreamReader reader) throws XMLStreamException {
        switch (reader.getName().getLocalPart()) {
            case "acronyms":
            case "abbreviations": {
                String text = reader.getElementText();
                // indexOf instead of split("\\|")[0]: split yields an empty array for a text that
                // consists only of '|' characters, which used to raise ArrayIndexOutOfBoundsException.
                int separator = text.indexOf('|');
                return separator < 0 ? text : text.substring(0, separator);
            }
            case "base":
            case "spellingVars":
            case "inflVars":
                return reader.getElementText();
            default:
                return null;
        }
    }

    /**
     * Runs the automaton over the lower-cased text and reports whether at least one of the longest
     * matches is embedded. A match counts as separated on one side when it touches the text boundary
     * on that side or when the adjacent character is whitespace, punctuation or an ASCII digit; it is
     * embedded when it is not separated on both sides.
     * <p>
     * The callback is cleared before matching and afterwards holds the matches for this text, so the
     * caller can inspect {@code callback.getLongestMatches()} after the call.
     *
     * @param elementText the text to check
     * @param greekAC     the automaton to match with
     * @param callback    the callback collecting the longest matches
     * @return {@code true} if at least one match is embedded
     */
    private boolean checkTextForGreek(String elementText, AhoCorasickOptimized greekAC, AhoCorasickLongestMatchCallback callback) {
        callback.clear();
        // Locale.ROOT keeps the lower-casing independent of the default locale (e.g. the Turkish
        // dotless i would otherwise prevent matches on upper-case input).
        greekAC.match(elementText.toLowerCase(Locale.ROOT), callback);
        Map<Range<Integer>, String> longestMatches = callback.getLongestMatches();
        for (Range<Integer> range : longestMatches.keySet()) {
            int first = range.getMinimum();
            // The maximum is treated as the inclusive index of the last matched character.
            int last = range.getMaximum();
            boolean separatedLeft = first <= 0 || isSeparator(elementText.charAt(first - 1));
            boolean separatedRight = last >= elementText.length() - 1 || isSeparator(elementText.charAt(last + 1));
            if (!separatedLeft || !separatedRight) {
                return true;
            }
        }
        return false;
    }

    /** Tells whether the character delimits a match: whitespace, punctuation or an ASCII digit. */
    private static boolean isSeparator(char c) {
        return Character.isWhitespace(c) || PUNCTUATION_OR_DIGIT.matcher(String.valueOf(c)).matches();
    }
}

