/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.tools.namedentities;

import edu.northwestern.at.utils.FileNameUtils;
import edu.northwestern.at.utils.FileUtils;
import edu.northwestern.at.utils.Formatters;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.PatternReplacer;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.TextFile;
import edu.northwestern.at.utils.gate.Annie;
import edu.northwestern.at.utils.xml.DOMUtils;
import java.io.File;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;

/**
 * Command-line tool that runs the text of XML documents through {@link Annie}
 * and writes the resulting documents to an output directory.
 *
 * <p>Usage: {@code AdornWithNamedEntities outputDirectory file1 [file2 ...]}
 * where {@code args[0]} is the output directory and the remaining arguments
 * are file names or wildcards expanded by
 * {@code FileNameUtils.expandFileNameWildcards}.</p>
 *
 * <p>All state is held in static fields; the class is not designed for
 * concurrent use.</p>
 */
public class AdornWithNamedEntities {
    /** DOM of the document most recently parsed by {@link #processOneFile}. */
    protected static Document document;

    /**
     * Not referenced inside this class. NOTE(review): presumably the number of
     * leading non-file arguments (args[0] is the output directory) - confirm.
     */
    protected static final int INITPARAMS = 1;

    /** Total number of files found after wildcard expansion. */
    protected static int docsToProcess = 0;

    /** 1-based counter of the file currently being processed. */
    protected static int currentDocNumber = 0;

    /** Directory that receives the output files (taken from args[0]). */
    protected static String outputDirectory;

    /** Named entity adorner, created in {@link #initialize}. */
    protected static Annie annie;

    /** Class-relative resource path of the fixups definition file. */
    protected static String fixupsURL = "resources/fixups.txt";

    /** Pattern replacements applied, in file order, to the body of each document. */
    protected static List<PatternReplacer> fixupsList = ListFactory.createNewList();

    /** Regular expression matching the tag names accepted as a TEI root element. */
    protected static final String teiHeaderPattern = "tei|tei\\.2|TEI|TEI\\.2";

    /**
     * Program entry point.
     *
     * @param args output directory followed by one or more file names/wildcards.
     */
    public static void main(String[] args) {
        if (!initialize(args)) {
            System.exit(1);
        }
        long startTime = System.currentTimeMillis();
        int filesProcessed = processFiles(args);
        terminate(filesProcessed, elapsedSeconds(startTime));
    }

    /**
     * Validates the command line, loads the fixups and creates the {@link Annie} instance.
     *
     * @param args command-line arguments; at least two are required.
     * @return {@code true} only when the arguments are sufficient, the fixups
     *         loaded, and the Annie instance was created successfully.
     */
    protected static boolean initialize(String[] args) {
        if (args.length < 2) {
            System.out.println("Not enough parameters.");
            return false;
        }
        outputDirectory = args[0];
        if (!loadFixups()) {
            return false;
        }
        try {
            annie = new Annie();
            return true;
        }
        catch (Exception e) {
            // Previously this path still reported success (the flag had already
            // been set by loadFixups), leaving annie null for every later call.
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Reads the fixups resource and appends one {@link PatternReplacer} per
     * usable line to {@link #fixupsList}.
     *
     * <p>Blank lines and lines starting with {@code #} are skipped, as are
     * lines that do not split into exactly two tab-separated fields. Lines are
     * trimmed before splitting.</p>
     *
     * @return always {@code true}.
     */
    protected static boolean loadFixups() {
        TextFile fixupsFile = new TextFile(AdornWithNamedEntities.class.getResourceAsStream(fixupsURL), "utf-8");
        for (String rawLine : fixupsFile.toArray()) {
            String fixupLine = rawLine.trim();
            if (fixupLine.length() <= 0 || fixupLine.charAt(0) == '#') {
                continue;
            }
            String[] fixup = fixupLine.split("\t");
            if (fixup.length != 2) {
                continue;
            }
            fixupsList.add(new PatternReplacer(fixup[0], fixup[1]));
        }
        return true;
    }

    /**
     * Parses one XML file, adorns its text nodes, post-processes the serialized
     * result and writes it under {@link #outputDirectory} using the input's
     * file name. Any exception is reported on the console and the file is
     * marked as failed; it is not rethrown.
     *
     * @param xmlFileName name of the XML file to process.
     */
    protected static void processOneFile(String xmlFileName) {
        System.out.println("Processing " + xmlFileName + " (" + ++currentDocNumber + "/" + docsToProcess + ")");
        try {
            long startTime = System.currentTimeMillis();
            document = DOMUtils.parse(xmlFileName);
            System.out.println("   Document loaded and parsed in " + formatSeconds(elapsedSeconds(startTime)));

            // NOTE(review): findTextNodesParent can return null when no TEI/EEBO
            // element is found; how DOMUtils.findChildren treats null is not
            // visible here - confirm.
            Node textRoot = findTextNodesParent(document);
            List<Node> textRootChildren = DOMUtils.findChildren(textRoot, "text|TEXT");
            startTime = System.currentTimeMillis();
            for (Node textElement : textRootChildren) {
                traverse(textElement);
            }

            // Only the part from the closing header tag onwards is un-escaped
            // and fixed up; everything before it is written back untouched.
            String docText = DOMUtils.saveToString(document);
            String[] docParts = splitDocumentText(docText, "</teiHeader>|</temphead>|</TEMPHEAD>|</tempHead>");
            String body = docParts[1].replace("&lt;", "<").replace("&gt;", ">");
            docText = docParts[0] + applyFixups(body);
            System.out.println("   Named entities added in " + formatSeconds(elapsedSeconds(startTime)));

            String outputFileName = new File(outputDirectory, FileNameUtils.stripPathName(xmlFileName)).getCanonicalPath();
            FileUtils.createPathForFile(outputFileName);
            FileUtils.writeTextFile(outputFileName, false, docText, "utf-8");
        }
        catch (Exception e) {
            e.printStackTrace();
            System.out.println("   *** Failed");
        }
    }

    /**
     * Expands the file name arguments and processes every resulting file.
     *
     * @param args command-line arguments; {@code args[0]} (the output
     *             directory) is skipped, the rest are treated as wildcards.
     * @return the number of files handed to {@link #processOneFile},
     *         including any that failed.
     */
    protected static int processFiles(String[] args) {
        String[] wildCards = new String[args.length - 1];
        System.arraycopy(args, 1, wildCards, 0, wildCards.length);
        String[] fileNames = FileNameUtils.expandFileNameWildcards(wildCards);
        docsToProcess = fileNames.length;
        for (String fileName : fileNames) {
            processOneFile(fileName);
        }
        return fileNames.length;
    }

    /**
     * Prints the final summary line.
     *
     * @param filesProcessed number of files processed.
     * @param processingTime total elapsed time in seconds.
     */
    protected static void terminate(int filesProcessed, long processingTime) {
        // The file/files wording must follow the file count, not the elapsed time.
        System.out.println("Processed " + Formatters.formatIntegerWithCommas(filesProcessed) + StringUtils.pluralize(filesProcessed, " file in ", " files in ") + formatSeconds(processingTime));
    }

    /**
     * Walks the subtree rooted at {@code node}, children first, and replaces
     * the data of every non-empty text node with the adorned text. A text node
     * is left unchanged when the adorner returns {@code null}.
     *
     * @param node root of the subtree to visit.
     */
    protected static void traverse(Node node) {
        NodeList children = node.getChildNodes();
        if (children != null) {
            for (int i = 0; i < children.getLength(); ++i) {
                traverse(children.item(i));
            }
        }
        if (node.getNodeType() != Node.TEXT_NODE) {
            return;
        }
        Text textNode = (Text)node;
        String text = textNode.getData();
        if (text == null || text.length() == 0) {
            return;
        }
        String adorned = addNamedEntities(text);
        if (adorned != null) {
            textNode.setData(adorned);
        }
    }

    /**
     * Passes text to {@code Annie.adornText}.
     *
     * @param text the text to adorn.
     * @return whatever {@code annie.adornText} returns for {@code text}.
     */
    protected static String addNamedEntities(String text) {
        return annie.adornText(text);
    }

    /**
     * Un-escapes entity references in serialized text and then applies every
     * entry of {@link #fixupsList} in order.
     *
     * <p>The substitutions run in this order: {@code &amp;name;} becomes
     * {@code &name;}, then {@code &apos;}, {@code &lt;}, {@code &gt;} and
     * {@code &quot;} become their literal characters.</p>
     *
     * @param text the text to fix up.
     * @return the fixed-up text.
     */
    protected static String applyFixups(String text) {
        String result = text.replaceAll("&amp;(\\w+);", "&$1;");
        result = result.replace("&apos;", "'");
        result = result.replace("&lt;", "<");
        result = result.replace("&gt;", ">");
        result = result.replace("&quot;", "\"");
        for (PatternReplacer fixup : fixupsList) {
            result = fixup.replace(result);
        }
        return result;
    }

    /**
     * Splits text in two at the start of the first match of a regular expression.
     *
     * @param docText     the text to split.
     * @param splitString regular expression locating the split point.
     * @return a two-element array: the text before the match, and the text
     *         from the start of the match to the end. When nothing matches,
     *         the first element is empty and the second is all of
     *         {@code docText}.
     */
    protected static String[] splitDocumentText(String docText, String splitString) {
        Matcher matcher = Pattern.compile(splitString).matcher(docText);
        if (!matcher.find()) {
            return new String[]{"", docText};
        }
        int splitIndex = matcher.start();
        return new String[]{docText.substring(0, splitIndex), docText.substring(splitIndex)};
    }

    /**
     * Locates the element under which the {@code text} elements are searched.
     *
     * <p>Preference order: a {@code group} child of an {@code eebo} child of
     * the root; otherwise the {@code eebo} child itself; otherwise the TEI
     * element (the root when its tag name matches {@link #teiHeaderPattern},
     * else the child found with that pattern).</p>
     *
     * @param document the parsed document.
     * @return the chosen element, or {@code null} when none of the above is found.
     */
    protected static Node findTextNodesParent(Document document) {
        Element rootNode = document.getDocumentElement();
        Element teiNode = rootNode.getTagName().matches(teiHeaderPattern) ? rootNode : DOMUtils.findChild(rootNode, teiHeaderPattern);
        Element eeboNode = DOMUtils.findChild(rootNode, "eebo|EEBO");
        if (eeboNode == null) {
            return teiNode;
        }
        Element groupTextRoot = DOMUtils.findChild(eeboNode, "group|GROUP");
        return groupTextRoot != null ? groupTextRoot : eeboNode;
    }

    /**
     * Returns the whole seconds elapsed since {@code startTime}, rounded up.
     */
    private static long elapsedSeconds(long startTime) {
        return (System.currentTimeMillis() - startTime + 999L) / 1000L;
    }

    /**
     * Formats a second count as e.g. {@code "1 second."} or {@code "1,234 seconds."}.
     */
    private static String formatSeconds(long seconds) {
        return Formatters.formatLongWithCommas(seconds) + StringUtils.pluralize(seconds, " second.", " seconds.");
    }

    /** Not meant to be instantiated directly; all members are static. */
    protected AdornWithNamedEntities() {
    }
}

