/*
 * Decompiled with CFR 0.152.
 */
package uk.ac.man.entitytagger.entities.species;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.StringReader;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import martin.common.ArgParser;
import martin.common.Loggers;
import martin.common.Misc;
import martin.common.Pair;
import martin.common.StreamIterator;
import martin.common.xml.XPath;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import uk.ac.man.entitytagger.generate.DictionaryEntry;
import uk.ac.man.entitytagger.generate.GenerateMatchers;

/**
 * Builds species and genus dictionaries (identifier to name patterns / name
 * variants) from delimited name files, optionally merged with extra synonym
 * files, and writes them to disk.
 *
 * <p>Identifiers are prefixed with {@code species:ncbi:} or {@code genus:ncbi:}.
 * All I/O and parsing failures in this class are reported on {@code System.err}
 * and terminate the JVM with exit status -1.
 */
public class GenerateDictionary {
    private static final String COL_WEBSERVICE_URL = "http://webservice.catalogueoflife.org/annual-checklist/2009/search.php";

    /** Names starting with one of {@code [ ( { ?} are skipped (genus input and name-variant output). */
    private static final Pattern BRACKETED_NAME = Pattern.compile("([\\[\\(\\{\\?].*)");

    /** Names starting with one of {@code [ ( { ? " '} are skipped (species dictionary). */
    private static final Pattern BRACKETED_OR_QUOTED_NAME = Pattern.compile("([\\[\\(\\{\\?\"'].*)");

    /** An angle-bracketed span that contains a comma; collapsed to {@code <...>} in genus input lines. */
    private static final Pattern ANGLE_BRACKETED_LIST = Pattern.compile("<.*?,.*?>");

    /**
     * Command-line entry point.
     *
     * <p>Recognised arguments (as read through {@link ArgParser}): {@code inSpecies}
     * (input file; nothing is done without it), {@code outRegexp} (write the
     * pattern dictionary), {@code outNames} (write the name variants),
     * {@code extraSynonyms}, {@code includeLineNumbers} and {@code report}.
     *
     * @param args raw command-line arguments
     */
    public static void main(String[] args) {
        ArgParser ap = new ArgParser(args);
        Logger logger = Loggers.getDefaultLogger(ap);
        File[] extraSynonymFiles = ap.getFiles("extraSynonyms");
        boolean includeLineNumbers = ap.containsKey("includeLineNumbers");
        logger.info("%t: includeLineNumbers = " + includeLineNumbers + "\n");
        int report = ap.getInt("report", -1);
        if (!ap.containsKey("inSpecies")) {
            return;
        }
        File in = ap.getFile("inSpecies");
        // Built at most once and shared by both outputs: with includeLineNumbers
        // set, building it performs one web-service lookup per scientific name.
        HashMap<String, DictionaryEntry> speciesDict = null;
        if (ap.containsKey("outRegexp")) {
            File out = ap.getFile("outRegexp");
            speciesDict = GenerateDictionary.generateSpeciesDictionary(in, extraSynonymFiles, includeLineNumbers, logger, report);
            GenerateDictionary.save(out, speciesDict, logger);
        }
        if (ap.containsKey("outNames")) {
            File out = ap.getFile("outNames");
            HashMap<String, List<String>> names = GenerateDictionary.generateSpeciesNames(in, extraSynonymFiles, includeLineNumbers, logger);
            if (speciesDict == null) {
                speciesDict = GenerateDictionary.generateSpeciesDictionary(in, extraSynonymFiles, includeLineNumbers, logger, report);
            }
            GenerateDictionary.saveNames(out, names, speciesDict, logger);
        }
    }

    /**
     * Writes one line per identifier: {@code id TAB variant1|variant2|... TAB comment},
     * where the comment column is empty when {@code dictWithComments} has no
     * entry or no comment for that identifier.
     *
     * @param outFile          destination file (overwritten)
     * @param dict             identifier to name variants
     * @param dictWithComments identifier to dictionary entry, consulted only for comments
     * @param logger           progress logger
     */
    private static void saveNames(File outFile, HashMap<String, List<String>> dict, HashMap<String, DictionaryEntry> dictWithComments, Logger logger) {
        logger.info("%t: Saving to file " + outFile.getAbsolutePath() + "...\n");
        try {
            BufferedWriter outStream = new BufferedWriter(new FileWriter(outFile));
            try {
                for (Map.Entry<String, List<String>> entry : dict.entrySet()) {
                    String id = entry.getKey();
                    String variants = Misc.implode(entry.getValue().toArray(new String[0]), "|");
                    DictionaryEntry commented = dictWithComments.get(id);
                    if (commented != null && commented.getComment() != null) {
                        outStream.write(id + "\t" + variants + "\t" + commented.getComment() + "\n");
                    } else {
                        outStream.write(id + "\t" + variants + "\t\n");
                    }
                }
            } finally {
                // Close (and flush) before the failure handler below can exit the JVM.
                outStream.close();
            }
            logger.info("%t: Done.\n");
        }
        catch (Exception e) {
            System.err.println(e);
            e.printStackTrace();
            System.exit(-1);
        }
    }

    /**
     * Collects plain-text name variants per species identifier.
     *
     * <p>Extra synonym files are read first; each line is {@code id TAB name1|name2|...}
     * and every name is treated as a "common name". Lines without a second
     * tab-separated column are skipped with a warning. The species file is then
     * read as comma-separated lines with exactly four fields (id, name, unused,
     * type); other lines are ignored, as are acronyms, names starting with
     * {@code [ ( { ?} and names containing {@code @}.
     *
     * <p>NOTE(review): unlike {@link #generateSpeciesDictionary}, this method does
     * not skip names starting with a quote and does not strip a leading ". " --
     * confirm whether that difference is intended.
     *
     * @param in                 species input file
     * @param extraSynonymFiles  extra synonym files; {@code null} is treated as none
     * @param includeLineNumbers when true, {@code |lineNumber} (0-based) is appended to each species id
     * @param logger             progress logger
     * @return identifier to list of name variants
     */
    private static HashMap<String, List<String>> generateSpeciesNames(File in, File[] extraSynonymFiles, boolean includeLineNumbers, Logger logger) {
        logger.info("%t: Generating species name variants...\n");
        HashMap<String, List<String>> res = new HashMap<String, List<String>>();
        if (extraSynonymFiles != null) {
            for (File f : extraSynonymFiles) {
                StreamIterator synonymLines = new StreamIterator(f);
                for (String s : synonymLines) {
                    String[] fields = s.split("\\t");
                    if (fields.length < 2) {
                        logger.warning("%t: Skipping extra-synonym line without a synonym column in " + f.getAbsolutePath() + ": " + s + "\n");
                        continue;
                    }
                    for (String synonym : fields[1].split("\\|")) {
                        GenerateDictionary.addName(res, fields[0], synonym, "common name");
                    }
                }
            }
        }
        StreamIterator speciesLines = new StreamIterator(in);
        int lineNumber = 0;
        for (String s : speciesLines) {
            String[] fields = s.split(",");
            if (fields.length == 4) {
                String name = fields[1];
                String type = fields[3];
                boolean skipped = type.contains("acronym") || BRACKETED_NAME.matcher(name).matches() || name.contains("@");
                if (!skipped) {
                    String id = "species:ncbi:" + fields[0];
                    if (includeLineNumbers) {
                        id = id + "|" + lineNumber;
                    }
                    GenerateDictionary.addName(res, id, name, type);
                }
            }
            ++lineNumber;
        }
        logger.info("%t: Done.\n");
        return res;
    }

    /**
     * Appends the textual variants of {@code name} to the list stored under {@code id}.
     *
     * <p>Every name yields a lower- and upper-case-initial form. Multi-word
     * scientific names/synonyms/anamorphs additionally yield forms with the first
     * word abbreviated to its initial plus "."; pluralizable common names
     * additionally yield forms with a trailing "s". Names that are empty after
     * removing leading spaces are ignored.
     *
     * @param res  identifier to variants map, updated in place
     * @param id   identifier to file the variants under
     * @param name raw name
     * @param type name type, e.g. "scientific name" or "common name"
     */
    private static void addName(HashMap<String, List<String>> res, String id, String name, String type) {
        name = stripLeadingSpaces(name);
        if (name.length() == 0) {
            // Nothing to derive variants from (e.g. an empty alternative in "a||b").
            return;
        }
        String[] parts = name.split(" ");
        char initial = name.charAt(0);
        String rest = name.substring(1);
        String lower = Character.toLowerCase(initial) + rest;
        String upper = Character.toUpperCase(initial) + rest;
        List<String> names = new ArrayList<String>();
        names.add(lower);
        names.add(upper);
        if (isAbbreviableType(type) && parts.length > 1 && (parts.length != 2 || parts[1].length() >= 4)) {
            // parts.length > 1 guarantees the name contains a space.
            String afterFirstWord = name.substring(name.indexOf(" "));
            names.add(Character.toLowerCase(initial) + "." + afterFirstWord);
            names.add(Character.toUpperCase(initial) + "." + afterFirstWord);
        } else if (isPluralizable(name, type)) {
            names.add(lower + "s");
            names.add(upper + "s");
        }
        List<String> existing = res.get(id);
        if (existing == null) {
            res.put(id, names);
        } else {
            existing.addAll(names);
        }
    }

    /**
     * Looks up {@code name} in the Catalogue of Life web service and returns the
     * text of the first {@code results/result/accepted_name/name} node.
     *
     * @param name name to look up; names containing {@code <} or {@code >} are not looked up
     * @return the accepted name, or {@code null} if the name was not looked up or
     *         the response has no such node
     */
    private static String getAcceptedName(String name) {
        if (name.contains("<") || name.contains(">")) {
            return null;
        }
        try {
            // URL-encode the name so characters such as '&', '#', '+' or non-ASCII
            // letters cannot corrupt the query string (spaces still become '+').
            String url = COL_WEBSERVICE_URL + "?name=" + URLEncoder.encode(name, "UTF-8") + "&format=xml&response=terse";
            String content = Misc.downloadURL(new URL(url));
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            DocumentBuilder db = dbf.newDocumentBuilder();
            Document doc = db.parse(new InputSource(new StringReader(content)));
            Node n = XPath.getNode("results/result/accepted_name/name", doc);
            return n != null ? n.getTextContent() : null;
        }
        catch (Exception e) {
            System.err.println(e.toString());
            e.printStackTrace();
            System.exit(-1);
            return null;
        }
    }

    /**
     * Writes the {@code toString()} form of every dictionary entry to {@code out},
     * one per line.
     *
     * @param out    destination file (overwritten)
     * @param dict   identifier to dictionary entry
     * @param logger progress logger
     */
    private static void save(File out, HashMap<String, DictionaryEntry> dict, Logger logger) {
        try {
            logger.info("Writing regular expressions to file " + out.getAbsolutePath() + "...\n");
            BufferedWriter outStream = new BufferedWriter(new FileWriter(out));
            try {
                for (DictionaryEntry entry : dict.values()) {
                    outStream.write(entry.toString() + "\n");
                }
            } finally {
                // Close (and flush) before the failure handler below can exit the JVM.
                outStream.close();
            }
        }
        catch (Exception e) {
            System.err.println(e);
            e.printStackTrace();
            System.exit(-1);
        }
    }

    /**
     * Adds a pattern for a genus name to {@code de}: quotes are removed, the
     * characters {@code ( ) . < > { } [ ]} are backslash-escaped, the first letter
     * is matched case-insensitively, and pluralizable common names get an
     * optional trailing "s".
     *
     * @param de   entry receiving the pattern
     * @param name raw name
     * @param type name type
     * @return the number of name variants the pattern covers (1 or 2), or 0 if the
     *         name has no usable first word and nothing was added
     */
    private static int addGenusName(DictionaryEntry de, String name, String type) {
        name = stripLeadingSpaces(name);
        name = name.replace("\"", "");
        name = name.replace("'", "");
        name = name.replace("(", "\\(");
        name = name.replace(")", "\\)");
        name = name.replace(".", "\\.");
        name = name.replace("<", "\\<");
        name = name.replace(">", "\\>");
        name = name.replace("{", "\\{");
        name = name.replace("}", "\\}");
        name = name.replace("[", "\\[");
        name = name.replace("]", "\\]");
        String[] parts = name.split(" ");
        if (parts.length == 0 || parts[0].length() == 0) {
            // Empty after cleaning (e.g. the name was only quotes/spaces).
            return 0;
        }
        char first = parts[0].charAt(0);
        StringBuilder res = new StringBuilder();
        res.append('(').append(Character.toUpperCase(first)).append('|').append(Character.toLowerCase(first)).append(')');
        res.append(parts[0].substring(1));
        for (int i = 1; i < parts.length; ++i) {
            res.append(' ').append(parts[i]);
        }
        int numGen = 1;
        if (isPluralizable(name, type)) {
            res.append("s?");
            numGen = 2;
        }
        de.addPattern(res.toString());
        return numGen;
    }

    /**
     * Merges the (identifier, pattern) pairs loaded from {@code file} into
     * {@code hashMap}, creating entries for identifiers not yet present.
     *
     * @param hashMap dictionary updated in place
     * @param file    extra synonym file, parsed by {@link GenerateMatchers#loadExtraSynonyms}
     */
    private static void addSynonyms(HashMap<String, DictionaryEntry> hashMap, File file) {
        ArrayList<Pair<String>> entries = new GenerateMatchers().loadExtraSynonyms(file);
        for (Pair<String> e : entries) {
            String id = e.getX();
            DictionaryEntry entry = hashMap.get(id);
            if (entry == null) {
                entry = new DictionaryEntry(id);
                hashMap.put(id, entry);
            }
            entry.addPattern(e.getY());
        }
    }

    /**
     * Builds the genus dictionary.
     *
     * <p>Each input line is split on {@code TAB|TAB} after angle-bracketed spans
     * containing a comma are collapsed to {@code <...>}. Only lines with exactly
     * four fields (id, name, unused, type) are used; acronyms, names starting
     * with {@code [ ( { ?} and names containing {@code @} are skipped.
     *
     * @param file               genus input file
     * @param extraSynonymFiles  extra synonym files merged in afterwards; {@code null} is treated as none
     * @param includeLineNumbers when true, {@code |lineNumber} (0-based) is appended to each id
     * @param logger             progress logger
     * @return identifier to dictionary entry
     */
    public static HashMap<String, DictionaryEntry> generateGenusDictionary(File file, File[] extraSynonymFiles, boolean includeLineNumbers, Logger logger) {
        logger.info("Loading NCBI taxonomy data... ");
        int numPatterns = 0;
        HashMap<String, DictionaryEntry> hashMap = new HashMap<String, DictionaryEntry>();
        try {
            BufferedReader inStream = new BufferedReader(new FileReader(file));
            try {
                int lineCounter = 0;
                String line;
                while ((line = inStream.readLine()) != null) {
                    numPatterns += addGenusLine(hashMap, line, lineCounter, includeLineNumbers);
                    ++lineCounter;
                }
            } finally {
                inStream.close();
            }
        }
        catch (Exception e) {
            System.err.println(e);
            e.printStackTrace();
            System.exit(-1);
        }
        addAllSynonyms(hashMap, extraSynonymFiles);
        logger.info("Done, loaded " + hashMap.size() + " genus and " + numPatterns + " name variants.\n");
        return hashMap;
    }

    /**
     * Builds the species dictionary.
     *
     * <p>Each input line is split on commas. Only lines with exactly four fields
     * (id, name, unused, type) are used; acronyms, names starting with
     * {@code [ ( { ? " '} and names containing {@code @} are skipped, and a
     * leading ". " is removed from the name. When {@code includeLineNumbers} is
     * set, entries are additionally commented with the type (for "misnomer",
     * "misspelling", "in-part") or with the accepted name returned by the
     * Catalogue of Life web service (for "scientific name").
     *
     * @param file               species input file
     * @param extraSynonymFiles  extra synonym files merged in afterwards; {@code null} is treated as none
     * @param includeLineNumbers when true, {@code |lineNumber} (0-based) is appended to each id
     * @param logger             progress logger
     * @param report             log progress every {@code report} lines; zero or negative values disable it
     * @return identifier to dictionary entry
     */
    public static HashMap<String, DictionaryEntry> generateSpeciesDictionary(File file, File[] extraSynonymFiles, boolean includeLineNumbers, Logger logger, int report) {
        logger.info("Loading NCBI taxonomy data... ");
        int numPatterns = 0;
        HashMap<String, DictionaryEntry> hashMap = new HashMap<String, DictionaryEntry>();
        try {
            BufferedReader inStream = new BufferedReader(new FileReader(file));
            try {
                int linecounter = 0;
                String line;
                while ((line = inStream.readLine()) != null) {
                    numPatterns += addSpeciesLine(hashMap, line, linecounter, includeLineNumbers);
                    ++linecounter;
                    // report > 0 also protects the modulo against division by zero.
                    if (report > 0 && linecounter % report == 0) {
                        logger.info("%t: generateSpeciesDictionary: processed " + linecounter + " lines.\n");
                    }
                }
            } finally {
                inStream.close();
            }
        }
        catch (Exception e) {
            System.err.println(e);
            e.printStackTrace();
            System.exit(-1);
        }
        addAllSynonyms(hashMap, extraSynonymFiles);
        logger.info("Done, loaded " + hashMap.size() + " species and " + numPatterns + " name variants.\n");
        return hashMap;
    }

    /**
     * Adds a pattern for a species name to {@code de}. The name is escaped with
     * {@link GenerateMatchers#escapeRegexp}; the first letter is matched
     * case-insensitively. For multi-word scientific names/synonyms/anamorphs whose
     * first word has more than one character, the first word may also be
     * abbreviated to its initial followed by "." and an optional space.
     * Pluralizable common names get an optional trailing "s".
     *
     * @param de   entry receiving the pattern
     * @param name raw name
     * @param type name type
     * @return the number of name variants the pattern covers (1, 2 or 4), or 0 if
     *         the name has no usable first word and nothing was added
     */
    private static int addName(DictionaryEntry de, String name, String type) {
        name = GenerateMatchers.escapeRegexp(stripLeadingSpaces(name));
        String[] parts = name.split(" ");
        if (parts.length == 0 || parts[0].length() == 0) {
            return 0;
        }
        int numGen = 1;
        char first = parts[0].charAt(0);
        // True when the first-word group already consumes the separating space.
        boolean abbrev = false;
        StringBuilder res = new StringBuilder();
        res.append('(').append(Character.toUpperCase(first)).append('|').append(Character.toLowerCase(first)).append(')');
        if (isAbbreviableType(type) && parts.length > 1 && (parts.length != 2 || parts[1].length() >= 4)) {
            if (parts[0].length() > 1) {
                res.append("(\\. ?|").append(parts[0].substring(1)).append(" )");
                abbrev = true;
                numGen = 4;
            }
        } else {
            res.append(parts[0].substring(1));
        }
        for (int i = 1; i < parts.length; ++i) {
            if (!abbrev || i > 1) {
                res.append(' ');
            }
            res.append(parts[i]);
        }
        if (isPluralizable(name, type)) {
            res.append("s?");
            numGen = 2;
        }
        de.addPattern(res.toString());
        return numGen;
    }

    /** Processes one genus input line; returns the number of name variants added (0 if the line is skipped). */
    private static int addGenusLine(HashMap<String, DictionaryEntry> dict, String line, int lineNumber, boolean includeLineNumbers) {
        String[] fields = ANGLE_BRACKETED_LIST.matcher(line).replaceAll("<...>").split("\t\\|\t");
        // Check the field count before touching any field: split() can return an empty array.
        if (fields.length != 4) {
            return 0;
        }
        String name = fields[1];
        String type = fields[3];
        if (type.contains("acronym") || BRACKETED_NAME.matcher(name).matches() || name.contains("@")) {
            return 0;
        }
        String id = "genus:ncbi:" + fields[0];
        if (includeLineNumbers) {
            id = id + "|" + lineNumber;
        }
        DictionaryEntry entry = dict.get(id);
        boolean isNew = entry == null;
        if (isNew) {
            entry = new DictionaryEntry(id);
        }
        int added = GenerateDictionary.addGenusName(entry, name, type);
        // Register a new entry only once it actually holds a pattern.
        if (isNew && added > 0) {
            dict.put(id, entry);
        }
        return added;
    }

    /** Processes one species input line; returns the number of name variants added (0 if the line is skipped). */
    private static int addSpeciesLine(HashMap<String, DictionaryEntry> dict, String line, int lineNumber, boolean includeLineNumbers) {
        String[] fields = line.split(",");
        // Check the field count before touching any field: split() can return an empty array.
        if (fields.length != 4) {
            return 0;
        }
        String rawName = fields[1];
        String type = fields[3];
        if (type.contains("acronym") || BRACKETED_OR_QUOTED_NAME.matcher(rawName).matches() || rawName.contains("@")) {
            return 0;
        }
        String name = rawName.startsWith(". ") ? rawName.substring(2) : rawName;
        String id = "species:ncbi:" + fields[0];
        if (includeLineNumbers) {
            id = id + "|" + lineNumber;
        }
        DictionaryEntry entry = dict.get(id);
        boolean isNew = entry == null;
        if (isNew) {
            entry = new DictionaryEntry(id);
        }
        int added = GenerateDictionary.addName(entry, name, type);
        if (added == 0) {
            return 0;
        }
        if (isNew) {
            dict.put(id, entry);
        }
        if (includeLineNumbers) {
            if (type.equals("misnomer") || type.equals("misspelling") || type.equals("in-part")) {
                entry.setComment(type);
            }
            if (type.equals("scientific name")) {
                String acceptedName = GenerateDictionary.getAcceptedName(name);
                if (acceptedName != null) {
                    entry.setComment("accepted: \"" + acceptedName + "\"");
                }
            }
        }
        return added;
    }

    /** Merges every extra synonym file into {@code dict}; a {@code null} array is treated as empty. */
    private static void addAllSynonyms(HashMap<String, DictionaryEntry> dict, File[] extraSynonymFiles) {
        if (extraSynonymFiles == null) {
            return;
        }
        for (File f : extraSynonymFiles) {
            GenerateDictionary.addSynonyms(dict, f);
        }
    }

    /** Returns {@code s} without its leading space characters (only ' ', not other whitespace). */
    private static String stripLeadingSpaces(String s) {
        int start = 0;
        while (start < s.length() && s.charAt(start) == ' ') {
            ++start;
        }
        return s.substring(start);
    }

    /** True for name types whose first word may be abbreviated to an initial. */
    private static boolean isAbbreviableType(String type) {
        return type.equals("scientific name") || type.contains("synonym") || type.contains("anamorph");
    }

    /** True for common/"include" names that do not already end in "s" or "family". */
    private static boolean isPluralizable(String name, String type) {
        return (type.contains("common name") || type.contains("include")) && !name.endsWith("s") && !name.endsWith("family");
    }
}

