/*
 * Decompiled with CFR 0.152.
 */
package de.citec.scie.ner.importer.formats;

import de.citec.scie.ner.db.types.AssociationType;
import de.citec.scie.ner.importer.ImporterException;
import de.citec.scie.ner.importer.utils.Associator;
import de.citec.scie.ner.importer.xml.XmlImporter;
import de.citec.scie.ner.importer.xml.XmlState;
import de.citec.scie.ner.importer.xml.XmlStateStack;
import de.citec.scie.ner.importer.xml.XmlUtils;
import de.citec.scie.ner.ontology.Node;
import de.citec.scie.ner.ontology.Ontology;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

public class NcbiImporter
extends XmlImporter {
    private static final int TAXA_SET = 0;
    private static final int TAXON = 1;
    private static final int TAX_ID = 2;
    private static final int SCIENTIFIC_NAME = 3;
    private static final int OTHER_NAMES = 4;
    private static final int SYNONYM = 5;
    private static final int NAME = 6;
    private static final int DISP_NAME = 7;
    private static final int PARENT_TAX_ID = 8;
    private static final int GENBANK_COMMON_NAME = 9;
    private static final int COMMON_NAME = 10;
    private static final XmlState[] STATES = new XmlState[]{new XmlState(0, "TaxaSet", -1), new XmlState(1, "Taxon", 0), new XmlState(2, "TaxId", 1), new XmlState(3, "ScientificName", 1), new XmlState(4, "OtherNames", 1), new XmlState(5, "Synonym", 4), new XmlState(6, "Name", 4), new XmlState(7, "Synonym", 6), new XmlState(8, "ParentTaxId", 1), new XmlState(9, "GenbankCommonName", 4), new XmlState(10, "CommonName", 4)};
    private static final Map<Integer, String> TYPE_NAMES = new HashMap<Integer, String>();
    protected static final Pattern[] CLEAN_REDUCE;
    protected static final Pattern REDUCE_SPACES;
    protected static final Pattern CLEAN_PARANTHESIS;

    private static String cleanName(String name) {
        Matcher m = REDUCE_SPACES.matcher(name);
        name = m.replaceAll(" ").trim();
        for (Pattern reducer : CLEAN_REDUCE) {
            m = reducer.matcher(name);
            if (!m.matches()) continue;
            name = m.group(1);
        }
        m = CLEAN_PARANTHESIS.matcher(name);
        return m.replaceAll("").trim();
    }

    @Override
    protected final String doDeduceOntologyName(XMLStreamReader reader) {
        return "ncbi";
    }

    @Override
    protected void doImportOntology(Ontology ontology, XMLStreamReader reader) throws XMLStreamException, ImporterException {
        Associator associator = new Associator(ontology, true);
        Node node = new Node(ontology.getDB(), -1);
        XmlStateStack stack = new XmlStateStack(STATES);
        while (reader.hasNext()) {
            int ev = stack.handleEvent(reader);
            switch (ev) {
                case 1: {
                    node = ontology.addNode();
                    break;
                }
                case 2: {
                    int id = XmlUtils.readIntContent(reader, -1);
                    node.setId(id);
                    break;
                }
                case 8: {
                    int parentId = XmlUtils.readIntContent(reader, -1);
                    if (parentId <= -1) break;
                    associator.addAssociation(node.getId(), parentId, AssociationType.IS_A);
                    break;
                }
                case 3: 
                case 5: 
                case 9: 
                case 10: {
                    String name = NcbiImporter.cleanName(reader.getElementText());
                    String type = TYPE_NAMES.get(ev);
                    if (name.isEmpty() || node.getLabelBySurfaceForm(name, false) != null) break;
                    node.addLabel(name, type);
                    break;
                }
            }
        }
        associator.storeAssociations();
    }

    static {
        TYPE_NAMES.put(10, "general common name");
        TYPE_NAMES.put(9, "genbank common name");
        TYPE_NAMES.put(3, "scientific name");
        TYPE_NAMES.put(5, "synonym");
        CLEAN_REDUCE = new Pattern[]{Pattern.compile("(.*)(sp|cf|aff)\\..*$"), Pattern.compile("(.*) in .*, [1-9][0-9][0-9][0-9]$"), Pattern.compile("(.*) [^\\s]+ & [^\\s]+, [1-9][0-9][0-9][0-9]$"), Pattern.compile("(.*) [^\\s]+, [1-9][0-9][0-9][0-9]$")};
        REDUCE_SPACES = Pattern.compile("(^\\s+|\\s+$|\\s\\s+)");
        CLEAN_PARANTHESIS = Pattern.compile("(\\(.*\\)| '.*'| \".*\")");
    }
}

