package de.julielab.genemapper.resources;

import javax.xml.stream.FactoryConfigurationError;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;

/**
 * Creates the UniProt name dictionary from a UniProt XML file.
 * <p>
 * The UniprotDictCreator is used to retrieve relevant content for the
 * uniprot-dictionary as found in the shell script makeGeneDictionary.sh
 * for the creation of the resources. Note that this solution was
 * reverse-engineered looking at the already existing uniprot.all.dict
 * and searching for the elements in question to retrieve from
 * uniprot.sprot.xml.
 * A later change was made by faessler to use the primary accession ID instead
 * of the UniProt mnemonic ID (IDs like IL2_MOUSE).
 * <p>
 * Each output line has the form {@code name<TAB>priority<TAB>accession}, where
 * the priority is {@code -1} for names found below {@code recommendedName},
 * {@code 2} for names found below {@code alternativeName} and {@code 0} for
 * the primary gene name.
 *
 * @author engelmann, faessler
 */
public class UniprotDictCreator {

    /** Priority suffix for recommended names (like the official symbol from NCBI gene). */
    private static final String PRIORITY_RECOMMENDED = "\t-1";
    /** Priority suffix for alternative names (like the synonyms from NCBI gene). */
    private static final String PRIORITY_ALTERNATIVE = "\t2";
    /** Priority suffix for the primary gene name. */
    private static final String PRIORITY_GENE = "\t0";

    /**
     * Maps a primary accession to its "name TAB priority" strings.
     * The map is static, so it is shared by all instances of this class.
     */
    private static final Map<String, List<String>> dictContent = new HashMap<>();

    /**
     * Command line entry point.
     *
     * @param args exactly two elements: the UniProt XML input file (optionally
     *             gzipped, recognized by the {@code .gz} suffix) and the
     *             dictionary output file
     */
    public static void main(String[] args) {
        if (args.length == 2) {
            File inputFile = new File(args[0]);
            File outputFile = new File(args[1]);
            System.out.println("Reading UniProt XML from " + inputFile.getAbsolutePath() + " and writing dictionary to " + outputFile.getAbsolutePath());
            UniprotDictCreator dictCreator = new UniprotDictCreator();
            dictCreator.readEntries(inputFile);
            dictCreator.writeEntries(outputFile);
        } else {
            System.err
                    .println("usage:\nUniProtDictCreator <inputFile> <outputFile>");
            System.exit(-1);
        }
    }

    /**
     * Reads all {@code entry} elements of the given UniProt XML file and adds
     * their names to the dictionary content.
     * <p>
     * Read and parse errors are reported on standard error and do not propagate;
     * entries that were completely read before the error remain in the dictionary.
     *
     * @param inputFile the UniProt XML file; it is decompressed on the fly when
     *                  its name ends with {@code .gz}
     */
    public void readEntries(File inputFile) {
        boolean gzipped = inputFile.getName().endsWith(".gz");
        // Both streams are declared as resources so the file handle is released
        // even if the GZIP header is invalid or parsing fails halfway through.
        try (InputStream fileStream = new FileInputStream(inputFile);
             InputStream xmlStream = gzipped ? new GZIPInputStream(fileStream) : fileStream) {
            XMLInputFactory factory = XMLInputFactory.newInstance();
            // The dictionary never needs external entities; do not resolve them.
            factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
            XMLStreamReader reader = factory.createXMLStreamReader(xmlStream);
            try {
                while (reader.hasNext()) {
                    if (reader.next() == XMLStreamReader.START_ELEMENT
                            && "entry".equals(reader.getLocalName())) {
                        readEntry(reader);
                    }
                }
            } finally {
                // XMLStreamReader is not AutoCloseable and does not close the underlying stream.
                reader.close();
            }
        } catch (IOException | XMLStreamException | FactoryConfigurationError e) {
            System.err.println("Could not read UniProt XML from " + inputFile.getAbsolutePath());
            e.printStackTrace();
        }
    }

    /**
     * Processes a single entry. The reader must be positioned on the start tag
     * of the {@code entry} element.
     * <p>
     * The entry is stored as soon as its first primary gene name was read; the
     * rest of the entry is then ignored. An entry without a primary gene name is
     * stored with the names collected so far when its end tag is reached, so its
     * names can never be attributed to the accession of another entry.
     */
    private static void readEntry(XMLStreamReader reader) throws XMLStreamException {
        String accession = "";
        boolean retrievedAccession = false;
        List<String> names = new ArrayList<>();

        while (reader.hasNext()) {
            int event = reader.next();
            if (event == XMLStreamReader.START_ELEMENT) {
                String localName = reader.getLocalName();
                if (localName.equals("accession")) {
                    if (!retrievedAccession) {
                        // get the primary accession (or the only one)
                        accession = reader.getElementText().split(",", 2)[0];
                        retrievedAccession = true;
                    }
                } else if (localName.equals("recommendedName")) {
                    collectNames(reader, "recommendedName", PRIORITY_RECOMMENDED, names);
                } else if (localName.equals("alternativeName")) {
                    collectNames(reader, "alternativeName", PRIORITY_ALTERNATIVE, names);
                } else if (localName.equals("gene")) {
                    String primaryGeneName = readPrimaryGeneName(reader);
                    if (primaryGeneName != null) {
                        names.add(primaryGeneName + PRIORITY_GENE);
                        storeEntry(accession, names);
                        return;
                    }
                }
            } else if (event == XMLStreamReader.END_ELEMENT
                    && "entry".equals(reader.getLocalName())) {
                // No primary gene name in this entry: keep what we have under its own accession.
                storeEntry(accession, names);
                return;
            }
        }
    }

    /**
     * Adds the text of every element whose name contains "Name" (e.g. fullName,
     * shortName) up to the end tag of {@code containerName} to {@code names},
     * each with {@code prioritySuffix} appended.
     */
    private static void collectNames(XMLStreamReader reader, String containerName,
                                     String prioritySuffix, List<String> names) throws XMLStreamException {
        while (reader.hasNext()) {
            int event = reader.next();
            if (event == XMLStreamReader.END_ELEMENT
                    && containerName.equals(reader.getLocalName())) {
                return;
            }
            if (event == XMLStreamReader.START_ELEMENT
                    && reader.getLocalName().contains("Name")) {
                names.add(reader.getElementText() + prioritySuffix);
            }
        }
    }

    /**
     * Scans the current {@code gene} element for a {@code name} element with
     * {@code type="primary"}.
     *
     * @return the primary gene name, or {@code null} if the gene element ended
     * without one
     */
    private static String readPrimaryGeneName(XMLStreamReader reader) throws XMLStreamException {
        while (reader.hasNext()) {
            int event = reader.next();
            if (event == XMLStreamReader.END_ELEMENT
                    && "gene".equals(reader.getLocalName())) {
                return null;
            }
            // Constant-first equals: a name element without a type attribute must not cause an NPE.
            if (event == XMLStreamReader.START_ELEMENT
                    && "name".equals(reader.getLocalName())
                    && "primary".equals(reader.getAttributeValue(null, "type"))) {
                return reader.getElementText();
            }
        }
        return null;
    }

    /** Stores the names under the accession unless the accession is blank or there are no names. */
    private static void storeEntry(String accession, List<String> names) {
        if (!accession.isBlank() && !names.isEmpty()) {
            dictContent.put(accession, names);
        }
    }

    /**
     * Writes the dictionary content to the given file, one
     * {@code name<TAB>priority<TAB>accession} line per name, encoded as UTF-8.
     * <p>
     * I/O errors are reported on standard error and do not propagate.
     *
     * @param outputFile the file to write; an existing file is overwritten
     */
    public void writeEntries(File outputFile) {
        // Explicit UTF-8: the names come from Unicode XML and must not depend on the platform charset.
        try (Writer writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(outputFile), StandardCharsets.UTF_8))) {
            for (Map.Entry<String, List<String>> entry : dictContent.entrySet()) {
                String accession = entry.getKey();
                for (String nameWithPriority : entry.getValue()) {
                    writer.write(nameWithPriority + "\t" + accession + "\n");
                }
            }
        } catch (IOException e) {
            System.err.println("Could not write dictionary to " + outputFile.getAbsolutePath());
            e.printStackTrace();
        }
    }

}
