/*
 * Decompiled with CFR 0.152.
 */
package uk.ac.man.documentparser.input;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.StringReader;
import java.util.Iterator;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import martin.common.Misc;
import martin.common.xml.EntityResolver;
import martin.common.xml.MyNodeList;
import martin.common.xml.XPath;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import uk.ac.man.documentparser.dataholders.Author;
import uk.ac.man.documentparser.dataholders.Document;
import uk.ac.man.documentparser.dataholders.ExternalID;
import uk.ac.man.documentparser.dataholders.Journal;
import uk.ac.man.documentparser.dataholders.Section;
import uk.ac.man.documentparser.input.DocumentIterator;

public class PMC
implements DocumentIterator {
    private Document doc = null;
    private int numArticles;
    private int nextArticle = 0;
    private String basePath;
    private String pmcID;
    private boolean hasOCR;
    private boolean hasPTT;
    private static int staticid = 0;
    private String xml = null;

    public PMC(File xmlLocation, String[] dtdLocations) {
        if (!xmlLocation.getAbsolutePath().endsWith(".xml") && !xmlLocation.getAbsolutePath().endsWith(".nxml")) {
            throw new IllegalStateException("PMC XML files have to end with .xml or .nxml");
        }
        this.basePath = xmlLocation.getAbsolutePath().substring(0, xmlLocation.getAbsolutePath().length() - 4);
        boolean hasXML = xmlLocation.exists();
        this.hasOCR = new File(String.valueOf(this.basePath) + ".txt").exists();
        this.hasPTT = new File(String.valueOf(this.basePath) + ".pdf.txt").exists();
        this.pmcID = xmlLocation.getName().split("\\.")[0];
        if (hasXML) {
            this.xml = Misc.loadFile(xmlLocation);
        }
        if (hasXML) {
            try {
                DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
                DocumentBuilder db = dbf.newDocumentBuilder();
                if (dtdLocations != null) {
                    db.setEntityResolver(new EntityResolver(dtdLocations));
                }
                this.doc = db.parse(xmlLocation);
                this.numArticles = this.doc.getElementsByTagName("article").getLength();
            }
            catch (Exception e) {
                System.err.println(e);
                this.doc = null;
                e.printStackTrace();
                System.exit(-1);
            }
        }
    }

    public PMC(StringBuffer data, String[] dtdLocation) {
        try {
            this.xml = data.toString();
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            DocumentBuilder db = dbf.newDocumentBuilder();
            if (dtdLocation != null) {
                db.setEntityResolver(new EntityResolver(dtdLocation));
            }
            this.doc = db.parse(new InputSource(new StringReader(this.xml)));
            this.pmcID = "pmc-local-id-" + staticid++;
            this.numArticles = this.doc.getElementsByTagName("article").getLength();
        }
        catch (Exception e) {
            System.err.println(e);
            this.doc = null;
            e.printStackTrace();
            System.exit(-1);
        }
    }

    @Override
    public Iterator<uk.ac.man.documentparser.dataholders.Document> iterator() {
        return this;
    }

    public PMC(String basePath, String[] dtdLocation, String pmcID, boolean hasXML, boolean hasXMLBody, boolean hasOCR, boolean hasPTT) {
        this.basePath = basePath;
        this.hasOCR = hasOCR;
        this.hasPTT = hasPTT;
        this.pmcID = pmcID;
        if (hasXML) {
            try {
                this.xml = Misc.loadFile(new File(String.valueOf(basePath) + ".xml"));
                DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
                DocumentBuilder db = dbf.newDocumentBuilder();
                if (dtdLocation != null) {
                    db.setEntityResolver(new EntityResolver(dtdLocation));
                }
                this.doc = db.parse(new File(String.valueOf(basePath) + ".xml"));
                this.numArticles = this.doc.getElementsByTagName("article").getLength();
            }
            catch (Exception e) {
                System.err.println(e);
                this.doc = null;
                e.printStackTrace();
                System.exit(-1);
            }
        }
    }

    @Override
    public boolean hasNext() {
        if (this.doc == null) {
            return false;
        }
        return this.nextArticle < this.numArticles;
    }

    private Section[] getSection(NodeList mainElement) {
        if (mainElement == null) {
            return null;
        }
        Section[] mainSections = new Section[mainElement.getLength()];
        int i = 0;
        while (i < mainElement.getLength()) {
            Node n = mainElement.item(i);
            Node titleNode = XPath.getNode("title", n);
            String title = titleNode != null ? titleNode.getTextContent() : "";
            Section[] subSections = this.getSection(XPath.getNodeList("sec", n));
            MyNodeList contentParagraphs = XPath.getNodeList("p", n);
            StringBuffer contents = new StringBuffer();
            int j = 0;
            while (j < contentParagraphs.getLength()) {
                contents.append(String.valueOf(contentParagraphs.item(j).getTextContent()) + "\n");
                ++j;
            }
            mainSections[i] = new Section(title, contents.toString(), subSections);
            ++i;
        }
        return mainSections;
    }

    private String loadFile(File f) {
        StringBuffer res = new StringBuffer();
        try {
            BufferedReader inStream = new BufferedReader(new FileReader(f));
            String line = inStream.readLine();
            while (line != null) {
                if (line.length() == 0) {
                    res.append("\n\n");
                } else {
                    res.append(line);
                }
                if (res.length() > 0) {
                    if (res.charAt(res.length() - 1) == '-') {
                        res = res.deleteCharAt(res.length() - 1);
                    } else {
                        res.append(" ");
                    }
                }
                line = inStream.readLine();
            }
            inStream.close();
        }
        catch (Exception e) {
            System.err.println(e);
            e.printStackTrace();
            System.exit(-1);
        }
        return res.toString();
    }

    @Override
    public uk.ac.man.documentparser.dataholders.Document next() {
        if (this.doc == null) {
            return null;
        }
        Element root = this.doc.getDocumentElement();
        Node titleNode = XPath.getNode("front/article-meta/title-group/article-title", root);
        String title = titleNode != null ? titleNode.getTextContent() : null;
        Section[] absSections = this.getSection(XPath.getNodeList("front/article-meta/abstract", root));
        Section[] bdy = this.getSection(XPath.getNodeList("body", root));
        Node yearNode = XPath.getNode("front/article-meta/pub-date/year", root);
        String year = yearNode != null ? yearNode.getTextContent() : null;
        String rawContent = "";
        Document.Text_raw_type rawtype = null;
        if (this.hasOCR) {
            rawContent = String.valueOf(rawContent) + this.loadFile(new File(String.valueOf(this.basePath) + ".txt"));
            rawtype = Document.Text_raw_type.OCR;
        }
        if (this.hasPTT) {
            rawContent = String.valueOf(rawContent) + this.loadFile(new File(String.valueOf(this.basePath) + ".pdf.txt"));
            rawtype = Document.Text_raw_type.PDF2TEXT;
        }
        if (rawContent.length() == 0) {
            rawContent = null;
        }
        MyNodeList authorList = XPath.getNodeList("front/article-meta/contrib-group/contrib", root);
        Author[] authors = new Author[authorList.getLength()];
        int i = 0;
        while (i < authors.length) {
            Node snn = XPath.getNode("name/surname", authorList.item(i));
            Node fnn = XPath.getNode("name/given-names", authorList.item(i));
            Node emailn = XPath.getNode("email", authorList.item(i));
            String sn = snn != null ? snn.getTextContent() : null;
            String fn = fnn != null ? fnn.getTextContent() : null;
            String email = emailn != null ? emailn.getTextContent() : null;
            authors[i] = new Author(sn, fn, email);
            ++i;
        }
        String ISSN = null;
        String jTitle = null;
        String jTitleAbbrev = null;
        MyNodeList ISSNlist = XPath.getNodeList("front/journal-meta/issn", root);
        for (Node n : ISSNlist) {
            ISSN = n.getTextContent();
        }
        MyNodeList jIDs = XPath.getNodeList("front/journal/journal-id", root);
        for (Node n : jIDs) {
            if (!n.getAttributes().getNamedItem("journal-id-type").getTextContent().equals("nlm-ta")) continue;
            jTitleAbbrev = n.getTextContent();
        }
        Node jTitleNode = XPath.getNode("front/journal-meta/journal-title", root);
        jTitle = jTitleNode != null ? jTitleNode.getTextContent() : null;
        Journal journal = new Journal(ISSN, jTitle, jTitleAbbrev);
        String volume = XPath.getNode("front/article-meta/volume", root) != null ? XPath.getNode("front/article-meta/volume", root).getTextContent() : null;
        String issue = XPath.getNode("front/article-meta/issue", root) != null ? XPath.getNode("front/article-meta/issue", root).getTextContent() : null;
        String type = root.getAttributes().getNamedItem("article-type") != null ? root.getAttributes().getNamedItem("article-type").getTextContent() : null;
        String fpage = XPath.getNode("front/article-meta/fpage", root) != null ? XPath.getNode("front/article-meta/fpage", root).getTextContent() : null;
        String lpage = XPath.getNode("front/article-meta/lpage", root) != null ? XPath.getNode("front/article-meta/lpage", root).getTextContent() : null;
        String pages = null;
        if (fpage != null && lpage != null) {
            pages = fpage.equals(lpage) ? fpage : String.valueOf(fpage) + "-" + lpage;
        }
        Document.Type typee = null;
        if (type != null) {
            typee = type.equals("research-article") ? Document.Type.RESEARCH : (type.equals("review-article") ? Document.Type.REVIEW : Document.Type.OTHER);
        }
        ExternalID externalID = new ExternalID(this.pmcID, ExternalID.Source.PMC);
        uk.ac.man.documentparser.dataholders.Document d = new uk.ac.man.documentparser.dataholders.Document(this.pmcID, title, Section.toString(absSections), Section.toString(bdy), rawContent, rawtype, year, journal, typee, authors, volume, issue, pages, this.xml, externalID);
        ++this.nextArticle;
        return d;
    }

    private void removeSections(Section[] sections, String keyword) {
        int i = 0;
        while (i < sections.length) {
            if (sections[i] != null) {
                Section s = sections[i];
                if (s.getTitle() != null && s.getTitle().toLowerCase().contains(keyword.toLowerCase())) {
                    sections[i] = null;
                } else {
                    this.removeSections(s.getSubSections(), keyword);
                }
            }
            ++i;
        }
    }

    @Override
    public void remove() {
        throw new IllegalStateException("remove() is not supported");
    }

    @Override
    public void skip() {
        ++this.nextArticle;
    }
}

