/*
 * Decompiled with CFR 0.152.
 */
package uk.ac.man.documentparser;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Logger;
import martin.common.ArgParser;
import martin.common.Loggers;
import martin.common.Misc;
import martin.common.MyConnection;
import martin.common.SQL;
import uk.ac.man.documentparser.dataholders.Document;
import uk.ac.man.documentparser.input.BMCFactory;
import uk.ac.man.documentparser.input.BMCXMLFactory;
import uk.ac.man.documentparser.input.DatabaseIterator;
import uk.ac.man.documentparser.input.DatabaseListIterator;
import uk.ac.man.documentparser.input.Directory;
import uk.ac.man.documentparser.input.DocumentIterator;
import uk.ac.man.documentparser.input.ElsevierFactory;
import uk.ac.man.documentparser.input.IDIterator;
import uk.ac.man.documentparser.input.MedlineIndexFactory;
import uk.ac.man.documentparser.input.MedlinePMCIndexFactory;
import uk.ac.man.documentparser.input.OTMI;
import uk.ac.man.documentparser.input.OTMIFactory;
import uk.ac.man.documentparser.input.PMCAbstract;
import uk.ac.man.documentparser.input.PMCFactory;
import uk.ac.man.documentparser.input.PMCIndexFactory;
import uk.ac.man.documentparser.input.TextFile;
import uk.ac.man.documentparser.input.TextFileFactory;
import uk.ac.man.documentparser.input.util.CleanUnicode;
import uk.ac.man.documentparser.input.util.DocumentBuffer;
import uk.ac.man.documentparser.input.util.Skipper;
import uk.ac.man.documentparser.input.util.Splitter;

/**
 * Command-line entry point and helper methods for loading documents from one of
 * several sources (selected through {@link ArgParser} options) and exporting
 * them to text files, a database table, standard output or a description file.
 */
public class DocumentParser {
    /**
     * Tells whether a progress message is due after {@code count} processed items.
     *
     * <p>A {@code report} value of -1 (the default when the option is absent) or 0
     * disables progress reporting; 0 is handled explicitly because it would
     * otherwise cause a division by zero in the remainder operation.
     *
     * @param count number of items processed so far
     * @param report reporting interval
     * @return {@code true} if {@code count} is a multiple of an enabled interval
     */
    private static boolean isReportDue(int count, int report) {
        if (report == -1 || report == 0) {
            return false;
        }
        return count % report == 0;
    }

    /**
     * Makes sure {@code dir} exists as a directory, creating missing parents too.
     *
     * @param dir directory to create if necessary
     * @throws IllegalStateException if the directory does not exist and cannot be created
     */
    private static void ensureDirectory(File dir) {
        // The trailing isDirectory() check covers the case where the directory
        // appeared between the first check and the mkdirs() call.
        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IllegalStateException("Could not create output directory " + dir);
        }
    }

    /**
     * Resolves the document source selected on the command line and fails with a
     * descriptive error when none was selected.
     *
     * @param ap parsed command-line arguments
     * @param logger logger handed on to {@link #getDocuments(ArgParser, Logger)}
     * @return the document iterator, never {@code null}
     * @throws IllegalStateException if no document input option was given
     */
    private static DocumentIterator requireDocuments(ArgParser ap, Logger logger) {
        DocumentIterator documents = DocumentParser.getDocuments(ap, logger);
        if (documents == null) {
            throw new IllegalStateException("No document input specified; run with --help for the available input options");
        }
        return documents;
    }

    /**
     * Saves every document to {@code <outputDir>/<first>/<second>/<id>.txt}, where
     * {@code first} is the last two characters of the zero-padded ID and
     * {@code second} the two characters before those. Documents whose ID starts
     * with "PMC" are stored directly in {@code <outputDir>/<first>}.
     * {@code null} entries produced by the iterator are skipped.
     *
     * @param documents documents to store
     * @param outputDir base directory of the output tree
     * @param report progress reporting interval (-1 or 0 disables reporting)
     * @param logger logger that receives the progress messages
     * @throws IllegalStateException if {@code outputDir} is {@code null}, a document
     *         has no ID, or a target directory cannot be created
     */
    private static void runSeparated(DocumentIterator documents, File outputDir, int report, Logger logger) {
        if (outputDir == null) {
            throw new IllegalStateException("Need to specify an output base directory after the runSeparated command");
        }
        int stored = 0;
        while (documents.hasNext()) {
            Document doc = (Document)documents.next();
            if (doc == null) {
                continue;
            }
            String id = doc.getID();
            if (id == null) {
                throw new IllegalStateException("ID not set");
            }
            // Left-pad so that IDs shorter than four characters still yield two
            // two-character directory names.
            String padded = "0000" + id;
            int end = padded.length();
            String first = padded.substring(end - 2);
            String second = padded.substring(end - 4, end - 2);

            File dir = new File(outputDir, first);
            if (!id.startsWith("PMC")) {
                dir = new File(dir, second);
            }
            ensureDirectory(dir);

            File outFile = new File(dir, id.replace(File.separatorChar, '_') + ".txt");
            doc.saveToTextFile(outFile, false);
            if (isReportDue(++stored, report)) {
                logger.info("%t: Stored " + stored + " documents.\n");
            }
        }
    }

    /**
     * Saves every document to {@code <outputDir>/<id>.txt}, or, when
     * {@code outputDir} is {@code null}, prints the ID and the document to
     * standard output instead. {@code null} entries are skipped.
     *
     * @param documents documents to export
     * @param outputDir target directory, or {@code null} to print instead of saving
     * @param simplify flag passed through to {@code Document.saveToTextFile}
     * @param report progress reporting interval (-1 or 0 disables reporting)
     * @throws IllegalStateException if a document has no ID
     */
    private static void run(DocumentIterator documents, File outputDir, boolean simplify, int report) {
        int stored = 0;
        for (Document d : documents) {
            if (d == null) {
                continue;
            }
            String id = d.getID();
            if (id == null) {
                throw new IllegalStateException("ID not set");
            }
            if (outputDir == null) {
                System.out.println(id);
                d.print();
                continue;
            }
            File outFile = new File(outputDir, id.replace(File.separatorChar, '_') + ".txt");
            d.saveToTextFile(outFile, simplify);
            if (isReportDue(++stored, report)) {
                System.out.println("Stored " + stored + " documents.");
            }
        }
    }

    /**
     * Command-line entry point. Prints usage information for {@code --help} (or
     * when called without arguments) and otherwise executes, in this order, each
     * of the commands {@code --outDir}, {@code --outSeparated}, {@code --print},
     * {@code --saveToDB} and {@code --buildDescriptions} that is present. The
     * document source is resolved anew for every command.
     *
     * @param args command-line arguments
     * @throws IllegalStateException if a command is given without a document input option
     */
    public static void main(String[] args) {
        ArgParser ap = new ArgParser(args);
        Logger logger = Loggers.getDefaultLogger(ap);
        int report = ap.getInt("report", -1);
        if (ap.containsKey("help") || args.length == 0) {
            System.out.println("documentparser.jar [--properties <conf file>]");
            System.out.println(DocumentParser.getDocumentHelpMessage());
            System.out.println("[--outDir <export directory> [--simplify]]");
            System.out.println("[--getPubYears <output file> [--report <report interval>]]");
            System.exit(0);
        }
        if (ap.containsKey("outDir")) {
            DocumentIterator documents = DocumentParser.requireDocuments(ap, logger);
            File outDir = ap.getFile("outDir");
            DocumentParser.run(documents, outDir, ap.containsKey("simplify"), report);
        }
        if (ap.containsKey("outSeparated")) {
            DocumentIterator documents = DocumentParser.requireDocuments(ap, logger);
            File outBaseDir = ap.getFile("outSeparated");
            DocumentParser.runSeparated(documents, outBaseDir, report, logger);
        }
        if (ap.containsKey("print")) {
            DocumentIterator documents = DocumentParser.requireDocuments(ap, logger);
            for (Document d : documents) {
                if (d != null) {
                    d.print();
                }
            }
        }
        if (ap.containsKey("saveToDB")) {
            String table = ap.get("saveToDB");
            DocumentIterator documents = DocumentParser.requireDocuments(ap, logger);
            Connection conn = SQL.connectMySQL(ap, logger, "articles");
            logger.info("%t: Processing...\n");
            DocumentParser.saveToDB(documents, conn, logger, table, report, ap.containsKey("clear"));
            logger.info("%t: Completed.\n");
        }
        if (ap.containsKey("buildDescriptions")) {
            DocumentIterator documents = DocumentParser.requireDocuments(ap, logger);
            DocumentParser.buildDescriptions(documents, logger, ap.getFile("buildDescriptions"), report);
        }
    }

    /**
     * Writes a tab-separated file with one {@code ID, description, year} line per
     * document, preceded by a header line. A year that is missing or not exactly
     * four characters long is written as "0". {@code null} entries are skipped.
     *
     * <p>Any exception is printed to standard error and terminates the JVM with
     * exit code -1; the writer is closed first so that lines already written are
     * flushed to disk.
     *
     * @param documents documents to describe
     * @param logger logger that receives the progress messages
     * @param outFile file to write
     * @param report progress reporting interval (-1 or 0 disables reporting)
     */
    private static void buildDescriptions(DocumentIterator documents, Logger logger, File outFile, int report) {
        try {
            BufferedWriter outStream = new BufferedWriter(new FileWriter(outFile));
            try {
                int processed = 0;
                outStream.write("#ID\tdescription\tyear\n");
                for (Document d : documents) {
                    if (d == null) {
                        continue;
                    }
                    String rawYear = d.getYear();
                    String year = rawYear != null && rawYear.length() == 4 ? rawYear : "0";
                    outStream.write(d.getID() + "\t" + d.getDescription() + "\t" + year + "\n");
                    if (isReportDue(++processed, report)) {
                        logger.info("%t: Processed " + processed + " documents.\n");
                    }
                }
                logger.info("%t: Completed.\n");
            } finally {
                // Close (and thereby flush) even when the loop fails part-way.
                outStream.close();
            }
        }
        catch (Exception e) {
            System.err.println(e);
            e.printStackTrace();
            System.exit(-1);
        }
    }

    /**
     * Stores every non-null document through a prepared insert statement obtained
     * from {@code Document.prepareInsertStatements}.
     *
     * <p>NOTE(review): neither the statement nor the connection is closed here;
     * confirm whether {@code Document.saveToDB} relies on them staying open.
     *
     * @param documents documents to store
     * @param conn database connection
     * @param logger logger that receives the progress messages
     * @param table name of the target table
     * @param report progress reporting interval (-1 or 0 disables reporting)
     * @param clear flag passed through to {@code Document.prepareInsertStatements}
     */
    private static void saveToDB(DocumentIterator documents, Connection conn, Logger logger, String table, int report, boolean clear) {
        PreparedStatement pstmt = Document.prepareInsertStatements(conn, table, clear);
        int seen = 0;
        for (Document d : documents) {
            if (d != null) {
                d.saveToDB(pstmt);
            }
            // Null entries still count towards the progress total.
            if (isReportDue(++seen, report)) {
                logger.info("%t: Saved " + seen + " documents to DB.\n");
            }
        }
    }

    /**
     * Same as {@link #getDocuments(ArgParser, Logger)} without a logger.
     *
     * @param ap parsed command-line arguments
     * @return the configured document iterator, or {@code null} if no input option is present
     */
    public static DocumentIterator getDocuments(ArgParser ap) {
        return DocumentParser.getDocuments(ap, null);
    }

    /**
     * Builds the document iterator selected by the command-line options.
     *
     * <p>The first matching input option wins, checked in this order:
     * {@code pmcAbs}, {@code medlineIndex}, {@code medlinePMCIndex},
     * {@code pmcIndex}, {@code pmcDir}, {@code pmc}, {@code OTMI}, {@code OTMIDir},
     * {@code text}, {@code textDir}, {@code bmcxml}, {@code bmcxmlDir},
     * {@code bmcDir}, {@code databaseDocs}, {@code databaseList},
     * {@code elsevierDir}, {@code idsOnly}.
     *
     * <p>The resulting iterator is then wrapped or advanced according to the
     * options {@code buffer}, {@code skip}, {@code skipEvery},
     * {@code cleanUnicode} and {@code split}, in that order. The {@code text}
     * input is returned directly, so none of these options is applied to it.
     *
     * @param ap parsed command-line arguments
     * @param logger logger for informational messages; may be {@code null}, in
     *        which case the messages of this method are suppressed
     * @return the configured document iterator, or {@code null} if no input option is present
     * @throws IllegalStateException if {@code databaseList} does not have exactly two values
     */
    public static DocumentIterator getDocuments(ArgParser ap, Logger logger) {
        String[] dtds = ap.gets("dtd");
        boolean recursive = ap.containsKey("recursive");
        DocumentIterator documents = null;

        if (ap.containsKey("pmcAbs")) {
            File medlineBaseDir = ap.getFile("medlineBaseDir");
            File medlineIndexFile = ap.getFile("medlineIndex");
            DocumentIterator medlineDocs = medlineIndexFile != null ? new MedlineIndexFactory(medlineBaseDir, null).parse(medlineIndexFile) : null;
            File pmcBaseDir = ap.getFile("pmcBaseDir");
            File pmcIndexFile = ap.getFile("pmcIndex");
            DocumentIterator pmcDocs = pmcIndexFile != null ? new PMCIndexFactory(pmcBaseDir, dtds).parse(pmcIndexFile) : null;
            documents = new PMCAbstract(pmcDocs, medlineDocs);
        } else if (ap.containsKey("medlineIndex")) {
            File medlineBaseDir = ap.getFile("medlineBaseDir");
            File indexFile = ap.getFile("medlineIndex");
            documents = new MedlineIndexFactory(medlineBaseDir, null).parse(indexFile);
        } else if (ap.containsKey("medlinePMCIndex")) {
            File medlineBaseDir = ap.getFile("medlineBaseDir");
            File pmcBaseDir = ap.getFile("pmcBaseDir");
            File indexFile = ap.getFile("medlinePMCIndex");
            documents = new MedlinePMCIndexFactory(medlineBaseDir, pmcBaseDir, dtds, null).parse(indexFile);
        } else if (ap.containsKey("pmcIndex")) {
            File pmcBaseDir = ap.getFile("pmcBaseDir");
            File indexFile = ap.getFile("pmcIndex");
            documents = new PMCIndexFactory(pmcBaseDir, dtds).parse(indexFile);
        } else if (ap.containsKey("pmcDir")) {
            PMCFactory pmcFactory = new PMCFactory(dtds);
            documents = new Directory(ap.getFile("pmcDir"), pmcFactory, "xml", recursive);
        } else if (ap.containsKey("pmc")) {
            PMCFactory pmcFactory = new PMCFactory(dtds);
            documents = pmcFactory.parse(ap.getFile("pmc"));
        } else if (ap.containsKey("OTMI")) {
            documents = new OTMI(ap.getFile("OTMI"));
        } else if (ap.containsKey("OTMIDir")) {
            documents = new Directory(ap.getFile("OTMIDir"), new OTMIFactory(), ".otmi", recursive);
        } else if (ap.containsKey("text")) {
            // Returned as-is: buffer/skip/skipEvery/cleanUnicode/split are not applied.
            return new TextFile(ap.getFiles("text"));
        } else if (ap.containsKey("textDir")) {
            documents = new Directory(ap.getFile("textDir"), new TextFileFactory(), ".txt", recursive);
        } else if (ap.containsKey("bmcxml")) {
            documents = new BMCXMLFactory().parse(ap.getFile("bmcxml"));
        } else if (ap.containsKey("bmcxmlDir")) {
            documents = new Directory(ap.getFile("bmcxmlDir"), new BMCXMLFactory(), ".xml", recursive);
        } else if (ap.containsKey("bmcDir")) {
            documents = new Directory(ap.getFile("bmcDir"), new BMCFactory(dtds), ".xml", recursive);
        } else if (ap.containsKey("databaseDocs")) {
            MyConnection conn = SQL.connectMySQL2(ap, logger, "articles");
            documents = new DatabaseIterator(conn, ap.get("databaseDocs"), ap.containsKey("full"), ap.get("skipDocIdsQuery"));
        } else if (ap.containsKey("databaseList")) {
            if (ap.gets("databaseList").length != 2) {
                throw new IllegalStateException("Usage: --databaseList <table> <file with docids>");
            }
            MyConnection conn = SQL.connectMySQL2(ap, logger, "articles");
            ArrayList<String> ids = new ArrayList<String>();
            ids.addAll(Misc.loadStringSetFromFile(ap.getFiles("databaseList")[1]));
            documents = new DatabaseListIterator(conn, ap.gets("databaseList")[0], ids, ap.containsKey("full"));
        } else if (ap.containsKey("elsevierDir")) {
            ElsevierFactory factory = new ElsevierFactory(dtds);
            documents = new Directory(ap.getFile("elsevierDir"), factory, "xml", recursive);
        } else if (ap.containsKey("idsOnly")) {
            documents = new IDIterator(ap.getFile("idsOnly"));
        }

        if (documents == null) {
            // No input option matched; there is nothing to wrap.
            return null;
        }

        if (ap.containsKey("buffer")) {
            documents = new DocumentBuffer(documents, ap.getInt("buffer", 250), logger);
        }
        if (ap.containsKey("skip")) {
            int skip = ap.getInt("skip");
            if (logger != null) {
                logger.info("%t: Skipping " + skip + " documents...\n");
            }
            for (int i = 0; i < skip; ++i) {
                documents.skip();
            }
            if (logger != null) {
                logger.info("%t: Skip complete.\n");
            }
        }
        if (ap.containsKey("skipEvery")) {
            int skipEvery = ap.getInt("skipEvery");
            if (logger != null) {
                logger.info("%t: Will be skipping " + skipEvery + " documents for each processed document.\n");
            }
            documents = new Skipper(documents, skipEvery);
        }
        if (ap.containsKey("cleanUnicode")) {
            if (logger != null) {
                logger.info("%t: Removing high unicode characters from documents.");
            }
            documents = new CleanUnicode(documents);
        }
        if (ap.getInt("split", 0) > 0) {
            int split = ap.getInt("split");
            if (logger != null) {
                logger.info("%t: Splitting all documents at " + split + " sentencens.");
            }
            documents = new Splitter(documents, split);
        }
        return documents;
    }

    /**
     * Returns the usage text for the document input options.
     *
     * @return a multi-line usage string, one option group per line
     */
    public static String getDocumentHelpMessage() {
        return "[--medlineIndex <file> --medlineBaseDir <dir>]\n"
                + "[--medlinePMCIndex <file> --medlineBaseDir <dir> --pmcBaseDir <dir> --dtd <files>]\n"
                + "[--pmcIndex <file> --pmcBaseDir<dir> --dtd <files>]\n"
                + "[--textDir <dir> [--recursive]]\n"
                + "[--OTMIDir <dir> [--recursive]]\n";
    }

    /**
     * Loads all documents selected by the command-line options into a map keyed
     * by document ID. If several documents share an ID, the last one wins.
     * {@code null} entries are skipped.
     *
     * @param ap parsed command-line arguments
     * @return a map from document ID to document; empty if no input option is present
     */
    public static Map<String, Document> getDocumentsToHash(ArgParser ap) {
        HashMap<String, Document> byId = new HashMap<String, Document>();
        DocumentIterator documents = DocumentParser.getDocuments(ap);
        if (documents == null) {
            return byId;
        }
        for (Document d : documents) {
            if (d != null) {
                byId.put(d.getID(), d);
            }
        }
        return byId;
    }
}

