/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jsbd;

import cc.mallet.fst.CRF;
import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.LabelSequence;
import de.julielab.jsbd.EOSSymbols;
import de.julielab.jsbd.SentenceSplitter;
import de.julielab.jsbd.Unit;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.Random;
import java.util.TreeSet;
import java.util.zip.GZIPInputStream;

public class SentenceSplitterApplication {
    private static final boolean doPostprocessing = true;

    /**
     * Command-line entry point: dispatches to the run mode named by the first argument.
     *
     * <p>When no argument is given, a usage summary is printed to stderr and the JVM exits
     * with status -1. An unrecognized mode letter is reported the same way.
     *
     * @param args the mode letter followed by the mode-specific parameters
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            String[] usageLines = new String[]{
                "usage: JSBD <mode> {mode_specific_parameters}",
                "different modes:",
                "c: check texts",
                "t: train a sentence splitting model",
                "p: do the sentence splitting",
                "s: evaluation with 90-10 split",
                "x: evaluation with cross-validation",
                "e: evaluation on previously trained model"
            };
            for (String usageLine : usageLines) {
                System.err.println(usageLine);
            }
            System.exit(-1);
        }

        String runMode = args[0];
        if ("c".equals(runMode)) {
            SentenceSplitterApplication.startCheckMode(args);
            return;
        }
        if ("t".equals(runMode)) {
            SentenceSplitterApplication.startTrainingMode(args);
            return;
        }
        if ("p".equals(runMode)) {
            SentenceSplitterApplication.startPredictionMode(args);
            return;
        }
        if ("x".equals(runMode)) {
            SentenceSplitterApplication.startXValidationMode(args);
            return;
        }
        if ("s".equals(runMode)) {
            SentenceSplitterApplication.start9010ValidationMode(args);
            return;
        }
        if ("e".equals(runMode)) {
            SentenceSplitterApplication.startCompareValidationMode(args);
            return;
        }
        System.err.println("Unknown run mode.");
        System.exit(-1);
    }

    /**
     * Mode "e": evaluates a previously trained, gzip-compressed serialized CRF model on the
     * files of a directory, writes the collected errors to a file and prints the accuracy.
     *
     * <p>Exits with status -1 on wrong argument count, when the model cannot be loaded, or
     * when the text directory is missing or unreadable; exits with status 0 after a
     * successful evaluation.
     *
     * @param args {@code e <modelFile> <predictInDir> <errorFile>}
     */
    private static void startCompareValidationMode(String[] args) {
        System.out.println("performing evaluation previously trained model.");
        if (args.length != 4) {
            System.err.println("usage: JSBD e <modelFile> <predictInDir> <errorFile>");
            System.exit(-1);
        }

        // NOTE(review): this uses Java native deserialization; only load model files from
        // trusted sources.
        CRF crf = null;
        FileInputStream fileIn = null;
        ObjectInputStream in = null;
        try {
            fileIn = new FileInputStream(args[1]);
            in = new ObjectInputStream(new GZIPInputStream(fileIn));
            crf = (CRF)in.readObject();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            // Close the outermost stream that was successfully created so the file handle
            // is released even when wrapping or reading failed.
            try {
                if (in != null) {
                    in.close();
                } else if (fileIn != null) {
                    fileIn.close();
                }
            }
            catch (IOException ignored) {
                // Nothing useful can be done about a failed close of a read-only stream.
            }
        }
        // Without a model the evaluation below would only fail later with a
        // NullPointerException, so stop here with a clear message instead.
        if (crf == null) {
            System.err.println("Error: could not load the model from " + args[1]);
            System.exit(-1);
        }

        File abstractDir = new File(args[2]);
        if (!abstractDir.isDirectory()) {
            System.err.println("Error: the specified directory does not exist.");
            System.exit(-1);
        }
        File[] abstractArray = abstractDir.listFiles();
        // listFiles() returns null when the directory cannot be read.
        if (abstractArray == null) {
            System.err.println("Error: the specified directory could not be read.");
            System.exit(-1);
        }
        TreeSet<String> errorList = new TreeSet<String>();
        EvalResult er = SentenceSplitterApplication.doEvaluation(crf, abstractArray, errorList);
        SentenceSplitterApplication.writeFile(errorList, new File(args[3]));
        System.out.println("\n\nAccuracy on pretrained model: " + er.ACC);
        System.exit(0);
    }

    /**
     * Mode "s": trains and evaluates on a 90/10 split of the files in a directory, writes
     * the collected errors to a file and prints the accuracy.
     *
     * <p>Exits with status -1 on wrong argument count or a missing directory, and with
     * status 0 once the evaluation has finished.
     *
     * @param args {@code s <textDir> <errorFile>}
     */
    private static void start9010ValidationMode(String[] args) {
        System.out.println("performing evaluation on 90/10 split");
        if (args.length != 3) {
            System.err.println("usage: JSBD s <textDir> <errorFile>");
            System.exit(-1);
        }

        File textDir = new File(args[1]);
        if (!textDir.isDirectory()) {
            System.err.println("Error: the specified directory does not exist.");
            System.exit(-1);
        }

        TreeSet<String> errors = new TreeSet<String>();
        EvalResult result = SentenceSplitterApplication.do9010Evaluation(textDir.listFiles(), errors);
        File errorFile = new File(args[2]);
        SentenceSplitterApplication.writeFile(errors, errorFile);
        System.out.println("\n\nAccuracy on 90/10 split: " + result.ACC);
        System.exit(0);
    }

    /**
     * Mode "x": runs an n-fold cross-validation over the files in a directory, writes the
     * collected errors to a file and prints the average accuracy.
     *
     * <p>The number of rounds must be an integer in [2, 10] and must not exceed half the
     * number of files. Exits with status -1 on any invalid argument and with status 0 once
     * the evaluation has finished.
     *
     * @param args {@code x <textDir> <cross-val-rounds> <errorFile>}
     */
    private static void startXValidationMode(String[] args) {
        System.out.println("performing cross-validation");
        if (args.length != 4) {
            System.err.println("usage: JSBD x <textDir> <cross-val-rounds> <errorFile>");
            System.exit(-1);
        }
        File abstractDir = new File(args[1]);
        if (!abstractDir.isDirectory()) {
            System.err.println("Error: the specified directory does not exist.");
            System.exit(-1);
        }
        File[] abstractArray = abstractDir.listFiles();
        // listFiles() returns null when the directory cannot be read.
        if (abstractArray == null) {
            System.err.println("Error: the specified directory could not be read.");
            System.exit(-1);
        }

        // Report a malformed round count as a usage error instead of letting a
        // NumberFormatException escape with a stack trace.
        int n = 0;
        try {
            n = Integer.parseInt(args[2]);
        }
        catch (NumberFormatException e) {
            System.err.println("Error: <cross-val-rounds> must be an integer, but was: " + args[2]);
            System.exit(-1);
        }
        if (n > abstractArray.length / 2 || n > 10 || n < 2) {
            System.err.println("Error: cannot perform " + n + " cross-validation rounds. Choose n in [2:10].");
            System.exit(-1);
        }

        TreeSet<String> errorList = new TreeSet<String>();
        double acc = SentenceSplitterApplication.doCrossEvaluation(abstractArray, n, errorList);
        SentenceSplitterApplication.writeFile(errorList, new File(args[3]));
        System.out.println("\n\nAccuracy on cross validation: " + acc);
        System.exit(0);
    }

    /**
     * Mode "p": validates the prediction arguments and runs sentence splitting on every
     * file of the input directory.
     *
     * <p>Exits with status -1 on wrong argument count or when either directory is missing.
     *
     * @param args {@code p <inDir> <outDir> <modelFilename>}
     */
    private static void startPredictionMode(String[] args) {
        System.out.println("doing the sentence splitting...");
        if (args.length != 4) {
            System.err.println("usage: JSBD p <inDir> <outDir> <modelFilename>");
            System.exit(-1);
        }

        File inputDir = new File(args[1]);
        if (!inputDir.isDirectory()) {
            System.err.println("Error: the specified input directory does not exist.");
            System.exit(-1);
        }
        File[] inputFiles = inputDir.listFiles();

        File outputDir = new File(args[2]);
        if (!outputDir.isDirectory()) {
            System.err.println("Error: the specified output directory does not exist.");
            System.exit(-1);
        }

        SentenceSplitterApplication.doPrediction(inputFiles, outputDir, args[3]);
    }

    /**
     * Mode "t": validates the training arguments, trains a model on every file of the
     * training directory and reports where the model was saved.
     *
     * <p>Exits with status -1 on wrong argument count or a missing directory.
     *
     * @param args {@code t <trainDir> <modelFilename>}
     */
    private static void startTrainingMode(String[] args) {
        System.out.println("training the model...");
        if (args.length != 3) {
            System.err.println("usage: JSBD t <trainDir> <modelFilename>");
            System.exit(-1);
        }

        File trainingDir = new File(args[1]);
        if (!trainingDir.isDirectory()) {
            System.err.println("Error: the specified directory does not exist.");
            System.exit(-1);
        }

        File[] trainingFiles = trainingDir.listFiles();
        System.out.println("number of files to train on: " + trainingFiles.length);

        String modelPath = args[2];
        SentenceSplitterApplication.doTraining(trainingFiles, modelPath);
        System.out.println("Saved model to: " + modelPath);
    }

    /**
     * Mode "c": validates the arguments and runs the text check over every file of the
     * given directory.
     *
     * <p>Exits with status -1 on wrong argument count or a missing directory, and with
     * status 0 once the check has finished.
     *
     * @param args {@code c <textDir>}
     */
    private static void startCheckMode(String[] args) {
        System.out.println("checking abstracts...");
        if (args.length != 2) {
            System.err.println("usage: JSBD c <textDir>");
            System.exit(-1);
        }

        File textDir = new File(args[1]);
        if (!textDir.isDirectory()) {
            System.err.println("Error: the specified directory does not exist.");
            System.exit(-1);
        }

        SentenceSplitterApplication.doCheckAbstracts(textDir.listFiles());
        System.exit(0);
    }

    /**
     * Checks the given files by building training data from them; the resulting data is
     * discarded and "done." is printed afterwards.
     *
     * @param abstractList the files to check
     */
    private static void doCheckAbstracts(File[] abstractList) {
        new SentenceSplitter().makeTrainingData(abstractList, false);
        System.out.println("done.");
    }

    /**
     * Shuffles the files with a fixed seed, trains on the first 90% and evaluates on the
     * remaining 10%.
     *
     * <p>Exits with status -1 when the 10% share rounds down to zero files.
     *
     * @param abstractArray all available files
     * @param errorList     receives one entry per wrong decision
     * @return the evaluation result on the held-out files
     */
    private static EvalResult do9010Evaluation(File[] abstractArray, TreeSet<String> errorList) {
        ArrayList<File> shuffled = new ArrayList<File>();
        for (File candidate : abstractArray) {
            shuffled.add(candidate);
        }
        // Fixed seed keeps the split reproducible between runs.
        Collections.shuffle(shuffled, new Random(1L));

        int total = shuffled.size();
        int testCount = (int)(total * 0.1);
        int trainCount = total - testCount;
        if (testCount == 0) {
            System.err.println("Error: no test files for this split. Number of files in directory might be too small.");
            System.exit(-1);
        }
        System.out.println("all: " + total + "\ttrain: " + trainCount + "\t" + "test: " + testCount);

        // The head of the shuffled list is used for training, the tail for prediction.
        File[] trainFiles = shuffled.subList(0, trainCount).toArray(new File[trainCount]);
        File[] predictFiles = shuffled.subList(trainCount, total).toArray(new File[testCount]);
        return SentenceSplitterApplication.doEvaluation(trainFiles, predictFiles, errorList);
    }

    /**
     * Runs an n-fold cross-validation over the given files.
     *
     * <p>The files are shuffled with a fixed seed and cut into {@code n} consecutive folds
     * of {@code length / n} files each; the last fold additionally takes the remainder.
     * Each round trains on everything outside the fold and evaluates on the fold. Per-round
     * accuracy and the averaged accuracy and F-score are printed.
     *
     * @param abstractArray all available files
     * @param n             number of rounds
     * @param errorList     receives one entry per wrong decision, across all rounds
     * @return the accuracy averaged over all rounds
     */
    private static double doCrossEvaluation(File[] abstractArray, int n, TreeSet<String> errorList) {
        ArrayList<File> shuffled = new ArrayList<File>();
        for (File candidate : abstractArray) {
            shuffled.add(candidate);
        }
        // Fixed seed keeps the fold assignment reproducible between runs.
        Collections.shuffle(shuffled, new Random(1L));

        int sizeRound = abstractArray.length / n;
        int sizeAll = abstractArray.length;
        int sizeLastRound = sizeRound + sizeAll % n;
        System.out.println("number of files in directory: " + sizeAll);
        System.out.println("size of each/last round: " + sizeRound + "/" + sizeLastRound);
        System.out.println();

        EvalResult[] evalResults = new EvalResult[n];
        int foldStart = 0;
        for (int round = 0; round < n; ++round) {
            // Only the final round uses the enlarged fold that absorbs the remainder.
            int foldSize = round == n - 1 ? sizeLastRound : sizeRound;
            int foldEnd = foldStart + foldSize;
            File[] trainFiles = new File[sizeAll - foldSize];
            File[] predictFiles = new File[foldSize];

            int trainIdx = 0;
            int predictIdx = 0;
            for (int k = 0; k < shuffled.size(); ++k) {
                File current = shuffled.get(k);
                boolean insideFold = k >= foldStart && k < foldEnd;
                if (insideFold) {
                    predictFiles[predictIdx++] = current;
                } else {
                    trainFiles[trainIdx++] = current;
                }
            }
            foldStart += sizeRound;

            System.out.println("training size: " + trainFiles.length);
            System.out.println("prediction size: " + predictFiles.length);
            evalResults[round] = SentenceSplitterApplication.doEvaluation(trainFiles, predictFiles, errorList);
        }

        DecimalFormat threeDecimals = new DecimalFormat("0.000");
        double accSum = 0.0;
        double fSum = 0.0;
        for (int round = 0; round < evalResults.length; ++round) {
            EvalResult roundResult = evalResults[round];
            accSum += roundResult.ACC;
            fSum += roundResult.getF();
            System.out.println(round + ": " + threeDecimals.format(roundResult.ACC));
        }
        double avgAcc = accSum / (double)n;
        double avgF = fSum / (double)n;
        System.out.println("avg accuracy: " + threeDecimals.format(avgAcc));
        System.out.println("avg f-score: " + threeDecimals.format(avgF));
        return avgAcc;
    }

    /**
     * Trains a fresh model on {@code trainFiles} and evaluates it on {@code predictFiles}.
     *
     * @param trainFiles   files used to build the training data
     * @param predictFiles files the trained model is evaluated on
     * @param errorList    receives one entry per wrong decision
     * @return the evaluation result for the trained model
     */
    private static EvalResult doEvaluation(File[] trainFiles, File[] predictFiles, TreeSet<String> errorList) {
        SentenceSplitter splitter = new SentenceSplitter();
        // NOTE(review): the instance is not used in this method; it is still created in
        // case the constructor has side effects - confirm and remove if it does not.
        new EOSSymbols();
        InstanceList trainingInstances = splitter.makeTrainingData(trainFiles, false);
        Pipe featurePipe = trainingInstances.getPipe();
        System.out.println("training...");
        splitter.train(trainingInstances, featurePipe);
        CRF trainedModel = splitter.getModel();
        return SentenceSplitterApplication.doEvaluation(trainedModel, predictFiles, errorList);
    }

    /**
     * Evaluates a trained model on the given files and prints the resulting statistics.
     *
     * <p>Only units whose representation ends with an end-of-sentence symbol (as decided by
     * {@link EOSSymbols#tokenEndsWithEOSSymbol}) count as decisions. A decision is correct
     * when the predicted label equals the original label. A wrong decision is recorded in
     * {@code errorList} and counted as a false positive (predicted "EOS", original "IS") or
     * a false negative (predicted "IS", original "EOS").
     *
     * @param crf          the model to evaluate
     * @param predictFiles files to predict on
     * @param errorList    receives one tab-separated entry per wrong decision
     * @return counts and accuracy of the evaluation; the accuracy is NaN when no decision
     *         was made at all
     * @throws IllegalStateException if prediction fails for one of the files
     */
    private static EvalResult doEvaluation(CRF crf, File[] predictFiles, TreeSet<String> errorList) {
        SentenceSplitter tpFunctions = new SentenceSplitter();
        tpFunctions.setModel(crf);
        EOSSymbols eoss = new EOSSymbols();
        InstanceList predictData = tpFunctions.makePredictionData(predictFiles, crf.getInputPipe());
        System.out.println("predicting...");
        int corr = 0;
        int all = 0;
        int fn = 0;
        int fp = 0;
        for (int i = 0; i < predictData.size(); ++i) {
            Instance inst = (Instance)predictData.get(i);
            String abstractName = (String)inst.getSource();
            ArrayList<Unit> units;
            try {
                units = tpFunctions.predict(inst, true);
            }
            catch (Exception e) {
                // Continuing with missing predictions would only fail below with a
                // NullPointerException; fail here with the file name and the cause.
                throw new IllegalStateException("Prediction failed for " + abstractName, e);
            }
            if (units == null) {
                throw new IllegalStateException("Prediction returned no units for " + abstractName);
            }
            ArrayList<String> orgLabels = SentenceSplitterApplication.getLabelsFromLabelSequence((LabelSequence)inst.getTarget());
            for (int j = 0; j < units.size(); ++j) {
                Unit unit = units.get(j);
                String unitRep = unit.rep;
                String pred = unit.label;
                String org = orgLabels.get(j);
                // Units that cannot end a sentence are no decision points.
                if (!eoss.tokenEndsWithEOSSymbol(unitRep)) {
                    continue;
                }
                ++all;
                if (pred.equals(org)) {
                    ++corr;
                    continue;
                }
                errorList.add(abstractName + "\t" + org + "\t" + pred + "\t" + unitRep + "  (" + j + ")");
                if (pred.equals("EOS") && org.equals("IS")) {
                    ++fp;
                } else if (pred.equals("IS") && org.equals("EOS")) {
                    ++fn;
                }
            }
        }
        double acc = (double)corr / (double)all;
        EvalResult er = new EvalResult();
        er.corrDecisions = corr;
        er.nrDecisions = all;
        er.fn = fn;
        er.fp = fp;
        er.ACC = acc;
        System.out.println("all : " + all);
        System.out.println("corr: " + corr);
        System.out.println("fp :" + fp);
        System.out.println("fn :" + fn);
        System.out.println("R :" + er.getR());
        System.out.println("P :" + er.getP());
        System.out.println("F :" + er.getF());
        System.out.println("ACC : " + acc);
        return er;
    }

    /**
     * Builds training data from the given files, trains a model on it and writes the model
     * to {@code modelFilename}.
     *
     * @param trainFiles    files to train on
     * @param modelFilename destination of the trained model
     */
    private static void doTraining(File[] trainFiles, String modelFilename) {
        SentenceSplitter splitter = new SentenceSplitter();

        System.out.println("making training data...");
        InstanceList trainingInstances = splitter.makeTrainingData(trainFiles, false);
        Pipe featurePipe = trainingInstances.getPipe();

        System.out.println("training model...");
        splitter.train(trainingInstances, featurePipe);

        splitter.writeModel(modelFilename);
    }

    /**
     * Splits every input file into sentences and writes the result, one sentence per line,
     * to a file of the same name in {@code outDir}.
     *
     * <p>Units are joined with single spaces; a sentence ends at each unit labelled "EOS".
     * Units following the last "EOS" label are written as a final line rather than being
     * dropped. A file whose prediction fails is reported on stderr and skipped. If the
     * model cannot be read, the JVM exits with status -1.
     *
     * @param inFiles       files to split
     * @param outDir        directory receiving the output files
     * @param modelFilename path of the model to load
     */
    private static void doPrediction(File[] inFiles, File outDir, String modelFilename) {
        SentenceSplitter sentenceSplitter = new SentenceSplitter();
        System.out.println("reading model...");
        try {
            sentenceSplitter.readModel(new File(modelFilename));
        }
        catch (Exception e) {
            // Without a model nothing below can work; stop with a clear message instead of
            // running into a NullPointerException.
            e.printStackTrace();
            System.err.println("Error: could not read the model from " + modelFilename);
            System.exit(-1);
        }
        System.out.println("starting sentence splitting...");
        Pipe myPipe = sentenceSplitter.getModel().getInputPipe();
        final int step = 100;
        for (int i = 0; i < inFiles.length; ++i) {
            if (i > 0 && i % step == 0) {
                System.out.println(i + " files done...");
            }
            File inFile = inFiles[i];
            ArrayList<String> fileLines = sentenceSplitter.readFile(inFile);
            Instance inst = myPipe.instanceFrom(new Instance(fileLines, "", "", inFile.getName()));

            ArrayList<Unit> units = null;
            try {
                units = sentenceSplitter.predict(inst, true);
            }
            catch (Exception e) {
                e.printStackTrace();
            }
            if (units == null) {
                System.err.println("Error: prediction failed for " + inFile + ", skipping file.");
                continue;
            }

            ArrayList<String> lines = new ArrayList<String>();
            StringBuilder sentence = new StringBuilder();
            for (Unit unit : units) {
                if (sentence.length() > 0) {
                    sentence.append(' ');
                }
                sentence.append(unit.rep);
                if (unit.label.equals("EOS")) {
                    lines.add(sentence.toString());
                    sentence.setLength(0);
                }
            }
            // Keep trailing text that was not closed by an "EOS" label.
            if (sentence.length() > 0) {
                lines.add(sentence.toString());
            }

            // getName() yields the last path component independent of the platform's
            // separator character.
            SentenceSplitterApplication.writeFile(lines, new File(outDir, inFile.getName()));
        }
    }

    /**
     * Copies the entries of a label sequence, in order, into a list of strings.
     *
     * @param ls the label sequence to read; each entry is cast to {@code String}
     * @return a new list holding one string per position of {@code ls}
     */
    private static ArrayList<String> getLabelsFromLabelSequence(LabelSequence ls) {
        ArrayList<String> result = new ArrayList<String>();
        for (int pos = 0; pos < ls.size(); ++pos) {
            String label = (String)ls.get(pos);
            result.add(label);
        }
        return result;
    }

    /**
     * Writes the given lines, in the set's iteration order, to {@code outFile}, each
     * terminated by a newline character.
     *
     * <p>I/O failures are printed to stderr and not propagated. The writer is closed even
     * when writing fails.
     *
     * @param lines   the lines to write
     * @param outFile the file to create or overwrite
     */
    private static void writeFile(TreeSet<String> lines, File outFile) {
        FileWriter fw = null;
        try {
            fw = new FileWriter(outFile);
            for (String line : lines) {
                fw.write(line + "\n");
            }
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        finally {
            if (fw != null) {
                try {
                    fw.close();
                }
                catch (IOException e) {
                    // close() flushes buffered output, so a failure here means lost data.
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Writes the given lines, in list order, to {@code outFile}, each terminated by a
     * newline character.
     *
     * <p>Failures are printed to stderr and not propagated. The writer is closed even when
     * writing fails.
     *
     * @param lines   the lines to write
     * @param outFile the file to create or overwrite
     */
    private static void writeFile(ArrayList<String> lines, File outFile) {
        FileWriter fw = null;
        try {
            fw = new FileWriter(outFile);
            for (String line : lines) {
                fw.write(line + "\n");
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            if (fw != null) {
                try {
                    fw.close();
                }
                catch (IOException e) {
                    // close() flushes buffered output, so a failure here means lost data.
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Plain holder for the counts of one evaluation run plus the measures derived from
     * them.
     *
     * <p>Recall and precision are computed from {@code corrDecisions} (not from a separate
     * true-positive count) together with {@code fn} and {@code fp} respectively. All three
     * measures return 0.0 instead of NaN when their denominator is zero, so that a
     * degenerate run does not poison averages computed over several runs.
     */
    private static class EvalResult {
        /** Number of decisions that were evaluated. */
        int nrDecisions;
        /** Accuracy as set by the code that fills this result. */
        double ACC;
        /** Number of false positives. */
        double fp;
        /** Number of false negatives. */
        double fn;
        /** Number of correct decisions. */
        double corrDecisions;

        private EvalResult() {
        }

        /**
         * @return the harmonic mean of recall and precision, or 0.0 when both are zero
         */
        double getF() {
            double recall = this.getR();
            double precision = this.getP();
            return EvalResult.ratio(2.0 * recall * precision, recall + precision);
        }

        /**
         * @return {@code corrDecisions / (corrDecisions + fn)}, or 0.0 when the denominator
         *         is zero
         */
        double getR() {
            return EvalResult.ratio(this.corrDecisions, this.corrDecisions + this.fn);
        }

        /**
         * @return {@code corrDecisions / (corrDecisions + fp)}, or 0.0 when the denominator
         *         is zero
         */
        double getP() {
            return EvalResult.ratio(this.corrDecisions, this.corrDecisions + this.fp);
        }

        /** Divides, mapping a zero denominator to 0.0 rather than NaN. */
        private static double ratio(double numerator, double denominator) {
            return denominator == 0.0 ? 0.0 : numerator / denominator;
        }
    }
}

