/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.ae.jsbd;

import cc.mallet.fst.CRF;
import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.LabelSequence;
import de.julielab.jcore.ae.jsbd.EOSSymbols;
import de.julielab.jcore.ae.jsbd.SentenceSplitter;
import de.julielab.jcore.ae.jsbd.Unit;
import de.julielab.jcore.ae.jsbd.postprocessingfilters.PostprocessingFilter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.TreeSet;
import java.util.zip.GZIPInputStream;

/**
 * Command line front end for the sentence splitter.
 *
 * <p>The first command line argument selects a run mode (see the usage text printed by
 * {@link #main(String[])}); the remaining arguments are mode specific. All modes report
 * problems on {@code System.err} and terminate the JVM with exit code {@code -1} on
 * invalid input.
 */
public class SentenceSplitterApplication {
    /**
     * Name of the postprocessing that is handed to {@code SentenceSplitter.predict}.
     * Defaults to "biomed" and may be overridden by an optional command line argument.
     */
    private static String doPostprocessing = "biomed";

    /**
     * Entry point: dispatches to the run mode named by {@code args[0]}.
     *
     * @param args mode letter followed by the mode specific parameters
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("usage: JSBD <mode> {mode_specific_parameters}");
            System.err.println("different modes:");
            System.err.println("c: check texts");
            System.err.println("t: train a sentence splitting model");
            System.err.println("p: do the sentence splitting");
            System.err.println("s: evaluation with 90-10 split");
            System.err.println("x: evaluation with cross-validation");
            System.err.println("e: evaluation on previously trained model");
            System.exit(-1);
        }
        String mode = args[0];
        switch (mode) {
            case "c":
                startCheckMode(args);
                break;
            case "t":
                startTrainingMode(args);
                break;
            case "p":
                startPredictionMode(args);
                break;
            case "x":
                startXValidationMode(args);
                break;
            case "s":
                start9010ValidationMode(args);
                break;
            case "e":
                startCompareValidationMode(args);
                break;
            default:
                System.err.println("Unknown run mode.");
                System.exit(-1);
        }
    }

    /**
     * Overrides {@link #doPostprocessing} with {@code args[index]} when that argument is present
     * and is one of the names offered by {@code PostprocessingFilter.POSTPROC_STREAM}.
     * An unknown name is reported on stderr and the current setting is kept.
     *
     * <p>NOTE(review): POSTPROC_STREAM is presumably a one-shot {@code java.util.stream.Stream};
     * this helper must therefore be called at most once per JVM run (as is the case today,
     * since each run mode calls it once) - confirm against PostprocessingFilter.
     *
     * @param args  the complete command line
     * @param index position of the optional postprocessing argument
     */
    private static void selectPostprocessing(String[] args, int index) {
        if (args.length <= index) {
            return;
        }
        String requested = args[index];
        if (PostprocessingFilter.POSTPROC_STREAM.anyMatch(requested::equals)) {
            doPostprocessing = requested;
        } else {
            System.err.println("Warning: unknown postprocessing '" + requested + "', using '" + doPostprocessing + "'.");
        }
    }

    /**
     * Lists the entries of the directory at {@code path}; terminates the JVM with exit code
     * {@code -1} if the path is not a directory or its contents cannot be listed.
     *
     * @param path                  path of the directory to list
     * @param notADirectoryMessage  message printed on stderr when {@code path} is not a directory
     * @return the directory entries, never {@code null}
     */
    private static File[] listDirectoryOrExit(String path, String notADirectoryMessage) {
        File dir = new File(path);
        if (!dir.isDirectory()) {
            System.err.println(notADirectoryMessage);
            System.exit(-1);
        }
        // File.listFiles() returns null on I/O errors even for an existing directory.
        File[] entries = dir.listFiles();
        if (entries == null) {
            System.err.println("Error: could not list the contents of directory " + dir);
            System.exit(-1);
        }
        return entries;
    }

    /**
     * Mode "e": evaluates a previously trained, gzipped and serialized CRF model on the files
     * of a directory and writes the collected errors to the error file.
     *
     * @param args {@code e <modelFile> <predictInDir> <errorFile> [<postprocessing>]}
     */
    private static void startCompareValidationMode(String[] args) {
        System.out.println("performing evaluation previously trained model.");
        if (args.length < 4) {
            System.err.println("usage: JSBD e <modelFile> <predictInDir> <errorFile> [<postprocessing>]");
            System.exit(-1);
        }
        selectPostprocessing(args, 4);
        CRF crf = null;
        // NOTE: this is Java-native deserialization; only load model files from trusted sources.
        try (FileInputStream fileIn = new FileInputStream(args[1]);
             GZIPInputStream gzipIn = new GZIPInputStream(fileIn);
             ObjectInputStream in = new ObjectInputStream(gzipIn)) {
            crf = (CRF) in.readObject();
        } catch (IOException | ClassNotFoundException e) {
            // Without a model there is nothing to evaluate; continuing would only end in an NPE.
            System.err.println("Error: could not read the model file " + args[1]);
            e.printStackTrace();
            System.exit(-1);
        }
        File[] abstractArray = listDirectoryOrExit(args[2], "Error: the specified directory does not exist.");
        TreeSet<String> errorList = new TreeSet<String>();
        EvalResult er = doEvaluation(crf, abstractArray, errorList);
        writeFile(errorList, new File(args[3]));
        System.out.println("\n\nAccuracy on pretrained model: " + er.ACC);
        System.exit(0);
    }

    /**
     * Mode "s": trains on 90% of the files of a directory, evaluates on the remaining 10%
     * and writes the collected errors to the error file.
     *
     * @param args {@code s <textDir> <errorFile> <splitAfterAllPunctuation> [<postprocessing>]}
     */
    private static void start9010ValidationMode(String[] args) {
        System.out.println("performing evaluation on 90/10 split");
        if (args.length < 4) {
            System.err.println("usage: JSBD s <textDir> <errorFile> <allow split on all punctuation (false: splits only occur before whitespaces)> [<postprocessing>]");
            System.exit(-1);
        }
        selectPostprocessing(args, 4);
        File[] abstractArray = listDirectoryOrExit(args[1], "Error: the specified directory does not exist.");
        TreeSet<String> errorList = new TreeSet<String>();
        boolean splitUnitsAfterPunctuation = Boolean.parseBoolean(args[3]);
        System.out.println("Allow sentence split after all punctuation: " + splitUnitsAfterPunctuation);
        EvalResult er = do9010Evaluation(abstractArray, errorList, splitUnitsAfterPunctuation);
        writeFile(errorList, new File(args[2]));
        System.out.println("\n\nAccuracy on 90/10 split: " + er.ACC);
        System.exit(0);
    }

    /**
     * Mode "x": runs an n-fold cross-validation over the files of a directory and writes the
     * collected errors to the error file. The number of rounds must be in [2:10] and must not
     * exceed half the number of files.
     *
     * @param args {@code x <textDir> <cross-val-rounds> <errorFile> <splitAfterAllPunctuation> [<postprocessing>]}
     */
    private static void startXValidationMode(String[] args) {
        System.out.println("performing cross-validation");
        if (args.length < 5) {
            System.err.println("usage: JSBD x <textDir> <cross-val-rounds> <errorFile> <allow split on all punctuation (false: splits only occur before whitespaces)> [<postprocessing>]");
            System.exit(-1);
        }
        selectPostprocessing(args, 5);
        File[] abstractArray = listDirectoryOrExit(args[1], "Error: the specified directory does not exist.");
        int n = 0;
        try {
            n = Integer.parseInt(args[2]);
        } catch (NumberFormatException e) {
            System.err.println("Error: the number of cross-validation rounds must be an integer but was '" + args[2] + "'.");
            System.exit(-1);
        }
        if (n > abstractArray.length / 2 || n > 10 || n < 2) {
            System.err.println("Error: cannot perform " + n + " cross-validation rounds. Choose n in [2:10].");
            System.exit(-1);
        }
        TreeSet<String> errorList = new TreeSet<String>();
        boolean splitUnitsAfterPunctuation = Boolean.parseBoolean(args[4]);
        System.out.println("Allowing sentence split after all punctuation: " + splitUnitsAfterPunctuation);
        double acc = doCrossEvaluation(abstractArray, n, errorList, splitUnitsAfterPunctuation);
        writeFile(errorList, new File(args[3]));
        System.out.println("\n\nAccuracy on cross validation: " + acc);
        System.exit(0);
    }

    /**
     * Mode "p": splits every file of the input directory into sentences using a stored model
     * and writes one result file per input file into the output directory.
     *
     * @param args {@code p <inDir> <outDir> <modelFilename> [<postprocessing>]}
     */
    private static void startPredictionMode(String[] args) {
        System.out.println("doing the sentence splitting...");
        if (args.length < 4) {
            System.err.println("usage: JSBD p <inDir> <outDir> <modelFilename> [<postprocessing>]");
            System.exit(-1);
        }
        selectPostprocessing(args, 4);
        File[] inFiles = listDirectoryOrExit(args[1], "Error: the specified input directory does not exist.");
        File outDir = new File(args[2]);
        if (!outDir.isDirectory()) {
            System.err.println("Error: the specified output directory does not exist.");
            System.exit(-1);
        }
        String modelFilename = args[3];
        doPrediction(inFiles, outDir, modelFilename);
    }

    /**
     * Mode "t": trains a model on the files of a directory and stores it under the given name.
     *
     * @param args {@code t <trainDir> <splitAfterAllPunctuation> <modelFilename>}
     */
    private static void startTrainingMode(String[] args) {
        System.out.println("training the model...");
        if (args.length != 4) {
            System.err.println("usage: JSBD t <trainDir> <allow split on all punctuation (false: splits only occur before whitespaces)> <modelFilename>");
            System.exit(-1);
        }
        File[] trainFiles = listDirectoryOrExit(args[1], "Error: the specified directory does not exist.");
        System.out.println("number of files to train on: " + trainFiles.length);
        boolean splitUnitsAfterPunctuation = Boolean.parseBoolean(args[2]);
        System.out.println("Allow sentence split after all punctuation: " + splitUnitsAfterPunctuation);
        String modelFilename = args[3];
        doTraining(trainFiles, splitUnitsAfterPunctuation, modelFilename);
        System.out.println("Saved model to: " + modelFilename);
    }

    /**
     * Mode "c": runs the training data creation on the files of a directory without training.
     *
     * @param args {@code c <textDir>}
     */
    private static void startCheckMode(String[] args) {
        System.out.println("checking abstracts...");
        if (args.length != 2) {
            System.err.println("usage: JSBD c <textDir>");
            System.exit(-1);
        }
        File[] abstractArray = listDirectoryOrExit(args[1], "Error: the specified directory does not exist.");
        doCheckAbstracts(abstractArray, false);
        System.exit(0);
    }

    /**
     * Builds training data from the given files and discards it; any problem surfaces through
     * {@code SentenceSplitter.makeTrainingData}.
     */
    private static void doCheckAbstracts(File[] abstractList, boolean splitUnitsAfterPunctuation) {
        SentenceSplitter tpFunctions = new SentenceSplitter();
        tpFunctions.makeTrainingData(abstractList, false, splitUnitsAfterPunctuation);
        System.out.println("done.");
    }

    /**
     * Shuffles the files with a fixed seed, trains on the first 90% and evaluates on the rest.
     * Terminates the JVM when there are too few files to obtain at least one test file.
     *
     * @param abstractArray              all available files
     * @param errorList                  receives one entry per wrong decision
     * @param splitUnitsAfterPunctuation forwarded to the training data creation
     * @return the evaluation result on the held-out files
     */
    private static EvalResult do9010Evaluation(File[] abstractArray, TreeSet<String> errorList, boolean splitUnitsAfterPunctuation) {
        List<File> shuffled = new ArrayList<File>(abstractArray.length);
        Collections.addAll(shuffled, abstractArray);
        // Fixed seed keeps the split reproducible between runs.
        Collections.shuffle(shuffled, new Random(1L));
        int sizeAll = shuffled.size();
        int sizeTest = (int) ((double) sizeAll * 0.1);
        int sizeTrain = sizeAll - sizeTest;
        if (sizeTest == 0) {
            System.err.println("Error: no test files for this split. Number of files in directory might be too small.");
            System.exit(-1);
        }
        System.out.println("all: " + sizeAll + "\ttrain: " + sizeTrain + "\ttest: " + sizeTest);
        File[] trainFiles = shuffled.subList(0, sizeTrain).toArray(new File[0]);
        File[] predictFiles = shuffled.subList(sizeTrain, sizeAll).toArray(new File[0]);
        return doEvaluation(trainFiles, predictFiles, errorList, splitUnitsAfterPunctuation);
    }

    /**
     * Performs an n-fold cross-validation. The files are shuffled with a fixed seed and cut
     * into {@code n} consecutive folds of {@code length / n} files; the last fold additionally
     * takes the remainder.
     *
     * @param abstractArray              all available files
     * @param n                          number of rounds; callers must pass a positive value
     * @param errorList                  receives one entry per wrong decision of all rounds
     * @param splitUnitsAfterPunctuation forwarded to the training data creation
     * @return the accuracy averaged over all rounds
     */
    private static double doCrossEvaluation(File[] abstractArray, int n, TreeSet<String> errorList, boolean splitUnitsAfterPunctuation) {
        List<File> shuffled = new ArrayList<File>(abstractArray.length);
        Collections.addAll(shuffled, abstractArray);
        Collections.shuffle(shuffled, new Random(1L));
        int sizeAll = shuffled.size();
        int sizeRound = sizeAll / n;
        int sizeLastRound = sizeRound + sizeAll % n;
        System.out.println("number of files in directory: " + sizeAll);
        System.out.println("size of each/last round: " + sizeRound + "/" + sizeLastRound);
        System.out.println();
        EvalResult[] evalResults = new EvalResult[n];
        for (int round = 0; round < n; ++round) {
            // Test fold is [testStart, testEnd); the last round runs to the end of the list.
            int testStart = round * sizeRound;
            int testEnd = round == n - 1 ? sizeAll : testStart + sizeRound;
            List<File> train = new ArrayList<File>(shuffled.subList(0, testStart));
            train.addAll(shuffled.subList(testEnd, sizeAll));
            File[] trainFiles = train.toArray(new File[0]);
            File[] predictFiles = shuffled.subList(testStart, testEnd).toArray(new File[0]);
            System.out.println("training size: " + trainFiles.length);
            System.out.println("prediction size: " + predictFiles.length);
            evalResults[round] = doEvaluation(trainFiles, predictFiles, errorList, splitUnitsAfterPunctuation);
        }
        DecimalFormat df = new DecimalFormat("0.000");
        double avgAcc = 0.0;
        double avgF = 0.0;
        for (int i = 0; i < evalResults.length; ++i) {
            avgAcc += evalResults[i].ACC;
            avgF += evalResults[i].getF();
            System.out.println(i + ": " + df.format(evalResults[i].ACC));
        }
        avgAcc /= (double) n;
        avgF /= (double) n;
        System.out.println("avg accuracy: " + df.format(avgAcc));
        System.out.println("avg f-score: " + df.format(avgF));
        return avgAcc;
    }

    /**
     * Trains a fresh model on {@code trainFiles} and evaluates it on {@code predictFiles}.
     *
     * @return the evaluation result on {@code predictFiles}
     */
    private static EvalResult doEvaluation(File[] trainFiles, File[] predictFiles, TreeSet<String> errorList, boolean splitUnitsAfterPunctuation) {
        SentenceSplitter tpFunctions = new SentenceSplitter();
        // NOTE(review): doTraining and doCheckAbstracts pass (files, false, splitUnitsAfterPunctuation)
        // while this call passes (files, splitUnitsAfterPunctuation, false). One of the two orders
        // looks wrong - confirm against the signature of SentenceSplitter.makeTrainingData.
        InstanceList trainData = tpFunctions.makeTrainingData(trainFiles, splitUnitsAfterPunctuation, false);
        Pipe myPipe = trainData.getPipe();
        System.out.println("training...");
        tpFunctions.train(trainData, myPipe);
        return doEvaluation(tpFunctions.getModel(), predictFiles, errorList);
    }

    /**
     * Evaluates {@code crf} on {@code predictFiles}. Only units whose representation ends with
     * an end-of-sentence symbol (according to {@code EOSSymbols}) count as decisions. Each wrong
     * decision is added to {@code errorList}; predicted "EOS" for gold "IS" counts as false
     * positive, predicted "IS" for gold "EOS" as false negative.
     *
     * <p>Documents for which the prediction throws an {@code IllegalStateException} are
     * reported and skipped. If no decision was made at all the accuracy is {@code NaN}.
     *
     * @param crf          the model to evaluate
     * @param predictFiles files with gold standard sentence boundaries
     * @param errorList    receives one entry per wrong decision
     * @return counts and accuracy of the evaluation
     */
    private static EvalResult doEvaluation(CRF crf, File[] predictFiles, TreeSet<String> errorList) {
        SentenceSplitter tpFunctions = new SentenceSplitter();
        tpFunctions.setModel(crf);
        EOSSymbols eoss = new EOSSymbols();
        InstanceList predictData = tpFunctions.makePredictionData(predictFiles, crf.getInputPipe());
        System.out.println("predicting...");
        int corr = 0;
        int all = 0;
        int fn = 0;
        int fp = 0;
        for (int i = 0; i < predictData.size(); ++i) {
            Instance inst = (Instance) predictData.get(i);
            String abstractName = (String) inst.getSource();
            List<Unit> units;
            try {
                units = tpFunctions.predict(inst, doPostprocessing);
            } catch (IllegalStateException e) {
                // No prediction available for this document: skip it instead of failing with an NPE.
                System.err.println("Error: prediction failed for " + abstractName + ", skipping it.");
                e.printStackTrace();
                continue;
            }
            ArrayList<String> orgLabels = getLabelsFromLabelSequence((LabelSequence) inst.getTarget());
            for (int j = 0; j < units.size(); ++j) {
                Unit unit = units.get(j);
                String unitRep = unit.rep;
                String pred = unit.label;
                String org = orgLabels.get(j);
                if (!eoss.tokenEndsWithEOSSymbol(unitRep)) {
                    continue;
                }
                ++all;
                if (pred.equals(org)) {
                    ++corr;
                    continue;
                }
                errorList.add(abstractName + "\t" + org + "\t" + pred + "\t" + unitRep + "  (" + j + ")");
                if (pred.equals("EOS") && org.equals("IS")) {
                    ++fp;
                } else if (pred.equals("IS") && org.equals("EOS")) {
                    ++fn;
                }
            }
        }
        double acc = (double) corr / (double) all;
        EvalResult er = new EvalResult();
        er.corrDecisions = corr;
        er.nrDecisions = all;
        er.fn = fn;
        er.fp = fp;
        er.ACC = acc;
        System.out.println("all : " + all);
        System.out.println("corr: " + corr);
        System.out.println("fp :" + fp);
        System.out.println("fn :" + fn);
        System.out.println("R :" + er.getR());
        System.out.println("P :" + er.getP());
        System.out.println("F :" + er.getF());
        System.out.println("ACC : " + acc);
        return er;
    }

    /**
     * Builds training data from {@code trainFiles}, trains a model and writes it to
     * {@code modelFilename}.
     */
    private static void doTraining(File[] trainFiles, boolean splitUnitsAfterPunctuation, String modelFilename) {
        SentenceSplitter sentenceSplitter = new SentenceSplitter();
        System.out.println("making training data...");
        InstanceList trainData = sentenceSplitter.makeTrainingData(trainFiles, false, splitUnitsAfterPunctuation);
        Pipe myPipe = trainData.getPipe();
        System.out.println("training model...");
        sentenceSplitter.train(trainData, myPipe);
        sentenceSplitter.writeModel(modelFilename);
    }

    /**
     * Splits every input file into sentences and writes the result, one sentence per line,
     * to a file of the same name in {@code outDir}.
     *
     * <p>The JVM is terminated with exit code {@code -1} when the model cannot be read. A file
     * whose prediction throws an {@code IllegalStateException} is reported and skipped.
     *
     * @param inFiles       files to split
     * @param outDir        existing directory receiving the result files
     * @param modelFilename file name of the stored model
     */
    private static void doPrediction(File[] inFiles, File outDir, String modelFilename) {
        SentenceSplitter sentenceSplitter = new SentenceSplitter();
        System.out.println("reading model...");
        try {
            sentenceSplitter.readModel(new File(modelFilename));
        } catch (IOException | ClassNotFoundException e) {
            // Without a model no prediction is possible; continuing would only end in an NPE.
            System.err.println("Error: could not read the model file " + modelFilename);
            e.printStackTrace();
            System.exit(-1);
        }
        System.out.println("starting sentence splitting...");
        Pipe myPipe = sentenceSplitter.getModel().getInputPipe();
        int step = 100;
        for (int i = 0; i < inFiles.length; ++i) {
            File inFile = inFiles[i];
            if (i % step == 0 && i > 0) {
                System.out.println(i + " files done...");
            }
            List<String> fileLines = sentenceSplitter.readFile(inFile);
            Instance raw = new Instance(fileLines, "", "", inFile.getName());
            Instance inst = myPipe.instanceFrom(raw);
            List<Unit> units;
            try {
                units = sentenceSplitter.predict(inst, doPostprocessing);
            } catch (IllegalStateException e) {
                System.err.println("Error: prediction failed for " + inFile + ", skipping it.");
                e.printStackTrace();
                continue;
            }
            // File.getName() is separator independent, unlike searching the path for "/".
            File fNew = new File(outDir, inFile.getName());
            ArrayList<String> lines = new ArrayList<String>();
            StringBuilder sentence = new StringBuilder();
            for (Unit unit : units) {
                sentence.append(unit.rep);
                if (unit.afterWs) {
                    sentence.append(" ");
                }
                if (unit.label.equals("EOS")) {
                    lines.add(sentence.toString());
                    sentence.setLength(0);
                }
            }
            // Keep text after the last "EOS" unit instead of silently dropping it.
            if (sentence.length() > 0) {
                lines.add(sentence.toString());
            }
            writeFile(lines, fNew);
        }
    }

    /**
     * Copies the entries of a label sequence into a list of strings.
     *
     * @param ls label sequence whose entries are strings
     * @return the labels in sequence order
     */
    private static ArrayList<String> getLabelsFromLabelSequence(LabelSequence ls) {
        ArrayList<String> labels = new ArrayList<String>(ls.size());
        for (int j = 0; j < ls.size(); ++j) {
            labels.add((String) ls.get(j));
        }
        return labels;
    }

    /** Writes the lines in set order to {@code outFile}, each terminated by "\n". */
    private static void writeFile(TreeSet<String> lines, File outFile) {
        writeLines(lines, outFile);
    }

    /** Writes the lines in list order to {@code outFile}, each terminated by "\n". */
    private static void writeFile(ArrayList<String> lines, File outFile) {
        writeLines(lines, outFile);
    }

    /**
     * Shared implementation of both {@code writeFile} overloads. The writer is closed even when
     * writing fails; an {@code IOException} is reported on stderr and not propagated.
     */
    private static void writeLines(Iterable<String> lines, File outFile) {
        try (FileWriter fw = new FileWriter(outFile)) {
            for (String line : lines) {
                fw.write(line + "\n");
            }
        } catch (IOException e) {
            System.err.println("Error: could not write " + outFile);
            e.printStackTrace();
        }
    }

    /** Counts and derived scores of one evaluation run. */
    private static class EvalResult {
        /** Number of evaluated decisions. */
        int nrDecisions;
        /** Accuracy: correct decisions divided by all decisions. */
        double ACC;
        /** Number of false positives. */
        double fp;
        /** Number of false negatives. */
        double fn;
        /** Number of correct decisions. */
        double corrDecisions;

        private EvalResult() {
        }

        /**
         * @return the harmonic mean of {@link #getR()} and {@link #getP()}; {@code 0.0} when
         *         both are zero (instead of the {@code NaN} a 0/0 division would yield)
         */
        double getF() {
            double recall = this.getR();
            double precision = this.getP();
            double sum = recall + precision;
            if (sum == 0.0) {
                return 0.0;
            }
            return 2.0 * recall * precision / sum;
        }

        /** @return correct decisions divided by correct decisions plus false negatives */
        double getR() {
            return this.corrDecisions / (this.corrDecisions + this.fn);
        }

        /** @return correct decisions divided by correct decisions plus false positives */
        double getP() {
            return this.corrDecisions / (this.corrDecisions + this.fp);
        }
    }
}

