/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jsbd;

import cc.mallet.fst.CRF;
import cc.mallet.fst.CRFTrainerByLabelLikelihood;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence;
import cc.mallet.pipe.tsf.OffsetConjunctions;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.LabelSequence;
import cc.mallet.types.Sequence;
import de.julielab.jsbd.Abbreviations;
import de.julielab.jsbd.Abstract2UnitPipe;
import de.julielab.jsbd.Unit;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.TreeSet;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SentenceSplitter {
    private static final Logger LOGGER = LoggerFactory.getLogger(SentenceSplitter.class);
    CRF model = null;
    boolean trained = false;

    public Instance makePredictionData(ArrayList<String> lines, Pipe myPipe) {
        Instance inst = this.model.getInputPipe().instanceFrom(new Instance(lines, (Object)"", (Object)"", (Object)""));
        return inst;
    }

    public Instance makePredictionData(File predictFile, Pipe myPipe) {
        ArrayList<String> lines = this.readFile(predictFile);
        Instance inst = this.model.getInputPipe().instanceFrom(new Instance(lines, (Object)"", (Object)"", (Object)predictFile.getName()));
        return inst;
    }

    public InstanceList makePredictionData(File[] predictFiles, Pipe myPipe) {
        InstanceList predictData = new InstanceList(myPipe);
        int i = 0;
        while (i < predictFiles.length) {
            ArrayList<String> fileLines = this.readFile(predictFiles[i]);
            Instance inst = this.model.getInputPipe().instanceFrom(new Instance(fileLines, (Object)"", (Object)"", (Object)predictFiles[i].getName()));
            predictData.add(inst);
            ++i;
        }
        return predictData;
    }

    public InstanceList makeTrainingData(File[] trainFiles, boolean useTokenOffset) {
        LabelAlphabet dict = new LabelAlphabet();
        dict.lookupLabel((Object)"EOS", true);
        dict.lookupLabel((Object)"IS", true);
        SerialPipes myPipe = new SerialPipes(new Pipe[]{new Abstract2UnitPipe(), new OffsetConjunctions((int[][])new int[][]{{-1}, new int[1], {1}}), new TokenSequence2FeatureVectorSequence(true, true)});
        InstanceList instList = new InstanceList((Pipe)myPipe);
        System.out.print("preparing training data...");
        int step = trainFiles.length / 20;
        int percentage = 0;
        int i = 0;
        while (i < trainFiles.length) {
            ArrayList<String> fileLines = this.readFile(trainFiles[i]);
            if (step > 0 && i % step == 0 && i > 0) {
                System.out.print(String.valueOf(percentage += 5) + "%...");
            }
            instList.addThruPipe(new Instance(fileLines, (Object)"", (Object)"", (Object)trainFiles[i].getName()));
            ++i;
        }
        return instList;
    }

    public void train(InstanceList instList, Pipe dataPipe) {
        long s1 = System.currentTimeMillis();
        this.model = new CRF(instList.getPipe(), null);
        this.model.addStatesForLabelsConnectedAsIn(instList);
        CRFTrainerByLabelLikelihood crfTrainer = new CRFTrainerByLabelLikelihood(this.model);
        boolean b = crfTrainer.trainOptimized(instList);
        LOGGER.info("SentencesSplitter training: model converged: " + b);
        long s2 = System.currentTimeMillis();
        this.model.getInputPipe().getDataAlphabet().stopGrowth();
        this.trained = true;
        LOGGER.info("training time: " + (s2 - s1) / 1000L + " sec");
    }

    public ArrayList<Unit> predict(ArrayList<String> lines, boolean doPostprocessing) {
        if (!this.trained || this.model == null) {
            throw new IllegalStateException("No model available. Train or load trained model first.");
        }
        Instance inst = this.model.getInputPipe().instanceFrom(new Instance(lines, (Object)"", (Object)"", (Object)""));
        return this.predict(inst, doPostprocessing);
    }

    public ArrayList<Unit> predict(Instance inst, boolean doPostProcessing) {
        if (!this.trained || this.model == null) {
            throw new IllegalStateException("No model available. Train or load trained model first.");
        }
        Sequence input = (Sequence)inst.getData();
        ArrayList units = (ArrayList)inst.getName();
        ArrayList<String> labelList = new ArrayList<String>();
        Sequence crfOutput = this.model.transduce(input);
        int j = 0;
        while (j < crfOutput.size()) {
            labelList.add((String)crfOutput.get(j));
            ++j;
        }
        if (doPostProcessing) {
            labelList = this.postprocessingFilter(labelList, units);
        }
        j = 0;
        while (j < labelList.size()) {
            ((Unit)units.get((int)j)).label = labelList.get(j);
            ++j;
        }
        return units;
    }

    public ArrayList<String> postprocessingFilter(ArrayList<String> predLabels, ArrayList<Unit> units) {
        String unitRep;
        Abbreviations abr = new Abbreviations();
        TreeSet<String> abrSet = abr.getSet();
        String[] labels = predLabels.toArray(new String[predLabels.size()]);
        ArrayList<String> newPred = new ArrayList<String>();
        int openNormalBrackets = 0;
        int openSquareBrackets = 0;
        int count = 0;
        int i = 0;
        while (i < labels.length) {
            unitRep = units.get((int)i).rep;
            char[] c = unitRep.toCharArray();
            int j = 0;
            while (j < c.length) {
                switch (c[j]) {
                    case '(': {
                        ++openNormalBrackets;
                        break;
                    }
                    case '[': {
                        ++openSquareBrackets;
                        break;
                    }
                    case ')': {
                        --openNormalBrackets;
                        break;
                    }
                    case ']': {
                        --openSquareBrackets;
                    }
                }
                ++j;
            }
            if (openSquareBrackets > 0 || openNormalBrackets > 0) {
                labels[i] = "IS";
                ++count;
            }
            if (count >= 50) {
                openSquareBrackets = 0;
                openNormalBrackets = 0;
            }
            if (openSquareBrackets < 0) {
                openSquareBrackets = 0;
            }
            if (openNormalBrackets < 0) {
                openNormalBrackets = 0;
            }
            ++i;
        }
        i = 0;
        while (i < labels.length) {
            unitRep = units.get((int)i).rep;
            if (abrSet.contains(unitRep)) {
                labels[i] = "IS";
            }
            if (unitRep.endsWith(".\"") || unitRep.endsWith("?") || unitRep.endsWith("!")) {
                labels[i] = "EOS";
            }
            newPred.add(labels[i]);
            ++i;
        }
        return newPred;
    }

    public ArrayList<String> getLabelsFromLabelSequence(LabelSequence ls) {
        ArrayList<String> labels = new ArrayList<String>();
        int j = 0;
        while (j < ls.size()) {
            labels.add((String)ls.get(j));
            ++j;
        }
        return labels;
    }

    public void writeModel(String filename) {
        if (!this.trained || this.model == null) {
            String info = "train or load trained model first.";
            IllegalStateException e = new IllegalStateException(info);
            LOGGER.error(info, (Throwable)e);
        }
        try {
            FileOutputStream fos = new FileOutputStream(new File(String.valueOf(filename) + ".gz"));
            GZIPOutputStream gout = new GZIPOutputStream(fos);
            ObjectOutputStream oos = new ObjectOutputStream(gout);
            oos.writeObject(this.model);
            oos.close();
        }
        catch (IOException e) {
            e.printStackTrace();
            System.exit(0);
        }
    }

    public void readModel(File file) throws IOException, FileNotFoundException, ClassNotFoundException {
        this.readModel(new FileInputStream(file));
    }

    public void readModel(InputStream is) throws IOException, ClassNotFoundException {
        GZIPInputStream gin = new GZIPInputStream(is);
        Throwable throwable = null;
        Object var4_5 = null;
        try (ObjectInputStream ois = new ObjectInputStream(gin);){
            this.model = (CRF)ois.readObject();
            this.trained = true;
            this.model.getInputPipe().getDataAlphabet().stopGrowth();
        }
        catch (Throwable throwable2) {
            if (throwable == null) {
                throwable = throwable2;
            } else if (throwable != throwable2) {
                throwable.addSuppressed(throwable2);
            }
            throw throwable;
        }
    }

    public ArrayList<String> readFile(File myFile) {
        ArrayList<String> lines = new ArrayList<String>();
        try {
            BufferedReader b = new BufferedReader(new FileReader(myFile));
            String line = "";
            while ((line = b.readLine()) != null) {
                lines.add(line);
            }
            b.close();
        }
        catch (IOException e) {
            e.printStackTrace();
            System.exit(-1);
        }
        return lines;
    }

    public CRF getModel() {
        return this.model;
    }

    void setModel(CRF crf) {
        this.trained = true;
        this.model = crf;
    }
}

