/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.ae.jsbd;

import cc.mallet.fst.CRF;
import cc.mallet.fst.CRFTrainerByLabelLikelihood;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence;
import cc.mallet.pipe.tsf.OffsetConjunctions;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.LabelSequence;
import cc.mallet.types.Sequence;
import de.julielab.jcore.ae.jsbd.Abstract2UnitPipe;
import de.julielab.jcore.ae.jsbd.Unit;
import de.julielab.jcore.ae.jsbd.postprocessingfilters.PostprocessingFilter;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Wraps a MALLET {@link CRF} that labels the units of a text, and offers helpers to build
 * training/prediction instances, train the model, run predictions and (de)serialize the model.
 *
 * <p>A model must be made available through {@link #train(InstanceList, Pipe)},
 * {@link #readModel(File)}, {@link #readModel(InputStream)} or {@link #setModel(CRF)} before
 * any of the prediction or model-writing methods is used.
 */
public class SentenceSplitter {
    private static final Logger LOGGER = LoggerFactory.getLogger(SentenceSplitter.class);
    CRF model = null;
    boolean trained = false;

    /**
     * Pipes the given lines through the input pipe of the current model.
     *
     * @param lines  the text lines to convert
     * @param myPipe unused; the model's own input pipe is always applied
     * @return the piped instance
     */
    public Instance makePredictionData(ArrayList<String> lines, Pipe myPipe) {
        return this.model.getInputPipe().instanceFrom(new Instance(lines, "", "", ""));
    }

    /**
     * Reads a file and pipes its lines through the input pipe of the current model. The file
     * name is used as the source of the raw instance.
     *
     * @param predictFile the file to read
     * @param myPipe      unused; the model's own input pipe is always applied
     * @return the piped instance
     * @throws UncheckedIOException if the file cannot be read
     */
    public Instance makePredictionData(File predictFile, Pipe myPipe) {
        ArrayList<String> lines = this.readFile(predictFile);
        return this.model.getInputPipe().instanceFrom(new Instance(lines, "", "", predictFile.getName()));
    }

    /**
     * Reads several files and pipes each of them through the input pipe of the current model.
     *
     * @param predictFiles the files to read, one instance is created per file
     * @param myPipe       the pipe the returned {@link InstanceList} is constructed with; the
     *                     instances themselves are piped with the model's input pipe
     * @return a list holding one already-piped instance per file, in input order
     * @throws UncheckedIOException if one of the files cannot be read
     */
    public InstanceList makePredictionData(File[] predictFiles, Pipe myPipe) {
        InstanceList predictData = new InstanceList(myPipe);
        for (File predictFile : predictFiles) {
            ArrayList<String> fileLines = this.readFile(predictFile);
            Instance inst = this.model.getInputPipe()
                    .instanceFrom(new Instance(fileLines, "", "", predictFile.getName()));
            predictData.add(inst);
        }
        return predictData;
    }

    /**
     * Builds the training instances from the given files. A fresh feature pipe is created
     * (unit extraction, offset conjunctions for the offsets -1, 0 and 1, feature vector
     * conversion) and every file is added through it. Progress is printed to standard output.
     *
     * @param trainFiles                 the training files, one instance is created per file
     * @param useTokenOffset             unused by this method
     * @param splitUnitsAfterPunctuation passed on to {@link Abstract2UnitPipe}
     * @return the piped training instances
     * @throws UncheckedIOException if one of the files cannot be read
     */
    public InstanceList makeTrainingData(File[] trainFiles, boolean useTokenOffset, boolean splitUnitsAfterPunctuation) {
        // Register the two labels up front; presumably "end of sentence" / "inside sentence".
        LabelAlphabet dict = new LabelAlphabet();
        dict.lookupLabel("EOS", true);
        dict.lookupLabel("IS", true);
        SerialPipes myPipe = new SerialPipes(new Pipe[]{
                new Abstract2UnitPipe(splitUnitsAfterPunctuation),
                new OffsetConjunctions(new int[][]{{-1}, {0}, {1}}),
                new TokenSequence2FeatureVectorSequence(true, true)});
        InstanceList instList = new InstanceList(myPipe);
        System.out.print("preparing training data...");
        // Report progress in 5% steps; with fewer than 20 files no progress is printed.
        int step = trainFiles.length / 20;
        int percentage = 0;
        for (int i = 0; i < trainFiles.length; ++i) {
            ArrayList<String> fileLines = this.readFile(trainFiles[i]);
            if (step > 0 && i > 0 && i % step == 0) {
                percentage += 5;
                System.out.print(percentage + "%...");
            }
            instList.addThruPipe(new Instance(fileLines, "", "", trainFiles[i].getName()));
        }
        return instList;
    }

    /**
     * Trains a new CRF on the given instances and makes it the current model. After training
     * the data alphabet of the model's input pipe is frozen.
     *
     * @param instList the training instances; their pipe becomes the model's input pipe
     * @param dataPipe unused by this method
     */
    public void train(InstanceList instList, Pipe dataPipe) {
        long start = System.currentTimeMillis();
        this.model = new CRF(instList.getPipe(), (Pipe) null);
        this.model.addStatesForLabelsConnectedAsIn(instList);
        CRFTrainerByLabelLikelihood crfTrainer = new CRFTrainerByLabelLikelihood(this.model);
        boolean converged = crfTrainer.train(instList);
        LOGGER.info("SentencesSplitter training: model converged: {}", converged);
        long end = System.currentTimeMillis();
        this.model.getInputPipe().getDataAlphabet().stopGrowth();
        this.trained = true;
        LOGGER.info("training time: {} sec", (end - start) / 1000L);
    }

    /**
     * Pipes the given lines through the model's input pipe and predicts a label for each unit.
     *
     * @param lines                the text lines to label
     * @param postprocessingFilter name of the post-processing filter, see
     *                             {@link #predict(Instance, String)}; may be {@code null}
     * @return the units with their {@code label} field set
     * @throws IllegalStateException if no trained model is available
     */
    public List<Unit> predict(List<String> lines, String postprocessingFilter) {
        this.requireTrainedModel();
        Instance inst = this.model.getInputPipe().instanceFrom(new Instance(lines, null, null, null));
        return this.predict(inst, postprocessingFilter);
    }

    /**
     * Predicts a label for each unit of an already piped instance. The instance's data must be
     * the input {@link Sequence} and its name the list of {@link Unit}s.
     *
     * @param inst       the piped instance
     * @param filterName {@code "biomed"} or {@code "medical"} (case-insensitive) to apply the
     *                   respective {@link PostprocessingFilter.Mode}; {@code null} or any other
     *                   value leaves the predicted labels untouched
     * @return the units of the instance with their {@code label} field set
     * @throws IllegalStateException if no trained model is available
     */
    public List<Unit> predict(Instance inst, String filterName) {
        this.requireTrainedModel();
        Sequence input = (Sequence) inst.getData();
        // The pipe stores the unit list as the instance name.
        @SuppressWarnings("unchecked")
        List<Unit> units = (List<Unit>) inst.getName();
        Sequence crfOutput = this.model.transduce(input);
        List<String> labelList = new ArrayList<>(crfOutput.size());
        for (int j = 0; j < crfOutput.size(); ++j) {
            labelList.add((String) crfOutput.get(j));
        }
        if (filterName != null) {
            LOGGER.debug("Postprocessing with {}", filterName);
            // equalsIgnoreCase is locale independent, unlike toLowerCase() with the default
            // locale (e.g. "BIOMED" would not match under a Turkish locale).
            if ("biomed".equalsIgnoreCase(filterName)) {
                labelList = PostprocessingFilter.Mode.BIOMED.process(labelList, units);
            } else if ("medical".equalsIgnoreCase(filterName)) {
                labelList = PostprocessingFilter.Mode.MEDICAL.process(labelList, units);
            }
        }
        for (int j = 0; j < labelList.size(); ++j) {
            units.get(j).label = labelList.get(j);
        }
        return units;
    }

    /**
     * Copies the entries of a label sequence into a list of strings.
     *
     * @param ls the label sequence
     * @return the entries of {@code ls} in order
     */
    public ArrayList<String> getLabelsFromLabelSequence(LabelSequence ls) {
        ArrayList<String> labels = new ArrayList<>(ls.size());
        for (int j = 0; j < ls.size(); ++j) {
            labels.add((String) ls.get(j));
        }
        return labels;
    }

    /**
     * Serializes the current model, gzip-compressed, to {@code filename + ".gz"}.
     *
     * @param filename target path without the {@code .gz} suffix
     * @throws IllegalStateException if no trained model is available
     * @throws UncheckedIOException  if the model cannot be written
     */
    public void writeModel(String filename) {
        if (!this.trained || this.model == null) {
            // Previously this exception was only logged and a null model was written.
            throw new IllegalStateException("train or load trained model first.");
        }
        File target = new File(filename + ".gz");
        try (FileOutputStream fos = new FileOutputStream(target);
             GZIPOutputStream gout = new GZIPOutputStream(fos);
             ObjectOutputStream oos = new ObjectOutputStream(gout)) {
            oos.writeObject(this.model);
        } catch (IOException e) {
            // Report the failure to the caller instead of terminating the JVM.
            throw new UncheckedIOException("Could not write model to " + target, e);
        }
    }

    /**
     * Loads a gzip-compressed, serialized model from a file and makes it the current model.
     *
     * @param file the model file
     * @throws FileNotFoundException  if the file cannot be opened
     * @throws IOException            if reading or decompressing fails
     * @throws ClassNotFoundException if a serialized class cannot be resolved
     */
    public void readModel(File file) throws IOException, FileNotFoundException, ClassNotFoundException {
        // Close the file stream even if the delegate fails before taking ownership of it.
        try (FileInputStream fis = new FileInputStream(file)) {
            this.readModel(fis);
        }
    }

    /**
     * Loads a gzip-compressed, serialized model from a stream and makes it the current model.
     * The data alphabet of the model's input pipe is frozen. The given stream is closed once
     * it has been wrapped successfully.
     *
     * <p>NOTE(review): this uses Java native deserialization; only pass streams from trusted
     * sources.
     *
     * @param is the stream to read the model from
     * @throws IOException            if reading or decompressing fails
     * @throws ClassNotFoundException if a serialized class cannot be resolved
     */
    public void readModel(InputStream is) throws IOException, ClassNotFoundException {
        try (GZIPInputStream gin = new GZIPInputStream(is);
             ObjectInputStream ois = new ObjectInputStream(gin)) {
            CRF loaded = (CRF) ois.readObject();
            loaded.getInputPipe().getDataAlphabet().stopGrowth();
            // Publish the model only after it has been loaded and prepared completely.
            this.model = loaded;
            this.trained = true;
        }
    }

    /**
     * Reads all lines of a text file using the platform default charset.
     *
     * @param myFile the file to read
     * @return the lines of the file in order, without line terminators
     * @throws UncheckedIOException if the file cannot be read
     */
    public ArrayList<String> readFile(File myFile) {
        ArrayList<String> lines = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(myFile))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        } catch (IOException e) {
            // Report the failure to the caller instead of terminating the JVM.
            throw new UncheckedIOException("Could not read file " + myFile, e);
        }
        return lines;
    }

    /**
     * @return the current model, or {@code null} if none has been trained, loaded or set
     */
    public CRF getModel() {
        return this.model;
    }

    /**
     * Sets the current model and marks this splitter as trained.
     *
     * @param crf the model to use
     */
    void setModel(CRF crf) {
        this.trained = true;
        this.model = crf;
    }

    /** Fails with an {@link IllegalStateException} unless a trained model is available. */
    private void requireTrainedModel() {
        if (!this.trained || this.model == null) {
            throw new IllegalStateException("No model available. Train or load trained model first.");
        }
    }
}

