/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jtbd;

import cc.mallet.fst.CRF;
import cc.mallet.fst.CRFTrainerByLabelLikelihood;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence;
import cc.mallet.pipe.tsf.OffsetConjunctions;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.LabelSequence;
import cc.mallet.types.Sequence;
import de.julielab.jtbd.EOSSymbols;
import de.julielab.jtbd.Sentence2TokenPipe;
import de.julielab.jtbd.Unit;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Wraps a Mallet {@link CRF} that assigns a label to every {@link Unit} of a sentence.
 *
 * <p>A model must be made available through {@link #train(InstanceList, Pipe)},
 * {@link #readModel(File)}, {@link #readModel(InputStream)} or {@link #setModel(CRF)}
 * before {@code predict} or {@code writeModel} may be called; otherwise those methods
 * throw {@link IllegalStateException}.
 *
 * <p>This class performs no synchronization of its own.
 */
public class Tokenizer {
    private static final Logger LOGGER = LoggerFactory.getLogger(Tokenizer.class);

    /** Number of units shown on each side of the inspected unit by {@link #showErrorContext}. */
    private static final int ERROR_CONTEXT_WINDOW = 2;

    /** The CRF used for prediction; {@code null} until a model has been trained, loaded or set. */
    CRF model = null;

    /** {@code true} once a model has been trained, loaded or set. */
    private boolean trained = false;

    /** Creates a tokenizer without a model. */
    public Tokenizer() {
        LOGGER.debug("this is the JTBD constuctor");
    }

    /**
     * Copies the entries of a label sequence into a list of strings.
     *
     * @param ls the label sequence to read; every entry is cast to {@link String}
     * @return a new list holding the entries of {@code ls} in order
     * @throws ClassCastException if an entry of {@code ls} is not a {@link String}
     */
    ArrayList<String> getLabelsFromLabelSequence(LabelSequence ls) {
        ArrayList<String> labels = new ArrayList<String>(ls.size());
        for (int j = 0; j < ls.size(); ++j) {
            labels.add((String) ls.get(j));
        }
        return labels;
    }

    /**
     * Returns the current model.
     *
     * @return the CRF held by this tokenizer, or {@code null} if none is available
     */
    public CRF getModel() {
        return this.model;
    }

    /**
     * Builds prediction instances for parallel lists of original and tokenized sentences,
     * using the input pipe of the current model.
     *
     * @param orgSentences the original sentences
     * @param tokSentences the tokenized sentences; entry {@code i} belongs to
     *                     {@code orgSentences.get(i)}
     * @return the instances that were accepted (see the note in the loop)
     * @throws NullPointerException if no model is available
     * @throws IndexOutOfBoundsException if {@code tokSentences} is shorter than
     *                                   {@code orgSentences}
     */
    InstanceList makePredictionData(List<String> orgSentences, List<String> tokSentences) {
        LOGGER.debug("makePredictionData() - making prediction data");
        InstanceList predictData = new InstanceList(this.model.getInputPipe());
        for (int i = 0; i < orgSentences.size(); ++i) {
            StringBuilder orgSentence = new StringBuilder(orgSentences.get(i));
            StringBuilder tokSentence = new StringBuilder(tokSentences.get(i));
            Instance inst = this.makePredictionData(orgSentence, tokSentence);
            // Instances whose source is a String are dropped.
            // NOTE(review): presumably the pipe stores a String in the source slot to flag a
            // sentence it could not process - confirm against Sentence2TokenPipe.
            if (!(inst.getSource() instanceof String)) {
                predictData.add(inst);
            }
        }
        return predictData;
    }

    /**
     * Builds a single prediction instance by sending the sentence pair through the model's
     * input pipe. A trailing character recognized by {@link EOSSymbols} is removed from both
     * buffers first, so the passed buffers are modified.
     *
     * @param orgSentence the original sentence (instance data)
     * @param tokSentence the tokenized sentence (instance source); may be empty
     * @return the piped instance
     * @throws NoSuchMethodError if the Mallet version on the classpath does not provide
     *                           {@code Pipe.instanceFrom}
     */
    private Instance makePredictionData(StringBuilder orgSentence, StringBuilder tokSentence) {
        stripTrailingEosSymbol(tokSentence);
        stripTrailingEosSymbol(orgSentence);
        Instance raw = new Instance((Object) orgSentence.toString(), null, null, (Object) tokSentence.toString());
        try {
            return this.model.getInputPipe().instanceFrom(raw);
        }
        catch (NoSuchMethodError e) {
            // A linkage problem must not terminate the hosting JVM (and certainly not with a
            // success exit status); report it and let the caller decide.
            LOGGER.error("makePredictionData() - Pipe.instanceFrom is missing; check the Mallet version on the classpath", e);
            throw e;
        }
    }

    /**
     * Removes the last character of {@code sentence} if {@link EOSSymbols} contains it.
     * Empty buffers are left untouched.
     */
    private static void stripTrailingEosSymbol(StringBuilder sentence) {
        int last = sentence.length() - 1;
        if (last >= 0 && EOSSymbols.contains(Character.valueOf(sentence.charAt(last)))) {
            sentence.deleteCharAt(last);
        }
    }

    /**
     * Builds training instances for parallel lists of original and tokenized sentences.
     *
     * <p>Every sentence is trimmed, a trailing character recognized by {@link EOSSymbols} is
     * removed, and the pair is sent through a fresh pipe consisting of
     * {@link Sentence2TokenPipe}, {@link OffsetConjunctions} over the offsets -1 and +1, and
     * {@link TokenSequence2FeatureVectorSequence}. The instance name is the sentence index.
     *
     * @param orgSentences the original sentences
     * @param tokSentences the tokenized sentences; entry {@code i} belongs to
     *                     {@code orgSentences.get(i)}
     * @return the piped training instances
     * @throws IndexOutOfBoundsException if {@code tokSentences} is shorter than
     *                                   {@code orgSentences}
     */
    InstanceList makeTrainingData(List<String> orgSentences, List<String> tokSentences) {
        LOGGER.debug("makeTrainingData() - making training data...");
        // NOTE(review): this alphabet is filled but never handed to the pipe or the instance
        // list; kept as in the original - confirm whether it can be removed.
        LabelAlphabet dict = new LabelAlphabet();
        dict.lookupLabel((Object) "P", true);
        dict.lookupLabel((Object) "N", true);
        SerialPipes myPipe = new SerialPipes(new Pipe[]{
            new Sentence2TokenPipe(),
            new OffsetConjunctions(new int[][]{{-1}, {1}}),
            new TokenSequence2FeatureVectorSequence(true, true)});
        InstanceList instList = new InstanceList((Pipe) myPipe);
        System.out.print("preparing training data...");
        for (int i = 0; i < orgSentences.size(); ++i) {
            StringBuilder orgSentence = new StringBuilder(orgSentences.get(i).trim());
            StringBuilder tokSentence = new StringBuilder(tokSentences.get(i).trim());
            // The helper tolerates empty (or whitespace-only) sentences, which previously
            // failed with StringIndexOutOfBoundsException on charAt(-1).
            stripTrailingEosSymbol(tokSentence);
            stripTrailingEosSymbol(orgSentence);
            instList.addThruPipe(new Instance((Object) orgSentence.toString(), (Object) "", (Object) Integer.valueOf(i), (Object) tokSentence.toString()));
        }
        LOGGER.debug("makeTrainingData() -  number of features on training data: {}", myPipe.getDataAlphabet().size());
        return instList;
    }

    /**
     * Labels the units of a piped instance with the current model.
     *
     * <p>The instance name is expected to be the list of {@link Unit}s and the instance data
     * the matching input sequence; the CRF output at position {@code j} is written to the
     * {@code label} field of unit {@code j}.
     *
     * @param inst an instance produced by the model's input pipe
     * @return the unit list taken from the instance name, with labels set in place
     * @throws IllegalStateException if no model is available
     */
    ArrayList<Unit> predict(Instance inst) {
        if (!this.trained || this.model == null) {
            throw new IllegalStateException("No model available. Train or load trained model first.");
        }
        // The instance name slot is untyped; the cast relies on the pipe having stored the units.
        @SuppressWarnings("unchecked")
        ArrayList<Unit> units = (ArrayList<Unit>) inst.getName();
        if (!units.isEmpty()) {
            Sequence input = (Sequence) inst.getData();
            Sequence crfOutput = this.model.transduce(input);
            for (int j = 0; j < crfOutput.size(); ++j) {
                units.get(j).label = (String) crfOutput.get(j);
            }
        }
        return units;
    }

    /**
     * Labels the units of a single sentence with the current model.
     *
     * @param sentence the sentence to process
     * @return the labeled units of the sentence
     * @throws IllegalStateException if no model is available
     */
    public ArrayList<Unit> predict(String sentence) {
        LOGGER.debug("predict() - before pedicting labelss ...");
        if (!this.trained || this.model == null) {
            throw new IllegalStateException("No model available. Train or load trained model first.");
        }
        LOGGER.debug("predict() - now making pedictions ...");
        Instance inst = this.makePredictionData(new StringBuilder(sentence), new StringBuilder(""));
        LOGGER.debug("predict() - after pedicting labels ...");
        return this.predict(inst);
    }

    /**
     * Loads a gzip-compressed, Java-serialized CRF from a file.
     *
     * @param file the model file
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading or decompressing fails
     * @throws ClassNotFoundException if a serialized class is not on the classpath
     */
    public void readModel(File file) throws IOException, FileNotFoundException, ClassNotFoundException {
        // Close the file stream even when reading fails.
        try (FileInputStream fis = new FileInputStream(file)) {
            this.readModel(fis);
        }
    }

    /**
     * Loads a gzip-compressed, Java-serialized CRF from a stream and stops the growth of its
     * data alphabet. The stream is closed once reading has started, whether or not it
     * succeeds. The tokenizer's state is only changed after the model was read completely.
     *
     * <p>SECURITY: this uses Java native deserialization; only feed it model files from a
     * trusted source.
     *
     * @param is the stream to read the model from
     * @throws IOException if reading or decompressing fails
     * @throws ClassNotFoundException if a serialized class is not on the classpath
     */
    public void readModel(InputStream is) throws IOException, ClassNotFoundException {
        try (GZIPInputStream gin = new GZIPInputStream(is);
             ObjectInputStream ois = new ObjectInputStream(gin)) {
            CRF loaded = (CRF) ois.readObject();
            loaded.getInputPipe().getDataAlphabet().stopGrowth();
            // Publish only a fully prepared model so a failure leaves the old state intact.
            this.model = loaded;
            this.trained = true;
        }
    }

    /**
     * Installs an externally created model and marks this tokenizer as trained.
     *
     * @param crf the model to use
     */
    void setModel(CRF crf) {
        this.trained = true;
        this.model = crf;
    }

    /**
     * Renders the units around position {@code i} twice: once spaced according to the
     * predicted labels and once according to the original labels. A blank follows a unit
     * whose label equals {@code "P"}.
     *
     * @param i         index of the unit of interest
     * @param units     the units carrying the predicted labels
     * @param orgLabels the original labels, parallel to {@code units}
     * @return the predicted context, a newline, the original context and a final newline
     */
    String showErrorContext(int i, ArrayList<Unit> units, ArrayList<String> orgLabels) {
        String orgContext = "";
        String newContext = "";
        // Visit only the window [i - 2, i + 2], clamped to the valid unit indices.
        int from = Math.max(0, i - ERROR_CONTEXT_WINDOW);
        int to = Math.min(units.size() - 1, i + ERROR_CONTEXT_WINDOW);
        for (int j = from; j <= to; ++j) {
            Unit unit = units.get(j);
            String orgL = orgLabels.get(j).equals("P") ? " " : "";
            String newL = unit.label.equals("P") ? " " : "";
            orgContext = orgContext + unit.rep + orgL;
            newContext = newContext + unit.rep + newL;
        }
        return newContext + "\n" + orgContext + "\n";
    }

    /**
     * Trains a new CRF on the given instances and installs it as the current model.
     *
     * <p>The model is only replaced after training has finished, so a failing training run
     * leaves a previously available model untouched.
     *
     * @param instList the training instances
     * @param myPipe   the input pipe of the new CRF
     */
    void train(InstanceList instList, Pipe myPipe) {
        long start = System.currentTimeMillis();
        CRF crf = new CRF(myPipe, null);
        crf.addStatesForLabelsConnectedAsIn(instList);
        CRFTrainerByLabelLikelihood crfTrainer = new CRFTrainerByLabelLikelihood(crf);
        boolean converged = crfTrainer.trainOptimized(instList);
        LOGGER.info("Tokenizer training: model converged: {}", converged);
        long end = System.currentTimeMillis();
        crf.getInputPipe().getDataAlphabet().stopGrowth();
        this.model = crf;
        this.trained = true;
        LOGGER.debug("train() - training time: {} sec", (end - start) / 1000L);
    }

    /**
     * Writes the current model, Java-serialized and gzip-compressed, to
     * {@code filename + ".gz"}.
     *
     * @param filename the target file name without the {@code .gz} suffix
     * @throws IllegalStateException if no model is available
     * @throws UncheckedIOException if the file cannot be written
     */
    void writeModel(String filename) {
        if (!this.trained || this.model == null) {
            throw new IllegalStateException("train or load trained model first.");
        }
        File target = new File(filename + ".gz");
        try (FileOutputStream fos = new FileOutputStream(target);
             GZIPOutputStream gout = new GZIPOutputStream(fos);
             ObjectOutputStream oos = new ObjectOutputStream(gout)) {
            oos.writeObject(this.model);
        }
        catch (IOException e) {
            // Report the failure to the caller instead of exiting the JVM with status 0.
            throw new UncheckedIOException("Could not write model to " + target, e);
        }
    }
}

