/*
 * Decompiled with CFR 0.152.
 */
package abner;

import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.LabelAlphabet;
import edu.umass.cs.mallet.base.types.LabelSequence;
import edu.umass.cs.mallet.base.types.Token;
import edu.umass.cs.mallet.base.types.TokenSequence;
import java.util.Locale;

/**
 * Pipe that turns a whitespace-separated sentence string into a
 * {@link TokenSequence} (data) and a parallel {@link LabelSequence} (target).
 *
 * <p>Each whitespace-delimited token has the form {@code word} or
 * {@code word|label}. A token without a label is given the label {@code "O"}.
 * For every token the pipe sets a word feature ({@code W=...}) and, depending
 * on the flags below, a word-class feature ({@code WC=...}) and a brief
 * word-class feature ({@code BWC=...}).
 */
public class Input2TokenSequence
extends Pipe {
    private static final long serialVersionUID = -2052454513695348353L;
    /** When true, the space-joined token texts are stored as the carrier's source. */
    boolean saveSource = true;
    /** When true, years, dates, fractions, digit runs, etc. are replaced by placeholder words. */
    boolean doDigitCollapses = false;
    /** When true, the word is lower-cased before it is used in the {@code W=} feature name. */
    boolean doDowncasing = true;
    /** When true, a per-character word-class feature ({@code WC=}) is added. */
    boolean doWordClass = true;
    /** When true, a run-collapsed word-class feature ({@code BWC=}) is added. */
    boolean doBriefWordClass = true;

    /**
     * Creates a pipe whose word-class and brief-word-class features are both
     * switched on or off together.
     *
     * @param cls value assigned to both {@code doWordClass} and {@code doBriefWordClass}
     */
    public Input2TokenSequence(boolean cls) {
        super(null, LabelAlphabet.class);
        this.doWordClass = cls;
        this.doBriefWordClass = cls;
    }

    /** Creates a pipe with the default flag values (both word-class features enabled). */
    public Input2TokenSequence() {
        super(null, LabelAlphabet.class);
    }

    /**
     * Replaces the carrier's data (a sentence {@code String}) with a token
     * sequence and sets its target to the matching label sequence.
     *
     * @param carrier instance whose data is a {@code String} of tokens separated
     *                by spaces and/or tabs, each optionally suffixed with {@code |label}
     * @return the same carrier, with data, target and (if {@code saveSource}) source set
     * @throws IllegalStateException if a token does not split into one or two
     *                               {@code |}-separated fields
     */
    public Instance pipe(Instance carrier) {
        String sentenceLines = (String)carrier.getData();
        String[] tokens = sentenceLines.trim().split("[\t ]+");
        TokenSequence data = new TokenSequence(tokens.length);
        LabelSequence target = new LabelSequence((LabelAlphabet)this.getTargetAlphabet(), tokens.length);
        StringBuffer source = this.saveSource ? new StringBuffer() : null;
        for (int i = 0; i < tokens.length; ++i) {
            String rawWord;
            String label;
            if (tokens[i].length() > 0) {
                String[] features = tokens[i].split("\\|");
                // String.split drops trailing empty strings, so a token made only of
                // '|' characters yields zero fields; report it as malformed instead of
                // failing with an ArrayIndexOutOfBoundsException on features[0].
                if (features.length == 0 || features.length > 2) {
                    throw new IllegalStateException("Line \"" + tokens[i] + "\" is formatted badly!");
                }
                rawWord = features[0];
                label = features.length == 2 ? features[1] : "O";
            } else {
                // Only reachable when the trimmed input is empty: split() then
                // returns a single empty string.
                rawWord = "";
                label = "";
            }

            // The word-class features are derived from the raw word, not from the
            // digit-collapsed placeholder.
            String word = this.doDigitCollapses ? collapseDigits(rawWord) : rawWord;

            // The token text keeps the original casing; only the feature name is lower-cased.
            Token token = new Token(word);
            if (this.doDowncasing) {
                // Fixed locale so feature names do not depend on the JVM's default locale.
                word = word.toLowerCase(Locale.ENGLISH);
            }
            token.setFeatureValue("W=" + word, 1.0);
            if (this.doWordClass) {
                token.setFeatureValue("WC=" + wordClass(rawWord), 1.0);
            }
            if (this.doBriefWordClass) {
                token.setFeatureValue("BWC=" + briefWordClass(rawWord), 1.0);
            }
            data.add(token);
            target.add((Object)label);
            if (this.saveSource) {
                source.append(token.getText());
                source.append(" ");
            }
        }
        carrier.setData((Object)data);
        carrier.setTarget((Object)target);
        if (this.saveSource) {
            carrier.setSource((Object)source);
        }
        return carrier;
    }

    /**
     * Maps numeric-looking words (and a few suffix patterns) to placeholder
     * words; returns the word unchanged when no pattern matches. The first
     * matching pattern wins.
     */
    private static String collapseDigits(String word) {
        if (word.matches("19\\d\\d")) {
            return "<YEAR>";
        }
        if (word.matches("19\\d\\ds")) {
            return "<YEARDECADE>";
        }
        if (word.matches("19\\d\\d-\\d+")) {
            return "<YEARSPAN>";
        }
        // Matches digits, a literal backslash, a slash and one digit (e.g. 1\/2).
        if (word.matches("\\d+\\\\/\\d")) {
            return "<FRACTION>";
        }
        if (word.matches("\\d[\\d,\\.]*")) {
            return "<DIGITS>";
        }
        // A former extra branch used the mistyped pattern "19\d\d-\d\d-\d--d";
        // the pattern below is the form it was evidently meant to match.
        if (word.matches("19\\d\\d-\\d\\d-\\d\\d")) {
            return "<DATELINEDATE>";
        }
        if (word.matches(".*-led")) {
            return "<LED>";
        }
        // NOTE(review): "-sponsored" also maps to "<LED>"; kept as-is because
        // existing trained models may depend on this feature name.
        if (word.matches(".*-sponsored")) {
            return "<LED>";
        }
        return word;
    }

    /**
     * Returns the word class of {@code s}: every upper-case ASCII letter becomes
     * {@code A}, every lower-case ASCII letter {@code a}, every ASCII digit
     * {@code 0}, and every other character {@code x}.
     */
    private static String wordClass(String s) {
        String wc = s.replaceAll("[A-Z]", "A");
        wc = wc.replaceAll("[a-z]", "a");
        wc = wc.replaceAll("[0-9]", "0");
        return wc.replaceAll("[^A-Za-z0-9]", "x");
    }

    /**
     * Returns the brief word class of {@code s}: like {@link #wordClass(String)},
     * but each run of same-class characters is collapsed to a single symbol.
     */
    private static String briefWordClass(String s) {
        String bwc = s.replaceAll("[A-Z]+", "A");
        bwc = bwc.replaceAll("[a-z]+", "a");
        bwc = bwc.replaceAll("[0-9]+", "0");
        return bwc.replaceAll("[^A-Za-z0-9]+", "x");
    }
}

