/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.share.upenn.ner;

import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.Token;
import cc.mallet.types.TokenSequence;
import gnu.trove.THashSet;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Serializable;
import java.util.Set;
import java.util.StringTokenizer;

/**
 * Pipe that marks every token which is part of a span listed in a lexicon file.
 *
 * <p>The lexicon is read once, at construction time, one entry per line (lines
 * are trimmed; blank lines are skipped). When an instance is piped, every
 * contiguous run of tokens whose length lies within the lexicon's token-count
 * bounds is looked up twice: once with the token texts concatenated directly
 * and once with them joined by single spaces. Each token of a matching run
 * receives the feature {@code name} with value {@code 1.0}.
 *
 * <p>NOTE: the fields below deliberately keep their original names, types and
 * package-private access, and no {@code serialVersionUID} is declared, so that
 * the default serialized form of this class stays what it was.
 */
public class ListMember
extends Pipe
implements Serializable {
    /** Delimiters used to estimate how many tokens a lexicon entry spans. */
    private static final String DELIMITERS = "~`!@#$%^&*()_-+={[}]|\\:;\"',<.>?/ \t\n\r";
    /** The subset of {@link #DELIMITERS} that is whitespace. */
    private static final String WHITESPACE_DELIMITERS = " \t\n\r";

    /** Name of the feature set on tokens that belong to a lexicon match. */
    String name;
    /** Lexicon entries (lower-cased when {@link #ignoreCase} is true). */
    Set lexicon;
    /** Whether lookups are case-insensitive. */
    boolean ignoreCase;
    /** Smallest number of tokens a span must have to be looked up. */
    int min;
    /** Largest number of tokens a span may have when looked up. */
    int max;

    /**
     * Loads the lexicon and records the token-count bounds of its entries.
     *
     * @param featureName name of the feature to set on matching tokens
     * @param lexFile     text file with one lexicon entry per line
     * @param ignoreCase  if true, entries and candidate spans are lower-cased
     *                    before comparison
     * @throws IllegalArgumentException if {@code lexFile} does not exist or
     *                                  cannot be read
     */
    public ListMember(String featureName, File lexFile, boolean ignoreCase) {
        this.name = featureName;
        this.ignoreCase = ignoreCase;
        if (!lexFile.exists()) {
            throw new IllegalArgumentException("File " + lexFile + " not found.");
        }
        this.lexicon = new THashSet();
        // Sentinels: with an empty lexicon max stays -1 and pipe() marks nothing.
        this.min = 99999;
        this.max = -1;
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(lexFile));
            String line;
            // readLine() == null is the end-of-stream signal; ready() only says
            // whether a read would block and must not be used to detect EOF.
            while ((line = reader.readLine()) != null) {
                String entry = line.trim();
                if (entry.equals("")) {
                    continue;
                }
                // The lower bound filters spans in pipe(), where it is compared
                // with a number of Tokens, so whitespace must not be counted:
                // otherwise "New York" would demand three tokens and a lexicon
                // of multi-word entries could never match.
                int wordCount = this.countNonWhitespaceTokens(entry);
                if (wordCount < this.min) {
                    this.min = wordCount;
                }
                // The upper bound only limits the scan window; over-estimating
                // it is harmless, so the original (whitespace-inclusive) count
                // is kept to make sure the window never shrinks.
                int windowCount = this.countTokens(entry);
                if (windowCount > this.max) {
                    this.max = windowCount;
                }
                this.lexicon.add(ignoreCase ? entry.toLowerCase() : entry);
            }
        }
        catch (IOException e) {
            // Report the failure to the caller instead of terminating the JVM.
            throw new IllegalArgumentException("Problem with " + lexFile + ": " + e, e);
        }
        finally {
            if (reader != null) {
                try {
                    reader.close();
                }
                catch (IOException ignored) {
                    // The stream was only read from; a failed close loses nothing.
                }
            }
        }
    }

    /**
     * Sets the feature {@link #name} to {@code 1.0} on every token that is part
     * of a span found in the lexicon.
     *
     * @param carrier instance whose data is a {@link TokenSequence}
     * @return the same instance, with its tokens updated in place
     */
    @Override
    public Instance pipe(Instance carrier) {
        TokenSequence seq = (TokenSequence)carrier.getData();
        int length = seq.size();
        boolean[] marked = new boolean[length];
        for (int start = 0; start < length; ++start) {
            // Grow the candidate span one token at a time, keeping both the
            // directly concatenated and the space-joined rendering of it.
            StringBuilder joined = new StringBuilder();
            StringBuilder spaced = new StringBuilder();
            for (int end = start; end < start + this.max && end < length; ++end) {
                String text = ((Token)seq.get(end)).getText();
                joined.append(text);
                if (spaced.length() > 0) {
                    spaced.append(' ');
                }
                spaced.append(text);
                if (end - start + 1 < this.min) {
                    // Span is still shorter than any lexicon entry.
                    continue;
                }
                // Case folding is applied to the whole span, as it is to whole
                // lexicon entries, so both sides are normalized the same way.
                String joinedKey = this.ignoreCase ? joined.toString().toLowerCase() : joined.toString();
                String spacedKey = this.ignoreCase ? spaced.toString().toLowerCase() : spaced.toString();
                if (this.lexicon.contains(joinedKey) || this.lexicon.contains(spacedKey)) {
                    this.markFrom(start, end, marked);
                }
            }
        }
        for (int i = 0; i < length; ++i) {
            if (marked[i]) {
                ((Token)seq.get(i)).setFeatureValue(this.name, 1.0);
            }
        }
        return carrier;
    }

    /**
     * Flags positions {@code a} through {@code b} (both inclusive) in {@code marked}.
     */
    private void markFrom(int a, int b, boolean[] marked) {
        for (int i = a; i <= b; ++i) {
            marked[i] = true;
        }
    }

    /**
     * Counts the pieces {@code s2} splits into on {@link #DELIMITERS}, where
     * every delimiter character, whitespace included, counts as a piece itself.
     */
    private int countTokens(String s2) {
        StringTokenizer wordst = new StringTokenizer(s2, DELIMITERS, true);
        return wordst.countTokens();
    }

    /**
     * Same split as {@link #countTokens(String)}, but whitespace delimiters are
     * not counted; punctuation delimiters still count as one piece each.
     */
    private int countNonWhitespaceTokens(String s) {
        StringTokenizer pieces = new StringTokenizer(s, DELIMITERS, true);
        int count = 0;
        while (pieces.hasMoreTokens()) {
            String piece = pieces.nextToken();
            boolean whitespace = piece.length() == 1
                    && WHITESPACE_DELIMITERS.indexOf(piece.charAt(0)) >= 0;
            if (!whitespace) {
                ++count;
            }
        }
        return count;
    }
}

