/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.utils.corpuslinguistics.tokenizer;

import com.ibm.icu.text.BreakIterator;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.corpuslinguistics.tokenizer.AbstractWordTokenizer;
import edu.northwestern.at.utils.corpuslinguistics.tokenizer.WordTokenizer;
import java.util.List;
import java.util.Locale;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Word tokenizer that finds token boundaries with the ICU4J word
 * {@link BreakIterator} for a configurable {@link Locale}.
 *
 * <p>The pre-tokenizer and the per-token helpers used by
 * {@link #extractWords(String)} ({@code preTokenizer}, {@code preprocessToken},
 * {@code splitToken}, {@code addWordToSentence}) are not declared in this
 * class; they are inherited from the superclass hierarchy.</p>
 */
public class ICU4JBreakIteratorWordTokenizer
extends AbstractWordTokenizer
implements WordTokenizer {
    /** Locale passed to the ICU4J word break iterator. Defaults to {@link Locale#US}. */
    protected Locale locale = Locale.US;

    /**
     * Creates a tokenizer that uses the default locale ({@link Locale#US}).
     */
    public ICU4JBreakIteratorWordTokenizer() {
    }

    /**
     * Creates a tokenizer that uses the given locale.
     *
     * @param locale the locale used to select word-break rules; when
     *               {@code null} the default ({@link Locale#US}) is kept so
     *               that {@link #extractWords(String)} does not later fail
     *               while creating the break iterator
     */
    public ICU4JBreakIteratorWordTokenizer(Locale locale) {
        if (locale != null) {
            this.locale = locale;
        }
    }

    /**
     * Breaks text into word tokens.
     *
     * <p>The text is first run through the inherited pre-tokenizer, then cut
     * into segments at the boundaries reported by the word break iterator.
     * Segments whose first character is whitespace (per
     * {@link Character#isWhitespace(char)}) are skipped. Every other segment
     * is passed through {@code preprocessToken} and {@code splitToken}, and
     * each non-empty piece is added to the result via
     * {@code addWordToSentence}.</p>
     *
     * @param text the text to tokenize
     * @return the list of extracted word tokens, in order of appearance
     */
    @Override
    public List<String> extractWords(String text) {
        List<String> result = ListFactory.createNewList();
        BreakIterator wordIterator = BreakIterator.getWordInstance(this.locale);
        String fixedText = this.preTokenizer.pretokenize(text);
        wordIterator.setText(fixedText);

        int start = wordIterator.first();
        for (int end = wordIterator.next();
                end != BreakIterator.DONE;
                start = end, end = wordIterator.next()) {
            String token = fixedText.substring(start, end);

            // Segments starting with whitespace are separators, not words.
            if (Character.isWhitespace(token.charAt(0))) {
                continue;
            }

            // preprocessToken receives the tokens collected so far; an empty
            // return value means there is nothing to add for this segment.
            token = this.preprocessToken(token, result);
            if (token.length() == 0) {
                continue;
            }

            for (String piece : this.splitToken(token)) {
                if (piece.length() > 0) {
                    this.addWordToSentence(result, piece);
                }
            }
        }
        return result;
    }
}

