/*
 * Decompiled with CFR 0.152.
 */
package banner.tokenization;

import banner.tokenization.Tokenizer;
import banner.types.Sentence;
import banner.types.Token;
import java.util.ArrayList;
import java.util.List;

/**
 * Rule-based tokenizer that scans the text one character at a time.
 *
 * <ul>
 *   <li>Whitespace separates tokens and never appears inside a token.</li>
 *   <li>A maximal run of letters and digits forms one token.</li>
 *   <li>Each character recognized by {@link #isPunctuation(char)} ends a token.</li>
 *   <li>Any other character is left pending: it is attached to the front of
 *       the next token, or emitted on its own when whitespace or the end of
 *       the text is reached first.</li>
 * </ul>
 *
 * {@link #tokenize(Sentence)} and {@link #getTokens(String)} share a single
 * scanning routine, so both always produce the same token boundaries.
 */
public class SimpleTokenizer
implements Tokenizer {
    /** Characters treated as punctuation; includes the en dash (U+2013). */
    private static final String PUNCTUATION = "`~!@#$%^&*()-\u2013=_+[]\\{}|;':\",./<>?";

    /**
     * Tells whether {@code ch} is one of the recognized punctuation characters.
     *
     * @param ch the character to test
     * @return {@code true} if {@code ch} occurs in the punctuation set
     */
    private static boolean isPunctuation(char ch) {
        return PUNCTUATION.indexOf(ch) != -1;
    }

    /**
     * Tells whether {@code ch} separates tokens.
     *
     * <p>{@link Character#isSpaceChar(char)} alone does not cover tab, line feed
     * or carriage return, while {@link Character#isWhitespace(char)} alone does
     * not cover non-breaking spaces, so both predicates are consulted.
     *
     * @param ch the character to test
     * @return {@code true} if {@code ch} is a Unicode space or Java whitespace character
     */
    private static boolean isSeparator(char ch) {
        return Character.isSpaceChar(ch) || Character.isWhitespace(ch);
    }

    /**
     * Scans {@code text} and computes the boundaries of every token.
     *
     * @param text the text to scan
     * @return the token spans in text order; each element is a two-element array
     *         holding the begin index (inclusive) and the end index (exclusive)
     * @throws NullPointerException if {@code text} is {@code null}
     */
    private static List<int[]> findTokenSpans(String text) {
        List<int[]> spans = new ArrayList<int[]>();
        int length = text.length();
        int start = 0;
        // "end" is the exclusive end index of the character under inspection.
        for (int end = 1; end <= length; ++end) {
            char current = text.charAt(end - 1);
            if (SimpleTokenizer.isSeparator(current)) {
                // Flush pending unclassified characters instead of dropping
                // them, mirroring the flush performed at the end of the text.
                if (start < end - 1) {
                    spans.add(new int[]{start, end - 1});
                }
                start = end;
            } else if (Character.isLetterOrDigit(current)) {
                boolean runContinues = end < length && Character.isLetterOrDigit(text.charAt(end));
                if (!runContinues) {
                    spans.add(new int[]{start, end});
                    start = end;
                }
            } else if (SimpleTokenizer.isPunctuation(current)) {
                spans.add(new int[]{start, end});
                start = end;
            }
            // Any other character stays pending and joins the next emitted span.
        }
        if (start < length) {
            spans.add(new int[]{start, length});
        }
        return spans;
    }

    /**
     * Tokenizes the text of {@code sentence} and adds one {@link Token} per
     * token to it, in text order. The offsets given to each {@code Token} are
     * the same begin/end indices that {@link #getTokens(String)} passes to
     * {@link String#substring(int, int)}.
     *
     * @param sentence the sentence whose text is read and which receives the tokens
     */
    @Override
    public void tokenize(Sentence sentence) {
        String text = sentence.getText();
        for (int[] span : SimpleTokenizer.findTokenSpans(text)) {
            sentence.addToken(new Token(sentence, span[0], span[1]));
        }
    }

    /**
     * Splits {@code text} into token strings.
     *
     * @param text the text to tokenize
     * @return a new mutable list with the token strings in text order; empty
     *         when {@code text} contains no token
     * @throws NullPointerException if {@code text} is {@code null}
     */
    @Override
    public List<String> getTokens(String text) {
        ArrayList<String> tokens = new ArrayList<String>();
        for (int[] span : SimpleTokenizer.findTokenSpans(text)) {
            tokens.add(text.substring(span[0], span[1]));
        }
        return tokens;
    }
}

