/*
 * Decompiled with CFR 0.152.
 */
package dragon.nlp.extract;

import dragon.nlp.Sentence;
import dragon.nlp.Word;
import dragon.nlp.extract.EngDocumentParser;
import java.util.ArrayList;

/**
 * Document parser for text that mixes Latin-1 characters with "wide" characters
 * (UTF-16 code units above U+00FF; presumably Chinese text, judging by the class
 * name - confirm). Every wide character is emitted as a word/token of its own,
 * while the remaining text is split on spaces and on the inherited
 * {@code wordDelimitor} characters.
 */
public class CnSimpleDocumentParser
extends EngDocumentParser {
    public static final String punctuations = "\r\n\t_-.;,?/\"'`:(){}!+[]><=%$#*@&^~|";

    /** Highest code unit that is still handled as ordinary (non-wide) text. */
    private static final char LATIN1_MAX = '\u00ff';

    public CnSimpleDocumentParser() {
        // NOTE(review): appending an empty literal leaves sentDelimitor unchanged.
        // This file is decompiled, so the literal may originally have held non-ASCII
        // sentence terminators that were lost - confirm against the original source.
        this.sentDelimitor = this.sentDelimitor + "";
    }

    /**
     * Splits one sentence into words.
     *
     * <p>The sentence is trimmed; if its last character is not one of
     * {@code sentDelimitor}, a {@code '.'} is appended. That final character is
     * stored as the sentence punctuation and is never emitted as a word.
     *
     * <p>The scan keeps a small state in {@code flag}:
     * <ul>
     *   <li>0 - nothing pending (start of input, or just after a wide character)</li>
     *   <li>1 - just after a space</li>
     *   <li>2 - a delimiter at {@code start} is pending and will be emitted as a word</li>
     *   <li>3 - inside a run of ordinary characters that began at {@code start}</li>
     * </ul>
     *
     * @param sentence the raw sentence text, may be {@code null}
     * @return the parsed sentence, or {@code null} if the input is {@code null},
     *         empty, or blank after trimming
     */
    @Override
    public Sentence parseSentence(String sentence) {
        if (sentence == null || sentence.length() == 0) {
            return null;
        }
        Sentence newSent = new Sentence();
        // A period needs the dedicated check only when it is not a plain word delimiter.
        boolean checkPeriod = this.wordDelimitor.indexOf('.') < 0;
        sentence = sentence.trim();
        int len = sentence.length();
        if (len <= 0) {
            return null;
        }
        char last = sentence.charAt(len - 1);
        if (this.sentDelimitor.indexOf(last) >= 0) {
            newSent.setPunctuation(last);
        } else {
            sentence = sentence + ".";
            newSent.setPunctuation('.');
            ++len;
        }

        // Index of the terminating punctuation; the scan stops just before it.
        int end = len - 1;
        int flag = 0;
        int start = 0;
        for (int i = 0; i < end; ++i) {
            char ch = sentence.charAt(i);
            if (isWideChar(ch)) {
                this.flushPendingWord(newSent, sentence, flag, start, i);
                // Keep a surrogate pair together so that a supplementary character
                // is not split into two malformed single-unit words.
                int width = wideCharWidth(sentence, i, end);
                newSent.addWord(this.parseWord(sentence.substring(i, i + width)));
                i += width - 1;
                flag = 0;
                continue;
            }
            if (checkPeriod && ch == '.' && !this.isPeriodAsWord(i, start, sentence)) {
                this.flushPendingWord(newSent, sentence, flag, start, i);
                flag = 2;
                start = i;
                continue;
            }
            if (ch == ' ') {
                this.flushPendingWord(newSent, sentence, flag, start, i);
                flag = 1;
                continue;
            }
            if (this.wordDelimitor.indexOf(ch) >= 0) {
                this.flushPendingWord(newSent, sentence, flag, start, i);
                start = i;
                flag = 2;
                continue;
            }
            // Ordinary character: close a pending delimiter and/or open a new run.
            if (flag == 2) {
                newSent.addWord(this.parseWord(sentence.substring(start, i)));
                start = i;
            } else if (flag == 1 || flag == 0) {
                start = i;
            }
            flag = 3;
        }
        if (end > start) {
            this.flushPendingWord(newSent, sentence, flag, start, end);
        }
        return newSent;
    }

    /**
     * Wraps a piece of text in a {@link Word} and assigns its type.
     *
     * <p>Type 2 is set when the text does not start with a wide character and
     * {@code isNumber} accepts it; otherwise type 4 is set when the text is a single
     * character contained in {@link #punctuations}. (Presumably 2 and 4 are the
     * number and punctuation word types - confirm against {@code Word}.)
     *
     * @param content the non-empty text of the word
     * @return the new word
     */
    @Override
    protected Word parseWord(String content) {
        Word cur = new Word(content);
        // Uses the same wide-character threshold as the scanners, so U+00FF is
        // classified identically everywhere.
        if (!isWideChar(content.charAt(0)) && this.isNumber(content)) {
            cur.setType(2);
        } else if (content.length() == 1 && punctuations.indexOf(content) >= 0) {
            cur.setType(4);
        }
        return cur;
    }

    /**
     * Splits text into string tokens. Wide characters become single tokens;
     * {@code wordDelimitor} characters (and periods rejected by
     * {@code isPeriodAsToken}) separate tokens and are dropped.
     *
     * <p>Scan state in {@code flag}: 0 - nothing pending (start, or just after a
     * wide character); 1 - just after a separator; 2 - inside a token that began at
     * {@code start}.
     *
     * @param content the raw text, may be {@code null}
     * @return the list of {@code String} tokens, or {@code null} if the input is
     *         {@code null} or blank after trimming
     */
    @Override
    public ArrayList parseTokens(String content) {
        if (content == null) {
            return null;
        }
        content = content.trim();
        if (content.length() == 0) {
            return null;
        }
        // NOTE(review): this literal is empty, so no wide character is ever filtered
        // out below. It may originally have listed wide punctuation marks that were
        // lost in decompilation - confirm against the original source.
        String cnPunc = "";
        int len = content.length();
        int flag = 0;
        int start = 0;
        ArrayList<String> tokenList = new ArrayList<String>();
        boolean checkPeriod = this.wordDelimitor.indexOf('.') < 0;
        for (int i = 0; i < len; ++i) {
            char ch = content.charAt(i);
            if (isWideChar(ch)) {
                if (flag >= 2) {
                    tokenList.add(content.substring(start, i));
                }
                // Keep a surrogate pair together as one token.
                int width = wideCharWidth(content, i, len);
                if (cnPunc.indexOf(ch) < 0) {
                    tokenList.add(content.substring(i, i + width));
                }
                i += width - 1;
                flag = 0;
                continue;
            }
            boolean separator =
                    (checkPeriod && ch == '.' && !this.isPeriodAsToken(i, start, content))
                    || this.wordDelimitor.indexOf(ch) >= 0;
            if (separator) {
                if (flag >= 2) {
                    tokenList.add(content.substring(start, i));
                }
                flag = 1;
                continue;
            }
            if (flag == 0 || flag == 1) {
                // First ordinary character after a gap opens a new token.
                start = i;
                flag = 2;
            }
        }
        if (flag >= 2) {
            tokenList.add(content.substring(start, len));
        }
        return tokenList;
    }

    /** Returns whether {@code ch} lies above the Latin-1 range and is split off on its own. */
    private static boolean isWideChar(char ch) {
        return ch > LATIN1_MAX;
    }

    /**
     * Returns the number of code units (1 or 2) of the wide character at
     * {@code index}: 2 only for a well-formed surrogate pair that lies entirely
     * before {@code limit}.
     */
    private static int wideCharWidth(String text, int index, int limit) {
        if (Character.isHighSurrogate(text.charAt(index))
                && index + 1 < limit
                && Character.isLowSurrogate(text.charAt(index + 1))) {
            return 2;
        }
        return 1;
    }

    /** Emits {@code text[start, end)} as a word when the scan state says a span is pending. */
    private void flushPendingWord(Sentence sent, String text, int flag, int start, int end) {
        if (flag >= 2) {
            sent.addWord(this.parseWord(text.substring(start, end)));
        }
    }
}

