package dragon.nlp.extract;

import dragon.nlp.Sentence;
import dragon.nlp.Word;
import java.util.ArrayList;

/* loaded from: input_file:dragon/nlp/extract/CnSimpleDocumentParser.class */
/**
 * Document parser for text that mixes single-byte-range characters with wide
 * characters (code point above 255, e.g. Chinese).
 *
 * <p>Every wide character is emitted as a word/token of its own, while runs of
 * other characters are split on spaces and on the inherited word delimiters.
 * The constructor extends the inherited sentence delimiters with the
 * full-width marks "；？。！".
 */
public class CnSimpleDocumentParser extends EngDocumentParser {
    /** Characters that {@link #parseWord(String)} treats as punctuation when they form a one-character word. */
    public static final String punctuations = "\r\n\t_-.;,?/\"'`:(){}!+[]><=%$#*@&^~|\\—！《》？、，。（）【】";

    /** Appends the full-width sentence terminators to the inherited sentence delimiter set. */
    public CnSimpleDocumentParser() {
        this.sentDelimitor = this.sentDelimitor + "；？。！";
    }

    /**
     * Splits one sentence into words and stores them in a new {@link Sentence}.
     *
     * <p>The last character is recorded as the sentence punctuation when it is a
     * sentence delimiter; otherwise a '.' is appended and recorded instead. The
     * final character is never emitted as a word.
     *
     * <p>Scanner states:
     * <ul>
     *   <li>0 - start of input, or just after a wide character (nothing pending)</li>
     *   <li>1 - just after a space (nothing pending)</li>
     *   <li>2 - a delimiter (or non-word period) starting at {@code start} is pending</li>
     *   <li>3 - a regular word starting at {@code start} is pending</li>
     * </ul>
     *
     * @param str raw sentence text
     * @return the parsed sentence, or {@code null} when {@code str} is null,
     *         empty, or contains only whitespace
     */
    @Override
    public Sentence parseSentence(String str) {
        if (str == null || str.length() == 0) {
            return null;
        }
        Sentence sentence = new Sentence();
        // A period only needs special treatment when it is not already a word delimiter.
        boolean checkPeriod = this.wordDelimitor.indexOf('.') < 0;
        String text = str.trim();
        int length = text.length();
        if (length <= 0) {
            return null;
        }
        char last = text.charAt(length - 1);
        if (this.sentDelimitor.indexOf(last) >= 0) {
            sentence.setPunctuation(last);
        } else {
            text = text + ".";
            sentence.setPunctuation('.');
            length++;
        }

        int state = 0;
        int start = 0;
        // Stop before the final character: it is the sentence punctuation.
        for (int pos = 0; pos < length - 1; pos++) {
            char ch = text.charAt(pos);
            if (ch > 255) {
                // Wide character: flush whatever is pending, then emit it alone.
                if (state >= 2) {
                    sentence.addWord(parseWord(text.substring(start, pos)));
                }
                sentence.addWord(parseWord(text.substring(pos, pos + 1)));
                state = 0;
            } else if (checkPeriod && ch == '.' && !isPeriodAsWord(pos, start, text)) {
                // Period that is not part of the current word: it becomes pending itself.
                if (state >= 2) {
                    sentence.addWord(parseWord(text.substring(start, pos)));
                }
                state = 2;
                start = pos;
            } else if (ch == ' ') {
                // Spaces end the pending word and are dropped.
                if (state >= 2) {
                    sentence.addWord(parseWord(text.substring(start, pos)));
                }
                state = 1;
            } else if (this.wordDelimitor.indexOf(ch) >= 0) {
                // Delimiters end the pending word and become pending themselves.
                if (state >= 2) {
                    sentence.addWord(parseWord(text.substring(start, pos)));
                }
                start = pos;
                state = 2;
            } else {
                // Regular character: starts a new word unless one is already running.
                if (state == 2) {
                    sentence.addWord(parseWord(text.substring(start, pos)));
                    start = pos;
                } else if (state == 1 || state == 0) {
                    start = pos;
                }
                state = 3;
            }
        }
        if (state >= 2 && length - 1 > start) {
            sentence.addWord(parseWord(text.substring(start, length - 1)));
        }
        return sentence;
    }

    /**
     * Wraps a piece of text in a {@link Word} and assigns its type code.
     *
     * <p>Type 2 is set when the first character is below code point 255 and
     * {@code isNumber} accepts the text; type 4 is set for a one-character word
     * found in {@link #punctuations}. Other words keep the type assigned by the
     * {@code Word} constructor.
     * NOTE(review): 2 and 4 are presumably the number and punctuation type codes
     * of {@code Word} - confirm against that class.
     *
     * @param str non-empty word text
     * @return the new word
     */
    @Override
    protected Word parseWord(String str) {
        Word word = new Word(str);
        if (str.charAt(0) < 255 && isNumber(str)) {
            word.setType(2);
        } else if (str.length() == 1 && punctuations.indexOf(str) >= 0) {
            word.setType(4);
        }
        return word;
    }

    /**
     * Splits text into plain string tokens.
     *
     * <p>Unlike {@link #parseSentence(String)}, word delimiters and non-token
     * periods are discarded rather than emitted.
     *
     * <p>Scanner states:
     * <ul>
     *   <li>0 - start of input, or just after a wide character</li>
     *   <li>1 - just after a delimiter</li>
     *   <li>2 - a token starting at {@code start} is pending</li>
     * </ul>
     *
     * @param str raw text
     * @return list of {@code String} tokens, or {@code null} when {@code str}
     *         is null or contains only whitespace
     */
    @Override
    public ArrayList parseTokens(String str) {
        if (str == null) {
            return null;
        }
        String text = str.trim();
        if (text.length() == 0) {
            return null;
        }
        int length = text.length();
        int state = 0;
        int start = 0;
        ArrayList tokens = new ArrayList();
        // A period only needs special treatment when it is not already a word delimiter.
        boolean checkPeriod = this.wordDelimitor.indexOf('.') < 0;
        for (int pos = 0; pos < length; pos++) {
            char ch = text.charAt(pos);
            if (ch > 255) {
                // Wide character: flush the pending token, then emit it alone.
                if (state >= 2) {
                    tokens.add(text.substring(start, pos));
                }
                // The decompiled code filtered against an empty exclusion string,
                // which never matches, so every wide character is kept.
                tokens.add(text.substring(pos, pos + 1));
                state = 0;
            } else if (checkPeriod && ch == '.' && !isPeriodAsToken(pos, start, text)) {
                if (state >= 2) {
                    tokens.add(text.substring(start, pos));
                }
                state = 1;
            } else if (this.wordDelimitor.indexOf(ch) >= 0) {
                if (state >= 2) {
                    tokens.add(text.substring(start, pos));
                }
                state = 1;
            } else if (state == 1 || state == 0) {
                // First regular character after a break starts a new token.
                start = pos;
                state = 2;
            }
        }
        if (state >= 2) {
            tokens.add(text.substring(start, length));
        }
        return tokens;
    }
}
