package dragon.nlp.extract;

import dragon.nlp.Document;
import dragon.nlp.DocumentParser;
import dragon.nlp.Paragraph;
import dragon.nlp.Word;

/* loaded from: input_file:dragon/nlp/extract/EngDocumentParser.class */
/**
 * Rule-based parser that splits English text into paragraphs, sentences and words.
 *
 * <p>This source was recovered by a decompiler. {@link #parseSentence(String)} and
 * {@link #parseTokens(String)} could not be decompiled and throw
 * {@link UnsupportedOperationException}; everything that reaches them (including
 * {@link #parseParagraph(String)} for any non-blank input) is affected.
 *
 * <p>Instances are not safe for concurrent use: {@link #parseParagraph(String)} may
 * rewrite the {@link #sentDelimitor} field.
 */
public class EngDocumentParser implements DocumentParser {
    /** Default paragraph delimiter: one blank line. */
    public static final String defParaDelimitor = "\n\n";
    /** Default set of sentence-terminating characters. */
    public static final String defSentDelimitor = ".;?!";
    /** Default set of word-delimiting characters (whitespace plus punctuation). */
    public static final String defWordDelimitor = " \r\n\t_-.;,?/\"'`:(){}!+[]><=%$#*@&^~|\\";
    /** Characters that {@link #parseWord(String)} treats as punctuation; same as the word delimiters minus the space. */
    public static final String punctuations = "\r\n\t_-.;,?/\"'`:(){}!+[]><=%$#*@&^~|\\";

    protected String wordDelimitor;
    protected String paraDelimitor;
    protected String sentDelimitor;

    /** Creates a parser that uses the default word, sentence and paragraph delimiters. */
    public EngDocumentParser() {
        this(null);
    }

    /**
     * Creates a parser with a custom set of word delimiters.
     *
     * @param str characters to use as word delimiters, or {@code null} for
     *            {@link #defWordDelimitor}
     */
    public EngDocumentParser(String str) {
        this.paraDelimitor = defParaDelimitor;
        this.sentDelimitor = defSentDelimitor;
        this.wordDelimitor = (str == null) ? defWordDelimitor : str;
    }

    /**
     * Splits a text into paragraphs on {@link #paraDelimitor} and parses each one.
     *
     * <p>Line endings are normalised to {@code '\n'} first. Empty and whitespace-only
     * paragraphs are skipped. If the delimiter field is {@code null} or empty, the whole
     * text is treated as one paragraph.
     *
     * @param str the raw document text
     * @return the parsed document; {@code null} if {@code str} is {@code null} or empty,
     *         or if any exception is raised while parsing a paragraph
     */
    @Override // dragon.nlp.DocumentParser
    public Document parse(String str) {
        if (str == null || str.length() == 0) {
            return null;
        }
        Document document = new Document();
        String text = str.replace("\r\n", "\n").replace('\r', '\n');
        String delimiter = this.paraDelimitor;
        // An empty delimiter would match everywhere without advancing; treat it as "no split".
        int delimiterLength = (delimiter == null) ? 0 : delimiter.length();
        int pos = 0;
        while (pos < text.length()) {
            try {
                int next = (delimiterLength > 0) ? text.indexOf(delimiter, pos) : -1;
                if (next >= 0) {
                    // next == pos means two delimiters back to back: nothing to parse in between.
                    if (next > pos) {
                        appendParagraph(document, text.substring(pos, next));
                    }
                    pos = next + delimiterLength;
                } else {
                    appendParagraph(document, text.substring(pos));
                    pos = text.length();
                }
            } catch (Exception e) {
                // Deliberate best-effort contract kept from the original: any failure while
                // parsing a paragraph makes the whole document unparseable.
                return null;
            }
        }
        return document;
    }

    /** Parses {@code text} as a paragraph and adds it to {@code document} unless it is blank. */
    private void appendParagraph(Document document, String text) {
        Paragraph paragraph = parseParagraph(text);
        if (paragraph != null) {
            document.addParagraph(paragraph);
        }
    }

    /**
     * Splits a paragraph into sentences and parses each with {@link #parseSentence(String)}.
     *
     * <p>Newlines are replaced by spaces and the text is trimmed. If the text does not end
     * in one of the sentence delimiters, a period is appended. Periods end a sentence only
     * when {@link #isSentencePeriod(int, String)} accepts them; every other delimiter
     * character always ends a sentence.
     *
     * <p>Side effect: if {@link #sentDelimitor} does not contain {@code '.'}, a period is
     * prepended to the field.
     *
     * @param str the paragraph text
     * @return the parsed paragraph, or {@code null} if {@code str} is {@code null}, empty,
     *         or contains only whitespace
     */
    @Override // dragon.nlp.DocumentParser
    public Paragraph parseParagraph(String str) {
        if (str == null || str.length() == 0) {
            return null;
        }
        if (this.sentDelimitor.indexOf('.') < 0) {
            this.sentDelimitor = "." + this.sentDelimitor;
        }
        String text = str.replace('\n', ' ').trim();
        int length = text.length();
        if (length == 0) {
            // Whitespace-only input: there is no last character to inspect below.
            return null;
        }
        Paragraph paragraph = new Paragraph();
        if (this.sentDelimitor.indexOf(text.charAt(length - 1)) < 0) {
            text = text + ".";
            length++;
        }
        int start = 0;
        while (start < length) {
            // Find the first period at or after start that really ends a sentence.
            int end = -1;
            int scan = start;
            while (end == -1 && scan < length) {
                end = text.indexOf('.', scan);
                if (end >= 0) {
                    scan = end + 1;
                    if (!isSentencePeriod(end, text)) {
                        end = -1;
                    }
                } else {
                    scan = length;
                }
            }
            // Any non-period delimiter that occurs earlier wins.
            for (int k = 0; k < this.sentDelimitor.length(); k++) {
                char delimiter = this.sentDelimitor.charAt(k);
                int found = text.indexOf(delimiter, start);
                if (found >= 0 && delimiter != '.' && (end < 0 || found < end)) {
                    end = found;
                }
            }
            // NOTE(review): end == 0 is only possible when a non-period delimiter is the very
            // first character; that case takes the else branch and the rest of the paragraph
            // becomes a single sentence. Kept as in the original - confirm whether intended.
            if (end > 0) {
                paragraph.addSentence(parseSentence(text.substring(start, end + 1)));
                start = end + 1;
            } else {
                paragraph.addSentence(parseSentence(text.substring(start) + "."));
                start = length;
            }
        }
        return paragraph;
    }

    /**
     * Parses one sentence. NOT AVAILABLE: the decompiler could not reconstruct this
     * method, so the body below only throws. Restore it from the original bytecode or
     * sources before using this class.
     *
     * @throws UnsupportedOperationException always
     */
    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Removed duplicated region for block: B:35:0x011c  */
    /* JADX WARN: Removed duplicated region for block: B:40:0x0139  */
    /* JADX WARN: Type inference failed for: r0v35 */
    /* JADX WARN: Type inference failed for: r0v49 */
    /* JADX WARN: Type inference failed for: r0v58 */
    /* JADX WARN: Type inference failed for: r0v66 */
    /* JADX WARN: Type inference failed for: r0v71 */
    /* JADX WARN: Type inference failed for: r0v78 */
    @Override // dragon.nlp.DocumentParser
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public dragon.nlp.Sentence parseSentence(java.lang.String r8) {
        /*
            Method dump skipped, instructions count: 453
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: dragon.nlp.extract.EngDocumentParser.parseSentence(java.lang.String):dragon.nlp.Sentence");
    }

    /**
     * Wraps a token in a {@link Word} and tags numbers and single punctuation characters.
     *
     * @param str the token text
     * @return a new word; its type is set to 2 when {@link #isNumber(String)} accepts the
     *         token, to 4 when the token is a single character from {@link #punctuations},
     *         and is left untouched otherwise
     */
    protected Word parseWord(String str) {
        Word word = new Word(str);
        if (isNumber(str)) {
            // presumably the "number" word type constant - TODO confirm against Word
            word.setType(2);
        } else if (str.length() == 1 && punctuations.indexOf(str) >= 0) {
            // presumably the "punctuation" word type constant - TODO confirm against Word
            word.setType(4);
        }
        return word;
    }

    /**
     * Tokenises a string. NOT AVAILABLE: the decompiler could not reconstruct this
     * method, so the body below only throws.
     *
     * @throws UnsupportedOperationException always
     */
    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Removed duplicated region for block: B:31:0x00c9  */
    /* JADX WARN: Removed duplicated region for block: B:36:0x00e1  */
    /* JADX WARN: Type inference failed for: r0v21 */
    /* JADX WARN: Type inference failed for: r0v33 */
    /* JADX WARN: Type inference failed for: r0v44 */
    /* JADX WARN: Type inference failed for: r0v51 */
    @Override // dragon.nlp.DocumentParser
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public java.util.ArrayList parseTokens(java.lang.String r6) {
        /*
            Method dump skipped, instructions count: 272
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: dragon.nlp.extract.EngDocumentParser.parseTokens(java.lang.String):java.util.ArrayList");
    }

    /**
     * Heuristic: decides whether the character at index {@code i} (presumably a period)
     * should stay attached to the surrounding word, e.g. in an abbreviation.
     *
     * <p>Declared {@code protected} in the original class; the decompiler widened it.
     *
     * @param i   index of the character under test
     * @param i2  reference index compared against {@code i}; presumably the start of the
     *            current token - TODO confirm against the callers
     * @param str the text being scanned
     * @return {@code true} if the period is considered part of the word
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public boolean isPeriodAsWord(int i, int i2, String str) {
        int length = str.length();
        // The preceding character must be a letter with a code point of at most 255.
        if (i == i2 || !Character.isLetter(str.charAt(i - 1)) || str.charAt(i - 1) > 255) {
            return false;
        }
        // "x.y." pattern, or exactly one letter between i2 and the period.
        if ((i - 2 > 0 && str.charAt(i - 2) == '.') || i == i2 + 1) {
            return true;
        }
        // Four or more characters since i2: rejected.
        if (i - i2 >= 4) {
            return false;
        }
        // Within the last two positions: accepted unless it is the very last character.
        if (i >= length - 2) {
            return i != length - 1;
        }
        // Otherwise accepted unless a line break follows immediately.
        char next = str.charAt(i + 1);
        return (next == '\r' || next == '\n') ? false : true;
    }

    /**
     * Heuristic: decides whether the character at index {@code i} (presumably a period)
     * belongs inside a token rather than separating two tokens.
     *
     * <p>Declared {@code protected} in the original class; the decompiler widened it.
     *
     * @param i   index of the character under test
     * @param i2  reference index compared against {@code i}; presumably the start of the
     *            current token - TODO confirm against the callers
     * @param str the text being scanned
     * @return {@code true} if the period is considered part of the token
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public boolean isPeriodAsToken(int i, int i2, String str) {
        int length = str.length();
        // The preceding character must be a letter with a code point of at most 255.
        if (i == i2 || !Character.isLetter(str.charAt(i - 1)) || str.charAt(i - 1) > 255) {
            return false;
        }
        if (i <= 2 || str.charAt(i - 2) != '.') {
            // Fewer than four characters since i2 and a letter directly after the period.
            return i - i2 < 4 && i < length - 1 && Character.isLetter(str.charAt(i + 1));
        }
        // "x.y." pattern: a period two positions back.
        return true;
    }

    /**
     * Classifies the character at index {@code i} (presumably an apostrophe) by its
     * neighbours.
     *
     * @param i   index of the character under test
     * @param i2  not used by this implementation
     * @param str the text being scanned
     * @return 0 if the character is at either end of the string, is preceded by a space,
     *         or is not followed by a letter; 1 if it is followed by {@code 's'} and then
     *         a space; 2 otherwise
     */
    protected int isApostrophesAsWord(int i, int i2, String str) {
        char next;
        if (i == 0 || str.charAt(i - 1) == ' ' || i == str.length() - 1 || (next = str.charAt(i + 1)) == ' ' || !Character.isLetter(next)) {
            return 0;
        }
        return (next == 's' && i + 2 < str.length() && str.charAt(i + 2) == ' ') ? 1 : 2;
    }

    /**
     * Heuristic: decides whether the period at index {@code i} ends a sentence.
     *
     * @param i   index of the period in {@code str}
     * @param str the paragraph text
     * @return {@code true} if the period is treated as a sentence boundary
     */
    protected boolean isSentencePeriod(int i, String str) {
        int lastSpace;
        if (i == 0) {
            return false;
        }
        // A period after a character with a code point above 255 always ends the sentence.
        if (str.charAt(i - 1) > 255) {
            return true;
        }
        int length = str.length();
        // A period that is not last must be followed by a space.
        if (i < length - 1 && str.charAt(i + 1) != ' ') {
            return false;
        }
        // A lone upper-case letter before the period (start of text or after whitespace).
        if (Character.isUpperCase(str.charAt(i - 1)) && (i == 1 || Character.isWhitespace(str.charAt(i - 2)))) {
            return false;
        }
        if (i - 2 <= 0 || str.charAt(i - 2) != '.') {
            // Rejected only when the next word starts with a lower-case ASCII letter AND the
            // word ending in this period is short (at most 5 characters from the preceding
            // space to the period) AND that word does not start with a lower-case ASCII letter.
            return i >= length - 2 || isUpper(str.charAt(i + 2)) || (lastSpace = str.lastIndexOf(' ', i)) < 0 || i - lastSpace > 5 || !isUpper(str.charAt(lastSpace + 1));
        }
        // "x.y." pattern: a period two positions back.
        return false;
    }

    /**
     * Returns {@code true} for every character that is NOT a lower-case ASCII letter
     * ({@code 'a'}..{@code 'z'}); digits, punctuation and spaces therefore count as "upper".
     */
    private boolean isUpper(char c) {
        return c < 'a' || c > 'z';
    }

    /**
     * Tests whether a string can be parsed by {@link Double#parseDouble(String)}.
     *
     * <p>Declared {@code protected} in the original class; the decompiler widened it.
     *
     * @param str the candidate text; may be {@code null}
     * @return {@code true} if {@code str} parses as a double, {@code false} otherwise
     *         (including for {@code null})
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public boolean isNumber(String str) {
        if (str == null) {
            return false;
        }
        try {
            Double.parseDouble(str);
            return true;
        } catch (NumberFormatException ignored) {
            // Not a number: this is the expected negative outcome, not an error.
            return false;
        }
    }
}
