package io.bdrc.lucene.bo;

import io.bdrc.lucene.stemmer.Optimizer;
import io.bdrc.lucene.stemmer.Row;
import io.bdrc.lucene.stemmer.Trie;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.util.RollingCharBuffer;

/* loaded from: input_file:io/bdrc/lucene/bo/TibWordTokenizer.class */
public final class TibWordTokenizer extends Tokenizer {
    private Trie scanner;
    private static final int MAX_WORD_LEN = 255;
    private RollingCharBuffer ioBuffer;
    private int tokenLength;
    private int cmdIndex;
    private boolean foundMatch;
    private int foundMatchCmdIndex;
    private Row rootRow;
    private Row currentRow;
    private int tokenStart;
    private int tokenEnd;
    private boolean passedFirstSyllable;
    static final /* synthetic */ boolean $assertionsDisabled;
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
    private boolean lemmatize = true;
    private boolean debug = false;
    private int bufferIndex = 0;
    private int finalOffset = 0;
    private final int charCount = 1;

    static {
        $assertionsDisabled = !TibWordTokenizer.class.desiredAssertionStatus();
    }

    public TibWordTokenizer(String str) throws FileNotFoundException, IOException {
        init(new FileReader(str));
    }

    public TibWordTokenizer() throws FileNotFoundException, IOException {
        InputStream resourceAsStream = TibWordTokenizer.class.getResourceAsStream("total_lexicon.txt");
        if (resourceAsStream == null) {
            init(new FileReader("resource/output/total_lexicon.txt"));
        } else {
            init(new InputStreamReader(resourceAsStream));
        }
    }

    private void init(Reader reader) throws FileNotFoundException, IOException {
        this.scanner = new Trie(true);
        Throwable th = null;
        try {
            BufferedReader bufferedReader = new BufferedReader(reader);
            while (true) {
                try {
                    String readLine = bufferedReader.readLine();
                    if (readLine == null) {
                        this.scanner.reduce(new Optimizer());
                        if (bufferedReader != null) {
                            bufferedReader.close();
                        }
                        this.ioBuffer = new RollingCharBuffer();
                        this.ioBuffer.reset(this.input);
                        return;
                    }
                    int indexOf = readLine.indexOf(32);
                    if (indexOf == -1) {
                        throw new IllegalArgumentException("The dictionary file is corrupted in the following line.\n" + readLine);
                    }
                    this.scanner.add(readLine.substring(0, indexOf), readLine.substring(indexOf + 1));
                } catch (Throwable th2) {
                    if (bufferedReader != null) {
                        bufferedReader.close();
                    }
                    throw th2;
                }
            }
        } catch (Throwable th3) {
            if (0 == 0) {
                th = th3;
            } else if (null != th3) {
                th.addSuppressed(th3);
            }
            throw th;
        }
    }

    protected int normalize(int i) {
        return i;
    }

    public final boolean incrementToken() throws IOException {
        clearAttributes();
        this.ioBuffer.freeBefore(this.bufferIndex);
        this.tokenLength = 0;
        this.tokenStart = -1;
        this.tokenEnd = -1;
        this.rootRow = this.scanner.getRow(this.scanner.getRoot());
        int i = -1;
        int i2 = -1;
        this.cmdIndex = -1;
        this.foundMatchCmdIndex = -1;
        this.foundMatch = false;
        this.passedFirstSyllable = false;
        this.currentRow = null;
        char[] buffer = this.termAtt.buffer();
        if (this.debug) {
            System.out.println("----------------------");
        }
        while (true) {
            int i3 = this.ioBuffer.get(this.bufferIndex);
            this.bufferIndex++;
            if (i3 == -1) {
                this.bufferIndex--;
                if (this.tokenLength == 0) {
                    this.finalOffset = correctOffset(this.bufferIndex);
                    return false;
                }
            } else {
                if (this.debug) {
                    System.out.println("\t" + ((char) i3));
                }
                if (isTibetanTokenChar(i3)) {
                    checkIfFirstSylPassed(i3);
                    if (isStartOfToken(i3)) {
                        tryToFindMatchIn(this.rootRow, i3);
                        tryToContinueDownTheTrie(this.rootRow, i3);
                        incrementTokenIndices();
                    } else {
                        ifNeededResize(buffer);
                        if (!wentToMaxDownTheTrie()) {
                            if (this.foundMatch) {
                                i = this.tokenEnd;
                                i2 = this.bufferIndex;
                            }
                            this.tokenEnd++;
                            tryToFindMatchIn(this.currentRow, i3);
                            tryToContinueDownTheTrie(this.currentRow, i3);
                        } else {
                            if (this.passedFirstSyllable) {
                                stepBackIfStartedNextSylButCantGoFurther(i3);
                                break;
                            }
                            if (reachedSylEnd(i3)) {
                                i = this.tokenEnd;
                                i2 = this.bufferIndex;
                                break;
                            }
                            this.tokenEnd++;
                        }
                    }
                    IncrementTokenLengthAndAddCurrentCharTo(buffer, i3);
                    if (this.tokenLength >= MAX_WORD_LEN) {
                        break;
                    }
                } else if (this.tokenLength > 0) {
                    break;
                }
            }
        }
        if (this.foundMatch) {
            i = this.tokenEnd;
            i2 = this.bufferIndex;
        }
        if (i > 0) {
            this.bufferIndex = i2;
            this.tokenEnd = i;
        }
        if (!$assertionsDisabled && this.tokenStart == -1) {
            throw new AssertionError();
        }
        finalizeSettingTermAttribute();
        lemmatizeIfRequired();
        return true;
    }

    private void ifNeededResize(char[] cArr) {
        if (this.tokenLength >= cArr.length - 1) {
            this.termAtt.resizeBuffer(2 + this.tokenLength);
        }
    }

    private final void stepBackIfStartedNextSylButCantGoFurther(int i) {
        if (this.cmdIndex == -1 && this.currentRow == null && this.passedFirstSyllable && !reachedSylEnd(i)) {
            this.bufferIndex--;
            this.tokenEnd--;
        }
    }

    private final void checkIfFirstSylPassed(int i) {
        if (i != 3851 || this.passedFirstSyllable) {
            return;
        }
        this.passedFirstSyllable = true;
    }

    private final void finalizeSettingTermAttribute() {
        this.finalOffset = correctOffset(this.tokenEnd);
        this.offsetAtt.setOffset(correctOffset(this.tokenStart), this.finalOffset);
        this.termAtt.setLength(this.tokenEnd - this.tokenStart);
    }

    private final boolean reachedSylEnd(int i) {
        return i == 3851;
    }

    private final boolean wentToMaxDownTheTrie() {
        return this.currentRow == null;
    }

    private final void lemmatizeIfRequired() {
        String commandVal;
        if (!this.lemmatize || (commandVal = this.scanner.getCommandVal(this.foundMatchCmdIndex)) == null) {
            return;
        }
        applyCmdToTermAtt(commandVal);
    }

    private final void IncrementTokenLengthAndAddCurrentCharTo(char[] cArr, int i) {
        this.tokenLength += Character.toChars(normalize(i), cArr, this.tokenLength);
    }

    private final void incrementTokenIndices() {
        this.tokenStart = this.bufferIndex - 1;
        this.tokenEnd = this.tokenStart + 1;
    }

    private final void tryToContinueDownTheTrie(Row row, int i) {
        int ref = row.getRef(Character.valueOf((char) i));
        this.currentRow = ref >= 0 ? this.scanner.getRow(ref) : null;
    }

    private final void tryToFindMatchIn(Row row, int i) {
        this.cmdIndex = row.getCmd(Character.valueOf((char) i));
        this.foundMatch = this.cmdIndex >= 0;
        if (this.foundMatch) {
            this.foundMatchCmdIndex = this.cmdIndex;
        }
    }

    private final boolean isStartOfToken(int i) {
        return this.tokenLength == 0;
    }

    private final boolean isTibetanTokenChar(int i) {
        if (isTibLetter(i)) {
            return true;
        }
        return i == 3851 && this.tokenLength > 0;
    }

    public final boolean isTibLetter(int i) {
        return 3904 <= i && i <= 4028;
    }

    private final void applyCmdToTermAtt(String str) {
        if (str.charAt(0) != '>') {
            if (str.charAt(0) == '/') {
                this.termAtt.setEmpty().append(str.substring(1, str.length()));
                return;
            }
            return;
        }
        switch (str.charAt(1)) {
            case 'A':
                this.termAtt.setLength(this.termAtt.length() - 1);
                return;
            case 'B':
                this.termAtt.setLength(this.termAtt.length() - 2);
                return;
            case 'C':
                this.termAtt.setLength(this.termAtt.length() - 3);
                return;
            case 'D':
                this.termAtt.buffer()[this.termAtt.length() - 1] = 3936;
                return;
            default:
                throw new IllegalArgumentException("the operation should be A, B, C or D.");
        }
    }

    public final void end() throws IOException {
        super.end();
        this.offsetAtt.setOffset(this.finalOffset, this.finalOffset);
    }

    public void reset() throws IOException {
        super.reset();
        this.bufferIndex = 0;
        this.finalOffset = 0;
        this.ioBuffer.reset(this.input);
    }

    public final void setLemmatize(boolean z) {
        this.lemmatize = z;
    }

    public final void setDebug(boolean z) {
        this.debug = z;
    }
}
