package dragon.ml.seqmodel.data;

import dragon.util.FileUtil;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.StringTokenizer;
import net.sf.ehcache.distribution.PayloadUtil;

/* loaded from: input_file:dragon/ml/seqmodel/data/FlatSegmentReader.class */
/**
 * Reads token sequences from a flat text source, one {@link DataSequence} per call to
 * {@link #readRow()}.
 *
 * <p>Three input layouts are handled:
 * <ul>
 *   <li><b>raw</b> (three-argument constructor): every line is one unlabeled sequence, split on
 *       space and tab;</li>
 *   <li><b>tagged, variable-column</b>: each line carries a text segment and a label separated by
 *       the tag delimiter; consecutive such lines form one sequence, and the sequence ends at the
 *       first line with fewer than two fields (or at end of input);</li>
 *   <li><b>tagged, fixed-column</b>: selected when the first line starts with
 *       {@code fixed-column-format} (case-insensitive); the second line lists one integer label per
 *       column, and every following line is one sequence whose columns take those labels.</li>
 * </ul>
 *
 * <p>All text is lower-cased before tokenization.
 */
public class FlatSegmentReader implements DataReader {
    /**
     * Read-ahead limit used while probing for the fixed-column header. If the first line is longer
     * than this, {@link BufferedReader#reset()} may fail and that line would be lost.
     */
    private static final int HEADER_READ_AHEAD_LIMIT = 1000;

    /** Prefix (compared case-insensitively) that marks a fixed-column file. */
    private static final String FIXED_COLUMN_HEADER = "fixed-column-format";

    private final int originalLabelNum;
    private final int markovOrder;
    /** Characters that split a segment into tokens. */
    private final String delimit;
    /** Characters that separate a segment from its label (or one column from the next). */
    private final String tagDelimit;
    /** Delimiter characters that are nevertheless kept as tokens of their own. */
    private final String impDelimit;
    private final BufferedReader tin;
    private final LabelConverter labelConverter;
    /** Per-column labels from the fixed-column header, or {@code null} when there is none. */
    private final int[] labels;
    boolean fixedColFormat;
    boolean tagged;

    /**
     * Creates a reader for tagged (labeled) input. The header is probed immediately to decide
     * between the fixed-column and variable-column layouts.
     *
     * @param i number of original labels; also the exclusive upper bound for accepted labels in
     *     fixed-column rows
     * @param i2 Markov order handed to the dataset built by {@link #read()}
     * @param str source passed to {@code FileUtil.getTextReader} (presumably a file name)
     * @param labelConverter converts external labels to internal ones; may be {@code null}, in
     *     which case labels are used as-is
     */
    public FlatSegmentReader(int i, int i2, String str, LabelConverter labelConverter) {
        this.originalLabelNum = i;
        this.markovOrder = i2;
        this.tin = FileUtil.getTextReader(str);
        this.labelConverter = labelConverter;
        this.delimit = ",\t/ -():.;'?\\#`&\"_";
        this.tagDelimit = PayloadUtil.URL_DELIMITER;
        this.impDelimit = ",";
        // readHeaderInfo relies on tagDelimit and originalLabelNum, both assigned above.
        this.labels = readHeaderInfo(this.tin);
        this.fixedColFormat = this.labels != null;
        this.tagged = true;
    }

    /**
     * Creates a reader for raw (unlabeled) input, tokenized on space and tab only.
     *
     * @param i number of original labels
     * @param i2 Markov order handed to the dataset built by {@link #read()}
     * @param str source passed to {@code FileUtil.getTextReader} (presumably a file name)
     */
    public FlatSegmentReader(int i, int i2, String str) {
        this.originalLabelNum = i;
        this.markovOrder = i2;
        this.tin = FileUtil.getTextReader(str);
        this.labelConverter = null;
        this.delimit = " \t";
        this.tagDelimit = PayloadUtil.URL_DELIMITER;
        this.impDelimit = "";
        this.labels = null;
        this.tagged = false;
        this.fixedColFormat = false;
    }

    /**
     * Reads sequences until {@link #readRow()} yields {@code null} or an empty sequence and
     * collects them into a dataset.
     *
     * @return the dataset holding every sequence read; never {@code null}
     */
    @Override // dragon.ml.seqmodel.data.DataReader
    public Dataset read() {
        BasicDataset dataset = new BasicDataset(this.originalLabelNum, this.markovOrder);
        DataSequence row = readRow();
        while (row != null && row.length() != 0) {
            dataset.add(row);
            row = readRow();
        }
        return dataset;
    }

    /**
     * Reads the next sequence using the layout selected at construction time.
     *
     * @return the next sequence; {@code null} (or an empty sequence for the variable-column
     *     layout) at end of input; {@code null} if reading or parsing failed, in which case the
     *     stack trace is printed
     */
    @Override // dragon.ml.seqmodel.data.DataReader
    public DataSequence readRow() {
        try {
            if (!this.tagged) {
                return readRaw();
            }
            if (this.fixedColFormat) {
                return readRowFixedCol(this.tin, this.labels);
            }
            return readRowVarCol(this.tin);
        } catch (Exception e) {
            // Best effort: a malformed row ends reading instead of propagating to the caller.
            e.printStackTrace();
            return null;
        }
    }

    /** Closes the underlying reader; any failure is printed and otherwise ignored. */
    @Override // dragon.ml.seqmodel.data.DataReader
    public void close() {
        try {
            this.tin.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Tells whether a token produced by a delimiter-returning tokenizer should be kept: either it
     * is not a delimiter, or it is one of the "important" delimiters.
     */
    private boolean isKeptToken(String token) {
        return this.delimit.indexOf(token) == -1 || this.impDelimit.indexOf(token) != -1;
    }

    /**
     * Lower-cases {@code text} and splits it on {@code delimit}, dropping delimiters except those
     * listed in {@code impDelimit}.
     *
     * @return the kept tokens in order; empty if none
     */
    private String[] getTokenList(String text) {
        StringTokenizer tokenizer = new StringTokenizer(text.toLowerCase(), this.delimit, true);
        // countTokens() includes the returned delimiters, so it is an upper bound on kept tokens.
        String[] buffer = new String[tokenizer.countTokens()];
        int kept = 0;
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            if (isKeptToken(token)) {
                buffer[kept++] = token;
            }
        }
        String[] tokens = new String[kept];
        System.arraycopy(buffer, 0, tokens, 0, kept);
        return tokens;
    }

    /**
     * Tokenizes one segment and appends its tokens to {@code sequence}, all carrying
     * {@code label}; only the first token is flagged as the segment start.
     */
    private void appendSegment(BasicDataSequence sequence, String segment, int label) {
        String[] tokens = getTokenList(segment);
        for (int t = 0; t < tokens.length; t++) {
            BasicToken token = new BasicToken(tokens[t], label);
            token.setSegmentMarker(t == 0);
            sequence.add(token);
        }
    }

    /**
     * Reads one variable-column sequence: consecutive {@code segment<tagDelimit>label} lines up to
     * the first line with fewer than two fields, or end of input.
     *
     * @return the sequence read; empty (never {@code null}) when no segment line was found
     * @throws IOException if the reader fails
     */
    private DataSequence readRowVarCol(BufferedReader reader) throws IOException {
        BasicDataSequence sequence = new BasicDataSequence();
        String line;
        while ((line = reader.readLine()) != null) {
            StringTokenizer fields = new StringTokenizer(line.toLowerCase(), this.tagDelimit);
            if (fields.countTokens() < 2) {
                // A line without both segment and label terminates the sequence.
                break;
            }
            String segment = fields.nextToken();
            String rawLabel = fields.nextToken();
            int label = this.labelConverter != null
                    ? this.labelConverter.getInternalLabel(rawLabel)
                    : Integer.parseInt(rawLabel);
            appendSegment(sequence, segment, label);
        }
        return sequence;
    }

    /**
     * Reads one fixed-column sequence: a single line whose columns, separated by
     * {@code tagDelimit}, are labeled by position from {@code columnLabels}. Empty columns are
     * skipped, as are columns whose (converted) label is outside {@code [0, originalLabelNum)}.
     *
     * @return the sequence read, or {@code null} at end of input
     * @throws IOException if the reader fails
     */
    private DataSequence readRowFixedCol(BufferedReader reader, int[] columnLabels) throws IOException {
        String line = reader.readLine();
        if (line == null) {
            return null;
        }
        BasicDataSequence sequence = new BasicDataSequence();
        // Delimiters are returned as tokens so that empty columns can be detected.
        StringTokenizer columns = new StringTokenizer(line.toLowerCase(), this.tagDelimit, true);
        // The column index is kept separate from the token index used in appendSegment; sharing
        // one variable would make later columns pick up the wrong label or be skipped.
        for (int column = 0; column < columnLabels.length && columns.hasMoreTokens(); column++) {
            int label = this.labelConverter != null
                    ? this.labelConverter.getInternalLabel(columnLabels[column])
                    : columnLabels[column];
            String segment = columns.nextToken();
            if (this.tagDelimit.indexOf(segment) != -1) {
                // Got a delimiter straight away: this column is empty.
                continue;
            }
            if (columns.hasMoreTokens()) {
                // Consume the delimiter that closes this column.
                columns.nextToken();
            }
            if (label >= 0 && label < this.originalLabelNum) {
                appendSegment(sequence, segment, label);
            }
        }
        return sequence;
    }

    /**
     * Probes the start of the input for a fixed-column header. When the first line does not start
     * with the header keyword the reader is rewound so that line is read again as data.
     *
     * @return the column labels parsed from the second line (entries beyond the tokens present
     *     stay 0), or {@code null} if there is no header, the label line is missing, or reading or
     *     parsing failed
     */
    private int[] readHeaderInfo(BufferedReader reader) {
        try {
            reader.mark(HEADER_READ_AHEAD_LIMIT);
            String firstLine = reader.readLine();
            if (firstLine == null) {
                return null;
            }
            if (!firstLine.toLowerCase().startsWith(FIXED_COLUMN_HEADER)) {
                reader.reset();
                return null;
            }
            String labelLine = reader.readLine();
            if (labelLine == null) {
                // Header keyword present but the label line is missing: treat as "no header".
                return null;
            }
            StringTokenizer labelTokens = new StringTokenizer(labelLine, this.tagDelimit);
            int[] columnLabels = new int[this.originalLabelNum];
            for (int column = 0; column < columnLabels.length && labelTokens.hasMoreTokens(); column++) {
                columnLabels[column] = Integer.parseInt(labelTokens.nextToken());
            }
            return columnLabels;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads one raw line and turns every kept token into an unlabeled token.
     *
     * @return the sequence for the line (possibly empty), or {@code null} at end of input
     * @throws IOException if the reader fails
     */
    private DataSequence readRaw() throws IOException {
        String line = this.tin.readLine();
        if (line == null) {
            // End of input is reported as null directly rather than through a NullPointerException.
            return null;
        }
        BasicDataSequence sequence = new BasicDataSequence();
        StringTokenizer tokenizer = new StringTokenizer(line.toLowerCase(), this.delimit, true);
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            if (isKeptToken(token)) {
                sequence.add(new BasicToken(token));
            }
        }
        return sequence;
    }
}
