package it.unipi.di.acube.batframework.datasetPlugins;

import it.unipi.di.acube.batframework.data.Annotation;
import it.unipi.di.acube.batframework.data.Mention;
import it.unipi.di.acube.batframework.data.Tag;
import it.unipi.di.acube.batframework.problems.A2WDataset;
import it.unipi.di.acube.batframework.utils.AnnotationException;
import it.unipi.di.acube.batframework.utils.ProblemReduction;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.HashSet;
import java.util.List;
import java.util.Vector;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;
import org.xml.sax.SAXException;

/* loaded from: input_file:it/unipi/di/acube/batframework/datasetPlugins/TimerunsDataset.class */
/**
 * Dataset made of progressively longer prefixes of a single text file.
 *
 * <p>The file is read once; document number {@code k} (0-based) is the prefix of the
 * text obtained after advancing past {@code (k + 1) * i} whitespace-delimited words,
 * where {@code i} is the per-step word count given to the constructor. Every document
 * is paired with an empty annotation set, so the gold standard exposed by this class
 * contains no annotations.
 */
public class TimerunsDataset implements A2WDataset {
    /** Text of each generated document, in generation order (shortest first). */
    private final List<String> textList = new Vector<>();

    /** One (empty) annotation set per document, parallel to {@link #textList}. */
    private final List<HashSet<Annotation>> annList = new Vector<>();

    /**
     * Builds {@code i2} documents from the file at {@code str}, each one {@code i}
     * words longer than the previous one.
     *
     * @param str path of the text file to read (decoded as UTF-8)
     * @param i   number of words added at each step
     * @param i2  number of documents to generate
     * @throws IOException         if the file cannot be read
     * @throws AnnotationException if the end of the text is reached while advancing
     *                             the cursor, i.e. the file is too short for the
     *                             requested word and document counts
     */
    public TimerunsDataset(String str, int i, int i2) throws IOException, ParserConfigurationException, SAXException, AnnotationException, XPathExpressionException {
        final int wordsPerStep = i;
        final int documentCount = i2;
        final String body = loadBody(str);

        // The cursor is never reset: each document is a prefix that extends the previous one.
        int cursor = 0;
        for (int doc = 0; doc < documentCount; doc++) {
            for (int word = 0; word < wordsPerStep; word++) {
                cursor = skipWordAndSeparators(body, cursor);
                if (cursor == body.length()) {
                    throw new AnnotationException("Cannot make " + i2 + " documents of " + i + " words each (lower one of these values).");
                }
            }
            this.textList.add(body.substring(0, cursor));
            this.annList.add(new HashSet<>());
        }
        System.out.println("Biggest document will be " + cursor + " chars long.");
    }

    /**
     * Advances {@code pos} past one run of non-separator characters and then past the
     * run of separators (spaces and newlines) that follows it.
     *
     * @return the new position, at most {@code text.length()}
     */
    private static int skipWordAndSeparators(String text, int pos) {
        final int end = text.length();
        while (pos < end && !isSeparator(text.charAt(pos))) {
            pos++;
        }
        while (pos < end && isSeparator(text.charAt(pos))) {
            pos++;
        }
        return pos;
    }

    /** Returns whether {@code c} is one of the two characters treated as word separators. */
    private static boolean isSeparator(char c) {
        return c == ' ' || c == '\n';
    }

    /**
     * Reads the whole file as UTF-8 text.
     *
     * <p>NUL characters are replaced by spaces and every line, including the last one,
     * is terminated by a single {@code '\n'}.
     *
     * @param str path of the file to read
     * @return the file content, normalised as described above
     * @throws IOException if the file cannot be opened or read
     */
    public String loadBody(String str) throws IOException {
        // try-with-resources closes the reader even when readLine() fails.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(str)), Charset.forName("UTF-8")))) {
            // StringBuilder avoids re-copying the accumulated text for every line.
            StringBuilder body = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line.replace((char) 0, ' ')).append("\n");
            }
            return body.toString();
        }
    }

    /** Returns the number of generated documents. */
    @Override // it.unipi.di.acube.batframework.problems.TopicDataset
    public int getSize() {
        return this.textList.size();
    }

    /** Always returns 0: the documents of this dataset carry no annotations. */
    @Override // it.unipi.di.acube.batframework.problems.C2WDataset
    public int getTagsCount() {
        return 0;
    }

    /** Returns the A2W gold standard converted by {@code ProblemReduction.A2WToC2WList}. */
    @Override // it.unipi.di.acube.batframework.problems.C2WDataset
    public List<HashSet<Tag>> getC2WGoldStandardList() {
        return ProblemReduction.A2WToC2WList(getA2WGoldStandardList());
    }

    /** Returns the same list as {@link #getA2WGoldStandardList()}. */
    @Override // it.unipi.di.acube.batframework.problems.D2WDataset
    public List<HashSet<Annotation>> getD2WGoldStandardList() {
        return getA2WGoldStandardList();
    }

    /** Returns the internal list of document texts (not a copy). */
    @Override // it.unipi.di.acube.batframework.problems.TopicDataset
    public List<String> getTextInstanceList() {
        return this.textList;
    }

    /** Returns the A2W gold standard converted by {@code ProblemReduction.A2WToD2WMentionsInstance}. */
    @Override // it.unipi.di.acube.batframework.problems.D2WDataset
    public List<HashSet<Mention>> getMentionsInstanceList() {
        return ProblemReduction.A2WToD2WMentionsInstance(getA2WGoldStandardList());
    }

    /** Returns the fixed dataset name {@code "TimeRuns"}. */
    @Override // it.unipi.di.acube.batframework.problems.TopicDataset
    public String getName() {
        return "TimeRuns";
    }

    /** Returns the internal list of per-document annotation sets (not a copy); every set is empty. */
    @Override // it.unipi.di.acube.batframework.problems.A2WDataset
    public List<HashSet<Annotation>> getA2WGoldStandardList() {
        return this.annList;
    }
}
