package it.unipi.di.acube.batframework.datasetPlugins;

import it.unimi.dsi.lang.MutableString;
import it.unipi.di.acube.batframework.data.Annotation;
import it.unipi.di.acube.batframework.data.Mention;
import it.unipi.di.acube.batframework.data.Tag;
import it.unipi.di.acube.batframework.problems.A2WDataset;
import it.unipi.di.acube.batframework.utils.AnnotationException;
import it.unipi.di.acube.batframework.utils.ProblemReduction;
import it.unipi.di.acube.batframework.utils.WikipediaInterface;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Vector;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;
import org.xml.sax.SAXException;

/* loaded from: input_file:it/unipi/di/acube/batframework/datasetPlugins/KddDataset.class */
/**
 * {@link A2WDataset} implementation that reports itself under the name {@code "KDD"}.
 *
 * <p>This source was recovered by a decompiler, and the body of the stream-based
 * constructor could not be reconstructed: it unconditionally throws
 * {@link UnsupportedOperationException}. Because the other constructor delegates to it,
 * no instance can be created from this source until that body is restored. The accessor
 * methods below are intact.
 */
public class KddDataset implements A2WDataset {
    /** Gold-standard annotation sets; presumably one entry per document, index-aligned with {@link #documents}. */
    private List<HashSet<Annotation>> tags;
    /** Document texts, converted to {@code String} by {@link #getTextInstanceList()}. */
    private List<MutableString> documents;

    // The patterns below are not referenced by any code visible in this file; presumably
    // they are compiled and used by the parsing constructor whose body was lost.
    private Pattern nonePattern;
    private Pattern nonePattern2;
    private Pattern tagPattern;
    private Pattern nonTagPattern;
    private Pattern skipPattern;
    private Pattern endPattern;
    private Pattern nonTagPattern2;

    /**
     * Plain holder for a position, a length and a title.
     *
     * <p>Declared {@code static} because it never touches the enclosing {@code KddDataset}
     * instance; a non-static inner class would carry a hidden reference to it.
     */
    private static class KddAnnotation {
        public int length;
        public int position;
        public String title;

        /**
         * @param position value stored in {@link #position} (first argument)
         * @param length   value stored in {@link #length} (second argument)
         * @param title    value stored in {@link #title}
         */
        public KddAnnotation(int position, int length, String title) {
            this.length = length;
            this.position = position;
            this.title = title;
        }
    }

    /**
     * Stream-based constructor. <strong>Not functional in this source:</strong> the decompiler
     * failed to restructure the original method (903 bytecode instructions) and emitted a stub
     * that always throws.
     *
     * <p>The only fragment the decompiler reported from the lost body closes a resource and
     * increments a counter ({@code r0.close(); r16 = r16 + 1;}); everything else about the
     * original behavior must be recovered from the bytecode or the upstream sources.
     *
     * @param r10 input streams to read the dataset from
     * @param r11 Wikipedia interface handed to the (lost) parsing logic
     * @throws UnsupportedOperationException always, in this decompiled form
     */
    public KddDataset(java.io.InputStream[] r10, it.unipi.di.acube.batframework.utils.WikipediaInterface r11) throws java.io.IOException, it.unipi.di.acube.batframework.utils.AnnotationException, javax.xml.xpath.XPathExpressionException, javax.xml.parsers.ParserConfigurationException, org.xml.sax.SAXException {
        throw new UnsupportedOperationException("Method not decompiled: it.unipi.di.acube.batframework.datasetPlugins.KddDataset.<init>(java.io.InputStream[], it.unipi.di.acube.batframework.utils.WikipediaInterface):void");
    }

    /**
     * Opens a {@link FileInputStream} for every path, in order.
     *
     * <p>If any path cannot be opened, the streams opened so far are closed before the
     * exception propagates, so a failure does not leak file handles.
     *
     * @param strArr file paths to open
     * @return one open stream per path, at the same index as its path
     * @throws FileNotFoundException if one of the paths cannot be opened for reading
     */
    public static InputStream[] filesToInputStreams(String[] strArr) throws FileNotFoundException {
        InputStream[] streams = new InputStream[strArr.length];
        boolean allOpened = false;
        try {
            for (int i = 0; i < strArr.length; i++) {
                streams[i] = new FileInputStream(strArr[i]);
            }
            allOpened = true;
            return streams;
        } finally {
            if (!allOpened) {
                closeQuietly(streams);
            }
        }
    }

    /** Closes every non-null stream in the array, ignoring failures of {@code close()}. */
    private static void closeQuietly(InputStream[] streams) {
        for (InputStream stream : streams) {
            if (stream == null) {
                continue;
            }
            try {
                stream.close();
            } catch (IOException ignored) {
                // Only reached while another exception is already propagating;
                // a close failure must not mask that original cause.
            }
        }
    }

    /**
     * Opens the given files and delegates to the stream-based constructor.
     *
     * <p>NOTE(review): the streams opened here are owned by the delegated constructor; no
     * code visible in this file closes them if that constructor throws (and in this
     * decompiled form it always does).
     *
     * @param strArr            paths of the dataset files
     * @param wikipediaInterface Wikipedia interface passed through to the delegated constructor
     */
    public KddDataset(String[] strArr, WikipediaInterface wikipediaInterface) throws IOException, AnnotationException, XPathExpressionException, ParserConfigurationException, SAXException {
        this(filesToInputStreams(strArr), wikipediaInterface);
    }

    /** @return the number of annotation sets held by this dataset */
    @Override // it.unipi.di.acube.batframework.problems.TopicDataset
    public int getSize() {
        return this.tags.size();
    }

    /** @return the sum of the sizes of all annotation sets */
    @Override // it.unipi.di.acube.batframework.problems.C2WDataset
    public int getTagsCount() {
        int total = 0;
        for (HashSet<Annotation> annotations : this.tags) {
            total += annotations.size();
        }
        return total;
    }

    /** @return the annotation sets reduced to tags via {@link ProblemReduction#A2WToC2WList} */
    @Override // it.unipi.di.acube.batframework.problems.C2WDataset
    public List<HashSet<Tag>> getC2WGoldStandardList() {
        return ProblemReduction.A2WToC2WList(this.tags);
    }

    /** @return the same list as {@link #getA2WGoldStandardList()} */
    @Override // it.unipi.di.acube.batframework.problems.D2WDataset
    public List<HashSet<Annotation>> getD2WGoldStandardList() {
        return getA2WGoldStandardList();
    }

    /** @return a new list holding the {@code toString()} of every document, in order */
    @Override // it.unipi.di.acube.batframework.problems.TopicDataset
    public List<String> getTextInstanceList() {
        // Typed (no raw Vector) and presized; the runtime type stays Vector as before.
        List<String> texts = new Vector<String>(this.documents.size());
        for (MutableString document : this.documents) {
            texts.add(document.toString());
        }
        return texts;
    }

    /** @return the annotation sets reduced to mentions via {@link ProblemReduction#A2WToD2WMentionsInstance} */
    @Override // it.unipi.di.acube.batframework.problems.D2WDataset
    public List<HashSet<Mention>> getMentionsInstanceList() {
        return ProblemReduction.A2WToD2WMentionsInstance(getA2WGoldStandardList());
    }

    /** @return the constant name {@code "KDD"} */
    @Override // it.unipi.di.acube.batframework.problems.TopicDataset
    public String getName() {
        return "KDD";
    }

    /** @return the internal annotation list itself (not a copy) */
    @Override // it.unipi.di.acube.batframework.problems.A2WDataset
    public List<HashSet<Annotation>> getA2WGoldStandardList() {
        return this.tags;
    }
}
