package it.unipi.di.acube.batframework.datasetPlugins;

import it.unipi.di.acube.batframework.data.Annotation;
import it.unipi.di.acube.batframework.data.Mention;
import it.unipi.di.acube.batframework.data.Tag;
import it.unipi.di.acube.batframework.problems.A2WDataset;
import it.unipi.di.acube.batframework.utils.ProblemReduction;
import it.unipi.di.acube.batframework.utils.WikipediaApiInterface;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Vector;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/* loaded from: input_file:it/unipi/di/acube/batframework/datasetPlugins/GERDAQDataset.class */
/**
 * Dataset plugin that loads queries and their gold annotations from a GERDAQ XML file.
 *
 * <p>The file is expected to contain {@code instance} elements. Each instance is a mix of text
 * nodes and {@code annotation} child elements; concatenating their text content yields the query.
 * Every {@code annotation} element carries attributes named {@code rank_0_title},
 * {@code rank_1_title}, ... holding Wikipedia titles. The rank-0 title of a mention is used to
 * build its {@link Annotation}; all ranked titles are turned into {@link Tag}s.
 */
public class GERDAQDataset implements A2WDataset {
    /** Query texts, one entry per {@code instance} element, in document order. */
    private final List<String> queries = new Vector<>();
    /** For each query, the tags built from every ranked title that could be resolved to an id. */
    private final List<HashSet<Tag>> tags = new Vector<>();
    /** For each query, the annotations built from the rank-0 title of each mention. */
    private final List<HashSet<Annotation>> annotations = new Vector<>();

    /**
     * Parses the dataset file and resolves all referenced Wikipedia titles to ids.
     *
     * @param str path of the GERDAQ XML file to load
     * @param wikipediaApiInterface API used to prefetch titles and map them to Wikipedia ids
     * @throws RuntimeException if the file cannot be read or parsed, if an instance contains a
     *     child element other than {@code annotation}, if an annotation has no
     *     {@code rank_0_title} attribute, or if title resolution fails
     */
    public GERDAQDataset(String str, WikipediaApiInterface wikipediaApiInterface) {
        try {
            Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new File(str));
            document.getDocumentElement().normalize();

            // Phase 1: rebuild each query string and collect the ranked titles of every mention.
            List<HashMap<Mention, Vector<String>>> mentionTitlesPerQuery = new Vector<>();
            NodeList instances = document.getElementsByTagName("instance");
            for (int i = 0; i < instances.getLength(); i++) {
                HashMap<Mention, Vector<String>> mentionToTitles = new HashMap<>();
                StringBuilder query = new StringBuilder();
                NodeList children = ((Element) instances.item(i)).getChildNodes();
                for (int j = 0; j < children.getLength(); j++) {
                    Node child = children.item(j);
                    if (child.getNodeType() == Node.ELEMENT_NODE) {
                        if (!child.getNodeName().equals("annotation")) {
                            throw new RuntimeException("Found internal node that is not an annotation.");
                        }
                        // The mention spans exactly the text contributed by this element.
                        int start = query.length();
                        query.append(child.getTextContent());
                        Mention mention = new Mention(start, query.length() - start);
                        Vector<String> titles = readRankedTitles(child.getAttributes());
                        if (titles.isEmpty()) {
                            // Fail here with context instead of an index error when reading rank 0 later.
                            throw new RuntimeException("Found annotation without a rank_0_title attribute: "
                                    + child.getTextContent());
                        }
                        mentionToTitles.put(mention, titles);
                    } else if (child.getNodeType() == Node.TEXT_NODE) {
                        query.append(child.getTextContent());
                    }
                }
                this.queries.add(query.toString());
                mentionTitlesPerQuery.add(mentionToTitles);
            }

            // Phase 2: prefetch every title in one call before resolving them individually.
            Vector<String> titlesToPrefetch = new Vector<>();
            for (HashMap<Mention, Vector<String>> mentionToTitles : mentionTitlesPerQuery) {
                for (Vector<String> titles : mentionToTitles.values()) {
                    titlesToPrefetch.addAll(titles);
                }
            }
            wikipediaApiInterface.prefetchTitles(titlesToPrefetch);

            // Phase 3: build the gold annotations and tags of each query.
            for (HashMap<Mention, Vector<String>> mentionToTitles : mentionTitlesPerQuery) {
                try {
                    HashSet<Tag> queryTags = new HashSet<>();
                    HashSet<Annotation> queryAnnotations = new HashSet<>();
                    for (Mention mention : mentionToTitles.keySet()) {
                        // Only the rank-0 title of a mention becomes its annotation.
                        String topTitle = mentionToTitles.get(mention).get(0);
                        int topId = wikipediaApiInterface.getIdByTitle(topTitle);
                        if (topId == -1) {
                            System.err.println("Error in dataset " + getName() + ": Could not find wikipedia title: " + topTitle);
                        } else {
                            queryAnnotations.add(new Annotation(mention.getPosition(), mention.getLength(), topId));
                        }
                    }
                    for (Vector<String> titles : mentionToTitles.values()) {
                        for (String title : titles) {
                            int id = wikipediaApiInterface.getIdByTitle(title);
                            if (id == -1) {
                                System.err.println("Error in dataset " + getName() + ": Could not find wikipedia title: " + title);
                            } else {
                                queryTags.add(new Tag(id));
                            }
                        }
                    }
                    this.annotations.add(queryAnnotations);
                    this.tags.add(queryTags);
                } catch (IOException | DOMException e) {
                    throw new RuntimeException(e);
                }
            }

            if (this.queries.size() != this.tags.size() || this.tags.size() != this.annotations.size()) {
                throw new RuntimeException("Parsing error");
            }
        } catch (IOException | ParserConfigurationException | XPathExpressionException | SAXException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Collects the values of the attributes {@code rank_0_title}, {@code rank_1_title}, ... in
     * rank order, stopping at the first rank whose attribute is absent.
     *
     * @param attributes attributes of an {@code annotation} element
     * @return the titles in rank order; empty if {@code rank_0_title} is absent
     */
    private static Vector<String> readRankedTitles(NamedNodeMap attributes) {
        Vector<String> titles = new Vector<>();
        for (int rank = 0; ; rank++) {
            // Plain concatenation keeps the digits ASCII regardless of the default locale.
            Node titleAttribute = attributes.getNamedItem("rank_" + rank + "_title");
            if (titleAttribute == null) {
                // getNamedItem returns null for a missing attribute: no more ranks to read.
                return titles;
            }
            titles.add(titleAttribute.getTextContent());
        }
    }

    /** Returns the number of queries in the dataset. */
    @Override // it.unipi.di.acube.batframework.problems.TopicDataset
    public int getSize() {
        return this.queries.size();
    }

    /** Returns the fixed dataset name, {@code "GERDAQ"}. */
    @Override // it.unipi.di.acube.batframework.problems.TopicDataset
    public String getName() {
        return "GERDAQ";
    }

    /** Returns the query texts; the returned list is the internal list, not a copy. */
    @Override // it.unipi.di.acube.batframework.problems.TopicDataset
    public List<String> getTextInstanceList() {
        return this.queries;
    }

    /** Returns the total number of tags over all queries (sum of the per-query tag set sizes). */
    @Override // it.unipi.di.acube.batframework.problems.C2WDataset
    public int getTagsCount() {
        int count = 0;
        for (HashSet<Tag> queryTags : this.tags) {
            count += queryTags.size();
        }
        return count;
    }

    /** Returns the C2W gold standard obtained by reducing the stored annotations. */
    @Override // it.unipi.di.acube.batframework.problems.C2WDataset
    public List<HashSet<Tag>> getC2WGoldStandardList() {
        return ProblemReduction.A2WToC2WList(this.annotations);
    }

    /** Returns the mentions of each query, obtained by reducing the A2W gold standard. */
    @Override // it.unipi.di.acube.batframework.problems.D2WDataset
    public List<HashSet<Mention>> getMentionsInstanceList() {
        return ProblemReduction.A2WToD2WMentionsInstance(getA2WGoldStandardList());
    }

    /** Returns the stored annotations as the D2W gold standard (internal list, not a copy). */
    @Override // it.unipi.di.acube.batframework.problems.D2WDataset
    public List<HashSet<Annotation>> getD2WGoldStandardList() {
        return this.annotations;
    }

    /** Returns the stored annotations as the A2W gold standard (internal list, not a copy). */
    @Override // it.unipi.di.acube.batframework.problems.A2WDataset
    public List<HashSet<Annotation>> getA2WGoldStandardList() {
        return this.annotations;
    }
}
