package it.unimi.dsi.law.nel.datasets;

import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.di.big.mg4j.tool.VirtualDocumentResolver;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.law.nel.interfaces.AnnotatedDocument;
import it.unimi.dsi.law.nel.interfaces.ImmutableAnnotatedDocument;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.util.FrontCodedStringList;
import it.unimi.dsi.util.Interval;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:it/unimi/dsi/law/nel/datasets/AidaYagoDataset.class */
/**
 * Loader and command-line serializer for the AIDA Yago dataset.
 *
 * <p>The input is read line by line. A line starting with {@code -DOCSTART-} opens a new
 * document; every other line is split on tabs and contributes one token (its first column,
 * or the empty string for an empty line) to the current document. The second column, when
 * present, drives mention detection: {@code "B"} opens a mention, {@code "I"} continues the
 * open one, anything else closes it. The fourth column of a {@code "B"} line is resolved
 * through {@link EnWikiUtils#title2NormalizedUrl}.
 */
public class AidaYagoDataset {
    private static final Logger LOGGER = LoggerFactory.getLogger(AidaYagoDataset.class);

    /** Prefix of the lines that open a new document. */
    private static final String DOCUMENT_START = "-DOCSTART-";

    /** Column separator of token lines. */
    private static final String COLUMN_SEPARATOR = "\t";

    /** Maximum number of columns a token line is split into. */
    private static final int MAX_COLUMNS = 5;

    /** Value of the second column on the first token of a mention. */
    private static final String BEGIN_TAG = "B";

    /** Value of the second column on the following tokens of a mention. */
    private static final String INSIDE_TAG = "I";

    /** Index of the column handed to {@link EnWikiUtils#title2NormalizedUrl} for a mention. */
    private static final int ENTITY_COLUMN = 3;

    /** Sentinel meaning "no mention is currently open". */
    private static final int NO_MENTION = -1;

    /**
     * Loads the dataset stored in the file with the given name.
     *
     * <p>The file is closed even when parsing fails.
     *
     * @param str the name of the file to read.
     * @param virtualDocumentResolver the resolver passed to {@link EnWikiUtils#title2NormalizedUrl}.
     * @param frontCodedStringList the string list passed to {@link EnWikiUtils#title2NormalizedUrl}.
     * @return the parsed documents, in file order.
     * @throws IOException if the file cannot be read or is malformed.
     */
    public static Collection<AnnotatedDocument> load(String str, VirtualDocumentResolver virtualDocumentResolver, FrontCodedStringList frontCodedStringList) throws IOException {
        LOGGER.info("Loading {} as AidaYagoDataset...", str);
        final Collection<AnnotatedDocument> documents;
        // NOTE(review): FileReader decodes with the platform default charset on older JDKs;
        // confirm the dataset encoding before pinning an explicit charset here.
        try (FileReader fileReader = new FileReader(str)) {
            documents = load(fileReader, virtualDocumentResolver, frontCodedStringList);
        }
        LOGGER.info("{} was correctly parsed.", str);
        return documents;
    }

    /**
     * Parses the dataset from a reader.
     *
     * <p>The reader is wrapped in a {@link BufferedReader} and consumed until end of input;
     * it is <em>not</em> closed by this method. Blank lines preceding the first
     * {@code -DOCSTART-} line are ignored. A mention that is still open when a document ends
     * (at the next {@code -DOCSTART-} line or at end of input) is closed on the last token of
     * that document.
     *
     * @param reader the source of the dataset.
     * @param virtualDocumentResolver the resolver passed to {@link EnWikiUtils#title2NormalizedUrl}.
     * @param frontCodedStringList the string list passed to {@link EnWikiUtils#title2NormalizedUrl}.
     * @return the parsed documents, in input order.
     * @throws IOException if reading fails, if non-blank content precedes the first
     *     {@code -DOCSTART-} line, or if a {@code "B"} line has fewer than four columns.
     */
    // The three per-document lists are kept raw on purpose: the element types expected by the
    // ImmutableAnnotatedDocument constructor are declared outside this file.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static Collection<AnnotatedDocument> load(Reader reader, VirtualDocumentResolver virtualDocumentResolver, FrontCodedStringList frontCodedStringList) throws IOException {
        final ObjectArrayList<AnnotatedDocument> documents = new ObjectArrayList<>();
        ArrayList tokens = null;
        ArrayList entities = null;
        ArrayList mentions = null;
        int mentionStart = NO_MENTION;
        int position = 0;
        long lineNumber = 0;

        final BufferedReader bufferedReader = new BufferedReader(reader);
        final ProgressLogger progressLogger = new ProgressLogger(LOGGER, "lines");
        progressLogger.start();

        String line;
        while ((line = bufferedReader.readLine()) != null) {
            lineNumber++;

            if (line.startsWith(DOCUMENT_START)) {
                finishDocument(documents, tokens, mentions, entities, mentionStart, position);
                tokens = new ArrayList();
                entities = new ArrayList();
                mentions = new ArrayList();
                // A mention never spans two documents.
                mentionStart = NO_MENTION;
                position = 0;
                continue;
            }

            if (tokens == null) {
                // No document has been opened yet: there is nowhere to put this token.
                if (line.trim().isEmpty()) {
                    continue;
                }
                throw new IOException("Line " + lineNumber + ": content found before the first " + DOCUMENT_START + " line");
            }

            final String[] columns = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, COLUMN_SEPARATOR, MAX_COLUMNS);
            tokens.add(columns.length == 0 ? "" : columns[0]);
            final String tag = columns.length > 1 ? columns[1] : null;

            // Anything but an "I" tag terminates the mention that is currently open.
            if (mentionStart != NO_MENTION && !INSIDE_TAG.equals(tag)) {
                mentions.add(Interval.valueOf(mentionStart, position - 1));
                mentionStart = NO_MENTION;
            }

            if (BEGIN_TAG.equals(tag)) {
                if (columns.length <= ENTITY_COLUMN) {
                    throw new IOException("Line " + lineNumber + ": mention start with only " + columns.length + " column(s), at least " + (ENTITY_COLUMN + 1) + " expected");
                }
                mentionStart = position;
                entities.add(EnWikiUtils.title2NormalizedUrl(columns[ENTITY_COLUMN], virtualDocumentResolver, frontCodedStringList));
            }

            position++;
            progressLogger.lightUpdate();
        }

        finishDocument(documents, tokens, mentions, entities, mentionStart, position);
        progressLogger.done();
        return documents;
    }

    /**
     * Closes the mention still open (if any) and appends the current document to the result.
     * Does nothing when no document has been opened yet.
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private static void finishDocument(Collection<AnnotatedDocument> documents, ArrayList tokens, ArrayList mentions, ArrayList entities, int mentionStart, int tokenCount) {
        if (tokens == null) {
            return;
        }
        if (mentionStart != NO_MENTION) {
            // The mention runs up to the last token of the document.
            mentions.add(Interval.valueOf(mentionStart, tokenCount - 1));
        }
        documents.add(new ImmutableAnnotatedDocument(tokens, mentions, entities));
    }

    /**
     * Command-line entry point: loads the dataset, stores the resulting collection with
     * {@link BinIO#storeObject} and, when {@code --printall} is given, prints every document
     * to standard output.
     *
     * @param strArr the command-line arguments: dataset, enwikired, id2name, output and the
     *     optional {@code --printall} switch.
     * @throws Exception if argument parsing, loading or storing fails.
     */
    public static void main(String[] strArr) throws Exception {
        final SimpleJSAP jsap = new SimpleJSAP(
                AidaYagoDataset.class.getName(),
                "Serializes the AIDA Yago dataset.",
                new Parameter[] {
                    new UnflaggedOption("dataset", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, true, false, "The original dataset."),
                    new UnflaggedOption("enwikired", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, true, false, "The serialized enwikired Virtual Document Resolver."),
                    new UnflaggedOption("id2name", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, true, false, "The serialized FrontCodedStringList that encode the wikipedia titles."),
                    new UnflaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, true, false, "The serialized dataset."),
                    new Switch("printall", JSAP.NO_SHORTFLAG, "printall", "Print all association")
                });
        final JSAPResult arguments = jsap.parse(strArr);
        if (jsap.messagePrinted()) {
            // Usage or error text has already been printed by JSAP.
            System.exit(1);
        }

        final String resolverFile = arguments.getString("enwikired");
        final String id2NameFile = arguments.getString("id2name");
        final String outputFile = arguments.getString("output");

        LOGGER.info("Loading the wikipedia document resolver from {}...", resolverFile);
        final VirtualDocumentResolver resolver = (VirtualDocumentResolver) BinIO.loadObject(resolverFile);
        LOGGER.info("Loading the wikipedia id2name FrontCodedStringList from {}...", id2NameFile);
        final FrontCodedStringList id2Name = (FrontCodedStringList) BinIO.loadObject(id2NameFile);

        final Collection<AnnotatedDocument> documents = load(arguments.getString("dataset"), resolver, id2Name);
        LOGGER.info("Dataset correctly loaded, saving it to {}...", outputFile);
        BinIO.storeObject(documents, outputFile);

        if (arguments.getBoolean("printall")) {
            LOGGER.info("Printing all...");
            for (AnnotatedDocument document : documents) {
                System.out.println(document);
            }
        }
        LOGGER.info("Done.");
    }
}
