package it.unipi.di.acube.batframework.utils;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.lang.invoke.MethodHandles;
import java.net.URLDecoder;
import java.nio.file.Paths;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.Options;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.io.FileUtils;
import org.apache.jena.graph.Triple;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.jena.riot.lang.PipedRDFIterator;
import org.apache.jena.riot.lang.PipedTriplesStream;
import org.mapdb.DB;
import org.mapdb.DBMaker;
import org.mapdb.HTreeMap;
import org.mapdb.Serializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;

/* loaded from: input_file:it/unipi/di/acube/batframework/utils/WikipediaLocalInterface.class */
/**
 * {@link WikipediaInterface} implementation that answers title/id/redirect lookups from a local
 * MapDB file instead of a remote service.
 *
 * <p>The database holds three hash maps: {@code widToTitle}, {@code titleToWid} and
 * {@code redirectToWid}. It is built by {@link #main(String[])} from two bzip2-compressed Turtle
 * files ({@value #PAGEIDS_FILENAME} and {@value #REDIRECT_FILENAME}) and is then opened read-only
 * through {@link #open(String)}.
 */
public class WikipediaLocalInterface extends WikipediaInterface {
    private static final String REDIRECT_FILENAME = "redirects_en.ttl.bz2";
    private static final String PAGEIDS_FILENAME = "page_ids_en.ttl.bz2";
    private static final String DBPEDIA_WID_RELATION = "http://dbpedia.org/ontology/wikiPageID";
    private static final String DBPEDIA_REDIRECT_RELATION = "http://dbpedia.org/ontology/wikiPageRedirects";
    /** A progress line is logged every time this many tuples have been stored. */
    private static final int PROGRESS_LOG_INTERVAL = 100000;
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    private static final Pattern DBPEDIA_RESOURCE_URI = Pattern.compile("http://dbpedia.org/resource/(.*)");

    private final HTreeMap<Integer, String> widToTitle;
    private final HTreeMap<String, Integer> titleToWid;
    private final HTreeMap<Integer, Integer> redirectToWid;
    private final DB db;

    /**
     * Opens an existing database file in read-only mode.
     *
     * @param str path of the MapDB file
     * @return an interface backed by that file
     */
    public static WikipediaLocalInterface open(String str) {
        return new WikipediaLocalInterface(str, true);
    }

    /**
     * Opens (or, when writable, creates) the memory-mapped MapDB file and binds the three maps.
     *
     * @param dbPath path of the MapDB file
     * @param readOnly whether the file must be opened read-only
     */
    private WikipediaLocalInterface(String dbPath, boolean readOnly) {
        DBMaker.Maker maker = DBMaker.fileDB(dbPath).fileMmapEnable().closeOnJvmShutdown();
        if (readOnly) {
            // Keep the returned builder rather than relying on the call mutating the receiver.
            maker = maker.readOnly();
        }
        this.db = maker.make();
        this.widToTitle = this.db.hashMap("widToTitle", Serializer.INTEGER, Serializer.STRING).createOrOpen();
        this.titleToWid = this.db.hashMap("titleToWid", Serializer.STRING, Serializer.INTEGER).createOrOpen();
        this.redirectToWid = this.db.hashMap("redirectToWid", Serializer.INTEGER, Serializer.INTEGER).createOrOpen();
    }

    /**
     * Looks up the page id stored for a title.
     *
     * @param str the title; it is passed through {@code normalize} before the lookup
     * @return the page id, or {@code -1} when the normalized title is not in the database
     */
    @Override // it.unipi.di.acube.batframework.utils.WikipediaInterface
    public int getIdByTitle(String str) throws IOException {
        // One lookup instead of containsKey + get: the map is disk-backed.
        Integer wid = this.titleToWid.get(normalize(str));
        return wid == null ? -1 : wid.intValue();
    }

    /**
     * Looks up the title stored for a page id.
     *
     * @param i the page id
     * @return the stored title, or {@code null} when the id is not in the database
     */
    @Override // it.unipi.di.acube.batframework.utils.WikipediaInterface
    public String getTitlebyId(int i) throws IOException {
        return this.widToTitle.get(i);
    }

    /**
     * Tells whether dereferencing the id yields a different id.
     *
     * <p>Note that an id unknown to the database also yields {@code true}, because
     * {@link #dereference(int)} returns {@code -1} for it.
     *
     * @param i the page id
     * @return {@code true} when {@code dereference(i) != i}
     */
    @Override // it.unipi.di.acube.batframework.utils.WikipediaInterface
    public boolean isRedirect(int i) throws IOException {
        return i != dereference(i);
    }

    /**
     * Resolves a page id through the redirect map (a single hop).
     *
     * @param i the page id
     * @return the redirect target when one is stored; otherwise {@code i} itself when it is a
     *     known page id; otherwise {@code -1}
     */
    @Override // it.unipi.di.acube.batframework.utils.WikipediaInterface
    public int dereference(int i) throws IOException {
        Integer target = this.redirectToWid.get(i);
        if (target != null) {
            return target.intValue();
        }
        return this.widToTitle.containsKey(i) ? i : -1;
    }

    /** No-op: every lookup is served directly from the local database. */
    @Override // it.unipi.di.acube.batframework.utils.WikipediaInterface
    public void prefetchTitles(List<String> list) throws IOException, ParserConfigurationException, SAXException, XPathExpressionException {
    }

    /** No-op: every lookup is served directly from the local database. */
    @Override // it.unipi.di.acube.batframework.utils.WikipediaInterface
    public void prefetchWids(List<Integer> list) throws IOException, ParserConfigurationException, SAXException, XPathExpressionException {
    }

    /** No-op: this implementation keeps no state that needs flushing. */
    @Override // it.unipi.di.acube.batframework.utils.WikipediaInterface
    public void flush() throws FileNotFoundException, IOException {
    }

    /**
     * Builds the database from the two Turtle dumps found in {@code inputDir}.
     *
     * <p>Page ids are loaded first because redirects are resolved through the title-to-id map.
     * The database is closed even when loading fails; changes are committed only on success.
     *
     * @param inputDir directory containing the page-id and redirect dumps
     * @param dbFile path of the MapDB file to write
     */
    private static void createDB(String inputDir, String dbFile) throws FileNotFoundException, IOException {
        WikipediaLocalInterface wiki = new WikipediaLocalInterface(dbFile, false);
        try {
            loadPageIds(wiki, inputDir);
            loadRedirects(wiki, inputDir);
            LOG.info("Committing changes...");
            wiki.db.commit();
        } finally {
            LOG.info("Closing db...");
            wiki.db.close();
        }
    }

    /** Fills {@code titleToWid} and {@code widToTitle} from the page-id dump. */
    private static void loadPageIds(WikipediaLocalInterface wiki, String inputDir) throws IOException {
        PipedRDFIterator<Triple> triples = openTriples(inputDir, PAGEIDS_FILENAME);
        try {
            long stored = 0;
            while (triples.hasNext()) {
                Triple triple = triples.next();
                String title = dbPediaUrlToTitle(triple.getSubject().getURI());
                String predicate = triple.getPredicate().getURI();
                if (!DBPEDIA_WID_RELATION.equals(predicate)) {
                    throw new IllegalArgumentException(
                            "Unexpected predicate in " + PAGEIDS_FILENAME + ": " + predicate);
                }
                // Accept any numeric literal class rather than insisting on Integer.
                int wid = ((Number) triple.getObject().getLiteralValue()).intValue();
                wiki.titleToWid.put(title, wid);
                wiki.widToTitle.put(wid, title);
                if (++stored % PROGRESS_LOG_INTERVAL == 0) {
                    LOG.info("Read {} pageids tuples.", stored);
                }
            }
        } finally {
            // Signals the parser thread to stop producing if we bail out early.
            triples.close();
        }
    }

    /** Fills {@code redirectToWid} from the redirect dump; pairs with an unknown title are skipped. */
    private static void loadRedirects(WikipediaLocalInterface wiki, String inputDir) throws IOException {
        PipedRDFIterator<Triple> triples = openTriples(inputDir, REDIRECT_FILENAME);
        try {
            int stored = 0;
            while (triples.hasNext()) {
                Triple triple = triples.next();
                String fromTitle = dbPediaUrlToTitle(triple.getSubject().getURI());
                String predicate = triple.getPredicate().getURI();
                if (!DBPEDIA_REDIRECT_RELATION.equals(predicate)) {
                    throw new IllegalArgumentException(
                            "Unexpected predicate in " + REDIRECT_FILENAME + ": " + predicate);
                }
                String toTitle = dbPediaUrlToTitle(triple.getObject().getURI());

                Integer fromWid = wiki.titleToWid.get(fromTitle);
                if (fromWid == null) {
                    LOG.warn("Could not find wid for from-title {}", fromTitle);
                    continue;
                }
                Integer toWid = wiki.titleToWid.get(toTitle);
                if (toWid == null) {
                    LOG.warn("Could not find wid for to-title {}", toTitle);
                    continue;
                }
                wiki.redirectToWid.put(fromWid, toWid);
                if (++stored % PROGRESS_LOG_INTERVAL == 0) {
                    LOG.info("Read {} redirect tuples.", stored);
                }
            }
        } finally {
            // Signals the parser thread to stop producing if we bail out early.
            triples.close();
        }
    }

    /** Opens {@code fileName} inside {@code inputDir} as a bzip2 stream and starts parsing it. */
    private static PipedRDFIterator<Triple> openTriples(String inputDir, String fileName) throws IOException {
        return getTripleIterator(
                new BZip2CompressorInputStream(FileUtils.openInputStream(Paths.get(inputDir, fileName).toFile())));
    }

    /**
     * Extracts the normalized page title from a DBpedia resource URI.
     *
     * @param str a URI of the form {@code http://dbpedia.org/resource/<name>}
     * @return the percent-decoded name, passed through {@code normalize}
     * @throws IllegalArgumentException when {@code str} is not a DBpedia resource URI
     */
    public static String dbPediaUrlToTitle(String str) {
        Matcher matcher = DBPEDIA_RESOURCE_URI.matcher(str);
        if (!matcher.matches()) {
            throw new IllegalArgumentException("Not a DBpedia resource URI: " + str);
        }
        // URLDecoder implements form decoding, where '+' means a space. In a URI path '+' is a
        // literal character, so escape it first to keep names such as "C++" intact.
        String resourceName = matcher.group(1).replace("+", "%2B");
        try {
            return normalize(URLDecoder.decode(resourceName, "utf-8"));
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Starts a background thread that parses the stream as Turtle and returns the iterator that
     * receives the parsed triples.
     *
     * <p>The stream is closed by the background thread once parsing ends, whether it succeeded
     * or not.
     *
     * @param bZip2CompressorInputStream the compressed Turtle input
     * @return an iterator over the triples produced by the parser
     */
    private static PipedRDFIterator<Triple> getTripleIterator(final BZip2CompressorInputStream bZip2CompressorInputStream) {
        PipedRDFIterator<Triple> pipedRDFIterator = new PipedRDFIterator<>();
        final PipedTriplesStream pipedTriplesStream = new PipedTriplesStream(pipedRDFIterator);
        Runnable parserTask = () -> {
            try {
                RDFDataMgr.parse(pipedTriplesStream, bZip2CompressorInputStream, Lang.TURTLE);
            } catch (RuntimeException e) {
                // The Future returned by submit() is discarded, so log here or the failure is lost.
                LOG.error("Failed to parse Turtle input.", e);
                throw e;
            } finally {
                try {
                    bZip2CompressorInputStream.close();
                } catch (IOException e) {
                    LOG.warn("Could not close Turtle input stream.", e);
                }
            }
        };
        ExecutorService executor = Executors.newSingleThreadExecutor();
        executor.submit(parserTask);
        // Let the already-submitted task run, then allow the worker thread to terminate.
        executor.shutdown();
        return pipedRDFIterator;
    }

    /**
     * Command-line entry point that builds the local database.
     *
     * <p>Options: {@code --input} (directory holding the Turtle dumps) and {@code --output}
     * (MapDB file to create). Both are mandatory.
     *
     * @param strArr command-line arguments
     * @throws IllegalArgumentException when either option is missing
     */
    public static void main(String[] strArr) throws Exception {
        GnuParser gnuParser = new GnuParser();
        Options options = new Options();
        options.addOption("i", "input", true, "Path where input TTL files reside.");
        options.addOption("o", "output", true, "Output MAPDB file.");
        CommandLine commandLine = gnuParser.parse(options, strArr);
        String inputDir = commandLine.getOptionValue("input");
        String outputFile = commandLine.getOptionValue("output");
        if (inputDir == null || outputFile == null) {
            // Fail fast with a clear message instead of an NPE deep inside path handling.
            throw new IllegalArgumentException("Both --input <dir> and --output <file> are required.");
        }
        LOG.info("Creating local Wikipedia pages database... ");
        createDB(inputDir, outputFile);
        LOG.info("Done.");
    }
}
