package it.unimi.dsi.law.nel.datasets;

import info.bliki.wiki.filter.Encoder;
import it.unimi.di.big.mg4j.tool.VirtualDocumentResolver;
import it.unimi.dsi.util.FrontCodedStringList;
import org.apache.commons.lang.StringEscapeUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:it/unimi/dsi/law/nel/datasets/EnWikiUtils.class */
/**
 * Static helpers that turn English Wikipedia URLs and page titles into the
 * URL strings stored in a {@link FrontCodedStringList}, using a
 * {@link VirtualDocumentResolver} to find the list index.
 */
public class EnWikiUtils {
    /** Sentinel title for which {@link #title2NormalizedUrl} returns {@code null} without any lookup. */
    private static final String NULL_ENTITIES = "--NME--";
    /** Prefix prepended to an encoded title to form the URL that is resolved. */
    private static final String BASE_URL = "http://en.wikipedia.org/wiki/";
    private static final Logger LOGGER = LoggerFactory.getLogger(EnWikiUtils.class);

    /**
     * Resolves a URL through the given resolver and returns the string stored
     * at the resolved index of the given list.
     *
     * @param str the URL to resolve
     * @param virtualDocumentResolver resolver mapping the URL to an index
     * @param frontCodedStringList list the resolved index is looked up in
     * @return the list entry at the resolved index, or {@code null} (after
     *         logging an error) when the resolver returns {@code -1}
     */
    public static String normalizeURL(String str, VirtualDocumentResolver virtualDocumentResolver, FrontCodedStringList frontCodedStringList) {
        final long resolved = virtualDocumentResolver.resolve(str);
        if (resolved == -1) {
            // Parameterized form: same emitted text, no eager concatenation.
            LOGGER.error("Cannot resolve the URL: {}", str);
            return null;
        }
        // The list is accessed with an int index, so the long result is narrowed here.
        return frontCodedStringList.get((int) resolved).toString();
    }

    /**
     * Builds the Wikipedia URL for a page title and normalizes it with
     * {@link #normalizeURL}.
     *
     * <p>The title is first passed through
     * {@code StringEscapeUtils.unescapeJava}, then through
     * {@code Encoder.encodeTitleToUrl(title, true)}, and the result is
     * appended to the base URL.
     *
     * @param str the page title; {@code null} or the sentinel
     *        {@code "--NME--"} means "no entity"
     * @param virtualDocumentResolver resolver passed on to {@link #normalizeURL}
     * @param frontCodedStringList list passed on to {@link #normalizeURL}
     * @return the normalized URL, or {@code null} when the title is
     *         {@code null}, is the sentinel, or cannot be resolved
     */
    public static String title2NormalizedUrl(String str, VirtualDocumentResolver virtualDocumentResolver, FrontCodedStringList frontCodedStringList) {
        // Constant-first comparison plus explicit null check: a missing title
        // yields null instead of a NullPointerException.
        if (str == null || NULL_ENTITIES.equals(str)) {
            return null;
        }
        final String unescapedTitle = StringEscapeUtils.unescapeJava(str);
        final String url = BASE_URL + Encoder.encodeTitleToUrl(unescapedTitle, true);
        return normalizeURL(url, virtualDocumentResolver, frontCodedStringList);
    }
}
