package it.unimi.dsi.law.nel;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.di.big.mg4j.index.Index;
import it.unimi.di.big.mg4j.query.QueryEngine;
import it.unimi.di.big.mg4j.query.nodes.QueryBuilderVisitorException;
import it.unimi.di.big.mg4j.query.parser.SimpleParser;
import it.unimi.di.big.mg4j.search.DocumentIteratorBuilderVisitor;
import it.unimi.di.big.mg4j.search.score.DocumentScoreInfo;
import it.unimi.di.big.mg4j.search.score.Scorer;
import it.unimi.dsi.big.io.FileLinesCollection;
import it.unimi.dsi.fastutil.BigList;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.objects.Object2DoubleLinkedOpenHashMap;
import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.objects.Object2ReferenceLinkedOpenHashMap;
import it.unimi.dsi.fastutil.objects.Object2ReferenceMap;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectBidirectionalIterator;
import it.unimi.dsi.fastutil.objects.ObjectListIterator;
import it.unimi.dsi.fastutil.objects.Reference2DoubleMap;
import it.unimi.dsi.fastutil.objects.Reference2DoubleOpenHashMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceOpenHashMap;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.lang.FlyweightPrototype;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.lang.ObjectParser;
import it.unimi.dsi.law.nel.interfaces.CandidateAnnotatedDocument;
import it.unimi.dsi.law.nel.interfaces.Document;
import it.unimi.dsi.law.nel.interfaces.QueryFromMention;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.stat.SummaryStats;
import it.unimi.dsi.util.Interval;
import java.io.IOException;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.commons.configuration.ConfigurationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:it/unimi/dsi/law/nel/MG4JCandidateAnnotator.class */
/**
 * Annotates the mentions of a {@link Document} with candidate names obtained by running MG4J queries.
 *
 * <p>For every mention a query is built by a {@link QueryFromMention}, submitted to a
 * {@link QueryEngine}, and each returned document pointer is translated into a name by
 * looking it up in a list of titles. The resulting name/score maps are attached to the
 * document as a {@link CandidateAnnotatedDocument}.
 *
 * <p>The command-line driver ({@link #main(String[])}) uses one {@linkplain #copy() copy}
 * of the annotator per worker thread.
 */
public class MG4JCandidateAnnotator implements FlyweightPrototype<MG4JCandidateAnnotator> {
    private static final Logger LOGGER = LoggerFactory.getLogger(MG4JCandidateAnnotator.class);

    /** The engine the queries are submitted to. */
    private final QueryEngine queryEngine;
    /** Translates a document pointer returned by the engine into the candidate name. */
    private final BigList<? extends CharSequence> titleList;
    /** The loaded indices keyed by name, in loading order; the same map is handed to every copy. */
    private final Object2ReferenceLinkedOpenHashMap<String, Index> name2Index;

    /**
     * A {@link CandidateAnnotatedDocument} that delegates tokens and mentions to an existing
     * {@link Document} and adds one candidate map per mention.
     */
    public static final class CandidateAnnotatedDocumentExtension extends CandidateAnnotatedDocument implements Serializable {
        /** The document whose tokens and mentions are exposed. */
        private final Document document;
        private static final long serialVersionUID = 1L;
        /** The candidate maps, as passed to the constructor. */
        final Object2DoubleLinkedOpenHashMap<String>[] candidates;

        /**
         * @param candidates the candidate maps to expose (stored, not copied).
         * @param document the document to delegate to.
         */
        private CandidateAnnotatedDocumentExtension(Object2DoubleLinkedOpenHashMap<String>[] candidates, Document document) {
            this.document = document;
            this.candidates = candidates;
        }

        /** @return the tokens of the wrapped document. */
        @Override
        public String[] token() {
            return document.token();
        }

        /** @return the mentions of the wrapped document. */
        @Override
        public Interval[] mention() {
            return document.mention();
        }

        /** @return the candidate maps supplied at construction time. */
        @Override
        public Object2DoubleLinkedOpenHashMap<String>[] candidate() {
            return candidates;
        }
    }

    /**
     * Loads the indices described by a list of <code>basename[:weight]</code> specifications.
     *
     * <p>A specification is split at its last colon only when what follows the colon parses
     * as a double; in every other case (no colon, a <code>mg4j://</code> URI, or a colon that
     * is part of the name, such as a drive letter) the whole string is used as the
     * basename and the weight is 1.
     *
     * @param basenameWeight the specifications to load.
     * @param loadSizes passed through to {@link Index#getInstance}.
     * @param name2Index filled with each loaded index, keyed by its field name, or by the
     * specification string when the index has no field name.
     * @param index2Weight filled with the weight of each loaded index.
     */
    private static void loadIndicesFromSpec(String[] basenameWeight, boolean loadSizes, Object2ReferenceMap<String, Index> name2Index, Reference2DoubleMap<Index> index2Weight) throws IOException, ConfigurationException, URISyntaxException, ClassNotFoundException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
        for (final String spec : basenameWeight) {
            final int colon = spec.lastIndexOf(':');
            String basename = spec;
            double weight = 1.0;
            if (colon != -1 && !spec.startsWith("mg4j://")) {
                try {
                    weight = Double.parseDouble(spec.substring(colon + 1));
                    // Truncate only once we know the suffix really is a weight.
                    basename = spec.substring(0, colon);
                } catch (NumberFormatException ignored) {
                    // The text after the colon is not a weight, so the colon belongs to the
                    // basename itself: keep the full string and the default weight.
                }
            }
            final Index index = Index.getInstance(basename, true, loadSizes);
            index2Weight.put(index, weight);
            name2Index.put(index.field != null ? index.field : spec, index);
        }
    }

    /** Builds an annotator around already-initialised components; used by {@link #copy()}. */
    private MG4JCandidateAnnotator(BigList<? extends CharSequence> titleList, QueryEngine queryEngine, Object2ReferenceLinkedOpenHashMap<String, Index> name2Index) {
        this.titleList = titleList;
        this.queryEngine = queryEngine;
        this.name2Index = name2Index;
    }

    /**
     * Creates an annotator, loading the given indices and configuring a query engine on them.
     *
     * <p>The first loaded index is used as the default one by the query parser and by the
     * document-iterator builder.
     *
     * @param titleList the list translating document pointers into names.
     * @param basenameWeight index specifications of the form <code>basename[:weight]</code>;
     * must contain at least one element.
     * @param scorer the scorer set on the query engine.
     */
    // Raw types are unavoidable here: the element types expected by SimpleParser and
    // DocumentIteratorBuilderVisitor are not imported by this file.
    @SuppressWarnings({ "rawtypes", "unchecked" })
    public MG4JCandidateAnnotator(BigList<? extends CharSequence> titleList, String[] basenameWeight, Scorer scorer) throws ConfigurationException, ClassNotFoundException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, IOException, URISyntaxException {
        this.titleList = titleList;
        this.name2Index = new Object2ReferenceLinkedOpenHashMap<>(16, 0.5f);
        final Reference2DoubleOpenHashMap<Index> index2Weight = new Reference2DoubleOpenHashMap<>();
        loadIndicesFromSpec(basenameWeight, true, this.name2Index, index2Weight);

        final Object2ObjectOpenHashMap termProcessors = new Object2ObjectOpenHashMap(this.name2Index.size());
        for (final String name : this.name2Index.keySet()) {
            termProcessors.put(name, this.name2Index.get(name).termProcessor);
        }

        final String defaultName = this.name2Index.firstKey();
        this.queryEngine = new QueryEngine(
                new SimpleParser(this.name2Index.keySet(), defaultName, termProcessors),
                new DocumentIteratorBuilderVisitor(this.name2Index, new Reference2ReferenceOpenHashMap(), this.name2Index.get(defaultName), 2048),
                this.name2Index);
        this.queryEngine.setWeights(index2Weight);
        this.queryEngine.score(scorer);
    }

    /**
     * Runs the query derived from a mention and returns the names of the results with their scores.
     *
     * @param mention the tokens of the mention.
     * @param queryFromMention turns the mention into the query to be submitted.
     * @param maxResults the maximum number of results requested from the engine.
     * @return a map from name (looked up in the title list) to score, in the order in which
     * the engine returned the results.
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    public Object2DoubleLinkedOpenHashMap<String> getCandidates(String[] mention, QueryFromMention queryFromMention, int maxResults) throws QueryBuilderVisitorException, IOException {
        // Building the message requires an extra obtain() call: do it only when it will be logged.
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Submitting query: " + Arrays.toString(queryFromMention.obtain(mention)));
        }

        final ObjectArrayList results = new ObjectArrayList();
        this.queryEngine.process(queryFromMention.obtain(mention), 0, maxResults, results);

        final Object2DoubleLinkedOpenHashMap<String> candidates = new Object2DoubleLinkedOpenHashMap<>();
        for (final Object result : results) {
            final DocumentScoreInfo info = (DocumentScoreInfo) result;
            candidates.put(this.titleList.get(info.document).toString(), info.score);
        }
        return candidates;
    }

    /**
     * Splits a string into words using a {@link FastBufferedReader}, discarding empty words.
     *
     * @param str the string to tokenize.
     * @return the non-empty words, in order of appearance.
     * @throws RuntimeException wrapping any {@link IOException} raised by the reader.
     */
    public static String[] stringToTokenList(String str) {
        final ObjectArrayList<String> tokens = new ObjectArrayList<>();
        final MutableString word = new MutableString();
        final MutableString nonWord = new MutableString();
        // try-with-resources closes the reader on every path and lets a single catch
        // cover both next() and close().
        try (FastBufferedReader reader = new FastBufferedReader(new MutableString(str))) {
            while (reader.next(word, nonWord)) {
                if (!word.isEmpty()) {
                    tokens.add(word.toString());
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return tokens.toArray(new String[0]);
    }

    /**
     * Computes the candidates of every mention of a document.
     *
     * @param document the document to annotate.
     * @param queryFromMention turns each mention into a query.
     * @param maxResults the maximum number of candidates requested for each mention.
     * @return a view of {@code document} carrying one candidate map per mention, in the
     * order returned by {@code document.mentionAsString()}.
     */
    public CandidateAnnotatedDocument annotate(Document document, QueryFromMention queryFromMention, int maxResults) throws QueryBuilderVisitorException, IOException {
        final String[] mentions = document.mentionAsString();
        // Generic arrays cannot be created directly; every slot is filled with a correctly typed map below.
        @SuppressWarnings("unchecked")
        final Object2DoubleLinkedOpenHashMap<String>[] candidates = new Object2DoubleLinkedOpenHashMap[mentions.length];
        for (int m = 0; m < mentions.length; m++) {
            candidates[m] = getCandidates(stringToTokenList(mentions[m]), queryFromMention, maxResults);
        }
        return new CandidateAnnotatedDocumentExtension(candidates, document);
    }

    /**
     * Returns a new annotator that uses a copy of the query engine and the same title list
     * and index map as this one.
     *
     * @return the copy.
     */
    @Override
    public MG4JCandidateAnnotator copy() {
        return new MG4JCandidateAnnotator(this.titleList, this.queryEngine.copy(), this.name2Index);
    }

    /**
     * Decompiler-generated alias of {@link #copy()}, kept so that existing callers still link.
     *
     * @return the same result as {@link #copy()}.
     * @deprecated use {@link #copy()}.
     */
    @Deprecated
    public MG4JCandidateAnnotator m34copy() {
        return copy();
    }

    /**
     * Command-line driver: loads a serialized list of documents, annotates each of them in
     * parallel (one task per document on a fixed pool sized to the available processors)
     * and serializes the annotated documents, in the input order, to the result file.
     *
     * @param args the command-line arguments, parsed with JSAP.
     */
    public static void main(String[] args) throws JSAPException, ClassNotFoundException, IllegalArgumentException, SecurityException, IOException, ConfigurationException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, URISyntaxException, InterruptedException {
        // NOTE(review): the help text of "title-list" talks about a serialized big list, but
        // the file is read below as UTF-8 text lines, and the option is mandatory.
        final SimpleJSAP jsap = new SimpleJSAP(
                MG4JCandidateAnnotator.class.getName(),
                "Given a collection of Documents, it produces a collection of CandidateAnnotatedDocuments running suitable MG4J queries obtained from the mentions.",
                new Parameter[] {
                    new FlaggedOption("collection", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, true, 'c', "collection", "The filename of the serialized collection of documents to be read."),
                    new FlaggedOption("titleList", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, true, 't', "title-list", "A serialized big list of titles (will override collection titles if specified)."),
                    new FlaggedOption("result", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, true, 'r', "result", "The filename where the resulting collection will be serialized."),
                    new FlaggedOption("max-results", JSAP.INTEGER_PARSER, "100", false, 'm', "max-results", "The maximum number of candidates to be considered for each mention."),
                    new FlaggedOption("query-from-mention", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, true, 'q', "query-from-mention", "The QueryFromMention spec object to be used."),
                    new FlaggedOption("scorer", JSAP.STRING_PARSER, "it.unimi.dsi.law.nel.BM25NormalizedScorer(1, 0)", false, 's', "scorer", "The scorer object spec to be used."),
                    new UnflaggedOption("basenameWeight", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, true, true, "The indices that the servlet will use. Indices are specified using their basename, optionally followed by a colon and a double representing the weight used to score results from that index. Indices without a specified weight are weighted 1.")
                });
        final JSAPResult jsapResult = jsap.parse(args);
        if (jsap.messagePrinted()) {
            return;
        }

        // The prototype is never used for querying: each worker thread works on its own copy.
        final MG4JCandidateAnnotator prototype = new MG4JCandidateAnnotator(
                (BigList<? extends CharSequence>) new FileLinesCollection(jsapResult.getString("titleList"), "UTF-8").allLines(),
                jsapResult.getStringArray("basenameWeight"),
                (Scorer) ObjectParser.fromSpec(jsapResult.getString("scorer"), Scorer.class, new String[] { "it.unimi.di.big.mg4j.search.score" }));

        @SuppressWarnings("unchecked")
        final List<Document> documents = (List<Document>) BinIO.loadObject(jsapResult.getString("collection"));
        final CandidateAnnotatedDocument[] annotated = new CandidateAnnotatedDocument[documents.size()];
        final int maxResults = jsapResult.getInt("max-results");

        final ProgressLogger progressLogger = new ProgressLogger(LOGGER);
        progressLogger.expectedUpdates = documents.size();
        progressLogger.start("Annotating");
        final SummaryStats candidatesPerMention = new SummaryStats();

        final ThreadLocal<MG4JCandidateAnnotator> threadAnnotator = new ThreadLocal<MG4JCandidateAnnotator>() {
            @Override
            protected MG4JCandidateAnnotator initialValue() {
                return prototype.copy();
            }
        };
        final ThreadLocal<QueryFromMention> threadQueryFromMention = new ThreadLocal<QueryFromMention>() {
            @Override
            protected QueryFromMention initialValue() {
                try {
                    final QueryFromMention queryFromMention = (QueryFromMention) ObjectParser.fromSpec(jsapResult.getString("query-from-mention"), QueryFromMention.class, new String[] { "it.unimi.dsi.law.nel" });
                    queryFromMention.init(threadAnnotator.get().name2Index);
                    return queryFromMention;
                } catch (Exception e) {
                    throw new RuntimeException(e.getMessage(), e);
                }
            }
        };

        final ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
        final ExecutorCompletionService<Void> completionService = new ExecutorCompletionService<>(executor);
        // One try/finally around both phases: the pool is shut down exactly once, whether the
        // collection is empty, everything succeeds, or a task fails.
        try {
            for (int d = 0; d < documents.size(); d++) {
                final int index = d;
                completionService.submit(new Callable<Void>() {
                    @Override
                    public Void call() throws QueryBuilderVisitorException, IOException {
                        final CandidateAnnotatedDocument result = threadAnnotator.get().annotate(documents.get(index), threadQueryFromMention.get(), maxResults);
                        final Object2DoubleLinkedOpenHashMap<String>[] candidates = result.candidate();
                        synchronized (candidatesPerMention) {
                            for (final Object2DoubleLinkedOpenHashMap<String> candidate : candidates) {
                                candidatesPerMention.add(candidate.size());
                            }
                        }
                        // Each task writes only its own slot.
                        annotated[index] = result;
                        synchronized (progressLogger) {
                            progressLogger.update();
                        }
                        return null;
                    }
                });
            }

            for (int d = 0; d < documents.size(); d++) {
                try {
                    completionService.take().get();
                } catch (ExecutionException e) {
                    final Throwable cause = e.getCause();
                    if (cause instanceof RuntimeException) {
                        throw (RuntimeException) cause;
                    }
                    throw new RuntimeException(cause.getMessage(), cause);
                }
            }
        } finally {
            executor.shutdown();
        }

        progressLogger.done();
        LOGGER.info("Number of candidates per mention: " + candidatesPerMention);
        BinIO.storeObject(ObjectArrayList.wrap(annotated), jsapResult.getString("result"));
    }
}
