package it.unimi.dsi.law.nel;

import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.di.big.mg4j.index.DowncaseTermProcessor;
import it.unimi.di.big.mg4j.index.TermProcessor;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.law.nel.interfaces.AnnotatedDocument;
import it.unimi.dsi.law.nel.interfaces.ImmutableAnnotatedDocument;
import it.unimi.dsi.stat.SummaryStats;
import it.unimi.dsi.util.Interval;
import it.unimi.dsi.util.Intervals;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import org.jsoup.helper.StringUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:it/unimi/dsi/law/nel/CompareAnnotatedDocuments.class */
/**
 * Command-line tool that compares two serialized collections of {@link AnnotatedDocument}s
 * (an expected one and an actual one) and prints precision, recall and F1 figures.
 *
 * <p>Three families of figures are printed by {@link #main(String[])}: macro-averaged and
 * micro-averaged mention-level figures (computed by
 * {@link #compareLong(AnnotatedDocument, AnnotatedDocument, boolean, boolean)}) and entity-set
 * figures (computed by {@link #compareShort(AnnotatedDocument, AnnotatedDocument, boolean)}).
 */
public class CompareAnnotatedDocuments {
    private static final Logger LOGGER = LoggerFactory.getLogger(CompareAnnotatedDocuments.class);

    /** Index of the document {@link #main(String[])} is currently processing; read by {@link #normalize} for its warning. */
    private static int documentNumber = 0;

    /**
     * Re-tokenizes a document and remaps its mentions onto the new token positions.
     *
     * <p>Every original token is split into words by a {@link FastBufferedReader}; each non-empty
     * word is passed through the {@link DowncaseTermProcessor} and kept if the processor accepts it.
     * A mention whose remapped interval turns out empty is replaced by
     * {@link Intervals#EMPTY_INTERVAL} and a warning is logged.
     *
     * @param annotatedDocument the document to normalize.
     * @return a new {@link ImmutableAnnotatedDocument} with the re-tokenized text, the remapped
     *         mentions and the entity array of the original document.
     * @throws RuntimeException wrapping any {@link IOException} raised while reading a token.
     */
    private static AnnotatedDocument normalize(AnnotatedDocument annotatedDocument) {
        final ObjectArrayList<String> words = new ObjectArrayList<>();
        final TermProcessor termProcessor = DowncaseTermProcessor.getInstance();
        final MutableString word = new MutableString();
        final MutableString nonWord = new MutableString();
        final String[] token = annotatedDocument.token();

        // span[t] records which new words original token t produced: left is the index of the
        // first one, right is the index ONE PAST the last one (hence the "- 1" when remapping).
        final Interval[] span = new Interval[token.length];
        for (int t = 0; t < token.length; t++) {
            final int firstWord = words.size();
            try (FastBufferedReader reader = new FastBufferedReader(new MutableString(token[t]))) {
                while (reader.next(word, nonWord)) {
                    if (!word.isEmpty() && termProcessor.processTerm(word)) {
                        words.add(word.toString());
                    }
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            span[t] = Interval.valueOf(firstWord, words.size());
        }

        final Interval[] mention = annotatedDocument.mention();
        final Interval[] remapped = new Interval[mention.length];
        for (int m = 0; m < mention.length; m++) {
            final Interval original = mention[m];
            final int left = span[original.left].left;
            final int right = span[original.right].right - 1;
            if (right < left) {
                // All the tokens covered by the mention vanished during re-tokenization.
                if (LOGGER.isDebugEnabled()) {
                    LOGGER.debug("Interval {} got lost while re-tokenizing document {} now re-tokenized as {}",
                            original, annotatedDocument, StringUtil.join(words, " "));
                }
                LOGGER.warn("Interval {} got lost while re-tokenizing document #{}", original, documentNumber);
                remapped[m] = Intervals.EMPTY_INTERVAL;
            } else {
                remapped[m] = Interval.valueOf(left, right);
            }
        }
        return new ImmutableAnnotatedDocument(words.toArray(new String[0]), remapped, annotatedDocument.entity());
    }

    /**
     * Returns {@code numerator / denominator} as a real number, or 1 when the denominator is zero.
     */
    private static double ratio(int numerator, int denominator) {
        // The cast is essential: int / int would truncate every proper fraction to 0.
        return denominator == 0 ? 1.0d : (double) numerator / denominator;
    }

    /**
     * Returns the harmonic mean of precision and recall. When either argument is 0 the
     * corresponding reciprocal is +Infinity in double arithmetic and the result is 0.
     */
    private static double f1(double precision, double recall) {
        return 2.0d / ((1.0d / precision) + (1.0d / recall));
    }

    /** Tells whether a mention carries an entity and was not replaced by {@link Intervals#EMPTY_INTERVAL}. */
    private static boolean isAnnotated(String entity, Interval mention) {
        // Identity comparison on purpose: normalize() stores the EMPTY_INTERVAL instance itself.
        return entity != null && mention != Intervals.EMPTY_INTERVAL;
    }

    /** Counts the first {@code n} positions for which {@link #isAnnotated(String, Interval)} holds. */
    private static int countAnnotated(String[] entity, Interval[] mention, int n) {
        int count = 0;
        for (int k = 0; k < n; k++) {
            if (isAnnotated(entity[k], mention[k])) {
                count++;
            }
        }
        return count;
    }

    /** Returns the array {@code 0, 1, ..., n - 1} of boxed indices, ready to be sorted with a comparator. */
    private static Integer[] identityPermutation(int n) {
        final Integer[] order = new Integer[n];
        for (int k = 0; k < n; k++) {
            order[k] = k;
        }
        return order;
    }

    /**
     * Compares the sets of (non-null) entities of two documents, ignoring mentions.
     *
     * @param annotatedDocument the expected document.
     * @param annotatedDocument2 the actual document.
     * @param z if true, both entity sets are printed on standard error.
     * @return a three-element array: precision (common entities over actual entities), recall
     *         (common entities over expected entities) and their harmonic mean; a ratio whose
     *         denominator is zero is reported as 1.
     */
    public static double[] compareShort(AnnotatedDocument annotatedDocument, AnnotatedDocument annotatedDocument2, boolean z) {
        final ObjectOpenHashSet<String> expectedEntities = new ObjectOpenHashSet<>(annotatedDocument.entity());
        final ObjectOpenHashSet<String> actualEntities = new ObjectOpenHashSet<>(annotatedDocument2.entity());
        expectedEntities.remove((Object) null);
        actualEntities.remove((Object) null);
        if (z) {
            System.err.println("Expected: " + expectedEntities);
            System.err.println("Actual: " + actualEntities);
        }
        final int expectedCount = expectedEntities.size();
        final int actualCount = actualEntities.size();
        // After this call expectedEntities holds the intersection of the two sets.
        expectedEntities.retainAll(actualEntities);
        final int common = expectedEntities.size();
        final double precision = ratio(common, actualCount);
        final double recall = ratio(common, expectedCount);
        return new double[]{precision, recall, f1(precision, recall)};
    }

    /**
     * Compares two documents mention by mention.
     *
     * <p>An expected mention is matched when some actual mention carries the same entity and
     * lies within the scanned window (or, if {@code z} is true, is equal to the expected
     * mention). Mentions with a null entity or equal to {@link Intervals#EMPTY_INTERVAL} are
     * ignored on both sides. Each expected mention is counted as matched at most once.
     *
     * <p>The documents are not modified: mentions are visited through sorted index
     * permutations, so that each mention stays paired with its own entity.
     *
     * @param annotatedDocument the expected document.
     * @param annotatedDocument2 the actual document.
     * @param z if true, intervals must be equal to match.
     * @param z2 if true, every matched and missed expected mention is reported on standard error.
     * @return a six-element array: number of matched expected mentions, number of usable actual
     *         mentions, number of usable expected mentions, precision, recall and their harmonic
     *         mean; a ratio whose denominator is zero is reported as 1.
     */
    public static double[] compareLong(AnnotatedDocument annotatedDocument, AnnotatedDocument annotatedDocument2, boolean z, boolean z2) {
        final Interval[] expectedMention = annotatedDocument.mention();
        final String[] expectedEntity = annotatedDocument.entity();
        final int expectedMentions = annotatedDocument.numMentions();
        final Interval[] actualMention = annotatedDocument2.mention();
        final String[] actualEntity = annotatedDocument2.entity();
        final int actualMentions = annotatedDocument2.numMentions();

        // Sort indices rather than the mention arrays themselves: sorting the arrays in place
        // would break the positional pairing with the entity arrays (and alter the documents).
        final Integer[] expectedOrder = identityPermutation(expectedMentions);
        Arrays.sort(expectedOrder, (a, b) -> Intervals.STARTS_BEFORE.compare(expectedMention[a], expectedMention[b]));
        final Integer[] actualOrder = identityPermutation(actualMentions);
        Arrays.sort(actualOrder, (a, b) -> Intervals.ENDS_BEFORE.compare(actualMention[a], actualMention[b]));

        int matched = 0;
        int firstCandidate = 0;
        for (int k = 0; k < expectedMentions; k++) {
            final int e = expectedOrder[k];
            if (!isAnnotated(expectedEntity[e], expectedMention[e])) {
                continue;
            }

            // Drop actual mentions that are unusable or end before this expected mention starts.
            while (firstCandidate < actualMentions) {
                final int a = actualOrder[firstCandidate];
                if (isAnnotated(actualEntity[a], actualMention[a]) && actualMention[a].right >= expectedMention[e].left) {
                    break;
                }
                firstCandidate++;
            }

            boolean found = false;
            for (int j = firstCandidate; j < actualMentions; j++) {
                final int a = actualOrder[j];
                if (!isAnnotated(actualEntity[a], actualMention[a])) {
                    continue;
                }
                if (actualMention[a].left > expectedMention[e].right) {
                    break;
                }
                if (z && !actualMention[a].equals(expectedMention[e])) {
                    continue;
                }
                if (expectedEntity[e].equals(actualEntity[a])) {
                    matched++;
                    found = true;
                    // One hit is enough: stop so the expected mention is counted only once.
                    break;
                }
            }

            if (z2) {
                System.err.println((found ? "Catched entity " : "Missed entity ") + expectedEntity[e]
                        + " in mention " + annotatedDocument.mentionAsString()[e]);
            }
        }

        // Denominators are counted independently of the matching loop so that no mention can be
        // counted twice or skipped, however the loop above terminates.
        final int actualCount = countAnnotated(actualEntity, actualMention, actualMentions);
        final int expectedCount = countAnnotated(expectedEntity, expectedMention, expectedMentions);
        final double precision = ratio(matched, actualCount);
        final double recall = ratio(matched, expectedCount);
        return new double[]{matched, actualCount, expectedCount, precision, recall, f1(precision, recall)};
    }

    /** Renders either the whole statistics object or just its mean, depending on {@code full}. */
    private static String describe(SummaryStats stats, boolean full) {
        return full ? String.valueOf(stats) : String.valueOf(stats.mean());
    }

    /**
     * Loads the expected and the actual collection with {@link BinIO#loadObject(CharSequence)},
     * compares them document by document and prints the resulting figures on standard output.
     * Exits with status 1 if the two collections have different sizes.
     *
     * @param strArr the command-line arguments, parsed with JSAP (switches {@code verbose},
     *        {@code exact}, {@code skip}, {@code full}; positional {@code expected} and {@code actual}).
     */
    public static void main(String[] strArr) throws JSAPException, ClassNotFoundException, IllegalArgumentException, SecurityException, IOException {
        final SimpleJSAP jsap = new SimpleJSAP(CompareAnnotatedDocuments.class.getName(), "Compare two AnnotatedDocument collections.", new Parameter[]{
            new Switch("verbose", 'v', "Print (on stderr) matched and unmatched mentions."),
            new Switch("exact", 'e', "Match exactly intervals (ERD 2014 definition requires just overlaps)."),
            new Switch("skip", 's', "Skip comparison of documents whose text does not match after normalization."),
            new Switch("full", 'f', "Output full distribution of macro-averaged measures (not just mean values)."),
            new UnflaggedOption("expected", JSAP.STRING_PARSER, true, "The expected annotated document (ground truth)."),
            new UnflaggedOption("actual", JSAP.STRING_PARSER, true, "The actual annotated document.")
        });
        final JSAPResult args = jsap.parse(strArr);
        if (jsap.messagePrinted()) {
            return;
        }
        final boolean verbose = args.getBoolean("verbose");
        final boolean skip = args.getBoolean("skip");
        final boolean exact = args.getBoolean("exact");
        final boolean full = args.getBoolean("full");

        final Collection<?> expectedCollection = (Collection<?>) BinIO.loadObject(args.getString("expected"));
        final Collection<?> actualCollection = (Collection<?>) BinIO.loadObject(args.getString("actual"));
        if (expectedCollection.size() != actualCollection.size()) {
            LOGGER.error("The sizes of the two collections do not coincide");
            System.exit(1);
        }
        final Iterator<?> actualIterator = actualCollection.iterator();
        final Iterator<?> expectedIterator = expectedCollection.iterator();

        final SummaryStats macroPrecision = new SummaryStats();
        final SummaryStats macroRecall = new SummaryStats();
        final SummaryStats macroF1 = new SummaryStats();
        final SummaryStats shortPrecision = new SummaryStats();
        final SummaryStats shortRecall = new SummaryStats();
        final SummaryStats shortF1 = new SummaryStats();
        int totalMatched = 0;
        int totalActual = 0;
        int totalExpected = 0;

        final int documents = expectedCollection.size();
        for (int d = 0; d < documents; d++) {
            // Set up front (not incremented at the end) so that skipped documents cannot leave
            // the counter behind; normalize() reads it for its warning message.
            documentNumber = d;
            if (verbose) {
                System.err.println("*** Document #" + d);
            }
            final AnnotatedDocument rawExpected = (AnnotatedDocument) expectedIterator.next();
            final AnnotatedDocument rawActual = (AnnotatedDocument) actualIterator.next();
            final AnnotatedDocument expected = normalize(rawExpected);
            final AnnotatedDocument actual = normalize(rawActual);
            if (verbose) {
                System.err.println("Expected document: " + expected);
                // NOTE(review): this prints the actual document BEFORE normalization, whereas the
                // expected one is printed after it; kept as found - confirm whether intended.
                System.err.println("Actual document: " + rawActual);
            }
            if (!expected.allDocumentText().equals(actual.allDocumentText())) {
                LOGGER.warn("Documents do not match after normalization. Expected: " + expected.allDocumentText() + ", Actual: " + actual.allDocumentText());
                if (skip) {
                    continue;
                }
            }

            final double[] shortTrack = compareShort(expected, actual, verbose);
            if (Double.isFinite(shortTrack[0]) && Double.isFinite(shortTrack[1]) && Double.isFinite(shortTrack[2])) {
                shortPrecision.add(shortTrack[0]);
                shortRecall.add(shortTrack[1]);
                shortF1.add(shortTrack[2]);
            }

            final double[] longTrack = compareLong(expected, actual, exact, verbose);
            if (Double.isFinite(longTrack[3]) && Double.isFinite(longTrack[4]) && Double.isFinite(longTrack[5])) {
                macroPrecision.add(longTrack[3]);
                macroRecall.add(longTrack[4]);
                macroF1.add(longTrack[5]);
            }
            // The first three slots hold integer counts stored as doubles.
            totalMatched += (int) longTrack[0];
            totalActual += (int) longTrack[1];
            totalExpected += (int) longTrack[2];
        }

        System.out.println("Macro-averaging:");
        System.out.println("\tPrecision: " + describe(macroPrecision, full));
        System.out.println("\tRecall: " + describe(macroRecall, full));
        System.out.println("\tAverage F1: " + describe(macroF1, full));
        System.out.println("Micro-averaging:");
        final double microPrecision = ratio(totalMatched, totalActual);
        final double microRecall = ratio(totalMatched, totalExpected);
        System.out.println("\tPrecision: " + microPrecision);
        System.out.println("\tRecall: " + microRecall);
        System.out.println("\tAverage F1: " + f1(microPrecision, microRecall));
        System.out.println("Short-track mode:");
        System.out.println("\tPrecision: " + describe(shortPrecision, full));
        System.out.println("\tRecall: " + describe(shortRecall, full));
        System.out.println("\tAverage F1: " + describe(shortF1, full));
    }
}
