package it.unipi.di.acube.batframework.utils;

import it.unipi.di.acube.batframework.data.Tag;
import it.unipi.di.acube.batframework.problems.C2WDataset;
import java.util.HashSet;
import java.util.Iterator;

/* loaded from: input_file:it/unipi/di/acube/batframework/utils/TestDataset.class */
/**
 * Console diagnostics for a {@link C2WDataset}: a basic size consistency check,
 * a scan for annotations that point to redirect pages, and summary statistics.
 * All output is written to {@code System.out}.
 */
public class TestDataset {
    /**
     * Prints an INFO line for every gold-standard tag whose concept is reported
     * as a redirect by the given Wikipedia API.
     *
     * @param c2WDataset dataset whose gold-standard tags are scanned
     * @param wikipediaApiInterface API used for the {@code isRedirect} lookup
     * @throws Exception propagated from the Wikipedia API lookup
     */
    private static void checkRedirects(C2WDataset c2WDataset, WikipediaApiInterface wikipediaApiInterface) throws Exception {
        for (HashSet<Tag> tags : c2WDataset.getC2WGoldStandardList()) {
            for (Tag tag : tags) {
                if (wikipediaApiInterface.isRedirect(tag.getConcept())) {
                    System.out.println("INFO: An annotation points to a redirect page! wid=" + tag.getConcept());
                }
            }
        }
    }

    /**
     * Checks that the gold-standard list has exactly one entry per document,
     * printing an ERROR line when the two sizes differ.
     *
     * @param c2WDataset dataset to check
     * @return {@code true} when the gold-standard list size equals {@code getSize()}
     */
    private static boolean checkBasicData(C2WDataset c2WDataset) {
        if (c2WDataset.getC2WGoldStandardList().size() == c2WDataset.getSize()) {
            return true;
        }
        // A separator before "anns=" keeps the two numbers from running together.
        System.out.println("ERROR: list of texts and list of annotations sets have different size! texts=" + c2WDataset.getSize() + " anns=" + c2WDataset.getC2WGoldStandardList().size());
        return false;
    }

    /**
     * Computes {@code total / count} as a floating-point average.
     *
     * @param total sum being averaged
     * @param count number of elements the sum was taken over
     * @return the average, or {@code 0.0} when {@code count} is zero
     */
    private static double average(double total, double count) {
        // Guarding the zero case avoids ArithmeticException / NaN on empty datasets.
        return count == 0 ? 0.0 : total / count;
    }

    /**
     * Runs the basic check on the dataset and, if it passes, prints redirect
     * warnings followed by summary statistics: annotation and document counts,
     * average annotations per document, average and maximum text length, the
     * number of distinct dereferenced concepts, and the number of documents
     * having at least one annotation.
     *
     * <p>Averages are reported as fractional values; a dataset with no
     * documents (or no annotated documents) yields an average of {@code 0.0}
     * instead of failing on a division by zero.
     *
     * @param c2WDataset dataset to describe
     * @param wikipediaApiInterface API used for redirect checks and dereferencing
     * @throws Exception propagated from the Wikipedia API calls
     */
    public static void dumpInfo(C2WDataset c2WDataset, WikipediaApiInterface wikipediaApiInterface) throws Exception {
        System.out.println("Basic check on dataset " + c2WDataset.getName());
        if (!checkBasicData(c2WDataset)) {
            return;
        }

        System.out.println("Checking that no pages are redirects on dataset " + c2WDataset.getName());
        checkRedirects(c2WDataset, wikipediaApiInterface);

        // Accumulate total and maximum text length over all documents.
        long totalLength = 0;
        long longestLength = 0;
        for (String text : c2WDataset.getTextInstanceList()) {
            if (text.length() > longestLength) {
                longestLength = text.length();
            }
            totalLength += text.length();
        }
        System.out.println("Annotations: " + c2WDataset.getTagsCount() + " Documents:" + c2WDataset.getSize() + " avg. ann/doc: " + average(c2WDataset.getTagsCount(), c2WDataset.getSize()) + " avg len:" + ((int) average(totalLength, c2WDataset.getSize())) + " longest doc:" + longestLength);

        // Collect dereferenced concept ids and count documents that carry annotations.
        HashSet<Integer> distinctTopics = new HashSet<Integer>();
        int annotatedDocuments = 0;
        for (HashSet<Tag> tags : c2WDataset.getC2WGoldStandardList()) {
            for (Tag tag : tags) {
                distinctTopics.add(Integer.valueOf(wikipediaApiInterface.dereference(tag.getConcept())));
            }
            if (!tags.isEmpty()) {
                annotatedDocuments++;
            }
        }
        System.out.println("Distinct Topics: " + distinctTopics.size());
        System.out.println("Dataset contains " + annotatedDocuments + " documents with at least 1 annotation. These documents have an average number of annotations = " + average(c2WDataset.getTagsCount(), annotatedDocuments));
    }
}
