package uk.ac.shef.dcs.jate.eval;

import dragon.nlp.extract.EngDocumentParser;
import dragon.nlp.tool.lemmatiser.EngLemmatiser;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import opennlp.tools.util.eval.FMeasure;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.shell.Test;
import org.apache.solr.common.params.AnalysisParams;
import org.json.simple.parser.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.ac.shef.dcs.jate.JATEException;
import uk.ac.shef.dcs.jate.nlp.Lemmatiser;

/* loaded from: input_file:uk/ac/shef/dcs/jate/eval/Scorer.class */
/**
 * Scores ranked term-extraction output against a gold-standard (GS) term list.
 *
 * <p>The class offers string/list normalisation helpers ({@code prune}, {@code normalize}),
 * precision / recall / F-measure computations (overall and at rank cut-offs) and a report
 * writer that evaluates every result file found in a folder.
 *
 * <p>Throughout this class the value {@code -1} used as a length or token bound means
 * "no bound on this side".
 */
public class Scorer {
    /** Regular expression matching a run of digits. */
    protected static final String PATTERN_DIGITALS = "\\d+";
    /** Regular expression matching one punctuation character. */
    protected static final String PATTERN_SYMBOLS = "\\p{Punct}";
    protected static final String FILE_TYPE_JSON = "json";
    protected static final String FILE_TYPE_CSV = "csv";
    private static final Logger LOG = LoggerFactory.getLogger(Scorer.class.getName());

    // Pre-compiled forms of the patterns applied to every term during pruning.
    private static final Pattern DIGITS = Pattern.compile(PATTERN_DIGITALS);
    private static final Pattern SYMBOLS = Pattern.compile(PATTERN_SYMBOLS);
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
    /** Sentinel for "no limit" in the character / token range parameters. */
    private static final int UNBOUNDED = -1;

    // Default evaluation settings used by main(); public and mutable so callers can override them.
    public static boolean EVAL_CONDITION_IGNORE_SYMBOL = true;
    public static boolean EVAL_CONDITION_IGNORE_DIGITS = false;
    public static boolean EVAL_CONDITION_CASE_INSENSITIVE = true;
    public static int EVAL_CONDITION_CHAR_RANGE_MIN = 2;
    public static int EVAL_CONDITION_CHAR_RANGE_MAX = -1;
    public static int EVAL_CONDITION_TOKEN_RANGE_MIN = 1;
    public static int EVAL_CONDITION_TOKEN_RANGE_MAX = 5;
    /** Absolute rank cut-offs (top N terms) at which precision is reported. */
    public static int[] EVAL_CONDITION_TOP_N = {50, 100, 300, 500, 800, 1000, 1500, 2000, 3000, 4000, 5000, 6000};
    /** Percentage cut-offs (top K% of the candidates) at which precision and recall are reported. */
    public static int[] EVAL_CONDITION_TOP_K = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30};
    public static Boolean IS_COMPUTE_ATR4S_AvP = Boolean.FALSE;

    /**
     * Loads the gold standard with {@code GSLoader.loadGenia(gsFile)} and writes an evaluation
     * report; see {@link #createEvaluationReport} for the meaning of the remaining parameters.
     *
     * @param gsFile  gold-standard file handed to {@code GSLoader.loadGenia}
     * @param outFile file the report is written to
     */
    public static void createReportGenia(Lemmatiser lemmatiser, String ateOutputFolder, String ateOutputType,
                                         String gsFile, String outFile,
                                         boolean ignoreSymbols, boolean ignoreDigits, boolean caseInsensitive,
                                         int minChar, int maxChar, int minTokens, int maxTokens,
                                         int[] ranksToEval, int[] percentagesToEval,
                                         Boolean computeAveragePrecision) throws IOException, ParseException {
        createEvaluationReport(lemmatiser, ateOutputFolder, ateOutputType, outFile,
                ignoreSymbols, ignoreDigits, caseInsensitive, minChar, maxChar, minTokens, maxTokens,
                ranksToEval, percentagesToEval, computeAveragePrecision, GSLoader.loadGenia(gsFile));
    }

    /**
     * Loads the gold standard with {@code GSLoader.loadACLRD(gsFile)} and writes an evaluation
     * report; see {@link #createEvaluationReport} for the meaning of the remaining parameters.
     *
     * @param gsFile  gold-standard file handed to {@code GSLoader.loadACLRD}
     * @param outFile file the report is written to
     */
    public static void createReportACLRD(Lemmatiser lemmatiser, String ateOutputFolder, String ateOutputType,
                                         String gsFile, String outFile,
                                         boolean ignoreSymbols, boolean ignoreDigits, boolean caseInsensitive,
                                         int minChar, int maxChar, int minTokens, int maxTokens,
                                         int[] ranksToEval, int[] percentagesToEval,
                                         Boolean computeAveragePrecision) throws IOException, ParseException {
        createEvaluationReport(lemmatiser, ateOutputFolder, ateOutputType, outFile,
                ignoreSymbols, ignoreDigits, caseInsensitive, minChar, maxChar, minTokens, maxTokens,
                ranksToEval, percentagesToEval, computeAveragePrecision, GSLoader.loadACLRD(gsFile));
    }

    /**
     * Evaluates every result file in {@code ateOutputFolder} whose name contains
     * {@code ateOutputType} (and does not start with '.') and writes one CSV-style report.
     *
     * <p>The GS terms are pruned and, when a lemmatiser is given, lemmatised. Candidate terms are
     * only pruned / de-duplicated here; they are not lemmatised by this method.
     *
     * <p>The output file is opened only after all metrics have been computed, so a failure while
     * evaluating does not truncate an existing report.
     *
     * @param lemmatiser              lemmatiser for the GS terms, or {@code null} to skip lemmatisation
     * @param ateOutputFolder         folder holding the ranked candidate files
     * @param ateOutputType           {@code "json"} or {@code "csv"}; any other value yields empty candidate lists
     * @param outFile                 report file to (over)write
     * @param ignoreSymbols           replace punctuation with spaces before comparison
     * @param ignoreDigits            replace digit runs with spaces before comparison
     * @param caseInsensitive         lower-case terms before comparison
     * @param minChar                 minimum term length in characters, or -1
     * @param maxChar                 maximum term length in characters, or -1
     * @param minTokens               minimum number of whitespace-separated tokens, or -1
     * @param maxTokens               maximum number of whitespace-separated tokens, or -1
     * @param ranksToEval             absolute rank cut-offs for precision
     * @param percentagesToEval       percentage cut-offs for precision and recall
     * @param computeAveragePrecision whether to append an average-precision section; {@code null} counts as false
     * @param gsTerms                 raw gold-standard terms
     * @throws IOException if the folder cannot be listed or the report cannot be written
     */
    public static void createEvaluationReport(Lemmatiser lemmatiser, String ateOutputFolder, String ateOutputType,
                                              String outFile,
                                              boolean ignoreSymbols, boolean ignoreDigits, boolean caseInsensitive,
                                              int minChar, int maxChar, int minTokens, int maxTokens,
                                              int[] ranksToEval, int[] percentagesToEval,
                                              Boolean computeAveragePrecision,
                                              List<String> gsTerms) throws IOException, ParseException {
        LOG.info(String.format("creating EvaluationReport for results in [%s]...", ateOutputFolder));
        LOG.info("original gsTerms size :" + gsTerms.size());

        List<String> normalisedGs = prune(gsTerms, ignoreSymbols, ignoreDigits, caseInsensitive,
                minChar, maxChar, minTokens, maxTokens);
        if (lemmatiser != null) {
            normalisedGs = normalize(normalisedGs, lemmatiser, true);
        } else {
            LOG.info("skip lemmatisation on GS terms.");
        }
        LOG.info("normalised gsTerms size :" + normalisedGs.size());

        // File.listFiles() returns null (not an empty array) for a missing or unreadable directory.
        File[] resultFiles = new File(ateOutputFolder).listFiles();
        if (resultFiles == null) {
            throw new IOException("Cannot list ATE output folder [" + ateOutputFolder + "]");
        }
        Arrays.sort(resultFiles);

        // A null flag is treated as "do not compute" instead of failing on unboxing.
        boolean withAveragePrecision = Boolean.TRUE.equals(computeAveragePrecision);
        boolean noPruning = !ignoreSymbols && !ignoreDigits && !caseInsensitive
                && minChar == UNBOUNDED && maxChar == UNBOUNDED
                && minTokens == UNBOUNDED && maxTokens == UNBOUNDED;

        Map<String, double[]> metricsByFile = new TreeMap<>();
        Map<String, Double> averagePrecisionByFile = new TreeMap<>();

        for (File resultFile : resultFiles) {
            String name = resultFile.getName();
            if (!name.contains(ateOutputType) || name.startsWith(".")) {
                continue;
            }
            LOG.info("evaluating " + name + " ...");

            List<String> loaded;
            if (FILE_TYPE_JSON.equals(ateOutputType)) {
                loaded = ATEResultLoader.loadFromJSON(resultFile.toString());
            } else if (FILE_TYPE_CSV.equals(ateOutputType)) {
                loaded = ATEResultLoader.loadFromCSV(resultFile.toString());
            } else {
                loaded = new ArrayList<>();
            }

            List<String> candidates;
            if (noPruning) {
                LOG.debug("skip prune terms.");
                LOG.debug("deduplication ...");
                candidates = loaded.stream().distinct().collect(Collectors.toList());
                LOG.debug("deduplication is completed ...");
            } else {
                candidates = prune(loaded, ignoreSymbols, ignoreDigits, caseInsensitive,
                        minChar, maxChar, minTokens, maxTokens);
            }
            LOG.info("Candidate normalisation are completed. Normalised term size : " + candidates.size());

            computeEvalMetrics(name, normalisedGs, candidates, ranksToEval, percentagesToEval, metricsByFile);
            if (withAveragePrecision) {
                averagePrecisionByFile.put(name,
                        computeAveragePrecision(normalisedGs, candidates, normalisedGs.size()));
                LOG.info("final computeAveragePrecision is completed.");
            }
            LOG.info("all evaluation metrics are computed for [" + name + "].");
        }

        LOG.info("exporting report ...");
        try (PrintWriter writer = new PrintWriter(outFile)) {
            writer.println(buildMetricsTable(metricsByFile, ranksToEval, percentagesToEval));
            if (withAveragePrecision) {
                writer.println("\n\nAVERAGE PRECISION\n");
                for (Map.Entry<String, Double> entry : averagePrecisionByFile.entrySet()) {
                    writer.println(entry.getKey().replaceAll(",", "_") + "," + entry.getValue());
                }
            }
            // PrintWriter never throws on write failure; surface it explicitly.
            if (writer.checkError()) {
                throw new IOException("Failed to write evaluation report to [" + outFile + "]");
            }
        }
        LOG.info(String.format("complete. Check the latest evaluation in [%s]", outFile));
    }

    /** Renders the header row and one row per evaluated file; commas in file names become underscores. */
    private static String buildMetricsTable(Map<String, double[]> metricsByFile,
                                            int[] ranksToEval, int[] percentagesToEval) {
        StringBuilder table = new StringBuilder();
        // The first header cell (file name column) is intentionally left empty.
        for (int rank : ranksToEval) {
            table.append(",").append(rank);
        }
        for (int percentage : percentagesToEval) {
            table.append(",").append(percentage + "%_P");
        }
        for (int percentage : percentagesToEval) {
            table.append(",").append(percentage + "%_R");
        }
        table.append(",").append("Overall_P");
        table.append(",").append("Overall_R");
        table.append(",").append("Overall_F");
        table.append(",").append("Total_Size");
        table.append("\n");

        for (Map.Entry<String, double[]> entry : metricsByFile.entrySet()) {
            table.append(entry.getKey().replaceAll(",", "_"));
            // The array produced by computeEvalMetrics is already laid out in header order.
            for (double value : entry.getValue()) {
                table.append(",").append(value);
            }
            table.append("\n");
        }
        return table.toString();
    }

    /**
     * Computes all report metrics for one result file and stores them in {@code map} under
     * {@code fileName}.
     *
     * <p>Array layout: precision at each rank in {@code ranksToEval}, precision at each
     * percentage in {@code percentagesToEval}, recall at each percentage, then overall
     * precision, overall recall, F-measure and the number of candidates.
     *
     * @param fileName          key under which the metrics are stored
     * @param gsTerms           normalised gold-standard terms
     * @param terms             ranked candidate terms (used as given, no further normalisation)
     * @param ranksToEval       absolute rank cut-offs
     * @param percentagesToEval percentage cut-offs, converted to ranks relative to {@code terms.size()}
     * @param map               receives the metrics array
     */
    public static void computeEvalMetrics(String fileName, List<String> gsTerms, List<String> terms,
                                          int[] ranksToEval, int[] percentagesToEval,
                                          Map<String, double[]> map) {
        LOG.info("skip normalise candidate term again.");
        double[] precisionAtRanks = computePrecisionAtRank(gsTerms, terms, ranksToEval);
        LOG.info("topNPrecisions calculation is completed. Top N size : " + precisionAtRanks.length);

        int[] ranksFromPercentages = getTopNFromTopK(terms.size(), percentagesToEval);
        double[] precisionAtPercentages = computePrecisionAtRank(gsTerms, terms, ranksFromPercentages);
        double[] recallAtPercentages = computeRecallAtRank(gsTerms, terms, ranksFromPercentages);
        LOG.info("topKPrecision & topKRecall calculation is completed. Top K size : " + precisionAtPercentages.length);

        double overallPrecision = computeOverallPrecision(gsTerms, terms);
        double overallRecall = computeOverallRecall(gsTerms, terms);
        double overallF = getFMeasure(Double.valueOf(overallPrecision), Double.valueOf(overallRecall));
        LOG.info("overall P/R/F calculation is complete. ");

        double[] overall = {overallPrecision, overallRecall, overallF, terms.size()};
        double[] rankMetrics = ArrayUtils.addAll(ArrayUtils.addAll(precisionAtRanks, precisionAtPercentages),
                recallAtPercentages);
        map.put(fileName, ArrayUtils.addAll(rankMetrics, overall));
    }

    /** Converts percentage cut-offs into absolute ranks for a list of {@code total} items (rounded). */
    private static int[] getTopNFromTopK(int total, int[] percentages) {
        int[] ranks = new int[percentages.length];
        for (int idx = 0; idx < percentages.length; idx++) {
            ranks[idx] = Math.round(total * (percentages[idx] / 100.0f));
        }
        return ranks;
    }

    /**
     * Prunes and lemmatises both lists, then delegates to
     * {@link #computePrecisionAtRank(List, List, int[])}.
     */
    public static double[] computePrecisionAtRank(Lemmatiser lemmatiser, List<String> gsTerms, List<String> terms,
                                                  boolean ignoreSymbols, boolean ignoreDigits,
                                                  boolean caseInsensitive,
                                                  int minChar, int maxChar, int minTokens, int maxTokens,
                                                  int... ranksToEval) {
        List<String> normalisedGs = normalize(
                prune(gsTerms, ignoreSymbols, ignoreDigits, caseInsensitive, minChar, maxChar, minTokens, maxTokens),
                lemmatiser, true);
        List<String> normalisedTerms = normalize(
                prune(terms, ignoreSymbols, ignoreDigits, caseInsensitive, minChar, maxChar, minTokens, maxTokens),
                lemmatiser, true);
        return computePrecisionAtRank(normalisedGs, normalisedTerms, ranksToEval);
    }

    /**
     * Precision of the first {@code n} candidates for every {@code n} in {@code ranksToEval}.
     *
     * @return one value per cut-off; 0.0 where the cut-off is not positive or exceeds the number
     *         of candidates
     */
    public static double[] computePrecisionAtRank(List<String> gsTerms, List<String> terms, int[] ranksToEval) {
        double[] result = new double[ranksToEval.length];
        for (int idx = 0; idx < ranksToEval.length; idx++) {
            int rank = ranksToEval[idx];
            // ">=" so that a list holding exactly `rank` terms is still scored.
            if (rank > 0 && terms.size() >= rank) {
                result[idx] = computeOverallPrecision(gsTerms, terms.subList(0, rank));
            }
        }
        return result;
    }

    /**
     * Average precision over the first {@code topK} ranked candidates: the sum over ranks
     * {@code k} of {@code P(k) * (R(k) - R(k-1))}. Recall only changes at ranks holding a GS
     * term, where it grows by {@code 1/|GS|}, so the sum reduces to the mean of {@code P(k)}
     * at those ranks divided by the number of distinct GS terms.
     *
     * <p>If fewer than {@code topK} candidates exist, all available candidates are used.
     *
     * @param gsTerms gold-standard terms
     * @param terms   ranked candidate terms, best first
     * @param topK    number of top-ranked candidates to consider
     * @return average precision in [0, 1]; 0.0 for an empty gold standard or non-positive {@code topK}
     */
    public static double computeAveragePrecision(List<String> gsTerms, List<String> terms, int topK) {
        if (gsTerms == null || terms == null || gsTerms.isEmpty() || topK <= 0) {
            return 0.0d;
        }
        Set<String> gold = new HashSet<>(gsTerms);
        int limit = Math.min(topK, terms.size());
        int hits = 0;
        double precisionSum = 0.0d;
        for (int rank = 1; rank <= limit; rank++) {
            if (gold.contains(terms.get(rank - 1))) {
                hits++;
                precisionSum += (double) hits / rank;
            }
        }
        return precisionSum / gold.size();
    }

    /** Prunes and lemmatises both lists, then delegates to {@link #computeOverallPrecision(List, List)}. */
    public static double computeOverallPrecision(Lemmatiser lemmatiser, List<String> gsTerms, List<String> terms,
                                                 boolean ignoreSymbols, boolean ignoreDigits,
                                                 boolean caseInsensitive,
                                                 int minChar, int maxChar, int minTokens, int maxTokens) {
        List<String> normalisedGs = normalize(
                prune(gsTerms, ignoreSymbols, ignoreDigits, caseInsensitive, minChar, maxChar, minTokens, maxTokens),
                lemmatiser, true);
        List<String> normalisedTerms = normalize(
                prune(terms, ignoreSymbols, ignoreDigits, caseInsensitive, minChar, maxChar, minTokens, maxTokens),
                lemmatiser, true);
        return computeOverallPrecision(normalisedGs, normalisedTerms);
    }

    /**
     * Recall of the first {@code n} candidates for every {@code n} in {@code ranksToEval}.
     *
     * @return one value per cut-off; 0.0 where the cut-off is not positive or exceeds the number
     *         of candidates
     */
    public static double[] computeRecallAtRank(List<String> gsTerms, List<String> terms, int[] ranksToEval) {
        double[] result = new double[ranksToEval.length];
        for (int idx = 0; idx < ranksToEval.length; idx++) {
            int rank = ranksToEval[idx];
            if (rank > 0 && terms.size() >= rank) {
                result[idx] = computeOverallRecall(gsTerms, terms.subList(0, rank));
            }
        }
        return result;
    }

    /** Precision of {@code terms} against {@code gsTerms}, rounded to two decimals. */
    public static double computeOverallPrecision(List<String> gsTerms, List<String> terms) {
        return round(precision(gsTerms, terms), 2);
    }

    /**
     * Normalises both lists into sets and feeds them to an OpenNLP {@code FMeasure} via
     * {@code updateScores(gs, candidates)}.
     */
    public static FMeasure computeFMeasureWithNormalisation(Lemmatiser lemmatiser, List<String> gsTerms,
                                                            List<String> terms,
                                                            boolean ignoreSymbols, boolean ignoreDigits,
                                                            boolean caseInsensitive,
                                                            int minChar, int maxChar, int minTokens, int maxTokens) {
        Set<String> gsSet = termSetNormalisation(gsTerms, lemmatiser, ignoreSymbols, ignoreDigits,
                caseInsensitive, minChar, maxChar, minTokens, maxTokens);
        Set<String> termSet = termSetNormalisation(terms, lemmatiser, ignoreSymbols, ignoreDigits,
                caseInsensitive, minChar, maxChar, minTokens, maxTokens);
        FMeasure scores = new FMeasure();
        scores.updateScores(gsSet.toArray(), termSet.toArray());
        return scores;
    }

    /** Prunes and lemmatises {@code terms} and returns the distinct results as a set. */
    public static Set<String> termSetNormalisation(List<String> terms, Lemmatiser lemmatiser,
                                                   boolean ignoreSymbols, boolean ignoreDigits,
                                                   boolean caseInsensitive,
                                                   int minChar, int maxChar, int minTokens, int maxTokens) {
        List<String> normalised = normalize(
                prune(terms, ignoreSymbols, ignoreDigits, caseInsensitive, minChar, maxChar, minTokens, maxTokens),
                lemmatiser, true);
        return new HashSet<>(normalised);
    }

    /** Prunes a single term and passes the result to {@link #normaliseTerm(String, Lemmatiser)}. */
    public static String termNormalisation(String term, Lemmatiser lemmatiser,
                                           boolean ignoreSymbols, boolean ignoreDigits, boolean caseInsensitive,
                                           int minChar, int maxChar, int minTokens, int maxTokens) {
        return normaliseTerm(
                prune(term, ignoreSymbols, ignoreDigits, caseInsensitive, minChar, maxChar, minTokens, maxTokens),
                lemmatiser);
    }

    /**
     * Rounds {@code value} half-up to {@code places} decimal places.
     *
     * <p>NaN and infinities are returned unchanged (they have no {@code BigDecimal} form).
     * {@code BigDecimal.valueOf} is used so that rounding acts on the shortest decimal
     * representation of the double (e.g. 1.005 rounds to 1.01) rather than on its exact binary
     * expansion.
     */
    public static double round(double value, int places) {
        if (Double.isNaN(value) || Double.isInfinite(value)) {
            return value;
        }
        return BigDecimal.valueOf(value).setScale(places, RoundingMode.HALF_UP).doubleValue();
    }

    /**
     * Fraction of {@code terms} that occur in {@code gsTerms}, rounded to two decimals.
     *
     * @return 0.0 when {@code terms} is empty
     */
    public static double precision(List<String> gsTerms, List<String> terms) {
        if (terms.isEmpty()) {
            return 0.0d;
        }
        // Floating-point division: int / int would truncate every score to 0 or 1.
        return round((double) countMatches(gsTerms, terms) / terms.size(), 2);
    }

    /** Prunes and lemmatises both lists, then delegates to {@link #computeOverallRecall(List, List)}. */
    public static double computeOverallRecall(List<String> gsTerms, List<String> terms, Lemmatiser lemmatiser,
                                              boolean ignoreSymbols, boolean ignoreDigits, boolean caseInsensitive,
                                              int minChar, int maxChar, int minTokens, int maxTokens) {
        List<String> normalisedGs = normalize(
                prune(gsTerms, ignoreSymbols, ignoreDigits, caseInsensitive, minChar, maxChar, minTokens, maxTokens),
                lemmatiser, true);
        List<String> normalisedTerms = normalize(
                prune(terms, ignoreSymbols, ignoreDigits, caseInsensitive, minChar, maxChar, minTokens, maxTokens),
                lemmatiser, true);
        return computeOverallRecall(normalisedGs, normalisedTerms);
    }

    /**
     * Number of {@code terms} that occur in {@code gsTerms}, divided by the size of
     * {@code gsTerms}, rounded to two decimals.
     *
     * @return 0.0 when {@code gsTerms} is empty
     */
    public static double computeOverallRecall(List<String> gsTerms, List<String> terms) {
        if (gsTerms.isEmpty()) {
            return 0.0d;
        }
        return round((double) countMatches(gsTerms, terms) / gsTerms.size(), 2);
    }

    /** Counts the entries of {@code terms} (duplicates included) that are present in {@code gsTerms}. */
    private static long countMatches(List<String> gsTerms, List<String> terms) {
        // Hash lookup instead of List.contains keeps this linear in the input size.
        Set<String> gold = new HashSet<>(gsTerms);
        return terms.stream().filter(gold::contains).count();
    }

    /**
     * Recall at each cut-off computed with OpenNLP's {@code FMeasure.recall(gs, topN)}, rounded
     * to two decimals.
     *
     * @return one value per cut-off; 0.0 where the cut-off is not positive or exceeds the number
     *         of candidates
     */
    public static double[] topNRecall(List<String> gsTerms, List<String> terms, int[] ranksToEval) {
        double[] result = new double[ranksToEval.length];
        Object[] gold = gsTerms.toArray();
        for (int idx = 0; idx < ranksToEval.length; idx++) {
            int rank = ranksToEval[idx];
            if (rank > 0 && terms.size() >= rank) {
                result[idx] = round(FMeasure.recall(gold, terms.subList(0, rank).toArray()), 2);
            }
        }
        return result;
    }

    /**
     * Applies a fixed list of literal substring replacements to every term and returns the
     * rewritten terms in the same order.
     */
    public static List<String> synonymNormalisation4Genia(List<String> terms) {
        List<String> rewritten = new ArrayList<>(terms.size());
        for (String term : terms) {
            rewritten.add(term
                    .replace("mouse", "mice")
                    .replace("Mouse", "Mice")
                    // NOTE(review): AnalysisParams.PREFIX is presumably a decompiler substitution for the
                    // literal "analysis" - confirm against the Solr constant before inlining it.
                    .replace("analyses", AnalysisParams.PREFIX)
                    .replace("Analyses", "Analysis")
                    .replace("women", "woman")
                    .replace("l cell resistance", "Lymphoid cell resistance")
                    .replace("DS lymphocyte", "DS ones"));
        }
        return rewritten;
    }

    /** F-measure reported by OpenNLP's {@code FMeasure} after {@code updateScores(gs, candidates)}. */
    public static double fmeasure(List<String> gsTerms, List<String> terms) {
        FMeasure scores = new FMeasure();
        scores.updateScores(gsTerms.toArray(), terms.toArray());
        return scores.getFMeasure();
    }

    /**
     * Harmonic mean of precision and recall, rounded to two decimals.
     *
     * @return the F-measure, or -1.0 when {@code precision + recall} is not greater than zero
     */
    public static double getFMeasure(Double precision, Double recall) {
        double p = precision.doubleValue();
        double r = recall.doubleValue();
        if (p + r > 0.0d) {
            return round((2.0d * (p * r)) / (p + r), 2);
        }
        return -1.0d;
    }

    /**
     * Lemmatises both halves of a term joined by {@code symbol} (for example {@code "a/b"}).
     *
     * <p>The symbol is matched literally. If splitting does not yield exactly two parts the term
     * is returned unchanged.
     */
    public static String lemmatizeSymbolCompoundTerm(String term, String symbol, Lemmatiser lemmatiser) {
        // Quote the separator: String.split would otherwise interpret it as a regular expression.
        String[] parts = term.split(Pattern.quote(symbol));
        if (parts.length != 2) {
            return term;
        }
        return normaliseTerm(parts[0], lemmatiser) + symbol + normaliseTerm(parts[1], lemmatiser);
    }

    /**
     * Lemmatises every term and returns the distinct non-empty results in first-seen order.
     *
     * @param terms            terms to normalise
     * @param lemmatiser       lemmatiser applied to each term or token
     * @param tokeniseOnSpaces when true each space-separated token is lemmatised on its own
     *                         (tokens containing '/' via {@link #lemmatizeSymbolCompoundTerm}) and
     *                         the results are re-joined with single spaces; when false the whole
     *                         term is passed to the lemmatiser
     */
    public static List<String> normalize(List<String> terms, Lemmatiser lemmatiser, boolean tokeniseOnSpaces) {
        // LinkedHashSet gives ordered de-duplication without the quadratic List.contains scan.
        Set<String> unique = new LinkedHashSet<>();
        for (String term : terms) {
            String normalised;
            if (tokeniseOnSpaces) {
                StringBuilder joined = new StringBuilder();
                for (String token : term.split(" ")) {
                    String lemma = token.contains("/")
                            ? lemmatizeSymbolCompoundTerm(token, "/", lemmatiser)
                            : normaliseTerm(token, lemmatiser);
                    joined.append(lemma).append(" ");
                }
                normalised = joined.toString().trim();
            } else {
                normalised = normaliseTerm(term, lemmatiser);
            }
            if (StringUtils.isNotEmpty(normalised)) {
                unique.add(normalised);
            }
        }
        return new ArrayList<>(unique);
    }

    /**
     * Returns {@code lemmatiser.normalize(term, "NN")}, trimmed.
     * NOTE(review): "NN" is presumably the noun part-of-speech tag - confirm against Lemmatiser.
     */
    public static String normaliseTerm(String term, Lemmatiser lemmatiser) {
        return lemmatiser.normalize(term, "NN").trim();
    }

    /**
     * Prunes every term with the single-term pruning rules (symbol/digit replacement, trimming
     * to the first and last letter or digit, character and token ranges, optional lower-casing)
     * and returns the distinct non-empty results in first-seen order.
     *
     * @param minChar   minimum term length in characters, or -1 for none
     * @param maxChar   maximum term length in characters, or -1 for none
     * @param minTokens minimum number of whitespace-separated tokens, or -1 for none
     * @param maxTokens maximum number of whitespace-separated tokens, or -1 for none
     */
    public static List<String> prune(List<String> terms, boolean ignoreSymbols, boolean ignoreDigits,
                                     boolean caseInsensitive,
                                     int minChar, int maxChar, int minTokens, int maxTokens) {
        Set<String> unique = new LinkedHashSet<>();
        for (String term : terms) {
            String pruned = prune(term, ignoreSymbols, ignoreDigits, caseInsensitive,
                    minChar, maxChar, minTokens, maxTokens);
            if (StringUtils.isNotEmpty(pruned)) {
                unique.add(pruned);
            }
        }
        return new ArrayList<>(unique);
    }

    /**
     * Cleans one term; returns "" when the term is null or falls outside the configured ranges.
     *
     * <p>When every option is off and every bound is -1 the term is returned untouched.
     */
    private static String prune(String term, boolean ignoreSymbols, boolean ignoreDigits, boolean caseInsensitive,
                                int minChar, int maxChar, int minTokens, int maxTokens) {
        if (term == null) {
            return "";
        }
        if (!ignoreSymbols && !ignoreDigits && !caseInsensitive
                && minChar == UNBOUNDED && maxChar == UNBOUNDED
                && minTokens == UNBOUNDED && maxTokens == UNBOUNDED) {
            return term;
        }

        String cleaned = term;
        if (ignoreDigits) {
            cleaned = WHITESPACE.matcher(DIGITS.matcher(cleaned).replaceAll(" ")).replaceAll(" ");
        }
        if (ignoreSymbols) {
            cleaned = WHITESPACE.matcher(SYMBOLS.matcher(cleaned).replaceAll(" ")).replaceAll(" ");
        }

        // Cut everything before the first and after the last letter or digit. A string without
        // any letter or digit is left whole here (and then only whitespace-trimmed).
        int start = 0;
        int end = cleaned.length();
        for (int idx = 0; idx < cleaned.length(); idx++) {
            if (Character.isLetterOrDigit(cleaned.charAt(idx))) {
                start = idx;
                break;
            }
        }
        for (int idx = cleaned.length() - 1; idx >= 0; idx--) {
            if (Character.isLetterOrDigit(cleaned.charAt(idx))) {
                end = idx + 1;
                break;
            }
        }
        String trimmed = cleaned.substring(start, end).trim();

        if (isOutsideRange(trimmed.length(), minChar, maxChar)) {
            return "";
        }
        if (isOutsideRange(WHITESPACE.split(trimmed).length, minTokens, maxTokens)) {
            return "";
        }
        // Locale.ROOT keeps lower-casing independent of the JVM's default locale.
        return caseInsensitive ? trimmed.toLowerCase(Locale.ROOT) : trimmed;
    }

    /** True when {@code value} violates a bound; each bound is ignored independently when it is -1. */
    private static boolean isOutsideRange(int value, int min, int max) {
        return (min != UNBOUNDED && value < min) || (max != UNBOUNDED && value > max);
    }

    /**
     * Command-line entry point.
     *
     * <p>Arguments: corpus name ("genia" selects the GENIA loader, anything else the ACL RD-TEC
     * loader), ATE output folder, output file type, report file, gold-standard file. Prints the
     * usage text and exits with status -1 when fewer than five arguments are supplied.
     */
    public static void main(String[] strArr) throws IOException, JATEException, ParseException {
        // All five positional arguments are read below, so fewer than five is a usage error.
        if (strArr == null || strArr.length < 5) {
            StringBuilder usage = new StringBuilder("Usage:\n");
            // NOTE(review): EngDocumentParser.defParaDelimitor is presumably a decompiler substitution
            // for a blank-line literal - confirm before replacing it.
            usage.append("java -cp 'jate.jar' ").append(Scorer.class.getName()).append(" ")
                    .append("[CORPUS_NAME] [ATE_OUTPUT_DIR] [ATE_OUTPUT_FILE_TYPE] [EVAL_OUTPUT_FILE] [GS_FILE] ")
                    .append(EngDocumentParser.defParaDelimitor);
            usage.append("Example: java -cp 'jate.jar' ").append(Scorer.class.getName())
                    .append(" genia /c/jate/outputDir/ csv genia_eval.csv /c/jate/eval/GENIA/concept.txt \n\n");
            usage.append("[OPTIONS]:\n")
                    .append("\t\targs[0]:\t\t 'genia', 'aclrdtec1' or any other dataset name.\n")
                    .append("\t\targs[1]:\t\t ATE algorithms output folder that contains one or more ranked term candidates output.\n")
                    .append("\t\targs[2]:\t\t ATE algorithms output file type. Two options are 'csv' and 'json'. If file type is 'csv', it should contain a header row. \n")
                    .append("\t\targs[3]:\t\t A file name & path to save evaluation output (should not be the same folder of ATE algorithm output.\n")
                    .append("\t\targs[4]:\t\t Gold standard file to evaluate against.\n");
            System.out.println(usage);
            System.exit(-1);
        }

        String workingDir = System.getProperty("user.dir");
        // NOTE(review): Test.NAME (Hadoop shell) is presumably a decompiler substitution for the
        // path segment "test" - confirm before replacing it.
        String lemmatiserDir = Paths.get(workingDir, "src", Test.NAME, "resource", "lemmatiser").toString();
        Lemmatiser lemmatiser = new Lemmatiser(new EngLemmatiser(lemmatiserDir, false, false));

        String corpusName = strArr[0];
        String ateOutputFolder = strArr[1];
        String ateOutputType = strArr[2];
        String outFile = strArr[3];
        String gsFile = strArr[4];

        if (corpusName.equals("genia")) {
            createReportGenia(lemmatiser, ateOutputFolder, ateOutputType, gsFile, outFile,
                    EVAL_CONDITION_IGNORE_SYMBOL, EVAL_CONDITION_IGNORE_DIGITS, EVAL_CONDITION_CASE_INSENSITIVE,
                    EVAL_CONDITION_CHAR_RANGE_MIN, EVAL_CONDITION_CHAR_RANGE_MAX,
                    EVAL_CONDITION_TOKEN_RANGE_MIN, EVAL_CONDITION_TOKEN_RANGE_MAX,
                    EVAL_CONDITION_TOP_N, EVAL_CONDITION_TOP_K, IS_COMPUTE_ATR4S_AvP);
        } else {
            createReportACLRD(lemmatiser, ateOutputFolder, ateOutputType, gsFile, outFile,
                    EVAL_CONDITION_IGNORE_SYMBOL, EVAL_CONDITION_IGNORE_DIGITS, EVAL_CONDITION_CASE_INSENSITIVE,
                    EVAL_CONDITION_CHAR_RANGE_MIN, EVAL_CONDITION_CHAR_RANGE_MAX,
                    EVAL_CONDITION_TOKEN_RANGE_MIN, EVAL_CONDITION_TOKEN_RANGE_MAX,
                    EVAL_CONDITION_TOP_N, EVAL_CONDITION_TOP_K, IS_COMPUTE_ATR4S_AvP);
        }
    }
}
