/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.mappingcores;

import com.google.common.collect.Sets;
import de.julielab.gene.candidateretrieval.BooleanQueryGenerator;
import de.julielab.gene.candidateretrieval.CandidateRetrieval;
import de.julielab.gene.candidateretrieval.GeneRecordQueryGenerator;
import de.julielab.gene.candidateretrieval.NameCentricRetrieval;
import de.julielab.gene.candidateretrieval.scoring.MaxEntScorerPairExtractor;
import de.julielab.geneexpbase.CandidateFilter;
import de.julielab.geneexpbase.candidateretrieval.QueryGenerator;
import de.julielab.geneexpbase.candidateretrieval.SynHit;
import de.julielab.geneexpbase.genemodel.GeneDocument;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.MentionMappingResult;
import de.julielab.java.utilities.FileUtilities;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import javax.inject.Inject;
import org.apache.lucene.search.BooleanClause;

public class QuercusCandidateSetter {
    private static final boolean WRITE_NAME_SIMILARITY_DATA = false;
    private final BooleanQueryGenerator queryGenerator;
    private final GeneRecordQueryGenerator geneRecordQueryGenerator;
    private final NameCentricRetrieval nameCentricRetrieval;
    private final CandidateRetrieval geneRecordCandidateRetrieval;

    @Inject
    public QuercusCandidateSetter(NameCentricRetrieval nameCentricRetrieval, CandidateRetrieval geneRecordCandidateRetrieval) {
        this.nameCentricRetrieval = nameCentricRetrieval;
        this.geneRecordCandidateRetrieval = geneRecordCandidateRetrieval;
        this.queryGenerator = new BooleanQueryGenerator(BooleanClause.Occur.SHOULD, -1);
        this.geneRecordQueryGenerator = new GeneRecordQueryGenerator(true, false, true, true, true, true, true, Integer.MAX_VALUE);
    }

    public void setCandidates(GeneDocument document) {
        for (GeneMention gm : document.getGenesIterable()) {
            MentionMappingResult mmr = new MentionMappingResult(gm);
            gm.setMentionMappingResult(mmr);
            mmr.tax2originalCandidates = new HashMap<String, List<SynHit>>();
            for (String taxId : gm.getTaxonomyIds()) {
                List<SynHit> candidates = this.nameCentricRetrieval.getCandidates(gm, Collections.emptyList(), List.of(taxId), (QueryGenerator)this.queryGenerator);
                for (SynHit sh : candidates) {
                    sh.setTaxId(taxId);
                    sh.setSynonymPriorities(sh.getPrioritiesOfIds(sh.getGeneIdsOfTaxId(taxId)));
                    sh.setIds(sh.getGeneIdsOfTaxId(taxId).collect(Collectors.toList()));
                    try {
                        sh.setTaxIds(IntStream.range(0, sh.getIds().size()).mapToObj(i -> taxId).collect(Collectors.toList()));
                    }
                    catch (Throwable e) {
                        e.printStackTrace();
                    }
                    if (!sh.isDisambiguated() || sh.getIds().isEmpty()) continue;
                    sh.setId(sh.getIds().get(0));
                }
                mmr.tax2originalCandidates.put(taxId, candidates);
            }
            mmr.tax2lexicallyRerankedCandidates = mmr.tax2originalCandidates;
            mmr.tax2finalRankedCandidates = mmr.tax2originalCandidates;
            for (String taxId : gm.getTaxonomyIds()) {
                if (!mmr.tax2originalCandidates.get(taxId).isEmpty()) continue;
                gm.reject(taxId, MentionMappingResult.RejectReason.NO_CANDIDATES);
            }
            for (String taxId : gm.getNonRejectedTaxonomyIds()) {
                if (gm.hasExactMatchInTax(taxId)) continue;
                this.examineForExactMatch(gm, taxId);
            }
        }
    }

    private void writeNameSimilarityData(GeneDocument document) {
        File positivePairsFile = new File("positiveNamePairs.tsv");
        File allPairsFile = new File("allNamePairs.tsv");
        File mergedTrainingData = new File("posNegPairs.tsv");
        try (BufferedWriter posWriter = FileUtilities.getWriterToFile(positivePairsFile, true);
             BufferedWriter completeWriter = FileUtilities.getWriterToFile(allPairsFile, true);){
            HashSet<String> alreadySeenNames = new HashSet<String>();
            for (GeneMention gm : document.getNonRejectedGenesIterable()) {
                if (!gm.hasGoldMentions() || gm.hasExactCandidateMatch() || !alreadySeenNames.add(gm.getNormalizedText())) continue;
                Set<String> goldIds = gm.getAllGoldIdAsSet();
                for (String taxId : gm.getNonRejectedTaxonomyIds()) {
                    for (SynHit candidate : gm.getMentionMappingResult().tax2originalCandidates.get(taxId)) {
                        String synonym = candidate.getSynonym();
                        String pair = gm.getNormalizedText() + "\t" + synonym;
                        if (candidate.getIds().stream().anyMatch(goldIds::contains)) {
                            posWriter.write(pair);
                            posWriter.newLine();
                        }
                        completeWriter.write(pair);
                        completeWriter.newLine();
                    }
                }
            }
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        MaxEntScorerPairExtractor pairExtractor = new MaxEntScorerPairExtractor();
        ArrayList<String[]> pairs = pairExtractor.getPairs(positivePairsFile, allPairsFile, 1000, 1.0f);
        try {
            pairExtractor.storePairs(pairs, mergedTrainingData);
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    private void examineForExactMatch(GeneMention gm, String taxId) {
        List<SynHit> candidates = gm.getMentionMappingResult().tax2lexicallyRerankedCandidates.get(taxId);
        for (SynHit candidate : candidates) {
            String[] synonymTokensArray = candidate.getSynonym().split("\\s+");
            List<String> synonymTokens = Arrays.asList(synonymTokensArray);
            String[] gmTokensArray = gm.getNormalizedText().split("\\s+");
            if (gmTokensArray.length == 1) {
                return;
            }
            List<String> gmTokens = Arrays.asList(gmTokensArray);
            synonymTokens.sort(Comparator.comparing(Function.identity()));
            gmTokens.sort(Comparator.comparing(Function.identity()));
            if (synonymTokens.equals(gmTokens)) {
                candidate.setExactMatch(true);
                break;
            }
            Sets.SetView<String> differentWords = Sets.symmetricDifference(new HashSet<String>(synonymTokens), new HashSet<String>(gmTokens));
            if (gm.getSpecificType() != GeneMention.SpecificType.GENE || differentWords.size() != 1 || CandidateFilter.isSpecifier((String)differentWords.iterator().next())) continue;
            candidate.setExactMatch(true);
            break;
        }
    }

    public void setCandidatesToGoldMentions(GeneDocument document) {
        for (GeneMention gm : document.getGenesIterable()) {
            if (!gm.hasGoldMentions()) continue;
            List<GeneMention> overlappingGoldMentions = gm.getOverlappingGoldMentions();
            for (GeneMention goldGm : overlappingGoldMentions) {
                if (!goldGm.getOffsets().equals(gm.getOffsets()) || Sets.intersection(goldGm.getTaxonomyIdsSet(), gm.getTaxonomyIdsSet()).isEmpty()) continue;
                MentionMappingResult goldMmr = new MentionMappingResult(gm.getMentionMappingResult());
                goldGm.setMentionMappingResult(goldMmr);
                goldMmr.mappedMention = goldGm;
                HashSet<String> taxIdsForRemoval = new HashSet<String>();
                for (String taxId : goldMmr.tax2originalCandidates.keySet()) {
                    if (goldGm.getTaxonomyIdsSet().contains(taxId)) continue;
                    taxIdsForRemoval.add(taxId);
                }
                for (String taxId : taxIdsForRemoval) {
                    goldMmr.tax2originalCandidates.remove(taxId);
                    goldMmr.tax2lexicallyRerankedCandidates.remove(taxId);
                    goldMmr.tax2finalRankedCandidates.remove(taxId);
                }
                assert (goldGm.getMentionMappingResult().tax2originalCandidates.values().stream().map(Collection::stream).count() > 0L) : "No candidates for gold mention were found: " + goldGm;
            }
        }
        for (GeneMention goldGm : () -> document.getGoldGenes().values().stream().flatMap(Collection::stream).iterator()) {
            if (goldGm.getMentionMappingResult() != null) continue;
            goldGm.setNormalizer(document.getTermNormalizer());
            MentionMappingResult goldMmr = new MentionMappingResult(goldGm);
            goldGm.setMentionMappingResult(goldMmr);
            goldMmr.tax2originalCandidates = new HashMap<String, List<SynHit>>();
            for (String taxId : goldGm.getTaxonomyIds()) {
                List<SynHit> candidates = this.nameCentricRetrieval.getCandidates(goldGm, goldGm.getIds(), List.of(taxId), (QueryGenerator)this.queryGenerator);
                goldMmr.tax2originalCandidates.put(taxId, candidates);
            }
            goldMmr.tax2lexicallyRerankedCandidates = goldMmr.tax2originalCandidates;
            goldMmr.tax2finalRankedCandidates = goldMmr.tax2originalCandidates;
            assert (goldGm.getMentionMappingResult().tax2originalCandidates.values().stream().map(Collection::stream).count() > 0L) : "No candidates for gold mention were found: " + goldGm;
        }
    }
}

