/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.gene.candidateretrieval;

import de.julielab.gene.candidateretrieval.GeneRecordSynonymsQueryGenerator;
import de.julielab.geneexpbase.CandidateFilter;
import de.julielab.geneexpbase.candidateretrieval.CandidateCacheKey;
import de.julielab.geneexpbase.candidateretrieval.QueryGenerator;
import de.julielab.geneexpbase.genemodel.GeneName;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.jetbrains.annotations.NotNull;

/**
 * Generates the Lucene query used to retrieve gene record candidates for a gene name.
 *
 * <p>The query consists of one mandatory clause that matches the tokens of the gene name or of one
 * of its alternatives, optional scoring clauses (synonym queries, apposition contexts, context
 * names) and optional filter clauses (gene ID set, ID prefix, taxonomy ID).
 *
 * <p>Within this class, {@code searchExactMatches}, {@code useNpContextAsRelevanceSignal},
 * {@code useAlternativeNamesAsRelevanceSignal} and {@code useAppositionsAsRelevanceSignal} are only
 * stored (the first three also contribute to {@link #getName()}); they do not influence
 * {@link #generateQuery(CandidateCacheKey)}.
 */
public class GeneRecordQueryGenerator
extends QueryGenerator {
    public static final String[] ALL_FIELDS = new String[]{"symbol", "symbol_from_nomenclature", "synonyms", "full_names", "other_designations", "chromosome", "description", "generif", "godesc", "interaction", "maplocation", "summary", "protein_names", "uniprot_names", "bio_thesaurus"};
    public static final String[] SYNONYM_FIELDS = new String[]{"symbol", "symbol_from_nomenclature", "synonyms", "full_names", "other_designations", "maplocation", "protein_names", "uniprot_names", "bio_thesaurus"};
    public static final String[] SYNONYM_FIELDS_NOT_CHROMOSOMAL = new String[]{"symbol", "symbol_from_nomenclature", "synonyms", "full_names", "other_designations", "protein_names", "uniprot_names", "bio_thesaurus"};
    public static final List<String> ALL_FIELDS_LIST = Arrays.asList(ALL_FIELDS);

    /** Key under which the dismax tie breaker is looked up in the field weight map. */
    private static final String DISMAX_TIE_BREAKER_KEY = "dismax_tie_breaker";
    /** Tie breaker used when no field weights are given or the weights do not define one. */
    private static final float DEFAULT_DISMAX_TIE_BREAKER = 0.3f;

    private final boolean useDisMax;
    private final boolean flatDisjunction;
    private final boolean searchExactMatches;
    private final boolean useSynonymMatchesAsRelevanceSignal;
    private final boolean useNpContextAsRelevanceSignal;
    private final boolean useAlternativeNamesAsRelevanceSignal;
    private final boolean useContextGenesAsRelevanceSignal;
    private final boolean useAppositionsAsRelevanceSignal;
    private final int relaxation;
    /** Only non-null when {@code useSynonymMatchesAsRelevanceSignal} is set. */
    private final GeneRecordSynonymsQueryGenerator synonymsQueryGeneratorApprox;
    /** Only non-null when {@code useSynonymMatchesAsRelevanceSignal} is set. */
    private final GeneRecordSynonymsQueryGenerator synonymsQueryGeneratorExact;

    /** Creates a generator with every option at its default (see the chained constructors). */
    public GeneRecordQueryGenerator() {
        this(false);
    }

    /** Like the full constructor, with {@code flatDisjunction = false} and the further defaults. */
    public GeneRecordQueryGenerator(boolean useDisMax) {
        this(useDisMax, false);
    }

    /**
     * Like the full constructor, with {@code searchExactMatches = true},
     * {@code useSynonymMatchesAsRelevanceSignal = false} and the further defaults.
     */
    public GeneRecordQueryGenerator(boolean useDisMax, boolean flatDisjunction) {
        this(useDisMax, flatDisjunction, true, false);
    }

    /** Like the full constructor, with {@code useNpContextAsRelevanceSignal = false} and the further defaults. */
    public GeneRecordQueryGenerator(boolean useDisMax, boolean flatDisjunction, boolean searchExactMatches, boolean useSynonymMatchesAsRelevanceSignal) {
        this(useDisMax, flatDisjunction, searchExactMatches, useSynonymMatchesAsRelevanceSignal, false);
    }

    /**
     * Like the full constructor, with {@code useAlternativeNamesAsRelevanceSignal = false},
     * {@code useAppositionsAsRelevanceSignal = false} and the further defaults.
     */
    public GeneRecordQueryGenerator(boolean useDisMax, boolean flatDisjunction, boolean searchExactMatches, boolean useSynonymMatchesAsRelevanceSignal, boolean useNpContextAsRelevanceSignal) {
        this(useDisMax, flatDisjunction, searchExactMatches, useSynonymMatchesAsRelevanceSignal, useNpContextAsRelevanceSignal, false, false);
    }

    /** Like the full constructor, with {@code relaxation = 0} and {@code useContextGenesAsRelevanceSignal = false}. */
    public GeneRecordQueryGenerator(boolean useDisMax, boolean flatDisjunction, boolean searchExactMatches, boolean useSynonymMatchesAsRelevanceSignal, boolean useNpContextAsRelevanceSignal, boolean useAlternativeNamesAsRelevanceSignal, boolean useAppositionsAsRelevanceSignal) {
        this(useDisMax, flatDisjunction, searchExactMatches, useSynonymMatchesAsRelevanceSignal, useNpContextAsRelevanceSignal, useAlternativeNamesAsRelevanceSignal, useAppositionsAsRelevanceSignal, 0);
    }

    /** Like the full constructor, with {@code useContextGenesAsRelevanceSignal = false}. */
    public GeneRecordQueryGenerator(boolean useDisMax, boolean flatDisjunction, boolean searchExactMatches, boolean useSynonymMatchesAsRelevanceSignal, boolean useNpContextAsRelevanceSignal, boolean useAlternativeNamesAsRelevanceSignal, boolean useAppositionsAsRelevanceSignal, int relaxation) {
        this(useDisMax, flatDisjunction, searchExactMatches, useSynonymMatchesAsRelevanceSignal, useNpContextAsRelevanceSignal, useAlternativeNamesAsRelevanceSignal, useAppositionsAsRelevanceSignal, false, relaxation);
    }

    /** Like the full constructor, with {@code relaxation = 0}. */
    public GeneRecordQueryGenerator(boolean useDisMax, boolean flatDisjunction, boolean searchExactMatches, boolean useSynonymMatchesAsRelevanceSignal, boolean useNpContextAsRelevanceSignal, boolean useAlternativeNamesAsRelevanceSignal, boolean useAppositionsAsRelevanceSignal, boolean useContextGenesAsRelevanceSignal) {
        this(useDisMax, flatDisjunction, searchExactMatches, useSynonymMatchesAsRelevanceSignal, useNpContextAsRelevanceSignal, useAlternativeNamesAsRelevanceSignal, useAppositionsAsRelevanceSignal, useContextGenesAsRelevanceSignal, 0);
    }

    /**
     * Full constructor.
     *
     * @param useDisMax combine the per-field queries of a token with a {@link DisjunctionMaxQuery}
     *     instead of a boolean disjunction (ignored when {@code flatDisjunction} is set)
     * @param flatDisjunction match a name through one plain disjunction over all its tokens, without
     *     a minimum number of tokens that have to match
     * @param searchExactMatches stored and reported in {@link #getName()}; not read otherwise here
     * @param useSynonymMatchesAsRelevanceSignal add an approximate and an exact
     *     {@link GeneRecordSynonymsQueryGenerator} query as optional clauses
     * @param useNpContextAsRelevanceSignal stored and reported in {@link #getName()}; not read otherwise here
     * @param useAlternativeNamesAsRelevanceSignal stored and reported in {@link #getName()}; not read otherwise here
     * @param useAppositionsAsRelevanceSignal stored only; not read by this class
     * @param useContextGenesAsRelevanceSignal exposed through {@link #isUseContextGenesAsRelevanceSignal()}
     * @param relaxation number of name tokens that may stay unmatched; {@link Integer#MAX_VALUE}
     *     stands for "all tokens", at least one token always has to match
     */
    public GeneRecordQueryGenerator(boolean useDisMax, boolean flatDisjunction, boolean searchExactMatches, boolean useSynonymMatchesAsRelevanceSignal, boolean useNpContextAsRelevanceSignal, boolean useAlternativeNamesAsRelevanceSignal, boolean useAppositionsAsRelevanceSignal, boolean useContextGenesAsRelevanceSignal, int relaxation) {
        this.useDisMax = useDisMax;
        this.flatDisjunction = flatDisjunction;
        this.searchExactMatches = searchExactMatches;
        this.useSynonymMatchesAsRelevanceSignal = useSynonymMatchesAsRelevanceSignal;
        this.useNpContextAsRelevanceSignal = useNpContextAsRelevanceSignal;
        this.useAlternativeNamesAsRelevanceSignal = useAlternativeNamesAsRelevanceSignal;
        this.useContextGenesAsRelevanceSignal = useContextGenesAsRelevanceSignal;
        this.useAppositionsAsRelevanceSignal = useAppositionsAsRelevanceSignal;
        this.relaxation = relaxation;
        this.synonymsQueryGeneratorApprox = useSynonymMatchesAsRelevanceSignal ? new GeneRecordSynonymsQueryGenerator(false) : null;
        this.synonymsQueryGeneratorExact = useSynonymMatchesAsRelevanceSignal ? new GeneRecordSynonymsQueryGenerator(true) : null;
    }

    /**
     * Builds the candidate retrieval query for the gene name of {@code key}.
     *
     * <p>Clauses are added in this order: the mandatory name clause, the two synonym queries (if
     * enabled), one optional clause per apposition context, one optional clause per context name
     * that is not already one of the searched names, and finally the filters for gene IDs, ID
     * prefix and taxonomy ID as far as the key defines them.
     *
     * @param key carries the gene name, optional field weights, context names and filters
     * @return the assembled boolean query
     * @throws BooleanQuery.TooManyClauses if Lucene rejects the number of clauses
     */
    @Override
    public Query generateQuery(CandidateCacheKey key) throws BooleanQuery.TooManyClauses {
        Map<String, Float> fieldWeights = key.getFieldWeights();
        float dismaxTieBreaker = resolveDismaxTieBreaker(fieldWeights);
        GeneName geneName = key.getGeneName();

        // The gene name itself is searched first, followed by its alternatives.
        List<GeneName> names = Stream.<GeneName>concat(Stream.of(geneName), geneName.getAlternatives().stream()).collect(Collectors.toList());
        HashSet<String> normalizedGeneNames = new HashSet<>();
        BooleanQuery.Builder namesBuilder = new BooleanQuery.Builder();
        for (GeneName name : names) {
            String normalizedName = name.getNormalizedText();
            normalizedGeneNames.add(normalizedName);
            namesBuilder.add(this.buildNameQuery(normalizedName, fieldWeights, dismaxTieBreaker), BooleanClause.Occur.SHOULD);
        }

        BooleanQuery.Builder mainQb = new BooleanQuery.Builder();
        mainQb.add(namesBuilder.build(), BooleanClause.Occur.MUST);

        if (this.useSynonymMatchesAsRelevanceSignal) {
            mainQb.add(this.synonymsQueryGeneratorApprox.generateQuery(key), BooleanClause.Occur.SHOULD);
            mainQb.add(this.synonymsQueryGeneratorExact.generateQuery(key), BooleanClause.Occur.SHOULD);
        }

        for (GeneName appositionContext : geneName.getAppositionContexts()) {
            mainQb.add(this.buildFlatDisjunction(fieldWeights, appositionContext.getNormalizedText()).build(), BooleanClause.Occur.SHOULD);
        }

        if (key.getContextNames() != null) {
            for (GeneName contextName : key.getContextNames()) {
                String normalizedContextName = contextName.getNormalizedText();
                // A context name equal to a searched name would only repeat the mandatory clause.
                if (normalizedGeneNames.contains(normalizedContextName)) {
                    continue;
                }
                mainQb.add(this.buildFlatDisjunction(fieldWeights, normalizedContextName).build(), BooleanClause.Occur.SHOULD);
            }
        }

        addFilters(mainQb, key);
        return mainQb.build();
    }

    /**
     * Reads the dismax tie breaker from the field weights.
     *
     * @return the configured value, or {@link #DEFAULT_DISMAX_TIE_BREAKER} when there are no field
     *     weights or they do not define a tie breaker
     */
    private static float resolveDismaxTieBreaker(Map<String, Float> fieldWeights) {
        if (fieldWeights == null) {
            return DEFAULT_DISMAX_TIE_BREAKER;
        }
        Float configured = fieldWeights.get(DISMAX_TIE_BREAKER_KEY);
        return configured != null ? configured : DEFAULT_DISMAX_TIE_BREAKER;
    }

    /** Builds the query matching a single (normalized) name, honoring the disjunction mode. */
    private Query buildNameQuery(String normalizedName, Map<String, Float> fieldWeights, float dismaxTieBreaker) {
        BooleanQuery.Builder tokenDisjunction = new BooleanQuery.Builder();
        if (this.flatDisjunction) {
            tokenDisjunction.add(this.buildFlatDisjunction(fieldWeights, normalizedName).build(), BooleanClause.Occur.MUST);
            return tokenDisjunction.build();
        }

        String[] tokens = normalizedName.split("\\s+");
        // Integer.MAX_VALUE means that all tokens may be dropped; one token has to match in any case.
        int effectiveRelaxation = this.relaxation != Integer.MAX_VALUE ? this.relaxation : tokens.length;
        tokenDisjunction.setMinimumNumberShouldMatch(Math.max(1, tokens.length - effectiveRelaxation));
        for (String token : tokens) {
            List<Query> fieldQueries = buildFieldQueries(token, fieldWeights);
            if (this.useDisMax) {
                tokenDisjunction.add(new DisjunctionMaxQuery(fieldQueries, dismaxTieBreaker), BooleanClause.Occur.SHOULD);
            } else {
                BooleanQuery.Builder fieldsDisjunction = new BooleanQuery.Builder();
                for (Query fieldQuery : fieldQueries) {
                    fieldsDisjunction.add(fieldQuery, BooleanClause.Occur.SHOULD);
                }
                tokenDisjunction.add(fieldsDisjunction.build(), BooleanClause.Occur.SHOULD);
            }
        }
        return tokenDisjunction.build();
    }

    /** Adds the gene ID, ID prefix and taxonomy ID filter clauses that {@code key} defines. */
    private static void addFilters(BooleanQuery.Builder mainQb, CandidateCacheKey key) {
        if (key.getGeneIdsFilter() != null && !key.getGeneIdsFilter().isEmpty()) {
            BooleanQuery.Builder idFilter = new BooleanQuery.Builder();
            key.getGeneIdsFilter().stream()
                    .map(id -> new TermQuery(new Term("entry_id", id)))
                    .forEach(idQuery -> idFilter.add(idQuery, BooleanClause.Occur.SHOULD));
            mainQb.add(idFilter.build(), BooleanClause.Occur.FILTER);
        }
        if (!StringUtils.isBlank(key.getIdPrefixFilter())) {
            mainQb.add(new PrefixQuery(new Term("entry_id", key.getIdPrefixFilter())), BooleanClause.Occur.FILTER);
        }
        if (!StringUtils.isBlank(key.getTaxId())) {
            mainQb.add(new TermQuery(new Term("tax_id", key.getTaxId())), BooleanClause.Occur.FILTER);
        }
    }

    /** Returns {@link #SYNONYM_FIELDS} for tokens {@code CandidateFilter.isNumberGreekOrLatin} accepts, else {@link #ALL_FIELDS}. */
    private static String[] fieldsFor(String token) {
        return CandidateFilter.isNumberGreekOrLatin(token) ? SYNONYM_FIELDS : ALL_FIELDS;
    }

    /** Builds the term query for {@code token} in {@code field}, boosted when field weights are given (default weight 1). */
    private static Query weightedTermQuery(String field, String token, Map<String, Float> fieldWeights) {
        Query termQuery = new TermQuery(new Term(field, token));
        if (fieldWeights == null) {
            return termQuery;
        }
        float weight = fieldWeights.getOrDefault(field, 1.0f);
        return new BoostQuery(termQuery, weight);
    }

    /** Builds one (optionally boosted) term query per field that is searched for {@code token}. */
    private static List<Query> buildFieldQueries(String token, Map<String, Float> fieldWeights) {
        return Arrays.stream(fieldsFor(token))
                .map(field -> weightedTermQuery(field, token, fieldWeights))
                .collect(Collectors.toList());
    }

    /**
     * Builds a disjunction with one optional clause per whitespace-separated token of the name; each
     * token clause is itself a disjunction over the fields searched for that token.
     *
     * <p>Both levels stop adding clauses once {@link BooleanQuery#getMaxClauseCount()} is reached.
     *
     * @param fieldWeights optional boosts per field, may be {@code null}
     * @param normalizedGeneName the normalized name to split into tokens
     * @return the builder, not yet built
     */
    @NotNull
    private BooleanQuery.Builder buildFlatDisjunction(Map<String, Float> fieldWeights, String normalizedGeneName) {
        BooleanQuery.Builder flatDisjunctionBuilder = new BooleanQuery.Builder();
        int maxClauses = BooleanQuery.getMaxClauseCount();
        int tokenClauses = 0;
        for (String token : normalizedGeneName.split("\\s+")) {
            BooleanQuery.Builder tokenQueryBuilder = new BooleanQuery.Builder();
            int fieldClauses = 0;
            for (String field : fieldsFor(token)) {
                tokenQueryBuilder.add(weightedTermQuery(field, token, fieldWeights), BooleanClause.Occur.SHOULD);
                if (++fieldClauses >= maxClauses) {
                    break;
                }
            }
            flatDisjunctionBuilder.add(tokenQueryBuilder.build(), BooleanClause.Occur.SHOULD);
            if (++tokenClauses >= maxClauses) {
                break;
            }
        }
        return flatDisjunctionBuilder;
    }

    /**
     * Returns a name made of the simple class name and part of the configuration.
     *
     * <p>Note that {@code useAppositionsAsRelevanceSignal} and
     * {@code useContextGenesAsRelevanceSignal} are not part of the name, so generators differing
     * only in those flags share the same name.
     */
    @Override
    public String getName() {
        return this.getClass().getSimpleName() + (this.useDisMax ? "dismax" : "disjunction") + "-" + this.flatDisjunction + "-" + this.searchExactMatches + "-" + this.useSynonymMatchesAsRelevanceSignal + "-" + this.useNpContextAsRelevanceSignal + "-" + this.useAlternativeNamesAsRelevanceSignal + "-" + this.relaxation;
    }

    /** Returns whether this generator was configured to use context genes as a relevance signal. */
    public boolean isUseContextGenesAsRelevanceSignal() {
        return this.useContextGenesAsRelevanceSignal;
    }
}

