/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jules.ae.genemapping;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import de.julielab.jules.ae.genemapping.CandidateCacheKey;
import de.julielab.jules.ae.genemapping.CandidateRetrieval;
import de.julielab.jules.ae.genemapping.GeneMappingConfiguration;
import de.julielab.jules.ae.genemapping.QueryGenerator;
import de.julielab.jules.ae.genemapping.SynHit;
import de.julielab.jules.ae.genemapping.genemodel.GeneMention;
import de.julielab.jules.ae.genemapping.genemodel.GeneName;
import de.julielab.jules.ae.genemapping.scoring.JaroWinklerScorer;
import de.julielab.jules.ae.genemapping.scoring.LevenshteinScorer;
import de.julielab.jules.ae.genemapping.scoring.LuceneScorer;
import de.julielab.jules.ae.genemapping.scoring.MaxEntScorer;
import de.julielab.jules.ae.genemapping.scoring.Scorer;
import de.julielab.jules.ae.genemapping.scoring.SimpleScorer;
import de.julielab.jules.ae.genemapping.scoring.TokenJaroSimilarityScorer;
import de.julielab.jules.ae.genemapping.utils.GeneCandidateRetrievalException;
import de.julielab.jules.ae.genemapping.utils.GeneMappingException;
import de.julielab.jules.ae.genemapping.utils.norm.TermNormalizer;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LuceneCandidateRetrieval
implements CandidateRetrieval {
    /** Separator between the gene ID and the synonym priority inside values of the {@code entry_id} index field. */
    public static final String NAME_PRIO_DELIMITER = "__";
    /** Name of the logger exposed as {@link #candidateLog}. */
    public static final String LOGGER_NAME_CANDIDATES = "de.julielab.jules.ae.genemapper.candidates";
    // Scorer type codes. setScorerType(int) maps 0, 1, 2, 3, 4 and 10 to scorer implementations;
    // any other value (including TFIDF = 5) is rejected there with a GeneMappingException.
    public static final int SIMPLE_SCORER = 0;
    public static final int TOKEN_JAROWINKLER_SCORER = 1;
    public static final int MAXENT_SCORER = 2;
    public static final int JAROWINKLER_SCORER = 3;
    public static final int LEVENSHTEIN_SCORER = 4;
    public static final int TFIDF = 5;
    public static final int LUCENE_SCORER = 10;
    /** Classpath location of the default MaxEnt scorer model; also the initial value of {@code maxEntModel}. */
    public static final String MAXENT_SCORER_MODEL = "/genemapper_jules_mallet.mod";
    /** Logger used for candidate tracing; scoreHits() writes the normalized search term to it at trace level. */
    public static final Logger candidateLog = LoggerFactory.getLogger((String)"de.julielab.jules.ae.genemapper.candidates");
    private static final Logger log = LoggerFactory.getLogger(LuceneCandidateRetrieval.class);
    /** Maximum number of index hits fetched per candidate query (20). */
    private static final int LUCENE_MAX_HITS = 20;
    /**
     * Candidate caches shared by all instances, keyed by the configured mention index path.
     * The configuration-based constructor reads and populates this map while synchronizing on it.
     */
    private static ConcurrentHashMap<String, LoadingCache<CandidateCacheKey, List<SynHit>>> caches = new ConcurrentHashMap();
    /** MaxEnt model location; the default denotes a classpath resource, any other value is treated as a file path by setScorerType(). */
    private String maxEntModel = "/genemapper_jules_mallet.mod";
    /** Normalizer handed to every GeneMention this class creates from plain text. */
    private TermNormalizer normalizer;
    /** Searcher over the mention (synonym) index. */
    private IndexSearcher mentionIndexSearcher;
    /** Scorer applied when the indexed synonym equals the normalized mention. */
    private Scorer exactScorer;
    /** Scorer applied when the indexed synonym differs from the normalized mention. */
    private Scorer approxScorer;
    /** Cache of unmodifiable candidate lists; entries are cloned before being handed out. */
    private LoadingCache<CandidateCacheKey, List<SynHit>> candidateCache;
    /** Optional spell checker passed to query generation; stays {@code null} when no spelling index is configured or found. */
    private SpellChecker spellingChecker;

    /**
     * Creates a retrieval component around an already opened index searcher.
     *
     * <p>NOTE(review): this constructor assigns neither {@code approxScorer} nor
     * {@code candidateCache}; methods that read those fields will fail with a
     * {@link NullPointerException} on instances created this way.
     *
     * @param mentionIndexSearcher searcher over the mention index
     * @param scorer               scorer stored as the exact-match scorer
     * @throws IOException declared by the signature; presumably for the normalizer setup - TODO confirm
     * @deprecated use {@link #LuceneCandidateRetrieval(GeneMappingConfiguration)} instead
     */
    @Deprecated
    public LuceneCandidateRetrieval(IndexSearcher mentionIndexSearcher, Scorer scorer) throws IOException {
        this.normalizer = new TermNormalizer();
        this.exactScorer = scorer;
        this.mentionIndexSearcher = mentionIndexSearcher;
    }

    /**
     * Creates a retrieval component from a gene mapping configuration.
     *
     * <p>Configuration keys read here:
     * <ul>
     * <li>{@code mention_index} (required): path of the Lucene mention index</li>
     * <li>{@code spelling_index} (optional): path of a spelling index; spelling correction is
     * disabled when the key is missing or the path does not exist</li>
     * <li>{@code exact_scorer_type}, {@code approx_scorer_type} (required): integer scorer type
     * codes understood by {@link #setScorerType(int)}</li>
     * <li>{@code maxent_model} (optional): file path of a MaxEnt model replacing the bundled one</li>
     * </ul>
     *
     * <p>The candidate cache is shared between all instances configured with the same
     * {@code mention_index}. NOTE(review): the cache loader is bound to the instance that
     * created the cache, so later instances for the same index load candidates through the
     * first instance's searcher, scorers and spell checker.
     *
     * @param config the gene mapping configuration
     * @throws GeneMappingException if a required key is missing, a scorer type is unknown or
     *                              an index cannot be opened
     */
    public LuceneCandidateRetrieval(GeneMappingConfiguration config) throws GeneMappingException {
        String mentionIndex = config.getProperty("mention_index");
        if (mentionIndex == null) {
            throw new GeneMappingException("mention index not specified in configuration file (critical).");
        }
        // The model location must be known BEFORE the scorers are created: setScorerType()
        // reads this.maxEntModel, so applying the setting afterwards would silently ignore it.
        String configuredMaxEntModel = config.getProperty("maxent_model");
        if (configuredMaxEntModel != null) {
            this.maxEntModel = configuredMaxEntModel;
        }
        try {
            DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(mentionIndex)));
            this.mentionIndexSearcher = new IndexSearcher(reader);
            this.mentionIndexSearcher.setSimilarity(new ClassicSimilarity());
            log.debug("mention index loaded.");

            String spellingIndexPath = config.getProperty("spelling_index");
            if (spellingIndexPath != null) {
                File spellingIndex = new File(spellingIndexPath);
                if (spellingIndex.exists()) {
                    this.spellingChecker = new SpellChecker(FSDirectory.open(spellingIndex.toPath()));
                }
            }
            if (this.spellingChecker == null) {
                log.warn("Spelling index was not given or file does not exist. No spelling correction can be done. Specified spelling index: {}", spellingIndexPath);
            }

            String exactScorerType = config.getProperty("exact_scorer_type");
            if (exactScorerType == null) {
                throw new GeneMappingException("No configuration value given for exact_scorer_type");
            }
            this.exactScorer = this.setScorerType(Integer.valueOf(exactScorerType));

            String approxScorerType = config.getProperty("approx_scorer_type");
            if (approxScorerType == null) {
                throw new GeneMappingException("No configuration value given for approx_scorer_type");
            }
            this.approxScorer = this.setScorerType(Integer.valueOf(approxScorerType));

            this.normalizer = new TermNormalizer();
        }
        catch (IOException e) {
            throw new GeneMappingException(e);
        }
        log.info("Mention index: {}", mentionIndex);
        log.info("Exact scorer: {}", this.exactScorer);
        log.info("Approx scorer: {}", this.approxScorer);

        // Look-up and creation happen under one lock so that only a single cache per index is ever built.
        synchronized (caches) {
            this.candidateCache = caches.get(mentionIndex);
            if (this.candidateCache == null) {
                log.info("Creating new gene candidate cache for index {}", mentionIndex);
                this.candidateCache = CacheBuilder.newBuilder()
                        .maximumSize(1000000L)
                        .expireAfterWrite(60L, TimeUnit.MINUTES)
                        .build(new CacheLoader<CandidateCacheKey, List<SynHit>>() {

                            @Override
                            public List<SynHit> load(CandidateCacheKey key) throws IOException, BooleanQuery.TooManyClauses {
                                // Cached lists are shared between callers, hence read-only.
                                return Collections.unmodifiableList(LuceneCandidateRetrieval.this.getCandidatesFromIndexWithoutCache(key));
                            }
                        });
                if (caches.put(mentionIndex, this.candidateCache) != null) {
                    throw new IllegalStateException("There already is a candidate index for " + mentionIndex + " which points to a faulty concurrency implementation");
                }
            } else {
                log.info("Using existing gene candidate cache for index {}", mentionIndex);
            }
        }
    }

    /**
     * @return the term normalizer used for gene mentions created from plain text
     */
    public TermNormalizer getNormalizer() {
        return normalizer;
    }

    /**
     * Replaces the term normalizer used for gene mentions created from plain text.
     *
     * @param normalizer the normalizer to use from now on
     */
    public void setNormalizer(TermNormalizer normalizer) {
        this.normalizer = normalizer;
    }

    /**
     * @return the scorer applied to exact matches (the approximate scorer is not exposed here)
     */
    public Scorer getScorer() {
        return exactScorer;
    }

    /**
     * @return the searcher over the mention index
     */
    public IndexSearcher getMentionIndexSearcher() {
        return mentionIndexSearcher;
    }

    /**
     * @return the spell checker, or {@code null} if no spelling index was loaded
     */
    public SpellChecker getSpellingChecker() {
        return spellingChecker;
    }

    /**
     * Creates the scorer for a scorer type code.
     *
     * <p>Despite its name this method does not modify any field; the caller decides what to
     * do with the returned scorer.
     *
     * @param type one of {@link #SIMPLE_SCORER}, {@link #TOKEN_JAROWINKLER_SCORER},
     *             {@link #MAXENT_SCORER}, {@link #JAROWINKLER_SCORER},
     *             {@link #LEVENSHTEIN_SCORER} or {@link #LUCENE_SCORER}
     * @return a new scorer of the requested type
     * @throws GeneMappingException if the type code is unknown or the bundled MaxEnt model
     *                              cannot be found on the classpath
     */
    public Scorer setScorerType(int type) throws GeneMappingException {
        switch (type) {
            case SIMPLE_SCORER:
                return new SimpleScorer();
            case TOKEN_JAROWINKLER_SCORER:
                return new TokenJaroSimilarityScorer();
            case MAXENT_SCORER: {
                // A non-default model location is a file path; the default is a classpath resource.
                if (!this.maxEntModel.equals(MAXENT_SCORER_MODEL)) {
                    return new MaxEntScorer(new File(this.maxEntModel));
                }
                InputStream in = this.getClass().getResourceAsStream(MAXENT_SCORER_MODEL);
                if (in == null) {
                    // getResourceAsStream signals a missing resource with null; fail with a clear message.
                    throw new GeneMappingException("MaxEnt scorer model not found on classpath: " + MAXENT_SCORER_MODEL);
                }
                return new MaxEntScorer(in);
            }
            case JAROWINKLER_SCORER:
                return new JaroWinklerScorer();
            case LUCENE_SCORER:
                return new LuceneScorer();
            case LEVENSHTEIN_SCORER:
                return new LevenshteinScorer();
            default:
                throw new GeneMappingException("Unknown mention scorer type: " + type);
        }
    }

    /**
     * Describes the exact-match scorer.
     *
     * @return the scorer's own description, or {@code "Lucene Score (unnormalized)"} when no
     *         exact scorer is set
     */
    public String getScorerInfo() {
        return this.exactScorer != null ? this.exactScorer.info() : "Lucene Score (unnormalized)";
    }

    /**
     * @return the type code reported by the exact-match scorer
     */
    public int getScorerType() {
        Scorer scorer = this.exactScorer;
        return scorer.getScorerType();
    }

    /**
     * Retrieves candidates for a plain-text mention by wrapping it into a {@link GeneMention}
     * that uses this component's normalizer.
     *
     * @param originalSearchTerm the mention text as found in the document
     * @return the candidates as returned by {@link #getCandidates(GeneMention)}
     * @throws GeneCandidateRetrievalException if candidate loading fails
     */
    @Override
    public List<SynHit> getCandidates(String originalSearchTerm) throws GeneCandidateRetrievalException {
        return this.getCandidates(new GeneMention(originalSearchTerm, this.normalizer));
    }

    /**
     * Retrieves candidates for a gene mention, restricted to the taxonomy IDs the mention
     * itself reports.
     *
     * @param geneMention the mention to find candidates for
     * @return the candidates as returned by {@link #getCandidates(GeneMention, Collection)}
     * @throws GeneCandidateRetrievalException if candidate loading fails
     */
    @Override
    public List<SynHit> getCandidates(GeneMention geneMention) throws GeneCandidateRetrievalException {
        Collection<String> taxonomyIds = geneMention.getTaxonomyIds();
        return this.getCandidates(geneMention, taxonomyIds);
    }

    /**
     * Retrieves the candidates for a gene mention, optionally restricted to organisms.
     *
     * <p>With an empty organism collection a single query without taxonomy restriction is
     * issued; otherwise one query per taxonomy ID is issued and the results are concatenated.
     * All hits are switched to score comparison and returned in their natural order.
     *
     * @param geneMention the mention to find candidates for
     * @param organisms   taxonomy IDs to restrict the search to; may be empty
     * @return a new, sorted list of candidate hits (copies of the cached hits)
     * @throws GeneCandidateRetrievalException if loading candidates through the cache fails
     */
    @Override
    public List<SynHit> getCandidates(GeneMention geneMention, Collection<String> organisms) throws GeneCandidateRetrievalException {
        try {
            GeneName geneName = geneMention.getGeneName();

            // Offsets are only needed for debug output; -1 marks "no offsets available".
            boolean debug = log.isDebugEnabled();
            int begin = -1;
            int end = -1;
            if (debug && geneMention.getOffsets() != null) {
                begin = geneMention.getBegin();
                end = geneMention.getEnd();
            }

            List<SynHit> hits = new ArrayList<>();
            if (organisms.isEmpty()) {
                hits.addAll(this.getCandidatesFromIndex(new CandidateCacheKey(geneName)));
                if (debug) {
                    log.debug("Returning {} candidates for gene mention {}[{}-{}]", hits.size(), geneName.getText(), begin, end);
                }
            }
            for (String taxonomyId : organisms) {
                // A fresh key per taxonomy ID: the key object may be retained by the cache, so a
                // key that was already used for a look-up must not be mutated afterwards.
                CandidateCacheKey key = new CandidateCacheKey(geneName);
                key.taxId = taxonomyId;
                hits.addAll(this.getCandidatesFromIndex(key));
                if (debug) {
                    // hits.size() is the running total over all taxonomy IDs processed so far.
                    log.debug("Returning {} candidates for gene mention {}[{}-{}] for taxonomy ID {}", hits.size(), geneName.getText(), begin, end, taxonomyId);
                }
            }

            for (SynHit hit : hits) {
                hit.setCompareType(SynHit.CompareType.SCORE);
            }
            return hits.stream().sorted().collect(Collectors.toList());
        }
        catch (ExecutionException e) {
            throw new GeneCandidateRetrievalException(e);
        }
    }

    /**
     * Returns the candidates for a cache key, loading them through the candidate cache.
     *
     * <p>The cached list is read-only and shared between callers, so every hit is cloned
     * before it is handed out; callers are free to modify the returned hits.
     *
     * @param key the gene name (and optional taxonomy ID) to look up
     * @return a new mutable list holding clones of the cached hits
     * @throws ExecutionException if the cache loader fails
     */
    private List<SynHit> getCandidatesFromIndex(CandidateCacheKey key) throws ExecutionException {
        List<SynHit> cachedHits = this.candidateCache.get(key);
        List<SynHit> copies = new ArrayList<>(cachedHits.size());
        for (SynHit cachedHit : cachedHits) {
            try {
                copies.add(cachedHit.clone());
            }
            catch (CloneNotSupportedException e) {
                log.error("Could not clone a cached SynHit: {}", cachedHit, e);
                throw new RuntimeException(e);
            }
        }
        return copies;
    }

    /**
     * Queries the mention index directly, bypassing the cache, and scores the hits.
     *
     * @param key the gene name (and optional taxonomy ID) to search for
     * @return the scored hits for at most {@code LUCENE_MAX_HITS} index documents
     * @throws IOException                  if the index cannot be read
     * @throws BooleanQuery.TooManyClauses if the generated query exceeds Lucene's clause limit
     */
    private ArrayList<SynHit> getCandidatesFromIndexWithoutCache(CandidateCacheKey key) throws IOException, BooleanQuery.TooManyClauses {
        Query query = QueryGenerator.makeDisjunctionMaxQuery(key, this.spellingChecker);
        TopDocs topDocs = this.mentionIndexSearcher.search(query, LUCENE_MAX_HITS);
        log.debug("searching with query: " + query + "; found hits: " + topDocs.totalHits);
        return this.scoreHits(topDocs, key.geneName);
    }

    /**
     * Converts the documents of a search result into scored {@link SynHit}s.
     *
     * <p>For each document the indexed synonym is compared with the normalized mention: equal
     * names are scored with the exact scorer, all others with the approximate scorer. If the
     * chosen scorer is of type {@link #LUCENE_SCORER}, exact matches get the fixed score 9999
     * and other matches keep their Lucene score.
     *
     * @param foundDocs the search result to convert
     * @param geneName  the gene name that was searched for
     * @return one hit per result document, in result order
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException           if a stored document cannot be read
     */
    private ArrayList<SynHit> scoreHits(TopDocs foundDocs, GeneName geneName) throws CorruptIndexException, IOException {
        String originalMention = geneName.getText().toLowerCase();
        String normalizedMention = geneName.getNormalizedText();
        ScoreDoc[] scoredDocs = foundDocs.scoreDocs;
        log.debug("ordering candidates for best match to this reference term: " + originalMention + " for top " + scoredDocs.length + " candidates");
        candidateLog.trace("Search term: " + normalizedMention);

        ArrayList<SynHit> allHits = new ArrayList<SynHit>();
        for (ScoreDoc scoredDoc : scoredDocs) {
            Document d = this.mentionIndexSearcher.doc(scoredDoc.doc);
            String indexNormalizedName = d.getField("indexed_syn").stringValue();

            // Every entry_id value has the form <id><delimiter><priority>.
            ArrayList<String> ids = new ArrayList<String>();
            ArrayList<Number> priorities = new ArrayList<Number>();
            for (IndexableField entryIdField : d.getFields("entry_id")) {
                String[] idAndPriority = entryIdField.stringValue().split(NAME_PRIO_DELIMITER);
                ids.add(idAndPriority[0]);
                priorities.add(Integer.valueOf(idAndPriority[1]));
            }

            List<String> taxIds = new ArrayList<String>();
            for (IndexableField taxIdField : d.getFields("tax_id")) {
                taxIds.add(taxIdField.stringValue());
            }

            boolean exactMatch = indexNormalizedName.equals(normalizedMention);
            Scorer scorer = exactMatch ? this.exactScorer : this.approxScorer;
            double score;
            if (scorer.getScorerType() != LUCENE_SCORER) {
                score = scorer.getScore(normalizedMention, indexNormalizedName);
            } else if (exactMatch) {
                score = 9999.0;
            } else {
                score = (double)scoredDoc.score;
            }

            SynHit m = new SynHit(indexNormalizedName, score, ids, "UniProt ID (any organism)", taxIds);
            m.setMappedMention(originalMention);
            m.setMappedGeneName(geneName);
            m.setSynonymPriorities(priorities);
            allHits.add(m);
        }
        return allHits;
    }

    /**
     * Retrieves candidates for a gene mention restricted to a single organism.
     *
     * @param geneMention the mention to find candidates for
     * @param organism    the taxonomy ID to restrict the search to
     * @return the candidates as returned by {@link #getCandidates(GeneMention, Collection)}
     * @throws GeneCandidateRetrievalException if candidate loading fails
     */
    @Override
    public List<SynHit> getCandidates(GeneMention geneMention, String organism) throws GeneCandidateRetrievalException {
        List<String> organisms = Arrays.asList(organism);
        return this.getCandidates(geneMention, organisms);
    }

    /**
     * Retrieves candidates for a plain-text mention restricted to a single organism.
     *
     * @param geneMentionText the mention text
     * @param organism        the taxonomy ID to restrict the search to
     * @return the candidates as returned by {@link #getCandidates(GeneMention, Collection)}
     * @throws GeneCandidateRetrievalException if candidate loading fails
     */
    @Override
    public List<SynHit> getCandidates(String geneMentionText, String organism) throws GeneCandidateRetrievalException {
        GeneMention mention = new GeneMention(geneMentionText, this.normalizer);
        List<String> organisms = Arrays.asList(organism);
        return this.getCandidates(mention, organisms);
    }

    /**
     * Retrieves candidates for a plain-text mention restricted to a set of organisms.
     *
     * @param geneMentionText the mention text
     * @param organism        the taxonomy IDs to restrict the search to
     * @return the candidates as returned by {@link #getCandidates(GeneMention, Collection)}
     * @throws GeneCandidateRetrievalException if candidate loading fails
     */
    @Override
    public List<SynHit> getCandidates(String geneMentionText, Collection<String> organism) throws GeneCandidateRetrievalException {
        GeneMention mention = new GeneMention(geneMentionText, this.normalizer);
        return this.getCandidates(mention, organism);
    }

    /**
     * Looks up the taxonomy ID stored for a gene ID.
     *
     * <p>The index is searched for the {@code entry_id} value {@code <geneId>__-1}. In the
     * first matching document the {@code entry_id} and {@code tax_id} fields are read as
     * parallel lists, and the taxonomy ID at the position of the gene ID is returned (the
     * last position wins if the ID occurs more than once).
     *
     * @param geneId the gene ID to look up
     * @return the taxonomy ID, or the empty string if no document or no matching entry exists
     * @throws IOException if the index cannot be read
     */
    @Override
    public String mapGeneIdToTaxId(String geneId) throws IOException {
        Query query = new TermQuery(new Term("entry_id", geneId + "__-1"));
        TopDocs topDocs = this.mentionIndexSearcher.search(query, 1);
        if (!(topDocs.totalHits > 0L)) {
            return "";
        }

        Document d = this.mentionIndexSearcher.doc(topDocs.scoreDocs[0].doc);
        List<String> ids = new ArrayList<String>();
        for (IndexableField entryIdField : d.getFields("entry_id")) {
            ids.add(entryIdField.stringValue().split(NAME_PRIO_DELIMITER)[0]);
        }
        List<String> taxIds = new ArrayList<String>();
        for (IndexableField taxIdField : d.getFields("tax_id")) {
            taxIds.add(taxIdField.stringValue());
        }

        String taxId = "";
        int position = 0;
        for (String id : ids) {
            if (id.equals(geneId)) {
                taxId = taxIds.get(position);
            }
            ++position;
        }
        if (taxId.isEmpty()) {
            log.warn("GeneID: " + geneId + " has no TaxId assigned.");
        }
        return taxId;
    }

    /**
     * Looks up one index entry per gene ID.
     *
     * <p>The returned list is positionally aligned with {@code ids}: element {@code i} is the
     * hit for {@code ids.get(i)}, or {@code null} if no document with the {@code entry_id}
     * value {@code <id>__-1} exists. A warning is logged on every call because the method is
     * known not to cover all synonym priorities.
     *
     * @param ids the gene IDs to look up
     * @return a list of the same size as {@code ids}, containing {@code null} for unknown IDs
     * @throws IOException if the index cannot be read
     */
    @Override
    public List<SynHit> getIndexEntries(List<String> ids) throws IOException {
        log.warn("LuceneCandidateRetrieval.getIndexEntries(): This method currently does not work as intended since the synonym index is now synonym-centric instead of id-centric. The ID field values have the form id_priority, thus at this place a wildcard query for all priorities would be needed");
        List<SynHit> entries = new ArrayList<>(ids.size());
        for (String id : ids) {
            BooleanClause clause = new BooleanClause(new TermQuery(new Term("entry_id", id + "__-1")), BooleanClause.Occur.FILTER);
            BooleanQuery query = new BooleanQuery.Builder().add(clause).build();
            TopDocs result = this.mentionIndexSearcher.search(query, 1);
            if (result.totalHits > 0L) {
                Document d = this.mentionIndexSearcher.doc(result.scoreDocs[0].doc);
                List<String> taxIdField = Arrays.stream(d.getFields("tax_id"))
                        .map(IndexableField::stringValue)
                        .filter(tax -> !StringUtils.isBlank(tax))
                        .collect(Collectors.toList());
                if (taxIdField.isEmpty()) {
                    log.warn("GeneID: " + id + " has no TaxId assigned.");
                }
                entries.add(new SynHit("<none>", 0.0, Arrays.asList(id), "UniProt ID (any organism)", taxIdField));
            } else {
                // Placeholder only for IDs without a hit; adding it unconditionally would
                // append a null after every found entry and break the alignment with ids.
                entries.add(null);
            }
        }
        return entries;
    }

    /**
     * Collects the indexed synonyms of a gene ID over all priorities.
     *
     * <p>Documents are matched with a wildcard on the {@code entry_id} field
     * ({@code <id>__*}); at most 200 documents are considered.
     *
     * @param id the gene ID
     * @return the {@code indexed_syn} values of the matching documents, or an empty list if
     *         nothing matches
     * @throws IOException if the index cannot be read
     */
    @Override
    public List<String> getSynonyms(String id) throws IOException {
        final int maxRet = 200;
        Query wildcard = new WildcardQuery(new Term("entry_id", id + "__*"));
        BooleanQuery query = new BooleanQuery.Builder().add(new BooleanClause(wildcard, BooleanClause.Occur.FILTER)).build();
        TopDocs result = this.mentionIndexSearcher.search(query, maxRet);
        if (!(result.totalHits > 0L)) {
            return Collections.emptyList();
        }
        List<String> synonyms = new ArrayList<String>(maxRet);
        for (ScoreDoc scoreDoc : result.scoreDocs) {
            Document doc = this.mentionIndexSearcher.doc(scoreDoc.doc);
            synonyms.add(doc.getField("indexed_syn").stringValue());
        }
        return synonyms;
    }

    /**
     * Returns the indexed name stored for a gene ID with a specific synonym priority.
     *
     * <p>The index is searched for the exact {@code entry_id} value
     * {@code <id>__<priority>}; only the first matching document is read.
     *
     * @param id       the gene ID
     * @param priority the synonym priority
     * @return a list with the {@code indexed_syn} value of the first match, or an empty list
     * @throws IOException if the index cannot be read
     */
    public List<String> getPriorityNames(String id, int priority) throws IOException {
        final int maxRet = 1;
        Query idAndPriority = new TermQuery(new Term("entry_id", id + NAME_PRIO_DELIMITER + priority));
        BooleanQuery query = new BooleanQuery.Builder().add(new BooleanClause(idAndPriority, BooleanClause.Occur.FILTER)).build();
        TopDocs result = this.mentionIndexSearcher.search(query, maxRet);
        if (!(result.totalHits > 0L)) {
            return Collections.emptyList();
        }
        List<String> names = new ArrayList<String>(maxRet);
        for (ScoreDoc scoreDoc : result.scoreDocs) {
            Document doc = this.mentionIndexSearcher.doc(scoreDoc.doc);
            names.add(doc.getField("indexed_syn").stringValue());
        }
        return names;
    }

    /**
     * Returns the indexed names with the given priority for several gene IDs.
     *
     * @param ids      the gene IDs, processed in iteration order
     * @param priority the synonym priority
     * @return the concatenated results of {@link #getPriorityNames(String, int)} for each ID;
     *         IDs without a match contribute nothing
     * @throws IOException if the index cannot be read
     */
    public List<String> getPriorityNames(List<String> ids, int priority) throws IOException {
        List<String> names = new ArrayList<String>();
        for (String id : ids) {
            names.addAll(this.getPriorityNames(id, priority));
        }
        return names;
    }
}

