/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.resources;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.HashMultiset;
import de.julielab.geneexpbase.CandidateFilter;
import de.julielab.genemapper.resources.DictionaryFamilyDomainFilter;
import de.julielab.java.utilities.FileUtilities;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.ngram.NGramFilterFactory;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Builds a Lucene index that contains one document per synonym of a normalized,
 * tab-separated dictionary ({@code synonym <TAB> id <TAB> priority}).
 *
 * <p>Consecutive dictionary lines that share the same synonym are collected and turned into a
 * single document. The dictionary is therefore expected to be grouped by synonym; a synonym
 * that re-appears later in the file produces an additional document.</p>
 *
 * <p>Besides the index, two side files are written next to the dictionary file:
 * {@code <dictionary name>.indexGeneratorFiltered} (synonyms with at least one entry of priority
 * &lt;= 3) and {@code intra_tax_ambiguous.eg} (IDs of synonyms that occur more than once for the
 * same taxonomy ID among entries of priority &lt;= 3).</p>
 *
 * <p>A taxonomy map must be loaded into {@link #id2tax} before {@link #createIndex()} is
 * called.</p>
 */
public class NameCentricSynonymIndexGenerator {
    private static final Logger log = LoggerFactory.getLogger(NameCentricSynonymIndexGenerator.class);
    /** While {@code true}, the candidate filter is never evaluated and no "filtered" fields are written. */
    private static final boolean OMIT_FILTERED = true;
    /** Number of worker threads that build documents and add them to the index. */
    private static final int INDEXING_THREADS = 20;
    /** Entries with a priority above this value are ignored for the two side files. */
    private static final int MAX_REPORTED_PRIORITY = 3;
    /** Number of columns of a well-formed dictionary line. */
    private static final int DICT_COLUMNS = 3;

    private final File filteredDictFile;
    private final File ambiguousSynsFile;
    private final NGramFilterFactory nGramFilterFactory;
    /** Maps an entry ID to its taxonomy ID; must be set before indexing starts. */
    Map<String, String> id2tax;
    Directory indexDirectory;
    private final File dictFile;
    private final File familyRecordsFile;
    private final String idSource;
    private final String entityType;

    /**
     * Prepares the generator; nothing is read or indexed yet.
     *
     * @param dictFile          the normalized dictionary to index; the side files are created in its parent directory
     * @param familyRecordsFile family records file; only its path is reported here
     * @param idSource          value of the "source" field for all non-family entries
     * @param entityType        value of the "entity_type" field for all entries
     * @param indexFile         directory in which the Lucene index is created
     * @throws IllegalStateException if the index directory cannot be opened
     */
    public NameCentricSynonymIndexGenerator(File dictFile, File familyRecordsFile, String idSource, String entityType, File indexFile) {
        this.familyRecordsFile = familyRecordsFile;
        this.idSource = idSource;
        this.entityType = entityType;
        System.out.println("Building synonym index from dictionary " + dictFile.getAbsolutePath());
        System.out.println("Adding family synonyms from " + familyRecordsFile.getAbsolutePath());
        this.dictFile = dictFile;
        this.filteredDictFile = new File(dictFile.getParent(), dictFile.getName() + ".indexGeneratorFiltered");
        this.ambiguousSynsFile = new File(dictFile.getParent(), "intra_tax_ambiguous.eg");
        this.indexDirectory = this.createIndexDirectory(indexFile);
        // The factory receives its own mutable map with the n-gram size settings.
        Map<String, String> ngramFilterSettings = new HashMap<>();
        ngramFilterSettings.put("minGramSize", "2");
        ngramFilterSettings.put("maxGramSize", "3");
        this.nGramFilterFactory = new NGramFilterFactory(ngramFilterSettings);
    }

    /**
     * Command line entry point.
     *
     * @param args {@code <resourcesDirectory> <gene_info file name> <geneSynonymIndicesDirectory>}
     */
    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        if (args.length != 3) {
            System.err.println("Usage: SynonymIndexGenerator <resourcesDirectory> <gene_info file name> <geneSynonymIndicesDirectory>");
            System.exit(1);
        }
        File resDir = new File(args[0]);
        if (!resDir.isDirectory()) {
            System.err.println("Could not find resources directory");
            System.exit(1);
        }
        File geneInfo = new File(resDir, args[1]);
        if (!geneInfo.exists()) {
            System.err.println("Gene info file could not be found at " + geneInfo.getAbsolutePath());
            System.exit(1);
        }

        // Validate all inputs before touching any existing index so that a missing
        // input file does not cost us the previously built index.
        File egDictFile = new File(resDir, "gene.dict.variants.norm.filtered.eg");
        NameCentricSynonymIndexGenerator.checkFile(egDictFile);
        File upTaxMap = new File(resDir, "up2eg2tax.map");
        NameCentricSynonymIndexGenerator.checkFile(upTaxMap);
        File familyRecordsFile = new File(resDir, "familyrecords.dict");

        File geneIndexDir = new File(args[2], "geneSynonymIndex");
        File proteinIndexDir = new File(args[2], "proteinSynonymIndex");
        if (geneIndexDir.exists()) {
            FileUtils.deleteQuietly(geneIndexDir);
        }
        // Only the gene index is rebuilt below; an existing protein index is removed nevertheless.
        if (proteinIndexDir.exists()) {
            FileUtils.deleteQuietly(proteinIndexDir);
        }

        try {
            NameCentricSynonymIndexGenerator indexGenerator = new NameCentricSynonymIndexGenerator(egDictFile, familyRecordsFile, "NCBI Gene", "Gene/Protein", geneIndexDir);
            indexGenerator.readEgTaxMap(geneInfo);
            indexGenerator.createIndex();
            long end = System.currentTimeMillis();
            // Report success only when index creation actually went through.
            System.out.println("Index created successfully! (" + (end - start) / 1000L + " sec)");
        }
        catch (IOException e) {
            log.error("Index creation failed", e);
            System.exit(1);
        }
    }

    /**
     * Verifies that the given path denotes an existing regular file.
     *
     * @throws IllegalArgumentException if it does not
     */
    private static void checkFile(File file) {
        if (!file.isFile()) {
            throw new IllegalArgumentException("File \"" + file.getAbsolutePath() + "\" could not be found.");
        }
    }

    /**
     * Indexes the dictionary passed to the constructor and commits the index.
     *
     * <p>Documents are created by a fixed pool of worker threads. The side-file writers and the
     * index writer are closed only after the pool has terminated (or after waiting for it was
     * interrupted).</p>
     *
     * @throws IOException           if the index or one of the side files cannot be written
     * @throws IllegalStateException if no taxonomy map has been loaded yet
     */
    public void createIndex() throws IOException {
        if (this.id2tax == null) {
            // Without the map every worker would fail and an empty index would be committed.
            throw new IllegalStateException("The ID to taxonomy ID map must be read before the index is created.");
        }
        CandidateFilter cf = new CandidateFilter();
        IndexWriterConfig iwc = new IndexWriterConfig(new WhitespaceAnalyzer());
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        try (IndexWriter iw = new IndexWriter(this.indexDirectory, iwc)) {
            try (BufferedWriter outdictBw = FileUtilities.getWriterToFile(this.filteredDictFile);
                 BufferedWriter ambiguousSynonymsBw = FileUtilities.getWriterToFile(this.ambiguousSynsFile)) {
                log.info("Using up to {} threads for index document creation", INDEXING_THREADS);
                // Created only after all resources are open so the pool can never be left running.
                ExecutorService executorService = Executors.newFixedThreadPool(INDEXING_THREADS);
                try {
                    this.indexDictionary(this.dictFile, this.idSource, this.entityType, cf, executorService, iw, outdictBw, ambiguousSynonymsBw);
                }
                finally {
                    NameCentricSynonymIndexGenerator.shutdownAndAwait(executorService);
                }
            }
            log.info("Committing all index additions.");
            iw.commit();
        }
    }

    /**
     * Stops accepting new tasks and waits for the submitted ones. If the wait is interrupted,
     * running tasks are cancelled and the interrupt flag of the calling thread is restored.
     */
    private static void shutdownAndAwait(ExecutorService executorService) {
        executorService.shutdown();
        log.info("Waiting for running threads to terminate.");
        try {
            executorService.awaitTermination(100L, TimeUnit.DAYS);
        }
        catch (InterruptedException e) {
            log.warn("Waiting for running threads to finish has been interrupted. Shutting down the executor service now.");
            executorService.shutdownNow();
            Thread.currentThread().interrupt();
        }
        log.info("ExecutorService has been shut down.");
    }

    /**
     * Reads the dictionary line by line and submits one indexing task per run of consecutive
     * lines with the same synonym.
     *
     * @param dictFile            dictionary with lines of the form {@code synonym <TAB> id <TAB> priority}
     * @param idSource            value of the "source" field for non-family entries
     * @param entityType          value of the "entity_type" field
     * @param cf                  candidate filter handed to the indexing tasks; may be {@code null}
     * @param executorService     executes the indexing tasks; shutting it down is up to the caller
     * @param iw                  receives the created documents
     * @param outdictBw           writer for the filtered dictionary; may be {@code null}
     * @param ambiguousSynonymsBw writer for the intra-taxon ambiguous IDs; may be {@code null}
     * @throws IOException if the dictionary cannot be read
     */
    public void indexDictionary(File dictFile, String idSource, String entityType, CandidateFilter cf, ExecutorService executorService, IndexWriter iw, BufferedWriter outdictBw, BufferedWriter ambiguousSynonymsBw) throws IOException {
        try (BufferedReader br = FileUtilities.getReaderFromFile(dictFile)) {
            AtomicInteger counter = new AtomicInteger();
            String currentSynonym = null;
            List<String[]> entriesForCurrentSynonym = new ArrayList<>();
            String line;
            while ((line = br.readLine()) != null) {
                currentSynonym = this.processLine(line, currentSynonym, entriesForCurrentSynonym, counter, idSource, entityType, iw, outdictBw, ambiguousSynonymsBw, cf, executorService);
            }
            if (currentSynonym != null) {
                // A group is only flushed when the next synonym shows up, so feed a
                // well-formed sentinel line to flush the entries of the last synonym.
                this.processLine("$$END\tOF\tFILE$$", currentSynonym, entriesForCurrentSynonym, counter, idSource, entityType, iw, outdictBw, ambiguousSynonymsBw, cf, executorService);
            }
        }
    }

    /**
     * Handles a single dictionary line. When the line starts a new synonym, the entries gathered
     * for the previous synonym are handed to the executor and the buffer is cleared; the line's
     * columns are then appended to the buffer.
     *
     * <p>Lines with fewer than three columns cannot be indexed and are skipped.</p>
     *
     * @return the synonym subsequent lines have to be compared with
     */
    private String processLine(String line, String currentSynonym, List<String[]> entriesForCurrentSynonym, AtomicInteger counter, String idSource, String entityType, IndexWriter iw, BufferedWriter outdictBw, BufferedWriter ambiguousSynonymsBw, CandidateFilter cf, ExecutorService executorService) {
        String[] split = line.split("\t");
        if (split.length != DICT_COLUMNS) {
            System.err.println("ERR: normalized dictionary not in expected format. \ncritical line: " + line);
            if (split.length < DICT_COLUMNS) {
                // ID or priority is missing; queuing the line would make the indexing task
                // of the whole synonym fail later on.
                log.warn("Skipping dictionary line with fewer than {} columns: {}", DICT_COLUMNS, line);
                return currentSynonym;
            }
        }
        String synonym = split[0];
        if (currentSynonym != null && !synonym.equals(currentSynonym)) {
            String synonymToWrite = currentSynonym;
            // Snapshot for the worker; the shared buffer is reused for the next synonym.
            List<String[]> entriesToWrite = new ArrayList<>(entriesForCurrentSynonym);
            executorService.submit(() -> {
                try {
                    this.indexCurrentSynonymEntries(cf, idSource, entityType, iw, outdictBw, ambiguousSynonymsBw, counter, synonymToWrite, entriesToWrite);
                }
                catch (IOException | RuntimeException e) {
                    // The Future is discarded, so anything not logged here would be lost silently.
                    log.error("Could not create index document for synonym {}", synonymToWrite, e);
                }
            });
            entriesForCurrentSynonym.clear();
        }
        entriesForCurrentSynonym.add(split);
        return synonym;
    }

    /**
     * Creates the document for one synonym, adds it to the index and updates the side files.
     * Called concurrently from the worker threads; access to each side-file writer is
     * synchronized on the writer.
     *
     * <p>The document holds the synonym in four variants ("indexed_syn", "indexed_syn_stemmed",
     * "indexed_syn_ngrams", "indexed_syn_exact") followed by the ID, taxonomy, priority, source
     * and entity type fields of every entry.</p>
     *
     * @param entriesForCurrentSynonym the dictionary columns (synonym, id, priority) of all entries of the synonym
     * @throws IOException           if adding the document or writing a side file fails
     * @throws NumberFormatException if a priority column is not an integer
     */
    private void indexCurrentSynonymEntries(CandidateFilter cf, String idSource, String entityType, IndexWriter iw, BufferedWriter outdictBw, BufferedWriter ambiguousSynonymsBw, AtomicInteger counter, String currentSynonym, List<String[]> entriesForCurrentSynonym) throws IOException {
        Document doc = new Document();
        doc.add(new TextField("indexed_syn", currentSynonym, Field.Store.YES));
        // Each pre-analyzed field gets a token stream from its own analyzer instance.
        SnowballFilter stemmed = new SnowballFilter(new WhitespaceAnalyzer().tokenStream("indexed_syn_stemmed", currentSynonym), "English");
        doc.add(new TextField("indexed_syn_stemmed", stemmed));
        TokenFilter ngrams = this.nGramFilterFactory.create(new WhitespaceAnalyzer().tokenStream("indexed_syn_ngrams", currentSynonym));
        doc.add(new TextField("indexed_syn_ngrams", ngrams));
        doc.add(new StringField("indexed_syn_exact", currentSynonym, Field.Store.NO));

        List<Field> fields = new ArrayList<>();
        int minPriority = Integer.MAX_VALUE;
        // Number of reported entries per taxonomy ID and the IDs behind them.
        HashMultiset<String> taxIdsForSynonym = HashMultiset.create();
        HashMultimap<String, String> tax2Id = HashMultimap.create();
        for (String[] geneEntry : entriesForCurrentSynonym) {
            String id = geneEntry[1];
            boolean isFamilyDict = id.startsWith("GENO:");
            int priority = Integer.parseInt(geneEntry[2]);
            minPriority = Math.min(minPriority, priority);

            boolean filtered = false;
            if (cf != null && !OMIT_FILTERED && priority != -1) {
                filtered = DictionaryFamilyDomainFilter.isFiltered(id, cf, currentSynonym);
            }
            log.debug("ID: {}, synonym: {}, filtered out: {}", id, currentSynonym, filtered);

            // Family entries default to taxonomy "0", all others to the empty string.
            String tax = isFamilyDict ? "0" : "";
            String mappedTax = this.id2tax.get(id);
            if (mappedTax != null) {
                tax = mappedTax;
                if (priority <= MAX_REPORTED_PRIORITY) {
                    taxIdsForSynonym.add(tax);
                    tax2Id.put(tax, id);
                }
            }

            if (!OMIT_FILTERED) {
                fields.add(new IntPoint("filtered", filtered ? 1 : 0));
                fields.add(new StoredField("filtered", filtered ? 1 : 0));
            }
            // "entry_id" is added twice: the plain ID (not stored) and ID plus priority (stored).
            fields.add(new StringField("entry_id", id, Field.Store.NO));
            fields.add(new StringField("entry_id", id + "__" + priority, Field.Store.YES));
            fields.add(new StringField("tax_id", tax, Field.Store.YES));
            fields.add(new IntPoint("priority", priority));
            fields.add(new StringField("source", isFamilyDict ? "GenoFamilies" : idSource, Field.Store.YES));
            fields.add(new StringField("entity_type", entityType, Field.Store.YES));
        }

        if (!fields.isEmpty()) {
            for (Field f : fields) {
                doc.add(f);
            }
            iw.addDocument(doc);
            if (outdictBw != null && minPriority <= MAX_REPORTED_PRIORITY) {
                synchronized (outdictBw) {
                    outdictBw.write(currentSynonym + "\tGene");
                    outdictBw.newLine();
                }
            }
            if (ambiguousSynonymsBw != null) {
                // Iterate the distinct taxonomy IDs: iterating the multiset itself visits every
                // occurrence and would write the same IDs once per occurrence.
                for (String ambiguousTax : taxIdsForSynonym.elementSet()) {
                    if (taxIdsForSynonym.count(ambiguousTax) <= 1) {
                        continue;
                    }
                    Collection<String> intraAmbiguousIds = tax2Id.get(ambiguousTax);
                    synchronized (ambiguousSynonymsBw) {
                        for (String ambiguousId : intraAmbiguousIds) {
                            ambiguousSynonymsBw.write(ambiguousId);
                            ambiguousSynonymsBw.newLine();
                        }
                    }
                }
            }
        }

        int done = counter.incrementAndGet();
        if (done % 10000 == 0) {
            log.debug("# entries processed: {}", done);
        }
    }

    /**
     * Opens the file system directory for the index.
     *
     * @throws IllegalStateException if the directory cannot be opened; failing here avoids a
     *                               {@code null} directory that would only surface later
     */
    private FSDirectory createIndexDirectory(File indexFile) {
        try {
            return FSDirectory.open(indexFile.toPath());
        }
        catch (IOException e) {
            throw new IllegalStateException("Could not open index directory " + indexFile.getAbsolutePath(), e);
        }
    }

    /**
     * Loads {@link #id2tax} from a three-column, tab-separated map file: the first column is
     * used as ID, the third as taxonomy ID. Terminates the JVM on a malformed line.
     *
     * @throws IOException if the file cannot be read
     */
    private void readUpTaxMap(File taxMap) throws IOException {
        log.info("Reading up2eg2tax.map ...");
        this.id2tax = new HashMap<>();
        // try-with-resources: the reader is closed even if reading fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(taxMap))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] entry = line.split("\t");
                if (entry.length != 3) {
                    System.err.println("ERR: up2eg2tax.map not in expected format. \ncritical line: " + line);
                    System.exit(-1);
                }
                this.id2tax.put(entry[0].trim(), entry[2].trim());
            }
        }
    }

    /**
     * Loads {@link #id2tax} from a gzipped, tab-separated file whose first column is the
     * taxonomy ID and whose second column is the entry ID. Lines with fewer than two columns
     * are skipped; if an ID occurs repeatedly, the last occurrence wins.
     *
     * @throws IOException if the file cannot be read or is not valid gzip
     */
    private void readEgTaxMap(File geneInfo) throws IOException {
        Map<String, String> taxById = new HashMap<>();
        try (BufferedReader br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(geneInfo))))) {
            String line;
            while ((line = br.readLine()) != null) {
                // Only the first two columns are needed; the rest of the line stays unsplit.
                String[] columns = line.split("\\t", 3);
                if (columns.length < 2) {
                    continue;
                }
                String previousTax = taxById.put(columns[1], columns[0]);
                if (previousTax != null && !previousTax.equals(columns[0])) {
                    log.warn("ID {} is mapped to taxonomy IDs {} and {}; using the latter.", columns[1], previousTax, columns[0]);
                }
            }
        }
        // Publish the map only after the file has been read completely.
        this.id2tax = taxById;
    }
}

