/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.resources;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import de.julielab.geneexpbase.CandidateFilter;
import de.julielab.geneexpbase.TermNormalizer;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.genemapper.resources.DictionaryFamilyDomainFilter;
import de.julielab.java.utilities.FileUtilities;
import de.julielab.java.utilities.ProgressBar;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.lang.ref.SoftReference;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.GZIPInputStream;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Builds a Lucene index with one document per gene record.
 *
 * <p>Records are read from a tab-separated dictionary file with the columns
 * {@code normalized name}, {@code id} and {@code priority}. Consecutive lines
 * sharing the same ID are collected into one record. Each record is enriched
 * with a taxonomy ID (see {@link #id2tax}) and with the values found in the
 * configured extended information files, and is then written to the index by
 * a pool of worker threads.
 */
public class GeneRecordIndexGenerator {
    private static final Logger log = LoggerFactory.getLogger(GeneRecordIndexGenerator.class);
    /**
     * NOTE(review): while this is {@code true}, the family/domain filter in
     * {@link #createIndex()} is never evaluated, so no dictionary line is
     * actually omitted and no "filtered" field is written. Confirm that this
     * is the intended behavior.
     */
    private static final boolean OMIT_FILTERED = true;
    /** Substring that marks an ID as a gene family entry rather than a gene. */
    private static final String FAMILY_ID_MARKER = "GENO:";
    /** Dictionary file name for which {@link #main} selects the "original names" index directories. */
    private static final String ORIGINAL_NAMES_DICT = "gene.dict.uniqueprioritynames.sortedbyid.eg";
    /** Number of worker threads used for indexing. */
    private static final int NUM_INDEXING_THREADS = 20;

    private final Directory indexDirectory;
    private final File dictFile;
    /** Maps an information type (used as index field name) to the file that provides it. */
    private final Map<String, File> extendedInformationFields;
    /** Maps record IDs to taxonomy IDs; must be populated before {@link #indexGeneRecord} is called. */
    Map<String, String> id2tax;
    /**
     * Soft references to all documents added to the index; guarded by synchronizing on the set itself.
     * NOTE(review): nothing in this class reads the set and it grows with every indexed record -
     * confirm it is still needed.
     */
    Set<SoftReference<Document>> documents = new HashSet<>();

    /**
     * Creates a generator that writes to a file system index directory.
     *
     * @param dictFile                  tab-separated dictionary file (name, id, priority)
     * @param extendedInformationFields information type to file mapping; iterated in map order
     * @param indexFile                 directory in which the index is created
     * @throws IOException if the index directory cannot be opened
     */
    public GeneRecordIndexGenerator(File dictFile, Map<String, File> extendedInformationFields, File indexFile) throws FileNotFoundException, IOException {
        this.extendedInformationFields = extendedInformationFields;
        log.info("Building gene records index from dictionary {}", dictFile);
        this.dictFile = dictFile;
        this.indexDirectory = FSDirectory.open(indexFile.toPath());
    }

    /**
     * Command line entry point. Expects the dictionary file name (relative to
     * the resources directory), the resources directory and the directory in
     * which the index directories are created. Existing gene and protein
     * index directories are deleted before the gene index is rebuilt.
     *
     * <p>The process exits with status 1 on wrong usage, on a missing
     * resources directory and when index creation fails with an
     * {@link IOException}. The success message is only printed when the index
     * has actually been created.
     *
     * @param args dictionary file name, resources directory, index parent directory
     */
    public static void main(String[] args) {
        long s1 = System.currentTimeMillis();
        if (args.length != 3) {
            System.err.println("Usage: GeneRecordIndexGenerator <dictionary file> <resourcesDirectory> <geneRecordIndicesDirectory>");
            System.exit(1);
        }
        String dictFile = args[0];
        String resPath = args[1];
        File resDir = new File(resPath);
        if (!resDir.isDirectory()) {
            System.err.println("Could not find resources directory");
            System.exit(1);
        }
        if (!resPath.endsWith(File.separator)) {
            resPath = resPath + File.separator;
        }
        String indexPath = args[2];
        boolean originalNames = dictFile.equals(ORIGINAL_NAMES_DICT);
        File geneIndexDir = new File(indexPath, originalNames ? "geneNamesRecordsIndexOriginalNames" : "geneRecordsIndex");
        File proteinIndexDir = new File(indexPath, originalNames ? "ProteinRecordsIndexOriginalNames" : "proteinRecordsIndex");
        if (geneIndexDir.exists()) {
            FileUtils.deleteQuietly(geneIndexDir);
        }
        if (proteinIndexDir.exists()) {
            FileUtils.deleteQuietly(proteinIndexDir);
        }
        // These files are only checked for existence here; a missing file aborts the run early.
        GeneRecordIndexGenerator.checkFile(new File(resPath + "gene.dict.up"));
        File egDictFile = new File(resPath + dictFile);
        GeneRecordIndexGenerator.checkFile(egDictFile);

        // Insertion order matters: "go" must be read before "godesc" because the
        // GO descriptions are attached via the mapping built from the "go" file.
        Map<String, File> extendedInformationFields = new LinkedHashMap<>();
        extendedInformationFields.put("chromosome", new File(resPath + "eg2chromosome"));
        extendedInformationFields.put("description", new File(resPath + "eg2description"));
        extendedInformationFields.put("generif", new File(resPath + "eg2generif"));
        extendedInformationFields.put("go", new File(resPath + "eg2go"));
        extendedInformationFields.put("godesc", new File(resPath + "go_all"));
        extendedInformationFields.put("interaction", new File(resPath + "eg2interaction"));
        extendedInformationFields.put("maplocation", new File(resPath + "eg2maplocation"));
        extendedInformationFields.put("summary", new File(resPath + "eg2summary"));
        extendedInformationFields.put("ecnumber", new File(resPath + "eg2ecnumber-genexmldownloader.gz"));

        GeneRecordIndexGenerator.checkFile(new File(resPath + "up2eg2tax.map"));
        File geneInfo = new File(resPath + "gene_info_organism_filtered.gz");
        try {
            GeneRecordIndexGenerator indexGenerator = new GeneRecordIndexGenerator(egDictFile, extendedInformationFields, geneIndexDir);
            indexGenerator.readEgTaxMap(geneInfo);
            indexGenerator.createIndex();
        } catch (IOException e) {
            // Do not report success for an index that could not be written.
            log.error("Index creation failed", e);
            System.exit(1);
        }
        long s2 = System.currentTimeMillis();
        System.out.println("Index created successfully! (" + (s2 - s1) / 1000L + " sec)");
    }

    /**
     * Verifies that the given path denotes an existing regular file.
     *
     * @throws IllegalArgumentException if it does not
     */
    private static void checkFile(File file) {
        if (!file.isFile()) {
            throw new IllegalArgumentException("File \"" + file.getAbsolutePath() + "\" could not be found.");
        }
    }

    /**
     * Reads the dictionary file and writes one index document per record ID.
     *
     * <p>Lines that do not consist of exactly three tab-separated columns are
     * reported on standard error and skipped. Consecutive lines with the same
     * ID form one record; when the ID changes, the finished record is handed
     * to a worker thread. After all jobs have completed, the index is
     * committed and force-merged to at most five segments.
     *
     * <p>If the calling thread is interrupted while waiting for the workers,
     * the remaining jobs are cancelled, whatever has been indexed so far is
     * still committed, and the interrupt status is restored before returning.
     *
     * @throws IOException if the dictionary cannot be read or the index cannot be written
     */
    public void createIndex() throws IOException {
        CandidateFilter cf = new CandidateFilter();
        TermNormalizer normalizer = new TermNormalizer();
        Map<String, Multimap<String, String>> id2infotype2info = this.readExtendedInformationFiles();

        FieldType notStoredTextFieldType = new FieldType(TextField.TYPE_NOT_STORED);
        notStoredTextFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
        FieldType storedTextFieldType = new FieldType(TextField.TYPE_STORED);
        storedTextFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
        FieldType stringFieldTypeDocsAndFreqs = new FieldType(StringField.TYPE_STORED);
        stringFieldTypeDocsAndFreqs.setIndexOptions(IndexOptions.DOCS_AND_FREQS);

        Analyzer wsAnalyzer = new WhitespaceAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(wsAnalyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        AtomicInteger numDocumentsIndexed = new AtomicInteger();
        boolean interrupted = false;
        try (IndexWriter iw = new IndexWriter(this.indexDirectory, iwc)) {
            log.info("Counting number of lines of the dictionary file {}", this.dictFile);
            long numLines = this.countDictionaryLines();
            log.info("Generating index now for {} synonyms.", numLines);
            ProgressBar progressBar = new ProgressBar(numLines, 80);
            ExecutorService executorService = Executors.newFixedThreadPool(NUM_INDEXING_THREADS);
            try (BufferedReader dictReader = new BufferedReader(new FileReader(this.dictFile))) {
                Map<String, Integer> syn2prio = new HashMap<>();
                String lastId = null;
                boolean filtered = false;
                int lineNum = 1;
                String line;
                while ((line = dictReader.readLine()) != null) {
                    String[] values = line.split("\t");
                    if (values.length != 3) {
                        System.err.println("ERR: normalized dictionary not in expected format. \ncritical line: " + line);
                        continue;
                    }
                    String normalizedName = values[0];
                    String id = values[1];
                    int priority = Integer.parseInt(values[2]);
                    boolean isFamilyEntry = id.contains(FAMILY_ID_MARKER);
                    // NOTE(review): the flag is computed for the current line but is passed on
                    // with the previous record below; only relevant if OMIT_FILTERED is false.
                    if (!OMIT_FILTERED && priority != -1 && !isFamilyEntry) {
                        filtered = DictionaryFamilyDomainFilter.isFiltered(id, cf, normalizedName);
                    }
                    if (filtered && OMIT_FILTERED) {
                        continue;
                    }
                    if (lastId != null && !lastId.equals(id)) {
                        log.trace("Indexing gene record with ID {} and synonym/priority pairs {}", lastId, syn2prio);
                        // The job gets its own copy because syn2prio is reused for the next record.
                        this.submitIndexingJob(executorService, lastId, new HashMap<>(syn2prio), id2infotype2info, filtered, normalizer, iw, notStoredTextFieldType, storedTextFieldType, stringFieldTypeDocsAndFreqs, numDocumentsIndexed);
                        syn2prio.clear();
                    }
                    syn2prio.put(normalizedName, priority);
                    lastId = id;
                    if (lineNum % 1000 == 0 && !log.isDebugEnabled()) {
                        progressBar.incrementDone((long) lineNum - progressBar.getDone(), true);
                    }
                    ++lineNum;
                }
                // The last record has no following ID change to trigger it. An empty or
                // entirely malformed dictionary yields no record at all.
                if (lastId != null) {
                    this.submitIndexingJob(executorService, lastId, new HashMap<>(syn2prio), id2infotype2info, filtered, normalizer, iw, notStoredTextFieldType, storedTextFieldType, stringFieldTypeDocsAndFreqs, numDocumentsIndexed);
                }
                log.info("Dictionary file {} has been consumed, all indexing jobs have been sent.", this.dictFile);
            } finally {
                interrupted = GeneRecordIndexGenerator.shutdownAndAwait(executorService);
            }
            log.info("Committing {} documents to the index.", numDocumentsIndexed.get());
            iw.commit();
            iw.forceMerge(5);
        } finally {
            // Restored only after the writer has been closed so that the interrupt
            // status does not interfere with the final index I/O.
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /** Counts the lines of the dictionary file; used to size the progress bar. */
    private long countDictionaryLines() throws IOException {
        try (BufferedReader normDictReader = new BufferedReader(new FileReader(this.dictFile))) {
            return normDictReader.lines().count();
        }
    }

    /**
     * Hands one gene record to the executor. Failures are logged together with
     * the gene ID because the returned future is not inspected.
     */
    private void submitIndexingJob(ExecutorService executorService, String geneId, Map<String, Integer> syn2prio, Map<String, Multimap<String, String>> id2infotype2info, boolean filtered, TermNormalizer normalizer, IndexWriter iw, FieldType notStoredTextFieldType, FieldType storedTextFieldType, FieldType stringFieldTypeDocsAndFreqs, AtomicInteger numDocumentsIndexed) {
        executorService.submit(() -> {
            try {
                this.indexGeneRecord(geneId, syn2prio, id2infotype2info, filtered, normalizer, iw, notStoredTextFieldType, storedTextFieldType, stringFieldTypeDocsAndFreqs, numDocumentsIndexed);
            } catch (IOException | RuntimeException e) {
                log.error("Could not create index document for gene id {}", geneId, e);
            }
        });
    }

    /**
     * Shuts the executor down and waits for the submitted jobs to finish.
     *
     * @return {@code true} if the wait was interrupted, in which case the
     *         remaining jobs have been cancelled via {@code shutdownNow()}
     */
    private static boolean shutdownAndAwait(ExecutorService executorService) {
        boolean interrupted = false;
        try {
            log.info("Shutting down executor.");
            log.info("Waiting for running threads to terminate.");
            executorService.shutdown();
            executorService.awaitTermination(100L, TimeUnit.DAYS);
        } catch (InterruptedException e) {
            log.warn("Waiting for running threads to finish has been interrupted. Shutting down the executor service now.");
            executorService.shutdownNow();
            interrupted = true;
        }
        log.info("ExecutorService has been shut down.");
        return interrupted;
    }

    /**
     * Loads all extended information files into a map from record ID to
     * (information type, value) pairs.
     *
     * <p>Every file is tab-separated; the first column is used as record ID
     * and the second as value. Lines with fewer than two columns or with a
     * blank ID or value are skipped. Two information types are special:
     * <ul>
     *   <li>{@code "go"} is not added to the result. Its lines only build a
     *       mapping from the second column to the first column.</li>
     *   <li>{@code "godesc"} lines have their first column translated through
     *       that mapping; lines without a mapping are skipped. The "go" file
     *       therefore has to be read first.</li>
     * </ul>
     * NOTE(review): the mapping keeps a single first-column value per
     * second-column value (the last line wins), so each "godesc" line is
     * attached to at most one record - confirm this is intended.
     *
     * <p>A file that cannot be read is logged and skipped. The returned map is
     * a {@link ConcurrentHashMap} because {@link #indexGeneRecord} reads and
     * removes entries from several worker threads at once.
     */
    private Map<String, Multimap<String, String>> readExtendedInformationFiles() {
        Map<String, String> goid2eg = new HashMap<>();
        Map<String, Multimap<String, String>> infoMap = new ConcurrentHashMap<>();
        log.info("Loading extended information files");
        for (Map.Entry<String, File> entry : this.extendedInformationFields.entrySet()) {
            String informationType = entry.getKey();
            File f = entry.getValue();
            log.info("Reading {}", f);
            try (BufferedReader br = FileUtilities.getReaderFromFile(f)) {
                String line;
                while ((line = br.readLine()) != null) {
                    String[] columns = line.split("\t");
                    if (columns.length < 2) {
                        continue;
                    }
                    if (informationType.equals("go")) {
                        goid2eg.put(columns[1].intern(), columns[0].intern());
                        continue;
                    }
                    String recordId = informationType.equals("godesc") ? goid2eg.get(columns[0]) : columns[0];
                    String value = columns[1];
                    if (recordId == null || recordId.isBlank() || value.isBlank()) {
                        continue;
                    }
                    infoMap.computeIfAbsent(recordId.intern(), k -> HashMultimap.<String, String>create()).put(informationType.intern(), value);
                }
            } catch (IOException e) {
                log.error("Could not read file {}. The respective extended information will not be added to the index", f, e);
            }
        }
        return infoMap;
    }

    /**
     * Creates the index document for a single record and adds it to the index.
     *
     * <p>The document contains the entity type, one analyzed and one exact
     * ({@code <field>_exact}) field per synonym, the normalized extended
     * information values, the record ID and the taxonomy ID (empty if
     * unknown). "godesc" values are split at {@code |} into separate field
     * values. The record's entry is removed from {@code id2infotype2info}
     * once it has been consumed.
     *
     * <p>This method is called concurrently by the worker threads of
     * {@link #createIndex()}; {@code id2infotype2info} must therefore be safe
     * for concurrent use in that setting.
     *
     * @param id                          record ID; IDs containing {@code GENO:} are family entries
     * @param syn2prio                    normalized synonym to priority (-1 to 7) mapping
     * @param id2infotype2info            extended information per record ID
     * @param filtered                    filter flag, only stored if filtered records are not omitted
     * @param normalizer                  normalizer applied to extended information values
     * @param iw                          writer the document is added to
     * @param notStoredTextFieldType      currently unused
     * @param storedTextFieldType         field type for analyzed text fields
     * @param stringFieldTypeDocsAndFreqs field type for the exact synonym fields
     * @param numDocumentsIndexed         counter incremented for each added document
     * @throws IOException              if the document cannot be added to the index
     * @throws IllegalArgumentException if a synonym has an unsupported priority
     */
    public void indexGeneRecord(String id, Map<String, Integer> syn2prio, Map<String, Multimap<String, String>> id2infotype2info, boolean filtered, TermNormalizer normalizer, IndexWriter iw, FieldType notStoredTextFieldType, FieldType storedTextFieldType, FieldType stringFieldTypeDocsAndFreqs, AtomicInteger numDocumentsIndexed) throws IOException {
        try {
            String tax = this.id2tax.get(id);
            if (tax == null) {
                tax = "";
            }
            Document d = new Document();
            String entityType = id.contains(FAMILY_ID_MARKER) ? GeneMention.SpecificType.FAMILYNAME.name() : GeneMention.SpecificType.GENE.name();
            d.add(new StringField("entity_type", entityType, Field.Store.YES));
            for (Map.Entry<String, Integer> synonym : syn2prio.entrySet()) {
                String normalizedName = synonym.getKey();
                String fieldname = GeneRecordIndexGenerator.fieldNameForPriority(synonym.getValue());
                log.trace("Now adding field {} for synonym {} for ID {}", fieldname, normalizedName, id);
                d.add(new Field(fieldname, normalizedName, storedTextFieldType));
                d.add(new Field(fieldname + "_exact", normalizedName, stringFieldTypeDocsAndFreqs));
            }
            if (!OMIT_FILTERED) {
                int filteredValue = filtered ? 1 : 0;
                d.add(new IntPoint("filtered", filteredValue));
                d.add(new StoredField("filtered", filteredValue));
            }
            Multimap<String, String> infotype2info = id2infotype2info.get(id);
            if (infotype2info != null) {
                for (String infotype : infotype2info.keySet()) {
                    boolean isGoDescription = infotype.equals("godesc");
                    for (String value : infotype2info.get(infotype)) {
                        if (!isGoDescription) {
                            d.add(new Field(infotype, normalizer.normalize(value), storedTextFieldType));
                            continue;
                        }
                        for (String v : value.split("\\|")) {
                            d.add(new Field(infotype, normalizer.normalize(v), storedTextFieldType));
                        }
                    }
                }
            }
            // The information is not needed any more once the record has been built.
            id2infotype2info.remove(id);
            d.add(new StringField("entry_id", id, Field.Store.YES));
            d.add(new StringField("tax_id", tax, Field.Store.YES));
            iw.addDocument(d);
            numDocumentsIndexed.incrementAndGet();
            synchronized (this.documents) {
                this.documents.add(new SoftReference<>(d));
            }
        } catch (Throwable t) {
            // Logged here because callers running this in an executor may drop the exception.
            log.error("Error occurred", t);
            throw t;
        }
    }

    /**
     * Maps a dictionary priority to the name of the index field that holds synonyms of that priority.
     *
     * @throws IllegalArgumentException if the priority is not in the range -1 to 7
     */
    private static String fieldNameForPriority(int priority) {
        switch (priority) {
            case -1:
                return "symbol";
            case 0:
                return "symbol_from_nomenclature";
            case 1:
                return "full_names";
            case 2:
                return "synonyms";
            case 3:
                return "other_designations";
            case 4:
                return "protein_names";
            case 5:
                return "uniprot_names";
            case 6:
                return "xrefs";
            case 7:
                return "bio_thesaurus";
            default:
                throw new IllegalArgumentException("Unsupported synonym priority: " + priority);
        }
    }

    /**
     * Populates {@link #id2tax} from a three-column, tab-separated file,
     * mapping the trimmed first column to the trimmed third column. Later
     * lines overwrite earlier ones with the same ID. The JVM is terminated
     * with status -1 if a line does not have exactly three columns.
     */
    private void readUpTaxMap(File taxMap) throws IOException {
        log.info("Reading up2eg2tax.map ...");
        this.id2tax = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(taxMap))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] entry = line.split("\t");
                if (entry.length != 3) {
                    System.err.println("ERR: up2eg2tax.map not in expected format. \ncritical line: " + line);
                    System.exit(-1);
                }
                String id = entry[0].trim().intern();
                String taxId = entry[2].trim().intern();
                this.id2tax.put(id, taxId);
            }
        }
    }

    /**
     * Populates {@link #id2tax} from a gzipped, tab-separated file, mapping
     * the second column (key) to the first column (value).
     *
     * @throws IOException           if the file cannot be opened or is not valid gzip
     * @throws IllegalStateException if the same second-column value occurs on more than one line
     */
    private void readEgTaxMap(File geneInfo) throws IOException {
        log.info("Reading file gene ID to taxonomy ID map from {}", geneInfo);
        // Separate resources so that the file stream is closed even if the gzip header is invalid.
        try (FileInputStream fileIn = new FileInputStream(geneInfo);
             GZIPInputStream gzipIn = new GZIPInputStream(fileIn);
             BufferedReader br = new BufferedReader(new InputStreamReader(gzipIn))) {
            this.id2tax = br.lines()
                    .map(l -> l.split("\\t", 3))
                    .collect(Collectors.toMap(columns -> columns[1].intern(), columns -> columns[0].intern()));
        }
    }
}

