/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.speciesassignment.resources;

import de.julielab.geneexpbase.GeneExpRuntimeException;
import de.julielab.geneexpbase.TermNormalizer;
import de.julielab.java.utilities.FileUtilities;
import de.julielab.java.utilities.ProgressBar;
import de.julielab.speciesassignment.candidateretrieval.SimpleIndexFieldNames;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NIOFSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Builds a Lucene index of gene names from an NCBI {@code gene_info} file.
 *
 * <p>Each non-comment line of the file becomes one Lucene document holding the tax ID, the gene ID
 * and every usable name of the gene (symbol, synonyms, nomenclature-authority names and other
 * designations), each in its normalized and its lower-cased normalized form. If the gene_info file
 * does not exist it is downloaded from {@link #GENE_INFO_URL} first.
 */
public class SimpleCandidateIndexGenerator {
    public static final String GENE_INFO_URL = "https://ftp.ncbi.nih.gov/gene/DATA/gene_info.gz";
    /** Maximum number of gene_info lines that may be queued or in processing at the same time. */
    public static final int GENE_INFO_LINE_CACHE_SIZE = 1000;
    private static final Logger log = LoggerFactory.getLogger(SimpleCandidateIndexGenerator.class);
    /** The highest column index read from a line is 13, so at least 14 columns are required. */
    private static final int MIN_COLUMNS = 14;
    /** Token object handed around in the permit pool; its identity carries no meaning. */
    private static final Object PERMIT = new Object();
    private static final Pattern DIGITS_ONLY = Pattern.compile("[0-9]+");
    private static final Pattern CONTAINS_LETTER = Pattern.compile(".*[a-zA-Z].*");
    // NOTE(review): the normalizer is shared by all worker threads - confirm TermNormalizer is thread-safe.
    private final TermNormalizer normalizer = new TermNormalizer();

    /**
     * Command line entry point.
     *
     * @param args exactly two elements: the path to the gene_info file (or the download
     *             destination) and the path of the index directory to create
     * @throws IOException if reading gene_info or writing the index fails
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 2) {
            System.err.println("This class creates a Lucene index from the NCBI gene_info file for all the names of all the genes. If the given gene_info path does not exist, the file is downloaded from NCBI first. If gene_info is a directory, the gene_info.gz file will be downloaded into that directory." + System.getProperty("line.separator") + "Usage: " + SimpleCandidateIndexGenerator.class.getSimpleName() + " <path to gene_info or download destination> <path to gene name index destination>");
            // Without the two paths there is nothing to do; do not fall through to args[0].
            System.exit(1);
        }
        File geneInfo = new File(args[0]);
        Path indexPath = Path.of(args[1]);
        SimpleCandidateIndexGenerator indexGenerator = new SimpleCandidateIndexGenerator();
        indexGenerator.createIndex(geneInfo, indexPath);
    }

    /**
     * Creates the gene name index at {@code indexPath}, replacing any index already there.
     *
     * @param geneInfo  the gene_info file, or a directory in which {@code gene_info.gz} is expected;
     *                  if the file does not exist it is downloaded to this location
     * @param indexPath the directory of the Lucene index to create
     * @throws IOException           if reading gene_info or writing the index fails
     * @throws IllegalStateException if the gene_info file does not exist and could not be downloaded
     */
    public void createIndex(File geneInfo, Path indexPath) throws IOException {
        if (geneInfo.isDirectory()) {
            geneInfo = new File(geneInfo, "gene_info.gz");
        }
        IOException downloadFailure = null;
        if (!geneInfo.exists()) {
            if (!geneInfo.getName().endsWith(".gz")) {
                log.warn("The specified location of the gene_info file is {}. However, this file does not exist and does not specify the .gz extension. The gene_info.gz file downloaded from NCBI is in GZIP format.", geneInfo);
            }
            log.info("Could not find file {}. Downloading gene_info to this location.", geneInfo);
            try {
                this.downloadGeneInfo(geneInfo);
                log.info("Successfully downloaded gene_info file to {}.", geneInfo);
            } catch (IOException e) {
                log.error("The download of the NCBI gene_info.gz file from {} failed.", GENE_INFO_URL, e);
                downloadFailure = e;
            }
        }
        if (!geneInfo.exists()) {
            throw new IllegalStateException("Path to gene_info file " + geneInfo.getAbsolutePath() + " does not exist. Cannot continue.", downloadFailure);
        }
        log.info("Creating gene name index from gene_info file {} and indexing to {}.", geneInfo, indexPath);
        // The IndexWriter does not close the directory it writes to, so it is closed here.
        try (FSDirectory indexDirectory = NIOFSDirectory.open(indexPath);
             WhitespaceAnalyzer wsAnalyzer = new WhitespaceAnalyzer()) {
            IndexWriterConfig iwc = new IndexWriterConfig(wsAnalyzer);
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            try (IndexWriter iw = new IndexWriter(indexDirectory, iwc)) {
                this.createIndex(geneInfo, iw);
                log.info("Merging the index into a single segment.");
                iw.forceMerge(1);
            }
        }
        log.info("Finished index creation.");
    }

    /**
     * Reads {@code geneInfo} line by line and indexes every non-comment line on a worker pool.
     *
     * <p>At most {@link #GENE_INFO_LINE_CACHE_SIZE} lines are queued or in processing at any time.
     * Lines that cannot be indexed are logged and skipped. The method returns after all submitted
     * lines have been processed.
     */
    private void createIndex(File geneInfo, IndexWriter iw) throws IOException {
        log.debug("Reading the number of lines in {}", geneInfo);
        int numLines = this.countDataLines(geneInfo);
        log.debug("Got {} lines.", numLines);
        ProgressBar progressBar = new ProgressBar(numLines, 0L, 80, true);
        log.info("Indexing the lines of {} now", geneInfo);
        int numThreads = Math.max(1, Runtime.getRuntime().availableProcessors() - 1);
        log.info("Using {} threads.", numThreads);
        ExecutorService executorService = Executors.newFixedThreadPool(numThreads);
        // The permit pool is local to this run so that the generator instance can be reused.
        BlockingQueue<Object> permits = new ArrayBlockingQueue<>(GENE_INFO_LINE_CACHE_SIZE);
        for (int i = 0; i < GENE_INFO_LINE_CACHE_SIZE; ++i) {
            permits.add(PERMIT);
        }
        String line = null;
        boolean interrupted = false;
        try (BufferedReader br = FileUtilities.getReaderFromFile(geneInfo)) {
            while ((line = br.readLine()) != null) {
                if (line.startsWith("#")) {
                    continue;
                }
                final String geneInfoLine = line;
                // Block until a slot is free BEFORE submitting, so the backlog stays bounded.
                permits.take();
                executorService.execute(() -> {
                    try {
                        this.indexLine(geneInfoLine, iw);
                    } finally {
                        // Always hand the permit back, otherwise failing lines would starve the reader.
                        permits.offer(PERMIT);
                    }
                });
                progressBar.incrementDone(1L, true);
            }
        } catch (IOException e) {
            log.error("Exception while reading from {}. Current gene_info line is {}", geneInfo, line, e);
            throw e;
        } catch (InterruptedException e) {
            interrupted = true;
            log.error("Exception while waiting for a new work item to become available.");
            throw new GeneExpRuntimeException(e);
        } finally {
            log.info("Shutting down executor.");
            log.info("Waiting for running threads to terminate.");
            executorService.shutdown();
            try {
                executorService.awaitTermination(100L, TimeUnit.DAYS);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                log.error("Could not waiting for the termination of the executor service.");
                throw new GeneExpRuntimeException(e);
            }
            if (interrupted) {
                // Restore the flag only after the already submitted lines have been drained.
                Thread.currentThread().interrupt();
            }
        }
        log.info("Indexing finished.");
    }

    /** Counts the lines of {@code geneInfo} that do not start with {@code #}. */
    private int countDataLines(File geneInfo) throws IOException {
        int numLines = 0;
        try (BufferedReader br = FileUtilities.getReaderFromFile(geneInfo)) {
            String current;
            while ((current = br.readLine()) != null) {
                if (!current.startsWith("#")) {
                    ++numLines;
                }
            }
        } catch (IOException e) {
            log.error("Exception while counting the number of lines of {}", geneInfo);
            throw e;
        }
        return numLines;
    }

    /** Indexes one gene_info line; failures are logged and the line is skipped. */
    private void indexLine(String geneInfoLine, IndexWriter iw) {
        String[] columns = geneInfoLine.split("\t");
        if (columns.length < MIN_COLUMNS) {
            log.warn("Skipping gene info line with {} instead of at least {} columns: {}", columns.length, MIN_COLUMNS, geneInfoLine);
            return;
        }
        try {
            iw.addDocument(this.buildDocument(columns));
        } catch (IOException e) {
            log.error("Exception when trying to add document to the index writer. Gene info line is {}", geneInfoLine, e);
        } catch (RuntimeException e) {
            log.error("Exception while indexing gene info line {}", geneInfoLine, e);
        }
    }

    /** Builds the index document with the IDs and all name variants of one gene_info line. */
    private Document buildDocument(String[] columns) {
        Document document = new Document();
        document.add(new StringField("tax_id", columns[0], Field.Store.YES));
        document.add(new StringField("gene_id", columns[1], Field.Store.YES));
        for (String name : collectNames(columns)) {
            // Locale.ROOT keeps the index content independent of the JVM's default locale.
            String lcNormalizedName = this.normalizer.normalize(name.toLowerCase(Locale.ROOT));
            String normalizedName = this.normalizer.normalize(name);
            addName(document, normalizedName);
            if (!lcNormalizedName.equals(normalizedName)) {
                addName(document, lcNormalizedName);
            }
        }
        return document;
    }

    /** Adds {@code name} to the tokenized and to the exact-match name field of {@code document}. */
    private static void addName(Document document, String name) {
        document.add(new TextField("names_tokenized", name, Field.Store.YES));
        document.add(new StringField("names_exact", name, Field.Store.YES));
    }

    /**
     * Collects the names of one gene: symbol (column 2), synonyms (4), symbol and full name from the
     * nomenclature authority (10, 11) and other designations (13). Names are trimmed; the
     * placeholder {@code -}, pure numbers and names without any ASCII letter are dropped.
     */
    private static List<String> collectNames(String[] columns) {
        List<String> candidates = new ArrayList<>();
        candidates.add(columns[2]);
        candidates.addAll(Arrays.asList(columns[4].split("\\|")));
        candidates.add(columns[10]);
        candidates.add(columns[11]);
        candidates.addAll(Arrays.asList(columns[13].split("\\|")));
        return candidates.stream()
                .filter(Objects::nonNull)
                .map(String::trim)
                .filter(name -> !name.equals("-"))
                .filter(name -> !DIGITS_ONLY.matcher(name).matches())
                .filter(name -> CONTAINS_LETTER.matcher(name).matches())
                .collect(Collectors.toList());
    }

    /**
     * Downloads the file at {@link #GENE_INFO_URL} to {@code geneInfo}.
     *
     * <p>If the download fails, the partially written file is deleted so that a later existence
     * check does not mistake it for a complete gene_info file.
     *
     * @throws IOException if connecting, reading or writing fails
     */
    private void downloadGeneInfo(File geneInfo) throws IOException {
        URLConnection connection = new URL(GENE_INFO_URL).openConnection();
        // Open the remote stream first: a connection failure must not create an empty target file.
        try (BufferedInputStream bis = new BufferedInputStream(connection.getInputStream());
             BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(geneInfo))) {
            bis.transferTo(bos);
        } catch (IOException e) {
            if (geneInfo.exists() && !geneInfo.delete()) {
                log.warn("Could not delete the incompletely downloaded file {}", geneInfo);
            }
            throw e;
        }
    }
}

