/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.resources;

import de.julielab.geneexpbase.CandidateFilter;
import de.julielab.geneexpbase.TermNormalizer;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SynonymIndexGenerator {
    private static final Logger log = LoggerFactory.getLogger(SynonymIndexGenerator.class);
    private static final Boolean OMIT_FILTERED = true;
    private static final int MAX_SYNLENGTH = 8;
    private static final int MIN_SYNLENGTH = 2;
    private final File dictFile;
    Map<String, String> id2tax;
    Directory indexDirectory;
    private static final boolean debug = false;

    public static void main(String[] args) {
        Object indexPath;
        File geneInfo;
        Object resPath;
        File resDir;
        long s1 = System.currentTimeMillis();
        if (args.length != 3) {
            System.err.println("Usage: SynonymIndexGenerator <resourcesDirectory> <gene_info file name> <geneSynonymIndicesDirectory>");
            System.exit(1);
        }
        if (!(resDir = new File((String)(resPath = args[0]))).isDirectory()) {
            System.err.println("Could not find resources directory");
            System.exit(1);
        }
        if (!((String)resPath).endsWith(File.separator)) {
            resPath = (String)resPath + File.separator;
        }
        if (!(geneInfo = new File((String)resPath + args[1])).exists()) {
            System.err.println("Gene info file could not be found at " + geneInfo.getAbsolutePath());
            System.exit(1);
        }
        if (!((String)(indexPath = args[2])).endsWith("/")) {
            indexPath = (String)indexPath + "/";
        }
        File geneIndexDir = new File((String)indexPath + "geneSynonymIndex");
        File proteinIndexDir = new File((String)indexPath + "proteinSynonymIndex");
        File upDictFile = new File((String)resPath + "gene.dict.up");
        SynonymIndexGenerator.checkFile(upDictFile);
        File egDictFile = new File((String)resPath + "gene.dict.eg");
        SynonymIndexGenerator.checkFile(egDictFile);
        File upTaxMap = new File((String)resPath + "up2eg2tax.map");
        SynonymIndexGenerator.checkFile(upTaxMap);
        File egTaxMap = geneInfo;
        try {
            SynonymIndexGenerator indexGenerator = new SynonymIndexGenerator(egDictFile, geneIndexDir);
            indexGenerator.readEgTaxMap(egTaxMap);
            indexGenerator.createIndex();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        long s2 = System.currentTimeMillis();
        System.out.println("Index created successfully! (" + (s2 - s1) / 1000L + " sec)");
    }

    private static void checkFile(File file) {
        if (!file.isFile()) {
            throw new IllegalArgumentException("File \"" + file.getAbsolutePath() + "\" could not be found.");
        }
    }

    public SynonymIndexGenerator(File dictFile, File indexFile) throws FileNotFoundException, IOException {
        System.out.println("Building synonym index from dictionary " + dictFile.getAbsolutePath());
        this.dictFile = dictFile;
        this.indexDirectory = this.createIndexDirectory(indexFile);
    }

    public void createIndex() throws IOException {
        CandidateFilter cf = new CandidateFilter();
        WhitespaceAnalyzer wsAnalyzer = new WhitespaceAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig((Analyzer)wsAnalyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        IndexWriter iw = new IndexWriter(this.indexDirectory, iwc);
        TermNormalizer normalizer = new TermNormalizer();
        int counter = 0;
        BufferedReader normDictReader = new BufferedReader(new FileReader(this.dictFile));
        System.out.println("Generating index now. This may take quite a while (up to several hours when input files are large) ...");
        try {
            String line = "";
            while ((line = normDictReader.readLine()) != null) {
                int i;
                String[] values = line.split("\t");
                if (values.length != 3) {
                    System.err.println("ERR: normalized dictionary not in expected format. \ncritical line: " + line);
                    continue;
                }
                String name = values[0];
                String normalizedName = normalizer.normalize(name);
                List normalizedNameVariant = normalizer.generateVariants(name).stream().map(arg_0 -> ((TermNormalizer)normalizer).normalize(arg_0)).collect(Collectors.toList());
                String id = values[1];
                Integer priority = Integer.parseInt(values[2]);
                boolean filtered = false;
                int synTokenNum = normalizedName.split(" ").length;
                if (synTokenNum > 8 || synTokenNum < 2 && normalizedName.length() < 2) {
                    log.debug("Removed due to illegal length (too short or too long): {}", (Object)normalizedName);
                    continue;
                }
                Pattern p = CandidateFilter.patternDomainFamilies;
                Matcher m = p.matcher(normalizedName);
                if (m.matches()) {
                    log.debug("DOMAIN/FAMILY REMOVED: |{}|", (Object)normalizedName);
                    filtered = true;
                }
                if ((m = (p = CandidateFilter.patternUnspecifieds).matcher(normalizedName)).matches()) {
                    log.debug("UNSPECIFIED REMOVED: |{}|", (Object)normalizedName);
                    filtered = true;
                }
                if (filtered && OMIT_FILTERED.booleanValue()) continue;
                this.showDebug(id + "\t" + normalizedName);
                String tax = "";
                if (this.id2tax.get(id) != null) {
                    tax = this.id2tax.get(id);
                }
                ArrayList<Object> fields = new ArrayList<Object>();
                StringField idField = new StringField("entry_id", id, Field.Store.YES);
                TextField originalNameField = new TextField("original_name", name.toLowerCase(), Field.Store.YES);
                TextField lookupSynField = new TextField("indexed_syn", normalizedName, Field.Store.YES);
                StringField taxField = new StringField("tax_id", tax, Field.Store.YES);
                IntPoint priorityField = new IntPoint("priority", new int[]{priority});
                StoredField storedPriorityField = new StoredField("priority", priority.intValue());
                if (!OMIT_FILTERED.booleanValue()) {
                    IntPoint filteredField = new IntPoint("filtered", new int[]{filtered ? 1 : 0});
                    StoredField storedFilteredField = new StoredField("filtered", filtered ? 1 : 0);
                    fields.add(filteredField);
                    fields.add(storedFilteredField);
                }
                fields.add(idField);
                fields.add(originalNameField);
                fields.add(lookupSynField);
                fields.add(taxField);
                fields.add(priorityField);
                fields.add(storedPriorityField);
                for (i = 0; i < normalizedNameVariant.size(); ++i) {
                    fields.add(new TextField("variant_name", (String)normalizedNameVariant.get(i), Field.Store.YES));
                }
                for (i = 0; i < normalizedNameVariant.size(); ++i) {
                    fields.add(new TextField("stemmed_normalized_name", (String)normalizedNameVariant.get(i), Field.Store.YES));
                }
                Document d = new Document();
                for (Field field : fields) {
                    d.add((IndexableField)field);
                }
                iw.addDocument((Iterable)d);
                if (++counter % 10000 != 0) continue;
                System.err.println("# entries processed: " + counter);
            }
            iw.close();
            normDictReader.close();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    private FSDirectory createIndexDirectory(File indexFile) {
        FSDirectory fdir = null;
        try {
            fdir = FSDirectory.open((Path)indexFile.toPath());
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        return fdir;
    }

    private void showDebug(String s) {
    }

    private void readUpTaxMap(File taxMap) throws IOException {
        System.out.println("Reading up2eg2tax.map ...");
        this.id2tax = new HashMap<String, String>();
        BufferedReader reader = new BufferedReader(new FileReader(taxMap));
        String line = "";
        while ((line = reader.readLine()) != null) {
            String[] entry = line.split("\t");
            if (entry.length != 3) {
                System.err.println("ERR: up2eg2tax.map not in expected format. \ncritical line: " + line);
                System.exit(-1);
            }
            String id = entry[0].trim();
            String taxId = entry[2].trim();
            this.id2tax.put(id, taxId);
        }
        reader.close();
    }

    private void readEgTaxMap(File geneInfo) throws IOException {
        try (BufferedReader br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(geneInfo))));){
            this.id2tax = br.lines().collect(Collectors.toMap(l -> l.split("\\t", 3)[1], l -> l.split("\\t", 3)[0]));
        }
    }
}

