/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.concepts.db.creators;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import de.julielab.concepts.db.core.services.FacetCreationService;
import de.julielab.concepts.db.core.spi.ConceptCreator;
import de.julielab.concepts.db.creators.Forest;
import de.julielab.concepts.db.creators.Node;
import de.julielab.concepts.util.ConceptCreationException;
import de.julielab.concepts.util.FacetCreationException;
import de.julielab.java.utilities.ConfigurationUtilities;
import de.julielab.java.utilities.FileUtilities;
import de.julielab.java.utilities.index.IndexCreationException;
import de.julielab.java.utilities.index.PersistentLuceneIndexStringArrayMapProvider;
import de.julielab.neo4j.plugins.datarepresentation.ConceptCoordinates;
import de.julielab.neo4j.plugins.datarepresentation.CoordinateType;
import de.julielab.neo4j.plugins.datarepresentation.ImportConcept;
import de.julielab.neo4j.plugins.datarepresentation.ImportConceptRelationship;
import de.julielab.neo4j.plugins.datarepresentation.ImportConcepts;
import de.julielab.neo4j.plugins.datarepresentation.ImportFacet;
import de.julielab.neo4j.plugins.datarepresentation.ImportOptions;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.attribute.FileAttribute;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.Spliterators;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.configuration2.HierarchicalConfiguration;
import org.apache.commons.configuration2.ex.ConfigurationException;
import org.apache.commons.configuration2.tree.ImmutableNode;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.neo4j.graphdb.Label;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class NCBIGeneConceptCreator
implements ConceptCreator {
    /** Source name assigned to the synthetic top-orthology and top-homology aggregates created by this class. */
    public static final String SEMEDICO_RESOURCE_MANAGEMENT_SOURCE = "Semedico Resource Management";
    /** Source and original source name used in the coordinates of gene concepts. */
    public static final String NCBI_GENE_SOURCE = "NCBI Gene";
    // The following constants are configuration parameter names. They are read
    // below the "concepts/creator/configuration" path of the import configuration.
    public static final String BASEPATH = "basepath";
    public static final String GENE_INFO = "gene_info";
    public static final String GENEDESCRIPTIONS = "genedescriptions";
    public static final String ORGANISMLIST = "organismlist";
    public static final String ORGANISMNAMES = "organismnames";
    /** Configuration parameter name; also used as the source name of orthology cluster aggregates. */
    public static final String GENE_ORTHOLOGS = "gene_orthologs";
    public static final String UP_ID_MAPPING = "up_id_mapping";
    public static final String GENE_2_GO = "gene2go";
    public static final String GO_DB_ORIGINAL_SOURCE_NAME = "go_db_original_source_name";
    public static final String CACHE_DIR = "cache_dir";
    /** Prefix of the source IDs of orthology cluster aggregates. */
    public static final String GENE_GROUP_PREFIX = "genegroup";
    /** Prefix of the source IDs of top-orthology aggregates; a running counter is appended. */
    public static final String TOP_ORTHOLOGY_PREFIX = "toporthology";
    /** Prefix of the source IDs of top-homology aggregates; a running counter is appended. */
    public static final String TOP_HOMOLOGY_PREFIX = "tophomology";
    /** Default cache directory; replaced in createConcepts when the cache_dir parameter is configured. */
    private Path cacheDir = Path.of("concept-manager-caches", "ncbi-gene-concepts");
    // Counters for the concepts created during one createConcepts run.
    // NOTE(review): homologeneAggregateCounter, uniProtConceptCounter, goConceptCounter and
    // dbXRefCounter are not incremented in the part of the class visible here - presumably
    // they are maintained by methods further down; confirm.
    private int homologeneAggregateCounter;
    private int orthologAggregateCounter;
    private int topOrthologAggregateCounter;
    private int topHomologyAggregateCounter;
    private int uniProtConceptCounter;
    private int goConceptCounter;
    private int dbXRefCounter;
    private Logger log = LoggerFactory.getLogger(NCBIGeneConceptCreator.class);

    /**
     * Creates a new concept creator whose concept counters all start at their reset state.
     */
    public NCBIGeneConceptCreator() {
        resetCounters();
    }

    /**
     * Builds the concept coordinates of an NCBI Gene entry.
     * The gene ID is used both as source ID and as original ID, and
     * {@link #NCBI_GENE_SOURCE} is used both as source and as original source.
     *
     * @param originalId the NCBI Gene ID
     * @return the coordinates identifying the gene concept
     */
    public static ConceptCoordinates getGeneCoordinates(String originalId) {
        final String geneSource = NCBI_GENE_SOURCE;
        final ConceptCoordinates geneCoordinates = new ConceptCoordinates(originalId, geneSource, originalId, geneSource);
        return geneCoordinates;
    }

    /**
     * Sets all concept counters back to zero so that a new run of
     * {@code createConcepts} does not carry over counts from a previous run.
     */
    private void resetCounters() {
        this.homologeneAggregateCounter = 0;
        this.orthologAggregateCounter = 0;
        this.topOrthologAggregateCounter = 0;
        this.topHomologyAggregateCounter = 0;
        this.uniProtConceptCounter = 0;
        this.goConceptCounter = 0;
        // dbXRefCounter contributes to the reported concept total and must be reset as well.
        this.dbXRefCounter = 0;
    }

    /**
     * Extends the concept stream by orthology aggregates and, on top of those, top-homology aggregates.
     * The same gene-to-aggregate map, gene hierarchy and list of copied aggregate properties
     * are shared between both steps.
     *
     * @param conceptStream the stream of gene concepts
     * @param totalGeneIds  the IDs of all genes that are imported
     * @param termsByGeneId passed through to the orthology aggregate creation
     * @param geneGroup     the gene orthologs file
     * @return the input stream followed by the created aggregates
     * @throws IOException if the gene orthologs file cannot be read
     */
    private Stream<ImportConcept> createHomologyAggregates(Stream<ImportConcept> conceptStream, Set<String> totalGeneIds, Map<ConceptCoordinates, ImportConcept> termsByGeneId, File geneGroup) throws IOException {
        final List<String> copiedProperties = Arrays.asList("preferredName", "facets");
        final Multimap<String, ConceptCoordinates> aggregatesByGene = HashMultimap.create();
        final Forest hierarchy = new Forest();
        final Stream<ImportConcept> withOrthologyAggregates = this.createGeneOrthologyAggregates(conceptStream, totalGeneIds, aggregatesByGene, hierarchy, geneGroup, termsByGeneId, copiedProperties);
        return this.createTopHomologyAggregates(withOrthologyAggregates, hierarchy, aggregatesByGene, copiedProperties);
    }

    /**
     * Sanity check that every concept in the given map has its parent coordinates set.
     *
     * @param termsByGeneId the concepts to check
     * @throws IllegalArgumentException for the first concept found without parent coordinates;
     *                                  the message is the string form of that concept's coordinates
     */
    private void checkfornullparentcoords(Map<ConceptCoordinates, ImportConcept> termsByGeneId) {
        Optional<ImportConcept> withoutParents = termsByGeneId.values().stream()
                .filter(concept -> concept.parentCoordinates == null)
                .findFirst();
        if (withoutParents.isPresent()) {
            throw new IllegalArgumentException(withoutParents.get().coordinates.toString());
        }
    }

    /**
     * Creates top-homology aggregates that unite multiple gene group aggregates a gene belongs to.
     * <p>
     * For each gene in {@code genes2Aggregate}, the roots of the gene in the hierarchy are inspected.
     * When more than one gene group aggregate is found among the roots, a new top-homology aggregate
     * with those aggregates as elements is created and appended to the returned stream.
     *
     * @param processedConceptStream  the concepts created so far
     * @param geneHierarchy           the hierarchy of genes and their aggregates
     * @param genes2Aggregate         map from gene ID to the coordinates of aggregates containing the gene
     * @param aggregateCopyProperties the properties aggregates copy from their elements
     * @return the input stream followed by the created top-homology aggregates
     */
    private Stream<ImportConcept> createTopHomologyAggregates(Stream<ImportConcept> processedConceptStream, Forest geneHierarchy, Multimap<String, ConceptCoordinates> genes2Aggregate, List<String> aggregateCopyProperties) {
        Stream.Builder<ImportConcept> topHomologyStreamBuilder = Stream.builder();
        for (String geneId : genes2Aggregate.keySet()) {
            Optional<Node> topHomologyAggregateOpt = geneHierarchy.getRoots(NCBIGeneConceptCreator.getGeneCoordinates(geneId)).stream()
                    .filter(c -> c.getConcept() != null)
                    .filter(c -> c.getConcept().coordinates.sourceId.startsWith(TOP_HOMOLOGY_PREFIX))
                    .findAny();
            if (topHomologyAggregateOpt.isPresent() && !topHomologyAggregateOpt.get().getId().sourceId.startsWith(TOP_HOMOLOGY_PREFIX)) {
                continue;
            }
            Set<ImportConcept> topAggregates = this.findTopOrthologsAndHomologyAggregates(geneId, geneHierarchy);
            if (topAggregates.size() <= 1) {
                // Nothing to unite: the gene has at most one top-level aggregate.
                continue;
            }
            ImportConcept topHomologyAggregate = new ImportConcept(topAggregates.stream().map(ic -> ic.coordinates).collect(Collectors.toList()), aggregateCopyProperties);
            topHomologyAggregate.coordinates = new ConceptCoordinates();
            topHomologyAggregate.coordinates.sourceId = TOP_HOMOLOGY_PREFIX + this.topHomologyAggregateCounter;
            topHomologyAggregate.coordinates.source = SEMEDICO_RESOURCE_MANAGEMENT_SOURCE;
            topHomologyAggregate.aggregateIncludeInHierarchy = true;
            topHomologyAggregate.generalLabels = Arrays.asList("AGGREGATE_TOP_HOMOLOGY", "NO_PROCESSING_GAZETTEER");
            topHomologyStreamBuilder.accept(topHomologyAggregate);
            // The Optional is empty when the gene has no top-homology root yet. Calling get()
            // unconditionally would throw NoSuchElementException in that case, so fall back to the
            // coordinates of the aggregate that was just created.
            ConceptCoordinates topHomologyCoordinates = topHomologyAggregateOpt
                    .map(node -> node.getConcept().coordinates)
                    .orElse(topHomologyAggregate.coordinates);
            topAggregates.forEach(agg -> agg.addParent(topHomologyCoordinates));
            ++this.topHomologyAggregateCounter;
        }
        return Stream.concat(processedConceptStream, topHomologyStreamBuilder.build());
    }

    /**
     * Collects the concepts of those hierarchy roots of the given gene whose source ID
     * starts with {@link #GENE_GROUP_PREFIX}. Roots without an attached concept are ignored.
     *
     * @param geneId        the NCBI Gene ID whose roots are looked up
     * @param geneHierarchy the hierarchy of genes and their aggregates
     * @return the gene group aggregate concepts found among the roots of the gene
     */
    private Set<ImportConcept> findTopOrthologsAndHomologyAggregates(String geneId, Forest geneHierarchy) {
        Set<ImportConcept> geneGroupAggregates = new HashSet<>();
        for (Node root : geneHierarchy.getRoots(NCBIGeneConceptCreator.getGeneCoordinates(geneId))) {
            ImportConcept rootConcept = root.getConcept();
            if (rootConcept == null) {
                continue;
            }
            if (rootConcept.coordinates.sourceId.startsWith(GENE_GROUP_PREFIX)) {
                geneGroupAggregates.add(rootConcept);
            }
        }
        return geneGroupAggregates;
    }

    /**
     * Creates orthology cluster aggregates from the gene orthologs file and, for genes that
     * belong to more than one cluster, top-orthology aggregates that unite those clusters.
     * <p>
     * Only lines with the relationship {@code Ortholog} are used; lines starting with {@code #} are skipped.
     * The gene in the second column is treated as the group ID and the gene in the fifth column as a
     * member of that group. Genes that are not contained in {@code totalGeneIds} are left out, and groups
     * with fewer than two remaining genes do not result in an aggregate.
     * <p>
     * Side effects: {@code genes2Aggregate} and {@code geneHierarchy} are filled with the created
     * clusters, and the orthology and top-orthology counters of this instance are increased.
     *
     * @param conceptStream           the stream of gene concepts
     * @param totalGeneIds            the IDs of all genes that are imported
     * @param genes2Aggregate         output map from gene ID to the coordinates of its clusters
     * @param geneHierarchy           output hierarchy of genes, clusters and top-orthology aggregates
     * @param geneGroup               the tab-separated gene orthologs file
     * @param termsByGeneId           not used by this method
     * @param aggregateCopyProperties the properties aggregates copy from their elements
     * @return the mapped input stream followed by the created aggregates
     * @throws IOException              if the gene orthologs file cannot be read or closed
     * @throws IllegalArgumentException if a non-comment line has fewer than 5 columns
     */
    private Stream<ImportConcept> createGeneOrthologyAggregates(Stream<ImportConcept> conceptStream, Set<String> totalGeneIds, Multimap<String, ConceptCoordinates> genes2Aggregate, Forest geneHierarchy, File geneGroup, Map<ConceptCoordinates, ImportConcept> termsByGeneId, List<String> aggregateCopyProperties) throws IOException {
        Map<String, Set<String>> geneGroupOrthologs = new HashMap<>();
        // The reader is closed as soon as the file has been read; it was previously left open.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(geneGroup)) {
            Iterator<String> lineIt = reader.lines().iterator();
            while (lineIt.hasNext()) {
                String geneGroupLine = lineIt.next();
                if (geneGroupLine.startsWith("#")) {
                    continue;
                }
                String[] geneGroupRecord = geneGroupLine.split("\t");
                if (geneGroupRecord.length < 5) {
                    throw new IllegalArgumentException("The line " + geneGroupLine + " does not have at least 5 tab-separated columns.");
                }
                String relationship = geneGroupRecord[2];
                if (!relationship.equals("Ortholog")) {
                    continue;
                }
                String gene1 = geneGroupRecord[1].intern();
                String gene2 = geneGroupRecord[4].intern();
                geneGroupOrthologs.computeIfAbsent(gene1, gene -> new HashSet<>()).add(gene2);
            }
        }
        this.log.info("Got {} orthology groups from gene_ortholog file {}", geneGroupOrthologs.size(), geneGroup);

        // Step 1: one aggregate per orthology group that has at least two imported genes.
        Multimap<String, ImportConcept> genes2OrthoAggregate = HashMultimap.create();
        Stream.Builder<ImportConcept> aggregatesStreamBuilder = Stream.builder();
        for (Map.Entry<String, Set<String>> group : geneGroupOrthologs.entrySet()) {
            String geneGroupId = group.getKey();
            Set<String> mappingTargets = group.getValue();
            List<String> groupGeneIds = new ArrayList<>(mappingTargets.size() + 1);
            List<ConceptCoordinates> groupGeneCoords = new ArrayList<>(mappingTargets.size() + 1);
            for (String geneId : mappingTargets) {
                if (!totalGeneIds.contains(geneId)) {
                    continue;
                }
                groupGeneIds.add(geneId);
                groupGeneCoords.add(NCBIGeneConceptCreator.getGeneCoordinates(geneId));
            }
            // The group ID is itself a gene ID and belongs to the group.
            if (totalGeneIds.contains(geneGroupId)) {
                groupGeneIds.add(geneGroupId);
                groupGeneCoords.add(NCBIGeneConceptCreator.getGeneCoordinates(geneGroupId));
            }
            if (groupGeneCoords.size() <= 1) {
                continue;
            }
            ImportConcept orthologyCluster = new ImportConcept(groupGeneCoords, aggregateCopyProperties);
            orthologyCluster.coordinates = new ConceptCoordinates();
            orthologyCluster.coordinates.sourceId = (GENE_GROUP_PREFIX + geneGroupId).intern();
            orthologyCluster.coordinates.source = GENE_ORTHOLOGS;
            orthologyCluster.coordinates.originalSource = GENE_ORTHOLOGS;
            orthologyCluster.coordinates.originalId = geneGroupId;
            orthologyCluster.aggregateIncludeInHierarchy = true;
            orthologyCluster.generalLabels = Arrays.asList("AGGREGATE_GENEGROUP", "NO_PROCESSING_GAZETTEER");
            aggregatesStreamBuilder.accept(orthologyCluster);
            ++this.orthologAggregateCounter;
            for (String geneId : groupGeneIds) {
                genes2OrthoAggregate.put(geneId, orthologyCluster);
                ConceptCoordinates clusterCoordinates = new ConceptCoordinates(orthologyCluster.coordinates.sourceId, orthologyCluster.coordinates.source, true);
                genes2Aggregate.put(geneId, clusterCoordinates);
                geneHierarchy.addNode(NCBIGeneConceptCreator.getGeneCoordinates(geneId), geneHierarchy.addNode(clusterCoordinates));
            }
        }
        conceptStream = conceptStream.map(arg_0 -> NCBIGeneConceptCreator.lambda$createGeneOrthologyAggregates$6((Multimap)genes2OrthoAggregate, arg_0));

        // Step 2: genes in several clusters get those clusters united under one top-orthology aggregate.
        Map<ConceptCoordinates, ImportConcept> orthoAgg2TopOrtho = new HashMap<>();
        for (String geneid : genes2OrthoAggregate.keySet()) {
            Collection<ImportConcept> clusters = genes2OrthoAggregate.get(geneid);
            if (clusters.size() <= 1) {
                continue;
            }
            // Try to reuse a top-orthology aggregate already assigned to a transitively connected cluster.
            // NOTE(review): the visited set orders clusters by identity hash code, which is not
            // guaranteed to be unique per object.
            ImportConcept topOrthologyAggregate = null;
            Set<ImportConcept> seenOrthologyClusters = new TreeSet<ImportConcept>(Comparator.comparingLong(System::identityHashCode));
            for (ImportConcept cluster : clusters) {
                topOrthologyAggregate = this.findTopOrtholog(cluster, seenOrthologyClusters, genes2OrthoAggregate, orthoAgg2TopOrtho);
                if (topOrthologyAggregate != null) {
                    break;
                }
            }
            if (topOrthologyAggregate == null) {
                topOrthologyAggregate = new ImportConcept(new ArrayList<ConceptCoordinates>(), aggregateCopyProperties);
                topOrthologyAggregate.coordinates = new ConceptCoordinates();
                topOrthologyAggregate.coordinates.sourceId = TOP_ORTHOLOGY_PREFIX + this.topOrthologAggregateCounter;
                topOrthologyAggregate.coordinates.source = SEMEDICO_RESOURCE_MANAGEMENT_SOURCE;
                topOrthologyAggregate.aggregateIncludeInHierarchy = true;
                topOrthologyAggregate.generalLabels = Arrays.asList("AGGREGATE_TOP_ORTHOLOGY", "NO_PROCESSING_GAZETTEER");
                aggregatesStreamBuilder.accept(topOrthologyAggregate);
                ++this.topOrthologAggregateCounter;
            }
            for (ImportConcept cluster : clusters) {
                ConceptCoordinates clusterCoordinates = cluster.coordinates;
                if (!topOrthologyAggregate.elementCoordinates.contains(clusterCoordinates)) {
                    topOrthologyAggregate.elementCoordinates.add(clusterCoordinates);
                }
                orthoAgg2TopOrtho.put(clusterCoordinates, topOrthologyAggregate);
                cluster.addParent(topOrthologyAggregate.coordinates);
                geneHierarchy.addNode(cluster.coordinates).addParent(geneHierarchy.addNode(topOrthologyAggregate.coordinates));
            }
        }
        return Stream.concat(conceptStream, aggregatesStreamBuilder.build());
    }

    /**
     * Searches for a top-orthology aggregate that was already assigned to the given cluster or to any
     * cluster reachable from it through shared element genes. The search is a depth-first traversal;
     * {@code seenOrthologyClusters} records the visited clusters and is modified by this method.
     *
     * @param orthologyCluster      the cluster to start from
     * @param seenOrthologyClusters the clusters visited so far
     * @param genes2OrthoAggregate  map from gene ID to the clusters containing the gene
     * @param orthoAgg2TopOrtho     the top-orthology aggregates assigned so far, by cluster coordinates
     * @return the first top-orthology aggregate found or {@code null} if there is none
     */
    private ImportConcept findTopOrtholog(ImportConcept orthologyCluster, Set<ImportConcept> seenOrthologyClusters, Multimap<String, ImportConcept> genes2OrthoAggregate, Map<ConceptCoordinates, ImportConcept> orthoAgg2TopOrtho) {
        final ImportConcept alreadyAssigned = orthoAgg2TopOrtho.get(orthologyCluster.coordinates);
        seenOrthologyClusters.add(orthologyCluster);
        if (alreadyAssigned != null) {
            return alreadyAssigned;
        }
        for (ConceptCoordinates member : orthologyCluster.elementCoordinates) {
            for (ImportConcept neighbourCluster : genes2OrthoAggregate.get(member.originalId)) {
                // Set.add returns false for clusters that have been visited before.
                if (!seenOrthologyClusters.add(neighbourCluster)) {
                    continue;
                }
                ImportConcept found = this.findTopOrtholog(neighbourCluster, seenOrthologyClusters, genes2OrthoAggregate, orthoAgg2TopOrtho);
                if (found != null) {
                    return found;
                }
            }
        }
        return null;
    }

    /**
     * Adds a species qualifier and a species-specific display name to each gene of the stream.
     * <p>
     * The taxonomy names file is read completely. For each taxonomy ID, the names with the
     * name classes {@code scientific name} and {@code genbank common name} are kept. The returned
     * stream looks up the record for the {@code taxId} auxiliary property of each gene. When a record
     * exists, the qualifier is the scientific name, followed by the common name in parentheses if
     * available, and the display name is the preferred name followed by the same information in square
     * brackets. When no record exists, a warning is logged and the gene is passed on unchanged.
     *
     * @param conceptStream the stream of gene concepts
     * @param ncbiTaxNames  the NCBI Taxonomy names file; when {@code null}, the stream is returned as is
     * @param geneId2Tax    not used
     * @param geneTerms     not used
     * @return the stream with species information added to the genes
     * @throws IOException if the taxonomy names file cannot be read or closed
     */
    private Stream<ImportConcept> setSpeciesQualifier(Stream<ImportConcept> conceptStream, File ncbiTaxNames, @Deprecated Map<String, String> geneId2Tax, @Deprecated Collection<ImportConcept> geneTerms) throws IOException {
        if (ncbiTaxNames == null) {
            return conceptStream;
        }
        this.log.info("Setting species qualifiers from file {}", ncbiTaxNames);
        final Map<String, TaxonomyRecord> taxNameRecords = new HashMap<>();
        // The reader is closed once all names are loaded; it was previously left open.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(ncbiTaxNames)) {
            Iterator<String> lineIt = reader.lines().iterator();
            while (lineIt.hasNext()) {
                String recordString = lineIt.next();
                // Fields are separated by "<tab>|<tab>" and the line ends with "<tab>|".
                String[] split = recordString.split("(\t\\|\t)|(\t\\|)");
                String taxId = split[0].intern();
                String name = split[1].intern();
                String nameClass = split[3].intern();
                TaxonomyRecord taxonomyRecord = taxNameRecords.computeIfAbsent(taxId, id -> new TaxonomyRecord(id));
                if (nameClass.equals("scientific name")) {
                    taxonomyRecord.scientificName = name;
                } else if (nameClass.equals("genbank common name")) {
                    taxonomyRecord.geneBankCommonName = name;
                }
            }
        }
        return conceptStream.map(gene -> {
            String taxId = (String) gene.getAuxProperty("taxId");
            TaxonomyRecord taxonomyRecord = taxNameRecords.get(taxId);
            if (null != taxonomyRecord) {
                String speciesQualifier = taxonomyRecord.scientificName;
                if (null != taxonomyRecord.geneBankCommonName) {
                    speciesQualifier = speciesQualifier + (" (" + taxonomyRecord.geneBankCommonName + ")").intern();
                }
                gene.addQualifier(speciesQualifier);
                gene.displayName = gene.prefName + " [" + taxonomyRecord.scientificName;
                if (null != taxonomyRecord.geneBankCommonName) {
                    gene.displayName = gene.displayName + " (" + taxonomyRecord.geneBankCommonName + ")";
                }
                gene.displayName = gene.displayName + "]";
            } else {
                this.log.warn("No NCBI Taxonomy name record was found for the taxonomy ID {}", taxId);
            }
            return gene;
        });
    }

    /**
     * Returns a lazy stream of gene concepts created from the lines of the gene_info file.
     * <p>
     * Lines starting with {@code #} are skipped. The first column of each line is the taxonomy ID;
     * genes whose taxonomy ID is not contained in {@code organismSet} are dropped unless the set is
     * empty, in which case all genes are kept. The taxonomy ID is stored on each returned concept as
     * the auxiliary property {@code taxId}.
     * <p>
     * The gene_info reader is closed when the stream has been consumed completely or when the returned
     * stream is closed, whichever happens first.
     *
     * @param geneInfo         the gene_info file
     * @param organismSet      the taxonomy IDs to restrict the genes to; empty for no restriction
     * @param geneDescriptions optional tab-separated file of gene ID and summary; may be {@code null}
     * @return the stream of gene concepts
     * @throws IOException if one of the files cannot be opened or the descriptions cannot be read
     */
    protected Stream<ImportConcept> convertGeneInfoToImportConcepts(File geneInfo, final Set<String> organismSet, File geneDescriptions) throws IOException {
        final Map<String, String> gene2Summary = new HashMap<>();
        if (geneDescriptions != null) {
            this.log.info("Reading gene descriptions from {}", geneDescriptions);
            // The descriptions are loaded eagerly, so the reader can be closed right away.
            try (BufferedReader descriptionReader = FileUtilities.getReaderFromFile(geneDescriptions)) {
                Iterator<String> lineIt = descriptionReader.lines().iterator();
                while (lineIt.hasNext()) {
                    String[] split = lineIt.next().split("\t");
                    String geneId = split[0].intern();
                    String summary = split[1].intern();
                    gene2Summary.put(geneId, summary);
                }
            }
        }
        // This reader must stay open beyond this method because the returned stream reads from it lazily.
        final BufferedReader geneInfoReader = FileUtilities.getReaderFromFile(geneInfo);
        final Iterator<String> it = geneInfoReader.lines().filter(line -> !line.startsWith("#")).iterator();
        Iterator<ImportConcept> geneIterator = new Iterator<ImportConcept>() {
            private boolean closed = false;

            @Override
            public boolean hasNext() {
                boolean hasNext = !this.closed && it.hasNext();
                if (!hasNext) {
                    try {
                        geneInfoReader.close();
                        this.closed = true;
                    } catch (IOException e) {
                        throw new IllegalStateException(e);
                    }
                }
                return hasNext;
            }

            /**
             * Returns the next gene or null when the gene was filtered out or the input is exhausted.
             * The null values are removed from the stream built below.
             */
            @Override
            public ImportConcept next() {
                if (this.hasNext()) {
                    String geneRecord = it.next();
                    ImportConcept geneconcept = NCBIGeneConceptCreator.this.createGeneConcept(geneRecord, gene2Summary);
                    String[] split = geneRecord.split("\t", 2);
                    String taxId = split[0].intern();
                    if (organismSet.contains(taxId) || organismSet.isEmpty()) {
                        geneconcept.putAuxProperty("taxId", (Object) taxId);
                        return geneconcept;
                    }
                }
                return null;
            }
        };
        this.log.info("Returning stream for gene concept creation.");
        return StreamSupport.stream(Spliterators.spliteratorUnknownSize(geneIterator, 0), false)
                .filter(Objects::nonNull)
                // Release the file handle even if the stream is abandoned before it is exhausted.
                .onClose(() -> {
                    try {
                        geneInfoReader.close();
                    } catch (IOException e) {
                        throw new IllegalStateException(e);
                    }
                });
    }

    /**
     * Creates a gene concept from one tab-separated line of the gene_info file.
     * <p>
     * Columns used (0-based): 0 taxonomy ID, 1 gene ID, 2 preferred name, 4 pipe-separated synonyms,
     * 5 database cross references, 8 description, 11 full name, 13 pipe-separated other designations.
     * A preferred name shorter than 3 characters is replaced by the full name or, failing that, by the
     * description, given those are longer than 2 characters. Synonyms shorter than 2 characters are
     * discarded.
     *
     * @param record       the gene_info line
     * @param gene2Summary map from gene ID to a summary used as the concept description
     * @return the gene concept with the {@code taxId} and {@code dbXrefs} additional properties set
     */
    private ImportConcept createGeneConcept(String record, Map<String, String> gene2Summary) {
        final String[] columns = record.split("\t");
        final List<String> names = new ArrayList<>();
        String preferred = columns[2];
        final String officialFullName = columns[11];
        final boolean useFullNameAsPreferred = preferred.length() < 3 && officialFullName.length() > 2;
        if (useFullNameAsPreferred) {
            preferred = officialFullName;
        } else {
            names.add(officialFullName);
        }
        final String descriptionColumn = columns[8];
        if (preferred.length() < 3 && descriptionColumn.length() > 2) {
            preferred = descriptionColumn;
        }
        final String geneId = columns[1].intern();
        final String synonymColumn = columns[4];
        final String otherDesignationColumn = columns[13];
        for (String synonym : synonymColumn.split("\\|")) {
            names.add(synonym.intern());
        }
        for (String designation : otherDesignationColumn.split("\\|")) {
            names.add(designation.intern());
        }
        String summary = gene2Summary.get(geneId);
        if (summary != null) {
            summary = summary.intern();
        }
        // Drop placeholder and single-character names.
        names.removeIf(name -> name.length() < 2);

        ImportConcept gene = new ImportConcept(preferred, names, summary, NCBIGeneConceptCreator.getGeneCoordinates(geneId));
        gene.additionalProperties = new HashMap<>();
        gene.additionalProperties.put("taxId", columns[0]);
        gene.additionalProperties.put("dbXrefs", columns[5]);
        gene.addGeneralLabel(new String[]{ConceptLabels.NO_PROCESSING_GAZETTEER.toString(), ConceptLabels.ID_MAP_NCBI_GENES.toString()});
        return gene;
    }

    /**
     * Writes the configuration parameters of this creator into the given configuration template.
     * The creator name and an empty value for each file parameter are added, the facet creation
     * service adds its own parameters, and the facet group name, facet name and source type are
     * then set to the values for the gene facet.
     *
     * @param basePath the configuration path below which the parameters are placed
     * @param template the configuration template to fill
     */
    public void exposeParameters(String basePath, HierarchicalConfiguration<ImmutableNode> template) {
        final String base = ConfigurationUtilities.slash(new String[]{basePath, "concepts", "creator", "configuration"});
        template.addProperty(ConfigurationUtilities.slash(new String[]{basePath, "concepts", "creator", "name"}), this.getName());
        final String[] emptyParameters = {BASEPATH, GENE_INFO, GENEDESCRIPTIONS, ORGANISMLIST, ORGANISMNAMES, GENE_ORTHOLOGS, UP_ID_MAPPING, GENE_2_GO};
        for (String parameter : emptyParameters) {
            template.addProperty(ConfigurationUtilities.slash(new String[]{base, parameter}), "");
        }
        FacetCreationService.getInstance().exposeParameters(basePath, template);
        final String facetGroupNameKey = ConfigurationUtilities.slash(new String[]{basePath, "facet", "creator", "configuration", "facetgroup", "name"});
        template.setProperty(facetGroupNameKey, "Biology");
        final String facetNameKey = ConfigurationUtilities.slash(new String[]{basePath, "facet", "creator", "configuration", "name"});
        template.setProperty(facetNameKey, "Genes");
        final String sourceTypeKey = ConfigurationUtilities.slash(new String[]{basePath, "facet", "creator", "configuration", "sourcetype"});
        template.setProperty(sourceTypeKey, "hierarchical");
    }

    /**
     * Creates the gene concepts and their aggregates according to the given import configuration.
     * <p>
     * The parameters {@code gene_info}, {@code organismlist} and {@code gene_orthologs} are checked for
     * presence. All file parameters are resolved against the optional {@code basepath}. The gene
     * concepts are streamed from the gene_info file and passed through the species qualifier, UniProt
     * mapping, database cross reference, GO annotation and homology aggregate steps.
     *
     * @param importConfig the import configuration
     * @return a stream with a single {@link ImportConcepts} object carrying the concept stream,
     *         the facet and the import options
     * @throws ConceptCreationException if a required parameter is missing or reading an input file or
     *                                  creating an index fails
     * @throws FacetCreationException   if the facet cannot be created
     */
    public Stream<ImportConcepts> createConcepts(HierarchicalConfiguration<ImmutableNode> importConfig) throws ConceptCreationException, FacetCreationException {
        this.resetCounters();
        String confPath = ConfigurationUtilities.slash(new String[]{"concepts", "creator", "configuration"});
        try {
            ConfigurationUtilities.checkParameters(importConfig, new String[]{
                    ConfigurationUtilities.slash(new String[]{confPath, GENE_INFO}),
                    ConfigurationUtilities.slash(new String[]{confPath, ORGANISMLIST}),
                    ConfigurationUtilities.slash(new String[]{confPath, GENE_ORTHOLOGS})});
        } catch (ConfigurationException e) {
            throw new ConceptCreationException((Throwable) e);
        }
        String basepath = importConfig.getString(ConfigurationUtilities.slash(new String[]{confPath, BASEPATH}), "");
        File geneInfo = this.resolvePath(basepath, importConfig.getString(ConfigurationUtilities.slash(new String[]{confPath, GENE_INFO})));
        File geneDescriptions = this.resolvePath(basepath, importConfig.getString(ConfigurationUtilities.slash(new String[]{confPath, GENEDESCRIPTIONS})));
        File organisms = this.resolvePath(basepath, importConfig.getString(ConfigurationUtilities.slash(new String[]{confPath, ORGANISMLIST})));
        File ncbiTaxNames = this.resolvePath(basepath, importConfig.getString(ConfigurationUtilities.slash(new String[]{confPath, ORGANISMNAMES})));
        File geneOrthologs = this.resolvePath(basepath, importConfig.getString(ConfigurationUtilities.slash(new String[]{confPath, GENE_ORTHOLOGS})));
        File uniprotIdMapping = this.resolvePath(basepath, importConfig.getString(ConfigurationUtilities.slash(new String[]{confPath, UP_ID_MAPPING})));
        File gene2go = this.resolvePath(basepath, importConfig.getString(ConfigurationUtilities.slash(new String[]{confPath, GENE_2_GO})));
        String goOriginalSourceName = importConfig.getString(ConfigurationUtilities.slash(new String[]{confPath, GO_DB_ORIGINAL_SOURCE_NAME}));
        File cacheDirFile = this.resolvePath(basepath, importConfig.getString(ConfigurationUtilities.slash(new String[]{confPath, CACHE_DIR})));
        try {
            if (cacheDirFile != null) {
                this.log.info("Setting cache directory to {}", cacheDirFile);
                this.cacheDir = cacheDirFile.toPath();
                if (!Files.exists(this.cacheDir, new LinkOption[0])) {
                    Files.createDirectories(this.cacheDir, new FileAttribute[0]);
                }
            }
            this.log.info("Beginning import of NCBI Genes.");
            Set<String> organismSet;
            if (organisms != null) {
                this.log.info("Reading the set of organisms to import genes of from {}.", organisms);
                // Close the organism list after reading; the reader was previously left open.
                try (BufferedReader organismReader = FileUtilities.getReaderFromFile(organisms)) {
                    organismSet = organismReader.lines().map(String::intern).collect(Collectors.toSet());
                }
            } else {
                organismSet = Collections.emptySet();
            }
            if (!organismSet.isEmpty()) {
                this.log.info("Retrieved {} taxonomy IDs.", organismSet.size());
            } else {
                this.log.info("Retrieved {} taxonomy IDs. No restrictions on species is imposed.", organismSet.size());
            }
            this.log.info("Reading the set of valid gene IDs from {}.", geneInfo);
            Set<String> totalGeneIds = this.getTotalGeneIds(geneInfo, organismSet);
            this.log.info("Got {} gene IDs.", totalGeneIds.size());
            // Both maps stay empty; they only serve deprecated or unused parameters of the called methods.
            Map<String, String> geneId2Tax = new HashMap<>();
            Map<ConceptCoordinates, ImportConcept> conceptsByGeneId = new HashMap<>();
            this.log.info("Creating a stream converting NCBI Gene's gene_info file into nodes for the concept graph.");
            Stream<ImportConcept> conceptStream = this.convertGeneInfoToImportConcepts(geneInfo, organismSet, geneDescriptions);
            conceptStream = this.setSpeciesQualifier(conceptStream, ncbiTaxNames, geneId2Tax, conceptsByGeneId.values());
            conceptStream = this.createUniProtIdMappings(conceptStream, uniprotIdMapping, totalGeneIds);
            conceptStream = this.createDbXRefMappings(conceptStream);
            conceptStream = this.createGoAnnotationLinks(conceptStream, gene2go, goOriginalSourceName, totalGeneIds);
            this.log.info("Creating homology aggregates");
            conceptStream = this.createHomologyAggregates(conceptStream, totalGeneIds, conceptsByGeneId, geneOrthologs);
            this.log.info("Created {} homology aggregates", this.homologeneAggregateCounter);
            this.log.info("Created {} orthology aggregates", this.orthologAggregateCounter);
            this.log.info("Created {} top-homology aggregates, governing homologene and orthology aggregates", this.topHomologyAggregateCounter);
            ImportFacet facet = FacetCreationService.getInstance().createFacet(importConfig);
            ImportOptions options = new ImportOptions();
            options.createHollowAggregateElements = true;
            options.doNotCreateHollowParents = false;
            ImportConcepts importConcepts = new ImportConcepts(conceptStream, facet);
            // Top-orthology aggregates are emitted into the concept stream as well and therefore
            // belong into the total; they were previously left out of the sum.
            long numConcepts = (long) totalGeneIds.size()
                    + this.uniProtConceptCounter
                    + this.homologeneAggregateCounter
                    + this.orthologAggregateCounter
                    + this.topOrthologAggregateCounter
                    + this.topHomologyAggregateCounter
                    + this.dbXRefCounter;
            importConcepts.setNumConcepts(numConcepts);
            this.log.info("Created a total of {} concepts.", importConcepts.getNumConcepts());
            importConcepts.setImportOptions(options);
            return Stream.of(importConcepts);
        } catch (IndexCreationException | IOException e) {
            throw new ConceptCreationException(e);
        }
    }

    /**
     * Reads the gene-to-GO annotation file and maps the concept stream so that the annotations
     * can be attached to the genes.
     * <p>
     * Comment lines starting with {@code #}, genes not contained in {@code totalGeneIds} and
     * annotations whose qualifier starts with {@code NOT} are skipped. Of each remaining line, the
     * gene ID (column 2), the GO ID (column 3) and the qualifier (column 5) are kept.
     *
     * @param conceptStream        the stream of gene concepts
     * @param gene2go              the annotation file; when {@code null}, the stream is returned as is
     * @param goOriginalSourceName the original source name of the imported GO concepts
     * @param totalGeneIds         the IDs of all genes that are imported
     * @return the mapped concept stream
     * @throws IOException              if the annotation file cannot be read
     * @throws IllegalArgumentException if an annotation file is given without a GO original source name
     */
    private Stream<ImportConcept> createGoAnnotationLinks(Stream<ImportConcept> conceptStream, File gene2go, String goOriginalSourceName, Set<String> totalGeneIds) throws IOException {
        if (gene2go == null) {
            return conceptStream;
        }
        if (StringUtils.isBlank(goOriginalSourceName)) {
            throw new IllegalArgumentException("Found GO gene annotation file. But the go_db_original_source_name parameter is not given. It needs to be set to the original source used by the concept creator that has imported the GO terms. When in doubt, use the GO concept importer individually and then check the database for the source name.");
        }
        final Multimap<String, String[]> geneAnnotations = HashMultimap.create();
        this.log.info("Reading gene GO annotations from {} while excluding qualifiers beginning with NOT.", gene2go);
        int annotationCount = 0;
        try (BufferedReader annotationReader = FileUtilities.getReaderFromFile(gene2go)) {
            Iterator<String> lines = annotationReader.lines().iterator();
            while (lines.hasNext()) {
                String line = lines.next();
                if (line.startsWith("#")) {
                    continue;
                }
                String[] fields = line.split("\\t", 6);
                String geneId = fields[1].intern();
                if (!totalGeneIds.contains(geneId)) {
                    continue;
                }
                String goId = fields[2];
                String qualifier = fields[4];
                if (qualifier.startsWith("NOT")) {
                    continue;
                }
                geneAnnotations.put(geneId, new String[]{goId, qualifier});
                ++annotationCount;
            }
        }
        this.log.info("Received {} GO annotations for {} genes.", annotationCount, geneAnnotations.keySet().size());
        return conceptStream.map(arg_0 -> this.lambda$createGoAnnotationLinks$9((Multimap)geneAnnotations, goOriginalSourceName, arg_0));
    }

    /**
     * Expands every NCBI Gene concept of the stream by one additional concept per Ensembl or HGNC
     * cross-reference found in its {@code dbXrefs} property.
     * <p>
     * Only concepts labelled {@link ConceptLabels#ID_MAP_NCBI_GENES} are inspected. Their {@code dbXrefs}
     * property is removed and split at {@code |}. For each entry prefixed with {@code Ensembl:} or
     * {@code HGNC:} a new concept is emitted after the gene that carries an {@code IS_MAPPED_TO}
     * relationship to the gene. Concepts without the label or without the property pass through unchanged.
     *
     * @param conceptStream the concepts to expand
     * @return a stream of the original concepts, each directly followed by its cross-reference concepts
     */
    private Stream<ImportConcept> createDbXRefMappings(Stream<ImportConcept> conceptStream) {
        this.log.info("Adding dbXref items to the concept stream.");
        return conceptStream.flatMap(concept -> {
            List<ImportConcept> returnedConcepts = new ArrayList<>();
            returnedConcepts.add(concept);
            boolean isNcbiGene = concept.generalLabels != null && concept.generalLabels.contains(ConceptLabels.ID_MAP_NCBI_GENES.name());
            if (!isNcbiGene || concept.additionalProperties == null) {
                return returnedConcepts.stream();
            }
            String dbXrefsString = (String)concept.additionalProperties.get("dbXrefs");
            concept.additionalProperties.remove("dbXrefs");
            if (dbXrefsString == null) {
                // A gene without cross-references must not abort the whole stream with a NullPointerException.
                return returnedConcepts.stream();
            }
            for (String dbXref : dbXrefsString.split("\\|")) {
                String refId;
                String refSource;
                String refLabel;
                if (dbXref.startsWith("Ensembl:")) {
                    refId = dbXref.substring(8);
                    refSource = "Ensembl";
                    refLabel = "ENSEMBL";
                } else if (dbXref.startsWith("HGNC:")) {
                    refId = dbXref.substring(5);
                    refSource = "HGNC";
                    refLabel = "HGNC";
                } else {
                    // Cross-references to other databases are not turned into concepts.
                    continue;
                }
                ImportConcept refConcept = new ImportConcept(new ConceptCoordinates(refId, refSource, refId, refSource));
                this.log.trace("Creating dbXref concept with coordinates {}", (Object)refConcept.coordinates);
                refConcept.addGeneralLabel(new String[]{refLabel});
                refConcept.eligibleForFacetRoot = false;
                refConcept.addRelationship(new ImportConceptRelationship(concept.coordinates, "IS_MAPPED_TO"));
                returnedConcepts.add(refConcept);
            }
            return returnedConcepts.stream();
        });
    }

    /**
     * Expands every gene concept of the stream by the UniProt concepts mapped to it.
     * <p>
     * The mapping file is loaded into a persistent index located in {@code cacheDir}. The index is keyed by
     * the third column of the file, restricted to {@code totalGeneIds}, and stores the first two columns as
     * values. The values of a gene are therefore a flat array of (accession, ID) pairs. For every accession
     * not seen before, a concept with an {@code IS_MAPPED_TO} relationship to the gene is emitted directly
     * after the gene.
     *
     * @param conceptStream        the gene concepts to expand
     * @param uniprotIdMappingFile the UniProt ID mapping file, or {@code null} to skip this step
     * @param totalGeneIds         the gene IDs for which mappings are indexed
     * @return the expanded stream, or {@code conceptStream} itself if no mapping file was given
     * @throws IOException            declared for failures while reading the mapping file
     * @throws IndexCreationException declared for failures while building the mapping index
     */
    private Stream<ImportConcept> createUniProtIdMappings(Stream<ImportConcept> conceptStream, File uniprotIdMappingFile, Set<String> totalGeneIds) throws IOException, IndexCreationException {
        if (uniprotIdMappingFile == null) {
            this.log.info("UniProt ID mapping file not specified or not found, skipping UniProt ID mappings.");
            return conceptStream;
        }
        this.log.info("Creating index for UniProt ID mapping file {} to save memory.", (Object)uniprotIdMappingFile);
        PersistentLuceneIndexStringArrayMapProvider mappingIndex = new PersistentLuceneIndexStringArrayMapProvider();
        mappingIndex.setIndexDirectoryPath(this.cacheDir);
        mappingIndex.setEligibleKeys(totalGeneIds);
        mappingIndex.setKeyIndices(new int[]{2});
        mappingIndex.setValueIndices(new int[]{0, 1});
        mappingIndex.load(uniprotIdMappingFile.toURI());
        Map<?, ?> indexMap = mappingIndex.getMap();
        // NOTE(review): this adds the number of mapped genes, not the number of UniProt concepts emitted below - confirm intent.
        this.uniProtConceptCounter += indexMap.size();
        Set<String> seenUpAcs = new HashSet<>();
        this.log.info("Creating updated gene concept stream with additional IDs.");
        return conceptStream.flatMap(concept -> {
            List<ImportConcept> returnedConcepts = new ArrayList<>();
            returnedConcepts.add(concept);
            String[] upIds = (String[])indexMap.get(concept.coordinates.originalId);
            if (upIds == null) {
                return returnedConcepts.stream();
            }
            if (upIds.length % 2 == 1) {
                throw new IllegalStateException("An uneven number of UniProt ACs/IDs was returned but there should be pairs of ACs and IDs.");
            }
            // The array is a sequence of (AC, ID) pairs, so advance by two. Stepping by one would
            // misread an ID as an accession and pair it with the accession of the next entry.
            for (int i = 0; i + 1 < upIds.length; i += 2) {
                String upAc = upIds[i];
                if (!seenUpAcs.add(upAc)) {
                    continue;
                }
                String upId = upIds[i + 1];
                ImportConcept upConcept = new ImportConcept(new ConceptCoordinates(upAc, "UniProtKB-AC", upAc, "UniProtKB-AC"));
                upConcept.addGeneralLabel(new String[]{"UNIPROT"});
                upConcept.eligibleForFacetRoot = false;
                upConcept.addAdditionalCoordinates(new ConceptCoordinates(upId, "UniProtKB-ID", CoordinateType.SRC));
                upConcept.addAdditionalProperty("UniProtKB-ID", (Object)upId);
                upConcept.addRelationship(new ImportConceptRelationship(concept.coordinates, "IS_MAPPED_TO"));
                returnedConcepts.add(upConcept);
            }
            return returnedConcepts.stream();
        });
    }

    /**
     * Returns the IDs of all genes in the gene_info file that belong to one of the given organisms.
     * <p>
     * The result is cached, together with the dbXref counter, in {@code totalGeneIds.ser.gz} inside
     * {@code cacheDir}. The cache is discarded when the gene_info file is newer than the cache file or when
     * the cache cannot be deserialized; in both cases the gene_info file is read again and the cache is
     * rewritten. As a side effect, {@code dbXRefCounter} is set from the cache or increased by the number of
     * {@code Ensembl:} and {@code HGNC:} cross-references found in the accepted gene_info rows.
     *
     * @param geneInfo    the tab-separated gene_info file (first column taxonomy ID, second column gene ID, sixth column dbXrefs)
     * @param organismSet the taxonomy IDs to accept; an empty set accepts all organisms
     * @return the interned gene IDs of all accepted rows
     * @throws IOException if the gene_info file or the cache file cannot be read or written
     */
    private Set<String> getTotalGeneIds(File geneInfo, Set<String> organismSet) throws IOException {
        Set<String> geneIdSet = Collections.emptySet();
        Path genesetCacheFile = Path.of(this.cacheDir.toString(), "totalGeneIds.ser.gz");
        boolean geneInfoNewerThanCache = Files.exists(genesetCacheFile, new LinkOption[0]) && geneInfo.lastModified() > Files.getLastModifiedTime(genesetCacheFile, new LinkOption[0]).toMillis();
        if (geneInfoNewerThanCache) {
            // Log the file itself; the boolean flag would only ever print "true" here.
            this.log.info("gene_info file at {} is newer than cache at {}. Clearing cache and reading gene_info file from scratch.", (Object)geneInfo, (Object)genesetCacheFile);
            FileUtils.deleteQuietly(genesetCacheFile.toFile());
        }
        boolean readFromCache = Files.exists(genesetCacheFile, new LinkOption[0]) && !geneInfoNewerThanCache;
        if (readFromCache) {
            this.log.info("Loading set of gene IDs in gene_info from cache at {}", (Object)genesetCacheFile);
            try (ObjectInputStream ois = new ObjectInputStream(new GZIPInputStream(FileUtilities.getInputStreamFromFile(genesetCacheFile.toFile())))) {
                // The cache is only ever written by this method, which stores a Set<String> followed by an Integer.
                @SuppressWarnings("unchecked")
                Set<String> cachedGeneIds = (Set<String>)ois.readObject();
                geneIdSet = cachedGeneIds;
                this.dbXRefCounter = (Integer)ois.readObject();
                this.log.info("Obtained {} gene IDs and {} dbXref IDs in the cache.", (Object)geneIdSet.size(), (Object)this.dbXRefCounter);
            }
            catch (ClassNotFoundException e) {
                this.log.error("Unexpected error when trying to read gene ID set cache from {}. Perhaps the cache is corrupt. Trying to delete it and start again.", (Object)genesetCacheFile, (Object)e);
                FileUtils.deleteQuietly(genesetCacheFile.toFile());
                // Fall through to reading gene_info; otherwise the empty or partial result would be returned.
                readFromCache = false;
            }
        }
        if (!readFromCache) {
            // An explicit HashSet guarantees that the set can be serialized for the cache below.
            Set<String> readGeneIds = new HashSet<>();
            int dbXrefCount = 0;
            try (BufferedReader geneInfoReader = FileUtilities.getReaderFromFile(geneInfo)) {
                Iterator<String> lineIt = geneInfoReader.lines().iterator();
                while (lineIt.hasNext()) {
                    String line = lineIt.next();
                    if (line.startsWith("#")) {
                        continue;
                    }
                    String[] columns = line.split("\t", 6);
                    if (!organismSet.isEmpty() && !organismSet.contains(columns[0])) {
                        continue;
                    }
                    for (String dbXref : columns[5].split("\\|")) {
                        if (dbXref.startsWith("Ensembl:") || dbXref.startsWith("HGNC:")) {
                            ++dbXrefCount;
                        }
                    }
                    readGeneIds.add(columns[1].intern());
                }
            }
            geneIdSet = readGeneIds;
            this.dbXRefCounter += dbXrefCount;
            this.log.info("Received {} dbXref IDs which will result in additional concepts for the ID mapping", (Object)this.dbXRefCounter);
            try (ObjectOutputStream oos = new ObjectOutputStream(new GZIPOutputStream(FileUtilities.getOutputStreamToFile(genesetCacheFile.toFile())))) {
                this.log.info("Caching geneId set read from {} at {}", (Object)geneInfo, (Object)genesetCacheFile);
                oos.writeObject(geneIdSet);
                oos.writeObject(this.dbXRefCounter);
            }
        }
        return geneIdSet;
    }

    /**
     * Resolves a possibly relative file path against a base path.
     *
     * @param basepath the directory relative paths are resolved against; may be {@code null} or blank,
     *                 in which case {@code filepath} is used as given
     * @param filepath the path to resolve; absolute paths are returned unchanged
     * @return the resolved file, or {@code null} if {@code filepath} is blank
     */
    private File resolvePath(String basepath, String filepath) {
        if (StringUtils.isBlank(filepath)) {
            return null;
        }
        File file = new File(filepath);
        // Without a usable base path there is nothing to prepend. Concatenating a null base path
        // would otherwise yield a path starting with the literal text "null".
        if (file.isAbsolute() || StringUtils.isBlank(basepath)) {
            return file;
        }
        boolean separatorPresent = basepath.endsWith(File.separator) || filepath.startsWith(File.separator);
        String delimiter = separatorPresent ? "" : File.separator;
        return new File(basepath + delimiter + filepath);
    }

    /**
     * Returns the name of this concept creator.
     *
     * @return the constant string {@code "NCBIGeneConceptCreator"}
     */
    public String getName() {
        return "NCBIGeneConceptCreator";
    }

    /**
     * Mapping step of {@code createGoAnnotationLinks}: adds one {@code IS_ANNOTATED_WITH} relationship per
     * GO annotation to concepts labelled {@link ConceptLabels#ID_MAP_NCBI_GENES}. All other concepts are
     * returned untouched.
     *
     * @param geneAnnotations      multimap from gene ID to {@code String[]{goId, qualifier}} entries
     * @param goOriginalSourceName the original source name used to address the GO term concepts
     * @param concept              the concept to enrich
     * @return the same {@code concept} instance
     */
    private /* synthetic */ ImportConcept lambda$createGoAnnotationLinks$9(Multimap geneAnnotations, String goOriginalSourceName, ImportConcept concept) {
        boolean isNcbiGene = concept.generalLabels != null && concept.generalLabels.contains(ConceptLabels.ID_MAP_NCBI_GENES.name());
        if (!isNcbiGene) {
            return concept;
        }
        Collection<?> annotations = geneAnnotations.get((Object)concept.coordinates.originalId);
        if (annotations == null) {
            return concept;
        }
        this.log.trace("Retrieved GO annotation {} for gene {}", (Object)annotations, (Object)concept.coordinates.originalId);
        // The multimap parameter is raw, so its elements have to be cast explicitly to their array type.
        for (Object entry : annotations) {
            String[] annotation = (String[])entry;
            String goId = annotation[0];
            String qualifier = annotation[1];
            ImportConceptRelationship annotatedWith = new ImportConceptRelationship(new ConceptCoordinates(goId, goOriginalSourceName, CoordinateType.OSRC), "IS_ANNOTATED_WITH");
            annotatedWith.addProperty("qualifier", qualifier);
            concept.addRelationship(annotatedWith);
        }
        return concept;
    }

    /**
     * Mapping step of the orthology aggregate creation: makes every orthology aggregate registered for
     * the gene a parent of the gene. If the gene belongs to more than one aggregate it is additionally
     * labelled {@link ConceptLabels#NO_QUERY_DICTIONARY} and {@link ConceptLabels#NO_SUGGESTIONS}.
     *
     * @param genes2OrthoAggregate multimap from gene ID to the aggregate concepts containing the gene
     * @param gene                 the gene concept to update
     * @return the same {@code gene} instance
     */
    private static /* synthetic */ ImportConcept lambda$createGeneOrthologyAggregates$6(Multimap genes2OrthoAggregate, ImportConcept gene) {
        Collection<?> orthoAggregates = genes2OrthoAggregate.get((Object)gene.coordinates.originalId);
        boolean inSeveralAggregates = orthoAggregates.size() > 1;
        // The multimap parameter is raw, so its elements have to be cast explicitly to their concept type.
        for (Object entry : orthoAggregates) {
            ImportConcept orthoAggregate = (ImportConcept)entry;
            gene.addParent(orthoAggregate.coordinates);
            if (inSeveralAggregates) {
                // Kept inside the loop: the labels are added once per aggregate, as before.
                gene.addGeneralLabel(new String[]{ConceptLabels.NO_QUERY_DICTIONARY.name(), ConceptLabels.NO_SUGGESTIONS.name()});
            }
        }
        return gene;
    }

    private class HomologeneRecord {
        String taxId;
        String geneId;
        String groupId;

        public HomologeneRecord(String[] record) {
            this.groupId = record[0].intern();
            this.taxId = record[1].intern();
            this.geneId = record[2].intern();
        }
    }

    private class TaxonomyRecord {
        String taxId;
        String scientificName;
        String geneBankCommonName;

        public TaxonomyRecord(String taxId) {
            this.taxId = taxId.intern();
        }
    }

    public static enum ConceptLabels implements Label
    {
        NO_PROCESSING_GAZETTEER,
        NO_SUGGESTIONS,
        NO_QUERY_DICTIONARY,
        ID_MAP_NCBI_GENES;

    }
}

