/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.resources;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import com.lahodiuk.ahocorasick.AhoCorasickOptimized;
import com.sun.istack.NotNull;
import de.julielab.geneexpbase.AhoCorasickLongestMatchCallback;
import de.julielab.geneexpbase.scoring.JaccardScorer;
import de.julielab.geneexpbase.scoring.JaroWinklerScorer;
import de.julielab.geneexpbase.scoring.Scorer;
import de.julielab.java.utilities.FileUtilities;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.apache.commons.lang3.Range;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SpecialistLexiconNameExpansion {
    /** Class-wide SLF4J logger. */
    private static final Logger log = LoggerFactory.getLogger(SpecialistLexiconNameExpansion.class);
    /** Jaro-Winkler scorer; used by {@code GeneGroup.isCompatibleTo} to compare group names. */
    private static final Scorer jaroWinkler = new JaroWinklerScorer();
    /** Jaccard scorer. NOTE(review): no use of this field is visible in the reviewed part of the class. */
    private static final Scorer jaccard = new JaccardScorer();
    /**
     * Adapted input names that are skipped while reading the input name files.
     * The field is static but is (re)assigned by every constructor call.
     */
    private static Set<String> inputStopwords;
    /** Running number appended to {@code "GENO:"} to form gene group IDs. */
    private final AtomicInteger geneGroupIDcounter = new AtomicInteger();
    /** Matcher for runs of ASCII digits; it is reset with new input inside {@code numberCompatible}. */
    private final Matcher numfinder = Pattern.compile("[0-9]+").matcher("");

    /**
     * Creates a name expander and (re)initialises the shared stop word set with
     * the generic terms that must not be treated as input names.
     */
    public SpecialistLexiconNameExpansion() {
        Set<String> stopwords = new HashSet<String>();
        Collections.addAll(stopwords, "family", "superfamily", "subfamily", "group", "factor", "receptor");
        inputStopwords = stopwords;
    }

    /**
     * Command line entry point.
     *
     * @param args the SPECIALIST Lexicon XML file, the output dictionary
     *             destination and one or more input dictionary files
     * @throws IOException        propagated from {@link #createDict}
     * @throws XMLStreamException propagated from {@link #createDict}
     */
    public static void main(String[] args) throws IOException, XMLStreamException {
        if (args.length < 3) {
            System.err.println("Usage: " + SpecialistLexiconNameExpansion.class.getCanonicalName() + " <SPECIALIST Lexicon XML file> <output dictionary destination> <input dict file> [<input dict file>]* ");
            return;
        }
        // Everything after the first two arguments is an input name file.
        File[] inputFiles = Stream.of(args).skip(2).map(File::new).toArray(File[]::new);
        File lexiconFile = new File(args[0]);
        File destination = new File(args[1]);
        new SpecialistLexiconNameExpansion().createDict(lexiconFile, inputFiles, destination);
    }

    /** Matches a single Unicode punctuation character; compiled once because normalization runs for every name. */
    private static final Pattern PUNCTUATION_PATTERN = Pattern.compile("\\p{P}");

    /**
     * Normalizes a name for lookup: lower-cases it and replaces every
     * punctuation character with a single space.
     *
     * @param input the name to normalize
     * @return the normalized name
     */
    private String normalize(String input) {
        // Locale.ROOT keeps the result independent of the JVM default locale
        // (e.g. the Turkish dotless i), so the same names always produce the same keys.
        String lowerCased = input.toLowerCase(java.util.Locale.ROOT);
        return PUNCTUATION_PATTERN.matcher(lowerCased).replaceAll(" ");
    }

    /**
     * Reads the input name files, repeatedly expands the names with matching
     * SPECIALIST Lexicon entries until no new names appear, groups the names
     * into gene groups and writes the result.
     * <p>
     * Two files are written: the gene group listing at
     * {@code dictionaryDestination} and a flat name dictionary named
     * {@code familyrecords.dict} in the current working directory.
     *
     * @param specialistXmlLexicon  the SPECIALIST Lexicon in XML format
     * @param inputNamesFiles       tab-separated files with at least the three
     *                              columns name, id and priority; blank lines and
     *                              lines starting with {@code #} are ignored
     * @param dictionaryDestination the file the gene groups are written to
     * @throws IOException              if reading or writing a file fails
     * @throws XMLStreamException       if the lexicon cannot be parsed
     * @throws IllegalArgumentException if an input line has fewer than three columns
     */
    public void createDict(File specialistXmlLexicon, File[] inputNamesFiles, File dictionaryDestination) throws IOException, XMLStreamException {
        log.info("Reading SPECIALIST Lexicon from {}, name lists from {} and writing dictionary to {}.", specialistXmlLexicon, inputNamesFiles, dictionaryDestination);
        Multimap<String, String> adaptedname2id = HashMultimap.create();
        Multimap<String, String> eui2abbreui = HashMultimap.create();
        Multimap<String, String> eui2acroeui = HashMultimap.create();
        Map<String, SpecialistEntry> eui2entry = new HashMap<String, SpecialistEntry>();
        Map<String, SpecialistEntry> acronymEntries = new HashMap<String, SpecialistEntry>();
        Map<String, SpecialistEntry> abbreviationEntries = new HashMap<String, SpecialistEntry>();
        Multimap<String, String> id2inputName = HashMultimap.create();
        Map<String, String> adaptedName2originalName = new HashMap<String, String>();
        Multimap<String, String> normalized2OriginalInputNames = HashMultimap.create();
        for (File inputNamesFile : inputNamesFiles) {
            this.readInputNames(inputNamesFile, adaptedname2id, id2inputName, adaptedName2originalName, normalized2OriginalInputNames);
        }
        log.info("Got {} input names to check for connections amongst each other and with the SPECIALIST Lexicon", adaptedName2originalName.size());

        // Expand until a pass over the lexicon no longer adds names (fixed point).
        Map<String, Multimap<EntryType, SpecialistEntry>> names2entries;
        int numNames;
        int iterations = 0;
        do {
            numNames = normalized2OriginalInputNames.size();
            names2entries = this.findEntriesForInputNames(specialistXmlLexicon, normalized2OriginalInputNames, eui2entry, eui2abbreui, eui2acroeui, acronymEntries, abbreviationEntries);
            this.extendNamesWithLexiconItems(adaptedname2id, normalized2OriginalInputNames, names2entries);
            ++iterations;
        } while (normalized2OriginalInputNames.size() != numNames);
        log.info("After {} iterations of name expansion, a final number of {} names was identified.", iterations, numNames);

        List<GeneGroup> geneGroups = this.buildGeneGroups(id2inputName, names2entries, eui2entry);
        log.info("Writing {} connected gene groups to {}", geneGroups.size(), dictionaryDestination);
        this.writeGeneGroups(dictionaryDestination, geneGroups, adaptedname2id, names2entries);
        log.info("Writing a dictionary of all gene group names to familyrecords.dict");
        this.writeFamilyRecordsDict(geneGroups);
        log.info("Done.");
    }

    /** Reads one tab-separated input name file into the given lookup structures and closes the file afterwards. */
    private void readInputNames(File inputNamesFile, Multimap<String, String> adaptedname2id, Multimap<String, String> id2inputName, Map<String, String> adaptedName2originalName, Multimap<String, String> normalized2OriginalInputNames) throws IOException {
        try (BufferedReader reader = FileUtilities.getReaderFromFile(inputNamesFile)) {
            reader.lines().filter(Predicate.not(String::isBlank)).filter(line -> !line.startsWith("#")).forEach(line -> {
                String[] split = line.split("\t");
                if (split.length < 3) {
                    throw new IllegalArgumentException("Unsupported format in file " + inputNamesFile.getName() + ". Expected three columns with 'name', 'id' and 'priority'. Got: " + line);
                }
                String originalName = split[0];
                String name = this.adaptName(originalName);
                adaptedName2originalName.put(name, originalName);
                if (inputStopwords.contains(name)) {
                    return;
                }
                String id = split[1];
                adaptedname2id.put(name, id);
                String normalizedName = this.normalize(name);
                // Only the first name per normalized form is registered here. The former
                // additional check against the raw line could never fail for an absent key.
                if (!normalized2OriginalInputNames.containsKey(normalizedName)) {
                    normalized2OriginalInputNames.put(normalizedName, name);
                }
                id2inputName.put(id, name);
            });
        }
    }

    /** Creates the gene groups: one per external ID, filled from the lexicon entries found for the names of that ID. */
    private List<GeneGroup> buildGeneGroups(Multimap<String, String> id2inputName, Map<String, Multimap<EntryType, SpecialistEntry>> names2entries, Map<String, SpecialistEntry> eui2entry) {
        List<GeneGroup> geneGroups = new ArrayList<GeneGroup>();
        for (String externalId : id2inputName.keySet()) {
            // NOTE(review): when lexicon entries exist for an ID but all of them are
            // ambiguous, no group at all is created for that ID - confirm this is intended.
            boolean lexiconEntryFoundForId = false;
            GeneGroup groupForId = null;
            for (String externalInputName : id2inputName.get(externalId)) {
                Multimap<EntryType, SpecialistEntry> lexiconNames = names2entries.get(externalInputName);
                if (lexiconNames == null) {
                    continue;
                }
                lexiconEntryFoundForId = true;
                Set<String> alreadySeenLexiconEntries = new HashSet<String>();
                for (SpecialistEntry lexiconEntry : lexiconNames.values()) {
                    if (lexiconEntry.hasAmbiguousAbbreviationsAndAcronyms() || !alreadySeenLexiconEntries.add(lexiconEntry.getEui())) {
                        continue;
                    }
                    Set<SpecialistEntry> networkNodes = this.collectUnambiguousNetwork(lexiconEntry, eui2entry);
                    // The shortest base form becomes the group base, longer ones count as spelling variants.
                    List<String> basesByLength = networkNodes.stream().map(SpecialistEntry::getBase).sorted(Comparator.comparingInt(String::length)).collect(Collectors.toList());
                    if (groupForId == null) {
                        groupForId = new GeneGroup("GENO:" + this.geneGroupIDcounter.incrementAndGet());
                        geneGroups.add(groupForId);
                    }
                    final GeneGroup gg = groupForId;
                    gg.addExternalId(externalId);
                    gg.addInputName(externalInputName);
                    gg.addBase(basesByLength.get(0));
                    basesByLength.stream().skip(1).forEach(gg::addSpellingVariant);
                    networkNodes.stream().map(SpecialistEntry::getAbbreviations).flatMap(Collection::stream).forEach(gg::addAbbreviation);
                    networkNodes.stream().map(SpecialistEntry::getAcronyms).flatMap(Collection::stream).forEach(gg::addAcronym);
                    networkNodes.stream().map(SpecialistEntry::getInflectionVariants).flatMap(Collection::stream).forEach(gg::addInflectionVariant);
                    networkNodes.stream().map(SpecialistEntry::getSpellingVariants).flatMap(Collection::stream).forEach(gg::addSpellingVariant);
                    networkNodes.stream().map(SpecialistEntry::getEui).forEach(gg::addConnectedLexiconEntry);
                }
            }
            if (lexiconEntryFoundForId) {
                continue;
            }
            // No lexicon entry for any name of this ID: the group is named after the ID part behind the colon.
            GeneGroup fallback = new GeneGroup("GENO:" + this.geneGroupIDcounter.incrementAndGet());
            fallback.addBase(externalId.substring(externalId.indexOf(':') + 1));
            fallback.addExternalId(externalId);
            id2inputName.get(externalId).forEach(fallback::addInputName);
            geneGroups.add(fallback);
        }
        // Make sure every group lists all input names of all its external IDs.
        for (GeneGroup gg : geneGroups) {
            for (String externalId : gg.getExternalIds()) {
                id2inputName.get(externalId).forEach(gg::addInputName);
            }
        }
        return geneGroups;
    }

    /** Returns the given entry together with the unambiguous entries it directly references as abbreviation or acronym long forms. */
    private Set<SpecialistEntry> collectUnambiguousNetwork(SpecialistEntry lexiconEntry, Map<String, SpecialistEntry> eui2entry) {
        Set<SpecialistEntry> networkNodes = new HashSet<SpecialistEntry>();
        networkNodes.add(lexiconEntry);
        lexiconEntry.getAbbreviationLongformEuis().stream().map(eui2entry::get).filter(Objects::nonNull).filter(Predicate.not(SpecialistEntry::hasAmbiguousAbbreviationsAndAcronyms)).forEach(networkNodes::add);
        lexiconEntry.getAcronymLongformEuis().stream().map(eui2entry::get).filter(Objects::nonNull).filter(Predicate.not(SpecialistEntry::hasAmbiguousAbbreviationsAndAcronyms)).forEach(networkNodes::add);
        return networkNodes;
    }

    /** Writes every name of every group to {@code familyrecords.dict}; the first base name encountered per group gets priority -1, all others 2. */
    private void writeFamilyRecordsDict(List<GeneGroup> geneGroups) throws IOException {
        try (BufferedWriter bw = FileUtilities.getWriterToFile(new File("familyrecords.dict"))) {
            for (GeneGroup gg : geneGroups) {
                Set<String> allnames = gg.getAllNames().collect(Collectors.toSet());
                String id = gg.getId();
                boolean firstBaseEncountered = false;
                for (String name : allnames) {
                    String prio = "2";
                    if (!firstBaseEncountered && gg.getBases().contains(name)) {
                        prio = "-1";
                        firstBaseEncountered = true;
                    }
                    bw.write(name + "\t" + id + "\t" + prio);
                    bw.newLine();
                }
            }
        }
    }

    /** Matches the suffix {@code " family"} at the end of a name; compiled once instead of on every call. */
    private static final Pattern FAMILY_SUFFIX_PATTERN = Pattern.compile(" family$");

    /**
     * Strips a trailing {@code " family"} from the given name.
     *
     * @param originalName the name as read from the input
     * @return the name without the trailing suffix, or the unchanged name if it has none
     */
    @NotNull
    public String adaptName(String originalName) {
        return FAMILY_SUFFIX_PATTERN.matcher(originalName).replaceAll("");
    }

    /**
     * Checks whether two strings contain the same set of digit runs.
     * Digit runs are compared as strings, so {@code "01"} and {@code "1"} differ.
     *
     * @param t1 the first string
     * @param t2 the second string
     * @return {@code true} if both strings contain exactly the same numbers,
     *         which includes the case that neither contains any
     */
    public boolean numberCompatible(String t1, String t2) {
        Set<String> number1 = new HashSet<String>();
        this.numfinder.reset(t1);
        while (this.numfinder.find()) {
            number1.add(this.numfinder.group());
        }
        Set<String> number2 = new HashSet<String>();
        this.numfinder.reset(t2);
        while (this.numfinder.find()) {
            // Numbers of the second string belong into their own set; collecting them
            // into number1 made every pair containing a number incompatible.
            number2.add(this.numfinder.group());
        }
        return number1.equals(number2);
    }

    /**
     * Writes the gene groups as blocks of labelled, tab-separated lines,
     * preceded by a creation-date comment and separated by blank lines.
     * Values within one line are sorted and joined with {@code ", "}.
     *
     * @param dictionaryDestination the file to write
     * @param geneGroups            the groups to write, in output order
     * @param name2id               not used by this method
     * @param names2entries         not used by this method
     * @throws IOException if writing fails
     */
    public void writeGeneGroups(File dictionaryDestination, List<GeneGroup> geneGroups, Multimap<String, String> name2id, Map<String, Multimap<EntryType, SpecialistEntry>> names2entries) throws IOException {
        try (BufferedWriter out = FileUtilities.getWriterToFile(dictionaryDestination)) {
            out.write("# Created " + new Date());
            out.newLine();
            for (GeneGroup group : geneGroups) {
                out.write(group.getId());
                out.newLine();
                writeSortedField(out, "bases:\t", group.getBases());
                writeSortedField(out, "inflections:\t", group.getInflectionVariants());
                writeSortedField(out, "spellings:\t", group.getSpellingVariants());
                writeSortedField(out, "acronyms:\t", group.getAcronyms());
                writeSortedField(out, "abbreviations:\t", group.getAbbreviations());
                writeSortedField(out, "externalids:\t", group.getExternalIds());
                writeSortedField(out, "inputnames:\t", group.getInputNames());
                writeSortedField(out, "lexiconEuis:\t", group.getConnectedLexiconEntries());
                out.newLine();
            }
        }
    }

    /** Writes one line consisting of the label followed by the sorted, comma-separated values. */
    private static void writeSortedField(BufferedWriter out, String label, Collection<String> values) throws IOException {
        out.write(label + values.stream().sorted().collect(Collectors.joining(", ")));
        out.newLine();
    }

    /**
     * Adds the names of the lexicon entries found for each input name to
     * {@code name2id} under the IDs of that input name, then registers every
     * key of {@code name2id} in {@code normalized2OriginalInputNames} under
     * its normalized form.
     * <p>
     * Which names of an entry are added depends on how the input name matched:
     * <ul>
     * <li>abbreviation / acronym match: the base, plus variants and all
     * abbreviations respectively acronyms if the entry reports a uniform
     * abbreviation and acronym suffix;</li>
     * <li>spelling / inflection match: the base and all variants;</li>
     * <li>base match: base and variants for non-acronym entries; for acronym
     * entries the variants plus abbreviations or acronyms, depending on which
     * suffix check succeeds.</li>
     * </ul>
     *
     * @param name2id                       name to ID mapping, extended in place
     * @param normalized2OriginalInputNames normalized name to original name mapping, extended in place
     * @param names2entries                 lexicon entries per input name, grouped by match type
     * @throws IllegalArgumentException if an entry type other than the five handled ones occurs
     */
    public void extendNamesWithLexiconItems(Multimap<String, String> name2id, Multimap<String, String> normalized2OriginalInputNames, Map<String, Multimap<EntryType, SpecialistEntry>> names2entries) {
        for (Map.Entry<String, Multimap<EntryType, SpecialistEntry>> nameAndEntries : names2entries.entrySet()) {
            Multimap<EntryType, SpecialistEntry> specialistEntries = nameAndEntries.getValue();
            Collection<String> ids = name2id.get(nameAndEntries.getKey());
            for (EntryType type : specialistEntries.keySet()) {
                Collection<SpecialistEntry> entries4type = specialistEntries.get(type);
                switch (type) {
                    case ABBREVIATION:
                        for (SpecialistEntry e : entries4type) {
                            for (String id : ids) {
                                name2id.put(e.getBase(), id);
                                if (e.uniformAbbreviationAndAcronymSuffix()) {
                                    putVariants(name2id, e, id);
                                    putAllNames(name2id, e.getAbbreviations(), id);
                                }
                            }
                        }
                        break;
                    case ACRONYM:
                        for (SpecialistEntry e : entries4type) {
                            for (String id : ids) {
                                // The map is keyed by name: base first, ID second (this call used to be swapped).
                                name2id.put(e.getBase(), id);
                                if (e.uniformAbbreviationAndAcronymSuffix()) {
                                    putVariants(name2id, e, id);
                                    putAllNames(name2id, e.getAcronyms(), id);
                                }
                            }
                        }
                        break;
                    case SPELLING:
                    case INFLECTION:
                        for (SpecialistEntry e : entries4type) {
                            for (String id : ids) {
                                name2id.put(e.getBase(), id);
                                putVariants(name2id, e, id);
                            }
                        }
                        break;
                    case BASE:
                        for (SpecialistEntry e : entries4type) {
                            extendFromBaseMatch(name2id, e, ids);
                        }
                        break;
                    default:
                        throw new IllegalArgumentException("Unknown Specialist Entry type " + type);
                }
            }
        }
        for (String newname : name2id.keySet()) {
            normalized2OriginalInputNames.put(this.normalize(newname), newname);
        }
    }

    /** Handles an entry whose base form matched the input name. */
    private static void extendFromBaseMatch(Multimap<String, String> name2id, SpecialistEntry e, Collection<String> ids) {
        if (!e.isAcronymEntry()) {
            for (String id : ids) {
                name2id.put(e.getBase(), id);
                putVariants(name2id, e, id);
            }
        } else if (e.uniformAbbreviationSuffix()) {
            for (String id : ids) {
                putVariants(name2id, e, id);
                putAllNames(name2id, e.getAbbreviations(), id);
            }
        } else if (e.uniformAcronymSuffix()) {
            for (String id : ids) {
                putVariants(name2id, e, id);
                putAllNames(name2id, e.getAcronyms(), id);
            }
        }
    }

    /** Maps all inflection and spelling variants of the entry to the ID. */
    private static void putVariants(Multimap<String, String> name2id, SpecialistEntry e, String id) {
        putAllNames(name2id, e.getInflectionVariants(), id);
        putAllNames(name2id, e.getSpellingVariants(), id);
    }

    /** Maps each of the given names to the ID. */
    private static void putAllNames(Multimap<String, String> name2id, Collection<String> names, String id) {
        for (String name : names) {
            name2id.put(name, id);
        }
    }

    /**
     * Streams through the SPECIALIST Lexicon XML and collects, for every input
     * name, the lexicon records in which it occurs and how it occurs there.
     * <p>
     * A record is assembled between the start and the end of a
     * {@code lexRecord} element; at the end it is handed to
     * {@code storeLexiconRecord} together with the input names that matched
     * one of its names exactly or only after normalization.
     *
     * @param specialistXmlLexicon          the lexicon XML file
     * @param normalized2OriginalInputNames normalized name to original input names
     * @param eui2entry                     passed through to {@code storeLexiconRecord}
     * @param eui2abbreui                   passed through to {@code storeLexiconRecord}
     * @param eui2acroeui                   passed through to {@code storeLexiconRecord}
     * @param acronymEntries                passed through to {@code storeLexiconRecord}
     * @param abbreviationEntries           passed through to {@code storeLexiconRecord}
     * @return for each matched input name the lexicon entries grouped by match type
     * @throws IOException        if the file cannot be read
     * @throws XMLStreamException if the XML cannot be parsed
     */
    public Map<String, Multimap<EntryType, SpecialistEntry>> findEntriesForInputNames(File specialistXmlLexicon, Multimap<String, String> normalized2OriginalInputNames, Map<String, SpecialistEntry> eui2entry, Multimap<String, String> eui2abbreui, Multimap<String, String> eui2acroeui, Map<String, SpecialistEntry> acronymEntries, Map<String, SpecialistEntry> abbreviationEntries) throws IOException, XMLStreamException {
        Map<String, Multimap<EntryType, SpecialistEntry>> names2entries = new HashMap<String, Multimap<EntryType, SpecialistEntry>>();
        try (BufferedReader br = FileUtilities.getReaderFromFile(specialistXmlLexicon)) {
            XMLStreamReader reader = XMLInputFactory.newFactory().createXMLStreamReader(br);
            SpecialistEntry currentEntry = null;
            Multimap<String, EntryType> exactInputs4CurrentEntry = HashMultimap.create();
            Multimap<String, EntryType> normalizedInputs4CurrentEntry = HashMultimap.create();
            while (reader.hasNext()) {
                int eventType = reader.next();
                if (eventType == XMLStreamReader.END_ELEMENT) {
                    // Only the end of a whole record is of interest: store it and start over.
                    if (reader.getName().getLocalPart().equals("lexRecord")) {
                        this.storeLexiconRecord(eui2entry, eui2abbreui, eui2acroeui, acronymEntries, abbreviationEntries, names2entries, currentEntry, exactInputs4CurrentEntry, normalizedInputs4CurrentEntry);
                        exactInputs4CurrentEntry.clear();
                        normalizedInputs4CurrentEntry.clear();
                    }
                } else if (eventType == XMLStreamReader.START_ELEMENT) {
                    switch (reader.getName().getLocalPart()) {
                        case "lexRecord":
                            currentEntry = new SpecialistEntry();
                            break;
                        case "acronyms":
                            this.readShortform(reader, currentEntry, EntryType.ACRONYM, EntryType.ACRONYM_LONGFORM_EUID, normalized2OriginalInputNames, exactInputs4CurrentEntry, normalizedInputs4CurrentEntry);
                            break;
                        case "abbreviations":
                            this.readShortform(reader, currentEntry, EntryType.ABBREVIATION, EntryType.ABBREVIATION_LONGFORM_EUID, normalized2OriginalInputNames, exactInputs4CurrentEntry, normalizedInputs4CurrentEntry);
                            break;
                        case "base":
                            this.readName(reader, currentEntry, EntryType.BASE, normalized2OriginalInputNames, exactInputs4CurrentEntry, normalizedInputs4CurrentEntry);
                            break;
                        case "spellingVars":
                            this.readName(reader, currentEntry, EntryType.SPELLING, normalized2OriginalInputNames, exactInputs4CurrentEntry, normalizedInputs4CurrentEntry);
                            break;
                        case "inflVars":
                            this.readName(reader, currentEntry, EntryType.INFLECTION, normalized2OriginalInputNames, exactInputs4CurrentEntry, normalizedInputs4CurrentEntry);
                            break;
                        case "cat":
                            currentEntry.setCategory(reader.getElementText());
                            break;
                        case "eui":
                            currentEntry.setEui(reader.getElementText());
                            break;
                        default:
                            break;
                    }
                }
            }
        }
        return names2entries;
    }

    /** Reads an acronym or abbreviation element of the form {@code shortform|longformEui} into the entry and matches the short form against the input names. */
    private void readShortform(XMLStreamReader reader, SpecialistEntry currentEntry, EntryType entryType, EntryType refType, Multimap<String, String> normalized2OriginalInputNames, Multimap<String, EntryType> exactInputs4CurrentEntry, Multimap<String, EntryType> normalizedInputs4CurrentEntry) throws XMLStreamException {
        String[] split = reader.getElementText().split("\\|");
        String shortform = split[0];
        String longformEntryEui = split.length > 1 ? split[1] : "<no eui given>";
        currentEntry.add(shortform, entryType);
        currentEntry.add(longformEntryEui, refType);
        this.matchInputNames(shortform, entryType, normalized2OriginalInputNames, exactInputs4CurrentEntry, normalizedInputs4CurrentEntry);
    }

    /** Reads a base, spelling variant or inflection variant element into the entry and matches it against the input names. */
    private void readName(XMLStreamReader reader, SpecialistEntry currentEntry, EntryType entryType, Multimap<String, String> normalized2OriginalInputNames, Multimap<String, EntryType> exactInputs4CurrentEntry, Multimap<String, EntryType> normalizedInputs4CurrentEntry) throws XMLStreamException {
        String elementText = reader.getElementText();
        currentEntry.add(elementText, entryType);
        this.matchInputNames(elementText, entryType, normalized2OriginalInputNames, exactInputs4CurrentEntry, normalizedInputs4CurrentEntry);
    }

    /** Records every input name sharing the normalized form of the lexicon text, separating exact matches from normalized-only ones. */
    private void matchInputNames(String elementText, EntryType entryType, Multimap<String, String> normalized2OriginalInputNames, Multimap<String, EntryType> exactInputs4CurrentEntry, Multimap<String, EntryType> normalizedInputs4CurrentEntry) {
        String normalizedElementText = this.normalize(elementText);
        if (!normalized2OriginalInputNames.containsKey(normalizedElementText)) {
            return;
        }
        for (String originalInputName : normalized2OriginalInputNames.get(normalizedElementText)) {
            Multimap<String, EntryType> target = originalInputName.equals(elementText) ? exactInputs4CurrentEntry : normalizedInputs4CurrentEntry;
            target.put(originalInputName, entryType);
        }
    }

    /**
     * Registers a completely read lexicon record. Only records of category
     * {@code "noun"} are stored; a missing record or a record without a
     * category is ignored.
     * <p>
     * Exact matches are always added to {@code names2entries}. A
     * normalized-only match is added only if the name has no entry in
     * {@code names2entries} yet. If the record matched any input name, its
     * long form references and its acronym/abbreviation status are recorded
     * as well.
     *
     * @param eui2entry                     all stored noun records by EUI
     * @param eui2abbreui                   long form EUI to EUIs of records abbreviating it
     * @param eui2acroeui                   long form EUI to EUIs of records being acronyms of it
     * @param acronymEntries                matched records that are acronym entries, by EUI
     * @param abbreviationEntries           matched records that are abbreviation entries, by EUI
     * @param names2entries                 input name to matching records grouped by match type
     * @param currentEntry                  the record to store, may be {@code null}
     * @param exactInputs4CurrentEntry      input names equal to a name of the record
     * @param normalizedInputs4CurrentEntry input names equal to a name of the record only after normalization
     */
    private void storeLexiconRecord(Map<String, SpecialistEntry> eui2entry, Multimap<String, String> eui2abbreui, Multimap<String, String> eui2acroeui, Map<String, SpecialistEntry> acronymEntries, Map<String, SpecialistEntry> abbreviationEntries, Map<String, Multimap<EntryType, SpecialistEntry>> names2entries, SpecialistEntry currentEntry, Multimap<String, EntryType> exactInputs4CurrentEntry, Multimap<String, EntryType> normalizedInputs4CurrentEntry) {
        // Constant-first comparison: a record without a category is skipped instead of raising an NPE.
        if (currentEntry == null || !"noun".equals(currentEntry.getCategory())) {
            return;
        }
        eui2entry.put(currentEntry.getEui(), currentEntry);
        for (Map.Entry<String, EntryType> exactMatch : exactInputs4CurrentEntry.entries()) {
            Multimap<EntryType, SpecialistEntry> type2entries = names2entries.computeIfAbsent(exactMatch.getKey(), k -> HashMultimap.create());
            type2entries.put(exactMatch.getValue(), currentEntry);
        }
        for (Map.Entry<String, EntryType> normalizedMatch : normalizedInputs4CurrentEntry.entries()) {
            String name = normalizedMatch.getKey();
            // Names that already have any entry keep it; normalized matches never extend existing ones.
            if (names2entries.containsKey(name)) {
                continue;
            }
            Multimap<EntryType, SpecialistEntry> type2entries = HashMultimap.create();
            type2entries.put(normalizedMatch.getValue(), currentEntry);
            names2entries.put(name, type2entries);
        }
        if (exactInputs4CurrentEntry.isEmpty() && normalizedInputs4CurrentEntry.isEmpty()) {
            return;
        }
        for (String longformEui : currentEntry.getAbbreviationLongformEuis()) {
            eui2abbreui.put(longformEui, currentEntry.getEui());
        }
        for (String longformEui : currentEntry.getAcronymLongformEuis()) {
            eui2acroeui.put(longformEui, currentEntry.getEui());
        }
        if (currentEntry.isAcronymEntry()) {
            acronymEntries.put(currentEntry.getEui(), currentEntry);
        }
        if (currentEntry.isAbbreviationEntry()) {
            abbreviationEntries.put(currentEntry.getEui(), currentEntry);
        }
    }

    /**
     * Tries to align the characters of a short form with the tokens of a text
     * and returns the tokens that take part in the alignment.
     * <p>
     * The text is split at whitespace and punctuation. Characters are compared
     * case-insensitively. Whenever the first character of a token does not
     * match the short form character currently expected, the alignment found
     * so far is discarded and starts again at the first short form character.
     *
     * @param acronym     the short form
     * @param matchedText the text expected to contain the long form
     * @return the aligned tokens joined by single spaces, or {@code null} if
     *         not all characters of the short form could be aligned
     */
    @Nullable
    private String findLongform(CharSequence acronym, String matchedText) {
        String[] tokens = matchedText.split("[\\s\\p{P}]+");
        int acronymLength = acronym.length();
        int nextAcronymChar = 0;
        int alignedChars = 0;
        ArrayList<Integer> alignedTokenIndices = new ArrayList<Integer>();
        for (int tokenIdx = 0; tokenIdx < tokens.length && nextAcronymChar < acronymLength; ++tokenIdx) {
            String token = tokens[tokenIdx];
            for (int charIdx = 0; charIdx < token.length() && nextAcronymChar < acronymLength; ++charIdx) {
                boolean sameChar = Character.toLowerCase(token.charAt(charIdx)) == Character.toLowerCase(acronym.charAt(nextAcronymChar));
                if (charIdx == 0) {
                    if (!sameChar) {
                        // A token not starting with the expected character breaks the alignment.
                        nextAcronymChar = 0;
                        alignedChars = 0;
                        alignedTokenIndices.clear();
                        continue;
                    }
                    alignedTokenIndices.add(tokenIdx);
                }
                if (sameChar) {
                    ++alignedChars;
                    ++nextAcronymChar;
                }
            }
        }
        if (alignedChars != acronymLength) {
            return null;
        }
        return alignedTokenIndices.stream().map(idx -> tokens[idx]).collect(Collectors.joining(" "));
    }

    private class GeneGroup {
        /** Identifier of this group, set once in the constructor. */
        private final String id;
        // Every set below starts as the shared immutable empty set; the add* methods
        // replace it with a mutable HashSet before the first element is inserted.
        /** Base names of the group. */
        private Set<String> bases = Collections.emptySet();
        /** Acronyms collected for the group. */
        private Set<String> acronyms = Collections.emptySet();
        /** Abbreviations collected for the group. */
        private Set<String> abbreviations = Collections.emptySet();
        /** Long forms of abbreviations, filled by {@code addAbbreviationLongform}. */
        private Set<String> abbreviationLongforms = Collections.emptySet();
        /** Long forms of acronyms, filled by {@code addAcronymLongform}. */
        private Set<String> acronymLongforms = Collections.emptySet();
        /** Spelling variants collected for the group. */
        private Set<String> spellingVariants = Collections.emptySet();
        /** Inflection variants collected for the group. */
        private Set<String> inflectionVariants = Collections.emptySet();
        /** External IDs connected to the group. */
        private Set<String> externalIds = Collections.emptySet();
        /** Input names connected to the group. */
        private Set<String> inputNames = Collections.emptySet();
        /** Prefixes of the external IDs up to the first colon, filled by {@code addExternalId}. */
        private Set<String> externalIdNamespaces = Collections.emptySet();
        /** Lexicon entries connected to the group, filled by {@code addConnectedLexiconEntry}. */
        private Set<String> connectedLexiconEntries = Collections.emptySet();

        /**
         * Creates an empty group.
         *
         * @param id the identifier of the group
         */
        public GeneGroup(String id) {
            this.id = id;
        }

        /**
         * Streams all names of the group in the order bases, spelling variants,
         * inflection variants, abbreviations, acronyms. Names contained in more
         * than one set occur more than once.
         */
        private Stream<String> getAllNames() {
            return Stream.of(bases, spellingVariants, inflectionVariants, abbreviations, acronyms).flatMap(Set::stream);
        }

        /** @return the base names; the internal set itself, not a copy */
        public Set<String> getBases() {
            return bases;
        }

        /** @return the acronyms; the internal set itself, not a copy */
        public Set<String> getAcronyms() {
            return acronyms;
        }

        /** @return the abbreviations; the internal set itself, not a copy */
        public Set<String> getAbbreviations() {
            return abbreviations;
        }

        /** @return the spelling variants; the internal set itself, not a copy */
        public Set<String> getSpellingVariants() {
            return spellingVariants;
        }

        /** @return the input names; the internal set itself, not a copy */
        public Set<String> getInputNames() {
            return inputNames;
        }

        /** @return the inflection variants; the internal set itself, not a copy */
        public Set<String> getInflectionVariants() {
            return inflectionVariants;
        }

        /** @return the external IDs; the internal set itself, not a copy */
        public Set<String> getExternalIds() {
            return externalIds;
        }

        /**
         * Adds a connected lexicon entry.
         *
         * @param entry the value to add
         */
        public void addConnectedLexiconEntry(String entry) {
            // The initial empty set is immutable, so swap in a mutable one before the first insert.
            Set<String> target = connectedLexiconEntries.isEmpty() ? new HashSet<String>() : connectedLexiconEntries;
            target.add(entry);
            connectedLexiconEntries = target;
        }

        /**
         * Adds an external ID namespace.
         *
         * @param namespace the namespace to add
         */
        public void addExternalIdNamespace(String namespace) {
            // The initial empty set is immutable, so swap in a mutable one before the first insert.
            Set<String> target = externalIdNamespaces.isEmpty() ? new HashSet<String>() : externalIdNamespaces;
            target.add(namespace);
            externalIdNamespaces = target;
        }

        /**
         * Adds an input name.
         *
         * @param variant the input name to add
         */
        public void addInputName(String variant) {
            // The initial empty set is immutable, so swap in a mutable one before the first insert.
            Set<String> target = inputNames.isEmpty() ? new HashSet<String>() : inputNames;
            target.add(variant);
            inputNames = target;
        }

        /**
         * Adds an abbreviation.
         *
         * @param variant the abbreviation to add
         */
        public void addAbbreviation(String variant) {
            // The initial empty set is immutable, so swap in a mutable one before the first insert.
            Set<String> target = abbreviations.isEmpty() ? new HashSet<String>() : abbreviations;
            target.add(variant);
            abbreviations = target;
        }

        /**
         * Adds an acronym.
         *
         * @param variant the acronym to add
         */
        public void addAcronym(String variant) {
            // The initial empty set is immutable, so swap in a mutable one before the first insert.
            Set<String> target = acronyms.isEmpty() ? new HashSet<String>() : acronyms;
            target.add(variant);
            acronyms = target;
        }

        /**
         * Adds the long form of an abbreviation.
         *
         * @param variant the long form to add
         */
        public void addAbbreviationLongform(String variant) {
            // The initial empty set is immutable, so swap in a mutable one before the first insert.
            Set<String> target = abbreviationLongforms.isEmpty() ? new HashSet<String>() : abbreviationLongforms;
            target.add(variant);
            abbreviationLongforms = target;
        }

        /**
         * Adds the long form of an acronym.
         *
         * @param variant the long form to add
         */
        public void addAcronymLongform(String variant) {
            // The initial empty set is immutable, so swap in a mutable one before the first insert.
            Set<String> target = acronymLongforms.isEmpty() ? new HashSet<String>() : acronymLongforms;
            target.add(variant);
            acronymLongforms = target;
        }

        /**
         * Adds a spelling variant.
         *
         * @param variant the spelling variant to add
         */
        public void addSpellingVariant(String variant) {
            // The initial empty set is immutable, so swap in a mutable one before the first insert.
            Set<String> target = spellingVariants.isEmpty() ? new HashSet<String>() : spellingVariants;
            target.add(variant);
            spellingVariants = target;
        }

        /**
         * Adds an inflection variant.
         *
         * @param variant the inflection variant to add
         */
        public void addInflectionVariant(String variant) {
            // The initial empty set is immutable, so swap in a mutable one before the first insert.
            Set<String> target = inflectionVariants.isEmpty() ? new HashSet<String>() : inflectionVariants;
            target.add(variant);
            inflectionVariants = target;
        }

        /**
         * Adds an external ID and registers its namespace, i.e. the part of
         * the ID before the first colon. An ID without a colon is added
         * without registering a namespace.
         *
         * @param externalId the external ID to add
         */
        public void addExternalId(String externalId) {
            if (this.externalIds.isEmpty()) {
                // The initial empty set is immutable, so swap in a mutable one before the first insert.
                this.externalIds = new HashSet<String>();
            }
            this.externalIds.add(externalId);
            int namespaceEnd = externalId.indexOf(':');
            // Without this guard an ID lacking a colon made substring(0, -1) throw.
            if (namespaceEnd >= 0) {
                this.addExternalIdNamespace(externalId.substring(0, namespaceEnd));
            }
        }

        /**
         * Adds {@code base} to the {@code bases} set of this group.
         *
         * <p>While the set is empty it is first replaced by a new {@link HashSet}; presumably the
         * field starts out as a shared immutable empty set (its declaration is not visible in this
         * part of the file).
         *
         * @param base the base form to record
         */
        public void addBase(String base) {
            final boolean nothingStoredYet = this.bases.isEmpty();
            if (nothingStoredYet) {
                this.bases = new HashSet<>();
            }
            this.bases.add(base);
        }

        /**
         * Adds every element of the given collection to the {@code spellingVariants} set.
         *
         * <p>While the set is empty it is first replaced by a new {@link HashSet} (this happens even
         * when the argument itself is empty); presumably the field starts out as a shared immutable
         * empty set (its declaration is not visible in this part of the file).
         *
         * @param spellingVariants the spelling variants to add; shadows the field of the same name
         */
        public void addSpellingVariants(Collection<String> spellingVariants) {
            final boolean nothingStoredYet = this.spellingVariants.isEmpty();
            if (nothingStoredYet) {
                this.spellingVariants = new HashSet<>();
            }
            this.spellingVariants.addAll(spellingVariants);
        }

        /**
         * Adds every element of the given collection to the {@code inflectionVariants} set.
         *
         * <p>While the set is empty it is first replaced by a new {@link HashSet} (this happens even
         * when the argument itself is empty); presumably the field starts out as a shared immutable
         * empty set (its declaration is not visible in this part of the file).
         *
         * @param inflectionVariants the inflection variants to add; shadows the field of the same name
         */
        public void addInflectionVariants(Collection<String> inflectionVariants) {
            final boolean nothingStoredYet = this.inflectionVariants.isEmpty();
            if (nothingStoredYet) {
                this.inflectionVariants = new HashSet<>();
            }
            this.inflectionVariants.addAll(inflectionVariants);
        }

        /**
         * Returns the value of this group's {@code id} field.
         *
         * @return the stored id
         */
        public String getId() {
            return id;
        }

        /**
         * Decides whether this group and {@code otherGroup} have at least one pair of sufficiently
         * similar names.
         *
         * <p>For each group the bases, spelling variants and inflection variants are pooled into one
         * set. The groups are compatible as soon as any cross pair of names gets a
         * {@code jaroWinkler} score strictly greater than {@code 0.9}. If either pool is empty the
         * result is {@code false}.
         *
         * @param otherGroup the group to compare against
         * @return {@code true} if some name pair scores above the threshold
         */
        public boolean isCompatibleTo(GeneGroup otherGroup) {
            final double compatibilityThreshold = 0.9;
            // Properly typed sets: the raw Set of the decompiled original cannot be iterated as String.
            Set<String> thisNames = Stream.concat(this.bases.stream(), Stream.concat(this.spellingVariants.stream(), this.inflectionVariants.stream())).collect(Collectors.toSet());
            Set<String> otherNames = Stream.concat(otherGroup.getBases().stream(), Stream.concat(otherGroup.getSpellingVariants().stream(), otherGroup.getInflectionVariants().stream())).collect(Collectors.toSet());
            for (String thisName : thisNames) {
                for (String otherName : otherNames) {
                    // "max score > threshold" holds exactly when some pair exceeds the threshold,
                    // so the scan can stop at the first such pair.
                    if (jaroWinkler.getScore(thisName, otherName) > compatibilityThreshold) {
                        return true;
                    }
                }
            }
            return false;
        }

        /**
         * Copies the bases, spelling variants, inflection variants, acronyms and abbreviations of
         * {@code gg} into this group. No other data of {@code gg} is transferred by this method.
         *
         * <p>Every element goes through the corresponding {@code add…} method of this group, so the
         * empty-set replacement those methods perform also applies to {@code bases}. A direct
         * {@code bases.addAll(...)} would skip that step and would fail if {@code bases} is still an
         * immutable empty placeholder (presumed initial state; the field declaration is not visible
         * in this part of the file).
         *
         * @param gg the group whose names are merged into this one; it is only read
         */
        public void merge(GeneGroup gg) {
            gg.getBases().forEach(this::addBase);
            this.addSpellingVariants(gg.getSpellingVariants());
            this.addInflectionVariants(gg.getInflectionVariants());
            gg.getAcronyms().forEach(this::addAcronym);
            gg.getAbbreviations().forEach(this::addAbbreviation);
        }

        /**
         * Returns the {@code externalIdNamespaces} field itself; no copy is made.
         *
         * @return the set of external-id namespaces held by this group
         */
        public Set<String> getExternalIdNamespaces() {
            return externalIdNamespaces;
        }

        /**
         * Returns the {@code connectedLexiconEntries} field itself; no copy is made.
         *
         * @return the set of connected lexicon entries held by this group
         */
        public Set<String> getConnectedLexiconEntries() {
            return connectedLexiconEntries;
        }
    }

    /**
     * Mutable holder for one lexicon entry: a base form, its spelling and inflection variants, its
     * abbreviations and acronyms, and the EUIs of the long forms those short forms refer to.
     *
     * <p>All list fields start as the shared immutable {@link Collections#emptyList()} and are
     * swapped for an {@link ArrayList} on the first insert. Two entries are equal when their
     * {@code eui} values are equal.
     *
     * <p>This is an inner (non-static) class because
     * {@link #hasAmbiguousAbbreviationsAndAcronyms()} calls {@code findLongform} on the enclosing
     * instance.
     */
    private class SpecialistEntry {
        private String base;
        private List<String> spellingVariants = Collections.emptyList();
        private List<String> inflectionVariants = Collections.emptyList();
        private List<String> abbreviations = Collections.emptyList();
        private List<String> acronyms = Collections.emptyList();
        private List<String> abbreviationLongformEuis = Collections.emptyList();
        private List<String> acronymLongformEuis = Collections.emptyList();
        private String eui;
        private String category;

        private SpecialistEntry() {
        }

        /** @return the EUI of this entry, or {@code null} if none has been set */
        public String getEui() {
            return this.eui;
        }

        /** @param eui the EUI identifying this entry; also drives {@code equals}/{@code hashCode} */
        public void setEui(String eui) {
            this.eui = eui;
        }

        /** @return {@code true} if at least one acronym has been added */
        public boolean isAcronymEntry() {
            return !this.acronyms.isEmpty();
        }

        /**
         * Stores {@code variant} in the slot selected by {@code type}.
         *
         * <p>{@code BASE} overwrites the single base form; every other type appends to its list.
         *
         * @param variant the value to store
         * @param type which kind of value {@code variant} is
         */
        public void add(String variant, EntryType type) {
            switch (type) {
                case SPELLING: {
                    this.addSpellingVariant(variant);
                    break;
                }
                case INFLECTION: {
                    this.addInflectionVariant(variant);
                    break;
                }
                case ABBREVIATION: {
                    this.addAbbreviation(variant);
                    break;
                }
                case ACRONYM: {
                    this.addAcronym(variant);
                    break;
                }
                case ABBREVIATION_LONGFORM_EUID: {
                    this.addAbbreviationLongformEui(variant);
                    break;
                }
                case ACRONYM_LONGFORM_EUID: {
                    this.addAcronymLongformEui(variant);
                    break;
                }
                case BASE: {
                    this.base = variant;
                    break;
                }
                default: {
                    break;
                }
            }
        }

        /** @return the base form, or {@code null} if none has been set */
        public String getBase() {
            return this.base;
        }

        /**
         * Checks whether all acronyms end with the last whitespace-separated token of the first
         * acronym.
         *
         * @return {@code false} if there are no acronyms, {@code true} if there is exactly one,
         *         otherwise whether every acronym ends with that token
         */
        public boolean uniformAcronymSuffix() {
            return this.endWithLastTokenOfFirst(this.acronyms);
        }

        /**
         * Checks whether the abbreviations and acronyms of this entry, after replacing contained
         * upper-case short forms by a long form, are pairwise dissimilar enough to be called
         * ambiguous.
         *
         * <p>Steps:
         * <ol>
         *   <li>Collect the first match of {@code [A-Z][A-Z]+} from each abbreviation/acronym as a
         *       short form.</li>
         *   <li>For each short form ask the enclosing instance's {@code findLongform} for a long
         *       form in every abbreviation/acronym; the last non-null result that differs from the
         *       short form wins.</li>
         *   <li>Expand every abbreviation/acronym once by replacing the longest short-form matches
         *       with their long forms.</li>
         *   <li>Take the minimum {@code jaccard} score over all pairs of expanded strings.</li>
         * </ol>
         *
         * @return {@code true} if the minimum pairwise score is below {@code 0.5}; {@code false}
         *         when there are no abbreviations or acronyms at all
         */
        public boolean hasAmbiguousAbbreviationsAndAcronyms() {
            List<String> abbreviationsAndAcronyms = Stream.concat(this.abbreviations.stream(), this.acronyms.stream()).collect(Collectors.toList());
            Pattern shortformP = Pattern.compile("[A-Z][A-Z]+");
            Set<String> shortforms = abbreviationsAndAcronyms.stream().map(shortformP::matcher).filter(Matcher::find).map(Matcher::group).collect(Collectors.toSet());
            Map<String, String> short2long = new HashMap<>();
            for (String shortform : shortforms) {
                for (String abbreviationAcronym : abbreviationsAndAcronyms) {
                    String longform = SpecialistLexiconNameExpansion.this.findLongform(shortform, abbreviationAcronym);
                    if (longform == null || shortform.equals(longform)) {
                        continue;
                    }
                    short2long.put(shortform, longform);
                }
            }
            AhoCorasickOptimized ac = new AhoCorasickOptimized(shortforms);
            AhoCorasickLongestMatchCallback callback = new AhoCorasickLongestMatchCallback();
            // Expand each string exactly once instead of re-expanding inside the pairwise loop.
            List<String> expandedForms = new ArrayList<>(abbreviationsAndAcronyms.size());
            for (String shortformString : abbreviationsAndAcronyms) {
                expandedForms.add(this.expandShortforms(shortformString, ac, callback, short2long));
            }
            double minscore = 1.0;
            for (String first : expandedForms) {
                for (String second : expandedForms) {
                    double score = jaccard.getScore(first, second);
                    if (score < minscore) {
                        minscore = score;
                    }
                }
            }
            return minscore < 0.5;
        }

        /**
         * Replaces the longest short-form matches in {@code text} by their long forms.
         *
         * <p>Matches are applied from right to left so the offsets reported for the original
         * string stay valid while the text grows or shrinks. A match without a known long form is
         * left untouched rather than being replaced by the literal text {@code "null"}.
         *
         * @param text the string to expand
         * @param matcher automaton built from the short forms
         * @param callback match collector; it is cleared before this method returns
         * @param short2long long form per short form
         * @return the expanded string
         */
        private String expandShortforms(String text, AhoCorasickOptimized matcher, AhoCorasickLongestMatchCallback callback, Map<String, String> short2long) {
            matcher.match(text, callback);
            List<Range<Integer>> matchRanges = new ArrayList<>(callback.getLongestMatches().keySet());
            matchRanges.sort((a, b) -> Integer.compare(b.getMinimum(), a.getMinimum()));
            StringBuilder expanded = new StringBuilder(text);
            for (Range<Integer> matchRange : matchRanges) {
                String longform = short2long.get(callback.getLongestMatches().get(matchRange));
                if (longform == null) {
                    continue;
                }
                // Range bounds are inclusive, StringBuilder.replace takes an exclusive end.
                expanded.replace(matchRange.getMinimum(), matchRange.getMaximum() + 1, longform);
            }
            callback.clear();
            return expanded.toString();
        }

        /**
         * Checks whether all abbreviations and acronyms of this entry are pairwise similar.
         *
         * <p>Despite the name no suffix is inspected: the method takes the minimum
         * {@code jaroWinkler} score over all pairs (each string is also paired with itself).
         *
         * @return {@code true} if the minimum pairwise score is greater than {@code 0.7}; also
         *         {@code true} when there are no abbreviations or acronyms
         */
        public boolean uniformAbbreviationAndAcronymSuffix() {
            List<String> abbreviationsAndAcronyms = Stream.concat(this.abbreviations.stream(), this.acronyms.stream()).collect(Collectors.toList());
            double minscore = 1.0;
            for (String first : abbreviationsAndAcronyms) {
                for (String second : abbreviationsAndAcronyms) {
                    double score = jaroWinkler.getScore(first, second);
                    if (score < minscore) {
                        minscore = score;
                    }
                }
            }
            return minscore > 0.7;
        }

        /**
         * Checks whether all abbreviations end with the last whitespace-separated token of the
         * first abbreviation.
         *
         * @return {@code false} if there are no abbreviations, {@code true} if there is exactly
         *         one, otherwise whether every abbreviation ends with that token
         */
        public boolean uniformAbbreviationSuffix() {
            return this.endWithLastTokenOfFirst(this.abbreviations);
        }

        /**
         * Shared implementation of the two suffix checks.
         *
         * @param shortforms the list to inspect
         * @return {@code false} for an empty list, {@code true} for a single element, otherwise
         *         whether every element ends with the last whitespace-separated token of the first
         */
        private boolean endWithLastTokenOfFirst(List<String> shortforms) {
            if (shortforms.isEmpty()) {
                return false;
            }
            if (shortforms.size() == 1) {
                return true;
            }
            String[] tokens = shortforms.get(0).split("\\s+");
            String suffix = tokens[tokens.length - 1];
            for (String shortform : shortforms) {
                if (!shortform.endsWith(suffix)) {
                    return false;
                }
            }
            return true;
        }

        /** @return the spelling variants list itself (no copy) */
        public List<String> getSpellingVariants() {
            return this.spellingVariants;
        }

        /** @return the inflection variants list itself (no copy) */
        public List<String> getInflectionVariants() {
            return this.inflectionVariants;
        }

        /** @return the abbreviations list itself (no copy) */
        public List<String> getAbbreviations() {
            return this.abbreviations;
        }

        /** @return the acronyms list itself (no copy) */
        public List<String> getAcronyms() {
            return this.acronyms;
        }

        /**
         * Appends a spelling variant, replacing the immutable empty placeholder on first use.
         *
         * @param variant the spelling variant to append
         */
        public void addSpellingVariant(String variant) {
            if (this.spellingVariants.isEmpty()) {
                this.spellingVariants = new ArrayList<>();
            }
            this.spellingVariants.add(variant);
        }

        /**
         * Appends an inflection variant, replacing the immutable empty placeholder on first use.
         *
         * @param variant the inflection variant to append
         */
        public void addInflectionVariant(String variant) {
            if (this.inflectionVariants.isEmpty()) {
                this.inflectionVariants = new ArrayList<>();
            }
            this.inflectionVariants.add(variant);
        }

        /**
         * Appends an abbreviation, replacing the immutable empty placeholder on first use.
         *
         * @param variant the abbreviation to append
         */
        public void addAbbreviation(String variant) {
            if (this.abbreviations.isEmpty()) {
                this.abbreviations = new ArrayList<>();
            }
            this.abbreviations.add(variant);
        }

        /**
         * Appends an acronym, replacing the immutable empty placeholder on first use.
         *
         * @param variant the acronym to append
         */
        public void addAcronym(String variant) {
            if (this.acronyms.isEmpty()) {
                this.acronyms = new ArrayList<>();
            }
            this.acronyms.add(variant);
        }

        /** @return the list of abbreviation long-form EUIs itself (no copy) */
        public List<String> getAbbreviationLongformEuis() {
            return this.abbreviationLongformEuis;
        }

        /** @return the list of acronym long-form EUIs itself (no copy) */
        public List<String> getAcronymLongformEuis() {
            return this.acronymLongformEuis;
        }

        /**
         * Appends the EUI of an abbreviation long form, replacing the immutable empty placeholder
         * on first use.
         *
         * @param variant the EUI to append
         */
        public void addAbbreviationLongformEui(String variant) {
            if (this.abbreviationLongformEuis.isEmpty()) {
                this.abbreviationLongformEuis = new ArrayList<>();
            }
            this.abbreviationLongformEuis.add(variant);
        }

        /**
         * Appends the EUI of an acronym long form, replacing the immutable empty placeholder on
         * first use.
         *
         * @param variant the EUI to append
         */
        public void addAcronymLongformEui(String variant) {
            if (this.acronymLongformEuis.isEmpty()) {
                this.acronymLongformEuis = new ArrayList<>();
            }
            this.acronymLongformEuis.add(variant);
        }

        /**
         * Entries are equal when they are of the same class and carry equal EUIs.
         *
         * <p>The comparison is null-safe, matching {@link #hashCode()}, which already tolerates an
         * unset EUI.
         */
        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || this.getClass() != o.getClass()) {
                return false;
            }
            SpecialistEntry that = (SpecialistEntry)o;
            return Objects.equals(this.eui, that.eui);
        }

        /** Hash code derived from the EUI only, consistent with {@link #equals(Object)}. */
        @Override
        public int hashCode() {
            return Objects.hash(this.eui);
        }

        /** @return the category, or {@code null} if none has been set */
        public String getCategory() {
            return this.category;
        }

        /** @param category the category value to store for this entry */
        public void setCategory(String category) {
            this.category = category;
        }

        /** @return {@code true} if at least one abbreviation has been added */
        public boolean isAbbreviationEntry() {
            return !this.abbreviations.isEmpty();
        }
    }

    private static enum EntryType {
        ABBREVIATION,
        ACRONYM,
        SPELLING,
        INFLECTION,
        BASE,
        ACRONYM_LONGFORM_EUID,
        ABBREVIATION_LONGFORM_EUID;

    }
}

