/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jules.ae.genemapping;

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import com.google.common.collect.Multisets;
import de.julielab.jules.ae.genemapping.utils.Utils;
import de.julielab.jules.ae.genemapping.utils.norm.TermNormalizer;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Heuristic filter for gene/protein name candidates. It compares a normalized
 * search term with a candidate term found for it and decides whether the
 * candidate should be discarded, based on regular expressions for numbers,
 * spelled-out Greek letters, modifier words and non-descriptive words.
 *
 * <p>Two of the patterns ({@link #patternUnspecifieds} and
 * {@link #patternPreMods}) are built from the classpath resources
 * {@code /unspecified_proteins} and {@code /premodifiers} when an instance is
 * constructed.
 *
 * <p>{@link #isUnspecified(String)} and {@link #isNonDescriptive(String)}
 * reset and reuse one {@link Matcher} per instance; {@code Matcher} objects
 * are not safe for concurrent use, so neither are those two methods.
 */
public class CandidateFilter {
    private static final Logger LOGGER = LoggerFactory.getLogger(CandidateFilter.class);
    /** Spelled-out names of the Greek letters, in alphabet order. */
    public static final String[] GREEK = new String[]{"alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta", "iota", "kappa", "lambda", "mu", "nu", "xi", "omicron", "pi", "rho", "sigma", "tau", "upsilon", "phi", "chi", "psi", "omega"};
    /** Roman numerals I to XX. */
    public static final String[] LAT_NUM = new String[]{"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X", "XI", "XII", "XIII", "XIV", "XV", "XVI", "XVII", "XVIII", "XIX", "XX"};
    /** Single capturing group that alternates over all entries of {@link #GREEK}. */
    public static String GREEK_REGEX = "(" + Stream.of(GREEK).collect(Collectors.joining("|")) + ")";
    /**
     * Single capturing group that alternates over {@link #LAT_NUM}. The entries are
     * sorted in reverse natural order so that a longer numeral is tried before any
     * numeral that is a prefix of it (e.g. "XX" before "X").
     */
    public static String LAT_NUM_REGEX = "(" + Stream.of(LAT_NUM).sorted(Comparator.reverseOrder()).collect(Collectors.joining("|")) + ")";
    /** A token that consists of digits only. */
    private static final Pattern NUMBER_TOKEN = Pattern.compile("[0-9]+");
    /**
     * A token that is a single ASCII letter, a digit sequence or a Greek letter
     * name. Compiled once from the value {@link #GREEK_REGEX} has at class
     * initialization.
     */
    private static final Pattern SINGLE_SYMBOL_TOKEN = Pattern.compile("[a-zA-Z]|[0-9]+|" + GREEK_REGEX);
    /** A single ASCII letter delimited by word boundaries on both sides. */
    private static final Pattern SINGLE_LETTER_WORD = Pattern.compile("\\b[a-zA-Z]\\b");
    /**
     * Maps a single lower-case letter to the first Greek letter name in
     * {@link #GREEK} that starts with it (filled in the static initializer).
     */
    public static final Map<String, String> greekAbbrMap = new HashMap<String, String>();
    /** Same alternation as {@link #GREEK_REGEX} but without "alpha". */
    public static final String SUB_GREEK = "(beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)";
    public static String MODIFIER;
    public static String NON_DESCRIPTIVE;
    public static String AMINO_ACIDS;
    /** Alternation of non-descriptive words, anchored at the end of the input. */
    public String NON_DESC = "(promoter|onco protein|oncoprotein|proto oncogene|protooncogene|protease|binding site|transcript|element|construct|si rna|prem rna|pre m rna|m rna ?s?|rna|locus|gene product|product|reporter gene|reporter|gene|protein|c dna|molecule|pseudogene|autoantigen|peptide|polypeptide|enzyme)$";
    /** Compiled from {@code ".* " + NON_DESC} in the constructor. */
    public Pattern patternNonDesc;
    /** Reusable matcher for {@link #patternNonDesc}; reset on every use. */
    public Matcher matcherNonDesc;
    public String DOMAIN_FAMILIES = "^.*(acceptors|acid|activators|adapters|adaptors|antibodi|antibody|binders|binding|binding site|binding sites|box|boxe|channel|channels|chromosome|coactivators|cofactors|complex|domain|dyneins|effectors|element|enhancers|epitope|erythrocyte|exchangers|exon|facilitators|factors|familie|family|filament|finger|helicases|histone|histones|homeodomain|inducers|inhibitors|integrators|interactors|intron|kinases|kinesins|lectins|ligands|mediators|member|membrane|modifiers|modulators|motif|myosins|proactivators|proteases|proteasome|proteins|reductases|region|regulators|repeat|repressors|residue|responders|sequence|site|subdomain|subfamily|subunits|superfamily|suppressors|supressors|syndrome|tail|terminal|terminators|terminus|tranporters|transferases|zinc finger)e?s?";
    /** Compiled from {@link #DOMAIN_FAMILIES} in the constructor. */
    public Pattern patternDomainFamilies;
    private static final String UNSPECIFIEDS_FILE = "/unspecified_proteins";
    /** Regex source built by {@link #initUnspecifieds()}. */
    public String UNSPECIFIEDS;
    public Pattern patternUnspecifieds;
    /** Reusable matcher for {@link #patternUnspecifieds}; reset on every use. */
    public Matcher matcherUnspecifieds;
    private static final String PREMOD_FILE = "/premodifiers";
    /** Regex source built by {@link #initPreModifiers()}; ends with a single space. */
    public String PREMODS;
    public Pattern patternPreMods;
    private Pattern num;
    private Pattern singChar;
    private Pattern specWords;

    /**
     * Loads the resource-backed patterns and compiles all fixed patterns.
     *
     * @throws IOException if one of the classpath resources is missing, cannot be
     *                     read or contains no usable entry
     */
    public CandidateFilter() throws IOException {
        this.initUnspecifieds();
        this.initPreModifiers();
        this.patternDomainFamilies = Pattern.compile(this.DOMAIN_FAMILIES);
        this.patternNonDesc = Pattern.compile(".* " + this.NON_DESC);
        this.matcherNonDesc = this.patternNonDesc.matcher("");
        // '*' (not '+'): this pattern also matches the empty string.
        this.num = Pattern.compile("[0-9]*");
        this.singChar = Pattern.compile("([a-z]|[0-9])");
        // NOTE(review): the "||" below adds an empty alternative, so this pattern
        // also matches the empty string. It looks like a typo for "|" but is kept
        // because removing it changes how empty tokens are treated by filterOut;
        // confirm the intent before changing it.
        this.specWords = Pattern.compile("(" + GREEK_REGEX + "|" + MODIFIER + "||" + NON_DESCRIPTIVE + ")");
    }

    /**
     * Small manual check: prints the "unspecified proteins" pattern and whether
     * it matches the string "fos".
     */
    public static void main(String[] args) throws IOException {
        CandidateFilter filter = new CandidateFilter();
        Pattern unspecifieds = filter.patternUnspecifieds;
        System.out.println(unspecifieds.pattern());
        System.out.println(unspecifieds.matcher("fos").matches() ? "yes" : "no");
    }

    /**
     * Tells whether the two terms differ by exactly one extra token of a given
     * kind, no matter which of the two terms carries it.
     *
     * <p>Both terms are split on whitespace into sorted token sets (duplicates
     * collapse). The result is {@code true} when one set has exactly one element
     * more than the other, removing the smaller set from the larger one leaves
     * exactly one token, and that token fully matches {@code type}.
     *
     * @param searchTerm the term that was searched for
     * @param foundTerm  the candidate term
     * @param type       regular expression the differing token must fully match
     * @return {@code true} if the terms differ in one token of that type only
     */
    private boolean differInTypeOfOneTerm(String searchTerm, String foundTerm, String type) {
        if (searchTerm.equals(foundTerm)) {
            return false;
        }
        TreeSet<String> searchTokens = this.getSet(searchTerm.split("\\s+"));
        TreeSet<String> foundTokens = this.getSet(foundTerm.split("\\s+"));
        TreeSet<String> surplus;
        if (searchTokens.size() == foundTokens.size() + 1) {
            surplus = searchTokens;
            surplus.removeAll(foundTokens);
        } else if (foundTokens.size() == searchTokens.size() + 1) {
            // Mirror case: the found term carries the extra token. The original
            // repeated the first condition here, which made this branch dead.
            surplus = foundTokens;
            surplus.removeAll(searchTokens);
        } else {
            return false;
        }
        if (surplus.size() != 1) {
            return false;
        }
        return Pattern.compile(type).matcher(surplus.first()).matches();
    }

    /** Collects the given tokens into a sorted set (duplicates collapse). */
    private TreeSet<String> getSet(String[] array) {
        TreeSet<String> tokens = new TreeSet<String>();
        for (String token : array) {
            tokens.add(token);
        }
        return tokens;
    }

    /**
     * Counts the whitespace-separated tokens of {@code term} that fully match
     * the regular expression {@code occurrence}.
     */
    private int getNumberOfOccurrences(String term, String occurrence) {
        Pattern wanted = Pattern.compile(occurrence);
        int count = 0;
        for (String token : term.split("\\s+")) {
            if (wanted.matcher(token).matches()) {
                ++count;
            }
        }
        return count;
    }

    /**
     * Tells whether both terms contain exactly one token matching {@code type}
     * and are identical except for that token.
     *
     * <p>{@code type} is expected to be one parenthesized group: the comparison
     * reads it back as capturing group 2, with group 1 being the text before it
     * and group 3 the text after it.
     *
     * @param searchTerm the term that was searched for
     * @param foundTerm  the candidate term
     * @param type       regular expression (one capturing group) for the token kind
     * @return {@code true} if prefix and suffix are equal but the typed token differs
     */
    private boolean onlyDifferentTypes(String searchTerm, String foundTerm, String type) {
        if (this.getNumberOfOccurrences(searchTerm, type) != 1 || this.getNumberOfOccurrences(foundTerm, type) != 1) {
            return false;
        }
        Pattern framed = Pattern.compile("([a-z0-9 ]*?) ?" + type + " ?([a-z0-9 ]*?)");
        Matcher search = framed.matcher(searchTerm);
        Matcher found = framed.matcher(foundTerm);
        if (!search.matches() || !found.matches()) {
            return false;
        }
        return !search.group(2).equals(found.group(2))
                && search.group(1).equals(found.group(1))
                && search.group(3).equals(found.group(3));
    }

    /** Returns {@code true} if every word fully matches {@code pattern} (vacuously true when there are no words). */
    private static boolean allMatch(Iterable<String> words, Pattern pattern) {
        for (String word : words) {
            if (!pattern.matcher(word).matches()) {
                return false;
            }
        }
        return true;
    }

    /**
     * Decides whether {@code foundTerm} should be discarded as a candidate for
     * {@code searchTerm}. The rules are checked in order and the first one that
     * applies wins:
     * <ol>
     * <li>the common words (as computed by {@code Utils.getCommonWords} on the
     * space-split terms) are all digit sequences; an empty set of common words
     * also takes this branch because the check is vacuously true,</li>
     * <li>the common words are all single lower-case letters or single digits,</li>
     * <li>the common words are all Greek letter names, modifiers or
     * non-descriptive words,</li>
     * <li>the terms are equal except for one number token,</li>
     * <li>the terms are equal except for one Greek letter token,</li>
     * <li>one term has one extra number token (the token "1" is not counted),</li>
     * <li>one term has one extra Greek letter token ("alpha" is not counted),</li>
     * <li>one term has one extra modifier token.</li>
     * </ol>
     *
     * @param searchTerm the term that was searched for
     * @param foundTerm  the candidate term
     * @return {@code true} if the candidate should be filtered out
     */
    public boolean filterOut(String searchTerm, String foundTerm) {
        TreeSet<String> commonWords = Utils.getCommonWords(searchTerm.split(" "), foundTerm.split(" "));
        if (allMatch(commonWords, this.num)) {
            LOGGER.debug("filtered out because: overlap only numbers: '{}' <-> '{}'", searchTerm, foundTerm);
            return true;
        }
        if (allMatch(commonWords, this.singChar)) {
            LOGGER.debug("filtered out because: overlap only single characters or single digits: '{}' <-> '{}'", searchTerm, foundTerm);
            return true;
        }
        if (allMatch(commonWords, this.specWords)) {
            LOGGER.debug("filtered out because: overlap consists only of special words (greek, modifiers, non-descriptive): '{}' <-> '{}'", searchTerm, foundTerm);
            return true;
        }
        if (this.onlyDifferentTypes(searchTerm, foundTerm, "([0-9]+)")) {
            LOGGER.debug("filtered out because: terms differ in one number only: '{}' <-> '{}'", searchTerm, foundTerm);
            return true;
        }
        if (this.onlyDifferentTypes(searchTerm, foundTerm, GREEK_REGEX)) {
            LOGGER.debug("filtered out because: terms differ in one greek token only: '{}' <-> '{}'", searchTerm, foundTerm);
            return true;
        }
        if (this.differInTypeOfOneTerm(searchTerm, foundTerm, "([02-9]|[1-9]{2,})")) {
            LOGGER.debug("filtered out because: one has a number and the other doesn't (1 is excluded): '{}' <-> '{}'", searchTerm, foundTerm);
            return true;
        }
        if (this.differInTypeOfOneTerm(searchTerm, foundTerm, SUB_GREEK)) {
            LOGGER.debug("filtered out because: one has a greek and the other doesn't (alpha is excluded): '{}' <-> '{}'", searchTerm, foundTerm);
            return true;
        }
        if (this.differInTypeOfOneTerm(searchTerm, foundTerm, MODIFIER)) {
            LOGGER.debug("filtered out because: one has a modifier and the other doesn't: '{}' <-> '{}'", searchTerm, foundTerm);
            return true;
        }
        return false;
    }

    /**
     * Opens a classpath resource for line-wise reading.
     *
     * @throws IOException if the resource does not exist (instead of the
     *                     NullPointerException a null stream would cause later)
     */
    private BufferedReader openResource(String resourcePath) throws IOException {
        InputStream in = this.getClass().getResourceAsStream(resourcePath);
        if (in == null) {
            throw new IOException("Classpath resource not found: " + resourcePath);
        }
        return new BufferedReader(new InputStreamReader(in));
    }

    /**
     * Builds {@link #UNSPECIFIEDS}, {@link #patternUnspecifieds} and
     * {@link #matcherUnspecifieds} from the resource {@code /unspecified_proteins}.
     *
     * <p>Lines starting with {@code ##} are skipped. Every other line is trimmed,
     * passed through a {@link TermNormalizer} and added as one alternative of
     * {@code ^(a|b|...)e?s?$}. Entries that are empty after normalization are
     * skipped, because an empty alternative would make the pattern match the
     * empty string.
     *
     * @throws IOException if the resource is missing, cannot be read or yields
     *                     no entry at all
     */
    public void initUnspecifieds() throws IOException {
        TermNormalizer normalizer = new TermNormalizer();
        StringBuilder alternatives = new StringBuilder();
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader reader = this.openResource(UNSPECIFIEDS_FILE)) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("##")) {
                    continue;
                }
                String entry = normalizer.normalize(line.trim()).trim();
                if (entry.isEmpty()) {
                    continue;
                }
                if (alternatives.length() > 0) {
                    alternatives.append('|');
                }
                alternatives.append(entry);
            }
        }
        if (alternatives.length() == 0) {
            throw new IOException("No entries found in classpath resource " + UNSPECIFIEDS_FILE);
        }
        this.UNSPECIFIEDS = "^(" + alternatives + ")e?s?$";
        this.patternUnspecifieds = Pattern.compile(this.UNSPECIFIEDS);
        this.matcherUnspecifieds = this.patternUnspecifieds.matcher("");
        LOGGER.debug("Initializing unspecified proteins pattern from file: {}", this.patternUnspecifieds);
    }

    /**
     * Builds {@link #PREMODS} and {@link #patternPreMods} from the resource
     * {@code /premodifiers}.
     *
     * <p>Lines starting with {@code ##} are skipped. Every other line is trimmed
     * and added as one alternative of {@code ^(a|b|...) } (note the trailing
     * space); the compiled pattern additionally ends with {@code .*}. Blank lines
     * are skipped, because an empty alternative would match any input that
     * starts with a space.
     *
     * @throws IOException if the resource is missing, cannot be read or yields
     *                     no entry at all
     */
    public void initPreModifiers() throws IOException {
        StringBuilder alternatives = new StringBuilder();
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader reader = this.openResource(PREMOD_FILE)) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("##")) {
                    continue;
                }
                String entry = line.trim();
                if (entry.isEmpty()) {
                    continue;
                }
                if (alternatives.length() > 0) {
                    alternatives.append('|');
                }
                alternatives.append(entry);
            }
        }
        if (alternatives.length() == 0) {
            throw new IOException("No entries found in classpath resource " + PREMOD_FILE);
        }
        this.PREMODS = "^(" + alternatives + ") ";
        this.patternPreMods = Pattern.compile(this.PREMODS + ".*");
        LOGGER.debug("Initializing protein void premodifiers from file: {}", this.patternPreMods);
    }

    /**
     * Not implemented: always returns {@code false}, whatever the arguments are.
     */
    public boolean hasContradictingGreek(String s1, String s2) {
        return false;
    }

    /**
     * Replaces every stand-alone single ASCII letter (a letter with a word
     * boundary on both sides) by the Greek letter name registered for it in
     * {@link #greekAbbrMap}. Letters without an entry, which includes all
     * upper-case letters, are left unchanged, as is all other text.
     *
     * @param s the text to expand
     * @return the expanded text, or {@code s} itself if it contains no
     *         stand-alone letter
     */
    public static String expendGreek(String s) {
        Matcher letters = SINGLE_LETTER_WORD.matcher(s);
        StringBuilder expanded = new StringBuilder();
        int copiedUpTo = 0;
        boolean found = false;
        while (letters.find()) {
            found = true;
            expanded.append(s, copiedUpTo, letters.start());
            copiedUpTo = letters.end();
            String letter = letters.group();
            String greekName = greekAbbrMap.get(letter);
            expanded.append(greekName != null ? greekName : letter);
        }
        if (!found) {
            return s;
        }
        // Copy the complete remainder. The original compared against
        // s.length() - 1 and thereby dropped a one-character tail.
        expanded.append(s, copiedUpTo, s.length());
        return expanded.toString();
    }

    /**
     * Tells whether both strings contain the same multiset of purely numeric
     * tokens (tokens are obtained by splitting on single whitespace characters).
     *
     * @param normalizedMention the mention text
     * @param synonym           the synonym to compare with
     * @return {@code true} if the numeric tokens agree in values and counts
     */
    public static boolean isNumberCompatible(String normalizedMention, String synonym) {
        Multiset<String> mentionNumbers = CandidateFilter.getNumbers(normalizedMention.split("\\s"));
        Multiset<String> synNumbers = CandidateFilter.getNumbers(synonym.split("\\s"));
        if (mentionNumbers.size() != synNumbers.size()) {
            return false;
        }
        return Multisets.intersection(mentionNumbers, synNumbers).size() == mentionNumbers.size();
    }

    /** Returns the tokens that consist of digits only, with their multiplicities. */
    public static Multiset<String> getNumbers(String[] tokens) {
        Multiset<String> numberTokens = HashMultiset.create();
        for (String token : tokens) {
            if (NUMBER_TOKEN.matcher(token).matches()) {
                numberTokens.add(token);
            }
        }
        return numberTokens;
    }

    /**
     * Returns the tokens that are a single ASCII letter, a digit sequence or a
     * Greek letter name, with their multiplicities.
     */
    public static Multiset<String> getSingleSymbols(String[] tokens) {
        Multiset<String> singleLetterTokens = HashMultiset.create();
        for (String token : tokens) {
            if (SINGLE_SYMBOL_TOKEN.matcher(token).matches()) {
                singleLetterTokens.add(token);
            }
        }
        return singleLetterTokens;
    }

    /**
     * Returns the tokens that are not selected by
     * {@link #getSingleSymbols(String[])}, with their multiplicities.
     */
    public static Multiset<String> getContentTokens(String[] tokens) {
        Multiset<String> contentTokens = HashMultiset.create();
        for (String token : tokens) {
            if (!SINGLE_SYMBOL_TOKEN.matcher(token).matches()) {
                contentTokens.add(token);
            }
        }
        return contentTokens;
    }

    /**
     * Returns the multiset intersection of the tokens of both strings (tokens
     * are obtained by splitting on single whitespace characters). Despite the
     * method name, the result is the common tokens themselves, not their number.
     */
    public static Multiset<String> getNumberOfCommonTokens(String normalizedMention, String synonym) {
        Multiset<String> mentionTokens = HashMultiset.create();
        for (String mentionToken : normalizedMention.split("\\s")) {
            mentionTokens.add(mentionToken);
        }
        Multiset<String> synTokens = HashMultiset.create();
        for (String synToken : synonym.split("\\s")) {
            synTokens.add(synToken);
        }
        return Multisets.intersection(mentionTokens, synTokens);
    }

    /**
     * Tells whether {@code word} fully matches {@link #patternUnspecifieds}.
     * Resets and reuses {@link #matcherUnspecifieds}.
     */
    public boolean isUnspecified(String word) {
        return this.matcherUnspecifieds.reset(word).matches();
    }

    /**
     * Tells whether {@code word} fully matches {@link #patternNonDesc}.
     * Resets and reuses {@link #matcherNonDesc}.
     */
    public boolean isNonDescriptive(String word) {
        return this.matcherNonDesc.reset(word).matches();
    }

    static {
        // The first Greek letter name with a given initial wins, e.g. "e" maps
        // to "epsilon" and not to "eta".
        for (String greekChar : GREEK) {
            greekAbbrMap.putIfAbsent(greekChar.substring(0, 1), greekChar);
        }
        MODIFIER = "(receptors?|cofactors?|factors?|tranporters?|regulators?|inhibitors?|activators?|suppressors?|enhancers?|repressors?|adaptors?|interactors?|modulators?|mediators?|inducers?|effectors?|coactivators?|supressors?|integrators?|facilitators?|binders?|terminators?|acceptors?|responders?|proactivators?|exchangers?|enhancers?|adapters?|responders?|modifiers?|ligands?)";
        NON_DESCRIPTIVE = "(constructs?|fragments?|antigens?|precursors?|proteins?|genes?|chains?|domains?|kinases?|homologues?|homologs?|isoforms?|isologs?|isotypes?|motifs?|orthologues?|orthologs?|products?|sequences?|subtypes?|subunits?)";
        AMINO_ACIDS = "(alanine|arginine|asparagine|aspartic|cysteine|glutamine|glutamic|glycine|histidine|isoleucine|leucine|lysine|methionine|phenylalanine|proline|serine|threonine|tryptophan|tyrosine|valine)";
    }
}

