/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jules.ae.genemapping.scoring;

import cc.mallet.pipe.Pipe;
import cc.mallet.types.Alphabet;
import cc.mallet.types.Instance;
import cc.mallet.types.Label;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.Token;
import de.julielab.jules.ae.genemapping.scoring.MaxEntScorerPairExtractor;
import de.julielab.jules.ae.genemapping.scoring.SimpleScorer;
import de.julielab.jules.ae.genemapping.scoring.TokenJaroSimilarity;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class MaxEntScorerFeaturePipe
extends Pipe
implements Serializable {
    private boolean lexicalize = true;
    private boolean debug = false;
    private static final long serialVersionUID = 1L;
    private static final Logger LOGGER = LoggerFactory.getLogger(MaxEntScorerFeaturePipe.class);
    private final String GREEK = "(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)";
    private final String GREEK_ALPHA = "alpha";
    private final String NUMBER = "[0-9]+";
    private final String ONE = "1";
    private final String CHAR = "[a-z]";
    private final String ALPHA = "[a-z]+";
    private final String MOL_WEIGHT = "p [0-9][0-9]?";
    private String MODIFIER = "(receptor|tranporter|regulator|inhibitor|activator|suppressor|enhancer|repressor|adaptor|interactor|modulator|mediator|inducer|effector|coactivator|supressor|integrator|facilitator|binder|terminator|acceptor|proactivator|exchanger|enhancer|adapter|responder|modifier|ligand|cofactor|tranporting|regulating|inhibiting|activating|suppressing|enhancing|repressing|adapting|interacting|modulating|mediating|inducing|effecting|coactivating|supressing|integrating|facilitating|binding|terminating|accepting|responding|proactivating|exchanging|enhancing|adapting|modifying|coreceptor|cotranporter|coregulator|coinhibitor|coactivator|cosuppressor|coenhancer|corepressor|coadaptor|cointeractor|comodulator|comediator|coinducer|coeffector|coactivator|cointegrator|cofacilitator|cobinder|coterminator|coacceptor|proactivator|coexchanger|coenhancer|coadapter|coresponder|comodifier|coligand|cofactor)";
    private String NON_DESCRIPTIVE = "(fragment|antigen|precursor|protein|chain|domain|gene|homolog|homologue|isoform|isolog|isotype|motif|ortholog|precursor|precursors|product|sequence|subtype|subunit)";
    private TokenJaroSimilarity jaroSim = null;

    public MaxEntScorerFeaturePipe() {
        super(new Alphabet(), (Alphabet)new LabelAlphabet());
    }

    public Instance pipe(Instance carrier) {
        if (this.jaroSim == null) {
            this.jaroSim = new TokenJaroSimilarity();
        }
        String[] pair = (String[])carrier.getData();
        String term1 = pair[0];
        String term2 = pair[1];
        String label = pair[2];
        MaxEntScorerPairExtractor ext = new MaxEntScorerPairExtractor();
        Label target = ((LabelAlphabet)this.getTargetAlphabet()).lookupLabel((Object)label);
        String[][] results = ext.compareStrings(term1, term2);
        String[] allBigramsTerm1 = this.allBigrams(term1);
        String[] allBigramsTerm2 = this.allBigrams(term2);
        String[] diffBigrams = this.differentBigrams(term1, term2);
        String[] commonBigrams = this.commonBigrams(term1, term2);
        String[] diffTrigrams = this.differentTrigrams(term1, term2);
        String[] commonTrigrams = this.commonTrigrams(term1, term2);
        Token token = new Token(term1);
        token.setText(term1);
        boolean term1HasMolWeight = false;
        boolean term2HasMolWeight = false;
        for (String bigram1 : allBigramsTerm1) {
            if (!bigram1.matches("p [0-9][0-9]?")) continue;
            term1HasMolWeight = true;
        }
        for (String bigram2 : allBigramsTerm2) {
            if (!bigram2.matches("p [0-9][0-9]?")) continue;
            term2HasMolWeight = true;
        }
        for (String bigram : diffBigrams) {
            if (!bigram.matches("p [0-9][0-9]?") || !term1HasMolWeight || !term2HasMolWeight) continue;
            token.setFeatureValue("DIFF_MOL_WEIGHT", 1.0);
        }
        for (String bigram : commonBigrams) {
            token.setFeatureValue("COMMON_BIGRAM=" + bigram, 1.0);
            if (!bigram.matches("p [0-9][0-9]?")) continue;
            token.setFeatureValue("SAME_MOL_WEIGHT", 1.0);
        }
        for (String trigram : commonTrigrams) {
            token.setFeatureValue("COMMON_TRIGRAM=" + trigram, 1.0);
        }
        double simpleScore = new SimpleScorer().getScore(term1, term2);
        if (simpleScore == 1.0) {
            token.setFeatureValue("SIMPLESCORE=1", 1.0);
        } else if (simpleScore >= 0.9) {
            token.setFeatureValue("SIMPLESCORE>=0.9", 1.0);
        } else if (simpleScore >= 0.8) {
            token.setFeatureValue("SIMPLESCORE>=0.8", 1.0);
        } else if (simpleScore >= 0.7) {
            token.setFeatureValue("SIMPLESCORE>=0.7", 1.0);
        } else if (simpleScore >= 0.6) {
            token.setFeatureValue("SIMPLESCORE>=0.6", 1.0);
        } else if (simpleScore >= 0.5) {
            token.setFeatureValue("SIMPLESCORE>=0.5", 1.0);
        } else if (simpleScore >= 0.3) {
            token.setFeatureValue("SIMPLESCORE>=0.3", 1.0);
        }
        if (term1.indexOf(term2) > -1 || term2.indexOf(term1) > -1) {
            token.setFeatureValue("SUBSTRING", 1.0);
        }
        int transpositions = this.jaroSim.getTokenTranspositions(term1, term2);
        token.setFeatureValue("TRANSPOSITIONS=" + transpositions, 1.0);
        HashMap<String, Integer> sames = new HashMap<String, Integer>();
        for (int j = 0; j < results[0].length; ++j) {
            String sameToken = results[0][j];
            if (sameToken.matches("[0-9]+")) {
                this.add2HashMap(sames, "SAME_NUM");
                continue;
            }
            if (sameToken.matches("(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)")) {
                this.add2HashMap(sames, "SAME_GREEK");
                continue;
            }
            if (sameToken.matches("[a-z]")) continue;
            if (sameToken.matches("[a-z]+")) {
                this.add2HashMap(sames, "SAME_ALPHA");
                continue;
            }
            if (sameToken.matches(this.MODIFIER)) {
                this.add2HashMap(sames, "SAME_MODIFIER");
                continue;
            }
            if (sameToken.matches(this.NON_DESCRIPTIVE) || !this.lexicalize) continue;
            sames.put("SAME_STRING=" + sameToken, 1);
        }
        for (String key : sames.keySet()) {
            int count = sames.get(key);
            token.setFeatureValue(key + "=" + count, 1.0);
        }
        int numOfSames = results[0].length;
        token.setFeatureValue("NUM_OF_SAMES=" + numOfSames, 1.0);
        if (results[0].length == 1) {
            String onlySame = results[0][0];
            if (onlySame.matches("[0-9]+")) {
                token.setFeatureValue("ONLY_SAME_NUMBER", 1.0);
            } else if (onlySame.matches("(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)")) {
                token.setFeatureValue("ONLY_SAME_GREEK", 1.0);
            } else if (onlySame.matches("[a-z]")) {
                token.setFeatureValue("ONLY_SAME_CHAR", 1.0);
            } else if (onlySame.matches("[a-z]+")) {
                token.setFeatureValue("ONLY_SAME_ALPHA", 1.0);
            } else if (!onlySame.matches(this.MODIFIER) && !onlySame.matches(this.NON_DESCRIPTIVE) && this.lexicalize) {
                sames.put("ONLY_SAME_STRING=" + onlySame, 1);
            }
        }
        HashMap<String, Integer> diffs = new HashMap<String, Integer>();
        for (int j = 0; j < results[1].length; ++j) {
            String diffToken = results[1][j];
            if (diffToken.matches("[0-9]+")) {
                this.add2HashMap(diffs, "DIFF_NUM");
                continue;
            }
            if (diffToken.matches("(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)")) {
                this.add2HashMap(diffs, "DIFF_GREEK");
                continue;
            }
            if (diffToken.matches("[a-z]")) {
                this.add2HashMap(diffs, "DIFF_CHAR");
                continue;
            }
            if (diffToken.matches("[a-z]+")) {
                this.add2HashMap(diffs, "DIFF_ALPHA");
                continue;
            }
            if (diffToken.matches(this.MODIFIER)) {
                this.add2HashMap(diffs, "DIFF_MODIFIER");
                continue;
            }
            if (diffToken.matches(this.NON_DESCRIPTIVE) || !this.lexicalize) continue;
            diffs.put("DIFF_STRING=" + diffToken, 1);
        }
        for (String key : diffs.keySet()) {
            int count = diffs.get(key);
            token.setFeatureValue(key + "=" + count, 1.0);
        }
        token.setFeatureValue("NUM_OF_DIFFS=" + results[1].length, 1.0);
        if (results[1].length == 1) {
            String onlyDiff = results[1][0];
            if (onlyDiff.matches("1")) {
                token.setFeatureValue("ONLY_DIFF_ONE", 1.0);
            } else if (onlyDiff.matches("[0-9]+")) {
                token.setFeatureValue("ONLY_DIFF_NUMBER", 1.0);
            } else if (onlyDiff.matches("alpha")) {
                token.setFeatureValue("ONLY_DIFF_GREEK_ALPHA", 1.0);
            } else if (onlyDiff.matches("(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)")) {
                token.setFeatureValue("ONLY_DIFF_GREEK", 1.0);
            } else if (onlyDiff.matches("[a-z]+")) {
                token.setFeatureValue("ONLY_DIFF_ALPHA", 1.0);
            } else if (onlyDiff.matches(this.MODIFIER)) {
                token.setFeatureValue("ONLY_DIFF_MODIFIER", 1.0);
            } else if (onlyDiff.matches(this.NON_DESCRIPTIVE)) {
                token.setFeatureValue("ONLY_DIFF_NON_DESCRIPTIVE", 1.0);
            } else if (this.lexicalize) {
                sames.put("ONLY_DIFF_STRING=" + onlyDiff, 1);
            }
        }
        int lenDiff = Math.abs(term1.split(" ").length - term2.split(" ").length);
        token.setFeatureValue("LENGTHDIFF=" + lenDiff, 1.0);
        int maxLen = Math.max(term1.split(" ").length, term2.split(" ").length);
        double relLenDiff = 1.0 - (double)lenDiff / (double)maxLen;
        if (relLenDiff >= 0.9) {
            token.setFeatureValue("RELLENGTHDIFF>=0.9", 1.0);
        } else if (relLenDiff >= 0.7) {
            token.setFeatureValue("RELLENGTHDIFF>=0.7", 1.0);
        } else if (relLenDiff >= 0.5) {
            token.setFeatureValue("RELLENGTHDIFF>=0.5", 1.0);
        } else {
            token.setFeatureValue("RELLENGTHDIFF<0.5", 1.0);
        }
        if (this.debug) {
            System.out.println("\n--------------------------------------------\nFeatures for: " + term1 + "\t" + term2 + "\t" + label + "\n" + token.toString());
        }
        carrier.setData((Object)token);
        carrier.setTarget((Object)target);
        carrier.setSource((Object)(term1 + " <-> " + term2));
        carrier.setName((Object)target.toString());
        return carrier;
    }

    private ArrayList<String> makeBigrams(String term) {
        String[] split = term.split(" ");
        ArrayList<String> bigrams = new ArrayList<String>();
        for (int i = 1; i < split.length; ++i) {
            Object bigram = split[i - 1] + " " + split[i];
            bigram = ((String)bigram).trim();
            bigrams.add((String)bigram);
        }
        return bigrams;
    }

    private String[] allBigrams(String term) {
        ArrayList<String> bigrams = this.makeBigrams(term);
        String[] bigramArray = bigrams.toArray(new String[0]);
        return bigramArray;
    }

    private String[] commonBigrams(String term1, String term2) {
        ArrayList<String> commons = new ArrayList<String>();
        ArrayList<String> bigrams1 = this.makeBigrams(term1);
        String[] bigramList1 = bigrams1.toArray(new String[0]);
        ArrayList<String> bigrams2 = this.makeBigrams(term2);
        String[] bigramList2 = bigrams2.toArray(new String[0]);
        for (String bigram1 : bigramList1) {
            if (!bigrams2.contains(bigram1)) continue;
            commons.add(bigram1);
        }
        for (String bigram2 : bigramList2) {
            if (!bigrams1.contains(bigram2) || commons.contains(bigram2)) continue;
            commons.add(bigram2);
        }
        return commons.toArray(new String[0]);
    }

    private ArrayList<String> makeCharTrigrams(String term) {
        StringBuilder sb = new StringBuilder(term);
        ArrayList<String> trigrams = new ArrayList<String>();
        for (int i = 2; i < sb.length(); ++i) {
            String trigram = "" + sb.charAt(i - 2) + sb.charAt(i - 1) + sb.charAt(i);
            trigrams.add(trigram);
        }
        return trigrams;
    }

    private String[] commonCharTrigrams(String term1, String term2) {
        ArrayList<String> commons = new ArrayList<String>();
        ArrayList<String> trigrams1 = this.makeCharTrigrams(term1);
        String[] trigramList1 = trigrams1.toArray(new String[0]);
        ArrayList<String> trigrams2 = this.makeCharTrigrams(term2);
        String[] trigramList2 = trigrams2.toArray(new String[0]);
        for (String trigram1 : trigramList1) {
            if (!trigrams2.contains(trigram1)) continue;
            commons.add(trigram1);
        }
        for (String trigram2 : trigramList2) {
            if (!trigrams1.contains(trigram2) || commons.contains(trigram2)) continue;
            commons.add(trigram2);
        }
        return commons.toArray(new String[0]);
    }

    private ArrayList<String> makeTrigrams(String term) {
        String[] split = term.split(" ");
        ArrayList<String> trigrams = new ArrayList<String>();
        for (int i = 2; i < split.length; ++i) {
            Object trigram = split[i - 2] + " " + split[i - 1] + " " + split[i];
            trigram = ((String)trigram).trim();
            trigrams.add((String)trigram);
        }
        return trigrams;
    }

    private String[] commonTrigrams(String term1, String term2) {
        ArrayList<String> commons = new ArrayList<String>();
        ArrayList<String> trigrams1 = this.makeTrigrams(term1);
        String[] trigramList1 = trigrams1.toArray(new String[0]);
        ArrayList<String> trigrams2 = this.makeTrigrams(term2);
        String[] trigramList2 = trigrams2.toArray(new String[0]);
        for (String trigram1 : trigramList1) {
            if (!trigrams2.contains(trigram1)) continue;
            commons.add(trigram1);
        }
        for (String trigram2 : trigramList2) {
            if (!trigrams1.contains(trigram2) || commons.contains(trigram2)) continue;
            commons.add(trigram2);
        }
        return commons.toArray(new String[0]);
    }

    private String[] differentBigrams(String term1, String term2) {
        ArrayList<String> differents = new ArrayList<String>();
        ArrayList<String> bigrams1 = this.makeBigrams(term1);
        String[] bigramList1 = bigrams1.toArray(new String[0]);
        ArrayList<String> bigrams2 = this.makeBigrams(term2);
        String[] bigramList2 = bigrams2.toArray(new String[0]);
        for (String bigram1 : bigramList1) {
            if (bigrams2.contains(bigram1)) continue;
            differents.add(bigram1);
        }
        for (String bigram2 : bigramList2) {
            if (bigrams1.contains(bigram2) || differents.contains(bigram2)) continue;
            differents.add(bigram2);
        }
        return differents.toArray(new String[0]);
    }

    private String[] differentTrigrams(String term1, String term2) {
        ArrayList<String> differents = new ArrayList<String>();
        ArrayList<String> trigrams1 = this.makeTrigrams(term1);
        String[] trigramList1 = trigrams1.toArray(new String[0]);
        ArrayList<String> trigrams2 = this.makeTrigrams(term2);
        String[] trigramList2 = trigrams2.toArray(new String[0]);
        for (String trigram1 : trigramList1) {
            if (trigrams2.contains(trigram1)) continue;
            differents.add(trigram1);
        }
        for (String trigram2 : trigramList2) {
            if (trigrams1.contains(trigram2) || differents.contains(trigram2)) continue;
            differents.add(trigram2);
        }
        return differents.toArray(new String[0]);
    }

    private void add2HashMap(HashMap<String, Integer> map, String key) {
        int count = 0;
        if (map.containsKey(key)) {
            count = map.get(key);
        }
        map.put(key, ++count);
    }
}

