/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.filtering.families;

import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.Token;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.GeneSet;
import java.util.Locale;
import java.util.regex.Pattern;

public class MentionTypeHint
extends Pipe {
    private static final long serialVersionUID = -163529848335469810L;

    @Override
    public Instance pipe(Instance inst) {
        Token t = (Token)inst.getData();
        GeneMention gm = (GeneMention)inst.getProperty("gm");
        GeneSet geneSet = gm.getSingleGeneSet();
        String[] tokens = gm.getNormalizedText().split("\\s+");
        for (int p = 0; p < tokens.length; ++p) {
            String mentionType = null;
            if (tokens[p].matches("(ytochrome|cytochrome)")) {
                mentionType = "-Type_cytochrome-";
            } else if (tokens[p].matches(".*target")) {
                mentionType = "-Type_target-";
            } else if (tokens[p].matches(".*(irradiation|hybrid|fusion|experiment|gst|est|gap|antigen)")) {
                mentionType = "-Type_ExperimentNoun-";
            } else if (tokens[p].matches(".*(disease|disorder|dystrophy|deficiency|syndrome|dysgenesis|cancer|injury|neoplasm|diabetes|diabete)")) {
                mentionType = "-Type_Disease-";
            } else if (tokens[p].matches(".*(motif|domain|omain|site|region|sequence|frameshift|finger).*") && (p >= tokens.length - 1 || !tokens[p + 1].matches("proteins?"))) {
                mentionType = "-Type_DomainMotif-";
            } else if (tokens[p].equals("-") && p < tokens.length - 1 && tokens[p + 1].matches(".*(motif|domain|omain|binding|site|region|sequence|frameshift|finger|box).*")) {
                mentionType = "-Type_DomainMotif-";
            } else if (tokens[p].matches("[rmc]") && p < tokens.length - 1 && (tokens[p + 1].equals("DNA") || tokens[p + 1].equals("RNA"))) {
                mentionType = "-Type_DomainMotif-";
            } else if (tokens[p].matches(".*(famil|complex|cluster|proteins|genes|factors|transporter|proteinase|membrane|ligand|enzyme|channels|tors$|ase$|ases$)")) {
                mentionType = "-Type_Family-";
            } else if (tokens[p].toLowerCase().matches("^marker")) {
                mentionType = "-Type_Marker-";
            } else if (tokens[p].equals(".*cell.*") || p < tokens.length - 1 && tokens[p + 1].equals("cell") && tokens[p].matches("^(T|B|monocytic|cancer|tumor|myeloma|epithelial|crypt)$")) {
                mentionType = "-Type_Cell-";
            } else if (tokens[p].equals(".*chromosome.*")) {
                mentionType = "-Type_Chromosome-";
            } else if (tokens[p].matches("[pq]") && (p < tokens.length - 1 && tokens[p + 1].matches("^[0-9]+$") || p > 0 && tokens[p - 1].matches("^[0-9]+$"))) {
                mentionType = "-Type_ChromosomeStrain-";
            } else if (tokens[p].matches(".*(related|regulated|associated|correlated|reactive).*")) {
                mentionType = "-Type_relation-";
            } else if (tokens[p].toLowerCase().matches(".*(polymorphism|mutation|deletion|insertion|duplication|genotype|genotypes).*")) {
                mentionType = "-Type_VariationTerms-";
            } else if (tokens[p].matches(".*(oxidase|transferase|transferases|kinase|kinese|receptor|adrenoceptor|transporter|regulator|transcription|antigen|protein|gene|factor|member|molecule|channel|deaminase|spectrin).*")) {
                mentionType = "-Type_suffix-";
            } else if (tokens[p].matches("[\\(\\-\\_]") && p < tokens.length - 1 && tokens[p + 1].toLowerCase().matches(".*(alpha|beta|gamma|delta|theta|kappa|zeta|sigma|omega|i|ii|iii|iv|v|vi|[abcdefgyr])") && geneSet.stream().noneMatch(GeneMention::hasExactCandidateMatch)) {
                mentionType = "-Type_strain-";
            } else if (tokens[p].matches("(alpha|beta|gamma|delta|theta|kappa|zeta|sigma|omega|i|ii|iii|iv|v|vi|[abcdefgyr])") && geneSet.stream().noneMatch(GeneMention::hasExactCandidateMatch)) {
                mentionType = "-Type_strain-";
            }
            if (mentionType == null) continue;
            t.setFeatureValue(mentionType, 1.0);
        }
        return inst;
    }
}

