/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.filtering.families;

import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.Token;
import de.julielab.geneexpbase.genemodel.GeneMention;
import java.util.Objects;
import java.util.regex.Pattern;

/**
 * Pipe that sets amino-acid related features on a {@link Token} based on the
 * normalized text of the {@link GeneMention} stored under the instance
 * property {@code "gm"}.
 *
 * <p>The normalized text is split on whitespace and every resulting token is
 * checked against three patterns, in this order; the first hit wins:
 * <ol>
 *   <li>token contains a full amino-acid name (or "stop"/"frameshift"):
 *       feature {@code -ProteinSymFull-}</li>
 *   <li>token is exactly a three-letter code (or "fs"/"fsx"):
 *       feature {@code -ProteinSymTri-}</li>
 *   <li>token is exactly one of the listed single upper-case letters:
 *       feature {@code -ProteinSymChar-}</li>
 * </ol>
 * Each matching feature is set to {@code 1.0} on the token that is the
 * instance's data object.
 */
public class ProteinSymbols
extends Pipe {
    private static final long serialVersionUID = -7211355932042290296L;

    /** Separator used to break the normalized gene name into tokens. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Matches tokens that contain a spelled-out amino-acid name anywhere. */
    private static final Pattern FULL_NAME = Pattern.compile(".*(glutamine|glutamic|leucine|valine|isoleucine|lysine|alanine|glycine|aspartate|methionine|threonine|histidine|aspartic|asparticacid|arginine|asparagine|tryptophan|proline|phenylalanine|cysteine|serine|glutamate|tyrosine|stop|frameshift).*");

    /** Matches tokens that are exactly a three-letter amino-acid code. */
    private static final Pattern THREE_LETTER = Pattern.compile("(cys|ile|ser|gln|met|asn|pro|lys|asp|thr|phe|ala|gly|his|leu|arg|trp|val|glu|tyr|fs|fsx)");

    /**
     * Matches tokens that are exactly one single-letter amino-acid code.
     * NOTE(review): this pattern is upper-case only while the other two are
     * lower-case only; if the normalized text is lower-cased upstream this
     * branch can never fire - confirm against GeneMention#getNormalizedText.
     */
    private static final Pattern SINGLE_LETTER = Pattern.compile("[CISQMNPKDTFAGHLRWVEYX]");

    /**
     * Adds protein-symbol features to the token carried by {@code inst}.
     *
     * @param inst instance whose data is a {@link Token} and whose property
     *             {@code "gm"} holds the {@link GeneMention} to inspect
     * @return the same instance, with zero or more features set on its token
     * @throws NullPointerException if the {@code "gm"} property is absent or
     *                              the mention has no normalized text
     */
    public Instance pipe(Instance inst) {
        Token t = (Token)inst.getData();
        GeneMention gm = Objects.requireNonNull(
                (GeneMention)inst.getProperty("gm"),
                "instance has no 'gm' property holding a GeneMention");
        String normalizedGeneName = Objects.requireNonNull(
                gm.getNormalizedText(),
                "gene mention has no normalized text");
        // Patterns are precompiled constants; String.matches/String.split
        // would recompile each regex on every call.
        for (String tokenText : WHITESPACE.split(normalizedGeneName)) {
            String proteinSym = classify(tokenText);
            if (proteinSym != null) {
                t.setFeatureValue(proteinSym, 1.0);
            }
        }
        return inst;
    }

    /**
     * Returns the feature name for {@code tokenText}, or {@code null} when the
     * token matches none of the protein-symbol patterns.
     */
    private static String classify(String tokenText) {
        if (FULL_NAME.matcher(tokenText).matches()) {
            return "-ProteinSymFull-";
        }
        if (THREE_LETTER.matcher(tokenText).matches()) {
            return "-ProteinSymTri-";
        }
        if (SINGLE_LETTER.matcher(tokenText).matches()) {
            return "-ProteinSymChar-";
        }
        return null;
    }
}

