package czsem.gate.plugins;

import czsem.gate.utils.GateUtils;
import gate.Annotation;
import gate.Document;
import gate.FeatureMap;
import gate.Utils;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.RunTime;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.commons.lang3.StringEscapeUtils;

@CreoleResource(name = "czsem NormalizeTokenForms", comment = "Adds clean_lemma, clean_lemma_noAccents and form_noAccents features to existing tokens.")
/* loaded from: input_file:czsem/gate/plugins/NormalizeTokenForms.class */
public class NormalizeTokenForms extends AbstractLanguageAnalyser {
    private static final long serialVersionUID = -4427651577074969377L;
    protected String annotationSetName = null;
    protected String tokenAnnotationTypeName = "Token";
    protected String formFeatureName = "form";
    protected String lemmaFeatureName = "lemma";
    protected String tagFeatureName = "tag";
    protected String negationRegexp = "^..........N....$";

    public void execute() throws ExecutionException {
        Document document = getDocument();
        for (Annotation annotation : document.getAnnotations(getAnnotationSetName()).get(getTokenAnnotationTypeName())) {
            FeatureMap features = annotation.getFeatures();
            String cleanTokenLemma = cleanTokenLemma(features, annotation);
            if (cleanTokenLemma != null) {
                String removeDiacritics = GateUtils.removeDiacritics(cleanTokenLemma);
                features.put("clean_" + getLemmaFeatureName() + "_noAccents", removeDiacritics);
                String str = (String) features.get(getTagFeatureName());
                if (str != null) {
                    if (str.matches(getNegationRegexp())) {
                        features.put("neg_clean_" + getLemmaFeatureName(), "NEG" + cleanTokenLemma);
                        features.put("neg_clean_" + getLemmaFeatureName() + "_noAccents", "NEG" + removeDiacritics);
                    } else {
                        features.put("neg_clean_" + getLemmaFeatureName(), cleanTokenLemma);
                        features.put("neg_clean_" + getLemmaFeatureName() + "_noAccents", removeDiacritics);
                    }
                }
            }
            String str2 = (String) features.get(getFormFeatureName());
            if (str2 == null) {
                str2 = Utils.stringFor(document, annotation);
            }
            features.put(getFormFeatureName() + "_noAccents", GateUtils.removeDiacritics(str2));
        }
    }

    public static String truncateLemma(String str) {
        return str.replaceFirst("((?:(`|_;|_:|_,|_\\^|))+)(`|_;|_:|_,|_\\^).+$", "$1").replaceFirst("^(.+)-\\d+$", "$1");
    }

    protected String cleanTokenLemma(FeatureMap featureMap, Annotation annotation) {
        String str = (String) featureMap.get(getLemmaFeatureName());
        String str2 = (String) featureMap.get(getFormFeatureName());
        if (str2 == null) {
            str2 = Utils.stringFor(getDocument(), annotation);
        }
        if (str == null) {
            str = str2;
        }
        if (str.startsWith("&")) {
            str = StringEscapeUtils.unescapeXml(str);
        }
        if (str2.startsWith("&")) {
            StringEscapeUtils.unescapeXml(str2);
        }
        String replace = truncateLemma(str).replace('|', '_');
        featureMap.put("clean_" + getLemmaFeatureName(), replace);
        return replace;
    }

    public String getAnnotationSetName() {
        return this.annotationSetName;
    }

    @CreoleParameter(defaultValue = "")
    @RunTime
    public void setAnnotationSetName(String str) {
        this.annotationSetName = str;
    }

    public String getTokenAnnotationTypeName() {
        return this.tokenAnnotationTypeName;
    }

    @CreoleParameter(defaultValue = "Token")
    @RunTime
    public void setTokenAnnotationTypeName(String str) {
        this.tokenAnnotationTypeName = str;
    }

    public String getFormFeatureName() {
        return this.formFeatureName;
    }

    @CreoleParameter(defaultValue = "form")
    @RunTime
    public void setFormFeatureName(String str) {
        this.formFeatureName = str;
    }

    public String getLemmaFeatureName() {
        return this.lemmaFeatureName;
    }

    @CreoleParameter(defaultValue = "lemma")
    @RunTime
    public void setLemmaFeatureName(String str) {
        this.lemmaFeatureName = str;
    }

    public String getTagFeatureName() {
        return this.tagFeatureName;
    }

    @CreoleParameter(defaultValue = "tag")
    @RunTime
    public void setTagFeatureName(String str) {
        this.tagFeatureName = str;
    }

    public String getNegationRegexp() {
        return this.negationRegexp;
    }

    @CreoleParameter(defaultValue = "^..........N....$")
    @RunTime
    public void setNegationRegexp(String str) {
        this.negationRegexp = str;
    }
}
