package cz.vutbr.fit.layout.text.taggers;

import cz.vutbr.fit.layout.api.Parameter;
import cz.vutbr.fit.layout.impl.ParameterInt;
import cz.vutbr.fit.layout.impl.ParameterString;
import cz.vutbr.fit.layout.model.Area;
import cz.vutbr.fit.layout.model.Tag;
import cz.vutbr.fit.layout.model.TagOccurrence;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:cz/vutbr/fit/layout/text/taggers/RegexpTagger.class */
/**
 * A tagger that recognizes title-like text ("General paper or news titles")
 * by means of three configurable regular expressions:
 * <ul>
 * <li>{@code areaExpr} - must match at the beginning of the area text for the
 * area to be considered at all,</li>
 * <li>{@code mainExpr} - finds the candidate substrings within the text,</li>
 * <li>{@code contExpr} - decides whether an area may continue a tagged
 * sequence.</li>
 * </ul>
 * A candidate gets the {@link #YES} support when it contains at least
 * {@code minWords} words that are at least {@code minWordLength} characters
 * long; shorter candidates only get {@link #COULDBE}.
 */
public class RegexpTagger extends BaseTagger {
    /** Support assigned to a candidate that has enough sufficiently long words. */
    private static final float YES = 0.6f;
    /** Support assigned to a candidate that matches but is too short. */
    private static final float COULDBE = 0.1f;
    /** Support meaning that the tag does not apply. */
    private static final float NO = 0.0f;

    /** Whitespace-run delimiter used for splitting a candidate into words. */
    private static final String WORD_SEPARATOR = "\\s+";
    /** Leading double quotes / initial quote punctuation removed by {@link #getText(Area)}. */
    private static final Pattern LEADING_QUOTES = Pattern.compile("^[\\\"\\p{Pi}]+");
    /** Trailing double quotes / final quote punctuation removed by {@link #getText(Area)}. */
    private static final Pattern TRAILING_QUOTES = Pattern.compile("[\\\"\\p{Pf}]+$");

    /** Minimal number of counted words required for the {@link #YES} support. */
    private int minWords = 3;
    /** Minimal length of a word for it to be counted by {@link #wordCount(String[])}. */
    private int minWordLength = 3;
    /** Expression that must match at the start of the area text. */
    private Pattern areaExpr = Pattern.compile("[A-Z0-9]");
    /** Expression that locates the candidate substrings. */
    private Pattern mainExpr = Pattern.compile("[A-Z][A-Za-z\\s\\.\\:\\-\\p{Pd}]*");
    /** Expression that must match at the start of a continuation area. */
    private Pattern contExpr = Pattern.compile("[A-Za-z\\s\\.\\:\\-\\p{Pd}]+");
    /**
     * Words that disqualify a candidate in {@link #belongsTo(Area)}. The words
     * are compared against lower-cased candidate words, so the entries are
     * expected to be in lower case.
     */
    protected List<String> blacklist = new ArrayList<>();

    /**
     * @return the identifier of this tagger
     */
    public String getId() {
        return "FITLayout.Tag.Regexp";
    }

    /**
     * @return the name of this tagger
     */
    public String getName() {
        return "Titles";
    }

    /**
     * @return a short description of what this tagger recognizes
     */
    public String getDescription() {
        return "General paper or news titles";
    }

    /**
     * Declares the configurable parameters of this tagger: the three
     * expressions ({@code areaExpr}, {@code mainExpr}, {@code contExpr}) and
     * {@code minWords}.
     *
     * @return the list of parameter definitions
     */
    public List<Parameter> defineParams() {
        // NOTE(review): the numeric arguments are presumably the allowed
        // length / value bounds of the parameters - confirm against the
        // ParameterString and ParameterInt constructors.
        return List.of(
                new ParameterString("areaExpr", 0, 512),
                new ParameterString("mainExpr", 0, 512),
                new ParameterString("contExpr", 0, 512),
                new ParameterInt("minWords", 0, 100));
    }

    /**
     * @return the source of the expression that must match at the start of
     *         the area text
     */
    public String getAreaExpr() {
        return this.areaExpr.toString();
    }

    /**
     * Sets the expression that must match at the start of the area text.
     *
     * @param str the regular expression source
     * @throws java.util.regex.PatternSyntaxException when the expression is
     *         not valid
     */
    public void setAreaExpr(String str) {
        this.areaExpr = Pattern.compile(str);
    }

    /**
     * @return the source of the expression that locates the candidates
     */
    public String getMainExpr() {
        return this.mainExpr.toString();
    }

    /**
     * Sets the expression that locates the candidate substrings.
     *
     * @param str the regular expression source
     * @throws java.util.regex.PatternSyntaxException when the expression is
     *         not valid
     */
    public void setMainExpr(String str) {
        this.mainExpr = Pattern.compile(str);
    }

    /**
     * @return the source of the expression used for continuation areas
     */
    public String getContExpr() {
        return this.contExpr.toString();
    }

    /**
     * Sets the expression that must match at the start of a continuation
     * area.
     *
     * @param str the regular expression source
     * @throws java.util.regex.PatternSyntaxException when the expression is
     *         not valid
     */
    public void setContExpr(String str) {
        this.contExpr = Pattern.compile(str);
    }

    /**
     * @return the minimal number of counted words required for the full
     *         support
     */
    public int getMinWords() {
        return this.minWords;
    }

    /**
     * @param i the minimal number of counted words required for the full
     *          support
     */
    public void setMinWords(int i) {
        this.minWords = i;
    }

    /**
     * Computes the support of this tag for the given area.
     *
     * @param area the area to be examined
     * @return {@code 0.0} for non-leaf areas, for areas whose text does not
     *         start with {@code areaExpr} and for areas where every candidate
     *         contains a blacklisted word; {@code 0.6} when some candidate
     *         without blacklisted words has at least {@code minWords} counted
     *         words; {@code 0.1} otherwise
     */
    public float belongsTo(Area area) {
        if (!area.isLeaf()) {
            return NO;
        }
        String text = getText(area);
        if (!this.areaExpr.matcher(text).lookingAt()) {
            return NO;
        }
        float support = NO;
        Matcher matcher = this.mainExpr.matcher(text);
        while (matcher.find()) {
            String[] words = matcher.group().split(WORD_SEPARATOR);
            if (containsBlacklistedWord(words)) {
                continue;
            }
            if (wordCount(words) >= this.minWords) {
                // YES is the highest support this method ever returns, so the
                // remaining matches cannot change the result.
                return YES;
            }
            support = COULDBE;
        }
        return support;
    }

    /**
     * Checks whether the given area may continue a sequence of tagged areas.
     *
     * @param area the area to be examined
     * @return {@code true} when the area is a leaf and its trimmed text starts
     *         with {@code contExpr}
     */
    public boolean allowsContinuation(Area area) {
        if (!area.isLeaf()) {
            return false;
        }
        return this.contExpr.matcher(area.getText().trim()).lookingAt();
    }

    /**
     * @return always {@code true}
     */
    public boolean allowsJoining() {
        return true;
    }

    /**
     * @param tag the other tag
     * @return always {@code true}
     */
    public boolean mayCoexistWith(Tag tag) {
        return true;
    }

    /**
     * Extracts all the substrings of the given text that match
     * {@code mainExpr}. Unlike {@link #belongsTo(Area)}, the blacklist is not
     * consulted here.
     *
     * @param str the text to be scanned
     * @return the occurrences in the order they were found; an occurrence has
     *         the support of {@code 0.6} when it has at least {@code minWords}
     *         counted words and {@code 0.1} otherwise
     */
    public List<TagOccurrence> extract(String str) {
        List<TagOccurrence> occurrences = new ArrayList<>();
        Matcher matcher = this.mainExpr.matcher(str);
        while (matcher.find()) {
            TagOccurrence occurrence = new TagOccurrence(matcher.group(), matcher.start(), COULDBE);
            String[] words = occurrence.getText().split(WORD_SEPARATOR);
            if (wordCount(words) >= this.minWords) {
                occurrence.setSupport(YES);
            }
            occurrences.add(occurrence);
        }
        return occurrences;
    }

    /**
     * Obtains the text of the area that is used for matching: the area text
     * is trimmed and then the leading and trailing quote characters are
     * removed.
     *
     * @param area the source area
     * @return the normalized text
     */
    protected String getText(Area area) {
        String trimmed = area.getText().trim();
        String withoutLeading = LEADING_QUOTES.matcher(trimmed).replaceAll("");
        return TRAILING_QUOTES.matcher(withoutLeading).replaceAll("");
    }

    /**
     * Checks whether any of the given words is on the blacklist. The words
     * are lower-cased before the comparison.
     *
     * @param strArr the words to be checked
     * @return {@code true} when at least one word is blacklisted
     */
    protected boolean containsBlacklistedWord(String[] strArr) {
        for (String word : strArr) {
            // Locale.ROOT keeps the case mapping independent of the default
            // locale of the JVM (e.g. the Turkish dotless 'i').
            if (this.blacklist.contains(word.toLowerCase(Locale.ROOT))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Counts the words that are at least {@code minWordLength} characters
     * long.
     *
     * @param strArr the words to be counted
     * @return the number of sufficiently long words
     */
    protected int wordCount(String[] strArr) {
        int count = 0;
        for (String word : strArr) {
            if (word.length() >= this.minWordLength) {
                count++;
            }
        }
        return count;
    }
}
