package net.ontopia.topicmaps.classify;

import gnu.trove.map.hash.TObjectIntHashMap;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:net/ontopia/topicmaps/classify/CompoundAnalyzer.class */
/**
 * Analyzer that records, for every variant token seen in the text, which
 * tokens directly follow it, and later uses those follower statistics to
 * create compound terms ("variant1 variant2") in a {@link TermDatabase}.
 *
 * <p>Follower statistics are collected through
 * {@link #analyzeToken(TextBlock, Token, int)}; compounds are created through
 * {@link #analyzeTerm(Term)} between {@link #startAnalysis(TermDatabase)} and
 * {@link #endAnalysis()}.
 */
public class CompoundAnalyzer extends AbstractDocumentAnalyzer implements TermAnalyzerIF {
    private static final Logger log = LoggerFactory.getLogger(CompoundAnalyzer.class);

    /**
     * Token type value for which tokens are cast to {@link Variant} in this
     * class. NOTE(review): presumably mirrors a constant declared on Token;
     * the literal is kept because that declaration is not visible here.
     */
    private static final int VARIANT_TOKEN_TYPE = 1;

    /** Term database used by {@link #analyzeTerm(Term)}; only set between start and end of analysis. */
    private TermDatabase tdb;
    /** Optional stemmer used to derive the stem of a new compound; may be null. */
    private TermStemmerIF termStemmer;
    /** Follower statistics per leading variant. */
    private final Map<Variant, Followers> followers = new HashMap<>();
    /** Upper bound for the length counter passed to {@link #addComposites}. */
    private int maxLength = 3;
    /** Minimum score the leading term needs before compounds are considered. */
    private double term1ScoreThreshold = 0.02d;
    /** Minimum score the following term needs before a compound is considered. */
    private double term2ScoreThreshold = 0.02d;
    /** Minimum number of times the following term must have followed the leading variant. */
    private int compositeOccsThreshold = 2;
    /** Multiplier applied to the score computed for a compound. */
    private double compoundFactor = 2.0d;

    /**
     * Orders variants by descending follower score as reported by a given
     * {@link Followers} instance.
     */
    public class CompositeScoreComparator implements Comparator<Variant> {
        private final Followers f;

        CompositeScoreComparator(Followers followers) {
            this.f = followers;
        }

        @Override // java.util.Comparator
        public int compare(Variant variant, Variant variant2) {
            // Arguments are swapped on purpose: highest score sorts first.
            return Double.compare(this.f.getScore(variant2), this.f.getScore(variant));
        }
    }

    /**
     * Follower statistics for one leading variant: how often each variant
     * followed it, and how often it was followed by a non-variant token.
     */
    public class Followers {
        /** Occurrence count per following variant. */
        private final TObjectIntHashMap<Variant> followers = new TObjectIntHashMap<>();
        /** Number of times the leading variant was followed by a non-variant token. */
        private int followedByDelimiter;
        /** Sum of all counts stored in {@link #followers}. */
        private int totalFollowerOccurrences;

        private Followers() {
        }

        /**
         * Registers {@code i} occurrences of {@code token} as a follower.
         * Tokens that are not variants only increase the delimiter counter.
         *
         * @param token the token that followed the leading variant
         * @param i number of occurrences to add
         */
        public void addFollower(Token token, int i) {
            if (token.getType() != VARIANT_TOKEN_TYPE) {
                this.followedByDelimiter += i;
                return;
            }
            // Single lookup: add to the existing count, or insert the count if absent.
            this.followers.adjustOrPutValue((Variant) token, i, i);
            this.totalFollowerOccurrences += i;
        }

        /**
         * @return a newly allocated array with all following variants, in the
         *         map's iteration order
         */
        public Variant[] getFollowers() {
            return this.followers.keys(new Variant[this.followers.size()]);
        }

        /**
         * @return a newly allocated array with all following variants, sorted
         *         by descending {@link #getScore(Variant)}
         */
        public Variant[] getFollowersByRank() {
            Variant[] ranked = getFollowers();
            Arrays.sort(ranked, new CompositeScoreComparator(this));
            return ranked;
        }

        /** @return the total number of recorded variant-follower occurrences */
        public int getTotalFollowerOccurences() {
            return this.totalFollowerOccurrences;
        }

        /**
         * @param variant the following variant to look up
         * @return the count stored for {@code variant} in the follower map
         */
        public int getFollowerOccurrences(Variant variant) {
            return this.followers.get(variant);
        }

        /**
         * @param term the term whose variants are looked up
         * @return the sum of the positive follower counts of all variants of {@code term}
         */
        public int getFollowerOccurrences(Term term) {
            int sum = 0;
            for (Variant variant : term.getVariants()) {
                if (this.followers.get(variant) > 0) {
                    sum += getFollowerOccurrences(variant);
                }
            }
            return sum;
        }

        /** @return how many times the leading variant was followed by a non-variant token */
        public int getFollowedByDelimiter() {
            return this.followedByDelimiter;
        }

        /**
         * @param variant the following variant to score
         * @return the share of all recorded follower occurrences that belongs
         *         to {@code variant}; {@code 0.0} when nothing has been recorded
         */
        public double getScore(Variant variant) {
            if (this.totalFollowerOccurrences == 0) {
                // Avoid 0/0, which would otherwise yield NaN.
                return 0.0d;
            }
            return (1.0d * getFollowerOccurrences(variant)) / this.totalFollowerOccurrences;
        }

        /**
         * @param term the term whose variants are scored
         * @return the sum of {@link #getScore(Variant)} over all variants of
         *         {@code term} that have a positive follower count
         */
        public double getScore(Term term) {
            double sum = 0.0d;
            for (Variant variant : term.getVariants()) {
                if (this.followers.get(variant) > 0) {
                    sum += getScore(variant);
                }
            }
            return sum;
        }

        /**
         * Computes {@code 0.64 - ln(total) / 15}, so the limit decreases as
         * more follower occurrences are recorded. With no recorded followers
         * the result is positive infinity.
         *
         * @return the minimum follower score required to form a compound
         */
        public double getLimit() {
            return 0.64d - (Math.log(this.totalFollowerOccurrences) / 15.0d);
        }
    }

    /** Creates an analyzer with the default thresholds. */
    public CompoundAnalyzer() {
        // NOTE(review): the meaning of the superclass argument is defined in
        // AbstractDocumentAnalyzer, which is not visible here.
        super(1);
    }

    /** @param i upper bound for the length counter used by {@link #addComposites} */
    public void setMaxLength(int i) {
        this.maxLength = i;
    }

    /** @param d minimum score required of the leading term */
    public void setTerm1ScoreThreshold(double d) {
        this.term1ScoreThreshold = d;
    }

    /** @param d minimum score required of the following term */
    public void setTerm2ScoreThreshold(double d) {
        this.term2ScoreThreshold = d;
    }

    /** @param i minimum follower occurrence count required to form a compound */
    public void setCompositeOccurrencesThreshold(int i) {
        this.compositeOccsThreshold = i;
    }

    /** @param termStemmerIF stemmer applied to new compound strings; may be null */
    public void setTermStemmer(TermStemmerIF termStemmerIF) {
        this.termStemmer = termStemmerIF;
    }

    /** Records a single occurrence of {@code token} following {@code variant}. */
    protected void addFollower(Variant variant, Token token) {
        addFollower(variant, token, 1);
    }

    /**
     * Records {@code i} occurrences of {@code token} following {@code variant},
     * creating the statistics entry for {@code variant} on first use.
     */
    protected void addFollower(Variant variant, Token token, int i) {
        Followers stats = this.followers.get(variant);
        if (stats == null) {
            stats = new Followers();
            this.followers.put(variant, stats);
        }
        stats.addFollower(token, i);
    }

    /**
     * Records the token following {@code token} in {@code textBlock}, provided
     * {@code token} is a variant and is not the last token of the block.
     */
    @Override // net.ontopia.topicmaps.classify.AbstractDocumentAnalyzer, net.ontopia.topicmaps.classify.DocumentAnalyzerIF
    public void analyzeToken(TextBlock textBlock, Token token, int i) {
        if (token.getType() != VARIANT_TOKEN_TYPE) {
            return;
        }
        List<Token> tokens = textBlock.getTokens();
        if (tokens.size() - 1 > i) {
            addFollower((Variant) token, tokens.get(i + 1));
        }
    }

    /** Creates compounds that start with {@code term}, beginning at length counter 2. */
    @Override // net.ontopia.topicmaps.classify.TermAnalyzerIF
    public void analyzeTerm(Term term) {
        addComposites(this.tdb, term, 2);
    }

    @Override // net.ontopia.topicmaps.classify.TermAnalyzerIF
    public void startAnalysis(TermDatabase termDatabase) {
        this.tdb = termDatabase;
    }

    @Override // net.ontopia.topicmaps.classify.AbstractDocumentAnalyzer, net.ontopia.topicmaps.classify.DocumentAnalyzerIF
    public void endAnalysis() {
        this.tdb = null;
    }

    /**
     * Creates compound terms of the form "variant follower" for every variant
     * of {@code term} whose follower statistics pass the configured
     * thresholds, lowers the scores of the two component terms accordingly,
     * and recurses on each new compound while {@code i} is below the
     * configured maximum length.
     *
     * @param termDatabase database in which compound variants and terms are created
     * @param term the leading term
     * @param i length counter of the compounds built at this level;
     *        {@link #analyzeTerm(Term)} starts at 2
     */
    public void addComposites(TermDatabase termDatabase, Term term, int i) {
        // Captured once: later score adjustments on 'term' must not affect
        // the score used for compounds created in this call.
        double headScore = term.getScore();
        if (headScore < this.term1ScoreThreshold) {
            return;
        }
        for (Variant head : term.getVariants()) {
            Followers headFollowers = this.followers.get(head);
            if (headFollowers == null) {
                continue;
            }
            double limit = headFollowers.getLimit();
            for (Variant tail : headFollowers.getFollowers()) {
                Term tailTerm = tail.getTerm();
                if (term.equals(tailTerm)) {
                    continue;
                }
                double tailScore = tailTerm.getScore();
                // Written as !(a >= b) so that a NaN score is rejected.
                if (!(tailScore >= this.term2ScoreThreshold)) {
                    continue;
                }
                double termFollowScore = headFollowers.getScore(tailTerm);
                int termFollowCount = headFollowers.getFollowerOccurrences(tailTerm);
                String compoundText = head.getValue() + " " + tail.getValue();
                log.debug("k:{} {}, {}/{}", compoundText, termFollowScore - limit,
                        termFollowCount, headFollowers.getTotalFollowerOccurences());
                if (!(termFollowScore >= limit) || termFollowCount < this.compositeOccsThreshold) {
                    continue;
                }

                double variantFollowScore = headFollowers.getScore(tail);
                int variantFollowCount = headFollowers.getFollowerOccurrences(tail);
                Variant compound = termDatabase.createVariant(compoundText);
                Term compoundTerm = compound.getTerm();
                if (compoundTerm == null) {
                    compoundTerm = attachCompoundTerm(termDatabase, compound, compoundText,
                            headScore, tailScore, variantFollowScore, variantFollowCount);
                }
                compoundTerm.addVariant(compound, variantFollowCount);
                inheritFollowers(compound, tail);

                log.debug("  b: {} {}", term.getScore(), tailTerm.getScore());
                // Share of each component's occurrences that is explained by the compound.
                double headShare = (1.0d * variantFollowCount) / term.getOccurrences();
                double tailShare = (1.0d * variantFollowCount) / tailTerm.getOccurrences();
                if (headShare < 1.0d) {
                    term.multiplyScore(1.0d - headShare, "compound individiual adjustment");
                }
                if (tailShare < 1.0d) {
                    tailTerm.multiplyScore(1.0d - tailShare, "compound individiual adjustment");
                }
                log.debug("  a: {} {}", term.getScore(), tailTerm.getScore());

                if (i < this.maxLength) {
                    // Pass i + 1 without mutating i: incrementing the counter
                    // in place would let the first compound found consume the
                    // length budget of every later sibling, making the result
                    // depend on map iteration order.
                    addComposites(termDatabase, compoundTerm, i + 1);
                }
            }
        }
    }

    /** Finds or creates the term for a compound variant that has none, scores it, and links the two. */
    private Term attachCompoundTerm(TermDatabase termDatabase, Variant compound, String compoundText,
            double headScore, double tailScore, double variantFollowScore, int variantFollowCount) {
        String stem = this.termStemmer != null ? this.termStemmer.stem(compoundText) : compoundText;
        Term compoundTerm = termDatabase.getTerm(stem);
        double compoundScore = (headScore + (tailScore * variantFollowScore)) * this.compoundFactor;
        if (compoundTerm == null) {
            compoundTerm = termDatabase.createTerm(stem);
            compoundTerm.setScore(compoundScore, "new compound score");
            log.debug("c:{} {}, {}\n : ({} + ({} * {})) * {})", compoundTerm.getStem(),
                    compoundTerm.getScore(), variantFollowCount, headScore, tailScore,
                    variantFollowScore, this.compoundFactor);
        } else {
            log.debug("d:{} * {}", variantFollowCount, variantFollowScore);
            compoundTerm.addScore(compoundScore, "compound adjustment");
        }
        compound.setTerm(compoundTerm);
        return compoundTerm;
    }

    /** Registers every follower of {@code tail}, with its count, as a follower of {@code compound}. */
    private void inheritFollowers(Variant compound, Variant tail) {
        Followers tailFollowers = this.followers.get(tail);
        if (tailFollowers == null) {
            return;
        }
        for (Variant next : tailFollowers.getFollowers()) {
            addFollower(compound, next, tailFollowers.getFollowerOccurrences(next));
        }
    }

    /**
     * Prints {@code term}, its variants by rank, and the follower statistics
     * of each variant to standard output.
     */
    public void dump(Term term) {
        System.out.println("t:" + term.getPreferredName() + " " + term.getScore() + ", " + term.getOccurrences());
        for (Variant variant : term.getVariantsByRank()) {
            System.out.println("  v:" + variant + ":" + term.getOccurrences(variant));
            Followers stats = this.followers.get(variant);
            if (stats == null) {
                System.out.println("    f:null");
                continue;
            }
            System.out.println("    f:delimiters: " + stats.getFollowedByDelimiter());
            for (Variant follower : stats.getFollowersByRank()) {
                System.out.println("    f:" + follower.getValue() + " " + follower.getTerm().getScore() + ", " + stats.getFollowerOccurrences(follower) + ", " + stats.getScore(follower));
            }
        }
    }
}
