package net.ontopia.topicmaps.classify;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/* loaded from: input_file:net/ontopia/topicmaps/classify/DocumentClassifier.class */
/**
 * Drives classification of documents: turns variant tokens into terms stored
 * in a {@link TermDatabase}, then runs the registered document analyzers over
 * the document structure and the registered term analyzers over the terms.
 *
 * <p>A term stemmer must be set via {@link #setTermStemmer(TermStemmerIF)}
 * before a document containing variant tokens without a term is analyzed;
 * otherwise term extraction fails with a {@link NullPointerException}.
 */
public class DocumentClassifier {
    private TermDatabase tdb;
    private TermStemmerIF termStemmer;
    // Initialised once and never reassigned, so these lists are never null.
    private final List<DocumentAnalyzerIF> docAnalyzers = new ArrayList<DocumentAnalyzerIF>();
    private final List<TermAnalyzerIF> termAnalyzers = new ArrayList<TermAnalyzerIF>();

    /**
     * Creates a classifier that stores its terms in the given database.
     *
     * @param termDatabase the term database to create terms in
     */
    public DocumentClassifier(TermDatabase termDatabase) {
        this.tdb = termDatabase;
    }

    /**
     * Returns the term database used by this classifier.
     *
     * @return the current term database
     */
    public TermDatabase getTermDatabase() {
        return this.tdb;
    }

    /**
     * Replaces the term database used by this classifier.
     *
     * @param termDatabase the new term database
     */
    public void setTermDatabase(TermDatabase termDatabase) {
        this.tdb = termDatabase;
    }

    /**
     * Sets the stemmer used to compute the stem a new term is created from.
     *
     * @param termStemmerIF the stemmer to use
     */
    public void setTermStemmer(TermStemmerIF termStemmerIF) {
        this.termStemmer = termStemmerIF;
    }

    /**
     * Registers a document analyzer. Analyzers run in registration order.
     *
     * @param documentAnalyzerIF the analyzer to add
     */
    public void addDocumentAnalyzer(DocumentAnalyzerIF documentAnalyzerIF) {
        this.docAnalyzers.add(documentAnalyzerIF);
    }

    /**
     * Registers a term analyzer. Analyzers run in registration order.
     *
     * @param termAnalyzerIF the analyzer to add
     */
    public void addTermAnalyzer(TermAnalyzerIF termAnalyzerIF) {
        this.termAnalyzers.add(termAnalyzerIF);
    }

    /**
     * Extracts terms from the whole document, starting at its root region.
     *
     * @param document the document to extract terms from
     */
    protected void extractTerms(Document document) {
        extractTerms(document.getRoot());
    }

    /**
     * Recursively extracts terms from a region. Children that are text blocks
     * are processed directly; every other child is treated as a nested region.
     *
     * @param region the region to walk
     * @throws ClassCastException if a child is neither a {@code TextBlock}
     *         nor a {@code Region}
     */
    protected void extractTerms(Region region) {
        for (Object child : region.getChildren()) {
            if (child instanceof TextBlock) {
                extractTerms(region, (TextBlock) child);
            } else {
                extractTerms((Region) child);
            }
        }
    }

    /**
     * Assigns a term to every variant token of a text block. A variant that
     * has no term yet gets a new term created from the stem of the token
     * value; in every case the variant is then added to its term.
     *
     * @param region the region containing the text block (not used here)
     * @param textBlock the text block whose tokens are processed
     * @throws NullPointerException if a variant without a term is found and
     *         no term stemmer has been set
     */
    protected void extractTerms(Region region, TextBlock textBlock) {
        for (Token token : textBlock.getTokens()) {
            // NOTE(review): 1 is presumably the "variant" token type constant
            // (tokens of this type are cast to Variant) - confirm against Token.
            if (token.getType() == 1) {
                Variant variant = (Variant) token;
                Term term = variant.getTerm();
                if (term == null) {
                    String stem = this.termStemmer.stem(token.getValue());
                    term = this.tdb.createTerm(stem);
                    variant.setTerm(term);
                }
                term.addVariant(variant);
            }
        }
    }

    /**
     * Extracts the terms of a document and then runs every registered
     * document analyzer over it.
     *
     * <p>For each analyzer, {@code startAnalysis()} is called once, then the
     * document is traversed once for every time {@code doDocumentAnalysis()}
     * returns {@code true}, and finally {@code endAnalysis()} is called once.
     * {@code endAnalysis()} runs even when no pass is requested or when a pass
     * throws, matching the start/end pairing used in {@link #analyzeTerms()}.
     *
     * @param document the document to analyze
     */
    public void analyzeDocument(Document document) {
        extractTerms(document);
        if (this.docAnalyzers.isEmpty()) {
            return;
        }
        Region root = document.getRoot();
        for (DocumentAnalyzerIF analyzer : this.docAnalyzers) {
            analyzer.startAnalysis();
            try {
                // The analyzer decides how many passes over the document it needs.
                while (analyzer.doDocumentAnalysis()) {
                    analyzer.startDocument(document);
                    analyzeRegion(root, analyzer);
                    analyzer.endDocument(document);
                }
            } finally {
                analyzer.endAnalysis();
            }
        }
    }

    /**
     * Recursively feeds a region to a document analyzer, bracketing the
     * traversal of its children with {@code startRegion}/{@code endRegion}.
     *
     * @param region the region to traverse
     * @param documentAnalyzerIF the analyzer receiving the events
     * @throws ClassCastException if a child is neither a {@code TextBlock}
     *         nor a {@code Region}
     */
    protected void analyzeRegion(Region region, DocumentAnalyzerIF documentAnalyzerIF) {
        documentAnalyzerIF.startRegion(region);
        for (Object child : region.getChildren()) {
            if (child instanceof TextBlock) {
                analyzeTextBlock(region, (TextBlock) child, documentAnalyzerIF);
            } else {
                analyzeRegion((Region) child, documentAnalyzerIF);
            }
        }
        documentAnalyzerIF.endRegion(region);
    }

    /**
     * Passes every token of a text block, together with its index in the
     * block, to a document analyzer.
     *
     * @param region the region containing the text block (not used here)
     * @param textBlock the text block whose tokens are analyzed
     * @param documentAnalyzerIF the analyzer receiving the tokens
     */
    protected void analyzeTextBlock(Region region, TextBlock textBlock, DocumentAnalyzerIF documentAnalyzerIF) {
        List<Token> tokens = textBlock.getTokens();
        // The size is read once up front, so only the tokens present at that
        // point are visited.
        int size = tokens.size();
        for (int i = 0; i < size; i++) {
            documentAnalyzerIF.analyzeToken(textBlock, tokens.get(i), i);
        }
    }

    /**
     * Runs every registered term analyzer over the terms of the term
     * database. {@code endAnalysis()} is always called for an analyzer whose
     * {@code startAnalysis} completed, even if analyzing a term throws.
     */
    public void analyzeTerms() {
        if (this.termAnalyzers.isEmpty()) {
            return;
        }
        for (TermAnalyzerIF analyzer : this.termAnalyzers) {
            analyzer.startAnalysis(this.tdb);
            try {
                // Iterate over an array copy of the terms rather than the live
                // collection (presumably so analyzers may modify the term
                // database while running - confirm).
                Term[] snapshot = (Term[]) this.tdb.getTerms().toArray(new Term[0]);
                for (Term term : snapshot) {
                    analyzer.analyzeTerm(term);
                }
            } finally {
                analyzer.endAnalysis();
            }
        }
    }

    /**
     * For every registered term analyzer that is a {@code CompoundAnalyzer},
     * asks it to dump each term of the database in rank order.
     */
    public void dump() {
        for (TermAnalyzerIF analyzer : this.termAnalyzers) {
            if (analyzer instanceof CompoundAnalyzer) {
                CompoundAnalyzer compoundAnalyzer = (CompoundAnalyzer) analyzer;
                for (Term term : this.tdb.getTermsByRank()) {
                    compoundAnalyzer.dump(term);
                }
            }
        }
    }
}
