package org.apache.jackrabbit.oak.plugins.tika;

import java.io.File;
import java.io.IOException;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.jackrabbit.guava.common.base.Charsets;
import org.apache.jackrabbit.guava.common.base.Stopwatch;
import org.apache.jackrabbit.guava.common.io.Closer;
import org.apache.jackrabbit.oak.plugins.blob.datastore.TextWriter;
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:org/apache/jackrabbit/oak/plugins/tika/TextPopulator.class */
/**
 * Copies already-extracted full text out of an existing Lucene index into a
 * {@link TextWriter}.
 *
 * <p>Each record of a CSV file supplies a blob id and a JCR path. The path is
 * looked up in the Lucene index and, depending on what the stored full-text
 * field holds, the blob is written, marked empty, or marked as an extraction
 * error in the writer. Running counters are kept in {@link PopulatorStats}.
 *
 * <p>Decompiler notes in this file indicate that the constructor and
 * {@link #populate(File, File)} were package-private in the original source;
 * they are kept public here so that existing callers keep compiling.
 */
public class TextPopulator {
    private static final Logger log = LoggerFactory.getLogger(TextPopulator.class);

    /** CSV column holding the blob id. */
    static final String BLOB_ID = "blobId";

    /** Full-text value that is treated as a failed text extraction. */
    static final String ERROR_TEXT = "TextExtractionError";

    /** CSV column holding the node path. */
    private static final String JCR_PATH = "jcr:path";

    /** Lucene field that is queried with the node path. */
    private static final String PATH_FIELD = ":path";

    private final TextWriter textWriter;
    private PopulatorStats stats = new PopulatorStats();

    /**
     * Mutable counters describing the progress of a populate run. The
     * stopwatch is started when the instance is created.
     */
    static class PopulatorStats {
        /** A progress line is logged every time this many records have been read. */
        private static final int LOG_INTERVAL = 10000;

        int read = 0;
        int ignored = 0;
        int processed = 0;
        int parsed = 0;
        int errored = 0;
        int empty = 0;
        Stopwatch w = Stopwatch.createStarted();

        PopulatorStats() {
        }

        /**
         * Counts one more record as read and logs the current statistics
         * whenever the read counter reaches a multiple of {@link #LOG_INTERVAL}.
         *
         * @param str value logged as the current position (the caller in this
         *            class passes the record's path)
         */
        void readAndDumpStatsIfRequired(String str) {
            this.read++;
            if (this.read % LOG_INTERVAL == 0) {
                log.info("{} - currently at {}", toString(), str);
            }
        }

        /**
         * @return a one-line summary of all counters and the stopwatch
         */
        @Override
        public String toString() {
            return String.format("Text populator stats - Read: %s; Ignored: %s; Processed: %s; Parsed: %s; Errored: %s; Empty: %s (in %s)", this.read, this.ignored, this.processed, this.parsed, this.errored, this.empty, this.w);
        }
    }

    /**
     * @param textWriter destination that is asked whether a blob is already
     *                   processed and that receives the text, empty and error
     *                   markers
     */
    public TextPopulator(TextWriter textWriter) {
        this.textWriter = textWriter;
    }

    /**
     * Replaces the statistics object updated by {@link #populate(File, File)}.
     *
     * @param populatorStats statistics instance to use from now on
     */
    void setStats(PopulatorStats populatorStats) {
        this.stats = populatorStats;
    }

    /**
     * Reads every record of the CSV file and populates the text writer from
     * the Lucene index.
     *
     * <p>Per record: a blob the writer already reports as processed is counted
     * as ignored. Otherwise the record's path is looked up in the index and
     * the result is handled as follows:
     * <ul>
     *   <li>no text available ({@code null}): counted as errored, nothing is
     *       written to the writer;</li>
     *   <li>text equal to {@link #ERROR_TEXT}: marked as error in the writer;</li>
     *   <li>empty text: marked as empty in the writer;</li>
     *   <li>anything else: written to the writer.</li>
     * </ul>
     * The final statistics are logged once all records were handled.
     *
     * @param file  CSV file, parsed as UTF-8 with
     *              {@code CSVFileBinaryResourceProvider.FORMAT}; must provide
     *              the {@code blobId} and {@code jcr:path} columns
     * @param file2 directory of the Lucene index to read the text from
     * @throws IOException if the CSV file or the index cannot be opened, or if
     *                     the writer or closing a resource fails
     */
    public void populate(File file, File file2) throws IOException {
        // The Closer releases everything registered with it, last registered first,
        // when the try block is left - normally or exceptionally.
        try (Closer closer = Closer.create()) {
            CSVParser rows = closer.register(CSVParser.parse(file, Charsets.UTF_8, CSVFileBinaryResourceProvider.FORMAT));
            FSDirectory indexDirectory = closer.register(FSDirectory.open(file2));
            DirectoryReader reader = closer.register(DirectoryReader.open(indexDirectory));
            IndexSearcher searcher = new IndexSearcher(reader);

            for (CSVRecord row : rows) {
                String blobId = row.get(BLOB_ID);
                String path = row.get(JCR_PATH);
                if (this.textWriter.isProcessed(blobId)) {
                    this.stats.ignored++;
                } else {
                    String text = getText(reader, searcher, path);
                    this.stats.processed++;
                    if (text == null) {
                        // Counted only: the writer is left untouched for this blob.
                        this.stats.errored++;
                    } else if (ERROR_TEXT.equals(text)) {
                        this.textWriter.markError(blobId);
                        this.stats.errored++;
                    } else if (text.isEmpty()) {
                        this.textWriter.markEmpty(blobId);
                        this.stats.empty++;
                    } else {
                        this.textWriter.write(blobId, text);
                        this.stats.parsed++;
                    }
                }
                this.stats.readAndDumpStatsIfRequired(path);
            }
            log.info(this.stats.toString());
        }
    }

    /**
     * Looks up the stored full text for a path.
     *
     * @param directoryReader reader used to load the matching document
     * @param indexSearcher   searcher used to find the document by path
     * @param str             path to search for in the {@code :path} field
     * @return the trimmed full-text value, or {@code null} when the search
     *         does not return exactly one hit, when the document does not
     *         carry exactly one full-text value, or when reading the index
     *         fails
     */
    private static String getText(DirectoryReader directoryReader, IndexSearcher indexSearcher, String str) {
        try {
            ScoreDoc[] hits = indexSearcher.search(new TermQuery(new Term(PATH_FIELD, str)), 1).scoreDocs;
            if (hits.length != 1) {
                return null;
            }
            String[] fullText = directoryReader.document(hits[0].doc).getValues(FieldNames.FULLTEXT);
            if (fullText.length != 1) {
                return null;
            }
            return fullText[0].trim();
        } catch (IOException e) {
            // Still best effort (the caller counts null as errored), but an I/O
            // failure must not be indistinguishable from a missing document.
            log.warn("Failed to read extracted text for path {} from the index", str, e);
            return null;
        }
    }
}
