package org.apache.mahout.text;

import com.google.common.base.Strings;
import com.google.common.io.Closeables;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/mahout/text/SequenceFilesFromLuceneStorage.class */
/**
 * Exports documents held in one or more Lucene indexes to Hadoop {@link SequenceFile}s with
 * {@link Text} keys and {@link Text} values.
 *
 * <p>One sequence file is written per index path. It is named after the last component of the
 * index path and placed under the configured sequence-file output path. The configured
 * {@code maxHits} limit is applied to the running total of documents written across all indexes,
 * not to each index separately.
 */
public class SequenceFilesFromLuceneStorage {
    private static final Logger log = LoggerFactory.getLogger(SequenceFilesFromLuceneStorage.class);

    /**
     * Lucene {@link Collector} that appends every matching document to a sequence file until the
     * configured maximum number of hits has been reached.
     *
     * <p>The key of each record is the document's id field (empty string when the field is
     * absent); the value is filled in by {@code LuceneSeqFileHelper.populateValues}.
     */
    private static class SeqFileWriterCollector extends Collector {
        private final LuceneStorageConfiguration lucene2seqConf;
        private final SequenceFile.Writer sequenceFileWriter;
        /** Running count of appended records, seeded with the count from earlier indexes. */
        private int processedDocs;
        /** Context of the index segment currently being collected; set by {@link #setNextReader}. */
        private AtomicReaderContext arc;

        /**
         * @param luceneStorageConfiguration source of the id field, value fields and hit limit
         * @param writer                     destination sequence file; not closed by this collector
         * @param i                          number of documents already written before this
         *                                   collector starts (counts toward the hit limit)
         */
        SeqFileWriterCollector(LuceneStorageConfiguration luceneStorageConfiguration, SequenceFile.Writer writer, int i) {
            this.lucene2seqConf = luceneStorageConfiguration;
            this.sequenceFileWriter = writer;
            this.processedDocs = i;
        }

        /** @return the running count of appended records, including the initial seed value */
        int getProcessedDocs() {
            return this.processedDocs;
        }

        /** Scores are not used by this collector, so the scorer is ignored. */
        @Override
        public void setScorer(Scorer scorer) throws IOException {
        }

        /**
         * Loads the stored fields of the given segment-relative document and appends them to the
         * sequence file, unless the hit limit has been reached or both key and value are blank.
         *
         * @param i document id relative to the current segment reader
         * @throws IOException if reading the document or appending to the sequence file fails
         */
        @Override
        public void collect(int i) throws IOException {
            if (this.processedDocs >= this.lucene2seqConf.getMaxHits()) {
                // Limit reached: ignore all remaining hits.
                return;
            }

            DocumentStoredFieldVisitor storedFieldVisitor = this.lucene2seqConf.getStoredFieldVisitor();
            this.arc.reader().document(i, storedFieldVisitor);
            Document document = storedFieldVisitor.getDocument();

            List<String> fields = this.lucene2seqConf.getFields();
            Text key = new Text(Strings.nullToEmpty(document.get(this.lucene2seqConf.getIdField())));
            Text value = new Text();
            LuceneSeqFileHelper.populateValues(document, value, fields);

            // A record with neither an id nor any content carries no information; skip it
            // without counting it toward the hit limit.
            if (StringUtils.isBlank(key.toString()) && StringUtils.isBlank(value.toString())) {
                return;
            }

            this.sequenceFileWriter.append(key, value);
            this.processedDocs++;
        }

        /** Remembers the segment context so {@link #collect(int)} can resolve segment-relative ids. */
        @Override
        public void setNextReader(AtomicReaderContext atomicReaderContext) throws IOException {
            this.arc = atomicReaderContext;
        }

        /** Each hit is appended independently, so collection order does not matter. */
        @Override
        public boolean acceptsDocsOutOfOrder() {
            return true;
        }
    }

    /**
     * Runs the configured query against every configured index and writes the matching documents
     * to one sequence file per index.
     *
     * @param luceneStorageConfiguration index paths, output path, query, fields and hit limit
     * @throws IOException if an index cannot be opened or read, or a sequence file cannot be
     *                     created, written or closed
     */
    public void run(LuceneStorageConfiguration luceneStorageConfiguration) throws IOException {
        int processedDocs = 0;
        for (Path indexPath : luceneStorageConfiguration.getIndexPaths()) {
            processedDocs = exportIndex(luceneStorageConfiguration, indexPath, processedDocs);
        }
    }

    /**
     * Exports a single index to its own sequence file, always releasing the index directory,
     * index reader and sequence-file writer, even when an error occurs part-way through.
     *
     * @param conf             export configuration
     * @param indexPath        location of the Lucene index on the local file system
     * @param alreadyProcessed number of documents written for previously processed indexes
     * @return the updated running total of written documents
     * @throws IOException if reading the index or writing the sequence file fails
     */
    private static int exportIndex(LuceneStorageConfiguration conf, Path indexPath, int alreadyProcessed)
            throws IOException {
        FSDirectory directory = FSDirectory.open(new File(indexPath.toUri().getPath()));
        DirectoryReader reader = null;
        SequenceFile.Writer writer = null;
        // Stays true until the happy path completes, so that close() failures are only
        // propagated when they would not mask an earlier, more relevant exception.
        boolean threw = true;
        try {
            reader = DirectoryReader.open(directory);
            IndexSearcher searcher = new IndexSearcher(reader);

            // NOTE(review): presumably these calls reject a field that is missing from the
            // index; confirm against LuceneIndexHelper.
            LuceneIndexHelper.fieldShouldExistInIndex(searcher, conf.getIdField());
            for (String field : conf.getFields()) {
                LuceneIndexHelper.fieldShouldExistInIndex(searcher, field);
            }

            Configuration configuration = conf.getConfiguration();
            FileSystem fileSystem = FileSystem.get(configuration);
            Path sequenceFilePath = new Path(conf.getSequenceFilesOutputPath(), indexPath.getName());
            writer = new SequenceFile.Writer(fileSystem, configuration, sequenceFilePath, Text.class, Text.class);

            SeqFileWriterCollector collector = new SeqFileWriterCollector(conf, writer, alreadyProcessed);
            searcher.search(conf.getQuery(), collector);

            int total = collector.getProcessedDocs();
            // Report only what went into this file; the collector's counter is cumulative.
            log.info("Wrote {} documents in {}", total - alreadyProcessed, sequenceFilePath.toUri());

            threw = false;
            return total;
        } finally {
            // Release in reverse order of acquisition; Closeables.close tolerates null.
            try {
                Closeables.close(writer, threw);
            } finally {
                try {
                    Closeables.close(reader, threw);
                } finally {
                    Closeables.close(directory, threw);
                }
            }
        }
    }
}
