package org.apache.mahout.vectorizer;

import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.StringTuple;
import org.junit.Test;

/* loaded from: input_file:org/apache/mahout/vectorizer/DocumentProcessorTest.class */
/**
 * Exercises {@link DocumentProcessor#tokenizeDocuments}: writes a small sequence file of
 * (document id, document text) pairs, runs the tokenizer over it with {@link DefaultAnalyzer},
 * and checks the (document id, token tuple) pairs found in the single output file.
 */
public class DocumentProcessorTest extends MahoutTestCase {

    /**
     * Tokenizes two short documents and verifies the ids and token lists read back from the output.
     *
     * @throws Exception if the file system, the tokenization run, or reflective instantiation of
     *                   the key/value holders fails
     */
    @Test
    public void testTokenizeDocuments() throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path input = new Path(getTestTempDirPath(), "inputDir");
        Path output = new Path(getTestTempDirPath(), "outputDir");

        // Write the two input documents; close the writer even if an append fails.
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, input, Text.class, Text.class);
        try {
            writer.append(new Text("123"), new Text("A test for the document processor"));
            writer.append(new Text("456"), new Text("and another one"));
        } finally {
            writer.close();
        }

        DocumentProcessor.tokenizeDocuments(input, DefaultAnalyzer.class, output, conf);

        FileStatus[] outputFiles = fs.listStatus(output);
        assertEquals(1L, outputFiles.length);

        // Read the results back; the reader is always released, including on assertion failure.
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, outputFiles[0].getPath(), conf);
        try {
            Text docId = reader.getKeyClass().asSubclass(Text.class).newInstance();
            StringTuple tokens = reader.getValueClass().asSubclass(StringTuple.class).newInstance();

            // next() reports whether a record was actually read; check it so a short output
            // fails here instead of on unpopulated key/value holders.
            assertTrue("missing first tokenized document", reader.next(docId, tokens));
            assertEquals("123", docId.toString());
            assertEquals(Arrays.asList("test", "document", "processor"), tokens.getEntries());

            assertTrue("missing second tokenized document", reader.next(docId, tokens));
            assertEquals("456", docId.toString());
            assertEquals(Arrays.asList("another", "one"), tokens.getEntries());

            // Two documents went in, so nothing further should come out.
            assertFalse("unexpected extra record in output", reader.next(docId, tokens));
        } finally {
            reader.close();
        }
    }
}
