package org.apache.tika.eval;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
import java.util.concurrent.ArrayBlockingQueue;
import org.apache.tika.MockDBWriter;
import org.apache.tika.TikaTest;
import org.apache.tika.eval.AbstractProfiler;
import org.apache.tika.eval.db.Cols;
import org.apache.tika.eval.db.TableInfo;
import org.apache.tika.eval.io.ExtractReader;
import org.apache.tika.eval.io.ExtractReaderException;
import org.apache.tika.eval.util.LanguageIDWrapper;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.RecursiveParserWrapper;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

/* loaded from: input_file:org/apache/tika/eval/SimpleComparerTest.class */
/**
 * Tests for {@link ExtractComparer} and for the static helpers
 * {@code AbstractProfiler.getContent} and {@code AbstractProfiler.countAttachments}.
 *
 * <p>The comparer under test is handed a {@link MockDBWriter}; each test reads the
 * rows back from that writer via {@code getTable(...)} and checks individual columns.
 */
public class SimpleComparerTest extends TikaTest {
    private ExtractComparer comparer = null;
    private MockDBWriter writer = null;

    /**
     * Creates a fresh writer and comparer for every test, then loads the "en"
     * common-tokens resource and the built-in language-id models.
     */
    @Before
    public void setUp() throws Exception {
        writer = new MockDBWriter();
        // Typed null locals keep the argument types identical to explicit casts,
        // so constructor overload resolution is unaffected.
        ArrayBlockingQueue noQueue = null;
        Path noPath = null;
        ExtractReader reader =
                new ExtractReader(ExtractReader.ALTER_METADATA_LIST.AS_IS, -1L, -1L);
        comparer = new ExtractComparer(
                noQueue, noPath, Paths.get("extractsA"), Paths.get("extractsB"), reader, writer);
        AbstractProfiler.loadCommonTokens(getResourceAsFile("/common_tokens").toPath(), "en");
        LanguageIDWrapper.loadBuiltInModels();
    }

    /** Compares file1.pdf.json from extractsA and extractsB and checks the content statistics. */
    @Test
    public void testBasic() throws Exception {
        comparer.compareFiles(
                filePaths("file1.pdf.json", "/test-dirs/extractsA/file1.pdf.json"),
                filePaths("file1.pdf.json", "/test-dirs/extractsB/file1.pdf.json"));

        Map<Cols, String> comparison = writer.getTable(ExtractComparer.CONTENT_COMPARISONS).get(0);
        Assert.assertEquals("0", comparison.get(Cols.ID));
        Assert.assertTrue(comparison.get(Cols.TOP_10_UNIQUE_TOKEN_DIFFS_A)
                .startsWith("1,200: 1 | 120000: 1 | over: 1"));

        Map<Cols, String> contentsA = writer.getTable(ExtractComparer.CONTENTS_TABLE_A).get(0);
        Assert.assertEquals("0", contentsA.get(Cols.ID));
        Assert.assertEquals("70", contentsA.get(Cols.CONTENT_LENGTH));
        Assert.assertEquals("10", contentsA.get(Cols.NUM_UNIQUE_TOKENS));
        Assert.assertEquals("14", contentsA.get(Cols.NUM_TOKENS));
        Assert.assertEquals("12", contentsA.get(Cols.NUM_ALPHABETIC_TOKENS));
        Assert.assertEquals("6", contentsA.get(Cols.NUM_COMMON_TOKENS));
        Assert.assertEquals("57", contentsA.get(Cols.TOKEN_LENGTH_SUM));
        Assert.assertEquals("en", contentsA.get(Cols.COMMON_TOKENS_LANG));

        Map<Cols, String> contentsB = writer.getTable(ExtractComparer.CONTENTS_TABLE_B).get(0);
        Assert.assertEquals("0", contentsB.get(Cols.ID));
        Assert.assertEquals("76", contentsB.get(Cols.CONTENT_LENGTH));
        Assert.assertEquals("9", contentsB.get(Cols.NUM_UNIQUE_TOKENS));
        Assert.assertEquals("13", contentsB.get(Cols.NUM_TOKENS));
        Assert.assertEquals("4", contentsB.get(Cols.NUM_COMMON_TOKENS));
        Assert.assertEquals("64", contentsB.get(Cols.TOKEN_LENGTH_SUM));
        Assert.assertEquals("en", contentsB.get(Cols.COMMON_TOKENS_LANG));

        Map<Cols, String> profileA = writer.getTable(ExtractComparer.PROFILES_A).get(0);
        Assert.assertEquals("2", profileA.get(Cols.NUM_PAGES));
    }

    /** Compares the file12_es extracts and expects "es" as the common-tokens language. */
    @Test
    public void testBasicSpanish() throws Exception {
        // NOTE(review): the relative name is "file1.pdf.json" although the extract is
        // file12_es.txt.json; kept as-is because its downstream use is not visible here.
        comparer.compareFiles(
                filePaths("file1.pdf.json", "/test-dirs/extractsA/file12_es.txt.json"),
                filePaths("file1.pdf.json", "/test-dirs/extractsB/file12_es.txt.json"));

        Map<Cols, String> contentsA = writer.getTable(ExtractComparer.CONTENTS_TABLE_A).get(0);
        Assert.assertEquals("133", contentsA.get(Cols.CONTENT_LENGTH));
        Assert.assertEquals("7", contentsA.get(Cols.NUM_UNIQUE_TOKENS));
        Assert.assertEquals("24", contentsA.get(Cols.NUM_TOKENS));
        Assert.assertEquals("3", contentsA.get(Cols.NUM_COMMON_TOKENS));
        Assert.assertEquals("108", contentsA.get(Cols.TOKEN_LENGTH_SUM));
        Assert.assertEquals("es", contentsA.get(Cols.COMMON_TOKENS_LANG));
        Assert.assertEquals("24", contentsA.get(Cols.NUM_ALPHABETIC_TOKENS));
    }

    /**
     * Compares file13_attachANotB.doc.json against a B side named "non-existent.json"
     * and expects "zh-cn" as the common-tokens language for A. Only table A is checked.
     */
    @Test
    public void testChinese() throws Exception {
        comparer.compareFiles(
                filePaths("file13_attachANotB.doc.json",
                        "/test-dirs/extractsA/file13_attachANotB.doc.json"),
                filePaths("non-existent.json", "/test-dirs/extractsB/non-existent.json"));

        Map<Cols, String> contentsA = writer.getTable(ExtractComparer.CONTENTS_TABLE_A).get(0);
        Assert.assertEquals("122", contentsA.get(Cols.TOKEN_LENGTH_SUM));
        Assert.assertEquals("3", contentsA.get(Cols.NUM_COMMON_TOKENS));
        Assert.assertEquals("zh-cn", contentsA.get(Cols.COMMON_TOKENS_LANG));
    }

    /**
     * Uses file4_emptyB.pdf.json as the B extract and expects the B extract-exception
     * table to record the ordinal of {@code ZERO_BYTE_EXTRACT_FILE}.
     */
    @Test
    public void testEmpty() throws Exception {
        comparer.compareFiles(
                filePaths("file1.pdf", "/test-dirs/extractsA/file1.pdf.json"),
                filePaths("file1.pdf", "/test-dirs/extractsB/file4_emptyB.pdf.json"));

        String expectedId =
                Integer.toString(ExtractReaderException.TYPE.ZERO_BYTE_EXTRACT_FILE.ordinal());
        Map<Cols, String> exceptionB =
                writer.getTable(ExtractComparer.EXTRACT_EXCEPTION_TABLE_B).get(0);
        Assert.assertEquals(expectedId, exceptionB.get(Cols.EXTRACT_EXCEPTION_ID));
    }

    /**
     * Checks the length returned by {@code AbstractProfiler.getContent} and the
     * {@code CONTENT_TRUNCATED_AT_MAX_LEN} flag it writes, for: content that fits the
     * limit, content longer than the limit, an empty {@link Metadata}, and {@code null}.
     */
    @Test
    public void testGetContent() throws Exception {
        // The metadata must be kept in a variable: it is the input to getContent below.
        Metadata metadata = new Metadata();
        metadata.add(RecursiveParserWrapper.TIKA_CONTENT, "0123456789");
        Map<Cols, String> data = new HashMap<>();

        Assert.assertEquals(10, AbstractProfiler.getContent(metadata, 10, data).length());
        Assert.assertEquals("FALSE", data.get(Cols.CONTENT_TRUNCATED_AT_MAX_LEN));

        Assert.assertEquals(4, AbstractProfiler.getContent(metadata, 4, data).length());
        Assert.assertEquals("TRUE", data.get(Cols.CONTENT_TRUNCATED_AT_MAX_LEN));

        Assert.assertEquals(0, AbstractProfiler.getContent(new Metadata(), 10, data).length());
        Assert.assertEquals("FALSE", data.get(Cols.CONTENT_TRUNCATED_AT_MAX_LEN));

        Assert.assertEquals(0, AbstractProfiler.getContent((Metadata) null, 10, data).length());
        Assert.assertEquals("FALSE", data.get(Cols.CONTENT_TRUNCATED_AT_MAX_LEN));
    }

    /**
     * Compares the file6_accessEx extracts and expects both parse-exception tables to
     * record {@code ACCESS_PERMISSION} with no stack-trace columns set.
     */
    @Test
    public void testAccessException() throws Exception {
        comparer.compareFiles(
                filePaths("file6_accessEx.pdf.json", "/test-dirs/extractsA/file6_accessEx.pdf.json"),
                filePaths("file6_accessEx.pdf.json", "/test-dirs/extractsB/file6_accessEx.pdf.json"));

        String expectedId =
                Integer.toString(AbstractProfiler.EXCEPTION_TYPE.ACCESS_PERMISSION.ordinal());
        TableInfo[] exceptionTables = {
            ExtractComparer.EXCEPTION_TABLE_A, ExtractComparer.EXCEPTION_TABLE_B
        };
        for (TableInfo table : exceptionTables) {
            Map<Cols, String> row = writer.getTable(table).get(0);
            Assert.assertEquals(expectedId, row.get(Cols.PARSE_EXCEPTION_ID));
            Assert.assertNull(row.get(Cols.ORIG_STACK_TRACE));
            Assert.assertNull(row.get(Cols.SORT_STACK_TRACE));
        }
    }

    /**
     * Feeds six embedded-resource paths to {@code AbstractProfiler.countAttachments}
     * and checks the per-entry counts it returns, in input order.
     */
    @Test
    public void testAttachmentCounts() {
        String[] embeddedPaths = {
            "dir1/dir2/file.zip",
            "/f1.docx/f2.zip/text1.txt",
            "/f1.docx/f2.zip/text2.txt",
            "/f1.docx/f2.zip",
            "/f1.docx",
            "/f1.docx/text3.txt"
        };
        List<Metadata> metadataList = new ArrayList<>();
        for (String embeddedPath : embeddedPaths) {
            Metadata m = new Metadata();
            m.set(RecursiveParserWrapper.EMBEDDED_RESOURCE_PATH, embeddedPath);
            metadataList.add(m);
        }

        List<Integer> expected = new ArrayList<>();
        expected.add(5);
        expected.add(0);
        expected.add(0);
        expected.add(2);
        expected.add(4);
        expected.add(0);

        List<Integer> counts = AbstractProfiler.countAttachments(metadataList);
        Assert.assertEquals(expected, counts);
    }

    /**
     * Compares the two file14_diffAttachOrder extracts and expects three content
     * comparison rows, each with an overlap of "1.0".
     */
    @Test
    public void testDifferentlyOrderedAttachments() throws Exception {
        // NOTE(review): the B relative name is "file6_accessEx.pdf.json" although the
        // extract is file14_diffAttachOrder.json; kept as-is, looks like a copy-paste.
        comparer.compareFiles(
                filePaths("file14_diffAttachOrder.json",
                        "/test-dirs/extractsA/file14_diffAttachOrder.json"),
                filePaths("file6_accessEx.pdf.json",
                        "/test-dirs/extractsB/file14_diffAttachOrder.json"));

        List<Map<Cols, String>> comparisons = writer.getTable(ExtractComparer.CONTENT_COMPARISONS);
        Assert.assertEquals(3, comparisons.size());
        for (Map<Cols, String> row : comparisons) {
            Assert.assertEquals("1.0", row.get(Cols.OVERLAP));
        }
    }

    /**
     * Ignored debugging aid: loads the short common-tokens list, compares the
     * file1.pdf.json extracts and prints every table to standard output.
     */
    @Test
    @Ignore
    public void testDebug() throws Exception {
        AbstractProfiler.loadCommonTokens(
                Paths.get(getResourceAsFile("/common_tokens_short.txt").toURI()), "en");
        comparer.compareFiles(
                filePaths("file1.pdf.json", "/test-dirs/extractsA/file1.pdf.json"),
                filePaths("file1.pdf.json", "/test-dirs/extractsB/file1.pdf.json"));
        debugPrintAllTables();
    }

    /**
     * Prints every row of the given table to standard output, one column per line,
     * prefixed with the row index. Does nothing if the writer has no such table.
     *
     * @param tableInfo the table to print
     */
    private void debugPrintTable(TableInfo tableInfo) {
        List<Map<Cols, String>> rows = writer.getTable(tableInfo);
        if (rows == null) {
            return;
        }
        System.out.println("TABLE: " + tableInfo.getName());
        int rowIndex = 0;
        for (Map<Cols, String> row : rows) {
            // TreeSet gives a stable, sorted column order for readable output.
            for (Cols col : new TreeSet<>(row.keySet())) {
                System.out.println(rowIndex + " :: " + col + " : " + row.get(col));
            }
            rowIndex++;
        }
        System.out.println("");
    }

    /**
     * Prints a single row to standard output, one column per line in sorted column order.
     *
     * @param map the row to print
     */
    private void debugPrintRow(Map<Cols, String> map) {
        for (Cols col : new TreeSet<>(map.keySet())) {
            System.out.println(col + " : " + map.get(col));
        }
    }

    /**
     * Ignored scratch test: compares two extracts given by the (currently empty)
     * paths below and prints every table.
     */
    @Test
    @Ignore("useful for testing 2 files not in test set")
    public void oneOff() throws Exception {
        // presumably the empty paths are placeholders to be filled in by hand
        comparer.compareFiles(
                new EvalFilePaths(Paths.get("file1.pdf.json"), Paths.get("")),
                new EvalFilePaths(Paths.get("file1.pdf.json"), Paths.get("")));
        debugPrintAllTables();
    }

    /** Builds an {@link EvalFilePaths} from a relative name and a classpath resource name. */
    private EvalFilePaths filePaths(String relativeName, String resourceName) throws Exception {
        return new EvalFilePaths(
                Paths.get(relativeName), getResourceAsFile(resourceName).toPath());
    }

    /** Prints each of the comparer's tables via {@link #debugPrintTable(TableInfo)}. */
    private void debugPrintAllTables() {
        TableInfo[] allTables = {
            ExtractComparer.COMPARISON_CONTAINERS,
            ExtractComparer.EXTRACT_EXCEPTION_TABLE_A,
            ExtractComparer.EXTRACT_EXCEPTION_TABLE_B,
            ExtractComparer.EXCEPTION_TABLE_A,
            ExtractComparer.EXCEPTION_TABLE_B,
            ExtractComparer.PROFILES_A,
            ExtractComparer.PROFILES_B,
            ExtractComparer.CONTENTS_TABLE_A,
            ExtractComparer.CONTENTS_TABLE_B,
            ExtractComparer.CONTENT_COMPARISONS
        };
        for (TableInfo table : allTables) {
            debugPrintTable(table);
        }
    }
}
