package water.parser.parquet;

import com.google.common.io.Files;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Date;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import water.DKV;
import water.Key;
import water.TestUtil;
import water.fvec.Frame;
import water.parser.BufferedString;
import water.parser.ParseDataset;
import water.parser.ParseSetup;
import water.util.IcedInt;
import water.util.PrettyPrint;

@RunWith(Parameterized.class)
/* loaded from: input_file:water/parser/parquet/ParseTestParquet.class */
public class ParseTestParquet extends TestUtil {
    /** Absolute tolerance passed to the floating-point assertions of this class. */
    private static final double EPSILON = 1.0E-9d;

    /**
     * Value injected by the Parameterized runner from {@link #data()}; it is copied into
     * {@code ParseSetup.disableParallelParse} for the parses performed by the tests.
     */
    @Parameterized.Parameter
    public boolean disableParallelParse;

    /** Parse-setup transformer, recreated before every test by {@link #makeParseSetupTransformer()}. */
    public TestUtil.ParseSetupTransformer psTransformer;

    /**
     * Runs once before the tests of this class and calls {@code stall_till_cloudsize} with a size of 1.
     */
    @BeforeClass
    public static void setup() {
        final int requiredCloudSize = 1;
        stall_till_cloudsize(requiredCloudSize);
    }

    /**
     * Supplies the parameter values for {@link #disableParallelParse}.
     *
     * @return a two-element array holding {@code false} followed by {@code true}
     */
    @Parameterized.Parameters
    public static Object[] data() {
        final Object[] flags = new Object[2];
        flags[0] = Boolean.FALSE;
        flags[1] = Boolean.TRUE;
        return flags;
    }

    /**
     * Rebuilds {@link #psTransformer} before each test. The transformer copies the current
     * {@link #disableParallelParse} value into the setup it receives and returns that same setup.
     */
    @Before
    public void makeParseSetupTransformer() {
        final TestUtil.ParseSetupTransformer transformer = new TestUtil.ParseSetupTransformer() {
            @Override
            public ParseSetup transformSetup(ParseSetup guessedSetup) {
                guessedSetup.disableParallelParse = ParseTestParquet.this.disableParallelParse;
                return guessedSetup;
            }
        };
        this.psTransformer = transformer;
    }

    /**
     * Parses the given test file with the parse-setup transformer of the current test run applied.
     *
     * @param fileName path of the test file to parse
     * @return the frame produced by {@code parse_test_file}
     */
    private Frame parse_parquet(String fileName) {
        final TestUtil.ParseSetupTransformer transformer = this.psTransformer;
        return parse_test_file(fileName, transformer);
    }

    /**
     * Parses the airlines data from the CSV archive and from the Snappy Parquet file, then asserts
     * that both frames agree on column names, type strings and row count and pass
     * {@code assertBitIdentical}.
     */
    @Test
    public void testParseSimple() {
        Frame expected = null;
        Frame actual = null;
        try {
            expected = parse_test_file("smalldata/airlines/AirlinesTrain.csv.zip");
            actual = parse_parquet("smalldata/parser/parquet/airlines-simple.snappy.parquet");
            Assert.assertEquals(Arrays.asList(expected._names), Arrays.asList(actual._names));
            Assert.assertEquals(Arrays.asList(expected.typesStr()), Arrays.asList(actual.typesStr()));
            Assert.assertEquals(expected.numRows(), actual.numRows());
            assertBitIdentical(expected, actual);
        } finally {
            // Single cleanup path: each frame is deleted exactly once, whether or not an assertion failed.
            if (expected != null) {
                expected.delete();
            }
            if (actual != null) {
                actual.delete();
            }
        }
    }

    /**
     * Overrides the guessed type of column 1 of the airlines Parquet file and checks that the
     * parsed column is reported as "String", that the frame matches the CSV-parsed frame whose
     * column 1 was converted with {@code toStringVec()}, and that the parse job has no warnings.
     */
    @Test
    public void testParseWithTypeOverride() {
        Frame expected = null;
        Frame actual = null;
        try {
            Key[] sourceKeys = {makeNfsFileVec("smalldata/parser/parquet/airlines-simple.snappy.parquet")._key};
            ParseSetup setup = ParseSetup.guessSetup(sourceKeys, false, 0);
            byte[] columnTypes = setup.getColumnTypes();
            // Type code 2 is the override under test; the assertion below expects it to surface as "String".
            columnTypes[1] = 2;
            setup.setColumnTypes(columnTypes);
            setup.disableParallelParse = this.disableParallelParse;
            ParseDataset parse = ParseDataset.forkParseDataset(Key.make(), sourceKeys, setup, true);
            actual = (Frame) parse._job.get();
            expected = parse_test_file("smalldata/airlines/AirlinesTrain.csv.zip");
            // Convert the reference column to a string vec and drop the vec it replaces.
            expected.replace(1, expected.vec(1).toStringVec()).remove();
            Assert.assertEquals("String", actual.typesStr()[1]);
            Assert.assertEquals(Arrays.asList(expected._names), Arrays.asList(actual._names));
            Assert.assertEquals(Arrays.asList(expected.typesStr()), Arrays.asList(actual.typesStr()));
            assertBitIdentical(expected, actual);
            Assert.assertNull(parse._job.warns());
        } finally {
            // Single cleanup path: each frame is deleted exactly once, whether or not an assertion failed.
            if (expected != null) {
                expected.delete();
            }
            if (actual != null) {
                actual.delete();
            }
        }
    }

    /**
     * Requests a type override for column 9 of the airlines Parquet file and checks that the
     * override is not applied: the column stays "Numeric", the frame still matches the CSV-parsed
     * frame, and the parse job carries exactly one warning ending with the
     * "Unsupported type override" message.
     */
    @Test
    public void testParseWithInvalidTypeOverride() {
        Frame expected = null;
        Frame actual = null;
        try {
            Key[] sourceKeys = {makeNfsFileVec("smalldata/parser/parquet/airlines-simple.snappy.parquet")._key};
            ParseSetup setup = ParseSetup.guessSetup(sourceKeys, false, 0);
            byte[] columnTypes = setup.getColumnTypes();
            // Type code 2 corresponds to the "String" target named in the expected warning below.
            columnTypes[9] = 2;
            setup.setColumnTypes(columnTypes);
            setup.disableParallelParse = this.disableParallelParse;
            ParseDataset parse = ParseDataset.forkParseDataset(Key.make(), sourceKeys, setup, true);
            actual = (Frame) parse._job.get();
            Assert.assertEquals("Numeric", actual.typesStr()[9]);
            expected = parse_test_file("smalldata/airlines/AirlinesTrain.csv.zip");
            Assert.assertEquals(Arrays.asList(expected._names), Arrays.asList(actual._names));
            Assert.assertEquals(Arrays.asList(expected.typesStr()), Arrays.asList(actual.typesStr()));
            assertBitIdentical(expected, actual);
            Assert.assertEquals(1L, parse._job.warns().length);
            Assert.assertTrue(parse._job.warns()[0].endsWith("error = 'Unsupported type override (Numeric -> String). Column Distance will be parsed as Numeric'"));
        } finally {
            // Single cleanup path: each frame is deleted exactly once, whether or not an assertion failed.
            if (expected != null) {
                expected.delete();
            }
            if (actual != null) {
                actual.delete();
            }
        }
    }

    /**
     * Generates ten Parquet files with Avro primitive types into one temporary directory, parses
     * the directory through the frame assertion and checks column names, type codes and the
     * domain of the boolean column.
     */
    @Test
    public void testParseMulti() {
        final TestUtil.GenFrameAssertion assertion = new TestUtil.GenFrameAssertion("testParseMulti-$.parquet", TestUtil.ari(new int[]{9, 100}), this.psTransformer) {
            @Override
            protected File prepareFile() throws IOException {
                final int fileCount = 10;
                final File outputDir = Files.createTempDir();
                for (int idx = 0; idx < fileCount; idx++) {
                    // "$" in the file-name template is replaced by the file index.
                    final String fileName = this.file.replace("$", String.valueOf(idx));
                    final File parquetFile = ParquetFileGenerator.generateAvroPrimitiveTypes(outputDir, fileName, nrows() / fileCount, new Date());
                    // Remove the hidden ".<name>.crc" companion file if the generator produced one.
                    final String crcPath = parquetFile.getCanonicalPath().replace(fileName, "." + fileName + ".crc");
                    final File crcFile = new File(crcPath);
                    if (crcFile.exists()) {
                        if (!crcFile.delete()) {
                            throw new IllegalStateException("Unable to delete Parquet CRC for file: " + parquetFile);
                        }
                    }
                }
                return outputDir;
            }

            @Override
            public void check(Frame parsed) {
                final String[] expectedNames = TestUtil.ar(new String[]{"myboolean", "myint", "mylong", "myfloat", "mydouble", "mydate", "myuuid", "mystring", "myenum"});
                Assert.assertArrayEquals("Column names need to match!", expectedNames, parsed.names());
                Assert.assertArrayEquals("Column types need to match!", TestUtil.ar(new byte[]{4, 3, 3, 3, 3, 5, 1, 2, 4}), parsed.types());
                final String[] booleanDomain = TestUtil.ar(new String[]{"False", "True"});
                Assert.assertArrayEquals("Boolean domain needs to be [False,True]", booleanDomain, parsed.vec(0).domain());
            }
        };
        assertFrameAssertion(assertion);
    }

    /**
     * Generates a single Parquet file with Avro primitive types, parses it and checks column
     * names, type codes, the boolean domain and the per-row values of the boolean, int, long,
     * float, double, string and enum columns.
     */
    @Test
    public void testParseAvroPrimitiveTypes() {
        assertFrameAssertion(new TestUtil.GenFrameAssertion("avroPrimitiveTypes.parquet", TestUtil.ari(new int[]{9, 100}), this.psTransformer) {
            @Override
            protected File prepareFile() throws IOException {
                return ParquetFileGenerator.generateAvroPrimitiveTypes(Files.createTempDir(), this.file, nrows(), new Date());
            }

            @Override
            public void check(Frame frame) {
                Assert.assertArrayEquals("Column names need to match!", TestUtil.ar(new String[]{"myboolean", "myint", "mylong", "myfloat", "mydouble", "mydate", "myuuid", "mystring", "myenum"}), frame.names());
                Assert.assertArrayEquals("Column types need to match!", TestUtil.ar(new byte[]{4, 3, 3, 3, 3, 5, 1, 2, 4}), frame.types());
                Assert.assertArrayEquals("Boolean domain needs to be [False,True]", TestUtil.ar(new String[]{"False", "True"}), frame.vec(0).domain());
                // Reused buffer for reading the string column.
                BufferedString bufferedString = new BufferedString();
                for (int i = 0; i < nrows(); i++) {
                    Assert.assertEquals("Value in column myboolean", 1 - (i % 2), frame.vec(0).at8(i));
                    Assert.assertEquals("Value in column myint", 1 + i, frame.vec(1).at8(i));
                    Assert.assertEquals("Value in column mylong", 2 + i, frame.vec(2).at8(i));
                    Assert.assertEquals("Value in column myfloat", 3.1f + i, frame.vec(3).at(i), ParseTestParquet.EPSILON);
                    // Column 4 is "mydouble"; the failure message now names the column actually checked.
                    Assert.assertEquals("Value in column mydouble", 4.1d + i, frame.vec(4).at(i), ParseTestParquet.EPSILON);
                    Assert.assertEquals("Value in column mystring", "hello world: " + i, frame.vec(7).atStr(bufferedString, i).toSanitizedString());
                    Assert.assertEquals("Value in column myenum", i % 2 == 0 ? "a" : "b", frame.vec(8).factor(frame.vec(8).at8(i)));
                }
            }
        });
    }

    /**
     * Generates a Parquet file whose timestamps are based on the current time, parses it and
     * checks column names, type codes, the int32 values and that the timestamp of row {@code i}
     * equals the base time plus {@code i * 117}.
     */
    @Test
    public void testParseTimestamps() {
        final Date baseTime = new Date();
        final TestUtil.GenFrameAssertion assertion = new TestUtil.GenFrameAssertion("avroPrimitiveTypes.parquet", TestUtil.ari(new int[]{5, 100}), this.psTransformer) {
            @Override
            protected File prepareFile() throws IOException {
                final File outputDir = Files.createTempDir();
                return ParquetFileGenerator.generateParquetFile(outputDir, this.file, nrows(), baseTime);
            }

            @Override
            public void check(Frame parsed) {
                final String[] expectedNames = TestUtil.ar(new String[]{"int32_field", "int64_field", "float_field", "double_field", "timestamp_field"});
                Assert.assertArrayEquals("Column names need to match!", expectedNames, parsed.names());
                Assert.assertArrayEquals("Column types need to match!", TestUtil.ar(new byte[]{3, 3, 3, 3, 5}), parsed.types());
                int row = 0;
                while (row < nrows()) {
                    final long expectedInt32 = 32 + row;
                    final long expectedTimestamp = baseTime.getTime() + (row * 117);
                    Assert.assertEquals("Value in column int32_field", expectedInt32, parsed.vec(0).at8(row));
                    Assert.assertEquals("Value in column timestamp_field", expectedTimestamp, parsed.vec(4).at8(row));
                    row++;
                }
            }
        };
        assertFrameAssertion(assertion);
    }

    /**
     * Generates a Parquet file through {@code generateEmptyWithSchema}, parses it with an expected
     * dimension of 5 columns and 0 rows and checks column names and type codes.
     */
    @Test
    public void testParseSingleEmpty() {
        final TestUtil.GenFrameAssertion assertion = new TestUtil.GenFrameAssertion("empty.parquet", TestUtil.ari(new int[]{5, 0}), this.psTransformer) {
            @Override
            protected File prepareFile() throws IOException {
                final File outputDir = Files.createTempDir();
                return ParquetFileGenerator.generateEmptyWithSchema(outputDir, this.file);
            }

            @Override
            public void check(Frame parsed) {
                final String[] expectedNames = TestUtil.ar(new String[]{"int32_field", "int64_field", "float_field", "double_field", "timestamp_field"});
                final byte[] expectedTypes = TestUtil.ar(new byte[]{3, 3, 3, 3, 5});
                Assert.assertArrayEquals("Column names need to match!", expectedNames, parsed.names());
                Assert.assertArrayEquals("Column types need to match!", expectedTypes, parsed.types());
            }
        };
        assertFrameAssertion(assertion);
    }

    /**
     * Parses a generated single-string-column Parquet file while an {@code IcedInt(6)} is stored
     * in the DKV under the key "&lt;WriterDelegate canonical name&gt;_maxStringSize", and expects a 1x1
     * frame whose only value is NA. The DKV entry is removed again when the test finishes.
     */
    @Test
    public void testParseStringOverflow() {
        final TestUtil.GenFrameAssertion assertion = new TestUtil.GenFrameAssertion("large.parquet", TestUtil.ari(new int[]{1, 1}), this.psTransformer) {
            @Override
            protected File prepareFile() throws IOException {
                final File outputDir = Files.createTempDir();
                return ParquetFileGenerator.generateStringParquet(outputDir, this.file);
            }

            @Override
            public Frame prepare() {
                try {
                    final File generated = prepareFile();
                    this.generatedFile = generated;
                    final String generatedPath = generated.getCanonicalPath();
                    System.out.println("File generated into: " + generatedPath);
                    // Forces the single column to type code 2 instead of relying on the guessed type.
                    final byte[] forcedTypes = new byte[]{2};
                    return ParseTestParquet.this.parse_test_file(generated.getCanonicalPath(), null, 1, forcedTypes, this.psTransformer);
                } catch (IOException ioe) {
                    throw new RuntimeException("Cannot prepare test frame from file: " + this.file, ioe);
                }
            }

            @Override
            public void check(Frame parsed) {
                Assert.assertArrayEquals("Column names need to match!", TestUtil.ar(new String[]{"string_field"}), parsed.names());
                Assert.assertArrayEquals("Column types need to match!", TestUtil.ar(new byte[]{2}), parsed.types());
                Assert.assertEquals(1L, parsed.naCount());
                Assert.assertEquals(1L, parsed.numCols());
                Assert.assertEquals(1L, parsed.numRows());
            }
        };
        final String keyName = WriterDelegate.class.getCanonicalName() + "_maxStringSize";
        final Key maxStringSizeKey = Key.make(keyName);
        try {
            // NOTE(review): presumably WriterDelegate reads this entry as its string size limit - confirm.
            DKV.put(maxStringSizeKey, new IcedInt(6));
            assertFrameAssertion(assertion);
        } finally {
            DKV.remove(maxStringSizeKey);
        }
    }

    /**
     * Generates nine populated Parquet files plus one file created by
     * {@code generateEmptyWithSchema} into the same temporary directory, parses the directory and
     * checks column names and type codes.
     */
    @Test
    public void testParseMultiWithEmpty() {
        final TestUtil.GenFrameAssertion assertion = new TestUtil.GenFrameAssertion("testParseMultiEmpty-$.parquet", TestUtil.ari(new int[]{5, 90}), this.psTransformer) {
            @Override
            protected File prepareFile() throws IOException {
                final int populatedFiles = 9;
                final File outputDir = Files.createTempDir();
                for (int idx = 0; idx < populatedFiles; idx++) {
                    // "$" in the file-name template is replaced by the file index.
                    final String fileName = this.file.replace("$", String.valueOf(idx));
                    final File parquetFile = ParquetFileGenerator.generateParquetFile(outputDir, fileName, nrows() / populatedFiles, new Date());
                    // Remove the hidden ".<name>.crc" companion file if the generator produced one.
                    final String crcPath = parquetFile.getCanonicalPath().replace(fileName, "." + fileName + ".crc");
                    final File crcFile = new File(crcPath);
                    if (crcFile.exists() && !crcFile.delete()) {
                        throw new IllegalStateException("Unable to delete Parquet CRC for file: " + parquetFile);
                    }
                }
                // The last file (index 9) is generated through generateEmptyWithSchema.
                final String emptyName = this.file.replace("$", String.valueOf(populatedFiles));
                final File emptyFile = ParquetFileGenerator.generateEmptyWithSchema(outputDir, emptyName);
                final File emptyCrc = new File(emptyFile.getCanonicalPath().replace(emptyName, "." + emptyName + ".crc"));
                if (emptyCrc.exists() && !emptyCrc.delete()) {
                    throw new IllegalStateException("Unable to delete Parquet CRC for file: " + emptyName);
                }
                return outputDir;
            }

            @Override
            public void check(Frame parsed) {
                final String[] expectedNames = TestUtil.ar(new String[]{"int32_field", "int64_field", "float_field", "double_field", "timestamp_field"});
                final byte[] expectedTypes = TestUtil.ar(new byte[]{3, 3, 3, 3, 5});
                Assert.assertArrayEquals("Column names need to match!", expectedNames, parsed.names());
                Assert.assertArrayEquals("Column types need to match!", expectedTypes, parsed.types());
            }
        };
        assertFrameAssertion(assertion);
    }

    /**
     * Parses a generated sparse Parquet file and checks that columns 0, 1 and 3 hold a value only
     * on every tenth row (NA elsewhere) while the "row" column holds the row index on every row.
     */
    @Test
    public void testParseSparseColumns() {
        final TestUtil.GenFrameAssertion assertion = new TestUtil.GenFrameAssertion("sparseColumns.parquet", TestUtil.ari(new int[]{4, 100}), this.psTransformer) {
            @Override
            protected File prepareFile() throws IOException {
                final File outputDir = Files.createTempDir();
                return ParquetFileGenerator.generateSparseParquetFile(outputDir, this.file, nrows());
            }

            @Override
            public void check(Frame parsed) {
                final String[] expectedNames = TestUtil.ar(new String[]{"int32_field", "string_field", "row", "int32_field2"});
                Assert.assertArrayEquals("Column names need to match!", expectedNames, parsed.names());
                Assert.assertArrayEquals("Column types need to match!", TestUtil.ar(new byte[]{3, 4, 3, 3}), parsed.types());
                for (int row = 0; row < nrows(); row++) {
                    final boolean populated = row % 10 == 0;
                    if (!populated) {
                        Assert.assertTrue(parsed.vec(0).isNA(row));
                        Assert.assertTrue(parsed.vec(1).isNA(row));
                        Assert.assertTrue(parsed.vec(3).isNA(row));
                    } else {
                        final String expectedCategory = "CAT_" + (row % 10);
                        Assert.assertEquals("Value in column int32_field", row, parsed.vec(0).at8(row));
                        Assert.assertEquals("Value in column string_field", expectedCategory, parsed.vec(1).factor(parsed.vec(1).at8(row)));
                        Assert.assertEquals("Value in column int32_field2", row, parsed.vec(3).at8(row));
                    }
                    // The "row" column is expected on every row.
                    Assert.assertEquals("Value in column row", row, parsed.vec(2).at8(row));
                }
            }
        };
        assertFrameAssertion(assertion);
    }

    /**
     * Parses a generated Parquet file with a single categorical column "cat_field" and checks the
     * category label of every row; row 66 is expected to carry a special label, all other rows
     * "CAT_" followed by the row index modulo 10.
     */
    @Test
    public void testParseCategoricalsWithZeroCharacters() {
        assertFrameAssertion(new TestUtil.GenFrameAssertion("nullCharacters.parquet", TestUtil.ari(new int[]{1, 100}), this.psTransformer) {
            @Override
            protected File prepareFile() throws IOException {
                return ParquetFileGenerator.generateParquetFileWithNullCharacters(Files.createTempDir(), this.file, nrows());
            }

            @Override
            public void check(Frame frame) {
                Assert.assertArrayEquals("Column names need to match!", TestUtil.ar(new String[]{"cat_field"}), frame.names());
                Assert.assertArrayEquals("Column types need to match!", TestUtil.ar(new byte[]{4}), frame.types());
                for (int i = 0; i < nrows(); i++) {
                    // NOTE(review): the trailing characters of the row-66 literal look mis-encoded in this
                    // source; the test name suggests they were NUL-related - confirm against the generator.
                    String expected = i == 66 ? "CAT_0_weird��" : "CAT_" + (i % 10);
                    // The only column is "cat_field"; the failure message now names that column.
                    Assert.assertEquals("Value in column cat_field", expected, frame.vec(0).factor(frame.vec(0).at8(i)));
                }
            }
        });
    }

    /**
     * Parses a generated Parquet file with the columns "decimal32" and "decimal64" and compares
     * every value, with zero tolerance, against the expected value computed from
     * {@code PrettyPrint.pow10}.
     */
    @Test
    public void testParseDecimals() {
        final TestUtil.GenFrameAssertion assertion = new TestUtil.GenFrameAssertion("decimals.parquet", TestUtil.ari(new int[]{2, 18}), this.psTransformer) {
            @Override
            protected File prepareFile() throws IOException {
                final File outputDir = Files.createTempDir();
                return ParquetFileGenerator.generateParquetFileDecimals(outputDir, this.file, nrows());
            }

            @Override
            public void check(Frame parsed) {
                Assert.assertArrayEquals("Column names need to match!", TestUtil.ar(new String[]{"decimal32", "decimal64"}), parsed.names());
                Assert.assertArrayEquals("Column types need to match!", TestUtil.ar(new byte[]{3, 3}), parsed.types());
                int row = 0;
                while (row < nrows()) {
                    final double expected32 = (1.0d + PrettyPrint.pow10(1L, row % 9)) / 100000.0d;
                    final double expected64 = (1.0d + PrettyPrint.pow10(1L, row % 18)) / 1.0E10d;
                    Assert.assertEquals("Value in column decimal32", expected32, parsed.vec(0).at(row), 0.0d);
                    Assert.assertEquals("Value in column decimal64", expected64, parsed.vec(1).at(row), 0.0d);
                    row++;
                }
            }
        };
        assertFrameAssertion(assertion);
    }

    /**
     * Parses "smalldata/jira/pubdev-5673.parquet" and asserts that the first value of the first
     * column equals 9.877654321199876E10 exactly.
     */
    @Test
    public void testPubdev5673() {
        Frame parsed = null;
        try {
            parsed = parse_parquet("smalldata/jira/pubdev-5673.parquet");
            Assert.assertEquals(9.877654321199876E10d, parsed.vec(0).at(0L), 0.0d);
        } finally {
            // Single cleanup path: the frame is deleted exactly once, whether or not the assertion failed.
            if (parsed != null) {
                parsed.delete();
            }
        }
    }
}
