package com.google.cloud.spark.bigquery.integration;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.truth.Truth;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.MetadataBuilder;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;

/* loaded from: input_file:com/google/cloud/spark/bigquery/integration/ReadIntegrationTestBase.class */
/**
 * Read-path integration tests for the {@code bigquery} Spark data source.
 *
 * <p>The tests read public BigQuery tables (and the {@code all_types} table of the test dataset)
 * through {@code spark.read().format("bigquery")} and verify schema, row counts, filtering and
 * the equivalence of the supported read data formats.
 */
public class ReadIntegrationTestBase extends SparkBigQueryIntegrationTestBase {
    /**
     * Project used as {@code viewMaterializationProject}, taken from the
     * {@code GOOGLE_CLOUD_PROJECT} environment variable. Instantiating this class fails with a
     * {@link NullPointerException} when the variable is not set.
     */
    protected final String PROJECT_ID = Preconditions.checkNotNull(
            System.getenv("GOOGLE_CLOUD_PROJECT"),
            "Please set the GOOGLE_CLOUD_PROJECT env variable in order to read views");

    private static final String SHAKESPEARE_TABLE = "bigquery-public-data.samples.shakespeare";
    private static final long SHAKESPEARE_TABLE_NUM_ROWS = 164656L;
    private static final String WORD_COUNT_OR_CORPUS_DATE_FILTER = "word_count = 1 OR corpus_date = 0";
    /** Value passed as {@code bqEncodedCreateReadSessionRequest} by the "compressed" read tests. */
    private static final String ENCODED_READ_SESSION_REQUEST = "EgZCBBoCEAI=";

    private static final String LARGE_TABLE = "bigquery-public-data.samples.natality";
    private static final String LARGE_TABLE_FIELD = "is_male";
    private static final long LARGE_TABLE_NUM_ROWS = 33271914;
    private static final String NON_EXISTENT_TABLE = "non-existent.non-existent.non-existent";
    private static final String STRUCT_COLUMN_ORDER_TEST_TABLE_NAME = "struct_column_order";
    private static final String ALL_TYPES_TABLE_NAME = "all_types";
    private static final String ALL_TYPES_VIEW_NAME = "all_types_view";

    /**
     * Maps a Spark SQL filter expression to the words expected from
     * {@link #firstDistinctWords(Dataset, String)} when that filter is applied to the Shakespeare
     * table.
     */
    private static final Map<String, Collection<String>> FILTER_DATA =
            // Explicit type arguments are required: without them the builder at the head of the
            // chain is inferred as <Object, Object>.
            ImmutableMap.<String, Collection<String>>builder()
                    .put("word_count == 4", ImmutableList.of("'A", "'But", "'Faith"))
                    .put("word_count > 3", ImmutableList.of("'", "''Tis", "'A"))
                    .put("word_count >= 2", ImmutableList.of("'", "''Lo", "''O"))
                    .put("word_count < 3", ImmutableList.of("''All", "''Among", "''And"))
                    .put("word_count <= 5", ImmutableList.of("'", "''All", "''Among"))
                    .put("word_count in(8, 9)", ImmutableList.of("'", "'Faith", "'Tis"))
                    .put("word_count is null", ImmutableList.of())
                    .put("word_count is not null", ImmutableList.of("'", "''All", "''Among"))
                    .put("word_count == 4 and corpus == 'twelfthnight'", ImmutableList.of("'Thou", "'em", "Art"))
                    .put("word_count == 4 or corpus > 'twelfthnight'", ImmutableList.of("'", "''Tis", "''twas"))
                    .put("not word_count in(8, 9)", ImmutableList.of("'", "''All", "''Among"))
                    .put("corpus like 'king%'", ImmutableList.of("'", "'A", "'Affectionate"))
                    .put("corpus like '%kinghenryiv'", ImmutableList.of("'", "'And", "'Anon"))
                    .put("corpus like '%king%'", ImmutableList.of("'", "'A", "'Affectionate"))
                    .build();

    /**
     * {@link TestConstants#SHAKESPEARE_TABLE_SCHEMA} where every field's metadata additionally
     * carries a {@code comment} entry copied from its {@code description} entry.
     */
    private static final StructType SHAKESPEARE_TABLE_SCHEMA_WITH_METADATA_COMMENT = new StructType(
            Stream.of(TestConstants.SHAKESPEARE_TABLE_SCHEMA.fields())
                    .map(field -> new StructField(
                            field.name(),
                            field.dataType(),
                            field.nullable(),
                            new MetadataBuilder()
                                    .withMetadata(field.metadata())
                                    .putString("comment", field.metadata().getString("description"))
                                    .build()))
                    .toArray(StructField[]::new));

    /**
     * Returns the first three distinct values of the {@code word} column, in ascending order,
     * among the rows of {@code dataset} matching {@code condition}.
     *
     * @param dataset dataset exposing a {@code word} column
     * @param condition Spark SQL filter expression
     * @return at most three words
     */
    private static List<String> firstDistinctWords(Dataset<Row> dataset, String condition) {
        // Dataset#take is declared as returning Object for Java callers, hence the cast.
        String[] words = (String[]) dataset
                .select("word")
                .where(condition)
                .distinct()
                .as(Encoders.STRING())
                .sort("word")
                .take(3);
        return Arrays.asList(words);
    }

    /**
     * Asserts that {@code dataset} has the expected Shakespeare schema, row count and first words.
     *
     * @param dataset the loaded Shakespeare table
     */
    private void testShakespeare(Dataset<Row> dataset) {
        Truth.assertThat(dataset.schema()).isEqualTo(SHAKESPEARE_TABLE_SCHEMA_WITH_METADATA_COMMENT);
        Truth.assertThat(dataset.count()).isEqualTo(SHAKESPEARE_TABLE_NUM_ROWS);
        Truth.assertThat(firstDistinctWords(dataset, "word >= 'a' AND word not like '%\\'%'"))
                .containsExactly("a", "abaissiez", "abandon");
    }

    /** Reads the Shakespeare table by passing its name through the {@code table} option. */
    @Test
    public void testReadWithOption() {
        testShakespeare(this.spark.read().format("bigquery").option("table", SHAKESPEARE_TABLE).load());
    }

    /** Reads the Shakespeare table by passing its name directly to {@code load(...)}. */
    @Test
    public void testReadWithSimplifiedApi() {
        testShakespeare(this.spark.read().format("bigquery").load(SHAKESPEARE_TABLE));
    }

    /** Reads the Shakespeare table with an encoded create-read-session request. */
    @Test
    @Ignore("DSv2 only")
    public void testReadCompressed() {
        Dataset<Row> df = this.spark.read()
                .format("bigquery")
                .option("table", SHAKESPEARE_TABLE)
                .option("bqEncodedCreateReadSessionRequest", ENCODED_READ_SESSION_REQUEST)
                .load();
        df.head();
        testShakespeare(df);
    }

    /** Same as {@link #testReadCompressed()} with {@code bqBackgroundThreadsPerStream} set to 1. */
    @Test
    @Ignore("DSv2 only")
    public void testReadCompressedWith1BackgroundThreads() {
        Dataset<Row> df = this.spark.read()
                .format("bigquery")
                .option("table", SHAKESPEARE_TABLE)
                .option("bqEncodedCreateReadSessionRequest", ENCODED_READ_SESSION_REQUEST)
                .option("bqBackgroundThreadsPerStream", "1")
                .load();
        df.head();
        testShakespeare(df);
    }

    /** Same as {@link #testReadCompressed()} with {@code bqBackgroundThreadsPerStream} set to 4. */
    @Test
    @Ignore("DSv2 only")
    public void testReadCompressedWith4BackgroundThreads() {
        Dataset<Row> df = this.spark.read()
                .format("bigquery")
                .option("table", SHAKESPEARE_TABLE)
                .option("bqEncodedCreateReadSessionRequest", ENCODED_READ_SESSION_REQUEST)
                .option("bqBackgroundThreadsPerStream", "4")
                .load();
        df.head();
        testShakespeare(df);
    }

    /** Applies every filter in {@link #FILTER_DATA} and checks the resulting first words. */
    @Test
    public void testFilters() {
        Dataset<Row> df = this.spark.read().format("bigquery").load(SHAKESPEARE_TABLE);
        Truth.assertThat(df.schema()).isEqualTo(SHAKESPEARE_TABLE_SCHEMA_WITH_METADATA_COMMENT);
        Truth.assertThat(df.count()).isEqualTo(SHAKESPEARE_TABLE_NUM_ROWS);
        FILTER_DATA.forEach((condition, expectedWords) ->
                Truth.assertThat(firstDistinctWords(df, condition)).containsExactlyElementsIn(expectedWords));
    }

    /**
     * Loads the {@code all_types} table from the test dataset.
     *
     * @return the loaded table
     */
    Dataset<Row> readAllTypesTable() {
        return this.spark.read()
                .format("bigquery")
                .option("dataset", testDataset.toString())
                .option("table", ALL_TYPES_TABLE_NAME)
                .load();
    }

    /** Checks that {@code count()} on a filtered ARROW read matches the number of collected rows. */
    @Test
    public void testCountWithFilters() {
        long countResults = this.spark.read()
                .format("bigquery")
                .option("table", SHAKESPEARE_TABLE)
                .option("readDataFormat", "ARROW")
                .load()
                .where(WORD_COUNT_OR_CORPUS_DATE_FILTER)
                .count();
        long countAfterCollect = this.spark.read()
                .format("bigquery")
                .option("table", SHAKESPEARE_TABLE)
                .option("readDataFormat", "ARROW")
                .load()
                .where(WORD_COUNT_OR_CORPUS_DATE_FILTER)
                .collectAsList()
                .size();
        Truth.assertThat(countResults).isEqualTo(countAfterCollect);
    }

    /** Checks the size statistic of the analyzed plan against the expected table size. */
    @Test
    public void testKnownSizeInBytes() {
        long actualTableSize = readAllTypesTable().queryExecution().analyzed().stats().sizeInBytes().longValue();
        Truth.assertThat(actualTableSize).isEqualTo(TestConstants.ALL_TYPES_TABLE_SIZE);
    }

    /** Checks the schema reported for the {@code all_types} table. */
    @Test
    public void testKnownSchema() {
        Truth.assertThat(readAllTypesTable().schema()).isEqualTo(TestConstants.ALL_TYPES_TABLE_SCHEMA);
    }

    /** Checks that a schema supplied through {@code schema(...)} is the one the dataset reports. */
    @Test
    public void testUserDefinedSchema() {
        StructType expectedSchema = new StructType(
                new StructField[] {new StructField("whatever", DataTypes.ByteType, true, Metadata.empty())});
        StructType actualSchema = this.spark.read()
                .schema(expectedSchema)
                .format("bigquery")
                .option("table", SHAKESPEARE_TABLE)
                .load()
                .schema();
        Truth.assertThat(actualSchema).isEqualTo(expectedSchema);
    }

    /** Checks that loading a table that does not exist throws a {@link RuntimeException}. */
    @Test
    public void testNonExistentSchema() {
        Assert.assertThrows(
                "Trying to read a non existing table should throw an exception",
                RuntimeException.class,
                () -> this.spark.read().format("bigquery").option("table", NON_EXISTENT_TABLE).load());
    }

    /** Fetches the first row of a large table; JUnit fails the test after 10000 ms. */
    @Test(timeout = 10000)
    public void testHeadDoesNotTimeoutAndOOM() {
        this.spark.read()
                .format("bigquery")
                .option("table", LARGE_TABLE)
                .load()
                .select(LARGE_TABLE_FIELD)
                .head();
    }

    /** Combines the {@code filter} option with a Spark-side predicate on the {@code repository} column. */
    @Test
    public void testUnhandleFilterOnStruct() {
        List<Row> rows = this.spark.read()
                .format("bigquery")
                .option("table", "bigquery-public-data:samples.github_nested")
                .option("filter", "url like '%spark'")
                .load()
                .select("url")
                .where("repository is not null")
                .collectAsList();
        Truth.assertThat(rows).hasSize(85);
    }

    /** Reads {@code ethereum_blockchain.live_logs} with views enabled and expects more than one row. */
    @Test
    public void testQueryMaterializedView() {
        long rowCount = this.spark.read()
                .format("bigquery")
                .option("table", "bigquery-public-data:ethereum_blockchain.live_logs")
                .option("viewsEnabled", "true")
                .option("viewMaterializationProject", this.PROJECT_ID)
                .option("viewMaterializationDataset", testDataset.toString())
                .load()
                .count();
        Truth.assertThat(rowCount).isGreaterThan(1);
    }

    /**
     * Checks that an AVRO read using the {@code filter} option returns the same rows as an ARROW
     * read filtered with {@code where(...)}.
     */
    @Test
    public void testOrAcrossColumnsAndFormats() {
        List<Row> avroResults = this.spark.read()
                .format("bigquery")
                .option("table", SHAKESPEARE_TABLE)
                .option("filter", WORD_COUNT_OR_CORPUS_DATE_FILTER)
                .option("readDataFormat", "AVRO")
                .load()
                .collectAsList();
        List<Row> arrowResults = this.spark.read()
                .format("bigquery")
                .option("table", SHAKESPEARE_TABLE)
                .option("readDataFormat", "ARROW")
                .load()
                .where(WORD_COUNT_OR_CORPUS_DATE_FILTER)
                .collectAsList();
        Truth.assertThat(avroResults).isEqualTo(arrowResults);
    }

    /**
     * Checks that ARROW reads using the {@code ZSTD} and {@code LZ4_FRAME} values of
     * {@code arrowCompressionCodec} return the same rows as an AVRO read.
     */
    @Test
    public void testArrowCompressionCodec() {
        List<Row> avroResults = this.spark.read()
                .format("bigquery")
                .option("table", SHAKESPEARE_TABLE)
                .option("filter", WORD_COUNT_OR_CORPUS_DATE_FILTER)
                .option("readDataFormat", "AVRO")
                .load()
                .collectAsList();
        List<Row> arrowResultsForZstdCodec = this.spark.read()
                .format("bigquery")
                .option("table", SHAKESPEARE_TABLE)
                .option("readDataFormat", "ARROW")
                .option("arrowCompressionCodec", "ZSTD")
                .load()
                .where(WORD_COUNT_OR_CORPUS_DATE_FILTER)
                .collectAsList();
        Truth.assertThat(avroResults).isEqualTo(arrowResultsForZstdCodec);
        List<Row> arrowResultsForLz4FrameCodec = this.spark.read()
                .format("bigquery")
                .option("table", SHAKESPEARE_TABLE)
                .option("readDataFormat", "ARROW")
                .option("arrowCompressionCodec", "LZ4_FRAME")
                .load()
                .where(WORD_COUNT_OR_CORPUS_DATE_FILTER)
                .collectAsList();
        Truth.assertThat(avroResults).isEqualTo(arrowResultsForLz4FrameCodec);
    }
}
