package org.apache.druid.indexing.common.task.batch.parallel;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import javax.annotation.Nullable;
import org.apache.druid.data.input.FirehoseFactory;
import org.apache.druid.data.input.InputFormat;
import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.InputSource;
import org.apache.druid.data.input.InputSourceReader;
import org.apache.druid.data.input.InputSplit;
import org.apache.druid.data.input.SplitHintSpec;
import org.apache.druid.data.input.impl.ByteEntity;
import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.InputEntityIteratingReader;
import org.apache.druid.data.input.impl.JsonInputFormat;
import org.apache.druid.data.input.impl.SplittableInputSource;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.indexer.TaskState;
import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec;
import org.apache.druid.indexer.partitions.HashedPartitionsSpec;
import org.apache.druid.indexer.partitions.PartitionsSpec;
import org.apache.druid.indexing.common.LockGranularity;
import org.apache.druid.indexing.common.task.TaskResource;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.parsers.JSONPathFieldSpec;
import org.apache.druid.java.util.common.parsers.JSONPathFieldType;
import org.apache.druid.java.util.common.parsers.JSONPathSpec;
import org.apache.druid.segment.indexing.DataSchema;
import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.timeline.DataSegment;
import org.joda.time.DateTime;
import org.joda.time.Interval;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

@RunWith(Parameterized.class)
/* loaded from: input_file:org/apache/druid/indexing/common/task/batch/parallel/MultiPhaseParallelIndexingWithNullColumnTest.class */
public class MultiPhaseParallelIndexingWithNullColumnTest extends AbstractMultiPhaseParallelIndexingTest {
    /** Timestamp spec reading column {@code ts} with the {@code auto} format and no missing-value default. */
    private static final TimestampSpec TIMESTAMP_SPEC = new TimestampSpec("ts", "auto", null);

    /** Base dimensions spec over {@code ts}, {@code dim1} and {@code dim2}; tests swap in their own dimension lists. */
    private static final DimensionsSpec DIMENSIONS_SPEC =
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("ts", "dim1", "dim2")));

    /** JSON input format with every optional constructor argument left null. */
    private static final InputFormat JSON_FORMAT = new JsonInputFormat(null, null, null, null, null);

    /** Single interval ({@code 2022-01/P1M}) passed to the granularity spec by the tests that set one. */
    private static final List<Interval> INTERVAL_TO_INDEX =
        Collections.singletonList(Intervals.of("2022-01/P1M"));

    /** Partitioning scheme for the current parameterized run; handed to {@code newTuningConfig}. */
    private final PartitionsSpec partitionsSpec;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/druid/indexing/common/task/batch/parallel/MultiPhaseParallelIndexingWithNullColumnTest$SplittableInlineDataSource.class */
    public static final class SplittableInlineDataSource implements SplittableInputSource<String> {
        private final List<String> data;

        @JsonCreator
        public SplittableInlineDataSource(@JsonProperty("data") List<String> list) {
            this.data = list;
        }

        @JsonProperty
        public List<String> getData() {
            return this.data;
        }

        public Stream<InputSplit<String>> createSplits(InputFormat inputFormat, @Nullable SplitHintSpec splitHintSpec) {
            return this.data.stream().map((v1) -> {
                return new InputSplit(v1);
            });
        }

        public int estimateNumSplits(InputFormat inputFormat, @Nullable SplitHintSpec splitHintSpec) {
            return this.data.size();
        }

        public InputSource withSplit(InputSplit<String> inputSplit) {
            return new SplittableInlineDataSource(ImmutableList.of(inputSplit.get()));
        }

        public boolean needsFormat() {
            return true;
        }

        public InputSourceReader reader(InputRowSchema inputRowSchema, @Nullable InputFormat inputFormat, File file) {
            return new InputEntityIteratingReader(inputRowSchema, inputFormat, this.data.stream().map(str -> {
                return new ByteEntity(StringUtils.toUtf8(str));
            }).iterator(), file);
        }
    }

    /**
     * Supplies the constructor arguments for the parameterized runner: one run with a
     * hashed partitions spec and one with a dimension-range partitions spec. Both name
     * {@code unknownDim} as a partition dimension.
     */
    @Parameterized.Parameters
    public static Iterable<Object[]> constructorFeeder() {
        final HashedPartitionsSpec hashed =
            new HashedPartitionsSpec(10, null, ImmutableList.of("ts", "unknownDim"));
        final DimensionRangePartitionsSpec ranged =
            new DimensionRangePartitionsSpec(10, null, Collections.singletonList("unknownDim"), false);
        return ImmutableList.of(new Object[]{hashed}, new Object[]{ranged});
    }

    /**
     * Creates the test for one partitioning scheme and registers
     * {@link SplittableInlineDataSource} as a Jackson subtype on the test's object mapper.
     *
     * @param partitionsSpec partitioning scheme supplied by {@link #constructorFeeder()}
     */
    public MultiPhaseParallelIndexingWithNullColumnTest(PartitionsSpec partitionsSpec) {
        // NOTE(review): the meaning of (true, 0.0, 0.0) is defined by the superclass constructor — confirm there.
        super(LockGranularity.TIME_CHUNK, true, 0.0, 0.0);
        getObjectMapper().registerSubtypes(SplittableInlineDataSource.class);
        this.partitionsSpec = partitionsSpec;
    }

    /**
     * Runs an ingestion whose dimension list is {@code ts} and {@code unknownDim}
     * (a column none of the generated input rows contain) and checks that every
     * published segment reports exactly those dimensions, in that order.
     */
    @Test
    public void testIngestNullColumn() throws JsonProcessingException {
        final List<DimensionSchema> dimensionSchemas =
            DimensionsSpec.getDefaultSchemas(Arrays.asList("ts", "unknownDim"));
        final ParallelIndexSupervisorTask task = new ParallelIndexSupervisorTask(
            null,
            null,
            null,
            new ParallelIndexIngestionSpec(
                new DataSchema(
                    "dataSource",
                    TIMESTAMP_SPEC,
                    DIMENSIONS_SPEC.withDimensions(dimensionSchemas),
                    DEFAULT_METRICS_SPEC,
                    new UniformGranularitySpec(Granularities.DAY, Granularities.MINUTE, INTERVAL_TO_INDEX),
                    null
                ),
                new ParallelIndexIOConfig(null, getInputSource(), JSON_FORMAT, false, null),
                newTuningConfig(partitionsSpec, 2, true)
            ),
            null
        );

        Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().runAndWait(task).getStatusCode());

        final Set<DataSegment> segments = getIndexingServiceClient().getPublishedSegments(task);
        Assert.assertFalse(segments.isEmpty());
        for (DataSegment segment : segments) {
            final List<String> actualDimensions = segment.getDimensions();
            Assert.assertEquals(dimensionSchemas.size(), actualDimensions.size());
            // Compare position by position so the declared order is verified too.
            for (int i = 0; i < dimensionSchemas.size(); i++) {
                Assert.assertEquals(dimensionSchemas.get(i).getName(), actualDimensions.get(i));
            }
        }
    }

    /**
     * Declares {@code ts}, {@code unknownDim} and {@code dim1} explicitly with
     * {@code includeAllDimensions} set, using a JSON path spec built with its boolean
     * flag set to {@code true} (field discovery, per the test name) and no explicit fields.
     *
     * <p>Every published segment must list the explicit dimensions first, in declared
     * order, followed by {@code dim2} and {@code dim3} in any order.
     */
    @Test
    public void testIngestNullColumn_useFieldDiscovery_includeAllDimensions_shouldStoreAllColumns() throws JsonProcessingException {
        final DataSchema dataSchema = new DataSchema(
            "dataSource",
            TIMESTAMP_SPEC,
            new DimensionsSpec.Builder()
                .setDimensions(DimensionsSpec.getDefaultSchemas(Arrays.asList("ts", "unknownDim", "dim1")))
                .setIncludeAllDimensions(true)
                .build(),
            DEFAULT_METRICS_SPEC,
            new UniformGranularitySpec(Granularities.DAY, Granularities.MINUTE, INTERVAL_TO_INDEX),
            null
        );
        final ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(
            null,
            getInputSource(),
            new JsonInputFormat(new JSONPathSpec(true, null), null, null, null, null),
            false,
            null
        );
        final ParallelIndexSupervisorTask task = new ParallelIndexSupervisorTask(
            null,
            null,
            null,
            new ParallelIndexIngestionSpec(dataSchema, ioConfig, newTuningConfig(partitionsSpec, 2, true)),
            null
        );

        Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().runAndWait(task).getStatusCode());

        final Set<DataSegment> segments = getIndexingServiceClient().getPublishedSegments(task);
        Assert.assertFalse(segments.isEmpty());

        final List<String> expectedExplicitDimensions = ImmutableList.of("ts", "unknownDim", "dim1");
        final Set<String> expectedImplicitDimensions = ImmutableSet.of("dim2", "dim3");
        for (DataSegment segment : segments) {
            final List<String> actualDimensions = segment.getDimensions();
            final int explicitCount = expectedExplicitDimensions.size();
            Assert.assertEquals(expectedExplicitDimensions, actualDimensions.subList(0, explicitCount));
            // The remaining dimensions are compared as a set, so their relative order is not checked.
            Assert.assertEquals(
                expectedImplicitDimensions,
                new HashSet<>(actualDimensions.subList(explicitCount, actualDimensions.size()))
            );
        }
    }

    /**
     * Declares no dimensions (only {@code includeAllDimensions}) and no intervals, with a
     * JSON path spec that has its boolean flag set to {@code true} (field discovery, per
     * the test name) plus two explicit path fields: {@code dim1 -> $.dim1} and
     * {@code k -> $.dim4.k}.
     *
     * <p>Every published segment must list {@code dim1} and {@code k} first, in that
     * order, followed by {@code dim2} and {@code dim3} in any order.
     */
    @Test
    public void testIngestNullColumn_explicitPathSpec_useFieldDiscovery_includeAllDimensions_shouldStoreAllColumns() throws JsonProcessingException {
        final DataSchema dataSchema = new DataSchema(
            "dataSource",
            TIMESTAMP_SPEC,
            new DimensionsSpec.Builder().setIncludeAllDimensions(true).build(),
            DEFAULT_METRICS_SPEC,
            new UniformGranularitySpec(Granularities.DAY, Granularities.MINUTE, null),
            null
        );
        final JSONPathSpec flattenSpec = new JSONPathSpec(
            true,
            ImmutableList.of(
                new JSONPathFieldSpec(JSONPathFieldType.PATH, "dim1", "$.dim1"),
                new JSONPathFieldSpec(JSONPathFieldType.PATH, "k", "$.dim4.k")
            )
        );
        final ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(
            null,
            getInputSource(),
            new JsonInputFormat(flattenSpec, null, null, null, null),
            false,
            null
        );
        final ParallelIndexSupervisorTask task = new ParallelIndexSupervisorTask(
            null,
            null,
            null,
            new ParallelIndexIngestionSpec(dataSchema, ioConfig, newTuningConfig(partitionsSpec, 2, true)),
            null
        );

        Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().runAndWait(task).getStatusCode());

        final Set<DataSegment> segments = getIndexingServiceClient().getPublishedSegments(task);
        Assert.assertFalse(segments.isEmpty());

        final List<String> expectedExplicitDimensions = ImmutableList.of("dim1", "k");
        final Set<String> expectedImplicitDimensions = ImmutableSet.of("dim2", "dim3");
        for (DataSegment segment : segments) {
            final List<String> actualDimensions = segment.getDimensions();
            final int explicitCount = expectedExplicitDimensions.size();
            Assert.assertEquals(expectedExplicitDimensions, actualDimensions.subList(0, explicitCount));
            // The remaining dimensions are compared as a set, so their relative order is not checked.
            Assert.assertEquals(
                expectedImplicitDimensions,
                new HashSet<>(actualDimensions.subList(explicitCount, actualDimensions.size()))
            );
        }
    }

    /**
     * Runs the same {@code ts} + {@code unknownDim} ingestion as {@code testIngestNullColumn}
     * but with {@code storeEmptyColumns=false} in the task context, and checks that every
     * published segment reports {@code ts} as its only dimension.
     */
    @Test
    public void testIngestNullColumn_storeEmptyColumnsOff_shouldNotStoreEmptyColumns() throws JsonProcessingException {
        final ParallelIndexSupervisorTask task = new ParallelIndexSupervisorTask(
            null,
            null,
            null,
            new ParallelIndexIngestionSpec(
                new DataSchema(
                    "dataSource",
                    TIMESTAMP_SPEC,
                    DIMENSIONS_SPEC.withDimensions(
                        DimensionsSpec.getDefaultSchemas(Arrays.asList("ts", "unknownDim"))
                    ),
                    DEFAULT_METRICS_SPEC,
                    new UniformGranularitySpec(Granularities.DAY, Granularities.MINUTE, INTERVAL_TO_INDEX),
                    null
                ),
                new ParallelIndexIOConfig(null, getInputSource(), JSON_FORMAT, false, null),
                newTuningConfig(partitionsSpec, 2, true)
            ),
            null
        );
        task.addToContext("storeEmptyColumns", false);

        Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().runAndWait(task).getStatusCode());

        final Set<DataSegment> segments = getIndexingServiceClient().getPublishedSegments(task);
        Assert.assertFalse(segments.isEmpty());

        final List<DimensionSchema> expectedDimensions =
            DimensionsSpec.getDefaultSchemas(Collections.singletonList("ts"));
        for (DataSegment segment : segments) {
            final List<String> actualDimensions = segment.getDimensions();
            Assert.assertEquals(expectedDimensions.size(), actualDimensions.size());
            for (int i = 0; i < expectedDimensions.size(); i++) {
                Assert.assertEquals(expectedDimensions.get(i).getName(), actualDimensions.get(i));
            }
        }
    }

    /**
     * Builds the shared test input: four JSON rows serialized one per line
     * (each line newline-terminated) and packed into a single payload string,
     * so the returned source holds exactly one element.
     *
     * <p>Rows 1-3 are dated 2022-01-01 through 2022-01-03 with {@code dim1="val1"},
     * {@code dim2="val2"} and {@code dim3=null}. Row 4 is dated 2022-01-04 with
     * {@code dim1}..{@code dim3} null and {@code dim4={"k":"v"}}.
     *
     * @throws JsonProcessingException if a row cannot be serialized
     */
    private InputSource getInputSource() throws JsonProcessingException {
        final ObjectMapper mapper = getObjectMapper();

        final List<Map<String, Object>> rows = new ArrayList<>();
        for (int day = 1; day <= 3; day++) {
            rows.add(row(StringUtils.format("2022-01-%02d", day), "val1", "val2", null));
        }
        rows.add(row("2022-01-04", null, null, null, ImmutableMap.of("k", "v")));

        // One JSON document per line; every line, including the last, ends with '\n'.
        final StringBuilder payload = new StringBuilder();
        for (Map<String, Object> row : rows) {
            payload.append(mapper.writeValueAsString(row)).append('\n');
        }
        return new SplittableInlineDataSource(ImmutableList.of(payload.toString()));
    }

    /**
     * Builds one input row as a mutable map.
     *
     * @param str    value stored under the {@code ts} key
     * @param objArr dimension values; the value at index {@code i} is stored under
     *               {@code dim<i+1>}, and {@code null} values are stored as-is
     * @return a map holding {@code ts} plus one {@code dimN} entry per element of {@code objArr}
     */
    private static Map<String, Object> row(String str, Object... objArr) {
        final Map<String, Object> fields = new HashMap<>();
        fields.put("ts", str);
        for (int i = 0; i < objArr.length; i++) {
            // Dimension names are 1-based: dim1, dim2, ...
            fields.put("dim" + (i + 1), objArr[i]);
        }
        return fields;
    }
}
