package org.apache.druid.indexing.common.task.batch.parallel;

import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import com.google.common.hash.BloomFilter;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import org.apache.druid.client.indexing.IndexingServiceClient;
import org.apache.druid.data.input.HandlingInputRowIterator;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.InputSource;
import org.apache.druid.indexer.TaskStatus;
import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec;
import org.apache.druid.indexing.common.TaskToolbox;
import org.apache.druid.indexing.common.actions.TaskActionClient;
import org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider;
import org.apache.druid.indexing.common.task.IndexTaskClientFactory;
import org.apache.druid.indexing.common.task.TaskResource;
import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution;
import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringSketch;
import org.apache.druid.indexing.common.task.batch.parallel.distribution.TimeDimTuple;
import org.apache.druid.indexing.common.task.batch.parallel.distribution.TimeDimTupleFactory;
import org.apache.druid.indexing.common.task.batch.parallel.distribution.TimeDimTupleFunnel;
import org.apache.druid.indexing.common.task.batch.parallel.iterator.IndexTaskInputRowIteratorBuilder;
import org.apache.druid.indexing.common.task.batch.parallel.iterator.RangePartitionIndexTaskInputRowIteratorBuilder;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.java.util.common.parsers.CloseableIterator;
import org.apache.druid.java.util.common.parsers.ParseException;
import org.apache.druid.segment.indexing.DataSchema;
import org.apache.druid.segment.indexing.granularity.GranularitySpec;
import org.joda.time.DateTime;
import org.joda.time.Interval;

/* loaded from: input_file:org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.class */
public class PartialDimensionDistributionTask extends PerfectRollupWorkerTask {
    /** Task type identifier: returned by {@link #getType()} and passed to getOrMakeId in the constructor. */
    public static final String TYPE = "partial_dimension_distribution";
    private static final Logger LOG = new Logger(PartialDimensionDistributionTask.class);
    // NOTE(review): judging by the name, this is the "skip rows with a null partition-dimension value"
    // flag for RangePartitionIndexTaskInputRowIteratorBuilder -- confirm against that class.
    private static final boolean SKIP_NULL = true;
    // Value of the "numAttempts" creator property; exposed again through getNumAttempts().
    private final int numAttempts;
    // The "spec" creator property: source of the data schema, IO config and tuning config used by this task.
    private final ParallelIndexIngestionSpec ingestionSchema;
    // Id handed to report(...) in sendReport(), i.e. the task that receives the distribution report.
    private final String supervisorTaskId;
    // Wrapped in a ClientBasedTaskInfoProvider when the supervisor task client is built in sendReport().
    private final IndexingServiceClient indexingServiceClient;
    // Builds the ParallelIndexSupervisorTaskClient used by sendReport().
    private final IndexTaskClientFactory<ParallelIndexSupervisorTaskClient> taskClientFactory;
    // Provides the de-duplicating filter that determineDistribution() uses when rollup is enabled
    // and the partitions spec does not assume already-grouped input.
    private final Supplier<DedupRowDimensionValueFilter> dedupRowDimValueFilterSupplier;

    /**
     * {@link DimensionValueFilter} that suppresses dimension values whose
     * (bucketed timestamp, dimension value) tuple has probably been seen already.
     *
     * <p>Every value is first forwarded to a {@link PassthroughRowDimensionValueFilter} so that the
     * per-interval minimum and maximum are tracked regardless of whether the value is suppressed.
     * Membership is tracked with a Bloom filter, so a value that was never seen can occasionally be
     * reported as a duplicate (false positive) and be dropped.
     *
     * <p>Decompiler note: JADX reports that the access modifier of this class was widened from
     * package-private.
     */
    @VisibleForTesting
    public static class DedupRowDimensionValueFilter implements DimensionValueFilter {
        /** Default sizing of the Bloom filter used by the single-argument constructor. */
        private static final int BLOOM_FILTER_EXPECTED_INSERTIONS = 100000000;
        private static final double BLOOM_FILTER_EXPECTED_FALSE_POSITIVE_PROBABILITY = 0.001d;

        private final PassthroughRowDimensionValueFilter delegate;
        private final TimeDimTupleFactory timeDimTupleFactory;
        private final BloomFilter<TimeDimTuple> timeDimTupleBloomFilter;

        /**
         * Creates a filter with the default Bloom filter sizing.
         *
         * @param granularity granularity handed to the {@link TimeDimTupleFactory}
         */
        DedupRowDimensionValueFilter(Granularity granularity) {
            this(
                    granularity,
                    BLOOM_FILTER_EXPECTED_INSERTIONS,
                    BLOOM_FILTER_EXPECTED_FALSE_POSITIVE_PROBABILITY);
        }

        /**
         * Creates a filter with explicit Bloom filter sizing.
         *
         * @param granularity              granularity handed to the {@link TimeDimTupleFactory}
         * @param expectedInsertions       expected number of insertions into the Bloom filter
         * @param falsePositiveProbability desired false positive probability of the Bloom filter
         */
        @VisibleForTesting
        DedupRowDimensionValueFilter(Granularity granularity, int expectedInsertions, double falsePositiveProbability) {
            this.delegate = new PassthroughRowDimensionValueFilter();
            this.timeDimTupleFactory = new TimeDimTupleFactory(granularity);
            this.timeDimTupleBloomFilter =
                    BloomFilter.create(TimeDimTupleFunnel.INSTANCE, expectedInsertions, falsePositiveProbability);
        }

        /**
         * Records the value for min/max tracking and decides whether it is new.
         *
         * @return {@code dimensionValue} the first time its tuple is seen, {@code null} when the
         *         Bloom filter reports that the tuple might already be present
         */
        @Override // org.apache.druid.indexing.common.task.batch.parallel.PartialDimensionDistributionTask.DimensionValueFilter
        @Nullable
        public String accept(Interval interval, DateTime timestamp, String dimensionValue) {
            // Min/max tracking must see every value, including ones dropped below.
            delegate.accept(interval, timestamp, dimensionValue);

            final TimeDimTuple tuple = timeDimTupleFactory.createWithBucketedTimestamp(timestamp, dimensionValue);
            if (!timeDimTupleBloomFilter.mightContain(tuple)) {
                timeDimTupleBloomFilter.put(tuple);
                return dimensionValue;
            }
            return null;
        }

        /** Returns the delegate's per-interval minimum dimension values. */
        @Override // org.apache.druid.indexing.common.task.batch.parallel.PartialDimensionDistributionTask.DimensionValueFilter
        public Map<Interval, String> getIntervalToMinDimensionValue() {
            return delegate.getIntervalToMinDimensionValue();
        }

        /** Returns the delegate's per-interval maximum dimension values. */
        @Override // org.apache.druid.indexing.common.task.batch.parallel.PartialDimensionDistributionTask.DimensionValueFilter
        public Map<Interval, String> getIntervalToMaxDimensionValue() {
            return delegate.getIntervalToMaxDimensionValue();
        }
    }

    /**
     * Decides, for each partition-dimension value read from the input, whether the value should be
     * added to the distribution of its interval, and exposes the per-interval minimum and maximum
     * values it has been given.
     *
     * <p>Decompiler note: JADX reports that the access modifier of this interface was widened from
     * private.
     */
    public interface DimensionValueFilter {
        /**
         * Offers one dimension value to the filter.
         *
         * @param interval       interval the row was bucketed into
         * @param timestamp      timestamp of the row
         * @param dimensionValue partition-dimension value of the row
         * @return the value to add to the distribution, or {@code null} if it must not be added
         */
        @Nullable
        String accept(Interval interval, DateTime timestamp, String dimensionValue);

        /** Returns, per interval, the minimum dimension value tracked by this filter. */
        Map<Interval, String> getIntervalToMinDimensionValue();

        /** Returns, per interval, the maximum dimension value tracked by this filter. */
        Map<Interval, String> getIntervalToMaxDimensionValue();
    }

    /**
     * {@link DimensionValueFilter} that accepts every value unchanged while tracking the
     * lexicographically smallest and largest value (per {@link String#compareTo}) seen for each
     * interval.
     *
     * <p>Decompiler note: JADX reports that the access modifier of this class was widened from
     * private.
     */
    public static class PassthroughRowDimensionValueFilter implements DimensionValueFilter {
        // Parameterized (not raw) maps: avoids unchecked conversions on the typed fields.
        private final Map<Interval, String> intervalToMinDimensionValue = new HashMap<>();
        private final Map<Interval, String> intervalToMaxDimensionValue = new HashMap<>();

        PassthroughRowDimensionValueFilter() {
        }

        /**
         * Updates the min/max bookkeeping for {@code interval} and passes the value through.
         *
         * @param interval       interval the row was bucketed into
         * @param timestamp      timestamp of the row (unused by this implementation)
         * @param dimensionValue partition-dimension value of the row
         * @return {@code dimensionValue}, always
         */
        @Override // org.apache.druid.indexing.common.task.batch.parallel.PartialDimensionDistributionTask.DimensionValueFilter
        @Nullable
        public String accept(Interval interval, DateTime timestamp, String dimensionValue) {
            updateMinDimensionValue(interval, dimensionValue);
            updateMaxDimensionValue(interval, dimensionValue);
            return dimensionValue;
        }

        /** Stores {@code dimensionValue} as the interval's minimum if none is recorded or it sorts lower. */
        private void updateMinDimensionValue(Interval interval, String dimensionValue) {
            // compute() is kept deliberately: a null value with no recorded minimum leaves the map untouched.
            intervalToMinDimensionValue.compute(
                    interval,
                    (key, currentMin) ->
                            (currentMin == null || dimensionValue.compareTo(currentMin) < 0) ? dimensionValue : currentMin);
        }

        /** Stores {@code dimensionValue} as the interval's maximum if none is recorded or it sorts higher. */
        private void updateMaxDimensionValue(Interval interval, String dimensionValue) {
            intervalToMaxDimensionValue.compute(
                    interval,
                    (key, currentMax) ->
                            (currentMax == null || dimensionValue.compareTo(currentMax) > 0) ? dimensionValue : currentMax);
        }

        /** Returns the live (not copied) map of interval to minimum accepted value. */
        @Override // org.apache.druid.indexing.common.task.batch.parallel.PartialDimensionDistributionTask.DimensionValueFilter
        public Map<Interval, String> getIntervalToMinDimensionValue() {
            return intervalToMinDimensionValue;
        }

        /** Returns the live (not copied) map of interval to maximum accepted value. */
        @Override // org.apache.druid.indexing.common.task.batch.parallel.PartialDimensionDistributionTask.DimensionValueFilter
        public Map<Interval, String> getIntervalToMaxDimensionValue() {
            return intervalToMaxDimensionValue;
        }
    }

    /**
     * Jackson entry point. Delegates to the full constructor with a supplier that creates a
     * {@link DedupRowDimensionValueFilter} from the query granularity of the ingestion spec.
     *
     * <p>Decompiler note: JADX reports that the access modifier of this constructor was widened
     * from package-private.
     *
     * @param id                    task id, may be {@code null}
     * @param groupId               "groupId" JSON property
     * @param taskResource          "resource" JSON property
     * @param supervisorTaskId      id of the task that receives this task's report
     * @param numAttempts           "numAttempts" JSON property
     * @param ingestionSpec         "spec" JSON property
     * @param context               "context" JSON property
     * @param indexingServiceClient injected indexing service client
     * @param taskClientFactory     injected factory for the supervisor task client
     */
    @JsonCreator
    public PartialDimensionDistributionTask(
            @JsonProperty("id") @Nullable String id,
            @JsonProperty("groupId") String groupId,
            @JsonProperty("resource") TaskResource taskResource,
            @JsonProperty("supervisorTaskId") String supervisorTaskId,
            @JsonProperty("numAttempts") int numAttempts,
            @JsonProperty("spec") ParallelIndexIngestionSpec ingestionSpec,
            @JsonProperty("context") Map<String, Object> context,
            @JacksonInject IndexingServiceClient indexingServiceClient,
            @JacksonInject IndexTaskClientFactory<ParallelIndexSupervisorTaskClient> taskClientFactory) {
        this(
                id,
                groupId,
                taskResource,
                supervisorTaskId,
                numAttempts,
                ingestionSpec,
                context,
                indexingServiceClient,
                taskClientFactory,
                () -> new DedupRowDimensionValueFilter(
                        ingestionSpec.getDataSchema().getGranularitySpec().getQueryGranularity()));
    }

    /**
     * Full constructor; additionally lets the caller supply the de-duplicating filter.
     *
     * @param id                             task id; passed through getOrMakeId together with
     *                                       {@link #TYPE} and the data source name
     * @param groupId                        task group id
     * @param taskResource                   task resource
     * @param supervisorTaskId               id of the task that receives this task's report
     * @param numAttempts                    attempt counter stored on the task
     * @param ingestionSpec                  ingestion spec; its tuning config must carry a
     *                                       {@link SingleDimensionPartitionsSpec}
     * @param context                        task context
     * @param indexingServiceClient          indexing service client
     * @param taskClientFactory              factory for the supervisor task client
     * @param dedupRowDimValueFilterSupplier supplier of the filter used when rows must be de-duplicated
     * @throws IllegalArgumentException if the partitions spec is not a {@link SingleDimensionPartitionsSpec}
     */
    @VisibleForTesting
    PartialDimensionDistributionTask(
            @Nullable String id,
            String groupId,
            TaskResource taskResource,
            String supervisorTaskId,
            int numAttempts,
            ParallelIndexIngestionSpec ingestionSpec,
            Map<String, Object> context,
            IndexingServiceClient indexingServiceClient,
            IndexTaskClientFactory<ParallelIndexSupervisorTaskClient> taskClientFactory,
            Supplier<DedupRowDimensionValueFilter> dedupRowDimValueFilterSupplier) {
        super(
                getOrMakeId(id, TYPE, ingestionSpec.getDataSchema().getDataSource()),
                groupId,
                taskResource,
                ingestionSpec.getDataSchema(),
                ingestionSpec.m37getTuningConfig(),
                context);

        Preconditions.checkArgument(
                ingestionSpec.m37getTuningConfig().getPartitionsSpec() instanceof SingleDimensionPartitionsSpec,
                "%s partitionsSpec required",
                "single_dim");

        this.numAttempts = numAttempts;
        this.ingestionSchema = ingestionSpec;
        this.supervisorTaskId = supervisorTaskId;
        this.indexingServiceClient = indexingServiceClient;
        this.taskClientFactory = taskClientFactory;
        this.dedupRowDimValueFilterSupplier = dedupRowDimValueFilterSupplier;
    }

    /** Returns the attempt counter given at construction; exposed to Jackson via {@code @JsonProperty}. */
    @JsonProperty
    private int getNumAttempts() {
        return numAttempts;
    }

    /** Returns the ingestion spec given at construction; exposed to Jackson as the "spec" property. */
    @JsonProperty("spec")
    private ParallelIndexIngestionSpec getIngestionSchema() {
        return ingestionSchema;
    }

    /** Returns the supervisor task id given at construction; exposed to Jackson via {@code @JsonProperty}. */
    @JsonProperty
    private String getSupervisorTaskId() {
        return supervisorTaskId;
    }

    /** Returns the task type identifier, {@link #TYPE}. */
    @Override // org.apache.druid.indexing.common.task.Task
    public String getType() {
        return PartialDimensionDistributionTask.TYPE;
    }

    /**
     * Delegates to {@code tryTimeChunkLock} with the input intervals of the data schema's
     * granularity spec and returns its result.
     *
     * @param taskActionClient client forwarded to {@code tryTimeChunkLock}
     */
    @Override // org.apache.druid.indexing.common.task.Task
    public boolean isReady(TaskActionClient taskActionClient) throws Exception {
        final GranularitySpec granularitySpec = ingestionSchema.getDataSchema().getGranularitySpec();
        return tryTimeChunkLock(taskActionClient, granularitySpec.inputIntervals());
    }

    /**
     * Reads the input, computes the per-interval distribution of the partition dimension and
     * reports it to the supervisor task.
     *
     * <p>Both the raw row iterator and the handling iterator built on top of it are managed with
     * try-with-resources, so they are closed in reverse order on every exit path and a failure in
     * {@code close()} is attached as a suppressed exception instead of replacing the primary one.
     *
     * @param taskToolbox toolbox providing the temporary directory used by the input source reader
     * @return a success status carrying this task's id
     * @throws NullPointerException if the tuning config has no partitions spec or the partitions
     *                              spec has no partition dimension
     * @throws Exception            anything thrown while reading input, computing the distribution
     *                              or sending the report
     */
    @Override // org.apache.druid.indexing.common.task.AbstractBatchIndexTask
    public TaskStatus runTask(TaskToolbox taskToolbox) throws Exception {
        final DataSchema dataSchema = this.ingestionSchema.getDataSchema();
        final GranularitySpec granularitySpec = dataSchema.getGranularitySpec();
        final ParallelIndexTuningConfig tuningConfig = this.ingestionSchema.m37getTuningConfig();

        // The constructor already verified the spec type with instanceof; the cast makes the
        // narrowing explicit (a null spec passes the cast and is rejected on the next line).
        final SingleDimensionPartitionsSpec partitionsSpec =
                (SingleDimensionPartitionsSpec) tuningConfig.getPartitionsSpec();
        Preconditions.checkNotNull(partitionsSpec, "partitionsSpec required in tuningConfig");
        final String partitionDimension = partitionsSpec.getPartitionDimension();
        Preconditions.checkNotNull(partitionDimension, "partitionDimension required in partitionsSpec");
        final boolean isAssumeGrouped = partitionsSpec.isAssumeGrouped();

        final InputSource inputSource =
                this.ingestionSchema.m38getIOConfig().getNonNullInputSource(dataSchema.getParser());

        // Metric names are the names of the configured aggregators.
        final List<String> metricNames = Arrays.stream(dataSchema.getAggregators())
                .map(aggregator -> aggregator.getName())
                .collect(Collectors.toList());
        final InputRowSchema inputRowSchema =
                new InputRowSchema(dataSchema.getTimestampSpec(), dataSchema.getDimensionsSpec(), metricNames);

        try (
                CloseableIterator<InputRow> inputRows = dataSchema.getTransformSpec().decorate(
                        inputSource.reader(
                                inputRowSchema,
                                // An input format is only resolved for sources that declare they need one.
                                inputSource.needsFormat()
                                        ? ParallelIndexSupervisorTask.getInputFormat(this.ingestionSchema)
                                        : null,
                                taskToolbox.getIndexingTmpDir())).read();
                HandlingInputRowIterator rowIterator =
                        new RangePartitionIndexTaskInputRowIteratorBuilder(partitionDimension, SKIP_NULL)
                                .delegate(inputRows)
                                .granularitySpec(granularitySpec)
                                .nullRowRunnable(IndexTaskInputRowIteratorBuilder.NOOP_RUNNABLE)
                                .absentBucketIntervalConsumer(IndexTaskInputRowIteratorBuilder.NOOP_CONSUMER)
                                .build()
        ) {
            final Map<Interval, StringDistribution> distribution = determineDistribution(
                    rowIterator,
                    granularitySpec,
                    partitionDimension,
                    isAssumeGrouped,
                    tuningConfig.isLogParseExceptions(),
                    tuningConfig.getMaxParseExceptions());
            sendReport(new DimensionDistributionReport(getId(), distribution));
        }

        return TaskStatus.success(getId());
    }

    /**
     * Builds one {@link StringDistribution} per bucket interval from the partition-dimension
     * values of the given rows.
     *
     * <p>A pass-through filter is used when the input is assumed grouped or rollup is disabled;
     * otherwise the supplied de-duplicating filter is used. After all rows are consumed, the
     * per-interval minimum and maximum tracked by the filter are offered to the matching
     * distribution via {@code putIfNewMin} / {@code putIfNewMax}.
     *
     * @param inputRowIterator     rows to consume; {@code null} rows are skipped
     * @param granularitySpec      used to bucket row timestamps and to check whether rollup is on
     * @param partitionDimension   name of the dimension whose values are collected
     * @param isAssumeGrouped      whether the input is assumed to be grouped already
     * @param isLogParseExceptions whether each {@link ParseException} is logged
     * @param maxParseExceptions   number of parse exceptions tolerated before the task fails
     * @return distribution of partition-dimension values keyed by bucket interval
     * @throws RuntimeException if more than {@code maxParseExceptions} parse exceptions occur; the
     *                          last {@link ParseException} is attached as the cause
     */
    private Map<Interval, StringDistribution> determineDistribution(
            HandlingInputRowIterator inputRowIterator,
            GranularitySpec granularitySpec,
            String partitionDimension,
            boolean isAssumeGrouped,
            boolean isLogParseExceptions,
            int maxParseExceptions) {
        final Map<Interval, StringDistribution> intervalToDistribution = new HashMap<>();

        final DimensionValueFilter dimValueFilter;
        if (isAssumeGrouped || !granularitySpec.isRollup()) {
            dimValueFilter = new PassthroughRowDimensionValueFilter();
        } else {
            dimValueFilter = this.dedupRowDimValueFilterSupplier.get();
        }

        int numParseExceptions = 0;
        while (inputRowIterator.hasNext()) {
            try {
                final InputRow inputRow = inputRowIterator.next();
                if (inputRow == null) {
                    continue;
                }

                final DateTime timestamp = inputRow.getTimestamp();
                // NOTE(review): get() assumes the iterator only yields rows whose timestamp falls in
                // a bucket interval -- confirm against the iterator builder's handlers.
                final Interval interval = granularitySpec.bucketInterval(timestamp).get();
                // The distribution is registered before the filter runs, so every interval the
                // filter tracks has an entry when min/max are applied below.
                final StringDistribution distribution =
                        intervalToDistribution.computeIfAbsent(interval, key -> new StringSketch());

                final String dimensionValue = Iterables.getOnlyElement(inputRow.getDimension(partitionDimension));
                final String accepted = dimValueFilter.accept(interval, timestamp, dimensionValue);
                if (accepted != null) {
                    distribution.put(accepted);
                }
            } catch (ParseException e) {
                if (isLogParseExceptions) {
                    LOG.error(e, "Encountered parse exception:");
                }
                numParseExceptions++;
                if (numParseExceptions > maxParseExceptions) {
                    // Keep the triggering parse failure as the cause so it is not lost.
                    throw new RuntimeException("Max parse exceptions exceeded, terminating task...", e);
                }
            }
        }

        dimValueFilter.getIntervalToMinDimensionValue().forEach(
                (interval, minValue) -> intervalToDistribution.get(interval).putIfNewMin(minValue));
        dimValueFilter.getIntervalToMaxDimensionValue().forEach(
                (interval, maxValue) -> intervalToDistribution.get(interval).putIfNewMax(maxValue));

        return intervalToDistribution;
    }

    /**
     * Builds a supervisor task client and sends the given report to the supervisor task.
     *
     * <p>The client is built with this task's id, the literal {@code 1} as third argument, and the
     * chat handler timeout and retry count from the tuning config.
     *
     * @param dimensionDistributionReport report to deliver
     */
    private void sendReport(DimensionDistributionReport dimensionDistributionReport) {
        final ParallelIndexTuningConfig tuningConfig = this.ingestionSchema.m37getTuningConfig();
        this.taskClientFactory
                .build(
                        new ClientBasedTaskInfoProvider(this.indexingServiceClient),
                        getId(),
                        1,
                        tuningConfig.getChatHandlerTimeout(),
                        tuningConfig.getChatHandlerNumRetries())
                .report(this.supervisorTaskId, dimensionDistributionReport);
    }
}
