package org.apache.gobblin.compaction.conditions;

import com.google.common.base.Splitter;
import com.google.common.collect.Maps;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.gobblin.annotation.Alias;
import org.apache.gobblin.compaction.dataset.Dataset;
import org.apache.gobblin.compaction.dataset.DatasetHelper;
import org.apache.gobblin.compaction.mapreduce.MRCompactor;
import org.apache.gobblin.util.DatasetFilterUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A {@link RecompactionCondition} that requests recompaction when the fraction of late records
 * among all output records exceeds a per-dataset threshold.
 *
 * <p>The threshold is looked up from the job property
 * {@link MRCompactor#COMPACTION_LATEDATA_THRESHOLD_FOR_RECOMPACT_PER_DATASET}, whose value is a
 * list of {@code DATASET_PATTERNS:THRESHOLD} entries separated by
 * {@link #DATASETS_WITH_DIFFERENT_RECOMPACT_THRESHOLDS_SEPARATOR}. Within one entry, several
 * dataset patterns sharing the same threshold are separated by
 * {@link #DATASETS_WITH_SAME_RECOMPACT_THRESHOLDS_SEPARATOR}. For example:
 * {@code "a.*,b.*:0.1; c.*:0.5"}.
 */
@Alias("RecompactionConditionBasedOnRatio")
public class RecompactionConditionBasedOnRatio implements RecompactionCondition {
    /** Separates entries that carry different thresholds. */
    public static final char DATASETS_WITH_DIFFERENT_RECOMPACT_THRESHOLDS_SEPARATOR = ';';
    /** Separates dataset patterns that share one threshold. */
    public static final char DATASETS_WITH_SAME_RECOMPACT_THRESHOLDS_SEPARATOR = ',';
    /** Separates the dataset pattern list from its threshold inside one entry. */
    public static final char DATASETS_AND_RECOMPACT_THRESHOLD_SEPARATOR = ':';
    private static final Logger logger = LoggerFactory.getLogger(RecompactionConditionBasedOnRatio.class);

    /** Threshold returned when no configured pattern matches the dataset name. */
    private static final double DEFAULT_RATIO_THRESHOLD = 1.0d;

    /** Late-data ratio above which recompaction is requested for this dataset. */
    private final double ratio;

    /** Factory that builds a ratio-based recompaction condition for a dataset. */
    @Alias(MRCompactor.DEFAULT_COMPACTION_RECOMPACT_CONDITION)
    public static class Factory implements RecompactionConditionFactory {
        /**
         * Creates the ratio-based condition for the given dataset.
         *
         * @param dataset dataset whose job properties and name determine the threshold
         * @return a new {@link RecompactionConditionBasedOnRatio}
         */
        @Override
        public RecompactionCondition createRecompactionCondition(Dataset dataset) {
            return new RecompactionConditionBasedOnRatio(dataset);
        }
    }

    /**
     * Resolves this dataset's threshold from its job properties; an absent property is treated
     * as an empty configuration.
     *
     * @param dataset dataset supplying the job properties and the dataset name
     */
    private RecompactionConditionBasedOnRatio(Dataset dataset) {
        String configured = dataset.jobProps()
                .getProp(MRCompactor.COMPACTION_LATEDATA_THRESHOLD_FOR_RECOMPACT_PER_DATASET, "");
        this.ratio = getOwnRatioThreshold(dataset, getDatasetRegexAndRecompactThreshold(configured));
    }

    /**
     * Parses a threshold configuration string into a map from dataset pattern list to threshold.
     *
     * <p>Entries are trimmed and empty entries are dropped. An entry that does not split into
     * exactly two non-empty parts around {@link #DATASETS_AND_RECOMPACT_THRESHOLD_SEPARATOR} is
     * logged as an error and skipped. The map key is the raw (still comma-separated) pattern list.
     *
     * @param str configuration string, e.g. {@code "a.*,b.*:0.1;c.*:0.5"}
     * @return mutable map of pattern list to threshold; empty if nothing valid was configured
     * @throws NumberFormatException if a threshold part is not a parsable double
     */
    public static Map<String, Double> getDatasetRegexAndRecompactThreshold(String str) {
        Map<String, Double> thresholdsByPatterns = Maps.newHashMap();
        List<String> entries = Splitter.on(DATASETS_WITH_DIFFERENT_RECOMPACT_THRESHOLDS_SEPARATOR)
                .trimResults()
                .omitEmptyStrings()
                .splitToList(str);
        for (String entry : entries) {
            List<String> patternsAndThreshold = Splitter.on(DATASETS_AND_RECOMPACT_THRESHOLD_SEPARATOR)
                    .trimResults()
                    .omitEmptyStrings()
                    .splitToList(entry);
            if (patternsAndThreshold.size() != 2) {
                logger.error("Invalid form (DATASET_NAME:THRESHOLD) in compaction.latedata.threshold.for.recompact.per.topic.");
                continue;
            }
            thresholdsByPatterns.put(patternsAndThreshold.get(0), Double.parseDouble(patternsAndThreshold.get(1)));
        }
        return thresholdsByPatterns;
    }

    /**
     * Looks up the threshold that applies to the given dataset by its name.
     *
     * @param dataset dataset whose name is matched against the configured patterns
     * @param map parsed pattern-list-to-threshold configuration
     * @return the matching threshold, or the default when nothing matches
     */
    private double getOwnRatioThreshold(Dataset dataset, Map<String, Double> map) {
        return getRatioThresholdByDatasetName(dataset.getDatasetName(), map);
    }

    /**
     * Returns the threshold of the first map entry whose pattern list matches the dataset name.
     *
     * <p>Entries are visited in the map's iteration order, so if several entries match, which
     * one wins depends on that order.
     *
     * @param str dataset name to match
     * @param map pattern list (comma-separated) to threshold
     * @return the matched threshold, or {@code 1.0} when no entry matches
     */
    public static double getRatioThresholdByDatasetName(String str, Map<String, Double> map) {
        for (Map.Entry<String, Double> entry : map.entrySet()) {
            List<String> patternStrings = Splitter.on(DATASETS_WITH_SAME_RECOMPACT_THRESHOLDS_SEPARATOR)
                    .trimResults()
                    .omitEmptyStrings()
                    .splitToList(entry.getKey());
            if (DatasetFilterUtils.stringInPatterns(str, DatasetFilterUtils.getPatternsFromStrings(patternStrings))) {
                return entry.getValue();
            }
        }
        return DEFAULT_RATIO_THRESHOLD;
    }

    /**
     * Decides whether recompaction is needed by comparing the late-data ratio
     * {@code late / (late + output)} with this dataset's threshold.
     *
     * @param datasetHelper source of the late and regular output record counts
     * @return {@code true} if the late-data ratio is strictly greater than the threshold
     */
    @Override
    public boolean isRecompactionNeeded(DatasetHelper datasetHelper) {
        // Read the late count once: it is needed in both numerator and denominator.
        long lateRecordCount = datasetHelper.getLateOutputRecordCount();
        long outputRecordCount = datasetHelper.getOutputRecordCount();
        // Floating-point division: when both counts are zero the result is NaN, and the
        // comparison below is then false (no recompaction).
        double lateDataRatio = (lateRecordCount * 1.0d) / (lateRecordCount + outputRecordCount);
        logger.info("Late data ratio is {} and threshold is {}", lateDataRatio, this.ratio);
        return lateDataRatio > this.ratio;
    }
}
