package com.spotify.ratatool.samplers;

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import com.google.common.hash.HashCode;
import com.google.common.hash.Hasher;
import com.spotify.ratatool.Command;
import com.spotify.ratatool.avro.specific.TestRecord;
import com.spotify.ratatool.samplers.util.Approximate$;
import com.spotify.ratatool.samplers.util.ByteEncoding;
import com.spotify.ratatool.samplers.util.ByteEncoding$;
import com.spotify.ratatool.samplers.util.ByteHasher$;
import com.spotify.ratatool.samplers.util.Determinism;
import com.spotify.ratatool.samplers.util.Determinism$;
import com.spotify.ratatool.samplers.util.Deterministic$;
import com.spotify.ratatool.samplers.util.Exact$;
import com.spotify.ratatool.samplers.util.FarmHash$;
import com.spotify.ratatool.samplers.util.HashAlgorithm;
import com.spotify.ratatool.samplers.util.HashAlgorithm$;
import com.spotify.ratatool.samplers.util.NonDeterministic$;
import com.spotify.ratatool.samplers.util.Precision;
import com.spotify.ratatool.samplers.util.Precision$;
import com.spotify.ratatool.samplers.util.RawEncoding$;
import com.spotify.ratatool.samplers.util.SampleDistribution;
import com.spotify.ratatool.samplers.util.SampleDistribution$;
import com.spotify.ratatool.samplers.util.SamplerSCollectionFunctions;
import com.spotify.ratatool.samplers.util.SamplerSCollectionFunctions$;
import com.spotify.ratatool.samplers.util.StratifiedDistribution$;
import com.spotify.ratatool.samplers.util.UniformDistribution$;
import com.spotify.scio.Args;
import com.spotify.scio.ContextAndArgs$;
import com.spotify.scio.ScioContext;
import com.spotify.scio.ScioContext$;
import com.spotify.scio.coders.Coder;
import com.spotify.scio.coders.Coder$;
import com.spotify.scio.io.ClosedTap;
import com.spotify.scio.values.SCollection;
import com.spotify.scio.values.SCollection$;
import com.spotify.scio.values.SideInput;
import java.net.URI;
import java.nio.charset.Charset;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.beam.runners.dataflow.options.DataflowPipelineWorkerPoolOptions;
import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers;
import org.apache.beam.sdk.options.PipelineOptions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Function1;
import scala.Function3;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.Tuple3;
import scala.Tuple9;
import scala.collection.ArrayOps$;
import scala.collection.StringOps$;
import scala.collection.immutable.List;
import scala.collection.immutable.Seq;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;
import scala.runtime.LazyRef;
import scala.util.Try$;

/* compiled from: BigSampler.scala */
/* loaded from: input_file:com/spotify/ratatool/samplers/BigSampler$.class */
public final class BigSampler$ implements Command {
    /** Singleton instance; the rest of this class refers to it through {@code MODULE$}. */
    public static final BigSampler$ MODULE$ = new BigSampler$();
    /** Command name returned by {@code command()} and interpolated into the usage text. */
    private static final String command = "bigSampler";
    /** Logger for this class; used for the argument warnings emitted by {@code singleInput}. */
    private static final Logger log = LoggerFactory.getLogger(MODULE$.getClass());
    /** UTF-8 charset that is passed to {@code ByteHasher$.wrap} whenever a hasher is built. */
    private static final Charset utf8Charset = Charset.forName("UTF-8");
    /** Separator character exposed via {@code fieldSep()}; presumably splits nested field paths — confirm with callers. */
    private static final char fieldSep = '.';

    /**
     * Returns the sub-command name of this tool.
     *
     * @return the constant command string
     */
    public String command() {
        final String name = command;
        return name;
    }

    /**
     * Accessor for the class-level logger.
     *
     * @return the shared logger instance
     */
    private Logger log() {
        final Logger logger = log;
        return logger;
    }

    /**
     * Accessor for the charset used when hashers are wrapped.
     *
     * @return the shared UTF-8 charset instance
     */
    public Charset utf8Charset() {
        final Charset charset = utf8Charset;
        return charset;
    }

    /**
     * Accessor for the field separator character.
     *
     * @return the separator constant
     */
    public char fieldSep() {
        final char separator = fieldSep;
        return separator;
    }

    /**
     * Creates a hasher by delegating to the given algorithm.
     *
     * @param hashAlgorithm algorithm that supplies the hasher
     * @param option optional seed, forwarded unchanged to {@code hashFn}
     * @return the hasher produced by {@code hashAlgorithm.hashFn(option)}
     */
    public Hasher hashFun(HashAlgorithm hashAlgorithm, Option<Object> option) {
        final Hasher hasher = hashAlgorithm.hashFn(option);
        return hasher;
    }

    /**
     * Default value for the first parameter of {@code hashFun}.
     *
     * @return the {@code FarmHash$} singleton
     */
    public HashAlgorithm hashFun$default$1() {
        final HashAlgorithm fallback = FarmHash$.MODULE$;
        return fallback;
    }

    /**
     * Default value for the second parameter of {@code hashFun}.
     *
     * @return the {@code None$} singleton, i.e. no seed
     */
    public Option<Object> hashFun$default$2() {
        final Option<Object> noSeed = None$.MODULE$;
        return noSeed;
    }

    /**
     * Maps a signed 64-bit value linearly onto the closed interval [0.0, 1.0].
     *
     * <p>{@code Long.MIN_VALUE} maps to 0.0, {@code Long.MAX_VALUE} maps to 1.0 and 0 maps to 0.5.
     * The arithmetic is done in {@code double} on purpose: both {@code j - Long.MIN_VALUE} and
     * {@code Long.MAX_VALUE - Long.MIN_VALUE} overflow when evaluated as {@code long}
     * (the latter wraps to -1), which would produce values far outside the unit interval.
     *
     * @param j the value to normalise, typically a 64-bit hash
     * @return the position of {@code j} within the long range, as a fraction in [0.0, 1.0]
     */
    public double boundLong(long j) {
        // Widen every operand before subtracting so that no intermediate result wraps around.
        double offset = (double) j - (double) Long.MIN_VALUE;
        double span = (double) Long.MAX_VALUE - (double) Long.MIN_VALUE;
        return offset / span;
    }

    /**
     * Decides whether an element is kept, based on its hash.
     *
     * <p>The 64-bit hash is normalised with {@code boundLong} and compared against the threshold.
     *
     * @param t the candidate element
     * @param hashCode hash computed for the element
     * @param d threshold the normalised hash must be strictly below for the element to be kept
     * @return {@code Some(t)} when the element is kept, otherwise {@code None}
     */
    public <T> Option<T> diceElement(T t, HashCode hashCode, double d) {
        double roll = boundLong(hashCode.asLong());
        boolean selected = roll < d;
        return selected ? new Some(t) : None$.MODULE$;
    }

    /**
     * Attempts to interpret a string as a BigQuery table spec.
     *
     * @param str candidate table spec
     * @return the parsed reference, or an empty option when {@code parseTableSpec} throws
     */
    private Option<TableReference> parseAsBigQueryTable(String str) {
        return Try$.MODULE$
                .apply(() -> BigQueryHelpers.parseTableSpec(str))
                .toOption();
    }

    /**
     * Attempts to interpret a string as a URI.
     *
     * @param str candidate URI text
     * @return the parsed URI, or an empty option when the {@code URI} constructor throws
     */
    private Option<URI> parseAsURI(String str) {
        return Try$.MODULE$
                .apply(() -> new URI(str))
                .toOption();
    }

    /**
     * Prints the command-line help text through {@code Predef.println} and then calls
     * {@code scala.sys.exit(1)}.
     *
     * <p>The text is assembled from a margin-delimited literal (stripped with
     * {@code stripMargin}) with the value of {@code command()} interpolated into the usage line.
     */
    private void usage() {
        // NOTE(review): the string literal below continues onto the next physical line in this
        // listing; this looks like an extraction artifact — confirm before editing the text.
        Predef$.MODULE$.println(StringOps$.MODULE$.stripMargin$extension(Predef$.MODULE$.augmentString(new StringBuilder(2556).append("BigSampler - a tool for big data sampling\n        |Usage: ratatool ").append(command()).append(" [dataflow_options] [options]\n        |\n        |  --sample=<percentage>                      Percentage of records to take in sample, a decimal between 0.0 and 1.0\n        |  --input=<path>                             Input file path or BigQuery table\n        |  --output=<path>                            Output file path or BigQuery table\n        |  [--fields=<field1,field2,...>]             An optional list of fields to include in hashing for sampling cohort selection\n        |  [--seed=<seed>]                            An optional seed used in hashing for sampling cohort selection\n        |  [--hashAlgorithm=(murmur|farm)]            An optional arg to select the hashing algorithm for sampling cohort selection. Defaults to FarmHash for BigQuery compatibility\n        |  [--distribution=(uniform|stratified)]      An optional arg to sample for a stratified or uniform distribution. Must provide `distributionFields`\n        |  [--distributionFields=<field1,field2,...>] An optional list of fields to sample for distribution. Must provide `distribution`\n        |  [--exact]                                  An optional arg for higher precision distribution sampling.\n        |  [--byteEncoding=(raw|hex|base64)]          An optional arg for how to encode fields of type bytes: raw bytes, hex encoded string, or base64 encoded string. Default is to hash raw bytes.\n        |\n        |Since this runs a Scio/Beam pipeline, Dataflow options will have to be provided. 
At a\n        |minimum, the following should be specified:\n        |\n        |   --project=<gcp-project-id>                GCP Project used to run your job\n        |   --runner=DataflowRunner                   Executes the job on Google Cloud Dataflow\n        |   --tempLocation=<gcs-path>                 Location for temporary files. GCS bucket must be created prior to running job.\n        |\n        |The following options are recommended, but may not be necessary.\n        |\n        |   --serviceAccount=<your-service-account>   Service account used on Dataflow workers. Useful to avoid permissions issues.\n        |   --workerMachineType=<machine-type>        Can be tweaked based on your specific needs, but is not necessary.\n        |   --maxNumWorkers=<num-workers>             Limits the number of workers (machines) used in the job to avoid using up quota.\n        |\n        |For more details regarding Dataflow options see here: https://cloud.google.com/dataflow/pipelines/specifying-exec-params\n      ").toString())));
        // The help path always ends the process with exit status 1.
        throw scala.sys.package$.MODULE$.exit(1);
    }

    /**
     * Feeds one field of a BigQuery row into a hasher by delegating to
     * {@code BigSamplerBigQuery$.hashTableRow}.
     *
     * @param tableRow row to read from
     * @param str name of the field to hash
     * @param seq table field schemas, handed to the delegate through a supplier
     * @param hasher hasher to update
     * @return the hasher returned by the delegate
     */
    public Hasher hashTableRow(TableRow tableRow, String str, Seq<TableFieldSchema> seq, Hasher hasher) {
        return BigSamplerBigQuery$.MODULE$.hashTableRow(() -> seq, tableRow, str, hasher);
    }

    /**
     * Feeds one field of a {@code TestRecord} into a hasher by delegating to
     * {@code BigSamplerAvro$.hashAvroField}, treating the record as a {@code GenericRecord}.
     *
     * @param testRecord record to read from
     * @param str name of the field to hash
     * @param schema Avro schema passed to the delegate
     * @param hasher hasher to update
     * @return the hasher returned by the delegate
     */
    public Hasher hashAvroField(TestRecord testRecord, String str, Schema schema, Hasher hasher) {
        final GenericRecord asGeneric = (GenericRecord) testRecord;
        return BigSamplerAvro$.MODULE$.hashAvroField(schema, asGeneric, str, hasher);
    }

    /**
     * Feeds one field of a generic Avro record into a hasher by delegating to
     * {@code BigSamplerAvro$.hashAvroField}.
     *
     * @param genericRecord record to read from
     * @param str name of the field to hash
     * @param schema Avro schema passed to the delegate
     * @param hasher hasher to update
     * @return the hasher returned by the delegate
     */
    public Hasher hashAvroField(GenericRecord genericRecord, String str, Schema schema, Hasher hasher) {
        final Hasher updated = BigSamplerAvro$.MODULE$.hashAvroField(schema, genericRecord, str, hasher);
        return updated;
    }

    /**
     * Parses the command-line arguments, validates them, and dispatches to the BigQuery or
     * Avro sampler depending on what the {@code input} argument parses as.
     *
     * <p>An input that parses as a BigQuery table spec requires an output that does too; an
     * input that parses as a URI requires a URI output and is handled by the Avro sampler.
     *
     * @param strArr raw command-line arguments (pipeline options plus sampler options)
     * @return the tap returned by the selected sampler
     * @throws IllegalArgumentException if a distribution is given without distributionFields
     * @throws UnsupportedOperationException if the input is neither a table spec nor a URI
     */
    public ClosedTap<?> singleInput(String[] strArr) {
        Tuple2 apply = ContextAndArgs$.MODULE$.apply(strArr);
        if (apply == null) {
            throw new MatchError(apply);
        }
        Tuple2 tuple2 = new Tuple2((ScioContext) apply._1(), (Args) apply._2());
        ScioContext scioContext = (ScioContext) tuple2._1();
        Args args = (Args) tuple2._2();
        // The arguments are parsed a second time to obtain the PipelineOptions object itself.
        Tuple2 parseArguments = ScioContext$.MODULE$.parseArguments(strArr, ScioContext$.MODULE$.parseArguments$default$2(), ClassTag$.MODULE$.apply(PipelineOptions.class));
        if (parseArguments == null) {
            throw new MatchError(parseArguments);
        }
        PipelineOptions pipelineOptions = (PipelineOptions) parseArguments._1();
        // Size parameter handed to the samplers: the trailing "-"-separated token of the worker
        // machine type is parsed as an int (4 if anything in that chain fails); values above 8
        // select 1e9, everything else 1e6.
        // NOTE(review): presumably a per-key size bound for exact sampling — confirm in the samplers.
        int i = BoxesRunTime.unboxToInt(Try$.MODULE$.apply(() -> {
            return ((DataflowPipelineWorkerPoolOptions) pipelineOptions).getWorkerMachineType();
        }).map(str -> {
            return BoxesRunTime.boxToInteger($anonfun$singleInput$2(str));
        }).getOrElse(() -> {
            return 4;
        })) > 8 ? (int) 1.0E9d : (int) 1000000.0d;
        // Read the sampler arguments; any failure in there goes through usage().
        Tuple9 liftedTree1$1 = liftedTree1$1(args);
        if (liftedTree1$1 == null) {
            throw new MatchError(liftedTree1$1);
        }
        Tuple9 tuple9 = new Tuple9(BoxesRunTime.boxToFloat(BoxesRunTime.unboxToFloat(liftedTree1$1._1())), (String) liftedTree1$1._2(), (String) liftedTree1$1._3(), (List) liftedTree1$1._4(), (Option) liftedTree1$1._5(), (HashAlgorithm) liftedTree1$1._6(), (Option) liftedTree1$1._7(), (List) liftedTree1$1._8(), (Precision) liftedTree1$1._9());
        // Tuple layout: sample, input, output, fields, seed, hashAlgorithm, distribution,
        // distributionFields, precision.
        float unboxToFloat = BoxesRunTime.unboxToFloat(tuple9._1());
        String str2 = (String) tuple9._2();
        String str3 = (String) tuple9._3();
        List<String> list = (List) tuple9._4();
        Option option = (Option) tuple9._5();
        HashAlgorithm hashAlgorithm = (HashAlgorithm) tuple9._6();
        Option<SampleDistribution> option2 = (Option) tuple9._7();
        List<String> list2 = (List) tuple9._8();
        Precision precision = (Precision) tuple9._9();
        // byteEncoding falls back to "raw" when the argument is absent.
        ByteEncoding fromString = ByteEncoding$.MODULE$.fromString(args.getOrElse("byteEncoding", () -> {
            return "raw";
        }));
        // Missing optional arguments only produce warnings; the run continues.
        if (list.isEmpty()) {
            log().warn("No fields to hash on specified, won't guarantee cohorts between datasets.");
        }
        if (option.isEmpty()) {
            log().warn("No seed specified, won't guarantee cohorts between datasets.");
        }
        if (option2.isEmpty()) {
            log().warn("No distribution specified, won't guarantee output distribution");
        }
        // A distribution without distributionFields is a hard error.
        if (option2.isDefined() && list2.isEmpty()) {
            throw new IllegalArgumentException("distributionFields must be specified if a distribution is given");
        }
        // BigQuery path: both input and output must parse as table specs.
        if (parseAsBigQueryTable(str2).isDefined()) {
            Predef$.MODULE$.require(parseAsBigQueryTable(str3).isDefined(), () -> {
                return new StringBuilder(0).append(new StringBuilder(68).append("Input is a BigQuery table `").append(str2).append("`, output should be a BigQuery table too,").toString()).append(new StringBuilder(20).append("but instead it's `").append(str3).append("`.").toString()).toString();
            });
            return BigSamplerBigQuery$.MODULE$.sample(scioContext, (TableReference) parseAsBigQueryTable(str2).get(), (TableReference) parseAsBigQueryTable(str3).get(), list, unboxToFloat, option.map(str4 -> {
                return BoxesRunTime.boxToInteger($anonfun$singleInput$9(str4));
            }), hashAlgorithm, option2, list2, precision, i, fromString);
        }
        // Anything that is not even a URI cannot be handled.
        if (!parseAsURI(str2).isDefined()) {
            throw new UnsupportedOperationException(new StringBuilder(22).append("Input `").append(str2).append(" not supported.").toString());
        }
        // URI path: the output must be a URI as well; the seed string is converted to an int.
        Predef$.MODULE$.require(parseAsURI(str3).isDefined(), () -> {
            return new StringBuilder(68).append("Input is a URI: `").append(str2).append("`, output should be a URI too, but instead it's `").append(str3).append("`.").toString();
        });
        FileSystems.setDefaultPipelineOptions(pipelineOptions);
        return BigSamplerAvro$.MODULE$.sample(scioContext, str2, str3, list, unboxToFloat, option.map(str5 -> {
            return BoxesRunTime.boxToInteger($anonfun$singleInput$11(str5));
        }), hashAlgorithm, option2, list2, precision, i, fromString);
    }

    /**
     * Samples a collection, choosing the strategy from three inputs: the determinism derived
     * from {@code seq} via {@code Determinism$.fromSeq}, the optional distribution, and the
     * precision. Each supported combination is handled by one of the cases below, tried in order.
     *
     * @param sCollection collection to sample
     * @param d sample threshold/fraction forwarded to every strategy
     * @param seq field names folded into the hash for the hash-based strategies
     * @param option optional seed passed to {@code hashFun}
     * @param hashAlgorithm algorithm used to create hashers
     * @param option2 optional target distribution
     * @param seq2 distribution field names; not read inside this method
     * @param precision approximate or exact sampling
     * @param function3 updates a hasher from (element, field name, hasher)
     * @param function1 extracts the key used by the distribution-aware strategies
     * @param i size argument forwarded to {@code exactSampleDist}
     * @param byteEncoding encoding passed to {@code ByteHasher$.wrap}
     * @return the sampled collection
     * @throws UnsupportedOperationException if no case matches the combination
     */
    public <T, U> SCollection<T> sample(SCollection<T> sCollection, double d, Seq<String> seq, Option<Object> option, HashAlgorithm hashAlgorithm, Option<SampleDistribution> option2, Seq<String> seq2, Precision precision, Function3<T, String, Hasher, Hasher> function3, Function1<T, U> function1, int i, ByteEncoding byteEncoding, ClassTag<T> classTag, Coder<T> coder, ClassTag<U> classTag2, Coder<U> coder2) {
        SCollection<T> exactSampleDist;
        // Holder for the lazily created logger passed to logDistributionDiffs.
        LazyRef lazyRef = new LazyRef();
        Tuple3 tuple3 = new Tuple3(Determinism$.MODULE$.fromSeq(seq), option2, precision);
        // Case 1: non-deterministic, no distribution, approximate -> built-in SCollection.sample.
        if (tuple3 != null) {
            Determinism determinism = (Determinism) tuple3._1();
            Option option3 = (Option) tuple3._2();
            Precision precision2 = (Precision) tuple3._3();
            if (NonDeterministic$.MODULE$.equals(determinism) && None$.MODULE$.equals(option3) && Approximate$.MODULE$.equals(precision2)) {
                exactSampleDist = sCollection.sample(false, d);
                return exactSampleDist;
            }
        }
        // Case 2: non-deterministic, some distribution, approximate -> sampleDist.
        if (tuple3 != null) {
            Determinism determinism2 = (Determinism) tuple3._1();
            Some some = (Option) tuple3._2();
            Precision precision3 = (Precision) tuple3._3();
            if (NonDeterministic$.MODULE$.equals(determinism2) && (some instanceof Some)) {
                SampleDistribution sampleDistribution = (SampleDistribution) some.value();
                if (Approximate$.MODULE$.equals(precision3)) {
                    exactSampleDist = SamplerSCollectionFunctions$.MODULE$.RatatoolSCollection(sCollection, classTag).sampleDist(sampleDistribution, function1, d, classTag2, coder2, coder);
                    return exactSampleDist;
                }
            }
        }
        // Case 3: deterministic, no distribution, approximate -> hash the fields of each
        // element and keep it when diceElement accepts the hash.
        if (tuple3 != null) {
            Determinism determinism3 = (Determinism) tuple3._1();
            Option option4 = (Option) tuple3._2();
            Precision precision4 = (Precision) tuple3._3();
            if (Deterministic$.MODULE$.equals(determinism3) && None$.MODULE$.equals(option4) && Approximate$.MODULE$.equals(precision4)) {
                exactSampleDist = sCollection.flatMap(obj -> {
                    return MODULE$.diceElement(obj, ((Hasher) seq.foldLeft(ByteHasher$.MODULE$.wrap(MODULE$.hashFun(hashAlgorithm, option), byteEncoding, MODULE$.utf8Charset()), (hasher, str) -> {
                        return (Hasher) function3.apply(obj, str, hasher);
                    })).hash(), d);
                }, coder);
                return exactSampleDist;
            }
        }
        // Case 4: deterministic, stratified, approximate -> same hash-based dice as case 3,
        // then key the result, log the stratified distribution diffs and drop the keys again.
        if (tuple3 != null) {
            Determinism determinism4 = (Determinism) tuple3._1();
            Some some2 = (Option) tuple3._2();
            Precision precision5 = (Precision) tuple3._3();
            if (Deterministic$.MODULE$.equals(determinism4) && (some2 instanceof Some)) {
                if (StratifiedDistribution$.MODULE$.equals((SampleDistribution) some2.value()) && Approximate$.MODULE$.equals(precision5)) {
                    SCollection<Tuple2<U, T>> keyBy = sCollection.flatMap(obj2 -> {
                        return MODULE$.diceElement(obj2, ((Hasher) seq.foldLeft(ByteHasher$.MODULE$.wrap(MODULE$.hashFun(hashAlgorithm, option), byteEncoding, MODULE$.utf8Charset()), (hasher, str) -> {
                            return (Hasher) function3.apply(obj2, str, hasher);
                        })).hash(), d);
                    }, coder).keyBy(obj3 -> {
                        return function1.apply(obj3);
                    }, coder2);
                    SamplerSCollectionFunctions$.MODULE$.logDistributionDiffs(SamplerSCollectionFunctions$.MODULE$.buildStratifiedDiffs(sCollection, keyBy, function1, d, SamplerSCollectionFunctions$.MODULE$.buildStratifiedDiffs$default$5(), classTag, coder, classTag2, coder2), logSerDe$1(lazyRef), classTag2);
                    exactSampleDist = SCollection$.MODULE$.makePairSCollectionFunctions(keyBy).values();
                    return exactSampleDist;
                }
            }
        }
        // Case 5: deterministic, uniform, approximate -> hash-join each keyed element with the
        // per-key value from uniformParams and use that value as the dice threshold.
        if (tuple3 != null) {
            Determinism determinism5 = (Determinism) tuple3._1();
            Some some3 = (Option) tuple3._2();
            Precision precision6 = (Precision) tuple3._3();
            if (Deterministic$.MODULE$.equals(determinism5) && (some3 instanceof Some)) {
                if (UniformDistribution$.MODULE$.equals((SampleDistribution) some3.value()) && Approximate$.MODULE$.equals(precision6)) {
                    Tuple2<SideInput<Object>, SCollection<Tuple2<U, Object>>> uniformParams = SamplerSCollectionFunctions$.MODULE$.uniformParams(sCollection, function1, d, classTag, coder, classTag2, coder2);
                    if (uniformParams == null) {
                        throw new MatchError(uniformParams);
                    }
                    Tuple2 tuple2 = new Tuple2((SideInput) uniformParams._1(), (SCollection) uniformParams._2());
                    SideInput<Object> sideInput = (SideInput) tuple2._1();
                    SCollection<Tuple2<U, T>> flatMap = SCollection$.MODULE$.makePairHashSCollectionFunctions(sCollection.keyBy(obj4 -> {
                        return function1.apply(obj4);
                    }, coder2)).hashJoin((SCollection) tuple2._2()).flatMap(tuple22 -> {
                        if (tuple22 != null) {
                            Object _1 = tuple22._1();
                            // NOTE(review): this local reuses the lambda parameter's name, which
                            // looks like a decompiler artifact — confirm before compiling.
                            Tuple2 tuple22 = (Tuple2) tuple22._2();
                            if (tuple22 != null) {
                                Object _12 = tuple22._1();
                                double _2$mcD$sp = tuple22._2$mcD$sp();
                                return MODULE$.diceElement(_12, ((Hasher) seq.foldLeft(ByteHasher$.MODULE$.wrap(MODULE$.hashFun(hashAlgorithm, option), byteEncoding, MODULE$.utf8Charset()), (hasher, str) -> {
                                    return (Hasher) function3.apply(_12, str, hasher);
                                })).hash(), _2$mcD$sp).map(obj5 -> {
                                    return new Tuple2(_1, obj5);
                                });
                            }
                        }
                        throw new MatchError(tuple22);
                    }, Coder$.MODULE$.tuple2Coder(coder2, coder));
                    SamplerSCollectionFunctions$.MODULE$.logDistributionDiffs(SamplerSCollectionFunctions$.MODULE$.buildUniformDiffs(sCollection, flatMap, function1, d, sideInput, SamplerSCollectionFunctions$.MODULE$.buildUniformDiffs$default$6(), classTag, coder, classTag2, coder2), logSerDe$1(lazyRef), classTag2);
                    exactSampleDist = SCollection$.MODULE$.makePairSCollectionFunctions(flatMap).values();
                    return exactSampleDist;
                }
            }
        }
        // Case 6: non-deterministic, some distribution, exact -> assignRandomRoll followed by
        // exactSampleDist with its default fifth argument.
        if (tuple3 != null) {
            Determinism determinism6 = (Determinism) tuple3._1();
            Some some4 = (Option) tuple3._2();
            Precision precision7 = (Precision) tuple3._3();
            if (NonDeterministic$.MODULE$.equals(determinism6) && (some4 instanceof Some)) {
                SampleDistribution sampleDistribution2 = (SampleDistribution) some4.value();
                if (Exact$.MODULE$.equals(precision7)) {
                    SamplerSCollectionFunctions.RatatoolKVDSCollection<T, U> RatatoolKVDSCollection = SamplerSCollectionFunctions$.MODULE$.RatatoolKVDSCollection(SamplerSCollectionFunctions$.MODULE$.assignRandomRoll(sCollection, function1, classTag, coder, classTag2, coder2), classTag, classTag2);
                    exactSampleDist = RatatoolKVDSCollection.exactSampleDist(sampleDistribution2, function1, d, i, RatatoolKVDSCollection.exactSampleDist$default$5(), coder, coder2);
                    return exactSampleDist;
                }
            }
        }
        // Case 7: deterministic, some distribution, exact -> hash-derived rolls from
        // assignHashRoll$1 followed by exactSampleDist with 1.0E-6 as the fifth argument.
        if (tuple3 != null) {
            Determinism determinism7 = (Determinism) tuple3._1();
            Some some5 = (Option) tuple3._2();
            Precision precision8 = (Precision) tuple3._3();
            if (Deterministic$.MODULE$.equals(determinism7) && (some5 instanceof Some)) {
                SampleDistribution sampleDistribution3 = (SampleDistribution) some5.value();
                if (Exact$.MODULE$.equals(precision8)) {
                    exactSampleDist = SamplerSCollectionFunctions$.MODULE$.RatatoolKVDSCollection(assignHashRoll$1(sCollection, option, seq, hashAlgorithm, byteEncoding, function3, function1, coder2, coder), classTag, classTag2).exactSampleDist(sampleDistribution3, function1, d, i, 1.0E-6d, coder, coder2);
                    return exactSampleDist;
                }
            }
        }
        // No case matched (for example exact precision without a distribution).
        throw new UnsupportedOperationException("This sampling mode is not currently supported");
    }

    /**
     * Default value for the eleventh parameter of {@code sample}.
     *
     * @return one million
     */
    public <T, U> int sample$default$11() {
        final int oneMillion = 1_000_000;
        return oneMillion;
    }

    /**
     * Default value for the twelfth parameter of {@code sample}.
     *
     * @return the {@code RawEncoding$} singleton
     */
    public <T, U> ByteEncoding sample$default$12() {
        final ByteEncoding fallback = RawEncoding$.MODULE$;
        return fallback;
    }

    /**
     * Entry point of the command: runs {@code singleInput} and discards the returned tap.
     *
     * @param strArr raw command-line arguments
     */
    public void run(String[] strArr) {
        this.singleInput(strArr);
    }

    /**
     * Parses the last "-"-separated token of a string as an int.
     *
     * @param str text to split, e.g. a worker machine type
     * @return the integer value of the final token
     */
    public static final /* synthetic */ int $anonfun$singleInput$2(String str) {
        String[] tokens = str.split("-");
        String lastToken = (String) ArrayOps$.MODULE$.last$extension(Predef$.MODULE$.refArrayOps(tokens));
        return StringOps$.MODULE$.toInt$extension(Predef$.MODULE$.augmentString(lastToken));
    }

    /**
     * Reads the sampler arguments and packs them into a nine-element tuple: sample, input,
     * output, fields, seed, hashAlgorithm, distribution, distributionFields, precision.
     *
     * <p>The sample value must lie in (0, 1]. Any failure while reading or validating calls
     * {@code usage()} and then rethrows the original throwable.
     *
     * @param args parsed command-line arguments
     * @return the tuple of argument values
     */
    private final /* synthetic */ Tuple9 liftedTree1$1(Args args) {
        try {
            float sampleRate = StringOps$.MODULE$.toFloat$extension(Predef$.MODULE$.augmentString(args.apply("sample")));
            Predef$.MODULE$.require(sampleRate > 0.0f && sampleRate <= 1.0f);
            return new Tuple9(
                    BoxesRunTime.boxToFloat(sampleRate),
                    args.apply("input"),
                    args.apply("output"),
                    args.list("fields"),
                    args.optional("seed"),
                    // The hash algorithm falls back to FarmHash when the argument is absent.
                    args.optional("hashAlgorithm")
                            .map(name -> HashAlgorithm$.MODULE$.fromString(name))
                            .getOrElse(() -> FarmHash$.MODULE$),
                    args.optional("distribution")
                            .map(name -> SampleDistribution$.MODULE$.fromString(name)),
                    args.list("distributionFields"),
                    Precision$.MODULE$.fromBoolean(args.boolean("exact", false)));
        } catch (Throwable failure) {
            usage();
            throw failure;
        }
    }

    /**
     * Converts the seed string to an int for the BigQuery sampling path.
     *
     * @param str seed text
     * @return the parsed integer
     */
    public static final /* synthetic */ int $anonfun$singleInput$9(String str) {
        return StringOps$.MODULE$.toInt$extension(
                Predef$.MODULE$.augmentString(str));
    }

    /**
     * Converts the seed string to an int for the Avro sampling path.
     *
     * @param str seed text
     * @return the parsed integer
     */
    public static final /* synthetic */ int $anonfun$singleInput$11(String str) {
        return StringOps$.MODULE$.toInt$extension(
                Predef$.MODULE$.augmentString(str));
    }

    /**
     * Pairs every element with its key and a hash-derived roll.
     *
     * <p>Each element becomes {@code (key, (element, roll))}: the key comes from
     * {@code function1}, and the roll is the element's field hash normalised by
     * {@code boundLong}. The hash is built by folding {@code function3} over the field names in
     * {@code seq}, starting from a freshly wrapped hasher.
     */
    private static final SCollection assignHashRoll$1(SCollection sCollection, Option option, Seq seq, HashAlgorithm hashAlgorithm, ByteEncoding byteEncoding, Function3 function3, Function1 function1, Coder coder, Coder coder2) {
        return sCollection.map(
                element -> new Tuple2(
                        function1.apply(element),
                        new Tuple2(element, BoxesRunTime.boxToDouble(MODULE$.boundLong(((Hasher) seq.foldLeft(
                                ByteHasher$.MODULE$.wrap(MODULE$.hashFun(hashAlgorithm, option), byteEncoding, MODULE$.utf8Charset()),
                                (acc, fieldName) -> (Hasher) function3.apply(element, fieldName, acc))).hash().asLong())))),
                Coder$.MODULE$.tuple2Coder(coder, Coder$.MODULE$.tuple2Coder(coder2, Coder$.MODULE$.doubleCoder())));
    }

    /**
     * Slow path of the lazy logger: while holding the holder's monitor, returns the stored
     * logger if one is already present, otherwise creates one for this class and stores it.
     *
     * @param lazyRef holder for the lazily created logger
     * @return the logger held by {@code lazyRef}
     */
    private final /* synthetic */ Logger logSerDe$lzycompute$1(LazyRef lazyRef) {
        synchronized (lazyRef) {
            if (lazyRef.initialized()) {
                return (Logger) lazyRef.value();
            }
            return (Logger) lazyRef.initialize(LoggerFactory.getLogger(getClass()));
        }
    }

    /**
     * Fast path of the lazy logger: returns the stored logger when the holder is already
     * initialized, otherwise falls back to {@code logSerDe$lzycompute$1}.
     *
     * @param lazyRef holder for the lazily created logger
     * @return the logger held by {@code lazyRef}
     */
    private final Logger logSerDe$1(LazyRef lazyRef) {
        if (lazyRef.initialized()) {
            return (Logger) lazyRef.value();
        }
        return logSerDe$lzycompute$1(lazyRef);
    }

    /** Private constructor; within this file the only instance created is the one stored in {@code MODULE$}. */
    private BigSampler$() {
    }
}
