package com.spotify.ratatool.samplers;

import com.spotify.ratatool.GcsConfiguration;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Array$;
import scala.MatchError;
import scala.Option;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.Iterator;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.generic.GenericTraversableTemplate;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.ListBuffer;
import scala.collection.mutable.ListBuffer$;
import scala.concurrent.Await$;
import scala.concurrent.ExecutionContext$Implicits$;
import scala.concurrent.Future;
import scala.concurrent.Future$;
import scala.concurrent.duration.Duration$;
import scala.math.Numeric$LongIsIntegral$;
import scala.math.Ordering$String$;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;
import scala.util.Random;
import scala.util.Random$;

/* compiled from: AvroSampler.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005\ra\u0001B\u0001\u0003\u0001-\u00111\"\u0011<s_N\u000bW\u000e\u001d7fe*\u00111\u0001B\u0001\tg\u0006l\u0007\u000f\\3sg*\u0011QAB\u0001\te\u0006$\u0018\r^8pY*\u0011q\u0001C\u0001\bgB|G/\u001b4z\u0015\u0005I\u0011aA2p[\u000e\u00011c\u0001\u0001\r%A\u0011Q\u0002E\u0007\u0002\u001d)\tq\"A\u0003tG\u0006d\u0017-\u0003\u0002\u0012\u001d\t1\u0011I\\=SK\u001a\u00042a\u0005\u000b\u0017\u001b\u0005\u0011\u0011BA\u000b\u0003\u0005\u001d\u0019\u0016-\u001c9mKJ\u0004\"a\u0006\u0011\u000e\u0003aQ!!\u0007\u000e\u0002\u000f\u001d,g.\u001a:jG*\u00111\u0004H\u0001\u0005CZ\u0014xN\u0003\u0002\u001e=\u00051\u0011\r]1dQ\u0016T\u0011aH\u0001\u0004_J<\u0017BA\u0011\u0019\u000559UM\\3sS\u000e\u0014VmY8sI\"A1\u0005\u0001B\u0001B\u0003%A%\u0001\u0003qCRD\u0007CA\u0013+\u001b\u00051#BA\u0014)\u0003\t17O\u0003\u0002*9\u00051\u0001.\u00193p_BL!a\u000b\u0014\u0003\tA\u000bG\u000f\u001b\u0005\t[\u0001\u0011)\u0019!C\t]\u0005!1/Z3e+\u0005y\u0003cA\u00071e%\u0011\u0011G\u0004\u0002\u0007\u001fB$\u0018n\u001c8\u0011\u00055\u0019\u0014B\u0001\u001b\u000f\u0005\u0011auN\\4\t\u0011Y\u0002!\u0011!Q\u0001\n=\nQa]3fI\u0002BQ\u0001\u000f\u0001\u0005\u0002e\na\u0001P5oSRtDc\u0001\u001e<yA\u00111\u0003\u0001\u0005\u0006G]\u0002\r\u0001\n\u0005\b[]\u0002\n\u00111\u00010\u0011\u001dq\u0004A1A\u0005\n}\na\u0001\\8hO\u0016\u0014X#\u0001!\u0011\u0005\u0005#U\"\u0001\"\u000b\u0005\rs\u0012!B:mMRR\u0017BA#C\u0005\u0019aunZ4fe\"1q\t\u0001Q\u0001\n\u0001\u000bq\u0001\\8hO\u0016\u0014\b\u0005C\u0003J\u0001\u0011%!*\u0001\bhKR4\u0015\u000e\\3D_:$X\r\u001f;\u0016\u0003-\u0003\"!\n'\n\u000553#a\u0003$jY\u0016\u001cuN\u001c;fqRDQa\u0014\u0001\u0005BA\u000baa]1na2,GcA)^?B\u0019!K\u0017\f\u000f\u0005MCfB\u0001+X\u001b\u0005)&B\u0001,\u000b\u0003\u0019a$o\\8u}%\tq\"\u0003\u0002Z\u001d\u00059\u0001/Y2lC\u001e,\u0017BA.]\u0005\r\u0019V-\u001d\u0006\u00033:AQA\u0018(A\u0002I\n\u0011A\u001c\u0005\u0006A:\u0003\r!Y\u0001\u0005Q\u0016\fG\r\u0005\u0002\u000eE&\u00111M\u0004\u0002\b\u0005>|G
.Z1o\u0011\u0015)\u0007\u0001\"\u0003g\u00031\u00198-\u00197f/\u0016Lw\r\u001b;t)\r9'\u000e\u001c\t\u0004\u001b!\u0014\u0014BA5\u000f\u0005\u0015\t%O]1z\u0011\u0015YG\r1\u0001h\u0003\u001d9X-[4iiNDQA\u00183A\u0002I:qA\u001c\u0002\u0002\u0002#\u0005q.A\u0006BmJ|7+Y7qY\u0016\u0014\bCA\nq\r\u001d\t!!!A\t\u0002E\u001c\"\u0001\u001d\u0007\t\u000ba\u0002H\u0011A:\u0015\u0003=Dq!\u001e9\u0012\u0002\u0013\u0005a/A\u000e%Y\u0016\u001c8/\u001b8ji\u0012:'/Z1uKJ$C-\u001a4bk2$HEM\u000b\u0002o*\u0012q\u0006_\u0016\u0002sB\u0011!p`\u0007\u0002w*\u0011A0`\u0001\nk:\u001c\u0007.Z2lK\u0012T!A \b\u0002\u0015\u0005tgn\u001c;bi&|g.C\u0002\u0002\u0002m\u0014\u0011#\u001e8dQ\u0016\u001c7.\u001a3WCJL\u0017M\\2f\u0001")
/* loaded from: input_file:com/spotify/ratatool/samplers/AvroSampler.class */
/**
 * Samples Avro {@link GenericRecord}s from a Hadoop-compatible path.
 *
 * <p>If the path is a single file, sampling is delegated to one {@code AvroFileSampler}.
 * Otherwise the path is listed, entries whose name ends with {@code .avro} are sorted by
 * their full path string, and the sample is assembled from those files.
 */
public class AvroSampler implements Sampler<GenericRecord> {
    private final Path path;
    private final Option<Object> seed;
    private final Logger logger;
    // Not final: the value is written through the synthetic trait setter below, which is a
    // regular method and therefore cannot assign a final field.
    private Random random;

    /**
     * Forwards to the implementation inherited from {@link Sampler}.
     *
     * <p>NOTE(review): assumes {@code Sampler} exposes {@code nextLong} as a default
     * interface method (Scala 2.12 trait encoding) - confirm against the compiled trait.
     */
    @Override // com.spotify.ratatool.samplers.Sampler
    public long nextLong(long j) {
        // Must target the interface implementation explicitly; an unqualified call would
        // resolve to this very method and recurse forever.
        return Sampler.super.nextLong(j);
    }

    /** Returns the random generator stored by the trait setter. */
    @Override // com.spotify.ratatool.samplers.Sampler
    public Random random() {
        return this.random;
    }

    /** Synthetic trait setter that stores the random generator for this sampler. */
    @Override // com.spotify.ratatool.samplers.Sampler
    public void com$spotify$ratatool$samplers$Sampler$_setter_$random_$eq(Random random) {
        this.random = random;
    }

    /** Returns the optional seed supplied at construction time. */
    @Override // com.spotify.ratatool.samplers.Sampler
    public Option<Object> seed() {
        return this.seed;
    }

    private Logger logger() {
        return this.logger;
    }

    /** Creates a new Hadoop {@link FileContext} from the GCS configuration. */
    private FileContext getFileContext() {
        return FileContext.getFileContext(GcsConfiguration.get());
    }

    /**
     * Takes a sample of records from the configured path.
     *
     * <p>For a directory with {@code z == false}, each {@code .avro} file is sampled in its
     * own future with a quota computed by {@link #scaleWeights(long[], long)} from the file
     * lengths, and the per-file results are concatenated. With {@code z == true} the files
     * are read one after another, in sorted order, until at least {@code j} records have
     * been collected or no files remain.
     *
     * <p>The seed is forwarded only in the single-file case; per-file samplers created for
     * a directory use the default third constructor argument of {@code AvroFileSampler}.
     *
     * @param j number of records requested; must be greater than zero
     * @param z head flag, passed through to every {@code AvroFileSampler}
     * @return the sampled records
     * @throws IllegalArgumentException if {@code j} is not positive (raised by {@code require})
     */
    @Override // com.spotify.ratatool.samplers.Sampler
    public Seq<GenericRecord> sample(long j, boolean z) {
        Predef$.MODULE$.require(j > 0, () -> {
            return "n must be > 0";
        });
        logger().info("Taking a sample of {} from Avro {}", BoxesRunTime.boxToLong(j), this.path);
        FileSystem fileSystem = FileSystem.get(this.path.toUri(), GcsConfiguration.get());
        if (fileSystem.isFile(this.path)) {
            return new AvroFileSampler(getFileContext(), this.path, seed()).sample(j, z);
        }

        // Directory case: keep only *.avro entries, ordered by their full path string.
        FileStatus[] avroStatuses = (FileStatus[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(fileSystem.listStatus(this.path, new PathFilter() { // from class: com.spotify.ratatool.samplers.AvroSampler$$anon$1
            @Override
            public boolean accept(Path candidate) {
                return candidate.getName().endsWith(".avro");
            }
        }))).sortBy(status -> {
            return status.getPath().toString();
        }, Ordering$String$.MODULE$);
        Path[] avroPaths = (Path[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(avroStatuses)).map(status -> {
            return status.getPath();
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Path.class)));

        if (!z) {
            // File lengths act as weights when splitting the requested count across files.
            long[] fileLengths = (long[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(avroStatuses)).map(status -> {
                return BoxesRunTime.boxToLong(status.getLen());
            }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.Long()));
            long[] quotas = scaleWeights(fileLengths, j);

            // Pair every path with its quota, then start one future per pair.
            Object[] pathsWithQuota = (Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(avroPaths)).zip(Predef$.MODULE$.wrapLongArray(quotas), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
            Object[] pendingSamples = (Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(pathsWithQuota)).map(tuple2 -> {
                if (tuple2 == null) {
                    throw new MatchError(tuple2);
                }
                Path filePath = (Path) tuple2._1();
                long quota = tuple2._2$mcJ$sp();
                // The FileContext is created eagerly here, outside the future body.
                FileContext fileContext = this.getFileContext();
                return Future$.MODULE$.apply(() -> {
                    return new AvroFileSampler(fileContext, filePath, AvroFileSampler$.MODULE$.$lessinit$greater$default$3()).sample(quota, z);
                }, ExecutionContext$Implicits$.MODULE$.global());
            }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Future.class)));

            // Block without a timeout until every per-file sample is done, then flatten.
            return ((GenericTraversableTemplate) Await$.MODULE$.result(Future$.MODULE$.sequence(new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(pendingSamples)).toSeq(), Seq$.MODULE$.canBuildFrom(), ExecutionContext$Implicits$.MODULE$.global()), Duration$.MODULE$.Inf())).flatten(Predef$.MODULE$.$conforms());
        }

        // Head mode: read a single file at a time until enough records are collected.
        // NOTE(review): each file is asked for the full j records, so the result may hold
        // more than j entries when earlier files fall short - confirm this is intended.
        ListBuffer collected = ListBuffer$.MODULE$.empty();
        Iterator pathIterator = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(avroPaths)).iterator();
        while (collected.size() < j && pathIterator.hasNext()) {
            collected.appendAll(new AvroFileSampler(getFileContext(), (Path) pathIterator.next(), AvroFileSampler$.MODULE$.$lessinit$greater$default$3()).sample(j, z));
        }
        return collected;
    }

    /**
     * Scales the given weights so that the returned values sum up to {@code j}.
     *
     * <p>Each weight is converted to its proportional share of {@code j}, truncated towards
     * zero. Whatever is lost to truncation is then handed out one unit at a time to randomly
     * chosen positions (using the global Scala {@code Random}, not the sampler's seed).
     *
     * @param jArr weights to scale
     * @param j target sum
     * @return a new array of the same length as {@code jArr} whose elements sum to {@code j}
     * @throws IllegalArgumentException if the sum of the weights is not greater than {@code j}
     */
    private long[] scaleWeights(long[] jArr, long j) {
        long total = 0L;
        for (long weight : jArr) {
            total += weight;
        }
        Predef$.MODULE$.require(total > j, () -> {
            return "sum of weights must be > n";
        });

        int length = jArr.length;
        long[] scaled = new long[length];
        long assigned = 0L;
        for (int idx = 0; idx < length; idx++) {
            // Divide in floating point: integer division would truncate every share to 0
            // whenever a single weight is smaller than the total.
            scaled[idx] = (long) (((double) jArr[idx] / total) * j);
            assigned += scaled[idx];
        }

        // Randomly distribute the remainder caused by truncation.
        long remainder = j - assigned;
        for (long handedOut = 0; handedOut < remainder; handedOut++) {
            int slot = Random$.MODULE$.nextInt(length);
            scaled[slot] = scaled[slot] + 1;
        }
        return scaled;
    }

    /**
     * Creates a sampler for the given path.
     *
     * @param path file or directory to sample from
     * @param option optional seed, exposed through {@link #seed()}
     */
    public AvroSampler(Path path, Option<Object> option) {
        this.path = path;
        this.seed = option;
        // Trait initializer; presumably populates `random` through the synthetic setter -
        // TODO confirm against Sampler.
        Sampler.$init$(this);
        this.logger = LoggerFactory.getLogger(AvroSampler.class);
    }
}
