/*
 * Decompiled with CFR 0.152.
 */
package org.apache.beam.sdk.transforms;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.LongStream;
import org.apache.beam.repackaged.beam_sdks_java_core.com.google.common.collect.ImmutableList;
import org.apache.beam.repackaged.beam_sdks_java_core.com.google.common.collect.Lists;
import org.apache.beam.sdk.TestUtils;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.DoubleCoder;
import org.apache.beam.sdk.testing.CombineFnTester;
import org.apache.beam.sdk.testing.NeedsRunner;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.transforms.ApproximateUnique;
import org.apache.beam.sdk.transforms.Combine;
import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.Distinct;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.transforms.View;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.transforms.display.DisplayDataMatchers;
import org.apache.beam.sdk.transforms.display.HasDisplayData;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;
import org.hamcrest.Matcher;
import org.hamcrest.MatcherAssert;
import org.hamcrest.Matchers;
import org.hamcrest.core.Is;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.junit.runners.Parameterized;

public class ApproximateUniqueTest
implements Serializable {
    /**
     * Pipeline used by the tests in this class and its nested subclasses, registered as a JUnit
     * {@link Rule}. It is declared {@code transient} so that it is left out when an instance of
     * this {@link Serializable} class is serialized.
     */
    @Rule
    public final transient TestPipeline p = TestPipeline.create();

    /**
     * Asserts that {@code estimate} is an acceptable approximation of {@code uniqueCount}.
     *
     * <p>When {@code uniqueCount} is smaller than {@code sampleSize} the estimate must be exactly
     * equal to {@code uniqueCount}. Otherwise the relative error, expressed in percent, must be
     * strictly below {@code 200 / sqrt(sampleSize)}.
     *
     * @param uniqueCount the true number of distinct elements
     * @param sampleSize the sample size the estimate was computed with
     * @param estimate the estimated number of distinct elements
     */
    private static void verifyEstimate(long uniqueCount, int sampleSize, long estimate) {
        if (uniqueCount < sampleSize) {
            Assert.assertEquals("Number of hashes is less than the sample size. Estimate should be exact", uniqueCount, estimate);
            // An exact match has zero error, so the relative check below adds nothing here.
            // Returning also avoids 0/0 = NaN when uniqueCount is 0, which would make the
            // "error < maxError" comparison fail even though the estimate is correct.
            return;
        }
        double error = 100.0 * Math.abs(estimate - uniqueCount) / uniqueCount;
        double maxError = 200.0 / Math.sqrt(sampleSize);
        Assert.assertTrue("Estimate=" + estimate + " Actual=" + uniqueCount + " Error=" + error + "%, MaxError=" + maxError + "%.", error < maxError);
    }

    /**
     * Builds a matcher for an estimate of {@code uniqueCount} computed with {@code sampleSize}.
     *
     * <p>If {@code uniqueCount} does not exceed {@code sampleSize}, the matcher accepts only
     * {@code uniqueCount} itself. Otherwise it accepts any value strictly between
     * {@code uniqueCount - maxError} and {@code uniqueCount + maxError}, where
     * {@code maxError = ceil(2 * uniqueCount / sqrt(sampleSize))}.
     *
     * @param uniqueCount the true number of distinct elements
     * @param sampleSize the sample size the estimate was computed with
     * @return a matcher over the estimated count
     */
    private static Matcher<Long> estimateIsWithinRangeFor(long uniqueCount, int sampleSize) {
        if (uniqueCount > sampleSize) {
            double rawTolerance = Math.ceil(2.0 * uniqueCount / Math.sqrt(sampleSize));
            long tolerance = (long) rawTolerance;
            Matcher<Long> belowUpperBound = Matchers.lessThan(uniqueCount + tolerance);
            Matcher<Long> aboveLowerBound = Matchers.greaterThan(uniqueCount - tolerance);
            return Matchers.both(belowUpperBound).and(aboveLowerBound);
        }
        return Is.is(uniqueCount);
    }

    @RunWith(value=JUnit4.class)
    public static class ApproximateUniqueMiscTest
    extends ApproximateUniqueTest {
        @Test
        public void testEstimationErrorToSampleSize() {
            Assert.assertEquals((long)40000L, (long)ApproximateUnique.sampleSizeFromEstimationError((double)0.01));
            Assert.assertEquals((long)10000L, (long)ApproximateUnique.sampleSizeFromEstimationError((double)0.02));
            Assert.assertEquals((long)2500L, (long)ApproximateUnique.sampleSizeFromEstimationError((double)0.04));
            Assert.assertEquals((long)1600L, (long)ApproximateUnique.sampleSizeFromEstimationError((double)0.05));
            Assert.assertEquals((long)400L, (long)ApproximateUnique.sampleSizeFromEstimationError((double)0.1));
            Assert.assertEquals((long)100L, (long)ApproximateUnique.sampleSizeFromEstimationError((double)0.2));
            Assert.assertEquals((long)25L, (long)ApproximateUnique.sampleSizeFromEstimationError((double)0.4));
            Assert.assertEquals((long)16L, (long)ApproximateUnique.sampleSizeFromEstimationError((double)0.5));
        }

        @Test
        @Category(value={NeedsRunner.class})
        public void testApproximateUniqueWithSmallInput() {
            PCollection input = (PCollection)this.p.apply((PTransform)Create.of(Arrays.asList(1, 2, 3, 3)));
            PCollection estimate = (PCollection)input.apply((PTransform)ApproximateUnique.globally((int)1000));
            PAssert.thatSingleton((PCollection)estimate).isEqualTo((Object)3L);
            this.p.run();
        }

        @Test
        @Category(value={NeedsRunner.class})
        public void testApproximateUniqueWithSkewedDistributionsAndLargeSampleSize() {
            this.runApproximateUniqueWithSkewedDistributions(10000, 2000, 1000);
        }

        private void runApproximateUniqueWithSkewedDistributions(int elementCount, int uniqueCount, int sampleSize) {
            ArrayList<Integer> elements = Lists.newArrayList();
            double s = 1.0 - 1.0 * (double)uniqueCount / (double)elementCount;
            double maxCount = Math.pow(uniqueCount, s);
            for (int k = 0; k < uniqueCount; ++k) {
                int count = Math.max(1, (int)Math.round(maxCount * Math.pow(k, -s)));
                for (int c = 0; c < count; ++c) {
                    elements.add(k);
                }
            }
            PCollection input = (PCollection)this.p.apply((PTransform)Create.of(elements));
            PCollection estimate = (PCollection)input.apply((PTransform)ApproximateUnique.globally((int)sampleSize));
            PAssert.thatSingleton((PCollection)estimate).satisfies((SerializableFunction)new VerifyEstimateFn(uniqueCount, sampleSize));
            this.p.run();
        }

        @Test
        @Category(value={NeedsRunner.class})
        public void testApproximateUniquePerKey() {
            ArrayList<KV> elements = Lists.newArrayList();
            ImmutableList<Long> keys = ImmutableList.of(Long.valueOf(20L), Long.valueOf(50L), Long.valueOf(100L));
            int elementCount = 1000;
            int sampleSize = 100;
            Iterator iterator = keys.iterator();
            while (iterator.hasNext()) {
                long uniqueCount = (Long)iterator.next();
                for (long value = 0L; value < 1000L; ++value) {
                    elements.add(KV.of((Object)uniqueCount, (Object)(value % uniqueCount)));
                }
            }
            PCollection input = (PCollection)this.p.apply((PTransform)Create.of(elements));
            PCollection counts = (PCollection)input.apply((PTransform)ApproximateUnique.perKey((int)100));
            PAssert.that((PCollection)counts).satisfies((SerializableFunction)new VerifyEstimatePerKeyFn(100));
            this.p.run();
        }

        @Test
        public void testApproximateUniqueGetName() {
            Assert.assertEquals((Object)"ApproximateUnique.PerKey", (Object)ApproximateUnique.perKey((int)16).getName());
            Assert.assertEquals((Object)"ApproximateUnique.Globally", (Object)ApproximateUnique.globally((int)16).getName());
        }

        @Test
        public void testDisplayData() {
            ApproximateUnique.Globally specifiedSampleSize = ApproximateUnique.globally((int)1234);
            ApproximateUnique.PerKey specifiedMaxError = ApproximateUnique.perKey((double)0.1234);
            MatcherAssert.assertThat((Object)DisplayData.from((HasDisplayData)specifiedSampleSize), DisplayDataMatchers.hasDisplayItem("sampleSize", 1234L));
            DisplayData maxErrorDisplayData = DisplayData.from((HasDisplayData)specifiedMaxError);
            MatcherAssert.assertThat((Object)maxErrorDisplayData, DisplayDataMatchers.hasDisplayItem("maximumEstimationError", 0.1234));
            MatcherAssert.assertThat((String)"calculated sampleSize should be included", (Object)maxErrorDisplayData, DisplayDataMatchers.hasDisplayItem("sampleSize"));
        }
    }

    /**
     * Tests of {@link ApproximateUnique.ApproximateUniqueCombineFn} driven directly through
     * {@link CombineFnTester}, without running a pipeline.
     */
    @RunWith(JUnit4.class)
    public static class ApproximateUniqueCombineFnTest {
        /**
         * Feeds {@code elementCount} doubles containing {@code uniqueCount} distinct values to the
         * combine fn and checks the result against
         * {@link ApproximateUniqueTest#estimateIsWithinRangeFor(long, int)}.
         *
         * @param elementCount total number of input elements
         * @param uniqueCount modulus used to generate the values, i.e. the number of distinct
         *     values once {@code elementCount >= uniqueCount}
         * @param sampleSize sample size the combine fn is constructed with
         */
        private void runCombineFnTest(long elementCount, long uniqueCount, int sampleSize) {
            List<Double> values = new ArrayList<>();
            for (long index = 0L; index < elementCount; ++index) {
                long bucket = index % uniqueCount + 1L;
                values.add(1.0 / (double) bucket);
            }
            Matcher<Long> expectedRange = ApproximateUniqueTest.estimateIsWithinRangeFor(uniqueCount, sampleSize);
            ApproximateUnique.ApproximateUniqueCombineFn<Double> combineFn =
                    new ApproximateUnique.ApproximateUniqueCombineFn<>(sampleSize, DoubleCoder.of());
            CombineFnTester.testCombineFn(combineFn, values, expectedRange);
        }

        /** 1000 elements, 100 distinct, sample size 16. */
        @Test
        public void testFnWithSmallerFractionOfUniques() {
            this.runCombineFnTest(1000L, 100L, 16);
        }

        /** 1000 elements, 800 distinct, sample size 100. */
        @Test
        public void testWithLargerFractionOfUniques() {
            this.runCombineFnTest(1000L, 800L, 100);
        }

        /** 200 elements, 100 distinct, sample size 150 (larger than the distinct count). */
        @Test
        public void testWithLargeSampleSize() {
            this.runCombineFnTest(200L, 100L, 150);
        }
    }

    /**
     * Parameterized test that runs {@link ApproximateUnique#globally(int)} with several sample
     * sizes over {@link TestUtils#LINES} repeated {@link #TEST_PAGES} times.
     */
    @RunWith(Parameterized.class)
    public static class ApproximateUniqueVariationsTest
    extends ApproximateUniqueTest {
        /** Number of times {@link TestUtils#LINES} is repeated in {@link #TEST_LINES}. */
        private static final int TEST_PAGES = 100;
        private static final List<String> TEST_LINES = new ArrayList<>(TEST_PAGES * TestUtils.LINES.size());

        static {
            for (int page = 0; page < TEST_PAGES; ++page) {
                TEST_LINES.addAll(TestUtils.LINES);
            }
        }

        /** Sample size for the current parameterized run. */
        @Parameterized.Parameter
        public int sampleSize;

        /** Supplies the sample sizes to test; 15 is below the minimum the transform accepts. */
        @Parameterized.Parameters(name="sampleSize_{0}")
        public static Iterable<Object[]> data() throws IOException {
            return ImmutableList.<Object[]>builder()
                    .add(
                            new Object[] {16},
                            new Object[] {64},
                            new Object[] {128},
                            new Object[] {256},
                            new Object[] {512},
                            new Object[] {1000},
                            new Object[] {2014},
                            new Object[] {15})
                    .build();
        }

        /**
         * Computes both the approximate and the exact distinct count of {@link #TEST_LINES} and
         * verifies the approximation with {@link VerifyEstimatePerKeyFn}.
         *
         * @param sampleSize the sample size passed to {@link ApproximateUnique#globally(int)}
         */
        private void runApproximateUniquePipeline(int sampleSize) {
            PCollection<String> input = this.p.apply(Create.of(TEST_LINES));
            PCollection<Long> approximate = input.apply(ApproximateUnique.globally(sampleSize));
            final PCollectionView<Long> exact =
                    input.apply(Distinct.create()).apply(Count.globally()).apply(View.asSingleton());
            PCollection<KV<Long, Long>> exactAndApproximate = approximate.apply(ParDo.of(new DoFn<Long, KV<Long, Long>>(){

                @ProcessElement
                public void processElement(ProcessContext c) {
                    // VerifyEstimatePerKeyFn reads the key as the true count and the value as
                    // the estimate, so the exact count goes first. Emitting (estimate, exact)
                    // would measure the relative error against the estimate instead.
                    c.output(KV.of(c.sideInput(exact), c.element()));
                }
            }).withSideInputs(exact));
            PAssert.that(exactAndApproximate).satisfies(new VerifyEstimatePerKeyFn(sampleSize));
            this.p.run();
        }

        /**
         * For sample sizes above 16, runs the pipeline and verifies the estimate. Otherwise
         * expects building the pipeline with a sample size of 15 to be rejected with an
         * {@link IllegalArgumentException}.
         */
        @Test
        @Category(NeedsRunner.class)
        public void testApproximateUniqueWithDifferentSampleSizes() {
            if (this.sampleSize > 16) {
                this.runApproximateUniquePipeline(this.sampleSize);
                return;
            }
            // NOTE(review): the parameter value 16 also lands here and is exercised with the
            // hard-coded 15, so a sample size of exactly 16 is never run through the pipeline.
            // The expected message says ">= 16" - confirm whether "> 16" above is intentional.
            try {
                // The pipeline is left partially built when the transform rejects the sample size.
                this.p.enableAbandonedNodeEnforcement(false);
                this.runApproximateUniquePipeline(15);
                Assert.fail("Accepted sampleSize < 16");
            }
            catch (IllegalArgumentException e) {
                Assert.assertTrue("Expected an exception due to sampleSize < 16", e.getMessage().startsWith("ApproximateUnique needs a sampleSize >= 16"));
            }
        }
    }

    /**
     * Parameterized test that estimates the distinct count of inputs containing duplicates, for
     * several combinations of total element count, distinct count and sample size.
     */
    @RunWith(Parameterized.class)
    public static class ApproximateUniqueWithDuplicatesTest
    extends ApproximateUniqueTest {
        /** Total number of input elements for the current run. */
        @Parameterized.Parameter
        public int elementCount;
        /** Number of distinct values among the input elements for the current run. */
        @Parameterized.Parameter(1)
        public int uniqueCount;
        /** Sample size for the current run. */
        @Parameterized.Parameter(2)
        public int sampleSize;

        /** Supplies {@code {elementCount, uniqueCount, sampleSize}} triples. */
        @Parameterized.Parameters(name="total_{0}_unique_{1}_sample_{2}")
        public static Iterable<Object[]> data() throws IOException {
            return ImmutableList.<Object[]>builder()
                    .add(
                            new Object[] {100, 100, 100},
                            new Object[] {1000, 1000, 100},
                            new Object[] {1500, 1000, 100},
                            new Object[] {10000, 1000, 100})
                    .build();
        }

        /**
         * Generates {@code elementCount} doubles that cycle through {@code uniqueCount} distinct
         * values, shuffles them, estimates the distinct count globally and verifies the estimate
         * with {@link VerifyEstimateFn}.
         *
         * @param elementCount total number of elements to generate
         * @param uniqueCount number of distinct values; asserted not to exceed {@code elementCount}
         * @param sampleSize the sample size passed to {@link ApproximateUnique#globally(int)}
         */
        private void runApproximateUniqueWithDuplicates(int elementCount, int uniqueCount, int sampleSize) {
            assert elementCount >= uniqueCount;
            List<Double> values = new ArrayList<>();
            for (int index = 0; index < elementCount; ++index) {
                int bucket = index % uniqueCount + 1;
                values.add(1.0 / (double) bucket);
            }
            Collections.shuffle(values);
            PCollection<Double> input = this.p.apply(Create.of(values));
            PCollection<Long> estimate = input.apply(ApproximateUnique.globally(sampleSize));
            VerifyEstimateFn verifier = new VerifyEstimateFn(uniqueCount, sampleSize);
            PAssert.thatSingleton(estimate).satisfies(verifier);
            this.p.run();
        }

        /** Runs the duplicates scenario with the current parameter triple. */
        @Test
        @Category(NeedsRunner.class)
        public void testApproximateUniqueWithDuplicates() {
            this.runApproximateUniqueWithDuplicates(this.elementCount, this.uniqueCount, this.sampleSize);
        }
    }

    private static class VerifyEstimatePerKeyFn
    implements SerializableFunction<Iterable<KV<Long, Long>>, Void> {
        private final int sampleSize;

        private VerifyEstimatePerKeyFn(int sampleSize) {
            this.sampleSize = sampleSize;
        }

        public Void apply(Iterable<KV<Long, Long>> estimatePerKey) {
            for (KV<Long, Long> result : estimatePerKey) {
                ApproximateUniqueTest.verifyEstimate((Long)result.getKey(), this.sampleSize, (Long)result.getValue());
            }
            return null;
        }
    }

    private static class VerifyEstimateFn
    implements SerializableFunction<Long, Void> {
        private final long uniqueCount;
        private final int sampleSize;

        private VerifyEstimateFn(long uniqueCount, int sampleSize) {
            this.uniqueCount = uniqueCount;
            this.sampleSize = sampleSize;
        }

        public Void apply(Long estimate) {
            ApproximateUniqueTest.verifyEstimate(this.uniqueCount, this.sampleSize, estimate);
            return null;
        }
    }
}

