001/*
002# Licensed Materials - Property of IBM
003# Copyright IBM Corp. 2015  
004 */
005package state;
006
007import java.util.Random;
008
009import org.apache.commons.math.stat.descriptive.moment.Mean;
010import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
011
012import com.ibm.streamsx.topology.TStream;
013import com.ibm.streamsx.topology.Topology;
014import com.ibm.streamsx.topology.context.StreamsContextFactory;
015import com.ibm.streamsx.topology.function.Predicate;
016import com.ibm.streamsx.topology.function.Supplier;
017
018/**
019 * Finds outliers from a sequence of doubles (e.g. simulating a sensor reading).
020 * 
021 * Demonstrates function logic that maintains state across tuples.
022 *
023 */
024public class FindOutliers {
025
026    public static void main(String[] args) throws Exception {
027
028        final double threshold = args.length == 0 ? 2.0 : Double
029                .parseDouble(args[0]);
030
031        Topology t = new Topology("StandardDeviationFilter");
032
033        final Random rand = new Random();
034
035        // Produce a stream of random double values with a normal
036        // distribution, mean 0.0 and standard deviation 1.
037        TStream<Double> values = t.limitedSource(new Supplier<Double>() {
038            private static final long serialVersionUID = 1L;
039
040            @Override
041            public Double get() {
042                return rand.nextGaussian();
043            }
044
045        }, 100000);
046
047        /*
048         * Filters the values based on calculating the mean and standard
049         * deviation from the incoming data. In this case only outliers are
050         * present in the output stream outliers. A outlier is defined as one
051         * more than (threshold*standard deviation) from the mean.
052         * 
053         * This demonstrates an anonymous functional logic class that is
054         * stateful. The two fields mean and sd maintain their values across
055         * multiple invocations of the test method, that is for multiple tuples.
056         * 
057         * Note both Mean & StandardDeviation classes are serializable.
058         */
059        TStream<Double> outliers = values.filter(new Predicate<Double>() {
060
061            private static final long serialVersionUID = 1L;
062            private final Mean mean = new Mean();
063            private final StandardDeviation sd = new StandardDeviation();
064
065            @Override
066            public boolean test(Double tuple) {
067                mean.increment(tuple);
068                sd.increment(tuple);
069
070                double multpleSd = threshold * sd.getResult();
071                double absMean = Math.abs(mean.getResult());
072                double absTuple = Math.abs(tuple);
073
074                return absTuple > absMean + multpleSd;
075            }
076        });
077
078        outliers.print();
079
080        StreamsContextFactory.getEmbedded().submit(t).get();
081    }
082}