001/*
002# Licensed Materials - Property of IBM
003# Copyright IBM Corp. 2015  
004 */
005package simple;
006
007import java.io.ObjectStreamException;
008import java.util.concurrent.Future;
009import java.util.regex.Matcher;
010import java.util.regex.Pattern;
011
012import com.ibm.streamsx.topology.TStream;
013import com.ibm.streamsx.topology.Topology;
014import com.ibm.streamsx.topology.context.StreamsContextFactory;
015import com.ibm.streamsx.topology.file.FileStreams;
016import com.ibm.streamsx.topology.function.Predicate;
017
018/**
019 * Sample continuous (streaming) regular expression grep topology application.
020 * This is a variant of the {@link Grep} application that demonstrates
021 * filtering using Java functional programming.
022 * This Java application builds a
023 * simple topology that watches a directory for files, reads each file and
 * outputs lines that match a regular expression.
025 * Thus as each file is added to the directory, the application will read
026 * it and output matching lines.
027 * <BR>
028 * The application implements the typical pattern of code that declares a
029 * topology followed by submission of the topology to a Streams context {@code
030 * com.ibm.streamsx.topology.context.StreamsContext}.
031 * <BR>
032 * This demonstrates Java functional programming using an anonymous class.
033 * <P>
034 * <BR>
035 * This may be executed from the {@code samples/java/functional} directory as:
036 * <UL>
037 * <LI>
038 * {@code java -cp functionalsamples.jar:../../../com.ibm.streamsx.topology/lib/com.ibm.streamsx.topology.jar:$STREAMS_INSTALL/lib/com.ibm.streams.operator.samples.jar
039 *   simple.RegexGrep CONTEXT_TYPE $HOME/books ".*Queen.*England.*"
040 * } - Run directly from the command line.
041 * <i>CONTEXT_TYPE</i> is one of:
042 * <UL>
043 * <LI>{@code DISTRIBUTED} - Run as an IBM Streams distributed
044 * application.</LI>
045 * <LI>{@code STANDALONE} - Run as an IBM Streams standalone
046 * application.</LI>
047 * <LI>{@code EMBEDDED} - Run embedded within this JVM.</LI>
048 * <LI>{@code BUNDLE} - Create an IBM Streams application bundle.</LI>
049 * <LI>{@code TOOLKIT} - Create an IBM Streams application toolkit.</LI>
050 * </UL>
051 * </LI>
052 * <LI>
053 * An application execution within your IDE once you set the class path to include the correct jars.</LI>
054 * </UL>
055 * </P>
056 */
057public class RegexGrep {
058    @SuppressWarnings("serial")
059    public static void main(String[] args) throws Exception {
060        String contextType = args[0];
061        String directory = args[1];
062        final Pattern pattern = Pattern.compile(args[2]);
063
064        // Define the topology
065        Topology topology = new Topology("RegexGrep");
066
067        // All streams with tuples that are Java String objects
068        TStream<String> files = FileStreams.directoryWatcher(topology, directory);
069        TStream<String> lines = FileStreams.textFileReader(files);
070        
071        /*
072         * Functional filter using an anonymous class to define the
073         * filtering logic, in this case execution of a regular
074         * expression against each input String tuple (each line
075         * of the files in the directory).
076         */
077        TStream<String> filtered = lines.filter(new Predicate<String>() {
078
079            @Override
080            public boolean test(String v1) {
081                // Pass the line through if it matches the
082                // regular expression pattern
083                return matcher.reset(v1).matches();
084            }
085
086            // Recreate the matcher (which is not serializable)
087            // when the object is deserialized using readResolve.
088            transient Matcher matcher;
089
090            /*
091             * Since the constructor is no invoked after serialization
092             * we use readResolve as a hook to execute initialization
093             * code, in this case creating the matcher from the
094             * pattern. 
095             * The alternative would be to create it on its first use,
096             * which would require an if statement in the test method.
097             */
098            private Object readResolve() throws ObjectStreamException {
099                matcher = pattern.matcher("");
100                return this;
101            }
102        });
103
104        // For debugging just print out the tuples
105        filtered.print();
106
107        // Execute the topology, just like Grep.
108        Future<?> future = StreamsContextFactory.getStreamsContext(contextType)
109                .submit(topology);
110        Thread.sleep(30000);
111        future.cancel(true);
112    }
113}