001/*
002# Licensed Materials - Property of IBM
003# Copyright IBM Corp. 2015  
004 */
005package simple;
006
007import java.util.concurrent.Future;
008
009import com.ibm.streamsx.topology.TStream;
010import com.ibm.streamsx.topology.Topology;
011import com.ibm.streamsx.topology.context.StreamsContextFactory;
012import com.ibm.streamsx.topology.file.FileStreams;
013import com.ibm.streamsx.topology.streams.StringStreams;
014
015/**
016 * Sample continuous (streaming) grep topology application. This Java application builds a
017 * simple topology that watches a directory for files, reads each file and
018 * outputs lines that contain the search term.
019 * Thus as each file is added to the directory, the application will read
020 * it and output matching lines.
021 * <BR>
022 * The application implements the typical pattern of code that declares a
023 * topology followed by submission of the topology to a Streams context
024 * {@code com.ibm.streamsx.topology.context.StreamsContext}.
025 * <BR>
026 * This demonstrates a continuous application and the use of
027 * utility classes that produce streams.
028 * <P>
029 * <BR>
030 * This may be executed from the {@code samples/java/functional} directory as:
031 * <UL>
032 * <LI>
033 * {@code java -cp functionalsamples.jar:../../../com.ibm.streamsx.topology/lib/com.ibm.streamsx.topology.jar:$STREAMS_INSTALL/lib/com.ibm.streams.operator.samples.jar
034 *    simple.Grep CONTEXT_TYPE $HOME/books Elizabeth
035 * } - Run directly from the command line.
036 * </LI>
037 * <i>CONTEXT_TYPE</i> is one of:
038 * <UL>
039 * <LI>{@code DISTRIBUTED} - Run as an IBM Streams distributed
040 * application.</LI>
041 * <LI>{@code STANDALONE} - Run as an IBM Streams standalone
042 * application.</LI>
043 * <LI>{@code EMBEDDED} - Run embedded within this JVM.</LI>
044 * <LI>{@code BUNDLE} - Create an IBM Streams application bundle.</LI>
045 * <LI>{@code TOOLKIT} - Create an IBM Streams application toolkit.</LI>
046 * </UL>
047 * </LI>
048 * <LI>
049 * An application execution within your IDE once you set the class path to include the correct jars.</LI>
050 * </UL>
051 * </P>
052 */
053public class Grep {
054    
055    /**
056     * Sample continuous (streaming) grep topology application. This Java
057     * application builds a simple topology that watches a directory for
058     * files, reads each file and output lines that contain the search term.
059     * Thus as each file is added to the directory, the application will read
060     * it and output matching lines.
061     * <BR>
062     * The application implements the typical pattern of code that declares a
063     * topology followed by submission of the topology to a Streams context {@code
064     * com.ibm.streamsx.topology.context.StreamsContext}.
065     * <P>
066     * Three arguments are required:
067     * <UL>
068     * <LI>{@code contextType} - The type of the context to execute the topology in, e.g. {@code EMBEDDED or STANDALONE}.</LI>
069     * <LI>{@code directory} - Directory to watch for files.</LI>
070     * <LI>{@code term} - Search term, if any line in a file contains {@code term} then it will be printed.
071     * </UL>
072     * For example (classpath omitted for brevity):
073     * <BR>
074     * {@code java simple.Grep EMBEDDED $HOME/books Elizabeth}
075     */
076    public static void main(String[] args) throws Exception {
077        String contextType = args[0];
078        String directory = args[1];
079        String term = args[2];
080
081        Topology topology = new Topology("Grep");
082
083        /*
084         * Use the file stream utility class com.ibm.streamsx.topology.file.FileStreams
085         * to declare a stream that will contain file names from the specified directory.
086         * As each new file is created in directory its absolute file path will
087         * appear on fileNames.
088         */
089        TStream<String> filePaths = FileStreams.directoryWatcher(topology, directory);
090        
091        /* 
092         * Use the file stream utility class com.ibm.streamsx.topology.file.FileStreams
093         * to declare a stream that will contain the contents of the files.
094         * FileStreams.textFileReader creates a function that for each input
095         * file pat, opens the file and reads its contents as a text file,
096         * producing a tuple for each line of the file. The tuple contains
097         * the contents of the line, as a String.
098         */
099        TStream<String> lines = FileStreams.textFileReader(filePaths);
100        
101        /*
102         * Use the string stream utility class com.ibm.streamsx.topology.streams.StringStreams
103         * to filter out non-matching lines. StringStreams.contains creates a functional
104         * Predicate that will be executed for each tuple on lines, that is each line
105         * read from a file.
106         */
107        TStream<String> matching = StringStreams.contains(lines, term);
108        
109        /*
110         * And print the matching lines to standard out.
111         */
112        matching.print();
113
114        /*
115         * Execute the topology, since FileStreams.directoryWatcher declares
116         * a stream that lasts forever, that it is is always watching the directory,
117         * then when submit() returns the application is not complete.
118         * In fact it will never complete.
119         */
120        Future<?> future = StreamsContextFactory.getStreamsContext(contextType)
121                .submit(topology);
122        
123        /*
124         * Let the application run for thirty seconds (30,000ms)
125         * and then cancel it.
126         */
127        Thread.sleep(30 * 1000);
128        future.cancel(true);
129    }
130}