001/* 002# Licensed Materials - Property of IBM 003# Copyright IBM Corp. 2015 004 */ 005package simple; 006 007import java.util.concurrent.Future; 008 009import com.ibm.streamsx.topology.TStream; 010import com.ibm.streamsx.topology.Topology; 011import com.ibm.streamsx.topology.context.StreamsContextFactory; 012import com.ibm.streamsx.topology.file.FileStreams; 013import com.ibm.streamsx.topology.streams.StringStreams; 014 015/** 016 * Sample continuous (streaming) grep topology application. This Java application builds a 017 * simple topology that watches a directory for files, reads each file and 018 * output lines that contain the search term. 019 * Thus as each file is added to the directory, the application will read 020 * it and output matching lines. 021 * <BR> 022 * The application implements the typical pattern of code that declares a 023 * topology followed by submission of the topology to a Streams context 024 * {@code com.ibm.streamsx.topology.context.StreamsContext}. 025 * <BR> 026 * This demonstrates the a continuous application and use of 027 * utility classes that produce streams. 028 * <P> 029 * <BR> 030 * This may be executed from the {@code samples/java/functional} directory as: 031 * <UL> 032 * <LI> 033 * {@code java -cp functionalsamples.jar:../../../com.ibm.streamsx.topology/lib/com.ibm.streamsx.topology.jar:$STREAMS_INSTALL/lib/com.ibm.streams.operator.samples.jar 034 * simple.Grep CONTEXT_TYPE $HOME/books Elizabeth 035 * } - Run directly from the command line. 036 * </LI> 037 * <i>CONTEXT_TYPE</i> is one of: 038 * <UL> 039 * <LI>{@code DISTRIBUTED} - Run as an IBM Streams distributed 040 * application.</LI> 041 * <LI>{@code STANDALONE} - Run as an IBM Streams standalone 042 * application.</LI> 043 * <LI>{@code EMBEDDED} - Run embedded within this JVM.</LI> 044 * <LI>{@code BUNDLE} - Create an IBM Streams application bundle.</LI> 045 * <LI>{@code TOOLKIT} - Create an IBM Streams application toolkit.</LI> 046 * </UL> 047 * </LI> 048 * <LI> 049 * An application execution within your IDE once you set the class path to include the correct jars.</LI> 050 * </UL> 051 * </P> 052 */ 053public class Grep { 054 055 /** 056 * Sample continuous (streaming) grep topology application. This Java 057 * application builds a simple topology that watches a directory for 058 * files, reads each file and output lines that contain the search term. 059 * Thus as each file is added to the directory, the application will read 060 * it and output matching lines. 061 * <BR> 062 * The application implements the typical pattern of code that declares a 063 * topology followed by submission of the topology to a Streams context {@code 064 * com.ibm.streamsx.topology.context.StreamsContext}. 065 * <P> 066 * Three arguments are required: 067 * <UL> 068 * <LI>{@code contextType} - The type of the context to execute the topology in, e.g. {@code EMBEDDED or STANDALONE}.</LI> 069 * <LI>{@code directory} - Directory to watch for files.</LI> 070 * <LI>{@code term} - Search term, if any line in a file contains {@code term} then it will be printed. 071 * </UL> 072 * For example (classpath omitted for brevity): 073 * <BR> 074 * {@code java simple.Grep EMBEDDED $HOME/books Elizabeth} 075 */ 076 public static void main(String[] args) throws Exception { 077 String contextType = args[0]; 078 String directory = args[1]; 079 String term = args[2]; 080 081 Topology topology = new Topology("Grep"); 082 083 /* 084 * Use the file stream utility class com.ibm.streamsx.topology.file.FileStreams 085 * to declare a stream that will contain file names from the specified directory. 086 * As each new file is created in directory its absolute file path will 087 * appear on fileNames. 088 */ 089 TStream<String> filePaths = FileStreams.directoryWatcher(topology, directory); 090 091 /* 092 * Use the file stream utility class com.ibm.streamsx.topology.file.FileStreams 093 * to declare a stream that will contain the contents of the files. 094 * FileStreams.textFileReader creates a function that for each input 095 * file pat, opens the file and reads its contents as a text file, 096 * producing a tuple for each line of the file. The tuple contains 097 * the contents of the line, as a String. 098 */ 099 TStream<String> lines = FileStreams.textFileReader(filePaths); 100 101 /* 102 * Use the string stream utility class com.ibm.streamsx.topology.streams.StringStreams 103 * to filter out non-matching lines. StringStreams.contains creates a functional 104 * Predicate that will be executed for each tuple on lines, that is each line 105 * read from a file. 106 */ 107 TStream<String> matching = StringStreams.contains(lines, term); 108 109 /* 110 * And print the matching lines to standard out. 111 */ 112 matching.print(); 113 114 /* 115 * Execute the topology, since FileStreams.directoryWatcher declares 116 * a stream that lasts forever, that it is is always watching the directory, 117 * then when submit() returns the application is not complete. 118 * In fact it will never complete. 119 */ 120 Future<?> future = StreamsContextFactory.getStreamsContext(contextType) 121 .submit(topology); 122 123 /* 124 * Let the application run for thirty seconds (30,000ms) 125 * and then cancel it. 126 */ 127 Thread.sleep(30 * 1000); 128 future.cancel(true); 129 } 130}