001/* 002# Licensed Materials - Property of IBM 003# Copyright IBM Corp. 2015 004 */ 005package simple; 006 007import java.io.ObjectStreamException; 008import java.util.concurrent.Future; 009import java.util.regex.Matcher; 010import java.util.regex.Pattern; 011 012import com.ibm.streamsx.topology.TStream; 013import com.ibm.streamsx.topology.Topology; 014import com.ibm.streamsx.topology.context.StreamsContextFactory; 015import com.ibm.streamsx.topology.file.FileStreams; 016import com.ibm.streamsx.topology.function.Predicate; 017 018/** 019 * Sample continuous (streaming) regular expression grep topology application. 020 * This is a variant of the {@link Grep} application that demonstrates 021 * filtering using Java functional programming. 022 * This Java application builds a 023 * simple topology that watches a directory for files, reads each file and 024 * output lines that match a regular expression. 025 * Thus as each file is added to the directory, the application will read 026 * it and output matching lines. 027 * <BR> 028 * The application implements the typical pattern of code that declares a 029 * topology followed by submission of the topology to a Streams context {@code 030 * com.ibm.streamsx.topology.context.StreamsContext}. 031 * <BR> 032 * This demonstrates Java functional programming using an anonymous class. 033 * <P> 034 * <BR> 035 * This may be executed from the {@code samples/java/functional} directory as: 036 * <UL> 037 * <LI> 038 * {@code java -cp functionalsamples.jar:../../../com.ibm.streamsx.topology/lib/com.ibm.streamsx.topology.jar:$STREAMS_INSTALL/lib/com.ibm.streams.operator.samples.jar 039 * simple.RegexGrep CONTEXT_TYPE $HOME/books ".*Queen.*England.*" 040 * } - Run directly from the command line. 041 * <i>CONTEXT_TYPE</i> is one of: 042 * <UL> 043 * <LI>{@code DISTRIBUTED} - Run as an IBM Streams distributed 044 * application.</LI> 045 * <LI>{@code STANDALONE} - Run as an IBM Streams standalone 046 * application.</LI> 047 * <LI>{@code EMBEDDED} - Run embedded within this JVM.</LI> 048 * <LI>{@code BUNDLE} - Create an IBM Streams application bundle.</LI> 049 * <LI>{@code TOOLKIT} - Create an IBM Streams application toolkit.</LI> 050 * </UL> 051 * </LI> 052 * <LI> 053 * An application execution within your IDE once you set the class path to include the correct jars.</LI> 054 * </UL> 055 * </P> 056 */ 057public class RegexGrep { 058 @SuppressWarnings("serial") 059 public static void main(String[] args) throws Exception { 060 String contextType = args[0]; 061 String directory = args[1]; 062 final Pattern pattern = Pattern.compile(args[2]); 063 064 // Define the topology 065 Topology topology = new Topology("RegexGrep"); 066 067 // All streams with tuples that are Java String objects 068 TStream<String> files = FileStreams.directoryWatcher(topology, directory); 069 TStream<String> lines = FileStreams.textFileReader(files); 070 071 /* 072 * Functional filter using an anonymous class to define the 073 * filtering logic, in this case execution of a regular 074 * expression against each input String tuple (each line 075 * of the files in the directory). 076 */ 077 TStream<String> filtered = lines.filter(new Predicate<String>() { 078 079 @Override 080 public boolean test(String v1) { 081 // Pass the line through if it matches the 082 // regular expression pattern 083 return matcher.reset(v1).matches(); 084 } 085 086 // Recreate the matcher (which is not serializable) 087 // when the object is deserialized using readResolve. 088 transient Matcher matcher; 089 090 /* 091 * Since the constructor is no invoked after serialization 092 * we use readResolve as a hook to execute initialization 093 * code, in this case creating the matcher from the 094 * pattern. 095 * The alternative would be to create it on its first use, 096 * which would require an if statement in the test method. 097 */ 098 private Object readResolve() throws ObjectStreamException { 099 matcher = pattern.matcher(""); 100 return this; 101 } 102 }); 103 104 // For debugging just print out the tuples 105 filtered.print(); 106 107 // Execute the topology, just like Grep. 108 Future<?> future = StreamsContextFactory.getStreamsContext(contextType) 109 .submit(topology); 110 Thread.sleep(30000); 111 future.cancel(true); 112 } 113}