001/*
002# Licensed Materials - Property of IBM
003# Copyright IBM Corp. 2015  
004 */
005package kafka;
006
007import java.io.File;
008import java.text.SimpleDateFormat;
009import java.util.Date;
010import java.util.HashMap;
011import java.util.Map;
012import java.util.concurrent.Future;
013
014import javax.xml.parsers.DocumentBuilder;
015import javax.xml.parsers.DocumentBuilderFactory;
016import javax.xml.xpath.XPath;
017import javax.xml.xpath.XPathConstants;
018import javax.xml.xpath.XPathFactory;
019
020import org.w3c.dom.Document;
021import org.w3c.dom.Element;
022import org.w3c.dom.Node;
023import org.w3c.dom.NodeList;
024
025import com.ibm.streamsx.topology.TStream;
026import com.ibm.streamsx.topology.Topology;
027import com.ibm.streamsx.topology.context.ContextProperties;
028import com.ibm.streamsx.topology.context.StreamsContextFactory;
029import com.ibm.streamsx.topology.function.Function;
030import com.ibm.streamsx.topology.function.Supplier;
031import com.ibm.streamsx.topology.function.UnaryOperator;
032import com.ibm.streamsx.topology.logic.Value;
033import com.ibm.streamsx.topology.messaging.kafka.KafkaConsumer;
034import com.ibm.streamsx.topology.messaging.kafka.KafkaProducer;
035import com.ibm.streamsx.topology.tuple.Message;
036import com.ibm.streamsx.topology.tuple.SimpleMessage;
037
038
039/**
040 * Demonstrate integrating with the Apache Kafka messaging system
041 * <a href="http://kafka.apache.org">http://kafka.apache.org</a>.
042 * <p>
043 * Connectors are used to create a bridge between topology streams
044 * and a Kafka cluster:
045 * <ul>
046 * <li>{@link com.ibm.streamsx.topology.messaging.kafka.KafkaConsumer KafkaConsumer} - subscribe to Kafka topics and create streams of messages.</li>
047 * <li>{@link com.ibm.streamsx.topology.messaging.kafka.KafkaProducer KafkaProducer} - publish streams of messages to Kafka topics.</li>
048 * </ul>
049 * <p>
050 * The sample publishes some messages to a Kafka topic.  
051 * It also subscribes to the topic and reports the messages received.
052 * The messages received may include messages from prior runs of the sample.
053 * <p>
054 * The sample requires a running Kafka cluster with the following
055 * characteristics:
056 * <ul>
 * <li>the Kafka topic {@code kafkaSampleTopic} has been created.
058 * e.g.<br>
059 * {@code ${KAFKA_HOME}/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic kafkaSampleTopic}
060 * </li>
061 * <li>the Kafka cluster's zookeeper connection is {@code localhost:2181}</li>
 * <li>the Kafka cluster's broker address is {@code localhost:9092}</li>
063 * </ul>
064 * <p>
065 * Required IBM Streams environment variables:
066 * <ul>
067 * <li>STREAMS_INSTALL - the Streams installation directory</li>
 * <li>STREAMS_DOMAIN_ID - the Streams domain to use for context {@code DISTRIBUTED}</li>
 * <li>STREAMS_INSTANCE_ID - the Streams instance to use for context {@code DISTRIBUTED}</li>
070 * </ul>
071 * <p>
072 * See the Apache Kafka link above for information about setting up a Kafka
073 * cluster and creating a topic.
074 * <p>
075 * This may be executed from the {@code samples/java/functional} directory as:
076 * <UL>
077 * <LI>{@code ant run.kafka.distributed} - Using Apache Ant, this will run in distributed mode.</li>
078 * <LI>{@code ant run.kafka} - Using Apache Ant, this will run in standalone mode.</li>
079 * <LI>
080 * {@code java -cp functionalsamples.jar:../../../com.ibm.streamsx.topology/lib/com.ibm.streamsx.topology.jar:$STREAMS_INSTALL/lib/com.ibm.streams.operator.samples.jar
081 *  kafka.KafkaSample CONTEXT_TYPE
082 * } - Run directly from the command line.
083 * </LI>
084 * <i>CONTEXT_TYPE</i> is one of:
085 * <UL>
086 * <LI>{@code DISTRIBUTED} - Run as an IBM Streams distributed application.</LI>
087 * <LI>{@code STANDALONE} - Run as an IBM Streams standalone application.</LI>
088 * <LI>{@code BUNDLE} - Create an IBM Streams application bundle.</LI>
089 * <LI>{@code TOOLKIT} - Create an IBM Streams application toolkit.</LI>
090 * </UL>
091 * <LI>
092 * An application execution within your IDE once you set the class path to include the correct jars.</LI>
093 * </UL>
094 */
095public class KafkaSample {
096    private static final String ZOOKEEPER_CONNECT = "localhost:2181";    
097    private static final String KAFKA_BOOTSTRAP_SERVER_LIST = "localhost:9092";
098    
099    private static final String TOPIC = "kafkaSampleTopic";
100
101    private static final int PUB_DELAY_MSEC = 5*1000;
102    private static final String uniq = new SimpleDateFormat("HH:mm:ss.SSS").format(new Date());
103    private boolean captureArtifacts = false;
104    private boolean setAppTracingLevel = false;
105    private java.util.logging.Level appTracingLevel = java.util.logging.Level.FINE;
106    private Map<String,Object> config = new HashMap<>();
107    private String streamsxMessagingVer;
108    
109    public static void main(String[] args) throws Exception {
110        String contextType = "DISTRIBUTED";
111        if (args.length > 0)
112            contextType = args[0];
113        System.out.println("\nREQUIRES:"
114                + " Kafka topic " + TOPIC + " exists"
115                + ", Kafka broker at " + KAFKA_BOOTSTRAP_SERVER_LIST
116                + ", Kafka zookeeper at " + ZOOKEEPER_CONNECT
117                + "\n"
118                );
119
120        KafkaSample app = new KafkaSample();
121        app.publishSubscribe(contextType);
122    }
123    
124    /**
125     * Publish some messages to a topic, scribe to the topic and report
126     * received messages.
127     * @param contextType string value of a {@code StreamsContext.Type}
128     * @throws Exception
129     */
130    public void publishSubscribe(String contextType) throws Exception {
131        
132        setupConfig();
133        identifyStreamsxMessagingVer();
134        Topology top = new Topology("kafkaSample");
135        String groupId = newGroupId(top.getName());
136        Supplier<String> topic = new Value<String>(TOPIC);
137
138        KafkaProducer producer = new KafkaProducer(top, createProducerConfig());
139        KafkaConsumer consumer = new KafkaConsumer(top, createConsumerConfig(groupId));
140        
141        TStream<Message> msgs = makeStreamToPublish(top);
142
143        // for the sample, give the consumer a chance to become ready
144        msgs = msgs.modify(initialDelayFunc(PUB_DELAY_MSEC));
145
146        producer.publish(msgs, topic);
147        
148        TStream<Message> rcvdMsgs = consumer.subscribe(topic);
149
150        rcvdMsgs.print();  // show what we received
151
152        // Execute the topology, to send and receive the messages.
153        Future<?> future = StreamsContextFactory.getStreamsContext(contextType)
154                .submit(top, config);
155        
156        if (contextType.contains("DISTRIBUTED")) {
157            System.out.println("\nSee the job's PE console logs for the topology output.\n");
158        }
159        else if (contextType.contains("STANDALONE")
160                || contextType.contains("EMBEDDED")) {
161            Thread.sleep(15000);
162            future.cancel(true);
163        }
164    }
165    
166    private Map<String,Object> createConsumerConfig(String groupId) {
167        Map<String,Object> props = new HashMap<>();
168        props.put("zookeeper.connect", ZOOKEEPER_CONNECT);
169        props.put("group.id", groupId);
170        props.put("zookeeper.session.timeout.ms", "400");
171        props.put("zookeeper.sync.time.ms", "200");
172        props.put("auto.commit.interval.ms", "1000");
173        return props;
174    }
175    
176    private Map<String,Object> createProducerConfig() {
177        Map<String,Object> props = new HashMap<>();
178        if (streamsxMessagingVer.startsWith("2.0")) {
179            props.put("metadata.broker.list", KAFKA_BOOTSTRAP_SERVER_LIST);
180            props.put("serializer.class", "kafka.serializer.StringEncoder");
181            props.put("request.required.acks", "1");
182        }
183        else {
184            // starting with steamsx.messaging v3.0, the 
185            // kafka "new producer configs" are used. 
186            props.put("bootstrap.servers", KAFKA_BOOTSTRAP_SERVER_LIST);
187            props.put("acks", "1");
188        }
189        return props;
190    }
191    
192    @SuppressWarnings("serial")
193    private static TStream<Message> makeStreamToPublish(Topology top) {
194        return top.strings("Hello", "Are you there?",
195                           "3 of 5", "4 of 5", "5 of 5"
196                ).transform(new Function<String,Message>() {
197                    private String timestamp;
198                    @Override
199                    public Message apply(String v) {
200                        if (timestamp == null)
201                            timestamp = new SimpleDateFormat("HH:mm:ss.SSS ").format(new Date());
202                        return new SimpleMessage(timestamp + v);
203                    }
204                });
205    }
206    
207    private void setupConfig() {
208        if (captureArtifacts)
209            config.put(ContextProperties.KEEP_ARTIFACTS, true);
210        if (setAppTracingLevel)
211            config.put(ContextProperties.TRACING_LEVEL, appTracingLevel);
212    }
213    
214    private String newGroupId(String name) {
215        // be insensitive to old consumers for the topic/groupId hanging around
216        String groupId = name + "_" + uniq.replaceAll(":", "");
217        System.out.println("Using Kafka consumer group.id " + groupId);
218        return groupId;
219    }
220
221    @SuppressWarnings("serial")
222    private static UnaryOperator<Message> initialDelayFunc(final int delayMsec) {
223        return new UnaryOperator<Message>() {
224            private int initialDelayMsec = delayMsec;
225    
226            @Override
227            public Message apply(Message v) {
228                if (initialDelayMsec != -1) {
229                    try {
230                        Thread.sleep(initialDelayMsec);
231                    } catch (InterruptedException e) {
232                        // done delaying
233                    }
234                    initialDelayMsec = -1;
235                }
236                return v;
237            }
238        };
239    }
240    
241    private void identifyStreamsxMessagingVer() throws Exception {
242        String tkloc = System.getenv("STREAMS_INSTALL")
243                        + "/toolkits/com.ibm.streamsx.messaging";
244        File info = new File(tkloc, "info.xml");
245        // e.g., <info:version>2.0.1</info:version>
246
247        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
248        DocumentBuilder db = dbf.newDocumentBuilder();
249        Document d = db.parse(info);
250        XPath xpath = XPathFactory.newInstance().newXPath();
251        NodeList nodes = (NodeList)xpath.evaluate("/toolkitInfoModel/identity/version",
252                d.getDocumentElement(), XPathConstants.NODESET);
253        Element e = (Element) nodes.item(0);
254        Node n = e.getChildNodes().item(0);
255        String ver = n.getNodeValue();
256        streamsxMessagingVer = ver;
257    }
258}