/*
# Licensed Materials - Property of IBM
# Copyright IBM Corp. 2015
 */
package kafka;

import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Future;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import com.ibm.streamsx.topology.TStream;
import com.ibm.streamsx.topology.Topology;
import com.ibm.streamsx.topology.context.ContextProperties;
import com.ibm.streamsx.topology.context.StreamsContextFactory;
import com.ibm.streamsx.topology.function.Function;
import com.ibm.streamsx.topology.function.Supplier;
import com.ibm.streamsx.topology.function.UnaryOperator;
import com.ibm.streamsx.topology.logic.Value;
import com.ibm.streamsx.topology.messaging.kafka.KafkaConsumer;
import com.ibm.streamsx.topology.messaging.kafka.KafkaProducer;
import com.ibm.streamsx.topology.tuple.Message;
import com.ibm.streamsx.topology.tuple.SimpleMessage;


/**
 * Demonstrate integrating with the Apache Kafka messaging system
 * <a href="http://kafka.apache.org">http://kafka.apache.org</a>.
 * <p>
 * Connectors are used to create a bridge between topology streams
 * and a Kafka cluster:
 * <ul>
 * <li>{@link com.ibm.streamsx.topology.messaging.kafka.KafkaConsumer KafkaConsumer} - subscribe to Kafka topics and create streams of messages.</li>
 * <li>{@link com.ibm.streamsx.topology.messaging.kafka.KafkaProducer KafkaProducer} - publish streams of messages to Kafka topics.</li>
 * </ul>
 * <p>
 * The sample publishes some messages to a Kafka topic.
 * It also subscribes to the topic and reports the messages received.
 * The messages received may include messages from prior runs of the sample.
 * <p>
 * The sample requires a running Kafka cluster with the following
 * characteristics:
 * <ul>
 * <li>the Kafka topic {@code kafkaSampleTopic} has been created, e.g.,<br>
 * {@code ${KAFKA_HOME}/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic kafkaSampleTopic}
 * </li>
 * <li>the Kafka cluster's zookeeper connection is {@code localhost:2181}</li>
 * <li>the Kafka cluster's broker address is {@code localhost:9092}</li>
 * </ul>
 * <p>
 * Required IBM Streams environment variables:
 * <ul>
 * <li>STREAMS_INSTALL - the Streams installation directory</li>
 * <li>STREAMS_DOMAIN_ID - the Streams domain to use for context {@code DISTRIBUTED}</li>
 * <li>STREAMS_INSTANCE_ID - the Streams instance to use for context {@code DISTRIBUTED}</li>
 * </ul>
 * <p>
 * See the Apache Kafka link above for information about setting up a Kafka
 * cluster and creating a topic.
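 * <p>
 * The topic and message flow can also be checked outside of Streams with the
 * console tools shipped with Kafka (paths shown assume a typical Kafka
 * installation of the same era as this sample), e.g.:<br>
 * {@code ${KAFKA_HOME}/bin/kafka-topics.sh --list --zookeeper localhost:2181}<br>
 * {@code ${KAFKA_HOME}/bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic kafkaSampleTopic}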
 * <p>
 * This may be executed from the {@code samples/java/functional} directory as:
 * <UL>
 * <LI>{@code ant run.kafka.distributed} - Using Apache Ant, this will run in distributed mode.</LI>
 * <LI>{@code ant run.kafka} - Using Apache Ant, this will run in standalone mode.</LI>
 * <LI>
 * {@code java -cp functionalsamples.jar:../../../com.ibm.streamsx.topology/lib/com.ibm.streamsx.topology.jar:$STREAMS_INSTALL/lib/com.ibm.streams.operator.samples.jar
 * kafka.KafkaSample CONTEXT_TYPE
 * } - Run directly from the command line,
 * where <i>CONTEXT_TYPE</i> is one of:
 * <UL>
 * <LI>{@code DISTRIBUTED} - Run as an IBM Streams distributed application.</LI>
 * <LI>{@code STANDALONE} - Run as an IBM Streams standalone application.</LI>
 * <LI>{@code BUNDLE} - Create an IBM Streams application bundle.</LI>
 * <LI>{@code TOOLKIT} - Create an IBM Streams application toolkit.</LI>
 * </UL>
 * </LI>
 * <LI>Run from within your IDE once you have set the class path to include the correct jars.</LI>
 * </UL>
 */
public class KafkaSample {
    private static final String ZOOKEEPER_CONNECT = "localhost:2181";
    private static final String KAFKA_BOOTSTRAP_SERVER_LIST = "localhost:9092";

    private static final String TOPIC = "kafkaSampleTopic";

    private static final int PUB_DELAY_MSEC = 5*1000;
    private static final String uniq = new SimpleDateFormat("HH:mm:ss.SSS").format(new Date());
    private boolean captureArtifacts = false;
    private boolean setAppTracingLevel = false;
    private java.util.logging.Level appTracingLevel = java.util.logging.Level.FINE;
    private Map<String,Object> config = new HashMap<>();
    private String streamsxMessagingVer;

    public static void main(String[] args) throws Exception {
        String contextType = "DISTRIBUTED";
        if (args.length > 0)
            contextType = args[0];
        System.out.println("\nREQUIRES:"
                + " Kafka topic " + TOPIC + " exists"
                + ", Kafka broker at " + KAFKA_BOOTSTRAP_SERVER_LIST
                + ", Kafka zookeeper at " + ZOOKEEPER_CONNECT
                + "\n"
                );

        KafkaSample app = new KafkaSample();
        app.publishSubscribe(contextType);
    }

    /**
     * Publish some messages to a topic, subscribe to the topic and report
     * received messages.
     * @param contextType string value of a {@code StreamsContext.Type}
     * @throws Exception
     */
    public void publishSubscribe(String contextType) throws Exception {

        setupConfig();
        identifyStreamsxMessagingVer();
        Topology top = new Topology("kafkaSample");
        String groupId = newGroupId(top.getName());
        Supplier<String> topic = new Value<String>(TOPIC);

        KafkaProducer producer = new KafkaProducer(top, createProducerConfig());
        KafkaConsumer consumer = new KafkaConsumer(top, createConsumerConfig(groupId));

        TStream<Message> msgs = makeStreamToPublish(top);

        // for the sample, give the consumer a chance to become ready
        msgs = msgs.modify(initialDelayFunc(PUB_DELAY_MSEC));

        producer.publish(msgs, topic);

        TStream<Message> rcvdMsgs = consumer.subscribe(topic);

        rcvdMsgs.print(); // show what we received

        // Execute the topology, to send and receive the messages.
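        // submit() is asynchronous: for DISTRIBUTED it returns once the job
        // has been submitted, while for STANDALONE and EMBEDDED the returned
        // Future is used below to cancel the otherwise unbounded execution.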
        Future<?> future = StreamsContextFactory.getStreamsContext(contextType)
                .submit(top, config);

        if (contextType.contains("DISTRIBUTED")) {
            System.out.println("\nSee the job's PE console logs for the topology output.\n");
        }
        else if (contextType.contains("STANDALONE")
                || contextType.contains("EMBEDDED")) {
            Thread.sleep(15000);
            future.cancel(true);
        }
    }

    private Map<String,Object> createConsumerConfig(String groupId) {
        Map<String,Object> props = new HashMap<>();
        props.put("zookeeper.connect", ZOOKEEPER_CONNECT);
        props.put("group.id", groupId);
        props.put("zookeeper.session.timeout.ms", "400");
        props.put("zookeeper.sync.time.ms", "200");
        props.put("auto.commit.interval.ms", "1000");
        return props;
    }

    private Map<String,Object> createProducerConfig() {
        Map<String,Object> props = new HashMap<>();
        if (streamsxMessagingVer.startsWith("2.0")) {
            props.put("metadata.broker.list", KAFKA_BOOTSTRAP_SERVER_LIST);
            props.put("serializer.class", "kafka.serializer.StringEncoder");
            props.put("request.required.acks", "1");
        }
        else {
            // starting with streamsx.messaging v3.0, the
            // Kafka "new producer configs" are used.
            props.put("bootstrap.servers", KAFKA_BOOTSTRAP_SERVER_LIST);
            props.put("acks", "1");
        }
        return props;
    }

    @SuppressWarnings("serial")
    private static TStream<Message> makeStreamToPublish(Topology top) {
        return top.strings("Hello", "Are you there?",
                "3 of 5", "4 of 5", "5 of 5"
                ).transform(new Function<String,Message>() {
                    private String timestamp;
                    @Override
                    public Message apply(String v) {
                        // timestamp the first tuple so every message from
                        // this run carries the same run-identifying prefix
                        if (timestamp == null)
                            timestamp = new SimpleDateFormat("HH:mm:ss.SSS ").format(new Date());
                        return new SimpleMessage(timestamp + v);
                    }
                });
    }

    private void setupConfig() {
        if (captureArtifacts)
            config.put(ContextProperties.KEEP_ARTIFACTS, true);
        if (setAppTracingLevel)
            config.put(ContextProperties.TRACING_LEVEL, appTracingLevel);
    }

    private String newGroupId(String name) {
        // be insensitive to old consumers for the topic/groupId hanging around
        String groupId = name + "_" + uniq.replaceAll(":", "");
        System.out.println("Using Kafka consumer group.id " + groupId);
        return groupId;
    }

    @SuppressWarnings("serial")
    private static UnaryOperator<Message> initialDelayFunc(final int delayMsec) {
        return new UnaryOperator<Message>() {
            private int initialDelayMsec = delayMsec;

            @Override
            public Message apply(Message v) {
                // delay only the first tuple; pass subsequent tuples through
                if (initialDelayMsec != -1) {
                    try {
                        Thread.sleep(initialDelayMsec);
                    } catch (InterruptedException e) {
                        // done delaying
                    }
                    initialDelayMsec = -1;
                }
                return v;
            }
        };
    }
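
    /**
     * Determine the version of the com.ibm.streamsx.messaging toolkit in the
     * Streams installation by reading the version element from the toolkit's
     * info.xml, so {@code createProducerConfig()} can supply the style of
     * producer configuration that toolkit version expects.
     */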
    private void identifyStreamsxMessagingVer() throws Exception {
        String tkloc = System.getenv("STREAMS_INSTALL")
                + "/toolkits/com.ibm.streamsx.messaging";
        File info = new File(tkloc, "info.xml");
        // e.g., <info:version>2.0.1</info:version>

        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = dbf.newDocumentBuilder();
        Document d = db.parse(info);
        XPath xpath = XPathFactory.newInstance().newXPath();
        NodeList nodes = (NodeList)xpath.evaluate("/toolkitInfoModel/identity/version",
                d.getDocumentElement(), XPathConstants.NODESET);
        Element e = (Element) nodes.item(0);
        Node n = e.getChildNodes().item(0);
        String ver = n.getNodeValue();
        streamsxMessagingVer = ver;
    }
}