T - The type of records produced by this data source.
public class FlinkPulsarSource<T>
extends org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction<T>
implements org.apache.flink.api.java.typeutils.ResultTypeQueryable<T>, org.apache.flink.runtime.state.CheckpointListener, org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
| Modifier and Type | Field and Description |
|---|---|
protected String |
adminUrl |
protected Map<String,String> |
caseInsensitiveParams |
protected org.apache.pulsar.client.impl.conf.ClientConfigurationData |
clientConfigurationData |
protected int |
commitMaxRetries |
protected PulsarDeserializationSchema<T> |
deserializer |
static String |
KEY_DISABLE_METRICS
Boolean configuration key to disable metrics tracking.
|
static int |
MAX_NUM_PENDING_CHECKPOINTS
The maximum number of pending non-committed checkpoints to track, to avoid memory leaks.
|
protected PulsarMetadataReader |
metadataReader
The partition discoverer, used to find new partitions.
|
protected int |
pollTimeoutMs |
protected Properties |
properties |
protected Map<String,Object> |
readerConf |
protected UUID |
uuid |
| Constructor and Description |
|---|
FlinkPulsarSource(String adminUrl,
org.apache.pulsar.client.impl.conf.ClientConfigurationData clientConf,
PulsarDeserializationSchema<T> deserializer,
Properties properties) |
FlinkPulsarSource(String serviceUrl,
String adminUrl,
org.apache.flink.api.common.serialization.DeserializationSchema<T> deserializer,
Properties properties) |
FlinkPulsarSource(String serviceUrl,
String adminUrl,
PulsarDeserializationSchema<T> deserializer,
Properties properties) |
| Modifier and Type | Method and Description |
|---|---|
FlinkPulsarSource<T> |
assignTimestampsAndWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks<T> assigner)
Deprecated.
|
FlinkPulsarSource<T> |
assignTimestampsAndWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks<T> assigner)
Deprecated.
|
FlinkPulsarSource<T> |
assignTimestampsAndWatermarks(org.apache.flink.api.common.eventtime.WatermarkStrategy<T> watermarkStrategy)
Sets the given
WatermarkStrategy on this consumer. |
void |
cancel() |
void |
close() |
protected PulsarFetcher<T> |
createFetcher(org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext<T> sourceContext,
Map<TopicRange,org.apache.pulsar.client.api.MessageId> seedTopicsWithInitialOffsets,
org.apache.flink.util.SerializedValue<org.apache.flink.api.common.eventtime.WatermarkStrategy<T>> watermarkStrategy,
org.apache.flink.streaming.runtime.tasks.ProcessingTimeService processingTimeProvider,
long autoWatermarkInterval,
ClassLoader userCodeClassLoader,
org.apache.flink.streaming.api.operators.StreamingRuntimeContext streamingRuntime,
boolean useMetrics,
Set<TopicRange> excludeStartMessageIds) |
protected PulsarMetadataReader |
createMetadataReader() |
Map<TopicRange,org.apache.pulsar.client.api.MessageId> |
getOwnedTopicStarts() |
Map<Long,Map<TopicRange,org.apache.pulsar.client.api.MessageId>> |
getPendingOffsetsToCommit() |
org.apache.flink.api.common.typeinfo.TypeInformation<T> |
getProducedType() |
protected String |
getSubscriptionName() |
void |
initializeState(org.apache.flink.runtime.state.FunctionInitializationContext context) |
void |
joinDiscoveryLoopThread() |
void |
notifyCheckpointAborted(long checkpointId) |
void |
notifyCheckpointComplete(long checkpointId) |
Map<TopicRange,org.apache.pulsar.client.api.MessageId> |
offsetForEachTopic(Set<TopicRange> topics,
StartupMode mode,
Map<TopicRange,org.apache.pulsar.client.api.MessageId> specificStartupOffsets) |
void |
open(org.apache.flink.configuration.Configuration parameters) |
void |
run(org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext<T> ctx) |
void |
runWithTopicsDiscovery() |
FlinkPulsarSource<T> |
setStartFromEarliest() |
FlinkPulsarSource<T> |
setStartFromLatest() |
FlinkPulsarSource<T> |
setStartFromSpecificOffsets(Map<String,org.apache.pulsar.client.api.MessageId> specificStartupOffsets) |
FlinkPulsarSource<T> |
setStartFromSubscription(String externalSubscriptionName) |
FlinkPulsarSource<T> |
setStartFromSubscription(String externalSubscriptionName,
org.apache.pulsar.client.api.MessageId subscriptionPosition) |
void |
snapshotState(org.apache.flink.runtime.state.FunctionSnapshotContext context) |
public static final int MAX_NUM_PENDING_CHECKPOINTS
public static final String KEY_DISABLE_METRICS
protected String adminUrl
protected org.apache.pulsar.client.impl.conf.ClientConfigurationData clientConfigurationData
protected volatile PulsarDeserializationSchema<T> deserializer
protected final int pollTimeoutMs
protected final int commitMaxRetries
protected final Properties properties
protected final UUID uuid
protected transient volatile PulsarMetadataReader metadataReader
public FlinkPulsarSource(String adminUrl, org.apache.pulsar.client.impl.conf.ClientConfigurationData clientConf, PulsarDeserializationSchema<T> deserializer, Properties properties)
public FlinkPulsarSource(String serviceUrl, String adminUrl, PulsarDeserializationSchema<T> deserializer, Properties properties)
public FlinkPulsarSource(String serviceUrl, String adminUrl, org.apache.flink.api.common.serialization.DeserializationSchema<T> deserializer, Properties properties)
@Deprecated public FlinkPulsarSource<T> assignTimestampsAndWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks<T> assigner)
Specifies an AssignerWithPunctuatedWatermarks to emit watermarks
in a punctuated manner. The watermark extractor will run per Pulsar partition,
watermarks will be merged across partitions in the same way as in the Flink runtime,
when streams are merged.
When a subtask of a FlinkPulsarSource source reads multiple Pulsar partitions, the streams from the partitions are unioned in a "first come first serve" fashion. Per-partition characteristics are usually lost that way. For example, if the timestamps are strictly ascending per Pulsar partition, they will not be strictly ascending in the resulting Flink DataStream, if the parallel source subtask reads more than one partition.
Running timestamp extractors / watermark generators directly inside the Pulsar source, per Pulsar partition, allows users to let them exploit the per-partition characteristics.
Note: One can use either an AssignerWithPunctuatedWatermarks or an
AssignerWithPeriodicWatermarks, not both at the same time.
assigner - The timestamp assigner / watermark generator to use.
@Deprecated public FlinkPulsarSource<T> assignTimestampsAndWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks<T> assigner)
Specifies an AssignerWithPeriodicWatermarks to emit watermarks
in a periodic manner. The watermark extractor will run per Pulsar partition,
watermarks will be merged across partitions in the same way as in the Flink runtime,
when streams are merged.
When a subtask of a FlinkPulsarSource source reads multiple Pulsar partitions, the streams from the partitions are unioned in a "first come first serve" fashion. Per-partition characteristics are usually lost that way. For example, if the timestamps are strictly ascending per Pulsar partition, they will not be strictly ascending in the resulting Flink DataStream, if the parallel source subtask reads more than one partition.
Running timestamp extractors / watermark generators directly inside the Pulsar source, per Pulsar partition, allows users to let them exploit the per-partition characteristics.
Note: One can use either an AssignerWithPunctuatedWatermarks or an
AssignerWithPeriodicWatermarks, not both at the same time.
assigner - The timestamp assigner / watermark generator to use.
public FlinkPulsarSource<T> assignTimestampsAndWatermarks(org.apache.flink.api.common.eventtime.WatermarkStrategy<T> watermarkStrategy)
Sets the given WatermarkStrategy on this consumer. These will be used to assign
timestamps to records and generate watermarks to signal event time progress.
Running timestamp extractors / watermark generators directly inside the Pulsar source (which you can do by using this method), per Pulsar partition, allows users to let them exploit the per-partition characteristics.
When a subtask of a FlinkPulsarSource reads multiple Pulsar partitions, the streams from the partitions are unioned in a "first come first serve" fashion. Per-partition characteristics are usually lost that way. For example, if the timestamps are strictly ascending per Pulsar partition, they will not be strictly ascending in the resulting Flink DataStream, if the parallel source subtask reads more than one partition.
Common watermark generation patterns can be found as static methods in the
WatermarkStrategy class.
public FlinkPulsarSource<T> setStartFromEarliest()
public FlinkPulsarSource<T> setStartFromLatest()
public FlinkPulsarSource<T> setStartFromSpecificOffsets(Map<String,org.apache.pulsar.client.api.MessageId> specificStartupOffsets)
public FlinkPulsarSource<T> setStartFromSubscription(String externalSubscriptionName)
public FlinkPulsarSource<T> setStartFromSubscription(String externalSubscriptionName, org.apache.pulsar.client.api.MessageId subscriptionPosition)
public void open(org.apache.flink.configuration.Configuration parameters)
throws Exception
Specified by: open in interface org.apache.flink.api.common.functions.RichFunction
Overrides: open in class org.apache.flink.api.common.functions.AbstractRichFunction
Throws: Exception
protected String getSubscriptionName()
protected PulsarMetadataReader createMetadataReader() throws org.apache.pulsar.client.api.PulsarClientException
Throws: org.apache.pulsar.client.api.PulsarClientException
public void run(org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext<T> ctx) throws Exception
protected PulsarFetcher<T> createFetcher(org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext<T> sourceContext, Map<TopicRange,org.apache.pulsar.client.api.MessageId> seedTopicsWithInitialOffsets, org.apache.flink.util.SerializedValue<org.apache.flink.api.common.eventtime.WatermarkStrategy<T>> watermarkStrategy, org.apache.flink.streaming.runtime.tasks.ProcessingTimeService processingTimeProvider, long autoWatermarkInterval, ClassLoader userCodeClassLoader, org.apache.flink.streaming.api.operators.StreamingRuntimeContext streamingRuntime, boolean useMetrics, Set<TopicRange> excludeStartMessageIds) throws Exception
Throws: Exception
public void joinDiscoveryLoopThread()
throws InterruptedException
Throws: InterruptedException
public void close()
throws Exception
Specified by: close in interface org.apache.flink.api.common.functions.RichFunction
Overrides: close in class org.apache.flink.api.common.functions.AbstractRichFunction
Throws: Exception
public void cancel()
Specified by: cancel in interface org.apache.flink.streaming.api.functions.source.SourceFunction<T>
public org.apache.flink.api.common.typeinfo.TypeInformation<T> getProducedType()
Specified by: getProducedType in interface org.apache.flink.api.java.typeutils.ResultTypeQueryable<T>
public void initializeState(org.apache.flink.runtime.state.FunctionInitializationContext context)
throws Exception
Specified by: initializeState in interface org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
Throws: Exception
public void snapshotState(org.apache.flink.runtime.state.FunctionSnapshotContext context)
throws Exception
Specified by: snapshotState in interface org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
Throws: Exception
public void notifyCheckpointComplete(long checkpointId)
throws Exception
Specified by: notifyCheckpointComplete in interface org.apache.flink.api.common.state.CheckpointListener
Throws: Exception
public void notifyCheckpointAborted(long checkpointId)
throws Exception
Specified by: notifyCheckpointAborted in interface org.apache.flink.api.common.state.CheckpointListener
Throws: Exception
public Map<TopicRange,org.apache.pulsar.client.api.MessageId> offsetForEachTopic(Set<TopicRange> topics, StartupMode mode, Map<TopicRange,org.apache.pulsar.client.api.MessageId> specificStartupOffsets)
public Map<Long,Map<TopicRange,org.apache.pulsar.client.api.MessageId>> getPendingOffsetsToCommit()
public Map<TopicRange,org.apache.pulsar.client.api.MessageId> getOwnedTopicStarts()
Copyright © 2019–2022 The Apache Software Foundation. All rights reserved.