Class GetHDFSEvents

java.lang.Object
org.apache.nifi.components.AbstractConfigurableComponent
org.apache.nifi.processor.AbstractSessionFactoryProcessor
org.apache.nifi.processor.AbstractProcessor
org.apache.nifi.processors.hadoop.AbstractHadoopProcessor
org.apache.nifi.processors.hadoop.inotify.GetHDFSEvents
All Implemented Interfaces:
org.apache.nifi.components.ClassloaderIsolationKeyProvider, org.apache.nifi.components.ConfigurableComponent, org.apache.nifi.processor.Processor

@TriggerSerially @TriggerWhenEmpty @Tags({"hadoop","events","inotify","notifications","filesystem"}) @WritesAttribute(attribute="mime.type",description="This is always application/json.") @WritesAttribute(attribute="hdfs.inotify.event.type",description="This will specify the specific HDFS notification event type. Currently there are six types of events (append, close, create, metadata, rename, and unlink).") @WritesAttribute(attribute="hdfs.inotify.event.path",description="The specific path that the event is tied to.") @InputRequirement(INPUT_FORBIDDEN) @CapabilityDescription("This processor polls the notification events provided by the HdfsAdmin API. Since this uses the HdfsAdmin APIs it is required to run as an HDFS super user. Currently there are six types of events (append, close, create, metadata, rename, and unlink). Please see org.apache.hadoop.hdfs.inotify.Event documentation for full explanations of each event. This processor will poll for new events based on a defined duration. For each event received a new flow file will be created with the expected attributes and the event itself serialized to JSON and written to the flow file's content. For example, if event.type is APPEND then the content of the flow file will contain a JSON file containing the information about the append event. If successful the flow files are sent to the 'success' relationship. Be careful of where the generated flow files are stored. If the flow files are stored in one of the processor's watch directories there will be a never-ending flow of events. It is also important to be aware that this processor must consume all events. The filtering must happen within the processor. This is because the HDFS admin's event notifications API does not have filtering.") @Stateful(scopes=CLUSTER, description="The last used transaction id is stored. This is used to resume polling from the last processed event.") @SeeAlso({GetHDFS.class,FetchHDFS.class,PutHDFS.class,ListHDFS.class}) public class GetHDFSEvents extends AbstractHadoopProcessor
  • Field Details

    • POLL_DURATION

      static final org.apache.nifi.components.PropertyDescriptor POLL_DURATION
    • HDFS_PATH_TO_WATCH

      static final org.apache.nifi.components.PropertyDescriptor HDFS_PATH_TO_WATCH
    • IGNORE_HIDDEN_FILES

      static final org.apache.nifi.components.PropertyDescriptor IGNORE_HIDDEN_FILES
    • EVENT_TYPES

      static final org.apache.nifi.components.PropertyDescriptor EVENT_TYPES
    • NUMBER_OF_RETRIES_FOR_POLL

      static final org.apache.nifi.components.PropertyDescriptor NUMBER_OF_RETRIES_FOR_POLL
    • REL_SUCCESS

      static final org.apache.nifi.processor.Relationship REL_SUCCESS
    • PROPERTY_DESCRIPTORS

      private static final List<org.apache.nifi.components.PropertyDescriptor> PROPERTY_DESCRIPTORS
    • RELATIONSHIPS

      private static final Set<org.apache.nifi.processor.Relationship> RELATIONSHIPS
    • LAST_TX_ID

      private static final String LAST_TX_ID
      See Also:
      Constant Field Values
    • lastTxId

      private volatile long lastTxId
    • OBJECT_MAPPER

      private static final com.fasterxml.jackson.databind.ObjectMapper OBJECT_MAPPER
    • notificationConfig

      private GetHDFSEvents.NotificationConfig notificationConfig
  • Constructor Details

    • GetHDFSEvents

      public GetHDFSEvents()
  • Method Details

    • getSupportedPropertyDescriptors

      protected List<org.apache.nifi.components.PropertyDescriptor> getSupportedPropertyDescriptors()
      Overrides:
      getSupportedPropertyDescriptors in class AbstractHadoopProcessor
    • getRelationships

      public Set<org.apache.nifi.processor.Relationship> getRelationships()
      Specified by:
      getRelationships in interface org.apache.nifi.processor.Processor
      Overrides:
      getRelationships in class org.apache.nifi.processor.AbstractSessionFactoryProcessor
    • onSchedule

      @OnScheduled public void onSchedule(org.apache.nifi.processor.ProcessContext context)
    • onTrigger

      public void onTrigger(org.apache.nifi.processor.ProcessContext context, org.apache.nifi.processor.ProcessSession session) throws org.apache.nifi.processor.exception.ProcessException
      Specified by:
      onTrigger in class org.apache.nifi.processor.AbstractProcessor
      Throws:
      org.apache.nifi.processor.exception.ProcessException
    • getEventBatch

      private org.apache.hadoop.hdfs.inotify.EventBatch getEventBatch(org.apache.hadoop.hdfs.DFSInotifyEventInputStream eventStream, long duration, TimeUnit timeUnit, int retries) throws IOException, InterruptedException, org.apache.hadoop.hdfs.inotify.MissingEventsException
      Throws:
      IOException
      InterruptedException
      org.apache.hadoop.hdfs.inotify.MissingEventsException
    • updateClusterStateForTxId

      private void updateClusterStateForTxId(org.apache.nifi.processor.ProcessSession session)
    • getHdfsAdmin

      protected org.apache.hadoop.hdfs.client.HdfsAdmin getHdfsAdmin()
    • toProcessEvent

      private boolean toProcessEvent(org.apache.nifi.processor.ProcessContext context, org.apache.hadoop.hdfs.inotify.Event event)
    • getPath

      private String getPath(org.apache.hadoop.hdfs.inotify.Event event)