Class GetHDFS

java.lang.Object
org.apache.nifi.components.AbstractConfigurableComponent
org.apache.nifi.processor.AbstractSessionFactoryProcessor
org.apache.nifi.processor.AbstractProcessor
org.apache.nifi.processors.hadoop.AbstractHadoopProcessor
org.apache.nifi.processors.hadoop.GetHDFS
All Implemented Interfaces:
org.apache.nifi.components.ClassloaderIsolationKeyProvider, org.apache.nifi.components.ConfigurableComponent, org.apache.nifi.processor.Processor
Direct Known Subclasses:
GetHDFSSequenceFile

@TriggerWhenEmpty @InputRequirement(INPUT_FORBIDDEN) @Tags({"hadoop","HCFS","HDFS","get","fetch","ingest","source","filesystem"}) @CapabilityDescription("Fetch files from Hadoop Distributed File System (HDFS) into FlowFiles. This Processor will delete the file from HDFS after fetching it.") @WritesAttribute(attribute="filename",description="The name of the file that was read from HDFS.") @WritesAttribute(attribute="path",description="The path is set to the relative path of the file\'s directory on HDFS. For example, if the Directory property is set to /tmp, then files picked up from /tmp will have the path attribute set to \"./\". If the Recurse Subdirectories property is set to true and a file is picked up from /tmp/abc/1/2/3, then the path attribute will be set to \"abc/1/2/3\".") @SeeAlso({PutHDFS.class,ListHDFS.class}) @Restriction(requiredPermission=READ_DISTRIBUTED_FILESYSTEM,explanation="Provides operator the ability to retrieve any file that NiFi has access to in HDFS or the local filesystem.") @Restriction(requiredPermission=WRITE_DISTRIBUTED_FILESYSTEM,explanation="Provides operator the ability to delete any file that NiFi has access to in HDFS or the local filesystem.") public class GetHDFS extends AbstractHadoopProcessor
  • Field Details

    • BUFFER_SIZE_KEY

      public static final String BUFFER_SIZE_KEY
      See Also: Constant Field Values
    • BUFFER_SIZE_DEFAULT

      public static final int BUFFER_SIZE_DEFAULT
      See Also: Constant Field Values
    • MAX_WORKING_QUEUE_SIZE

      public static final int MAX_WORKING_QUEUE_SIZE
      See Also: Constant Field Values
    • REL_SUCCESS

      public static final org.apache.nifi.processor.Relationship REL_SUCCESS
    • RECURSE_SUBDIRS

      public static final org.apache.nifi.components.PropertyDescriptor RECURSE_SUBDIRS
    • KEEP_SOURCE_FILE

      public static final org.apache.nifi.components.PropertyDescriptor KEEP_SOURCE_FILE
    • FILE_FILTER_REGEX

      public static final org.apache.nifi.components.PropertyDescriptor FILE_FILTER_REGEX
    • FILTER_MATCH_NAME_ONLY

      public static final org.apache.nifi.components.PropertyDescriptor FILTER_MATCH_NAME_ONLY
    • IGNORE_DOTTED_FILES

      public static final org.apache.nifi.components.PropertyDescriptor IGNORE_DOTTED_FILES
    • MIN_AGE

      public static final org.apache.nifi.components.PropertyDescriptor MIN_AGE
    • MAX_AGE

      public static final org.apache.nifi.components.PropertyDescriptor MAX_AGE
    • BATCH_SIZE

      public static final org.apache.nifi.components.PropertyDescriptor BATCH_SIZE
    • POLLING_INTERVAL

      public static final org.apache.nifi.components.PropertyDescriptor POLLING_INTERVAL
    • BUFFER_SIZE

      public static final org.apache.nifi.components.PropertyDescriptor BUFFER_SIZE
    • RELATIONSHIPS

      private static final Set<org.apache.nifi.processor.Relationship> RELATIONSHIPS
    • PROPERTY_DESCRIPTORS

      private static final List<org.apache.nifi.components.PropertyDescriptor> PROPERTY_DESCRIPTORS
    • processorConfig

      protected GetHDFS.ProcessorConfiguration processorConfig
    • logEmptyListing

      private final AtomicLong logEmptyListing
    • lastPollTime

      private final AtomicLong lastPollTime
    • listingLock

      private final Lock listingLock
    • queueLock

      private final Lock queueLock
    • filePathQueue

      private final BlockingQueue<org.apache.hadoop.fs.Path> filePathQueue
    • processing

      private final BlockingQueue<org.apache.hadoop.fs.Path> processing
  • Constructor Details

    • GetHDFS

      public GetHDFS()
  • Method Details

    • getRelationships

      public Set<org.apache.nifi.processor.Relationship> getRelationships()
      Specified by:
      getRelationships in interface org.apache.nifi.processor.Processor
      Overrides:
      getRelationships in class org.apache.nifi.processor.AbstractSessionFactoryProcessor
    • getSupportedPropertyDescriptors

      protected List<org.apache.nifi.components.PropertyDescriptor> getSupportedPropertyDescriptors()
      Overrides:
      getSupportedPropertyDescriptors in class AbstractHadoopProcessor
    • customValidate

      protected Collection<org.apache.nifi.components.ValidationResult> customValidate(org.apache.nifi.components.ValidationContext context)
      Overrides:
      customValidate in class AbstractHadoopProcessor
    • onScheduled

      @OnScheduled public void onScheduled(org.apache.nifi.processor.ProcessContext context) throws IOException
      Throws:
      IOException
    • onTrigger

      public void onTrigger(org.apache.nifi.processor.ProcessContext context, org.apache.nifi.processor.ProcessSession session) throws org.apache.nifi.processor.exception.ProcessException
      Specified by:
      onTrigger in class org.apache.nifi.processor.AbstractProcessor
      Throws:
      org.apache.nifi.processor.exception.ProcessException
    • processBatchOfFiles

      protected void processBatchOfFiles(List<org.apache.hadoop.fs.Path> files, org.apache.nifi.processor.ProcessContext context, org.apache.nifi.processor.ProcessSession session)
    • performListing

      protected Set<org.apache.hadoop.fs.Path> performListing(org.apache.nifi.processor.ProcessContext context) throws IOException, InterruptedException
      Performs a listing of HDFS if the POLLING_INTERVAL has elapsed. Returns null if the POLLING_INTERVAL has not elapsed. Returns an empty set if no files were found on HDFS that matched the configured filters.
      Parameters:
      context - context
      Returns:
      null if the POLLING_INTERVAL has not elapsed; an empty set if no files were found on HDFS that matched the configured filters
      Throws:
      IOException - if an I/O error occurs while listing HDFS
      InterruptedException
    • selectFiles

      protected Set<org.apache.hadoop.fs.Path> selectFiles(org.apache.hadoop.fs.FileSystem hdfs, org.apache.hadoop.fs.Path dir, Set<org.apache.hadoop.fs.Path> filesVisited) throws IOException, InterruptedException
      Poll HDFS for files to process that match the configured file filters.
      Parameters:
      hdfs - hdfs
      dir - dir
      filesVisited - filesVisited
      Returns:
      files to process
      Throws:
      IOException - if an I/O error occurs while polling HDFS
      InterruptedException