Class AbstractHadoopProcessor

java.lang.Object
org.apache.nifi.components.AbstractConfigurableComponent
org.apache.nifi.processor.AbstractSessionFactoryProcessor
org.apache.nifi.processor.AbstractProcessor
org.apache.nifi.processors.hadoop.AbstractHadoopProcessor
All Implemented Interfaces:
org.apache.nifi.components.ClassloaderIsolationKeyProvider, org.apache.nifi.components.ConfigurableComponent, org.apache.nifi.processor.Processor

@RequiresInstanceClassLoading(cloneAncestorResources=true) public abstract class AbstractHadoopProcessor extends org.apache.nifi.processor.AbstractProcessor implements org.apache.nifi.components.ClassloaderIsolationKeyProvider
This is a base class that is helpful when building processors interacting with HDFS.

As of Apache NiFi 1.5.0, the Relogin Period property is no longer used in the configuration of a Hadoop processor. Due to changes made to SecurityUtil.loginKerberos(Configuration, String, String), which is used by this class to authenticate a principal with Kerberos, Hadoop components no longer attempt relogins explicitly. For more information, please read the documentation for SecurityUtil.loginKerberos(Configuration, String, String).

See Also:
  • Field Details

    • DENY_LFS_ACCESS

      private static final String DENY_LFS_ACCESS
      See Also:
    • DENY_LFS_EXPLANATION

      private static final String DENY_LFS_EXPLANATION
    • LOCAL_FILE_SYSTEM_URI

      private static final Pattern LOCAL_FILE_SYSTEM_URI
    • NORMALIZE_ERROR_WITH_PROPERTY

      private static final String NORMALIZE_ERROR_WITH_PROPERTY
      See Also:
    • NORMALIZE_ERROR_WITHOUT_PROPERTY

      private static final String NORMALIZE_ERROR_WITHOUT_PROPERTY
      See Also:
    • HADOOP_CONFIGURATION_RESOURCES

      public static final org.apache.nifi.components.PropertyDescriptor HADOOP_CONFIGURATION_RESOURCES
    • DIRECTORY

      public static final org.apache.nifi.components.PropertyDescriptor DIRECTORY
    • COMPRESSION_CODEC

      public static final org.apache.nifi.components.PropertyDescriptor COMPRESSION_CODEC
    • ADDITIONAL_CLASSPATH_RESOURCES

      public static final org.apache.nifi.components.PropertyDescriptor ADDITIONAL_CLASSPATH_RESOURCES
    • KERBEROS_USER_SERVICE

      public static final org.apache.nifi.components.PropertyDescriptor KERBEROS_USER_SERVICE
    • ABSOLUTE_HDFS_PATH_ATTRIBUTE

      public static final String ABSOLUTE_HDFS_PATH_ATTRIBUTE
      See Also:
    • HADOOP_FILE_URL_ATTRIBUTE

      public static final String HADOOP_FILE_URL_ATTRIBUTE
      See Also:
    • TARGET_HDFS_DIR_CREATED_ATTRIBUTE

      protected static final String TARGET_HDFS_DIR_CREATED_ATTRIBUTE
      See Also:
    • RESOURCES_LOCK

      private static final Object RESOURCES_LOCK
    • EMPTY_HDFS_RESOURCES

      private static final HdfsResources EMPTY_HDFS_RESOURCES
    • properties

      protected List<org.apache.nifi.components.PropertyDescriptor> properties
    • hdfsResources

      final AtomicReference<HdfsResources> hdfsResources
    • validationResourceHolder

      private final AtomicReference<AbstractHadoopProcessor.ValidationResources> validationResourceHolder
  • Constructor Details

    • AbstractHadoopProcessor

      public AbstractHadoopProcessor()
  • Method Details

    • init

      protected void init(org.apache.nifi.processor.ProcessorInitializationContext context)
      Overrides:
      init in class org.apache.nifi.processor.AbstractSessionFactoryProcessor
    • migrateProperties

      public void migrateProperties(org.apache.nifi.migration.PropertyConfiguration config)
      Specified by:
      migrateProperties in interface org.apache.nifi.processor.Processor
    • getSupportedPropertyDescriptors

      protected List<org.apache.nifi.components.PropertyDescriptor> getSupportedPropertyDescriptors()
      Overrides:
      getSupportedPropertyDescriptors in class org.apache.nifi.components.AbstractConfigurableComponent
    • getClassloaderIsolationKey

      public String getClassloaderIsolationKey(org.apache.nifi.context.PropertyContext context)
      Specified by:
      getClassloaderIsolationKey in interface org.apache.nifi.components.ClassloaderIsolationKeyProvider
    • customValidate

      protected Collection<org.apache.nifi.components.ValidationResult> customValidate(org.apache.nifi.components.ValidationContext validationContext)
      Overrides:
      customValidate in class org.apache.nifi.components.AbstractConfigurableComponent
    • validateFileSystem

      protected Collection<org.apache.nifi.components.ValidationResult> validateFileSystem(org.apache.hadoop.conf.Configuration configuration)
    • getHadoopConfigurationForValidation

      protected org.apache.hadoop.conf.Configuration getHadoopConfigurationForValidation(List<String> locations) throws IOException
      Throws:
      IOException
    • abstractOnScheduled

      @OnScheduled public final void abstractOnScheduled(org.apache.nifi.processor.ProcessContext context) throws IOException
      If your subclass also has an @OnScheduled annotated method and you need hdfsResources in that method, then be sure to call super.abstractOnScheduled(context).
      Throws:
      IOException
    • getConfigLocations

      protected List<String> getConfigLocations(org.apache.nifi.context.PropertyContext context)
    • abstractOnStopped

      @OnStopped public final void abstractOnStopped()
    • getConfigurationFromResources

      private static org.apache.hadoop.conf.Configuration getConfigurationFromResources(org.apache.hadoop.conf.Configuration config, List<String> locations) throws IOException
      Throws:
      IOException
    • resetHDFSResources

      HdfsResources resetHDFSResources(List<String> resourceLocations, org.apache.nifi.processor.ProcessContext context) throws IOException
      Throws:
      IOException
    • getKerberosUser

      private KerberosUser getKerberosUser(org.apache.nifi.processor.ProcessContext context)
    • preProcessConfiguration

      protected void preProcessConfiguration(org.apache.hadoop.conf.Configuration config, org.apache.nifi.processor.ProcessContext context)
      This method will be called after the Configuration has been created, but before the FileSystem is created, allowing sub-classes to take further action on the Configuration before creating the FileSystem.
      Parameters:
      config - the Configuration that will be used to create the FileSystem
      context - the context that can be used to retrieve additional values
    • getFileSystem

      protected org.apache.hadoop.fs.FileSystem getFileSystem(org.apache.hadoop.conf.Configuration config) throws IOException
      This exists to allow unit tests to override it so that they don't take several minutes waiting for UDP packets to be received.
      Parameters:
      config - the configuration to use
      Returns:
      the FileSystem that is created for the given Configuration
      Throws:
      IOException - if unable to create the FileSystem
    • getFileSystemAsUser

      protected org.apache.hadoop.fs.FileSystem getFileSystemAsUser(org.apache.hadoop.conf.Configuration config, org.apache.hadoop.security.UserGroupInformation ugi) throws IOException
      Throws:
      IOException
    • checkHdfsUriForTimeout

      protected void checkHdfsUriForTimeout(org.apache.hadoop.conf.Configuration config) throws IOException
      Throws:
      IOException
    • getCompressionCodec

      protected org.apache.hadoop.io.compress.CompressionCodec getCompressionCodec(org.apache.nifi.processor.ProcessContext context, org.apache.hadoop.conf.Configuration configuration)
      Returns the configured CompressionCodec, or null if none is configured.
      Parameters:
      context - the ProcessContext
      configuration - the Hadoop Configuration
      Returns:
      CompressionCodec or null
    • getPathDifference

      public static String getPathDifference(org.apache.hadoop.fs.Path root, org.apache.hadoop.fs.Path child)
      Returns the relative path of the child that does not include the filename or the root path.
      Parameters:
      root - the path to relativize from
      child - the path to relativize
      Returns:
      the relative path
    • getConfiguration

      protected org.apache.hadoop.conf.Configuration getConfiguration()
    • getFileSystem

      protected org.apache.hadoop.fs.FileSystem getFileSystem()
    • getUserGroupInformation

      protected org.apache.hadoop.security.UserGroupInformation getUserGroupInformation()
    • isLocalFileSystemAccessDenied

      boolean isLocalFileSystemAccessDenied()
    • isFileSystemAccessDenied

      protected boolean isFileSystemAccessDenied(URI fileSystemUri)
    • getNormalizedPath

      protected org.apache.hadoop.fs.Path getNormalizedPath(org.apache.nifi.processor.ProcessContext context, org.apache.nifi.components.PropertyDescriptor property)
    • getNormalizedPath

      protected org.apache.hadoop.fs.Path getNormalizedPath(String rawPath)
    • getNormalizedPath

      protected org.apache.hadoop.fs.Path getNormalizedPath(org.apache.nifi.processor.ProcessContext context, org.apache.nifi.components.PropertyDescriptor property, org.apache.nifi.flowfile.FlowFile flowFile)
    • getNormalizedPath

      private org.apache.hadoop.fs.Path getNormalizedPath(String rawPath, Optional<String> propertyName)
    • findCause

      protected <T extends Throwable> Optional<T> findCause(Throwable t, Class<T> expectedCauseType, Predicate<T> causePredicate)
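      Returns an Optional containing the first throwable in the causal chain that is assignable to the provided cause type and satisfies the provided cause predicate; otherwise returns Optional.empty().
      Parameters:
      t - The throwable to inspect for the cause.
      expectedCauseType - The type to which the cause must be assignable.
      causePredicate - The predicate that the cause must satisfy.
      Returns:
      an Optional containing the first matching cause, or Optional.empty() if no throwable in the causal chain matches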
    • handleAuthErrors

      protected boolean handleAuthErrors(Throwable t, org.apache.nifi.processor.ProcessSession session, org.apache.nifi.processor.ProcessContext context, BiConsumer<org.apache.nifi.processor.ProcessSession,org.apache.nifi.processor.ProcessContext> sessionHandler)