Class GetHDFSFileInfo
java.lang.Object
org.apache.nifi.components.AbstractConfigurableComponent
org.apache.nifi.processor.AbstractSessionFactoryProcessor
org.apache.nifi.processor.AbstractProcessor
org.apache.nifi.processors.hadoop.AbstractHadoopProcessor
org.apache.nifi.processors.hadoop.GetHDFSFileInfo
- All Implemented Interfaces:
org.apache.nifi.components.ClassloaderIsolationKeyProvider, org.apache.nifi.components.ConfigurableComponent, org.apache.nifi.processor.Processor
@InputRequirement(INPUT_ALLOWED)
@Tags({"hadoop","HCFS","HDFS","get","list","ingest","source","filesystem"})
@CapabilityDescription("Retrieves a listing of files and directories from HDFS. This processor creates FlowFile(s) that represent the HDFS file/dir with relevant information. The main purpose of this processor is to provide functionality similar to the HDFS client, i.e. count, du, ls, test, etc. Unlike ListHDFS, this processor is stateless, supports incoming connections and provides information on a dir level. ")
@WritesAttribute(attribute="hdfs.objectName",description="The name of the file/dir found on HDFS.") @WritesAttribute(attribute="hdfs.path",description="The path is set to the absolute path of the object\'s parent directory on HDFS. For example, if an object is a directory \'foo\', under directory \'/bar\' then \'hdfs.objectName\' will have value \'foo\', and \'hdfs.path\' will be \'/bar\'") @WritesAttribute(attribute="hdfs.type",description="The type of an object. Possible values: directory, file, link") @WritesAttribute(attribute="hdfs.owner",description="The user that owns the object in HDFS") @WritesAttribute(attribute="hdfs.group",description="The group that owns the object in HDFS") @WritesAttribute(attribute="hdfs.lastModified",description="The timestamp of when the object in HDFS was last modified, as milliseconds since midnight Jan 1, 1970 UTC") @WritesAttribute(attribute="hdfs.length",description="In case of files: The number of bytes in the file in HDFS. In case of dirs: Returns storage space consumed by directory. ") @WritesAttribute(attribute="hdfs.count.files",description="In case of type=\'directory\' will represent total count of files under this dir. Won\'t be populated for other types of HDFS objects. ") @WritesAttribute(attribute="hdfs.count.dirs",description="In case of type=\'directory\' will represent total count of directories under this dir (including itself). Won\'t be populated for other types of HDFS objects. ") @WritesAttribute(attribute="hdfs.replication",description="The number of HDFS replicas for the file") @WritesAttribute(attribute="hdfs.permissions",description="The permissions for the object in HDFS. This is formatted as 3 characters for the owner, 3 for the group, and 3 for other users. For example rw-rw-r--") @WritesAttribute(attribute="hdfs.status",description="The status contains comma separated list of file/dir paths, which couldn\'t be listed/accessed. 
Status won\'t be set if no errors occurred.") @WritesAttribute(attribute="hdfs.full.tree",description="When destination is \'attribute\', will be populated with full tree of HDFS directory in JSON format. WARNING: In case when scan finds thousands or millions of objects, having huge values in attribute could impact flow file repo and GC/heap usage. Use content destination for such cases.")
@SeeAlso({ListHDFS.class,GetHDFS.class,FetchHDFS.class,PutHDFS.class})
public class GetHDFSFileInfo
extends AbstractHadoopProcessor
-
Nested Class Summary
Nested Classes
Modifier and Type | Class | Description
(package private) static class | GetHDFSFileInfo.ExecutionContext |
(package private) static class | GetHDFSFileInfo.HDFSFileInfoRequest |
(package private) class | GetHDFSFileInfo.HDFSObjectInfoDetails |
Nested classes/interfaces inherited from class org.apache.nifi.processors.hadoop.AbstractHadoopProcessor
AbstractHadoopProcessor.ValidationResources -
Field Summary
Fields
Modifier and Type | Field | Description
static final String | APPLICATION_JSON |
static final org.apache.nifi.components.PropertyDescriptor | BATCH_SIZE |
static final org.apache.nifi.components.PropertyDescriptor | DESTINATION |
(package private) static final org.apache.nifi.components.AllowableValue | DESTINATION_ATTRIBUTES |
(package private) static final org.apache.nifi.components.AllowableValue | DESTINATION_CONTENT |
static final org.apache.nifi.components.PropertyDescriptor | DIR_FILTER |
static final org.apache.nifi.components.PropertyDescriptor | FILE_EXCLUDE_FILTER |
static final org.apache.nifi.components.PropertyDescriptor | FILE_FILTER |
static final org.apache.nifi.components.PropertyDescriptor | FULL_PATH |
(package private) static final org.apache.nifi.components.AllowableValue | GROUP_ALL |
(package private) static final org.apache.nifi.components.AllowableValue | GROUP_NONE |
(package private) static final org.apache.nifi.components.AllowableValue | GROUP_PARENT_DIR |
static final org.apache.nifi.components.PropertyDescriptor | GROUPING |
static final org.apache.nifi.components.PropertyDescriptor | IGNORE_DOTTED_DIRS |
static final org.apache.nifi.components.PropertyDescriptor | IGNORE_DOTTED_FILES |
private static final List<org.apache.nifi.components.PropertyDescriptor> | PROPERTY_DESCRIPTORS |
static final org.apache.nifi.components.PropertyDescriptor | RECURSE_SUBDIRS |
static final org.apache.nifi.processor.Relationship | REL_FAILURE |
static final org.apache.nifi.processor.Relationship | REL_NOT_FOUND |
static final org.apache.nifi.processor.Relationship | REL_ORIGINAL |
static final org.apache.nifi.processor.Relationship | REL_SUCCESS |
private static final Set<org.apache.nifi.processor.Relationship> | RELATIONSHIPS |
Fields inherited from class org.apache.nifi.processors.hadoop.AbstractHadoopProcessor
ABSOLUTE_HDFS_PATH_ATTRIBUTE, ADDITIONAL_CLASSPATH_RESOURCES, COMPRESSION_CODEC, DIRECTORY, HADOOP_CONFIGURATION_RESOURCES, HADOOP_FILE_URL_ATTRIBUTE, hdfsResources, KERBEROS_USER_SERVICE, TARGET_HDFS_DIR_CREATED_ATTRIBUTE -
Constructor Summary
Constructors -
Method Summary
Modifier and Type | Method | Description
private org.apache.nifi.flowfile.FlowFile | addAsAttributes(org.apache.nifi.processor.ProcessSession session, GetHDFSFileInfo.HDFSObjectInfoDetails o, org.apache.nifi.flowfile.FlowFile ff) |
private org.apache.nifi.flowfile.FlowFile | addAsContent(GetHDFSFileInfo.ExecutionContext executionContext, org.apache.nifi.processor.ProcessSession session, GetHDFSFileInfo.HDFSObjectInfoDetails o, org.apache.nifi.flowfile.FlowFile ff) |
private org.apache.nifi.flowfile.FlowFile | addFullTreeToAttribute(org.apache.nifi.processor.ProcessSession session, GetHDFSFileInfo.HDFSObjectInfoDetails o, org.apache.nifi.flowfile.FlowFile ff) |
protected GetHDFSFileInfo.HDFSFileInfoRequest | buildRequestDetails(org.apache.nifi.processor.ProcessContext context, org.apache.nifi.flowfile.FlowFile ff) |
protected Collection<org.apache.nifi.components.ValidationResult> | customValidate(org.apache.nifi.components.ValidationContext validationContext) |
private void | finishProcessing(GetHDFSFileInfo.HDFSFileInfoRequest req, GetHDFSFileInfo.ExecutionContext executionContext, org.apache.nifi.processor.ProcessSession session) |
protected String | getPerms(org.apache.hadoop.fs.permission.FsPermission permission) |
private org.apache.nifi.flowfile.FlowFile | getReadyFlowFile(GetHDFSFileInfo.ExecutionContext executionContext, org.apache.nifi.processor.ProcessSession session, org.apache.nifi.flowfile.FlowFile origFF) |
Set<org.apache.nifi.processor.Relationship> | getRelationships() |
protected List<org.apache.nifi.components.PropertyDescriptor> | getSupportedPropertyDescriptors() |
protected void | init(org.apache.nifi.processor.ProcessorInitializationContext context) |
void | onTrigger(org.apache.nifi.processor.ProcessContext context, org.apache.nifi.processor.ProcessSession session) |
protected void | processHDFSObject(org.apache.nifi.processor.ProcessSession session, GetHDFSFileInfo.ExecutionContext executionContext, org.apache.nifi.flowfile.FlowFile origFF, GetHDFSFileInfo.HDFSFileInfoRequest req, GetHDFSFileInfo.HDFSObjectInfoDetails o, boolean isRoot) |
protected GetHDFSFileInfo.HDFSObjectInfoDetails | validateMatchingPatterns(GetHDFSFileInfo.HDFSObjectInfoDetails o, GetHDFSFileInfo.HDFSFileInfoRequest req) |
protected GetHDFSFileInfo.HDFSObjectInfoDetails | walkHDFSTree(org.apache.nifi.processor.ProcessSession session, GetHDFSFileInfo.ExecutionContext executionContext, org.apache.nifi.flowfile.FlowFile origFF, org.apache.hadoop.fs.FileSystem hdfs, org.apache.hadoop.security.UserGroupInformation ugi, GetHDFSFileInfo.HDFSFileInfoRequest req, GetHDFSFileInfo.HDFSObjectInfoDetails parent, boolean statsOnly) |
Methods inherited from class org.apache.nifi.processors.hadoop.AbstractHadoopProcessor
abstractOnScheduled, abstractOnStopped, checkHdfsUriForTimeout, findCause, getClassloaderIsolationKey, getCommonPropertyDescriptors, getCompressionCodec, getConfigLocations, getConfiguration, getFileSystem, getFileSystem, getFileSystemAsUser, getHadoopConfigurationForValidation, getNormalizedPath, getNormalizedPath, getNormalizedPath, getPathDifference, getUserGroupInformation, handleAuthErrors, isFileSystemAccessDenied, isLocalFileSystemAccessDenied, migrateProperties, preProcessConfiguration, resetHDFSResources, validateFileSystem
Methods inherited from class org.apache.nifi.processor.AbstractProcessor
onTrigger
Methods inherited from class org.apache.nifi.processor.AbstractSessionFactoryProcessor
getControllerServiceLookup, getIdentifier, getLogger, getNodeTypeProvider, initialize, isConfigurationRestored, isScheduled, toString, updateConfiguredRestoredTrue, updateScheduledFalse, updateScheduledTrue
Methods inherited from class org.apache.nifi.components.AbstractConfigurableComponent
equals, getPropertyDescriptor, getPropertyDescriptors, getSupportedDynamicPropertyDescriptor, hashCode, onPropertyModified, validate
Methods inherited from class java.lang.Object
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
Methods inherited from interface org.apache.nifi.components.ConfigurableComponent
getPropertyDescriptor, getPropertyDescriptors, onPropertyModified, validate
Methods inherited from interface org.apache.nifi.processor.Processor
isStateful, migrateRelationships
-
Field Details
-
APPLICATION_JSON
public static final String APPLICATION_JSON - See Also:
-
FULL_PATH
public static final org.apache.nifi.components.PropertyDescriptor FULL_PATH -
RECURSE_SUBDIRS
public static final org.apache.nifi.components.PropertyDescriptor RECURSE_SUBDIRS -
DIR_FILTER
public static final org.apache.nifi.components.PropertyDescriptor DIR_FILTER -
FILE_FILTER
public static final org.apache.nifi.components.PropertyDescriptor FILE_FILTER -
FILE_EXCLUDE_FILTER
public static final org.apache.nifi.components.PropertyDescriptor FILE_EXCLUDE_FILTER -
IGNORE_DOTTED_DIRS
public static final org.apache.nifi.components.PropertyDescriptor IGNORE_DOTTED_DIRS -
IGNORE_DOTTED_FILES
public static final org.apache.nifi.components.PropertyDescriptor IGNORE_DOTTED_FILES -
GROUP_ALL
static final org.apache.nifi.components.AllowableValue GROUP_ALL -
GROUP_PARENT_DIR
static final org.apache.nifi.components.AllowableValue GROUP_PARENT_DIR -
GROUP_NONE
static final org.apache.nifi.components.AllowableValue GROUP_NONE -
GROUPING
public static final org.apache.nifi.components.PropertyDescriptor GROUPING -
BATCH_SIZE
public static final org.apache.nifi.components.PropertyDescriptor BATCH_SIZE -
DESTINATION_ATTRIBUTES
static final org.apache.nifi.components.AllowableValue DESTINATION_ATTRIBUTES -
DESTINATION_CONTENT
static final org.apache.nifi.components.AllowableValue DESTINATION_CONTENT -
DESTINATION
public static final org.apache.nifi.components.PropertyDescriptor DESTINATION -
REL_SUCCESS
public static final org.apache.nifi.processor.Relationship REL_SUCCESS -
REL_NOT_FOUND
public static final org.apache.nifi.processor.Relationship REL_NOT_FOUND -
REL_ORIGINAL
public static final org.apache.nifi.processor.Relationship REL_ORIGINAL -
REL_FAILURE
public static final org.apache.nifi.processor.Relationship REL_FAILURE -
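PROPERTY_DESCRIPTORS
private static final List<org.apache.nifi.components.PropertyDescriptor> PROPERTY_DESCRIPTORS -
RELATIONSHIPS
private static final Set<org.apache.nifi.processor.Relationship> RELATIONSHIPS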
-
-
Constructor Details
-
GetHDFSFileInfo
public GetHDFSFileInfo()
-
-
Method Details
-
init
protected void init(org.apache.nifi.processor.ProcessorInitializationContext context) - Overrides:
init in class AbstractHadoopProcessor
-
getSupportedPropertyDescriptors
protected List<org.apache.nifi.components.PropertyDescriptor> getSupportedPropertyDescriptors() - Overrides:
getSupportedPropertyDescriptors in class AbstractHadoopProcessor
-
getRelationships
public Set<org.apache.nifi.processor.Relationship> getRelationships() - Specified by:
getRelationships in interface org.apache.nifi.processor.Processor - Overrides:
getRelationships in class org.apache.nifi.processor.AbstractSessionFactoryProcessor
-
customValidate
protected Collection<org.apache.nifi.components.ValidationResult> customValidate(org.apache.nifi.components.ValidationContext validationContext) - Overrides:
customValidate in class AbstractHadoopProcessor
-
onTrigger
public void onTrigger(org.apache.nifi.processor.ProcessContext context, org.apache.nifi.processor.ProcessSession session) throws org.apache.nifi.processor.exception.ProcessException - Specified by:
onTrigger in class org.apache.nifi.processor.AbstractProcessor - Throws:
org.apache.nifi.processor.exception.ProcessException
-
walkHDFSTree
protected GetHDFSFileInfo.HDFSObjectInfoDetails walkHDFSTree(org.apache.nifi.processor.ProcessSession session, GetHDFSFileInfo.ExecutionContext executionContext, org.apache.nifi.flowfile.FlowFile origFF, org.apache.hadoop.fs.FileSystem hdfs, org.apache.hadoop.security.UserGroupInformation ugi, GetHDFSFileInfo.HDFSFileInfoRequest req, GetHDFSFileInfo.HDFSObjectInfoDetails parent, boolean statsOnly) throws Exception - Throws:
Exception
-
validateMatchingPatterns
protected GetHDFSFileInfo.HDFSObjectInfoDetails validateMatchingPatterns(GetHDFSFileInfo.HDFSObjectInfoDetails o, GetHDFSFileInfo.HDFSFileInfoRequest req) -
processHDFSObject
protected void processHDFSObject(org.apache.nifi.processor.ProcessSession session, GetHDFSFileInfo.ExecutionContext executionContext, org.apache.nifi.flowfile.FlowFile origFF, GetHDFSFileInfo.HDFSFileInfoRequest req, GetHDFSFileInfo.HDFSObjectInfoDetails o, boolean isRoot) -
getReadyFlowFile
private org.apache.nifi.flowfile.FlowFile getReadyFlowFile(GetHDFSFileInfo.ExecutionContext executionContext, org.apache.nifi.processor.ProcessSession session, org.apache.nifi.flowfile.FlowFile origFF) -
finishProcessing
private void finishProcessing(GetHDFSFileInfo.HDFSFileInfoRequest req, GetHDFSFileInfo.ExecutionContext executionContext, org.apache.nifi.processor.ProcessSession session) -
addAsContent
private org.apache.nifi.flowfile.FlowFile addAsContent(GetHDFSFileInfo.ExecutionContext executionContext, org.apache.nifi.processor.ProcessSession session, GetHDFSFileInfo.HDFSObjectInfoDetails o, org.apache.nifi.flowfile.FlowFile ff) -
addAsAttributes
private org.apache.nifi.flowfile.FlowFile addAsAttributes(org.apache.nifi.processor.ProcessSession session, GetHDFSFileInfo.HDFSObjectInfoDetails o, org.apache.nifi.flowfile.FlowFile ff) -
addFullTreeToAttribute
private org.apache.nifi.flowfile.FlowFile addFullTreeToAttribute(org.apache.nifi.processor.ProcessSession session, GetHDFSFileInfo.HDFSObjectInfoDetails o, org.apache.nifi.flowfile.FlowFile ff) -
getPerms
protected String getPerms(org.apache.hadoop.fs.permission.FsPermission permission) -
buildRequestDetails
protected GetHDFSFileInfo.HDFSFileInfoRequest buildRequestDetails(org.apache.nifi.processor.ProcessContext context, org.apache.nifi.flowfile.FlowFile ff)
-