@SideEffectFree @SupportsBatching @Tags(value={"avro","orc","hive","convert"}) @InputRequirement(value=INPUT_REQUIRED) @CapabilityDescription(value="Converts an Avro record into ORC file format. This processor provides a direct mapping of an Avro record to an ORC record, such that the resulting ORC file will have the same hierarchical structure as the Avro document. If an incoming FlowFile contains a stream of multiple Avro records, the resultant FlowFile will contain an ORC file containing all of the Avro records. If an incoming FlowFile does not contain any records, an empty ORC file is the output. NOTE: Many Avro datatypes (collections, primitives, and unions of primitives, e.g.) can be converted to ORC, but unions of collections and other complex datatypes may not be able to be converted to ORC.") @WritesAttribute(attribute="mime.type",description="Sets the mime type to application/octet-stream") @WritesAttribute(attribute="filename",description="Sets the filename to the existing filename with the extension replaced by / added to by .orc") @WritesAttribute(attribute="record.count",description="Sets the number of records in the ORC file.") @WritesAttribute(attribute="hive.ddl",description="Creates a partial Hive DDL statement for creating a table in Hive from this ORC file. This can be used in ReplaceText for setting the content to the DDL. To make it valid DDL, add \"LOCATION \'<path_to_orc_file_in_hdfs>\'\", where the path is the directory that contains this ORC file on HDFS. For example, ConvertAvroToORC can send flow files to a PutHDFS processor to send the file to HDFS, then to a ReplaceText to set the content to this DDL (plus the LOCATION clause as described), then to PutHiveQL processor to create the table if it doesn\'t exist.") public class ConvertAvroToORC extends AbstractProcessor
| Modifier and Type | Field and Description |
|---|---|
| static PropertyDescriptor | BUFFER_SIZE |
| static PropertyDescriptor | COMPRESSION_TYPE |
| static String | HIVE_DDL_ATTRIBUTE |
| static PropertyDescriptor | HIVE_TABLE_NAME |
| static PropertyDescriptor | ORC_CONFIGURATION_RESOURCES |
| static String | ORC_MIME_TYPE |
| private org.apache.hadoop.conf.Configuration | orcConfig |
| private static List<PropertyDescriptor> | propertyDescriptors |
| static String | RECORD_COUNT_ATTRIBUTE |
| (package private) static Relationship | REL_FAILURE |
| (package private) static Relationship | REL_SUCCESS |
| private static Set<Relationship> | relationships |
| static PropertyDescriptor | STRIPE_SIZE |
| Constructor and Description |
|---|
| ConvertAvroToORC() |
| Modifier and Type | Method and Description |
|---|---|
| Set<Relationship> | getRelationships() |
| protected List<PropertyDescriptor> | getSupportedPropertyDescriptors() |
| void | onTrigger(ProcessContext context, ProcessSession session) |
| void | setup(ProcessContext context) |
onTrigger
getControllerServiceLookup, getIdentifier, getLogger, getNodeTypeProvider, init, initialize, isConfigurationRestored, isScheduled, toString, updateConfiguredRestoredTrue, updateScheduledFalse, updateScheduledTrue
customValidate, equals, getPropertyDescriptor, getPropertyDescriptors, getSupportedDynamicPropertyDescriptor, hashCode, onPropertyModified, validate
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
isStateful
getPropertyDescriptor, getPropertyDescriptors, onPropertyModified, validate
public static final String ORC_MIME_TYPE
public static final String HIVE_DDL_ATTRIBUTE
public static final String RECORD_COUNT_ATTRIBUTE
public static final PropertyDescriptor ORC_CONFIGURATION_RESOURCES
public static final PropertyDescriptor STRIPE_SIZE
public static final PropertyDescriptor BUFFER_SIZE
public static final PropertyDescriptor COMPRESSION_TYPE
public static final PropertyDescriptor HIVE_TABLE_NAME
static final Relationship REL_SUCCESS
static final Relationship REL_FAILURE
private static final List<PropertyDescriptor> propertyDescriptors
private static final Set<Relationship> relationships
private volatile org.apache.hadoop.conf.Configuration orcConfig
protected List<PropertyDescriptor> getSupportedPropertyDescriptors()
Overrides: getSupportedPropertyDescriptors in class AbstractConfigurableComponent
public Set<Relationship> getRelationships()
Specified by: getRelationships in interface Processor
Overrides: getRelationships in class AbstractSessionFactoryProcessor
@OnScheduled public void setup(ProcessContext context)
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException
Specified by: onTrigger in class AbstractProcessor
Throws: ProcessException
Copyright © 2023 Apache NiFi Project. All rights reserved.