@Tags(value={"hive","streaming","put","database","store"}) @CapabilityDescription(value="This processor uses Hive Streaming to send flow file data to an Apache Hive table. The incoming flow file is expected to be in Avro format and the table must exist in Hive. Please see the Hive documentation for requirements on the Hive table (format, partitions, etc.). The partition values are extracted from the Avro record based on the names of the partition columns as specified in the processor. NOTE: If multiple concurrent tasks are configured for this processor, only one table can be written to at any time by a single thread. Additional tasks intending to write to the same table will wait for the current task to finish writing to the table.") @WritesAttribute(attribute="hivestreaming.record.count",description="This attribute is written on the flow files routed to the 'success' and 'failure' relationships, and contains the number of records from the incoming flow file written successfully and unsuccessfully, respectively.") @WritesAttribute(attribute="query.output.tables",description="This attribute is written on the flow files routed to the 'success' and 'failure' relationships, and contains the target table name in 'databaseName.tableName' format.") @RequiresInstanceClassLoading @DeprecationNotice(classNames="org.apache.nifi.processors.hive.PutHive3Streaming") public class PutHiveStreaming extends AbstractSessionFactoryProcessor
| Modifier and Type | Class and Description |
|---|---|
private static class |
PutHiveStreaming.FunctionContext |
protected class |
PutHiveStreaming.HiveStreamingRecord |
private static class |
PutHiveStreaming.ShouldRetryException |
| Constructor and Description |
|---|
PutHiveStreaming() |
| Modifier and Type | Method and Description |
|---|---|
private void |
abortAllWriters(Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers)
Abort current Txn on all writers
|
private void |
abortAndCloseWriters(Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers) |
void |
cleanup() |
private void |
closeAllWriters(Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers)
Closes all writers and removes them from the cache
|
protected Collection<ValidationResult> |
customValidate(ValidationContext validationContext) |
private void |
flushAllWriters(Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers,
boolean rollToNext) |
protected KerberosProperties |
getKerberosProperties() |
private HiveWriter |
getOrCreateWriter(Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers,
HiveOptions options,
org.apache.hive.hcatalog.streaming.HiveEndPoint endPoint) |
Set<Relationship> |
getRelationships() |
protected List<PropertyDescriptor> |
getSupportedPropertyDescriptors() |
(package private) org.apache.hadoop.security.UserGroupInformation |
getUgi() |
protected void |
init(ProcessorInitializationContext context) |
(package private) boolean |
isAllowExplicitKeytab() |
protected org.apache.hive.hcatalog.streaming.HiveEndPoint |
makeHiveEndPoint(List<String> partitionValues,
HiveOptions options) |
protected HiveWriter |
makeHiveWriter(org.apache.hive.hcatalog.streaming.HiveEndPoint endPoint,
ExecutorService callTimeoutPool,
org.apache.hadoop.security.UserGroupInformation ugi,
HiveOptions options) |
private ExceptionHandler.OnError<PutHiveStreaming.FunctionContext,PutHiveStreaming.HiveStreamingRecord> |
onHiveRecordError(ProcessContext context,
ProcessSession session,
Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers) |
private ExceptionHandler.OnError<PutHiveStreaming.FunctionContext,List<PutHiveStreaming.HiveStreamingRecord>> |
onHiveRecordsError(ProcessContext context,
ProcessSession session,
Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers) |
private ExceptionHandler.OnError<PutHiveStreaming.FunctionContext,org.apache.avro.generic.GenericRecord> |
onRecordError(ProcessContext context,
ProcessSession session,
Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers) |
void |
onTrigger(ProcessContext context,
ProcessSessionFactory sessionFactory) |
private void |
onTrigger(ProcessContext context,
ProcessSession session,
PutHiveStreaming.FunctionContext functionContext) |
private void |
retireEldestWriter(Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers)
Locate writer that has not been used for longest time and retire it
|
private int |
retireIdleWriters(Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers,
int idleTimeout)
Locate all writers past idle timeout and retire them
|
private void |
sendHeartBeatOnAllWriters() |
void |
setup(ProcessContext context) |
private void |
setupHeartBeatTimer(int heartbeatInterval) |
Methods inherited from class AbstractSessionFactoryProcessor: getControllerServiceLookup, getIdentifier, getLogger, getNodeTypeProvider, initialize, isConfigurationRestored, isScheduled, toString, updateConfiguredRestoredTrue, updateScheduledFalse, updateScheduledTrue

Methods inherited from class AbstractConfigurableComponent: equals, getPropertyDescriptor, getPropertyDescriptors, getSupportedDynamicPropertyDescriptor, hashCode, onPropertyModified, validate

Methods inherited from class java.lang.Object: clone, finalize, getClass, notify, notifyAll, wait, wait, wait

Methods inherited from interface Processor: isStateful

Methods inherited from interface ConfigurableComponent: getPropertyDescriptor, getPropertyDescriptors, onPropertyModified, validate

private static final String ALLOW_EXPLICIT_KEYTAB
public static final String HIVE_STREAMING_RECORD_COUNT_ATTR
private static final String CLIENT_CACHE_DISABLED_PROPERTY
private static final Validator GREATER_THAN_ONE_VALIDATOR
public static final PropertyDescriptor METASTORE_URI
public static final PropertyDescriptor HIVE_CONFIGURATION_RESOURCES
public static final PropertyDescriptor DB_NAME
public static final PropertyDescriptor TABLE_NAME
public static final PropertyDescriptor PARTITION_COLUMNS
public static final PropertyDescriptor AUTOCREATE_PARTITIONS
public static final PropertyDescriptor MAX_OPEN_CONNECTIONS
public static final PropertyDescriptor HEARTBEAT_INTERVAL
public static final PropertyDescriptor TXNS_PER_BATCH
public static final PropertyDescriptor RECORDS_PER_TXN
public static final PropertyDescriptor CALL_TIMEOUT
public static final PropertyDescriptor ROLLBACK_ON_FAILURE
static final PropertyDescriptor KERBEROS_CREDENTIALS_SERVICE
public static final Relationship REL_SUCCESS
public static final Relationship REL_FAILURE
public static final Relationship REL_RETRY
private List<PropertyDescriptor> propertyDescriptors
private Set<Relationship> relationships
protected KerberosProperties kerberosProperties
private volatile File kerberosConfigFile
protected volatile HiveConfigurator hiveConfigurator
protected volatile org.apache.hadoop.security.UserGroupInformation ugi
protected final AtomicReference<KerberosUser> kerberosUserReference
protected volatile org.apache.hadoop.hive.conf.HiveConf hiveConfig
protected final AtomicBoolean sendHeartBeat
protected volatile int callTimeout
protected ExecutorService callTimeoutPool
protected transient Timer heartBeatTimer
protected volatile ConcurrentLinkedQueue<Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter>> threadWriterList
protected volatile ConcurrentHashMap<String,Semaphore> tableSemaphoreMap
private final AtomicReference<ValidationResources> validationResourceHolder
protected void init(ProcessorInitializationContext context)
init in class AbstractSessionFactoryProcessor
protected List<PropertyDescriptor> getSupportedPropertyDescriptors()
getSupportedPropertyDescriptors in class AbstractConfigurableComponent
public Set<Relationship> getRelationships()
getRelationships in interface Processor
getRelationships in class AbstractSessionFactoryProcessor
protected Collection<ValidationResult> customValidate(ValidationContext validationContext)
customValidate in class AbstractConfigurableComponent
@OnScheduled public void setup(ProcessContext context)
private ExceptionHandler.OnError<PutHiveStreaming.FunctionContext,List<PutHiveStreaming.HiveStreamingRecord>> onHiveRecordsError(ProcessContext context, ProcessSession session, Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers)
private ExceptionHandler.OnError<PutHiveStreaming.FunctionContext,PutHiveStreaming.HiveStreamingRecord> onHiveRecordError(ProcessContext context, ProcessSession session, Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers)
private ExceptionHandler.OnError<PutHiveStreaming.FunctionContext,org.apache.avro.generic.GenericRecord> onRecordError(ProcessContext context, ProcessSession session, Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers)
public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException
ProcessException
private void onTrigger(ProcessContext context, ProcessSession session, PutHiveStreaming.FunctionContext functionContext) throws ProcessException
ProcessException
@OnStopped public void cleanup()
private void setupHeartBeatTimer(int heartbeatInterval)
private void sendHeartBeatOnAllWriters()
throws InterruptedException
InterruptedException
private void flushAllWriters(Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers, boolean rollToNext) throws HiveWriter.CommitFailure, HiveWriter.TxnBatchFailure, HiveWriter.TxnFailure, InterruptedException
private void abortAndCloseWriters(Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers)
private void abortAllWriters(Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers) throws InterruptedException, org.apache.hive.hcatalog.streaming.StreamingException, HiveWriter.TxnBatchFailure
InterruptedException
org.apache.hive.hcatalog.streaming.StreamingException
HiveWriter.TxnBatchFailure
private void closeAllWriters(Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers)
private HiveWriter getOrCreateWriter(Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers, HiveOptions options, org.apache.hive.hcatalog.streaming.HiveEndPoint endPoint) throws HiveWriter.ConnectFailure, InterruptedException
private void retireEldestWriter(Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers)
private int retireIdleWriters(Map<org.apache.hive.hcatalog.streaming.HiveEndPoint,HiveWriter> writers, int idleTimeout)
protected org.apache.hive.hcatalog.streaming.HiveEndPoint makeHiveEndPoint(List<String> partitionValues, HiveOptions options) throws org.apache.hive.hcatalog.streaming.ConnectionError
org.apache.hive.hcatalog.streaming.ConnectionError
protected HiveWriter makeHiveWriter(org.apache.hive.hcatalog.streaming.HiveEndPoint endPoint, ExecutorService callTimeoutPool, org.apache.hadoop.security.UserGroupInformation ugi, HiveOptions options) throws HiveWriter.ConnectFailure, InterruptedException
protected KerberosProperties getKerberosProperties()
org.apache.hadoop.security.UserGroupInformation getUgi()
boolean isAllowExplicitKeytab()
Copyright © 2023 Apache NiFi Project. All rights reserved.