org.apache.avro.Schema sourceSchema
org.apache.avro.Schema targetSchema
TypedProperties config
org.apache.flink.configuration.Configuration conf
HoodieFlinkWriteClient<T> writeClient
NonThrownExecutor executor
boolean isCleaning
org.apache.flink.runtime.jobgraph.OperatorID operatorId
org.apache.flink.configuration.Configuration conf
org.apache.flink.table.types.logical.RowType rowType
HoodieTable<T,I,K,O> hoodieTable
CkpMetadata ckpMetadata
org.apache.flink.configuration.Configuration conf
Pattern pattern
String lastInstantTime
int parallelism
int bucketNum
String indexKeyFields
Map<K,V> bucketIndex
Set<E> incBucketIndex
int taskId
String instantTime
int rowId
org.apache.flink.configuration.Configuration config
org.apache.flink.table.types.logical.RowType rowType
int taskID
String initInstant
CkpMetadata ckpMetadata
String[] recordKeyFields
String[] partitionPathFields
RowDataProjection recordKeyProjection
RowDataProjection partitionPathProjection
boolean hiveStylePartitioning
boolean encodePartitionPath
boolean consistentLogicalTimestampEnabled
Option<T> keyGenOpt
boolean simpleRecordKey
org.apache.flink.table.data.RowData.FieldGetter recordKeyFieldGetter
boolean simplePartitionPath
org.apache.flink.table.data.RowData.FieldGetter partitionPathFieldGetter
boolean nonPartitioned
org.apache.flink.table.runtime.generated.GeneratedNormalizedKeyComputer gComputer
org.apache.flink.table.runtime.generated.GeneratedRecordComparator gComparator
org.apache.flink.configuration.Configuration conf
org.apache.flink.configuration.Configuration conf
org.apache.flink.configuration.Configuration conf
org.apache.flink.table.types.logical.RowType rowType
int taskID
boolean asyncClustering
boolean sortClusteringEnabled
String clusteringInstantTime
ClusteringGroupInfo clusteringGroupInfo
Map<K,V> strategyParams
org.apache.flink.configuration.Configuration conf
HoodieClusteringPlan clusteringPlan
String clusteringInstantTime
org.apache.flink.configuration.Configuration conf
Boolean help
String path
Integer clusteringDeltaCommits
Integer clusteringTasks
String cleanPolicy
Integer cleanRetainCommits
Integer cleanRetainHours
Integer cleanRetainFileVersions
Integer archiveMinCommits
Integer archiveMaxCommits
Boolean schedule
String clusteringInstantTime
Boolean cleanAsyncEnable
String planStrategyClass
String planPartitionFilterMode
Long targetFileMaxBytes
Long smallFileLimit
Integer skipFromLatestPartitions
String sortColumns
Integer sortMemory
Integer maxNumGroups
Integer targetPartitions
String clusterBeginPartition
String clusterEndPartition
String partitionRegexPattern
String partitionSelected
String clusteringSeq
Boolean serviceMode
Integer minClusteringIntervalSeconds
List<E> configs
String propsFilePath
FlinkClusteringConfig cfg
org.apache.flink.configuration.Configuration conf
HoodieTableMetaClient metaClient
HoodieFlinkWriteClient<T> writeClient
HoodieFlinkTable<T> table
ExecutorService executor
org.apache.flink.configuration.Configuration config
int taskID
String currentInstant
boolean confirming
The flag is needed because the write task does not block during the waiting time interval, so some data buckets may still flush out with the old instant time. There are two cases in which the flush may produce corrupted files if the old instant is committed successfully: 1) the write handle was writing data but was interrupted, leaving a corrupted parquet file; 2) the write handle finished the write but was not closed, leaving an empty parquet file.
To solve this, when the flag is set to true, we block the data flushing and thus the #processElement method; the flag is reset to false when the task receives the checkpoint success event or the latest inflight instant time changes (meaning the last instant committed successfully).
List<E> writeStatuses
AbstractWriteFunction<I> function
AbstractWriteOperator<I> operator
org.apache.flink.configuration.Configuration conf
org.apache.flink.configuration.Configuration conf
String compactionInstantTime
CompactionOperation operation
org.apache.flink.configuration.Configuration conf
org.apache.flink.configuration.Configuration conf
boolean asyncCompaction
int taskID
Boolean help
String path
String compactionTriggerStrategy
Integer compactionDeltaCommits
Integer compactionDeltaSeconds
Boolean cleanAsyncEnable
String cleanPolicy
Integer cleanRetainCommits
Integer cleanRetainHours
Integer cleanRetainFileVersions
Integer archiveMinCommits
Integer archiveMaxCommits
Integer compactionMaxMemory
Long compactionTargetIo
Integer compactionTasks
Boolean schedule
String compactionSeq
Boolean serviceMode
Integer minCompactionIntervalSeconds
String compactionPlanSelectStrategy
Integer maxNumCompactionPlans
String compactionPlanInstant
String spillableMapPath
List<E> configs
String propsFilePath
FlinkCompactionConfig cfg
org.apache.flink.configuration.Configuration conf
HoodieTableMetaClient metaClient
HoodieFlinkWriteClient<T> writeClient
HoodieFlinkTable<T> table
ExecutorService executor
long checkpointId
List<E> writeStatuses
int taskID
String instantTime
boolean lastBatch
boolean endInput
boolean bootstrap
String instant
CkpMessage.State state
org.apache.flink.api.common.state.ValueState<T> indexState
BucketAssigner to generate a new bucket ID. BucketAssigner bucketAssigner
org.apache.flink.configuration.Configuration conf
boolean isChangingRecords
PayloadCreation payloadCreation
boolean globalIndex
int bucketNum
String indexKeyFields
org.apache.flink.table.types.logical.RowType rowType
org.apache.flink.configuration.Configuration config
double totalLimit
boolean shouldCombine
Constructor<T> constructor
String preCombineField
ExpressionEvaluators.Evaluator[] evaluators
Object[] vals
org.apache.flink.table.types.logical.LogicalType type
String name
int index
ExpressionEvaluators.Evaluator evaluator
Object val
ExpressionEvaluators.Evaluator[] evaluators
ExpressionPredicates.Predicate[] predicates
org.apache.flink.table.types.logical.LogicalType literalType
String columnName
Serializable literal
ExpressionPredicates.Predicate predicate
ExpressionPredicates.Predicate[] predicates
org.apache.flink.configuration.Configuration conf
org.apache.flink.core.fs.Path path
org.apache.flink.table.types.logical.RowType rowType
long maxCompactionMemoryInBytes
PartitionPruners.PartitionPruner partitionPruner
boolean skipCompaction
boolean skipClustering
org.apache.flink.core.fs.Path path
long interval
boolean cdcEnabled
boolean isRunning
String issuedInstant
String issuedOffset
org.apache.flink.configuration.Configuration conf
HoodieTableMetaClient metaClient
IncrementalInputSplits incrementalInputSplits
MailboxExecutorAdapter executor
MergeOnReadInputFormat format
String kafkaTopic
String kafkaGroupId
String kafkaBootstrapServers
String flinkCheckPointPath
org.apache.flink.runtime.state.StateBackend stateBackend
String instantRetryTimes
String instantRetryInterval
String targetBasePath
String targetTableName
String tableType
Boolean insertCluster
String propsFilePath
List<E> configs
String recordKeyField
String partitionPathField
String keygenClass
String keygenType
String sourceOrderingField
String payloadClassName
String recordMergerImpls
String recordMergerStrategy
WriteOperationType operation
Boolean preCombine
Boolean commitOnErrors
List<E> transformerClassNames
Boolean metadataEnabled
Integer metadataCompactionDeltaCommits
String writePartitionFormat
Long writeRateLimit
Integer writeParquetBlockSize
Integer writeParquetMaxFileSize
Integer parquetPageSize
Long checkpointInterval
Boolean help
Integer indexBootstrapNum
Integer bucketAssignNum
Integer writeTaskNum
String partitionDefaultName
Boolean indexBootstrapEnabled
Double indexStateTtl
Boolean indexGlobalEnabled
String indexPartitionRegex
String sourceAvroSchemaPath
String sourceAvroSchema
Boolean utcTimezone
Boolean writePartitionUrlEncode
Boolean hiveStylePartitioning
Double writeTaskMaxSize
Double writeBatchSize
Integer writeLogBlockSize
Integer writeLogMaxSize
Integer writeMergeMaxMemory
Boolean compactionAsyncEnabled
Integer compactionTasks
String compactionTriggerStrategy
Integer compactionDeltaCommits
Integer compactionDeltaSeconds
Integer compactionMaxMemory
Long compactionTargetIo
Boolean clusteringAsyncEnabled
Integer clusteringTasks
Integer clusteringDeltaCommits
String planStrategyClass
String planPartitionFilterMode
Long targetFileMaxBytes
Long smallFileLimit
Integer skipFromLatestPartitions
String sortColumns
Integer sortMemory
Integer maxNumGroups
Integer targetPartitions
String clusterBeginPartition
String clusterEndPartition
String partitionRegexPattern
String partitionSelected
Boolean cleanAsyncEnabled
String cleanPolicy
Integer cleanRetainCommits
Integer cleanRetainHours
Integer cleanRetainFileVersions
Integer archiveMaxCommits
Integer archiveMinCommits
Boolean hiveSyncEnabled
String hiveSyncDb
String hiveSyncTable
String hiveSyncFileFormat
String hiveSyncMode
String hiveSyncUsername
String hiveSyncPassword
String hiveSyncJdbcUrl
String hiveSyncMetastoreUri
String hiveSyncPartitionFields
String hiveSyncPartitionExtractorClass
Boolean hiveSyncAssumeDatePartition
Boolean hiveSyncUseJdbc
Boolean hiveSyncAutoCreateDb
Boolean hiveSyncIgnoreExceptions
Boolean hiveSyncSkipRoSuffix
Boolean hiveSyncSupportTimestamp
boolean preserveHoodieMetadata
String[] sortColumnNames
boolean consistentLogicalTimestampEnabled
HoodieTable<T,I,K,O> table
List<E> indexKeyFields
List<E> doAppend
List<E> fileIdPfxList
HoodieWriteConfig config
HoodieTableMetaClient metaClient
HoodieIndex<I,O> index
SerializableConfiguration hadoopConfiguration
TaskContextSupplier taskContextSupplier
HoodieTableMetadata metadata
HoodieStorageLayout storageLayout
boolean isMetadataTable
HashMap<K,V> inputPartitionPathStatMap
HashMap<K,V> outputPartitionPathStatMap
WorkloadStat globalStat
WriteOperationType operationType
boolean hasOutputWorkLoadStats
org.apache.flink.configuration.Configuration conf
InternalSchema querySchema
String validCommits
String tablePath
HoodieCDCFileSplit[] changes
String[] fullFieldNames
org.apache.flink.table.types.DataType[] fullFieldTypes
int[] selectedFields
String partDefaultName
String partPathField
boolean hiveStylePartitioning
boolean utcTimestamp
org.apache.flink.formats.parquet.utils.SerializableConfiguration conf
List<E> predicates
long limit
org.apache.flink.api.common.io.FilePathFilter localFilesFilter
InternalSchemaManager internalSchemaManager
org.apache.flink.configuration.Configuration conf
MergeOnReadTableState tableState
List<E> fieldNames
List<E> fieldTypes
String defaultPartName
int[] requiredPos
List<E> predicates
long limit
long currentReadCount
boolean emitDelete
boolean closed
InternalSchemaManager internalSchemaManager
int splitNum
Option<T> basePath
Option<T> logPaths
String latestCommit
String tablePath
long maxCompactionMemoryInBytes
String mergeType
Option<T> instantRange
String fileId
long consumed
org.apache.flink.table.types.logical.RowType rowType
org.apache.flink.table.types.logical.RowType requiredRowType
String avroSchema
String requiredAvroSchema
List<E> inputSplits
String[] pkFields
int operationPos
org.apache.hadoop.fs.Path heartbeatFilePath
long heartbeatIntervalInMs
long heartbeatTimeoutThresholdInMs
ScheduledExecutorService executor
boolean started
org.apache.flink.formats.json.JsonRowDataDeserializationSchema deserializationSchema
CastMap castMap
org.apache.flink.table.data.RowData.FieldGetter[] fieldGetters
SerializableSupplier<T> initializer
Copyright © 2023 The Apache Software Foundation. All rights reserved.