public class OrcFlowFileWriter extends Object implements org.apache.hadoop.hive.ql.io.orc.Writer, org.apache.hadoop.hive.ql.io.orc.MemoryManager.Callback
This class is synchronized so that multi-threaded access is ok. In particular, because the MemoryManager is shared between writers, this class assumes that checkMemory may be called from a separate thread.
| Modifier and Type | Class and Description |
|---|---|
private static class |
OrcFlowFileWriter.BinaryTreeWriter |
private static class |
OrcFlowFileWriter.BooleanTreeWriter |
private class |
OrcFlowFileWriter.BufferedStream
This class is used to hold the contents of streams as they are buffered.
|
private static class |
OrcFlowFileWriter.ByteTreeWriter |
private static class |
OrcFlowFileWriter.CharTreeWriter
Under the covers, char is written to ORC the same way as string.
|
private static class |
OrcFlowFileWriter.DateTreeWriter |
private static class |
OrcFlowFileWriter.DecimalTreeWriter |
private class |
OrcFlowFileWriter.DirectStream
An output receiver that writes the ByteBuffers to the output stream
as they are received.
|
private static class |
OrcFlowFileWriter.DoubleTreeWriter |
private static class |
OrcFlowFileWriter.FloatTreeWriter |
private static class |
OrcFlowFileWriter.IntegerTreeWriter |
private static class |
OrcFlowFileWriter.ListTreeWriter |
private static class |
OrcFlowFileWriter.MapTreeWriter |
private static class |
OrcFlowFileWriter.RowIndexPositionRecorder |
private class |
OrcFlowFileWriter.StreamFactory
Interface from the Writer to the TreeWriters.
|
private static class |
OrcFlowFileWriter.StringTreeWriter |
private static class |
OrcFlowFileWriter.StructTreeWriter |
private static class |
OrcFlowFileWriter.TimestampTreeWriter |
private static class |
OrcFlowFileWriter.TreeWriter
The parent class of all of the writers for each column.
|
private static class |
OrcFlowFileWriter.UnionTreeWriter |
private static class |
OrcFlowFileWriter.VarcharTreeWriter
Under the covers, varchar is written to ORC the same way as string.
|
| Modifier and Type | Field and Description |
|---|---|
private boolean |
addBlockPadding |
private long |
adjustedStripeSize |
(package private) static String |
BASE_TIMESTAMP_STRING |
private long |
blockSize |
private boolean[] |
bloomFilterColumns |
private double |
bloomFilterFpp |
private int |
bufferSize |
private boolean |
buildIndex |
private org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterCallback |
callback |
private org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterContext |
callbackContext |
private org.apache.hadoop.hive.ql.io.orc.CompressionCodec |
codec |
private static int |
COLUMN_COUNT_THRESHOLD |
private int |
columnCount |
private org.apache.hadoop.hive.ql.io.orc.CompressionKind |
compress |
private org.apache.hadoop.hive.ql.io.orc.OrcFile.CompressionStrategy |
compressionStrategy |
private org.apache.hadoop.conf.Configuration |
conf |
private long |
defaultStripeSize |
private org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy |
encodingStrategy |
private OutputStream |
flowFileOutputStream |
private static int |
HDFS_BUFFER_SIZE |
private long |
headerLength |
private static org.apache.commons.logging.Log |
LOG |
private org.apache.hadoop.hive.ql.io.orc.MemoryManager |
memoryManager |
(package private) static int |
MILLIS_PER_SECOND |
private static int |
MIN_ROW_INDEX_STRIDE |
private float |
paddingTolerance |
private org.apache.hadoop.fs.Path |
path |
private com.google.protobuf.CodedOutputStream |
protobufWriter |
private long |
rawDataSize |
private ByteCountingOutputStream |
rawWriter |
private long |
rowCount |
private int |
rowIndexStride |
private int |
rowsInIndex |
private long |
rowsInStripe |
private OrcFlowFileWriter.StreamFactory |
streamFactory |
private Map<org.apache.hadoop.hive.ql.io.orc.StreamName,OrcFlowFileWriter.BufferedStream> |
streams |
private List<org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeInformation> |
stripes |
private int |
stripesAtLastFlush |
private OrcFlowFileWriter.TreeWriter |
treeWriter |
private Map<String,com.google.protobuf.ByteString> |
userMetadata |
private org.apache.hadoop.hive.ql.io.orc.OrcFile.Version |
version |
private org.apache.hadoop.hive.ql.io.orc.OutStream |
writer |
| Constructor and Description |
|---|
OrcFlowFileWriter(OutputStream flowFileOutputStream,
org.apache.hadoop.fs.Path path,
org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector inspector,
long stripeSize,
org.apache.hadoop.hive.ql.io.orc.CompressionKind compress,
int bufferSize,
int rowIndexStride,
org.apache.hadoop.hive.ql.io.orc.MemoryManager memoryManager,
boolean addBlockPadding,
org.apache.hadoop.hive.ql.io.orc.OrcFile.Version version,
org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterCallback callback,
org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy encodingStrategy,
org.apache.hadoop.hive.ql.io.orc.OrcFile.CompressionStrategy compressionStrategy,
float paddingTolerance,
long blockSizeValue,
String bloomFilterColumnNames,
double bloomFilterFpp) |
| Modifier and Type | Method and Description |
|---|---|
void |
addRow(Object row) |
void |
addRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch batch) |
void |
addUserMetadata(String name,
ByteBuffer value) |
void |
appendStripe(byte[] stripe,
int offset,
int length,
org.apache.hadoop.hive.ql.io.orc.StripeInformation stripeInfo,
org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeStatistics stripeStatistics) |
void |
appendUserMetadata(List<org.apache.hadoop.hive.ql.io.orc.OrcProto.UserMetadataItem> userMetadata) |
boolean |
checkMemory(double newScale) |
void |
close() |
private long |
computeRawDataSize() |
static org.apache.hadoop.hive.ql.io.orc.CompressionCodec |
createCodec(org.apache.hadoop.hive.ql.io.orc.CompressionKind kind) |
private void |
createRowIndexEntry() |
private static OrcFlowFileWriter.TreeWriter |
createTreeWriter(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector inspector,
OrcFlowFileWriter.StreamFactory streamFactory,
boolean nullable) |
private long |
estimateStripeSize() |
private void |
flushStripe() |
private List<OrcFlowFileWriter.TreeWriter> |
getAllColumnTreeWriters(OrcFlowFileWriter.TreeWriter rootTreeWriter) |
private void |
getAllColumnTreeWritersImpl(OrcFlowFileWriter.TreeWriter tw,
List<OrcFlowFileWriter.TreeWriter> result) |
private int |
getClosestBufferSize(int estBufferSize,
int bs) |
private String |
getColumnNamesFromInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector inspector) |
(package private) int |
getEstimatedBufferSize(int bs) |
(package private) int |
getEstimatedBufferSize(String colNames,
int bs) |
private long |
getMemoryAvailableForORC() |
long |
getNumberOfRows()
Row count gets updated when flushing the stripes.
|
long |
getRawDataSize()
Raw data size will be computed when writing the file footer.
|
private long |
getRawDataSizeFromInspectors(OrcFlowFileWriter.TreeWriter child,
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector oi) |
private long |
getRawDataSizeFromPrimitives(OrcFlowFileWriter.TreeWriter child,
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector oi) |
OutputStream |
getStream() |
private void |
updateFileStatistics(org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeStatistics stripeStatistics) |
private org.apache.hadoop.hive.ql.io.orc.OrcProto.CompressionKind |
writeCompressionKind(org.apache.hadoop.hive.ql.io.orc.CompressionKind kind) |
private void |
writeFileStatistics(org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.Builder builder,
OrcFlowFileWriter.TreeWriter writer) |
private int |
writeFooter(long bodyLength) |
long |
writeIntermediateFooter() |
private int |
writeMetadata() |
private int |
writePostScript(int footerLength,
int metadataLength) |
private static void |
writeTypes(org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.Builder builder,
OrcFlowFileWriter.TreeWriter treeWriter) |
private static final org.apache.commons.logging.Log LOG
private static final int HDFS_BUFFER_SIZE
private static final int MIN_ROW_INDEX_STRIDE
private static final int COLUMN_COUNT_THRESHOLD
private final org.apache.hadoop.fs.Path path
private final long defaultStripeSize
private long adjustedStripeSize
private final int rowIndexStride
private final org.apache.hadoop.hive.ql.io.orc.CompressionKind compress
private final org.apache.hadoop.hive.ql.io.orc.CompressionCodec codec
private final boolean addBlockPadding
private final int bufferSize
private final long blockSize
private final float paddingTolerance
private final Map<org.apache.hadoop.hive.ql.io.orc.StreamName,OrcFlowFileWriter.BufferedStream> streams
private final OutputStream flowFileOutputStream
private ByteCountingOutputStream rawWriter
private org.apache.hadoop.hive.ql.io.orc.OutStream writer
private com.google.protobuf.CodedOutputStream protobufWriter
private long headerLength
private int columnCount
private long rowCount
private long rowsInStripe
private long rawDataSize
private int rowsInIndex
private int stripesAtLastFlush
private final List<org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeInformation> stripes
private final OrcFlowFileWriter.StreamFactory streamFactory
private final OrcFlowFileWriter.TreeWriter treeWriter
private final boolean buildIndex
private final org.apache.hadoop.hive.ql.io.orc.MemoryManager memoryManager
private final org.apache.hadoop.hive.ql.io.orc.OrcFile.Version version
private final org.apache.hadoop.conf.Configuration conf
private final org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterCallback callback
private final org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterContext callbackContext
private final org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy encodingStrategy
private final org.apache.hadoop.hive.ql.io.orc.OrcFile.CompressionStrategy compressionStrategy
private final boolean[] bloomFilterColumns
private final double bloomFilterFpp
static final int MILLIS_PER_SECOND
static final String BASE_TIMESTAMP_STRING
public OrcFlowFileWriter(OutputStream flowFileOutputStream, org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector inspector, long stripeSize, org.apache.hadoop.hive.ql.io.orc.CompressionKind compress, int bufferSize, int rowIndexStride, org.apache.hadoop.hive.ql.io.orc.MemoryManager memoryManager, boolean addBlockPadding, org.apache.hadoop.hive.ql.io.orc.OrcFile.Version version, org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterCallback callback, org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy encodingStrategy, org.apache.hadoop.hive.ql.io.orc.OrcFile.CompressionStrategy compressionStrategy, float paddingTolerance, long blockSizeValue, String bloomFilterColumnNames, double bloomFilterFpp) throws IOException
Throws: IOException
private String getColumnNamesFromInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector inspector)
int getEstimatedBufferSize(int bs)
int getEstimatedBufferSize(String colNames, int bs)
private int getClosestBufferSize(int estBufferSize,
int bs)
private long getMemoryAvailableForORC()
public static org.apache.hadoop.hive.ql.io.orc.CompressionCodec createCodec(org.apache.hadoop.hive.ql.io.orc.CompressionKind kind)
public boolean checkMemory(double newScale)
throws IOException
Specified by: checkMemory in interface org.apache.hadoop.hive.ql.io.orc.MemoryManager.Callback
Throws: IOException
private static OrcFlowFileWriter.TreeWriter createTreeWriter(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector inspector, OrcFlowFileWriter.StreamFactory streamFactory, boolean nullable) throws IOException
Throws: IOException
private static void writeTypes(org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.Builder builder,
OrcFlowFileWriter.TreeWriter treeWriter)
public OutputStream getStream() throws IOException
Throws: IOException
private void createRowIndexEntry()
throws IOException
Throws: IOException
private void flushStripe()
throws IOException
Throws: IOException
private long computeRawDataSize()
private long getRawDataSizeFromInspectors(OrcFlowFileWriter.TreeWriter child, org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector oi)
private long getRawDataSizeFromPrimitives(OrcFlowFileWriter.TreeWriter child, org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector oi)
private org.apache.hadoop.hive.ql.io.orc.OrcProto.CompressionKind writeCompressionKind(org.apache.hadoop.hive.ql.io.orc.CompressionKind kind)
private void writeFileStatistics(org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.Builder builder,
OrcFlowFileWriter.TreeWriter writer)
throws IOException
Throws: IOException
private int writeMetadata()
throws IOException
Throws: IOException
private int writeFooter(long bodyLength)
throws IOException
Throws: IOException
private int writePostScript(int footerLength,
int metadataLength)
throws IOException
Throws: IOException
private long estimateStripeSize()
public void addUserMetadata(String name, ByteBuffer value)
Specified by: addUserMetadata in interface org.apache.hadoop.hive.ql.io.orc.Writer
public void addRow(Object row) throws IOException
Specified by: addRow in interface org.apache.hadoop.hive.ql.io.orc.Writer
Throws: IOException
public void addRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch batch)
throws IOException
Throws: IOException
public void close()
throws IOException
Specified by: close in interface org.apache.hadoop.hive.ql.io.orc.Writer
Throws: IOException
public long getRawDataSize()
Specified by: getRawDataSize in interface org.apache.hadoop.hive.ql.io.orc.Writer
public long getNumberOfRows()
Specified by: getNumberOfRows in interface org.apache.hadoop.hive.ql.io.orc.Writer
public long writeIntermediateFooter()
throws IOException
Specified by: writeIntermediateFooter in interface org.apache.hadoop.hive.ql.io.orc.Writer
Throws: IOException
public void appendStripe(byte[] stripe,
int offset,
int length,
org.apache.hadoop.hive.ql.io.orc.StripeInformation stripeInfo,
org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeStatistics stripeStatistics)
throws IOException
Specified by: appendStripe in interface org.apache.hadoop.hive.ql.io.orc.Writer
Throws: IOException
private void updateFileStatistics(org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeStatistics stripeStatistics)
private List<OrcFlowFileWriter.TreeWriter> getAllColumnTreeWriters(OrcFlowFileWriter.TreeWriter rootTreeWriter)
private void getAllColumnTreeWritersImpl(OrcFlowFileWriter.TreeWriter tw, List<OrcFlowFileWriter.TreeWriter> result)
public void appendUserMetadata(List<org.apache.hadoop.hive.ql.io.orc.OrcProto.UserMetadataItem> userMetadata)
Specified by: appendUserMetadata in interface org.apache.hadoop.hive.ql.io.orc.Writer

Copyright © 2023 Apache NiFi Project. All rights reserved.