@Internal public abstract class GenericCsvInputFormat<OT> extends DelimitedInputFormat<OT>
FileInputFormat.FileBaseStatistics, FileInputFormat.InputSplitOpenThread
| Modifier and Type | Field and Description |
|---|---|
protected int |
commentCount |
protected byte[] |
commentPrefix |
protected boolean[] |
fieldIncluded |
protected int |
invalidLineCount |
protected boolean |
lineDelimiterIsLinebreak |
RECORD_DELIMITER
currentSplit, ENUMERATE_NESTED_FILES_FLAG, enumerateNestedFiles, filePath, INFLATER_INPUT_STREAM_FACTORIES, minSplitSize, numSplits, openTimeout, READ_WHOLE_SPLIT_FLAG, splitLength, splitStart, stream, unsplittable
| Modifier | Constructor and Description |
|---|---|
protected |
GenericCsvInputFormat() |
protected |
GenericCsvInputFormat(Path filePath) |
| Modifier and Type | Method and Description |
|---|---|
protected static void |
checkAndCoSort(int[] positions,
Class<?>[] types) |
protected static void |
checkForMonotonousOrder(int[] positions,
Class<?>[] types) |
void |
close()
Closes the input by releasing all buffers and closing the file input stream.
|
void |
enableQuotedStringParsing(char quoteCharacter) |
byte[] |
getCommentPrefix() |
byte[] |
getFieldDelimiter() |
protected FieldParser<?>[] |
getFieldParsers() |
protected Class<?>[] |
getGenericFieldTypes() |
int |
getNumberOfFieldsTotal() |
int |
getNumberOfNonNullFields() |
boolean |
isLenient() |
boolean |
isSkippingFirstLineAsHeader() |
void |
open(FileInputSplit split)
Opens the given input split.
|
protected boolean |
parseRecord(Object[] holders,
byte[] bytes,
int offset,
int numBytes) |
void |
setCharset(String charset)
Set the name of the character set used for the row delimiter.
|
void |
setCommentPrefix(String commentPrefix) |
void |
setFieldDelimiter(String delimiter) |
protected void |
setFieldsGeneric(boolean[] includedMask,
Class<?>[] fieldTypes) |
protected void |
setFieldsGeneric(int[] sourceFieldIndices,
Class<?>[] fieldTypes) |
protected void |
setFieldTypesGeneric(Class<?>... fieldTypes) |
void |
setLenient(boolean lenient) |
void |
setSkipFirstLineAsHeader(boolean skipFirstLine) |
protected int |
skipFields(byte[] bytes,
int startPos,
int limit,
byte[] delim) |
boolean |
supportsMultiPaths()
Override this method to support multiple paths.
|
configure, getBufferSize, getCharset, getCurrentState, getDelimiter, getLineLengthLimit, getNumLineSamples, getStatistics, loadConfigParameters, loadGlobalConfigParams, nextRecord, reachedEnd, readLine, readRecord, reopen, setBufferSize, setDelimiter, setDelimiter, setDelimiter, setLineLengthLimit, setNumLineSamples
acceptFile, createInputSplits, decorateInputStream, extractFileExtension, getFilePath, getFilePaths, getFileStats, getFileStats, getInflaterInputStreamFactory, getInputSplitAssigner, getMinSplitSize, getNestedFileEnumeration, getNumSplits, getOpenTimeout, getSplitLength, getSplitStart, registerInflaterInputStreamFactory, setFilePath, setFilePath, setFilePaths, setFilePaths, setFilesFilter, setMinSplitSize, setNestedFileEnumeration, setNumSplits, setOpenTimeout, testForUnsplittable, toString
closeInputFormat, getRuntimeContext, openInputFormat, setRuntimeContext
protected boolean lineDelimiterIsLinebreak
protected transient int commentCount
protected transient int invalidLineCount
protected boolean[] fieldIncluded
protected byte[] commentPrefix
protected GenericCsvInputFormat()
protected GenericCsvInputFormat(Path filePath)
public boolean supportsMultiPaths()
FileInputFormat
supportsMultiPaths in class FileInputFormat<OT>
public int getNumberOfFieldsTotal()
public int getNumberOfNonNullFields()
public void setCharset(String charset)
DelimitedInputFormatFieldParsers.
These fields are interpreted when set. Changing the charset thereafter
may cause unexpected results.
setCharset in class DelimitedInputFormat<OT>
charset - name of the charset
public byte[] getCommentPrefix()
public void setCommentPrefix(String commentPrefix)
public byte[] getFieldDelimiter()
public void setFieldDelimiter(String delimiter)
public boolean isLenient()
public void setLenient(boolean lenient)
public boolean isSkippingFirstLineAsHeader()
public void setSkipFirstLineAsHeader(boolean skipFirstLine)
public void enableQuotedStringParsing(char quoteCharacter)
protected FieldParser<?>[] getFieldParsers()
protected Class<?>[] getGenericFieldTypes()
protected void setFieldTypesGeneric(Class<?>... fieldTypes)
protected void setFieldsGeneric(int[] sourceFieldIndices,
Class<?>[] fieldTypes)
protected void setFieldsGeneric(boolean[] includedMask,
Class<?>[] fieldTypes)
public void open(FileInputSplit split) throws IOException
DelimitedInputFormat
open in interface InputFormat<OT,FileInputSplit>
open in class DelimitedInputFormat<OT>
split - The input split to open.
IOException - Thrown, if the split could not be opened due to an I/O problem.
FileInputFormat.open(org.apache.flink.core.fs.FileInputSplit)
public void close()
throws IOException
DelimitedInputFormat
close in interface InputFormat<OT,FileInputSplit>
close in class DelimitedInputFormat<OT>
IOException - Thrown, if the closing of the file stream causes an I/O error.
protected boolean parseRecord(Object[] holders, byte[] bytes, int offset, int numBytes) throws ParseException
ParseException
protected int skipFields(byte[] bytes,
int startPos,
int limit,
byte[] delim)
protected static void checkAndCoSort(int[] positions,
Class<?>[] types)
protected static void checkForMonotonousOrder(int[] positions,
Class<?>[] types)
Copyright © 2014–2018 The Apache Software Foundation. All rights reserved.