public abstract static class TikaIO.ParseFiles extends org.apache.beam.sdk.transforms.PTransform<org.apache.beam.sdk.values.PCollection<org.apache.beam.sdk.io.FileIO.ReadableFile>,org.apache.beam.sdk.values.PCollection<ParseResult>>
Implementation of TikaIO.parseFiles().

| Constructor and Description |
|---|
| ParseFiles() |
| Modifier and Type | Method and Description |
|---|---|
| org.apache.beam.sdk.values.PCollection<ParseResult> | expand(org.apache.beam.sdk.values.PCollection<org.apache.beam.sdk.io.FileIO.ReadableFile> input) |
| void | populateDisplayData(org.apache.beam.sdk.transforms.display.DisplayData.Builder builder) |
| TikaIO.ParseFiles | withContentTypeHint(java.lang.String contentTypeHint) — Sets a content type hint to make the file parser detection more efficient. |
| TikaIO.ParseFiles | withInputMetadata(org.apache.tika.metadata.Metadata metadata) — Sets the input metadata for Parser.parse(java.io.InputStream, org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata, org.apache.tika.parser.ParseContext). |
| TikaIO.ParseFiles | withTikaConfigPath(java.lang.String tikaConfigPath) — Uses the given Tika Configuration XML file. |
| TikaIO.ParseFiles | withTikaConfigPath(org.apache.beam.sdk.options.ValueProvider<java.lang.String> tikaConfigPath) — Like withTikaConfigPath(java.lang.String), but takes the configuration path as a ValueProvider. |
public TikaIO.ParseFiles withTikaConfigPath(java.lang.String tikaConfigPath)
Uses the given Tika Configuration XML file.
public TikaIO.ParseFiles withTikaConfigPath(org.apache.beam.sdk.options.ValueProvider<java.lang.String> tikaConfigPath)
Like withTikaConfigPath(java.lang.String), but takes the configuration path as a ValueProvider.
public TikaIO.ParseFiles withContentTypeHint(java.lang.String contentTypeHint)
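Sets a content type hint to make the file parser detection more efficient. Overrides the content type hint in withInputMetadata(org.apache.tika.metadata.Metadata), if any.
public TikaIO.ParseFiles withInputMetadata(org.apache.tika.metadata.Metadata metadata)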
Sets the input metadata for Parser.parse(java.io.InputStream, org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata, org.apache.tika.parser.ParseContext).
public org.apache.beam.sdk.values.PCollection<ParseResult> expand(org.apache.beam.sdk.values.PCollection<org.apache.beam.sdk.io.FileIO.ReadableFile> input)
Specified by: expand in class org.apache.beam.sdk.transforms.PTransform<org.apache.beam.sdk.values.PCollection<org.apache.beam.sdk.io.FileIO.ReadableFile>,org.apache.beam.sdk.values.PCollection<ParseResult>>
public void populateDisplayData(org.apache.beam.sdk.transforms.display.DisplayData.Builder builder)
Specified by: populateDisplayData in interface org.apache.beam.sdk.transforms.display.HasDisplayData
Overrides: populateDisplayData in class org.apache.beam.sdk.transforms.PTransform<org.apache.beam.sdk.values.PCollection<org.apache.beam.sdk.io.FileIO.ReadableFile>,org.apache.beam.sdk.values.PCollection<ParseResult>>