public abstract class AbstractOfficeParser
extends org.apache.tika.parser.AbstractParser
Intermediate layer to set OfficeParserConfig uniformly.

| Constructor and Description |
|---|
| AbstractOfficeParser() |

| Modifier and Type | Method and Description |
|---|---|
| void | configure(org.apache.tika.parser.ParseContext parseContext)<br>Checks to see if the user has specified an OfficeParserConfig. |
| boolean | getExtractAllAlternativesFromMSG() |
| boolean | getExtractMacros() |
| boolean | getIncludeDeletedContent() |
| boolean | getIncludeMoveFromContent() |
| boolean | getUseSAXDocxExtractor() |
| void | setByteArrayMaxOverride(int maxOverride)<br>WARNING: this sets a static variable in POI. |
| void | setConcatenatePhoneticRuns(boolean concatenatePhoneticRuns) |
| void | setDateFormatOverride(String format) |
| void | setExtractAllAlternativesFromMSG(boolean extractAllAlternativesFromMSG)<br>Some .msg files can contain body content in html, rtf and/or text. |
| void | setExtractMacros(boolean extractMacros) |
| void | setIncludeDeletedContent(boolean includeDeletedConent) |
| void | setIncludeMoveFromContent(boolean includeMoveFromContent) |
| void | setIncludeShapeBasedContent(boolean includeShapeBasedContent) |
| void | setUseSAXDocxExtractor(boolean useSAXDocxExtractor) |
| void | setUseSAXPptxExtractor(boolean useSAXPptxExtractor) |

public void configure(org.apache.tika.parser.ParseContext parseContext)
Checks to see if the user has specified an OfficeParserConfig.
If so, no changes are made; if not, one is added to the context.
Parameters: parseContext -

public boolean getIncludeDeletedContent()
See Also: OfficeParserConfig.getIncludeDeletedContent()

public boolean getIncludeMoveFromContent()
See Also: OfficeParserConfig.getIncludeMoveFromContent()

public boolean getUseSAXDocxExtractor()
See Also: OfficeParserConfig.getUseSAXDocxExtractor()

public boolean getExtractMacros()
See Also: OfficeParserConfig.getExtractMacros()

@Field public void setIncludeDeletedContent(boolean includeDeletedConent)
@Field public void setIncludeMoveFromContent(boolean includeMoveFromContent)
@Field public void setIncludeShapeBasedContent(boolean includeShapeBasedContent)
@Field public void setUseSAXDocxExtractor(boolean useSAXDocxExtractor)
@Field public void setUseSAXPptxExtractor(boolean useSAXPptxExtractor)
@Field public void setExtractMacros(boolean extractMacros)
@Field public void setConcatenatePhoneticRuns(boolean concatenatePhoneticRuns)
@Field public void setExtractAllAlternativesFromMSG(boolean extractAllAlternativesFromMSG)
Parameters: extractAllAlternativesFromMSG - whether or not to extract all alternative parts from msg files

public boolean getExtractAllAlternativesFromMSG()
@Field public void setByteArrayMaxOverride(int maxOverride)
Parameters: maxOverride -

@Field public void setDateFormatOverride(String format)
Copyright © 2007–2022 The Apache Software Foundation. All rights reserved.