Serialized Form
-
Package org.apache.tika.parser.apple
-
Class org.apache.tika.parser.apple.AppleSingleFileParser extends org.apache.tika.parser.AbstractParser implements Serializable
-
-
Package org.apache.tika.parser.asm
-
Class org.apache.tika.parser.asm.ClassParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -3531388963354454357L
-
-
Package org.apache.tika.parser.audio
-
Class org.apache.tika.parser.audio.AudioParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -6015684081240882695L
-
Class org.apache.tika.parser.audio.MidiParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 6343278584336189432L
-
-
Package org.apache.tika.parser.chm
-
Class org.apache.tika.parser.chm.ChmParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 5938777307516469802L
-
-
Package org.apache.tika.parser.chm.accessor
-
Class org.apache.tika.parser.chm.accessor.ChmItsfHeader extends Object implements Serializable
- serialVersionUID:
- 2215291838533213826L
-
Serialized Fields
-
currentPlace
int currentPlace
-
data_offset
long data_offset
-
dataRemained
int dataRemained
-
dir_len
long dir_len
-
dir_offset
long dir_offset
-
dir_uuid
byte[] dir_uuid
-
header_len
int header_len
-
lang_id
long lang_id
-
last_modified
long last_modified
-
signature
byte[] signature
-
stream_uuid
byte[] stream_uuid
-
unknown_000c
int unknown_000c
-
unknown_len
long unknown_len
-
unknown_offset
long unknown_offset
-
version
int version
-
-
Class org.apache.tika.parser.chm.accessor.ChmItspHeader extends Object implements Serializable
- serialVersionUID:
- 1962394421998181341L
-
Serialized Fields
-
block_len
long block_len
-
blockidx_intvl
int blockidx_intvl
-
currentPlace
int currentPlace
-
dataRemained
int dataRemained
-
header_len
int header_len
-
index_depth
int index_depth
-
index_head
int index_head
-
index_root
int index_root
-
lang_id
long lang_id
-
num_blocks
long num_blocks
-
signature
byte[] signature
-
system_uuid
byte[] system_uuid
-
unknown_000c
int unknown_000c
-
unknown_0024
int unknown_0024
-
unknown_002c
int unknown_002c
-
unknown_0044
byte[] unknown_0044
-
version
int version
-
-
Class org.apache.tika.parser.chm.accessor.ChmLzxcControlData extends Object implements Serializable
- serialVersionUID:
- -7897854774939631565L
-
Serialized Fields
-
currentPlace
int currentPlace
-
dataRemained
int dataRemained
-
resetInterval
long resetInterval
-
signature
byte[] signature
-
size
long size
-
unknown_18
long unknown_18
-
version
long version
-
windowSize
long windowSize
-
windowsPerReset
long windowsPerReset
-
-
Class org.apache.tika.parser.chm.accessor.ChmLzxcResetTable extends Object implements Serializable
- serialVersionUID:
- -8209574429411707460L
-
Serialized Fields
-
block_address
long[] block_address
-
block_count
long block_count
-
block_len
long block_len
-
compressed_len
long compressed_len
-
currentPlace
int currentPlace
-
dataRemained
int dataRemained
-
table_offset
long table_offset
-
uncompressed_len
long uncompressed_len
-
unknown
long unknown
-
version
long version
-
-
Class org.apache.tika.parser.chm.accessor.ChmPmgiHeader extends Object implements Serializable
- serialVersionUID:
- -2092282339894303701L
-
Serialized Fields
-
currentPlace
int currentPlace
-
dataRemained
int dataRemained
-
free_space
long free_space
-
signature
byte[] signature
-
-
Class org.apache.tika.parser.chm.accessor.ChmPmglHeader extends Object implements Serializable
- serialVersionUID:
- -6139486487475923593L
-
Serialized Fields
-
block_next
int block_next
-
block_prev
int block_prev
-
currentPlace
int currentPlace
-
dataRemained
int dataRemained
-
free_space
long free_space
-
signature
byte[] signature
-
unknown_0008
long unknown_0008
-
-
-
Package org.apache.tika.parser.chm.exception
-
Class org.apache.tika.parser.chm.exception.ChmParsingException extends org.apache.tika.exception.TikaException implements Serializable
- serialVersionUID:
- 6497936044733665210L
-
-
Package org.apache.tika.parser.code
-
Class org.apache.tika.parser.code.SourceCodeParser extends org.apache.tika.parser.AbstractEncodingDetectorParser implements Serializable
- serialVersionUID:
- -4543476498190054160L
-
-
Package org.apache.tika.parser.crypto
-
Class org.apache.tika.parser.crypto.Pkcs7Parser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -7310531559075115044L
-
Class org.apache.tika.parser.crypto.TSDParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 3268158344501763323L
-
-
Package org.apache.tika.parser.csv
-
Class org.apache.tika.parser.csv.TextAndCSVParser extends org.apache.tika.parser.AbstractEncodingDetectorParser implements Serializable
-
Serialized Fields
-
delimiters
char[] delimiters
-
markLimit
int markLimit
This is the mark limit in characters (not bytes) to read from the stream when classifying the stream as csv, tsv or txt.
-
minConfidence
double minConfidence
Minimum confidence score needed for there to be enough evidence to determine csv/tsv vs. txt.
-
-
-
-
Package org.apache.tika.parser.ctakes
-
Class org.apache.tika.parser.ctakes.CTAKESConfig extends Object implements Serializable
- serialVersionUID:
- -1599741171775528923L
-
Serialized Fields
-
aeDescriptorPath
String aeDescriptorPath
-
annotationProps
CTAKESAnnotationProperty[] annotationProps
-
metadata
String[] metadata
-
prettyPrint
boolean prettyPrint
-
separatorChar
char separatorChar
-
serialize
boolean serialize
-
serializerType
CTAKESSerializer serializerType
-
stream
OutputStream stream
-
text
boolean text
-
UMLSPass
String UMLSPass
-
UMLSUser
String UMLSUser
-
-
Class org.apache.tika.parser.ctakes.CTAKESParser extends org.apache.tika.parser.ParserDecorator implements Serializable
- serialVersionUID:
- -2313482748027097961L
-
-
Package org.apache.tika.parser.dbf
-
Class org.apache.tika.parser.dbf.DBFParser extends org.apache.tika.parser.AbstractParser implements Serializable
-
-
Package org.apache.tika.parser.dif
-
Class org.apache.tika.parser.dif.DIFParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 971505521275777826L
-
-
Package org.apache.tika.parser.dwg
-
Class org.apache.tika.parser.dwg.DWGParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -7744232583079169119L
-
-
Package org.apache.tika.parser.envi
-
Class org.apache.tika.parser.envi.EnviHeaderParser extends org.apache.tika.parser.AbstractEncodingDetectorParser implements Serializable
- serialVersionUID:
- -1479368523072408091L
-
-
Package org.apache.tika.parser.epub
-
Class org.apache.tika.parser.epub.EpubContentParser extends org.apache.tika.parser.AbstractParser implements Serializable
-
Class org.apache.tika.parser.epub.EpubParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 215176772484050550L
-
Serialized Fields
-
content
org.apache.tika.parser.Parser content
-
meta
org.apache.tika.parser.Parser meta
-
streaming
boolean streaming
-
-
-
Package org.apache.tika.parser.executable
-
Class org.apache.tika.parser.executable.ExecutableParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 32128791892482L
-
-
Package org.apache.tika.parser.feed
-
Class org.apache.tika.parser.feed.FeedParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -3785361933034525186L
-
-
Package org.apache.tika.parser.font
-
Class org.apache.tika.parser.font.AdobeFontMetricParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -4820306522217196835L
-
Class org.apache.tika.parser.font.TrueTypeParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 44788554612243032L
-
-
Package org.apache.tika.parser.gdal
-
Class org.apache.tika.parser.gdal.GDALParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -3869130527323941401L
-
Serialized Fields
-
command
String command
-
-
-
Package org.apache.tika.parser.geo.topic
-
Class org.apache.tika.parser.geo.topic.GeoParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -2241391757440215491L
-
Serialized Fields
-
available
boolean available
-
config
GeoParserConfig config
-
gazetteerClient
GeoGazetteerClient gazetteerClient
-
initialized
boolean initialized
-
modelUrl
URL modelUrl
-
nameFinder
opennlp.tools.namefind.NameFinderME nameFinder
-
-
Class org.apache.tika.parser.geo.topic.GeoParserConfig extends Object implements Serializable
- serialVersionUID:
- -3167692634278575818L
-
-
Package org.apache.tika.parser.geo.topic.gazetteer
-
Class org.apache.tika.parser.geo.topic.gazetteer.Location extends Object implements Serializable
- serialVersionUID:
- -59485448766406004L
-
-
Package org.apache.tika.parser.geoinfo
-
Class org.apache.tika.parser.geoinfo.GeographicInformationParser extends org.apache.tika.parser.AbstractParser implements Serializable
-
Serialized Fields
-
SUPPORTED_TYPES
Set<org.apache.tika.mime.MediaType> SUPPORTED_TYPES
-
-
-
-
Package org.apache.tika.parser.grib
-
Class org.apache.tika.parser.grib.GribParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 7855458954474247655L
-
Serialized Fields
-
SUPPORTED_TYPES
Set<org.apache.tika.mime.MediaType> SUPPORTED_TYPES
-
-
-
Package org.apache.tika.parser.hdf
-
Class org.apache.tika.parser.hdf.HDFParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 1091208208003437549L
-
-
Package org.apache.tika.parser.html
-
Class org.apache.tika.parser.html.HtmlEncodingDetector extends Object implements Serializable
-
Serialized Fields
-
markLimit
int markLimit
-
-
-
Class org.apache.tika.parser.html.HtmlParser extends org.apache.tika.parser.AbstractEncodingDetectorParser implements Serializable
- serialVersionUID:
- 7895315240498733128L
-
Serialized Fields
-
extractScripts
boolean extractScripts
-
-
-
Package org.apache.tika.parser.html.charsetdetector
-
Class org.apache.tika.parser.html.charsetdetector.StandardHtmlEncodingDetector extends Object implements Serializable
-
Serialized Fields
-
markLimit
int markLimit
-
-
-
-
Package org.apache.tika.parser.hwp
-
Class org.apache.tika.parser.hwp.HwpTextExtractorV5 extends Object implements Serializable
- serialVersionUID:
- 1L
-
Class org.apache.tika.parser.hwp.HwpV5Parser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 1L
-
Serialized Fields
-
extractor
HwpTextExtractorV5 extractor
-
-
-
Package org.apache.tika.parser.image
-
Class org.apache.tika.parser.image.BPGParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -161736541253892772L
-
Class org.apache.tika.parser.image.ICNSParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 922010233654248327L
-
Class org.apache.tika.parser.image.ImageParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 7852529269245520335L
-
Class org.apache.tika.parser.image.PSDParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 883387734607994914L
-
Class org.apache.tika.parser.image.TiffParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -3941143576535464926L
-
Class org.apache.tika.parser.image.WebPParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -3941143576535464926L
-
-
Package org.apache.tika.parser.iptc
-
Class org.apache.tika.parser.iptc.IptcAnpaParser extends Object implements Serializable
- serialVersionUID:
- -6062820170212879115L
-
Serialized Fields
-
FMT_ANPA_1312
int FMT_ANPA_1312
-
FMT_ANPA_UPI
int FMT_ANPA_UPI
-
FMT_ANPA_UPI_DL
int FMT_ANPA_UPI_DL
-
FMT_IPTC_7901
int FMT_IPTC_7901
-
FMT_IPTC_AP
int FMT_IPTC_AP
-
FMT_IPTC_BLM
int FMT_IPTC_BLM
-
FMT_IPTC_CHAR
int FMT_IPTC_CHAR
-
FMT_IPTC_NYT
int FMT_IPTC_NYT
-
FMT_IPTC_PHOTO
int FMT_IPTC_PHOTO
-
FMT_IPTC_RTR
int FMT_IPTC_RTR
-
FMT_NITF
int FMT_NITF
-
FMT_NITF_RB
int FMT_NITF_RB
-
FMT_NITF_TT
int FMT_NITF_TT
-
FORMAT
int FORMAT
-
-
-
Package org.apache.tika.parser.isatab
-
Class org.apache.tika.parser.isatab.ISArchiveParser extends Object implements Serializable
- serialVersionUID:
- 3640809327541300229L
-
-
Package org.apache.tika.parser.iwork
-
Class org.apache.tika.parser.iwork.IWorkPackageParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -2160322853809682372L
-
-
Package org.apache.tika.parser.iwork.iwana
-
Class org.apache.tika.parser.iwork.iwana.IWork13PackageParser extends org.apache.tika.parser.AbstractParser implements Serializable
-
-
Package org.apache.tika.parser.jdbc
-
Class org.apache.tika.parser.jdbc.SQLite3Parser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -752276948656079347L
-
-
Package org.apache.tika.parser.journal
-
Class org.apache.tika.parser.journal.JournalParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 4664255544154296438L
-
-
Package org.apache.tika.parser.jpeg
-
Class org.apache.tika.parser.jpeg.JpegParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -1355028253756234603L
-
-
Package org.apache.tika.parser.mail
-
Class org.apache.tika.parser.mail.RFC822Parser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -5504243905998074168L
-
Serialized Fields
-
detector
org.apache.tika.detect.Detector detector
-
extractAllAlternatives
boolean extractAllAlternatives
-
-
-
Package org.apache.tika.parser.mat
-
Class org.apache.tika.parser.mat.MatParser extends org.apache.tika.parser.AbstractParser implements Serializable
-
Serialized Fields
-
SUPPORTED_TYPES
Set<org.apache.tika.mime.MediaType> SUPPORTED_TYPES
-
-
-
-
Package org.apache.tika.parser.mbox
-
Class org.apache.tika.parser.mbox.MboxParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -1762689436731160661L
-
Class org.apache.tika.parser.mbox.OutlookPSTParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 620998217748364063L
-
-
Package org.apache.tika.parser.microsoft
-
Class org.apache.tika.parser.microsoft.AbstractOfficeParser extends org.apache.tika.parser.AbstractParser implements Serializable
-
Serialized Fields
-
defaultOfficeParserConfig
OfficeParserConfig defaultOfficeParserConfig
-
-
-
Class org.apache.tika.parser.microsoft.EMFParser extends org.apache.tika.parser.AbstractParser implements Serializable
-
Class org.apache.tika.parser.microsoft.JackcessParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -752276948656079347L
-
Serialized Fields
-
locale
Locale locale
-
-
Class org.apache.tika.parser.microsoft.MSOwnerFileParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -752276948656079347L
-
Class org.apache.tika.parser.microsoft.OfficeParser extends AbstractOfficeParser implements Serializable
- serialVersionUID:
- 7393462244028653479L
-
Class org.apache.tika.parser.microsoft.OfficeParserConfig extends Object implements Serializable
-
Serialized Fields
-
concatenatePhoneticRuns
boolean concatenatePhoneticRuns
-
extractAllAlternativesFromMSG
boolean extractAllAlternativesFromMSG
-
extractMacros
boolean extractMacros
-
includeDeletedContent
boolean includeDeletedContent
-
includeHeadersAndFooters
boolean includeHeadersAndFooters
-
includeMissingRows
boolean includeMissingRows
-
includeMoveFromContent
boolean includeMoveFromContent
-
includeShapeBasedContent
boolean includeShapeBasedContent
-
includeSlideMasterContent
boolean includeSlideMasterContent
-
includeSlideNotes
boolean includeSlideNotes
-
useSAXDocxExtractor
boolean useSAXDocxExtractor
-
useSAXPptxExtractor
boolean useSAXPptxExtractor
-
-
-
Class org.apache.tika.parser.microsoft.OldExcelParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 4611820730372823452L
-
Class org.apache.tika.parser.microsoft.POIFSContainerDetector extends Object implements Serializable
- serialVersionUID:
- -3028021741663605293L
-
Serialized Fields
-
markLimit
int markLimit
-
-
Class org.apache.tika.parser.microsoft.TikaExcelGeneralFormat extends Format implements Serializable
- serialVersionUID:
- 1L
-
Serialized Fields
-
decimalFormat
DecimalFormat decimalFormat
-
decimalSymbols
DecimalFormatSymbols decimalSymbols
-
integerFormat
DecimalFormat integerFormat
-
scientificFormat
DecimalFormat scientificFormat
-
-
Class org.apache.tika.parser.microsoft.TNEFParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 4611820730372823452L
-
Class org.apache.tika.parser.microsoft.WMFParser extends org.apache.tika.parser.AbstractParser implements Serializable
-
-
Package org.apache.tika.parser.microsoft.ooxml
-
Class org.apache.tika.parser.microsoft.ooxml.OOXMLParser extends AbstractOfficeParser implements Serializable
- serialVersionUID:
- 6535995710857776481L
-
-
Package org.apache.tika.parser.microsoft.ooxml.xwpf.ml2006
-
Class org.apache.tika.parser.microsoft.ooxml.xwpf.ml2006.Word2006MLParser extends AbstractOfficeParser implements Serializable
-
-
Package org.apache.tika.parser.microsoft.xml
-
Class org.apache.tika.parser.microsoft.xml.AbstractXML2003Parser extends org.apache.tika.parser.AbstractParser implements Serializable
-
Class org.apache.tika.parser.microsoft.xml.SpreadsheetMLParser extends AbstractXML2003Parser implements Serializable
-
Class org.apache.tika.parser.microsoft.xml.WordMLParser extends AbstractXML2003Parser implements Serializable
-
-
Package org.apache.tika.parser.mp3
-
Class org.apache.tika.parser.mp3.Mp3Parser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 8537074922934844370L
-
-
Package org.apache.tika.parser.mp4
-
Class org.apache.tika.parser.mp4.MP4Parser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 84011216792285L
-
Serialized Fields
-
iso6709Extractor
org.apache.tika.parser.mp4.ISO6709Extractor iso6709Extractor
-
-
-
Package org.apache.tika.parser.ner
-
Class org.apache.tika.parser.ner.NamedEntityParser extends org.apache.tika.parser.AbstractParser implements Serializable
-
Serialized Fields
-
available
boolean available
-
initialized
boolean initialized
-
nerChain
List<NERecogniser> nerChain
-
secondaryParser
org.apache.tika.Tika secondaryParser
-
-
-
-
Package org.apache.tika.parser.netcdf
-
Class org.apache.tika.parser.netcdf.NetCDFParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -5940938274907708665L
-
Serialized Fields
-
SUPPORTED_TYPES
Set<org.apache.tika.mime.MediaType> SUPPORTED_TYPES
-
-
-
Package org.apache.tika.parser.ocr
-
Class org.apache.tika.parser.ocr.TesseractOCRConfig extends Object implements Serializable
- serialVersionUID:
- -4861942486845757891L
-
Serialized Fields
-
applyRotation
boolean applyRotation
-
colorspace
String colorspace
-
density
int density
-
depth
int depth
-
enableImageProcessing
int enableImageProcessing
-
filter
String filter
-
imageMagickPath
String imageMagickPath
-
language
String language
-
maxFileSizeToOcr
long maxFileSizeToOcr
-
minFileSizeToOcr
long minFileSizeToOcr
-
otherTesseractConfig
Map<String,String> otherTesseractConfig
-
outputType
TesseractOCRConfig.OUTPUT_TYPE outputType
-
pageSegMode
String pageSegMode
-
pageSeparator
String pageSeparator
-
preserveInterwordSpacing
boolean preserveInterwordSpacing
-
resize
int resize
-
tessdataPath
String tessdataPath
-
tesseractPath
String tesseractPath
-
timeout
int timeout
-
-
Class org.apache.tika.parser.ocr.TesseractOCRParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -8167538283213097265L
-
Serialized Fields
-
defaultConfig
TesseractOCRConfig defaultConfig
-
-
-
Package org.apache.tika.parser.odf
-
Class org.apache.tika.parser.odf.OpenDocumentContentParser extends org.apache.tika.parser.AbstractParser implements Serializable
-
Class org.apache.tika.parser.odf.OpenDocumentMetaParser extends XMLParser implements Serializable
- serialVersionUID:
- -8739250869531737584L
-
Class org.apache.tika.parser.odf.OpenDocumentParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -6410276875438618287L
-
Serialized Fields
-
content
org.apache.tika.parser.Parser content
-
meta
org.apache.tika.parser.Parser meta
-
-
-
Package org.apache.tika.parser.opendocument
-
Class org.apache.tika.parser.opendocument.OpenOfficeParser extends OpenDocumentParser implements Serializable
-
-
Package org.apache.tika.parser.pdf
-
Class org.apache.tika.parser.pdf.AccessChecker extends Object implements Serializable
- serialVersionUID:
- 6492570218190936986L
-
Serialized Fields
-
allowAccessibility
boolean allowAccessibility
-
needToCheck
boolean needToCheck
-
-
Class org.apache.tika.parser.pdf.PDFParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -752276948656079347L
-
Serialized Fields
-
defaultConfig
PDFParserConfig defaultConfig
-
initializableProblemHandler
org.apache.tika.config.InitializableProblemHandler initializableProblemHandler
-
-
Class org.apache.tika.parser.pdf.PDFParserConfig extends Object implements Serializable
- serialVersionUID:
- 6492570218190936986L
-
Serialized Fields
-
accessChecker
AccessChecker accessChecker
-
averageCharTolerance
Float averageCharTolerance
-
catchIntermediateIOExceptions
boolean catchIntermediateIOExceptions
-
detectAngles
boolean detectAngles
-
enableAutoSpace
boolean enableAutoSpace
-
extractAcroFormContent
boolean extractAcroFormContent
-
extractActions
boolean extractActions
-
extractAnnotationText
boolean extractAnnotationText
-
extractBookmarksText
boolean extractBookmarksText
-
extractFontNames
boolean extractFontNames
-
extractInlineImages
boolean extractInlineImages
-
extractUniqueInlineImagesOnly
boolean extractUniqueInlineImagesOnly
-
ifXFAExtractOnlyXFA
boolean ifXFAExtractOnlyXFA
-
maxMainMemoryBytes
long maxMainMemoryBytes
-
ocrDPI
int ocrDPI
-
ocrImageFormatName
String ocrImageFormatName
-
ocrImageQuality
float ocrImageQuality
-
ocrImageScale
float ocrImageScale
Deprecated: use ocrDPI instead.
-
ocrImageType
org.apache.pdfbox.rendering.ImageType ocrImageType
-
ocrStrategy
PDFParserConfig.OCR_STRATEGY ocrStrategy
-
setKCMS
boolean setKCMS
-
sortByPosition
boolean sortByPosition
-
spacingTolerance
Float spacingTolerance
-
suppressDuplicateOverlappingText
boolean suppressDuplicateOverlappingText
-
-
-
Package org.apache.tika.parser.pkg
-
Class org.apache.tika.parser.pkg.CompressorParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 2793565792967222459L
-
Serialized Fields
-
memoryLimitInKb
int memoryLimitInKb
-
-
Class org.apache.tika.parser.pkg.PackageParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -5331043266963888708L
-
Class org.apache.tika.parser.pkg.RarParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 6157727985054451501L
-
Class org.apache.tika.parser.pkg.StreamingZipContainerDetector extends org.apache.tika.parser.pkg.ZipContainerDetectorBase implements Serializable
-
Class org.apache.tika.parser.pkg.ZipContainerDetector extends Object implements Serializable
- serialVersionUID:
- 2891763938430295453L
-
Serialized Fields
-
markLimit
int markLimit
-
streamingZipContainerDetector
StreamingZipContainerDetector streamingZipContainerDetector
-
-
-
Package org.apache.tika.parser.pot
-
Class org.apache.tika.parser.pot.PooledTimeSeriesParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -2855917932512164988L
-
-
Package org.apache.tika.parser.prt
-
Class org.apache.tika.parser.prt.PRTParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 4659638314375035178L
-
-
Package org.apache.tika.parser.recognition
-
Class org.apache.tika.parser.recognition.ObjectRecognitionParser extends org.apache.tika.parser.AbstractParser implements Serializable
-
Serialized Fields
-
recogniser
ObjectRecogniser recogniser
-
-
-
-
Package org.apache.tika.parser.recognition.tf
-
Class org.apache.tika.parser.recognition.tf.TensorflowImageRecParser extends org.apache.tika.parser.external.ExternalParser implements Serializable
-
-
Package org.apache.tika.parser.rtf
-
Class org.apache.tika.parser.rtf.RTFParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -4165069489372320313L
-
Serialized Fields
-
ignoreListMarkup
boolean ignoreListMarkup
-
memoryLimitInKb
int memoryLimitInKb
-
-
-
Package org.apache.tika.parser.sas
-
Class org.apache.tika.parser.sas.SAS7BDATParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -2775485539937983150L
-
-
Package org.apache.tika.parser.sentiment
-
Class org.apache.tika.parser.sentiment.SentimentAnalysisParser extends org.apache.tika.parser.AbstractParser implements Serializable
-
Serialized Fields
-
classifier
opennlp.tools.sentiment.SentimentME classifier
-
modelPath
String modelPath
Path to the model. Default is "https://raw.githubusercontent.com/USCDataScience/SentimentAnalysisParser/master/sentiment-models/en-netflix-sentiment.bin"
The path can be one of the following:
- an HTTP or HTTPS URL (not recommended for production use since no caching is implemented)
- an absolute or relative path on the local file system (recommended for production use in standalone mode)
- a relative path known to the class loader (especially useful in distributed environments; recommended for advanced users)
-
-
-
-
Package org.apache.tika.parser.strings
-
Class org.apache.tika.parser.strings.FileConfig extends Object implements Serializable
- serialVersionUID:
- 5712655467296441314L
-
Serialized Fields
-
filePath
String filePath
-
mimetype
boolean mimetype
-
-
Class org.apache.tika.parser.strings.Latin1StringsParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 1L
-
Serialized Fields
-
inPos
int inPos
The position into the input buffer.
-
input
byte[] input
The input buffer.
-
inSize
int inSize
The number of bytes into the input buffer.
-
minSize
int minSize
The minimum size of a character sequence to be extracted.
-
outPos
int outPos
The current position into the output buffer.
-
output
byte[] output
The output buffer.
-
tmpPos
int tmpPos
The temporary position into the output buffer.
-
xhtml
org.apache.tika.sax.XHTMLContentHandler xhtml
The output content handler.
-
-
Class org.apache.tika.parser.strings.StringsConfig extends Object implements Serializable
- serialVersionUID:
- -1465227101645003594L
-
Serialized Fields
-
encoding
StringsEncoding encoding
-
minLength
int minLength
-
stringsPath
String stringsPath
-
timeout
int timeout
-
-
Class org.apache.tika.parser.strings.StringsParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 802566634661575025L
-
-
Package org.apache.tika.parser.txt
-
Class org.apache.tika.parser.txt.Icu4jEncodingDetector extends Object implements Serializable
-
Serialized Fields
-
markLimit
int markLimit
-
stripMarkup
boolean stripMarkup
-
-
-
Class org.apache.tika.parser.txt.TXTParser extends org.apache.tika.parser.AbstractEncodingDetectorParser implements Serializable
- serialVersionUID:
- -6656102320836888910L
-
Class org.apache.tika.parser.txt.UniversalEncodingDetector extends Object implements Serializable
-
Serialized Fields
-
markLimit
int markLimit
-
-
-
-
Package org.apache.tika.parser.utils
-
Class org.apache.tika.parser.utils.DataURISchemeParseException extends org.apache.tika.exception.TikaException implements Serializable
-
-
Package org.apache.tika.parser.video
-
Class org.apache.tika.parser.video.FLVParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -8718013155719197679L
-
-
Package org.apache.tika.parser.wordperfect
-
Class org.apache.tika.parser.wordperfect.QuattroProParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 8941810225917012232L
-
Class org.apache.tika.parser.wordperfect.WordPerfectParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 8941810225917012232L
-
Serialized Fields
-
includeDeletedContent
boolean includeDeletedContent
-
-
-
Package org.apache.tika.parser.xliff
-
Class org.apache.tika.parser.xliff.XLIFF12Parser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- 1490085649251663857L
-
Class org.apache.tika.parser.xliff.XLZParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -1877314028666058564L
-
Serialized Fields
-
xliffParser
org.apache.tika.parser.Parser xliffParser
Shared Parser instance.
-
-
-
Package org.apache.tika.parser.xml
-
Class org.apache.tika.parser.xml.DcXMLParser extends XMLParser implements Serializable
- serialVersionUID:
- 4905318835463880819L
-
Class org.apache.tika.parser.xml.FictionBookParser extends XMLParser implements Serializable
- serialVersionUID:
- 4195954546491524374L
-
Class org.apache.tika.parser.xml.XMLParser extends org.apache.tika.parser.AbstractParser implements Serializable
- serialVersionUID:
- -6028836725280212837L
-