package org.apache.tika.parser.csv;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.james.mime4j.dom.field.FieldName;
import org.apache.tika.config.Field;
import org.apache.tika.detect.AutoDetectReader;
import org.apache.tika.detect.EncodingDetector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractEncodingDetectorParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:tika-parsers-classic-package-2.0.0-ALPHA.jar:org/apache/tika/parser/csv/TextAndCSVParser.class */
/**
 * Parser for plain text and delimiter-separated text (CSV/TSV).
 *
 * <p>Delimited content is emitted as an XHTML {@code <table>} with one {@code <tr>} per record and
 * one {@code <td>} per cell; everything else is emitted as a single {@code <p>} of text.
 *
 * <p>Callers can bypass detection by setting {@link TikaCoreProperties#CONTENT_TYPE_OVERRIDE} to a
 * media type string; its {@code charset} and {@code delimiter} parameters are honoured. The
 * delimiter parameter may be one of the names registered in this class ({@code comma},
 * {@code tab}, {@code pipe}, {@code semicolon}, {@code colon}) or a single literal character.
 */
public class TextAndCSVParser extends AbstractEncodingDetectorParser {
    private static final String CSV_PREFIX = "csv";
    private static final String CHARSET = "charset";
    private static final String DELIMITER = "delimiter";
    private static final String TD = "td";
    private static final String TR = "tr";
    private static final String TABLE = "table";
    private static final int DEFAULT_MARK_LIMIT = 20000;
    private static final double DEFAULT_MIN_CONFIDENCE = 0.5d;
    private static final char[] DEFAULT_DELIMITERS = {',', '\t'};

    /** Metadata property ("csv:delimiter") holding the name of the delimiter used for parsing. */
    public static final Property DELIMITER_PROPERTY =
            Property.externalText(CSV_PREFIX + ":" + DELIMITER);

    static final MediaType CSV = MediaType.text("csv");
    static final MediaType TSV = MediaType.text("tsv");

    /** Delimiter character to the symbolic name used in media type parameters and metadata. */
    private static final Map<Character, String> CHAR_TO_STRING_DELIMITER_MAP = new HashMap<>();
    /** Inverse of {@link #CHAR_TO_STRING_DELIMITER_MAP}. */
    private static final Map<String, Character> STRING_TO_CHAR_DELIMITER_MAP = new HashMap<>();
    private static final Set<MediaType> SUPPORTED_TYPES;

    static {
        CHAR_TO_STRING_DELIMITER_MAP.put(',', "comma");
        CHAR_TO_STRING_DELIMITER_MAP.put('\t', "tab");
        CHAR_TO_STRING_DELIMITER_MAP.put('|', "pipe");
        CHAR_TO_STRING_DELIMITER_MAP.put(';', "semicolon");
        CHAR_TO_STRING_DELIMITER_MAP.put(':', "colon");
        for (Map.Entry<Character, String> entry : CHAR_TO_STRING_DELIMITER_MAP.entrySet()) {
            STRING_TO_CHAR_DELIMITER_MAP.put(entry.getValue(), entry.getKey());
        }
        SUPPORTED_TYPES = Collections.unmodifiableSet(
                new HashSet<>(Arrays.asList(CSV, TSV, MediaType.TEXT_PLAIN)));
    }

    /** Candidate delimiters handed to the sniffer. */
    private char[] delimiters = DEFAULT_DELIMITERS;

    /** Passed to the sniffer as its first argument (presumably a read-ahead limit -- confirm). */
    @Field
    private int markLimit = DEFAULT_MARK_LIMIT;

    /** Passed to the sniffer as its confidence threshold. */
    @Field
    private double minConfidence = DEFAULT_MIN_CONFIDENCE;

    /** Creates a parser that uses the superclass's default encoding detector. */
    public TextAndCSVParser() {
    }

    /**
     * Creates a parser that uses the given encoding detector.
     *
     * @param encodingDetector detector forwarded to the superclass
     */
    public TextAndCSVParser(EncodingDetector encodingDetector) {
        super(encodingDetector);
    }

    /**
     * Returns the media types handled by this parser: text/csv, text/tsv and text/plain.
     *
     * @param parseContext unused
     * @return an unmodifiable set of supported types
     */
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    /**
     * Parses the stream as either delimited text (emitted as a table) or plain text.
     *
     * @param inputStream    document stream; it is shielded so that this method does not close it
     * @param contentHandler receiver of the XHTML SAX events
     * @param metadata       read for the content type (and override); updated with the resolved
     *                       content type, encoding and delimiter
     * @param parseContext   used to look up the encoding detector
     * @throws IOException   if the stream cannot be read
     * @throws SAXException  if the content handler fails
     * @throws TikaException if the CSV parser reports an {@link IllegalStateException}; the
     *                       remaining content is first emitted as text inside a {@code <div>}
     */
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        CSVParams params = getOverride(metadata);
        Reader reader;
        Charset charset;
        if (params.isComplete()) {
            charset = params.getCharset();
            // Shield the caller's stream: closing the CSVParser below closes this reader, and
            // the detect() path already shields the stream in the same way.
            reader = new BufferedReader(
                    new InputStreamReader(new CloseShieldInputStream(inputStream), charset));
        } else {
            reader = detect(params, inputStream, metadata, parseContext);
            charset = params.getCharset();
            if (charset == null && reader instanceof AutoDetectReader) {
                charset = ((AutoDetectReader) reader).getCharset();
            }
        }
        updateMetadata(params, metadata);

        // Anything that did not resolve to CSV/TSV is handled as plain text.
        if (!isCSVOrTSV(params.getMediaType())) {
            handleText(reader, charset, contentHandler, metadata);
            return;
        }

        CSVFormat format = CSVFormat.EXCEL.withDelimiter(params.getDelimiter().charValue());
        metadata.set(DELIMITER_PROPERTY,
                CHAR_TO_STRING_DELIMITER_MAP.get(Character.valueOf(format.getDelimiter())));

        XHTMLContentHandler xhtml = new XHTMLContentHandler(contentHandler, metadata);
        try (CSVParser csvParser = new CSVParser(reader, format)) {
            xhtml.startDocument();
            xhtml.startElement(TABLE);
            try {
                for (CSVRecord row : csvParser) {
                    xhtml.startElement(TR);
                    for (String cell : row) {
                        xhtml.startElement(TD);
                        xhtml.characters(cell);
                        xhtml.endElement(TD);
                    }
                    xhtml.endElement(TR);
                }
            } catch (IllegalStateException e) {
                // The CSV parser gave up: close the table, dump whatever is left in the reader
                // as text so it is not lost, then report the failure.
                xhtml.endElement(TABLE);
                xhtml.startElement("div", "name", "after exception");
                handleText(reader, xhtml);
                xhtml.endElement("div");
                xhtml.endDocument();
                throw new TikaException("exception parsing the csv", e);
            }
            xhtml.endElement(TABLE);
            xhtml.endDocument();
        }
    }

    /**
     * Emits the reader's content as a plain text document and records the content type
     * (with charset) and content encoding in the metadata.
     */
    private void handleText(Reader reader, Charset charset, ContentHandler contentHandler, Metadata metadata) throws SAXException, IOException, TikaException {
        MediaType mediaType = MediaType.TEXT_PLAIN;
        String declared = metadata.get(FieldName.CONTENT_TYPE);
        if (declared != null) {
            MediaType parsed = MediaType.parse(declared);
            if (parsed != null) {
                mediaType = parsed;
            }
        }
        metadata.set(FieldName.CONTENT_TYPE, new MediaType(mediaType, charset).toString());
        metadata.set("Content-Encoding", charset.name());

        XHTMLContentHandler xhtml = new XHTMLContentHandler(contentHandler, metadata);
        xhtml.startDocument();
        handleText(reader, xhtml);
        xhtml.endDocument();
    }

    /** Copies everything remaining in the reader into a single {@code <p>} element. */
    private static void handleText(Reader reader, XHTMLContentHandler xhtml) throws SAXException, IOException {
        xhtml.startElement("p");
        char[] buffer = new char[4096];
        int count;
        while ((count = reader.read(buffer)) != -1) {
            xhtml.characters(buffer, 0, count);
        }
        xhtml.endElement("p");
    }

    /**
     * Fills in whatever the override left open (charset, media type, delimiter) and returns a
     * reader over the shielded stream.
     *
     * <p>The returned reader is an {@link AutoDetectReader} whenever the charset had to be
     * detected, and a {@link BufferedReader} when the charset was already known.
     *
     * @param params mutable parameters; updated in place with the detected values
     */
    private Reader detect(CSVParams params, InputStream inputStream, Metadata metadata, ParseContext parseContext) throws IOException, TikaException {
        // If the document was already identified as something other than txt/csv/tsv,
        // keep that type and do not attempt to sniff a delimiter.
        String declared = metadata.get(FieldName.CONTENT_TYPE);
        if (declared != null) {
            MediaType declaredType = MediaType.parse(declared);
            // parse() yields null for malformed values; in that case fall through to sniffing.
            if (declaredType != null && !SUPPORTED_TYPES.contains(declaredType.getBaseType())) {
                params.setMediaType(declaredType);
                return new AutoDetectReader(new CloseShieldInputStream(inputStream), metadata, getEncodingDetector(parseContext));
            }
        }

        Reader reader;
        if (params.getCharset() == null) {
            AutoDetectReader autoDetectReader = new AutoDetectReader(new CloseShieldInputStream(inputStream), metadata, getEncodingDetector(parseContext));
            params.setCharset(autoDetectReader.getCharset());
            if (params.isComplete()) {
                return autoDetectReader;
            }
            reader = autoDetectReader;
        } else {
            reader = new BufferedReader(new InputStreamReader(new CloseShieldInputStream(inputStream), params.getCharset()));
        }

        boolean mayBeDelimited = params.getMediaType() == null || isCSVOrTSV(params.getMediaType());
        if (params.getDelimiter() == null && mayBeDelimited) {
            CSVSniffer sniffer = new CSVSniffer(this.markLimit, this.delimiters, this.minConfidence);
            CSVResult best = sniffer.getBest(reader, metadata);
            params.setMediaType(best.getMediaType());
            params.setDelimiter(best.getDelimiter());
        }
        return reader;
    }

    /**
     * Builds the parsing parameters from the content type override in the metadata.
     *
     * @return parameters populated from the override, or empty parameters when no parseable
     *         override is present
     */
    private CSVParams getOverride(Metadata metadata) {
        String override = metadata.get(TikaCoreProperties.CONTENT_TYPE_OVERRIDE);
        if (override == null) {
            return new CSVParams();
        }
        MediaType mediaType = MediaType.parse(override);
        if (mediaType == null) {
            return new CSVParams();
        }

        Charset charset = charsetOrNull(mediaType.getParameters().get(CHARSET));
        if (!isCSVOrTSV(mediaType)) {
            return new CSVParams(mediaType, charset);
        }
        String delimiterParam = mediaType.getParameters().get(DELIMITER);
        if (delimiterParam == null) {
            return new CSVParams(mediaType, charset);
        }

        // Accept a registered name ("comma", "tab", ...) or a single literal character.
        Character named = STRING_TO_CHAR_DELIMITER_MAP.get(delimiterParam);
        if (named != null) {
            return new CSVParams(mediaType, charset, named);
        }
        if (delimiterParam.length() == 1) {
            return new CSVParams(mediaType, charset, Character.valueOf(delimiterParam.charAt(0)));
        }
        return new CSVParams(mediaType, charset);
    }

    /** Resolves a charset name, returning null when it is absent, illegal or unsupported. */
    private static Charset charsetOrNull(String name) {
        if (name == null) {
            return null;
        }
        try {
            return Charset.forName(name);
        } catch (java.nio.charset.IllegalCharsetNameException | UnsupportedCharsetException ignored) {
            // Deliberately ignored: an unusable charset in the override means "not specified",
            // so that the charset is detected later instead of failing the parse.
            return null;
        }
    }

    /**
     * Tells whether the base type of the given media type is text/csv or text/tsv.
     *
     * @param mediaType type to test; may be null
     * @return false for null, otherwise whether the base type is CSV or TSV
     */
    static boolean isCSVOrTSV(MediaType mediaType) {
        if (mediaType == null) {
            return false;
        }
        MediaType baseType = mediaType.getBaseType();
        return baseType.equals(TSV) || baseType.equals(CSV);
    }

    /**
     * Writes the resolved content type (including charset and delimiter parameters) and the
     * content encoding into the metadata.
     */
    private void updateMetadata(CSVParams params, Metadata metadata) {
        MediaType mediaType = null;
        if (params.getMediaType().getBaseType().equals(MediaType.TEXT_PLAIN)) {
            mediaType = MediaType.TEXT_PLAIN;
        } else if (params.getDelimiter() != null) {
            mediaType = params.getDelimiter().charValue() == '\t' ? TSV : CSV;
        } else if (metadata.get(FieldName.CONTENT_TYPE) != null) {
            mediaType = MediaType.parse(metadata.get(FieldName.CONTENT_TYPE));
        }
        if (mediaType == null) {
            // No delimiter and no usable Content-Type in the metadata: fall back to the type
            // carried by the parameters rather than failing with a NullPointerException below.
            mediaType = params.getMediaType().getBaseType();
        }

        Map<String, String> attributes = new HashMap<>();
        if (params.getCharset() != null) {
            String charsetName = params.getCharset().name();
            attributes.put(CHARSET, charsetName);
            metadata.set("Content-Encoding", charsetName);
        }
        if (!mediaType.equals(MediaType.TEXT_PLAIN) && params.getDelimiter() != null) {
            String delimiterName = CHAR_TO_STRING_DELIMITER_MAP.get(params.getDelimiter());
            if (delimiterName == null) {
                // Delimiters without a registered name are written as their numeric code point.
                delimiterName = Integer.toString(params.getDelimiter().charValue());
            }
            attributes.put(DELIMITER, delimiterName);
        }
        metadata.set(FieldName.CONTENT_TYPE, new MediaType(mediaType, attributes).toString());
    }
}
