package org.apache.tika.parser.mbox;

import ch.qos.logback.classic.net.SyslogAppender;
import com.fasterxml.jackson.core.util.MinimalPrettyPrinter;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.james.mime4j.dom.field.ContentTypeField;
import org.apache.james.mime4j.dom.field.FieldName;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
import org.apache.tika.metadata.Message;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/apache/tika/parser/mbox/MboxParser.class */
/**
 * Parser for mbox mailbox files ({@value #MBOX_MIME_TYPE}).
 *
 * <p>The input is read line by line as windows-1252 text. Every line that starts with
 * {@value #MBOX_RECORD_DIVIDER} opens a new message; the lines that follow, up to the next
 * divider, are buffered and handed to the {@link EmbeddedDocumentExtractor} as an embedded
 * {@code message/rfc822} document. The header section of each message is additionally
 * inspected and a selection of headers is copied into the per-message {@link Metadata}.
 *
 * <p>When tracking is enabled (see {@link #setTracking(boolean)}) the per-message metadata is
 * also stored in a plain {@link HashMap} held by this instance.
 * NOTE(review): that map is never cleared by this class and is not synchronized, so sharing
 * one tracking instance between concurrent parses looks unsafe; confirm against callers.
 */
public class MboxParser extends AbstractParser {
    public static final String MBOX_MIME_TYPE = "application/mbox";
    public static final String MBOX_RECORD_DIVIDER = "From ";
    /** Soft cap, in bytes, on the amount of a single message that is buffered in memory. */
    public static final int MAIL_MAX_SIZE = 50000000;
    private static final long serialVersionUID = -1762689436731160661L;
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.application("mbox"));
    private static final Pattern EMAIL_HEADER_PATTERN = Pattern.compile("([^ ]+):[ \t]*(.*)");
    private static final Pattern EMAIL_ADDRESS_PATTERN = Pattern.compile("<(.*@.*)>");
    private static final String EMAIL_HEADER_METADATA_PREFIX = "MboxParser-";
    private static final String EMAIL_FROMLINE_METADATA = "MboxParser-from";
    /** Charset used both to decode the mailbox and to re-encode each buffered message. */
    private static final String CHARSET_NAME = "windows-1252";
    private final Map<Integer, Metadata> trackingMetadata = new HashMap<>();
    private boolean tracking = false;

    /**
     * Parses a date in the {@code EEE, d MMM yyyy HH:mm:ss Z} layout using the US locale.
     *
     * @param str the header value to parse
     * @return the parsed date
     * @throws ParseException if {@code str} does not match the expected layout
     */
    public static Date parseDate(String str) throws ParseException {
        // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
        return new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss Z", Locale.US).parse(str);
    }

    @Override // org.apache.tika.parser.Parser
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    /**
     * Splits the mailbox into messages and passes each one to the embedded document extractor.
     *
     * <p>Lines before the first divider are skipped. A divider that is the very last line of
     * the input produces no message. An empty input produces an empty document. The loop stops
     * early if the current thread is found to be interrupted after a line or message has been
     * processed.
     *
     * @param inputStream    the mbox data; it is wrapped in a reader that is closed on return
     * @param contentHandler receiver of the XHTML SAX events
     * @param metadata       container metadata; content type and encoding are set on it
     * @param parseContext   used to look up the {@link EmbeddedDocumentExtractor}
     * @throws IOException   if reading the stream fails
     * @throws TikaException propagated from the embedded document extractor
     * @throws SAXException  propagated from the content handler
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, TikaException, SAXException {
        EmbeddedDocumentExtractor extractor = (EmbeddedDocumentExtractor) parseContext.get(EmbeddedDocumentExtractor.class, new ParsingEmbeddedDocumentExtractor(parseContext));
        metadata.set("Content-Type", MBOX_MIME_TYPE);
        metadata.set("Content-Encoding", CHARSET_NAME);
        XHTMLContentHandler xhtml = new XHTMLContentHandler(contentHandler, metadata);
        xhtml.startDocument();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, CHARSET_NAME))) {
            String line = reader.readLine();
            int mailItem = 0;
            // A null first line (empty stream) simply skips the loop instead of raising an NPE.
            while (line != null) {
                if (line.startsWith(MBOX_RECORD_DIVIDER)) {
                    Metadata mailMetadata = new Metadata();
                    mailMetadata.add(EMAIL_FROMLINE_METADATA, line.substring(MBOX_RECORD_DIVIDER.length()));
                    mailMetadata.set("Content-Type", ContentTypeField.TYPE_MESSAGE_RFC822);
                    line = reader.readLine();
                    if (line == null) {
                        // Divider was the last line of the file: there is no message to emit.
                        break;
                    }
                    ByteArrayOutputStream message = new ByteArrayOutputStream(100000);
                    LinkedList<String> headers = new LinkedList<>();
                    line = readMessage(reader, line, message, headers);
                    for (String header : headers) {
                        saveHeaderInMetadata(mailMetadata, header);
                    }
                    ByteArrayInputStream messageStream = new ByteArrayInputStream(message.toByteArray());
                    if (extractor.shouldParseEmbedded(mailMetadata)) {
                        extractor.parseEmbedded(messageStream, xhtml, mailMetadata, true);
                    }
                    if (this.tracking) {
                        getTrackingMetadata().put(Integer.valueOf(mailItem), mailMetadata);
                        mailItem++;
                    }
                } else {
                    // Not inside a message (preamble, or the tail of an over-long message): skip.
                    line = reader.readLine();
                }
                if (Thread.currentThread().isInterrupted()) {
                    break;
                }
            }
        }
        xhtml.endDocument();
    }

    /**
     * Buffers one message and collects its unfolded header lines.
     *
     * <p>Reading stops at end of input, at the next divider line, or once the buffered size
     * reaches {@link #MAIL_MAX_SIZE}. Header collection stops at the first empty line, which
     * separates headers from the body; body lines are buffered but never treated as headers.
     *
     * @param reader    source of further lines
     * @param firstLine first line of the message (the line after the divider), never null
     * @param message   receives every line of the message, each terminated by a line feed
     * @param headers   receives the header lines with continuation lines folded in
     * @return the first line that was read but not consumed, or {@code null} at end of input
     * @throws IOException if reading or re-encoding fails
     */
    private static String readMessage(BufferedReader reader, String firstLine, ByteArrayOutputStream message, LinkedList<String> headers) throws IOException {
        String line = firstLine;
        boolean inHeaders = true;
        do {
            if (inHeaders) {
                if (line.isEmpty()) {
                    inHeaders = false;
                } else if (line.startsWith(" ") || line.startsWith("\t")) {
                    // Continuation line: it belongs to the most recently collected header.
                    // A continuation with no preceding header has nothing to attach to and is
                    // ignored for metadata purposes (it is still part of the buffered message).
                    if (!headers.isEmpty()) {
                        headers.addLast(headers.removeLast() + " " + line.trim());
                    }
                } else {
                    headers.addLast(line);
                }
            }
            message.write(line.getBytes(CHARSET_NAME));
            message.write('\n');
            line = reader.readLine();
        } while (line != null && !line.startsWith(MBOX_RECORD_DIVIDER) && message.size() < MAIL_MAX_SIZE);
        return line;
    }

    /** @return whether per-message metadata is being recorded during {@code parse} */
    public boolean isTracking() {
        return this.tracking;
    }

    /** @param z {@code true} to record per-message metadata during {@code parse} */
    public void setTracking(boolean z) {
        this.tracking = z;
    }

    /**
     * @return the live map of recorded per-message metadata, keyed by a counter that starts
     *         at 0 for each {@code parse} call
     */
    public Map<Integer, Metadata> getTrackingMetadata() {
        return this.trackingMetadata;
    }

    /**
     * Copies one header line into the message metadata.
     *
     * <p>Lines that do not match {@code name: value} are ignored. Well-known headers are
     * mapped onto dedicated metadata keys; every other header is stored under
     * {@code "MboxParser-" + lower-cased name}.
     *
     * @param metadata the per-message metadata to update
     * @param str      a single, already unfolded header line
     */
    private void saveHeaderInMetadata(Metadata metadata, String str) {
        Matcher headerMatcher = EMAIL_HEADER_PATTERN.matcher(str);
        if (!headerMatcher.matches()) {
            return;
        }
        String headerTag = headerMatcher.group(1).toLowerCase(Locale.ROOT);
        String headerContent = headerMatcher.group(2);

        if (headerTag.equalsIgnoreCase("From")) {
            metadata.set(TikaCoreProperties.CREATOR, headerContent);
        } else if (headerTag.equalsIgnoreCase("To") || headerTag.equalsIgnoreCase(FieldName.CC) || headerTag.equalsIgnoreCase(FieldName.BCC)) {
            Matcher addressMatcher = EMAIL_ADDRESS_PATTERN.matcher(headerContent);
            if (addressMatcher.find()) {
                metadata.add(Message.MESSAGE_RECIPIENT_ADDRESS, addressMatcher.group(1));
            } else if (headerContent.indexOf('@') > -1) {
                // No <...> wrapper, but the value looks like a bare address.
                metadata.add(Message.MESSAGE_RECIPIENT_ADDRESS, headerContent);
            }
            String property = Message.MESSAGE_TO;
            if (headerTag.equalsIgnoreCase(FieldName.CC)) {
                property = Message.MESSAGE_CC;
            } else if (headerTag.equalsIgnoreCase(FieldName.BCC)) {
                property = Message.MESSAGE_BCC;
            }
            metadata.add(property, headerContent);
        } else if (headerTag.equalsIgnoreCase(FieldName.SUBJECT)) {
            metadata.add(Metadata.SUBJECT, headerContent);
        } else if (headerTag.equalsIgnoreCase("Date")) {
            try {
                metadata.set(TikaCoreProperties.CREATED, parseDate(headerContent));
            } catch (ParseException ignored) {
                // Best effort: a date in an unexpected layout just leaves CREATED unset.
            }
        } else if (headerTag.equalsIgnoreCase("Message-Id")) {
            metadata.set(TikaCoreProperties.IDENTIFIER, headerContent);
        } else if (headerTag.equalsIgnoreCase("In-Reply-To")) {
            metadata.set(TikaCoreProperties.RELATION, headerContent);
        } else if (headerTag.equalsIgnoreCase("Content-Type")) {
            metadata.add("Content-Type", headerContent);
            metadata.set(TikaCoreProperties.FORMAT, headerContent);
        } else {
            metadata.add(EMAIL_HEADER_METADATA_PREFIX + headerTag, headerContent);
        }
    }
}
