package org.apache.nifi.processors.poi;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processor.io.OutputStreamCallback;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

@CapabilityDescription("Consumes a Microsoft Excel document and converts each worksheet to csv. Each sheet from the incoming Excel document will generate a new Flowfile that will be output from this processor. Each output Flowfile's contents will be formatted as a csv file where the each row from the excel sheet is output as a newline in the csv file. This processor is currently only capable of processing .xlsx (XSSF 2007 OOXML file format) Excel documents and not older .xls (HSSF '97(-2007) file format) documents. This processor also expects well formatted CSV content and will not escape cell's containing invalid content such as newlines or additional commas.")
@Tags({"excel", "csv", "poi"})
@WritesAttributes({@WritesAttribute(attribute = ConvertExcelToCSVProcessor.SHEET_NAME, description = "The name of the Excel sheet that this particular row of data came from in the Excel document"), @WritesAttribute(attribute = ConvertExcelToCSVProcessor.ROW_NUM, description = "The number of rows in this Excel Sheet"), @WritesAttribute(attribute = ConvertExcelToCSVProcessor.SOURCE_FILE_NAME, description = "The name of the Excel document file that this data originated from"), @WritesAttribute(attribute = "convertexceltocsvprocessor.error", description = "Error message that was encountered on a per Excel sheet basis. This attribute is only populated if an error was occured while processing the particular sheet. Having the error present at the sheet level will allow for the end user to better understand what syntax errors in their excel doc on a larger scale caused the error.")})
/* loaded from: input_file:org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.class */
public class ConvertExcelToCSVProcessor extends AbstractProcessor {
    // Value stored in the mime.type attribute of every generated CSV FlowFile.
    private static final String CSV_MIME_TYPE = "text/csv";
    // Names of the attributes written onto each generated FlowFile; also referenced
    // from the @WritesAttributes annotation on this class.
    public static final String SHEET_NAME = "sheetname";
    public static final String ROW_NUM = "numrows";
    public static final String SOURCE_FILE_NAME = "sourcefilename";
    // Element / attribute names that ExcelSheetRowHandler compares against while parsing sheet XML.
    // "c": cell element whose "t" attribute is inspected.
    private static final String SAX_CELL_REF = "c";
    // "t": cell attribute holding the cell type.
    private static final String SAX_CELL_TYPE = "t";
    // "s": cell type value that makes the handler resolve the cell text through the shared strings table.
    private static final String SAX_CELL_STRING = "s";
    // "v": element whose text content is written out as the cell value.
    private static final String SAX_CELL_CONTENT_REF = "v";
    // "row": element that starts a new CSV line; its end emits the newline.
    private static final String SAX_ROW_REF = "row";
    // "sheetPr": element whose first attribute value the handler stores as the sheet name.
    private static final String SAX_SHEET_NAME_REF = "sheetPr";
    // Separator used to split the DESIRED_SHEETS property value; the handler also uses it
    // as the separator between cell values on a CSV line.
    private static final String DESIRED_SHEETS_DELIMITER = ",";
    // Initial sheet name of a handler, and the fallback SOURCE_FILE_NAME value when the
    // incoming FlowFile has no filename attribute.
    private static final String UNKNOWN_SHEET_NAME = "UNKNOWN";
    // Class name handed to XMLReaderFactory.createXMLReader when parsing a sheet.
    private static final String SAX_PARSER = "org.apache.xerces.parsers.SAXParser";
    // Optional, expression-language-enabled list of sheet names to extract.
    public static final PropertyDescriptor DESIRED_SHEETS = new PropertyDescriptor.Builder().name("extract-sheets").displayName("Sheets to Extract").description("Comma separated list of Excel document sheet names that should be extracted from the excel document. If this property is left blank then all of the sheets will be extracted from the Excel document. The list of names is case in-sensitive. Any sheets not specified in this value will be ignored.").required(false).expressionLanguageSupported(true).addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build();
    // Receives the incoming FlowFile when onTrigger completes without a RuntimeException.
    public static final Relationship ORIGINAL = new Relationship.Builder().name("original").description("Original Excel document received by this processor").build();
    // Receives one CSV FlowFile per converted sheet.
    public static final Relationship SUCCESS = new Relationship.Builder().name("success").description("Excel data converted to csv").build();
    // Receives FlowFiles for which conversion failed.
    public static final Relationship FAILURE = new Relationship.Builder().name("failure").description("Failed to parse the Excel document").build();
    // Both collections are assigned in init(...) as unmodifiable views and returned by the getters below.
    private List<PropertyDescriptor> descriptors;
    private Set<Relationship> relationships;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor$ExcelSheetRowHandler.class */
    public class ExcelSheetRowHandler extends DefaultHandler {
        private SharedStringsTable sst;
        private String currentContent;
        private boolean nextIsString;
        private OutputStream outputStream;
        private boolean firstColInRow;
        long rowCount;
        String sheetName;

        private ExcelSheetRowHandler(SharedStringsTable sharedStringsTable) {
            this.sst = sharedStringsTable;
            this.firstColInRow = true;
            this.rowCount = 0L;
            this.sheetName = ConvertExcelToCSVProcessor.UNKNOWN_SHEET_NAME;
        }

        public void setFlowFileOutputStream(OutputStream outputStream) {
            this.outputStream = outputStream;
        }

        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void startElement(String str, String str2, String str3, Attributes attributes) throws SAXException {
            if (str3.equals(ConvertExcelToCSVProcessor.SAX_CELL_REF)) {
                String value = attributes.getValue(ConvertExcelToCSVProcessor.SAX_CELL_TYPE);
                if (value == null || !value.equals(ConvertExcelToCSVProcessor.SAX_CELL_STRING)) {
                    this.nextIsString = false;
                } else {
                    this.nextIsString = true;
                }
            } else if (str3.equals(ConvertExcelToCSVProcessor.SAX_ROW_REF)) {
                this.firstColInRow = true;
            } else if (str3.equals(ConvertExcelToCSVProcessor.SAX_SHEET_NAME_REF)) {
                this.sheetName = attributes.getValue(0);
            }
            this.currentContent = "";
        }

        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void endElement(String str, String str2, String str3) throws SAXException {
            if (this.nextIsString) {
                this.currentContent = new XSSFRichTextString(this.sst.getEntryAt(Integer.parseInt(this.currentContent))).toString();
                this.nextIsString = false;
            }
            if (str3.equals(ConvertExcelToCSVProcessor.SAX_CELL_CONTENT_REF)) {
                if (this.firstColInRow) {
                    this.firstColInRow = false;
                    try {
                        this.outputStream.write(this.currentContent.getBytes());
                    } catch (IOException e) {
                        ConvertExcelToCSVProcessor.this.getLogger().error("IO error encountered while writing content of parsed cell value from sheet {}", new Object[]{getSheetName()}, e);
                    }
                } else {
                    try {
                        this.outputStream.write((ConvertExcelToCSVProcessor.DESIRED_SHEETS_DELIMITER + this.currentContent).getBytes());
                    } catch (IOException e2) {
                        ConvertExcelToCSVProcessor.this.getLogger().error("IO error encountered while writing content of parsed cell value from sheet {}", new Object[]{getSheetName()}, e2);
                    }
                }
            }
            if (!str3.equals(ConvertExcelToCSVProcessor.SAX_ROW_REF) || this.firstColInRow) {
                return;
            }
            try {
                this.rowCount++;
                this.outputStream.write("\n".getBytes());
            } catch (IOException e3) {
                ConvertExcelToCSVProcessor.this.getLogger().error("IO error encountered while writing new line indicator", e3);
            }
        }

        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void characters(char[] cArr, int i, int i2) throws SAXException {
            this.currentContent += new String(cArr, i, i2);
        }

        public long getRowCount() {
            return this.rowCount;
        }

        public String getSheetName() {
            return this.sheetName;
        }
    }

    /**
     * Builds the unmodifiable property-descriptor list and relationship set that the
     * getters of this processor return.
     *
     * @param processorInitializationContext initialization context (not used here)
     */
    protected void init(ProcessorInitializationContext processorInitializationContext) {
        // Parameterized collections instead of raw types, so no unchecked conversion
        // happens when they are assigned to the typed fields.
        final List<PropertyDescriptor> supportedProperties = new ArrayList<PropertyDescriptor>();
        supportedProperties.add(DESIRED_SHEETS);
        this.descriptors = Collections.unmodifiableList(supportedProperties);

        final Set<Relationship> supportedRelationships = new HashSet<Relationship>();
        supportedRelationships.add(ORIGINAL);
        supportedRelationships.add(SUCCESS);
        supportedRelationships.add(FAILURE);
        this.relationships = Collections.unmodifiableSet(supportedRelationships);
    }

    /**
     * Returns the relationships this processor can route FlowFiles to.
     *
     * @return the set assigned by {@code init}; {@code null} until {@code init} has run
     */
    public Set<Relationship> getRelationships() {
        return relationships;
    }

    /**
     * Returns the properties this processor supports.
     *
     * @return the list assigned by {@code init}; {@code null} until {@code init} has run
     */
    public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return descriptors;
    }

    /**
     * Converts the sheets of one incoming .xlsx FlowFile to CSV FlowFiles.
     *
     * <p>The FlowFile content is opened as an OOXML package and each sheet is passed to
     * {@code handleExcelSheet}. When the "Sheets to Extract" property evaluates to a value,
     * only sheets whose name matches one of its comma separated entries (ignoring case)
     * are converted; otherwise every sheet is converted. On completion the incoming
     * FlowFile is transferred to {@code ORIGINAL}. Any {@link RuntimeException} is logged
     * and routes the incoming FlowFile to {@code FAILURE} with the exception message
     * stored in an error attribute.</p>
     *
     * @param processContext provides the configured property values
     * @param processSession session used to read, create and transfer FlowFiles
     * @throws ProcessException declared by the processor contract
     */
    public void onTrigger(final ProcessContext processContext, final ProcessSession processSession) throws ProcessException {
        final FlowFile flowFile = processSession.get();
        if (flowFile == null) {
            return;
        }
        try {
            processSession.read(flowFile, new InputStreamCallback() {
                public void process(InputStream inputStream) throws IOException {
                    OPCPackage excelPackage = null;
                    try {
                        // Evaluate against the FlowFile so expressions can reference its attributes.
                        final String desiredSheets = processContext.getProperty(ConvertExcelToCSVProcessor.DESIRED_SHEETS)
                                .evaluateAttributeExpressions(flowFile).getValue();
                        excelPackage = OPCPackage.open(inputStream);
                        final XSSFReader reader = new XSSFReader(excelPackage);
                        final SharedStringsTable sharedStrings = reader.getSharedStringsTable();
                        final XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) reader.getSheetsData();
                        // null means "no filter": every sheet is converted.
                        final String[] desiredNames = desiredSheets == null
                                ? null
                                : StringUtils.split(desiredSheets, ConvertExcelToCSVProcessor.DESIRED_SHEETS_DELIMITER);

                        boolean convertedAny = false;
                        while (sheets.hasNext()) {
                            final InputStream sheetStream = sheets.next();
                            final String sheetName = sheets.getSheetName();
                            if (desiredNames == null || isDesiredSheet(sheetName, desiredNames)) {
                                // handleExcelSheet closes the sheet stream itself.
                                ConvertExcelToCSVProcessor.this.handleExcelSheet(processSession, flowFile, sharedStrings, sheetStream, sheetName);
                                convertedAny = true;
                            } else {
                                // Skipped sheets still hand out an open stream; release it.
                                sheetStream.close();
                            }
                        }
                        if (desiredNames != null && !convertedAny) {
                            ConvertExcelToCSVProcessor.this.getLogger().debug("Excel document was parsed but no sheets with the specified desired names were found.");
                        }
                    } catch (InvalidFormatException e) {
                        // Must precede OpenXML4JException: InvalidFormatException is its subclass.
                        ConvertExcelToCSVProcessor.this.getLogger().error("Only .xlsx Excel 2007 OOXML files are supported", e);
                        throw new UnsupportedOperationException("Only .xlsx Excel 2007 OOXML files are supported", e);
                    } catch (OpenXML4JException e) {
                        ConvertExcelToCSVProcessor.this.getLogger().error("Error occurred while processing Excel document metadata", e);
                    } finally {
                        if (excelPackage != null) {
                            // Discard the package without saving to release what it holds.
                            excelPackage.revert();
                        }
                    }
                }
            });
            processSession.transfer(flowFile, ORIGINAL);
        } catch (RuntimeException e) {
            getLogger().error("Failed to process incoming Excel document", e);
            // NOTE(review): the attribute key written here is the fully qualified class name
            // plus ".error", whereas the @WritesAttribute on this class documents
            // "convertexceltocsvprocessor.error" - confirm which one downstream flows expect.
            processSession.transfer(processSession.putAttribute(flowFile, ConvertExcelToCSVProcessor.class.getName() + ".error", e.getMessage()), FAILURE);
        }
    }

    /** Returns true when {@code sheetName} equals one of {@code desiredNames}, ignoring case. */
    private static boolean isDesiredSheet(String sheetName, String[] desiredNames) {
        for (final String desiredName : desiredNames) {
            if (desiredName.equalsIgnoreCase(sheetName)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Converts one worksheet to CSV in a newly created FlowFile and transfers it.
     *
     * <p>The sheet XML is parsed with an {@code ExcelSheetRowHandler} that writes CSV into
     * the new FlowFile. The FlowFile then receives the sheet name, row count, source file
     * name, a {@code .csv} filename and the CSV mime type, and is sent to {@code SUCCESS}.
     * If the SAX parser cannot be created or configured, the FlowFile is sent to
     * {@code FAILURE} with the exception message in an error attribute. A
     * {@link SAXException} raised while parsing the sheet is only logged, so the FlowFile
     * is still sent to {@code SUCCESS} with whatever was written before the error.</p>
     *
     * @param processSession session used to create, write and transfer the new FlowFile
     * @param flowFile the incoming Excel FlowFile; only its filename attribute is read
     * @param sharedStringsTable shared strings of the workbook, used to resolve string cells
     * @param inputStream XML content of the sheet; always closed before this method returns
     * @param str sheet name used when the sheet XML itself did not provide one
     * @throws IOException if closing {@code inputStream} fails
     */
    public void handleExcelSheet(ProcessSession processSession, FlowFile flowFile, SharedStringsTable sharedStringsTable, final InputStream inputStream, String str) throws IOException {
        FlowFile sheetFlowFile = processSession.create();
        try {
            final XMLReader xmlReader = XMLReaderFactory.createXMLReader(SAX_PARSER);
            // Sheet XML comes from an untrusted document and never needs a DOCTYPE;
            // rejecting it prevents external-entity (XXE) processing.
            xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
            final ExcelSheetRowHandler rowHandler = new ExcelSheetRowHandler(sharedStringsTable);
            xmlReader.setContentHandler(rowHandler);

            sheetFlowFile = processSession.write(sheetFlowFile, new OutputStreamCallback() {
                public void process(OutputStream outputStream) throws IOException {
                    rowHandler.setFlowFileOutputStream(outputStream);
                    try {
                        xmlReader.parse(new InputSource(inputStream));
                    } catch (SAXException e) {
                        ConvertExcelToCSVProcessor.this.getLogger().error("Error occurred while processing Excel sheet {}", new Object[]{rowHandler.getSheetName()}, e);
                    }
                }
            });

            // Prefer the name found in the sheet XML; fall back to the caller's name when
            // the handler saw none (still "UNKNOWN") or reported null.
            final String parsedName = rowHandler.getSheetName();
            final String sheetName = (parsedName == null || UNKNOWN_SHEET_NAME.equals(parsedName)) ? str : parsedName;

            sheetFlowFile = processSession.putAttribute(sheetFlowFile, SHEET_NAME, sheetName);
            sheetFlowFile = processSession.putAttribute(sheetFlowFile, ROW_NUM, Long.toString(rowHandler.getRowCount()));

            final String sourceFilename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
            sheetFlowFile = processSession.putAttribute(sheetFlowFile, SOURCE_FILE_NAME,
                    StringUtils.isNotEmpty(sourceFilename) ? sourceFilename : UNKNOWN_SHEET_NAME);

            final String csvFilename = updateFilenameToCSVExtension(
                    sheetFlowFile.getAttribute(CoreAttributes.UUID.key()),
                    sheetFlowFile.getAttribute(CoreAttributes.FILENAME.key()),
                    sheetName);
            sheetFlowFile = processSession.putAttribute(sheetFlowFile, CoreAttributes.FILENAME.key(), csvFilename);
            sheetFlowFile = processSession.putAttribute(sheetFlowFile, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
            processSession.transfer(sheetFlowFile, SUCCESS);
        } catch (SAXException e) {
            // Only parser creation/configuration can land here; parse errors are handled in the callback.
            getLogger().error("Failed to create instance of SAXParser {}", new Object[]{SAX_PARSER}, e);
            sheetFlowFile = processSession.putAttribute(sheetFlowFile, ConvertExcelToCSVProcessor.class.getName() + ".error", e.getMessage());
            processSession.transfer(sheetFlowFile, FAILURE);
        } finally {
            inputStream.close();
        }
    }

    /**
     * Builds the filename of a generated CSV FlowFile as {@code <base>_<sheet>.csv}.
     *
     * <p>{@code <base>} is {@code str2} with its trailing extension removed, or
     * {@code str} when {@code str2} is null or empty.</p>
     *
     * @param str fallback base name used when no filename is available
     * @param str2 current filename, may be null or empty
     * @param str3 sheet name appended after an underscore
     * @return the new filename ending in {@code .csv}
     */
    private String updateFilenameToCSVExtension(String str, String str2, String str3) {
        final String baseName;
        if (StringUtils.isNotEmpty(str2)) {
            final String extension = FilenameUtils.getExtension(str2);
            if (StringUtils.isNotEmpty(extension)) {
                // Cut only the trailing ".ext". Replacing every occurrence would also
                // mangle names that contain the same text earlier, e.g. "a.xlsx_b.xlsx".
                baseName = str2.substring(0, str2.length() - extension.length() - 1);
            } else {
                baseName = str2;
            }
        } else {
            baseName = str;
        }
        return new StringBuilder()
                .append(baseName)
                .append("_")
                .append(str3)
                .append(".")
                .append("csv")
                .toString();
    }
}
