package org.apache.nifi.processors.poi;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.csv.CSVUtils;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processor.io.OutputStreamCallback;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

@CapabilityDescription("Consumes a Microsoft Excel document and converts each worksheet to csv. Each sheet from the incoming Excel document will generate a new Flowfile that will be output from this processor. Each output Flowfile's contents will be formatted as a csv file where the each row from the excel sheet is output as a newline in the csv file. This processor is currently only capable of processing .xlsx (XSSF 2007 OOXML file format) Excel documents and not older .xls (HSSF '97(-2007) file format) documents. This processor also expects well formatted CSV content and will not escape cell's containing invalid content such as newlines or additional commas.")
@Tags({"excel", "csv", "poi"})
@WritesAttributes({@WritesAttribute(attribute = ConvertExcelToCSVProcessor.SHEET_NAME, description = "The name of the Excel sheet that this particular row of data came from in the Excel document"), @WritesAttribute(attribute = ConvertExcelToCSVProcessor.ROW_NUM, description = "The number of rows in this Excel Sheet"), @WritesAttribute(attribute = ConvertExcelToCSVProcessor.SOURCE_FILE_NAME, description = "The name of the Excel document file that this data originated from"), @WritesAttribute(attribute = "convertexceltocsvprocessor.error", description = "Error message that was encountered on a per Excel sheet basis. This attribute is only populated if an error was occured while processing the particular sheet. Having the error present at the sheet level will allow for the end user to better understand what syntax errors in their excel doc on a larger scale caused the error.")})
/* loaded from: input_file:org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.class */
public class ConvertExcelToCSVProcessor extends AbstractProcessor {
    /** MIME type set on every CSV flow file produced by this processor. */
    private static final String CSV_MIME_TYPE = "text/csv";
    /** Attribute holding the name of the sheet a CSV flow file was generated from. */
    public static final String SHEET_NAME = "sheetname";
    /** Attribute holding the number of rows written for the sheet. */
    public static final String ROW_NUM = "numrows";
    /** Attribute holding the filename of the originating Excel flow file. */
    public static final String SOURCE_FILE_NAME = "sourcefilename";
    /** Delimiter used to split the sheet-name list and the column-number list. */
    private static final String DESIRED_SHEETS_DELIMITER = ",";
    /** Value written to {@link #SOURCE_FILE_NAME} when the incoming flow file has no filename. */
    private static final String UNKNOWN_SHEET_NAME = "UNKNOWN";

    /** Optional comma separated list of sheet names to extract; unset means every sheet. */
    public static final PropertyDescriptor DESIRED_SHEETS = new PropertyDescriptor.Builder()
            .name("extract-sheets")
            .displayName("Sheets to Extract")
            .description("Comma separated list of Excel document sheet names that should be extracted from the excel document. If this property is left blank then all of the sheets will be extracted from the Excel document. The list of names is case in-sensitive. Any sheets not specified in this value will be ignored.")
            .required(false)
            .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .build();

    /** Number of leading rows to skip in every sheet (defaults to 0). */
    public static final PropertyDescriptor ROWS_TO_SKIP = new PropertyDescriptor.Builder()
            .name("excel-extract-first-row")
            .displayName("Number of Rows to Skip")
            // The sentences were previously glued together ("processing.Use", "dataset.Empty").
            .description("The row number of the first row to start processing. Use this to skip over rows of data at the top of your worksheet that are not part of the dataset. Empty rows of data anywhere in the spreadsheet will always be skipped, no matter what this value is set to.")
            .required(true)
            .defaultValue("0")
            .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
            .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
            .build();

    /** Optional comma separated list of 1-based column numbers to leave out of the CSV. */
    public static final PropertyDescriptor COLUMNS_TO_SKIP = new PropertyDescriptor.Builder()
            .name("excel-extract-column-to-skip")
            .displayName("Columns To Skip")
            .description("Comma delimited list of column numbers to skip. Use the columns number and not the letter designation. Use this to skip over columns anywhere in your worksheet that you don't want extracted as part of the record.")
            .required(false)
            .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .build();

    /** Whether cell values are written with their Excel formatting applied or as raw values. */
    public static final PropertyDescriptor FORMAT_VALUES = new PropertyDescriptor.Builder()
            .name("excel-format-values")
            .displayName("Format Cell Values")
            .description("Should the cell values be written to CSV using the formatting applied in Excel, or should they be printed as raw values.")
            .allowableValues(new String[]{"true", "false"})
            .defaultValue("false")
            .required(true)
            .build();

    /** Receives the unmodified incoming Excel flow file after it has been processed. */
    public static final Relationship ORIGINAL = new Relationship.Builder()
            .name("original")
            .description("Original Excel document received by this processor")
            .build();

    /** Receives one CSV flow file per converted sheet. */
    public static final Relationship SUCCESS = new Relationship.Builder()
            .name("success")
            .description("Excel data converted to csv")
            .build();

    /** Receives flow files whose Excel content could not be parsed. */
    public static final Relationship FAILURE = new Relationship.Builder()
            .name("failure")
            .description("Failed to parse the Excel document")
            .build();

    /** Supported properties; populated once in {@code init}. */
    private List<PropertyDescriptor> descriptors;
    /** Supported relationships; populated once in {@code init}. */
    private Set<Relationship> relationships;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor$ExcelSheetReadConfig.class */
    /**
     * Per-sheet read settings shared between the processor and the CSV sheet handler.
     *
     * <p>The values passed to the constructor never change afterwards. The first/last
     * row and column bounds start at {@code 0} and are updated through their setters.
     */
    public class ExcelSheetReadConfig {
        // Fixed at construction time.
        private final String sheetName;
        private final List<Integer> columnsToSkip;
        private final int overrideFirstRow;
        private final boolean formatValues;
        private final ReadOnlySharedStringsTable sharedStrings;
        private final StylesTable stylesTable;

        // Bounds updated through the setters below.
        private int firstColumn;
        private int lastColumn;
        private int firstRow;
        private int lastRow;

        /**
         * @param list column indexes that must be left out of the output
         * @param i row threshold returned by {@link #getOverrideFirstRow()}
         * @param str name of the sheet being read
         * @param z value returned by {@link #getFormatValues()}
         * @param readOnlySharedStringsTable shared strings table of the workbook
         * @param stylesTable styles table of the workbook
         */
        public ExcelSheetReadConfig(List<Integer> list, int i, String str, boolean z, ReadOnlySharedStringsTable readOnlySharedStringsTable, StylesTable stylesTable) {
            this.sheetName = str;
            this.columnsToSkip = list;
            this.overrideFirstRow = i;
            this.formatValues = z;
            this.sharedStrings = readOnlySharedStringsTable;
            this.stylesTable = stylesTable;
        }

        /** @return name of the sheet this configuration belongs to */
        public String getSheetName() {
            return sheetName;
        }

        /** @return column indexes that must be left out of the output */
        public List<Integer> getColumnsToSkip() {
            return columnsToSkip;
        }

        /** @return row threshold supplied at construction time */
        public int getOverrideFirstRow() {
            return overrideFirstRow;
        }

        /** @return the format-values flag supplied at construction time */
        public boolean getFormatValues() {
            return formatValues;
        }

        /** @return shared strings table of the workbook */
        public ReadOnlySharedStringsTable getSharedStringsTable() {
            return sharedStrings;
        }

        /** @return styles table of the workbook */
        public StylesTable getStyles() {
            return stylesTable;
        }

        /** @return first column bound, {@code 0} until set */
        public int getFirstColumn() {
            return firstColumn;
        }

        public void setFirstColumn(int i) {
            firstColumn = i;
        }

        /** @return last column bound, {@code 0} until set */
        public int getLastColumn() {
            return lastColumn;
        }

        public void setLastColumn(int i) {
            lastColumn = i;
        }

        /** @return first row bound, {@code 0} until set */
        public int getFirstRow() {
            return firstRow;
        }

        public void setFirstRow(int i) {
            firstRow = i;
        }

        /** @return last row bound, {@code 0} until set */
        public int getLastRow() {
            return lastRow;
        }

        public void setLastRow(int i) {
            lastRow = i;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor$SheetToCSV.class */
    public class SheetToCSV implements XSSFSheetXMLHandler.SheetContentsHandler {
        private ExcelSheetReadConfig readConfig;
        CSVFormat csvFormat;
        private boolean firstCellOfRow;
        private boolean skipRow;
        private CSVPrinter printer;
        private ArrayList<Object> fieldValues;
        private int currentRow = -1;
        private int currentCol = -1;
        private int rowCount = 0;
        private boolean rowHasValues = false;
        private int skippedColumns = 0;
        private boolean firstRow = false;

        public int getRowCount() {
            return this.rowCount;
        }

        public void setOutput(PrintStream printStream) {
            try {
                this.printer = new CSVPrinter(new OutputStreamWriter(printStream), this.csvFormat);
            } catch (IOException e) {
                throw new ProcessException("Failed to create CSV Printer.", e);
            }
        }

        public SheetToCSV(ExcelSheetReadConfig excelSheetReadConfig, CSVFormat cSVFormat) {
            this.readConfig = excelSheetReadConfig;
            this.csvFormat = cSVFormat;
        }

        public void startRow(int i) {
            if (i <= this.readConfig.getOverrideFirstRow()) {
                this.skipRow = true;
                return;
            }
            this.skipRow = false;
            this.firstCellOfRow = true;
            this.firstRow = this.currentRow == -1;
            this.currentRow = i;
            this.currentCol = -1;
            this.rowHasValues = false;
            this.fieldValues = new ArrayList<>();
        }

        public void endRow(int i) {
            if (this.skipRow) {
                return;
            }
            if (this.firstRow) {
                this.readConfig.setLastColumn(this.currentCol);
            }
            if (this.rowHasValues) {
                int lastColumn = (this.readConfig.getLastColumn() - this.currentCol) - this.readConfig.getColumnsToSkip().size();
                for (int i2 = 0; i2 < lastColumn; i2++) {
                    this.fieldValues.add(null);
                }
                try {
                    this.printer.printRecord(this.fieldValues);
                } catch (IOException e) {
                    e.printStackTrace();
                }
                this.rowCount++;
            }
        }

        public void cell(String str, String str2, XSSFComment xSSFComment) {
            if (this.skipRow) {
                return;
            }
            if (str == null) {
                str = new CellAddress(this.currentRow, this.currentCol).formatAsString();
            }
            short col = new CellReference(str).getCol();
            if (this.firstRow && this.firstCellOfRow) {
                this.readConfig.setFirstRow(this.currentRow);
                this.readConfig.setFirstColumn(col);
            }
            if (this.firstRow || (col >= this.readConfig.getFirstColumn() && col <= this.readConfig.getLastColumn())) {
                if (this.readConfig.getColumnsToSkip().contains(Integer.valueOf(col))) {
                    this.skippedColumns++;
                    return;
                }
                int firstColumn = ((col - this.readConfig.getFirstColumn()) - (this.currentCol - this.readConfig.getFirstColumn())) - 1;
                if (this.firstCellOfRow) {
                    firstColumn = col - this.readConfig.getFirstColumn();
                }
                int i = firstColumn - this.skippedColumns;
                if (this.firstCellOfRow) {
                    this.firstCellOfRow = false;
                }
                for (int i2 = 0; i2 < i; i2++) {
                    this.fieldValues.add(null);
                }
                this.currentCol = col;
                this.fieldValues.add(str2);
                this.rowHasValues = true;
                this.skippedColumns = 0;
            }
        }

        public void headerFooter(String str, boolean z, String str2) {
        }

        public void close() throws IOException {
            this.printer.close();
        }
    }

    /**
     * Builds the immutable property and relationship collections exposed by
     * {@link #getSupportedPropertyDescriptors()} and {@link #getRelationships()}.
     */
    @Override
    protected void init(ProcessorInitializationContext processorInitializationContext) {
        final List<PropertyDescriptor> supportedProperties = new ArrayList<>();
        supportedProperties.add(DESIRED_SHEETS);
        supportedProperties.add(ROWS_TO_SKIP);
        supportedProperties.add(COLUMNS_TO_SKIP);
        supportedProperties.add(FORMAT_VALUES);
        supportedProperties.add(CSVUtils.CSV_FORMAT);
        supportedProperties.add(CSVUtils.VALUE_SEPARATOR);
        supportedProperties.add(CSVUtils.INCLUDE_HEADER_LINE);
        supportedProperties.add(CSVUtils.QUOTE_CHAR);
        supportedProperties.add(CSVUtils.ESCAPE_CHAR);
        supportedProperties.add(CSVUtils.COMMENT_MARKER);
        supportedProperties.add(CSVUtils.NULL_STRING);
        supportedProperties.add(CSVUtils.TRIM_FIELDS);
        // Same as the shared quote-mode property, but defaulting to CSVUtils.QUOTE_NONE.
        supportedProperties.add(new PropertyDescriptor.Builder()
                .fromPropertyDescriptor(CSVUtils.QUOTE_MODE)
                .defaultValue(CSVUtils.QUOTE_NONE.getValue())
                .build());
        supportedProperties.add(CSVUtils.RECORD_SEPARATOR);
        supportedProperties.add(CSVUtils.TRAILING_DELIMITER);
        this.descriptors = Collections.unmodifiableList(supportedProperties);

        final Set<Relationship> supportedRelationships = new HashSet<>();
        supportedRelationships.add(ORIGINAL);
        supportedRelationships.add(SUCCESS);
        supportedRelationships.add(FAILURE);
        this.relationships = Collections.unmodifiableSet(supportedRelationships);
    }

    /** @return the unmodifiable set of relationships built in {@code init} */
    @Override
    public Set<Relationship> getRelationships() {
        return relationships;
    }

    /** @return the unmodifiable list of property descriptors built in {@code init} */
    @Override
    public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return descriptors;
    }

    /**
     * Reads one Excel flow file, converts every selected sheet into its own CSV flow file
     * and routes the incoming flow file to {@link #ORIGINAL}. If a runtime exception
     * escapes the conversion, the incoming flow file is routed to {@link #FAILURE} with
     * the exception message stored in an error attribute.
     *
     * @throws ProcessException if the Columns To Skip value contains a non-numeric entry
     */
    @Override
    public void onTrigger(ProcessContext processContext, final ProcessSession processSession) throws ProcessException {
        final FlowFile flowFile = processSession.get();
        if (flowFile == null) {
            return;
        }

        final String desiredSheetsValue = processContext.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions(flowFile).getValue();
        final boolean formatValues = processContext.getProperty(FORMAT_VALUES).asBoolean().booleanValue();
        final CSVFormat csvFormat = CSVUtils.createCSVFormat(processContext);
        // The sheet handler skips every row whose number is <= this value, hence the "- 1".
        final int firstRow = processContext.getProperty(ROWS_TO_SKIP).evaluateAttributeExpressions(flowFile).asInteger().intValue() - 1;
        final List<Integer> columnsToSkip = parseColumnsToSkip(processContext.getProperty(COLUMNS_TO_SKIP).evaluateAttributeExpressions(flowFile).getValue());
        // StringUtils.split returns null only for a null input, i.e. when no sheet filter is set.
        final String[] desiredSheets = StringUtils.split(desiredSheetsValue, DESIRED_SHEETS_DELIMITER);

        try {
            processSession.read(flowFile, new InputStreamCallback() {
                @Override
                public void process(InputStream inputStream) throws IOException {
                    try {
                        final OPCPackage pkg = OPCPackage.open(inputStream);
                        try {
                            final XSSFReader reader = new XSSFReader(pkg);
                            final ReadOnlySharedStringsTable sharedStrings = new ReadOnlySharedStringsTable(pkg);
                            final StylesTable styles = reader.getStylesTable();
                            final XSSFReader.SheetIterator sheets = reader.getSheetsData();

                            boolean sheetConverted = false;
                            while (sheets.hasNext()) {
                                final InputStream sheetStream = sheets.next();
                                final String sheetName = sheets.getSheetName();
                                if (desiredSheets == null || isDesiredSheet(sheetName, desiredSheets)) {
                                    // handleExcelSheet closes the sheet stream itself.
                                    ConvertExcelToCSVProcessor.this.handleExcelSheet(processSession, flowFile, sheetStream,
                                            new ExcelSheetReadConfig(columnsToSkip, firstRow, sheetName, formatValues, sharedStrings, styles), csvFormat);
                                    sheetConverted = true;
                                } else {
                                    // Sheets that are not converted still hand out an open stream.
                                    sheetStream.close();
                                }
                            }
                            if (desiredSheets != null && !sheetConverted) {
                                ConvertExcelToCSVProcessor.this.getLogger().debug("Excel document was parsed but no sheets with the specified desired names were found.");
                            }
                        } finally {
                            // Releases the package without attempting to save it.
                            pkg.revert();
                        }
                    } catch (InvalidFormatException e) {
                        // Must precede the OpenXML4JException handler: it is a subclass of it.
                        ConvertExcelToCSVProcessor.this.getLogger().error("Only .xlsx Excel 2007 OOXML files are supported", e);
                        throw new UnsupportedOperationException("Only .xlsx Excel 2007 OOXML files are supported", e);
                    } catch (OpenXML4JException | SAXException e) {
                        ConvertExcelToCSVProcessor.this.getLogger().error("Error occurred while processing Excel document metadata", e);
                    }
                }
            });
            processSession.transfer(flowFile, ORIGINAL);
        } catch (RuntimeException e) {
            getLogger().error("Failed to process incoming Excel document. " + e.getMessage(), e);
            // NOTE(review): this writes "<fully qualified class name>.error", while the class-level
            // @WritesAttribute documents "convertexceltocsvprocessor.error" - confirm which is intended.
            processSession.transfer(processSession.putAttribute(flowFile, ConvertExcelToCSVProcessor.class.getName() + ".error", e.getMessage()), FAILURE);
        }
    }

    /**
     * Converts the comma separated, 1-based column numbers into zero-based column indexes.
     * Whitespace around each entry is ignored.
     *
     * @param rawColumns evaluated Columns To Skip value, may be null
     * @return zero-based indexes, empty when nothing is configured
     * @throws ProcessException if an entry is not an integer
     */
    private List<Integer> parseColumnsToSkip(String rawColumns) {
        final List<Integer> columns = new ArrayList<>();
        final String[] tokens = StringUtils.split(rawColumns, DESIRED_SHEETS_DELIMITER);
        if (tokens == null) {
            return columns;
        }
        for (String token : tokens) {
            try {
                columns.add(Integer.valueOf(Integer.parseInt(token.trim()) - 1));
            } catch (NumberFormatException e) {
                throw new ProcessException("Invalid column in Columns to Skip list.", e);
            }
        }
        return columns;
    }

    /** Returns true when {@code sheetName} equals one of {@code desiredSheets}, ignoring case. */
    private static boolean isDesiredSheet(String sheetName, String[] desiredSheets) {
        for (String desired : desiredSheets) {
            if (sheetName.equalsIgnoreCase(desired)) {
                return true;
            }
        }
        return false;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Converts a single worksheet into a new CSV flow file derived from {@code flowFile}.
     *
     * <p>On success the new flow file carries the sheet name, row count, source filename,
     * a {@code .csv} filename and the CSV MIME type, and is routed to {@link #SUCCESS}.
     * If the XML parser cannot be created, it is routed to {@link #FAILURE} with an error
     * attribute. The sheet stream is always closed before this method returns.
     *
     * @param processSession session used to create, write and transfer the sheet flow file
     * @param flowFile the incoming Excel flow file (parent of the sheet flow file)
     * @param inputStream XML stream of the sheet; closed by this method
     * @param excelSheetReadConfig read settings for the sheet
     * @param cSVFormat CSV format used for the output
     * @throws IOException if closing the sheet stream fails
     */
    public void handleExcelSheet(ProcessSession processSession, FlowFile flowFile, final InputStream inputStream, final ExcelSheetReadConfig excelSheetReadConfig, CSVFormat cSVFormat) throws IOException {
        // Always holds the most recent version of the sheet flow file.
        FlowFile sheetFlowFile = processSession.create(flowFile);
        try {
            final DataFormatter dataFormatter = new DataFormatter();
            final InputSource inputSource = new InputSource(inputStream);
            final SheetToCSV sheetToCSV = new SheetToCSV(excelSheetReadConfig, cSVFormat);
            final XMLReader xmlReader = SAXHelper.newXMLReader();
            // The styles table is only handed to the handler when formatted values are requested.
            final StylesTable styles = excelSheetReadConfig.getFormatValues() ? excelSheetReadConfig.getStyles() : null;
            xmlReader.setContentHandler(new XSSFSheetXMLHandler(styles, (CommentsTable) null, excelSheetReadConfig.getSharedStringsTable(), sheetToCSV, dataFormatter, false));

            sheetFlowFile = processSession.write(sheetFlowFile, new OutputStreamCallback() {
                @Override
                public void process(OutputStream outputStream) throws IOException {
                    final PrintStream printStream = new PrintStream(outputStream);
                    sheetToCSV.setOutput(printStream);
                    try {
                        xmlReader.parse(inputSource);
                        inputStream.close();
                    } catch (SAXException e) {
                        ConvertExcelToCSVProcessor.this.getLogger().error("Error occurred while processing Excel sheet {}", new Object[]{excelSheetReadConfig.getSheetName()}, e);
                    } finally {
                        // Close (and thereby flush) the printer even when parsing failed, so
                        // the rows already counted are not lost in the writer's buffer.
                        try {
                            sheetToCSV.close();
                        } finally {
                            printStream.close();
                        }
                    }
                }
            });

            sheetFlowFile = processSession.putAttribute(sheetFlowFile, SHEET_NAME, excelSheetReadConfig.getSheetName());
            sheetFlowFile = processSession.putAttribute(sheetFlowFile, ROW_NUM, String.valueOf(sheetToCSV.getRowCount()));

            final String sourceFilename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
            sheetFlowFile = processSession.putAttribute(sheetFlowFile, SOURCE_FILE_NAME,
                    StringUtils.isNotEmpty(sourceFilename) ? sourceFilename : UNKNOWN_SHEET_NAME);

            final String csvFilename = updateFilenameToCSVExtension(
                    sheetFlowFile.getAttribute(CoreAttributes.UUID.key()),
                    sheetFlowFile.getAttribute(CoreAttributes.FILENAME.key()),
                    excelSheetReadConfig.getSheetName());
            sheetFlowFile = processSession.putAttribute(sheetFlowFile, CoreAttributes.FILENAME.key(), csvFilename);
            sheetFlowFile = processSession.putAttribute(sheetFlowFile, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
            processSession.transfer(sheetFlowFile, SUCCESS);
        } catch (ParserConfigurationException | SAXException e) {
            getLogger().error("Failed to create instance of Parser.", e);
            sheetFlowFile = processSession.putAttribute(sheetFlowFile, ConvertExcelToCSVProcessor.class.getName() + ".error", e.getMessage());
            processSession.transfer(sheetFlowFile, FAILURE);
        } catch (RuntimeException e) {
            // Do not leave the sheet flow file without a destination: the caller routes the
            // incoming flow file to failure, which only works if no child is left dangling.
            processSession.remove(sheetFlowFile);
            throw e;
        } finally {
            inputStream.close();
        }
    }

    /**
     * Builds the filename of a sheet's CSV flow file: {@code <base>_<sheet>.csv}.
     *
     * <p>{@code <base>} is the original filename without its extension, or the fallback
     * identifier when no filename is available.
     *
     * @param str fallback used when {@code str2} is null or empty (the caller passes the flow file UUID)
     * @param str2 original filename, may be null or empty
     * @param str3 sheet name appended after an underscore
     * @return the new filename, always ending in {@code .csv}
     */
    private String updateFilenameToCSVExtension(String str, String str2, String str3) {
        final StringBuilder sb = new StringBuilder();
        if (StringUtils.isNotEmpty(str2)) {
            final String extension = FilenameUtils.getExtension(str2);
            if (StringUtils.isNotEmpty(extension)) {
                // Drop only the trailing ".<extension>". Replacing every occurrence would also
                // mangle names such as "data.xlsx_old.xlsx".
                sb.append(str2, 0, str2.length() - extension.length() - 1);
            } else {
                sb.append(str2);
            }
        } else {
            sb.append(str);
        }
        return sb.append("_").append(str3).append(".").append("csv").toString();
    }
}
