package org.apache.nifi.processors.poi;

import com.github.pjfanning.xlsx.StreamingReader;
import com.github.pjfanning.xlsx.exceptions.OpenException;
import com.github.pjfanning.xlsx.exceptions.ParseException;
import com.github.pjfanning.xlsx.exceptions.ReadException;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.csv.CSVUtils;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;

@CapabilityDescription("Consumes a Microsoft Excel document and converts each worksheet to csv. Each sheet from the incoming Excel document will generate a new Flowfile that will be output from this processor. Each output Flowfile's contents will be formatted as a csv file where the each row from the excel sheet is output as a newline in the csv file. This processor is currently only capable of processing .xlsx (XSSF 2007 OOXML file format) Excel documents and not older .xls (HSSF '97(-2007) file format) documents. This processor also expects well formatted CSV content and will not escape cell's containing invalid content such as newlines or additional commas.")
@Tags({"excel", "csv", "poi"})
@WritesAttributes({@WritesAttribute(attribute = ConvertExcelToCSVProcessor.SHEET_NAME, description = "The name of the Excel sheet that this particular row of data came from in the Excel document"), @WritesAttribute(attribute = ConvertExcelToCSVProcessor.ROW_NUM, description = "The number of rows in this Excel Sheet"), @WritesAttribute(attribute = ConvertExcelToCSVProcessor.SOURCE_FILE_NAME, description = "The name of the Excel document file that this data originated from"), @WritesAttribute(attribute = "convertexceltocsvprocessor.error", description = "Error message that was encountered on a per Excel sheet basis. This attribute is only populated if an error was occured while processing the particular sheet. Having the error present at the sheet level will allow for the end user to better understand what syntax errors in their excel doc on a larger scale caused the error.")})
/* loaded from: input_file:org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.class */
public class ConvertExcelToCSVProcessor extends AbstractProcessor {
    /** MIME type written to the mime.type attribute of every generated CSV FlowFile. */
    private static final String CSV_MIME_TYPE = "text/csv";
    /** Attribute that receives the name of the sheet a generated FlowFile was produced from. */
    public static final String SHEET_NAME = "sheetname";
    /** Attribute that receives the number of CSV records written for the sheet. */
    public static final String ROW_NUM = "numrows";
    /** Attribute that receives the filename of the incoming Excel FlowFile. */
    public static final String SOURCE_FILE_NAME = "sourcefilename";
    /**
     * Separator used to split the "Sheets to Extract" value, to split the "Columns To Skip" value, and to join
     * the names reported as not found.
     */
    private static final String DESIRED_SHEETS_DELIMITER = ",";
    /** Fallback value for the {@link #SOURCE_FILE_NAME} attribute when the incoming FlowFile has no filename. */
    private static final String UNKNOWN_SHEET_NAME = "UNKNOWN";
    /** Optional list of sheet names to extract; when it evaluates to nothing, every sheet is extracted. */
    public static final PropertyDescriptor DESIRED_SHEETS = new PropertyDescriptor.Builder().name("extract-sheets").displayName("Sheets to Extract").description("Comma separated list of Excel document sheet names that should be extracted from the excel document. If this property is left blank then all of the sheets will be extracted from the Excel document. The list of names is case in-sensitive. Any sheets not specified in this value will be ignored. A bulletin will be generated if a specified sheet(s) are not found.").required(false).expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES).addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build();
    /** Number of leading rows to ignore; onTrigger subtracts one from it to obtain the last skipped zero-based row index. */
    public static final PropertyDescriptor ROWS_TO_SKIP = new PropertyDescriptor.Builder().name("excel-extract-first-row").displayName("Number of Rows to Skip").description("The row number of the first row to start processing.Use this to skip over rows of data at the top of your worksheet that are not part of the dataset.Empty rows of data anywhere in the spreadsheet will always be skipped, no matter what this value is set to.").required(true).defaultValue("0").expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES).addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR).build();
    /** Optional list of one-based column numbers to omit; getColumnsToSkip converts them to zero-based indexes. */
    public static final PropertyDescriptor COLUMNS_TO_SKIP = new PropertyDescriptor.Builder().name("excel-extract-column-to-skip").displayName("Columns To Skip").description("Comma delimited list of column numbers to skip. Use the columns number and not the letter designation. Use this to skip over columns anywhere in your worksheet that you don't want extracted as part of the record.").required(false).expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES).addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build();
    /** Boolean switch whose value onTrigger passes to the streaming reader's setReadStyles option. */
    public static final PropertyDescriptor FORMAT_VALUES = new PropertyDescriptor.Builder().name("excel-format-values").displayName("Format Cell Values").description("Should the cell values be written to CSV using the formatting applied in Excel, or should they be printed as raw values.").allowableValues(new String[]{"true", "false"}).defaultValue("false").required(true).build();
    /** Receives the incoming FlowFile when the read callback completes without throwing. */
    public static final Relationship ORIGINAL = new Relationship.Builder().name("original").description("Original Excel document received by this processor").build();
    /** Receives one FlowFile per converted sheet. */
    public static final Relationship SUCCESS = new Relationship.Builder().name("success").description("Excel data converted to csv").build();
    /** Receives the incoming FlowFile, or an individual sheet FlowFile, when a RuntimeException is caught. */
    public static final Relationship FAILURE = new Relationship.Builder().name("failure").description("Failed to parse the Excel document").build();
    /** Supported property descriptors; assigned once in init as an unmodifiable list. */
    private List<PropertyDescriptor> descriptors;
    /** Relationships exposed by this processor; assigned once in init as an unmodifiable set. */
    private Set<Relationship> relationships;

    /**
     * Read settings for a single sheet: which columns and leading rows to ignore, plus the
     * first/last column bounds that are filled in while the sheet is being read.
     *
     * <p>The column bounds default to 0 until a setter is called. The skip list is stored by
     * reference; no defensive copy is made.</p>
     */
    public static class ExcelSheetReadConfig {
        // Fixed at construction time.
        private final List<Integer> columnsToSkip;
        private final int overrideFirstRow;
        private final String sheetName;

        // Mutable bounds, updated by the reader.
        private int firstColumn;
        private int lastColumn;

        /**
         * @param columnsToSkip    column indexes whose cells must not be emitted
         * @param overrideFirstRow row index threshold returned by {@link #getOverrideFirstRow()}
         * @param sheetName        name of the sheet these settings apply to
         */
        public ExcelSheetReadConfig(List<Integer> columnsToSkip, int overrideFirstRow, String sheetName) {
            this.columnsToSkip = columnsToSkip;
            this.overrideFirstRow = overrideFirstRow;
            this.sheetName = sheetName;
        }

        /** @return the column indexes to omit, exactly as supplied to the constructor */
        public List<Integer> getColumnsToSkip() {
            return columnsToSkip;
        }

        /** @return the row index threshold supplied to the constructor */
        public int getOverrideFirstRow() {
            return overrideFirstRow;
        }

        /** @return the name of the sheet these settings apply to */
        public String getSheetName() {
            return sheetName;
        }

        /** @return the currently recorded first column index */
        public int getFirstColumn() {
            return firstColumn;
        }

        /** @param i the first column index to record */
        public void setFirstColumn(int i) {
            firstColumn = i;
        }

        /** @return the currently recorded last column index */
        public int getLastColumn() {
            return lastColumn;
        }

        /** @param i the last column index to record */
        public void setLastColumn(int i) {
            lastColumn = i;
        }
    }

    /**
     * Row-by-row converter that turns the cells of one sheet into CSV records.
     *
     * <p>Expected call order per sheet: {@link #setOutput(OutputStream)}, then for every row
     * {@link #startRow(int)}, {@link #cell(Cell)} for each cell, {@link #endRow()}, and finally
     * {@link #close()}.</p>
     *
     * <p>The first row that is not skipped fixes the column window: its first retained cell
     * becomes the first column and its last retained cell becomes the last column. On later
     * rows, cells outside that window are ignored, and missing cells inside it are written as
     * {@code null} fields so that every record has the same width.</p>
     *
     * <p>Padding is computed by counting the columns in a gap that are not in the skip list.
     * This keeps records aligned regardless of where the skipped columns sit or whether their
     * cells are physically present in a row.</p>
     */
    public class SheetToCSV {
        private final ExcelSheetReadConfig readConfig;
        CSVFormat csvFormat;
        private boolean firstCellOfRow;
        private boolean skipRow;
        private CSVPrinter printer;
        private List<String> fieldValues;
        private int currentRow = -1;
        private int currentCol = -1;
        private int rowCount = 0;
        private boolean firstRow = false;

        /**
         * @param excelSheetReadConfig per-sheet read settings; its column bounds are updated while reading
         * @param cSVFormat            CSV format used when the printer is created
         */
        public SheetToCSV(ExcelSheetReadConfig excelSheetReadConfig, CSVFormat cSVFormat) {
            this.readConfig = excelSheetReadConfig;
            this.csvFormat = cSVFormat;
        }

        /** @return the number of rows for which a record was emitted so far */
        public int getRowCount() {
            return this.rowCount;
        }

        /**
         * Binds this converter to the stream the CSV must be written to (UTF-8 encoded).
         *
         * @param outputStream destination stream
         * @throws ProcessException if the CSV printer cannot be created
         */
        public void setOutput(OutputStream outputStream) {
            try {
                this.printer = new CSVPrinter(new OutputStreamWriter(outputStream, StandardCharsets.UTF_8), this.csvFormat);
            } catch (IOException e) {
                throw new ProcessException("Failed to create CSV Printer.", e);
            }
        }

        /**
         * Begins a new row. Rows whose index is less than or equal to the configured threshold
         * are marked as skipped and produce no output.
         *
         * @param i zero-based row index
         */
        public void startRow(int i) {
            if (i <= this.readConfig.getOverrideFirstRow()) {
                this.skipRow = true;
                return;
            }
            this.skipRow = false;
            this.firstCellOfRow = true;
            // currentRow is still -1 only for the very first row that is not skipped.
            this.firstRow = this.currentRow == -1;
            this.currentRow = i;
            this.currentCol = -1;
            this.fieldValues = new ArrayList<>();
        }

        /**
         * Finishes the current row: records the last column when this is the first row, drops
         * rows without any non-empty value, pads the record to the full width and prints it.
         * A failure to print is logged as a warning and does not abort the sheet.
         */
        public void endRow() {
            if (this.skipRow) {
                return;
            }
            if (this.firstRow) {
                this.readConfig.setLastColumn(this.currentCol);
            }
            boolean hasContent = this.fieldValues.stream().anyMatch(value -> value != null && !value.isEmpty());
            if (!hasContent) {
                return;
            }
            // Trailing padding: one null per retained column after the last written cell.
            padWithNulls(countRetainedColumns(this.currentCol + 1, this.readConfig.getLastColumn() + 1));
            try {
                this.printer.printRecord(this.fieldValues);
            } catch (IOException e) {
                ConvertExcelToCSVProcessor.this.getLogger().warn("Print Record failed", e);
            }
            this.rowCount++;
        }

        /**
         * Consumes one cell of the current row. Cells are assumed to arrive in ascending
         * column order within a row — NOTE(review): confirm for the workbook implementation in use.
         *
         * @param cell the cell to convert; its string value is used, with empty mapped to {@code null}
         */
        public void cell(Cell cell) {
            if (this.skipRow) {
                return;
            }
            int columnIndex = cell.getColumnIndex();
            if (this.firstRow && this.firstCellOfRow) {
                // Keeps moving forward until the first retained cell of the first row is reached.
                this.readConfig.setFirstColumn(columnIndex);
            }
            boolean outsideWindow = columnIndex < this.readConfig.getFirstColumn() || columnIndex > this.readConfig.getLastColumn();
            if (!this.firstRow && outsideWindow) {
                return;
            }
            if (this.readConfig.getColumnsToSkip().contains(columnIndex)) {
                return;
            }
            // Leading/intermediate padding: one null per retained column between the previous
            // written cell (or the window start, for the first cell of the row) and this cell.
            int gapStart = this.firstCellOfRow ? this.readConfig.getFirstColumn() : this.currentCol + 1;
            padWithNulls(countRetainedColumns(gapStart, columnIndex));
            this.firstCellOfRow = false;
            this.currentCol = columnIndex;
            String stringCellValue = cell.getStringCellValue();
            this.fieldValues.add((stringCellValue == null || stringCellValue.isEmpty()) ? null : stringCellValue);
        }

        /**
         * Closes the underlying CSV printer.
         *
         * @throws IOException if closing the printer fails
         */
        public void close() throws IOException {
            this.printer.close();
        }

        /** Counts the columns in {@code [fromInclusive, toExclusive)} that are not in the skip list. */
        private int countRetainedColumns(int fromInclusive, int toExclusive) {
            List<Integer> columnsToSkip = this.readConfig.getColumnsToSkip();
            int retained = 0;
            for (int column = fromInclusive; column < toExclusive; column++) {
                if (!columnsToSkip.contains(column)) {
                    retained++;
                }
            }
            return retained;
        }

        /** Appends {@code count} null fields to the current record; does nothing when count is not positive. */
        private void padWithNulls(int count) {
            for (int n = 0; n < count; n++) {
                this.fieldValues.add(null);
            }
        }
    }

    /**
     * Builds the immutable property-descriptor list and relationship set exposed by this processor.
     * The quote-mode descriptor is a copy of the shared CSV descriptor whose default is overridden
     * with the value of {@code CSVUtils.QUOTE_NONE}.
     *
     * @param processorInitializationContext initialization context (not used here)
     */
    protected void init(ProcessorInitializationContext processorInitializationContext) {
        final PropertyDescriptor quoteMode = new PropertyDescriptor.Builder()
                .fromPropertyDescriptor(CSVUtils.QUOTE_MODE)
                .defaultValue(CSVUtils.QUOTE_NONE.getValue())
                .build();

        this.descriptors = Collections.unmodifiableList(Arrays.asList(
                DESIRED_SHEETS,
                ROWS_TO_SKIP,
                COLUMNS_TO_SKIP,
                FORMAT_VALUES,
                CSVUtils.CSV_FORMAT,
                CSVUtils.VALUE_SEPARATOR,
                CSVUtils.INCLUDE_HEADER_LINE,
                CSVUtils.QUOTE_CHAR,
                CSVUtils.ESCAPE_CHAR,
                CSVUtils.COMMENT_MARKER,
                CSVUtils.NULL_STRING,
                CSVUtils.TRIM_FIELDS,
                quoteMode,
                CSVUtils.RECORD_SEPARATOR,
                CSVUtils.TRAILING_DELIMITER));

        // LinkedHashSet keeps the declaration order: original, success, failure.
        this.relationships = Collections.unmodifiableSet(
                new LinkedHashSet<>(Arrays.asList(ORIGINAL, SUCCESS, FAILURE)));
    }

    /**
     * @return the unmodifiable relationship set assigned in {@code init}
     */
    public Set<Relationship> getRelationships() {
        return relationships;
    }

    /**
     * @return the unmodifiable property-descriptor list assigned in {@code init}
     */
    public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return descriptors;
    }

    /**
     * Reads one incoming FlowFile as a streaming .xlsx workbook and hands every selected sheet to
     * {@code handleExcelSheet}, which emits one CSV FlowFile per sheet.
     *
     * <p>When no sheet names are configured every sheet is converted. Otherwise only sheets whose
     * name matches a configured name (ignoring case) are converted, and a warning lists the
     * configured names that matched nothing.</p>
     *
     * <p>The incoming FlowFile goes to {@code original} when the read callback returns normally.
     * Any {@link RuntimeException} escaping the read sends it to {@code failure} with the
     * exception message stored in the {@code <processor class name>.error} attribute. Reader
     * exceptions not caused by an invalid file format are only logged, so the FlowFile still
     * goes to {@code original} in that case.</p>
     *
     * @param processContext provides the configured property values
     * @param processSession session used to read, create and transfer FlowFiles
     * @throws ProcessException if a property cannot be evaluated before the workbook is opened
     */
    public void onTrigger(ProcessContext processContext, ProcessSession processSession) throws ProcessException {
        final FlowFile flowFile = processSession.get();
        if (flowFile == null) {
            return;
        }

        final Map<String, Boolean> desiredSheets = getDesiredSheets(processContext, flowFile);
        final boolean formatValues = processContext.getProperty(FORMAT_VALUES).asBoolean().booleanValue();
        final CSVFormat csvFormat = CSVUtils.createCSVFormat(processContext, flowFile.getAttributes());
        // The property is a row count; rows with a zero-based index <= this value are skipped.
        final int lastSkippedRow = processContext.getProperty(ROWS_TO_SKIP).evaluateAttributeExpressions(flowFile).asInteger().intValue() - 1;
        final List<Integer> columnsToSkip = getColumnsToSkip(processContext, flowFile);

        try {
            processSession.read(flowFile, inputStream -> {
                // try-with-resources closes the workbook on every path and keeps the primary
                // exception when close() fails as well.
                try (Workbook workbook = StreamingReader.builder()
                        .rowCacheSize(100)
                        .bufferSize(4096)
                        .setReadStyles(formatValues)
                        .open(inputStream)) {
                    if (desiredSheets.isEmpty()) {
                        workbook.forEach(sheet -> handleExcelSheet(processSession, flowFile, sheet,
                                new ExcelSheetReadConfig(columnsToSkip, lastSkippedRow, sheet.getSheetName()), csvFormat));
                    } else {
                        desiredSheets.keySet().forEach(desiredName -> workbook.forEach(sheet -> {
                            if (sheet.getSheetName().equalsIgnoreCase(desiredName)) {
                                handleExcelSheet(processSession, flowFile, sheet,
                                        new ExcelSheetReadConfig(columnsToSkip, lastSkippedRow, sheet.getSheetName()), csvFormat);
                                // Replacing the value of an existing key; the key set itself is not changed.
                                desiredSheets.put(desiredName, Boolean.TRUE);
                            }
                        }));
                        final String sheetsNotFound = getSheetsNotFound(desiredSheets);
                        if (!sheetsNotFound.isEmpty()) {
                            getLogger().warn("Excel sheet(s) not found: {}", new Object[]{sheetsNotFound});
                        }
                    }
                } catch (ParseException | OpenException | ReadException e) {
                    if (e.getCause() instanceof InvalidFormatException) {
                        getLogger().error("Only .xlsx Excel 2007 OOXML files are supported", e);
                        throw new UnsupportedOperationException("Only .xlsx Excel 2007 OOXML files are supported", e);
                    }
                    getLogger().error("Error occurred while processing Excel document metadata", e);
                }
            });
            processSession.transfer(flowFile, ORIGINAL);
        } catch (RuntimeException e) {
            getLogger().error("Failed to process incoming Excel document. " + e.getMessage(), e);
            processSession.transfer(processSession.putAttribute(flowFile, ConvertExcelToCSVProcessor.class.getName() + ".error", e.getMessage()), FAILURE);
        }
    }

    /**
     * Parses the "Columns To Skip" property into zero-based column indexes.
     *
     * <p>The property holds one-based column numbers separated by commas. Whitespace around a
     * number is ignored, so {@code "1, 3"} is accepted.</p>
     *
     * @param processContext provides the property value
     * @param flowFile       FlowFile whose attributes are used to evaluate the expression
     * @return a list of zero-based indexes; empty when the property has no value
     * @throws ProcessException if any entry is not an integer
     */
    private List<Integer> getColumnsToSkip(ProcessContext processContext, FlowFile flowFile) {
        final String rawValue = processContext.getProperty(COLUMNS_TO_SKIP).evaluateAttributeExpressions(flowFile).getValue();
        final String[] tokens = StringUtils.split(rawValue, DESIRED_SHEETS_DELIMITER);
        if (tokens == null) {
            return new ArrayList<>();
        }
        try {
            return Arrays.stream(tokens)
                    // Convert from the one-based number shown to users to a zero-based index.
                    .map(token -> Integer.parseInt(token.trim()) - 1)
                    .collect(Collectors.toList());
        } catch (NumberFormatException e) {
            throw new ProcessException("Invalid column in Columns to Skip list.", e);
        }
    }

    /**
     * Parses the "Sheets to Extract" property into a map from requested sheet name to a
     * "found" flag, initially {@code false} for every name.
     *
     * <p>A name listed more than once is kept once instead of raising a duplicate-key error.
     * The returned map is a mutable {@link HashMap} because the caller flips the flags as
     * sheets are matched.</p>
     *
     * @param processContext provides the property value
     * @param flowFile       FlowFile whose attributes are used to evaluate the expression
     * @return a mutable map of requested names; empty when the property has no value
     */
    private Map<String, Boolean> getDesiredSheets(ProcessContext processContext, FlowFile flowFile) {
        final String value = processContext.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions(flowFile).getValue();
        if (value == null) {
            return new HashMap<>();
        }
        // StringUtils.split only returns null for null input, which is excluded above.
        return Arrays.stream(StringUtils.split(value, DESIRED_SHEETS_DELIMITER))
                .collect(Collectors.toMap(
                        name -> name,
                        name -> Boolean.FALSE,
                        (first, duplicate) -> first,
                        HashMap::new));
    }

    /**
     * Converts one sheet into a child FlowFile containing CSV and routes it to {@code success}.
     *
     * <p>The child is annotated with the sheet name, the number of records written, the source
     * filename (or {@code UNKNOWN} when the parent has none), a new filename ending in
     * {@code _<sheet>.csv} and the CSV MIME type. If a {@link RuntimeException} occurs at any
     * step, the child is routed to {@code failure} with the exception message stored in the
     * {@code <processor class name>.error} attribute.</p>
     *
     * @param processSession       session used to create, write and transfer the child
     * @param flowFile             the incoming Excel FlowFile, used as the child's parent
     * @param sheet                the sheet to convert
     * @param excelSheetReadConfig read settings for this sheet
     * @param cSVFormat            CSV format used for the output
     */
    private void handleExcelSheet(ProcessSession processSession, FlowFile flowFile, Sheet sheet, ExcelSheetReadConfig excelSheetReadConfig, CSVFormat cSVFormat) {
        // Reassigned after every session call so the failure path below always works with the
        // FlowFile reference most recently returned by the session.
        FlowFile sheetFlowFile = processSession.create(flowFile);
        final SheetToCSV sheetToCSV = new SheetToCSV(excelSheetReadConfig, cSVFormat);
        try {
            sheetFlowFile = processSession.write(sheetFlowFile, outputStream -> {
                sheetToCSV.setOutput(outputStream);
                sheet.forEach(row -> {
                    sheetToCSV.startRow(row.getRowNum());
                    row.forEach(sheetToCSV::cell);
                    sheetToCSV.endRow();
                });
                sheetToCSV.close();
            });
            sheetFlowFile = processSession.putAttribute(sheetFlowFile, SHEET_NAME, excelSheetReadConfig.getSheetName());
            sheetFlowFile = processSession.putAttribute(sheetFlowFile, ROW_NUM, Long.toString(sheetToCSV.getRowCount()));

            final String parentFilename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
            final String sourceFilename = StringUtils.isNotEmpty(parentFilename) ? parentFilename : UNKNOWN_SHEET_NAME;
            sheetFlowFile = processSession.putAttribute(sheetFlowFile, SOURCE_FILE_NAME, sourceFilename);

            final String csvFilename = updateFilenameToCSVExtension(
                    sheetFlowFile.getAttribute(CoreAttributes.UUID.key()),
                    sheetFlowFile.getAttribute(CoreAttributes.FILENAME.key()),
                    excelSheetReadConfig.getSheetName());
            sheetFlowFile = processSession.putAttribute(sheetFlowFile, CoreAttributes.FILENAME.key(), csvFilename);
            sheetFlowFile = processSession.putAttribute(sheetFlowFile, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
            processSession.transfer(sheetFlowFile, SUCCESS);
        } catch (RuntimeException e) {
            sheetFlowFile = processSession.putAttribute(sheetFlowFile, ConvertExcelToCSVProcessor.class.getName() + ".error", e.getMessage());
            processSession.transfer(sheetFlowFile, FAILURE);
        }
    }

    /**
     * Lists the requested sheet names that were never matched.
     *
     * @param map requested sheet name mapped to whether it was found
     * @return the unmatched names joined with the sheet delimiter, in the map's iteration order;
     *         an empty string when every name was found
     */
    private String getSheetsNotFound(Map<String, Boolean> map) {
        final List<String> missing = new ArrayList<>();
        for (Map.Entry<String, Boolean> entry : map.entrySet()) {
            if (!entry.getValue()) {
                missing.add(entry.getKey());
            }
        }
        return String.join(DESIRED_SHEETS_DELIMITER, missing);
    }

    /**
     * Builds the filename for a generated CSV FlowFile: {@code <base>_<sheet>.csv}.
     *
     * <p>The base is the original filename with only its trailing extension removed, so a name
     * such as {@code a.xlsx.b.xlsx} yields {@code a.xlsx.b}. When the original filename is
     * empty or {@code null} the supplied identifier is used as the base instead.</p>
     *
     * @param str  fallback identifier used when no filename is available (the caller passes the FlowFile UUID)
     * @param str2 original filename, may be {@code null} or empty
     * @param str3 sheet name appended after an underscore
     * @return the new filename ending in {@code .csv}
     */
    private String updateFilenameToCSVExtension(String str, String str2, String str3) {
        final StringBuilder sb = new StringBuilder();
        if (StringUtils.isNotEmpty(str2)) {
            final String extension = FilenameUtils.getExtension(str2);
            if (StringUtils.isNotEmpty(extension)) {
                // The name ends with "." + extension; cut exactly that suffix and nothing else.
                sb.append(str2, 0, str2.length() - extension.length() - 1);
            } else {
                sb.append(str2);
            }
        } else {
            sb.append(str);
        }
        sb.append("_");
        sb.append(str3);
        sb.append(".");
        sb.append("csv");
        return sb.toString();
    }
}
