/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nifi.excel;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.excel.CellFieldTypeReader;
import org.apache.nifi.excel.ExcelReader;
import org.apache.nifi.excel.ExcelUtils;
import org.apache.nifi.excel.RowEvaluationStrategy;
import org.apache.nifi.excel.StandardCellFieldTypeReader;
import org.apache.nifi.schema.access.SchemaNotFoundException;
import org.apache.nifi.schema.inference.FieldTypeInference;
import org.apache.nifi.schema.inference.RecordSource;
import org.apache.nifi.schema.inference.SchemaInferenceEngine;
import org.apache.nifi.schema.inference.TimeValueInference;
import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.Row;

public class ExcelStartingRowSchemaInference
implements SchemaInferenceEngine<Row> {
    static final AllowableValue USE_STARTING_ROW = new AllowableValue("Use Starting Row", "Use Starting Row", "The configured first row of the Excel file is a header line that contains the names of the columns. The schema will be derived by using the column names in the header of the first sheet and dependent on the strategy chosen either the subsequent 10 rows or all of the subsequent rows. However the configured header rows of subsequent sheets are skipped. NOTE: If there are duplicate column names then each subsequent duplicate column name is given a one up number. For example, column names \"Name\", \"Name\" will be changed to \"Name\", \"Name_1\".");
    private final RowEvaluationStrategy rowEvaluationStrategy;
    private final int firstRow;
    private final CellFieldTypeReader cellFieldTypeReader;
    private final DataFormatter dataFormatter;

    public ExcelStartingRowSchemaInference(RowEvaluationStrategy rowEvaluationStrategy, int firstRow, TimeValueInference timeValueInference) {
        this.rowEvaluationStrategy = rowEvaluationStrategy;
        this.firstRow = firstRow;
        this.cellFieldTypeReader = new StandardCellFieldTypeReader(timeValueInference);
        this.dataFormatter = new DataFormatter();
    }

    public RecordSchema inferSchema(RecordSource<Row> recordSource) throws IOException {
        Row row;
        LinkedHashMap<String, FieldTypeInference> typeMap = new LinkedHashMap<String, FieldTypeInference>();
        int zeroBasedFirstRow = ExcelReader.getZeroBasedIndex(this.firstRow);
        List<String> fieldNames = null;
        int index = 0;
        while ((row = (Row)recordSource.next()) != null) {
            if (index == 0) {
                fieldNames = this.getFieldNames(this.firstRow, row);
            } else {
                if (row.getRowNum() == zeroBasedFirstRow) continue;
                if (RowEvaluationStrategy.STANDARD == this.rowEvaluationStrategy) {
                    if (index > 10) break;
                    this.inferSchema(row, fieldNames, typeMap);
                } else {
                    this.inferSchema(row, fieldNames, typeMap);
                }
            }
            ++index;
        }
        return this.createSchema(typeMap);
    }

    private List<String> getFieldNames(int firstRowIndex, Row row) throws IOException {
        if (!ExcelUtils.hasCells(row)) {
            throw new IOException((Throwable)new SchemaNotFoundException(String.format("Field names could not be determined from configured header row %s, as this row has no cells with data", firstRowIndex)));
        }
        ArrayList<String> fieldNames = new ArrayList<String>();
        for (int index = 0; index < row.getLastCellNum(); ++index) {
            Cell cell = row.getCell(index);
            String fieldName = this.dataFormatter.formatCellValue(cell);
            if (fieldName == null || fieldName.isEmpty()) {
                fieldNames.add("column_" + index);
                continue;
            }
            fieldNames.add(fieldName);
        }
        List<String> renamedDuplicateFieldNames = this.renameDuplicateFieldNames(fieldNames);
        return renamedDuplicateFieldNames;
    }

    private List<String> renameDuplicateFieldNames(List<String> fieldNames) {
        HashMap<String, Integer> fieldNameCounts = new HashMap<String, Integer>();
        ArrayList<String> renamedDuplicateFieldNames = new ArrayList<String>();
        for (String fieldName : fieldNames) {
            if (fieldNameCounts.containsKey(fieldName)) {
                int count = (Integer)fieldNameCounts.get(fieldName);
                renamedDuplicateFieldNames.add("%s_%d".formatted(fieldName, count));
                fieldNameCounts.put(fieldName, count + 1);
                continue;
            }
            fieldNameCounts.put(fieldName, 1);
            renamedDuplicateFieldNames.add(fieldName);
        }
        return renamedDuplicateFieldNames;
    }

    private void inferSchema(Row row, List<String> fieldNames, Map<String, FieldTypeInference> typeMap) throws IOException {
        if (ExcelUtils.hasCells(row)) {
            if (row.getLastCellNum() > fieldNames.size()) {
                throw new IOException((Throwable)new SchemaNotFoundException(String.format("Row %s has %s cells, more than the expected %s number of field names", row.getRowNum(), row.getLastCellNum(), fieldNames.size())));
            }
            IntStream.range(0, row.getLastCellNum()).forEach(index -> {
                Cell cell = row.getCell(index);
                String fieldName = (String)fieldNames.get(index);
                this.cellFieldTypeReader.inferCellFieldType(cell, fieldName, typeMap);
            });
        }
    }

    private RecordSchema createSchema(Map<String, FieldTypeInference> inferences) throws IOException {
        if (inferences.isEmpty()) {
            throw new IOException((Throwable)new SchemaNotFoundException("Failed to infer schema from empty rows"));
        }
        List recordFields = inferences.entrySet().stream().map(entry -> new RecordField((String)entry.getKey(), ((FieldTypeInference)entry.getValue()).toDataType(), true)).collect(Collectors.toList());
        return new SimpleRecordSchema(recordFields);
    }
}

