package ai.h2o.mojos.runtime.utils;

import ai.h2o.mojos.runtime.MojoPipeline;
import ai.h2o.mojos.runtime.api.MojoColumnMeta;
import ai.h2o.mojos.runtime.frame.MojoFrame;
import ai.h2o.mojos.runtime.frame.MojoFrameBuilder;
import ai.h2o.mojos.runtime.frame.MojoRowBuilder;
import com.opencsv.CSVReader;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:ai/h2o/mojos/runtime/utils/BatchedCsvReader.class */
/**
 * Reads rows from a CSV iterator in batches and turns every batch into an input
 * {@link MojoFrame} for the given {@link MojoPipeline}.
 *
 * <p>At construction time each mojo input column is bound to a CSV column index:
 * <ul>
 *   <li>when the configuration says headers are missing, a column is bound to its own position
 *       unless {@code headersMap} maps it to an index reference of the form {@code @N};</li>
 *   <li>otherwise the first CSV row is consumed as the header and a column is bound by its own
 *       name, or by the {@code headersMap} entry, which may be a CSV column name or {@code @N}.</li>
 * </ul>
 *
 * <p>The object is its own iterator ({@link #iterator()} returns {@code this}), so it can be
 * traversed only once.
 */
public class BatchedCsvReader implements Iterable<MojoFrame>, Iterator<MojoFrame> {
    private static final Logger log = LoggerFactory.getLogger(BatchedCsvReader.class);
    private static final boolean STRIP_CR_FROM_LAST_COLUMN = Consts.getSysProp("parser.csv.stripCrFromLastColumn", true);
    /** Index reference syntax accepted in column mappings, e.g. {@code @3}. */
    private static final Pattern BY_CSV_INDEX = Pattern.compile("@(\\d+)");

    private final Iterator<String[]> csvReaderIter;
    private final int batchSize;
    private final MojoPipeline pipeline;
    /** CSV column index to read for each mojo input column, indexed by mojo column position. */
    private final int[] csvIndexByMojoIndex;
    private final CsvProcessingConfig config;
    /** Expected width of every data row; {@code -1} until it is known. */
    private int csvColumnCount = -1;
    private long totalDataRows = 0;
    private boolean hasNext = true;
    private MojoFrame outputFrame;

    /**
     * Creates the reader and binds every mojo input column to a CSV column.
     * Unless {@code config.headersMissing} is set, the first CSV row is consumed here as the header.
     *
     * @param pipeline  pipeline providing input metadata and frame builders
     * @param csvReader source of CSV rows
     * @param batchSize number of rows after which a batch is closed
     * @param config    header handling options and the optional column mapping
     * @throws IllegalArgumentException      if the header row is absent, a required column cannot be
     *                                       found, or a mapping is invalid
     * @throws UnsupportedOperationException if a mojo column name occurs more than once in the header
     */
    public BatchedCsvReader(MojoPipeline pipeline, CSVReader csvReader, int batchSize, CsvProcessingConfig config) {
        this.csvReaderIter = csvReader.iterator();
        this.batchSize = batchSize;
        this.pipeline = pipeline;
        this.config = config;
        List<MojoColumnMeta> mojoColumns = pipeline.getInputMeta().getColumns();
        this.csvIndexByMojoIndex = new int[mojoColumns.size()];
        if (config.headersMissing) {
            bindColumnsWithoutHeader(mojoColumns);
        } else {
            // With a header the row width is known up front; every data row must match it.
            this.csvColumnCount = bindColumnsByHeader(mojoColumns);
        }
    }

    /** Binds mojo columns positionally, honouring {@code @N} overrides from the mapping. */
    private void bindColumnsWithoutHeader(List<MojoColumnMeta> mojoColumns) {
        int mojoIdx = 0;
        for (MojoColumnMeta column : mojoColumns) {
            String mojoName = column.getColumnName();
            String mapping = config.headersMap.get(mojoName);
            int csvIdx = mapping == null ? mojoIdx : csvParseColumnRef(mapping);
            log.debug("Mojo column '{}':{} := CSV[{}]", mojoName, column.getColumnType(), csvIdx);
            csvIndexByMojoIndex[mojoIdx] = csvIdx;
            mojoIdx++;
        }
    }

    /**
     * Consumes the header row and binds mojo columns by name or by {@code @N} reference.
     *
     * @return number of columns in the header row
     */
    private int bindColumnsByHeader(List<MojoColumnMeta> mojoColumns) {
        String[] header = csvReaderIter.hasNext() ? csvReaderIter.next() : null;
        if (header == null) {
            throw new IllegalArgumentException("CSV input is empty: header row is required but it is not present in input data");
        }
        Set<String> duplicates = new LinkedHashSet<String>();
        Map<String, Integer> csvIndexByName = createCsvIndexLookup(config, header, duplicates);
        if (!duplicates.isEmpty()) {
            log.warn("Duplicate CSV column names detected [{}]: {}", duplicates.size(), duplicates);
            log.warn("This is a serious error, however it might be tolerated - if these columns are unused. Still, you really SHOULD fix it.");
        }
        int mojoIdx = 0;
        for (MojoColumnMeta column : mojoColumns) {
            String mojoName = column.getColumnName();
            if (duplicates.contains(mojoName)) {
                throw new UnsupportedOperationException(String.format("Model requires a CSV column that happens to be duplicate in your input: '%s'. Please check above log messages and fix the CSV file accordingly.", mojoName));
            }
            String mapping = config.headersMap.get(mojoName);
            String csvName;
            Integer found;
            if (mapping == null) {
                csvName = mojoName;
                found = csvIndexByName.get(config.headersIgnoreCase ? mojoName.toUpperCase(Locale.ROOT) : mojoName);
            } else {
                int referenced = parseIndexRef(mapping);
                if (referenced < 0) {
                    csvName = mapping;
                    found = csvIndexByName.get(mapping);
                } else {
                    if (referenced >= header.length) {
                        throw new IllegalArgumentException(String.format("Column mapping references too high index(%s); there is only %d headers", mapping, header.length));
                    }
                    // Use the index itself: going back through the header text would break on
                    // untrimmed names and pick the wrong column when names are duplicated.
                    csvName = header[referenced];
                    found = referenced;
                }
            }
            if (found == null) {
                throw new IllegalArgumentException(String.format("CSV column named '%s' is required but it is not present in input data", csvName));
            }
            int csvIdx = found;
            log.debug("Mojo column '{}':{} := CSV[{}]'{}'", mojoName, column.getColumnType(), csvIdx, header[csvIdx]);
            csvIndexByMojoIndex[mojoIdx] = csvIdx;
            mojoIdx++;
        }
        return header.length;
    }

    /**
     * Builds a lookup from header text to CSV column index.
     * For every column the map holds the key {@code @index}, the trimmed header text and, when
     * {@code headersIgnoreCase} is set, the upper-cased trimmed text.
     *
     * @param csvProcessingConfig supplies the {@code headersIgnoreCase} flag
     * @param strArr              header row; {@code null} cells are treated as empty names
     * @param set                 receives every trimmed name that collided with an existing key
     * @return the lookup map; for a repeated name the last occurrence wins
     */
    private static Map<String, Integer> createCsvIndexLookup(CsvProcessingConfig csvProcessingConfig, String[] strArr, Set<String> set) {
        Map<String, Integer> lookup = new LinkedHashMap<String, Integer>();
        for (int csvIdx = 0; csvIdx < strArr.length; csvIdx++) {
            String raw = strArr[csvIdx];
            String name = raw == null ? "" : raw.trim();
            lookup.put("@" + csvIdx, csvIdx);
            Integer previous = lookup.put(name, csvIdx);
            if (previous != null) {
                log.error(String.format("CSV file has multiple columns named '%s' - check column #%d and #%d (zero based)", name, previous, csvIdx));
                set.add(name);
            }
            if (csvProcessingConfig.headersIgnoreCase) {
                // Locale.ROOT keeps case folding independent of the JVM default locale.
                lookup.put(name.toUpperCase(Locale.ROOT), csvIdx);
            }
        }
        return lookup;
    }

    /**
     * Parses an {@code @N} index reference.
     *
     * @return the referenced index, or {@code -1} when the text is not an index reference
     */
    private static int parseIndexRef(String ref) {
        Matcher matcher = BY_CSV_INDEX.matcher(ref);
        return matcher.matches() ? Integer.parseInt(matcher.group(1)) : -1;
    }

    /**
     * Parses a mapping that must be an {@code @N} index reference (used when there is no header).
     *
     * @throws IllegalArgumentException if the text is not an index reference
     */
    private static int csvParseColumnRef(String str) {
        int index = parseIndexRef(str);
        if (index < 0) {
            throw new IllegalArgumentException(String.format("Column must be referenced by index, because header row is not present: '%s'", str));
        }
        return index;
    }

    /** Returns the number of data rows read so far across all batches. */
    public long getTotalDataRows() {
        return this.totalDataRows;
    }

    /**
     * Reports whether another batch may be requested. The flag starts as {@code true} and is
     * refreshed only at the end of {@link #next()}.
     */
    @Override // java.util.Iterator
    public boolean hasNext() {
        return this.hasNext;
    }

    /**
     * Reads rows until the batch size is reached or the CSV is exhausted and returns them as an
     * input frame. The matching output frame is available from {@link #getOutputFrame()}.
     *
     * @throws NoSuchElementException   if a previous call already found the CSV exhausted
     * @throws IllegalArgumentException if a row has an unexpected width, a mapped column index is
     *                                  outside the row, or a value is rejected by the row builder
     */
    @Override // java.util.Iterator
    public MojoFrame next() {
        if (!this.hasNext) {
            throw new NoSuchElementException(String.format("No more data after reading %d rows", this.totalDataRows));
        }
        MojoFrameBuilder inputBuilder = this.pipeline.getInputFrameBuilder();
        MojoFrameBuilder outputBuilder = this.pipeline.getOutputFrameBuilder(inputBuilder);
        List<MojoColumnMeta> mojoColumns = this.pipeline.getInputMeta().getColumns();
        int mojoColumnCount = this.csvIndexByMojoIndex.length;
        int rowsInBatch = 0;
        MojoRowBuilder rowBuilder = inputBuilder.getMojoRowBuilder();
        while (this.csvReaderIter.hasNext()) {
            String[] csvRow = this.csvReaderIter.next();
            if (csvRow.length != this.csvColumnCount) {
                if (this.csvColumnCount >= 0) {
                    throw new IllegalArgumentException(String.format("Invalid CSV data: row #%d (in current batch) has %d columns but we expect %d columns", this.totalDataRows, csvRow.length, this.csvColumnCount));
                }
                // No header was read: the first data row defines the expected width.
                this.csvColumnCount = csvRow.length;
            }
            for (int mojoIdx = 0; mojoIdx < mojoColumnCount; mojoIdx++) {
                int csvIdx = this.csvIndexByMojoIndex[mojoIdx];
                if (csvIdx >= csvRow.length) {
                    throw new IllegalArgumentException(String.format("Invalid CSV data: row #%d has %d columns but mojo column '%s' is mapped to CSV column index %d", this.totalDataRows, csvRow.length, mojoColumns.get(mojoIdx).getColumnName(), csvIdx));
                }
                String cell = csvRow[csvIdx];
                // The stray CR can only sit in the last cell of the CSV row, which is not
                // necessarily the last mojo column.
                boolean lastCsvCell = csvIdx == csvRow.length - 1;
                String value = (STRIP_CR_FROM_LAST_COLUMN && lastCsvCell && cell != null) ? BatchedCsvMojoProcessor.stripCrFromEol(cell) : cell;
                try {
                    log.trace("mojocol#{} ['{}'] := {}", mojoIdx, mojoColumns.get(mojoIdx).getColumnName(), value);
                    rowBuilder.setValue(mojoIdx, value);
                } catch (Exception e) {
                    throw new IllegalArgumentException(String.format("Data parsing problem at row=%d and column=%d (0-based indices, header skipped): %s", this.totalDataRows, mojoIdx, e.getMessage()), e);
                }
            }
            rowBuilder = inputBuilder.addRow(rowBuilder);
            rowsInBatch++;
            this.totalDataRows++;
            if (rowsInBatch >= this.batchSize) {
                break;
            }
        }
        this.hasNext = this.csvReaderIter.hasNext();
        this.outputFrame = outputBuilder.toMojoFrame(rowsInBatch);
        return inputBuilder.toMojoFrame();
    }

    /**
     * Returns the output frame created by the most recent {@link #next()} call, or {@code null}
     * if {@link #next()} has not been called yet.
     */
    public MojoFrame getOutputFrame() {
        return this.outputFrame;
    }

    /** Not supported; always throws {@link UnsupportedOperationException}. */
    @Override // java.util.Iterator
    public void remove() {
        throw new UnsupportedOperationException("remove");
    }

    /** Returns this object; the reader is a single-pass iterable. */
    @Override // java.lang.Iterable
    public Iterator<MojoFrame> iterator() {
        return this;
    }
}
