package com.googlecode.fascinator.harvester.csv;

import au.com.bytecode.opencsv.CSVReader;
import com.googlecode.fascinator.api.harvester.HarvesterException;
import com.googlecode.fascinator.api.storage.DigitalObject;
import com.googlecode.fascinator.api.storage.Payload;
import com.googlecode.fascinator.common.JsonObject;
import com.googlecode.fascinator.common.JsonSimple;
import com.googlecode.fascinator.common.harvester.impl.GenericHarvester;
import com.googlecode.fascinator.common.storage.StorageUtils;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/googlecode/fascinator/harvester/csv/CSVHarvester.class */
/**
 * Harvester that reads rows from a CSV file and stores each row as a JSON
 * payload on a digital object.
 *
 * <p>Configuration is read from the {@code harvester/csv} section of the JSON
 * config. The keys consulted are: {@code fileLocation} (required),
 * {@code recordIDPrefix}, {@code maxRows}, {@code ignoreFields},
 * {@code includedFields}, {@code payloadId}, {@code batchSize},
 * {@code delimiter}, {@code headerRow}, {@code headerList} and
 * {@code idColumn}.
 *
 * <p>Rows are harvested in batches: each call to {@link #getObjectIdList()}
 * processes at most {@code batchSize} rows, and {@link #hasMoreObjects()}
 * reports whether another call may yield further rows.
 */
public class CSVHarvester extends GenericHarvester {
    /** Column delimiter used when none is configured. */
    private static final char DEFAULT_DELIMITER = ',';

    /** Payload ID used when none is configured. */
    private static final String DEFAULT_PAYLOAD_ID = "metadata.json";

    /** Number of rows per batch when none (or an invalid value) is configured. */
    private static final int DEFAULT_BATCH_SIZE = 50;

    private final Logger log = LoggerFactory.getLogger(CSVHarvester.class);

    /** Column names, taken from the header row or the configured header list. */
    private List<String> dataFields;

    /** Column names that are never copied into the record data. */
    private List<String> ignoredFields;

    /** Column names that are copied into the record data. */
    private List<String> includedFields;

    /** Name of the column supplying the record ID; null to use the row number. */
    private String idColumn;

    /** Prefix prepended to every record ID. */
    private String idPrefix;

    /** Maximum number of rows to harvest in total; values <= 0 mean no limit. */
    private long maxRows;

    /** ID of the payload that receives the JSON for each row. */
    private String payloadId;

    /** Maximum number of rows processed per call to getObjectIdList(). */
    private int batchSize;

    /** Number of data rows read so far, across all batches. */
    private long currentRow;

    /** Whether a further call to getObjectIdList() may produce more rows. */
    private boolean hasMore;

    /** Open reader over the CSV file; null before init() and after shutdown(). */
    private CSVReader csvReader;

    /** Name (without directory) of the CSV file; part of the object ID hash. */
    private String filename;

    /** Creates the harvester with plugin ID "csv" and name "CSV Harvester". */
    public CSVHarvester() {
        super("csv", "CSV Harvester");
    }

    /**
     * Reads the configuration, opens the CSV file and determines the column
     * names.
     *
     * @throws HarvesterException if no file is configured, the file does not
     *         exist, the delimiter is empty, a header row is expected but the
     *         file is empty, the configured ID column is not a known column,
     *         or the file cannot be read
     */
    public void init() throws HarvesterException {
        JsonSimple options = new JsonSimple(getJsonConfig().getObject("harvester", "csv"));

        String fileLocation = options.getString((String) null, "fileLocation");
        if (fileLocation == null) {
            throw new HarvesterException("No data file provided!");
        }
        File csvFile = new File(fileLocation);
        if (!csvFile.exists()) {
            throw new HarvesterException("Could not find CSV file '" + fileLocation + "'");
        }
        this.filename = csvFile.getName();

        this.idPrefix = options.getString("", "recordIDPrefix");
        this.maxRows = options.getInteger(Integer.valueOf(-1), "maxRows").intValue();
        this.ignoredFields = getStringList(options, "ignoreFields");
        this.includedFields = getStringList(options, "includedFields");
        this.payloadId = options.getString(DEFAULT_PAYLOAD_ID, "payloadId");

        this.batchSize = options.getInteger(Integer.valueOf(DEFAULT_BATCH_SIZE), "batchSize").intValue();
        if (this.batchSize <= 0) {
            // A zero batch size would otherwise fail with a division by zero
            // while harvesting; fall back to the default instead.
            this.log.warn("Invalid batchSize {}, using default of {}",
                    Integer.valueOf(this.batchSize), Integer.valueOf(DEFAULT_BATCH_SIZE));
            this.batchSize = DEFAULT_BATCH_SIZE;
        }

        String delimiter = options.getString(String.valueOf(DEFAULT_DELIMITER), "delimiter");
        if (delimiter == null || delimiter.length() == 0) {
            throw new HarvesterException("CSV delimiter must not be empty!");
        }

        this.hasMore = true;
        boolean initialised = false;
        try {
            this.csvReader = new CSVReader(
                    new InputStreamReader(new FileInputStream(csvFile), "UTF-8"),
                    delimiter.charAt(0));

            if (options.getBoolean(Boolean.TRUE, "headerRow").booleanValue()) {
                // readNext() yields null on an empty file; report that as a
                // harvester error rather than letting Arrays.asList() fail.
                String[] header = this.csvReader.readNext();
                if (header == null) {
                    throw new HarvesterException(
                            "CSV file '" + fileLocation + "' is empty, no header row found!");
                }
                this.dataFields = Arrays.asList(header);
            } else {
                this.dataFields = getStringList(options, "headerList");
            }

            this.idColumn = options.getString((String) null, "idColumn");
            if (this.idColumn != null && !this.dataFields.contains(this.idColumn)) {
                throw new HarvesterException("'" + this.idColumn + "' is invalid!");
            }
            initialised = true;
        } catch (IOException e) {
            throw new HarvesterException(e);
        } finally {
            if (!initialised) {
                // Do not leave the file open when initialisation fails.
                closeReader();
            }
        }
    }

    /**
     * Reads a list of strings from the configuration.
     *
     * @param jsonSimple configuration to read from
     * @param objArr path to the list within the configuration
     * @return the configured list, or an empty list if the lookup returned null
     */
    private List<String> getStringList(JsonSimple jsonSimple, Object... objArr) {
        List<String> values = jsonSimple.getStringList(objArr);
        if (values == null) {
            return Collections.emptyList();
        }
        return values;
    }

    /**
     * Closes the CSV reader if it is open. A failure to close is logged and
     * otherwise ignored.
     *
     * @throws HarvesterException declared for interface compatibility; not
     *         thrown by this implementation
     */
    public void shutdown() throws HarvesterException {
        closeReader();
    }

    /** Closes and forgets the CSV reader, logging (not propagating) failures. */
    private void closeReader() {
        if (this.csvReader == null) {
            return;
        }
        try {
            this.csvReader.close();
        } catch (IOException e) {
            this.log.warn("Failed to close CSVReader!", e);
        }
        this.csvReader = null;
    }

    /**
     * Reports whether another call to {@link #getObjectIdList()} may return
     * further objects.
     *
     * @return true until the end of the file or the row limit has been reached
     */
    public boolean hasMoreObjects() {
        return this.hasMore;
    }

    /**
     * Harvests the next batch of rows, creating one stored object per row.
     *
     * <p>Reading stops when the batch size is reached, when the configured
     * {@code maxRows} total has been harvested, or at the end of the file.
     *
     * @return the IDs of the objects created in this batch; may be empty
     * @throws HarvesterException if the file cannot be read or a record cannot
     *         be stored
     */
    public Set<String> getObjectIdList() throws HarvesterException {
        Set<String> objectIds = new HashSet<String>();
        boolean endOfFile = false;
        boolean limitReached = isRowLimitReached();
        int rowsInBatch = 0;

        try {
            while (!limitReached) {
                String[] row = this.csvReader.readNext();
                if (row == null) {
                    endOfFile = true;
                    break;
                }
                rowsInBatch++;
                this.currentRow++;
                objectIds.add(createRecord(row));

                // Evaluate the overall limit before the batch check so that a
                // batch ending exactly on the limit still marks the harvest
                // as finished.
                limitReached = isRowLimitReached();
                if (rowsInBatch >= this.batchSize) {
                    this.log.debug("Batch size reached at row {}", Long.valueOf(this.currentRow));
                    break;
                }
            }
        } catch (IOException e) {
            throw new HarvesterException(e);
        }

        this.hasMore = !endOfFile && !limitReached;
        if (!objectIds.isEmpty()) {
            this.log.debug("Created {} objects", Integer.valueOf(objectIds.size()));
        }
        return objectIds;
    }

    /** Returns true when a positive maxRows is set and that many rows were read. */
    private boolean isRowLimitReached() {
        return this.maxRows > 0 && this.currentRow >= this.maxRows;
    }

    /**
     * Stores one CSV row as a JSON payload on a digital object.
     *
     * <p>The JSON contains the record ID prefix, the selected column values
     * under {@code data}, and a {@code dc.identifier} under {@code metadata}.
     * The object ID is the MD5 hex digest of the file name, the ID prefix and
     * the record ID, where the record ID is the value of the ID column if one
     * is configured and otherwise the current row number.
     *
     * @param strArr the column values of the row
     * @return the ID of the stored object
     * @throws HarvesterException if the object or payload cannot be stored
     */
    private String createRecord(String[] strArr) throws HarvesterException {
        String recordId = Long.toString(this.currentRow);
        JsonObject data = new JsonObject();

        // A row may hold more values than there are known column names; such
        // extra values have no field name and are skipped instead of failing
        // with an IndexOutOfBoundsException.
        int columnCount = Math.min(strArr.length, this.dataFields.size());
        if (strArr.length > columnCount) {
            this.log.warn("Row {} has more values than known columns, extra values ignored",
                    Long.valueOf(this.currentRow));
        }
        for (int i = 0; i < columnCount; i++) {
            String field = this.dataFields.get(i);
            String value = strArr[i];
            // NOTE(review): a column is only stored when it is listed in
            // includedFields, so an empty includedFields yields empty data.
            // Confirm this is the intended default.
            if (this.includedFields.contains(field) && !this.ignoredFields.contains(field)) {
                data.put(field, value);
            }
            if (field.equals(this.idColumn)) {
                recordId = value;
            }
        }

        JsonObject metadata = new JsonObject();
        metadata.put("dc.identifier", this.idPrefix + recordId);

        JsonObject record = new JsonObject();
        record.put("recordIDPrefix", this.idPrefix);
        record.put("data", data);
        record.put("metadata", metadata);

        try {
            String oid = DigestUtils.md5Hex(this.filename + this.idPrefix + recordId);
            DigitalObject digitalObject = StorageUtils.getDigitalObject(getStorage(), oid);
            Payload payload = StorageUtils.createOrUpdatePayload(digitalObject, this.payloadId,
                    IOUtils.toInputStream(new JsonSimple(record).toString(true), "UTF-8"));
            payload.setContentType("application/json");
            payload.close();
            digitalObject.getMetadata().setProperty("render-pending", "true");
            digitalObject.close();
            return oid;
        } catch (Exception e) {
            throw new HarvesterException("Failed to store metadata", e);
        }
    }
}
