package org.apache.hugegraph.loader.reader.file;

import java.io.IOException;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hugegraph.loader.constant.Constants;
import org.apache.hugegraph.loader.exception.LoadException;
import org.apache.hugegraph.loader.reader.Readable;
import org.apache.hugegraph.loader.reader.line.Line;
import org.apache.hugegraph.loader.source.file.FileSource;
import org.apache.hugegraph.loader.util.ParquetUtil;
import org.apache.hugegraph.util.Log;
import org.apache.parquet.column.page.PageReadStore;
import org.apache.parquet.example.data.simple.SimpleGroup;
import org.apache.parquet.example.data.simple.convert.GroupRecordConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.apache.parquet.io.ColumnIOFactory;
import org.apache.parquet.io.MessageColumnIO;
import org.apache.parquet.io.RecordReader;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.Type;
import org.slf4j.Logger;

/* loaded from: input_file:org/apache/hugegraph/loader/reader/file/ParquetFileLineFetcher.class */
/**
 * {@link FileLineFetcher} implementation that reads rows from a Parquet file.
 *
 * <p>The file is consumed one row group at a time: {@link #fetch()} pulls the
 * next row group from the underlying {@link ParquetFileReader} whenever the
 * current one is exhausted, converts every top-level field of the record via
 * {@link ParquetUtil#convertObject}, and wraps the values in a {@link Line}.
 *
 * <p>Instances keep mutable per-file state (reader, schema, row counters) and
 * contain no synchronization.
 */
public class ParquetFileLineFetcher extends FileLineFetcher {
    private static final Logger LOG = Log.logger(ParquetFileLineFetcher.class);

    /** Hadoop configuration used to resolve and open the input path. */
    private final Configuration conf;
    /** File passed to the last {@link #openReader(Readable)} call; kept only for error messages. */
    private Readable currentReadable;
    private ParquetFileReader reader;
    /** Schema taken from the footer of the currently opened file. */
    private MessageType schema;
    private MessageColumnIO columnIO;
    /** Record reader bound to the row group held in {@link #pages}. */
    private RecordReader<?> recordReader;
    /** Row group currently being read, or {@code null} if none has been loaded. */
    private PageReadStore pages;
    /** Number of rows in {@link #pages}; {@code -1} after a reset. */
    private long pagesRowCount;
    /** Number of rows already read from {@link #pages}; {@code -1} after a reset. */
    private long currRowOffset;

    /**
     * Creates a fetcher that uses a default Hadoop {@link Configuration}.
     *
     * @param fileSource the file source description passed to the superclass
     */
    public ParquetFileLineFetcher(FileSource fileSource) {
        this(fileSource, new Configuration());
    }

    /**
     * Creates a fetcher that uses the given Hadoop configuration.
     *
     * @param fileSource    the file source description passed to the superclass
     * @param configuration the Hadoop configuration used when opening files
     */
    public ParquetFileLineFetcher(FileSource fileSource, Configuration configuration) {
        super(fileSource);
        this.conf = configuration;
        resetReader();
    }

    /**
     * @return {@code true} if a reader reference is currently held
     */
    @Override // org.apache.hugegraph.loader.reader.file.FileLineFetcher, org.apache.hugegraph.loader.reader.line.LineFetcher
    public boolean ready() {
        return this.reader != null;
    }

    /**
     * Drops every per-file reference and counter. This does not close the
     * underlying reader; call {@link #closeReader()} first if it is still open.
     */
    @Override // org.apache.hugegraph.loader.reader.file.FileLineFetcher, org.apache.hugegraph.loader.reader.line.LineFetcher
    public void resetReader() {
        this.currentReadable = null;
        this.reader = null;
        this.schema = null;
        this.columnIO = null;
        // Also drop the record reader so no reference to the old row group is retained.
        this.recordReader = null;
        this.pages = null;
        this.pagesRowCount = -1L;
        this.currRowOffset = -1L;
    }

    /**
     * @return always {@code true}; the header is derived from the Parquet schema
     *         by {@link #readHeader(List)}
     */
    @Override // org.apache.hugegraph.loader.reader.file.FileLineFetcher, org.apache.hugegraph.loader.reader.line.LineFetcher
    public boolean needReadHeader() {
        return true;
    }

    /**
     * Reads the column names from the schema of the first file in the list.
     * The file is opened only for this purpose and closed again afterwards;
     * a failure to close is logged and otherwise ignored (best effort).
     *
     * @param list the files to be loaded; only the first element is inspected
     * @return the names of the top-level schema fields, in schema order
     * @throws LoadException if the file cannot be opened
     */
    @Override // org.apache.hugegraph.loader.reader.file.FileLineFetcher, org.apache.hugegraph.loader.reader.line.LineFetcher
    public String[] readHeader(List<Readable> list) {
        Readable readable = list.get(0);
        openReader(readable);
        try {
            return parseHeader(this.schema);
        } finally {
            try {
                closeReader();
            } catch (IOException e) {
                // Pass the exception as the trailing argument so its stack trace is logged.
                LOG.warn("Failed to close reader of '{}'", readable, e);
            }
        }
    }

    /**
     * Opens the given file, loads its schema from the footer and resets the
     * offset tracked by the superclass.
     *
     * @param readable the file to open
     * @throws LoadException if an I/O error occurs while opening the file
     */
    @Override // org.apache.hugegraph.loader.reader.file.FileLineFetcher, org.apache.hugegraph.loader.reader.line.LineFetcher
    public void openReader(Readable readable) {
        // Remember the file so later read failures can name it.
        this.currentReadable = readable;
        try {
            this.reader = ParquetFileReader.open(HadoopInputFile.fromPath(readable.path(), this.conf));
            this.schema = this.reader.getFooter().getFileMetaData().getSchema();
            this.columnIO = new ColumnIOFactory().getColumnIO(this.schema);
            resetOffset();
        } catch (IOException e) {
            throw new LoadException("Failed to open parquet reader for '%s'", e, readable);
        }
    }

    /**
     * Reads the next record of the opened file.
     *
     * @return the next line, or {@code null} when no further rows are available
     * @throws LoadException if the next row group cannot be read
     */
    @Override // org.apache.hugegraph.loader.reader.file.FileLineFetcher, org.apache.hugegraph.loader.reader.line.LineFetcher
    public Line fetch() {
        boolean rowGroupExhausted = this.pages == null || this.currRowOffset >= this.pagesRowCount;
        if (rowGroupExhausted && !fetchNextPage()) {
            return null;
        }
        int size = this.schema.getFields().size();
        Object[] values = new Object[size];
        SimpleGroup group = (SimpleGroup) this.recordReader.read();
        for (int i = 0; i < size; i++) {
            values[i] = ParquetUtil.convertObject(group, i);
        }
        String rawLine = StringUtils.join(values, Constants.COMMA_STR);
        this.currRowOffset++;
        increaseOffset();
        return new Line(rawLine, source().header(), values);
    }

    /**
     * Closes the underlying Parquet reader if one is held. The reference itself
     * is kept; it is cleared by {@link #resetReader()}.
     *
     * @throws IOException if closing the reader fails
     */
    @Override // org.apache.hugegraph.loader.reader.file.FileLineFetcher, org.apache.hugegraph.loader.reader.line.LineFetcher
    public void closeReader() throws IOException {
        if (this.reader != null) {
            this.reader.close();
        }
    }

    /**
     * Advances to the next row group that contains at least one row and binds
     * a record reader to it.
     *
     * @return {@code true} if a non-empty row group was loaded, {@code false}
     *         if the file has no more row groups
     * @throws LoadException if an I/O error occurs while reading the row group
     */
    private boolean fetchNextPage() {
        try {
            // Skip empty row groups: reporting "no more data" for one of them
            // would make fetch() return null while later row groups still hold rows.
            do {
                this.pages = this.reader.readNextRowGroup();
                if (this.pages == null) {
                    return false;
                }
                this.pagesRowCount = this.pages.getRowCount();
            } while (this.pagesRowCount <= 0L);
            this.recordReader = this.columnIO.getRecordReader(this.pages, new GroupRecordConverter(this.schema));
            this.currRowOffset = 0L;
            return true;
        } catch (IOException e) {
            // Supply the file for the '%s' placeholder (same call shape as in openReader).
            throw new LoadException("Failed to read next page for '%s'", e, this.currentReadable);
        }
    }

    /**
     * Extracts the names of the top-level fields of a schema.
     *
     * @param messageType the Parquet schema
     * @return the field names in schema order
     */
    private String[] parseHeader(MessageType messageType) {
        List<Type> fields = messageType.getFields();
        String[] names = new String[fields.size()];
        int index = 0;
        for (Type field : fields) {
            names[index++] = field.getName();
        }
        return names;
    }
}
