package org.apache.pinot.tools.admin.command;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.pinot.common.segment.ReadMode;
import org.apache.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import org.apache.pinot.core.indexsegment.immutable.ImmutableSegment;
import org.apache.pinot.core.indexsegment.immutable.ImmutableSegmentLoader;
import org.apache.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;
import org.apache.pinot.plugin.inputformat.csv.CSVRecordReader;
import org.apache.pinot.plugin.inputformat.csv.CSVRecordReaderConfig;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.data.readers.FileFormat;
import org.apache.pinot.spi.data.readers.RecordReader;
import org.apache.pinot.spi.utils.JsonUtils;
import org.apache.pinot.tools.Command;
import org.kohsuke.args4j.Option;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Admin command ({@code CreateSegment}) that builds one Pinot segment per input data file found
 * under a data directory.
 *
 * <p>Options may come from the command line and/or from a JSON generator config file. When both
 * specify a value, the command-line value wins and a warning is logged.
 */
public class CreateSegmentCommand extends AbstractBaseAdminCommand implements Command {
    private static final Logger LOGGER = LoggerFactory.getLogger(CreateSegmentCommand.class);

    @Option(name = "-generatorConfigFile", metaVar = "<string>", usage = "Config file for segment generator.")
    private String _generatorConfigFile;

    @Option(name = "-dataDir", metaVar = "<string>", usage = "Directory containing the data.")
    private String _dataDir;

    @Option(name = "-format", metaVar = "<AVRO/CSV/JSON/THRIFT/PARQUET/ORC>", usage = "Input data format.")
    private FileFormat _format;

    @Option(name = "-outDir", metaVar = "<string>", usage = "Name of output directory.")
    private String _outDir;

    @Option(name = "-tableName", metaVar = "<string>", usage = "Name of the table.")
    private String _tableName;

    @Option(name = "-segmentName", metaVar = "<string>", usage = "Name of the segment.")
    private String _segmentName;

    @Option(name = "-timeColumnName", metaVar = "<string>", usage = "Primary time column.")
    private String _timeColumnName;

    @Option(name = "-schemaFile", metaVar = "<string>", usage = "File containing schema for data.")
    private String _schemaFile;

    @Option(name = "-readerConfigFile", metaVar = "<string>", usage = "Config file for record reader.")
    private String _readerConfigFile;

    @Option(name = "-overwrite", usage = "Overwrite existing output directory.")
    private boolean _overwrite = false;

    @Option(name = "-numThreads", metaVar = "<int>", usage = "Parallelism while generating segments, default is 1.")
    private int _numThreads = 1;

    @Option(name = "-postCreationVerification", usage = "Verify segment data file after segment creation. Please ensure you have enough local disk to hold data for verification")
    private boolean _postCreationVerification = false;

    @Option(name = "-retry", metaVar = "<int>", usage = "Number of retries if encountered any segment creation failure, default is 0.")
    private int _retry = 0;

    @Option(name = "-help", help = true, aliases = {"-h", "--h", "--help"}, usage = "Print this message.")
    private boolean _help = false;

    // Fluent setters: each one stores the value and returns this command so calls can be chained.

    public CreateSegmentCommand setGeneratorConfigFile(String generatorConfigFile) {
        _generatorConfigFile = generatorConfigFile;
        return this;
    }

    public CreateSegmentCommand setDataDir(String dataDir) {
        _dataDir = dataDir;
        return this;
    }

    public CreateSegmentCommand setFormat(FileFormat format) {
        _format = format;
        return this;
    }

    public CreateSegmentCommand setOutDir(String outDir) {
        _outDir = outDir;
        return this;
    }

    public CreateSegmentCommand setOverwrite(boolean overwrite) {
        _overwrite = overwrite;
        return this;
    }

    public CreateSegmentCommand setTableName(String tableName) {
        _tableName = tableName;
        return this;
    }

    public CreateSegmentCommand setSegmentName(String segmentName) {
        _segmentName = segmentName;
        return this;
    }

    public CreateSegmentCommand setTimeColumnName(String timeColumnName) {
        _timeColumnName = timeColumnName;
        return this;
    }

    public CreateSegmentCommand setSchemaFile(String schemaFile) {
        _schemaFile = schemaFile;
        return this;
    }

    public CreateSegmentCommand setReaderConfigFile(String readerConfigFile) {
        _readerConfigFile = readerConfigFile;
        return this;
    }

    public CreateSegmentCommand setRetry(int retry) {
        _retry = retry;
        return this;
    }

    public CreateSegmentCommand setPostCreationVerification(boolean postCreationVerification) {
        _postCreationVerification = postCreationVerification;
        return this;
    }

    public CreateSegmentCommand setNumThreads(int numThreads) {
        _numThreads = numThreads;
        return this;
    }

    /** Renders the command with its main options; used for the "Executing command" log line. */
    @Override
    public String toString() {
        return "CreateSegment  -generatorConfigFile " + _generatorConfigFile + " -dataDir " + _dataDir + " -format " + _format + " -outDir " + _outDir + " -overwrite " + _overwrite + " -tableName " + _tableName + " -segmentName " + _segmentName + " -timeColumnName " + _timeColumnName + " -schemaFile " + _schemaFile + " -readerConfigFile " + _readerConfigFile + " -numThreads " + _numThreads;
    }

    @Override // org.apache.pinot.tools.AbstractBaseCommand
    public final String getName() {
        return "CreateSegment";
    }

    @Override // org.apache.pinot.tools.Command
    public String description() {
        return "Create pinot segments from provided avro/csv/json input data.";
    }

    @Override // org.apache.pinot.tools.Command
    public boolean getHelp() {
        return _help;
    }

    /**
     * Merges command-line options with the generator config, validates the input and output
     * locations, then builds one segment per data file on a fixed-size thread pool.
     *
     * @return {@code true} when every segment was created before the one-hour timeout;
     *     {@code false} when the timeout elapsed or any segment still failed after its retries
     * @throws RuntimeException if a required option is missing from both sources, or the data
     *     directory is missing or contains no file of the selected format
     * @throws IOException if the output directory exists and overwrite is not enabled
     * @throws Exception on any other failure while preparing the run
     */
    @Override // org.apache.pinot.tools.Command
    public boolean execute() throws Exception {
        LOGGER.info("Executing command: {}", toString());

        final SegmentGeneratorConfig generatorConfig = _generatorConfigFile != null
            ? (SegmentGeneratorConfig) JsonUtils.fileToObject(new File(_generatorConfigFile), SegmentGeneratorConfig.class)
            : new SegmentGeneratorConfig();

        _dataDir = resolveOption("dataDir", _dataDir, generatorConfig.getDataDir());

        FileFormat configFormat = generatorConfig.getFormat();
        if (_format == null) {
            if (configFormat == null) {
                throw new RuntimeException("Format cannot be null in config file.");
            }
            _format = configFormat;
        } else if (configFormat != _format && configFormat != FileFormat.AVRO) {
            // AVRO in the config is not reported as a conflict -- presumably because it is the
            // config's default value; confirm against SegmentGeneratorConfig.
            LOGGER.warn("Find format conflict in command line and config file, use config in command line: {}", _format);
        }

        _outDir = resolveOption("outDir", _outDir, generatorConfig.getOutDir());
        if (generatorConfig.isOverwrite()) {
            _overwrite = true;
        }
        _tableName = resolveOption("tableName", _tableName, generatorConfig.getTableName());
        _segmentName = resolveOption("segmentName", _segmentName, generatorConfig.getSegmentName());

        // The schema file is needed by every worker, so resolve it (command line first, then
        // config) and fail here rather than with a NullPointerException inside each task.
        final String schemaFile = resolveOption("schemaFile", _schemaFile, generatorConfig.getSchemaFile());

        final Path dataDirPath = new Path(_dataDir);
        FileSystem fileSystem = FileSystem.get(URI.create(_dataDir), new Configuration());
        if (!fileSystem.exists(dataDirPath) || !fileSystem.isDirectory(dataDirPath)) {
            throw new RuntimeException("Data directory " + _dataDir + " not found.");
        }
        final List<Path> dataFilePaths = getDataFilePaths(dataDirPath);
        if (dataFilePaths == null || dataFilePaths.isEmpty()) {
            throw new RuntimeException("Data directory " + _dataDir + " does not contain " + _format.toString().toUpperCase() + " files.");
        }
        LOGGER.info("Accepted files: {}", Arrays.toString(dataFilePaths.toArray()));

        File outDir = new File(_outDir);
        if (outDir.exists()) {
            if (!_overwrite) {
                throw new IOException("Output directory " + _outDir + " already exists.");
            }
            FileUtils.deleteDirectory(outDir);
        }

        // Push the effective values back into the config that every per-file config is copied from.
        generatorConfig.setDataDir(_dataDir);
        generatorConfig.setFormat(_format);
        generatorConfig.setOutDir(_outDir);
        generatorConfig.setOverwrite(_overwrite);
        generatorConfig.setTableName(_tableName);
        generatorConfig.setSegmentName(_segmentName);
        if (_timeColumnName != null) {
            generatorConfig.setTimeColumnName(_timeColumnName);
        }
        generatorConfig.setSchemaFile(schemaFile);
        if (_readerConfigFile != null) {
            String configReaderConfigFile = generatorConfig.getReaderConfigFile();
            if (configReaderConfigFile != null && !configReaderConfigFile.equals(_readerConfigFile)) {
                LOGGER.warn("Find readerConfigFile conflict in command line and config file, use config in command line: {}", _readerConfigFile);
            }
            generatorConfig.setReaderConfigFile(_readerConfigFile);
        }
        // Effective reader config: the command-line value if given, else the config-file value.
        final String readerConfigFile = generatorConfig.getReaderConfigFile();

        // A negative retry count would skip the attempt loop entirely; treat it as "no retries".
        final int maxRetries = Math.max(0, _retry);

        ExecutorService executor = Executors.newFixedThreadPool(_numThreads);
        List<Future<?>> futures = new ArrayList<Future<?>>(dataFilePaths.size());
        try {
            int nextSegmentIndex = 0;
            for (final Path dataFilePath : dataFilePaths) {
                final int segmentIndex = nextSegmentIndex++;
                // submit() rather than execute(): the Future keeps any task failure so it can be
                // reported through the return value instead of being lost on the worker thread.
                futures.add(executor.submit(new Runnable() {
                    @Override
                    public void run() {
                        createSegmentWithRetries(generatorConfig, dataDirPath, dataFilePath, segmentIndex, schemaFile, readerConfigFile, maxRetries);
                    }
                }));
            }
        } finally {
            executor.shutdown();
        }

        if (!executor.awaitTermination(1L, TimeUnit.HOURS)) {
            LOGGER.error("Timed out waiting for segment creation to finish.");
            return false;
        }

        boolean allSucceeded = true;
        for (int i = 0; i < futures.size(); i++) {
            try {
                futures.get(i).get();
            } catch (ExecutionException e) {
                allSucceeded = false;
                LOGGER.error("Failed to create Pinot segment from input file: {}", dataFilePaths.get(i), e.getCause());
            }
        }
        return allSucceeded;
    }

    /**
     * Picks the effective value of a required option: the command-line value when present,
     * otherwise the config-file value. Logs a warning when both are set and differ.
     *
     * @param optionName option name used in the error and warning messages
     * @param cliValue value given on the command line, or {@code null}
     * @param configValue value read from the generator config, or {@code null}
     * @return the effective, non-null value
     * @throws RuntimeException if both values are {@code null}
     */
    private static String resolveOption(String optionName, String cliValue, String configValue) {
        if (cliValue == null) {
            if (configValue == null) {
                throw new RuntimeException("Must specify " + optionName + ".");
            }
            return configValue;
        }
        if (configValue != null && !configValue.equals(cliValue)) {
            LOGGER.warn("Find {} conflict in command line and config file, use config in command line: {}", optionName, cliValue);
        }
        return cliValue;
    }

    /**
     * Runs {@link #createSegment} for one input file, retrying up to {@code maxRetries} times.
     *
     * @throws RuntimeException wrapping the last failure once all attempts are exhausted
     */
    private void createSegmentWithRetries(SegmentGeneratorConfig baseConfig, Path dataDirPath, Path dataFilePath, int segmentIndex, String schemaFile, String readerConfigFile, int maxRetries) {
        for (int attempt = 0; attempt <= maxRetries; attempt++) {
            try {
                createSegment(baseConfig, dataDirPath, dataFilePath, segmentIndex, schemaFile, readerConfigFile);
                return;
            } catch (Exception e) {
                LOGGER.error("Got exception during segment creation.", e);
                if (attempt == maxRetries) {
                    throw new RuntimeException(e);
                }
                LOGGER.error("Failed to create Pinot segment, retry: {}/{}", attempt + 1, maxRetries);
            }
        }
    }

    /**
     * Copies one input file to the local file system, builds a segment named
     * {@code <segmentName>_<segmentIndex>} from it and optionally verifies the result.
     */
    private void createSegment(SegmentGeneratorConfig baseConfig, Path dataDirPath, Path dataFilePath, int segmentIndex, String schemaFile, String readerConfigFile) throws Exception {
        // Each task works on its own copy so per-file settings do not leak between threads.
        SegmentGeneratorConfig config = new SegmentGeneratorConfig(baseConfig);

        // NOTE(review): the file is copied into the working directory under its base name and is
        // not removed afterwards; same-named files from different sub-directories would collide.
        String localFileName = dataFilePath.getName();
        dataDirPath.getFileSystem(new Configuration()).copyToLocalFile(dataFilePath, new Path(localFileName));
        config.setInputFilePath(localFileName);
        config.setSegmentName(_segmentName + "_" + segmentIndex);

        Schema schema = Schema.fromFile(new File(schemaFile));
        config.setSchema(schema);

        SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
        switch (config.getFormat()) {
            case PARQUET:
                // NOTE(review): the ORC and CSV readers live in format-specific sub-packages
                // (inputformat.orc, inputformat.csv) while this path has none -- confirm the
                // class name against the Parquet plugin.
                config.setRecordReaderPath("org.apache.pinot.plugin.inputformat.ParquetRecordReader");
                driver.init(config);
                break;
            case ORC:
                config.setRecordReaderPath("org.apache.pinot.plugin.inputformat.orc.ORCRecordReader");
                driver.init(config);
                break;
            case CSV:
                CSVRecordReaderConfig csvReaderConfig = readerConfigFile != null
                    ? (CSVRecordReaderConfig) JsonUtils.fileToObject(new File(readerConfigFile), CSVRecordReaderConfig.class)
                    : null;
                CSVRecordReader csvRecordReader = new CSVRecordReader();
                csvRecordReader.init(new File(localFileName), schema, csvReaderConfig);
                driver.init(config, (RecordReader) csvRecordReader);
                break;
            default:
                driver.init(config);
                break;
        }
        driver.build();

        if (_postCreationVerification) {
            if (!verifySegment(new File(config.getOutDir(), driver.getSegmentName()))) {
                throw new RuntimeException("Pinot segment is corrupted, please try to recreate it.");
            }
            LOGGER.info("Post segment creation verification is succeed for segment {}.", driver.getSegmentName());
        }
    }

    /**
     * Verifies a generated segment by copying it into the temp directory and loading it.
     * The temporary copy is always deleted before returning.
     *
     * @param file location of the generated segment
     * @return {@code true} if the copy could be loaded; {@code false} if copying or loading failed
     */
    public boolean verifySegment(File file) {
        File localCopy = new File(FileUtils.getTempDirectory(), org.apache.pinot.common.utils.FileUtils.getRandomFileName());
        try {
            try {
                localCopy.getParentFile().mkdirs();
                FileSystem.get(URI.create(file.toString()), new Configuration()).copyToLocalFile(new Path(file.toString()), new Path(localCopy.toString()));
            } catch (IOException e) {
                LOGGER.error("Failed to copy segment {} to local directory {} for verification.", file, localCopy, e);
                return false;
            }
            try {
                ImmutableSegment segment = ImmutableSegmentLoader.load(localCopy, ReadMode.mmap);
                LOGGER.info("Successfully loaded Pinot segment {} (size: {} Bytes) from {}.", segment.getSegmentName(), segment.getSegmentSizeBytes(), localCopy);
                segment.destroy();
                return true;
            } catch (Exception e) {
                LOGGER.error("Failed to load segment from {}.", localCopy, e);
                return false;
            }
        } finally {
            FileUtils.deleteQuietly(localCopy);
        }
    }

    /**
     * Collects every data file of the selected format under {@code path}, descending into
     * sub-directories.
     *
     * @param path directory (or glob pattern) to search
     * @return the matching files; empty when nothing matches
     * @throws IOException if the file system cannot be accessed
     */
    protected List<Path> getDataFilePaths(Path path) throws IOException {
        List<Path> dataFilePaths = new ArrayList<Path>();
        FileSystem fileSystem = FileSystem.get(path.toUri(), new Configuration());
        FileStatus[] matches = fileSystem.globStatus(path);
        // globStatus may return null when a non-glob path does not exist; treat that as no matches.
        if (matches != null) {
            getDataFilePathsHelper(fileSystem, matches, dataFilePaths);
        }
        return dataFilePaths;
    }

    /**
     * Recursively walks {@code fileStatusArr}, adding each regular file accepted by
     * {@link #isDataFile(String)} to {@code list}.
     */
    protected void getDataFilePathsHelper(FileSystem fileSystem, FileStatus[] fileStatusArr, List<Path> list) throws IOException {
        for (FileStatus fileStatus : fileStatusArr) {
            Path path = fileStatus.getPath();
            if (fileStatus.isDirectory()) {
                getDataFilePathsHelper(fileSystem, fileSystem.listStatus(path), list);
            } else if (isDataFile(path.getName())) {
                list.add(path);
            }
        }
    }

    /**
     * Tells whether a file name carries the extension expected for the selected input format.
     *
     * @param str file name to check
     * @return {@code true} if the name ends with the format's extension
     * @throws IllegalStateException if the selected format has no known extension
     */
    protected boolean isDataFile(String str) {
        switch (_format) {
            case PARQUET:
                return str.endsWith(".parquet");
            case ORC:
                return str.endsWith(".orc");
            case CSV:
                return str.endsWith(".csv");
            case AVRO:
            case GZIPPED_AVRO:
                return str.endsWith(".avro");
            case JSON:
                return str.endsWith(".json");
            case THRIFT:
                return str.endsWith(".thrift");
            default:
                throw new IllegalStateException("Unsupported file format for segment creation: " + _format);
        }
    }
}
