package org.apache.gobblin.example.githubjsontoparquet;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.apache.commons.cli.CommandLine;
import org.apache.gobblin.annotation.Alias;
import org.apache.gobblin.runtime.api.JobTemplate;
import org.apache.gobblin.runtime.api.SpecNotFoundException;
import org.apache.gobblin.runtime.cli.CliObjectSupport;
import org.apache.gobblin.runtime.cli.PublicMethodsGobblinCliFactory;
import org.apache.gobblin.runtime.embedded.EmbeddedGobblin;
import org.apache.gobblin.runtime.template.ResourceBasedJobTemplate;
import org.apache.gobblin.writer.WriterOutputFormat;
import org.codehaus.plexus.util.FileUtils;
import org.mortbay.log.Log;

/* loaded from: input_file:org/apache/gobblin/example/githubjsontoparquet/EmbeddedGithubJsonToParquet.class */
/**
 * Embedded Gobblin job that downloads one GitHub archive file (gzipped JSON) into an
 * {@code archives} directory under the work directory and configures a job, based on the
 * {@code githubjsontoparquet.template} resource, whose writer output format is Parquet.
 */
public class EmbeddedGithubJsonToParquet extends EmbeddedGobblin {
    private static final String GITHUB_ARCHIVE_URL_TEMPLATE = "http://data.githubarchive.org/%s.json.gz";
    private static final String DOWNLOAD_DIR = "archives";
    private static final String ARCHIVE_SUFFIX = ".json.gz";
    private static final String WORK_DIR_KEY = "work.dir";

    /**
     * CLI factory that builds an {@link EmbeddedGithubJsonToParquet} from exactly two positional
     * arguments: the archive date/hour and the work directory URI.
     */
    @Alias(value = "githubjsontoparquet", description = "Extract Github data and write to parquet files")
    public static class CliFactory extends PublicMethodsGobblinCliFactory {
        private static final int EXPECTED_ARGUMENT_COUNT = 2;

        public CliFactory() {
            super(EmbeddedGithubJsonToParquet.class);
        }

        /**
         * Builds the embedded job from the positional command-line arguments.
         *
         * @param commandLine parsed command line; its remaining arguments must be the archive
         *     date/hour followed by the work directory URI
         * @return a configured {@link EmbeddedGithubJsonToParquet}; never {@code null}
         * @throws IllegalArgumentException if the number of positional arguments is not 2
         * @throws JobTemplate.TemplateException if the job template cannot be applied
         * @throws IOException if the template cannot be read or the archive cannot be downloaded
         */
        public EmbeddedGobblin constructEmbeddedGobblin(CommandLine commandLine) throws JobTemplate.TemplateException, IOException {
            String[] args = commandLine.getArgs();
            // Reject every argument count other than 2 up front instead of returning null,
            // which would only surface later as an unrelated NullPointerException.
            if (args.length != EXPECTED_ARGUMENT_COUNT) {
                throw new IllegalArgumentException(
                        "Expected 2 arguments but got " + args.length + ". " + getUsageString());
            }
            // Failures propagate through the declared checked exceptions rather than being
            // printed and converted into a null return value.
            return new EmbeddedGithubJsonToParquet(args[0], args[1]);
        }

        /** Returns the description of the two positional arguments shown in usage output. */
        public String getUsageString() {
            return "<Date time (yyyy-mm-dd-hh) of archive to pull> <Work dir with file system URI>";
        }
    }

    /**
     * Configures the job and downloads the requested archive.
     *
     * @param archiveDateAndHour identifier substituted into the archive URL template and used as
     *     the local file name (the CLI usage string describes it as {@code yyyy-mm-dd-hh})
     * @param workDir work directory as a URL string; its protocol is used to derive the file
     *     system URI and its path is where the {@code archives} directory is created
     * @throws RuntimeException if {@code workDir} is not a valid URL, the template cannot be
     *     set, or the download directory cannot be created
     * @throws JobTemplate.TemplateException if the job template cannot be applied
     * @throws IOException if the template cannot be read or the archive download fails
     */
    @CliObjectSupport(argumentNames = {"archiveDateAndHour", "workDir"})
    public EmbeddedGithubJsonToParquet(String archiveDateAndHour, String workDir) throws JobTemplate.TemplateException, IOException {
        super("githubjsontoparquet");

        URL workDirUrl = parseWorkDir(workDir);
        String fsUri = workDirUrl.getProtocol() + ":///";

        setConfiguration(WORK_DIR_KEY, workDir);
        setConfiguration("fs.uri", fsUri);
        setConfiguration("state.store.enabled", "true");
        setConfiguration("state.store.dir", workDir + "/store");
        setConfiguration("writer.fs.uri", fsUri);
        setConfiguration("data.publisher.fs.uri", fsUri);
        setConfiguration("data.publisher.final.dir", workDir + "/event_data");
        setConfiguration("data.publisher.metadata.output.dir", workDir + "/metadata");
        setConfiguration("writer.output.format", WriterOutputFormat.PARQUET.toString());

        // Only the template lookup is wrapped here so that "Cannot set template" is never
        // reported for an unrelated failure, and the original cause is preserved.
        try {
            setTemplate(ResourceBasedJobTemplate.forResourcePath("githubjsontoparquet.template"));
        } catch (URISyntaxException | SpecNotFoundException e) {
            throw new RuntimeException("Cannot set template", e);
        }

        String archiveUrl = String.format(GITHUB_ARCHIVE_URL_TEMPLATE, archiveDateAndHour);
        Path downloadDir = createDownloadDir(workDirUrl.getPath(), archiveUrl);
        downloadFile(archiveUrl, getAbsoluteDownloadFilePath(downloadDir, archiveDateAndHour));
    }

    /**
     * Parses the work directory string as a URL.
     *
     * @throws RuntimeException wrapping the {@link MalformedURLException} if parsing fails
     */
    private static URL parseWorkDir(String workDir) {
        try {
            return new URL(workDir);
        } catch (MalformedURLException e) {
            throw new RuntimeException("Work directory URI with no protocol or malformed.", e);
        }
    }

    /** Returns {@code <downloadDir>/<archiveName>.json.gz}. */
    private Path getAbsoluteDownloadFilePath(Path downloadDir, String archiveName) {
        return Paths.get(downloadDir.toString(), archiveName + ARCHIVE_SUFFIX);
    }

    /**
     * Creates the {@code archives} directory under the given work directory path.
     *
     * @param workDirPath path component of the work directory URL
     * @param archiveUrl archive URL, used only in the error message
     * @return the path of the download directory
     * @throws RuntimeException wrapping the {@link IOException} if the directory cannot be created
     */
    private Path createDownloadDir(String workDirPath, String archiveUrl) {
        Path downloadDir = Paths.get(workDirPath, DOWNLOAD_DIR);
        try {
            Log.info(String.format("Creating download dir %s", downloadDir));
            FileUtils.forceMkdir(downloadDir.toFile());
            Log.info(String.format("Created download dir %s", downloadDir));
            return downloadDir;
        } catch (IOException e) {
            throw new RuntimeException(
                    String.format("Unable to create download location for archive: %s at %s", archiveUrl, downloadDir), e);
        }
    }

    /**
     * Downloads {@code sourceUrl} to {@code destination} unless the destination already exists.
     *
     * @param sourceUrl URL of the archive to fetch
     * @param destination local file to write
     * @throws IOException if the URL cannot be opened or the transfer fails; any partially
     *     written destination file is removed first
     */
    private void downloadFile(String sourceUrl, Path destination) throws IOException {
        File destinationFile = destination.toFile();
        if (destinationFile.exists()) {
            Log.info(String.format("Skipping download for %s at %s because destination already exists", sourceUrl, destination));
            return;
        }
        // try-with-resources closes both the network channel and the output stream on every path.
        try (ReadableByteChannel source = Channels.newChannel(new URL(sourceUrl).openStream());
                FileOutputStream output = new FileOutputStream(destinationFile)) {
            Log.info(String.format("Downloading %s at %s", sourceUrl, destination));
            output.getChannel().transferFrom(source, 0L, Long.MAX_VALUE);
            Log.info(String.format("Download complete for %s at %s", sourceUrl, destination));
        } catch (IOException e) {
            // A truncated file left behind would make the next run skip the download because
            // the destination "already exists", so remove it before reporting the failure.
            if (destinationFile.exists() && !destinationFile.delete()) {
                Log.warn(String.format("Unable to remove partially downloaded file %s", destination));
            }
            throw new IOException(String.format("Failed to download %s to %s", sourceUrl, destination), e);
        }
    }
}
