package org.apache.tez.examples;

import com.google.common.base.Preconditions;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.apache.tez.client.TezClient;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.Edge;
import org.apache.tez.dag.api.ProcessorDescriptor;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.mapreduce.input.MRInput;
import org.apache.tez.mapreduce.output.MROutput;
import org.apache.tez.mapreduce.processor.SimpleMRProcessor;
import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.api.LogicalOutput;
import org.apache.tez.runtime.api.ProcessorContext;
import org.apache.tez.runtime.api.Reader;
import org.apache.tez.runtime.api.Writer;
import org.apache.tez.runtime.library.api.KeyValueReader;
import org.apache.tez.runtime.library.api.KeyValueWriter;
import org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig;
import org.apache.tez.runtime.library.conf.UnorderedPartitionedKVEdgeConfig;
import org.apache.tez.runtime.library.partitioner.HashPartitioner;
import org.apache.tez.runtime.library.processor.SimpleProcessor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/tez/examples/HashJoinExample.class */
public class HashJoinExample extends TezExampleBase {
    // Logger used by runJob for the start-up message and handed to runDag.
    private static final Logger LOG = LoggerFactory.getLogger(HashJoinExample.class);
    // Literal the optional fifth command-line argument must equal to enable the broadcast edge.
    private static final String broadcastOption = "doBroadcast";
    // Name of the vertex reading the first input path; also the key HashJoinProcessor uses to look up that input.
    private static final String streamingSide = "streamingSide";
    // Name of the vertex reading the second input path; also the key HashJoinProcessor uses to look up that input.
    private static final String hashSide = "hashSide";
    // Name under which the text data source is attached to both reading vertices.
    private static final String inputFile = "inputFile";
    // Name of the vertex that runs HashJoinProcessor.
    private static final String joiner = "joiner";
    // Name of the data sink on the joiner vertex; also the key HashJoinProcessor uses to look up its output.
    private static final String joinOutput = "joinOutput";

    /* loaded from: input_file:org/apache/tez/examples/HashJoinExample$ForwardingProcessor.class */
    public static class ForwardingProcessor extends SimpleProcessor {
        public ForwardingProcessor(ProcessorContext processorContext) {
            super(processorContext);
        }

        public void run() throws Exception {
            Preconditions.checkState(getInputs().size() == 1);
            Preconditions.checkState(getOutputs().size() == 1);
            KeyValueReader reader = ((LogicalInput) getInputs().values().iterator().next()).getReader();
            Preconditions.checkState(reader instanceof KeyValueReader);
            KeyValueReader keyValueReader = reader;
            KeyValueWriter writer = ((LogicalOutput) getOutputs().values().iterator().next()).getWriter();
            while (keyValueReader.next()) {
                writer.write(keyValueReader.getCurrentValue(), NullWritable.get());
            }
        }
    }

    /* loaded from: input_file:org/apache/tez/examples/HashJoinExample$HashJoinProcessor.class */
    public static class HashJoinProcessor extends SimpleMRProcessor {
        public HashJoinProcessor(ProcessorContext processorContext) {
            super(processorContext);
        }

        public void run() throws Exception {
            Preconditions.checkState(getInputs().size() == 2);
            Preconditions.checkState(getOutputs().size() == 1);
            LogicalInput logicalInput = (LogicalInput) getInputs().get(HashJoinExample.streamingSide);
            LogicalInput logicalInput2 = (LogicalInput) getInputs().get(HashJoinExample.hashSide);
            KeyValueReader reader = logicalInput.getReader();
            KeyValueReader reader2 = logicalInput2.getReader();
            Preconditions.checkState(reader instanceof KeyValueReader);
            Preconditions.checkState(reader2 instanceof KeyValueReader);
            LogicalOutput logicalOutput = (LogicalOutput) getOutputs().get(HashJoinExample.joinOutput);
            Preconditions.checkState(logicalOutput.getWriter() instanceof KeyValueWriter);
            KeyValueWriter writer = logicalOutput.getWriter();
            KeyValueReader keyValueReader = reader2;
            HashSet hashSet = new HashSet();
            while (keyValueReader.next()) {
                hashSet.add(new Text((Text) keyValueReader.getCurrentKey()));
            }
            KeyValueReader keyValueReader2 = reader;
            while (keyValueReader2.next()) {
                Text text = (Text) keyValueReader2.getCurrentKey();
                if (hashSet.contains(text)) {
                    writer.write(text, NullWritable.get());
                }
            }
        }
    }

    /**
     * Command-line entry point: runs this example through {@link ToolRunner} with a
     * fresh {@link Configuration} and exits the JVM with the returned status code.
     *
     * @param strArr command-line arguments, passed through unchanged
     * @throws Exception if the tool run fails
     */
    public static void main(String[] strArr) throws Exception {
        final Configuration conf = new Configuration();
        final HashJoinExample example = new HashJoinExample();
        final int exitCode = ToolRunner.run(conf, example, strArr);
        System.exit(exitCode);
    }

    /**
     * Writes the one-line command synopsis for this example to standard error.
     */
    @Override
    protected void printUsage() {
        final String usage =
                "Usage: hashjoin <file1> <file2> <numPartitions> <outPath> [doBroadcast(default false)]";
        System.err.println(usage);
    }

    /**
     * Parses the command-line arguments, validates them, then builds and runs the
     * hash-join DAG.
     *
     * <p>Argument layout: {@code <file1> <file2> <numPartitions> <outPath> [doBroadcast]}.
     * {@code file1} is read by the streaming side and {@code file2} by the hash side.
     * The broadcast edge is used only when a fifth argument is present and equals
     * {@code doBroadcast}.
     *
     * @param strArr command-line arguments (four or five entries)
     * @param tezConfiguration configuration used to resolve the output filesystem and build the DAG
     * @param tezClient not used by this method
     * @return {@code 3} if the output path already exists, {@code 4} if
     *         {@code numPartitions} is not positive, otherwise the result of {@code runDag}
     * @throws NumberFormatException if {@code numPartitions} is not an integer
     * @throws Exception if the filesystem check or the DAG run fails
     */
    @Override
    protected int runJob(String[] strArr, TezConfiguration tezConfiguration, TezClient tezClient) throws Exception {
        boolean doBroadcast = strArr.length == 5 && strArr[4].equals(broadcastOption);
        LOG.info("Running HashJoinExample" + (doBroadcast ? "-WithBroadcast" : ""));

        String streamInputDir = strArr[0];
        String hashInputDir = strArr[1];
        int numPartitions = Integer.parseInt(strArr[2]);
        String outputDir = strArr[3];

        Path streamInputPath = new Path(streamInputDir);
        Path hashInputPath = new Path(hashInputDir);
        Path outputPath = new Path(outputDir);

        // Resolve the filesystem from the output path itself, not the configured
        // default, so an output path on a different scheme/authority is checked
        // against the filesystem it actually belongs to.
        FileSystem outputFs = outputPath.getFileSystem(tezConfiguration);
        if (outputFs.exists(outputPath)) {
            System.err.println("Output directory: " + outputDir + " already exists");
            return 3;
        }
        if (numPartitions <= 0) {
            System.err.println("NumPartitions must be > 0");
            return 4;
        }

        DAG dag = createDag(tezConfiguration, streamInputPath, hashInputPath, outputPath, numPartitions, doBroadcast);
        return runDag(dag, isCountersLog(), LOG);
    }

    /**
     * Checks the number of command-line arguments.
     *
     * @param strArr command-line arguments
     * @return {@code 0} when four or five arguments were supplied, {@code 2} otherwise
     */
    @Override
    protected int validateArgs(String[] strArr) {
        final int argCount = strArr.length;
        if (argCount == 4 || argCount == 5) {
            return 0;
        }
        return 2;
    }

    /**
     * Assembles the three-vertex hash-join DAG.
     *
     * <p>Two {@link ForwardingProcessor} vertices read text input ({@code path} for
     * the streaming side, {@code path2} for the hash side) and feed a
     * {@link HashJoinProcessor} vertex that writes text output to {@code path3}.
     * The streaming side always uses the partitioned edge; the hash side uses a
     * broadcast edge when {@code z} is true and the same partitioned edge otherwise.
     *
     * @param tezConfiguration configuration copied into each input/output and applied to the edge configs
     * @param path input path for the streaming-side vertex
     * @param path2 input path for the hash-side vertex
     * @param path3 output path for the joiner vertex
     * @param i parallelism passed to the joiner vertex
     * @param z whether the hash side is connected through a broadcast edge
     * @return the assembled DAG
     * @throws IOException declared by this method; raised by the underlying builders, if at all
     */
    private DAG createDag(TezConfiguration tezConfiguration, Path path, Path path2, Path path3, int i, boolean z) throws IOException {
        String dagName = "HashJoinExample" + (z ? "-WithBroadcast" : "");
        DAG dag = DAG.create(dagName);

        String forwarderClass = ForwardingProcessor.class.getName();
        String keyClass = Text.class.getName();
        String valueClass = NullWritable.class.getName();

        // Hash-side reader vertex (second input path).
        Vertex hashVertex = Vertex.create(hashSide, ProcessorDescriptor.create(forwarderClass))
                .addDataSource(
                        inputFile,
                        MRInput.createConfigBuilder(
                                        new Configuration(tezConfiguration),
                                        TextInputFormat.class,
                                        path2.toUri().toString())
                                .groupSplits(!isDisableSplitGrouping())
                                .build());

        // Streaming-side reader vertex (first input path).
        Vertex streamVertex = Vertex.create(streamingSide, ProcessorDescriptor.create(forwarderClass))
                .addDataSource(
                        inputFile,
                        MRInput.createConfigBuilder(
                                        new Configuration(tezConfiguration),
                                        TextInputFormat.class,
                                        path.toUri().toString())
                                .groupSplits(!isDisableSplitGrouping())
                                .build());

        // Joiner vertex with the requested parallelism, writing the text output.
        Vertex joinVertex = Vertex.create(joiner, ProcessorDescriptor.create(HashJoinProcessor.class.getName()), i)
                .addDataSink(
                        joinOutput,
                        MROutput.createConfigBuilder(
                                        new Configuration(tezConfiguration),
                                        TextOutputFormat.class,
                                        path3.toUri().toString())
                                .build());

        UnorderedPartitionedKVEdgeConfig partitionedConf = UnorderedPartitionedKVEdgeConfig
                .newBuilder(keyClass, valueClass, HashPartitioner.class.getName())
                .setFromConfiguration(tezConfiguration)
                .build();

        dag.addVertex(streamVertex);
        dag.addVertex(hashVertex);
        dag.addVertex(joinVertex);
        dag.addEdge(Edge.create(streamVertex, joinVertex, partitionedConf.createDefaultEdgeProperty()));

        if (z) {
            // Broadcast variant: the broadcast edge config is only built on this path.
            UnorderedKVEdgeConfig broadcastConf = UnorderedKVEdgeConfig
                    .newBuilder(keyClass, valueClass)
                    .setFromConfiguration(tezConfiguration)
                    .build();
            dag.addEdge(Edge.create(hashVertex, joinVertex, broadcastConf.createDefaultBroadcastEdgeProperty()));
        } else {
            dag.addEdge(Edge.create(hashVertex, joinVertex, partitionedConf.createDefaultEdgeProperty()));
        }
        return dag;
    }
}
