package com.senseidb.indexing.hadoop.job;

import com.senseidb.indexing.hadoop.keyvalueformat.IntermediateForm;
import com.senseidb.indexing.hadoop.keyvalueformat.Shard;
import com.senseidb.indexing.hadoop.map.SenseiMapper;
import com.senseidb.indexing.hadoop.reduce.FileSystemDirectory;
import com.senseidb.indexing.hadoop.reduce.IndexUpdateOutputFormat;
import com.senseidb.indexing.hadoop.reduce.SenseiCombiner;
import com.senseidb.indexing.hadoop.reduce.SenseiReducer;
import com.senseidb.indexing.hadoop.util.LuceneUtil;
import com.senseidb.indexing.hadoop.util.MRConfig;
import com.senseidb.indexing.hadoop.util.MRJobConfig;
import com.senseidb.indexing.hadoop.util.SenseiJobConfig;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.NumberFormat;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.Logger;

/* loaded from: input_file:com/senseidb/indexing/hadoop/job/MapReduceJob.class */
/**
 * Assembles the Hadoop {@link JobConf} for the Sensei index-building MapReduce job.
 *
 * <p>{@link #createJob(Class)} reads its settings from the {@link Configuration} returned by
 * {@link #getConf()}, mutates that configuration (shard list, sort buffer size, temp dir,
 * compound-file flag, distributed-cache entry) and performs file-system clean-up before the
 * job configuration is returned.
 */
public class MapReduceJob extends Configured {
    /**
     * Formats the numeric suffix of generated shard directory names.
     * NOTE(review): this is a default-locale instance with default settings, so large values
     * may be rendered with grouping separators — confirm that is acceptable for directory names.
     */
    private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();
    private static final Logger logger = Logger.getLogger(MapReduceJob.class);

    /**
     * Builds a fully populated job configuration.
     *
     * <p>Side effects, in order: validates the input directories and the schema file URL,
     * computes the shard list, optionally deletes the output directory and the index path
     * (when {@code SenseiJobConfig.FORCE_OUTPUT_OVERWRITE} is true), stores the shards in the
     * configuration, halves {@code MRJobConfig.IO_SORT_MB} (default 100), deletes the temp
     * directory and {@code ./tmp} if they exist, expunges the trash, and registers the schema
     * file with the distributed cache.
     *
     * @param cls class handed to the {@link JobConf} constructor; its name is also used to
     *            derive a job name when none is configured
     * @return the configured job
     * @throws IOException if no input directory or no schema file URL is configured, or if a
     *                     file-system operation fails
     * @throws URISyntaxException if the schema file URL is not a valid URI
     */
    public JobConf createJob(Class cls) throws IOException, URISyntaxException {
        Configuration conf = getConf();
        int numMapTasks = conf.getInt(MRJobConfig.NUM_MAPS, 2);
        int numShards = conf.getInt(SenseiJobConfig.NUM_SHARDS, 2);

        Path[] inputPaths = parseInputPaths(conf.get(SenseiJobConfig.INPUT_DIRS, (String) null));

        // Validate the schema location before anything is deleted below, so that an unusable
        // configuration does not leave the output directories wiped with no job to refill them.
        String schemaUrl = conf.get(SenseiJobConfig.SCHEMA_FILE_URL);
        if (schemaUrl == null) {
            throw new IOException("no schema file is found");
        }
        URI schemaUri = new URI(schemaUrl);

        Path outputPath = new Path(conf.get(SenseiJobConfig.OUTPUT_DIR));
        String indexPath = conf.get(SenseiJobConfig.INDEX_PATH);
        // The shard list is computed before any forced clean-up, so directories that already
        // exist under the index path keep their names in the shard list.
        Shard[] shards = createShards(indexPath, numShards, conf, conf.get(SenseiJobConfig.INDEX_SUBDIR_PREFIX, ""));

        FileSystem fs = FileSystem.get(conf);
        if (conf.getBoolean(SenseiJobConfig.FORCE_OUTPUT_OVERWRITE, false)) {
            deleteIfExists(fs, outputPath);
            deleteIfExists(fs, new Path(indexPath));
        }

        setShardGeneration(conf, shards);
        Shard.setIndexShards(conf, shards);

        // Run the job with half of the configured sort buffer.
        conf.setInt(MRJobConfig.IO_SORT_MB, conf.getInt(MRJobConfig.IO_SORT_MB, 100) / 2);

        // The value is read back through conf.get() so that Configuration gets the chance to
        // expand the ${mapred.child.tmp} reference before it is turned into a Path.
        conf.set(MRConfig.TEMP_DIR, "${mapred.child.tmp}/hindex/");
        deleteIfExists(fs, new Path(conf.get(MRConfig.TEMP_DIR)));
        deleteIfExists(fs, new Path("./tmp"));
        new Trash(conf).expunge();

        conf.setBoolean(SenseiJobConfig.USE_COMPOUND_FILE, true);

        logger.info("Adding schema file: " + schemaUrl);
        DistributedCache.addCacheFile(schemaUri, conf);

        JobConf jobConf = new JobConf(conf, cls);
        if (jobConf.getJobName().length() < 1) {
            jobConf.setJobName(cls.getName() + "_" + System.currentTimeMillis());
        }
        FileInputFormat.setInputPaths(jobConf, inputPaths);
        FileOutputFormat.setOutputPath(jobConf, outputPath);
        jobConf.setNumMapTasks(numMapTasks);
        // One reduce task per shard.
        jobConf.setNumReduceTasks(shards.length);
        jobConf.setInputFormat(conf.getClass(SenseiJobConfig.INPUT_FORMAT, TextInputFormat.class, InputFormat.class));

        logJobSettings(conf, jobConf, shards.length);

        jobConf.setMapOutputKeyClass(Shard.class);
        jobConf.setMapOutputValueClass(IntermediateForm.class);
        jobConf.setOutputKeyClass(Shard.class);
        jobConf.setOutputValueClass(Text.class);
        jobConf.setMapperClass(SenseiMapper.class);
        jobConf.setCombinerClass(SenseiCombiner.class);
        jobConf.setReducerClass(SenseiReducer.class);
        jobConf.setOutputFormat(IndexUpdateOutputFormat.class);
        jobConf.setReduceSpeculativeExecution(false);
        return jobConf;
    }

    /**
     * Splits the configured input-directory list into unescaped {@link Path}s.
     *
     * @param inputDirs raw value of {@code SenseiJobConfig.INPUT_DIRS}; may be {@code null}
     * @return at least one input path
     * @throws IOException if the value is missing or contains no directory
     */
    private static Path[] parseInputPaths(String inputDirs) throws IOException {
        logger.info("dirs:" + inputDirs);
        String[] escapedDirs = inputDirs == null ? null : StringUtils.split(inputDirs);
        if (escapedDirs == null || escapedDirs.length == 0) {
            throw new IOException("no input directory is configured (" + SenseiJobConfig.INPUT_DIRS + ")");
        }
        logger.info("length after split:" + escapedDirs.length);
        Path[] paths = new Path[escapedDirs.length];
        for (int k = 0; k < escapedDirs.length; k++) {
            paths[k] = new Path(StringUtils.unEscapeString(escapedDirs[k]));
        }
        logger.info("path[0] is:" + paths[0]);
        return paths;
    }

    /** Recursively deletes {@code target} when it exists on {@code fs}. */
    private static void deleteIfExists(FileSystem fs, Path target) throws IOException {
        if (fs.exists(target)) {
            fs.delete(target, true);
        }
    }

    /** Logs the effective input/output locations, task counts, shards, input format and temp dir. */
    private static void logJobSettings(Configuration conf, JobConf jobConf, int shardCount) {
        Path[] effectiveInputs = FileInputFormat.getInputPaths(jobConf);
        StringBuilder joinedInputs = new StringBuilder();
        for (int k = 0; k < effectiveInputs.length; k++) {
            if (k > 0) {
                joinedInputs.append(",");
            }
            joinedInputs.append(effectiveInputs[k].toString());
        }
        logger.info("mapred.input.dir = " + joinedInputs.toString());
        logger.info("mapreduce.output.fileoutputformat.outputdir = " + FileOutputFormat.getOutputPath(jobConf).toString());
        logger.info("mapreduce.job.maps = " + jobConf.getNumMapTasks());
        logger.info("mapreduce.job.reduces = " + jobConf.getNumReduceTasks());
        logger.info(shardCount + " shards = " + conf.get(SenseiJobConfig.INDEX_SHARDS));
        logger.info("mapred.input.format.class = " + jobConf.getInputFormat().getClass().getName());
        logger.info("mapreduce.cluster.temp.dir = " + jobConf.get(MRConfig.TEMP_DIR));
    }

    /**
     * Obtains a {@link FileSystem} from a fresh {@link Configuration} whose
     * {@code hadoop.job.ugi} entry is set to the given value. Nothing in this class calls it.
     *
     * @param ugi value stored under {@code hadoop.job.ugi}
     * @throws RuntimeException wrapping the {@link IOException} if the file system cannot be obtained
     */
    private static FileSystem getFileSystem(String ugi) {
        Configuration configuration = new Configuration();
        configuration.set("hadoop.job.ugi", ugi);
        try {
            return FileSystem.get(configuration);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Produces the shard list for the index rooted at {@code indexPath}.
     *
     * <p>If the index path does not exist, {@code numShards} shards named
     * {@code <indexPath>/<prefix><n>} are returned. Otherwise every existing sub-directory
     * (sorted by name) becomes a shard, and the list is topped up to {@code numShards} with
     * newly named directories that do not exist yet. All shards are created with version and
     * generation {@code -1}.
     *
     * @param indexPath root directory of the index
     * @param numShards minimum number of shards to return
     * @param configuration used to obtain the file system
     * @param prefix name prefix for generated shard directories
     * @return the shards; its length is the larger of {@code numShards} and the number of
     *         existing sub-directories
     * @throws IOException if the file system cannot be queried
     */
    private static Shard[] createShards(String indexPath, int numShards, Configuration configuration, String prefix) throws IOException {
        String base = Shard.normalizePath(indexPath) + "/";
        FileSystem fs = FileSystem.get(configuration);
        Path root = new Path(indexPath);

        if (!fs.exists(root)) {
            Shard[] fresh = new Shard[numShards];
            for (int n = 0; n < fresh.length; n++) {
                fresh[n] = new Shard(-1L, base + prefix + NUMBER_FORMAT.format(n), -1L);
            }
            return fresh;
        }

        // Collect the names of the existing sub-directories; plain files are ignored.
        FileStatus[] children = fs.listStatus(root);
        String[] dirNames = new String[children.length];
        int existingCount = 0;
        for (int c = 0; c < children.length; c++) {
            if (children[c].isDir()) {
                dirNames[existingCount++] = children[c].getPath().getName();
            }
        }
        Arrays.sort(dirNames, 0, existingCount);

        Shard[] shards = new Shard[Math.max(existingCount, numShards)];
        for (int s = 0; s < existingCount; s++) {
            shards[s] = new Shard(-1L, base + dirNames[s], -1L);
        }

        // Top up with new directories, skipping any generated name that is already taken.
        int nextSuffix = existingCount;
        for (int slot = existingCount; slot < numShards; slot++) {
            String candidate;
            do {
                candidate = base + prefix + NUMBER_FORMAT.format(nextSuffix);
                nextSuffix++;
            } while (fs.exists(new Path(candidate)));
            shards[slot] = new Shard(-1L, candidate, -1L);
        }
        return shards;
    }

    /**
     * Updates each shard in place with the current segment generation of its directory.
     *
     * <p>A shard whose directory does not exist gets generation {@code -1}. An array entry is
     * replaced only when the generation found differs from the one it already carries.
     *
     * @param configuration used to obtain the file system and to open the shard directories
     * @param shardArr shards to refresh; entries may be replaced
     * @throws IOException if a directory cannot be opened, read or closed
     */
    void setShardGeneration(Configuration configuration, Shard[] shardArr) throws IOException {
        FileSystem fs = FileSystem.get(configuration);
        for (int idx = 0; idx < shardArr.length; idx++) {
            Shard shard = shardArr[idx];
            Path shardDir = new Path(shard.getDirectory());
            long generation = -1;
            if (fs.exists(shardDir)) {
                FileSystemDirectory directory = new FileSystemDirectory(fs, shardDir, false, configuration);
                try {
                    generation = LuceneUtil.getCurrentSegmentGeneration(directory);
                } finally {
                    directory.close();
                }
            }
            if (generation != shard.getGeneration()) {
                shardArr[idx] = new Shard(shard.getVersion(), shard.getDirectory(), generation);
            }
        }
    }
}
