package org.apache.mahout.fpm.pfpgrowth;

import com.ibm.wsdl.Constants;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.GenericsUtil;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.Parameters;
import org.apache.mahout.fpm.pfpgrowth.convertors.string.TopKStringPatterns;
import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/mahout-core-0.3.jar:org/apache/mahout/fpm/pfpgrowth/PFPGrowth.class */
/**
 * Static driver for the PFP-Growth MapReduce pipeline.
 *
 * <p>{@link #runPFPGrowth(Parameters)} chains the stages in order: parallel counting, item
 * grouping, transaction sorting, parallel FP-Growth and aggregation. Every stage reads its
 * settings from a {@link Parameters} object and works below the directory stored under the
 * output key ({@code Constants.ELEM_OUTPUT}); intermediate results live in the
 * {@code parallelcounting}, {@code sortedoutput}, {@code fpgrowth} and {@code frequentPatterns}
 * sub-directories.
 *
 * <p>This class is a utility holder and cannot be instantiated.
 */
public final class PFPGrowth {
    /**
     * Item separator pattern: a single comma, pipe or tab, together with any spaces, commas or
     * tabs directly around it.
     */
    public static final Pattern SPLITTER = Pattern.compile("[ ,\t]*[,|\t][ ,\t]*");
    private static final Logger log = LoggerFactory.getLogger(PFPGrowth.class);

    /** Value written to {@code io.serializations} before lists and maps are (de)serialized. */
    private static final String SERIALIZATIONS =
            "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization";

    private PFPGrowth() {
    }

    /**
     * Decodes the list stored as a string under {@code str} in {@code parameters}.
     *
     * <p>Side effect: overwrites {@code io.serializations} on {@code configuration}.
     *
     * @param parameters    parameter set holding the serialized list
     * @param str           key under which the serialized list is stored
     * @param configuration Hadoop configuration used to build the stringifier
     * @return the decoded list; an empty list when the key is absent, because the serialized
     *         form of an empty list is used as the default value
     * @throws IOException if serialization or deserialization fails
     */
    public static List<Pair<String, Long>> deserializeList(Parameters parameters, String str, Configuration configuration) throws IOException {
        List<Pair<String, Long>> emptyList = new ArrayList<Pair<String, Long>>();
        configuration.set("io.serializations", SERIALIZATIONS);
        DefaultStringifier<List<Pair<String, Long>>> stringifier =
                new DefaultStringifier<List<Pair<String, Long>>>(configuration, GenericsUtil.getClass(emptyList));
        String fallback = stringifier.toString(emptyList);
        return stringifier.fromString(parameters.get(str, fallback));
    }

    /**
     * Decodes the map stored as a string under {@code str} in {@code parameters}.
     *
     * <p>Side effect: overwrites {@code io.serializations} on {@code configuration}.
     *
     * @param parameters    parameter set holding the serialized map
     * @param str           key under which the serialized map is stored
     * @param configuration Hadoop configuration used to build the stringifier
     * @return the decoded map; an empty map when the key is absent, because the serialized
     *         form of an empty map is used as the default value
     * @throws IOException if serialization or deserialization fails
     */
    public static Map<String, Long> deserializeMap(Parameters parameters, String str, Configuration configuration) throws IOException {
        Map<String, Long> emptyMap = new HashMap<String, Long>();
        configuration.set("io.serializations", SERIALIZATIONS);
        DefaultStringifier<Map<String, Long>> stringifier =
                new DefaultStringifier<Map<String, Long>>(configuration, GenericsUtil.getClass(emptyMap));
        String fallback = stringifier.toString(emptyMap);
        return stringifier.fromString(parameters.get(str, fallback));
    }

    /**
     * Reads the (item, count) pairs written by the parallel counting stage and returns those
     * whose count is at least {@code minSupport} (default 3).
     *
     * <p>The result is ordered by descending count; ties are broken by ascending item name.
     *
     * @param parameters parameter set supplying the output directory and {@code minSupport}
     * @return the filtered, ordered list of (item, count) pairs
     * @throws IOException if the counting output cannot be read
     */
    public static List<Pair<String, Long>> readFList(Parameters parameters) throws IOException {
        int minSupport = Integer.parseInt(parameters.get("minSupport", "3"));
        Configuration configuration = new Configuration();
        String countingDir = parameters.get(Constants.ELEM_OUTPUT) + "/parallelcounting";
        FileSystem fileSystem = FileSystem.get(new Path(countingDir).toUri(), configuration);
        FileStatus[] parts = fileSystem.globStatus(new Path(countingDir + "/part-*"));

        PriorityQueue<Pair<String, Long>> queue = new PriorityQueue<Pair<String, Long>>(11, new Comparator<Pair<String, Long>>() {
            @Override
            public int compare(Pair<String, Long> left, Pair<String, Long> right) {
                // Higher counts first; equal counts fall back to item name order.
                int byCount = right.getSecond().compareTo(left.getSecond());
                if (byCount != 0) {
                    return byCount;
                }
                return left.getFirst().compareTo(right.getFirst());
            }
        });

        // Reusable holders that reader.next() fills in for each record.
        Text item = new Text();
        LongWritable count = new LongWritable();
        for (FileStatus part : parts) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, part.getPath(), configuration);
            try {
                while (reader.next(item, count)) {
                    if (count.get() >= minSupport) {
                        queue.add(new Pair<String, Long>(item.toString(), Long.valueOf(count.get())));
                    }
                }
            } finally {
                // Close every reader, even when a read fails part-way through.
                reader.close();
            }
        }

        // Draining the queue yields the elements in comparator order.
        List<Pair<String, Long>> fList = new ArrayList<Pair<String, Long>>();
        while (!queue.isEmpty()) {
            fList.add(queue.poll());
        }
        return fList;
    }

    /**
     * Collects the frequent patterns from every {@code part-*} file in the
     * {@code frequentPatterns} output directory.
     *
     * @param parameters parameter set supplying the output directory
     * @return the concatenation of the patterns read from each part file
     * @throws IOException if the pattern files cannot be read
     */
    public static List<Pair<String, TopKStringPatterns>> readFrequentPattern(Parameters parameters) throws IOException {
        Configuration configuration = new Configuration();
        String patternDir = parameters.get(Constants.ELEM_OUTPUT) + "/frequentPatterns";
        FileSystem fileSystem = FileSystem.get(new Path(patternDir).toUri(), configuration);
        FileStatus[] parts = fileSystem.globStatus(new Path(patternDir + "/part-*"));
        List<Pair<String, TopKStringPatterns>> patterns = new ArrayList<Pair<String, TopKStringPatterns>>();
        for (FileStatus part : parts) {
            patterns.addAll(FPGrowth.readFrequentPattern(fileSystem, configuration, part.getPath()));
        }
        return patterns;
    }

    /**
     * Runs the complete pipeline: counting, grouping, transaction sorting, parallel FP-Growth
     * and aggregation, in that order.
     *
     * @param parameters parameter set shared by all stages; it is modified along the way
     *                   ({@code fList} and {@code gList} are set by grouping and cleared by
     *                   aggregation)
     * @throws IOException            on file system errors or when a MapReduce job fails
     * @throws InterruptedException   if waiting for a job is interrupted
     * @throws ClassNotFoundException if a job class cannot be resolved
     */
    public static void runPFPGrowth(Parameters parameters) throws IOException, InterruptedException, ClassNotFoundException {
        startParallelCounting(parameters);
        startGroupingItems(parameters);
        startTransactionSorting(parameters);
        startParallelFPGrowth(parameters);
        startAggregating(parameters);
    }

    /**
     * Runs the aggregation job over {@code <output>/fpgrowth}, writing to
     * {@code <output>/frequentPatterns} (any existing directory there is deleted first).
     *
     * <p>Side effect: {@code fList} and {@code gList} in {@code parameters} are set to the
     * empty string before the parameters are placed in the job configuration, and are not
     * restored afterwards.
     *
     * @param parameters parameter set supplying the output directory
     * @throws IOException            on file system errors or when the job fails
     * @throws InterruptedException   if waiting for the job is interrupted
     * @throws ClassNotFoundException if a job class cannot be resolved
     */
    public static void startAggregating(Parameters parameters) throws IOException, InterruptedException, ClassNotFoundException {
        parameters.set("fList", "");
        parameters.set("gList", "");
        Configuration configuration = newJobConfiguration(parameters);

        String input = parameters.get(Constants.ELEM_OUTPUT) + "/fpgrowth";
        Job job = new Job(configuration, "PFP Aggregator Driver running over input: " + input);
        job.setJarByClass(PFPGrowth.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TopKStringPatterns.class);

        FileInputFormat.addInputPath(job, new Path(input));
        Path outputPath = new Path(parameters.get(Constants.ELEM_OUTPUT) + "/frequentPatterns");
        FileOutputFormat.setOutputPath(job, outputPath);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(AggregatorMapper.class);
        job.setCombinerClass(AggregatorReducer.class);
        job.setReducerClass(AggregatorReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        deleteIfExists(outputPath, configuration);
        runJob(job);
    }

    /**
     * Assigns every item of the frequency list to a group and stores both structures,
     * serialized, in {@code parameters} under {@code gList} (item to group id) and
     * {@code fList} (the frequency list itself).
     *
     * <p>Items are assigned in frequency-list order in consecutive runs of
     * {@code ceil(size / numGroups)} items, so group ids start at 0 and grow by one per run.
     *
     * @param parameters parameter set supplying {@code numGroups} (default 50) and the output
     *                   directory; receives {@code gList} and {@code fList}
     * @throws IOException              if the frequency list cannot be read or serialized
     * @throws IllegalArgumentException if {@code numGroups} is not positive
     */
    public static void startGroupingItems(Parameters parameters) throws IOException {
        int numGroups = Integer.parseInt(parameters.get("numGroups", "50"));
        if (numGroups <= 0) {
            // Previously 0 caused a bare ArithmeticException and negatives gave meaningless ids.
            throw new IllegalArgumentException("numGroups must be positive: " + numGroups);
        }

        Configuration configuration = new Configuration();
        List<Pair<String, Long>> fList = readFList(parameters);

        // Ceiling division: number of items that share one group id.
        long itemsPerGroup = fList.size() / numGroups;
        if (fList.size() != itemsPerGroup * numGroups) {
            itemsPerGroup++;
        }

        Map<String, Long> gList = new HashMap<String, Long>();
        long index = 0;
        for (Pair<String, Long> entry : fList) {
            // itemsPerGroup is at least 1 whenever this loop body runs (non-empty list).
            gList.put(entry.getFirst(), Long.valueOf(index / itemsPerGroup));
            index++;
        }

        log.info("No of Features: {}", Integer.valueOf(fList.size()));
        parameters.set("gList", serializeMap(gList, configuration));
        parameters.set("fList", serializeList(fList, configuration));
    }

    /**
     * Runs the counting job over the text input, writing (Text, LongWritable) records to
     * {@code <output>/parallelcounting} (any existing directory there is deleted first).
     *
     * @param parameters parameter set supplying the input path and output directory
     * @throws IOException            on file system errors or when the job fails
     * @throws InterruptedException   if waiting for the job is interrupted
     * @throws ClassNotFoundException if a job class cannot be resolved
     */
    public static void startParallelCounting(Parameters parameters) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration configuration = newJobConfiguration(parameters);

        String input = parameters.get(Constants.ELEM_INPUT);
        Job job = new Job(configuration, "Parallel Counting Driver running over input: " + input);
        job.setJarByClass(PFPGrowth.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.addInputPath(job, new Path(input));
        Path outputPath = new Path(parameters.get(Constants.ELEM_OUTPUT) + "/parallelcounting");
        FileOutputFormat.setOutputPath(job, outputPath);
        deleteIfExists(outputPath, configuration);

        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(ParallelCountingMapper.class);
        job.setCombinerClass(ParallelCountingReducer.class);
        job.setReducerClass(ParallelCountingReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        runJob(job);
    }

    /**
     * Runs the transaction sorting job over the text input, writing
     * (LongWritable, TransactionTree) records to {@code <output>/sortedoutput} (any existing
     * directory there is deleted first).
     *
     * <p>{@code gList} is blanked in {@code parameters} while the job configuration is built
     * and the job runs, and is put back afterwards, including when the job fails or throws.
     *
     * @param parameters parameter set supplying the input path and output directory
     * @throws IOException            on file system errors or when the job fails
     * @throws InterruptedException   if waiting for the job is interrupted
     * @throws ClassNotFoundException if a job class cannot be resolved
     */
    public static void startTransactionSorting(Parameters parameters) throws IOException, InterruptedException, ClassNotFoundException {
        String savedGList = parameters.get("gList");
        parameters.set("gList", "");
        try {
            Configuration configuration = newJobConfiguration(parameters);

            String input = parameters.get(Constants.ELEM_INPUT);
            Job job = new Job(configuration, "PFP Transaction Sorting running over input" + input);
            job.setJarByClass(PFPGrowth.class);
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(TransactionTree.class);
            job.setOutputKeyClass(LongWritable.class);
            job.setOutputValueClass(TransactionTree.class);

            FileInputFormat.addInputPath(job, new Path(input));
            Path outputPath = new Path(parameters.get(Constants.ELEM_OUTPUT) + "/sortedoutput");
            FileOutputFormat.setOutputPath(job, outputPath);
            deleteIfExists(outputPath, configuration);

            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(TransactionSortingMapper.class);
            job.setReducerClass(TransactionSortingReducer.class);
            job.setOutputFormatClass(SequenceFileOutputFormat.class);

            runJob(job);
        } finally {
            // Later stages read gList from parameters, so restore it on every exit path.
            parameters.set("gList", savedGList);
        }
    }

    /**
     * Runs the parallel FP-Growth job over {@code <output>/sortedoutput}, writing
     * (Text, TopKStringPatterns) records to {@code <output>/fpgrowth} (any existing directory
     * there is deleted first).
     *
     * @param parameters parameter set supplying the output directory
     * @throws IOException            on file system errors or when the job fails
     * @throws InterruptedException   if waiting for the job is interrupted
     * @throws ClassNotFoundException if a job class cannot be resolved
     */
    public static void startParallelFPGrowth(Parameters parameters) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration configuration = newJobConfiguration(parameters);

        String input = parameters.get(Constants.ELEM_OUTPUT) + "/sortedoutput";
        Job job = new Job(configuration, "PFP Growth Driver running over input" + input);
        job.setJarByClass(PFPGrowth.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(TransactionTree.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TopKStringPatterns.class);

        FileInputFormat.addInputPath(job, new Path(input));
        Path outputPath = new Path(parameters.get(Constants.ELEM_OUTPUT) + "/fpgrowth");
        FileOutputFormat.setOutputPath(job, outputPath);
        deleteIfExists(outputPath, configuration);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(ParallelFPGrowthMapper.class);
        job.setCombinerClass(ParallelFPGrowthCombiner.class);
        job.setReducerClass(ParallelFPGrowthReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        runJob(job);
    }

    /** Builds the configuration shared by all jobs: serialized parameters plus compression settings. */
    private static Configuration newJobConfiguration(Parameters parameters) {
        Configuration configuration = new Configuration();
        configuration.set("pfp.parameters", parameters.toString());
        configuration.set("mapred.compress.map.output", "true");
        configuration.set("mapred.output.compression.type", "BLOCK");
        return configuration;
    }

    /** Recursively deletes {@code path} on its file system when it already exists. */
    private static void deleteIfExists(Path path, Configuration configuration) throws IOException {
        FileSystem fileSystem = FileSystem.get(path.toUri(), configuration);
        if (fileSystem.exists(path)) {
            fileSystem.delete(path, true);
        }
    }

    /** Runs {@code job} to completion and raises an {@link IOException} when it reports failure. */
    private static void runJob(Job job) throws IOException, InterruptedException, ClassNotFoundException {
        if (!job.waitForCompletion(true)) {
            throw new IOException("Job failed: " + job.getJobName());
        }
    }

    /** Encodes {@code list} as a string; overwrites {@code io.serializations} on {@code configuration}. */
    private static String serializeList(List<Pair<String, Long>> list, Configuration configuration) throws IOException {
        configuration.set("io.serializations", SERIALIZATIONS);
        DefaultStringifier<List<Pair<String, Long>>> stringifier =
                new DefaultStringifier<List<Pair<String, Long>>>(configuration, GenericsUtil.getClass(list));
        return stringifier.toString(list);
    }

    /** Encodes {@code map} as a string; overwrites {@code io.serializations} on {@code configuration}. */
    private static String serializeMap(Map<String, Long> map, Configuration configuration) throws IOException {
        configuration.set("io.serializations", SERIALIZATIONS);
        DefaultStringifier<Map<String, Long>> stringifier =
                new DefaultStringifier<Map<String, Long>>(configuration, GenericsUtil.getClass(map));
        return stringifier.toString(map);
    }
}
