package org.apache.mahout.fpm.pfpgrowth;

import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.HashSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.Parameters;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.iterator.FileLineIterable;
import org.apache.mahout.common.iterator.StringRecordIterator;
import org.apache.mahout.fpm.pfpgrowth.convertors.ContextStatusUpdater;
import org.apache.mahout.fpm.pfpgrowth.convertors.SequenceFileOutputCollector;
import org.apache.mahout.fpm.pfpgrowth.convertors.string.StringOutputConverter;
import org.apache.mahout.fpm.pfpgrowth.convertors.string.TopKStringPatterns;
import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth;
import org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthObj;
import org.apache.mahout.utils.regex.RegexMapper;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.class */
/**
 * Command-line driver for frequent-pattern mining.
 *
 * <p>Parses the command-line options into a {@link Parameters} bag and then either runs the
 * in-process ("sequential") miner in {@link #runFPGrowth(Parameters)} or delegates to
 * {@link PFPGrowth#runPFPGrowth(Parameters)} for the map-reduce method.
 */
public final class FPGrowthDriver extends AbstractJob {
    private static final Logger log = LoggerFactory.getLogger(FPGrowthDriver.class);

    /** Command-line option names that are not exposed as constants elsewhere in this file. */
    private static final String MIN_SUPPORT = "minSupport";
    private static final String SPLITTER_PATTERN = "splitterPattern";
    private static final String NUM_TREE_CACHE_ENTRIES = "numTreeCacheEntries";
    private static final String USE_FPG2_FLAG = "useFPG2";

    private static final String SEQUENTIAL_METHOD = "sequential";
    private static final String DEFAULT_ENCODING = "UTF-8";

    private FPGrowthDriver() {
    }

    /**
     * Program entry point: runs this driver through {@link ToolRunner} with a fresh
     * {@link Configuration}.
     *
     * @param strArr raw command-line arguments
     * @throws Exception whatever the underlying run throws
     */
    public static void main(String[] strArr) throws Exception {
        ToolRunner.run(new Configuration(), new FPGrowthDriver(), strArr);
    }

    /**
     * Registers the supported options, parses the arguments and launches the selected method.
     *
     * @param strArr raw command-line arguments
     * @return {@code 0} when a mining run was executed; {@code -1} when the arguments could not be
     *         parsed or the requested method is neither {@code sequential} nor the map-reduce method
     * @throws Exception if the selected mining run fails
     */
    public int run(String[] strArr) throws Exception {
        addInputOption();
        addOutputOption();
        addOption(MIN_SUPPORT, "s", "(Optional) The minimum number of times a co-occurrence must be present. Default Value: 3", "3");
        addOption(PFPGrowth.MAX_HEAP_SIZE, "k", "(Optional) Maximum Heap Size k, to denote the requirement to mine top K items. Default value: 50", "50");
        addOption(PFPGrowth.NUM_GROUPS, "g", "(Optional) Number of groups the features should be divided in the map-reduce version. Doesn't work in sequential version Default Value:1000", Integer.toString(1000));
        addOption(SPLITTER_PATTERN, RegexMapper.REGEX, "Regular Expression pattern used to split given string transaction into itemsets. Default value splits comma separated itemsets.  Default Value: \"[ ,\\t]*[,|\\t][ ,\\t]*\" ", "[ ,\t]*[,|\t][ ,\t]*");
        addOption(NUM_TREE_CACHE_ENTRIES, "tc", "(Optional) Number of entries in the tree cache to prevent duplicate tree building. (Warning) a first level conditional FP-Tree might consume a lot of memory, so keep this value small, but big enough to prevent duplicate tree building. Default Value:5 Recommended Values: [5-10]", "5");
        addOption(DefaultOptionCreator.METHOD_OPTION, DefaultOptionCreator.METHOD_OPTION, "Method of processing: sequential|mapreduce", SEQUENTIAL_METHOD);
        addOption(PFPGrowth.ENCODING, "e", "(Optional) The file encoding.  Default value: UTF-8", DEFAULT_ENCODING);
        addFlag(USE_FPG2_FLAG, "2", "Use an alternate FPG implementation");
        if (parseArguments(strArr) == null) {
            return -1;
        }

        Parameters parameters = new Parameters();
        copyOptionIfPresent(parameters, MIN_SUPPORT, MIN_SUPPORT);
        copyOptionIfPresent(parameters, PFPGrowth.MAX_HEAP_SIZE, PFPGrowth.MAX_HEAP_SIZE);
        copyOptionIfPresent(parameters, PFPGrowth.NUM_GROUPS, PFPGrowth.NUM_GROUPS);
        // The command-line name and the parameter key intentionally differ for this option.
        copyOptionIfPresent(parameters, NUM_TREE_CACHE_ENTRIES, "treeCacheSize");
        copyOptionIfPresent(parameters, SPLITTER_PATTERN, PFPGrowth.SPLIT_PATTERN);
        // Encoding is always set, so the sequential path can rely on it being present.
        parameters.set(PFPGrowth.ENCODING, hasOption(PFPGrowth.ENCODING) ? getOption(PFPGrowth.ENCODING) : DEFAULT_ENCODING);
        if (hasOption(USE_FPG2_FLAG)) {
            parameters.set(PFPGrowth.USE_FPG2, SchemaSymbols.ATTVAL_TRUE);
        }

        Path inputPath = getInputPath();
        Path outputPath = getOutputPath();
        parameters.set("input", inputPath.toString());
        parameters.set("output", outputPath.toString());

        String method = getOption(DefaultOptionCreator.METHOD_OPTION);
        if (SEQUENTIAL_METHOD.equalsIgnoreCase(method)) {
            runFPGrowth(parameters);
            return 0;
        }
        if (DefaultOptionCreator.MAPREDUCE_METHOD.equalsIgnoreCase(method)) {
            HadoopUtil.delete(new Configuration(), outputPath);
            PFPGrowth.runPFPGrowth(parameters);
            return 0;
        }
        // Previously an unrecognised method fell through and reported success without doing any
        // work; report it as a failure instead.
        log.error("Unknown processing method '{}'; expected sequential or mapreduce", method);
        return -1;
    }

    /** Copies a command-line option into {@code parameters} under {@code key} when it was supplied. */
    private void copyOptionIfPresent(Parameters parameters, String optionName, String key) {
        if (hasOption(optionName)) {
            parameters.set(key, getOption(optionName));
        }
    }

    /**
     * Runs the in-process miner: reads transactions from the {@code input} path, writes the
     * resulting patterns to a sequence file at the {@code output} path, then reads that file back
     * and logs every entry.
     *
     * <p>The input is opened twice because it is consumed twice: once to build the frequency list
     * and once to mine the patterns.
     *
     * @param parameters mining parameters; {@code input} and the encoding must be set
     * @throws IOException if reading the input or writing the output fails
     */
    private static void runFPGrowth(Parameters parameters) throws IOException {
        log.info("Starting Sequential FPGrowth");
        int maxHeapSize = Integer.parseInt(parameters.get(PFPGrowth.MAX_HEAP_SIZE, "50"));
        int minSupport = Integer.parseInt(parameters.get(MIN_SUPPORT, "3"));
        Path output = new Path(parameters.get("output", "output.txt"));
        Path input = new Path(parameters.get("input"));
        Configuration configuration = new Configuration();
        // NOTE(review): the file system is resolved from the output URI and is also used to open
        // the input; confirm both paths are expected to live on the same file system.
        FileSystem fileSystem = FileSystem.get(output.toUri(), configuration);
        Charset encoding = Charset.forName(parameters.get(PFPGrowth.ENCODING));
        String splitPattern = parameters.get(PFPGrowth.SPLIT_PATTERN, PFPGrowth.SPLITTER.toString());
        boolean useFpg2 = SchemaSymbols.ATTVAL_TRUE.equals(parameters.get(PFPGrowth.USE_FPG2));
        HashSet<String> returnableFeatures = Sets.newHashSet();

        SequenceFile.Writer writer = null;
        InputStream transactionStream = null;
        InputStream fListStream = null;
        boolean succeeded = false;
        try {
            // Everything that acquires a resource happens inside the try so that a failure at any
            // point (including the second open) still releases what was already acquired.
            writer = new SequenceFile.Writer(fileSystem, configuration, output, Text.class, TopKStringPatterns.class);
            transactionStream = fileSystem.open(input);
            fListStream = fileSystem.open(input);
            if (useFpg2) {
                FPGrowthObj fpGrowth = new FPGrowthObj();
                fpGrowth.generateTopKFrequentPatterns(
                    new StringRecordIterator(new FileLineIterable(transactionStream, encoding, false), splitPattern),
                    fpGrowth.generateFList(
                        new StringRecordIterator(new FileLineIterable(fListStream, encoding, false), splitPattern),
                        minSupport),
                    minSupport,
                    maxHeapSize,
                    returnableFeatures,
                    new StringOutputConverter(new SequenceFileOutputCollector(writer)));
            } else {
                FPGrowth fpGrowth = new FPGrowth();
                fpGrowth.generateTopKFrequentPatterns(
                    new StringRecordIterator(new FileLineIterable(transactionStream, encoding, false), splitPattern),
                    fpGrowth.generateFList(
                        new StringRecordIterator(new FileLineIterable(fListStream, encoding, false), splitPattern),
                        minSupport),
                    minSupport,
                    maxHeapSize,
                    returnableFeatures,
                    new StringOutputConverter(new SequenceFileOutputCollector(writer)),
                    new ContextStatusUpdater(null));
            }
            succeeded = true;
        } finally {
            // Close the read-only streams first and quietly, so a failing writer close cannot
            // leave them open.
            Closeables.close(transactionStream, true);
            Closeables.close(fListStream, true);
            // A writer close failure matters on success (output may be incomplete), but must not
            // mask the original exception when mining has already failed.
            Closeables.close(writer, !succeeded);
        }

        for (Pair<String, TopKStringPatterns> pair : FPGrowth.readFrequentPattern(configuration, output)) {
            log.info("Dumping Patterns for Feature: {} \n{}", pair.getFirst(), pair.getSecond());
        }
    }
}
