package edu.isi.nlp.files;

import com.google.common.base.Charsets;
import com.google.common.base.Functions;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.FileWriteMode;
import com.google.common.io.Files;
import com.google.common.math.IntMath;
import edu.isi.nlp.parameters.Parameters;
import edu.isi.nlp.symbols.Symbol;
import edu.isi.nlp.symbols.SymbolUtils;
import java.io.File;
import java.io.IOException;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:edu/isi/nlp/files/SplitCorpus.class */
/**
 * Command-line utility which splits a corpus, given as a file list and/or a document-ID-to-file
 * map, into chunks. Each chunk is written to its own numbered sub-directory of the output
 * directory, and index files listing the per-chunk files are written to the output directory
 * itself. See {@link #USAGE} for the parameters which control it.
 */
public final class SplitCorpus {
    private static final Logger log = LoggerFactory.getLogger(SplitCorpus.class);
    private static final String USAGE = "SplitCorpus paramFile\nParameters are:\n\tcom.bbn.bue.splitCorpus.inputList: list of files to split. Optional.\n\tcom.bbn.bue.splitCorpus.inputMap: docId to file map of files to split. Optional.\n\tcom.bbn.bue.splitCorpus.outputDir: path to write output\n\tcom.bbn.bue.splitCorpus.numChunks: the number of chunks to split the corpus into. Optional.\n\tcom.bbn.bue.splitCorpus.chunkSize: the number of of files to put in each chunk. Optional.\n\nIf inputList is given, output file lists will be written to outputDir/split/fileList.txt\n\tand a list of these lists will be written to outputDir/listOfLists.txt\nIf inputMap is given, output file maps will be written to outputDir/split/fileMap.txt\n\tand a list of these maps will be written to outputDir/listOfMaps.txt\nAt least one of inputList and inputMap must be specified.\nExactly one of numChunks and chunkSize may be specified.";
    /** Parameter naming the file list to split. */
    public static final String INPUT_LIST_PARAM = "com.bbn.bue.splitCorpus.inputList";
    /** Parameter naming the document-ID-to-file map to split. */
    public static final String INPUT_MAP_PARAM = "com.bbn.bue.splitCorpus.inputMap";
    /** Parameter naming the directory under which all output is written. */
    public static final String OUTPUT_DIR_PARAM = "com.bbn.bue.splitCorpus.outputDir";
    /** Parameter giving the number of chunks to produce. */
    public static final String NUM_CHUNKS_PARAM = "com.bbn.bue.splitCorpus.numChunks";
    /** Parameter giving the number of files to put in each chunk. */
    public static final String CHUNK_SIZE_PARAM = "com.bbn.bue.splitCorpus.chunkSize";

    /**
     * Program entry point. Any exception is reported on standard error and the JVM exits with
     * status 1.
     *
     * @param args command-line arguments; exactly one is expected, the path of the parameter file
     */
    public static void main(String[] args) {
        try {
            trueMain(args);
        } catch (Exception e) {
            // Top-level boundary: print directly so the failure is visible even when no logging
            // backend is configured, then signal failure through the exit status.
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Loads the parameters, splits the input into chunks and writes every chunk plus the
     * top-level index files.
     *
     * @param args command-line arguments; exactly one is expected, the path of the parameter file
     * @throws IOException if an input cannot be read, a chunk directory cannot be created, or an
     *     output cannot be written
     */
    private static void trueMain(String[] args) throws IOException {
        if (args.length != 1) {
            log.info(USAGE);
            System.exit(1);
        }
        final Parameters params = Parameters.loadSerifStyle(new File(args[0]));
        final File outputDir = params.getCreatableDirectory(OUTPUT_DIR_PARAM);
        params.assertAtLeastOneDefined(INPUT_LIST_PARAM, INPUT_MAP_PARAM);
        final Optional<File> inputListFile = params.getOptionalExistingFile(INPUT_LIST_PARAM);
        final Optional<File> inputMapFile = params.getOptionalExistingFile(INPUT_MAP_PARAM);
        final ImmutableMap<Symbol, File> docIdToFile =
                loadDocIdToFileMap(inputListFile, inputMapFile);

        params.assertExactlyOneDefined(NUM_CHUNKS_PARAM, CHUNK_SIZE_PARAM);
        final Iterable<List<Map.Entry<Symbol, File>>> chunks;
        if (params.isPresent(NUM_CHUNKS_PARAM)) {
            chunks = splitToNChunks(docIdToFile, params.getPositiveInteger(NUM_CHUNKS_PARAM));
        } else {
            chunks = splitToChunksOfFixedSize(docIdToFile,
                    params.getPositiveInteger(CHUNK_SIZE_PARAM));
        }

        final List<File> chunkFileLists = Lists.newArrayList();
        final List<File> chunkFileMaps = Lists.newArrayList();
        int chunkIndex = 0;
        for (final List<Map.Entry<Symbol, File>> chunk : chunks) {
            final ImmutableMap<Symbol, File> chunkMap = ImmutableMap.copyOf(chunk);
            final ImmutableList<File> chunkFiles = ImmutableList.copyOf(chunkMap.values());

            // Each chunk lives in a sub-directory named after its zero-based index.
            final File chunkDir = new File(outputDir, Integer.toString(chunkIndex));
            // Fail here with a clear message rather than later with an opaque write error.
            if (!chunkDir.isDirectory() && !chunkDir.mkdirs()) {
                throw new IOException("Could not create chunk directory " + chunkDir);
            }

            // The per-chunk paths are always recorded, but each file is only written (and each
            // index only emitted below) when the corresponding kind of input was supplied.
            final File chunkMapFile = new File(chunkDir, "fileMap.txt");
            chunkFileMaps.add(chunkMapFile);
            if (inputMapFile.isPresent()) {
                FileUtils.writeSymbolToFileMap(chunkMap,
                        Files.asCharSink(chunkMapFile, Charsets.UTF_8));
            }

            final File chunkListFile = new File(chunkDir, "fileList.txt");
            chunkFileLists.add(chunkListFile);
            if (inputListFile.isPresent()) {
                FileUtils.writeFileList(chunkFiles,
                        Files.asCharSink(chunkListFile, Charsets.UTF_8));
            }
            chunkIndex++;
        }
        log.info("Split into {} chunks", chunkIndex);

        if (inputListFile.isPresent()) {
            final File listOfLists = new File(outputDir, "listOfLists.txt");
            log.info("List of file lists written to {}", listOfLists);
            FileUtils.writeFileList(chunkFileLists,
                    Files.asCharSink(listOfLists, Charsets.UTF_8));
        }
        if (inputMapFile.isPresent()) {
            final File listOfMaps = new File(outputDir, "listOfMaps.txt");
            log.info("List of file maps written to {}", listOfMaps);
            FileUtils.writeFileList(chunkFileMaps,
                    Files.asCharSink(listOfMaps, Charsets.UTF_8));
        }
    }

    /**
     * Partitions the entries of the map, in its iteration order, into consecutive chunks of
     * {@code chunkSize} entries; the last chunk may be smaller.
     *
     * @param docIdToFile the entries to partition
     * @param chunkSize the number of entries per chunk; must be positive
     * @return the chunks, in order
     * @throws IllegalArgumentException if {@code chunkSize} is not positive
     */
    private static Iterable<List<Map.Entry<Symbol, File>>> splitToChunksOfFixedSize(
            ImmutableMap<Symbol, File> docIdToFile, int chunkSize) {
        Preconditions.checkArgument(chunkSize > 0);
        return Iterables.partition(docIdToFile.entrySet(), chunkSize);
    }

    /**
     * Splits the entries of the map into exactly {@code numChunks} chunks. Entries are placed in
     * consecutive chunks of size {@code ceil(size / numChunks)}; if that yields fewer than
     * {@code numChunks} chunks, empty chunks are appended to make up the count. Note this means
     * trailing chunks can be empty even when there are more entries than chunks (e.g. 5 entries
     * in 4 chunks are distributed 2, 2, 1, 0).
     *
     * @param docIdToFile the entries to split
     * @param numChunks the number of chunks to produce; must be positive
     * @return exactly {@code numChunks} chunks, in order
     * @throws IllegalArgumentException if {@code numChunks} is not positive
     */
    private static Iterable<List<Map.Entry<Symbol, File>>> splitToNChunks(
            ImmutableMap<Symbol, File> docIdToFile, int numChunks) {
        Preconditions.checkArgument(numChunks > 0);
        final List<Map.Entry<Symbol, File>> emptyChunk = ImmutableList.of();
        if (docIdToFile.isEmpty()) {
            // Partitioning needs a positive chunk size, so the empty corpus is handled directly.
            return Collections.nCopies(numChunks, emptyChunk);
        }
        final int chunkSize = IntMath.divide(docIdToFile.size(), numChunks, RoundingMode.UP);
        final ImmutableList<List<Map.Entry<Symbol, File>>> filledChunks =
                ImmutableList.copyOf(splitToChunksOfFixedSize(docIdToFile, chunkSize));
        if (filledChunks.size() == numChunks) {
            return filledChunks;
        }
        // Rounding the chunk size up can leave fewer chunks than requested; pad with empties.
        return Iterables.concat(filledChunks,
                Collections.nCopies(numChunks - filledChunks.size(), emptyChunk));
    }

    /**
     * Builds the document-ID-to-file map to split from whichever inputs were supplied. If the
     * map file is present its contents are returned; otherwise the files of the file list are
     * indexed by the symbol made from each file's absolute path. When both are present they are
     * additionally checked to describe exactly the same set of files.
     *
     * @param inputListFile the file list to load, if any
     * @param inputMapFile the document-ID-to-file map to load, if any
     * @return the mapping to split
     * @throws IOException if an input cannot be read
     * @throws IllegalArgumentException if neither input is present
     * @throws RuntimeException if the file list contains duplicates, or if both inputs are
     *     present but do not contain the same files
     */
    private static ImmutableMap<Symbol, File> loadDocIdToFileMap(Optional<File> inputListFile,
            Optional<File> inputMapFile) throws IOException {
        Preconditions.checkArgument(inputListFile.isPresent() || inputMapFile.isPresent());
        final Optional<ImmutableList<File>> fileList = inputListFile.isPresent()
                ? Optional.of(FileUtils.loadFileList(inputListFile.get()))
                : Optional.<ImmutableList<File>>absent();
        final Optional<ImmutableMap<Symbol, File>> fileMap = inputMapFile.isPresent()
                ? Optional.of(FileUtils.loadSymbolToFileMap(inputMapFile.get()))
                : Optional.<ImmutableMap<Symbol, File>>absent();

        if (fileList.isPresent()) {
            // De-duplicating must not shrink the list.
            if (ImmutableSet.copyOf(fileList.get()).size() != fileList.get().size()) {
                throw new RuntimeException("Input file list contains duplicates");
            }
        }
        if (fileList.isPresent() && fileMap.isPresent()) {
            if (fileList.get().size() != fileMap.get().size()) {
                throw new RuntimeException("Input file list and file map do not match in size ("
                        + fileList.get().size() + " vs " + fileMap.get().size() + ")");
            }
            final boolean sameFiles = ImmutableSet.copyOf(fileList.get())
                    .equals(ImmutableSet.copyOf(fileMap.get().values()));
            if (!sameFiles) {
                throw new RuntimeException(
                        "Input file list and file map do not contain exactly the same files");
            }
        }
        if (fileMap.isPresent()) {
            return fileMap.get();
        }
        // Only a list was supplied: use each file's absolute path as its document ID.
        return Maps.uniqueIndex(fileList.get(),
                Functions.compose(SymbolUtils.symbolizeFunction(),
                        FileUtils.toAbsolutePathFunction()));
    }
}
