/*
 * Decompiled with CFR 0.152.
 */
package ciir.umass.edu.features;

import ciir.umass.edu.features.FeatureStats;
import ciir.umass.edu.learning.DataPoint;
import ciir.umass.edu.learning.DenseDataPoint;
import ciir.umass.edu.learning.RankList;
import ciir.umass.edu.learning.SparseDataPoint;
import ciir.umass.edu.utilities.FileUtils;
import ciir.umass.edu.utilities.RankLibError;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * Static helpers for reading ranked-list feature files, partitioning them
 * (shuffle, k-fold cross validation, train/test split) and writing them back
 * to disk, plus a small command-line front end ({@link #main(String[])}).
 */
public class FeatureManager {
    /**
     * Command-line entry point.
     *
     * <p>Depending on the options given, this shuffles the ranked lists read
     * from the {@code -input} files, partitions them into {@code -k} folds,
     * or writes feature statistics for the model named by
     * {@code -feature_stats}. Shuffling/partitioning takes precedence over
     * feature statistics when both are requested. When the argument count
     * does not match either mode, the usage text is printed and nothing else
     * is done.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        boolean statsRequested = Arrays.asList(args).contains("-feature_stats");
        if ((args.length < 3 && !statsRequested) || (args.length != 2 && statsRequested)) {
            printUsage();
            return;
        }

        List<String> rankingFiles = new ArrayList<String>();
        String outputDir = "";
        String modelFileName = "";
        boolean shuffle = false;
        boolean doFeatureStats = false;
        int nFold = 0;
        float tvs = -1.0f;

        for (int i = 0; i < args.length; ++i) {
            String option = args[i];
            if (option.equalsIgnoreCase("-input")) {
                rankingFiles.add(optionValue(args, ++i, option));
            } else if (option.equalsIgnoreCase("-k")) {
                nFold = Integer.parseInt(optionValue(args, ++i, option));
            } else if (option.equalsIgnoreCase("-shuffle")) {
                shuffle = true;
            } else if (option.equalsIgnoreCase("-tvs")) {
                tvs = Float.parseFloat(optionValue(args, ++i, option));
            } else if (option.equalsIgnoreCase("-output")) {
                outputDir = FileUtils.makePathStandard(optionValue(args, ++i, option));
            } else if (option.equalsIgnoreCase("-feature_stats")) {
                doFeatureStats = true;
                modelFileName = optionValue(args, ++i, option);
            }
            // Unrecognised tokens are ignored.
        }

        if (shuffle || nFold > 0) {
            List<RankList> samples = FeatureManager.readInput(rankingFiles);
            if (samples.isEmpty()) {
                System.out.println("Error: The input file is empty.");
                return;
            }
            // Output file names are derived from the first input file only.
            String fn = FileUtils.getFileName(rankingFiles.get(0));
            if (shuffle) {
                fn = fn + ".shuffled";
                System.out.print("Shuffling... ");
                Collections.shuffle(samples);
                System.out.println("[Done]");
                System.out.print("Saving... ");
                FeatureManager.save(samples, outputDir + fn);
                System.out.println("[Done]");
            }
            if (nFold > 0) {
                List<List<RankList>> trains = new ArrayList<List<RankList>>();
                List<List<RankList>> tests = new ArrayList<List<RankList>>();
                List<List<RankList>> valis = new ArrayList<List<RankList>>();
                System.out.println("Partitioning... ");
                FeatureManager.prepareCV(samples, nFold, tvs, trains, valis, tests);
                System.out.println("[Done]");
                try {
                    for (int i = 0; i < trains.size(); ++i) {
                        String prefix = outputDir + "f" + (i + 1);
                        System.out.print("Saving fold " + (i + 1) + "/" + nFold + "... ");
                        FeatureManager.save(trains.get(i), prefix + ".train." + fn);
                        FeatureManager.save(tests.get(i), prefix + ".test." + fn);
                        // Validation lists are only produced when a split ratio was given.
                        if (tvs > 0.0f) {
                            FeatureManager.save(valis.get(i), prefix + ".validation." + fn);
                        }
                        System.out.println("[Done]");
                    }
                }
                catch (Exception ex) {
                    throw RankLibError.create("Cannot save partition data.\nOccured in FeatureManager::main(): ", ex);
                }
            }
        } else if (doFeatureStats) {
            try {
                FeatureStats fs = new FeatureStats(modelFileName);
                fs.writeFeatureStats();
            }
            catch (Exception ex) {
                throw RankLibError.create("Failure processing saved " + modelFileName + " model file.\nError occurred in FeatureManager::main(): ", ex);
            }
        }
    }

    /** Prints the command-line usage text to standard output. */
    private static void printUsage() {
        System.out.println("Usage: java -cp bin/RankLib.jar ciir.umass.edu.features.FeatureManager <Params>");
        System.out.println("Params:");
        System.out.println("\t-input <file>\t\tSource data (ranked lists)");
        System.out.println("\t-output <dir>\t\tThe output directory");
        System.out.println("");
        System.out.println("  [+] Shuffling");
        System.out.println("\t-shuffle\t\tCreate a copy of the input file in which the ordering of all ranked lists (e.g. queries) is randomized.");
        System.out.println("\t\t\t\t(the order among objects (e.g. documents) within each ranked list is certainly unchanged).");
        System.out.println("  [+] k-fold Partitioning (sequential split)");
        System.out.println("\t-k <fold>\t\tThe number of folds");
        System.out.println("\t[ -tvs <x \\in [0..1]> ] Train-validation split ratio (x)(1.0-x)");
        System.out.println("");
        System.out.println("  NOTE: If both -shuffle and -k are specified, the input data will be shuffled and then sequentially partitioned.");
        System.out.println("");
        System.out.println("Feature Statistics -- Saved model feature use frequencies and statistics.");
        System.out.println("-input and -output parameters are not used.");
        System.out.println("\t-feature_stats\tName of a saved, feature-limited, LTR model text file.");
        System.out.println("\t\t\tDoes not process Coordinate Ascent, LambdaRank, ListNet or RankNet models.");
        System.out.println("\t\t\tas they include all features rather than selected feature subsets.");
        System.out.println("");
    }

    /**
     * Returns the value that follows a command-line option, failing with a
     * descriptive error instead of an index-out-of-bounds when the option is
     * the last token.
     */
    private static String optionValue(String[] args, int valueIndex, String option) {
        if (valueIndex >= args.length) {
            throw RankLibError.create("Error in FeatureManager::main(): Missing value for option " + option);
        }
        return args[valueIndex];
    }

    /**
     * Reads all ranked lists from a feature file using the dense
     * representation and without requiring a relevant document per list.
     *
     * @param inputFile path of the feature file
     * @return the ranked lists in file order
     */
    public static List<RankList> readInput(String inputFile) {
        return FeatureManager.readInput(inputFile, false, false);
    }

    /**
     * Reads ranked lists from a feature file.
     *
     * <p>Blank lines and lines starting with {@code #} are skipped. Each
     * remaining line is parsed into a data point; consecutive data points
     * sharing the same ID form one ranked list.
     *
     * @param inputFile path of the feature file
     * @param mustHaveRelDoc when {@code true}, ranked lists that contain no
     *        data point with a label greater than zero are dropped
     * @param useSparseRepresentation when {@code true} lines are parsed as
     *        {@link SparseDataPoint}, otherwise as {@link DenseDataPoint}
     * @return the ranked lists in file order
     * @throws RuntimeException (created by {@code RankLibError.create}) wrapping
     *         any failure while opening, reading or parsing the file
     */
    public static List<RankList> readInput(String inputFile, boolean mustHaveRelDoc, boolean useSparseRepresentation) {
        List<RankList> samples = new ArrayList<RankList>();
        int countEntries = 0;
        // try-with-resources so the reader is released even when parsing fails.
        try (BufferedReader in = FileUtils.smartReader(inputFile)) {
            String lastID = "";
            boolean hasRel = false;
            List<DataPoint> rl = new ArrayList<DataPoint>();
            String content;
            while ((content = in.readLine()) != null) {
                content = content.trim();
                if (content.isEmpty() || content.startsWith("#")) {
                    continue;
                }
                if (countEntries % 10000 == 0) {
                    // Report the number of ranked lists collected so far.
                    System.out.print("\rReading feature file [" + inputFile + "]: " + samples.size() + "... ");
                }
                DataPoint qp = useSparseRepresentation ? new SparseDataPoint(content) : new DenseDataPoint(content);
                // A change of ID closes the ranked list collected so far.
                if (!lastID.isEmpty() && !lastID.equals(qp.getID())) {
                    if (!mustHaveRelDoc || hasRel) {
                        samples.add(new RankList(rl));
                    }
                    rl = new ArrayList<DataPoint>();
                    hasRel = false;
                }
                if (qp.getLabel() > 0.0f) {
                    hasRel = true;
                }
                lastID = qp.getID();
                rl.add(qp);
                ++countEntries;
            }
            // Flush the final ranked list, which has no following ID change.
            if (!rl.isEmpty() && (!mustHaveRelDoc || hasRel)) {
                samples.add(new RankList(rl));
            }
        }
        catch (Exception ex) {
            throw RankLibError.create("Error in FeatureManager::readInput(): ", ex);
        }
        System.out.println("\rReading feature file [" + inputFile + "]... [Done.]            ");
        System.out.println("(" + samples.size() + " ranked lists, " + countEntries + " entries read)");
        return samples;
    }

    /**
     * Reads several feature files and concatenates their ranked lists in the
     * order the files are given.
     *
     * @param inputFiles paths of the feature files
     * @return all ranked lists from all files
     */
    public static List<RankList> readInput(List<String> inputFiles) {
        List<RankList> samples = new ArrayList<RankList>();
        for (String inputFile : inputFiles) {
            samples.addAll(FeatureManager.readInput(inputFile, false, false));
        }
        return samples;
    }

    /**
     * Reads feature ids from a feature definition file.
     *
     * <p>Blank lines and lines starting with {@code #} are skipped; on every
     * other line the text before the first tab is parsed as an integer id.
     *
     * @param featureDefFile path of the feature definition file
     * @return the feature ids in file order
     * @throws NumberFormatException if an id is not a valid integer
     * @throws RuntimeException (created by {@code RankLibError.create}) wrapping
     *         an {@link IOException} raised while reading
     */
    public static int[] readFeature(String featureDefFile) {
        List<String> fids = new ArrayList<String>();
        try (BufferedReader in = FileUtils.smartReader(featureDefFile)) {
            String content;
            while ((content = in.readLine()) != null) {
                content = content.trim();
                if (content.isEmpty() || content.startsWith("#")) {
                    continue;
                }
                fids.add(content.split("\t")[0].trim());
            }
        }
        catch (IOException ex) {
            throw RankLibError.create("Error in FeatureManager::readFeature(): ", ex);
        }
        int[] features = new int[fids.size()];
        for (int i = 0; i < features.length; ++i) {
            features[i] = Integer.parseInt(fids.get(i));
        }
        return features;
    }

    /**
     * Builds the feature id list {@code 1..max}, where {@code max} is the
     * largest number of known features reported by any data point in the
     * samples.
     *
     * @param samples ranked lists to inspect; must not be empty
     * @return the ids {@code 1, 2, ..., max}
     * @throws RuntimeException (created by {@code RankLibError.create}) if
     *         {@code samples} is empty
     */
    public static int[] getFeatureFromSampleVector(List<RankList> samples) {
        if (samples.isEmpty()) {
            throw RankLibError.create("Error in FeatureManager::getFeatureFromSampleVector(): There are no training samples.");
        }
        int knownFeatures = 0;
        for (RankList rl : samples) {
            for (int i = 0; i < rl.size(); ++i) {
                knownFeatures = Math.max(knownFeatures, rl.get(i).getNumberOfKnownFeatures());
            }
        }
        int[] features = new int[knownFeatures];
        for (int i = 0; i < knownFeatures; ++i) {
            features[i] = i + 1;
        }
        return features;
    }

    /**
     * Partitions the samples into {@code nFold} train/test folds without a
     * validation split.
     *
     * @param samples ranked lists to partition
     * @param nFold number of folds; must be positive
     * @param trainingData receives one training list per fold
     * @param testData receives one test list per fold
     */
    public static void prepareCV(List<RankList> samples, int nFold, List<List<RankList>> trainingData, List<List<RankList>> testData) {
        FeatureManager.prepareCV(samples, nFold, -1.0f, trainingData, null, testData);
    }

    /**
     * Sequentially partitions the samples into {@code nFold} folds.
     *
     * <p>Each fold's test set is a contiguous run of
     * {@code samples.size() / nFold} ranked lists; the last fold additionally
     * takes whatever remains. Everything outside the test run is training
     * data. When {@code tvs > 0}, the trailing
     * {@code (int) (train.size() * (1 - tvs))} training lists are moved into
     * the fold's validation list (last training element first). Every list
     * placed in an output is a copy made with the {@code RankList} copy
     * constructor. The resulting query ids are printed per split.
     *
     * @param samples ranked lists to partition
     * @param nFold number of folds; must be positive
     * @param tvs train-validation split ratio; values {@code <= 0} disable the
     *        validation split
     * @param trainingData receives one training list per fold
     * @param validationData receives one validation list per fold when
     *        {@code tvs > 0}; may be {@code null} otherwise
     * @param testData receives one test list per fold
     * @throws RuntimeException (created by {@code RankLibError.create}) if
     *         {@code nFold} is not positive, or if {@code tvs > 0} and
     *         {@code validationData} is {@code null}
     */
    public static void prepareCV(List<RankList> samples, int nFold, float tvs, List<List<RankList>> trainingData, List<List<RankList>> validationData, List<List<RankList>> testData) {
        if (nFold <= 0) {
            throw RankLibError.create("Error in FeatureManager::prepareCV(): The number of folds must be positive, but was " + nFold + ".");
        }
        boolean withValidation = tvs > 0.0f;
        if (withValidation && validationData == null) {
            // Fail before touching the output lists rather than midway through.
            throw RankLibError.create("Error in FeatureManager::prepareCV(): A validation list is required when tvs > 0.");
        }

        int total = samples.size();
        int foldSize = total / nFold;
        for (int fold = 0; fold < nFold; ++fold) {
            System.out.print("\rCreating data for fold-" + (fold + 1) + "...");
            // Test indices form the half-open range [testStart, testEnd);
            // the last fold absorbs the remainder of the integer division.
            int testStart = fold * foldSize;
            int testEnd = (fold == nFold - 1) ? total : testStart + foldSize;

            List<RankList> train = new ArrayList<RankList>();
            List<RankList> test = new ArrayList<RankList>();
            List<RankList> vali = new ArrayList<RankList>();
            for (int j = 0; j < total; ++j) {
                RankList copy = new RankList(samples.get(j));
                if (j >= testStart && j < testEnd) {
                    test.add(copy);
                } else {
                    train.add(copy);
                }
            }
            if (withValidation) {
                int validationSize = (int)((double)train.size() * (1.0 - (double)tvs));
                for (int j = 0; j < validationSize; ++j) {
                    vali.add(train.remove(train.size() - 1));
                }
            }
            trainingData.add(train);
            testData.add(test);
            if (withValidation) {
                validationData.add(vali);
            }
        }
        System.out.println("\rCreating data for " + nFold + " folds... [Done]            ");
        FeatureManager.printQueriesForSplit("Train", trainingData);
        FeatureManager.printQueriesForSplit("Validate", validationData);
        FeatureManager.printQueriesForSplit("Test", testData);
    }

    /**
     * Prints, one line per fold, the ids of the ranked lists in a split.
     *
     * @param name label printed in front of each fold
     * @param split per-fold ranked lists, or {@code null} when the split does
     *        not exist, in which case a single notice line is printed
     */
    public static void printQueriesForSplit(String name, List<List<RankList>> split) {
        if (split == null) {
            // println so that the next split's output starts on its own line.
            System.out.println("No " + name + " split.");
            return;
        }
        for (int i = 0; i < split.size(); ++i) {
            StringBuilder line = new StringBuilder(name + "[" + i + "]=");
            for (RankList rankList : split.get(i)) {
                line.append(" \"").append(rankList.getID()).append("\"");
            }
            System.out.println(line);
        }
    }

    /**
     * Splits the samples sequentially: the first
     * {@code (int) (samples.size() * percentTrain)} ranked lists are copied
     * into {@code trainingData}, the rest into {@code testData}.
     *
     * @param samples ranked lists to split
     * @param percentTrain fraction of the samples used for training
     *        (presumably within [0, 1]; larger values index past the end of
     *        {@code samples})
     * @param trainingData receives the training copies
     * @param testData receives the test copies
     */
    public static void prepareSplit(List<RankList> samples, double percentTrain, List<RankList> trainingData, List<RankList> testData) {
        int size = (int)((double)samples.size() * percentTrain);
        for (int i = 0; i < size; ++i) {
            trainingData.add(new RankList(samples.get(i)));
        }
        for (int i = size; i < samples.size(); ++i) {
            testData.add(new RankList(samples.get(i)));
        }
    }

    /**
     * Writes every data point of every ranked list to a file, one data point
     * per line (using {@code toString()}), replacing any existing file.
     *
     * @param samples ranked lists to write
     * @param outputFile path of the file to create
     * @throws RuntimeException (created by {@code RankLibError.create}) wrapping
     *         any failure while opening or writing the file
     */
    public static void save(List<RankList> samples, String outputFile) {
        // try-with-resources so the file handle is released even when a write fails.
        try (BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile)))) {
            for (RankList sample : samples) {
                FeatureManager.save(sample, out);
            }
        }
        catch (Exception ex) {
            throw RankLibError.create("Error in FeatureManager::save(): ", ex);
        }
    }

    /** Writes each data point of one ranked list on its own line. */
    private static void save(RankList r, BufferedWriter out) throws IOException {
        for (int j = 0; j < r.size(); ++j) {
            out.write(r.get(j).toString());
            out.newLine();
        }
    }
}

