/*
 * Decompiled with CFR 0.152.
 */
package de.gwdg.metadataqa.api.similarity;

import de.gwdg.metadataqa.api.similarity.BinaryMaker;
import de.gwdg.metadataqa.api.similarity.Clustering;
import de.gwdg.metadataqa.api.similarity.RecordPattern;
import de.gwdg.metadataqa.api.util.FileUtils;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * Turns comma separated profile lines into {@link RecordPattern} rows, derives a
 * binary pattern for each row and groups those patterns with {@link Clustering}.
 *
 * <p>Instances are stateful: every call to {@link #createBinaryPatternList()}
 * (and therefore to {@code buildCluster}) writes into an internal
 * binary-pattern-to-row index, and {@link #getNext()} advances a counter.
 */
public class ProfileReader {
    /** Threshold handed to {@link Clustering} when the caller does not supply one. */
    public static final double DEFAULT_TRESHOLD = 0.97;

    /** Matches {@code <anything without comma>,"<payload>"}; group 1 is the quoted payload. */
    private static final Pattern FIELD_COUNT_LINE = Pattern.compile("^[^,]+,\"(.*)\"$");

    private final List<String> profiles;
    /** Maps a binary pattern to the row that produced it; the last row wins on duplicates. */
    private final Map<String, RecordPattern> rowIndex;
    private final BinaryMaker binaryMaker;
    private int nextIndex = 0;

    /**
     * @param canonicalFieldList field names used to construct the {@link BinaryMaker}
     * @param profiles           profile lines; each line is split on {@code ","} when processed
     */
    public ProfileReader(List<String> canonicalFieldList, List<String> profiles) {
        this.binaryMaker = new BinaryMaker(canonicalFieldList);
        this.profiles = profiles;
        this.rowIndex = new HashMap<>();
    }

    /**
     * Builds the clusters using {@link #DEFAULT_TRESHOLD}.
     *
     * @return see {@link #buildCluster(double)}
     */
    public Map<List<RecordPattern>, Double> buildCluster() {
        return this.buildCluster(DEFAULT_TRESHOLD);
    }

    /**
     * Clusters the binary patterns of all profiles.
     *
     * @param treshold value passed through to the {@link Clustering} constructor
     * @return an insertion-ordered map whose keys are the clusters (rows sorted by
     *         descending percent) and whose values are the sum of the rows' percents;
     *         entries are ordered by descending sum
     */
    public Map<List<RecordPattern>, Double> buildCluster(double treshold) {
        List<String> binaryPatterns = this.createBinaryPatternList();
        Clustering clustering = new Clustering(binaryPatterns, treshold);
        List<List<String>> clusters = clustering.getClusters();

        // Insertion-ordered maps keep ties (equal sums / equal percents) in the order
        // the clustering produced them; the sorts below are stable.
        Map<List<RecordPattern>, Double> weightedClusters = new LinkedHashMap<>();
        for (List<String> terms : clusters) {
            double sum = 0.0;
            Map<String, RecordPattern> rowsByTerm = new LinkedHashMap<>();
            for (String term : terms) {
                RecordPattern row = this.rowIndex.get(term);
                sum += row.getPercent().doubleValue();
                rowsByTerm.put(term, row);
            }
            weightedClusters.put(this.sortTerms(rowsByTerm), sum);
        }
        return this.sortClusters(weightedClusters);
    }

    /** Returns a new insertion-ordered map with the entries sorted by descending value. */
    private Map<List<RecordPattern>, Double> sortClusters(Map<List<RecordPattern>, Double> sortableClusters) {
        return sortableClusters.entrySet().stream()
            .sorted((left, right) -> right.getValue().compareTo(left.getValue()))
            .collect(Collectors.toMap(
                Map.Entry::getKey,
                Map.Entry::getValue,
                (first, second) -> first,
                LinkedHashMap::new));
    }

    /**
     * Returns the current counter value and then increments it. The counter starts at 0.
     *
     * @return the counter value before the increment
     */
    public int getNext() {
        return this.nextIndex++;
    }

    /** Returns the map's rows sorted by descending percent. */
    private List<RecordPattern> sortTerms(Map<String, RecordPattern> sortableTerms) {
        return sortableTerms.entrySet().stream()
            .sorted((left, right) -> right.getValue().getPercent().compareTo(left.getValue().getPercent()))
            .map(Map.Entry::getValue)
            .collect(Collectors.toList());
    }

    /**
     * Sums the counts of the given rows.
     *
     * @param rows rows whose {@code getCount()} values are added up
     * @return the total as an {@code int}
     */
    public int count(List<RecordPattern> rows) {
        int total = 0;
        for (RecordPattern row : rows) {
            total += row.getCount().intValue();
        }
        return total;
    }

    /**
     * Creates a {@link RecordPattern} for every profile line and collects the binary
     * pattern of each. As a side effect the internal pattern-to-row index is updated.
     *
     * @return the binary patterns, one per profile line, in input order
     */
    public List<String> createBinaryPatternList() {
        List<String> binaryPatterns = new ArrayList<>(this.profiles.size());
        for (String line : this.profiles) {
            RecordPattern row = new RecordPattern(this.binaryMaker, Arrays.asList(line.split(",")));
            binaryPatterns.add(row.getBinary());
            this.rowIndex.put(row.getBinary(), row);
        }
        return binaryPatterns;
    }

    /**
     * Command line entry point.
     *
     * <p>Arguments: {@code <field-list-file> <profile-file> [list]}. With the literal
     * third argument {@code list} the binary patterns are printed one per line;
     * otherwise the clusters are built with the default threshold and every row is
     * printed as {@code <cluster number>,<row as CSV>}.
     *
     * @param args command line arguments as described above
     * @throws IOException              if the profile file cannot be read
     * @throws IllegalArgumentException if fewer than two arguments are given
     */
    public static void main(String[] args) throws IOException {
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException(
                "Usage: ProfileReader <field-list-file> <profile-file> [list]");
        }
        String fieldListFile = args[0];
        String profileFile = args[1];
        boolean produceList = args.length > 2 && "list".equals(args[2]);

        List<String> canonicalFieldList =
            ProfileReader.parseFieldCountLine(FileUtils.readFirstLineFromFile(fieldListFile));
        List<String> profiles = Files.readAllLines(Paths.get(profileFile), Charset.defaultCharset());
        ProfileReader profileReader = new ProfileReader(canonicalFieldList, profiles);

        if (produceList) {
            for (String binaryPattern : profileReader.createBinaryPatternList()) {
                System.out.println(binaryPattern);
            }
        } else {
            Map<List<RecordPattern>, Double> sortedClusters = profileReader.buildCluster();
            for (List<RecordPattern> cluster : sortedClusters.keySet()) {
                // One number per cluster, shared by all of its rows.
                int clusterNumber = profileReader.getNext();
                for (RecordPattern row : cluster) {
                    System.out.printf("%d,%s\n", clusterNumber, row.asCsv());
                }
            }
        }
    }

    /**
     * Extracts the field names from a line shaped like {@code key,"a=1,b=2"}.
     *
     * <p>The quoted part is split on commas and, for every piece, the text before the
     * first {@code =} is taken as the field name.
     *
     * @param line the line to parse; may be {@code null}
     * @return the field names in order of appearance; an empty list if the line is
     *         {@code null} or does not have the expected shape
     */
    public static List<String> parseFieldCountLine(String line) {
        List<String> fields = new ArrayList<>();
        if (line == null) {
            return fields;
        }
        Matcher matcher = FIELD_COUNT_LINE.matcher(line);
        if (matcher.matches()) {
            for (String fieldWithCount : matcher.group(1).split(",")) {
                fields.add(fieldWithCount.split("=", 2)[0]);
            }
        }
        return fields;
    }
}

