/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.utils.dict;

import de.julielab.genemapper.utils.IOUtils;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DictionaryClusterCleaner {
    static String GREEK = "(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)";
    HashMap<String, TreeSet<String>> clusters = new HashMap();

    DictionaryClusterCleaner(File dict) throws IOException {
        System.out.println("reading from file: " + dict);
        ArrayList<String> dictList = IOUtils.readFile2ArrayList(dict);
        System.out.println("building clusters...");
        for (String entry : dictList) {
            String[] values = entry.split("\t");
            if (values.length != 2) {
                System.err.println("unexpected line in dict: " + entry);
                System.exit(-1);
            }
            this.add2Clusters(values[1], values[0]);
        }
    }

    private void applyRule1() {
        System.out.println("cleaning clusters by rule1...");
        for (String id : this.clusters.keySet()) {
            TreeSet<String> syns = this.clusters.get(id);
            TreeSet<String> newSyns = new TreeSet<String>((SortedSet<String>)syns);
            for (String syn1 : syns) {
                String[] v = syn1.split("\\s+");
                if (v.length > 1) continue;
                Pattern pat = Pattern.compile("(" + syn1 + ") [0-9]");
                for (String syn2 : syns) {
                    Matcher m3;
                    if (syn1.equals(syn2) || !(m3 = pat.matcher(syn2)).matches()) continue;
                    newSyns.remove(syn1);
                }
            }
            this.clusters.put(id, newSyns);
        }
    }

    private void add2Clusters(String id, String syn) {
        TreeSet<String> syns = new TreeSet<String>();
        if (this.clusters.containsKey(id)) {
            syns = this.clusters.get(id);
        }
        syns.add(syn);
        this.clusters.put(id, syns);
    }

    private void writeClusters2File(File outFile) throws IOException {
        FileWriter fw = new FileWriter(outFile);
        for (String id : this.clusters.keySet()) {
            TreeSet<String> syns = this.clusters.get(id);
            for (String syn : syns) {
                fw.write(syn + "\t" + id + "\n");
            }
        }
        fw.close();
        System.out.println("written to file: " + outFile);
    }

    public static void main(String[] args) throws IOException {
        DictionaryClusterCleaner d = new DictionaryClusterCleaner(new File("/tmp/entrezGeneUniprotHGNC_Lexicon.unique"));
        d.applyRule1();
        d.writeClusters2File(new File("/tmp/newdict"));
    }
}

