package org.spider.util;

import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

/* loaded from: input_file:org/spider/util/TfIdf.class */
/**
 * Static helpers for computing TF-IDF style term weights over a directory of
 * UTF-8 text files. Text is split into words with {@link IKSegmenter}.
 *
 * <p>Typical flow: {@link #allTf(String)} and {@link #wordSegmentCount(String)}
 * read the corpus, {@link #idf(Map)} derives per-word inverse document
 * frequencies from the word counts, and {@link #tfIdf(Map, Map)} multiplies the
 * two together.
 *
 * <p>I/O failures are treated as best-effort: they are reported on standard
 * error and the affected file/directory simply contributes no data.
 */
public class TfIdf {

    /**
     * Recursively lists every file below a directory.
     *
     * @param str path of the directory to walk
     * @return absolute, normalized paths of all files visited; empty when
     *         {@code str} is {@code null} or does not denote a directory. If the
     *         walk fails part-way, the paths collected so far are returned.
     */
    private static List<String> readDirs(String str) {
        final List<String> filePaths = new ArrayList<>();
        if (str == null) {
            return filePaths;
        }
        Path root = Paths.get(str);
        if (!Files.isDirectory(root)) {
            return filePaths;
        }
        try {
            Files.walkFileTree(root, new SimpleFileVisitor<Path>() {
                @Override
                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                    // Record the visited path itself. Resolving only the file name would
                    // anchor it to the JVM working directory instead of the walked tree.
                    filePaths.add(file.toAbsolutePath().normalize().toString());
                    return FileVisitResult.CONTINUE;
                }
            });
        } catch (IOException e) {
            System.err.println("TfIdf: failed while walking directory " + str);
            e.printStackTrace();
        }
        return filePaths;
    }

    /**
     * Reads a whole file as UTF-8 text.
     *
     * @param str path of the file to read
     * @return the file's lines, each terminated with {@code "\r\n"}; an empty
     *         string when the file cannot be read
     */
    private static String readFile(String str) {
        StringBuilder content = new StringBuilder();
        try {
            for (String line : Files.readAllLines(Paths.get(str), StandardCharsets.UTF_8)) {
                content.append(line).append("\r\n");
            }
        } catch (IOException e) {
            // Best-effort: an unreadable file is treated as empty text.
            System.err.println("TfIdf: failed to read file " + str);
            e.printStackTrace();
        }
        return content.toString();
    }

    /**
     * Counts, for every word, how many documents contain it.
     *
     * @param map document identifier mapped to that document's word counts
     * @return word mapped to the number of documents whose word-count map has
     *         that word as a key; empty when {@code map} is {@code null} or empty
     */
    private static Map<String, Integer> containWordOfAllDocNumber(Map<String, Map<String, Integer>> map) {
        if (map == null || map.isEmpty()) {
            return Collections.emptyMap();
        }
        Map<String, Integer> documentFrequency = new HashMap<>();
        for (Map<String, Integer> wordCounts : map.values()) {
            if (wordCounts == null) {
                continue;
            }
            for (String word : wordCounts.keySet()) {
                documentFrequency.merge(word, 1, Integer::sum);
            }
        }
        return documentFrequency;
    }

    /**
     * Converts raw word counts of one document into term frequencies.
     *
     * <p>NOTE(review): the divisor is the number of <em>distinct</em> words in
     * the document ({@code map.size()}), not the total number of word
     * occurrences, so values may exceed 1. This is kept as-is because callers
     * may rely on the resulting scale; confirm whether it is intended.
     *
     * @param map word mapped to its occurrence count in one document
     * @return word mapped to {@code count / numberOfDistinctWords}; empty when
     *         {@code map} is {@code null} or empty
     */
    private static HashMap<String, Double> tf(Map<String, Integer> map) {
        HashMap<String, Double> frequencies = new HashMap<>();
        if (map == null || map.isEmpty()) {
            return frequencies;
        }
        double distinctWords = map.size();
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            frequencies.put(entry.getKey(), entry.getValue() / distinctWords);
        }
        return frequencies;
    }

    /**
     * Splits text into words and counts how often each word occurs.
     *
     * @param str the text to segment
     * @return word mapped to its occurrence count, in order of first
     *         appearance; empty when {@code str} is {@code null} or empty. If the
     *         segmenter fails, the counts gathered so far are returned.
     */
    public static Map<String, Integer> segmentPlaintext(String str) {
        Map<String, Integer> counts = new LinkedHashMap<>();
        if (str == null || str.isEmpty()) {
            return counts;
        }
        try (StringReader reader = new StringReader(str)) {
            // Second argument presumably selects IK's "smart" segmentation mode;
            // verify against the IK Analyzer version in use.
            IKSegmenter segmenter = new IKSegmenter(reader, true);
            Lexeme lexeme;
            while ((lexeme = segmenter.next()) != null) {
                counts.merge(lexeme.getLexemeText(), 1, Integer::sum);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return counts;
    }

    /**
     * Selects the most frequent words that are longer than one character.
     *
     * @param i   maximum number of words to return
     * @param map word mapped to its occurrence count
     * @return at most {@code i} entries of {@code map}, ordered by descending
     *         count (ties keep the iteration order of {@code map}); words of
     *         length 0 or 1 are skipped. Empty when {@code map} is {@code null}
     *         or {@code i <= 0}.
     */
    public static Map<String, Integer> getMostFrequentWords(int i, Map<String, Integer> map) {
        Map<String, Integer> mostFrequent = new LinkedHashMap<>();
        if (map == null || map.isEmpty() || i <= 0) {
            return mostFrequent;
        }
        List<Map.Entry<String, Integer>> entries = new ArrayList<>(map.entrySet());
        // Integer.compare avoids the overflow a subtraction-based comparator can hit.
        entries.sort((left, right) -> Integer.compare(right.getValue(), left.getValue()));
        for (Map.Entry<String, Integer> entry : entries) {
            if (mostFrequent.size() >= i) {
                break;
            }
            String word = entry.getKey();
            if (word != null && word.length() > 1) {
                mostFrequent.put(word, entry.getValue());
            }
        }
        return mostFrequent;
    }

    /**
     * Computes term frequencies for every file below a directory.
     *
     * @param str path of the directory holding the documents
     * @return absolute file path mapped to that file's term frequencies
     */
    public static Map<String, Map<String, Double>> allTf(String str) {
        Map<String, Map<String, Double>> frequenciesByFile = new HashMap<>();
        for (String filePath : readDirs(str)) {
            frequenciesByFile.put(filePath, tf(segmentPlaintext(readFile(filePath))));
        }
        return frequenciesByFile;
    }

    /**
     * Computes raw word counts for every file below a directory.
     *
     * @param str path of the directory holding the documents
     * @return absolute file path mapped to that file's word counts
     */
    public static Map<String, Map<String, Integer>> wordSegmentCount(String str) {
        Map<String, Map<String, Integer>> countsByFile = new HashMap<>();
        for (String filePath : readDirs(str)) {
            countsByFile.put(filePath, segmentPlaintext(readFile(filePath)));
        }
        return countsByFile;
    }

    /**
     * Computes the inverse document frequency of every word in the corpus as
     * {@code ln(documentCount / (documentsContainingWord + 1))}.
     *
     * <p>{@code documentCount} is the number of entries in {@code map}. The
     * {@code + 1} in the denominator means a word present in every document
     * gets a slightly negative value.
     *
     * @param map document identifier mapped to that document's word counts
     * @return word mapped to its IDF value; empty when {@code map} is
     *         {@code null} or empty
     */
    public static Map<String, Double> idf(Map<String, Map<String, Integer>> map) {
        if (map == null || map.isEmpty()) {
            return Collections.emptyMap();
        }
        Map<String, Integer> documentFrequency = containWordOfAllDocNumber(map);
        double documentCount = map.size();
        Map<String, Double> inverseFrequencies = new HashMap<>();
        for (Map.Entry<String, Integer> entry : documentFrequency.entrySet()) {
            inverseFrequencies.put(entry.getKey(), Math.log(documentCount / (entry.getValue() + 1.0d)));
        }
        return inverseFrequencies;
    }

    /**
     * Multiplies each document's term frequencies by the corpus-wide IDF values.
     *
     * @param map  document identifier mapped to that document's term frequencies
     * @param map2 word mapped to its IDF value
     * @return document identifier mapped to word mapped to {@code tf * idf}.
     *         A word with no entry in {@code map2} gets weight {@code 0.0};
     *         a document whose frequency map is {@code null} gets an empty map.
     *         Empty when {@code map} is {@code null}.
     */
    public static Map<String, Map<String, Double>> tfIdf(Map<String, Map<String, Double>> map, Map<String, Double> map2) {
        Map<String, Map<String, Double>> weightsByDocument = new HashMap<>();
        if (map == null) {
            return weightsByDocument;
        }
        for (Map.Entry<String, Map<String, Double>> document : map.entrySet()) {
            Map<String, Double> weights = new HashMap<>();
            Map<String, Double> termFrequencies = document.getValue();
            if (termFrequencies != null) {
                for (Map.Entry<String, Double> term : termFrequencies.entrySet()) {
                    Double idfValue = map2 == null ? null : map2.get(term.getKey());
                    // A word unknown to the IDF table carries no weight rather than crashing.
                    double idfWeight = idfValue == null ? 0.0d : idfValue;
                    weights.put(term.getKey(), term.getValue() * idfWeight);
                }
            }
            weightsByDocument.put(document.getKey(), weights);
        }
        return weightsByDocument;
    }
}
