package com.antbrains.urlcrawler.db;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.log4j.Logger;

/* loaded from: input_file:com/antbrains/urlcrawler/db/TestMd5Distribution.class */
/**
 * Command-line utility that reports how the first byte of the MD5 digest is
 * distributed over a sample of URLs read from a text file.
 *
 * <p>Only lines starting with {@code http://wapbaike.baidu.com/} are counted.
 * Before hashing, every occurrence of {@code wapbaike} in such a line is
 * replaced with {@code baike}.
 */
public class TestMd5Distribution {
    protected static Logger logger = Logger.getLogger(TestMd5Distribution.class);

    /**
     * Reads the URL file line by line, hashes up to {@code count} matching
     * URLs and logs one {@code <firstByte>\t<occurrences>} line per distinct
     * first digest byte.
     *
     * @param strArr exactly two arguments: the path of the URL file (read as
     *               UTF-8) and the maximum number of matching URLs to hash
     * @throws Exception if the file cannot be opened or read, or if the count
     *                   argument is not a valid integer
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr.length != 2) {
            System.err.println("need 2 arg: urlFile count");
            System.exit(-1);
        }
        int maxUrls = Integer.parseInt(strArr[1]);
        Map<Byte, Integer> firstByteCounts = new HashMap<Byte, Integer>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(strArr[0]), "UTF8"));
        try {
            int matched = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.startsWith("http://wapbaike.baidu.com/")) {
                    continue;
                }
                matched++;
                // Stop once the requested number of matching URLs has been hashed.
                if (matched > maxUrls) {
                    break;
                }
                if (matched % 10000 == 0) {
                    logger.info("lineNumber: " + matched);
                }
                byte[] md5 = DigestUtils.md5(line.replace("wapbaike", "baike"));
                // Box the bucket key once instead of on every map access.
                Byte bucket = Byte.valueOf(md5[0]);
                Integer seen = firstByteCounts.get(bucket);
                firstByteCounts.put(bucket, Integer.valueOf(seen == null ? 1 : seen.intValue() + 1));
            }
        } finally {
            // Release the file handle even when reading or hashing fails.
            reader.close();
        }
        // Keys are logged as signed byte values; HashMap iteration order is unspecified.
        for (Map.Entry<Byte, Integer> entry : firstByteCounts.entrySet()) {
            logger.info(entry.getKey() + "\t" + entry.getValue());
        }
    }
}
