package jmind.core.ip;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.StringWriter;
import java.net.URL;

import jmind.base.util.GlobalConstants;

import org.json.JSONObject;

/**
 * Pre-processor for statistics data: turns a free-form address (or, failing
 * that, an ID-card number) into a {@link Location}.
 *
 * @author fbxie
 * $Revision: 1.1.2.4 $
 */
public class DataPreprocessor {

    /** Sink for diagnostics; library methods must not write to stdout/stderr directly. */
    private static final java.util.logging.Logger LOG =
            java.util.logging.Logger.getLogger(DataPreprocessor.class.getName());

    /** Number of characters read per chunk while draining the HTTP response. */
    private static final int READ_BUFFER_SIZE = 1024;

    /**
     * Command-line entry point: prints the province resolved for each argument.
     * When no arguments are given, a few built-in sample addresses are used.
     *
     * @param args addresses to resolve
     */
    public static void main(String[] args) {
        DataPreprocessor dp = new DataPreprocessor();
        if (args.length == 0) {
            args = new String[] { "中国北京市昌平区回龙观镇", "内蒙古", "回龙观" };
        }
        for (String a : args) {
            System.out.println(dp.parseLocation(a).getProvince());
        }
    }

    /**
     * Resolves a location, preferring the address and falling back to the
     * ID-card number when the address is null or blank.
     *
     * @param address free-form address; may be null or blank
     * @param pin     ID-card number; only consulted when {@code address} is null or blank
     * @return the resolved location, or {@code null} when both inputs are null or blank
     */
    public Location parseLocation(String address, String pin) {
        if (!isBlank(address)) {
            return parseLocation(address);
        }
        if (isBlank(pin)) {
            return null;
        }
        return parseLocationByPin(pin);
    }

    /**
     * Resolves a location from an address. The address is first parsed locally
     * by {@link Location}; only when that yields no city is the remote geocoder
     * consulted.
     *
     * @param address free-form address
     * @return the remotely resolved location when the local parse has no city and
     *         the remote lookup succeeds; otherwise the locally parsed location
     */
    public Location parseLocation(String address) {
        Location local = new Location(address);
        if (local.getCity() != null) {
            return local;
        }
        try {
            return parseLocationByGoogle(address);
        } catch (Exception e) {
            // Remote geocoding is best-effort: record the failure with its cause
            // and keep the locally parsed result instead of failing the caller.
            LOG.log(java.util.logging.Level.WARNING,
                    "Remote geocoding failed; falling back to locally parsed location", e);
            return local;
        }
    }

    /**
     * Removes the characters {@code * @ $ #} from a name and trims surrounding
     * whitespace.
     *
     * @param name raw name; may be null
     * @return the cleaned name, or {@code null} when {@code name} is null
     */
    public String fixName(String name) {
        if (name == null) {
            return null;
        }
        return name.replaceAll("[\\*\\@\\$\\#]", "").trim();
    }

    /**
     * Derives a location from an ID-card number. Currently only Beijing
     * districts can be looked up.
     *
     * @param pin ID-card number
     * @return a location with city and province set to Beijing plus the district
     *         when {@code Pin.getBjd} returns a value; otherwise a location whose
     *         province is the "other provinces/cities" marker
     */
    public Location parseLocationByPin(String pin) {
        // NOTE(review): presumably getBjd returns the Beijing district for the
        // number, or null when it is not a Beijing number - confirm against Pin.
        String district = new Pin().getBjd(pin);
        Location location = new Location();
        if (district == null) {
            location.setProvince("外省市");
            return location;
        }
        location.setCity("北京");
        location.setProvince("北京");
        location.setDistrict(district);
        return location;
    }

    /**
     * Resolves an address through the Google Maps geocoding HTTP endpoint.
     * On status code 200 the address string of the first placemark is parsed;
     * on any other status code the caller's original address is parsed instead.
     *
     * @param address free-form address to look up
     * @return the location parsed from the geocoder's address, or from the
     *         original address when the status code is not 200
     * @throws Exception on encoding, network or JSON failures
     */
    public Location parseLocationByGoogle(String address) throws Exception {
        // Keep the encoded query in its own variable so that the fallback below
        // still sees the caller's address rather than its percent-encoded form.
        String query = java.net.URLEncoder.encode(address, GlobalConstants.UTF8);
        // NOTE(review): this looks like a legacy geocoding endpoint - confirm it is still served.
        URL url = new URL("http://ditu.google.cn/maps/geo?q=" + query);

        JSONObject response = new JSONObject(readAll(url));
        int code = response.getJSONObject("Status").getInt("code");
        if (code != 200) {
            return new Location(address);
        }

        JSONObject placemark = response.getJSONArray("Placemark").getJSONObject(0);
        String resolved = placemark.getString("address");
        LOG.fine("Geocoder resolved address: " + resolved);
        return new Location(resolved);
    }

    /** Returns true when the text is null or contains nothing but whitespace. */
    private static boolean isBlank(String text) {
        return text == null || text.trim().isEmpty();
    }

    /** Reads the whole response body of the URL as UTF-8 text, always closing the stream. */
    private static String readAll(URL url) throws java.io.IOException {
        java.io.InputStream raw = url.openStream();
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(raw, "utf-8"));
            StringWriter body = new StringWriter();
            char[] chunk = new char[READ_BUFFER_SIZE];
            int read;
            while ((read = reader.read(chunk)) != -1) {
                body.write(chunk, 0, read);
            }
            return body.toString();
        } finally {
            // Closing the underlying stream releases the connection even when reading fails.
            raw.close();
        }
    }
}
