package cn.elwy.common.util.io;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.io.UnsupportedEncodingException;
import java.net.URL;

import cn.elwy.common.log.Logger;
import cn.elwy.common.log.LoggerFactory;
import cn.elwy.common.util.CloseUtil;

/**
 * Determines the character encoding of a file or input stream from its
 * byte order mark (BOM).
 * <p>
 * The {@code getCharsetName} methods only look at the BOM; content that does
 * not start with a recognised BOM yields {@code null}.
 *
 * @author huangsq
 * @version 1.0, 2018-02-19
 */
public class CharsetDetector {

	public static final String ENCODING_GBK = "GBK";
	public static final String ENCODING_GB18030 = "GB18030";
	public static final String ENCODING_ISO88591 = "ISO-8859-1";
	public static final String ENCODING_UTF8 = "UTF-8";

	/** First charset tried by {@link #getEncoding(String)}. */
	private static final String ENCODING_GB2312 = "GB2312";

	/** Length in bytes of the longest BOM recognised (the UTF-32 BOMs). */
	private static final int BOM_SIZE = 4;

	protected Logger logger = LoggerFactory.getLogger(getClass());

	private static volatile CharsetDetector instance;

	/**
	 * Returns the shared detector, creating it on first use.
	 * The instance is created lazily with double-checked locking on the
	 * volatile {@code instance} field.
	 *
	 * @return the shared {@code CharsetDetector}
	 */
	public static CharsetDetector getInstance() {
		if (instance == null) {
			synchronized (CharsetDetector.class) {
				if (instance == null) {
					instance = new CharsetDetector();
				}
			}
		}
		return instance;
	}

	/**
	 * Returns the first charset, tried in the order GB2312, UTF-8, GBK,
	 * GB18030, ISO-8859-1, through which {@code str} survives an
	 * encode/decode round trip unchanged.
	 * <p>
	 * Note that this tests whether the charset can represent the text; it
	 * does not inspect the bytes the text was originally decoded from.
	 *
	 * @param str the text to test
	 * @return the matching charset name, or an empty string when none of the
	 *         candidates round-trips the text (including when {@code str} is
	 *         {@code null})
	 */
	protected static String getEncoding(String str) {
		if (isEncoding(str, ENCODING_GB2312)) {
			return ENCODING_GB2312;
		}
		if (isUTF8(str)) {
			return ENCODING_UTF8;
		}
		if (isGBK(str)) {
			return ENCODING_GBK;
		}
		if (isGB18030(str)) {
			return ENCODING_GB18030;
		}
		if (isEncoding(str, ENCODING_ISO88591)) {
			return ENCODING_ISO88591;
		}
		return "";
	}

	/**
	 * Tests whether {@code str} round-trips through UTF-8 unchanged.
	 *
	 * @param str the text to test
	 * @return {@code true} if encoding and decoding as UTF-8 yields an equal string
	 */
	protected static boolean isUTF8(String str) {
		return isEncoding(str, ENCODING_UTF8);
	}

	/**
	 * Tests whether {@code str} round-trips through GBK unchanged.
	 *
	 * @param str the text to test
	 * @return {@code true} if encoding and decoding as GBK yields an equal string
	 */
	protected static boolean isGBK(String str) {
		return isEncoding(str, ENCODING_GBK);
	}

	/**
	 * Tests whether {@code str} round-trips through GB18030 unchanged.
	 *
	 * @param str the text to test
	 * @return {@code true} if encoding and decoding as GB18030 yields an equal string
	 */
	protected static boolean isGB18030(String str) {
		return isEncoding(str, ENCODING_GB18030);
	}

	/**
	 * Tests whether {@code text} is unchanged after being encoded to bytes
	 * with the given charset and decoded again.
	 *
	 * @param text   the text to test
	 * @param encode the charset name
	 * @return {@code true} if the round trip yields an equal string;
	 *         {@code false} otherwise, including when either argument is
	 *         {@code null} or the charset is not supported
	 */
	private static boolean isEncoding(String text, String encode) {
		if (text == null || encode == null) {
			return false;
		}
		try {
			return text.equals(new String(text.getBytes(encode), encode));
		} catch (UnsupportedEncodingException ignored) {
			// A charset this JVM does not provide cannot be the answer.
			return false;
		}
	}

	/**
	 * Reads the encoding of a file from its BOM.
	 *
	 * @param file the file to inspect
	 * @return the charset name indicated by the BOM, or {@code null} if the
	 *         file has no recognised BOM
	 * @throws IOException if the file cannot be opened
	 */
	public String getCharsetName(File file) throws IOException {
		return getCharsetName(file.getAbsolutePath());
	}

	/**
	 * Reads the encoding of a file from its BOM.
	 *
	 * @param filePath path of the file to inspect
	 * @return the charset name indicated by the BOM, or {@code null} if the
	 *         file has no recognised BOM
	 * @throws IOException if the file cannot be opened
	 */
	public String getCharsetName(String filePath) throws IOException {
		// The stream is closed by getCharsetName(InputStream).
		return getCharsetName(new FileInputStream(filePath));
	}

	/**
	 * Reads the encoding of the resource at a URL from its BOM.
	 *
	 * @param url location of the resource to inspect
	 * @return the charset name indicated by the BOM, or {@code null} if the
	 *         resource has no recognised BOM
	 * @throws IOException if the URL cannot be opened
	 */
	public String getCharsetName(URL url) throws IOException {
		// The stream is closed by getCharsetName(InputStream).
		return getCharsetName(url.openStream());
	}

	/**
	 * Reads the encoding of an input stream from its BOM. The stream is
	 * always closed before this method returns.
	 * <p>
	 * An {@code IOException} raised while reading is logged and results in
	 * {@code null}; it is not rethrown.
	 *
	 * @param is the stream to inspect; {@code null} yields {@code null}
	 * @return one of "UTF-32BE", "UTF-32LE", "UTF-8", "UTF-16BE", "UTF-16LE",
	 *         or {@code null} if no BOM is recognised
	 * @throws IOException declared for compatibility with existing callers
	 */
	public String getCharsetName(InputStream is) throws IOException {
		if (is == null) {
			return null;
		}
		byte[] bom = new byte[BOM_SIZE];
		int length = 0;
		try {
			// A single read may deliver fewer bytes than requested, so keep
			// reading until the buffer is full or the stream ends.
			while (length < BOM_SIZE) {
				int count = is.read(bom, length, BOM_SIZE - length);
				if (count < 0) {
					break;
				}
				length += count;
			}
		} catch (IOException e) {
			logger.error(e.getMessage(), e);
			return null;
		} finally {
			CloseUtil.close(is);
		}
		return detectBom(bom, length);
	}

	/**
	 * Matches the leading bytes of a stream against the known BOM signatures.
	 * Only the first {@code length} bytes of {@code bom} are considered, so
	 * unread (zero) padding is never mistaken for part of a UTF-32 BOM.
	 *
	 * @param bom    buffer holding the leading bytes
	 * @param length number of valid bytes in {@code bom}
	 * @return the charset name, or {@code null} if no BOM matches
	 */
	private static String detectBom(byte[] bom, int length) {
		// The UTF-32 checks come first: the UTF-32LE BOM begins with the
		// same two bytes as the UTF-16LE BOM.
		if (length >= 4) {
			if (bom[0] == (byte) 0x00 && bom[1] == (byte) 0x00
					&& bom[2] == (byte) 0xFE && bom[3] == (byte) 0xFF) {
				return "UTF-32BE";
			}
			if (bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE
					&& bom[2] == (byte) 0x00 && bom[3] == (byte) 0x00) {
				return "UTF-32LE";
			}
		}
		if (length >= 3 && bom[0] == (byte) 0xEF && bom[1] == (byte) 0xBB && bom[2] == (byte) 0xBF) {
			return ENCODING_UTF8;
		}
		if (length >= 2) {
			if (bom[0] == (byte) 0xFE && bom[1] == (byte) 0xFF) {
				return "UTF-16BE";
			}
			if (bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE) {
				return "UTF-16LE";
			}
		}
		return null;
	}

}