package jmind.core.image.ocr;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.jdesktop.swingx.util.OS;

/**
 * OCR （Optical Character Recognition，光学字符识别）
 * 是指电子设备（例如扫描仪或数码相机）检查纸上打印的字符，
 * 通过检测暗、亮的模式确定其形状，然后用字符识别方法将形状翻译成计算机文字的过程；
 * 即，对文本资料进行扫描，然后对图像文件进行分析处理，获取文字及版面信息的过程。
 * 如何除错或利用辅助信息提高识别正确率，是OCR最重要的课题，
 * ICR（Intelligent Character Recognition）的名词也因此而产生。
 * 衡量一个OCR系统性能好坏的主要指标有：拒识率、误识率、识别速度、用户界面的友好性，产品的稳定性，易用性及可行性等。
 * http://www.woxihuan.com/15205381/1321753538085292.shtml
 * http://www.cnblogs.com/brooks-dotnet/archive/2010/10/05/1844203.html
 * @author weibo-xie
 * 2012-8-8
 */
/**
 * Thin wrapper around the Tesseract command-line tool.
 *
 * <p>Both entry points launch an external {@code tesseract} process, let it write its result to
 * a file named {@code output.txt} next to the input image, and read that file back. Because the
 * output name is fixed, concurrent calls for images in the same directory will overwrite each
 * other's result file.
 */
public class OCR {
    /** Tesseract command-line switch that selects the recognition language(s). */
    private static final String LANG_OPTION = "-l";
    /** Platform line separator appended after every line of text that is read back. */
    private static final String EOL = System.getProperty("line.separator");
    /** Image processed by {@link #main(String[])} when no command-line argument is supplied. */
    private static final String DEFAULT_IMAGE = "/data/captcha/vcodejsp3.jpg";
    /** Output base name handed to Tesseract; the text suffix is added to it to find the result. */
    private static final String OUTPUT_BASE_NAME = "output";
    /** Suffix of the result file that is read back after a successful run. */
    private static final String TEXT_SUFFIX = ".txt";
    /**
     * Directory that holds the tesseract executable. Used on every platform except Mac OS X,
     * where the executable is invoked by its bare name instead.
     */
    private static String tessPath = new File("/Users/wbxie/work/workspace/im20/jmind-core/doc/Tesseract-ocr")
            .getAbsolutePath();

    /**
     * Recognizes a single image with {@link #macRecognize(File)} and prints the result.
     *
     * @param args optional; {@code args[0]} is the path of the image to recognize. When absent,
     *             the built-in default image path is used.
     */
    public static void main(String[] args) {
        String imagePath = (args != null && args.length > 0) ? args[0] : DEFAULT_IMAGE;
        try {
            System.out.println(macRecognize(new File(imagePath)));
            // Completion marker (Chinese for "recognition finished").
            System.out.println("识别结束**********");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs {@code tesseract <image> <dir>/output -l eng} for the given image and returns the
     * recognized text.
     *
     * <p>This method is best-effort and never throws for tool failures: when the process cannot
     * be started, exits with a non-zero code, or the result file cannot be read, the problem is
     * reported on standard error and an empty string is returned. The result file is removed
     * before returning.
     *
     * @param imageFile image to recognize; must not be {@code null}
     * @return the recognized text, each line terminated by the platform line separator, or an
     *         empty string when recognition failed
     */
    public static String macRecognize(File imageFile) {
        File outputFile = new File(imageFile.getParentFile(), OUTPUT_BASE_NAME);
        File textFile = new File(outputFile.getAbsolutePath() + TEXT_SUFFIX);

        // Absolute paths are used for both files, so no working directory is required.
        List<String> cmd = new ArrayList<String>();
        cmd.add("tesseract");
        cmd.add(imageFile.getAbsolutePath());
        cmd.add(outputFile.getAbsolutePath());
        cmd.add(LANG_OPTION);
        cmd.add("eng");

        try {
            StringBuilder toolOutput = new StringBuilder();
            int exitCode = runTesseract(cmd, null, toolOutput);
            if (exitCode != 0) {
                System.err.println("tesseract exited with code " + exitCode + ": " + toolOutput);
                return "";
            }
            return readTextFile(textFile);
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        } catch (InterruptedException e) {
            // Preserve the interrupt for the caller; this method itself stays non-throwing.
            Thread.currentThread().interrupt();
            return "";
        } finally {
            textFile.delete();
        }
    }

    /**
     * Converts the image with {@code ImageIOHelper.createImage}, runs Tesseract on the converted
     * file with simplified Chinese plus English ({@code -l chi_sim+eng}), and returns the
     * recognized text.
     *
     * <p>The process is started in the directory of {@code imageFile} and is given only the
     * <em>name</em> of the converted image, so the converted image is assumed to live in that
     * same directory. The result file is always removed; the converted image is removed only
     * when recognition fails.
     * NOTE(review): the converted image is intentionally left in place on success, matching the
     * previous behavior; confirm whether {@code ImageIOHelper.createImage} returns a temporary
     * file that should also be cleaned up after a successful run.
     *
     * @param imageFile   source image
     * @param imageFormat image format passed through to {@code ImageIOHelper.createImage}
     * @return the recognized text, each line terminated by the platform line separator
     * @throws RuntimeException if Tesseract exits with a non-zero code; the message contains the
     *                          exit code and whatever the tool printed
     * @throws Exception        if the image cannot be converted, the process cannot be started,
     *                          the thread is interrupted while waiting, or the result file
     *                          cannot be read
     */
    public static String recognizeText(File imageFile, String imageFormat) throws Exception {
        File tempImage = ImageIOHelper.createImage(imageFile, imageFormat);
        File workDir = imageFile.getParentFile();
        File outputFile = new File(workDir, OUTPUT_BASE_NAME);
        File textFile = new File(outputFile.getAbsolutePath() + TEXT_SUFFIX);

        List<String> cmd = new ArrayList<String>();
        // Mac OS X: bare executable name. Elsewhere: executable inside tessPath, joined with the
        // platform's own separator (a hard-coded backslash is wrong outside Windows).
        cmd.add(OS.isMacOSX() ? "tesseract" : new File(tessPath, "tesseract").getPath());
        // Example of the resulting command line: tesseract 1.jpg output -l chi_sim+eng
        cmd.add(tempImage.getName());
        cmd.add(outputFile.getName());
        cmd.add(LANG_OPTION);
        // Several languages form ONE argument joined by '+'; a separate "eng" argument would be
        // interpreted by tesseract as a config file name.
        cmd.add("chi_sim+eng");

        boolean succeeded = false;
        try {
            StringBuilder toolOutput = new StringBuilder();
            int exitCode = runTesseract(cmd, workDir, toolOutput);
            if (exitCode != 0) {
                throw new RuntimeException(describeFailure(exitCode, toolOutput.toString()));
            }
            String text = readTextFile(textFile);
            succeeded = true;
            return text;
        } finally {
            if (!succeeded) {
                tempImage.delete();
            }
            textFile.delete();
        }
    }

    /**
     * Starts the given command, drains its merged stdout/stderr into {@code toolOutput}, and
     * waits for it to finish.
     *
     * @param cmd        executable followed by its arguments
     * @param workDir    working directory for the process, or {@code null} to inherit the
     *                   current one
     * @param toolOutput receives everything the process printed
     * @return the exit code of the process
     * @throws IOException          if the process cannot be started or its output cannot be read
     * @throws InterruptedException if the current thread is interrupted while waiting; the
     *                              process is destroyed before the exception is rethrown
     */
    private static int runTesseract(List<String> cmd, File workDir, StringBuilder toolOutput)
            throws IOException, InterruptedException {
        ProcessBuilder pb = new ProcessBuilder(cmd);
        pb.directory(workDir);
        pb.redirectErrorStream(true);
        Process process = pb.start();

        // The output must be consumed before waiting: an unread pipe can fill up and block the
        // child process forever.
        BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                toolOutput.append(line).append(EOL);
            }
        } finally {
            reader.close();
        }

        try {
            return process.waitFor();
        } catch (InterruptedException e) {
            process.destroy();
            throw e;
        }
    }

    /**
     * Reads a UTF-8 text file and returns its content with every line terminated by the
     * platform line separator.
     *
     * @param file file to read
     * @return the file content
     * @throws IOException if the file cannot be opened or read
     */
    private static String readTextFile(File file) throws IOException {
        StringBuilder text = new StringBuilder();
        BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                text.append(line).append(EOL);
            }
        } finally {
            in.close();
        }
        return text.toString();
    }

    /**
     * Builds the exception message for a non-zero Tesseract exit code.
     *
     * @param exitCode   exit code returned by the process
     * @param toolOutput text printed by the process; may be empty
     * @return a description that starts with the code-specific message, followed by the exit
     *         code and, when available, the tool output
     */
    private static String describeFailure(int exitCode, String toolOutput) {
        String msg;
        switch (exitCode) {
        case 1:
            msg = "Errors accessing files. There may be spaces in your image's filename.";
            break;
        case 29:
            msg = "Cannot recognize the image or its selected region.";
            break;
        case 31:
            msg = "Unsupported image format.";
            break;
        default:
            msg = "Errors occurred.";
        }
        String details = toolOutput.trim();
        return msg + " (exit code " + exitCode + ")" + (details.length() == 0 ? "" : ": " + details);
    }
}
