/*
  Project Name:Kettle Engine
  File Name:XlsxReader.java
  Package Name:cn.benma666.excel
  Date:2015年6月24日下午8:50:10
  Copyright (c) 2015, jingma All Rights Reserved.
 */

package cn.benma666.excel;

import cn.benma666.myutils.DateUtil;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStrings;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Streaming (SAX event based) reader for Excel 2007+ ({@code .xlsx}) workbooks,
 * suitable for large files because worksheets are never fully loaded into memory.
 *
 * <p>Usage: subclass and override {@link #optRow(int, int, List)}, then call
 * {@link #readOneSheet(String)} or {@link #process(String)}. The handler is
 * stateful (current row, column, cell type), so one instance must not parse
 * several files concurrently.
 *
 * <p>Only the content of {@code <v>} elements is collected; cell text stored in
 * other child elements is not captured.
 *
 * <p>date: 2016-09-10
 *
 * @author jingma
 * @version 0.1
 */
public class XlsxReader extends DefaultHandler {
    /**
     * Logger.
     */
    protected Log log = LogFactory.getLog(getClass());

    /**
     * Value of the {@code t} (type) attribute of the current cell; {@code null} when absent.
     */
    private String cellType;
    /**
     * Value of the {@code s} (style index) attribute of the current cell; {@code -1} when absent.
     */
    private int cellS;
    /**
     * Styles table of the workbook being read, used to detect date formats.
     */
    private StylesTable stylesTable;
    /**
     * Shared strings table of the workbook being read.
     */
    private SharedStrings sst;
    /**
     * Character data accumulated for the element currently being parsed.
     */
    private String lastContents = "";
    /**
     * {@code true} when the current cell's {@code <v>} holds a shared-string index.
     */
    private boolean nextIsString;
    /**
     * Package of the workbook currently (or most recently) being read.
     */
    protected OPCPackage pkg;
    /**
     * Worksheet stream opened by {@link #readOneSheet(String)}.
     */
    protected InputStream sheet;
    /**
     * Zero-based index of the sheet being read by {@link #process(String)}; {@code -1} before any sheet.
     */
    private int sheetIndex = -1;
    /**
     * Values of the row currently being assembled, indexed by zero-based column.
     */
    private final List<String> rowlist = new ArrayList<>();
    /**
     * Zero-based counter of rows delivered so far for the current sheet.
     */
    protected int curRow = 0;
    /**
     * Zero-based column of the cell currently being parsed.
     */
    protected int curCol = 0;

    /**
     * Reads only the first worksheet of the workbook.
     *
     * <p>The first stream returned by {@code XSSFReader.getSheetsData()} is used
     * rather than a fixed relationship id, because in some files the first
     * sheet is {@code rId2} instead of {@code rId1}. This method does not modify
     * {@code sheetIndex}, so {@link #optRow(int, int, List)} receives whatever
     * value the field currently holds ({@code -1} on a fresh instance).
     *
     * @param path path of the {@code .xlsx} file
     * @throws Exception if the file cannot be opened or parsed
     */
    public void readOneSheet(String path) throws Exception {
        resetRowState();
        sheet = null;
        pkg = OPCPackage.open(path, PackageAccess.READ);
        // The try starts immediately after open() so the package is released
        // even when building the reader or the parser fails.
        try {
            XSSFReader r = new XSSFReader(pkg);
            SharedStrings strings = r.getSharedStringsTable();
            stylesTable = r.getStylesTable();
            XMLReader parser = fetchSheetParser(strings);
            sheet = r.getSheetsData().next();
            parser.parse(new InputSource(sheet));
        } finally {
            closeQuietly(sheet);
            closeQuietly(pkg);
        }
    }

    /**
     * Reads every worksheet of the workbook in order.
     *
     * <p>The sheet index passed to {@link #optRow(int, int, List)} starts at 0
     * for each call, and the row counter restarts at 0 for each sheet.
     *
     * @param path path of the {@code .xlsx} file
     * @throws Exception if the file cannot be opened or parsed
     */
    public void process(String path) throws Exception {
        sheetIndex = -1;
        // Opened read-only (like readOneSheet) so that closing the package
        // releases the file without attempting to save anything back to it.
        pkg = OPCPackage.open(path, PackageAccess.READ);
        try {
            XSSFReader r = new XSSFReader(pkg);
            SharedStrings strings = r.getSharedStringsTable();
            // Needed by endElement() to recognise date-formatted cells.
            stylesTable = r.getStylesTable();
            XMLReader parser = fetchSheetParser(strings);

            Iterator<InputStream> sheets = r.getSheetsData();
            while (sheets.hasNext()) {
                resetRowState();
                sheetIndex++;
                try (InputStream in = sheets.next()) {
                    parser.parse(new InputSource(in));
                }
            }
        } finally {
            closeQuietly(pkg);
        }
    }

    /**
     * Callback invoked once per parsed row; override it to implement the
     * business logic. The default implementation prints the row to standard output.
     *
     * <p>{@code rowList} is the reader's internal buffer and is cleared as soon
     * as this method returns; copy it if the values must be retained.
     *
     * @param sheetIndex
     *            index of the sheet being read
     * @param curRow
     *            zero-based index of the row within the sheet
     * @param rowList
     *            values of the current row, indexed by column
     */
    public void optRow(int sheetIndex, int curRow, List<String> rowList)
            throws RuntimeException {
        StringBuilder temp = new StringBuilder();
        for (String str : rowList) {
            temp.append(str).append("_");
        }
        System.out.println(curRow + "||" + temp);
    }

    /**
     * Creates a SAX reader that delivers worksheet events to this handler.
     *
     * @param sst shared strings table used to resolve {@code t="s"} cells
     * @return a reader whose content handler is this instance
     * @throws SAXException if the Xerces SAX parser cannot be created
     */
    public XMLReader fetchSheetParser(SharedStrings sst)
            throws SAXException {
        XMLReader parser = XMLReaderFactory.createXMLReader(
                "org.apache.xerces.parsers.SAXParser");
        // Worksheet XML never needs a DOCTYPE or external entities; turn them
        // off so a crafted file cannot trigger XXE. Best effort only.
        disableFeatureQuietly(parser, "http://apache.org/xml/features/disallow-doctype-decl", true);
        disableFeatureQuietly(parser, "http://xml.org/sax/features/external-general-entities", false);
        disableFeatureQuietly(parser, "http://xml.org/sax/features/external-parameter-entities", false);
        this.sst = sst;
        parser.setContentHandler(this);
        return parser;
    }

    /**
     * Records the position, type and style of a cell when a {@code <c>} element
     * opens, and clears the character buffer for every element.
     */
    @Override
    public void startElement(String uri, String localName, String name,
            Attributes attributes) throws SAXException {
        // c => cell
        if ("c".equals(name)) {
            String cellLocation = attributes.getValue("r");
            // The "r" attribute may be missing; in that case curCol keeps the
            // sequential position maintained by endElement().
            if (cellLocation != null && !cellLocation.isEmpty()) {
                int column = extractColumnNumber(cellLocation);
                if (column > 0) {
                    curCol = column - 1;
                }
            }
            cellType = attributes.getValue("t");
            // t="s" means the <v> content is an index into the shared strings table.
            nextIsString = "s".equals(cellType);
            String styleIndex = attributes.getValue("s");
            cellS = styleIndex == null ? -1 : Integer.parseInt(styleIndex);
        }
        // Start every element with an empty buffer.
        lastContents = "";
    }

    /**
     * Extracts the 1-based column number from a cell reference such as {@code "AB12"}.
     *
     * @param position cell reference; everything before the first digit is
     *                 treated as the column indicator
     * @return the 1-based column number, or 0 when the reference starts with a digit
     */
    public static int extractColumnNumber(String position) {
        int end = 0;
        // Bounds check first, so a reference without any digit does not overrun.
        while (end < position.length()
                && !Character.isDigit(position.charAt(end))) {
            end++;
        }
        return parseColumnNumber(position.substring(0, end));
    }

    /**
     * Convert the column indicator in Excel like A, B, C, AE, CX and so on to a
     * 1-based column number.
     *
     * @param columnIndicator
     *            The indicator to convert
     * @return The 1-based column number
     */
    public static int parseColumnNumber(String columnIndicator) {
        int base = Character.getNumericValue('A');
        int col = 0;
        // Base-26 positional value evaluated left to right with integer arithmetic.
        for (int i = 0; i < columnIndicator.length(); i++) {
            int offset = 1 + Character.getNumericValue(columnIndicator.charAt(i)) - base;
            col = col * 26 + offset;
        }
        return col;
    }

    /**
     * Stores a cell value when {@code </v>} is reached, advances the column at
     * {@code </c>}, and delivers the assembled row at {@code </row>}.
     */
    @Override
    public void endElement(String uri, String localName, String name)
            throws SAXException {
        if ("v".equals(name)) {
            // v => cell value. characters() may have been called several times
            // to build lastContents. The value is stored as-is (not trimmed).
            if (nextIsString) {
                resolveSharedString();
            }
            convertDateIfNeeded();
            storeCellValue(lastContents);
        } else if ("c".equals(name)) {
            // Advance so that a following cell without an "r" attribute lands
            // in the next column; a cell with "r" overrides this in startElement().
            curCol++;
        } else if ("row".equals(name)) {
            // End of row: hand the values to the callback, then reset for the next row.
            optRow(sheetIndex, curRow, rowlist);
            rowlist.clear();
            curRow++;
            curCol = 0;
        }
    }

    /**
     * Tells whether a number format denotes a date/time.
     *
     * @author jingma
     * @param numFmtId number format id of the cell
     * @return {@code true} if the cell content should be treated as a date
     */
    public boolean cellIsDate(long numFmtId) {
        String numFmt = stylesTable.getNumberFormatAt((short) numFmtId);
        if (numFmt == null) {
            // No format string registered in the styles table for this id:
            // only ids 14 and 22 are treated as dates.
            return numFmtId == 22 || numFmtId == 14;
        }
        numFmt = numFmt.toLowerCase();
        // Heuristic: a month/minute marker combined with a year, hour or day marker.
        return (numFmt.contains("yy") && numFmt.contains("m"))
                || (numFmt.contains("h") && numFmt.contains("m"))
                || (numFmt.contains("d") && numFmt.contains("m"));
    }

    /**
     * Accumulates character data of the current element.
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        lastContents += new String(ch, start, length);
    }

    /** Clears the per-sheet row/column state so a reused instance starts clean. */
    private void resetRowState() {
        curRow = 0;
        curCol = 0;
        rowlist.clear();
    }

    /** Replaces the shared-string index held in lastContents with the referenced text. */
    private void resolveSharedString() {
        try {
            int idx = Integer.parseInt(lastContents);
            lastContents = new XSSFRichTextString(sst.getItemAt(idx).getString())
                    .toString();
        } catch (Exception e) {
            // Keep the raw content when the lookup fails.
            log.error("获取单元格字符串值失败", e);
        }
    }

    /** Rewrites lastContents as a formatted date when the numeric cell has a date style. */
    private void convertDateIfNeeded() {
        boolean numeric = cellType == null || "n".equals(cellType);
        // Style index 0 is a valid cellXfs entry, so only -1 (no style) is skipped.
        if (!numeric || this.cellS < 0 || stylesTable == null) {
            return;
        }
        try {
            long numFmtId = stylesTable.getCellXfAt(this.cellS).getNumFmtId();
            if (cellIsDate(numFmtId)) {
                double d = Double.parseDouble(lastContents);
                if (org.apache.poi.ss.usermodel.DateUtil.isValidExcelDate(d)) {
                    lastContents = DateUtil.doFormatDate(org.apache.poi.ss.usermodel.
                            DateUtil.getJavaDate(d, false), DateUtil.DATE_FORMATTER_L);
                }
            }
        } catch (Exception e) {
            // Leave the raw value in place when it cannot be interpreted as a date.
            log.debug("解析为时间错误", e);
        }
    }

    /** Puts the value at index curCol of the row buffer, padding skipped columns with "". */
    private void storeCellValue(String value) {
        while (rowlist.size() < curCol) {
            rowlist.add("");
        }
        if (curCol < rowlist.size()) {
            // Slot already exists: overwrite instead of inserting and shifting later cells.
            rowlist.set(curCol, value);
        } else {
            rowlist.add(value);
        }
    }

    /** Sets a parser feature, ignoring parsers that do not recognise or support it. */
    private void disableFeatureQuietly(XMLReader parser, String feature, boolean value) {
        try {
            parser.setFeature(feature, value);
        } catch (SAXException e) {
            log.debug("XML parser feature not supported: " + feature, e);
        }
    }

    /** Closes the resource if non-null; a failure is logged and otherwise ignored. */
    private void closeQuietly(AutoCloseable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception e) {
            log.info("关闭excel失败，" + e.getMessage(), e);
        }
    }
}
