package net.ifok.common.html;

import net.ifok.common.html.model.HtmlExtractData;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * @Description: Regex-based helpers for reading and rewriting fragments of HTML strings.
 *               The methods are best-effort text transformations, not a full HTML parser:
 *               attribute values that themselves contain a literal {@code >} or text that
 *               merely looks like an attribute are not handled specially.
 * @Author: xq
 * @Date: 2021/1/13 17:11
 **/
public class HtmlUtils {

    /** Matches one complete {@code <img ...>} tag, in any letter case and across line breaks. */
    private static final Pattern IMG_TAG_PATTERN =
            Pattern.compile("<img(?![\\w-])[^>]*>", Pattern.CASE_INSENSITIVE);

    /**
     * Matches a {@code src} attribute. The value is captured in group 1 (double-quoted),
     * group 2 (single-quoted) or group 3 (unquoted). The look-behind stops attributes such as
     * {@code data-src} from being mistaken for {@code src}.
     */
    private static final Pattern SRC_ATTR_PATTERN = Pattern.compile(
            "(?<![\\w-])src\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+))",
            Pattern.CASE_INSENSITIVE);

    /**
     * Matches a tag that carries a quoted {@code style} attribute. Group 1 is the tag text before
     * the attribute, group 2 / group 3 the double- / single-quoted value, and group 4 the rest of
     * the tag including the closing {@code >}.
     */
    private static final Pattern STYLED_TAG_PATTERN = Pattern.compile(
            "(<[^>]*?)(?<![\\w-])style\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)')([^>]*>)",
            Pattern.CASE_INSENSITIVE);

    /** Matches a quoted {@code class} attribute; group 1 / group 2 hold the value. */
    private static final Pattern CLASS_ATTR_PATTERN = Pattern.compile(
            "(?<![\\w-])class\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)')",
            Pattern.CASE_INSENSITIVE);

    /**
     * Collects the {@code src} attribute value of every {@code <img>} tag in the given HTML.
     *
     * <p>Values are returned in document order, trimmed, and without their surrounding quotes.
     * Only the first {@code src} attribute of each tag is used; tags without a {@code src} or with
     * an empty one are skipped. Values are collected in a list rather than a comma-joined string,
     * so URLs that contain commas (for example {@code data:} URIs) are returned intact.
     *
     * @param content HTML text to scan; may be {@code null}
     * @return the image sources, or {@code null} when {@code content} is {@code null} or contains
     *         no image with a non-empty {@code src}
     */
    public static String[] getImageSrc(String content) {
        if (content == null) {
            return null;
        }
        List<String> sources = new ArrayList<String>();
        Matcher tagMatcher = IMG_TAG_PATTERN.matcher(content);
        while (tagMatcher.find()) {
            // Search inside this single tag only, so attributes of neighbouring tags never leak in.
            Matcher srcMatcher = SRC_ATTR_PATTERN.matcher(tagMatcher.group());
            if (!srcMatcher.find()) {
                continue;
            }
            String value = srcMatcher.group(1);
            if (value == null) {
                value = srcMatcher.group(2);
            }
            if (value == null) {
                value = srcMatcher.group(3);
            }
            value = value.trim();
            if (!value.isEmpty()) {
                sources.add(value);
            }
        }
        // Callers have always received null (not an empty array) when nothing was found.
        return sources.isEmpty() ? null : sources.toArray(new String[0]);
    }

    /**
     * Sets an attribute on every occurrence of a tag, replacing the attribute's current value if
     * the tag already has it and adding the attribute otherwise.
     *
     * <p>Tag and attribute names are matched literally (regex metacharacters in them have no
     * special meaning) and case-insensitively; the tag name is written back exactly as it appeared
     * in the input. Tags without any attributes, such as {@code <td>} or {@code <br/>}, are
     * matched too. A newly created attribute is placed first in the tag. The value is inserted
     * verbatim between double quotes, so the caller is responsible for escaping any {@code "} it
     * contains.
     *
     * @param htmlStr  HTML text to rewrite
     * @param htmlTag  name of the tag whose attribute is to be set, e.g. {@code img}
     * @param attrName name of the attribute to update or create
     * @param startStr new attribute value
     * @return the rewritten HTML; {@code htmlStr} itself when it, {@code htmlTag} or
     *         {@code attrName} is {@code null}
     */
    public static String createOrUpdateHtmlTagAttr(String htmlStr, String htmlTag, String attrName, String startStr) {
        if (htmlStr == null || htmlTag == null || attrName == null) {
            return htmlStr;
        }
        // Group 1: tag name as written; group 2: everything between the tag name and '>'.
        // The look-ahead alternative admits attribute-less tags without matching longer tag names.
        Pattern tagPattern = Pattern.compile(
                "<\\s*(" + Pattern.quote(htmlTag) + ")(?:\\s+|(?=/?>))([^>]*)>",
                Pattern.CASE_INSENSITIVE);
        // The look-behind keeps e.g. "src" from matching inside "data-src".
        Pattern attrPattern = Pattern.compile(
                "(?<![\\w-])" + Pattern.quote(attrName)
                        + "\\s*=\\s*(?:\"[^\"]*\"|'[^']*'|[^\\s\"'>]+)",
                Pattern.CASE_INSENSITIVE);
        String newAttr = attrName + "=\"" + startStr + "\"";

        StringBuffer result = new StringBuffer();
        Matcher tagMatcher = tagPattern.matcher(htmlStr);
        while (tagMatcher.find()) {
            String attrs = tagMatcher.group(2);
            Matcher attrMatcher = attrPattern.matcher(attrs);
            String newAttrs;
            if (attrMatcher.find()) {
                newAttrs = attrs.substring(0, attrMatcher.start())
                        + newAttr
                        + attrs.substring(attrMatcher.end());
            } else {
                newAttrs = newAttr + " " + attrs;
            }
            String rebuiltTag = "<" + tagMatcher.group(1) + " " + newAttrs + ">";
            // The rebuilt tag is literal text: '$' and '\' in it must not be read as group references.
            tagMatcher.appendReplacement(result, Matcher.quoteReplacement(rebuiltTag));
        }
        tagMatcher.appendTail(result);
        return result.toString();
    }

    /**
     * Moves inline {@code style} attributes out of the HTML into generated CSS classes.
     *
     * <p>For each tag with a quoted {@code style} attribute, a class named {@code auto-css-N}
     * (N counting up from 0 in document order) is generated, a rule
     * {@code .auto-css-N{<style value>}} followed by {@code \r\n} is appended to the CSS output,
     * and the {@code style} attribute is removed from the tag. If the tag already has a quoted
     * {@code class} attribute the generated name is appended to it; otherwise a
     * {@code class='auto-css-N'} attribute takes the place of the {@code style} attribute.
     * Everything else in the HTML is copied through unchanged.
     *
     * @param html source HTML; {@code null} is treated as an empty string
     * @return an {@link HtmlExtractData} whose HTML codes hold the rewritten markup and whose CSS
     *         codes hold the generated rules
     */
    public static HtmlExtractData extractCSS(String html) {
        String source = html == null ? "" : html;
        Matcher tagMatcher = STYLED_TAG_PATTERN.matcher(source);
        StringBuffer htmlOut = new StringBuffer();
        StringBuilder cssOut = new StringBuilder();
        int cssCount = 0;
        while (tagMatcher.find()) {
            String cssClassName = "auto-css-" + cssCount;
            cssCount++;

            String declarations = tagMatcher.group(2) != null ? tagMatcher.group(2) : tagMatcher.group(3);
            cssOut.append(".").append(cssClassName).append("{");
            cssOut.append(declarations).append("}").append("\r\n");

            String rewrittenTag = replaceStyleWithClass(tagMatcher.group(1), tagMatcher.group(4), cssClassName);
            // Quote the replacement so '$' or '\' elsewhere in the tag is copied literally.
            tagMatcher.appendReplacement(htmlOut, Matcher.quoteReplacement(rewrittenTag));
        }
        tagMatcher.appendTail(htmlOut);

        HtmlExtractData data = new HtmlExtractData();
        data.setCssCodes(cssOut.toString());
        data.setHtmlCodes(htmlOut.toString());
        return data;
    }

    /**
     * Rebuilds a tag whose style attribute sat between {@code before} and {@code after}, adding
     * {@code className} to an existing quoted class attribute or inserting a new class attribute
     * where the style attribute used to be.
     */
    private static String replaceStyleWithClass(String before, String after, String className) {
        Matcher inBefore = CLASS_ATTR_PATTERN.matcher(before);
        if (inBefore.find()) {
            return appendToClassValue(before, inBefore, className) + after;
        }
        Matcher inAfter = CLASS_ATTR_PATTERN.matcher(after);
        if (inAfter.find()) {
            return before + appendToClassValue(after, inAfter, className);
        }
        return before + "class='" + className + "'" + after;
    }

    /** Inserts {@code className} at the end of the class attribute value matched by {@code classAttr}. */
    private static String appendToClassValue(String text, Matcher classAttr, String className) {
        int valueGroup = classAttr.group(1) != null ? 1 : 2;
        String separator = classAttr.group(valueGroup).trim().isEmpty() ? "" : " ";
        int insertAt = classAttr.end(valueGroup);
        return text.substring(0, insertAt) + separator + className + text.substring(insertAt);
    }
}
