package ai.yda.framework.rag.core.util;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;

/* loaded from: input_file:ai/yda/framework/rag/core/util/ContentUtil.class */
/**
 * Static helpers for cleaning raw text content and cutting it into fixed-size chunks.
 *
 * <p>This class is not instantiable.
 */
public final class ContentUtil {
    public static final String SENTENCE_SEPARATOR = ".";

    /** Matches one or more consecutive whitespace characters; compiled once and reused. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    private ContentUtil() {
    }

    /**
     * Lower-cases the content, collapses whitespace runs, strips HTML markup and splits the
     * resulting text into consecutive chunks of at most {@code num} characters.
     *
     * <p>Chunks are cut purely by {@code char} count: every chunk except possibly the last has
     * exactly {@code num} characters, and concatenating the chunks yields the cleaned text.
     * An input that is empty after cleaning produces an empty list.
     *
     * @param str the raw content, possibly containing HTML markup
     * @param num the maximum chunk length in {@code char}s; must be positive
     * @return a new mutable list holding the chunks in their original order
     * @throws NullPointerException if {@code str} or {@code num} is {@code null}
     * @throws IllegalArgumentException if {@code num} is zero or negative
     */
    public static List<String> preprocessAndSplitContent(String str, Integer num) {
        Objects.requireNonNull(str, "str must not be null");
        Objects.requireNonNull(num, "num must not be null");
        final int chunkSize = num;
        if (chunkSize <= 0) {
            // A zero size would never advance the cursor (endless loop of empty chunks);
            // a negative one would produce an invalid substring range.
            throw new IllegalArgumentException("num (chunk size) must be positive, got " + chunkSize);
        }

        // A Java String is already Unicode text, so no byte round trip is needed; re-encoding
        // through the platform default charset would corrupt non-ASCII characters wherever that
        // charset is not UTF-8. Locale.ROOT keeps lower-casing independent of the JVM locale.
        final String cleaned = removeHtmlTags(normalizeWhitespaces(str.toLowerCase(Locale.ROOT)));

        final int length = cleaned.length();
        final List<String> chunks = new ArrayList<>();
        int start = 0;
        while (start < length) {
            // Compare the remaining length against the chunk size instead of computing
            // start + chunkSize first, so a very large chunk size cannot overflow int.
            final int end = (length - start > chunkSize) ? start + chunkSize : length;
            chunks.add(cleaned.substring(start, end));
            start = end;
        }
        return chunks;
    }

    /** Replaces every run of whitespace characters in {@code str} with a single space. */
    private static String normalizeWhitespaces(String str) {
        return WHITESPACE_RUN.matcher(str).replaceAll(" ");
    }

    /** Parses {@code str} as HTML with jsoup and returns only the document's text content. */
    private static String removeHtmlTags(String str) {
        return Jsoup.parse(str).text();
    }
}
