package cc.unitmesh.rag.splitter;

import cc.unitmesh.rag.document.Document;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.stream.Stream;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.collections.CollectionsKt;
import kotlin.collections.MapsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;

/* compiled from: MarkdownHeaderTextSplitter.kt */
@Metadata(mv = {1, 9, 0}, k = 1, xi = 48, d1 = {"��H\n\u0002\u0018\u0002\n\u0002\u0010��\n��\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0002\b\u0002\n\u0002\u0010\u000b\n\u0002\b\u0002\n\u0002\u0010!\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0007\n\u0002\u0010$\n��\n\u0002\u0018\u0002\n��\n\u0002\u0010%\n\u0002\b\u0004\u0018��2\u00020\u0001B!\b\u0016\u0012\u0018\u0010\u0002\u001a\u0014\u0012\u0010\u0012\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00050\u00040\u0003¢\u0006\u0002\u0010\u0006B)\u0012\u001a\u0010\u0002\u001a\u0016\u0012\u0012\u0012\u0010\u0012\u0004\u0012\u00020\u0005\u0012\u0006\u0012\u0004\u0018\u00010\u00050\u00040\u0003\u0012\u0006\u0010\u0007\u001a\u00020\b¢\u0006\u0002\u0010\tJ\u001c\u0010\n\u001a\b\u0012\u0004\u0012\u00020\f0\u000b2\f\u0010\r\u001a\b\u0012\u0004\u0012\u00020\u000e0\u0003H\u0002J\u0018\u0010\u000f\u001a\u00020\b2\u0006\u0010\u0010\u001a\u00020\u00052\u0006\u0010\u0011\u001a\u00020\u0005H\u0002Jb\u0010\u0012\u001a\u00020\b2\u0006\u0010\u0010\u001a\u00020\u00052\f\u0010\u0013\u001a\b\u0012\u0004\u0012\u00020\u000e0\u000b2\f\u0010\u0014\u001a\b\u0012\u0004\u0012\u00020\u00050\u000b2\u0012\u0010\u0015\u001a\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00010\u00162\f\u0010\u0017\u001a\b\u0012\u0004\u0012\u00020\u00180\u000b2\u0012\u0010\u0019\u001a\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00050\u001aH\u0002J>\u0010\u001b\u001a\b\u0012\u0004\u0012\u00020\f0\u00032\f\u0010\u0013\u001a\b\u0012\u0004\u0012\u00020\u000e0\u000b2\f\u0010\u0014\u001a\b\u0012\u0004\u0012\u00020\u00050\u00032\u0012\u0010\u0015\u001a\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00010\u0016H\u0002J\u0014\u0010\u001c\u001a\b\u0012\u0004\u0012\u00020\f0\u00032\u0006\u0010\u001d\u001a\u00020\u0005R\"\u0010\u0002\u001a\u0016\u0012\u0012\u0012\u0010\u0012\u0004\u0012\u00020\u0005\u0012\u0006\u0012\u0004\u0018\u00010\u00050\u00040\u0003X\u0082\u0004¢\u0006\u0002\n��R
\u000e\u0010\u0007\u001a\u00020\bX\u0082\u0004¢\u0006\u0002\n��¨\u0006\u001e"}, d2 = {"Lcc/unitmesh/rag/splitter/MarkdownHeaderTextSplitter;", "", "headersToSplitOn", "", "Lkotlin/Pair;", "", "(Ljava/util/List;)V", "returnEachLine", "", "(Ljava/util/List;Z)V", "aggregateLinesToChunks", "", "Lcc/unitmesh/rag/document/Document;", "lines", "Lcc/unitmesh/rag/splitter/LineType;", "isHeaderToSplitOn", "strippedLine", "sep", "processLine", "linesWithMetadata", "currentContent", "currentMetadata", "", "headerStack", "Lcc/unitmesh/rag/splitter/HeaderType;", "initialMetadata", "", "processOutput", "splitText", "text", "cocoa-core"})
@SourceDebugExtension({"SMAP\nMarkdownHeaderTextSplitter.kt\nKotlin\n*S Kotlin\n*F\n+ 1 MarkdownHeaderTextSplitter.kt\ncc/unitmesh/rag/splitter/MarkdownHeaderTextSplitter\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 3 ArraysJVM.kt\nkotlin/collections/ArraysKt__ArraysJVMKt\n+ 4 _Strings.kt\nkotlin/text/StringsKt___StringsKt\n*L\n1#1,192:1\n731#2,9:193\n37#3,2:202\n1099#4,3:204\n*S KotlinDebug\n*F\n+ 1 MarkdownHeaderTextSplitter.kt\ncc/unitmesh/rag/splitter/MarkdownHeaderTextSplitter\n*L\n94#1:193,9\n94#1:202,2\n134#1:204,3\n*E\n"})
/* loaded from: input_file:cc/unitmesh/rag/splitter/MarkdownHeaderTextSplitter.class */
public final class MarkdownHeaderTextSplitter {
    /**
     * Output mode flag read by {@code processOutput}: when {@code true} the collected
     * line groups are converted to documents one-to-one, otherwise they are first passed
     * through {@code aggregateLinesToChunks}.
     */
    private final boolean returnEachLine;

    /**
     * Header markers paired with the metadata key to record for them, stored in the order
     * produced by the constructor (longest marker first). The second component of a pair
     * may be {@code null}; {@code processLine} then treats the line as a header but records
     * no metadata for it.
     */
    @NotNull
    private final List<Pair<String, String>> headersToSplitOn;

    /**
     * Creates a splitter for the given header markers.
     *
     * <p>The pairs are copied into an unmodifiable list ordered by marker length, longest
     * first, so that matching iterates over longer markers (e.g. {@code "##"}) before
     * shorter ones (e.g. {@code "#"}). Pairs whose markers have equal length keep their
     * relative input order.
     *
     * @param headersToSplitOn pairs of (header marker, metadata key); must not be null
     * @param z value stored as {@code returnEachLine}; when {@code true} the output is not
     *          aggregated into larger chunks
     */
    public MarkdownHeaderTextSplitter(@NotNull List<Pair<String, String>> headersToSplitOn, boolean z) {
        Intrinsics.checkNotNullParameter(headersToSplitOn, "headersToSplitOn");
        this.returnEachLine = z;
        // Explicitly typed so that reversed() can be chained without losing the element type.
        Comparator<Pair<String, String>> byMarkerLength =
                Comparator.comparingInt(header -> header.getFirst().length());
        this.headersToSplitOn = headersToSplitOn.stream()
                .sorted(byMarkerLength.reversed())
                .toList();
    }

    /**
     * Creates a splitter with {@code returnEachLine} set to {@code false}.
     *
     * @param headersToSplitOn pairs of (header marker, metadata key); must not be null
     */
    public MarkdownHeaderTextSplitter(@NotNull List<Pair<String, String>> headersToSplitOn) {
        // The two-argument constructor already rejects a null list, so no second check is needed.
        this(headersToSplitOn, false);
    }

    /**
     * Merges consecutive entries that carry equal metadata and converts the result to documents.
     *
     * <p>When an entry's metadata equals that of the previously kept entry, its content is
     * appended to that entry, separated by two spaces and a newline. The merge is done by
     * calling {@code setContent} on the kept {@link LineType}, so the objects contained in
     * {@code list} are modified in place.
     *
     * @param list entries in document order
     * @return one {@link Document} per run of entries with equal metadata
     */
    private final List<Document> aggregateLinesToChunks(List<LineType> list) {
        List<LineType> merged = new ArrayList<>();
        for (LineType line : list) {
            int lastIndex = merged.size() - 1;
            if (lastIndex >= 0
                    && Intrinsics.areEqual(merged.get(lastIndex).getMetadata(), line.getMetadata())) {
                LineType previous = merged.get(lastIndex);
                previous.setContent(previous.getContent() + "  \n" + line.getContent());
            } else {
                merged.add(line);
            }
        }
        return merged.stream()
                .map(chunk -> new Document(chunk.getContent(), chunk.getMetadata()))
                .toList();
    }

    /**
     * Splits markdown text into documents, using the configured header markers as boundaries.
     *
     * <p>The text is split on {@code "\n"}, trailing empty lines are dropped and every
     * remaining line is trimmed. Header lines update the header metadata through
     * {@code processLine}; other non-empty lines are accumulated, and the accumulated lines
     * are emitted as one entry (joined with {@code "\n"}) when a blank line or a header is
     * reached. The collected entries are then handed to {@code processOutput}.
     *
     * @param text markdown source; must not be null
     * @return the resulting documents, each carrying the header metadata that was active
     *         for its content
     */
    @NotNull
    public final List<Document> splitText(@NotNull String text) {
        Intrinsics.checkNotNullParameter(text, "text");
        List<LineType> linesWithMetadata = new ArrayList<>();
        List<String> currentContent = new ArrayList<>();
        Map<String, ? extends Object> currentMetadata = MapsKt.emptyMap();
        List<HeaderType> headerStack = new ArrayList<>();
        Map<String, String> initialMetadata = new LinkedHashMap<>();

        // Drop trailing empty lines only; empty lines in the middle still act as separators.
        List<String> rawLines = new Regex(StringUtils.LF).split(text, 0);
        int end = rawLines.size();
        while (end > 0 && rawLines.get(end - 1).isEmpty()) {
            end--;
        }

        for (String rawLine : rawLines.subList(0, end)) {
            String strippedLine = StringsKt.trim((CharSequence) rawLine).toString();
            boolean foundHeader = processLine(
                    strippedLine, linesWithMetadata, currentContent, currentMetadata, headerStack, initialMetadata);
            if (!foundHeader && !strippedLine.isEmpty()) {
                currentContent.add(strippedLine);
            } else if (!currentContent.isEmpty()) {
                // Blank line: close the group that has been accumulated so far.
                linesWithMetadata.add(new LineType(String.join(StringUtils.LF, currentContent), currentMetadata));
                currentContent.clear();
            }
            // Snapshot the header metadata so later header changes do not affect emitted entries.
            currentMetadata = new HashMap<String, Object>(initialMetadata);
        }
        return processOutput(linesWithMetadata, currentContent, currentMetadata);
    }

    /**
     * Tests a trimmed line against the configured header markers and, on a match, updates
     * the header state and flushes pending content.
     *
     * <p>For a matching marker with a non-null metadata key, the header level is the number
     * of {@code '#'} characters in the marker. Entries on the header stack whose level is
     * greater than or equal to that level are popped and their names removed from
     * {@code map2}; the new header is then pushed and its data stored under the metadata key.
     * Independently of the key, any pending content in {@code list2} is joined with
     * {@code "\n"}, appended to {@code list} with the metadata in {@code map}, and cleared.
     *
     * @param str trimmed line to test
     * @param list output entries; receives the flushed pending content
     * @param list2 pending content lines; cleared when flushed
     * @param map metadata attached to the flushed content
     * @param list3 stack of currently open headers; modified in place
     * @param map2 metadata of the currently open headers; modified in place
     * @return {@code true} if the line matched one of the header markers
     */
    private final boolean processLine(String str, List<LineType> list, List<String> list2, Map<String, ? extends Object> map, List<HeaderType> list3, Map<String, String> map2) {
        for (Pair<String, String> header : this.headersToSplitOn) {
            String marker = header.getFirst();
            String metadataKey = header.getSecond();
            if (!isHeaderToSplitOn(str, marker)) {
                continue;
            }
            if (metadataKey != null) {
                int level = 0;
                for (int i = 0; i < marker.length(); i++) {
                    if (marker.charAt(i) == '#') {
                        level++;
                    }
                }
                // Close every open header at the same or a deeper level.
                while (!list3.isEmpty() && list3.get(list3.size() - 1).getLevel() >= level) {
                    HeaderType closed = list3.remove(list3.size() - 1);
                    map2.remove(closed.getName());
                }
                String title = StringsKt.trim((CharSequence) str.substring(marker.length())).toString();
                HeaderType opened = new HeaderType(level, metadataKey, title);
                list3.add(opened);
                map2.put(metadataKey, opened.getData());
            }
            if (!list2.isEmpty()) {
                list.add(new LineType(String.join(StringUtils.LF, list2), map));
                list2.clear();
            }
            return true;
        }
        return false;
    }

    /**
     * Tells whether a line is a header for the given marker.
     *
     * @param str line to test
     * @param str2 header marker, e.g. {@code "#"}
     * @return {@code true} if {@code str} starts with {@code str2} and the marker is followed
     *         either by the end of the line or by a space character
     */
    private final boolean isHeaderToSplitOn(String str, String str2) {
        if (!str.startsWith(str2)) {
            return false;
        }
        // Require a space (or nothing) after the marker so "#" does not match "##" or "#tag".
        return str.length() == str2.length() || str.charAt(str2.length()) == ' ';
    }

    /**
     * Flushes any remaining pending content and converts the collected entries to documents.
     *
     * @param list collected entries; receives one more entry if {@code list2} is not empty
     * @param list2 pending content lines, joined with {@code "\n"} when flushed
     * @param map metadata attached to the flushed pending content
     * @return one {@link Document} per entry when {@code returnEachLine} is set, otherwise
     *         the result of {@code aggregateLinesToChunks}
     */
    private final List<Document> processOutput(List<LineType> list, List<String> list2, Map<String, ? extends Object> map) {
        if (!list2.isEmpty()) {
            list.add(new LineType(String.join(StringUtils.LF, list2), map));
        }
        if (!this.returnEachLine) {
            return aggregateLinesToChunks(list);
        }
        return list.stream()
                .map(chunk -> new Document(chunk.getContent(), chunk.getMetadata()))
                .toList();
    }

    /**
     * Invokes {@code tmp0} on {@code obj} and returns the result as a primitive {@code int}.
     *
     * @param tmp0 function to apply; must not be null
     * @param obj argument passed to the function
     * @return the function result, cast to {@link Number} and narrowed with {@code intValue()}
     */
    private static final int _init_$lambda$0(Function1 tmp0, Object obj) {
        Intrinsics.checkNotNullParameter(tmp0, "$tmp0");
        Number key = (Number) tmp0.invoke(obj);
        return key.intValue();
    }

    /**
     * Invokes {@code tmp0} on {@code obj} and returns the result cast to {@link Document}.
     *
     * @param tmp0 function to apply; must not be null
     * @param obj argument passed to the function
     * @return the function result as a {@link Document}
     */
    private static final Document aggregateLinesToChunks$lambda$1(Function1 tmp0, Object obj) {
        Intrinsics.checkNotNullParameter(tmp0, "$tmp0");
        Object mapped = tmp0.invoke(obj);
        return (Document) mapped;
    }

    /**
     * Invokes {@code tmp0} on {@code obj} and returns the result cast to {@link Document}.
     *
     * @param tmp0 function to apply; must not be null
     * @param obj argument passed to the function
     * @return the function result as a {@link Document}
     */
    private static final Document processOutput$lambda$4(Function1 tmp0, Object obj) {
        Intrinsics.checkNotNullParameter(tmp0, "$tmp0");
        Object mapped = tmp0.invoke(obj);
        return (Document) mapped;
    }
}
