package me.yingrui.segment.tools;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.zip.GZIPInputStream;
import me.yingrui.segment.hmm.Node;
import me.yingrui.segment.hmm.Node$;
import me.yingrui.segment.hmm.NodeRepository;
import me.yingrui.segment.hmm.Trie;
import me.yingrui.segment.util.CharCheckUtil$;
import me.yingrui.segment.util.SerializeHandler;
import me.yingrui.segment.util.SerializeHandler$;
import scala.App;
import scala.Array$;
import scala.Function0;
import scala.Predef$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ListBuffer;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.AbstractFunction0;
import scala.runtime.BoxedUnit;

/* compiled from: GoogleBigramBuilder.scala */
@ScalaSignature(bytes = "\u0006\u0001m4A!\u0001\u0002\u0001\u0017\t\u0019ri\\8hY\u0016\u0014\u0015n\u001a:b[\n+\u0018\u000e\u001c3fe*\u00111\u0001B\u0001\u0006i>|Gn\u001d\u0006\u0003\u000b\u0019\tqa]3h[\u0016tGO\u0003\u0002\b\u0011\u00059\u00110\u001b8heVL'\"A\u0005\u0002\u00055,7\u0001A\n\u0004\u00011\u0011\u0002CA\u0007\u0011\u001b\u0005q!\"A\b\u0002\u000bM\u001c\u0017\r\\1\n\u0005Eq!AB!osJ+g\r\u0005\u0002\u000e'%\u0011AC\u0004\u0002\u0004\u0003B\u0004\b\"\u0002\f\u0001\t\u00039\u0012A\u0002\u001fj]&$h\bF\u0001\u0019!\tI\u0002!D\u0001\u0003\u0011\u001dY\u0002A1A\u0005\u0002q\tA\u0001\u001e:jKV\tQ\u0004\u0005\u0002\u001fC5\tqD\u0003\u0002!\t\u0005\u0019\u0001.\\7\n\u0005\tz\"\u0001\u0002+sS\u0016Da\u0001\n\u0001!\u0002\u0013i\u0012!\u0002;sS\u0016\u0004\u0003b\u0002\u0014\u0001\u0005\u0004%\taJ\u0001\u000f]>$WMU3q_NLGo\u001c:z+\u0005A\u0003C\u0001\u0010*\u0013\tQsD\u0001\bO_\u0012,'+\u001a9pg&$xN]=\t\r1\u0002\u0001\u0015!\u0003)\u0003=qw\u000eZ3SKB|7/\u001b;pef\u0004\u0003b\u0002\u0018\u0001\u0005\u0004%\taL\u0001\nI&\u0014Xm\u0019;pef,\u0012\u0001\r\t\u0003cQr!!\u0004\u001a\n\u0005Mr\u0011A\u0002)sK\u0012,g-\u0003\u00026m\t11\u000b\u001e:j]\u001eT!a\r\b\t\ra\u0002\u0001\u0015!\u00031\u0003)!\u0017N]3di>\u0014\u0018\u0010\t\u0005\bu\u0001\u0011\r\u0011\"\u0001<\u0003\r!\u0017N]\u000b\u0002yA\u0011QHQ\u0007\u0002})\u0011q\bQ\u0001\u0003S>T\u0011!Q\u0001\u0005U\u00064\u0018-\u0003\u0002D}\t!a)\u001b7f\u0011\u0019)\u0005\u0001)A\u0005y\u0005!A-\u001b:!\u0011\u00159\u0005\u0001\"\u0001I\u00035\u00198-\u00198ESJ,7\r^8ssR\u0011\u0011\n\u0014\t\u0003\u001b)K!a\u0013\b\u0003\tUs\u0017\u000e\u001e\u0005\u0006u\u0019\u0003\r\u0001\u0010\u0005\u0006\u001d\u0002!\taT\u0001\tY>\fGMR5mKR\u0011\u0011\n\u0015\u0005\u0006#6\u0003\r\u0001P\u0001\u0005M&dW\rC\u0003T\u0001\u0011\u0005A+A\u0005qCJ\u001cX\rT5oKR\u0011\u0011*\u0016\u0005\u0006-J\u0003\r\u0001M\u0001\u0005Y&tW\rC\u0003Y\u0001\u0011\u0005\u0011,A\bti\u0006$\u0018n\u001d;jG\nKwM]1n)\u0011I%\f\u00180\t\u000bm;\u0006\u0019\u0
001\u0019\u0002\u000b]|'\u000fZ\u0019\t\u000bu;\u0006\u0019\u0001\u0019\u0002\u000b]|'\u000f\u001a\u001a\t\u000b};\u0006\u0019\u00011\u0002\u000b\r|WO\u001c;\u0011\u00055\t\u0017B\u00012\u000f\u0005\rIe\u000e\u001e\u0005\u0006I\u0002!\t!Z\u0001\u0015O\u0016$xk\u001c:e\r&\u00148\u000f\u001e+x_\u000eC\u0017M]:\u0015\u0005A2\u0007\"B4d\u0001\u0004\u0001\u0014\u0001B<pe\u0012DQ!\u001b\u0001\u0005\u0002)\f\u0011cZ3u\u0005V4g-\u001a:fIJ+\u0017\rZ3s)\tYg\u000e\u0005\u0002>Y&\u0011QN\u0010\u0002\u000f\u0005V4g-\u001a:fIJ+\u0017\rZ3s\u0011\u0015\t\u0006\u000e1\u0001=\u0011\u0015\u0001\b\u0001\"\u0001r\u000399W\r^%oaV$8\u000b\u001e:fC6$\"A];\u0011\u0005u\u001a\u0018B\u0001;?\u0005-Ie\u000e];u'R\u0014X-Y7\t\u000bE{\u0007\u0019\u0001\u001f\t\u000b]\u0004A\u0011\u0001=\u0002\tM\fg/\u001a\u000b\u0003\u0013fDQA\u001f<A\u0002A\n\u0001BZ5mK:\fW.\u001a")
/* loaded from: input_file:me/yingrui/segment/tools/GoogleBigramBuilder.class */
/*
 * Command-line tool that scans a directory of bigram count files (plain text or
 * gzip-compressed), accumulates counts for pairs of Chinese words into a Trie keyed
 * by NodeRepository indices, and serializes the result to "google-bigram.dat".
 *
 * This source is the Java view of a Scala `App` object: the program body runs through
 * the delayedInit mechanism (see the constructor), and several members below are
 * Scala trait plumbing rather than hand-written logic.
 */
public class GoogleBigramBuilder implements App {
    private final Trie trie;
    private final NodeRepository nodeRepository;
    private final String directory;
    private final File dir;
    private final long executionStart;
    private String[] scala$App$$_args;
    private final ListBuffer<Function0<BoxedUnit>> scala$App$$initCode;

    // ---- scala.App trait plumbing (accessors/setters used by the trait implementation) ----

    public long executionStart() {
        return this.executionStart;
    }

    public String[] scala$App$$_args() {
        return this.scala$App$$_args;
    }

    public void scala$App$$_args_$eq(String[] strArr) {
        this.scala$App$$_args = strArr;
    }

    public ListBuffer<Function0<BoxedUnit>> scala$App$$initCode() {
        return this.scala$App$$initCode;
    }

    public void scala$App$_setter_$executionStart_$eq(long j) {
        this.executionStart = j;
    }

    public void scala$App$_setter_$scala$App$$initCode_$eq(ListBuffer listBuffer) {
        this.scala$App$$initCode = listBuffer;
    }

    /** Returns the command-line arguments, as provided by the App trait. */
    public String[] args() {
        return App.class.args(this);
    }

    /** Delegates deferred initialization of the program body to the App trait. */
    public void delayedInit(Function0<BoxedUnit> function0) {
        App.class.delayedInit(this, function0);
    }

    /** Program entry point; delegates to the App trait's main. */
    public void main(String[] strArr) {
        App.class.main(this, strArr);
    }

    // ---- state accessors ----

    /** Returns the trie that accumulates bigram counts. */
    public Trie trie() {
        return this.trie;
    }

    /** Returns the repository that assigns indices to word nodes. */
    public NodeRepository nodeRepository() {
        return this.nodeRepository;
    }

    /** Returns the path of the input directory (first argument or the built-in default). */
    public String directory() {
        return this.directory;
    }

    /** Returns the input directory as a File. */
    public File dir() {
        return this.dir;
    }

    /**
     * Applies the per-entry handler to every entry of the given directory.
     *
     * <p>If the entries cannot be listed ({@link File#listFiles()} returns {@code null}
     * when the path is not a directory or an I/O error occurs), a warning is printed to
     * stderr and the directory is skipped instead of failing with a NullPointerException.
     *
     * @param file the directory whose entries should be processed
     */
    public void scanDirectory(File file) {
        File[] entries = file.listFiles();
        if (entries == null) {
            System.err.println("Cannot list directory: " + file);
            return;
        }
        // The handler class is defined outside this file; presumably it loads files and
        // recurses into sub-directories -- confirm against its source.
        Predef$.MODULE$.refArrayOps(entries).foreach(new GoogleBigramBuilder$$anonfun$scanDirectory$1(this));
    }

    /**
     * Reads the given file line by line and feeds every line to {@link #parseLine(String)}.
     * The reader is always closed, including when parsing or reading fails.
     *
     * @param file the data file to load (gzip-compressed when its name ends with ".gz")
     */
    public void loadFile(File file) {
        BufferedReader reader = getBufferedReader(file);
        try {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                parseLine(line);
            }
        } finally {
            // Previously the reader was never closed, leaking one file handle per input file.
            reader.close();
        }
    }

    /**
     * Parses one whitespace-separated record and records its bigram count.
     * Lines that do not have exactly five fields are ignored.
     *
     * @param str a raw input line; fields 0 and 1 are the two words, field 3 is used as
     *            the count (presumably fields 2 and 4 are year and volume count -- confirm
     *            against the data format)
     */
    public void parseLine(String str) {
        String[] split = str.split("\\s+");
        if (split.length == 5) {
            String str2 = split[0];
            String str3 = split[1];
            // Fields 2 and 4 are parsed and discarded; the conversion is kept because a
            // non-numeric value still makes toInt throw, i.e. it acts as validation.
            new StringOps(Predef$.MODULE$.augmentString(split[2])).toInt();
            int i = new StringOps(Predef$.MODULE$.augmentString(split[3])).toInt();
            new StringOps(Predef$.MODULE$.augmentString(split[4])).toInt();
            statisticBigram(str2, str3, i);
        }
    }

    /**
     * Adds a bigram observation to the trie when both words pass the Chinese check.
     * Each word is first shortened with {@link #getWordFirstTwoChars(String)}, registered
     * in the node repository, and the resulting pair of indices is inserted with the count.
     *
     * @param str  the first word of the bigram
     * @param str2 the second word of the bigram
     * @param i    the count to insert for this bigram
     */
    public void statisticBigram(String str, String str2, int i) {
        if (CharCheckUtil$.MODULE$.isChinese(str) && CharCheckUtil$.MODULE$.isChinese(str2)) {
            Node apply = Node$.MODULE$.apply(getWordFirstTwoChars(str));
            Node apply2 = Node$.MODULE$.apply(getWordFirstTwoChars(str2));
            trie().insert((int[]) Array$.MODULE$.apply(Predef$.MODULE$.wrapIntArray(new int[]{nodeRepository().add(apply).getIndex(), nodeRepository().add(apply2).getIndex()}), ClassTag$.MODULE$.Int()), i);
        }
    }

    /**
     * Shortens a word to at most two characters.
     *
     * <p>NOTE(review): despite the method name, words longer than two characters are cut
     * to their LAST two characters ({@code substring(length - 2)}). Behavior is left
     * unchanged because altering it would change the generated model -- confirm intent.
     *
     * @param str the word to shorten
     * @return the word itself when it has at most two characters, otherwise its last two
     */
    public String getWordFirstTwoChars(String str) {
        return str.length() > 2 ? str.substring(str.length() - 2) : str;
    }

    /**
     * Opens the given file as a buffered UTF-8 text reader.
     *
     * @param file the file to read
     * @return a reader over the (possibly decompressed) file content; the caller must close it
     */
    public BufferedReader getBufferedReader(File file) {
        return new BufferedReader(new InputStreamReader(getInputStream(file), "utf-8"));
    }

    /**
     * Opens the given file as a byte stream, transparently decompressing files whose
     * name ends with ".gz".
     *
     * @param file the file to open
     * @return the raw or gzip-decoding input stream; the caller must close it
     */
    public InputStream getInputStream(File file) {
        if (!file.getName().endsWith(".gz")) {
            return new FileInputStream(file);
        }
        FileInputStream raw = new FileInputStream(file);
        boolean wrapped = false;
        try {
            // The GZIPInputStream constructor reads the gzip header and can throw.
            InputStream gzip = new GZIPInputStream(raw);
            wrapped = true;
            return gzip;
        } finally {
            if (!wrapped) {
                // Do not leak the underlying file handle when the header is invalid.
                raw.close();
            }
        }
    }

    /**
     * Serializes the node repository followed by the trie into the given file.
     *
     * @param str the path of the output file
     */
    public void save(String str) {
        SerializeHandler apply = SerializeHandler$.MODULE$.apply(new File(str), SerializeHandler$.MODULE$.WRITE_ONLY());
        nodeRepository().save(apply);
        trie().save(apply);
        // NOTE(review): the handler is not closed/flushed here; whether it needs to be
        // depends on SerializeHandler, which is not visible in this file.
    }

    /**
     * The program body (the statements of the original Scala object), run via delayedInit.
     * Initializes state, then either scans the input directory and writes
     * "google-bigram.dat", or exits with status 1 when the path is not a directory.
     */
    public final void delayedEndpoint$me$yingrui$segment$tools$GoogleBigramBuilder$1() {
        this.trie = new Trie();
        this.nodeRepository = new NodeRepository();
        // First command-line argument overrides the built-in default location.
        this.directory = args().length > 0 ? args()[0] : "/Users/twer/workspace/nlp/google-ngram/2gram";
        this.dir = new File(directory());
        if (!dir().isDirectory()) {
            // Explain the failure instead of exiting silently.
            System.err.println("Not a directory: " + directory());
            System.exit(1);
        } else {
            scanDirectory(dir());
            save("google-bigram.dat");
        }
    }

    /**
     * Registers the program body with the App trait so that it runs when main is invoked.
     */
    public GoogleBigramBuilder() {
        App.class.$init$(this);
        // The anonymous class below mirrors the synthetic delayedInit body class; its
        // null check and $outer wiring appear to be compiler/decompiler-generated.
        delayedInit(new AbstractFunction0(this) { // from class: me.yingrui.segment.tools.GoogleBigramBuilder$delayedInit$body
            private final GoogleBigramBuilder $outer;

            public final Object apply() {
                this.$outer.delayedEndpoint$me$yingrui$segment$tools$GoogleBigramBuilder$1();
                return BoxedUnit.UNIT;
            }

            {
                if (this == null) {
                    throw null;
                }
                this.$outer = this;
            }
        });
    }
}
