package org.apache.asterix.fuzzyjoin;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import org.apache.asterix.fuzzyjoin.tokenizer.Tokenizer;
import org.apache.asterix.fuzzyjoin.tokenizer.TokenizerFactory;
import org.apache.asterix.fuzzyjoin.tokenorder.TokenLoad;
import org.apache.asterix.fuzzyjoin.tokenorder.TokenRankFrequency;

/**
 * Command-line tool that tokenizes a record file in three steps:
 * <ol>
 *   <li>reads the input file and counts how often each token occurs in the selected data
 *       columns;</li>
 *   <li>writes the distinct tokens, one per line, sorted by their count (the ordering is the one
 *       defined by {@code MutableInteger.compareTo});</li>
 *   <li>loads the token ranks from that token file and re-reads the input, writing for every
 *       record its id, the number of token ranks and the ranks themselves as ints through a
 *       {@link LittleEndianIntOutputStream}.</li>
 * </ol>
 */
public class FuzzyJoinTokenize {

    /** Column specification handed to {@code FuzzyJoinUtil.getDataColumns}. */
    private static final String DATA_COLUMNS = "2,3";

    /** Character handed to both {@code TokenizerFactory.getTokenizer} and {@code FuzzyJoinUtil.getData}. */
    private static final char SEPARATOR = '_';

    /**
     * Pairs a token with its occurrence counter so tokens can be sorted by count.
     */
    public static class TokenCount implements Comparable<Object> {
        public String token;
        public MutableInteger count;

        /**
         * @param str the token text
         * @param mutableInteger the counter associated with the token
         */
        public TokenCount(String str, MutableInteger mutableInteger) {
            this.token = str;
            this.count = mutableInteger;
        }

        /**
         * Compares by counter only; the token text does not take part in the ordering.
         *
         * @param obj the other {@code TokenCount}
         * @return the result of comparing the two counters
         * @throws ClassCastException if {@code obj} is not a {@code TokenCount}
         * @throws NullPointerException if {@code obj} is {@code null}
         */
        @Override
        public int compareTo(Object obj) {
            return this.count.compareTo(((TokenCount) obj).count);
        }

        /** @return the token text */
        public String getToken() {
            return this.token;
        }

        /** @return the token, the configured separator constant and the count, concatenated */
        @Override
        public String toString() {
            return this.token + FuzzyJoinConfig.RIDPAIRS_SEPARATOR_REGEX + this.count;
        }
    }

    /**
     * Entry point.
     *
     * @param strArr {@code [0]} input record file, {@code [1]} token file to write,
     *               {@code [2]} binary tokenized file to write; extra arguments are ignored
     * @throws IOException if any of the files cannot be read or written
     * @throws IllegalArgumentException if fewer than three arguments are supplied
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr == null || strArr.length < 3) {
            throw new IllegalArgumentException(
                    "Usage: FuzzyJoinTokenize <input file> <tokens file> <tokenized file>");
        }
        String inputPath = strArr[0];
        String tokensPath = strArr[1];
        String tokenizedPath = strArr[2];

        Tokenizer tokenizer = TokenizerFactory.getTokenizer(FuzzyJoinConfig.TOKENIZER_VALUE, "_", SEPARATOR);
        int[] dataColumns = FuzzyJoinUtil.getDataColumns(DATA_COLUMNS);

        HashMap<String, MutableInteger> tokenCounts = countTokens(inputPath, tokenizer, dataColumns);
        writeTokensByCount(tokenCounts, tokensPath);

        // The token file must be completely written and closed before it is loaded back.
        TokenRankFrequency tokenRankFrequency = new TokenRankFrequency();
        new TokenLoad(tokensPath, tokenRankFrequency).loadTokenRank();

        writeTokenizedRecords(inputPath, tokenizedPath, tokenizer, dataColumns, tokenRankFrequency);
    }

    /** Reads every record of the input file and counts the occurrences of each token. */
    private static HashMap<String, MutableInteger> countTokens(String inputPath, Tokenizer tokenizer,
            int[] dataColumns) throws IOException {
        HashMap<String, MutableInteger> tokenCounts = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(inputPath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split(FuzzyJoinConfig.RECORD_SEPARATOR_REGEX);
                for (String token : tokenizer.tokenize(FuzzyJoinUtil.getData(fields, dataColumns, SEPARATOR))) {
                    MutableInteger count = tokenCounts.get(token);
                    if (count == null) {
                        tokenCounts.put(token, new MutableInteger(1));
                    } else {
                        count.inc();
                    }
                }
            }
        }
        return tokenCounts;
    }

    /** Writes the tokens, one per line, in the order given by {@link TokenCount#compareTo(Object)}. */
    private static void writeTokensByCount(HashMap<String, MutableInteger> tokenCounts, String tokensPath)
            throws IOException {
        ArrayList<TokenCount> ordered = new ArrayList<>(tokenCounts.size());
        tokenCounts.forEach((token, count) -> ordered.add(new TokenCount(token, count)));
        Collections.sort(ordered);
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(tokensPath))) {
            for (TokenCount tokenCount : ordered) {
                writer.write(tokenCount.getToken() + "\n");
            }
        }
    }

    /**
     * Re-reads the input file and writes, per record: the first field parsed as an int, the number
     * of token ranks, and then each token rank.
     */
    private static void writeTokenizedRecords(String inputPath, String tokenizedPath, Tokenizer tokenizer,
            int[] dataColumns, TokenRankFrequency tokenRankFrequency) throws IOException {
        // NOTE(review): try-with-resources requires LittleEndianIntOutputStream to be AutoCloseable;
        // it wraps an OutputStream and exposes close(), so this is expected to hold - confirm.
        try (BufferedReader reader = new BufferedReader(new FileReader(inputPath));
                LittleEndianIntOutputStream out = new LittleEndianIntOutputStream(
                        new BufferedOutputStream(new FileOutputStream(tokenizedPath)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split(FuzzyJoinConfig.RECORD_SEPARATOR_REGEX);
                out.writeInt(Integer.parseInt(fields[0]));
                Collection<Integer> tokenRanks = tokenRankFrequency
                        .getTokenRanks(tokenizer.tokenize(FuzzyJoinUtil.getData(fields, dataColumns, SEPARATOR)));
                out.writeInt(tokenRanks.size());
                for (Integer rank : tokenRanks) {
                    out.writeInt(rank.intValue());
                }
            }
        }
    }
}
