package org.apache.accumulo.examples.wikisearch.ingest;

import com.google.common.collect.HashMultimap;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.MultiTableBatchWriter;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.ColumnVisibility;
import org.apache.accumulo.examples.wikisearch.ingest.ArticleExtractor;
import org.apache.accumulo.examples.wikisearch.ingest.LRUOutputCombiner;
import org.apache.accumulo.examples.wikisearch.normalizer.LcNoDiacriticsNormalizer;
import org.apache.accumulo.examples.wikisearch.protobuf.Uid;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/* loaded from: input_file:org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.class */
public class WikipediaPartitionedMapper extends Mapper<Text, ArticleExtractor.Article, Text, Mutation> {
    // ---- Constants ----

    /** Column family value {@code "d"}; {@code map} stores the Base64-encoded article text under this family. */
    public static final String DOCUMENT_COLUMN_FAMILY = "d";
    /** Column family value {@code "e"}; {@code map} uses it for the per-field metadata entries. */
    public static final String METADATA_EVENT_COLUMN_FAMILY = "e";
    /** Column family value {@code "i"}; {@code map} uses it for the per-indexed-field metadata entries. */
    public static final String METADATA_INDEX_COLUMN_FAMILY = "i";
    /** Field name value {@code "TEXT"}; {@code map} files the article's normalized tokens under this name. */
    public static final String TOKENS_FIELD_NAME = "TEXT";
    /** The UTF-8 charset. */
    public static final Charset UTF8 = Charset.forName("UTF-8");
    /** Prefix prepended to the input key when {@code map} builds the column visibility expression. */
    private static final String cvPrefix = "all|";
    /** Shared zero-length value used for entries whose information is entirely in the key. */
    private static final Value NULL_VALUE = new Value(new byte[0]);

    // ---- Per-task state, populated by setup() ----

    /** Partition count read from the job configuration. */
    private int numPartitions;
    /** Base table name read from the job configuration; used as the output key in {@code map}. */
    private Text tablename;
    /** Base table name with the suffix {@code "Index"}. */
    private Text indexTableName;
    /** Base table name with the suffix {@code "ReverseIndex"}. */
    private Text reverseIndexTableName;
    /** Base table name with the suffix {@code "Metadata"}. */
    private Text metadataTableName;
    /** Combiner for index entries. */
    private LRUOutputCombiner<MutationInfo, CountAndSet> wikiIndexOutput;
    /** Combiner for reverse-index entries (keyed on the reversed field value). */
    private LRUOutputCombiner<MutationInfo, CountAndSet> wikiReverseIndexOutput;
    /** Combiner for metadata entries. */
    private LRUOutputCombiner<MutationInfo, Value> wikiMetadataOutput;
    /** Batch writer the combiner output callbacks write through; closed in {@code cleanup}. */
    MultiTableBatchWriter mtbw;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper$CountAndSet.class */
    public static class CountAndSet {
        /** Number of occurrences folded into this entry. */
        public int count;
        /** Collected identifiers; code elsewhere in this file sets it to {@code null} once too many accumulate. */
        public HashSet<String> set;

        /**
         * Creates an entry representing a single occurrence.
         *
         * @param str the one identifier initially held in {@link #set}
         */
        public CountAndSet(String str) {
            HashSet<String> initial = new HashSet<>();
            initial.add(str);
            this.set = initial;
            this.count = 1;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper$MutationInfo.class */
    /**
     * Immutable key identifying one cell to be written: row, column family, column qualifier,
     * column visibility and timestamp. Used as the key type of the LRU output combiners, so
     * {@link #equals(Object)} and {@link #hashCode()} are defined over all five components.
     */
    public static class MutationInfo {
        final String row;
        final String colfam;
        final String colqual;
        final ColumnVisibility cv;
        final long timestamp;

        /**
         * @param str the row
         * @param str2 the column family
         * @param str3 the column qualifier
         * @param columnVisibility the column visibility
         * @param j the timestamp
         */
        public MutationInfo(String str, String str2, String str3, ColumnVisibility columnVisibility, long j) {
            this.row = str;
            this.colfam = str2;
            this.colqual = str3;
            this.cv = columnVisibility;
            this.timestamp = j;
        }

        /**
         * Compares all five components.
         *
         * @return {@code false} (rather than throwing) for {@code null} or an object of another type
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            // Guard the cast: the Object.equals contract forbids throwing for null or foreign types.
            if (!(obj instanceof MutationInfo)) {
                return false;
            }
            MutationInfo other = (MutationInfo) obj;
            return this.timestamp == other.timestamp
                    && same(this.row, other.row)
                    && same(this.colfam, other.colfam)
                    && same(this.colqual, other.colqual)
                    && same(this.cv, other.cv);
        }

        /**
         * XOR of the component hashes and the low 32 bits of the timestamp; a {@code null}
         * component contributes 0 so the result stays consistent with {@link #equals(Object)}.
         */
        @Override
        public int hashCode() {
            return hashOf(this.row) ^ hashOf(this.colfam) ^ hashOf(this.colqual) ^ hashOf(this.cv) ^ ((int) this.timestamp);
        }

        /** Null-safe equality with an identity shortcut. */
        private static boolean same(Object a, Object b) {
            return a == b || (a != null && a.equals(b));
        }

        /** Null-safe hash code; {@code null} hashes to 0. */
        private static int hashOf(Object o) {
            return o == null ? 0 : o.hashCode();
        }
    }

    /**
     * Initialises per-task state from the job configuration.
     *
     * <p>Derives the index, reverse-index and metadata table names by appending {@code "Index"},
     * {@code "ReverseIndex"} and {@code "Metadata"} to the configured table name, opens the
     * {@link MultiTableBatchWriter}, reads the partition count, and builds the three combiners
     * whose output callbacks write through that batch writer.
     *
     * @param context the task context supplying the {@link Configuration}
     * @throws RuntimeException wrapping any {@link AccumuloException} or
     *         {@link AccumuloSecurityException} raised while connecting
     */
    public void setup(Mapper<Text, ArticleExtractor.Article, Text, Mutation>.Context context) {
        Configuration conf = context.getConfiguration();
        this.tablename = new Text(WikipediaConfiguration.getTableName(conf));
        this.indexTableName = new Text(this.tablename + "Index");
        this.reverseIndexTableName = new Text(this.tablename + "ReverseIndex");
        this.metadataTableName = new Text(this.tablename + "Metadata");
        try {
            // Per the Accumulo Connector API the arguments are: max buffered bytes, max latency (ms), write threads.
            this.mtbw = WikipediaConfiguration.getConnector(conf).createMultiTableBatchWriter(10000000L, 1000L, 10);
            this.numPartitions = WikipediaConfiguration.getNumPartitions(conf);

            // Merges two partial entries for the same key. Mutates and returns the first argument.
            LRUOutputCombiner.Fold<CountAndSet> mergeUids = new LRUOutputCombiner.Fold<CountAndSet>() {
                @Override // org.apache.accumulo.examples.wikisearch.ingest.LRUOutputCombiner.Fold
                public CountAndSet fold(CountAndSet countAndSet, CountAndSet countAndSet2) {
                    countAndSet.count += countAndSet2.count;
                    if (countAndSet.set == null || countAndSet2.set == null) {
                        // Either side already dropped its identifiers; the merged entry keeps only the count.
                        countAndSet.set = null;
                        return countAndSet;
                    }
                    countAndSet.set.addAll(countAndSet2.set);
                    // Stop tracking individual identifiers once more than 20 have accumulated.
                    if (countAndSet.set.size() > 20) {
                        countAndSet.set = null;
                    }
                    return countAndSet;
                }
            };

            // Index and reverse index share the same fold and the same output logic; only the table differs.
            // NOTE(review): the first constructor argument (10000) is presumably the combiner capacity - confirm.
            this.wikiIndexOutput = new LRUOutputCombiner<>(10000, mergeUids, newUidListOutput(this.indexTableName.toString()));
            this.wikiReverseIndexOutput = new LRUOutputCombiner<>(10000, mergeUids, newUidListOutput(this.reverseIndexTableName.toString()));

            final String metadataTable = this.metadataTableName.toString();
            this.wikiMetadataOutput = new LRUOutputCombiner<>(10000, new LRUOutputCombiner.Fold<Value>() {
                @Override // org.apache.accumulo.examples.wikisearch.ingest.LRUOutputCombiner.Fold
                public Value fold(Value value, Value value2) {
                    // Duplicate metadata entries collapse to the first value seen.
                    return value;
                }
            }, new LRUOutputCombiner.Output<MutationInfo, Value>() {
                @Override // org.apache.accumulo.examples.wikisearch.ingest.LRUOutputCombiner.Output
                public void output(MutationInfo mutationInfo, Value value) {
                    writeMutation(metadataTable, mutationInfo, value);
                }
            });
        } catch (AccumuloException | AccumuloSecurityException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds the output callback shared by the index and reverse-index combiners: it serialises a
     * {@link CountAndSet} as a {@code Uid.List} and writes it to {@code tableName}.
     */
    private LRUOutputCombiner.Output<MutationInfo, CountAndSet> newUidListOutput(final String tableName) {
        return new LRUOutputCombiner.Output<MutationInfo, CountAndSet>() {
            @Override // org.apache.accumulo.examples.wikisearch.ingest.LRUOutputCombiner.Output
            public void output(MutationInfo mutationInfo, CountAndSet countAndSet) {
                Uid.List.Builder uidList = Uid.List.newBuilder();
                uidList.setCOUNT(countAndSet.count);
                if (countAndSet.set == null) {
                    // Identifiers were dropped by the fold: flag the list as count-only.
                    uidList.setIGNORE(true);
                    uidList.clearUID();
                } else {
                    uidList.setIGNORE(false);
                    uidList.addAllUID(countAndSet.set);
                }
                writeMutation(tableName, mutationInfo, new Value(uidList.m68build().toByteArray()));
            }
        };
    }

    /**
     * Writes a single-cell mutation described by {@code info} to {@code tableName} through the
     * shared batch writer, rethrowing any failure as a {@link RuntimeException}.
     */
    private void writeMutation(String tableName, MutationInfo info, Value value) {
        Mutation mutation = new Mutation(info.row);
        mutation.put(info.colfam, info.colqual, info.cv, info.timestamp, value);
        try {
            this.mtbw.getBatchWriter(tableName).addMutation(mutation);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Flushes the three combiners and then closes the batch writer.
     *
     * <p>The writer is closed even when a flush fails, so it is not leaked. If both a flush and
     * the close fail, the flush failure propagates and the close failure is attached to it as a
     * suppressed exception.
     *
     * @param context the task context (unused)
     * @throws RuntimeException wrapping a {@link MutationsRejectedException} from the close when
     *         the flushes succeeded
     */
    protected void cleanup(Mapper<Text, ArticleExtractor.Article, Text, Mutation>.Context context) throws IOException, InterruptedException {
        Throwable flushFailure = null;
        try {
            this.wikiIndexOutput.flush();
            this.wikiMetadataOutput.flush();
            this.wikiReverseIndexOutput.flush();
        } catch (RuntimeException | Error e) {
            // Remember the failure so the close below does not mask it.
            flushFailure = e;
            throw e;
        } finally {
            try {
                this.mtbw.close();
            } catch (MutationsRejectedException e) {
                if (flushFailure == null) {
                    throw new RuntimeException(e);
                }
                flushFailure.addSuppressed(e);
            }
        }
    }

    /**
     * Converts one article into Accumulo writes.
     *
     * <p>For a non-null article this emits, via {@code context.write}, a single mutation on the
     * article's partition row holding one entry per field value, one field-index entry per
     * normalized field value or token, and the Base64-encoded article text. It also feeds the
     * index, reverse-index and metadata combiners. A null article only increments the
     * {@code wikipedia / invalid articles} counter.
     *
     * @param text the input key; it is embedded in column families/qualifiers and in the column
     *        visibility (presumably the wiki language code - confirm against the input format)
     * @param article the parsed article, or {@code null} if it could not be parsed
     * @param context the task context used for output, counters and progress reporting
     */
    protected void map(Text text, ArticleExtractor.Article article, Mapper<Text, ArticleExtractor.Article, Text, Mutation>.Context context) throws IOException, InterruptedException {
        // NOTE(review): the decompiled literals held two U+FFFD replacement characters wherever this
        // separator appears. That is what the class-file (modified UTF-8) encoding of U+0000,
        // bytes 0xC0 0x80, looks like when misread as standard UTF-8, so the separator is restored
        // to a single NUL character. Confirm against the original source or class file.
        final String nul = "\u0000";
        final String keyString = text.toString();
        final String docFamilyPrefix = keyString + nul;
        final String fieldIndexPrefix = "fi" + nul;
        final ColumnVisibility visibility = new ColumnVisibility(cvPrefix + keyString);

        if (article == null) {
            context.getCounter("wikipedia", "invalid articles").increment(1L);
            context.progress();
            return;
        }

        final String partitionId = Integer.toString(WikipediaMapper.getPartitionId(article, this.numPartitions));
        final Mutation shardMutation = new Mutation(new Text(partitionId));

        // Values that are identical for every entry of this article.
        final long timestamp = article.getTimestamp();
        final String articleId = Integer.toString(article.getId());
        final String docReference = docFamilyPrefix + articleId;
        final String indexQualifier = partitionId + nul + keyString;
        final String normalizerQualifier = keyString + nul + LcNoDiacriticsNormalizer.class.getName();

        // One entry per raw field value, plus a metadata record naming the field.
        for (Map.Entry<String, Object> field : article.getFieldValues().entrySet()) {
            shardMutation.put(docReference, field.getKey() + nul + field.getValue().toString(), visibility, timestamp, NULL_VALUE);
            this.wikiMetadataOutput.put(new MutationInfo(field.getKey(), METADATA_EVENT_COLUMN_FAMILY, keyString, visibility, timestamp), NULL_VALUE);
        }

        // Collect everything to index: normalized field values plus normalized tokens under "TEXT".
        final HashMultimap<String, String> indexedValues = HashMultimap.create();
        for (Map.Entry<String, String> normalized : article.getNormalizedFieldValues().entrySet()) {
            indexedValues.put(normalized.getKey(), normalized.getValue());
        }
        final LcNoDiacriticsNormalizer normalizer = new LcNoDiacriticsNormalizer();
        for (String token : WikipediaMapper.getTokens(article)) {
            indexedValues.put(TOKENS_FIELD_NAME, normalizer.normalizeFieldValue("", token));
        }

        for (Map.Entry<String, String> indexed : indexedValues.entries()) {
            final String fieldName = indexed.getKey();
            final String fieldValue = indexed.getValue();
            shardMutation.put(fieldIndexPrefix + fieldName, fieldValue + nul + docReference, visibility, timestamp, NULL_VALUE);
            // Each combiner gets its own CountAndSet: the fold mutates the instances it is handed.
            this.wikiIndexOutput.put(new MutationInfo(fieldValue, fieldName, indexQualifier, visibility, timestamp), new CountAndSet(articleId));
            this.wikiReverseIndexOutput.put(new MutationInfo(StringUtils.reverse(fieldValue), fieldName, indexQualifier, visibility, timestamp), new CountAndSet(articleId));
            this.wikiMetadataOutput.put(new MutationInfo(fieldName, METADATA_INDEX_COLUMN_FAMILY, normalizerQualifier, visibility, timestamp), NULL_VALUE);
        }

        // Encode with an explicit charset so stored bytes do not depend on the JVM's platform default.
        shardMutation.put(DOCUMENT_COLUMN_FAMILY, docReference, visibility, timestamp, new Value(Base64.encodeBase64(article.getText().getBytes(UTF8))));
        context.write(this.tablename, shardMutation);
        context.progress();
    }

    protected /* bridge */ /* synthetic */ void map(Object obj, Object obj2, Mapper.Context context) throws IOException, InterruptedException {
        // Compiler-generated bridge (as recovered by the decompiler): narrow the erased
        // arguments and delegate to the typed map(Text, Article, Context) overload.
        final Text typedKey = (Text) obj;
        final ArticleExtractor.Article typedValue = (ArticleExtractor.Article) obj2;
        map(typedKey, typedValue, (Mapper<Text, ArticleExtractor.Article, Text, Mutation>.Context) context);
    }
}
