package hivemall.topicmodel;

import com.google.common.base.Preconditions;
import hivemall.UDTFWithOptions;
import hivemall.annotations.VisibleForTesting;
import hivemall.sketch.bloom.BloomFilterUtils;
import hivemall.utils.hadoop.HiveUtils;
import hivemall.utils.io.NIOUtils;
import hivemall.utils.io.NioStatefulSegment;
import hivemall.utils.lang.Primitives;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import javax.annotation.Nonnull;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

/* loaded from: input_file:hivemall/topicmodel/ProbabilisticTopicModelBaseUDTF.class */
public abstract class ProbabilisticTopicModelBaseUDTF extends UDTFWithOptions {
    /** Class-wide logger; assigned in the static initializer at the bottom of the class. */
    private static final Log logger;
    /** Default number of topics, matching the "[default: 10]" advertised by the -topics option. */
    public static final int DEFAULT_TOPICS = 10;
    /** Number of topics; overwritten by the -topics option and used as the loop bound in forwardModel(). */
    protected int topics = 10;
    /** Maximum number of iterations (-iterations); a value of 1 disables recording samples for replay. */
    protected int iterations = 10;
    /** Value of the -epsilon option, described there as a perplexity-difference convergence check. */
    protected double eps = 0.1d;
    /** Number of documents collected before train() is triggered (-mini_batch_size). */
    protected int miniBatchSize = 128;
    /** Documents awaiting training; each row holds the feature strings of one document. */
    protected String[][] miniBatch;
    /** Number of rows of miniBatch that are currently filled. */
    protected int miniBatchCount;
    /** The model being trained; created lazily in process() and cleared in close(). */
    protected transient AbstractProbabilisticTopicModel model;
    /** Inspector for the first UDTF argument (the list of feature strings). */
    protected ListObjectInspector wordCountsOI;
    /** Temporary-file segment that receives flushed training samples; created on first use. */
    protected transient NioStatefulSegment fileIO;
    /** Direct buffer in which serialized training samples are staged before being written to fileIO. */
    protected transient ByteBuffer inputBuf;
    /** Running sum of model.computePerplexity() over trained mini-batches; reset in initialize(). */
    private float cumPerplexity;
    /** Compiler-generated assertion flag surfaced by the decompiler; set in the static initializer. */
    static final /* synthetic */ boolean $assertionsDisabled;

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Declares the command-line style options shared by all probabilistic topic model UDTFs.
     *
     * @return a fresh {@link Options} holding the topics, iterations, epsilon and
     *         mini_batch_size options
     */
    @Override // hivemall.UDTFWithOptions
    public Options getOptions() {
        // addOption returns the Options instance it was invoked on, so the calls can be chained.
        return new Options()
            .addOption("k", "topics", true, "The number of topics [default: 10]")
            .addOption("iter", "iterations", true, "The maximum number of iterations [default: 10]")
            .addOption("eps", "epsilon", true, "Check convergence based on the difference of perplexity [default: 1E-1]")
            .addOption("s", "mini_batch_size", true, "Repeat model updating per mini-batch [default: 128]");
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Parses the optional second argument (a constant option string) and stores the
     * resulting hyper-parameters on this instance.
     *
     * <p>When fewer than two arguments are given, nothing is parsed, the field defaults
     * stay in effect and {@code null} is returned.
     *
     * @param objectInspectorArr inspectors of the UDTF arguments; index 1, if present,
     *        must be a constant string of options
     * @return the parsed command line, or {@code null} when no option string was supplied
     * @throws UDFArgumentException if the options cannot be parsed, or if topics,
     *         iterations or mini_batch_size is smaller than 1
     */
    @Override // hivemall.UDTFWithOptions
    public CommandLine processOptions(ObjectInspector[] objectInspectorArr) throws UDFArgumentException {
        if (objectInspectorArr.length < 2) {
            return null;
        }
        CommandLine commandLine = parseOptions(HiveUtils.getConstString(objectInspectorArr[1]));

        this.topics = Primitives.parseInt(commandLine.getOptionValue("topics"), DEFAULT_TOPICS);
        if (this.topics < 1) {
            throw new UDFArgumentException("'-topics' must be greater than or equals to 1: " + this.topics);
        }

        this.iterations = Primitives.parseInt(commandLine.getOptionValue("iterations"), 10);
        if (this.iterations < 1) {
            throw new UDFArgumentException("'-iterations' must be greater than or equals to 1: " + this.iterations);
        }

        this.eps = Primitives.parseDouble(commandLine.getOptionValue("epsilon"), 0.1d);

        this.miniBatchSize = Primitives.parseInt(commandLine.getOptionValue("mini_batch_size"), 128);
        // The mini-batch array is sized by this value and update() indexes into it, so a
        // non-positive size would otherwise fail later with an unrelated array exception.
        if (this.miniBatchSize < 1) {
            throw new UDFArgumentException("'-mini_batch_size' must be greater than or equals to 1: " + this.miniBatchSize);
        }
        return commandLine;
    }

    /**
     * Validates the argument inspectors, parses the options, resets the training state and
     * declares the output schema.
     *
     * <p>The output struct has three columns: {@code topic} (int), {@code word} (string) and
     * {@code score} (float).
     *
     * @param objectInspectorArr inspectors of the UDTF arguments; index 0 is the list of
     *        feature strings, index 1 (optional) is the constant option string
     * @return the struct inspector describing the forwarded rows
     * @throws UDFArgumentException if no argument is given or an argument/option is invalid
     */
    public StructObjectInspector initialize(ObjectInspector[] objectInspectorArr) throws UDFArgumentException {
        if (objectInspectorArr.length < 1) {
            throw new UDFArgumentException("_FUNC_ takes 1 arguments: array<string> words [, const string options]");
        }
        this.wordCountsOI = HiveUtils.asListOI(objectInspectorArr[0]);
        HiveUtils.validateFeatureOI(this.wordCountsOI.getListElementObjectInspector());
        processOptions(objectInspectorArr);

        // Reset per-query training state; the model itself is created lazily in process().
        this.model = null;
        // One row per document; rows are filled in by update().
        this.miniBatch = new String[this.miniBatchSize][];
        this.miniBatchCount = 0;
        this.cumPerplexity = 0.0f;

        List<String> fieldNames = new ArrayList<String>();
        List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
        fieldNames.add("topic");
        fieldOIs.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector);
        fieldNames.add("word");
        fieldOIs.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
        fieldNames.add("score");
        fieldOIs.add(PrimitiveObjectInspectorFactory.writableFloatObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    /**
     * Creates the concrete topic model to train.
     *
     * <p>Invoked by {@code process()} when no model exists yet, i.e. on the first processed row.
     *
     * @return a new model instance; must not be {@code null}
     */
    @Nonnull
    protected abstract AbstractProbabilisticTopicModel createModel();

    /**
     * Consumes one input row: extracts the document's feature strings, feeds them to the
     * current mini-batch and records them for later replay.
     *
     * <p>Rows whose first argument is {@code null} or an empty list are ignored.
     *
     * @param objArr the row arguments; index 0 is the list of feature strings
     * @throws HiveException if the list contains a {@code null} element or the sample cannot
     *         be recorded
     */
    public void process(Object[] objArr) throws HiveException {
        if (this.model == null) {
            this.model = createModel();
        }
        Preconditions.checkArgument(objArr.length >= 1);

        final Object document = objArr[0];
        if (document == null) {
            return;
        }

        final int numFeatures = this.wordCountsOI.getListLength(document);
        final String[] features = new String[numFeatures];
        for (int idx = 0; idx < numFeatures; idx++) {
            final Object element = this.wordCountsOI.getListElement(document, idx);
            if (element == null) {
                throw new HiveException("Given feature vector contains invalid null elements");
            }
            features[idx] = element.toString();
        }
        if (numFeatures == 0) {
            // Nothing to learn from an empty document.
            return;
        }

        this.model.accumulateDocCount();
        update(features);
        recordTrainSampleToTempFile(features);
    }

    /**
     * Serializes one training sample into the staging buffer so it can be replayed in later
     * iterations. Does nothing when only a single iteration is requested.
     *
     * <p>On first use a temporary file segment and a direct staging buffer are created. Each
     * record is laid out as: record length (int), number of strings (int), then every string
     * as written by {@code NIOUtils.putString}. When the buffer cannot hold the next record
     * it is flushed to the file segment first.
     *
     * @param strArr the feature strings of one document
     * @throws HiveException if the temporary file or buffer cannot be set up, if flushing
     *         fails, or if a single record is larger than the staging buffer
     */
    protected void recordTrainSampleToTempFile(@Nonnull String[] strArr) throws HiveException {
        if (this.iterations == 1) {
            return;
        }
        ByteBuffer byteBuffer = this.inputBuf;
        NioStatefulSegment nioStatefulSegment = this.fileIO;
        if (byteBuffer == null) {
            try {
                File createTempFile = File.createTempFile("hivemall_topicmodel", ".sgmt");
                createTempFile.deleteOnExit();
                if (!createTempFile.canWrite()) {
                    throw new UDFArgumentException("Cannot write a temporary file: " + createTempFile.getAbsolutePath());
                }
                logger.info("Record training samples to a file: " + createTempFile.getAbsolutePath());
                byteBuffer = ByteBuffer.allocateDirect(BloomFilterUtils.DEFAULT_BLOOM_FILTER_SIZE);
                nioStatefulSegment = new NioStatefulSegment(createTempFile, false);
                // Publish both fields only once both resources exist, so a failure above
                // cannot leave a buffer without a file segment behind it.
                this.inputBuf = byteBuffer;
                this.fileIO = nioStatefulSegment;
            } catch (UDFArgumentException e) {
                // Already carries a precise message; do not wrap it a second time.
                throw e;
            } catch (IOException e) {
                throw new UDFArgumentException(e);
            } catch (Throwable th) {
                throw new UDFArgumentException(th);
            }
        }

        long totalChars = 0L;
        for (String str : strArr) {
            if (str != null) {
                totalChars += str.length();
            }
        }
        // string count (int) + one length prefix (int) per string + two bytes per char
        final long recordBytes = 4L + (4L * strArr.length) + (totalChars * 2L);
        // plus the leading int that stores recordBytes itself
        final long requiredBytes = 4L + recordBytes;
        if (requiredBytes > byteBuffer.capacity()) {
            // Flushing cannot help: the record would overflow even an empty buffer.
            throw new HiveException("A training sample requires " + requiredBytes
                    + " bytes, which exceeds the staging buffer capacity of " + byteBuffer.capacity() + " bytes");
        }
        if (byteBuffer.remaining() < requiredBytes) {
            writeBuffer(byteBuffer, nioStatefulSegment);
        }
        // Safe narrowing: recordBytes is below the (int) buffer capacity at this point.
        byteBuffer.putInt((int) recordBytes);
        byteBuffer.putInt(strArr.length);
        for (String str : strArr) {
            NIOUtils.putString(str, byteBuffer);
        }
    }

    /**
     * Appends one document to the current mini-batch and trains as soon as the batch is full.
     *
     * @param strArr the feature strings of one document
     */
    private void update(@Nonnull String[] strArr) {
        final int slot = this.miniBatchCount;
        this.miniBatch[slot] = strArr;
        this.miniBatchCount = slot + 1;
        if (this.miniBatchSize == this.miniBatchCount) {
            train();
        }
    }

    /**
     * Trains the model on the buffered mini-batch, adds the resulting perplexity to the
     * running total and empties the batch. Does nothing when the batch is empty.
     */
    protected void train() {
        if (this.miniBatchCount != 0) {
            this.model.train(this.miniBatch);
            this.cumPerplexity += this.model.computePerplexity();
            // Drop references to the trained documents and start a new batch.
            Arrays.fill(this.miniBatch, null);
            this.miniBatchCount = 0;
        }
    }

    /**
     * Flushes the staged bytes to the file segment and makes the buffer writable again.
     *
     * @param byteBuffer the staging buffer, in write mode on entry
     * @param nioStatefulSegment the destination segment
     * @throws HiveException if writing to the segment fails
     */
    private static void writeBuffer(@Nonnull ByteBuffer byteBuffer, @Nonnull NioStatefulSegment nioStatefulSegment) throws HiveException {
        // Switch from writing to reading so the segment sees everything staged so far.
        byteBuffer.flip();
        try {
            nioStatefulSegment.write(byteBuffer);
        } catch (IOException ioe) {
            throw new HiveException("Exception causes while writing a buffer to file", ioe);
        }
        byteBuffer.clear();
    }

    /**
     * Finishes training and forwards the learned model, then discards the model.
     *
     * <p>If no model was created, or the model has seen no documents, only a warning is
     * logged and nothing is forwarded.
     *
     * @throws HiveException if finishing the training or forwarding a row fails
     */
    public void close() throws HiveException {
        final AbstractProbabilisticTopicModel trained = this.model;
        if (trained == null) {
            logger.warn("Model is not initialized bacause no training exmples to learn. Better to revise input data.");
            return;
        }

        if (trained.getDocCount() != 0) {
            finalizeTraining();
            forwardModel();
        } else {
            logger.warn("model.getDocCount() is zero because no training exmples to learn. Better to revise input data.");
        }
        this.model = null;
    }

    /**
     * Trains on the documents still sitting in a partially filled mini-batch and then, when
     * more than one iteration is configured, runs the iterative training passes.
     *
     * @throws HiveException if the iterative training fails
     */
    @VisibleForTesting
    void finalizeTraining() throws HiveException {
        final int pending = this.miniBatchCount;
        if (pending > 0) {
            // NOTE(review): miniBatchCount is not reset and cumPerplexity is not updated here,
            // unlike train() - confirm this is intended for the iterative passes that follow.
            final String[][] remainder = Arrays.copyOfRange(this.miniBatch, 0, pending);
            this.model.train(remainder);
        }

        final int maxIterations = this.iterations;
        if (maxIterations > 1) {
            runIterativeTraining(maxIterations);
        }
    }

    /* JADX WARN: Code restructure failed: missing block: B:117:0x0347, code lost:
    
        r18 = r7.cumPerplexity / ((float) r13);
        hivemall.topicmodel.ProbabilisticTopicModelBaseUDTF.logger.info("Mean perplexity over mini-batches: " + r18);
     */
    /* JADX WARN: Code restructure failed: missing block: B:118:0x037b, code lost:
    
        if (java.lang.Math.abs(r0 - r18) >= r7.eps) goto L101;
     */
    /* JADX WARN: Code restructure failed: missing block: B:119:0x0381, code lost:
    
        r17 = r17 + 1;
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Placeholder for the iterative training routine, whose body the decompiler could not
     * reconstruct.
     *
     * <p>As written, this method unconditionally throws {@link UnsupportedOperationException}.
     * Because {@code finalizeTraining()} calls it whenever {@code iterations > 1}, this source
     * is not functional for multi-iteration runs until the real body is restored from the
     * original sources or the bytecode dump.
     *
     * <p>The fragments recovered by the decompiler (see the comments above this method) show
     * that the original divides {@code cumPerplexity} by a count, logs the result as
     * "Mean perplexity over mini-batches" and compares an absolute difference against
     * {@code eps}; everything else about the original behaviour is unknown from here.
     *
     * @param r8 decompiler-assigned name of the only parameter; {@code finalizeTraining()}
     *        passes {@code this.iterations}
     * @throws org.apache.hadoop.hive.ql.metadata.HiveException declared by the original
     *         method; never thrown by this stub
     */
    protected final void runIterativeTraining(@javax.annotation.Nonnegative int r8) throws org.apache.hadoop.hive.ql.metadata.HiveException {
        /*
            Method dump skipped, instructions count: 1107
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: hivemall.topicmodel.ProbabilisticTopicModelBaseUDTF.runIterativeTraining(int):void");
    }

    /**
     * Forwards the learned model as rows of (topic index, word, score), one row per word of
     * every topic. Topics for which the model returns {@code null} are skipped.
     *
     * @throws HiveException if forwarding a row fails
     */
    protected void forwardModel() throws HiveException {
        final IntWritable topicIdx = new IntWritable();
        final Text word = new Text();
        final FloatWritable score = new FloatWritable();
        // The same writable instances are reused for every forwarded row.
        final Object[] row = new Object[] {topicIdx, word, score};

        for (int k = 0; k < this.topics; k++) {
            topicIdx.set(k);
            final SortedMap<Float, List<String>> scoredWords = this.model.getTopicWords(k);
            if (scoredWords == null) {
                continue;
            }
            for (Map.Entry<Float, List<String>> scored : scoredWords.entrySet()) {
                score.set(scored.getKey().floatValue());
                for (String w : scored.getValue()) {
                    word.set(w);
                    forward(row);
                }
            }
        }

        logger.info("Forwarded topic words each of " + this.topics + " topics");
    }

    /**
     * Test hook that delegates to the model's {@code getWordScore}.
     *
     * @param str the word to look up
     * @param i the second argument handed to the model (a topic index, judging by its use
     *        elsewhere in this class - confirm against the model)
     * @return the score reported by the model
     */
    @VisibleForTesting
    float getWordScore(String str, int i) {
        final float score = this.model.getWordScore(str, i);
        return score;
    }

    /**
     * Test hook that delegates to the model's {@code getTopicWords}.
     *
     * @param i the topic index
     * @return the model's mapping from score to the words carrying that score
     */
    @VisibleForTesting
    SortedMap<Float, List<String>> getTopicWords(int i) {
        final SortedMap<Float, List<String>> scoredWords = this.model.getTopicWords(i);
        return scoredWords;
    }

    /**
     * Test hook that delegates to the model's {@code getTopicDistribution}.
     *
     * @param strArr the feature strings of one document
     * @return the array returned by the model for that document
     */
    @VisibleForTesting
    float[] getTopicDistribution(@Nonnull String[] strArr) {
        final float[] distribution = this.model.getTopicDistribution(strArr);
        return distribution;
    }

    static {
        $assertionsDisabled = !ProbabilisticTopicModelBaseUDTF.class.desiredAssertionStatus();
        logger = LogFactory.getLog(ProbabilisticTopicModelBaseUDTF.class);
    }
}
