package com.senseidb.indexing.hadoop.map;

import com.senseidb.conf.SchemaConverter;
import com.senseidb.conf.SenseiSchema;
import com.senseidb.indexing.DefaultJsonSchemaInterpreter;
import com.senseidb.indexing.ShardingStrategy;
import com.senseidb.indexing.hadoop.keyvalueformat.IntermediateForm;
import com.senseidb.indexing.hadoop.keyvalueformat.Shard;
import com.senseidb.indexing.hadoop.util.SenseiJobConfig;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.util.Version;
import org.json.JSONException;
import org.json.JSONObject;
import proj.zoie.api.ZoieSegmentReader;
import proj.zoie.api.indexing.ZoieIndexable;

/* loaded from: input_file:com/senseidb/indexing/hadoop/map/SenseiMapper.class */
/**
 * Hadoop mapper (old {@code mapred} API) that turns each input record into a
 * per-shard {@link IntermediateForm}.
 *
 * <p>For every record the mapper:
 * <ol>
 *   <li>converts the key/value pair to JSON and filters it through the configured
 *       {@link MapInputConverter};</li>
 *   <li>interprets the JSON with the schema-driven {@link DefaultJsonSchemaInterpreter};</li>
 *   <li>feeds the first resulting Lucene document, together with the configured
 *       {@link Analyzer}, into a fresh {@link IntermediateForm};</li>
 *   <li>emits that form keyed by the {@link Shard} chosen by the configured
 *       {@link ShardingStrategy}.</li>
 * </ol>
 *
 * <p>The interpreter and the analyzer are held in static fields, so they are shared
 * by every instance of this class in the JVM and are only built the first time
 * {@link #configure(JobConf)} finds them unset. No synchronization guards that
 * lazy initialisation.
 */
public class SenseiMapper extends MapReduceBase implements Mapper<Object, Object, Shard, IntermediateForm> {
    private static final Logger logger = Logger.getLogger(SenseiMapper.class);

    /** Name of the document field that receives the indexable's stored bytes. */
    private static final String STORE_FIELD = "_STORE";

    /** Schema interpreter; built once by {@link #setSchema(JobConf)} and then reused. */
    private static DefaultJsonSchemaInterpreter _defaultInterpreter = null;

    /** Lucene analyzer; built once by {@link #setAnalyzer(JobConf)} and then reused. */
    private static Analyzer analyzer;

    // Not read anywhere in this class; kept as-is.
    private boolean _use_remote_schema = false;

    /** Set to true only when both the schema and the analyzer set-up completed without throwing. */
    private volatile boolean _isConfigured = false;

    private Configuration _conf;
    private Shard[] _shards;
    private ShardingStrategy _shardingStategy;
    private MapInputConverter _converter;

    /**
     * Converts one input record and emits it to the shard selected by the sharding strategy.
     *
     * <p>The record is silently skipped when the converter's filter returns {@code null},
     * when no schema interpreter or analyzer is available (a missing interpreter also bumps
     * the {@code Map/Interpreter_null} counter), or when the indexable yields no indexing
     * requests.
     *
     * @param key             input key, handed to the {@link MapInputConverter}
     * @param value           input value, handed to the {@link MapInputConverter}
     * @param outputCollector receives the (shard, intermediate form) pair
     * @param reporter        used for the {@code Map/Interpreter_null} counter
     * @throws IllegalStateException if {@link #configure(JobConf)} did not succeed, or if
     *                               anything fails while processing the record (the original
     *                               failure is attached as the cause)
     */
    public void map(Object key, Object value, OutputCollector<Shard, IntermediateForm> outputCollector, Reporter reporter) throws IOException {
        if (!this._isConfigured) {
            throw new IllegalStateException("Mapper's configure method wasn't sucessful. May not get the correct schema or Lucene Analyzer.");
        }
        try {
            JSONObject json = this._converter.doFilter(this._converter.getJsonInput(key, value, this._conf));
            if (_defaultInterpreter == null) {
                reporter.incrCounter("Map", "Interpreter_null", 1L);
                return;
            }
            if (json == null || analyzer == null) {
                return;
            }

            ZoieIndexable indexable = _defaultInterpreter.convertAndInterpret(json);
            ZoieIndexable.IndexingReq[] requests = indexable.buildIndexingReqs();
            if (requests.length == 0) {
                return;
            }

            // Only the first indexing request is used; any further ones are ignored.
            Document document = requests[0].getDocument();
            ZoieSegmentReader.fillDocumentID(document, indexable.getUID());
            if (indexable.isStorable()) {
                byte[] storeValue = indexable.getStoreValue();
                if (storeValue != null) {
                    document.add(new Field(STORE_FIELD, storeValue));
                }
            }

            IntermediateForm form = new IntermediateForm();
            form.configure(this._conf);
            form.process(document, analyzer);
            form.closeWriter();

            outputCollector.collect(this._shards[chooseShard(json)], form);
        } catch (Exception e) {
            // Keep the historical exception type and message, but log through the class
            // logger and chain the cause so the real failure is not lost.
            logger.error("data conversion or filtering failed inside mapper", e);
            throw new IllegalStateException("data conversion or filtering failed inside mapper. \n", e);
        }
    }

    /**
     * Asks the sharding strategy for the target shard and validates the answer.
     *
     * @param json the filtered record, passed to the strategy
     * @return an index that is valid for {@code _shards}
     * @throws IOException if the strategy fails with a {@link JSONException} or returns an
     *                     index outside {@code [0, _shards.length)}
     */
    private int chooseShard(JSONObject json) throws IOException {
        int shardCount = this._shards.length;
        int shard;
        try {
            shard = this._shardingStategy.caculateShard(shardCount, json);
        } catch (JSONException e) {
            throw new IOException("sharding dose not work for mapper.", e);
        }
        // Checking the upper bound as well turns an otherwise bare
        // ArrayIndexOutOfBoundsException into a descriptive error.
        if (shard < 0 || shard >= shardCount) {
            throw new IOException("Chosen shard for insert must be >= 0 and < " + shardCount + ". current shard is: " + shard);
        }
        return shard;
    }

    /**
     * Reads the shard list, sharding strategy and input converter from the job
     * configuration, then sets up the schema interpreter and the analyzer.
     *
     * <p>A failure in the schema/analyzer set-up is logged and remembered rather than
     * rethrown; {@link #map} then rejects every record with an
     * {@link IllegalStateException}.
     *
     * @param jobConf the job configuration
     */
    public void configure(JobConf jobConf) {
        super.configure(jobConf);
        this._conf = jobConf;
        this._shards = Shard.getIndexShards(this._conf);
        this._shardingStategy = (ShardingStrategy) ReflectionUtils.newInstance(
                jobConf.getClass(SenseiJobConfig.DISTRIBUTION_POLICY, DummyShardingStrategy.class, ShardingStrategy.class), jobConf);
        this._converter = (MapInputConverter) ReflectionUtils.newInstance(
                jobConf.getClass(SenseiJobConfig.MAPINPUT_CONVERTER, DummyMapInputConverter.class, MapInputConverter.class), jobConf);
        try {
            setSchema(jobConf);
            setAnalyzer(jobConf);
            this._isConfigured = true;
        } catch (Exception e) {
            logger.error("SenseiMapper configuration failed; map() will reject every record", e);
            this._isConfigured = false;
        }
    }

    /**
     * Instantiates the shared analyzer, unless one already exists, from the analyzer class
     * name and Lucene version name found in the configuration. The analyzer class must
     * expose a public constructor taking a single {@link Version}.
     *
     * @param jobConf unused; the values are read from the configuration stored by
     *                {@link #configure(JobConf)}
     * @throws IllegalStateException if the version or the analyzer class name is missing
     * @throws Exception             if the version name is unknown or the analyzer cannot be
     *                               loaded or constructed reflectively
     */
    private void setAnalyzer(JobConf jobConf) throws Exception {
        if (analyzer != null) {
            return;
        }
        String versionName = this._conf.get(SenseiJobConfig.DOCUMENT_ANALYZER_VERSION);
        if (versionName == null) {
            throw new IllegalStateException("version has not been specified");
        }
        String analyzerClassName = this._conf.get(SenseiJobConfig.DOCUMENT_ANALYZER);
        if (analyzerClassName == null) {
            throw new IllegalStateException("analyzer name has not been specified");
        }
        // Type-safe equivalent of the former raw Enum.valueOf(Class.forName(...), name) lookup.
        Version version = Version.valueOf(versionName);
        analyzer = (Analyzer) Class.forName(analyzerClassName).getConstructor(Version.class).newInstance(version);
    }

    /**
     * Locates the schema file, parses it as XML and builds the shared schema interpreter,
     * unless one already exists.
     *
     * <p>The configured schema name is used as the file path, except that the first
     * distributed-cache file whose local path contains that name takes precedence.
     * When no path can be determined the method returns without building an interpreter.
     *
     * @param jobConf the job configuration
     * @throws IllegalStateException  if distributed-cache files are present but no schema
     *                                name is configured to match them against
     * @throws ConfigurationException if the resolved schema file does not exist
     * @throws Exception              if the file cannot be opened, parsed or converted
     */
    private void setSchema(JobConf jobConf) throws Exception {
        String schemaName = jobConf.get(SenseiJobConfig.SCHEMA_FILE_URL);
        String schemaPath = schemaName;

        Path[] cacheFiles = DistributedCache.getLocalCacheFiles(jobConf);
        if (cacheFiles != null && cacheFiles.length > 0) {
            if (schemaName == null) {
                // Used to surface as a NullPointerException out of String.contains(null).
                throw new IllegalStateException("schema file has not been specified");
            }
            for (int i = 0; i < cacheFiles.length; i++) {
                String candidate = cacheFiles[i].toString();
                if (candidate.contains(schemaName)) {
                    schemaPath = candidate;
                    break;
                }
            }
        }

        if (schemaPath == null || schemaPath.length() == 0) {
            return;
        }
        if (_defaultInterpreter != null) {
            return;
        }

        String schemaUri = "file:///" + schemaPath;
        logger.info("schema file is:" + schemaUri);
        URL url = new URL(schemaUri);
        // Open the resource to verify it is readable, and close it straight away so the
        // underlying handle is not left open.
        url.openStream().close();
        File schemaFile = new File(url.toURI());
        if (!schemaFile.exists()) {
            throw new ConfigurationException("schema not file");
        }

        // NOTE(review): the parser runs with default settings; if schema files can ever come
        // from an untrusted source, DTDs/external entities should be disabled here.
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setIgnoringComments(true);
        org.w3c.dom.Document schemaDocument = factory.newDocumentBuilder().parse(schemaFile);
        schemaDocument.getDocumentElement().normalize();
        _defaultInterpreter = new DefaultJsonSchemaInterpreter(SenseiSchema.build(SchemaConverter.convert(schemaDocument)));
    }
}
