org.apache.solr.hadoop
Class SolrOutputFormat<K,V>

java.lang.Object
  extended by org.apache.hadoop.mapreduce.OutputFormat<K,V>
      extended by org.apache.hadoop.mapreduce.lib.output.FileOutputFormat<K,V>
          extended by org.apache.solr.hadoop.SolrOutputFormat<K,V>

public class SolrOutputFormat<K,V>
extends org.apache.hadoop.mapreduce.lib.output.FileOutputFormat<K,V>


Nested Class Summary
 
Nested classes/interfaces inherited from class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.Counter
 
Field Summary
static String OUTPUT_ZIP_FILE
          The key used to pass the boolean configuration parameter that selects between regular (raw index) output and zip file output
static String SETUP_OK
          The parameter used to pass the solr config zip file information.
static String SOLR_RECORD_WRITER_BATCH_SIZE
           
static String SOLR_RECORD_WRITER_MAX_SEGMENTS
           
static String SOLR_WRITER_QUEUE_SIZE
           
static String SOLR_WRITER_THREAD_COUNT
           
static String ZIP_FILE_BASE_NAME
          The base name of the zip file containing the configuration information.
static String ZIP_NAME
          The key used to pass the zip file name through the configuration.
 
Fields inherited from class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
BASE_OUTPUT_NAME, COMPRESS, COMPRESS_CODEC, COMPRESS_TYPE, OUTDIR, PART
 
Constructor Summary
SolrOutputFormat()
           
 
Method Summary
static void addSolrConfToDistributedCache(org.apache.hadoop.mapreduce.Job job, File solrHomeZip)
           
 void checkOutputSpecs(org.apache.hadoop.mapreduce.JobContext job)
           
static File createSolrHomeZip(File solrHomeDir)
           
static int getBatchSize(org.apache.hadoop.conf.Configuration jobConf)
           
static String getOutputName(org.apache.hadoop.mapreduce.JobContext job)
           
 org.apache.hadoop.mapreduce.RecordWriter<K,V> getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext context)
           
static String getSetupOk()
           
static int getSolrWriterQueueSize(org.apache.hadoop.conf.Configuration conf)
          Return the maximum size for the number of documents pending index writing.
static int getSolrWriterThreadCount(org.apache.hadoop.conf.Configuration conf)
          Get the number of threads used for index writing
static String getZipName(org.apache.hadoop.conf.Configuration conf)
          Return the file name portion of the configuration zip file, from the configuration.
static boolean isOutputZipFormat(org.apache.hadoop.conf.Configuration conf)
          return true if the output should be a zip file of the index, rather than the raw index
static void setBatchSize(int count, org.apache.hadoop.conf.Configuration jobConf)
           
static void setOutputZipFormat(boolean output, org.apache.hadoop.conf.Configuration conf)
          Configure the job to output zip files of the output index, or full directory trees.
static void setSolrWriterQueueSize(int count, org.apache.hadoop.conf.Configuration conf)
          Set the maximum size of the queue for documents to be written to the index.
static void setSolrWriterThreadCount(int count, org.apache.hadoop.conf.Configuration conf)
          Set the number of threads used for index writing
static void setupSolrHomeCache(File solrHomeDir, org.apache.hadoop.mapreduce.Job job)
           
 
Methods inherited from class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
getCompressOutput, getDefaultWorkFile, getOutputCommitter, getOutputCompressorClass, getOutputPath, getPathForWorkFile, getUniqueFile, getWorkOutputPath, setCompressOutput, setOutputCompressorClass, setOutputName, setOutputPath
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

SETUP_OK

public static final String SETUP_OK
The parameter used to pass the solr config zip file information. This will be the HDFS path to the configuration zip file.

See Also:
Constant Field Values

ZIP_NAME

public static final String ZIP_NAME
The key used to pass the zip file name through the configuration.

See Also:
Constant Field Values

ZIP_FILE_BASE_NAME

public static final String ZIP_FILE_BASE_NAME
The base name of the zip file containing the configuration information. This file is passed via the distributed cache using a unique name, obtained via getZipName(Configuration conf).

See Also:
Constant Field Values

OUTPUT_ZIP_FILE

public static final String OUTPUT_ZIP_FILE
The key used to pass the boolean configuration parameter that selects between regular (raw index) output and zip file output

See Also:
Constant Field Values

SOLR_WRITER_THREAD_COUNT

public static final String SOLR_WRITER_THREAD_COUNT
See Also:
Constant Field Values

SOLR_WRITER_QUEUE_SIZE

public static final String SOLR_WRITER_QUEUE_SIZE
See Also:
Constant Field Values

SOLR_RECORD_WRITER_BATCH_SIZE

public static final String SOLR_RECORD_WRITER_BATCH_SIZE
See Also:
Constant Field Values

SOLR_RECORD_WRITER_MAX_SEGMENTS

public static final String SOLR_RECORD_WRITER_MAX_SEGMENTS
See Also:
Constant Field Values
Constructor Detail

SolrOutputFormat

public SolrOutputFormat()
Method Detail

getSetupOk

public static String getSetupOk()

setSolrWriterThreadCount

public static void setSolrWriterThreadCount(int count,
                                            org.apache.hadoop.conf.Configuration conf)
Set the number of threads used for index writing


getSolrWriterThreadCount

public static int getSolrWriterThreadCount(org.apache.hadoop.conf.Configuration conf)
Get the number of threads used for index writing


setSolrWriterQueueSize

public static void setSolrWriterQueueSize(int count,
                                          org.apache.hadoop.conf.Configuration conf)
Set the maximum size of the queue for documents to be written to the index.


getSolrWriterQueueSize

public static int getSolrWriterQueueSize(org.apache.hadoop.conf.Configuration conf)
Return the maximum size for the number of documents pending index writing.


getZipName

public static String getZipName(org.apache.hadoop.conf.Configuration conf)
Return the file name portion of the configuration zip file, from the configuration.


setOutputZipFormat

public static void setOutputZipFormat(boolean output,
                                      org.apache.hadoop.conf.Configuration conf)
Configure the job to output zip files of the output index, or full directory trees. Zip files are about 1/5th the size of the raw index, and much faster to write, but take more CPU to create.

Parameters:
output - true if should output zip files
conf - to use

isOutputZipFormat

public static boolean isOutputZipFormat(org.apache.hadoop.conf.Configuration conf)
return true if the output should be a zip file of the index, rather than the raw index

Parameters:
conf - to use
Returns:
true if output zip files is on

getOutputName

public static String getOutputName(org.apache.hadoop.mapreduce.JobContext job)

checkOutputSpecs

public void checkOutputSpecs(org.apache.hadoop.mapreduce.JobContext job)
                      throws IOException
Overrides:
checkOutputSpecs in class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat<K,V>
Throws:
IOException

getRecordWriter

public org.apache.hadoop.mapreduce.RecordWriter<K,V> getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext context)
                                                              throws IOException,
                                                                     InterruptedException
Specified by:
getRecordWriter in class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat<K,V>
Throws:
IOException
InterruptedException

setupSolrHomeCache

public static void setupSolrHomeCache(File solrHomeDir,
                                      org.apache.hadoop.mapreduce.Job job)
                               throws IOException
Throws:
IOException

createSolrHomeZip

public static File createSolrHomeZip(File solrHomeDir)
                              throws IOException
Throws:
IOException

addSolrConfToDistributedCache

public static void addSolrConfToDistributedCache(org.apache.hadoop.mapreduce.Job job,
                                                 File solrHomeZip)
                                          throws IOException
Throws:
IOException

getBatchSize

public static int getBatchSize(org.apache.hadoop.conf.Configuration jobConf)

setBatchSize

public static void setBatchSize(int count,
                                org.apache.hadoop.conf.Configuration jobConf)


Copyright © 2000-2014 Apache Software Foundation. All Rights Reserved.