public class DistCpOptions extends Object
| Modifier and Type | Class and Description |
|---|---|
| static class | DistCpOptions.FileAttribute |
| Modifier and Type | Field and Description |
|---|---|
| static int | maxNumListstatusThreads |
| Constructor and Description |
|---|
DistCpOptions(DistCpOptions that)
Copy constructor.
|
DistCpOptions(List<org.apache.hadoop.fs.Path> sourcePaths,
org.apache.hadoop.fs.Path targetPath)
Constructor, to initialize source/target paths.
|
DistCpOptions(org.apache.hadoop.fs.Path sourceFileListing,
org.apache.hadoop.fs.Path targetPath)
Constructor, to initialize source/target paths.
|
| Modifier and Type | Method and Description |
|---|---|
void |
appendToConf(org.apache.hadoop.conf.Configuration conf)
Add options to configuration.
|
protected DistCpOptions |
clone() |
org.apache.hadoop.fs.Path |
getAtomicWorkPath()
Get work path for atomic commit.
|
int |
getBlocksPerChunk() |
int |
getCopyBufferSize() |
String |
getCopyStrategy()
Get the copy strategy to use.
|
String |
getFiltersFile()
File path that contains the list of patterns
for paths to be filtered from the file copy.
|
String |
getFromSnapshot() |
org.apache.hadoop.fs.Path |
getLogPath()
Get output directory for writing distcp logs.
|
int |
getMapBandwidth()
Get the map bandwidth in MB
|
int |
getMaxMaps()
Get the max number of maps to use for this copy
|
int |
getNumListstatusThreads()
Get the number of threads to use for listStatus
|
org.apache.hadoop.fs.Path |
getSourceFileListing()
File path (hdfs:// or file://) that contains the list of actual
files to copy
|
List<org.apache.hadoop.fs.Path> |
getSourcePaths()
Getter for sourcePaths.
|
String |
getSslConfigurationFile()
Get path where the ssl configuration file is present to use for hftps://
|
org.apache.hadoop.fs.Path |
getTargetPath()
Getter for the targetPath.
|
boolean |
getTargetPathExists()
Getter for the targetPathExists.
|
String |
getToSnapshot() |
void |
preserve(DistCpOptions.FileAttribute fileAttribute)
Add file attributes that need to be preserved.
|
Iterator<DistCpOptions.FileAttribute> |
preserveAttributes()
Returns an iterator with the list of file attributes to preserve
|
void |
preserveRawXattrs()
Indicate that raw.* xattrs should be preserved
|
void |
setAppend(boolean append)
Set if we want to append new data to target files.
|
void |
setAtomicCommit(boolean atomicCommit)
Set if data needs to be committed atomically
|
void |
setAtomicWorkPath(org.apache.hadoop.fs.Path atomicWorkPath)
Set the work path for atomic commit
|
void |
setBlocking(boolean blocking)
Set if DistCp should run blocking or non-blocking
|
void |
setBlocksPerChunk(int csize) |
void |
setCopyBufferSize(int newCopyBufferSize) |
void |
setCopyStrategy(String copyStrategy)
Set the copy strategy to use.
|
void |
setDeleteMissing(boolean deleteMissing)
Set if files only present in target should be deleted
|
void |
setFiltersFile(String filtersFilename)
Set filtersFile.
|
void |
setIgnoreFailures(boolean ignoreFailures)
Set if failures during copy should be ignored
|
void |
setLogPath(org.apache.hadoop.fs.Path logPath)
Set the log path where distcp output logs are stored
Uses JobStagingDir/_logs by default
|
void |
setMapBandwidth(int mapBandwidth)
Set per map bandwidth
|
void |
setMaxMaps(int maxMaps)
Set the max number of maps to use for copy
|
void |
setNumListstatusThreads(int numThreads)
Set the number of threads to use for listStatus.
|
void |
setOverwrite(boolean overwrite)
Set if files should always be overwritten on target
|
void |
setSkipCRC(boolean skipCRC)
Set if checksum comparison should be skipped while determining if
source and destination files are identical
|
void |
setSourcePaths(List<org.apache.hadoop.fs.Path> sourcePaths)
Setter for sourcePaths.
|
void |
setSslConfigurationFile(String sslConfigurationFile)
Set the SSL configuration file path to use with hftps:// (local path)
|
void |
setSyncFolder(boolean syncFolder)
Set if source and target folder contents should be sync'ed up
|
boolean |
setTargetPathExists(boolean targetPathExists)
Set targetPathExists.
|
void |
setUseDiff(String fromSS,
String toSS) |
void |
setUseRdiff(String fromSS,
String toSS) |
void |
setVerboseLog(boolean newVerboseLog) |
boolean |
shouldAppend() |
boolean |
shouldAtomicCommit()
Should the data be committed atomically?
|
boolean |
shouldBlock()
Should DistCp be running in blocking mode
|
boolean |
shouldDeleteMissing()
Should target files missing in source be deleted?
|
boolean |
shouldIgnoreFailures()
Should failures be logged and ignored during copy?
|
boolean |
shouldOverwrite()
Should files be overwritten always?
|
boolean |
shouldPreserve(DistCpOptions.FileAttribute attribute)
Checks if the input attribute should be preserved or not.
|
boolean |
shouldPreserveRawXattrs()
Return true if raw.* xattrs should be preserved.
|
boolean |
shouldSkipCRC()
Should CRC/checksum check be skipped while checking files are identical
|
boolean |
shouldSyncFolder()
Should the data be sync'ed between source and target paths?
|
boolean |
shouldUseDiff() |
boolean |
shouldUseRdiff() |
boolean |
shouldUseSnapshotDiff() |
boolean |
shouldVerboseLog() |
boolean |
splitLargeFile() |
String |
toString()
Utility to easily string-ify Options, for logging.
|
void |
validate(DistCpOptionSwitch option,
boolean value) |
public static final int maxNumListstatusThreads
public DistCpOptions(List<org.apache.hadoop.fs.Path> sourcePaths, org.apache.hadoop.fs.Path targetPath)
sourcePaths - List of source-paths (including wildcards) to be copied to target.
targetPath - Destination path for the dist-copy.
public DistCpOptions(org.apache.hadoop.fs.Path sourceFileListing, org.apache.hadoop.fs.Path targetPath)
sourceFileListing - File containing list of source paths
targetPath - Destination path for the dist-copy.
public DistCpOptions(DistCpOptions that)
that - DistCpOptions being copied from.
public boolean shouldAtomicCommit()
public void setAtomicCommit(boolean atomicCommit)
atomicCommit - boolean switch
public boolean shouldSyncFolder()
public void setSyncFolder(boolean syncFolder)
syncFolder - boolean switch
public boolean shouldDeleteMissing()
public void setDeleteMissing(boolean deleteMissing)
deleteMissing - boolean switch
public boolean shouldIgnoreFailures()
public void setIgnoreFailures(boolean ignoreFailures)
ignoreFailures - boolean switch
public boolean shouldBlock()
public void setBlocking(boolean blocking)
blocking - boolean switch
public boolean shouldOverwrite()
public void setOverwrite(boolean overwrite)
overwrite - boolean switch
public boolean shouldAppend()
public void setAppend(boolean append)
public boolean shouldUseDiff()
public boolean shouldUseRdiff()
public boolean shouldUseSnapshotDiff()
public String getFromSnapshot()
public String getToSnapshot()
public boolean shouldSkipCRC()
public void setSkipCRC(boolean skipCRC)
skipCRC - boolean switch
public int getNumListstatusThreads()
public void setNumListstatusThreads(int numThreads)
numThreads - Number of threads
public int getMaxMaps()
public void setMaxMaps(int maxMaps)
maxMaps - Number of maps
public int getMapBandwidth()
public void setMapBandwidth(int mapBandwidth)
mapBandwidth - per map bandwidth
public String getSslConfigurationFile()
public void setSslConfigurationFile(String sslConfigurationFile)
sslConfigurationFile - Local ssl config file path
public Iterator<DistCpOptions.FileAttribute> preserveAttributes()
public boolean shouldPreserve(DistCpOptions.FileAttribute attribute)
attribute - Attribute to check
public void preserve(DistCpOptions.FileAttribute fileAttribute)
fileAttribute - Attribute to add, one at a time
public boolean shouldPreserveRawXattrs()
public void preserveRawXattrs()
public org.apache.hadoop.fs.Path getAtomicWorkPath()
public void setAtomicWorkPath(org.apache.hadoop.fs.Path atomicWorkPath)
atomicWorkPath - Path on the target cluster
public org.apache.hadoop.fs.Path getLogPath()
public void setLogPath(org.apache.hadoop.fs.Path logPath)
logPath - Path where logs will be saved
public String getCopyStrategy()
public void setCopyStrategy(String copyStrategy)
copyStrategy - copy Strategy to use
public org.apache.hadoop.fs.Path getSourceFileListing()
public List<org.apache.hadoop.fs.Path> getSourcePaths()
public void setSourcePaths(List<org.apache.hadoop.fs.Path> sourcePaths)
sourcePaths - The new list of source-paths.
public org.apache.hadoop.fs.Path getTargetPath()
public boolean getTargetPathExists()
public boolean setTargetPathExists(boolean targetPathExists)
targetPathExists - Whether the target path of distcp exists.
public final String getFiltersFile()
public final void setFiltersFile(String filtersFilename)
filtersFilename - The path to a list of patterns to exclude from copy.
public final void setBlocksPerChunk(int csize)
public final int getBlocksPerChunk()
public final boolean splitLargeFile()
public final void setCopyBufferSize(int newCopyBufferSize)
public int getCopyBufferSize()
public void setVerboseLog(boolean newVerboseLog)
public boolean shouldVerboseLog()
public void validate(DistCpOptionSwitch option, boolean value)
public void appendToConf(org.apache.hadoop.conf.Configuration conf)
conf - Configuration object to which the options need to be added
public String toString()
protected DistCpOptions clone() throws CloneNotSupportedException
clone in class Object
CloneNotSupportedException
Copyright © 2020 Apache Software Foundation. All rights reserved.