001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.util;
020
021 import java.io.DataInputStream;
022 import java.io.DataOutputStream;
023 import java.io.IOException;
024 import java.nio.ByteBuffer;
025 import java.util.zip.Checksum;
026
027 import org.apache.hadoop.classification.InterfaceAudience;
028 import org.apache.hadoop.classification.InterfaceStability;
029 import org.apache.hadoop.fs.ChecksumException;
030
031 /**
032 * This class provides inteface and utilities for processing checksums for
033 * DFS data transfers.
034 */
035 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
036 @InterfaceStability.Evolving
037 public class DataChecksum implements Checksum {
038
039 // Misc constants
040 public static final int HEADER_LEN = 5; /// 1 byte type and 4 byte len
041
042 // checksum types
043 public static final int CHECKSUM_NULL = 0;
044 public static final int CHECKSUM_CRC32 = 1;
045 public static final int CHECKSUM_CRC32C = 2;
046 public static final int CHECKSUM_DEFAULT = 3;
047 public static final int CHECKSUM_MIXED = 4;
048
049 /** The checksum types */
050 public static enum Type {
051 NULL (CHECKSUM_NULL, 0),
052 CRC32 (CHECKSUM_CRC32, 4),
053 CRC32C(CHECKSUM_CRC32C, 4),
054 DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
055 MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum
056
057 public final int id;
058 public final int size;
059
060 private Type(int id, int size) {
061 this.id = id;
062 this.size = size;
063 }
064
065 /** @return the type corresponding to the id. */
066 public static Type valueOf(int id) {
067 if (id < 0 || id >= values().length) {
068 throw new IllegalArgumentException("id=" + id
069 + " out of range [0, " + values().length + ")");
070 }
071 return values()[id];
072 }
073 }
074
075
076 public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) {
077 if ( bytesPerChecksum <= 0 ) {
078 return null;
079 }
080
081 switch ( type ) {
082 case NULL :
083 return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum );
084 case CRC32 :
085 return new DataChecksum(type, new PureJavaCrc32(), bytesPerChecksum );
086 case CRC32C:
087 return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum);
088 default:
089 return null;
090 }
091 }
092
093 /**
094 * Creates a DataChecksum from HEADER_LEN bytes from arr[offset].
095 * @return DataChecksum of the type in the array or null in case of an error.
096 */
097 public static DataChecksum newDataChecksum( byte bytes[], int offset ) {
098 if ( offset < 0 || bytes.length < offset + HEADER_LEN ) {
099 return null;
100 }
101
102 // like readInt():
103 int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) |
104 ( (bytes[offset+2] & 0xff) << 16 ) |
105 ( (bytes[offset+3] & 0xff) << 8 ) |
106 ( (bytes[offset+4] & 0xff) );
107 return newDataChecksum( Type.valueOf(bytes[0]), bytesPerChecksum );
108 }
109
110 /**
111 * This constructucts a DataChecksum by reading HEADER_LEN bytes from
112 * input stream <i>in</i>
113 */
114 public static DataChecksum newDataChecksum( DataInputStream in )
115 throws IOException {
116 int type = in.readByte();
117 int bpc = in.readInt();
118 DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc );
119 if ( summer == null ) {
120 throw new IOException( "Could not create DataChecksum of type " +
121 type + " with bytesPerChecksum " + bpc );
122 }
123 return summer;
124 }
125
126 /**
127 * Writes the checksum header to the output stream <i>out</i>.
128 */
129 public void writeHeader( DataOutputStream out )
130 throws IOException {
131 out.writeByte( type.id );
132 out.writeInt( bytesPerChecksum );
133 }
134
135 public byte[] getHeader() {
136 byte[] header = new byte[DataChecksum.HEADER_LEN];
137 header[0] = (byte) (type.id & 0xff);
138 // Writing in buffer just like DataOutput.WriteInt()
139 header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff);
140 header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff);
141 header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff);
142 header[1+3] = (byte) (bytesPerChecksum & 0xff);
143 return header;
144 }
145
146 /**
147 * Writes the current checksum to the stream.
148 * If <i>reset</i> is true, then resets the checksum.
149 * @return number of bytes written. Will be equal to getChecksumSize();
150 */
151 public int writeValue( DataOutputStream out, boolean reset )
152 throws IOException {
153 if ( type.size <= 0 ) {
154 return 0;
155 }
156
157 if ( type.size == 4 ) {
158 out.writeInt( (int) summer.getValue() );
159 } else {
160 throw new IOException( "Unknown Checksum " + type );
161 }
162
163 if ( reset ) {
164 reset();
165 }
166
167 return type.size;
168 }
169
170 /**
171 * Writes the current checksum to a buffer.
172 * If <i>reset</i> is true, then resets the checksum.
173 * @return number of bytes written. Will be equal to getChecksumSize();
174 */
175 public int writeValue( byte[] buf, int offset, boolean reset )
176 throws IOException {
177 if ( type.size <= 0 ) {
178 return 0;
179 }
180
181 if ( type.size == 4 ) {
182 int checksum = (int) summer.getValue();
183 buf[offset+0] = (byte) ((checksum >>> 24) & 0xff);
184 buf[offset+1] = (byte) ((checksum >>> 16) & 0xff);
185 buf[offset+2] = (byte) ((checksum >>> 8) & 0xff);
186 buf[offset+3] = (byte) (checksum & 0xff);
187 } else {
188 throw new IOException( "Unknown Checksum " + type );
189 }
190
191 if ( reset ) {
192 reset();
193 }
194
195 return type.size;
196 }
197
198 /**
199 * Compares the checksum located at buf[offset] with the current checksum.
200 * @return true if the checksum matches and false otherwise.
201 */
202 public boolean compare( byte buf[], int offset ) {
203 if ( type.size == 4 ) {
204 int checksum = ( (buf[offset+0] & 0xff) << 24 ) |
205 ( (buf[offset+1] & 0xff) << 16 ) |
206 ( (buf[offset+2] & 0xff) << 8 ) |
207 ( (buf[offset+3] & 0xff) );
208 return checksum == (int) summer.getValue();
209 }
210 return type.size == 0;
211 }
212
213 private final Type type;
214 private final Checksum summer;
215 private final int bytesPerChecksum;
216 private int inSum = 0;
217
218 private DataChecksum( Type type, Checksum checksum, int chunkSize ) {
219 this.type = type;
220 summer = checksum;
221 bytesPerChecksum = chunkSize;
222 }
223
224 // Accessors
225 public Type getChecksumType() {
226 return type;
227 }
228 public int getChecksumSize() {
229 return type.size;
230 }
231 public int getBytesPerChecksum() {
232 return bytesPerChecksum;
233 }
234 public int getNumBytesInSum() {
235 return inSum;
236 }
237
238 public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE;
239 static public int getChecksumHeaderSize() {
240 return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int
241 }
242 //Checksum Interface. Just a wrapper around member summer.
243 public long getValue() {
244 return summer.getValue();
245 }
246 public void reset() {
247 summer.reset();
248 inSum = 0;
249 }
250 public void update( byte[] b, int off, int len ) {
251 if ( len > 0 ) {
252 summer.update( b, off, len );
253 inSum += len;
254 }
255 }
256 public void update( int b ) {
257 summer.update( b );
258 inSum += 1;
259 }
260
261 /**
262 * Verify that the given checksums match the given data.
263 *
264 * The 'mark' of the ByteBuffer parameters may be modified by this function,.
265 * but the position is maintained.
266 *
267 * @param data the DirectByteBuffer pointing to the data to verify.
268 * @param checksums the DirectByteBuffer pointing to a series of stored
269 * checksums
270 * @param fileName the name of the file being read, for error-reporting
271 * @param basePos the file position to which the start of 'data' corresponds
272 * @throws ChecksumException if the checksums do not match
273 */
274 public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums,
275 String fileName, long basePos)
276 throws ChecksumException {
277 if (type.size == 0) return;
278
279 if (data.hasArray() && checksums.hasArray()) {
280 verifyChunkedSums(
281 data.array(), data.arrayOffset() + data.position(), data.remaining(),
282 checksums.array(), checksums.arrayOffset() + checksums.position(),
283 fileName, basePos);
284 return;
285 }
286 if (NativeCrc32.isAvailable()) {
287 NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data,
288 fileName, basePos);
289 return;
290 }
291
292 int startDataPos = data.position();
293 data.mark();
294 checksums.mark();
295 try {
296 byte[] buf = new byte[bytesPerChecksum];
297 byte[] sum = new byte[type.size];
298 while (data.remaining() > 0) {
299 int n = Math.min(data.remaining(), bytesPerChecksum);
300 checksums.get(sum);
301 data.get(buf, 0, n);
302 summer.reset();
303 summer.update(buf, 0, n);
304 int calculated = (int)summer.getValue();
305 int stored = (sum[0] << 24 & 0xff000000) |
306 (sum[1] << 16 & 0xff0000) |
307 (sum[2] << 8 & 0xff00) |
308 sum[3] & 0xff;
309 if (calculated != stored) {
310 long errPos = basePos + data.position() - startDataPos - n;
311 throw new ChecksumException(
312 "Checksum error: "+ fileName + " at "+ errPos +
313 " exp: " + stored + " got: " + calculated, errPos);
314 }
315 }
316 } finally {
317 data.reset();
318 checksums.reset();
319 }
320 }
321
322 /**
323 * Implementation of chunked verification specifically on byte arrays. This
324 * is to avoid the copy when dealing with ByteBuffers that have array backing.
325 */
326 private void verifyChunkedSums(
327 byte[] data, int dataOff, int dataLen,
328 byte[] checksums, int checksumsOff, String fileName,
329 long basePos) throws ChecksumException {
330
331 int remaining = dataLen;
332 int dataPos = 0;
333 while (remaining > 0) {
334 int n = Math.min(remaining, bytesPerChecksum);
335
336 summer.reset();
337 summer.update(data, dataOff + dataPos, n);
338 dataPos += n;
339 remaining -= n;
340
341 int calculated = (int)summer.getValue();
342 int stored = (checksums[checksumsOff] << 24 & 0xff000000) |
343 (checksums[checksumsOff + 1] << 16 & 0xff0000) |
344 (checksums[checksumsOff + 2] << 8 & 0xff00) |
345 checksums[checksumsOff + 3] & 0xff;
346 checksumsOff += 4;
347 if (calculated != stored) {
348 long errPos = basePos + dataPos - n;
349 throw new ChecksumException(
350 "Checksum error: "+ fileName + " at "+ errPos +
351 " exp: " + stored + " got: " + calculated, errPos);
352 }
353 }
354 }
355
356 /**
357 * Calculate checksums for the given data.
358 *
359 * The 'mark' of the ByteBuffer parameters may be modified by this function,
360 * but the position is maintained.
361 *
362 * @param data the DirectByteBuffer pointing to the data to checksum.
363 * @param checksums the DirectByteBuffer into which checksums will be
364 * stored. Enough space must be available in this
365 * buffer to put the checksums.
366 */
367 public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) {
368 if (type.size == 0) return;
369
370 if (data.hasArray() && checksums.hasArray()) {
371 calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(),
372 checksums.array(), checksums.arrayOffset() + checksums.position());
373 return;
374 }
375
376 data.mark();
377 checksums.mark();
378 try {
379 byte[] buf = new byte[bytesPerChecksum];
380 while (data.remaining() > 0) {
381 int n = Math.min(data.remaining(), bytesPerChecksum);
382 data.get(buf, 0, n);
383 summer.reset();
384 summer.update(buf, 0, n);
385 checksums.putInt((int)summer.getValue());
386 }
387 } finally {
388 data.reset();
389 checksums.reset();
390 }
391 }
392
393 /**
394 * Implementation of chunked calculation specifically on byte arrays. This
395 * is to avoid the copy when dealing with ByteBuffers that have array backing.
396 */
397 private void calculateChunkedSums(
398 byte[] data, int dataOffset, int dataLength,
399 byte[] sums, int sumsOffset) {
400
401 int remaining = dataLength;
402 while (remaining > 0) {
403 int n = Math.min(remaining, bytesPerChecksum);
404 summer.reset();
405 summer.update(data, dataOffset, n);
406 dataOffset += n;
407 remaining -= n;
408 long calculated = summer.getValue();
409 sums[sumsOffset++] = (byte) (calculated >> 24);
410 sums[sumsOffset++] = (byte) (calculated >> 16);
411 sums[sumsOffset++] = (byte) (calculated >> 8);
412 sums[sumsOffset++] = (byte) (calculated);
413 }
414 }
415
416 @Override
417 public boolean equals(Object other) {
418 if (!(other instanceof DataChecksum)) {
419 return false;
420 }
421 DataChecksum o = (DataChecksum)other;
422 return o.bytesPerChecksum == this.bytesPerChecksum &&
423 o.type == this.type;
424 }
425
426 @Override
427 public int hashCode() {
428 return (this.type.id + 31) * this.bytesPerChecksum;
429 }
430
431 @Override
432 public String toString() {
433 return "DataChecksum(type=" + type +
434 ", chunkSize=" + bytesPerChecksum + ")";
435 }
436
437 /**
438 * This just provides a dummy implimentation for Checksum class
439 * This is used when there is no checksum available or required for
440 * data
441 */
442 static class ChecksumNull implements Checksum {
443
444 public ChecksumNull() {}
445
446 //Dummy interface
447 public long getValue() { return 0; }
448 public void reset() {}
449 public void update(byte[] b, int off, int len) {}
450 public void update(int b) {}
451 };
452 }