/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import static org.apache.hadoop.hdfs.server.common.Util.now;

import java.io.File;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.EnumMap;

import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.namenode.EditLogFileInputStream.LogHeaderCorruptException;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCloseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CancelDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ClearNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ConcatDeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.GetDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ReassignLeaseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOldOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenewDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetOwnerOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetPermissionsOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetReplicationOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.util.Holder;
import org.apache.hadoop.io.IOUtils;

import com.google.common.base.Joiner;

/**
 * Replays a persisted edit log against the in-memory namesystem,
 * applying each logged operation to the {@link FSDirectory}.
 * Also provides static helpers for validating the readable length
 * of an edit log file without applying it.
 */
public class FSEditLogLoader {
  /** Namesystem the replayed edits are applied to. */
  private final FSNamesystem fsNamesys;

  public FSEditLogLoader(FSNamesystem fsNamesys) {
    this.fsNamesys = fsNamesys;
  }

  /**
   * Load an edit log, and apply the changes to the in-memory structure.
   * This is where we apply edits that we've been writing to disk all
   * along.
   *
   * @param edits stream to replay; closed before this method returns
   * @param expectedStartingTxId transaction id the first op must carry
   *        (only enforced for layouts that store txids)
   * @return number of edits applied
   */
  int loadFSEdits(EditLogInputStream edits, long expectedStartingTxId)
      throws IOException {
    long startTime = now();
    int numEdits = loadFSEdits(edits, true, expectedStartingTxId);
    FSImage.LOG.info("Edits file " + edits.getName()
        + " of size " + edits.length() + " edits # " + numEdits
        + " loaded in " + (now()-startTime)/1000 + " seconds.");
    return numEdits;
  }

  /**
   * Same as {@link #loadFSEdits(EditLogInputStream, long)} but lets the
   * caller decide whether the stream is closed when loading finishes.
   */
  int loadFSEdits(EditLogInputStream edits, boolean closeOnExit,
                  long expectedStartingTxId)
      throws IOException {
    int numEdits = 0;
    int logVersion = edits.getVersion();

    try {
      numEdits = loadEditRecords(logVersion, edits, false,
                                 expectedStartingTxId);
    } finally {
      if(closeOnExit) {
        edits.close();
      }
    }

    return numEdits;
  }

  /**
   * Read ops from the stream one at a time and apply each to the
   * namesystem, holding the namesystem and directory write locks for
   * the whole replay.
   *
   * @param logVersion layout version the stream was written with
   * @param in op source
   * @param closeOnExit whether to close {@code in} when done
   * @param expectedStartingTxId txid expected on the first op when the
   *        layout stores txids; each subsequent op must be exactly +1
   * @return number of edits applied
   * @throws IOException on a txid gap, an unknown opcode, or any failure
   *         while applying an op (wrapped with recent-offset context)
   */
  @SuppressWarnings("deprecation")
  int loadEditRecords(int logVersion, EditLogInputStream in, boolean closeOnExit,
                      long expectedStartingTxId)
      throws IOException {
    FSDirectory fsDir = fsNamesys.dir;
    int numEdits = 0;

    EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
      new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);

    // Edits are applied under both locks; released in the outer finally.
    fsNamesys.writeLock();
    fsDir.writeLock();

    // Ring buffer of the stream offsets of the most recent ops, used to
    // give context in the error message if replay blows up. -1 = unused slot.
    long recentOpcodeOffsets[] = new long[4];
    Arrays.fill(recentOpcodeOffsets, -1);

    try {
      long txId = expectedStartingTxId - 1;

      try {
        FSEditLogOp op;
        while ((op = in.readOp()) != null) {
          recentOpcodeOffsets[numEdits % recentOpcodeOffsets.length] =
            in.getPosition();
          if (LayoutVersion.supports(Feature.STORED_TXIDS, logVersion)) {
            // Txids must be strictly consecutive; a gap means a corrupt
            // or mismatched log segment.
            long thisTxId = op.txid;
            if (thisTxId != txId + 1) {
              throw new IOException("Expected transaction ID " +
                  (txId + 1) + " but got " + thisTxId);
            }
            txId = thisTxId;
          }

          numEdits++;
          incrOpCount(op.opCode, opCounts);
          switch (op.opCode) {
          case OP_ADD:
          case OP_CLOSE: {
            AddCloseOp addCloseOp = (AddCloseOp)op;

            // versions > 0 support per file replication
            // get name and replication
            final short replication = fsNamesys.getBlockManager(
                ).adjustReplication(addCloseOp.replication);

            long blockSize = addCloseOp.blockSize;
            BlockInfo blocks[] = new BlockInfo[addCloseOp.blocks.length];
            for (int i = 0; i < addCloseOp.blocks.length; i++) {
              // For OP_ADD the last block is still being written to, so it
              // is materialized as under-construction.
              if(addCloseOp.opCode == FSEditLogOpCodes.OP_ADD
                  && i == addCloseOp.blocks.length-1) {
                blocks[i] = new BlockInfoUnderConstruction(addCloseOp.blocks[i],
                                                           replication);
              } else {
                blocks[i] = new BlockInfo(addCloseOp.blocks[i], replication);
              }
            }

            PermissionStatus permissions = fsNamesys.getUpgradePermission();
            if (addCloseOp.permissions != null) {
              permissions = addCloseOp.permissions;
            }

            // Older versions of HDFS does not store the block size in inode.
            // If the file has more than one block, use the size of the
            // first block as the blocksize. Otherwise use the default
            // block size.
            if (-8 <= logVersion && blockSize == 0) {
              if (blocks.length > 1) {
                blockSize = blocks[0].getNumBytes();
              } else {
                long first = ((blocks.length == 1)? blocks[0].getNumBytes(): 0);
                blockSize = Math.max(fsNamesys.getDefaultBlockSize(), first);
              }
            }

            // The open lease transaction re-creates a file if necessary.
            // Delete the file if it already exists.
            if (FSNamesystem.LOG.isDebugEnabled()) {
              FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path +
                  " numblocks : " + blocks.length +
                  " clientHolder " + addCloseOp.clientName +
                  " clientMachine " + addCloseOp.clientMachine);
            }

            // There are four cases here:
            // 1. OP_ADD to create a new file
            // 2. OP_ADD to update file blocks
            // 3. OP_ADD to open file for append
            // 4. OP_CLOSE to close the file

            // See if the file already exists
            INodeFile oldFile = fsDir.getFileINode(addCloseOp.path);
            if (oldFile == null) { // OP_ADD for a new file
              assert addCloseOp.opCode == FSEditLogOpCodes.OP_ADD :
                "Expected opcode OP_ADD, but got " + addCloseOp.opCode;
              fsDir.unprotectedAddFile(
                  addCloseOp.path, permissions, blocks, replication,
                  addCloseOp.mtime, addCloseOp.atime, blockSize,
                  addCloseOp.clientName, addCloseOp.clientMachine);
              fsNamesys.leaseManager.addLease(addCloseOp.clientName,
                                              addCloseOp.path);
            } else {
              fsDir.updateFile(oldFile, addCloseOp.path, blocks,
                               addCloseOp.mtime, addCloseOp.atime);
              if(addCloseOp.opCode == FSEditLogOpCodes.OP_CLOSE) {  // OP_CLOSE
                if (!oldFile.isUnderConstruction() &&
                    logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) {
                  // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE
                  // could show up twice in a row. But after that version, this
                  // should be fixed, so we should treat it as an error.
                  throw new IOException(
                      "File is not under construction: " + addCloseOp.path);
                }
                fsNamesys.getBlockManager().completeBlock(
                    oldFile, blocks.length-1, true);

                if (oldFile.isUnderConstruction()) {
                  INodeFile newFile =
                    ((INodeFileUnderConstruction)oldFile).convertToInodeFile();
                  fsDir.replaceNode(addCloseOp.path, oldFile, newFile);
                  fsNamesys.leaseManager.removeLease(
                      ((INodeFileUnderConstruction)oldFile).getClientName(),
                      addCloseOp.path);
                }
              } else if(! oldFile.isUnderConstruction()) {  // OP_ADD for append
                fsNamesys.prepareFileForWrite(addCloseOp.path, oldFile,
                    addCloseOp.clientName, addCloseOp.clientMachine, null,
                    false);
              }
            }
            break;
          }
          case OP_SET_REPLICATION: {
            SetReplicationOp setReplicationOp = (SetReplicationOp)op;
            short replication = fsNamesys.getBlockManager().adjustReplication(
                setReplicationOp.replication);
            fsDir.unprotectedSetReplication(setReplicationOp.path,
                                            replication, null);
            break;
          }
          case OP_CONCAT_DELETE: {
            ConcatDeleteOp concatDeleteOp = (ConcatDeleteOp)op;
            fsDir.unprotectedConcat(concatDeleteOp.trg, concatDeleteOp.srcs,
                concatDeleteOp.timestamp);
            break;
          }
          case OP_RENAME_OLD: {
            RenameOldOp renameOp = (RenameOldOp)op;
            fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst,
                                      renameOp.timestamp);
            break;
          }
          case OP_DELETE: {
            DeleteOp deleteOp = (DeleteOp)op;
            fsDir.unprotectedDelete(deleteOp.path, deleteOp.timestamp);
            break;
          }
          case OP_MKDIR: {
            MkdirOp mkdirOp = (MkdirOp)op;
            PermissionStatus permissions = fsNamesys.getUpgradePermission();
            if (mkdirOp.permissions != null) {
              permissions = mkdirOp.permissions;
            }

            fsDir.unprotectedMkdir(mkdirOp.path, permissions,
                                   mkdirOp.timestamp);
            break;
          }
          case OP_SET_GENSTAMP: {
            SetGenstampOp setGenstampOp = (SetGenstampOp)op;
            fsNamesys.setGenerationStamp(setGenstampOp.genStamp);
            break;
          }
          case OP_SET_PERMISSIONS: {
            SetPermissionsOp setPermissionsOp = (SetPermissionsOp)op;
            fsDir.unprotectedSetPermission(setPermissionsOp.src,
                                           setPermissionsOp.permissions);
            break;
          }
          case OP_SET_OWNER: {
            SetOwnerOp setOwnerOp = (SetOwnerOp)op;
            fsDir.unprotectedSetOwner(setOwnerOp.src, setOwnerOp.username,
                                      setOwnerOp.groupname);
            break;
          }
          case OP_SET_NS_QUOTA: {
            SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp)op;
            fsDir.unprotectedSetQuota(setNSQuotaOp.src,
                                      setNSQuotaOp.nsQuota,
                                      HdfsConstants.QUOTA_DONT_SET);
            break;
          }
          case OP_CLEAR_NS_QUOTA: {
            ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp)op;
            fsDir.unprotectedSetQuota(clearNSQuotaOp.src,
                                      HdfsConstants.QUOTA_RESET,
                                      HdfsConstants.QUOTA_DONT_SET);
            break;
          }
          case OP_SET_QUOTA: {
            SetQuotaOp setQuotaOp = (SetQuotaOp)op;
            fsDir.unprotectedSetQuota(setQuotaOp.src,
                                      setQuotaOp.nsQuota,
                                      setQuotaOp.dsQuota);
            break;
          }
          case OP_TIMES: {
            TimesOp timesOp = (TimesOp)op;

            fsDir.unprotectedSetTimes(timesOp.path,
                                      timesOp.mtime,
                                      timesOp.atime, true);
            break;
          }
          case OP_SYMLINK: {
            SymlinkOp symlinkOp = (SymlinkOp)op;
            fsDir.unprotectedSymlink(symlinkOp.path, symlinkOp.value,
                                     symlinkOp.mtime, symlinkOp.atime,
                                     symlinkOp.permissionStatus);
            break;
          }
          case OP_RENAME: {
            RenameOp renameOp = (RenameOp)op;

            fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst,
                                      renameOp.timestamp, renameOp.options);
            break;
          }
          case OP_GET_DELEGATION_TOKEN: {
            GetDelegationTokenOp getDelegationTokenOp
              = (GetDelegationTokenOp)op;

            fsNamesys.getDelegationTokenSecretManager()
              .addPersistedDelegationToken(getDelegationTokenOp.token,
                                           getDelegationTokenOp.expiryTime);
            break;
          }
          case OP_RENEW_DELEGATION_TOKEN: {
            RenewDelegationTokenOp renewDelegationTokenOp
              = (RenewDelegationTokenOp)op;
            fsNamesys.getDelegationTokenSecretManager()
              .updatePersistedTokenRenewal(renewDelegationTokenOp.token,
                                           renewDelegationTokenOp.expiryTime);
            break;
          }
          case OP_CANCEL_DELEGATION_TOKEN: {
            CancelDelegationTokenOp cancelDelegationTokenOp
              = (CancelDelegationTokenOp)op;
            fsNamesys.getDelegationTokenSecretManager()
              .updatePersistedTokenCancellation(
                  cancelDelegationTokenOp.token);
            break;
          }
          case OP_UPDATE_MASTER_KEY: {
            UpdateMasterKeyOp updateMasterKeyOp = (UpdateMasterKeyOp)op;
            fsNamesys.getDelegationTokenSecretManager()
              .updatePersistedMasterKey(updateMasterKeyOp.key);
            break;
          }
          case OP_REASSIGN_LEASE: {
            ReassignLeaseOp reassignLeaseOp = (ReassignLeaseOp)op;

            Lease lease = fsNamesys.leaseManager.getLease(
                reassignLeaseOp.leaseHolder);
            INodeFileUnderConstruction pendingFile =
                (INodeFileUnderConstruction) fsDir.getFileINode(
                    reassignLeaseOp.path);
            fsNamesys.reassignLeaseInternal(lease,
                reassignLeaseOp.path, reassignLeaseOp.newHolder, pendingFile);
            break;
          }
          case OP_START_LOG_SEGMENT:
          case OP_END_LOG_SEGMENT: {
            // no data in here currently.
            break;
          }
          case OP_DATANODE_ADD:
          case OP_DATANODE_REMOVE:
            // Obsolete opcodes; nothing to apply.
            break;
          default:
            throw new IOException("Invalid operation read " + op.opCode);
          }
        }

      } catch (IOException ex) {
        // May rethrow ex with a more descriptive message for 0.20.203 logs.
        check203UpgradeFailure(logVersion, ex);
      } finally {
        if(closeOnExit)
          in.close();
      }
    } catch (Throwable t) {
      // Catch Throwable because in the case of a truly corrupt edits log, any
      // sort of error might be thrown (NumberFormat, NullPointer, EOF, etc.)
      StringBuilder sb = new StringBuilder();
      sb.append("Error replaying edit log at offset " + in.getPosition());
      if (recentOpcodeOffsets[0] != -1) {
        Arrays.sort(recentOpcodeOffsets);
        sb.append("\nRecent opcode offsets:");
        for (long offset : recentOpcodeOffsets) {
          if (offset != -1) {
            sb.append(' ').append(offset);
          }
        }
      }
      String errorMessage = sb.toString();
      FSImage.LOG.error(errorMessage);
      throw new IOException(errorMessage, t);
    } finally {
      fsDir.writeUnlock();
      fsNamesys.writeUnlock();
    }
    if (FSImage.LOG.isDebugEnabled()) {
      dumpOpCounts(opCounts);
    }
    return numEdits;
  }

  /** Log a per-opcode count summary at debug level. */
  private static void dumpOpCounts(
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) {
    StringBuilder sb = new StringBuilder();
    sb.append("Summary of operations loaded from edit log:\n  ");
    Joiner.on("\n  ").withKeyValueSeparator("=").appendTo(sb, opCounts);
    FSImage.LOG.debug(sb.toString());
  }

  /** Bump the replay counter for {@code opCode}, creating it on first use. */
  private void incrOpCount(FSEditLogOpCodes opCode,
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) {
    Holder<Integer> holder = opCounts.get(opCode);
    if (holder == null) {
      holder = new Holder<Integer>(1);
      opCounts.put(opCode, holder);
    } else {
      holder.held++;
    }
  }

  /**
   * Throw appropriate exception during upgrade from 203, when editlog loading
   * could fail due to opcode conflicts.
   */
  private void check203UpgradeFailure(int logVersion, IOException ex)
      throws IOException {
    // 0.20.203 version version has conflicting opcodes with the later releases.
    // The editlog must be emptied by restarting the namenode, before proceeding
    // with the upgrade.
    if (Storage.is203LayoutVersion(logVersion)
        && logVersion != HdfsConstants.LAYOUT_VERSION) {
      String msg = "During upgrade failed to load the editlog version "
          + logVersion + " from release 0.20.203. Please go back to the old "
          + " release and restart the namenode. This empties the editlog "
          + " and saves the namespace. Resume the upgrade after this step.";
      throw new IOException(msg, ex);
    } else {
      throw ex;
    }
  }

  /**
   * Validate the edit log stored in {@code file}.
   * A missing or corrupt header is treated as a log with zero transactions
   * rather than an error.
   */
  static EditLogValidation validateEditLog(File file) throws IOException {
    EditLogFileInputStream in;
    try {
      in = new EditLogFileInputStream(file);
    } catch (LogHeaderCorruptException corrupt) {
      // If it's missing its header, this is equivalent to no transactions
      FSImage.LOG.warn("Log at " + file + " has no valid header",
          corrupt);
      return new EditLogValidation(0, 0);
    }

    try {
      return validateEditLog(in);
    } finally {
      IOUtils.closeStream(in);
    }
  }

  /**
   * Return the number of valid transactions in the stream. If the stream is
   * truncated during the header, returns a value indicating that there are
   * 0 valid transactions. This reads through the stream but does not close
   * it, and never throws: any error while decoding ops simply ends the scan
   * at the last position known to be good.
   */
  static EditLogValidation validateEditLog(EditLogInputStream in) {
    long numValid = 0;
    long lastPos = 0;
    try {
      while (true) {
        // Remember the offset *before* the read so validLength never
        // includes a partially-read op.
        lastPos = in.getPosition();
        if (in.readOp() == null) {
          break;
        }
        numValid++;
      }
    } catch (Throwable t) {
      // Catch Throwable and not just IOE, since bad edits may generate
      // NumberFormatExceptions, AssertionErrors, OutOfMemoryErrors, etc.
      FSImage.LOG.debug("Caught exception after reading " + numValid +
          " ops from " + in + " while determining its valid length.", t);
    }
    return new EditLogValidation(lastPos, numValid);
  }

  /** Result of scanning an edit log: usable byte length and op count. */
  static class EditLogValidation {
    long validLength;
    long numTransactions;

    EditLogValidation(long validLength, long numTransactions) {
      this.validLength = validLength;
      this.numTransactions = numTransactions;
    }
  }

  /**
   * Stream wrapper that keeps track of the current stream position.
   */
  static class PositionTrackingInputStream extends FilterInputStream {
    private long curPos = 0;
    private long markPos = -1;

    public PositionTrackingInputStream(InputStream is) {
      super(is);
    }

    @Override
    public int read() throws IOException {
      int ret = super.read();
      if (ret != -1) curPos++;
      return ret;
    }

    @Override
    public int read(byte[] data) throws IOException {
      // Delegate to the three-arg overload, which does the position
      // accounting. Calling super.read(data) here would double-count:
      // FilterInputStream.read(byte[]) dispatches virtually back to our
      // read(byte[], int, int), which already advances curPos.
      return read(data, 0, data.length);
    }

    @Override
    public int read(byte[] data, int offset, int length) throws IOException {
      int ret = super.read(data, offset, length);
      if (ret > 0) curPos += ret;
      return ret;
    }

    @Override
    public long skip(long amt) throws IOException {
      // Skipped bytes move the underlying stream, so they must advance
      // curPos too or getPos() silently drifts from the real position.
      long ret = super.skip(amt);
      if (ret > 0) curPos += ret;
      return ret;
    }

    @Override
    public void mark(int limit) {
      super.mark(limit);
      markPos = curPos;
    }

    @Override
    public void reset() throws IOException {
      if (markPos == -1) {
        throw new IOException("Not marked!");
      }
      super.reset();
      curPos = markPos;
      markPos = -1;
    }

    public long getPos() {
      return curPos;
    }
  }
}