001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.blockmanagement; 019 020import java.io.DataInput; 021import java.io.IOException; 022import java.util.ArrayList; 023import java.util.Collection; 024import java.util.Iterator; 025import java.util.LinkedList; 026import java.util.List; 027import java.util.Queue; 028import java.util.Set; 029import java.util.TreeSet; 030 031import org.apache.hadoop.classification.InterfaceAudience; 032import org.apache.hadoop.classification.InterfaceStability; 033import org.apache.hadoop.hdfs.DeprecatedUTF8; 034import org.apache.hadoop.hdfs.protocol.Block; 035import org.apache.hadoop.hdfs.protocol.DatanodeID; 036import org.apache.hadoop.hdfs.protocol.DatanodeInfo; 037import org.apache.hadoop.hdfs.util.LightWeightHashSet; 038import org.apache.hadoop.io.Text; 039import org.apache.hadoop.io.WritableUtils; 040 041/************************************************** 042 * DatanodeDescriptor tracks stats on a given DataNode, such as 043 * available storage capacity, last update time, etc., and maintains a 044 * set of blocks stored on the datanode. 045 * 046 * This data structure is internal to the namenode. It is *not* sent 047 * over-the-wire to the Client or the Datanodes. Neither is it stored 048 * persistently in the fsImage. 049 **************************************************/ 050@InterfaceAudience.Private 051public class DatanodeDescriptor extends DatanodeInfo { 052 053 // Stores status of decommissioning. 054 // If node is not decommissioning, do not use this object for anything. 055 public DecommissioningStatus decommissioningStatus = new DecommissioningStatus(); 056 057 /** Block and targets pair */ 058 @InterfaceAudience.Private 059 @InterfaceStability.Evolving 060 public static class BlockTargetPair { 061 public final Block block; 062 public final DatanodeDescriptor[] targets; 063 064 BlockTargetPair(Block block, DatanodeDescriptor[] targets) { 065 this.block = block; 066 this.targets = targets; 067 } 068 } 069 070 /** A BlockTargetPair queue. */ 071 private static class BlockQueue<E> { 072 private final Queue<E> blockq = new LinkedList<E>(); 073 074 /** Size of the queue */ 075 synchronized int size() {return blockq.size();} 076 077 /** Enqueue */ 078 synchronized boolean offer(E e) { 079 return blockq.offer(e); 080 } 081 082 /** Dequeue */ 083 synchronized List<E> poll(int numBlocks) { 084 if (numBlocks <= 0 || blockq.isEmpty()) { 085 return null; 086 } 087 088 List<E> results = new ArrayList<E>(); 089 for(; !blockq.isEmpty() && numBlocks > 0; numBlocks--) { 090 results.add(blockq.poll()); 091 } 092 return results; 093 } 094 095 /** 096 * Returns <tt>true</tt> if the queue contains the specified element. 097 */ 098 boolean contains(E e) { 099 return blockq.contains(e); 100 } 101 } 102 103 private volatile BlockInfo blockList = null; 104 private int numBlocks = 0; 105 // isAlive == heartbeats.contains(this) 106 // This is an optimization, because contains takes O(n) time on Arraylist 107 public boolean isAlive = false; 108 public boolean needKeyUpdate = false; 109 110 // A system administrator can tune the balancer bandwidth parameter 111 // (dfs.balance.bandwidthPerSec) dynamically by calling 112 // "dfsadmin -setBalanacerBandwidth <newbandwidth>", at which point the 113 // following 'bandwidth' variable gets updated with the new value for each 114 // node. Once the heartbeat command is issued to update the value on the 115 // specified datanode, this value will be set back to 0. 116 private long bandwidth; 117 118 /** A queue of blocks to be replicated by this datanode */ 119 private BlockQueue<BlockTargetPair> replicateBlocks = new BlockQueue<BlockTargetPair>(); 120 /** A queue of blocks to be recovered by this datanode */ 121 private BlockQueue<BlockInfoUnderConstruction> recoverBlocks = 122 new BlockQueue<BlockInfoUnderConstruction>(); 123 /** A set of blocks to be invalidated by this datanode */ 124 private LightWeightHashSet<Block> invalidateBlocks = new LightWeightHashSet<Block>(); 125 126 /* Variables for maintaining number of blocks scheduled to be written to 127 * this datanode. This count is approximate and might be slightly bigger 128 * in case of errors (e.g. datanode does not report if an error occurs 129 * while writing the block). 130 */ 131 private int currApproxBlocksScheduled = 0; 132 private int prevApproxBlocksScheduled = 0; 133 private long lastBlocksScheduledRollTime = 0; 134 private static final int BLOCKS_SCHEDULED_ROLL_INTERVAL = 600*1000; //10min 135 private int volumeFailures = 0; 136 137 /* Set to true after processing first block report. Will be reset to false 138 * if the node re-registers. This enables a NN in safe-mode to reprocess 139 * the first block report in case the DN is now reporting different blocks 140 */ 141 private boolean processedBlockReport = false; 142 143 /** 144 * When set to true, the node is not in include list and is not allowed 145 * to communicate with the namenode 146 */ 147 private boolean disallowed = false; 148 149 /** Default constructor */ 150 public DatanodeDescriptor() {} 151 152 /** DatanodeDescriptor constructor 153 * @param nodeID id of the data node 154 */ 155 public DatanodeDescriptor(DatanodeID nodeID) { 156 this(nodeID, 0L, 0L, 0L, 0L, 0, 0); 157 } 158 159 /** DatanodeDescriptor constructor 160 * 161 * @param nodeID id of the data node 162 * @param networkLocation location of the data node in network 163 */ 164 public DatanodeDescriptor(DatanodeID nodeID, 165 String networkLocation) { 166 this(nodeID, networkLocation, null); 167 } 168 169 /** DatanodeDescriptor constructor 170 * 171 * @param nodeID id of the data node 172 * @param networkLocation location of the data node in network 173 * @param hostName it could be different from host specified for DatanodeID 174 */ 175 public DatanodeDescriptor(DatanodeID nodeID, 176 String networkLocation, 177 String hostName) { 178 this(nodeID, networkLocation, hostName, 0L, 0L, 0L, 0L, 0, 0); 179 } 180 181 /** DatanodeDescriptor constructor 182 * 183 * @param nodeID id of the data node 184 * @param capacity capacity of the data node 185 * @param dfsUsed space used by the data node 186 * @param remaining remaining capacity of the data node 187 * @param bpused space used by the block pool corresponding to this namenode 188 * @param xceiverCount # of data transfers at the data node 189 */ 190 public DatanodeDescriptor(DatanodeID nodeID, 191 long capacity, 192 long dfsUsed, 193 long remaining, 194 long bpused, 195 int xceiverCount, 196 int failedVolumes) { 197 super(nodeID); 198 updateHeartbeat(capacity, dfsUsed, remaining, bpused, xceiverCount, 199 failedVolumes); 200 } 201 202 /** DatanodeDescriptor constructor 203 * 204 * @param nodeID id of the data node 205 * @param networkLocation location of the data node in network 206 * @param capacity capacity of the data node, including space used by non-dfs 207 * @param dfsUsed the used space by dfs datanode 208 * @param remaining remaining capacity of the data node 209 * @param bpused space used by the block pool corresponding to this namenode 210 * @param xceiverCount # of data transfers at the data node 211 */ 212 public DatanodeDescriptor(DatanodeID nodeID, 213 String networkLocation, 214 String hostName, 215 long capacity, 216 long dfsUsed, 217 long remaining, 218 long bpused, 219 int xceiverCount, 220 int failedVolumes) { 221 super(nodeID, networkLocation, hostName); 222 updateHeartbeat(capacity, dfsUsed, remaining, bpused, xceiverCount, 223 failedVolumes); 224 } 225 226 /** 227 * Add datanode to the block. 228 * Add block to the head of the list of blocks belonging to the data-node. 229 */ 230 public boolean addBlock(BlockInfo b) { 231 if(!b.addNode(this)) 232 return false; 233 // add to the head of the data-node list 234 blockList = b.listInsert(blockList, this); 235 numBlocks++; 236 return true; 237 } 238 239 /** 240 * Remove block from the list of blocks belonging to the data-node. 241 * Remove datanode from the block. 242 */ 243 public boolean removeBlock(BlockInfo b) { 244 blockList = b.listRemove(blockList, this); 245 if ( b.removeNode(this) ) { 246 numBlocks--; 247 return true; 248 } else { 249 return false; 250 } 251 } 252 253 /** 254 * Move block to the head of the list of blocks belonging to the data-node. 255 * @return the index of the head of the blockList 256 */ 257 int moveBlockToHead(BlockInfo b, int curIndex, int headIndex) { 258 blockList = b.moveBlockToHead(blockList, this, curIndex, headIndex); 259 return curIndex; 260 } 261 262 /** 263 * Used for testing only 264 * @return the head of the blockList 265 */ 266 protected BlockInfo getHead(){ 267 return blockList; 268 } 269 270 /** 271 * Replace specified old block with a new one in the DataNodeDescriptor. 272 * 273 * @param oldBlock - block to be replaced 274 * @param newBlock - a replacement block 275 * @return the new block 276 */ 277 public BlockInfo replaceBlock(BlockInfo oldBlock, BlockInfo newBlock) { 278 boolean done = removeBlock(oldBlock); 279 assert done : "Old block should belong to the data-node when replacing"; 280 done = addBlock(newBlock); 281 assert done : "New block should not belong to the data-node when replacing"; 282 return newBlock; 283 } 284 285 public void resetBlocks() { 286 this.capacity = 0; 287 this.remaining = 0; 288 this.blockPoolUsed = 0; 289 this.dfsUsed = 0; 290 this.xceiverCount = 0; 291 this.blockList = null; 292 this.invalidateBlocks.clear(); 293 this.volumeFailures = 0; 294 } 295 296 public int numBlocks() { 297 return numBlocks; 298 } 299 300 /** 301 * Updates stats from datanode heartbeat. 302 */ 303 public void updateHeartbeat(long capacity, long dfsUsed, long remaining, 304 long blockPoolUsed, int xceiverCount, int volFailures) { 305 this.capacity = capacity; 306 this.dfsUsed = dfsUsed; 307 this.remaining = remaining; 308 this.blockPoolUsed = blockPoolUsed; 309 this.lastUpdate = System.currentTimeMillis(); 310 this.xceiverCount = xceiverCount; 311 this.volumeFailures = volFailures; 312 rollBlocksScheduled(lastUpdate); 313 } 314 315 /** 316 * Iterates over the list of blocks belonging to the datanode. 317 */ 318 public static class BlockIterator implements Iterator<BlockInfo> { 319 private BlockInfo current; 320 private DatanodeDescriptor node; 321 322 BlockIterator(BlockInfo head, DatanodeDescriptor dn) { 323 this.current = head; 324 this.node = dn; 325 } 326 327 public boolean hasNext() { 328 return current != null; 329 } 330 331 public BlockInfo next() { 332 BlockInfo res = current; 333 current = current.getNext(current.findDatanode(node)); 334 return res; 335 } 336 337 public void remove() { 338 throw new UnsupportedOperationException("Sorry. can't remove."); 339 } 340 } 341 342 public Iterator<BlockInfo> getBlockIterator() { 343 return new BlockIterator(this.blockList, this); 344 } 345 346 /** 347 * Store block replication work. 348 */ 349 void addBlockToBeReplicated(Block block, DatanodeDescriptor[] targets) { 350 assert(block != null && targets != null && targets.length > 0); 351 replicateBlocks.offer(new BlockTargetPair(block, targets)); 352 } 353 354 /** 355 * Store block recovery work. 356 */ 357 void addBlockToBeRecovered(BlockInfoUnderConstruction block) { 358 if(recoverBlocks.contains(block)) { 359 // this prevents adding the same block twice to the recovery queue 360 BlockManager.LOG.info("Block " + block + 361 " is already in the recovery queue."); 362 return; 363 } 364 recoverBlocks.offer(block); 365 } 366 367 /** 368 * Store block invalidation work. 369 */ 370 void addBlocksToBeInvalidated(List<Block> blocklist) { 371 assert(blocklist != null && blocklist.size() > 0); 372 synchronized (invalidateBlocks) { 373 for(Block blk : blocklist) { 374 invalidateBlocks.add(blk); 375 } 376 } 377 } 378 379 /** 380 * The number of work items that are pending to be replicated 381 */ 382 int getNumberOfBlocksToBeReplicated() { 383 return replicateBlocks.size(); 384 } 385 386 /** 387 * The number of block invalidation items that are pending to 388 * be sent to the datanode 389 */ 390 int getNumberOfBlocksToBeInvalidated() { 391 synchronized (invalidateBlocks) { 392 return invalidateBlocks.size(); 393 } 394 } 395 396 public List<BlockTargetPair> getReplicationCommand(int maxTransfers) { 397 return replicateBlocks.poll(maxTransfers); 398 } 399 400 public BlockInfoUnderConstruction[] getLeaseRecoveryCommand(int maxTransfers) { 401 List<BlockInfoUnderConstruction> blocks = recoverBlocks.poll(maxTransfers); 402 if(blocks == null) 403 return null; 404 return blocks.toArray(new BlockInfoUnderConstruction[blocks.size()]); 405 } 406 407 /** 408 * Remove the specified number of blocks to be invalidated 409 */ 410 public Block[] getInvalidateBlocks(int maxblocks) { 411 synchronized (invalidateBlocks) { 412 Block[] deleteList = invalidateBlocks.pollToArray(new Block[Math.min( 413 invalidateBlocks.size(), maxblocks)]); 414 return deleteList.length == 0 ? null : deleteList; 415 } 416 } 417 418 /** Serialization for FSEditLog */ 419 public void readFieldsFromFSEditLog(DataInput in) throws IOException { 420 this.name = DeprecatedUTF8.readString(in); 421 this.storageID = DeprecatedUTF8.readString(in); 422 this.infoPort = in.readShort() & 0x0000ffff; 423 424 this.capacity = in.readLong(); 425 this.dfsUsed = in.readLong(); 426 this.remaining = in.readLong(); 427 this.blockPoolUsed = in.readLong(); 428 this.lastUpdate = in.readLong(); 429 this.xceiverCount = in.readInt(); 430 this.location = Text.readString(in); 431 this.hostName = Text.readString(in); 432 setAdminState(WritableUtils.readEnum(in, AdminStates.class)); 433 } 434 435 /** 436 * @return Approximate number of blocks currently scheduled to be written 437 * to this datanode. 438 */ 439 public int getBlocksScheduled() { 440 return currApproxBlocksScheduled + prevApproxBlocksScheduled; 441 } 442 443 /** 444 * Increments counter for number of blocks scheduled. 445 */ 446 public void incBlocksScheduled() { 447 currApproxBlocksScheduled++; 448 } 449 450 /** 451 * Decrements counter for number of blocks scheduled. 452 */ 453 void decBlocksScheduled() { 454 if (prevApproxBlocksScheduled > 0) { 455 prevApproxBlocksScheduled--; 456 } else if (currApproxBlocksScheduled > 0) { 457 currApproxBlocksScheduled--; 458 } 459 // its ok if both counters are zero. 460 } 461 462 /** 463 * Adjusts curr and prev number of blocks scheduled every few minutes. 464 */ 465 private void rollBlocksScheduled(long now) { 466 if ((now - lastBlocksScheduledRollTime) > 467 BLOCKS_SCHEDULED_ROLL_INTERVAL) { 468 prevApproxBlocksScheduled = currApproxBlocksScheduled; 469 currApproxBlocksScheduled = 0; 470 lastBlocksScheduledRollTime = now; 471 } 472 } 473 474 @Override 475 public int hashCode() { 476 // Super implementation is sufficient 477 return super.hashCode(); 478 } 479 480 @Override 481 public boolean equals(Object obj) { 482 // Sufficient to use super equality as datanodes are uniquely identified 483 // by DatanodeID 484 return (this == obj) || super.equals(obj); 485 } 486 487 /** Decommissioning status */ 488 public class DecommissioningStatus { 489 private int underReplicatedBlocks; 490 private int decommissionOnlyReplicas; 491 private int underReplicatedInOpenFiles; 492 private long startTime; 493 494 synchronized void set(int underRep, 495 int onlyRep, int underConstruction) { 496 if (isDecommissionInProgress() == false) { 497 return; 498 } 499 underReplicatedBlocks = underRep; 500 decommissionOnlyReplicas = onlyRep; 501 underReplicatedInOpenFiles = underConstruction; 502 } 503 504 /** @return the number of under-replicated blocks */ 505 public synchronized int getUnderReplicatedBlocks() { 506 if (isDecommissionInProgress() == false) { 507 return 0; 508 } 509 return underReplicatedBlocks; 510 } 511 /** @return the number of decommission-only replicas */ 512 public synchronized int getDecommissionOnlyReplicas() { 513 if (isDecommissionInProgress() == false) { 514 return 0; 515 } 516 return decommissionOnlyReplicas; 517 } 518 /** @return the number of under-replicated blocks in open files */ 519 public synchronized int getUnderReplicatedInOpenFiles() { 520 if (isDecommissionInProgress() == false) { 521 return 0; 522 } 523 return underReplicatedInOpenFiles; 524 } 525 /** Set start time */ 526 public synchronized void setStartTime(long time) { 527 startTime = time; 528 } 529 /** @return start time */ 530 public synchronized long getStartTime() { 531 if (isDecommissionInProgress() == false) { 532 return 0; 533 } 534 return startTime; 535 } 536 } // End of class DecommissioningStatus 537 538 /** 539 * Set the flag to indicate if this datanode is disallowed from communicating 540 * with the namenode. 541 */ 542 public void setDisallowed(boolean flag) { 543 disallowed = flag; 544 } 545 /** Is the datanode disallowed from communicating with the namenode? */ 546 public boolean isDisallowed() { 547 return disallowed; 548 } 549 550 /** 551 * @return number of failed volumes in the datanode. 552 */ 553 public int getVolumeFailures() { 554 return volumeFailures; 555 } 556 557 /** 558 * @param nodeReg DatanodeID to update registration for. 559 */ 560 public void updateRegInfo(DatanodeID nodeReg) { 561 processedBlockReport = false; // must re-process IBR after re-registration 562 super.updateRegInfo(nodeReg); 563 } 564 565 /** 566 * @return Blanacer bandwidth in bytes per second for this datanode. 567 */ 568 public long getBalancerBandwidth() { 569 return this.bandwidth; 570 } 571 572 /** 573 * @param bandwidth Blanacer bandwidth in bytes per second for this datanode. 574 */ 575 public void setBalancerBandwidth(long bandwidth) { 576 this.bandwidth = bandwidth; 577 } 578 579 public void receivedBlockReport() { 580 processedBlockReport = true; 581 } 582 583 boolean isFirstBlockReport() { 584 return !processedBlockReport; 585 } 586}