001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.blockmanagement;
019
import java.io.DataInput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Queue;
import java.util.Set;
import java.util.TreeSet;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hdfs.DeprecatedUTF8;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.util.LightWeightHashSet;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;
040
041/**************************************************
042 * DatanodeDescriptor tracks stats on a given DataNode, such as
043 * available storage capacity, last update time, etc., and maintains a
044 * set of blocks stored on the datanode.
045 *
046 * This data structure is internal to the namenode. It is *not* sent
047 * over-the-wire to the Client or the Datanodes. Neither is it stored
048 * persistently in the fsImage.
049 **************************************************/
050@InterfaceAudience.Private
051public class DatanodeDescriptor extends DatanodeInfo {
052  
053  // Stores status of decommissioning.
054  // If node is not decommissioning, do not use this object for anything.
055  public DecommissioningStatus decommissioningStatus = new DecommissioningStatus();
056  
057  /** Block and targets pair */
058  @InterfaceAudience.Private
059  @InterfaceStability.Evolving
060  public static class BlockTargetPair {
061    public final Block block;
062    public final DatanodeDescriptor[] targets;    
063
064    BlockTargetPair(Block block, DatanodeDescriptor[] targets) {
065      this.block = block;
066      this.targets = targets;
067    }
068  }
069
070  /** A BlockTargetPair queue. */
071  private static class BlockQueue<E> {
072    private final Queue<E> blockq = new LinkedList<E>();
073
074    /** Size of the queue */
075    synchronized int size() {return blockq.size();}
076
077    /** Enqueue */
078    synchronized boolean offer(E e) { 
079      return blockq.offer(e);
080    }
081
082    /** Dequeue */
083    synchronized List<E> poll(int numBlocks) {
084      if (numBlocks <= 0 || blockq.isEmpty()) {
085        return null;
086      }
087
088      List<E> results = new ArrayList<E>();
089      for(; !blockq.isEmpty() && numBlocks > 0; numBlocks--) {
090        results.add(blockq.poll());
091      }
092      return results;
093    }
094
095    /**
096     * Returns <tt>true</tt> if the queue contains the specified element.
097     */
098    boolean contains(E e) {
099      return blockq.contains(e);
100    }
101  }
102
103  private volatile BlockInfo blockList = null;
104  private int numBlocks = 0;
105  // isAlive == heartbeats.contains(this)
106  // This is an optimization, because contains takes O(n) time on Arraylist
107  public boolean isAlive = false;
108  public boolean needKeyUpdate = false;
109
110  // A system administrator can tune the balancer bandwidth parameter
111  // (dfs.balance.bandwidthPerSec) dynamically by calling
112  // "dfsadmin -setBalanacerBandwidth <newbandwidth>", at which point the
113  // following 'bandwidth' variable gets updated with the new value for each
114  // node. Once the heartbeat command is issued to update the value on the
115  // specified datanode, this value will be set back to 0.
116  private long bandwidth;
117
118  /** A queue of blocks to be replicated by this datanode */
119  private BlockQueue<BlockTargetPair> replicateBlocks = new BlockQueue<BlockTargetPair>();
120  /** A queue of blocks to be recovered by this datanode */
121  private BlockQueue<BlockInfoUnderConstruction> recoverBlocks =
122                                new BlockQueue<BlockInfoUnderConstruction>();
123  /** A set of blocks to be invalidated by this datanode */
124  private LightWeightHashSet<Block> invalidateBlocks = new LightWeightHashSet<Block>();
125
126  /* Variables for maintaining number of blocks scheduled to be written to
127   * this datanode. This count is approximate and might be slightly bigger
128   * in case of errors (e.g. datanode does not report if an error occurs
129   * while writing the block).
130   */
131  private int currApproxBlocksScheduled = 0;
132  private int prevApproxBlocksScheduled = 0;
133  private long lastBlocksScheduledRollTime = 0;
134  private static final int BLOCKS_SCHEDULED_ROLL_INTERVAL = 600*1000; //10min
135  private int volumeFailures = 0;
136  
137  /* Set to true after processing first block report.  Will be reset to false
138   * if the node re-registers.  This enables a NN in safe-mode to reprocess
139   * the first block report in case the DN is now reporting different blocks
140   */
141  private boolean processedBlockReport = false;
142  
143  /** 
144   * When set to true, the node is not in include list and is not allowed
145   * to communicate with the namenode
146   */
147  private boolean disallowed = false;
148
149  /** Default constructor */
150  public DatanodeDescriptor() {}
151  
152  /** DatanodeDescriptor constructor
153   * @param nodeID id of the data node
154   */
155  public DatanodeDescriptor(DatanodeID nodeID) {
156    this(nodeID, 0L, 0L, 0L, 0L, 0, 0);
157  }
158
159  /** DatanodeDescriptor constructor
160   * 
161   * @param nodeID id of the data node
162   * @param networkLocation location of the data node in network
163   */
164  public DatanodeDescriptor(DatanodeID nodeID, 
165                            String networkLocation) {
166    this(nodeID, networkLocation, null);
167  }
168  
169  /** DatanodeDescriptor constructor
170   * 
171   * @param nodeID id of the data node
172   * @param networkLocation location of the data node in network
173   * @param hostName it could be different from host specified for DatanodeID
174   */
175  public DatanodeDescriptor(DatanodeID nodeID, 
176                            String networkLocation,
177                            String hostName) {
178    this(nodeID, networkLocation, hostName, 0L, 0L, 0L, 0L, 0, 0);
179  }
180  
181  /** DatanodeDescriptor constructor
182   * 
183   * @param nodeID id of the data node
184   * @param capacity capacity of the data node
185   * @param dfsUsed space used by the data node
186   * @param remaining remaining capacity of the data node
187   * @param bpused space used by the block pool corresponding to this namenode
188   * @param xceiverCount # of data transfers at the data node
189   */
190  public DatanodeDescriptor(DatanodeID nodeID, 
191                            long capacity,
192                            long dfsUsed,
193                            long remaining,
194                            long bpused,
195                            int xceiverCount,
196                            int failedVolumes) {
197    super(nodeID);
198    updateHeartbeat(capacity, dfsUsed, remaining, bpused, xceiverCount, 
199        failedVolumes);
200  }
201
202  /** DatanodeDescriptor constructor
203   * 
204   * @param nodeID id of the data node
205   * @param networkLocation location of the data node in network
206   * @param capacity capacity of the data node, including space used by non-dfs
207   * @param dfsUsed the used space by dfs datanode
208   * @param remaining remaining capacity of the data node
209   * @param bpused space used by the block pool corresponding to this namenode
210   * @param xceiverCount # of data transfers at the data node
211   */
212  public DatanodeDescriptor(DatanodeID nodeID,
213                            String networkLocation,
214                            String hostName,
215                            long capacity,
216                            long dfsUsed,
217                            long remaining,
218                            long bpused,
219                            int xceiverCount,
220                            int failedVolumes) {
221    super(nodeID, networkLocation, hostName);
222    updateHeartbeat(capacity, dfsUsed, remaining, bpused, xceiverCount, 
223        failedVolumes);
224  }
225
226  /**
227   * Add datanode to the block.
228   * Add block to the head of the list of blocks belonging to the data-node.
229   */
230  public boolean addBlock(BlockInfo b) {
231    if(!b.addNode(this))
232      return false;
233    // add to the head of the data-node list
234    blockList = b.listInsert(blockList, this);
235    numBlocks++;
236    return true;
237  }
238  
239  /**
240   * Remove block from the list of blocks belonging to the data-node.
241   * Remove datanode from the block.
242   */
243  public boolean removeBlock(BlockInfo b) {
244    blockList = b.listRemove(blockList, this);
245    if ( b.removeNode(this) ) {
246      numBlocks--;
247      return true;
248    } else {
249      return false;
250    }
251  }
252
253  /**
254   * Move block to the head of the list of blocks belonging to the data-node.
255   * @return the index of the head of the blockList
256   */
257  int moveBlockToHead(BlockInfo b, int curIndex, int headIndex) {
258    blockList = b.moveBlockToHead(blockList, this, curIndex, headIndex);
259    return curIndex;
260  }
261
262  /**
263   * Used for testing only
264   * @return the head of the blockList
265   */
266  protected BlockInfo getHead(){
267    return blockList;
268  }
269
270  /**
271   * Replace specified old block with a new one in the DataNodeDescriptor.
272   *
273   * @param oldBlock - block to be replaced
274   * @param newBlock - a replacement block
275   * @return the new block
276   */
277  public BlockInfo replaceBlock(BlockInfo oldBlock, BlockInfo newBlock) {
278    boolean done = removeBlock(oldBlock);
279    assert done : "Old block should belong to the data-node when replacing";
280    done = addBlock(newBlock);
281    assert done : "New block should not belong to the data-node when replacing";
282    return newBlock;
283  }
284
285  public void resetBlocks() {
286    this.capacity = 0;
287    this.remaining = 0;
288    this.blockPoolUsed = 0;
289    this.dfsUsed = 0;
290    this.xceiverCount = 0;
291    this.blockList = null;
292    this.invalidateBlocks.clear();
293    this.volumeFailures = 0;
294  }
295
296  public int numBlocks() {
297    return numBlocks;
298  }
299
300  /**
301   * Updates stats from datanode heartbeat.
302   */
303  public void updateHeartbeat(long capacity, long dfsUsed, long remaining,
304      long blockPoolUsed, int xceiverCount, int volFailures) {
305    this.capacity = capacity;
306    this.dfsUsed = dfsUsed;
307    this.remaining = remaining;
308    this.blockPoolUsed = blockPoolUsed;
309    this.lastUpdate = System.currentTimeMillis();
310    this.xceiverCount = xceiverCount;
311    this.volumeFailures = volFailures;
312    rollBlocksScheduled(lastUpdate);
313  }
314
315  /**
316   * Iterates over the list of blocks belonging to the datanode.
317   */
318  public static class BlockIterator implements Iterator<BlockInfo> {
319    private BlockInfo current;
320    private DatanodeDescriptor node;
321      
322    BlockIterator(BlockInfo head, DatanodeDescriptor dn) {
323      this.current = head;
324      this.node = dn;
325    }
326
327    public boolean hasNext() {
328      return current != null;
329    }
330
331    public BlockInfo next() {
332      BlockInfo res = current;
333      current = current.getNext(current.findDatanode(node));
334      return res;
335    }
336
337    public void remove()  {
338      throw new UnsupportedOperationException("Sorry. can't remove.");
339    }
340  }
341
342  public Iterator<BlockInfo> getBlockIterator() {
343    return new BlockIterator(this.blockList, this);
344  }
345  
346  /**
347   * Store block replication work.
348   */
349  void addBlockToBeReplicated(Block block, DatanodeDescriptor[] targets) {
350    assert(block != null && targets != null && targets.length > 0);
351    replicateBlocks.offer(new BlockTargetPair(block, targets));
352  }
353
354  /**
355   * Store block recovery work.
356   */
357  void addBlockToBeRecovered(BlockInfoUnderConstruction block) {
358    if(recoverBlocks.contains(block)) {
359      // this prevents adding the same block twice to the recovery queue
360      BlockManager.LOG.info("Block " + block +
361                            " is already in the recovery queue.");
362      return;
363    }
364    recoverBlocks.offer(block);
365  }
366
367  /**
368   * Store block invalidation work.
369   */
370  void addBlocksToBeInvalidated(List<Block> blocklist) {
371    assert(blocklist != null && blocklist.size() > 0);
372    synchronized (invalidateBlocks) {
373      for(Block blk : blocklist) {
374        invalidateBlocks.add(blk);
375      }
376    }
377  }
378
379  /**
380   * The number of work items that are pending to be replicated
381   */
382  int getNumberOfBlocksToBeReplicated() {
383    return replicateBlocks.size();
384  }
385
386  /**
387   * The number of block invalidation items that are pending to 
388   * be sent to the datanode
389   */
390  int getNumberOfBlocksToBeInvalidated() {
391    synchronized (invalidateBlocks) {
392      return invalidateBlocks.size();
393    }
394  }
395  
396  public List<BlockTargetPair> getReplicationCommand(int maxTransfers) {
397    return replicateBlocks.poll(maxTransfers);
398  }
399
400  public BlockInfoUnderConstruction[] getLeaseRecoveryCommand(int maxTransfers) {
401    List<BlockInfoUnderConstruction> blocks = recoverBlocks.poll(maxTransfers);
402    if(blocks == null)
403      return null;
404    return blocks.toArray(new BlockInfoUnderConstruction[blocks.size()]);
405  }
406
407  /**
408   * Remove the specified number of blocks to be invalidated
409   */
410  public Block[] getInvalidateBlocks(int maxblocks) {
411    synchronized (invalidateBlocks) {
412      Block[] deleteList = invalidateBlocks.pollToArray(new Block[Math.min(
413          invalidateBlocks.size(), maxblocks)]);
414      return deleteList.length == 0 ? null : deleteList;
415    }
416  }
417
418  /** Serialization for FSEditLog */
419  public void readFieldsFromFSEditLog(DataInput in) throws IOException {
420    this.name = DeprecatedUTF8.readString(in);
421    this.storageID = DeprecatedUTF8.readString(in);
422    this.infoPort = in.readShort() & 0x0000ffff;
423
424    this.capacity = in.readLong();
425    this.dfsUsed = in.readLong();
426    this.remaining = in.readLong();
427    this.blockPoolUsed = in.readLong();
428    this.lastUpdate = in.readLong();
429    this.xceiverCount = in.readInt();
430    this.location = Text.readString(in);
431    this.hostName = Text.readString(in);
432    setAdminState(WritableUtils.readEnum(in, AdminStates.class));
433  }
434  
435  /**
436   * @return Approximate number of blocks currently scheduled to be written 
437   * to this datanode.
438   */
439  public int getBlocksScheduled() {
440    return currApproxBlocksScheduled + prevApproxBlocksScheduled;
441  }
442  
443  /**
444   * Increments counter for number of blocks scheduled. 
445   */
446  public void incBlocksScheduled() {
447    currApproxBlocksScheduled++;
448  }
449  
450  /**
451   * Decrements counter for number of blocks scheduled.
452   */
453  void decBlocksScheduled() {
454    if (prevApproxBlocksScheduled > 0) {
455      prevApproxBlocksScheduled--;
456    } else if (currApproxBlocksScheduled > 0) {
457      currApproxBlocksScheduled--;
458    } 
459    // its ok if both counters are zero.
460  }
461  
462  /**
463   * Adjusts curr and prev number of blocks scheduled every few minutes.
464   */
465  private void rollBlocksScheduled(long now) {
466    if ((now - lastBlocksScheduledRollTime) > 
467        BLOCKS_SCHEDULED_ROLL_INTERVAL) {
468      prevApproxBlocksScheduled = currApproxBlocksScheduled;
469      currApproxBlocksScheduled = 0;
470      lastBlocksScheduledRollTime = now;
471    }
472  }
473  
474  @Override
475  public int hashCode() {
476    // Super implementation is sufficient
477    return super.hashCode();
478  }
479  
480  @Override
481  public boolean equals(Object obj) {
482    // Sufficient to use super equality as datanodes are uniquely identified
483    // by DatanodeID
484    return (this == obj) || super.equals(obj);
485  }
486
487  /** Decommissioning status */
488  public class DecommissioningStatus {
489    private int underReplicatedBlocks;
490    private int decommissionOnlyReplicas;
491    private int underReplicatedInOpenFiles;
492    private long startTime;
493    
494    synchronized void set(int underRep,
495        int onlyRep, int underConstruction) {
496      if (isDecommissionInProgress() == false) {
497        return;
498      }
499      underReplicatedBlocks = underRep;
500      decommissionOnlyReplicas = onlyRep;
501      underReplicatedInOpenFiles = underConstruction;
502    }
503
504    /** @return the number of under-replicated blocks */
505    public synchronized int getUnderReplicatedBlocks() {
506      if (isDecommissionInProgress() == false) {
507        return 0;
508      }
509      return underReplicatedBlocks;
510    }
511    /** @return the number of decommission-only replicas */
512    public synchronized int getDecommissionOnlyReplicas() {
513      if (isDecommissionInProgress() == false) {
514        return 0;
515      }
516      return decommissionOnlyReplicas;
517    }
518    /** @return the number of under-replicated blocks in open files */
519    public synchronized int getUnderReplicatedInOpenFiles() {
520      if (isDecommissionInProgress() == false) {
521        return 0;
522      }
523      return underReplicatedInOpenFiles;
524    }
525    /** Set start time */
526    public synchronized void setStartTime(long time) {
527      startTime = time;
528    }
529    /** @return start time */
530    public synchronized long getStartTime() {
531      if (isDecommissionInProgress() == false) {
532        return 0;
533      }
534      return startTime;
535    }
536  }  // End of class DecommissioningStatus
537
538  /**
539   * Set the flag to indicate if this datanode is disallowed from communicating
540   * with the namenode.
541   */
542  public void setDisallowed(boolean flag) {
543    disallowed = flag;
544  }
545  /** Is the datanode disallowed from communicating with the namenode? */
546  public boolean isDisallowed() {
547    return disallowed;
548  }
549
550  /**
551   * @return number of failed volumes in the datanode.
552   */
553  public int getVolumeFailures() {
554    return volumeFailures;
555  }
556
557  /**
558   * @param nodeReg DatanodeID to update registration for.
559   */
560  public void updateRegInfo(DatanodeID nodeReg) {
561    processedBlockReport = false; // must re-process IBR after re-registration
562    super.updateRegInfo(nodeReg);
563  }
564
565  /**
566   * @return Blanacer bandwidth in bytes per second for this datanode.
567   */
568  public long getBalancerBandwidth() {
569    return this.bandwidth;
570  }
571
572  /**
573   * @param bandwidth Blanacer bandwidth in bytes per second for this datanode.
574   */
575  public void setBalancerBandwidth(long bandwidth) {
576    this.bandwidth = bandwidth;
577  }
578
579  public void receivedBlockReport() {
580    processedBlockReport = true;
581  }
582
583  boolean isFirstBlockReport() {
584    return !processedBlockReport;
585  }
586}