001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.namenode;
019
020import java.io.File;
021import java.io.IOException;
022import java.util.Collections;
023import java.util.List;
024import java.util.TreeSet;
025
026import org.apache.commons.logging.Log;
027import org.apache.commons.logging.LogFactory;
028import org.apache.hadoop.conf.Configuration;
029import org.apache.hadoop.hdfs.DFSConfigKeys;
030import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
031import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
032import org.apache.hadoop.hdfs.util.MD5FileUtils;
033
034import com.google.common.collect.Lists;
035import com.google.common.collect.Sets;
036
037/**
038 * The NNStorageRetentionManager is responsible for inspecting the storage
039 * directories of the NN and enforcing a retention policy on checkpoints
040 * and edit logs.
041 * 
042 * It delegates the actual removal of files to a StoragePurger
043 * implementation, which might delete the files or instead copy them to
044 * a filer or HDFS for later analysis.
045 */
046public class NNStorageRetentionManager {
047  
048  private final int numCheckpointsToRetain;
049  private static final Log LOG = LogFactory.getLog(
050      NNStorageRetentionManager.class);
051  private final NNStorage storage;
052  private final StoragePurger purger;
053  private final FSEditLog editLog;
054  
055  public NNStorageRetentionManager(
056      Configuration conf,
057      NNStorage storage,
058      FSEditLog editLog,
059      StoragePurger purger) {
060    this.numCheckpointsToRetain = conf.getInt(
061        DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY,
062        DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_DEFAULT);
063    this.storage = storage;
064    this.editLog = editLog;
065    this.purger = purger;
066  }
067  
068  public NNStorageRetentionManager(Configuration conf, NNStorage storage,
069      FSEditLog editLog) {
070    this(conf, storage, editLog, new DeletionStoragePurger());
071  }
072
073  public void purgeOldStorage() throws IOException {
074    FSImageTransactionalStorageInspector inspector =
075      new FSImageTransactionalStorageInspector();
076    storage.inspectStorageDirs(inspector);
077
078    long minImageTxId = getImageTxIdToRetain(inspector);
079    purgeCheckpointsOlderThan(inspector, minImageTxId);
080    // If fsimage_N is the image we want to keep, then we need to keep
081    // all txns > N. We can remove anything < N+1, since fsimage_N
082    // reflects the state up to and including N.
083    editLog.purgeLogsOlderThan(minImageTxId + 1);
084  }
085  
086  private void purgeCheckpointsOlderThan(
087      FSImageTransactionalStorageInspector inspector,
088      long minTxId) {
089    for (FSImageFile image : inspector.getFoundImages()) {
090      if (image.getCheckpointTxId() < minTxId) {
091        LOG.info("Purging old image " + image);
092        purger.purgeImage(image);
093      }
094    }
095  }
096
097  /**
098   * @param inspector inspector that has already inspected all storage dirs
099   * @return the transaction ID corresponding to the oldest checkpoint
100   * that should be retained. 
101   */
102  private long getImageTxIdToRetain(FSImageTransactionalStorageInspector inspector) {
103      
104    List<FSImageFile> images = inspector.getFoundImages();
105    TreeSet<Long> imageTxIds = Sets.newTreeSet();
106    for (FSImageFile image : images) {
107      imageTxIds.add(image.getCheckpointTxId());
108    }
109    
110    List<Long> imageTxIdsList = Lists.newArrayList(imageTxIds);
111    if (imageTxIdsList.isEmpty()) {
112      return 0;
113    }
114    
115    Collections.reverse(imageTxIdsList);
116    int toRetain = Math.min(numCheckpointsToRetain, imageTxIdsList.size());    
117    long minTxId = imageTxIdsList.get(toRetain - 1);
118    LOG.info("Going to retain " + toRetain + " images with txid >= " +
119        minTxId);
120    return minTxId;
121  }
122  
123  /**
124   * Interface responsible for disposing of old checkpoints and edit logs.
125   */
126  static interface StoragePurger {
127    void purgeLog(EditLogFile log);
128    void purgeImage(FSImageFile image);
129  }
130  
131  static class DeletionStoragePurger implements StoragePurger {
132    @Override
133    public void purgeLog(EditLogFile log) {
134      deleteOrWarn(log.getFile());
135    }
136
137    @Override
138    public void purgeImage(FSImageFile image) {
139      deleteOrWarn(image.getFile());
140      deleteOrWarn(MD5FileUtils.getDigestFileForFile(image.getFile()));
141    }
142
143    private static void deleteOrWarn(File file) {
144      if (!file.delete()) {
145        // It's OK if we fail to delete something -- we'll catch it
146        // next time we swing through this directory.
147        LOG.warn("Could not delete " + file);
148      }      
149    }
150  }
151}