/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
import org.apache.hadoop.hdfs.util.MD5FileUtils;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

/**
 * The NNStorageRetentionManager is responsible for inspecting the storage
 * directories of the NN and enforcing a retention policy on checkpoints
 * and edit logs.
 *
 * It delegates the actual removal of files to a StoragePurger
 * implementation, which might delete the files or instead copy them to
 * a filer or HDFS for later analysis.
 */
public class NNStorageRetentionManager {

  private final int numCheckpointsToRetain;
  private static final Log LOG = LogFactory.getLog(
      NNStorageRetentionManager.class);
  private final NNStorage storage;
  private final StoragePurger purger;
  private final FSEditLog editLog;

  /**
   * Creates a retention manager that hands files to the given purger.
   *
   * @param conf configuration from which the number of checkpoints to
   *        retain is read
   * @param storage the storage whose directories are inspected
   * @param editLog the edit log that is asked to purge old segments
   * @param purger the strategy used to dispose of old images
   * @throws IllegalArgumentException if the configured number of
   *         checkpoints to retain is not positive
   */
  public NNStorageRetentionManager(
      Configuration conf,
      NNStorage storage,
      FSEditLog editLog,
      StoragePurger purger) {
    int configuredRetainCount = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY,
        DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_DEFAULT);
    // Fail fast on a non-positive value: getImageTxIdToRetain() indexes the
    // list of image txids with (toRetain - 1), which would otherwise be
    // negative and blow up in the middle of a purge.
    if (configuredRetainCount <= 0) {
      throw new IllegalArgumentException(
          "Must retain at least one checkpoint, but "
          + DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY
          + " is " + configuredRetainCount);
    }
    this.numCheckpointsToRetain = configuredRetainCount;
    this.storage = storage;
    this.editLog = editLog;
    this.purger = purger;
  }

  /**
   * Creates a retention manager that uses a {@link DeletionStoragePurger}.
   *
   * @param conf configuration from which the number of checkpoints to
   *        retain is read
   * @param storage the storage whose directories are inspected
   * @param editLog the edit log that is asked to purge old segments
   * @throws IllegalArgumentException if the configured number of
   *         checkpoints to retain is not positive
   */
  public NNStorageRetentionManager(Configuration conf, NNStorage storage,
      FSEditLog editLog) {
    this(conf, storage, editLog, new DeletionStoragePurger());
  }

  /**
   * Inspects the storage directories, purges every image older than the
   * oldest checkpoint that must be retained, and then asks the edit log
   * to purge the transactions that checkpoint already covers.
   *
   * @throws IOException propagated from storage inspection or from the
   *         edit log purge
   */
  public void purgeOldStorage() throws IOException {
    FSImageTransactionalStorageInspector inspector =
        new FSImageTransactionalStorageInspector();
    storage.inspectStorageDirs(inspector);

    long minImageTxId = getImageTxIdToRetain(inspector);
    purgeCheckpointsOlderThan(inspector, minImageTxId);
    // If fsimage_N is the image we want to keep, then we need to keep
    // all txns > N. We can remove anything < N+1, since fsimage_N
    // reflects the state up to and including N.
    editLog.purgeLogsOlderThan(minImageTxId + 1);
  }

  /**
   * Passes to the purger every found image whose checkpoint transaction
   * ID is strictly less than the given one.
   *
   * @param inspector inspector that has already inspected all storage dirs
   * @param minTxId the smallest checkpoint transaction ID to keep
   */
  private void purgeCheckpointsOlderThan(
      FSImageTransactionalStorageInspector inspector,
      long minTxId) {
    for (FSImageFile image : inspector.getFoundImages()) {
      if (image.getCheckpointTxId() < minTxId) {
        LOG.info("Purging old image " + image);
        purger.purgeImage(image);
      }
    }
  }

  /**
   * @param inspector inspector that has already inspected all storage dirs
   * @return the transaction ID corresponding to the oldest checkpoint
   * that should be retained, or 0 if no images were found.
   */
  private long getImageTxIdToRetain(FSImageTransactionalStorageInspector inspector) {

    List<FSImageFile> images = inspector.getFoundImages();
    // A sorted set collapses duplicate txids, so each distinct checkpoint
    // transaction ID is counted once towards the retention limit.
    TreeSet<Long> imageTxIds = Sets.newTreeSet();
    for (FSImageFile image : images) {
      imageTxIds.add(image.getCheckpointTxId());
    }

    List<Long> imageTxIdsList = Lists.newArrayList(imageTxIds);
    if (imageTxIdsList.isEmpty()) {
      return 0;
    }

    // Newest first, so that index (toRetain - 1) is the oldest txid we keep.
    Collections.reverse(imageTxIdsList);
    // numCheckpointsToRetain is validated as >= 1 in the constructor and the
    // list is non-empty here, so toRetain is always >= 1.
    int toRetain = Math.min(numCheckpointsToRetain, imageTxIdsList.size());
    long minTxId = imageTxIdsList.get(toRetain - 1);
    LOG.info("Going to retain " + toRetain + " images with txid >= " +
        minTxId);
    return minTxId;
  }

  /**
   * Interface responsible for disposing of old checkpoints and edit logs.
   */
  static interface StoragePurger {
    void purgeLog(EditLogFile log);
    void purgeImage(FSImageFile image);
  }

  /**
   * StoragePurger that disposes of files by deleting them, logging a
   * warning for any file that could not be deleted.
   */
  static class DeletionStoragePurger implements StoragePurger {
    @Override
    public void purgeLog(EditLogFile log) {
      deleteOrWarn(log.getFile());
    }

    @Override
    public void purgeImage(FSImageFile image) {
      // Remove the image itself and the digest file MD5FileUtils maps it to.
      deleteOrWarn(image.getFile());
      deleteOrWarn(MD5FileUtils.getDigestFileForFile(image.getFile()));
    }

    private static void deleteOrWarn(File file) {
      if (!file.delete()) {
        // It's OK if we fail to delete something -- we'll catch it
        // next time we swing through this directory.
        LOG.warn("Could not delete " + file);
      }
    }
  }
}