本文主要针对用户需要批量删除一个桶中的对象的场景,介绍几种批量删除的方法。
一、使用客户端工具批量删除
使用S3Browser批量删除桶中的对象
S3Browser工具是一款客户端工具,可在S3Browser官网下载,安装即可使用非商用版。使用方法可参考S3Browser网站相关文章。
S3Browser对象列表中选择多个对象删除
按住Ctrl 键,鼠标点击S3Browser对象列表中的对象,可以同时选中多个对象,点击对象列表下方【Delete】按钮,弹出确认对话框,点击【确认】按钮可同时删除多个对象。
S3Browser全选对象列表删除
点击S3Browser 菜单项中的【Files】项,再点击【Select All】项,可将对象列表中展示的对象(最多1000个)全部选中。
点击对象列表下方的【Delete】按钮,并点击【确认】后,可将对象列表中展示的对象全部删除。
二、使用SDK调用接口删除桶内对象
参考《开发者文档》,选择合适的SDK,适配环境后,调用接口,即可删除对象。
Python SDK 删除桶中对象
以Python SDK 为例(Python SDK的使用方法请参考《ZOS对象存储Python_SDK使用手册》),下面举例说明如何调用接口删除一个桶中的全部对象。示例由 config.py、log.py、bucket_empty.py 三个文件组成。
config.py
# Name of the bucket whose objects bucket_empty.py lists and deletes.
Bucket_name="testbk"
# Access key and secret key passed to boto3's Session(); placeholder values here.
AK="test"
SK="test"
# Endpoint URL handed to the S3 client as endpoint_url.
URL="http://192.168.16.155:8000"
# Number of worker processes in the multiprocessing Pool; bucket_empty.py also
# pauses listing once twice this many delete tasks are outstanding.
multi_pool_num=16
# Log level name; log.create_log_path upper-cases it and looks it up in
# loglevelswitch, falling back to ERROR for unknown names.
loglevel="debug"
log.py
# -*- coding: utf-8 -*-
import logging
from logging.handlers import RotatingFileHandler
import platform
import pathlib
import os
# Maps upper-case level names to the numeric levels defined by the logging module.
loglevelswitch = {
    level_name: getattr(logging, level_name)
    for level_name in ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
}
def set_logger(level=logging.ERROR, file_path="log\\objs_clean.log", name="objs_clean", format_string=None):
    """Attach a rotating file handler to the logger called *name*, once.

    If the logger already has at least one handler the call is a no-op, so
    repeated calls never stack duplicate handlers (and never change the
    level that was set the first time).

    Args:
        level: Numeric logging level applied to both the logger and the handler.
        file_path: Log file to write. The default keeps the original
            Windows-style relative path; the backslash is now escaped so the
            literal no longer relies on an invalid escape sequence.
        name: Name of the logger to configure.
        format_string: Record format; a default including time, level,
            process id, thread id, file and line is used when empty.
    """
    if not format_string:
        format_string = "%(asctime)s [%(levelname)s] process:%(process)d thread:%(thread)d, %(filename)s:%(lineno)d %(message)s"
    logger = logging.getLogger(name)
    if logger.handlers:
        # Already configured; adding another handler would duplicate every record.
        return
    # RotatingFileHandler opens the file immediately and fails if the parent
    # directory is missing, so create it first.
    log_dir = os.path.dirname(file_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    logger.setLevel(level)
    fh = RotatingFileHandler(file_path, maxBytes=1024 * 20, backupCount=5)
    fh.setLevel(level)
    fh.setFormatter(logging.Formatter(format_string))
    logger.addHandler(fh)
def create_log_path(level):
    """Create the per-process log file and configure its logger.

    The log file is named ``objs_clean.<pid>.log`` and the logger is
    registered under the process id (as a string), so each worker process
    writes to its own file.

    Args:
        level: Level name such as ``"debug"``; it is upper-cased and looked
            up in ``loglevelswitch``. An empty value or an unknown name
            falls back to ERROR.
    """
    loglevel = level.upper() if level else 'ERROR'
    pid = str(os.getpid())
    if platform.system() == 'Linux':
        file_path = "/var/log/objs_clean/"
    else:
        # Windows used <cwd>\log originally; other platforms previously left
        # file_path unbound and crashed, so they now share this location.
        file_path = os.path.join(os.getcwd(), "log")
    log_file = os.path.join(file_path, "objs_clean." + pid + ".log")
    # exist_ok avoids the check-then-create race when several pool workers
    # start at the same time.
    os.makedirs(file_path, exist_ok=True)
    pathlib.Path(log_file).touch()
    set_logger(loglevelswitch.get(loglevel, logging.ERROR), log_file, name=pid)
bucket_empty.py
#-*-coding:utf-8 -*-
from boto3.session import Session
from botocore.client import Config
import config
import log
import os
import time
import logging
import threading
from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED
from multiprocessing import Pool
from time import strftime, localtime
def err_call(value):
    """Log *value* as an error on the logger named after the current process id.

    Used as the ``error_callback`` for tasks submitted to the process pool.
    """
    message = "____err_call____ {0}".format(value)
    logging.getLogger(str(os.getpid())).error(message)
def delete_sub_objs(res_contents, s3_client, logger):
    """Delete every listed object from the configured bucket, one request per key.

    A failure on one object is logged and does not stop the remaining
    deletions.

    Args:
        res_contents: Iterable of listing entries; each entry's ``"Key"`` is
            the object name to delete.
        s3_client: Client object exposing ``delete_object(Bucket=..., Key=...)``.
        logger: Logger used for per-object debug and error records.
    """
    for entry in res_contents:
        key_name = entry.get("Key")
        try:
            s3_client.delete_object(Bucket=config.Bucket_name, Key=key_name)
            logger.debug("delete obj {0} complete...".format(key_name))
        except Exception as exc:
            # Report where the exception surfaced (file and line) together
            # with the object that could not be deleted.
            tb = exc.__traceback__
            source_file = tb.tb_frame.f_globals['__file__']
            logger.error('__file__:{0}, objname:{1}, lineno:{2}'.format(source_file, key_name, tb.tb_lineno))
            logger.error(exc.args)
def delete_objs(res_contents):
    """Delete one page of listed objects using a pool of 50 threads.

    The page is cut into consecutive slices and each slice is handed to
    ``delete_sub_objs`` on its own thread. Slice size is ``len // 50``; when
    that is below 5 every slice holds 5 entries instead, otherwise the first
    ``len % 50`` slices take one extra entry so nothing is left over.

    Args:
        res_contents: List of listing entries (each carrying a ``"Key"``).
    """
    s3_client = Session(config.AK, config.SK).client('s3', endpoint_url=config.URL, verify=False)
    log.create_log_path(config.loglevel)
    a_logger = logging.getLogger(str(os.getpid()))
    total_nums = len(res_contents)
    a_logger.info("begin delete_objs...")

    chunk_size, oversized_chunks = divmod(total_nums, 50)
    if chunk_size < 5:
        # Small pages: fixed slices of 5, no remainder spreading.
        chunk_size, oversized_chunks = 5, 0

    pending = []
    start = 0
    with ThreadPoolExecutor(max_workers=50) as executor:
        while start < total_nums:
            stop = start + chunk_size
            if oversized_chunks != 0:
                # Spread the remainder: this slice takes one extra entry.
                stop += 1
                oversized_chunks -= 1
            pending.append(executor.submit(delete_sub_objs, res_contents[start:stop], s3_client, a_logger))
            start = stop
        wait(pending, return_when=ALL_COMPLETED)
    a_logger.info("end delete_objs..., total num: {}".format(total_nums))
if __name__ == "__main__":
    # Entry point: list the bucket page by page (up to 1000 keys per page) and
    # hand each page to a worker process that deletes its objects.
    log.create_log_path(config.loglevel)
    s_logger = logging.getLogger(str(os.getpid()))
    pool = Pool(processes=config.multi_pool_num)
    session = Session(config.AK, config.SK)
    s3_client = session.client('s3', endpoint_url=config.URL, verify=False)
    # Listing pauses once this many delete tasks are outstanding.
    max_pending = config.multi_pool_num * 2
    pending_results = []
    key_marker = ""
    is_truncated = True
    s_logger.info('=============================del begin time=============================== :{0}'.format(strftime("%Y-%m-%d %H:%M:%S", localtime())))
    try:
        while is_truncated:
            res_obj = s3_client.list_objects(Bucket=config.Bucket_name, Marker=key_marker, MaxKeys=1000)
            res_contents = res_obj.get('Contents', [])
            s_logger.debug(res_contents)
            if res_contents:
                pending_results.append(
                    pool.apply_async(delete_objs, args=(res_contents,), error_callback=err_call)
                )
                while True:
                    # Track unfinished tasks through the public AsyncResult
                    # API instead of the pool's private _cache attribute.
                    pending_results = [res for res in pending_results if not res.ready()]
                    if len(pending_results) < max_pending:
                        break
                    s_logger.info('waiting for cache to clear...')
                    time.sleep(3)
            is_truncated = res_obj.get('IsTruncated')
            if is_truncated:
                # NextMarker may be absent from the response (S3 only returns
                # it when a delimiter is given); the last listed key is the
                # documented fallback marker.
                next_key_marker = res_obj.get('NextMarker')
                if not next_key_marker and res_contents:
                    next_key_marker = res_contents[-1].get('Key')
                if not next_key_marker:
                    # Without a marker the same page would be requested forever.
                    s_logger.error('listing is truncated but no marker is available, stop listing')
                    break
                key_marker = next_key_marker
    finally:
        # Always let already-submitted tasks finish and release the workers,
        # even if listing failed part-way through.
        pool.close()
        pool.join()
    s_logger.info('=============================del end time ============================= :{0}'.format(strftime("%Y-%m-%d %H:%M:%S", localtime())))
将 config.py、log.py、bucket_empty.py 三个文件放在同一目录下,按实际环境修改 config.py 中的配置后,在该目录下执行 python bucket_empty.py 即可。