批量删除桶内对象
 
最近更新时间:2025-01-10 14:01:41
                  
 本文帮助您了解对象存储批量删除桶内对象的最佳实践。
 本文主要针对用户需要批量删除一个桶中的对象的场景,介绍几种批量删除的方法。
一、使用客户端工具批量删除
使用S3Browser批量删除桶中的对象
S3Browser工具是一款客户端工具,可在S3Browser官网下载,安装即可使用非商用版。使用方法可参考S3Browser网站相关文章。
S3Browser对象列表中选择多个对象删除
按住Ctrl 键,鼠标点击S3Browser对象列表中的对象,可以同时选中多个对象,点击对象列表下方【Delete】按钮,弹出确认对话框,点击【确认】按钮可同时删除多个对象。

S3Browser全选对象列表删除
点击S3Browser 菜单项中的【Files】项,再点击【Select All】项,可将对象列表中展示的对象(最多1000个)全部选中。

点击对象列表下方的【Delete】按钮,并点击【确认】后,可将对象列表中展示的对象全部删除。

二、使用SDK调用接口删除桶内对象
参考《开发者文档》,选择合适的SDK,适配环境后,调用接口,即可删除对象。
Python SDK 删除桶中对象
以Python SDK 为例,Python SDK的使用方法请参考《ZOS对象存储Python_SDK使用手册》,此处举例说明调用接口删除一个桶中的全部对象。
config.py
Bucket_name="testbk"  # target bucket whose objects will be deleted
AK="test"  # S3 access key
SK="test"  # S3 secret key
URL="http://192.168.16.155:8000"  # S3-compatible endpoint address
multi_pool_num=16  # number of worker processes in the multiprocessing Pool
loglevel="debug"  # log level name, e.g. "debug"/"info"/"error"
log.py
# -*- coding: utf-8 -*-
import logging
from logging.handlers import RotatingFileHandler
import platform
import pathlib
import os
# Map upper-case level names to the corresponding stdlib logging constants.
loglevelswitch = {
    level_name: getattr(logging, level_name)
    for level_name in ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
}
def set_logger(level=logging.ERROR, file_path=os.path.join("log", "objs_clean.log"), name="objs_clean", format_string=None):
    """Attach a rotating file handler to the logger called *name*.

    Idempotent: if the logger already has handlers, the call is a no-op,
    so repeated calls never duplicate log output.

    :param level: logging level applied to both logger and handler.
    :param file_path: log file path; the parent directory must already
        exist.  (Built with os.path.join -- the previous default
        "log\\objs_clean.log" hard-coded a Windows separator, which on
        Linux produced a file literally named ``log\\objs_clean.log``.)
    :param name: logger name; callers in this tool use str(os.getpid()).
    :param format_string: optional log format; a verbose default with
        process/thread/location info is used when omitted.
    """
    if not format_string:
        format_string = "%(asctime)s [%(levelname)s] process:%(process)d thread:%(thread)d, %(filename)s:%(lineno)d %(message)s"
    logger = logging.getLogger(name)
    if not logger.handlers:
        logger.setLevel(level)
        # Rotate at ~20 KiB, keeping at most 5 backup files.
        fh = RotatingFileHandler(file_path, maxBytes=1024 * 20, backupCount=5)
        fh.setLevel(level)
        fh.setFormatter(logging.Formatter(format_string))
        logger.addHandler(fh)
def create_log_path(level):
    """Create the per-process log directory/file and configure the logger.

    The logger name is the current PID, so each worker process of the
    multiprocessing pool writes to its own log file.

    :param level: level name such as "debug"/"INFO"; falls back to ERROR
        when empty or unrecognized.
    """
    loglevel = level.upper() if level else 'ERROR'
    if platform.system() == 'Linux':
        file_path = "/var/log/objs_clean/"
    else:
        # Windows and any other platform log beneath the working directory.
        # (Previously file_path was only set for Linux/Windows, so any other
        # OS raised NameError; backslash literals like "\log" also relied on
        # \l not being an escape sequence.)
        file_path = os.path.join(os.getcwd(), "log")
    log_file = os.path.join(file_path, "objs_clean." + str(os.getpid()) + ".log")
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(file_path, exist_ok=True)
    if not os.path.exists(log_file):
        pathlib.Path(log_file).touch()
    set_logger(loglevelswitch.get(loglevel, logging.ERROR), log_file, name=str(os.getpid()))
bucket_empty.py
#-*-coding:utf-8 -*-
from boto3.session import Session
from botocore.client import Config
import config
import log
import os
import time
import logging
import threading
from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED 
from multiprocessing import Pool
from time import strftime, localtime
def err_call(value):
    """Error callback for Pool.apply_async: log the worker failure."""
    logging.getLogger(str(os.getpid())).error("____err_call____ {0}".format(value))
def delete_sub_objs(res_contents, s3_client, logger):
    """Delete each listed object from the configured bucket, one by one.

    Failures are logged and skipped so that a single bad key never aborts
    the rest of the batch.

    :param res_contents: list of listing entries, each with a "Key" field.
    :param s3_client: boto3 S3 client used for the delete calls.
    :param logger: logger receiving per-key debug/error messages.
    """
    for entry in res_contents:
        key_name = entry.get("Key")
        try:
            s3_client.delete_object(Bucket=config.Bucket_name, Key=key_name)
            logger.debug("delete obj {0} complete...".format(key_name))
        except Exception as e:
            tb = e.__traceback__
            logger.error('__file__:{0}, objname:{1}, lineno:{2}'.format(
                tb.tb_frame.f_globals['__file__'], key_name, tb.tb_lineno))
            logger.error(e.args)
def delete_objs(res_contents):
    """Worker-process entry point: delete one listing page of objects.

    Builds its own S3 client (clients are created per process rather than
    shared across the multiprocessing pool), then fans the keys out to a
    50-thread pool in roughly equal slices.

    :param res_contents: one page of list_objects "Contents" entries.
    """
    session = Session(config.AK, config.SK)
    s3_client = session.client('s3', endpoint_url=config.URL, verify=False)
    log.create_log_path(config.loglevel)
    worker_logger = logging.getLogger(str(os.getpid()))
    total_nums = len(res_contents)
    worker_logger.info("begin delete_objs...")
    # Aim for ~50 slices of at least 5 keys each; the first `leftover`
    # slices carry one extra key so every entry is covered exactly once.
    slice_len = total_nums // 50
    leftover = 0
    if slice_len < 5:
        slice_len = 5
    else:
        leftover = total_nums % 50
    futures = []
    start = 0
    with ThreadPoolExecutor(max_workers=50) as thread_pool:
        while start < total_nums:
            if leftover:
                stop = start + slice_len + 1
                leftover -= 1
            else:
                stop = start + slice_len
            futures.append(thread_pool.submit(
                delete_sub_objs, res_contents[start:stop], s3_client, worker_logger))
            start = stop
        wait(futures, return_when=ALL_COMPLETED)
    worker_logger.info("end delete_objs..., total num: {}".format(total_nums))
if __name__ == "__main__":
  # Driver: page through the bucket listing and hand each page of up to
  # 1000 keys to a worker process for parallel deletion.
  log.create_log_path(config.loglevel)
  s_logger = logging.getLogger(str(os.getpid()))
  pool = Pool(processes=config.multi_pool_num)
  session = Session(config.AK, config.SK)
  s3_client = session.client('s3', endpoint_url=config.URL, verify=False)
  next_key_marker = ""
  is_truncated = True
  s_logger.info('=============================del begin time=============================== :{0}'.format(strftime("%Y-%m-%d %H:%M:%S", localtime())))
  while is_truncated:
    res_obj = s3_client.list_objects(Bucket=config.Bucket_name, Marker=next_key_marker, MaxKeys=1000)
    res_contents = res_obj.get('Contents', [])
    s_logger.debug(res_contents)
    if res_contents:
      pool.apply_async(delete_objs, args=(res_contents,), error_callback = err_call)
      # Back-pressure: throttle the listing loop while the workers lag.
      # NOTE(review): _cache is a private multiprocessing.Pool attribute
      # holding unfinished AsyncResults -- confirm it exists on the target
      # Python version.
      while len(pool._cache) >= config.multi_pool_num*2:
          s_logger.info('waiting for cache to clear...')
          time.sleep(3)
    is_truncated = bool(res_obj.get('IsTruncated'))
    # BUGFIX: without a Delimiter, S3 list_objects may omit NextMarker even
    # when the listing is truncated; fall back to the last returned key so
    # the next call does not pass Marker=None and loop on the first page.
    next_key_marker = res_obj.get('NextMarker') or (res_contents[-1]['Key'] if res_contents else "")
  pool.close()
  pool.join()
  s_logger.info('=============================del end time ============================= :{0}'.format(strftime("%Y-%m-%d %H:%M:%S", localtime())))
在按实际环境修改 config.py 后,执行 `python bucket_empty.py` 即可批量删除桶内全部对象。
