定时任务实时生成PDF,用tornado异步IO将文件上传到S3,有几个坑记录如下:
import re
import boto3
import logging
from multiprocessing.dummy import Pool as ThreadPool
# AWS connection settings. NOTE(review): credentials are hardcoded here and
# left blank — fill in via deployment config; avoid committing real keys.
AWS_REGION_NAME = 'cn-north-1'  # China (Beijing) region
AWS_S3_ACCESS_KEY_ID = ""
AWS_S3_SECRET_ACCESS_KEY = ""
AWS_S3_WQS_BUCKET = ""  # target bucket whose keys get renamed
service_name="s3"  # NOTE(review): appears unused in this file
# Log destinations for stdout-style and error output.
LOG_OUT_FILENAME = "/var/log/fixs3_out.log"
LOG_ERROR_FILENAME = "/var/log/fixs3_err.log"
def setup_logger(logger_name, log_file, level=logging.INFO):
    """Configure the named logger with a file handler and a console handler.

    Args:
        logger_name: name passed to ``logging.getLogger``.
        log_file: path for the file handler; opened with mode='w', so any
            previous log content is truncated on each run.
        level: logging level applied to the logger (default INFO).

    Returns:
        The configured ``logging.Logger`` instance.
    """
    logger = logging.getLogger(logger_name)
    # Guard against repeated setup: adding handlers twice would duplicate
    # every log line. If handlers already exist, just refresh the level.
    if logger.handlers:
        logger.setLevel(level)
        return logger
    formatter = logging.Formatter(
        fmt='%(asctime)s.%(msecs)03d [ %(name)s:%(process)d ] - [ %(levelname)s ] %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')
    file_handler = logging.FileHandler(log_file, mode='w')
    file_handler.setFormatter(formatter)
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)
    logger.setLevel(level)
    logger.addHandler(file_handler)
    logger.addHandler(stream_handler)
    return logger
# Configure the shared 'log_out' logger once at import time; the rest of
# the module logs through this instance.
setup_logger('log_out', LOG_OUT_FILENAME)
out_logger = logging.getLogger('log_out')
class S3StoryOperate(object):
    """Bulk-rename objects inside an S3 bucket by fixing their key paths.

    S3 has no rename API; the fastest same-bucket approach is an
    object-level copy to the new key followed by a delete of the old key.
    """

    # Matches a "/p<num>-" path segment where <num> is 0-1000.
    # Compiled once instead of on every fix_path call.
    _PAGE_PATTERN = re.compile(r"/p(0|[1-9]\d?|[1-9]\d\d?|1000)-")

    def __init__(self):
        """Create the boto3 S3 resource.

        Raises:
            Exception: re-raised after logging if the session cannot be
                created (bad credentials, wrong region, network failure).
        """
        try:
            session = boto3.session.Session(
                aws_access_key_id=AWS_S3_ACCESS_KEY_ID,
                aws_secret_access_key=AWS_S3_SECRET_ACCESS_KEY,
                region_name=AWS_REGION_NAME)
            self.resource = session.resource('s3', AWS_REGION_NAME)
        except Exception:
            # Log the full traceback and re-raise instead of print-and-exit,
            # so the caller decides how to terminate.
            logging.getLogger('log_out').exception("failed to create S3 session")
            raise

    def get_all_key(self, bucket_name):
        """Return a list of every object key in *bucket_name*."""
        return [obj.key for obj in self.resource.Bucket(bucket_name).objects.all()]

    def rename_all_key(self):
        """Rename every mis-pathed key in AWS_S3_WQS_BUCKET via a thread pool.

        Each worker copies the object to its corrected key and deletes the
        original. Keys whose path needs no fix are skipped.
        """
        old_names = self.get_all_key(AWS_S3_WQS_BUCKET)

        def rename_one(old_name):
            # fix_path returns None when the key needs no change.
            fix_result = self.fix_path(old_name)
            if not fix_result:
                return
            new_path = fix_result[1]
            self.resource.Object(AWS_S3_WQS_BUCKET, new_path).copy_from(
                CopySource=AWS_S3_WQS_BUCKET + "/" + old_name)
            self.resource.Object(AWS_S3_WQS_BUCKET, old_name).delete()

        pool = ThreadPool(4)
        try:
            pool.map(rename_one, old_names)
        except Exception:
            logging.getLogger('log_out').exception("rename_all_key failed")
            raise
        finally:
            # Always release the pool, even when a worker raised
            # (the original leaked it on error).
            pool.close()
            pool.join()

    def fix_path(self, old_path):
        """Compute the corrected key for *old_path*.

        Strips the first "/p<num>-" segment from the key.

        Returns:
            Tuple ``(old_path, new_path, page_num)`` when the pattern is
            found, otherwise ``None``.
        """
        match = self._PAGE_PATTERN.search(old_path)
        if not match:
            return None
        page_num = match.group().replace("/p", "").replace("-", "")
        new_path = self._PAGE_PATTERN.sub("/", old_path)
        out = (old_path, new_path, page_num)
        logging.getLogger('log_out').info(out)
        return out

    def uploadfile(self, bucket, objkey, data):
        """Upload *data* to ``bucket``/``objkey``; returns the created object.

        Raises:
            Exception: re-raised after logging on upload failure.
        """
        try:
            return self.resource.Bucket(bucket).put_object(Key=objkey, Body=data)
        except Exception:
            logging.getLogger('log_out').exception("upload failed: %s", objkey)
            raise
if __name__=="__main__":
s3=S3StoryOperate()
s3.rename_all_key()
#s3.fix_path("szyz/unpaid/20170328/11/SX/220/214324456/d5e0fa9257b345faacd6959979265428/SZYZ20152055_96e3cbfd7dd74b6d88e0e968f5ca38b9.pdf")
1,boto3可能connect不上,文档没有提示,一定要正确设置区域(region_name),不然会403,报Access Denied;
2,boto3没有rename接口,在stackoverflow上面看了下,最好也是最快的解决方案就是copy+delete。
3,提供copy的接口有几个,一个是bucket提供接口,一个是client提供接口,还有个是object提供的copy接口,因为是桶内的rename,使用object的copy&delete方案最快。
4,fixpath是自己的业务代码,根据需求可以对应修改即可
版权声明:本文为weixin_42847874原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。