【S3】Multipart upload with boto3


Notes on doing a multipart upload of large data to S3 with boto3. The flow is: start the upload with create_multipart_upload, send each chunk with upload_part, then finalize with complete_multipart_upload.

multi_part_upload.py
from memory_profiler import profile
import boto3
import uuid
import json

sc = boto3.client('s3')
bucket_name = '<bucket_name>'
encoding = 'utf-8'

@profile()
def upload():
    # Random object key under tests/
    key = f'tests/{uuid.uuid4().hex[10:]}'
    # Start the multipart upload; the returned UploadId ties all the parts together.
    mpu = sc.create_multipart_upload(
        Bucket=bucket_name,
        ContentEncoding=encoding,
        ContentType=f'application/x-jsonlines; charset={encoding}',
        Key=key
    )

    upload_id = mpu['UploadId']

    end = 10  # number of parts to upload
    parts = []

    for i in range(end):
        part_number = i + 1  # part numbers start at 1
        j = {'id': i}  # sample JSON payload for each line
        # Every part except the last must be at least 5 MiB; this builds roughly 12 MiB of JSON lines.
        content = f'{json.dumps(j)}\n' * 1024 * 1024
        part = sc.upload_part(
            Bucket=bucket_name,
            Key=key,
            PartNumber=part_number,
            UploadId=upload_id,
            Body=content.encode(encoding)
        )
        # Keep each part's number and ETag; complete_multipart_upload needs them.
        parts.append({
            'PartNumber': part_number,
            'ETag': part['ETag']
        })

    # Finish the upload once every part is in; S3 assembles the parts into the final object.
    sc.complete_multipart_upload(
        Bucket=bucket_name,
        MultipartUpload={
            'Parts': parts
        },
        Key=key,
        UploadId=upload_id
    )


if __name__ == '__main__':
    upload()
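
If the script dies between create_multipart_upload and complete_multipart_upload, the parts uploaded so far stay in the bucket (and are billed) until the upload is aborted or completed. Below is a minimal sketch of the same flow with cleanup on failure; upload_with_cleanup and its arguments are made-up names for illustration, while abort_multipart_upload is the actual boto3 call that discards orphaned parts.

abort_on_failure.py
import boto3

sc = boto3.client('s3')
bucket_name = '<bucket_name>'


def upload_with_cleanup(key, chunks):
    """Upload chunks (an iterable of bytes; every chunk except the last
    must be at least 5 MiB) as one multipart object, aborting the
    upload if anything fails along the way."""
    mpu = sc.create_multipart_upload(Bucket=bucket_name, Key=key)
    upload_id = mpu['UploadId']
    parts = []
    try:
        for part_number, chunk in enumerate(chunks, start=1):
            part = sc.upload_part(
                Bucket=bucket_name,
                Key=key,
                PartNumber=part_number,
                UploadId=upload_id,
                Body=chunk
            )
            parts.append({'PartNumber': part_number, 'ETag': part['ETag']})
        sc.complete_multipart_upload(
            Bucket=bucket_name,
            Key=key,
            MultipartUpload={'Parts': parts},
            UploadId=upload_id
        )
    except Exception:
        # Delete the already-uploaded parts instead of leaving them behind.
        sc.abort_multipart_upload(Bucket=bucket_name, Key=key, UploadId=upload_id)
        raise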