AWS LambdaでS3に格納されたCSVの絵文字を消してみた。


csvの中身

utf8mb4.csv
aa,😀,123
bb,😄,456
lambda_function.py
import urllib.parse
import boto3
import re

# Runs once when the module is imported, not on every handler call.
print('Loading function')

# Module-level S3 client; lambda_handler reads this global on each call.
s3 = boto3.client('s3')

# Code-point ranges removed from the CSV text. Compiled once at import time
# instead of on every invocation.
# NOTE(review): these four ranges do not cover every emoji (e.g. U+1F900
# and above are left in place) - extend if broader removal is required.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # miscellaneous symbols and pictographs
    "\U0001F680-\U0001F6FF"  # transport and map symbols
    "\U0001F1E0-\U0001F1FF"  # regional indicator symbols (flags)
    "]+",
    flags=re.UNICODE,
)


def lambda_handler(event, context):
    """Strip emoji from the CSV object referenced by an S3 event.

    Fetches the object named in the first record of ``event``, decodes it
    as UTF-8, prints the text before and after emoji removal, and returns
    the cleaned text split into lines.

    Args:
        event: S3 event notification dict; only ``Records[0]`` is read.
        context: Lambda context object (unused).

    Returns:
        The cleaned text as a list of lines without line terminators.
        Any line-ending style (CRLF, LF, CR) is accepted.

    Raises:
        Exception: Whatever the S3 fetch/read raises, re-raised after the
            failure has been printed.
        UnicodeDecodeError: If the object body is not valid UTF-8.
    """
    record = event['Records'][0]['s3']
    bucket = record['bucket']['name']
    # Object keys arrive URL-encoded in the event (spaces as '+').
    key = urllib.parse.unquote_plus(record['object']['key'], encoding='utf-8')

    # Keep the try limited to the S3 access so the "Error getting object"
    # hint is only printed for fetch failures, not for decode errors.
    try:
        response = s3.get_object(Bucket=bucket, Key=key)
        body = response['Body'].read()
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
        raise

    bodystr = body.decode('utf-8')
    print(bodystr)
    bodystr = _EMOJI_PATTERN.sub('', bodystr)
    print(bodystr)

    # splitlines() handles LF-only files too; split('\r\n') left them whole.
    return bodystr.splitlines()

結果

Function logs:
START RequestId: XXXXXXXX
aa,😀,123
bb,😄,456
aa,,123
bb,,456
END RequestId: XXXXXXXX
REPORT RequestId: XXXXXXXX  Duration: 222.76 ms Billed Duration: 223 ms Memory Size: 128 MB Max Memory Used: 80 MB  Init Duration: 452.44 ms

参考文献