Get a list of all PDF files in an S3 bucket.
Change the file extension in the check to match the file type you need.
..
#s3 client
import boto3
from botocore.exceptions import ClientError
# NOTE(review): these are hard-coded placeholder credentials. Never commit real
# keys; prefer boto3's default credential chain (environment variables,
# shared credentials file, or an IAM role) in real deployments.
ACCESS_KEY = 'key'
SECRET_KEY = 'secret_key'

# Build a single, explicitly configured client. The earlier bare
# boto3.client('s3') call was immediately overwritten and could fail on its
# own when no default region is configured, so it has been dropped.
s3_client = boto3.client(
    's3',
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
    region_name='eu-west-1',
)

# get pdf list
# list_objects_v2 returns at most 1000 keys per call, so walk every page.
paginator = s3_client.get_paginator('list_objects_v2')
pages = paginator.paginate(Bucket='bucket_name')
cnt = 0        # number of PDF keys found so far
pdf_list = []  # keys of every PDF object in the bucket
for page in pages:
    # 'Contents' is omitted from a page that holds no keys (e.g. an empty
    # bucket), so fall back to an empty list instead of raising KeyError.
    for obj in page.get('Contents', []):
        # get file name
        fn = obj['Key']
        # check extension name: require the dot and ignore case so that
        # 'a.PDF' matches while a key such as 'notapdf' does not.
        if fn.lower().endswith('.pdf'):
            pdf_list.append(fn)
            # print count, filename
            print(f'cnt:{cnt}, fn:{fn}')
            # increase count
            cnt += 1
..
Alternative code for searching within a sub-folder of the bucket:
..
# Restrict the listing to keys that start with the given prefix.
# Reuses the `paginator` created for list_objects_v2 earlier in the script.
operation_parameters = dict(
    Bucket='bucket_name',
    Prefix='sub_folder_name',
)
pages = paginator.paginate(
    Bucket=operation_parameters['Bucket'],
    Prefix=operation_parameters['Prefix'],
)
..
www.marearts.com
thank you.
🙇🏻‍♂️
 
 
 
 
No comments:
Post a Comment