Get a list of all PDF files in an S3 bucket.
Change the file extension in the check to match the file type you need.
..
#s3 client
import boto3
from botocore.exceptions import ClientError
# Credentials used to build the S3 client.
# NOTE: 'key' / 'secret_key' are placeholders. Do not commit real keys to
# source control; when these arguments are omitted boto3 resolves credentials
# from environment variables, the shared ~/.aws/credentials file, or an IAM
# role instead.
ACCESS_KEY = 'key'
SECRET_KEY = 'secret_key'

# Create the client exactly once, with explicit credentials and region.
# Building a default boto3.client('s3') first is unnecessary, because it
# would be replaced immediately by this assignment.
s3_client = boto3.client(
    's3',
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
    region_name='eu-west-1',
)
# Collect the keys of all PDF objects in the bucket.
# A single list_objects_v2 call returns a limited number of keys, so a
# paginator is used to walk every page of results.
paginator = s3_client.get_paginator('list_objects_v2')
pages = paginator.paginate(Bucket='bucket_name')
cnt = 0        # number of PDF keys found so far
pdf_list = []  # keys of every matching object
for page in pages:
    # 'Contents' is missing from a page that holds no objects (for example
    # an empty bucket), so default to an empty list instead of raising
    # KeyError.
    for obj in page.get('Contents', []):
        # Object key, i.e. the full name of the file inside the bucket.
        fn = obj['Key']
        # Match on the real extension: require the leading dot so a key such
        # as 'notpdf' is not picked up, and ignore case so '.PDF' is found.
        if fn.lower().endswith('.pdf'):
            pdf_list.append(fn)
            # Report the running index and the key that was matched.
            print(f'cnt:{cnt}, fn:{fn}')
            cnt += 1
..
Alternative code for searching within a sub-folder of the bucket:
..
# Limit the listing to keys that begin with the given prefix (the sub-folder
# name) by passing the arguments to the paginator as keyword parameters.
operation_parameters = dict(
    Bucket='bucket_name',
    Prefix='sub_folder_name',
)
pages = paginator.paginate(**operation_parameters)
..
www.marearts.com
thank you.
🙇🏻‍♂️
No comments:
Post a Comment