3/07/2022

get pdf file list in aws s3 bucket

Get a list of all PDF files in an S3 bucket.

Change the file extension in the check to match the file type you need.


..

#s3 client
import boto3
from botocore.exceptions import ClientError

# Placeholder credentials: substitute real values before running, and never
# commit real keys to source control.
ACCESS_KEY = 'key'
SECRET_KEY = 'secret_key'

# Build the S3 client once, with explicit credentials and region. Creating a
# default client first and then overwriting it only wastes a construction.
s3_client = boto3.client(
    's3',
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
    region_name='eu-west-1',
)

# Get the PDF list.
# A paginator is used so that listings spanning more than one response page
# are walked completely.
paginator = s3_client.get_paginator('list_objects_v2')
pages = paginator.paginate(Bucket='bucket_name')

cnt = 0  # number of PDF keys found so far
pdf_list = []  # keys of every PDF object, in listing order
for page in pages:
    # A page carries no 'Contents' entry when it holds no objects (for
    # example an empty bucket), so fall back to an empty list instead of
    # raising KeyError.
    for obj in page.get('Contents', []):
        # Object key (full "path" of the file inside the bucket).
        fn = obj['Key']
        # Match the real extension, case-insensitively. Comparing only the
        # last three characters would accept keys such as 'notapdf' and
        # reject 'REPORT.PDF'.
        if fn.lower().endswith('.pdf'):
            pdf_list.append(fn)
            # Print the running count and the file name.
            print(f'cnt:{cnt}, fn:{fn}')
            cnt += 1

..


Another snippet, for searching only inside a sub-folder (key prefix) of the bucket:

..

# Listing arguments: the bucket to scan and the key prefix ("sub-folder")
# passed to the paginator.
operation_parameters = dict(
    Bucket='bucket_name',
    Prefix='sub_folder_name',
)
# Re-run the pagination with those arguments.
pages = paginator.paginate(**operation_parameters)

..


www.marearts.com

thank you.

🙇🏻‍♂️