Showing posts with label s3.

3/30/2023

The list_objects_v2 function returns up to 1,000 objects per call. To read all the contents of a bucket, you can use a paginator.

Refer to the code below. You can change '.json' to whatever extension fits your case.

.

import boto3

def get_origin_fn_list(ORIGIN_DATA_S3, ORIGIN_DATA_S3_prefix):
    s3 = boto3.client('s3')
    paginator = s3.get_paginator('list_objects_v2')
    origin_path = {}
    total = 0

    for response in paginator.paginate(Bucket=ORIGIN_DATA_S3, Prefix=ORIGIN_DATA_S3_prefix):
        for obj in response.get('Contents', []):
            total += 1
            if obj['Key'].endswith('.json'):
                path = obj['Key']
                uid = path.split('/')[-2]  #parent folder name as uid
                origin_path[uid] = path

    print(f"get kv.json list: {len(origin_path)}/{total}")
    return origin_path

..
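
Usage might look like this (the bucket and prefix here are placeholder values):

.

origin_path = get_origin_fn_list('my-bucket', 'data/origin/')
print(len(origin_path))

..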


Thank you.

🙇🏻‍♂️

www.marearts.com


3/16/2023

Sync a local directory with an S3 bucket, code for Jupyter

Refer to the code:


.

local_directory = "/path/to/your/local/directory"
s3_bucket = "your-s3-bucket-name"
s3_folder = "your-s3-folder-name"

# Sync S3 bucket folder to local directory
!aws s3 sync s3://$s3_bucket/$s3_folder $local_directory

# Sync local directory to S3 bucket folder
!aws s3 sync $local_directory s3://$s3_bucket/$s3_folder

..

Replace /path/to/your/local/directory, your-s3-bucket-name, and your-s3-folder-name with your specific values. The first aws s3 sync command downloads the S3 folder's contents to the local directory, and the second uploads the local directory's contents to the S3 folder. Use whichever direction you need.

Note that aws s3 sync only copies new and updated files by default, and it does support filtering with the --exclude and --include options.
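
For example, a minimal sketch (same placeholder names as above) that syncs only .json files:

.

# Copy only .json files from the S3 folder to the local directory
!aws s3 sync s3://$s3_bucket/$s3_folder $local_directory --exclude "*" --include "*.json"

..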


Thank you.

🙇🏻‍♂️

www.marearts.com



3/15/2023

Copy an S3 object to another bucket

Refer to the code:


.

import boto3

# Initialize the S3 client
s3 = boto3.client('s3')

# Specify the source and destination S3 buckets and object keys
source_bucket = 'source-bucket-name'
source_key = 'path/to/source/object'

destination_bucket = 'destination-bucket-name'
destination_key = 'path/to/destination/object'

# Copy the object from the source bucket to the destination bucket
s3.copy_object(
    CopySource={'Bucket': source_bucket, 'Key': source_key},
    Bucket=destination_bucket,
    Key=destination_key
)

print(f"Copied object from '{source_bucket}/{source_key}' to '{destination_bucket}/{destination_key}'")

..

Replace the placeholder values for source_bucket, source_key, destination_bucket, and destination_key with your actual bucket names and object keys. This code will copy the specified object from the source bucket to the destination bucket.
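
One caveat: copy_object uses the CopyObject API, which only handles objects up to 5 GB. For larger objects, boto3's managed copy performs a multipart copy automatically. A minimal sketch, reusing the variables above:

.

# Managed copy: switches to a multipart copy when the object is large
s3.copy(
    CopySource={'Bucket': source_bucket, 'Key': source_key},
    Bucket=destination_bucket,
    Key=destination_key
)

..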


Thank you

🙇🏻‍♂️

www.marearts.com

Load a JSON file into memory from an S3 bucket object (Python example code)

Refer to the code:

.

import boto3
import json

# Initialize the S3 client
s3 = boto3.client('s3')

# Specify the S3 bucket and JSON file key
bucket_name = 'your-bucket-name'
file_key = 'path/to/your/file.json'

# Download the JSON file from the S3 bucket
response = s3.get_object(Bucket=bucket_name, Key=file_key)
content = response['Body'].read()

# Parse the JSON content
data = json.loads(content)

# Print the JSON data
print(data)

..
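
Conversely, a minimal sketch (not in the original post) for writing a dict back to the bucket with put_object, reusing the names above:

.

# Serialize the dict and upload it as a JSON object
body = json.dumps(data).encode('utf-8')
s3.put_object(Bucket=bucket_name, Key=file_key, Body=body, ContentType='application/json')

..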


Thank you.

🙇🏻‍♂️

www.marearts.com

3/01/2023

Folder existence check in an S3 bucket, Python example code

Check whether a folder exists in an S3 bucket.

Refer to the code:

.

import boto3
import botocore.exceptions

s3 = boto3.client('s3')
bucket_name = 'your-bucket-name'
folder_path = 'f1/f2/f3/f4/'

try:
    response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_path)
    if 'Contents' in response or 'CommonPrefixes' in response:
        print(f"Folder {folder_path} exists in bucket {bucket_name}")
    else:
        print(f"Folder {folder_path} does not exist in bucket {bucket_name}")
except botocore.exceptions.ClientError as e:
    if e.response['Error']['Code'] == "NoSuchBucket":
        print(f"Bucket {bucket_name} does not exist")
    else:
        raise

..
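
If you only need a yes/no answer, you can cap the listing at a single key; one match is enough to prove the prefix exists. A minimal sketch reusing bucket_name and folder_path from above:

.

# Ask for at most one key under the prefix
response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_path, MaxKeys=1)
folder_exists = response.get('KeyCount', 0) > 0
print(folder_exists)

..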


Thank you.

🙇🏻‍♂️

www.marearts.com

3/07/2022

Get the PDF file list in an AWS S3 bucket

Get the list of all PDF files in an S3 bucket.

Change the extension to suit your case.


..

#s3 client
import boto3
from botocore.exceptions import ClientError

ACCESS_KEY = 'key'
SECRET_KEY = 'secret_key'
s3_client = boto3.client(
    's3',
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
    region_name='eu-west-1'
)

#get pdf list
paginator = s3_client.get_paginator('list_objects_v2')
pages = paginator.paginate(Bucket='bucket_name')

cnt = 0
pdf_list = []
for page in pages:
    for obj in page.get('Contents', []):
        #get file name
        fn = obj['Key']
        #check extension name
        if fn.endswith('.pdf'):
            pdf_list.append(fn)
            #print count, filename
            print(f'cnt:{cnt}, fn:{fn}')
            #increase count
            cnt += 1

..


Other code, for searching a bucket plus a subfolder prefix:

..

operation_parameters = {'Bucket': 'bucket_name','Prefix': 'sub_folder_name'}
pages = paginator.paginate(**operation_parameters)

..
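
Alternatively (a sketch, not from the original post), the page iterator's .search() accepts a JMESPath expression that filters each page for you:

.

# JMESPath filter: keep only keys ending in '.pdf'
pages = paginator.paginate(Bucket='bucket_name')
pdf_list = [key for key in pages.search("Contents[?ends_with(Key, '.pdf')].Key") if key]
print(len(pdf_list))

..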


www.marearts.com

Thank you.

🙇🏻‍♂️

10/13/2021

Get the file list in an S3 bucket folder (example Python code)

Refer to the source code:

..

import boto3
import botocore.exceptions

ACCESS_KEY = '**'
SECRET_KEY = '**'

s3_client = boto3.client(
    's3',
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
    region_name='eu-west-1'
)

bucket_name = '***'
folder_name = '***'

#check folder exist
try:
    result = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=folder_name)
    for o in result.get('Contents', []):
        print(o['Key'])  #file list in folder
except botocore.exceptions.ClientError as e:
    print('no folder exist')

..


Thank you 

www.marearts.com



8/27/2021

AWS, find all files in an S3 bucket

Find the full file list in an S3 bucket.

import boto3

BUCKET_INPUT = 'bucket_name'
PREFIX_INPUT = 'some_prefix_folder_name'

s3_client = boto3.client('s3')
item_list_all = []
paginator = s3_client.get_paginator('list_objects')
operation_parameters = {'Bucket': BUCKET_INPUT} #,'Prefix': PREFIX_INPUT}
page_iterator = paginator.paginate(**operation_parameters)
for page in page_iterator:
    item_list_all.append(page['Contents'])
key_all = []
for page in item_list_all:
    for i in range(len(page)):
        key_all.append(page[i]["Key"])
print("The total number of files in the bucket:", len(key_all))

Thank you.

www.marearts.com


11/27/2020

S3 bucket, copy an object to another bucket, Python example

Code:

import boto3

def copy_s3_object(s3_resource, source_bucket_name, source_key, target_bucket_name, target_key):
    copy_source = {'Bucket': source_bucket_name, 'Key': source_key}
    s3_resource.meta.client.copy(copy_source, target_bucket_name, target_key)

s3_resource = boto3.resource('s3')
copy_s3_object(s3_resource, source_bucket_name, source_key, target_bucket_name, target_key)

.

AWS S3, get all objects when there are more than 1,000 (Python example code)

Simply use a paginator instance.


example code

import boto3

s3_client = boto3.client('s3')
paginator = s3_client.get_paginator('list_objects_v2')
pages = paginator.paginate(Bucket='bucket', Prefix='folder1/')
for page in pages:
    for obj in page['Contents']:
        print(obj['Key'])

.

4/03/2020

Example Python code for: download an S3 object as an OpenCV image in memory, and upload it back too

Just see the code. It's not difficult.

...

...
import boto3
import cv2
import numpy as np

s3_client = boto3.client('s3')
...

def lambda_handler(event, context):
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    s3_path = event['Records'][0]['s3']['object']['key']
    #download object
    obj = s3_client.get_object(Bucket=bucket_name, Key=s3_path)
    #obj to cv2 image
    nparr = np.frombuffer(obj['Body'].read(), np.uint8)
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    #simple image processing
    reimg = cv2.resize(img, (100, 100))
    #cv2 image to png bytes
    image_string = cv2.imencode('.png', reimg)[1].tobytes()
    #upload
    s3_client.put_object(Bucket='thum-prj-output', Key=s3_path, Body=image_string)
...

...
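
One optional refinement (my suggestion, not in the original code): pass ContentType so the uploaded thumbnail is served as a PNG:

...
#same upload with an explicit content type
s3_client.put_object(Bucket='thum-prj-output', Key=s3_path, Body=image_string,
                     ContentType='image/png')
...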

7/01/2019

AWS S3, get the object list in a subfolder by Python code using the s3_client.list_objects function

This is my S3 folder structure:

[screenshot of the bucket's folder structure]

This is code to get the file list in a certain subfolder.

#get boto3 instance
s3_client = boto3.client(
    's3',
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
)

#get object list
contents = s3_client.list_objects(Bucket='test-can-delete-anyone', Prefix='folder1/subfolder1')['Contents']
for obj in contents:
    print(obj['Key'])


Result:
folder1/subfolder1/
folder1/subfolder1/4_kitchen.jpg
folder1/subfolder1/5_bathroom.jpg
folder1/subfolder1/5_bedroom.jpg
folder1/subfolder1/5_frontal.jpg
folder1/subfolder1/5_kitchen.jpg
folder1/subfolder1/6_bathroom.jpg

Another example:

#get object list
contents = s3_client.list_objects(Bucket='test-can-delete-anyone', Prefix='folder1/')['Contents']
for obj in contents:
    print(obj['Key'])

Result:
folder1/
folder1/1_kitchen.jpg
folder1/2_bathroom.jpg
folder1/2_bedroom.jpg
folder1/2_frontal.jpg
folder1/2_kitchen.jpg
folder1/subfolder1/
folder1/subfolder1/4_kitchen.jpg
folder1/subfolder1/5_bathroom.jpg
folder1/subfolder1/5_bedroom.jpg
folder1/subfolder1/5_frontal.jpg
folder1/subfolder1/5_kitchen.jpg
folder1/subfolder1/6_bathroom.jpg
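
Since S3 keys are flat, the "folders" above are really just key prefixes. If you only want the immediate sub-prefixes instead of every object, a sketch using Delimiter and CommonPrefixes:

#list only the top-level "folders" under folder1/
response = s3_client.list_objects(Bucket='test-can-delete-anyone', Prefix='folder1/', Delimiter='/')
for cp in response.get('CommonPrefixes', []):
    print(cp['Prefix'])   #e.g. folder1/subfolder1/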


AWS S3 bucket - check whether a folder exists by Python code

Check whether a certain folder exists in an S3 bucket with Python code.


-
#create boto3 instance
s3_client = boto3.client(
    's3',
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
)

#check folder exist
#note: get_object only succeeds if an explicit 'folder-name/' marker object exists
try:
    s3_client.get_object(Bucket='s3-bucket-name', Key='folder-name/')
    print('folder exist')
except botocore.exceptions.ClientError as e:
    print('no folder exist')
-

Thank you.


The same check as a function:
def check_folder_exist(s3_client, bucket_name, folder_name):
    
    try:
        s3_client.get_object(Bucket=bucket_name, Key=folder_name)
        return True
    except botocore.exceptions.ClientError as e:
        return False


If the code above doesn't work (it relies on an explicit folder marker object existing), try this one as well, which checks the prefix instead:

def check_folder_exist(bucket_name, folder_name):
    try:
        print(bucket_name, folder_name)
        result = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=folder_name)
        if 'Contents' in result:
            return True
        else:
            return False
    except botocore.exceptions.ClientError as e:
        print(e)
        return False

6/30/2019

AWS S3 bucket, folder creation in Python code

Basically an S3 bucket doesn't have a folder concept. But this code creates a "folder" by key: a zero-byte object whose key ends with '/', holding no content of its own.


--
#create boto3 instance
s3_client = boto3.client(
    's3',
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
)

#create folder by key
s3_client.put_object(Bucket='s3-bucket-name', Key=('folder-name'+'/'))
--

Thank you.