I recently downloaded some object detection datasets from Roboflow.
But the coordinates are inconsistent: some annotations are polygons, and some are center x, y plus width, height.
My favourite coordinate format is left, top, right, bottom.
So this code figures out the coordinate type of each annotation and converts it to a [left, top, right, bottom] bounding box (note: the values are written to the file in the order left, right, top, bottom).
Thank you.
.
import cv2
import numpy as np
import os
from tqdm import tqdm
def convert_and_write_coordinates(txt_path, rect_coords, poly_coords, w, h):
    """Overwrite ``txt_path`` with one bounding box per annotation.

    Every output line has the form ``0 left right top bottom`` -- note the
    order: both x extents come first, then both y extents.  The label written
    is always ``0``.  Values are emitted in the same (normalized) units as the
    input; rectangles are written first, followed by polygons.

    Args:
        txt_path: Annotation file to overwrite.
        rect_coords: Iterable of ``(center_x, center_y, width, height)``.
        poly_coords: Iterable of polygons, each a sequence of ``(x, y)``
            points.  A polygon is reduced to its axis-aligned bounding box;
            a polygon without points is skipped.
        w: Image width.  Kept for interface compatibility; scaling to pixels
            and back is an identity, so it is not needed for the conversion.
        h: Image height.  Kept for the same reason as ``w``.
    """
    # Build every line before touching the file, so that a problem during the
    # conversion cannot leave behind a truncated annotation file.
    lines = []

    for center_x, center_y, width, height in rect_coords:
        half_width = width / 2
        half_height = height / 2
        left = center_x - half_width
        right = center_x + half_width
        top = center_y - half_height
        bottom = center_y + half_height
        lines.append(f"0 {left} {right} {top} {bottom}\n")

    for points in poly_coords:
        if not points:
            # min()/max() of an empty sequence would raise; there is no box
            # to write for a polygon that has no points.
            continue
        x_coords = [x for x, _ in points]
        y_coords = [y for _, y in points]
        left, right = min(x_coords), max(x_coords)
        top, bottom = min(y_coords), max(y_coords)
        lines.append(f"0 {left} {right} {top} {bottom}\n")

    with open(txt_path, 'w') as file:
        file.writelines(lines)
def draw_annotations(image_path, txt_path):
    """Parse one annotation file and rewrite it as bounding boxes.

    Despite its name, this function draws nothing: the image is loaded only
    to obtain its width and height, the annotation file is parsed, and the
    result is passed to ``convert_and_write_coordinates``, which overwrites
    ``txt_path`` in place.

    Each annotation line is ``label v1 v2 ...`` and is classified by the
    number of values after the label:

    * exactly four values -> ``(center_x, center_y, width, height)``;
    * any other non-zero even count -> polygon ``(x, y)`` pairs.

    Blank lines are ignored.  Lines with no values or an odd number of values
    cannot be interpreted; they are reported and left out of the rewrite.

    NOTE: the rewritten file also has four values per line, so running this
    function a second time on the same file would misread the converted boxes
    as center/size rectangles.

    Args:
        image_path: Path of the image the annotations belong to.
        txt_path: Path of the annotation file to convert in place.

    Returns:
        True when the annotation file was rewritten, False when the image
        could not be loaded (the annotation file is left untouched).
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Failed to load image: {image_path}")
        return False

    # Only the first two dimensions are needed; slicing avoids depending on
    # the presence of a channel axis.
    h, w = image.shape[:2]

    rect_coords = []  # (center_x, center_y, width, height) tuples
    poly_coords = []  # lists of (x, y) points

    with open(txt_path, 'r') as file:
        for line in file:
            parts = line.strip().split()
            if not parts:
                # Blank line (e.g. a trailing newline): nothing to convert.
                continue

            # The label value itself is discarded (the writer always emits
            # class 0), but a non-integer label still aborts the conversion
            # before the file is modified.
            int(parts[0])
            coords = [float(p) for p in parts[1:]]

            if len(coords) == 4:
                # Assumed to be (center_x, center_y, width, height).
                rect_coords.append(tuple(coords))
            elif coords and len(coords) % 2 == 0:
                # Assumed to be pairs of polygon coordinates.
                points = [(coords[i], coords[i + 1]) for i in range(0, len(coords), 2)]
                poly_coords.append(points)
            else:
                print(f"Skipping unrecognized annotation in {txt_path}: {line.strip()}")

    # Write the new coordinates back to the same text file.
    convert_and_write_coordinates(txt_path, rect_coords, poly_coords, w, h)
    return True
def process_images_in_folder(folder_path):
    """Convert the annotation file of every ``.jpg``/``.png`` in a folder.

    For each image (extension matched case-insensitively) the annotation file
    with the same base name and a ``.txt`` extension is looked up in the same
    folder.  When it exists it is handed to ``draw_annotations``; otherwise a
    message is printed and the image is skipped.
    """
    image_suffixes = ('.jpg', '.png')
    candidates = []
    for entry in os.listdir(folder_path):
        if entry.lower().endswith(image_suffixes):
            candidates.append(entry)

    for image_name in tqdm(candidates, desc="Processing images"):
        stem, _extension = os.path.splitext(image_name)
        label_file = os.path.join(folder_path, stem + '.txt')

        if not os.path.isfile(label_file):
            print(f"No corresponding text file found for image: {image_name}")
            continue

        draw_annotations(os.path.join(folder_path, image_name), label_file)
def main(folder_path='./val_indonesia_roboflow_2024_08_03/'):
    """Convert every annotation file found in ``folder_path``.

    Args:
        folder_path: Directory containing the images and their ``.txt``
            annotation files.  Defaults to the validation split used by the
            original script, so calling ``main()`` behaves as before.
    """
    process_images_in_folder(folder_path)


if __name__ == "__main__":
    main()
..
Bonus code:
Display each image with its annotation rectangles drawn on it.
.
import cv2
import os
from tqdm import tqdm
def display_image_with_annotations(image_path, txt_path):
    """Show one image with its annotation boxes and report a quit request.

    Only lines of ``txt_path`` with exactly five whitespace-separated fields
    are used; they are read as ``label left right top bottom``.  The x values
    are multiplied by the image width and the y values by the image height
    before a rectangle is drawn.  Other lines are ignored.

    The window stays open until a key is pressed.

    Returns:
        True if the pressed key was 'q'; False for any other key and when the
        image could not be loaded.
    """
    frame = cv2.imread(image_path)
    if frame is None:
        print(f"Failed to load image: {image_path}")
        return False

    img_h, img_w, _channels = frame.shape

    with open(txt_path, 'r') as file:
        rows = file.readlines()

    box_color = (255, 0, 0)
    for row in rows:
        fields = row.strip().split()
        if len(fields) != 5:
            # Not a "label + four values" line; nothing to draw.
            continue

        # The label is parsed but not used for drawing.
        _label = int(fields[0])
        x_min, x_max, y_min, y_max = [float(value) for value in fields[1:]]

        # Scale to absolute pixel positions.
        top_left = (int(x_min * img_w), int(y_min * img_h))
        bottom_right = (int(x_max * img_w), int(y_max * img_h))
        cv2.rectangle(frame, top_left, bottom_right, box_color, 2)

    cv2.imshow('Annotated Image', frame)
    pressed = cv2.waitKey(0)
    cv2.destroyAllWindows()

    quit_requested = pressed == ord('q')
    return quit_requested
def process_images_in_folder(folder_path):
    """Show every ``.jpg``/``.png`` in a folder together with its boxes.

    For each image (extension matched case-insensitively) the ``.txt`` file
    with the same base name is looked up in the same folder.  Images without
    one are reported and skipped.  The loop ends early as soon as
    ``display_image_with_annotations`` returns True.
    """
    names = list(
        filter(
            lambda name: name.lower().endswith(('.jpg', '.png')),
            os.listdir(folder_path),
        )
    )

    for name in tqdm(names, desc="Processing images"):
        base = os.path.splitext(name)[0]
        annotation_path = os.path.join(folder_path, base + '.txt')

        if not os.path.isfile(annotation_path):
            print(f"No corresponding text file found for image: {name}")
            continue

        stop = display_image_with_annotations(
            os.path.join(folder_path, name), annotation_path
        )
        if stop:
            print("Exiting image display loop.")
            break
def main(folder_path='./train_indonesia_roboflow_2024_08_03/'):
    """Display the annotated images found in ``folder_path``.

    Args:
        folder_path: Directory containing the images and their ``.txt``
            annotation files.  Defaults to the training split used by the
            original script, so calling ``main()`` behaves as before.
    """
    process_images_in_folder(folder_path)


if __name__ == "__main__":
    main()
..
marearts
🙇🏻‍♂️