MareArts Computer Vision Study: Object detection data coordinate conversion, (polygon, center) -> (left, top, right, bottom)

8/04/2024

Object detection data coordinate conversion, (polygon, center) -> (left, top, right, bottom)

I recently downloaded some object detection datasets from Roboflow.

But the coordinates are inconsistent: some annotations are polygons, and some are center x, center y, width, height.

My favourite coordinate format is left, top, right, bottom.

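So this code figures out the type of each coordinate set and converts it to a [left, top, right, bottom] bounding box. Note that the code below writes each line to the text file in the order `label left right top bottom`, and the bonus display code reads the values back in that same order.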

Thank you.

import cv2
import numpy as np
import os
from tqdm import tqdm

def convert_and_write_coordinates(txt_path, rect_coords, poly_coords, w, h):
    """Overwrite an annotation file with axis-aligned bounding boxes.

    Input values are treated as fractions of the image size and are written
    back in the same normalized units, so no pixel conversion is performed.

    Each output line has the form ``0 left right top bottom``. Note the
    order: both x extents come first, then both y extents. The class label
    is always written as ``0``.

    All lines are built before the file is opened, so a malformed input
    raises before the existing annotation file is truncated.

    Args:
        txt_path: Path of the annotation file to overwrite.
        rect_coords: Iterable of ``(center_x, center_y, width, height)``
            tuples.
        poly_coords: Iterable of polygons, each a sequence of ``(x, y)``
            points. Polygons without any point are skipped.
        w: Image width. Not needed for normalized-to-normalized conversion;
            kept so existing callers keep working.
        h: Image height. Kept for the same reason as ``w``.
    """
    lines = []

    # Center/size boxes -> extents.
    for center_x, center_y, width, height in rect_coords:
        half_width = width / 2
        half_height = height / 2
        left = center_x - half_width
        right = center_x + half_width
        top = center_y - half_height
        bottom = center_y + half_height
        lines.append(f"0 {left} {right} {top} {bottom}\n")

    # Polygons -> tight enclosing box.
    for points in poly_coords:
        if len(points) == 0:
            # min()/max() would raise on an empty polygon; nothing to write.
            continue
        xs = [x for x, _ in points]
        ys = [y for _, y in points]
        lines.append(f"0 {min(xs)} {max(xs)} {min(ys)} {max(ys)}\n")

    # Only now touch the file: opening with 'w' truncates it immediately.
    with open(txt_path, 'w') as file:
        file.writelines(lines)

def draw_annotations(image_path, txt_path):
    """Convert one annotation file in place to bounding-box format.

    Despite its name, this function draws nothing. It loads the image to
    obtain its size, parses the annotation file, and hands the parsed
    rectangles and polygons to ``convert_and_write_coordinates``, which
    overwrites ``txt_path``.

    A line with exactly four coordinates after the label is taken as
    ``center_x center_y width height``; any other non-empty, even count is
    taken as polygon ``x y`` pairs. Blank lines are skipped, and lines that
    fit neither shape are reported and dropped.

    NOTE: this is not idempotent. A converted file also has four values per
    line, so running the conversion a second time on the same file would
    reinterpret those boxes as center/size rectangles.

    Args:
        image_path: Path of the image the annotations belong to.
        txt_path: Path of the annotation file to read and then overwrite.

    Returns:
        False if the image could not be loaded (the annotation file is left
        untouched), True otherwise.
    """
    # Load the image
    image = cv2.imread(image_path)
    if image is None:
        print(f"Failed to load image: {image_path}")
        return False

    # Get image dimensions (only the first two axes are needed)
    h, w = image.shape[:2]

    rect_coords = []  # (center_x, center_y, width, height) tuples
    poly_coords = []  # lists of (x, y) points
    with open(txt_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            parts = line.split()
            if not parts:
                # Blank line: parts[0] would raise IndexError.
                continue

            # The label must be an integer, but it is not propagated:
            # convert_and_write_coordinates is given only coordinates.
            int(parts[0])
            coords = [float(p) for p in parts[1:]]

            if len(coords) == 4:  # Assuming it's (center_x, center_y, width, height)
                rect_coords.append(tuple(coords))
            elif coords and len(coords) % 2 == 0:  # Assuming pairs of coordinates for a polygon
                poly_coords.append(list(zip(coords[0::2], coords[1::2])))
            else:
                # Label-only line or odd number of values: cannot form a box.
                print(f"Skipping malformed annotation ({txt_path}, line {line_number}): {line.strip()}")

    # Write the new coordinates to the text file
    convert_and_write_coordinates(txt_path, rect_coords, poly_coords, w, h)
    return True

def process_images_in_folder(folder_path):
    """Run draw_annotations on every .jpg/.png in a folder that has a .txt twin.

    The annotation file is expected next to the image, with the same base
    name and a ``.txt`` extension. Images without one are reported and
    skipped.
    """
    image_extensions = ('.jpg', '.png')
    image_files = []
    for entry in os.listdir(folder_path):
        if entry.lower().endswith(image_extensions):
            image_files.append(entry)

    for filename in tqdm(image_files, desc="Processing images"):
        base_name, _extension = os.path.splitext(filename)
        txt_path = os.path.join(folder_path, base_name + '.txt')

        if not os.path.isfile(txt_path):
            print(f"No corresponding text file found for image: {filename}")
            continue

        image_path = os.path.join(folder_path, filename)
        draw_annotations(image_path, txt_path)

def main():
    """Convert the annotations of the hard-coded dataset folder."""
    # Directory holding both the images and their annotation text files
    dataset_dir = './val_indonesia_roboflow_2024_08_03/'
    process_images_in_folder(dataset_dir)

# Run the conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()

bonus code

Display image + annotation rect box

import cv2
import os
from tqdm import tqdm

def display_image_with_annotations(image_path, txt_path):
    """Show one image with its annotation boxes drawn on top.

    Annotation lines with exactly five whitespace-separated fields are read
    as ``label left right top bottom`` with coordinates given as fractions
    of the image size; every other line is ignored. The call blocks until a
    key is pressed in the display window.

    Returns:
        False if the image could not be loaded. Otherwise True when the
        pressed key was 'q', and False for any other key.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Failed to load image: {image_path}")
        return False

    img_h, img_w, _ = image.shape
    box_color = (255, 0, 0)
    box_thickness = 2

    with open(txt_path, 'r') as annotation_file:
        for raw_line in annotation_file.readlines():
            fields = raw_line.strip().split()
            if len(fields) != 5:
                # Not a "label + four values" line; nothing to draw.
                continue

            # The label is parsed but not used for drawing.
            _label = int(fields[0])
            x_min, x_max, y_min, y_max = (float(value) for value in fields[1:])

            # Scale the fractional coordinates up to pixel positions.
            top_left = (int(x_min * img_w), int(y_min * img_h))
            bottom_right = (int(x_max * img_w), int(y_max * img_h))
            cv2.rectangle(image, top_left, bottom_right, box_color, box_thickness)

    cv2.imshow('Annotated Image', image)
    pressed_key = cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Tell the caller whether the user asked to quit.
    return pressed_key == ord('q')

def process_images_in_folder(folder_path):
    """Display every .jpg/.png in a folder together with its annotations.

    The annotation file is expected next to the image, with the same base
    name and a ``.txt`` extension. Images without one are reported and
    skipped. The loop stops as soon as display_image_with_annotations
    returns a truthy value.
    """
    def is_image(name):
        return name.lower().endswith(('.jpg', '.png'))

    image_files = list(filter(is_image, os.listdir(folder_path)))
    for filename in tqdm(image_files, desc="Processing images"):
        base_name = os.path.splitext(filename)[0]
        txt_path = os.path.join(folder_path, base_name + '.txt')

        if not os.path.isfile(txt_path):
            print(f"No corresponding text file found for image: {filename}")
            continue

        image_path = os.path.join(folder_path, filename)
        quit_requested = display_image_with_annotations(image_path, txt_path)
        if quit_requested:
            print("Exiting image display loop.")
            break

def main():
    """Browse the annotated images of the hard-coded dataset folder."""
    # Directory holding both the images and their annotation text files
    dataset_dir = './train_indonesia_roboflow_2024_08_03/'
    process_images_in_folder(dataset_dir)

# Start the viewer only when executed as a script, not on import.
if __name__ == "__main__":
    main()

marearts

🙇🏻‍♂️

MareArts Computer Vision Study.

Pages

8/04/2024

Object detection data coordinate conversion, (polygon, center) -> (left, top, right, bottom)

No comments:

Post a Comment