MareArts Computer Vision Study.: 2023.05

5/28/2023

Load YAML into a dict, update its elements with attribute (dot) access like a class, and save the dict back to YAML — Python example code

Refer to the code and the example YAML below.

YAML file before running the code

a: a-value
b: b-value
c:
  d: Nested
  e: Values

code

pip install pyyaml attrdict

import yaml
from attrdict import AttrDict

# Read 'file.yaml' and parse it into plain Python objects.
# safe_load is used, so only standard YAML tags are constructed.
with open('file.yaml', 'r') as f:
    data = yaml.safe_load(f)

# Wrap the parsed mapping in AttrDict so keys can be used as attributes.
# NOTE(review): the attrdict package reportedly fails to import on
# Python 3.10+ — confirm before relying on it.
data = AttrDict(data)

# Read two top-level keys through attribute access.
print(data.a)
print(data.b)

# Overwrite the same two keys through attribute assignment.
data.a = 'new value 2'
data.b = 'another new value 2'

# Convert the wrapper back to a built-in dict before dumping.
# NOTE(review): presumably needed because safe_dump only represents
# built-in types — confirm against the PyYAML docs.
data = dict(data)

# Overwrite 'file.yaml' with the updated content.
with open('file.yaml', 'w') as f:
    yaml.safe_dump(data, f)

YAML file after running the code

a: new value 2
b: another new value 2
c:
  d: Nested
  e: Values

Thank you.

www.marearts.com

🙇🏻‍♂️

5/23/2023

Create custom tokenizer simple code.

In the sample code, vocabulary is "0,1,2,3,4" and max length is 20.

from typing import List, Union

class CustomTokenizer:
    """Character-level tokenizer backed by a fixed word-to-id vocabulary.

    The vocabulary is given either as a list of words or as the path of a
    text file holding one word per line; ids are the positions of the words.
    """

    def __init__(self, vocab: Union[str, List[str]], pad_token="<PAD>", cls_token="<BOS>", sep_token="<SEP>", max_len=20):
        """Build the vocabulary tables and remember the special tokens.

        Args:
            vocab: Path of a vocabulary file (one word per line) or a list of words.
            pad_token: Token appended to fill a sequence up to ``max_len``.
            cls_token: Token placed at the start of every sequence.
            sep_token: Token placed right after the kept characters.
            max_len: Maximum number of text characters kept by ``tokenize``.

        Raises:
            ValueError: If ``vocab`` is neither a str nor a list.
        """
        if isinstance(vocab, str):
            # A string is treated as a file path; surrounding whitespace
            # (including the newline) is stripped from every line.
            with open(vocab, 'r') as f:
                words = [line.strip() for line in f.readlines()]
        elif isinstance(vocab, list):
            words = vocab
        else:
            raise ValueError("vocab must be either a filepath (str) or a list of words")

        # The id of a word is its position; a repeated word keeps its last position.
        self.vocab = {}
        for index, word in enumerate(words):
            self.vocab[word] = index

        print('vocab: ', self.vocab)
        self.pad_token = pad_token
        self.cls_token = cls_token
        self.sep_token = sep_token
        self.max_len = max_len

        # Reverse table used to map ids back to words.
        self.inv_vocab = {}
        for word, index in self.vocab.items():
            self.inv_vocab[index] = word

    def tokenize(self, text: str):
        """Split ``text`` into known characters and frame them with special tokens.

        Characters missing from the vocabulary are dropped, at most ``max_len``
        characters are kept, and the result is laid out as
        ``[cls] + characters + [sep] + padding``.
        """
        kept = [ch for ch in text if ch in self.vocab][:self.max_len]
        pad_count = self.max_len - len(kept)

        sequence = [self.cls_token]
        sequence.extend(kept)
        sequence.append(self.sep_token)
        sequence.extend([self.pad_token] * pad_count)
        return sequence

    def convert_tokens_to_ids(self, tokens):
        """Map tokens to ids; unknown tokens receive the id of the pad token."""
        ids = []
        for token in tokens:
            ids.append(self.vocab.get(token, self.vocab.get(self.pad_token)))
        return ids

    def convert_ids_to_tokens(self, ids):
        """Map ids back to tokens; unknown ids become the pad token."""
        tokens = []
        for index in ids:
            tokens.append(self.inv_vocab.get(index, self.pad_token))
        return tokens



vocab = ["<PAD>", "<BOS>", "<SEP>", "0", "1", "2", "3", "4"]

# Write the vocabulary to disk, one token per line.
with open('vocab.txt', 'w') as f:
    f.writelines(token + '\n' for token in vocab)

# Build the tokenizer from the vocabulary file just written.
tokenizer = CustomTokenizer(vocab='vocab.txt')

# Text -> tokens (special tokens and padding included).
tokenized_text = tokenizer.tokenize('22342')
print("tokenized_text: ", tokenized_text)

# Tokens -> ids.
token_ids = tokenizer.convert_tokens_to_ids(tokenized_text)
print("token_ids: ", token_ids)

# Ids -> tokens, to check the round trip.
tokens = tokenizer.convert_ids_to_tokens(token_ids)
print("tokens: ", tokens)

Thank you.

🙇🏻‍♂️

5/20/2023

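timm swin transformer v2 model review (note: the code below loads a ResNet-50 checkpoint from the Hugging Face Hub, not Swin Transformer V2)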

refer to code:

import timm
import torch
from PIL import Image
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform

# Fetch the pretrained checkpoint from the Hugging Face Hub.
model = timm.create_model('hf-hub:nateraw/resnet50-oxford-iiit-pet', pretrained=True)

# Inference only, so switch the model to evaluation mode.
model.eval()

# Preprocessing settings resolved from the model's pretrained config.
data_config = resolve_data_config(model.pretrained_cfg, model=model)
print(data_config)

# The full pretrained config.
print(model.pretrained_cfg)

# Class names stored in the pretrained config.
labels = model.pretrained_cfg['label_names']
print(labels)

# Report at most five classes.
top_k = min(len(labels), 5)

# Build the preprocessing transform from the resolved settings.
transform = create_transform(**data_config)

# Replace with your own image file.
image = Image.open('boxer.jpg').convert('RGB')

# Apply the transform and add a leading batch dimension.
x = transform(image).unsqueeze(0)
print(x.shape)

# Forward pass.
out = model(x)
print(out.shape)

# Softmax over the outputs of the single image in the batch.
probabilities = torch.softmax(out[0], dim=0)
print(probabilities)

# Scores and class indices of the top_k most likely classes.
values, indices = probabilities.topk(top_k)

# One {"label", "score"} entry per selected class.
predictions = []
for class_index, score in zip(indices, values):
    predictions.append({"label": labels[class_index], "score": score.item()})
print(predictions)

from torchsummary import summary

# Layer-by-layer summary of the full model.
input_size = (3, 224, 224)
summary(model, input_size)

# Drop the last child module and summarize what remains.
remaining_layers = list(model.children())[:-1]
model = torch.nn.Sequential(*remaining_layers)
summary(model, input_size)

First Summary:

----------------------------------------------------------------

Layer (type) Output Shape Param #

================================================================

Conv2d-1 [-1, 64, 112, 112] 9,408

BatchNorm2d-2 [-1, 64, 112, 112] 128

ReLU-3 [-1, 64, 112, 112] 0

MaxPool2d-4 [-1, 64, 56, 56] 0

Conv2d-5 [-1, 64, 56, 56] 4,096

BatchNorm2d-6 [-1, 64, 56, 56] 128

ReLU-7 [-1, 64, 56, 56] 0

Conv2d-8 [-1, 64, 56, 56] 36,864

BatchNorm2d-9 [-1, 64, 56, 56] 128

Identity-10 [-1, 64, 56, 56] 0

ReLU-11 [-1, 64, 56, 56] 0

Identity-12 [-1, 64, 56, 56] 0

Conv2d-13 [-1, 256, 56, 56] 16,384

BatchNorm2d-14 [-1, 256, 56, 56] 512

Conv2d-15 [-1, 256, 56, 56] 16,384

BatchNorm2d-16 [-1, 256, 56, 56] 512

ReLU-17 [-1, 256, 56, 56] 0

Bottleneck-18 [-1, 256, 56, 56] 0

Conv2d-19 [-1, 64, 56, 56] 16,384

BatchNorm2d-20 [-1, 64, 56, 56] 128

ReLU-21 [-1, 64, 56, 56] 0

Conv2d-22 [-1, 64, 56, 56] 36,864

BatchNorm2d-23 [-1, 64, 56, 56] 128

Identity-24 [-1, 64, 56, 56] 0

ReLU-25 [-1, 64, 56, 56] 0

Identity-26 [-1, 64, 56, 56] 0

Conv2d-27 [-1, 256, 56, 56] 16,384

BatchNorm2d-28 [-1, 256, 56, 56] 512

ReLU-29 [-1, 256, 56, 56] 0

Bottleneck-30 [-1, 256, 56, 56] 0

Conv2d-31 [-1, 64, 56, 56] 16,384

BatchNorm2d-32 [-1, 64, 56, 56] 128

ReLU-33 [-1, 64, 56, 56] 0

Conv2d-34 [-1, 64, 56, 56] 36,864

BatchNorm2d-35 [-1, 64, 56, 56] 128

Identity-36 [-1, 64, 56, 56] 0

ReLU-37 [-1, 64, 56, 56] 0

Identity-38 [-1, 64, 56, 56] 0

Conv2d-39 [-1, 256, 56, 56] 16,384

BatchNorm2d-40 [-1, 256, 56, 56] 512

ReLU-41 [-1, 256, 56, 56] 0

Bottleneck-42 [-1, 256, 56, 56] 0

Conv2d-43 [-1, 128, 56, 56] 32,768

BatchNorm2d-44 [-1, 128, 56, 56] 256

ReLU-45 [-1, 128, 56, 56] 0

Conv2d-46 [-1, 128, 28, 28] 147,456

BatchNorm2d-47 [-1, 128, 28, 28] 256

Identity-48 [-1, 128, 28, 28] 0

ReLU-49 [-1, 128, 28, 28] 0

Identity-50 [-1, 128, 28, 28] 0

Conv2d-51 [-1, 512, 28, 28] 65,536

BatchNorm2d-52 [-1, 512, 28, 28] 1,024

Conv2d-53 [-1, 512, 28, 28] 131,072

BatchNorm2d-54 [-1, 512, 28, 28] 1,024

ReLU-55 [-1, 512, 28, 28] 0

Bottleneck-56 [-1, 512, 28, 28] 0

Conv2d-57 [-1, 128, 28, 28] 65,536

BatchNorm2d-58 [-1, 128, 28, 28] 256

ReLU-59 [-1, 128, 28, 28] 0

Conv2d-60 [-1, 128, 28, 28] 147,456

BatchNorm2d-61 [-1, 128, 28, 28] 256

Identity-62 [-1, 128, 28, 28] 0

ReLU-63 [-1, 128, 28, 28] 0

Identity-64 [-1, 128, 28, 28] 0

Conv2d-65 [-1, 512, 28, 28] 65,536

BatchNorm2d-66 [-1, 512, 28, 28] 1,024

ReLU-67 [-1, 512, 28, 28] 0

Bottleneck-68 [-1, 512, 28, 28] 0

Conv2d-69 [-1, 128, 28, 28] 65,536

BatchNorm2d-70 [-1, 128, 28, 28] 256

ReLU-71 [-1, 128, 28, 28] 0

Conv2d-72 [-1, 128, 28, 28] 147,456

BatchNorm2d-73 [-1, 128, 28, 28] 256

Identity-74 [-1, 128, 28, 28] 0

ReLU-75 [-1, 128, 28, 28] 0

Identity-76 [-1, 128, 28, 28] 0

Conv2d-77 [-1, 512, 28, 28] 65,536

BatchNorm2d-78 [-1, 512, 28, 28] 1,024

ReLU-79 [-1, 512, 28, 28] 0

Bottleneck-80 [-1, 512, 28, 28] 0

Conv2d-81 [-1, 128, 28, 28] 65,536

BatchNorm2d-82 [-1, 128, 28, 28] 256

ReLU-83 [-1, 128, 28, 28] 0

Conv2d-84 [-1, 128, 28, 28] 147,456

BatchNorm2d-85 [-1, 128, 28, 28] 256

Identity-86 [-1, 128, 28, 28] 0

ReLU-87 [-1, 128, 28, 28] 0

Identity-88 [-1, 128, 28, 28] 0

Conv2d-89 [-1, 512, 28, 28] 65,536

BatchNorm2d-90 [-1, 512, 28, 28] 1,024

ReLU-91 [-1, 512, 28, 28] 0

Bottleneck-92 [-1, 512, 28, 28] 0

Conv2d-93 [-1, 256, 28, 28] 131,072

BatchNorm2d-94 [-1, 256, 28, 28] 512

ReLU-95 [-1, 256, 28, 28] 0

Conv2d-96 [-1, 256, 14, 14] 589,824

BatchNorm2d-97 [-1, 256, 14, 14] 512

Identity-98 [-1, 256, 14, 14] 0

ReLU-99 [-1, 256, 14, 14] 0

Identity-100 [-1, 256, 14, 14] 0

Conv2d-101 [-1, 1024, 14, 14] 262,144

BatchNorm2d-102 [-1, 1024, 14, 14] 2,048

Conv2d-103 [-1, 1024, 14, 14] 524,288

BatchNorm2d-104 [-1, 1024, 14, 14] 2,048

ReLU-105 [-1, 1024, 14, 14] 0

Bottleneck-106 [-1, 1024, 14, 14] 0

Conv2d-107 [-1, 256, 14, 14] 262,144

BatchNorm2d-108 [-1, 256, 14, 14] 512

ReLU-109 [-1, 256, 14, 14] 0

Conv2d-110 [-1, 256, 14, 14] 589,824

BatchNorm2d-111 [-1, 256, 14, 14] 512

Identity-112 [-1, 256, 14, 14] 0

ReLU-113 [-1, 256, 14, 14] 0

Identity-114 [-1, 256, 14, 14] 0

Conv2d-115 [-1, 1024, 14, 14] 262,144

BatchNorm2d-116 [-1, 1024, 14, 14] 2,048

ReLU-117 [-1, 1024, 14, 14] 0

Bottleneck-118 [-1, 1024, 14, 14] 0

Conv2d-119 [-1, 256, 14, 14] 262,144

BatchNorm2d-120 [-1, 256, 14, 14] 512

ReLU-121 [-1, 256, 14, 14] 0

Conv2d-122 [-1, 256, 14, 14] 589,824

BatchNorm2d-123 [-1, 256, 14, 14] 512

Identity-124 [-1, 256, 14, 14] 0

ReLU-125 [-1, 256, 14, 14] 0

Identity-126 [-1, 256, 14, 14] 0

Conv2d-127 [-1, 1024, 14, 14] 262,144

BatchNorm2d-128 [-1, 1024, 14, 14] 2,048

ReLU-129 [-1, 1024, 14, 14] 0

Bottleneck-130 [-1, 1024, 14, 14] 0

Conv2d-131 [-1, 256, 14, 14] 262,144

BatchNorm2d-132 [-1, 256, 14, 14] 512

ReLU-133 [-1, 256, 14, 14] 0

Conv2d-134 [-1, 256, 14, 14] 589,824

BatchNorm2d-135 [-1, 256, 14, 14] 512

Identity-136 [-1, 256, 14, 14] 0

ReLU-137 [-1, 256, 14, 14] 0

Identity-138 [-1, 256, 14, 14] 0

Conv2d-139 [-1, 1024, 14, 14] 262,144

BatchNorm2d-140 [-1, 1024, 14, 14] 2,048

ReLU-141 [-1, 1024, 14, 14] 0

Bottleneck-142 [-1, 1024, 14, 14] 0

Conv2d-143 [-1, 256, 14, 14] 262,144

BatchNorm2d-144 [-1, 256, 14, 14] 512

ReLU-145 [-1, 256, 14, 14] 0

Conv2d-146 [-1, 256, 14, 14] 589,824

BatchNorm2d-147 [-1, 256, 14, 14] 512

Identity-148 [-1, 256, 14, 14] 0

ReLU-149 [-1, 256, 14, 14] 0

Identity-150 [-1, 256, 14, 14] 0

Conv2d-151 [-1, 1024, 14, 14] 262,144

BatchNorm2d-152 [-1, 1024, 14, 14] 2,048

ReLU-153 [-1, 1024, 14, 14] 0

Bottleneck-154 [-1, 1024, 14, 14] 0

Conv2d-155 [-1, 256, 14, 14] 262,144

BatchNorm2d-156 [-1, 256, 14, 14] 512

ReLU-157 [-1, 256, 14, 14] 0

Conv2d-158 [-1, 256, 14, 14] 589,824

BatchNorm2d-159 [-1, 256, 14, 14] 512

Identity-160 [-1, 256, 14, 14] 0

ReLU-161 [-1, 256, 14, 14] 0

Identity-162 [-1, 256, 14, 14] 0

Conv2d-163 [-1, 1024, 14, 14] 262,144

BatchNorm2d-164 [-1, 1024, 14, 14] 2,048

ReLU-165 [-1, 1024, 14, 14] 0

Bottleneck-166 [-1, 1024, 14, 14] 0

Conv2d-167 [-1, 512, 14, 14] 524,288

BatchNorm2d-168 [-1, 512, 14, 14] 1,024

ReLU-169 [-1, 512, 14, 14] 0

Conv2d-170 [-1, 512, 7, 7] 2,359,296

BatchNorm2d-171 [-1, 512, 7, 7] 1,024

Identity-172 [-1, 512, 7, 7] 0

ReLU-173 [-1, 512, 7, 7] 0

Identity-174 [-1, 512, 7, 7] 0

Conv2d-175 [-1, 2048, 7, 7] 1,048,576

BatchNorm2d-176 [-1, 2048, 7, 7] 4,096

Conv2d-177 [-1, 2048, 7, 7] 2,097,152

BatchNorm2d-178 [-1, 2048, 7, 7] 4,096

ReLU-179 [-1, 2048, 7, 7] 0

Bottleneck-180 [-1, 2048, 7, 7] 0

Conv2d-181 [-1, 512, 7, 7] 1,048,576

BatchNorm2d-182 [-1, 512, 7, 7] 1,024

ReLU-183 [-1, 512, 7, 7] 0

Conv2d-184 [-1, 512, 7, 7] 2,359,296

BatchNorm2d-185 [-1, 512, 7, 7] 1,024

Identity-186 [-1, 512, 7, 7] 0

ReLU-187 [-1, 512, 7, 7] 0

Identity-188 [-1, 512, 7, 7] 0

Conv2d-189 [-1, 2048, 7, 7] 1,048,576

BatchNorm2d-190 [-1, 2048, 7, 7] 4,096

ReLU-191 [-1, 2048, 7, 7] 0

Bottleneck-192 [-1, 2048, 7, 7] 0

Conv2d-193 [-1, 512, 7, 7] 1,048,576

BatchNorm2d-194 [-1, 512, 7, 7] 1,024

ReLU-195 [-1, 512, 7, 7] 0

Conv2d-196 [-1, 512, 7, 7] 2,359,296

BatchNorm2d-197 [-1, 512, 7, 7] 1,024

Identity-198 [-1, 512, 7, 7] 0

ReLU-199 [-1, 512, 7, 7] 0

Identity-200 [-1, 512, 7, 7] 0

Conv2d-201 [-1, 2048, 7, 7] 1,048,576

BatchNorm2d-202 [-1, 2048, 7, 7] 4,096

ReLU-203 [-1, 2048, 7, 7] 0

Bottleneck-204 [-1, 2048, 7, 7] 0

AdaptiveAvgPool2d-205 [-1, 2048, 1, 1] 0

Flatten-206 [-1, 2048] 0

SelectAdaptivePool2d-207 [-1, 2048] 0

Linear-208 [-1, 37] 75,813

================================================================

Total params: 23,583,845

Trainable params: 23,583,845

Non-trainable params: 0

----------------------------------------------------------------

Input size (MB): 0.57

Forward/backward pass size (MB): 307.64

Params size (MB): 89.97

Estimated Total Size (MB): 398.18

Second Summary:

----------------------------------------------------------------

Layer (type) Output Shape Param #

================================================================

Conv2d-1 [-1, 64, 112, 112] 9,408

BatchNorm2d-2 [-1, 64, 112, 112] 128

ReLU-3 [-1, 64, 112, 112] 0

MaxPool2d-4 [-1, 64, 56, 56] 0

Conv2d-5 [-1, 64, 56, 56] 4,096

BatchNorm2d-6 [-1, 64, 56, 56] 128

ReLU-7 [-1, 64, 56, 56] 0

Conv2d-8 [-1, 64, 56, 56] 36,864

BatchNorm2d-9 [-1, 64, 56, 56] 128

Identity-10 [-1, 64, 56, 56] 0

ReLU-11 [-1, 64, 56, 56] 0

Identity-12 [-1, 64, 56, 56] 0

Conv2d-13 [-1, 256, 56, 56] 16,384

BatchNorm2d-14 [-1, 256, 56, 56] 512

Conv2d-15 [-1, 256, 56, 56] 16,384

BatchNorm2d-16 [-1, 256, 56, 56] 512

ReLU-17 [-1, 256, 56, 56] 0

Bottleneck-18 [-1, 256, 56, 56] 0

Conv2d-19 [-1, 64, 56, 56] 16,384

BatchNorm2d-20 [-1, 64, 56, 56] 128

ReLU-21 [-1, 64, 56, 56] 0

Conv2d-22 [-1, 64, 56, 56] 36,864

BatchNorm2d-23 [-1, 64, 56, 56] 128

Identity-24 [-1, 64, 56, 56] 0

ReLU-25 [-1, 64, 56, 56] 0

Identity-26 [-1, 64, 56, 56] 0

Conv2d-27 [-1, 256, 56, 56] 16,384

BatchNorm2d-28 [-1, 256, 56, 56] 512

ReLU-29 [-1, 256, 56, 56] 0

Bottleneck-30 [-1, 256, 56, 56] 0

Conv2d-31 [-1, 64, 56, 56] 16,384

BatchNorm2d-32 [-1, 64, 56, 56] 128

ReLU-33 [-1, 64, 56, 56] 0

Conv2d-34 [-1, 64, 56, 56] 36,864

BatchNorm2d-35 [-1, 64, 56, 56] 128

Identity-36 [-1, 64, 56, 56] 0

ReLU-37 [-1, 64, 56, 56] 0

Identity-38 [-1, 64, 56, 56] 0

Conv2d-39 [-1, 256, 56, 56] 16,384

BatchNorm2d-40 [-1, 256, 56, 56] 512

ReLU-41 [-1, 256, 56, 56] 0

Bottleneck-42 [-1, 256, 56, 56] 0

Conv2d-43 [-1, 128, 56, 56] 32,768

BatchNorm2d-44 [-1, 128, 56, 56] 256

ReLU-45 [-1, 128, 56, 56] 0

Conv2d-46 [-1, 128, 28, 28] 147,456

BatchNorm2d-47 [-1, 128, 28, 28] 256

Identity-48 [-1, 128, 28, 28] 0

ReLU-49 [-1, 128, 28, 28] 0

Identity-50 [-1, 128, 28, 28] 0

Conv2d-51 [-1, 512, 28, 28] 65,536

BatchNorm2d-52 [-1, 512, 28, 28] 1,024

Conv2d-53 [-1, 512, 28, 28] 131,072

BatchNorm2d-54 [-1, 512, 28, 28] 1,024

ReLU-55 [-1, 512, 28, 28] 0

Bottleneck-56 [-1, 512, 28, 28] 0

Conv2d-57 [-1, 128, 28, 28] 65,536

BatchNorm2d-58 [-1, 128, 28, 28] 256

ReLU-59 [-1, 128, 28, 28] 0

Conv2d-60 [-1, 128, 28, 28] 147,456

BatchNorm2d-61 [-1, 128, 28, 28] 256

Identity-62 [-1, 128, 28, 28] 0

ReLU-63 [-1, 128, 28, 28] 0

Identity-64 [-1, 128, 28, 28] 0

Conv2d-65 [-1, 512, 28, 28] 65,536

BatchNorm2d-66 [-1, 512, 28, 28] 1,024

ReLU-67 [-1, 512, 28, 28] 0

Bottleneck-68 [-1, 512, 28, 28] 0

Conv2d-69 [-1, 128, 28, 28] 65,536

BatchNorm2d-70 [-1, 128, 28, 28] 256

ReLU-71 [-1, 128, 28, 28] 0

Conv2d-72 [-1, 128, 28, 28] 147,456

BatchNorm2d-73 [-1, 128, 28, 28] 256

Identity-74 [-1, 128, 28, 28] 0

ReLU-75 [-1, 128, 28, 28] 0

Identity-76 [-1, 128, 28, 28] 0

Conv2d-77 [-1, 512, 28, 28] 65,536

BatchNorm2d-78 [-1, 512, 28, 28] 1,024

ReLU-79 [-1, 512, 28, 28] 0

Bottleneck-80 [-1, 512, 28, 28] 0

Conv2d-81 [-1, 128, 28, 28] 65,536

BatchNorm2d-82 [-1, 128, 28, 28] 256

ReLU-83 [-1, 128, 28, 28] 0

Conv2d-84 [-1, 128, 28, 28] 147,456

BatchNorm2d-85 [-1, 128, 28, 28] 256

Identity-86 [-1, 128, 28, 28] 0

ReLU-87 [-1, 128, 28, 28] 0

Identity-88 [-1, 128, 28, 28] 0

Conv2d-89 [-1, 512, 28, 28] 65,536

BatchNorm2d-90 [-1, 512, 28, 28] 1,024

ReLU-91 [-1, 512, 28, 28] 0

Bottleneck-92 [-1, 512, 28, 28] 0

Conv2d-93 [-1, 256, 28, 28] 131,072

BatchNorm2d-94 [-1, 256, 28, 28] 512

ReLU-95 [-1, 256, 28, 28] 0

Conv2d-96 [-1, 256, 14, 14] 589,824

BatchNorm2d-97 [-1, 256, 14, 14] 512

Identity-98 [-1, 256, 14, 14] 0

ReLU-99 [-1, 256, 14, 14] 0

Identity-100 [-1, 256, 14, 14] 0

Conv2d-101 [-1, 1024, 14, 14] 262,144

BatchNorm2d-102 [-1, 1024, 14, 14] 2,048

Conv2d-103 [-1, 1024, 14, 14] 524,288

BatchNorm2d-104 [-1, 1024, 14, 14] 2,048

ReLU-105 [-1, 1024, 14, 14] 0

Bottleneck-106 [-1, 1024, 14, 14] 0

Conv2d-107 [-1, 256, 14, 14] 262,144

BatchNorm2d-108 [-1, 256, 14, 14] 512

ReLU-109 [-1, 256, 14, 14] 0

Conv2d-110 [-1, 256, 14, 14] 589,824

BatchNorm2d-111 [-1, 256, 14, 14] 512

Identity-112 [-1, 256, 14, 14] 0

ReLU-113 [-1, 256, 14, 14] 0

Identity-114 [-1, 256, 14, 14] 0

Conv2d-115 [-1, 1024, 14, 14] 262,144

BatchNorm2d-116 [-1, 1024, 14, 14] 2,048

ReLU-117 [-1, 1024, 14, 14] 0

Bottleneck-118 [-1, 1024, 14, 14] 0

Conv2d-119 [-1, 256, 14, 14] 262,144

BatchNorm2d-120 [-1, 256, 14, 14] 512

ReLU-121 [-1, 256, 14, 14] 0

Conv2d-122 [-1, 256, 14, 14] 589,824

BatchNorm2d-123 [-1, 256, 14, 14] 512

Identity-124 [-1, 256, 14, 14] 0

ReLU-125 [-1, 256, 14, 14] 0

Identity-126 [-1, 256, 14, 14] 0

Conv2d-127 [-1, 1024, 14, 14] 262,144

BatchNorm2d-128 [-1, 1024, 14, 14] 2,048

ReLU-129 [-1, 1024, 14, 14] 0

Bottleneck-130 [-1, 1024, 14, 14] 0

Conv2d-131 [-1, 256, 14, 14] 262,144

BatchNorm2d-132 [-1, 256, 14, 14] 512

ReLU-133 [-1, 256, 14, 14] 0

Conv2d-134 [-1, 256, 14, 14] 589,824

BatchNorm2d-135 [-1, 256, 14, 14] 512

Identity-136 [-1, 256, 14, 14] 0

ReLU-137 [-1, 256, 14, 14] 0

Identity-138 [-1, 256, 14, 14] 0

Conv2d-139 [-1, 1024, 14, 14] 262,144

BatchNorm2d-140 [-1, 1024, 14, 14] 2,048

ReLU-141 [-1, 1024, 14, 14] 0

Bottleneck-142 [-1, 1024, 14, 14] 0

Conv2d-143 [-1, 256, 14, 14] 262,144

BatchNorm2d-144 [-1, 256, 14, 14] 512

ReLU-145 [-1, 256, 14, 14] 0

Conv2d-146 [-1, 256, 14, 14] 589,824

BatchNorm2d-147 [-1, 256, 14, 14] 512

Identity-148 [-1, 256, 14, 14] 0

ReLU-149 [-1, 256, 14, 14] 0

Identity-150 [-1, 256, 14, 14] 0

Conv2d-151 [-1, 1024, 14, 14] 262,144

BatchNorm2d-152 [-1, 1024, 14, 14] 2,048

ReLU-153 [-1, 1024, 14, 14] 0

Bottleneck-154 [-1, 1024, 14, 14] 0

Conv2d-155 [-1, 256, 14, 14] 262,144

BatchNorm2d-156 [-1, 256, 14, 14] 512

ReLU-157 [-1, 256, 14, 14] 0

Conv2d-158 [-1, 256, 14, 14] 589,824

BatchNorm2d-159 [-1, 256, 14, 14] 512

Identity-160 [-1, 256, 14, 14] 0

ReLU-161 [-1, 256, 14, 14] 0

Identity-162 [-1, 256, 14, 14] 0

Conv2d-163 [-1, 1024, 14, 14] 262,144

BatchNorm2d-164 [-1, 1024, 14, 14] 2,048

ReLU-165 [-1, 1024, 14, 14] 0

Bottleneck-166 [-1, 1024, 14, 14] 0

Conv2d-167 [-1, 512, 14, 14] 524,288

BatchNorm2d-168 [-1, 512, 14, 14] 1,024

ReLU-169 [-1, 512, 14, 14] 0

Conv2d-170 [-1, 512, 7, 7] 2,359,296

BatchNorm2d-171 [-1, 512, 7, 7] 1,024

Identity-172 [-1, 512, 7, 7] 0

ReLU-173 [-1, 512, 7, 7] 0

Identity-174 [-1, 512, 7, 7] 0

Conv2d-175 [-1, 2048, 7, 7] 1,048,576

BatchNorm2d-176 [-1, 2048, 7, 7] 4,096

Conv2d-177 [-1, 2048, 7, 7] 2,097,152

BatchNorm2d-178 [-1, 2048, 7, 7] 4,096

ReLU-179 [-1, 2048, 7, 7] 0

Bottleneck-180 [-1, 2048, 7, 7] 0

Conv2d-181 [-1, 512, 7, 7] 1,048,576

BatchNorm2d-182 [-1, 512, 7, 7] 1,024

ReLU-183 [-1, 512, 7, 7] 0

Conv2d-184 [-1, 512, 7, 7] 2,359,296

BatchNorm2d-185 [-1, 512, 7, 7] 1,024

Identity-186 [-1, 512, 7, 7] 0

ReLU-187 [-1, 512, 7, 7] 0

Identity-188 [-1, 512, 7, 7] 0

Conv2d-189 [-1, 2048, 7, 7] 1,048,576

BatchNorm2d-190 [-1, 2048, 7, 7] 4,096

ReLU-191 [-1, 2048, 7, 7] 0

Bottleneck-192 [-1, 2048, 7, 7] 0

Conv2d-193 [-1, 512, 7, 7] 1,048,576

BatchNorm2d-194 [-1, 512, 7, 7] 1,024

ReLU-195 [-1, 512, 7, 7] 0

Conv2d-196 [-1, 512, 7, 7] 2,359,296

BatchNorm2d-197 [-1, 512, 7, 7] 1,024

Identity-198 [-1, 512, 7, 7] 0

ReLU-199 [-1, 512, 7, 7] 0

Identity-200 [-1, 512, 7, 7] 0

Conv2d-201 [-1, 2048, 7, 7] 1,048,576

BatchNorm2d-202 [-1, 2048, 7, 7] 4,096

ReLU-203 [-1, 2048, 7, 7] 0

Bottleneck-204 [-1, 2048, 7, 7] 0

AdaptiveAvgPool2d-205 [-1, 2048, 1, 1] 0

Flatten-206 [-1, 2048] 0

SelectAdaptivePool2d-207 [-1, 2048] 0

================================================================

Total params: 23,508,032

Trainable params: 23,508,032

Non-trainable params: 0

----------------------------------------------------------------

Input size (MB): 0.57

Forward/backward pass size (MB): 307.64

Params size (MB): 89.68

Estimated Total Size (MB): 397.89

----------------------------------------------------------------

www.marearts.com

🙇🏻‍♂️

Output exceeds the size limit. Open the full output data in a text editor

Jupyter notebook

unlimited size of line:

Code #1

# Import from the public IPython.display module; importing `display` from
# IPython.core.display is deprecated.
from IPython.display import display, HTML

# Inject a CSS rule that sets the `.container` element to 100% width.
display(HTML("<style>.container { width:100% !important; }</style>"))

Code #2

from IPython.core.interactiveshell import InteractiveShell

# Set the class-level `ast_node_interactivity` option to "all".
# NOTE(review): presumably this makes a cell display the result of every
# expression statement rather than only the last one — confirm in the IPython docs.
InteractiveShell.ast_node_interactivity = "all"

Thank you.

🙇🏻‍♂️

5/18/2023

swin transformer v2 - model forward and export onnx

1. load pre-trained model

2. export onnx

3. load onnx

refer to code:

import warnings
from torch.jit import TracerWarning

# Ignore TracerWarning messages (presumably raised while tracing the model
# for the ONNX export further down).
warnings.filterwarnings("ignore", category=TracerWarning)

# ---------------------------------------------
# Swin Transformer V2: pretrained model forward
# ---------------------------------------------

from transformers import AutoImageProcessor, Swinv2Model
import torch
from datasets import load_dataset

# Use the first image of the dataset's test split as the sample input.
dataset = load_dataset("huggingface/cats-image")
image = dataset["test"]["image"][0]

# The image processor and the model are loaded from the same checkpoint.
checkpoint = "microsoft/swinv2-tiny-patch4-window8-256"
image_processor = AutoImageProcessor.from_pretrained(checkpoint)
model = Swinv2Model.from_pretrained(checkpoint)

# Preprocess the image into PyTorch tensors.
inputs = image_processor(image, return_tensors="pt")

# Forward pass without gradient tracking.
with torch.no_grad():
    outputs = model(**inputs)

last_hidden_states = outputs.last_hidden_state

# Report the output shape through a numpy copy, then print the tensor itself.
last_hidden_states_numpy = last_hidden_states.detach().numpy()
print(f"Shape of last_hidden_states: {last_hidden_states_numpy.shape}")
print(last_hidden_states)



#----------------
#onnx export
#------------------
import torch
from torch.autograd import Variable

# Make sure the model is in evaluation mode before exporting.
model.eval()

# Dummy input used to trace the model. A plain tensor is passed directly:
# the autograd `Variable` wrapper is deprecated and no longer needed.
# For this example the input is assumed to be of size [1, 3, 256, 256].
dummy_input = torch.randn(1, 3, 256, 256)

# Destination of the exported ONNX file.
file_path = "./swinv2_tiny.onnx"

# Export the model to ONNX using the dummy input.
torch.onnx.export(model, dummy_input, file_path)

#------------------
#onnx inference
#------------------
import onnxruntime as ort

# Open an inference session on the exported ONNX file.
ort_session = ort.InferenceSession(file_path)

# Show which tensors the image processor produced, then convert the pixel
# tensor to a numpy array for onnxruntime.
print(inputs.keys())
inputs_numpy = inputs["pixel_values"].numpy()

# Feed the array under the name of the session's first input.
first_input = ort_session.get_inputs()[0]
ort_inputs = {first_input.name: inputs_numpy}

# Run the session and report the first output's shape and all outputs.
ort_outs = ort_session.run(None, ort_inputs)
print(f"Shape of ort_outs: {ort_outs[0].shape}")
print(ort_outs)
# print(type(ort_outs))
# print( list(ort_outs.shape) )

Thank you.

www.marearts.com

🙇🏻‍♂️

5/04/2023

Generate a random EAN-13 barcode number

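Refer to the code below (it also needs `import random` and an `EAN13` class, presumably `from barcode import EAN13` from the python-barcode package):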

def generate_random_ean13():
    """Return the full code of an EAN13 built from 12 random digits.

    The 12 digits are drawn independently from 0-9, joined into a string and
    handed to ``EAN13``; the value returned is that object's ``get_fullcode()``.
    """
    digits = []
    for _ in range(12):
        digits.append(str(random.randint(0, 9)))

    code = EAN13(''.join(digits))
    return code.get_fullcode()

Thank you.

study.marearts.com

5/02/2023

Yolo V7 vs V8

V7 vs V8 comparison

https://youtu.be/k1dOZFcLOek

https://youtu.be/tpOGDclq7KY

https://youtu.be/u5qxN2ACEP4

https://youtu.be/85SH08jN4dY

This is a comparison video between yolo v7 and v8.

Here is information for each version

Yolo V7
- Github : https://github.com/WongKinYiu/yolov7
- Model : yolov7x.pt
Yolo V8
- Github : https://github.com/ultralytics/ultralytics
- Model : yolov8x.pt

Testing Computer :

Intel(R) Core(TM) i7-9800X CPU @ 3.80GHz
RTX 4090

Some code that might be useful

YOLOv8: write the detection results to a video file

import cv2
import time
from ultralytics import YOLO

def process_video(model, video_path, output_path):
    """Run a detection model on every frame of a video and save the annotated result.

    Each frame is passed to ``model``, the first result is rendered with
    ``plot()``, the per-frame processing time is drawn on the image and the
    frame is appended to the output video (encoded with the 'mp4v' fourcc).

    Args:
        model: Callable used as ``model(frame)``; the first item of its
            return value must provide ``plot()`` returning the annotated frame.
        video_path: Path of the input video.
        output_path: Path of the annotated output video.
    """
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep the source frame rate as a float: truncating e.g. 29.97 to 29
    # would make the output play at a different speed than the input.
    source_fps = cap.get(cv2.CAP_PROP_FPS)

    # Create a VideoWriter object to save the annotated video
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, source_fps, (width, height))

    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                # End of stream or read failure.
                break

            start_time = time.time()
            results = model(frame)
            processing_time = time.time() - start_time
            # A coarse clock can report zero elapsed time; avoid dividing by zero.
            if processing_time > 0:
                inference_fps = 1 / processing_time
            else:
                inference_fps = float("inf")

            # Visualize the results on the frame
            annotated_frame = results[0].plot()

            # Display the processing time on the annotated frame
            cv2.putText(annotated_frame, f"Processing time: {processing_time:.4f} seconds / {inference_fps:.4f} fps",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

            # Write the annotated frame to the output video
            out.write(annotated_frame)
    finally:
        # Release both handles even if the model or the writer raises.
        cap.release()
        out.release()

def main():
    """Annotate each sample video with the YOLO model and save the results."""
    # One model instance is shared by all videos.
    detector = YOLO('yolov8x.pt')

    # Input clips, processed in this order.
    video_paths = [f"../video/videoplayback-{n}.mp4" for n in range(1, 5)]

    # Output files are numbered from 1, following the input order.
    for index, video_path in enumerate(video_paths, start=1):
        process_video(detector, video_path, f"../video/yolo_88_output_{index}.mp4")

    cv2.destroyAllWindows()


if __name__ == '__main__':
    main()

Place two videos side by side

Combine Two Videos Side by Side with OpenCV python

Thank you! 😺

Pages

5/28/2023

5/23/2023

5/20/2023

5/18/2023

5/04/2023

5/02/2023

V7 vs V8 comparison

Testing Computer :