4/30/2023

MLflow example using K-means and the Iris dataset from scikit-learn.

 

refer to code:

.

import json

import matplotlib.pyplot as plt
import mlflow
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score, silhouette_score


# Create the experiment once; creating it again raises an exception
try:
    mlflow.create_experiment(name="iris_experiment")
except mlflow.exceptions.MlflowException:
    print("The experiment already exists")

with mlflow.start_run(experiment_id=mlflow.get_experiment_by_name("iris_experiment").experiment_id):
    iris = datasets.load_iris()

    # Log the dataset shape as an artifact and as a parameter
    length, width = iris['data'].shape
    mlflow.log_dict({"length": length, "width": width}, "shape.json")

    params_dict = {"length": length, "width": width}
    params_json = json.dumps(params_dict)
    mlflow.log_param(key="param", value=params_json)

    BestK = 0
    BestScore = 0
    best_predict_labels = None
    best_centroids = None
    for step, k in enumerate(range(2, 10)):
        print(f'---- k: {k}')
        # Fit the model to the data
        kmeans = KMeans(n_clusters=k, random_state=0)

        X = iris['data']
        kmeans.fit(X)
        centroids = kmeans.cluster_centers_

        # Get the cluster assignments for each data point
        predicted_labels = kmeans.labels_
        true_labels = iris['target']

        # Calculate the Adjusted Rand Index
        ari = adjusted_rand_score(true_labels, predicted_labels)
        print("Adjusted Rand Index:", ari)

        # Calculate the Silhouette Score
        sil_score = silhouette_score(X, predicted_labels)
        print("Silhouette Score:", sil_score)

        # Keep the best k by the average of ARI and silhouette score
        if BestScore < (ari + sil_score) / 2:
            BestScore = (ari + sil_score) / 2
            BestK = k
            best_predict_labels = predicted_labels
            best_centroids = centroids

        # Log metrics; log_metric takes a single float, so log the two scores separately
        mlflow.log_metric(key="kmeans_ari", value=ari, step=step)
        mlflow.log_metric(key="silhouette_score", value=sil_score, step=step)

    print('+++++++++')
    print(f'best score: {BestScore}, k: {BestK}')

    # Plot the label histogram for the best k and log it as an artifact
    fig, ax = plt.subplots()
    ax.hist(best_predict_labels)
    mlflow.log_figure(fig, artifact_file="labels_hist.png")

..


This Python script uses the MLflow library to perform K-means clustering on the Iris dataset, logs the model parameters and metrics, and identifies the best K value (number of clusters) based on the Adjusted Rand Index (ARI) and Silhouette Score. It also generates a histogram plot of the predicted labels for the best K value and saves it as an artifact.

Here's an overview of the code:

1. Import the necessary libraries, including MLflow, scikit-learn, and matplotlib.

2. Create a new MLflow experiment called "iris_experiment" if it doesn't already exist.

3. Start an MLflow run in the context of the "iris_experiment".

4. Load the Iris dataset and log its shape as a dictionary.

5. Iterate through different K values (number of clusters) from 2 to 9 and perform the following steps:

    a. Instantiate and fit a KMeans model with the current K value.

    b. Obtain the predicted cluster assignments and compare them with the true labels using ARI and Silhouette Score.

    c. Log the ARI and Silhouette Score metrics for each K value using MLflow.

    d. Keep track of the best K value based on the average of ARI and Silhouette Score.

6. Print the best K value and its corresponding score.

7. Create a histogram plot of the predicted labels for the best K value and save it as an artifact using MLflow.
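
To inspect what was logged without opening the MLflow UI, here is a minimal sketch using the tracking client (assuming the default local tracking URI):

.

from mlflow.tracking import MlflowClient

client = MlflowClient()
experiment = client.get_experiment_by_name("iris_experiment")

# Print the run id, metrics, and params of every run in the experiment
for run in client.search_runs([experiment.experiment_id]):
    print(run.info.run_id, run.data.metrics, run.data.params)

..

Alternatively, running "mlflow ui" in the project directory serves the tracking dashboard locally.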


Thank you.

www.marearts.com

πŸ™‡πŸ»‍♂️



4/27/2023

Generate a random DXF file and convert it to a CAD image, Python example

 

refer to code:

.

import random

import ezdxf
import matplotlib.pyplot as plt
from matplotlib.patches import Arc  # pyplot does not expose Arc directly

# Random DXF file generator
def random_point():
    return (random.uniform(-100, 100), random.uniform(-100, 100))

def add_random_line(msp):
    start = random_point()
    end = random_point()
    msp.add_line(start, end)

def add_random_circle(msp):
    center = random_point()
    radius = random.uniform(1, 10)
    msp.add_circle(center, radius)

def add_random_arc(msp):
    center = random_point()
    radius = random.uniform(1, 10)
    start_angle = random.uniform(0, 360)
    end_angle = random.uniform(start_angle, start_angle + 360)
    msp.add_arc(center, radius, start_angle, end_angle)

def create_random_dxf(filename):
    doc = ezdxf.new()
    msp = doc.modelspace()

    num_lines = 10
    num_circles = 10
    num_arcs = 10

    for _ in range(num_lines):
        add_random_line(msp)

    for _ in range(num_circles):
        add_random_circle(msp)

    for _ in range(num_arcs):
        add_random_arc(msp)

    doc.saveas(filename)

# DXF to image conversion
def draw_entities(msp):
    for entity in msp:
        if entity.dxftype() == 'LINE':
            x = [entity.dxf.start[0], entity.dxf.end[0]]
            y = [entity.dxf.start[1], entity.dxf.end[1]]
            plt.plot(x, y, 'k-')

        elif entity.dxftype() == 'CIRCLE':
            circle = plt.Circle((entity.dxf.center[0], entity.dxf.center[1]), entity.dxf.radius, edgecolor='k', facecolor='none')
            plt.gca().add_patch(circle)

        elif entity.dxftype() == 'ARC':
            arc = Arc((entity.dxf.center[0], entity.dxf.center[1]), 2 * entity.dxf.radius, 2 * entity.dxf.radius, theta1=entity.dxf.start_angle, theta2=entity.dxf.end_angle, edgecolor='k')
            plt.gca().add_patch(arc)

        # Add more entity types if needed

def dxf_to_image(dxf_file, image_file):
    doc = ezdxf.readfile(dxf_file)
    msp = doc.modelspace()
    draw_entities(msp)

    plt.gca().set_aspect('equal', adjustable='box')
    plt.axis('off')
    plt.savefig(image_file, dpi=300, bbox_inches='tight', pad_inches=0)

# Main program
random_dxf_file = 'random_dxf_file.dxf'
output_image_file = 'output_image.png'

create_random_dxf(random_dxf_file)
dxf_to_image(random_dxf_file, output_image_file)

..


This script first creates a random DXF file using the create_random_dxf function and saves it with the specified filename. Then, it reads the generated DXF file using the dxf_to_image function, draws the entities, and saves the result as a PNG image with the specified filename.

Thank you.
www.marearts.com
πŸ™‡πŸ»‍♂️


4/25/2023

Image augmentation Python sample code

Way #1

pip install imgaug

.

import os
import cv2
import imgaug.augmenters as iaa

def load_image(image_path):
    return cv2.imread(image_path)

def save_image(image, image_path):
    cv2.imwrite(image_path, image)

def apply_augmentation(image, augmenter):
    augmented_image = augmenter(image=image)
    return augmented_image

def augment_images(input_folder, output_folder, augmenter):
    os.makedirs(output_folder, exist_ok=True)

    for image_filename in os.listdir(input_folder):
        input_image_path = os.path.join(input_folder, image_filename)
        output_image_path = os.path.join(output_folder, image_filename)

        image = load_image(input_image_path)
        augmented_image = apply_augmentation(image, augmenter)
        save_image(augmented_image, output_image_path)

# Define the augmentations to apply
augmenter = iaa.Sequential([
    iaa.Affine(rotate=(-5, 5), scale=(0.9, 1.1)),
    iaa.Multiply((0.8, 1.2)),
    iaa.AdditiveGaussianNoise(scale=(10, 30)),
    iaa.PerspectiveTransform(scale=(0.01, 0.1)),
    iaa.PiecewiseAffine(scale=(0.01, 0.05)),
    iaa.SomeOf((0, 2), [
        iaa.GaussianBlur(sigma=(0, 2)),
        iaa.MotionBlur(k=15, angle=[-45, 45]),
        iaa.AddToHueAndSaturation((-20, 20))
    ]),
])

input_folder = "barcodes"
output_folder = "augmented_barcodes"

augment_images(input_folder, output_folder, augmenter)

..



Way #2

pip install albumentations

.

import os
import cv2
from albumentations import (
    Compose,
    Rotate,
    GaussNoise,
    RandomBrightnessContrast,
    RandomGamma,
    HueSaturationValue,
)

def load_image(image_path):
    return cv2.imread(image_path)

def save_image(image, image_path):
    cv2.imwrite(image_path, image)

def apply_augmentation(image, augmentations):
    augmented_image = augmentations(image=image)['image']
    return augmented_image

def augment_images(input_folder, output_folder, augmentations):
    os.makedirs(output_folder, exist_ok=True)

    for image_filename in os.listdir(input_folder):
        input_image_path = os.path.join(input_folder, image_filename)
        output_image_path = os.path.join(output_folder, image_filename)

        image = load_image(input_image_path)
        augmented_image = apply_augmentation(image, augmentations)
        save_image(augmented_image, output_image_path)

# Define the augmentations to apply
augmentations = Compose([
    Rotate(limit=5, p=0.5),
    GaussNoise(var_limit=(10, 50), p=0.5),
    RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    RandomGamma(gamma_limit=(80, 120), p=0.5),
    HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
])

input_folder = "barcodes"
output_folder = "augmented_barcodes"

augment_images(input_folder, output_folder, augmentations)

..


Thank you.

πŸ™‡πŸ»‍♂️

ViT encoder + Transformer decoder model - export ONNX example

refer to this code:

.



# If you want to combine a Vision Transformer (ViT) as an encoder with a Transformer-based decoder,
# you can follow the steps below.
# We will use the Hugging Face Transformers library and PyTorch.

# Install the required libraries:
# pip install torch torchvision transformers onnx

# Define the combined model:
# -----------------------------------------
import torch
import torch.nn as nn
from transformers import ViTModel, ViTConfig, AutoModelForSeq2SeqLM

class ViTTransformer(nn.Module):
    def __init__(self, vit_model, transformer_decoder):
        super(ViTTransformer, self).__init__()
        self.vit = vit_model
        self.transformer_decoder = transformer_decoder

    def forward(self, x, decoder_input_ids, **kwargs):
        encoder_outputs = self.vit(x)
        # Pass decoder_input_ids by keyword: the first positional argument of a
        # Seq2SeqLM forward is the encoder's input_ids, which encoder_outputs replaces here
        outputs = self.transformer_decoder(decoder_input_ids=decoder_input_ids, encoder_outputs=encoder_outputs, **kwargs)
        return outputs
# -----------------------------------------

# Load the ViT and Transformer decoder models:
# Assuming you have a pre-trained ViT model and a pre-trained Transformer decoder model, load them as follows:

# -----------------------------------------
vit_config = ViTConfig()
vit_model = ViTModel(vit_config)
transformer_decoder = AutoModelForSeq2SeqLM.from_pretrained("your-pretrained-transformer-decoder")
# -----------------------------------------

# Create the combined model and load the checkpoint if you have one:
# -----------------------------------------
combined_model = ViTTransformer(vit_model, transformer_decoder)
# -----------------------------------------

# If you have a checkpoint, load it as follows:
# checkpoint = torch.load('path/to/checkpoint.pth')
# combined_model.load_state_dict(checkpoint['model_state_dict'])

# Export the combined model to ONNX format:
# The process of exporting the combined model to ONNX is more complicated due to the dynamic nature of the Transformer-based decoder.
# You might need to modify the export code depending on your specific use case.
# However, here is a general example:

# -----------------------------------------
# Set the combined model to evaluation mode
combined_model.eval()

# Create dummy input tensors with the correct dimensions
# (B x C x H x W) for image input and (B x seq_len) for decoder input
dummy_image_input = torch.randn(1, 3, 224, 224)
dummy_decoder_input = torch.randint(0, transformer_decoder.config.vocab_size, (1, 5))

# Export the combined model to ONNX format
torch.onnx.export(
    combined_model,
    (dummy_image_input, dummy_decoder_input),
    "vit_transformer.onnx",
    input_names=["image_input", "decoder_input"],
    output_names=["output"],
    dynamic_axes={
        "image_input": {0: "batch_size"},
        "decoder_input": {0: "batch_size", 1: "sequence_length"},
        "output": {0: "batch_size", 1: "sequence_length"},
    },
    opset_version=12,
)
# -----------------------------------------

# This code will create an ONNX file (vit_transformer.onnx) containing the combined ViT and Transformer decoder model.
# Note that you might need to adjust the code according to the specific needs of your application.

..
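
To sanity-check the exported file, here is a minimal sketch with onnxruntime (pip install onnxruntime; the dummy shapes match the export above):

.

import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("vit_transformer.onnx")

# Feed inputs under the same names used at export time
outputs = session.run(
    None,
    {
        "image_input": np.random.randn(1, 3, 224, 224).astype(np.float32),
        "decoder_input": np.random.randint(0, 100, size=(1, 5)).astype(np.int64),
    },
)
print([o.shape for o in outputs])

..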

Thank you.πŸ™‡πŸ»‍♂️

4/24/2023

Image to Icon converting python example

refer to code: 


.

# pip install pillow

from PIL import Image

def convert_to_ico(input_image_path, output_ico_path, size=(32, 32)):
    image = Image.open(input_image_path)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the equivalent filter
    image = image.resize(size, Image.LANCZOS)
    image.save(output_ico_path, 'ICO')

input_image_path = "path/to/your/input_image.png"
output_ico_path = "path/to/your/output_icon.ico"

convert_to_ico(input_image_path, output_ico_path)


..
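
ICO files commonly bundle several resolutions; Pillow's ICO writer accepts a sizes argument, so here is a hedged variant that stores multiple sizes in one file:

.

from PIL import Image

def convert_to_multisize_ico(input_image_path, output_ico_path):
    image = Image.open(input_image_path)
    # Pillow generates each listed size from the source image
    image.save(output_ico_path, format='ICO', sizes=[(16, 16), (32, 32), (48, 48), (64, 64)])

convert_to_multisize_ico("path/to/your/input_image.png", "path/to/your/multi_size_icon.ico")

..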



Thank you.

πŸ™‡πŸ»‍♂️

4/22/2023

Find duplicated strings in a Python list, sample source code:

 


refer to code:


.


def find_duplicates(input_list):
    seen = set()
    duplicates = set()
    for item in input_list:
        if item in seen:
            duplicates.add(item)
        else:
            seen.add(item)
    return list(duplicates)

input_list = ["apple", "orange", "banana", "apple", "orange", "grape"]
duplicates = find_duplicates(input_list)
print(duplicates)

>>>>>>>>>>>>>>>>>>
['apple', 'orange']

..
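
The same result can also come from collections.Counter in the standard library; here is a minimal sketch:

.

from collections import Counter

input_list = ["apple", "orange", "banana", "apple", "orange", "grape"]

# Keep every item that appears more than once
duplicates = [item for item, count in Counter(input_list).items() if count > 1]
print(duplicates)  # ['apple', 'orange']

..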


Thank you.

πŸ™‡πŸ»‍♂️

4/21/2023

Get all txt (or other specific) files in subfolders, Python code example

 refer to code:


.

import glob
import os

def find_txt_files():
    current_dir = os.getcwd()
    txt_files = glob.glob(os.path.join(current_dir, '**', '*.txt'), recursive=True)

    for txt_file in txt_files:
        print(txt_file)

if __name__ == "__main__":
    find_txt_files()

..
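
For newer code, pathlib offers the same recursive search in one line; a minimal sketch:

.

from pathlib import Path

# rglob recurses through all subfolders of the current directory
for txt_file in Path.cwd().rglob('*.txt'):
    print(txt_file)

..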


Thank you.

πŸ™‡πŸ»‍♂️


4/17/2023

Python script that applies Canny edge detection, filters contours by a dynamic threshold, inverts the image, and applies binary thresholding to create a mask for all images in a directory.

refer to code

 

.

import os
import cv2
from tqdm import tqdm
import numpy as np


# Path to input and output directories
input_dir = './background'
output_dir = './background_canny'

# Create the output directory if it doesn't exist
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Loop over all files in the input directory
for filename in tqdm(os.listdir(input_dir)):
    if filename.endswith('.jpg') or filename.endswith('.png'):
        # Load the input image
        img = cv2.imread(os.path.join(input_dir, filename), cv2.IMREAD_GRAYSCALE)
        # Apply Canny edge detection algorithm
        edges = cv2.Canny(img, 100, 200)
        # Find and draw the contours of the edge map
        contours, hierarchy = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        contours_filtered = []
        threshold = int(min(0.1 * img.shape[0], 0.1 * img.shape[1]))
        for cnt in contours:
            if cv2.arcLength(cnt, True) >= threshold:
                contours_filtered.append(cnt)
        edges_contours = cv2.drawContours(np.zeros_like(img), contours_filtered, -1, 255, 1)

        # Invert the image (black to white, white to black)
        img_inv = cv2.bitwise_not(edges_contours)
        # Apply binary thresholding to the image
        thresh, img_binary = cv2.threshold(img_inv, 127, 255, cv2.THRESH_BINARY)

        # Save the result in the output directory with the same file name
        output_path = os.path.join(output_dir, filename)
        cv2.imwrite(output_path, img_binary)


..




Thank you.

πŸ™‡πŸ»‍♂️

www.marearts.com



Python OpenCV Canny edge source code

 refer to code:


.

import cv2

# Load an image
img = cv2.imread('input_image.jpg', cv2.IMREAD_GRAYSCALE)

# Apply Canny edge detection algorithm
edges = cv2.Canny(img, 100, 200)

# Display the input image and the edge map
cv2.imshow('Input Image', img)
cv2.imshow('Canny Edges', edges)
cv2.waitKey(0)
cv2.destroyAllWindows()


..






Thank you.

πŸ™‡πŸ»‍♂️

4/16/2023

Food order forecasting with RandomForestRegressor, DecisionTreeRegressor, and LinearRegression

 refer to code:



.

from math import sqrt

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
import warnings
warnings.filterwarnings('ignore')

features = pd.read_csv('./features.csv')
label = pd.read_csv('./label.csv')

# Preview the data (only useful in a notebook; harmless in a script)
features.head()
label.head()

#------------------------------------ data split ---------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(features, label, test_size=0.20, random_state=33)

#------------------------------------ RandomForestRegressor ---------------------------------------------
RFRmodel = RandomForestRegressor(max_depth=3, random_state=0)
RFRmodel.fit(X_train, y_train)
y_pred = RFRmodel.predict(X_test)

print('RandomForestRegressor')
print("R2 score :", r2_score(y_test, y_pred))
print("MSE score :", mean_squared_error(y_test, y_pred))
print("RMSE: ", sqrt(mean_squared_error(y_test, y_pred)))
print('')

#------------------------------------ DecisionTreeRegressor ---------------------------------------------
DTRmodel = DecisionTreeRegressor(max_depth=3, random_state=0)
DTRmodel.fit(X_train, y_train)
y_pred = DTRmodel.predict(X_test)

print('DecisionTreeRegressor')
print("R2 score :", r2_score(y_test, y_pred))
print("MSE score :", mean_squared_error(y_test, y_pred))
print("RMSE: ", sqrt(mean_squared_error(y_test, y_pred)))
print('')

#------------------------------------ LinearRegression ---------------------------------------------
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('LinearRegression')
print("R2 score :", r2_score(y_test, y_pred))
print("MSE score :", mean_squared_error(y_test, y_pred))
print("RMSE: ", sqrt(mean_squared_error(y_test, y_pred)))
print('')



..
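
To eyeball the forecast quality beyond the printed scores, here is a small follow-up sketch that plots predicted against actual values for the random forest model (assuming the variables from the script above are still in scope):

.

import matplotlib.pyplot as plt

# Points near the diagonal indicate accurate forecasts
y_pred = RFRmodel.predict(X_test)
plt.scatter(y_test, y_pred, alpha=0.5)
plt.xlabel('Actual orders')
plt.ylabel('Predicted orders')
plt.title('RandomForestRegressor: actual vs. predicted')
plt.show()

..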


You can download the dataset from here:

https://www.marearts.com/Tea-Time-Computer-Vision-6bc925c53d46412691096825bfe0317a?p=004ca41eee0948c49f979016b6a31de8&pm=s


Thank you.

www.marearts.com

πŸ™‡πŸ»‍♂️

Example Python source code for converting a NumPy ndarray to a pandas DataFrame

 refer to code:



.

import numpy as np
import pandas as pd

# Create a numpy ndarray
array = np.random.rand(5, 3)
print('array: \n', array)

# Convert the numpy ndarray to a pandas DataFrame
df = pd.DataFrame(array, columns=['Column1', 'Column2', 'Column3'])

# Print the DataFrame
print('df: \n',df)

..



Thank you.

www.marearts.com

πŸ™‡πŸ»‍♂️

4/14/2023

Get YouTube url list from YouTube playlist url.

refer to code: 

.

from pytube import Playlist

# Replace with your playlist URL
playlist_url = 'https://www.youtube.com/playlist?list=yourlist'

playlist = Playlist(playlist_url)

# Fetch video URLs
video_urls = playlist.video_urls

# Print video URLs
for url in video_urls:
    print(url)

..
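
This combines naturally with the video download snippet in the next post; here is a hedged sketch that downloads every video in a playlist (assuming pytube is installed):

.

from pytube import Playlist

playlist = Playlist('https://www.youtube.com/playlist?list=yourlist')

# Download the highest-resolution progressive stream of each video
for video in playlist.videos:
    video.streams.get_highest_resolution().download()
    print(f'Downloaded: {video.title}')

..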


www.marearts.com

πŸ™‡πŸ»‍♂️

Download YouTube video - I fall in love too easily



Download app

πŸ—“️ Version 2.0 - 2023-04-29

πŸ™…πŸ½ Don't worry there is no virus!! It's very clean code.

πŸ“¦ Download link -> Buy me a coffee : https://www.buymeacoffee.com/trurg28/e/131820



refer to code 

.

#pip install pytube
from pytube import YouTube

# Replace the URL below with the URL of the video you want to download
video_url = 'https://www.youtube.com/watch?v=YOUR_VIDEO_ID'

# Creating a YouTube object
yt = YouTube(video_url)

# Getting the highest resolution video stream
video = yt.streams.get_highest_resolution()

# Downloading the video
video.download()

print("Video downloaded successfully.")

..


You can find the source code here:

https://study.marearts.com/2023/09/download-youtube-video-url-to-local.html


www.marearts.com

πŸ™‡πŸ»‍♂️

4/13/2023

Create the output folder if it doesn't exist, Python example

 refer to code:


.

import os

# Create the output folder if it doesn't exist
output_folder = os.path.dirname(output_file)
os.makedirs(output_folder, exist_ok=True)

..
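
The pathlib equivalent is a one-liner; a minimal sketch (assuming output_file holds the target file path, as above):

.

from pathlib import Path

# parents=True creates intermediate folders; exist_ok=True suppresses the error if they exist
Path(output_file).parent.mkdir(parents=True, exist_ok=True)

..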


Thank you.

πŸ™‡πŸ»‍♂️

Count the number of objects (files) in a certain S3 folder. Python and CLI example

 refer to code.

.

import boto3

def count_files_in_bucket(bucket_name, prefix=''):
    s3 = boto3.client('s3')
    paginator = s3.get_paginator('list_objects_v2')
    file_count = 0

    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        # 'Contents' is absent when a page has no matching objects
        for obj in page.get('Contents', []):
            if not obj['Key'].endswith('/'):
                file_count += 1
    return file_count


bucket_name = 'your_bucket_name'
prefix = 'sub_folder_name'
file_count = count_files_in_bucket(bucket_name, prefix)
print(f'There are {file_count} files in the "{prefix}" folder of the "{bucket_name}" bucket.')

..
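
The post title also promises a CLI version; with the AWS CLI installed and configured, a one-liner sketch:

.

aws s3 ls s3://your_bucket_name/sub_folder_name/ --recursive | wc -l

..

Note that this counts every listed object under the prefix, including zero-byte folder markers, so it can differ slightly from the Python count above.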


πŸ™‡πŸ»‍♂️

Thank you.

www.marearts.com



4/12/2023

The long-tail problem in an unbalanced dataset

The long-tail problem in an unbalanced dataset is a situation where a few classes have a large number of samples, while a majority of classes have few samples. This can lead to biased models that perform poorly on underrepresented classes. To address this issue, you can use various techniques, including:

  1. Resampling methods:
    a. Oversampling: Increase the number of instances in the underrepresented classes by creating copies or generating synthetic samples.
    • Random oversampling: Duplicate random instances from the minority classes.
    • Synthetic Minority Over-sampling Technique (SMOTE): Generate synthetic samples by interpolating between instances in the minority class (a sketch follows below).
    • Adaptive Synthetic (ADASYN): Similar to SMOTE, but with a focus on generating samples for difficult-to-classify instances.
    b. Undersampling: Reduce the number of instances in the overrepresented classes.
    • Random undersampling: Randomly remove instances from the majority class.
    • Tomek links: Identify and remove majority class instances that are close to minority class instances.
    • Neighborhood Cleaning Rule (NCR): Remove majority class instances that are misclassified by their nearest neighbors.
  2. Cost-sensitive learning: Assign higher misclassification costs to underrepresented classes during the training process, encouraging the model to be more sensitive to these classes.
  3. Ensemble methods: Combine multiple models to improve classification performance.
    a. Balanced Random Forest: A variation of the Random Forest algorithm that balances the class distribution by either undersampling the majority class or oversampling the minority class in each tree.
    b. EasyEnsemble: Train an ensemble of classifiers, each using a random under-sampling of the majority class.
    c. RUSBoost: An adaptation of the boosting algorithm that incorporates random under-sampling of the majority class during the training process.
  4. Transfer learning: Pre-train a model on a balanced dataset or a dataset from a related domain, then fine-tune it on the imbalanced dataset.
  5. Evaluation metrics: Use appropriate evaluation metrics such as precision, recall, F1-score, or the area under the precision-recall curve (AUPRC) to measure the model's performance on the minority class. This helps ensure that the model's performance is not skewed by the imbalanced class distribution.

Remember to experiment with different techniques to find the best approach for your specific dataset and problem.
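
As a concrete example for item 1a, here is a minimal SMOTE sketch using the imbalanced-learn package (pip install imbalanced-learn; the toy dataset is an assumption for illustration):

.

from collections import Counter

from imblearn.over_sampling import SMOTE
from sklearn.datasets import make_classification

# Build a toy 2-class dataset with a 9:1 imbalance
X, y = make_classification(n_samples=1000, n_classes=2, weights=[0.9, 0.1], random_state=0)
print('before:', Counter(y))

# SMOTE synthesizes new minority-class samples by interpolating between neighbors
X_resampled, y_resampled = SMOTE(random_state=0).fit_resample(X, y)
print('after:', Counter(y_resampled))

..

For item 2, many scikit-learn estimators expose the same idea through a class_weight='balanced' argument, which scales each class's loss contribution inversely to its frequency.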


www.marearts.com

πŸ™‡πŸ»‍♂️

4/09/2023

Combine Two Videos Side by Side with OpenCV python

 refer to code:

.

import cv2

# Open the two video files
video1 = cv2.VideoCapture('video1.mp4')
video2 = cv2.VideoCapture('video2.mp4')

# Get video properties
width1 = int(video1.get(cv2.CAP_PROP_FRAME_WIDTH))
height1 = int(video1.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps1 = video1.get(cv2.CAP_PROP_FPS)

width2 = int(video2.get(cv2.CAP_PROP_FRAME_WIDTH))
height2 = int(video2.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps2 = video2.get(cv2.CAP_PROP_FPS)

# Check if videos have the same FPS and height
assert fps1 == fps2, "Videos should have the same FPS"
assert height1 == height2, "Videos should have the same height"

# Create a VideoWriter object to save the combined video
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output.mp4', fourcc, fps1, (width1 + width2, height1))

while video1.isOpened() and video2.isOpened():
    ret1, frame1 = video1.read()
    ret2, frame2 = video2.read()

    if not ret1 or not ret2:
        break

    # Concatenate the frames side by side
    combined_frame = cv2.hconcat([frame1, frame2])

    # Write the combined frame to the output video
    out.write(combined_frame)

    # Display the combined frame
    cv2.imshow('Combined Video', combined_frame)

    # Press 'q' to stop the process and close the window
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the video files and the output video
video1.release()
video2.release()
out.release()

cv2.destroyAllWindows()

..
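
The script asserts that both videos share the same height; if they don't, a hedged pre-step is to resize the second frame to match before concatenating (a drop-in replacement for the hconcat line above):

.

# Resize frame2 to frame1's height, preserving its aspect ratio
scale = frame1.shape[0] / frame2.shape[0]
new_width = int(frame2.shape[1] * scale)
frame2_resized = cv2.resize(frame2, (new_width, frame1.shape[0]))
combined_frame = cv2.hconcat([frame1, frame2_resized])

..

The VideoWriter's frame size would then need to be computed from the resized width as well.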


Thank you.

www.marearts.com

πŸ™‡πŸ»‍♂️