
Pytorch, Infinite DataLoader using iter & next


# Create an iterator over the DataLoader.
data_iter = iter(data_loader)

# Draw a fixed number of batches, restarting the loader whenever it runs out.
# Alternatively you could use `while True:` for a truly endless loop.
for i in range(NUM_ITERS_YOU_WANT):
    try:
        data = next(data_iter)
    except StopIteration:
        # StopIteration is raised once the iterator is exhausted:
        # build a fresh iterator and take its first batch.
        # (If data_loader yields nothing at all, this second next() raises
        # StopIteration again and the loop stops with that error.)
        data_iter = iter(data_loader)
        data = next(data_iter)

python argparse example

import argparse

import torch

# Build an empty argument namespace without reading sys.argv (handy in
# notebooks): parsing an empty list yields a Namespace with no options set.
parser = argparse.ArgumentParser()
args = parser.parse_args([])

# Attach settings directly as attributes on the namespace.
args.cuda = False
args.show_summary = False
# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
args.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')



find best (optimal) threshold using roc curve

 def plot_roc_curve(fpr, tpr):

    plt.plot(fpr, tpr, color='orange', label='ROC')
    plt.plot([0, 1], [0, 1], color='darkblue', linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')

import numpy as np
from matplotlib import pyplot as plt
from sklearn.metrics import roc_auc_score, roc_curve

# Toy example: ground-truth labels and the scores a classifier assigned.
y_true = np.array([0, 0, 1, 1, 1])
y_scores = np.array([0.0, 0.09, 0.05, 0.75, 1])

fpr, tpr, thresholds = roc_curve(y_true, y_scores)
print(roc_auc_score(y_true, y_scores))

# Pick the threshold that maximizes (TPR - FPR), i.e. the point on the curve
# farthest above the diagonal.
optimal_idx = np.argmax(tpr - fpr)
optimal_threshold = thresholds[optimal_idx]
print("Threshold value is:", optimal_threshold)
plot_roc_curve(fpr, tpr)

Which AUC (area under the curve) values are good?

0.9 ~ 1 : excellent
0.8 ~ 0.9: good
0.7 ~ 0.8 : normal
0.6 ~ 0.7 : poor
0.5 ~ 0.6 : fail

python measure processing time


from time import process_time

# Take a CPU-time reading before the work to be measured.
t1_start = process_time()

# ... put the code you want to time here ...

# Take a second reading afterwards; the difference is the elapsed time in
# seconds as reported by process_time().
t1_stop = process_time()
sec = t1_stop - t1_start


split train test dataset


import random

from sklearn.model_selection import train_test_split


# Hold out 20% of pkl_list as the test split; the rest becomes the train split.
pkl_train, pkl_test = train_test_split(
    pkl_list,
    test_size=0.2,
)

show image in jupyter notebook


from matplotlib import pyplot as plt
import numpy as np
import cv2

img = cv2.imread('xxx.png')  # or image_data
# OpenCV loads color images in BGR channel order while matplotlib expects RGB,
# so reverse the last (channel) axis before displaying.
img2 = img[:, :, ::-1]
plt.imshow(img2)
plt.show()

fix hangul separating issue in mac


from unicodedata import normalize
def nfd2nfc(data):
    """Return *data* normalized to Unicode form NFC.

    NFC composes decomposed sequences (such as the separate Hangul jamo that
    macOS produces in file names) into single precomposed characters.

    Args:
        data: the string to normalize.

    Returns:
        The NFC-normalized string.
    """
    return normalize('NFC', data)


# Decomposed jamo U+1103 + U+1165 compose into the single syllable U+B354.
nfd2nfc('\u1103\u1165')  # -> '\ub354'

-> 더

python change file name, get file name, dir, ext, check file exist in source code using os package


get file name and ext

import os
# basename() strips the directory part and keeps the extension.
base = os.path.basename('/root/dir/sub/file.ext')
# 'file.ext'

# splitext() separates the name from its extension (the dot stays with ext).
name, ext = os.path.splitext(base)
# ('file', '.ext')

get dir

os.path.dirname('/root/dir/sub/file.ext')
# '/root/dir/sub'


change file name 

# Rename a file by giving its current path and the new path.
src = r'C:\Users\Ron\Desktop\Test\Products.txt'
dst = r'C:\Users\Ron\Desktop\Test\Shipped Products.txt'
os.rename(src, dst)

check file exist

os.path.exists('/root/dir/sub/file.ext')
# True if the path exists, otherwise False



sparse tensor to csr_matrix

from scipy.sparse import csr_matrix
import numpy as np

x = val_data.x
dim = len(x)  # length of x along its first axis; used for both matrix dimensions
edge_index = val_data.edge_index
print(edge_index)  # sparse tensor

# Row 0 holds the source indices and row 1 the target indices of each edge.
# .cpu() leaves CPU tensors unchanged and lets .numpy() work when the tensor
# lives on a GPU.
row = edge_index[0].cpu().numpy()
col = edge_index[1].cpu().numpy()
edge_num = len(row)

# Every listed edge gets weight 1 in the (dim x dim) matrix.
data = np.ones(edge_num)
mtx = csr_matrix((data, (row, col)), shape=(dim, dim))

# Densify once and reuse the result for all three printed values.
dense = mtx.toarray()
print(dense, type(dense), dense.shape)

Let's imagine the following situation:

val_data.x holds the node features, e.g. a 13x1000 matrix

val_data.edge_index is the sparse edge index, stored as a torch tensor

now we want to convert it to csr_matrix

The code above is an example of this case.

The print out is like this:

tensor([[ 0,  0,  0,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,  3,  4,
          4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  6,  6,  6,  6,  6,  7,  7,  7,
          8,  8,  8,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11,
         11, 11, 12, 12, 12, 12],
        [ 1,  3, 10,  0,  2,  3, 10, 11,  1,  3, 11, 12,  0,  1,  2, 11, 12,  5,
          6,  8,  9, 11, 12,  4,  6,  7,  8,  9,  4,  5,  7,  9, 10,  5,  6,  8,
          4,  5,  7,  4,  5,  6, 10, 11,  0,  1,  6,  9, 11,  1,  2,  3,  4,  9,
         10, 12,  2,  3,  4, 11]])
[[0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [1. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 1. 0.]
 [0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1.]
 [1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1.]
 [0. 0. 0. 0. 0. 1. 1. 0. 1. 1. 0. 1. 1.]
 [0. 0. 0. 0. 1. 0. 1. 1. 1. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1. 1. 0. 1. 0. 1. 1. 0. 0.]
 [0. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 1. 1. 0.]
 [1. 1. 0. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0.]
 [0. 1. 1. 1. 1. 0. 0. 0. 0. 1. 1. 0. 1.]
 [0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0.]] <class 'numpy.ndarray'> (13, 13)

Thank you
Enjoy Pytorch!


error : Bad config encountered during initialization, when you run jupyter


type this

jupyter notebook --generate-config

run jupyter again

good luck!


image augmentation by python

pip install imgaug
pip install imagecorruptions

github : https://github.com/aleju/imgaug

import numpy as np
import imgaug as ia
import imgaug.augmenters as iaa
import cv2

def agument_rewrite(file_list):
    """Augment each image in *file_list* and overwrite the file in place.

    Every augmenter below is wrapped in ``iaa.Sometimes(0.1, ...)``, so each
    one is applied to a given image with probability 0.1.

    Args:
        file_list: sequence of image file paths. Each readable file is
            replaced on disk by its augmented version.
    """

    def sometimes(aug):
        # Apply `aug` to only about 10% of the images.
        return iaa.Sometimes(0.1, aug)

    seq = iaa.Sequential([
        sometimes(iaa.CropAndPad(percent=(-0.02, 0.02), pad_mode=ia.ALL, pad_cval=(0, 255))),
        # add gaussian noise to images
        sometimes(iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5)),
        sometimes(iaa.Dropout(p=(0, 0.2))),
        sometimes(iaa.CoarseDropout(0.02, size_percent=0.15, per_channel=0.5)),
        sometimes(iaa.Solarize(0.5, threshold=(32, 128))),
        sometimes(iaa.Emboss(alpha=(0.0, 1.0), strength=(0.5, 1.5))),
        sometimes(iaa.ElasticTransformation(alpha=(0, 5.0), sigma=0.25)),
        sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.05))),
        sometimes(iaa.Superpixels(p_replace=0.3, n_segments=500)),
        sometimes(iaa.Rain(speed=(0.1, 0.3))),
        sometimes(iaa.Snowflakes(flake_size=(0.1, 0.4), speed=(0.01, 0.05))),
    ])

    total = len(file_list)
    for i, v in enumerate(file_list):
        img = cv2.imread(v)
        if img is None:
            # cv2.imread returns None when the file is missing or unreadable;
            # skip it rather than crash inside the augmenter.
            print('{}/{} skip (cannot read) : {}'.format(i, total, v))
            continue
        images_aug = seq(images=[img])[0]  # done by the library
        cv2.imwrite(v, images_aug)  # overwrites the original file
        print('{}/{} aug : {}'.format(i, total, v))


Fix indentation in VS Code


  • On Windows Shift + Alt + F
  • On Mac Shift + Option + F
  • On Linux Ctrl + Shift + I