12/13/2019

python OrderedDict to dict


import json
from collections import OrderedDict

od = OrderedDict([('name', 'signature'), ('pose', 'Unspecified'), ('bndbox', OrderedDict([('xmin', '423'), ('ymin', '1237'), ('xmax', '785'), ('ymax', '1349')]))])
print(od)

#json round trip converts the nested OrderedDicts into plain dicts
output_dict = json.loads(json.dumps(od))
print(output_dict)
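Note that a plain dict(od) only converts the top level; the nested 'bndbox' value stays an OrderedDict, which is why the json round trip above is handy. A minimal check:

plain = dict(od)
print(type(plain['bndbox']))  #still OrderedDict, so use the json round trip for nested data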





reference is here : https://stackoverflow.com/questions/20166749/how-to-convert-an-ordereddict-into-a-regular-dict-in-python3

12/08/2019

keras model summary, print out each layer's properties

from keras.layers import Input
from keras.applications import VGG16
vgg_model = VGG16(weights="imagenet", include_top=False, input_tensor=Input(shape=(224, 224, 3))) #include_top=False leaves the head FC layers off

#how many layers
print(len(vgg_model.layers))



#last layer
print(vgg_model.layers[-1].name)



#print every layer
for idx, layer in enumerate(vgg_model.layers):
    print(idx+1, '-----')
    print(layer.output_shape)
    print(layer.name)
    print('--------')



#summary of model
vgg_model.summary()
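It can also be handy to look at one layer in detail. A minimal sketch (the layer name here is just an assumption, take a real name from the summary output):

#inspect a single layer by name
layer = vgg_model.get_layer('block5_conv3')
print(layer.output_shape)
print([w.shape for w in layer.get_weights()])  #kernel and bias shapes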



12/06/2019

using a specific gpu device for TensorFlow

setting the first or second gpu when launching a script
CUDA_VISIBLE_DEVICES=0 python script_one.py
CUDA_VISIBLE_DEVICES=1 python script_two.py
another way (note: the variable name must be CUDA_VISIBLE_DEVICES with a trailing S; the singular CUDA_VISIBLE_DEVICE is silently ignored, which is why it didn't work for me at first)
use first
export CUDA_VISIBLE_DEVICES=0
./train.py
use second
export CUDA_VISIBLE_DEVICES=1
./train.py
use both
export CUDA_VISIBLE_DEVICES=0,1
./train.py
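Another option is to set the variable from Python itself, before TensorFlow is imported. A minimal sketch (not from the original post):

import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'  #must be set before importing tensorflow

import tensorflow as tf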

refer to here:
https://stackoverflow.com/questions/44135538/tensorflow-using-2-gpu-at-the-same-time

12/05/2019

monitoring gpu status in a terminal



pip install gpustat
>gpustat -cp




monitoring continuously
>watch -n 0.5 -c gpustat -cp --color


12/03/2019

find pdf files (or files with some other extension) in a directory tree and copy them to another directory, python sample code


import os
import glob
from shutil import copyfile


files = []
start_dir = '/Volumes/input/'
output_path = '/Volumes/output/'
pattern = "*.pdf"

total = 0
#collect all matching files under start_dir
for dir, _, _ in os.walk(start_dir):
    files.extend(glob.glob(os.path.join(dir, pattern)))

#copy every found pdf into the output directory
for i, v in enumerate(files):
    #found pdf file
    print(total, i, v)
    #extract filename only
    filename = v.split('/')[-1]
    #make new filename and output path
    output_filename = output_path + str(total) + '_' + filename
    #if the file already exists, don't copy
    exist = glob.glob(output_filename)
    if len(exist) == 0:
        copyfile(v, output_filename)
        #print out copied filename
        print('copy! : ', output_filename)
    #increase global count
    total += 1
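Roughly the same search can be written more compactly with pathlib's recursive glob. A short sketch under the same assumptions (hypothetical input/output paths):

from pathlib import Path
from shutil import copyfile

start_dir = Path('/Volumes/input/')
output_path = Path('/Volumes/output/')

for total, pdf in enumerate(start_dir.rglob('*.pdf')):
    dst = output_path / '{}_{}'.format(total, pdf.name)
    #skip files that were already copied
    if not dst.exists():
        copyfile(str(pdf), str(dst))
        print('copy! : ', dst)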

11/29/2019

download zip file from url, python sample code




from bs4 import BeautifulSoup
import requests
import os
import sys

def downloadZip(url, prefix_url, outpath):
    mbyte = 1024*1024
    html = requests.get(url).text
    soup = BeautifulSoup(html, features='lxml')

    for name in soup.findAll('a', href=True):
        #find A tag
        zipurl = name['href']
        #check file extension
        if zipurl.endswith('.zip'):
            #make download path
            outfname = outpath + '/' + zipurl.split('/')[-1]
            #make full url
            zipurl = prefix_url + zipurl  #http://aaa.com/ + 'abc.zip'
            print(zipurl)
            r = requests.get(zipurl, stream=True)
            if r.status_code == requests.codes.ok:
                fsize = int(r.headers['content-length'])
                print('Downloading %s (%sMb)' % (outfname, fsize/mbyte))
                with open(outfname, 'wb') as fd:
                    for chunk in r.iter_content(chunk_size=1024):  # chunk size can be larger
                        if chunk:  # ignore keep-alive chunks
                            fd.write(chunk)

base_path = os.getcwd()
path_join = os.path.join(base_path, 'data_download_pdf')
os.makedirs(path_join, exist_ok=True)  #make sure the output directory exists

# point to output directory
outpath = path_join
url = 'https://www.gsa.gov/real-estate/real-estate-services/leasing-policy-procedures/lease-documents/lease-documents-region-1-new-england/'
prefix_url = 'https://www.gsa.gov/cdnstatic'

downloadZip(url, prefix_url, outpath)








11/24/2019

python code to verify a website url is reachable


import requests
import urllib.request

url = 'http://www.marearts.com'

try:
    resp = requests.get(url, verify=False)
    print(url, resp.status_code)
except Exception:
    print('fail to access')

try:
    resp = urllib.request.urlopen(url)
    print(url, resp.getcode())
except Exception:
    print('fail to access')
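requests prints an InsecureRequestWarning for every call made with verify=False; if that noise is unwanted it can be silenced (a small sketch, assuming the usual urllib3 package is installed):

import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)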

11/20/2019

All configured authentication methods failed -> vscode sftp setting

Try using this configuration in the sftp.json file.

{
    "name": "test",
    "protocol": "sftp",
    "host": "ec2-114-24-120-84.eu-west-4.compute.amazonaws.com",
    "remotePath": "/home/ubuntu/ABCD",
    "privateKeyPath": "/Users/ABD/ec2_aws.pem",
    "username": "ubuntu",
    "port": 22,
    "secure": true,
    "uploadOnSave": true,
    "passive": false,
    "debug": true,
    "ignore": [
        "\\.vscode",
        "\\.git",
        "\\.DS_Store"
    ],
    "generatedFiles": {
        "uploadOnSave": true,
        "extensionsToInclude": [],
        "path": "./"
    }
}

11/16/2019

yum install -y https://centos7.iuscommunity.org/ius-release.rpm -> "nothing provides epel-release" error

yum install -y https://centos7.iuscommunity.org/ius-release.rpm
but you get this error:

Error:
 Problem: conflicting requests
  - nothing provides epel-release = 7 needed by ius-release-2-1.el7.ius.noarch
(try to add '--skip-broken' to skip uninstallable packages or '--nobest' to use not only best candidate packages)


Try to install this one first.

yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
then
yum install -y https://centos7.iuscommunity.org/ius-release.rpm


Thank you.

yum install unxz -> Error: Nothing to do

use this one:

yum install xz

yum install wkhtmltopdf -> No package wkhtmltopdf available.


wget https://github.com/wkhtmltopdf/wkhtmltopdf/releases/download/0.12.4/wkhtmltox-0.12.4_linux-generic-amd64.tar.xz
unxz wkhtmltox-0.12.4_linux-generic-amd64.tar.xz
tar -xvf wkhtmltox-0.12.4_linux-generic-amd64.tar
mv wkhtmltox/bin/* /usr/local/bin/
rm -rf wkhtmltox
rm -f wkhtmltox-0.12.4_linux-generic-amd64.tar



reference :
https://github.com/JazzCore/python-pdfkit
https://gist.github.com/paulsturgess/cfe1a59c7c03f1504c879d45787699f5
https://gist.github.com/AndreasFurster/ebe3f163d6d47be43b72b35b18d8b5b6

11/15/2019

rectangle intersection check using an image (python)



1. make a blank image as a numpy array
2. add 1 to every element inside each box
3. check whether any element value is bigger than 1

Thank you.


import numpy as np

#make a blank image (height, width and box_list come from your own detection results)
img = np.zeros((int(height), int(width), 1), dtype="uint8")
#add 1 inside every box
for i, v in enumerate(box_list):
    left = int(v['bbox'][0])
    top = int(v['bbox'][1])
    right = int(v['bbox'][2])
    bottom = int(v['bbox'][3])
    img[top:bottom, left:right] += 1

#check overlap
imgv = img.reshape(int(height)*int(width))
if sum(imgv > 1) > 0:
    print('intersection found')
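The same overlap test can be written with a single numpy call, which avoids the reshape (just an equivalent sketch):

#equivalent overlap check
if np.any(img > 1):
    print('intersection found')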

11/13/2019

split a PDF and save each page to its own pdf, python

#pip3 install PyMuPDF
import fitz
doc = fitz.open('./test2.pdf')

#page_number = doc.pageCount
#print(page_number)

#split pages: copy only page i into a new document
for i, page in enumerate(doc.pages()):
    print(i)
    doc2 = fitz.open()
    doc2.insertPDF(doc, from_page=i, to_page=i)
    doc2.save("{}.pdf".format(i))

10/27/2019

Python: Create in Memory Zip File

simple example to make zip in memory

..
import io
import zipfile

def makeZip(data_list):
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED, False) as zip_file:
        for file_name, data in data_list:
            zip_file.writestr(file_name, data.getvalue())
    return zip_buffer

data_list = [('1.txt', io.BytesIO(b'111')), ('2.txt', io.BytesIO(b'222'))]
zip_buffer = makeZip(data_list)
with open('./b.zip', 'wb') as f:
    f.write(zip_buffer.getvalue())
..
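To confirm the buffer really is a valid archive, it can be read back with zipfile before (or instead of) writing it to disk. A small sketch:

import zipfile
print(zipfile.ZipFile(zip_buffer).namelist())  #expect ['1.txt', '2.txt']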

Thank you.

10/19/2019

Byte (base64 string) to OpenCV Mat


Refer to the source code below. ^^

import base64
import numpy as np
import cv2

def byte2Mat(data):
    imgdata = base64.b64decode(data)
    nparr = np.frombuffer(imgdata, np.uint8)
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    print(img.shape)
    
    return img    
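The opposite direction (OpenCV Mat back to base64 bytes) is just cv2.imencode plus base64.b64encode. A minimal sketch (the function name is my own, not from the original post):

def mat2Byte(img, ext='.jpg'):
    #encode the Mat to jpg bytes, then to base64
    ok, buf = cv2.imencode(ext, img)
    return base64.b64encode(buf.tobytes())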

8/24/2019

python string encryption, decryption - example code


from cryptography.fernet import Fernet

def encrypt(message: bytes, key: bytes):
    return Fernet(key).encrypt(message)

def decrypt(token: bytes, key: bytes):
    return Fernet(key).decrypt(token)

key = Fernet.generate_key()  # store in a secure location
#ex) key is 'Fn1dPza4Gchl7KpPE4kz2oJEMFXYG39ykpSLcsT1icU='

message = 'This is a secret string'
#encryption
enstr = encrypt(message.encode(), key)
#decryption
destr = decrypt(enstr, key).decode()

print('input:',  message)
print('encryption:', enstr)
print('decryption:', destr)



8/21/2019

get similarity between two graphs

Basically, this example uses the networkx python library.
I made two very simple graphs, G1 and G2.

Let's look at them here:



The nx.graph_edit_distance function calculates how many edit operations are needed before one graph becomes isomorphic to the other; that count is the function's return value.

Check the example code.

..
#https://stackoverflow.com/questions/11804730/networkx-add-node-with-specific-position
#https://stackoverflow.com/questions/23975773/how-to-compare-directed-graphs-in-networkx

import matplotlib.pyplot as plt
import networkx as nx
G1=nx.Graph()
G1.add_node(1,pos=(1,1))
G1.add_node(2,pos=(2,2))
G1.add_node(3,pos=(3,1))
G1.add_edge(1,2)
G1.add_edge(1,3)

pos=nx.get_node_attributes(G1,'pos')
plt.figure('graph1')
nx.draw(G1,pos, with_labels=True)

G2=nx.Graph()
G2.add_node(1,pos=(10,10))
G2.add_node(2,pos=(20,20))
G2.add_node(3,pos=(30,10))
G2.add_node(4,pos=(40,30))
G2.add_edge(1,2)
G2.add_edge(1,3)
G2.add_edge(1,4)
pos2=nx.get_node_attributes(G2,'pos')
plt.figure('b')
nx.draw(G2,pos2, with_labels=True)

dist = nx.graph_edit_distance(G1, G2)
print(dist)

plt.show()
..
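graph_edit_distance can get slow as the graphs grow; newer networkx versions also provide optimize_graph_edit_distance, which yields successively better approximations. A sketch, assuming a networkx version that ships this function:

#take the first, coarse approximation instead of waiting for the exact value
approx = next(nx.optimize_graph_edit_distance(G1, G2))
print(approx)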

8/20/2019

compare text using fuzzy wuzzy in python

Just refer to this example. It's simple and very useful.

#pip install fuzzywuzzy
from fuzzywuzzy import process
candidate = ["Atlanta Falcons", "New York Jetss", "New York Giants", "Dallas Cowboys"]
search = "new york jets"
r1 = process.extract(search, candidate)
#r1 = process.extract(search, candidate, limit=3)
search = "cowboys"
r2 = process.extractOne(search, candidate)
search = "new york jets"
r3 = process.extractBests(search, candidate, score_cutoff=70)
print(r1)
#[('New York Jetss', 96), ('New York Giants', 79), ('Atlanta Falcons', 29), ('Dallas Cowboys', 22)]
print(r2)
#('Dallas Cowboys', 90)
print(r3)
#[('Dallas Cowboys', 90)]
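If you only need a pairwise score between two strings (no candidate list), the fuzz module can be used directly. A small sketch:

from fuzzywuzzy import fuzz
print(fuzz.ratio("new york jets", "New York Jetss"))      #similarity score 0-100
print(fuzz.partial_ratio("cowboys", "Dallas Cowboys"))    #substring-style match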


8/08/2019

PIL to string, string to PIL (python)

Here is a simple example of source code for it:

PIL to string(base64)
- PIL open image
- image to byte
- byte to string (base64)

string(base64) to PIL
- string to byte
- PIL load byte

--
import base64
import io
from PIL import Image

#open file using PIL
pil_img = Image.open('IMG_0510.jpg')
width, height = pil_img.size
print(width, height)


#get image data as byte
buffer = io.BytesIO()
pil_img.save(buffer, format=pil_img.format)
buffer_value = buffer.getvalue()

#byte to string
base64_str = base64.b64encode(buffer_value)

#read string to image buffer
buffer2 = base64.b64decode(base64_str)
pil_img2 = Image.open(io.BytesIO(buffer2))
width2, height2 = pil_img2.size
print(width2, height2)


#check first & second image
pil_img.show()
pil_img2.show()
--

Here is another piece of source code, for:
OpenCV -> PIL -> resize -> OpenCV

http://study.marearts.com/2019/06/opencv-pil-resize-opencv.html

7/30/2019

Simple example for CNN + MNIST + Keras, Tensorboard, save model, load model

Training Code CNN + MNIST

..

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Input
from keras.layers import Conv2D, MaxPooling2D

"""Build CNN Model"""
num_classes = 10
input_shape = (28, 28, 1) #mnist channels first format

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adadelta(), metrics=['accuracy'])
model.summary()

"""Download MNIST Data"""
from keras.datasets import mnist
import numpy as np

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

#(60000, 28, 28) -> (60000, 28, 28, 1)
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = np.reshape(x_train, (len(x_train), 28, 28, 1))  # adapt this if using `channels_first` image data format
x_test = np.reshape(x_test, (len(x_test), 28, 28, 1))  # adapt this if using `channels_first` image data format

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

"""Show some images"""
import matplotlib.pyplot as plt
row = 10
col = 10
n = row * col
plt.figure(figsize=(4, 4))
for i in range(n):
    # display original
    #https://jakevdp.github.io/PythonDataScienceHandbook/04.08-multiple-subplots.html
    ax = plt.subplot(row, col, i+1)
    plt.imshow(x_test[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()

"""set up tensorboard"""
from datetime import datetime
import os
logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
os.makedirs(logdir, exist_ok=True)
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

"""Train model"""
from keras.callbacks import TensorBoard
batch_size = 128
epochs = 1
model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, shuffle=True, validation_data=(x_test, y_test), callbacks=[TensorBoard(log_dir=logdir)])
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

"""test one image data"""
x_test[0].shape
one_image = x_test[0].reshape(1,28,28,1)
y_pred_all = model.predict(one_image)
y_pred_it = model.predict_classes(one_image)
print(y_pred_all, y_pred_it)
plt.imshow(x_test[0].reshape(28, 28))
plt.show()

"""save model to drive"""
model.save('my_cnn_mnist_model.h5')


..
CNN network Layout


Dataset


Run Tensorboard

>cd ./logs/scalars/20190730-105257
>tensorboard --logdir=./


almost 99% accuracy



Load Model and test one mnist image
...
"""load model from drive"""
from keras.models import load_model
new_model = load_model('my_cnn_mnist_model.h5')
"""load 1 image from drive"""
from PIL import Image
import numpy as np
"""test prediction"""
img_path = './mnist_7_450.jpg'
img = Image.open(img_path) #.convert("L")
img = np.resize(img, (28,28,1))
im2arr = np.array(img)
im2arr = im2arr.reshape(1,28,28,1)
y_pred = new_model.predict_classes(im2arr)
print(y_pred)

...

Test image


output
[7]


Download mnist jpeg files here: http://study.marearts.com/2015/09/mnist-image-data-jpg-files.html



7/01/2019

Check if a string matches a regular expression pattern in python

Simple code for checking whether a string matches a certain pattern.

import re

file3 = 'keyvalue_reference_1.json'
pattern = re.compile(r"keyvalue_reference_[0-9]+\.json")
m = pattern.match(file3)

if m:
    print('matched')
else:
    print('non matched')
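Note that pattern.match only anchors at the start of the string; to require the whole filename to match, fullmatch is stricter (a sketch, Python 3.4+):

if pattern.fullmatch(file3):
    print('whole string matched')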


Thank you.

AWS S3: get the object list in a subfolder by python code, using the s3_client.list_objects function

This is my s3 folder structure





This is code to get the file list in a certain subfolder.

import boto3

#get boto3 client instance
s3_client = boto3.client(
        's3',
        aws_access_key_id=ACCESS_KEY,
        aws_secret_access_key=SECRET_KEY,
    )

#get object list
contents = s3_client.list_objects(Bucket='test-can-delete-anyone', Prefix='folder1/subfolder1')['Contents']
for object in contents:
     print(object['Key'])


result
folder1/subfolder1/
folder1/subfolder1/4_kitchen.jpg
folder1/subfolder1/5_bathroom.jpg
folder1/subfolder1/5_bedroom.jpg
folder1/subfolder1/5_frontal.jpg
folder1/subfolder1/5_kitchen.jpg
folder1/subfolder1/6_bathroom.jpg

another example
#get object list
contents = s3_client.list_objects(Bucket='test-can-delete-anyone', Prefix='folder1/')['Contents']
for object in contents:
        print(object['Key'])

result
folder1/
folder1/1_kitchen.jpg
folder1/2_bathroom.jpg
folder1/2_bedroom.jpg
folder1/2_frontal.jpg
folder1/2_kitchen.jpg
folder1/subfolder1/
folder1/subfolder1/4_kitchen.jpg
folder1/subfolder1/5_bathroom.jpg
folder1/subfolder1/5_bedroom.jpg
folder1/subfolder1/5_frontal.jpg
folder1/subfolder1/5_kitchen.jpg
folder1/subfolder1/6_bathroom.jpg
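list_objects returns at most 1000 keys per call; for larger prefixes a paginator is safer. A sketch using the same hypothetical bucket and prefix:

#page through all keys under the prefix
paginator = s3_client.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket='test-can-delete-anyone', Prefix='folder1/'):
    for obj in page.get('Contents', []):
        print(obj['Key'])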


AWS s3 bucket - check whether a folder exists, by python code

Check whether a certain folder exists in an s3 bucket with python code.


-
import boto3
import botocore.exceptions

#create boto3 instance
s3_client = boto3.client(
        's3',
        aws_access_key_id=ACCESS_KEY,
        aws_secret_access_key=SECRET_KEY,
    )

#check folder exist
try:
        s3_client.get_object(Bucket='s3-bucket-name', Key='folder-name/')
        print('folder exist')
except botocore.exceptions.ClientError as e:
        print('no folder exist')
-

Thank you.


Here it is as a function:
def check_folder_exist(s3_client, bucket_name, folder_name):
    
    try:
        s3_client.get_object(Bucket=bucket_name, Key=folder_name)
        return True
    except botocore.exceptions.ClientError as e:
        return False


If the above code doesn't work, try this one as well.
def check_folder_exist(bucket_name, folder_name):
    try:
        print(bucket_name, folder_name)
        result = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=folder_name)
        if 'Contents' in result:
            return True
        else:
            return False
    except botocore.exceptions.ClientError as e:
        print(e)
        return False

6/30/2019

AWS S3 bucket, folder creation in python code

Basically, an s3 bucket doesn't have a folder concept.
But this code creates a folder-like key, which doesn't contain any object data.


--
#create boto3 instance
s3_client = boto3.client(
        's3',
        aws_access_key_id=ACCESS_KEY,
        aws_secret_access_key=SECRET_KEY,
    )

#create folder by key    
s3_client.put_object(Bucket='s3-bucket-name', Key=('folder-name'+'/'))
--

Thank you.

6/10/2019

OpenCV -> PIL -> resize -> OpenCV

Some simple code for this processing

1. Read image by OpenCV
2. Convert from OpenCV to PIL image
3. Some processing using PIL, ex) resize
4. Convert from PIL to OpenCV

Check this code.


import cv2
from PIL import Image
import numpy

#target resize
r_x = 100
r_y = 100


#read image using opencv
cv_img_o = cv2.imread('A.png')

#convert mat to pil
cv_img = cv2.cvtColor(cv_img_o, cv2.COLOR_BGR2RGB)
im_pil = Image.fromarray(cv_img)

#resize pil
im_pil = im_pil.resize((r_x,r_y), Image.ANTIALIAS)

#convert pil to mat
cv_img_r = numpy.array(im_pil)

# Convert RGB to BGR
cv_img_r = cv2.cvtColor(cv_img_r, cv2.COLOR_RGB2BGR)
#cv_img_r = cv_img_r[:, :, ::-1].copy()

cv2.namedWindow('origin',0)
cv2.imshow('origin', cv_img_o)

cv2.namedWindow('resize',0)
cv2.imshow('resize', cv_img_r)

cv2.waitKey(0)
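If only resizing is needed, OpenCV can do it directly without the PIL round trip. A one-line sketch (the interpolation flag is my choice, not from the original post):

cv_img_small = cv2.resize(cv_img_o, (r_x, r_y), interpolation=cv2.INTER_AREA)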