12/13/2019

Python: convert an OrderedDict (including nested ones) to a regular dict


import json
from collections import OrderedDict

# Sample nested OrderedDict (an object annotation with a bounding box).
od = OrderedDict([('name', 'signature'), ('pose', 'Unspecified'), ('bndbox', OrderedDict([('xmin', '423'), ('ymin', '1237'), ('xmax', '785'), ('ymax', '1349')]))])
print(od)
# A JSON round-trip turns every nested OrderedDict into a plain dict.
# This only works when all keys/values are JSON-serializable.
output_dict = json.loads(json.dumps(od))
print(output_dict)





reference is here : https://stackoverflow.com/questions/20166749/how-to-convert-an-ordereddict-into-a-regular-dict-in-python3

12/08/2019

keras model summary, print out for each layers property

from keras.layers import Input
from keras.applications import VGG16
# Build VGG16 with ImageNet weights and without the fully-connected head
# (include_top=False), fed by a 224x224x3 input tensor.
vgg_model = VGG16(weights="imagenet", include_top=False, input_tensor=Input(shape=(224, 224, 3)))

#how many layers
print(len(vgg_model.layers))



#last layer
print(vgg_model.layers[-1].name)



#print every layer: 1-based index, output shape and name
for idx, layer in enumerate(vgg_model.layers, start=1):
    print(idx, '-----')
    print(layer.output_shape)
    print(layer.name)
    print('--------')



#summary of model
vgg_model.summary()



12/06/2019

using specific gpu device for TensorFlow

setting first or second gpu machine
CUDA_VISIBLE_DEVICES=0 python script_one.py
CUDA_VISIBLE_DEVICES=1 python script_two.py
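another way: export the variable once, then run the script
(the variable name must be CUDA_VISIBLE_DEVICES, plural; the singular
CUDA_VISIBLE_DEVICE is an unrelated name that is silently ignored, which is why
this did not work for me at first)
use first
export CUDA_VISIBLE_DEVICES=0
./train.py
use second
export CUDA_VISIBLE_DEVICES=1
./train.py
use both
export CUDA_VISIBLE_DEVICES=0,1
./train.py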

refer to here:
https://stackoverflow.com/questions/44135538/tensorflow-using-2-gpu-at-the-same-time

12/05/2019

monitoring gpu status in command (terminal)



pip install gpustat
>gpustat -cp




monitoring continuously
>watch -n 0.5 -c gpustat -cp --color


12/03/2019

find pdf file (or some exe file) in directories and copy it to another directory, python sample code


import os
import glob
from shutil import copyfile


start_dir = '/Volumes/input/'
output_path = '/Volumes/output/'
pattern = "*.pdf"


def copy_matching_files(start_dir, output_path, pattern="*.pdf"):
    """Copy every file matching *pattern* under *start_dir* into *output_path*.

    Each copy is named ``<running index>_<original file name>`` so files with
    the same name in different directories do not overwrite each other.
    Directories and matches are visited in sorted order, so a re-run numbers
    the files the same way and skips copies that already exist.

    Args:
        start_dir: Root directory that is walked recursively.
        output_path: Directory receiving the copies (created on first copy).
        pattern: Glob pattern applied inside every visited directory.

    Returns:
        List of destination paths that were actually copied in this call.
    """
    copied = []
    total = 0
    for dirpath, dirnames, _ in os.walk(start_dir):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirnames.sort()
        # Escape the directory part so '[' or '*' in a folder name is literal.
        matches = sorted(glob.glob(os.path.join(glob.escape(dirpath), pattern)))
        for i, v in enumerate(matches):
            if not os.path.isfile(v):
                # The pattern can also match directory names; skip those.
                continue
            #found file
            print(total, i, v)
            #extract filename only
            filename = os.path.basename(v)
            #make new filename and output path
            output_filename = os.path.join(output_path, str(total) + '_' + filename)
            #copy only when the destination does not exist yet
            if not os.path.exists(output_filename):
                os.makedirs(output_path, exist_ok=True)
                copyfile(v, output_filename)
                #print out copied filename
                print('copy! : ', output_filename)
                copied.append(output_filename)
            #increase global count
            total += 1
    return copied


copied_files = copy_matching_files(start_dir, output_path, pattern)

11/29/2019

download zip file from url, python sample code




from bs4 import BeautifulSoup
import requests
import os
import sys

def downloadZip(url, prefix_url, outpath):
    """Download every ``.zip`` file linked from the page at *url*.

    Args:
        url: Page whose ``<a href=...>`` links are scanned.
        prefix_url: String prepended to each href to form the download URL
            (e.g. 'http://aaa.com/' + 'abc.zip').
        outpath: Directory the zip files are written to; created if missing.
    """
    mbyte = 1024 * 1024
    html = requests.get(url).text
    soup = BeautifulSoup(html, features='lxml')
    os.makedirs(outpath, exist_ok=True)

    for name in soup.findAll('a', href=True):
        #href of the A tag
        zipurl = name['href']
        #only zip links are of interest
        if not zipurl.endswith('.zip'):
            continue
        #make download path
        outfname = outpath + '/' + zipurl.split('/')[-1]
        #make url
        zipurl = prefix_url + zipurl  #http://aaa.com/ + 'abc.zip'
        print(zipurl)
        r = requests.get(zipurl, stream=True)
        if r.status_code != requests.codes.ok:
            continue
        # The server may omit content-length (e.g. chunked transfer).
        raw_size = r.headers.get('content-length')
        if raw_size is None:
            print('Downloading %s (size unknown)' % outfname)
        else:
            print('Downloading %s (%sMb)' % (outfname, int(raw_size) / mbyte))
        # The with-statement closes the file; no explicit close() is needed.
        with open(outfname, 'wb') as fd:
            for chunk in r.iter_content(chunk_size=64 * 1024):
                if chunk:  # ignore keep-alive chunks
                    fd.write(chunk)

base_path = os.getcwd()
path_join = os.path.join(base_path, 'data_download_pdf')
# NOTE(review): adding the download directory to sys.path does not look
# necessary for downloading - confirm before removing.
sys.path.append(path_join)

# point to output directory; create it first so files can be written into it
outpath = path_join
os.makedirs(outpath, exist_ok=True)
url = 'https://www.gsa.gov/real-estate/real-estate-services/leasing-policy-procedures/lease-documents/lease-documents-region-1-new-england/'
prefix_url = 'https://www.gsa.gov/cdnstatic'

downloadZip(url, prefix_url, outpath)








11/24/2019

python website url verification code


import requests
import urllib.request

url = 'http://www.marearts.com'

# Check 1: the requests library.
try:
    # NOTE(review): verify=False turns off TLS certificate verification;
    # keep it only if reachability is all that matters here.
    resp = requests.get(url, verify=False, timeout=10)
    print(url, resp.status_code)
except requests.exceptions.RequestException:
    print('fail to access')

# Check 2: the standard library. urlopen raises HTTPError (a URLError
# subclass) for 4xx/5xx answers, so those also count as a failure.
try:
    with urllib.request.urlopen(url, timeout=10) as resp:
        print(url, resp.getcode())
except (urllib.error.URLError, OSError, ValueError):
    print('fail to access')

11/20/2019

All configured authentication methods failed -> vscode sftp setting

Try to use this script on sftp.json file.

{
"name": "test",
"protocol": "sftp",
"host": "ec2-114-24-120-84.eu-west-4.compute.amazonaws.com",
"remotePath": "/home/ubuntu/ABCD",
"privateKeyPath": "/Users/ABD/ec2_aws.pem",
"username": "ubuntu",
"port": 22,
"secure": true,
"uploadOnSave": true,
"passive": false,
"debug": true,
"ignore": [
"\\.vscode",
"\\.git",
"\\.DS_Store"
],
"generatedFiles": {
"uploadOnSave": true,
"extensionsToInclude": [],
"path": "./"
}
}

11/16/2019

yum install -y https://centos7.iuscommunity.org/ius-release.rpm -> Error: nothing provides epel-release = 7

yum install -y https://centos7.iuscommunity.org/ius-release.rpm
but when you get this error :

Error:
 Problem: conflicting requests
  - nothing provides epel-release = 7 needed by ius-release-2-1.el7.ius.noarch
(try to add '--skip-broken' to skip uninstallable packages or '--nobest' to use not only best candidate packages)


Try to install this one first.

yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
then
yum install -y https://centos7.iuscommunity.org/ius-release.rpm


Thank you.

yum install unxz -> Error: Nothing to do

use this one:

yum install xz

yum install wkhtmltopdf -> No package wkhtmltopdf available.


wget https://github.com/wkhtmltopdf/wkhtmltopdf/releases/download/0.12.4/wkhtmltox-0.12.4_linux-generic-amd64.tar.xz
unxz wkhtmltox-0.12.4_linux-generic-amd64.tar.xz
tar -xvf wkhtmltox-0.12.4_linux-generic-amd64.tar
mv wkhtmltox/bin/* /usr/local/bin/
rm -rf wkhtmltox
rm -f wkhtmltox-0.12.4_linux-generic-amd64.tar



reference :
https://github.com/JazzCore/python-pdfkit
https://gist.github.com/paulsturgess/cfe1a59c7c03f1504c879d45787699f5
https://gist.github.com/AndreasFurster/ebe3f163d6d47be43b72b35b18d8b5b6

11/15/2019

rect intersector check using image (python)



1. Create an all-zero image (numpy array) of the page size.
2. For each box, add 1 to every element inside the box.
3. If any element is greater than 1, at least two boxes overlap.

Thank you.


import numpy as np


def has_intersection(box_list, width, height):
    """Return True when at least two boxes in *box_list* overlap.

    A counter image of size height x width is created, every box adds 1 to
    the pixels it covers, and any pixel with a count above 1 means overlap.

    Args:
        box_list: Iterable of dicts whose 'bbox' entry starts with
            (left, top, right, bottom); right/bottom are exclusive.
        width: Canvas width (converted with int()).
        height: Canvas height (converted with int()).

    Returns:
        bool: True if any pixel is covered by more than one box.

    Example:
        if has_intersection(box_list, width, height):
            print('intersector found')
    """
    h, w = int(height), int(width)
    # uint32 so that many stacked boxes cannot wrap around like uint8 would.
    img = np.zeros((h, w), dtype=np.uint32)
    for v in box_list:
        left, top, right, bottom = (int(c) for c in v['bbox'][:4])
        # Clamp to the canvas; a negative index would otherwise wrap around.
        left, top = max(left, 0), max(top, 0)
        right, bottom = min(right, w), min(bottom, h)
        img[top:bottom, left:right] += 1

    #check overlap
    return bool((img > 1).any())

11/13/2019

PDF split page and save each page to pdf, python

#pip3 install PyMuPDF
import fitz
doc = fitz.open('./test2.pdf')

# The page count is len(doc) (older releases also expose doc.pageCount).

#split pages: write page i of the source into its own "<i>.pdf"
for i in range(len(doc)):
    print(i)
    doc2 = fitz.open()  # new empty PDF
    # from_page and to_page must both be i; with only to_page=i the range
    # starts at the first page, so file i would contain pages 0..i.
    # NOTE: newer PyMuPDF releases name this method insert_pdf.
    doc2.insertPDF(doc, from_page=i, to_page=i)
    doc2.save("{}.pdf".format(i))
    doc2.close()

10/27/2019

Python: Create in Memory Zip File

simple example to make zip in memory

..
import io
import zipfile

def makeZip(data_list):
    """Build a zip archive in memory.

    Args:
        data_list: Iterable of (file_name, data) pairs. *data* may be an
            io.BytesIO/io.StringIO (its getvalue() is stored) or raw
            bytes/str.

    Returns:
        io.BytesIO holding the finished archive, rewound to position 0 so it
        can be read directly; getvalue() works as well.
    """
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED, allowZip64=False) as zip_file:
        for file_name, data in data_list:
            payload = data.getvalue() if hasattr(data, "getvalue") else data
            zip_file.writestr(file_name, payload)
    # The central directory is written when the ZipFile closes; rewind after.
    zip_buffer.seek(0)
    return zip_buffer

# Two small in-memory files, zipped and written to ./b.zip
data_list = [('1.txt', io.BytesIO(b'111')), ('2.txt', io.BytesIO(b'222'))]
zip_buffer = makeZip(data_list)
with open('./b.zip', 'wb') as f:
    f.write(zip_buffer.getvalue())
..

Thank you.

10/19/2019

Byte 2 opencv Mat


refer to below source code. ^^

import base64
import numpy as np
import cv2

def byte2Mat(data):
    """Decode a base64-encoded image into an OpenCV image array.

    Args:
        data: Base64 text/bytes of an encoded image file (e.g. JPEG/PNG).

    Returns:
        The array produced by cv2.imdecode with cv2.IMREAD_COLOR, or None
        when OpenCV cannot decode the data.
    """
    imgdata = base64.b64decode(data)
    nparr = np.frombuffer(imgdata, np.uint8)
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    if img is None:
        # imdecode signals undecodable data with None; img.shape would crash.
        return None
    print(img.shape)

    return img

8/24/2019

python string encryption, decryption - example code


from cryptography.fernet import Fernet

def encrypt(message: bytes, key: bytes):
    """Encrypt *message* with the Fernet *key* and return the token."""
    cipher = Fernet(key)
    return cipher.encrypt(message)

def decrypt(token: bytes, key: bytes):
    """Decrypt the Fernet *token* with *key* and return the original bytes."""
    cipher = Fernet(key)
    return cipher.decrypt(token)

# Generate a fresh key; keep it in a secure location.
#ex) key is 'Fn1dPza4Gchl7KpPE4kz2oJEMFXYG39ykpSLcsT1icU='
key = Fernet.generate_key()

message = 'This is scret string'
# str -> bytes -> encrypted token
enstr = encrypt(message.encode(), key)
# encrypted token -> bytes -> str
destr = decrypt(enstr, key).decode()

# Show the input, the encrypted token and the round-tripped text.
for label, value in (('input:', message), ('encryption:', enstr), ('decryption:', destr)):
    print(label, value)



8/21/2019

get similarity between two graphs

Basically, this example use networkX python library.
I made very simple two graphs which are G1, G2

Let see here:



nx.graph_edit_distance returns the graph edit distance: the minimum total cost of the edits (adding, removing or substituting nodes and edges) needed to make the two graphs isomorphic.

Check the example code.

..
#https://stackoverflow.com/questions/11804730/networkx-add-node-with-specific-position
#https://stackoverflow.com/questions/23975773/how-to-compare-directed-graphs-in-networkx

import matplotlib.pyplot as plt
import networkx as nx
# First graph: three nodes, node 1 connected to nodes 2 and 3.
G1 = nx.Graph()
G1.add_nodes_from([
    (1, {'pos': (1, 1)}),
    (2, {'pos': (2, 2)}),
    (3, {'pos': (3, 1)}),
])
G1.add_edges_from([(1, 2), (1, 3)])

pos = nx.get_node_attributes(G1, 'pos')
plt.figure('graph1')
nx.draw(G1, pos, with_labels=True)

# Second graph: the same shape plus a fourth node attached to node 1.
G2 = nx.Graph()
G2.add_nodes_from([
    (1, {'pos': (10, 10)}),
    (2, {'pos': (20, 20)}),
    (3, {'pos': (30, 10)}),
    (4, {'pos': (40, 30)}),
])
G2.add_edges_from([(1, 2), (1, 3), (1, 4)])
pos2 = nx.get_node_attributes(G2, 'pos')
plt.figure('b')
nx.draw(G2, pos2, with_labels=True)

# Edit distance between the two graphs.
dist = nx.graph_edit_distance(G1, G2)
print(dist)

plt.show()
..

8/20/2019

compare text using fuzzy wuzzy in python

just refer to this example..it's simple and very useful.

#pip install fuzzywuzzy
from fuzzywuzzy import process
candidate = ["Atlanta Falcons", "New York Jetss", "New York Giants", "Dallas Cowboys"]

# extract: all candidates ranked by score (pass limit=N to keep the top N)
search = "new york jets"
r1 = process.extract(search, candidate)
#r1 = process.extract(search, candidate, limit=3)

# extractOne: only the single best candidate
search = "cowboys"
r2 = process.extractOne(search, candidate)

# extractBests: ranked candidates whose score reaches score_cutoff
search = "new york jets"
r3 = process.extractBests(search, candidate, score_cutoff=70)

print(r1)
#[('New York Jetss', 96), ('New York Giants', 79), ('Atlanta Falcons', 29), ('Dallas Cowboys', 22)]
print(r2)
#('Dallas Cowboys', 90)
print(r3)
# Same query as r1, so with the scores listed above the entries reaching 70 are:
#[('New York Jetss', 96), ('New York Giants', 79)]


8/08/2019

PIL to string, string to PIL (python)

It's simple example source code for that:

PIL to string(base64)
- PIL open image
- image to byte
- byte to string (base64)

string(base64) to PIL
- string to byte
- PIL load byte

--
import base64
import io
from PIL import Image

#open file using PIL
pil_img = Image.open('IMG_0510.jpg')
width, height = pil_img.size
print(width, height)


#get image data as bytes (re-encoded in the image's own format)
buffer = io.BytesIO()
pil_img.save(buffer, format=pil_img.format)
buffer_value = buffer.getvalue()

#bytes to string: b64encode returns bytes, so decode to get a real str
base64_str = base64.b64encode(buffer_value).decode('ascii')

#read string back into an image (b64decode accepts str as well as bytes)
buffer2 = base64.b64decode(base64_str)
pil_img2 = Image.open(io.BytesIO(buffer2))
width2, height2 = pil_img2.size
print(width2, height2)


#check first & second image
pil_img.show()
pil_img2.show()
--

here, another source code for :
OpenCV -> PIL -> resize -> OpenCV

http://study.marearts.com/2019/06/opencv-pil-resize-opencv.html

7/30/2019

Simple example for CNN + MNIST + Keras, Tensorboard, save model, load model

Training Code CNN + MNIST

..

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Input
from keras.layers import Conv2D, MaxPooling2D

"""Build CNN Model"""
num_classes = 10
input_shape = (28, 28, 1)  # MNIST, channels-last layout: (rows, cols, channels)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
model.summary()

# --- Download MNIST Data ---
from keras.datasets import mnist
import numpy as np

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# scale pixel values to [0, 1]
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
# (60000, 28, 28) -> (60000, 28, 28, 1); adapt this if using `channels_first` image data format
x_train = np.reshape(x_train, (len(x_train), 28, 28, 1))
x_test = np.reshape(x_test, (len(x_test), 28, 28, 1))
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# --- Show some images ---
import matplotlib.pyplot as plt

row = 10
col = 10
n = row * col
plt.figure(figsize=(4, 4))
for i in range(n):
    # display original
    #https://jakevdp.github.io/PythonDataScienceHandbook/04.08-multiple-subplots.html
    ax = plt.subplot(row, col, i+1)
    plt.imshow(x_test[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()

# --- Set up tensorboard ---
from datetime import datetime
import os

logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
os.makedirs(logdir, exist_ok=True)
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

# --- Train model ---
batch_size = 128
epochs = 1
model.fit(x_train, y_train,
          epochs=epochs,
          batch_size=batch_size,
          shuffle=True,
          validation_data=(x_test, y_test),
          callbacks=[tensorboard_callback])
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# --- Test one image ---
one_image = x_test[0].reshape(1, 28, 28, 1)
y_pred_all = model.predict(one_image)
# predict_classes() is gone from newer Keras; for a softmax output the
# predicted class is the argmax over the last axis.
y_pred_it = np.argmax(y_pred_all, axis=-1)
print(y_pred_all, y_pred_it)
plt.imshow(x_test[0].reshape(28, 28))
plt.show()

# --- Save model to drive ---
model.save('my_cnn_mnist_model.h5')


..
CNN network Layout


Dataset


Run Tensorboard

>cd ./logs/scalars/20190730-105257
>tensorboard --logdir=./


almost 99% accuracy



Load Model and test one mnist image
...
"""load model from drive"""
from keras.models import load_model
new_model = load_model('my_cnn_mnist_model.h5')

# load 1 image from drive
from PIL import Image
import numpy as np

# test prediction
img_path = './mnist_7_450.jpg'
# Force single-channel 28x28 input; np.resize would only repeat/truncate the
# raw values instead of rescaling the picture.
img = Image.open(img_path).convert("L").resize((28, 28))
# Scale to [0, 1] the same way the training data was scaled.
im2arr = np.asarray(img, dtype='float32') / 255.
im2arr = im2arr.reshape(1, 28, 28, 1)
# predict_classes() is gone from newer Keras; argmax over the softmax output
# gives the same class index.
y_pred = np.argmax(new_model.predict(im2arr), axis=-1)
print(y_pred)

...

Test image


output
[7]


Download the MNIST JPEG files here: http://study.marearts.com/2015/09/mnist-image-data-jpg-files.html



7/01/2019

Check if string matches with regular expression pattern in python

simple code for checking string matched with certain pattern.

import re
file3 = 'keyvalue_reference_1.json'
# Raw string with an escaped dot, so '.' only matches a literal dot.
pattern = re.compile(r"keyvalue_reference_[0-9]+\.json")
# fullmatch requires the whole name to fit the pattern. The result is not
# called `re`, because that would shadow the re module.
matched = pattern.fullmatch(file3)

if matched:
    print('matched')
else:
    print('non matched')


Thank you.

AWS S3, Get object list in Subfolder by python code using s3_client.list_objects function

This is my s3 folder structure





This is code to get file list in certain subfolder.

#get boto3 instance
s3_client = boto3.client(
        's3',
        aws_access_key_id=ACCESS_KEY,
        aws_secret_access_key=SECRET_KEY,
    )

#get object list
response = s3_client.list_objects(Bucket='test-can-delete-anyone', Prefix='folder1/subfolder1')
# The response has no 'Contents' key when nothing matches the prefix.
contents = response.get('Contents', [])
for item in contents:
     print(item['Key'])


result
folder1/subfolder1/
folder1/subfolder1/4_kitchen.jpg
folder1/subfolder1/5_bathroom.jpg
folder1/subfolder1/5_bedroom.jpg
folder1/subfolder1/5_frontal.jpg
folder1/subfolder1/5_kitchen.jpg
folder1/subfolder1/6_bathroom.jpg

another example
#get object list
response = s3_client.list_objects(Bucket='test-can-delete-anyone', Prefix='folder1/')
# The response has no 'Contents' key when nothing matches the prefix.
contents = response.get('Contents', [])
for item in contents:
        print(item['Key'])

result
folder1/
folder1/1_kitchen.jpg
folder1/2_bathroom.jpg
folder1/2_bedroom.jpg
folder1/2_frontal.jpg
folder1/2_kitchen.jpg
folder1/subfolder1/
folder1/subfolder1/4_kitchen.jpg
folder1/subfolder1/5_bathroom.jpg
folder1/subfolder1/5_bedroom.jpg
folder1/subfolder1/5_frontal.jpg
folder1/subfolder1/5_kitchen.jpg
folder1/subfolder1/6_bathroom.jpg


AWS s3 bucket - check folder exist or not by python code

Check whether a certain folder exists in an S3 bucket, using Python code.


-
#create boto3 instance
s3_client = boto3.client(
        's3',
        aws_access_key_id=ACCESS_KEY,
        aws_secret_access_key=SECRET_KEY,
    )

#check folder exist
try:
        # head_object only fetches metadata; get_object would open a body
        # stream that is never read or closed here.
        s3_client.head_object(Bucket='s3-bucket-name', Key='folder-name/')
        print('folder exist')
except botocore.exceptions.ClientError as e:
        # Any client error (including access denied) is reported as missing.
        print('no folder exist')
-

Thank you.


function type is here:
def check_folder_exist(s3_client, bucket_name, folder_name):
    """Return True when the key *folder_name* exists in *bucket_name*.

    Args:
        s3_client: boto3 S3 client used for the request.
        bucket_name: Name of the bucket.
        folder_name: Exact object key to look up (e.g. 'folder-name/').

    Returns:
        True if the metadata request succeeds, False on any ClientError
        (a missing key as well as e.g. access denied).
    """
    # Imported here so the function does not rely on a module-level import.
    from botocore.exceptions import ClientError

    try:
        # Metadata-only request; no object body is opened.
        s3_client.head_object(Bucket=bucket_name, Key=folder_name)
        return True
    except ClientError:
        return False


If not working above code then try this one as well.
def check_folder_exist(bucket_name, folder_name):
    """Return True when at least one key in *bucket_name* starts with *folder_name*.

    Uses the module-level ``s3_client``. Prints the arguments, and prints the
    error and returns False when the request raises a ClientError.
    """
    try:
        print(bucket_name, folder_name)
        # One key is enough to know the prefix exists.
        result = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=folder_name, MaxKeys=1)
        # 'Contents' is only present when something matched the prefix.
        return 'Contents' in result
    except botocore.exceptions.ClientError as e:
        print(e)
        return False

6/30/2019

AWS S3 bucket, folder creation in python code

S3 buckets have no real folder concept; a "folder" is just a key prefix.
This code creates a folder by putting an empty object whose key ends with '/'.


--
# Build the S3 client from the access/secret key pair.
s3_client = boto3.client('s3', aws_access_key_id=ACCESS_KEY, aws_secret_access_key=SECRET_KEY)

# A "folder" is an object whose key ends with a slash; no Body is passed.
folder_key = 'folder-name' + '/'
s3_client.put_object(Bucket='s3-bucket-name', Key=folder_key)
--

Thank you.