MareArts Computer Vision Study.: 2019

12/13/2019

python OrderedDict to Dict

import json
from collections import OrderedDict

# Sample nested OrderedDict to convert.
od = OrderedDict([('name', 'signature'), ('pose', 'Unspecified'), ('bndbox', OrderedDict([('xmin', '423'), ('ymin', '1237'), ('xmax', '785'), ('ymax', '1349')]))])
print(od)
# A JSON round trip turns every nested OrderedDict into a plain dict.
# The value serialised must be `od`; the name `av` used before was never defined.
output_dict = json.loads(json.dumps(od))
print(output_dict)

reference is here : https://stackoverflow.com/questions/20166749/how-to-convert-an-ordereddict-into-a-regular-dict-in-python3

12/08/2019

Keras model summary: print the properties of each layer

from keras.layers import Input
from keras.applications import VGG16
# Load VGG16 with ImageNet weights; include_top=False leaves the fully connected head off.
vgg_model = VGG16(weights="imagenet", include_top=False, input_tensor=Input(shape=(224, 224, 3)))

#how many layers
print(len(vgg_model.layers))

#last layer
print(vgg_model.layers[-1].name)

#print whole layer: 1-based index, output shape and name of every layer
for idx, layer in enumerate(vgg_model.layers, start=1):
    print(idx, '-----')
    print(layer.output_shape)
    print(layer.name)
    print('--------')

#summary of model
vgg_model.summary()

12/06/2019

using specific gpu device for TensorFlow

setting first or second gpu machine

# Set the variable for a single command only: each script gets its own device list.
CUDA_VISIBLE_DEVICES=0 python script_one.py
CUDA_VISIBLE_DEVICES=1 python script_two.py

another way
use first (it didn't work for me)

# The variable name is CUDA_VISIBLE_DEVICES (plural); the singular spelling is not read by CUDA.
export CUDA_VISIBLE_DEVICES=0
./train.py

use second (it didn't work for me)

# The variable name is CUDA_VISIBLE_DEVICES (plural); the singular spelling is not read by CUDA.
export CUDA_VISIBLE_DEVICES=1
./train.py

use both (it didn't work for me)

# The variable name is CUDA_VISIBLE_DEVICES (plural); the singular spelling is not read by CUDA.
export CUDA_VISIBLE_DEVICES=0,1
./train.py

refer to here:
https://stackoverflow.com/questions/44135538/tensorflow-using-2-gpu-at-the-same-time

12/05/2019

monitoring gpu status in command (terminal)

pip install gpustat
>gpustat -cp

monitoring continuously
>watch -n 0.5 -c gpustat -cp --color

12/03/2019

find pdf file (or some exe file) in directories and copy it to another directory, python sample code

import os
import glob
from shutil import copyfile


start_dir = '/Volumes/input/'
output_path = '/Volumes/output/'
pattern   = "*.pdf"


def copy_matching_files(start_dir, output_path, pattern):
    """Copy every file below ``start_dir`` that matches ``pattern`` into ``output_path``.

    Each copy is named ``<running index>_<original filename>`` so equally named
    files from different directories do not overwrite each other. A target that
    already exists is left untouched.

    Args:
        start_dir: root directory that is walked recursively.
        output_path: existing directory that receives the copies.
        pattern: glob pattern applied inside every directory, e.g. ``"*.pdf"``.

    Returns:
        Tuple ``(files, total)``: the matched source paths in processing order
        and the number of matches.
    """
    files = []
    total = 0
    for current_dir, subdirs, _ in os.walk(start_dir):
        # Sorting in place fixes the walk order, so the numbering is reproducible.
        subdirs.sort()
        # Handle only the matches of *this* directory. Looping over the
        # cumulative list inside the walk re-copied earlier files under new
        # index prefixes on every directory visited.
        # glob.escape keeps "[", "]" or "*" in a directory name literal.
        matches = sorted(
            path
            for path in glob.glob(os.path.join(glob.escape(current_dir), pattern))
            if os.path.isfile(path)
        )
        for source in matches:
            #found file
            print(total, source)
            #make new filename and output path
            output_filename = os.path.join(output_path, str(total) + '_' + os.path.basename(source))
            #copy only when the target does not exist yet
            if not os.path.exists(output_filename):
                copyfile(source, output_filename)
                print('copy! : ', output_filename)
            #increase global count
            total += 1
        files.extend(matches)
    return files, total


files, total = copy_matching_files(start_dir, output_path, pattern)

11/29/2019

download zip file from url, python sample code

from bs4 import BeautifulSoup
import requests
import os
import sys

def downloadZip(url, prefix_url, outpath):
    """Download every ``.zip`` file linked from the page at ``url``.

    Args:
        url: page whose ``<a href=...>`` links are scanned.
        prefix_url: string prepended to each matching href to build the
            download URL.
        outpath: directory the archives are written to; created if missing.
    """
    mbyte = 1024 * 1024
    html = requests.get(url).text
    soup = BeautifulSoup(html, features='lxml')

    # open() below fails when the target directory does not exist yet.
    if outpath:
        os.makedirs(outpath, exist_ok=True)

    for name in soup.findAll('a', href=True):
        #find A tag
        zipurl = name['href']
        #only links that point at a zip file
        if not zipurl.endswith('.zip'):
            continue
        #make download path
        outfname = outpath + '/' + zipurl.split('/')[-1]
        #make url
        zipurl = prefix_url + zipurl  #http://aaa.com/ + 'abc.zip'
        print(zipurl)
        r = requests.get(zipurl, stream=True)
        try:
            if r.status_code != requests.codes.ok:
                continue
            # Content-Length is optional, so do not index the header directly.
            fsize = r.headers.get('content-length')
            if fsize is None:
                print('Downloading %s (size unknown)' % outfname)
            else:
                print('Downloading %s (%sMb)' % (outfname, int(fsize) / mbyte))
            # The with-statement closes the file; no explicit close() is needed.
            with open(outfname, 'wb') as fd:
                for chunk in r.iter_content(chunk_size=1024):  # chunk size can be larger
                    if chunk:  # ignore keep-alive chunks
                        fd.write(chunk)
        finally:
            # Release the streamed connection even when skipping or failing.
            r.close()

base_path = os.getcwd()
path_join = os.path.join(base_path, 'data_download_pdf')
# NOTE(review): sys.path only affects module imports; it has no influence on
# where the downloads are written. Kept to preserve the original behaviour.
sys.path.append(path_join)

# point to output directory; create it so the first run does not fail on open()
outpath = path_join
os.makedirs(outpath, exist_ok=True)
url = 'https://www.gsa.gov/real-estate/real-estate-services/leasing-policy-procedures/lease-documents/lease-documents-region-1-new-england/'
prefix_url = 'https://www.gsa.gov/cdnstatic'

downloadZip(url, prefix_url, outpath)

11/24/2019

python website url verification code

    
import requests
import urllib.request

import http.client

url = 'http://www.marearts.com'

# Check 1: requests
try:
    # NOTE(review): verify=False disables TLS certificate validation; drop it
    # unless sites with broken certificates must still be reported as reachable.
    # The timeout keeps an unresponsive host from blocking forever.
    resp = requests.get(url, verify=False, timeout=10)
except requests.exceptions.RequestException:
    # Catch only request failures so Ctrl-C and programming errors still surface.
    print('fail to access')
else:
    print(url, resp.status_code)

# Check 2: standard library only
try:
    resp = urllib.request.urlopen(url, timeout=10)
except (OSError, ValueError, http.client.HTTPException):
    # OSError covers URLError/HTTPError and timeouts; ValueError a malformed URL.
    print('fail to access')
else:
    print(url, resp.getcode())

11/20/2019

All configured authentication methods failed -> vscode sftp setting

Try to use this script on sftp.json file.

{
    "name": "test",
    "protocol": "sftp",
    "host": "ec2-114-24-120-84.eu-west-4.compute.amazonaws.com",
    "remotePath": "/home/ubuntu/ABCD",
    "privateKeyPath": "/Users/ABD/ec2_aws.pem",
    "username": "ubuntu",
    "port": 22,
    "secure": true,
    "uploadOnSave": true,
    "passive": false,
    "debug": true,
    "ignore": [
        "\\.vscode",
        "\\.git",
        "\\.DS_Store"
    ],
    "generatedFiles": {
        "uploadOnSave": true,
        "extensionsToInclude": [],
        "path": "./"
    }
}

11/16/2019

yum install -y https://centos7.iuscommunity.org/ius-release.rpm -> yum install -y https://centos7.iuscommunity.org/ius-release.rpm

yum install -y https://centos7.iuscommunity.org/ius-release.rpm

but when you get this error :

Error:
Problem: conflicting requests
- nothing provides epel-release = 7 needed by ius-release-2-1.el7.ius.noarch
(try to add '--skip-broken' to skip uninstallable packages or '--nobest' to use not only best candidate packages)

Try to install this one first.

yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm

then

yum install -y https://centos7.iuscommunity.org/ius-release.rpm

Thank you.

yum install unxz -> Error: Nothing to do

use this one:

yum install xz

yum install wkhtmltopdf -> No package wkhtmltopdf available.

# Download the generic Linux amd64 build of wkhtmltox 0.12.4
wget https://github.com/wkhtmltopdf/wkhtmltopdf/releases/download/0.12.4/wkhtmltox-0.12.4_linux-generic-amd64.tar.xz
# Strip the .xz layer, leaving a plain .tar
unxz wkhtmltox-0.12.4_linux-generic-amd64.tar.xz
# Unpack the tar archive
tar -xvf wkhtmltox-0.12.4_linux-generic-amd64.tar
# Move the binaries into /usr/local/bin
mv wkhtmltox/bin/* /usr/local/bin/
# Remove the unpacked directory and the archive
rm -rf wkhtmltox
rm -f wkhtmltox-0.12.4_linux-generic-amd64.tar

reference :
https://github.com/JazzCore/python-pdfkit
https://gist.github.com/paulsturgess/cfe1a59c7c03f1504c879d45787699f5
https://gist.github.com/AndreasFurster/ebe3f163d6d47be43b72b35b18d8b5b6

11/15/2019

rect intersector check using image (python)

1. Create an image as a numpy array of zeros.
2. Add 1 to every element that lies inside each box.
3. Check whether any element has a value greater than 1.

Thank you.

#draw new image        
# Blank single-channel canvas; each pixel counts how many boxes cover it.
# np.zeros already returns an all-zero array, so no extra fill(0) is needed.
# NOTE: uint8 wraps around once more than 255 boxes cover the same pixel.
img = np.zeros((int(height), int(width), 1), dtype = "uint8")
for v in box_list:
    # bbox is read as (left, top, right, bottom)
    left, top, right, bottom = (int(c) for c in v['bbox'][:4])
    img[top:bottom, left:right] += 1

#check overlap: a pixel counted more than once lies inside at least two boxes
# reshape(-1) avoids multiplying height*width, which fails when they are floats
imgv = img.reshape(-1)
if (imgv > 1).any():
    print('intersector found')

11/13/2019

PDF split page and save each page to pdf, python

#manual https://pymupdf.readthedocs.io/en/latest/

#pip3 install PyMuPDF

import fitz

doc = fitz.open('./test2.pdf')

#page_number = doc.pageCount
#print(page_number)

#split pages: page i (0-based) is written to "<i>.pdf"
for i, page in enumerate(doc.pages()):
    print(i)
    # fitz.open() without arguments creates a new, empty PDF
    doc2 = fitz.open()
    # Give from_page as well: with only to_page the copied range starts at the
    # first page, so file i would contain pages 0..i instead of page i alone.
    # NOTE(review): newer PyMuPDF releases name this method insert_pdf - confirm
    # against the installed version.
    doc2.insertPDF(doc, from_page=i, to_page=i)
    doc2.save("{}.pdf".format(i))
    doc2.close()

doc.close()

10/27/2019

Python: Create in Memory Zip File

simple example to make zip in memory

..

import io
import zipfile

def makeZip(data_list):
    """Pack ``(file_name, buffer)`` pairs into a ZIP archive kept in memory.

    Every buffer must offer ``getvalue()`` (for example ``io.BytesIO``).
    Returns the ``io.BytesIO`` object that holds the finished archive.
    """
    archive_bytes = io.BytesIO()
    archive = zipfile.ZipFile(
        archive_bytes,
        mode="a",
        compression=zipfile.ZIP_DEFLATED,
        allowZip64=False,
    )
    # Leaving the with-block closes the archive, which writes the central directory.
    with archive:
        for entry_name, payload in data_list:
            archive.writestr(entry_name, payload.getvalue())
    return archive_bytes

# Demo: two small in-memory files are zipped and the archive is written to ./b.zip
data_list = [
    (entry_name, io.BytesIO(content))
    for entry_name, content in (('1.txt', b'111'), ('2.txt', b'222'))
]
zip_buffer = makeZip(data_list)
with open('./b.zip', 'wb') as f:
    f.write(zip_buffer.getvalue())

..

Thank you.

10/19/2019

Byte 2 opencv Mat

refer to below source code. ^^

import base64

import numpy as np

import cv2

def byte2Mat(data):
    """Decode a base64-encoded image into an OpenCV image array.

    Args:
        data: base64 text (``str`` or ``bytes``) of an encoded image file.

    Returns:
        The array produced by ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.

    Raises:
        ValueError: if OpenCV cannot decode the bytes as an image.
    """
    imgdata = base64.b64decode(data)
    nparr = np.frombuffer(imgdata, np.uint8)
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    # imdecode signals failure by returning None; fail with a clear message
    # instead of an AttributeError on img.shape.
    if img is None:
        raise ValueError('data could not be decoded as an image')
    print(img.shape)

    return img

8/24/2019

python string encryption, decryption - example code

from cryptography.fernet import Fernet

def encrypt(message: bytes, key: bytes):
    """Encrypt ``message`` with the Fernet ``key`` and return the resulting token."""
    cipher = Fernet(key)
    token = cipher.encrypt(message)
    return token

def decrypt(token: bytes, key: bytes):
    """Decrypt a Fernet ``token`` with ``key`` and return the original bytes."""
    cipher = Fernet(key)
    plaintext = cipher.decrypt(token)
    return plaintext

# A fresh key is generated on every run; the same key is needed to decrypt later.
key = Fernet.generate_key()  # store in a secure location
#ex) key is 'Fn1dPza4Gchl7KpPE4kz2oJEMFXYG39ykpSLcsT1icU='

message = 'This is scret string'
#encryption: the helpers work on bytes, so encode the str first
enstr = encrypt(message.encode(), key)
#decryption: decode the returned bytes back to str
destr = decrypt(enstr, key).decode()

print('input:',  message)
print('encryption:', enstr)
print('decryption:', destr)

8/21/2019

get similarity between two graphs

Basically, this example use networkX python library.
I made very simple two graphs which are G1, G2

Let see here:

The function nx.graph_edit_distance computes how many edit operations are needed to make the two graphs isomorphic; that number is its return value.

Check the example code.

..

#https://stackoverflow.com/questions/11804730/networkx-add-node-with-specific-position
#https://stackoverflow.com/questions/23975773/how-to-compare-directed-graphs-in-networkx

import matplotlib.pyplot as plt
import networkx as nx
# First graph: node 1 is linked to nodes 2 and 3; 'pos' holds the drawing coordinates.
G1 = nx.Graph()
G1.add_nodes_from([
    (1, {'pos': (1, 1)}),
    (2, {'pos': (2, 2)}),
    (3, {'pos': (3, 1)}),
])
G1.add_edges_from([(1, 2), (1, 3)])

pos = nx.get_node_attributes(G1, 'pos')
plt.figure('graph1')
nx.draw(G1, pos, with_labels=True)

# Second graph: same shape plus a fourth node that is also linked to node 1.
G2 = nx.Graph()
G2.add_nodes_from([
    (1, {'pos': (10, 10)}),
    (2, {'pos': (20, 20)}),
    (3, {'pos': (30, 10)}),
    (4, {'pos': (40, 30)}),
])
G2.add_edges_from([(1, 2), (1, 3), (1, 4)])
pos2 = nx.get_node_attributes(G2, 'pos')
plt.figure('b')
nx.draw(G2, pos2, with_labels=True)

# Edit distance between the two graphs
dist = nx.graph_edit_distance(G1, G2)
print(dist)

plt.show()

8/20/2019

compare text using fuzzy wuzzy in python

just refer to this example..it's simple and very useful.

#pip install fuzzywuzzy
from fuzzywuzzy import process
candidate = ["Atlanta Falcons", "New York Jetss", "New York Giants", "Dallas Cowboys"]
search = "new york jets"
# extract: (candidate, score) pairs for the query
r1 = process.extract(search, candidate) 
#r1 = process.extract(search, candidate, limit=3)
search = "cowboys"
# extractOne: only the single best match
r2 = process.extractOne(search, candidate)
search = "new york jets"
# extractBests: only the matches whose score reaches score_cutoff
r3 = process.extractBests(search, candidate, score_cutoff=70)
print(r1)
#[('New York Jetss', 96), ('New York Giants', 79), ('Atlanta Falcons', 29), ('Dallas Cowboys', 22)]
print(r2)
#('Dallas Cowboys', 90)
print(r3)
# Same query as r1, so the entries scoring at least 70 are expected:
#[('New York Jetss', 96), ('New York Giants', 79)]

8/08/2019

PIL to string, string to PIL (python)

It's simple example source code for that:

PIL to string(base64)
- PIL open image
- image to byte
- byte to string (base64)

string(base64) to PIL
- string to byte
- PIL load byte

--

import base64
import io
from PIL import Image

#open file using PIL
pil_img = Image.open('IMG_0510.jpg')
width, height = pil_img.size
print(width, height)


#get image data as byte (re-encoded in the file's own format)
buffer = io.BytesIO()
pil_img.save(buffer, format=pil_img.format)
buffer_value = buffer.getvalue()

#byte to string: b64encode() returns bytes, so decode it to get a real str
base64_str = base64.b64encode(buffer_value).decode('ascii')

#read string to image buffer (b64decode accepts an ASCII str as well as bytes)
buffer2 = base64.b64decode(base64_str)
pil_img2 = Image.open(io.BytesIO(buffer2))
width2, height2 = pil_img2.size
print(width2, height2)


#check first & second image
pil_img.show()
pil_img2.show()

--

here, another source code for :
OpenCV -> PIL -> resize -> OpenCV

http://study.marearts.com/2019/06/opencv-pil-resize-opencv.html

7/30/2019

Simple example for CNN + MNIST + Keras, Tensorboard, save model, load model

Training Code CNN + MNIST

..

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Input
from keras.layers import Conv2D, MaxPooling2D

"""Build CNN Model"""
num_classes = 10
input_shape = (28, 28, 1) # MNIST image as (height, width, channels), i.e. channels-last

# Two convolution layers, max pooling, dropout, then a dense classifier head.
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
# One softmax output per class
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
model.summary()

"""Download MNIST Data"""

from keras.datasets import mnist
import numpy as np


# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# scale pixel values by 1/255 and add the channel axis:
#(60000, 28, 28) -> (60000, 28, 28, 1)
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = np.reshape(x_train, (len(x_train), 28, 28, 1))  # adapt this if using `channels_first` image data format
x_test = np.reshape(x_test, (len(x_test), 28, 28, 1))  # adapt this if using `channels_first` image data format

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

"""Show some images"""

import matplotlib.pyplot as plt

# Show the first row*col test images in a grid.
row = 10
col = 10
n = row * col
plt.figure(figsize=(4, 4))
for i in range(n):
    # display original

    #https://jakevdp.github.io/PythonDataScienceHandbook/04.08-multiple-subplots.html
    # subplot indices are 1-based
    ax = plt.subplot(row, col, i+1)
    plt.imshow(x_test[i].reshape(28, 28))
    plt.gray()
    # hide both axes
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()

"""set up tensorboard"""
from datetime import datetime
import os

# One log directory per run, named after the start time.
logdir="logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
os.makedirs(logdir, exist_ok=True)
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

"""Train model"""

from keras.callbacks import TensorBoard

batch_size = 128
epochs = 1

# Pass the callback created above; building a second TensorBoard(log_dir=logdir)
# here left tensorboard_callback unused.
model.fit(x_train, y_train,
                epochs=epochs,
                batch_size=batch_size,
                shuffle=True,
                validation_data=(x_test, y_test),
                callbacks=[tensorboard_callback])



# score[0] is the loss, score[1] the accuracy metric configured in compile()
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

"""test one image data """

# The model expects a batch axis: (1, 28, 28, 1)
one_image = x_test[0].reshape(1,28,28,1)
y_pred_all = model.predict(one_image)
# Sequential.predict_classes() was removed from Keras; the arg-max over the
# softmax output gives the same class index and reuses the prediction above.
y_pred_it = np.argmax(y_pred_all, axis=-1)
print(y_pred_all, y_pred_it)

plt.imshow(x_test[0].reshape(28, 28))
plt.show()

"""save model to drive"""
model.save('my_cnn_mnist_model.h5')

..
CNN network Layout

Dataset

Run Tensorboard

>cd ./logs/scalars/20190730-105257
>tensorboard --logdir=./

almost 99% accuracy

Load Model and test one mnist image
...

"""load model from drive"""
from keras.models import load_model
new_model = load_model('my_cnn_mnist_model.h5')
"""load 1 image from drive"""
from PIL import Image

import numpy as np


"""test prediction"""
img_path = './mnist_7_450.jpg'
# Convert to 8-bit grayscale and rescale with PIL. np.resize() does not
# interpolate: it only repeats or truncates the flattened pixel buffer, so any
# input that is not already 28x28 single-channel turned into garbage.
img = Image.open(img_path).convert("L").resize((28, 28))
# Same preprocessing as the training data: float32 scaled by 1/255
im2arr = np.asarray(img, dtype='float32') / 255.
im2arr = im2arr.reshape(1,28,28,1)
# Sequential.predict_classes() was removed from Keras; take the arg-max of the
# softmax output instead.
y_pred = np.argmax(new_model.predict(im2arr), axis=-1)
print(y_pred)

...

Test image

output
[7]

Download the MNIST JPEG files here: http://study.marearts.com/2015/09/mnist-image-data-jpg-files.html

7/01/2019

Check if string matches with regular expression pattern in python

simple code for checking string matched with certain pattern.

import re

file3 = 'keyvalue_reference_1.json'
# Raw string with an escaped dot: an unescaped "." matches any character.
pattern = re.compile(r"keyvalue_reference_[0-9]+\.json")
# fullmatch() also rejects trailing text such as ".json.bak", which match() accepts.
# The result has its own name; assigning it to `re` hid the module.
matched = pattern.fullmatch(file3)

if matched:
    print('matched')
else:
    print('non matched')

Thank you.

AWS S3, Get object list in Subfolder by python code using s3_client.list_objects function

This is my s3 folder structure

This is code to get file list in certain subfolder.

#get boto3 instance
s3_client = boto3.client(
        's3',
        aws_access_key_id=ACCESS_KEY,
        aws_secret_access_key=SECRET_KEY,
    )

#get object list
response = s3_client.list_objects(Bucket='test-can-delete-anyone', Prefix='folder1/subfolder1')
# 'Contents' is missing from the response when no key matches the prefix,
# so fall back to an empty list instead of raising KeyError.
contents = response.get('Contents', [])
# `obj` avoids shadowing the builtin `object`
for obj in contents:
    print(obj['Key'])

result
folder1/subfolder1/
folder1/subfolder1/4_kitchen.jpg
folder1/subfolder1/5_bathroom.jpg
folder1/subfolder1/5_bedroom.jpg
folder1/subfolder1/5_frontal.jpg
folder1/subfolder1/5_kitchen.jpg
folder1/subfolder1/6_bathroom.jpg

another example

#get object list
response = s3_client.list_objects(Bucket='test-can-delete-anyone', Prefix='folder1/')
# 'Contents' is missing from the response when no key matches the prefix,
# so fall back to an empty list instead of raising KeyError.
contents = response.get('Contents', [])
# `obj` avoids shadowing the builtin `object`
for obj in contents:
    print(obj['Key'])

result
folder1/
folder1/1_kitchen.jpg
folder1/2_bathroom.jpg
folder1/2_bedroom.jpg
folder1/2_frontal.jpg
folder1/2_kitchen.jpg
folder1/subfolder1/
folder1/subfolder1/4_kitchen.jpg
folder1/subfolder1/5_bathroom.jpg
folder1/subfolder1/5_bedroom.jpg
folder1/subfolder1/5_frontal.jpg
folder1/subfolder1/5_kitchen.jpg
folder1/subfolder1/6_bathroom.jpg

AWS s3 bucket - check folder exist or not by python code

Check whether a certain folder exists in an S3 bucket using Python code

-

#create boto3 instance
s3_client = boto3.client(
        's3',
        aws_access_key_id=ACCESS_KEY,
        aws_secret_access_key=SECRET_KEY,
    )

#check folder exist
try:
    # head_object requests only the metadata; get_object opened a body stream
    # that was never read or closed.
    s3_client.head_object(Bucket='s3-bucket-name', Key='folder-name/')
    print('folder exist')
except botocore.exceptions.ClientError as e:
    print('no folder exist')

-

Thank you.

function type is here:

def check_folder_exist(s3_client, bucket_name, folder_name):
    """Return True when the key ``folder_name`` exists in ``bucket_name``.

    Args:
        s3_client: boto3 S3 client used for the request.
        bucket_name: name of the bucket to look in.
        folder_name: key of the folder object to look up.

    Returns:
        True if the request succeeds, False if it raises ``ClientError``.
    """
    try:
        # head_object requests only the metadata; get_object opened a body
        # stream that was never read or closed.
        s3_client.head_object(Bucket=bucket_name, Key=folder_name)
        return True
    except botocore.exceptions.ClientError as e:
        # NOTE(review): every ClientError, not only "not found", is reported as missing.
        return False

If not working above code then try this one as well.

def check_folder_exist(bucket_name, folder_name):
    """Return True when at least one key in ``bucket_name`` starts with ``folder_name``.

    Uses the module-level ``s3_client``. A ``ClientError`` is printed and
    reported as False.
    """
    try:
        print(bucket_name, folder_name)
        listing = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=folder_name )
    except botocore.exceptions.ClientError as err:
        print(err)
        return False
    # 'Contents' is only present in the response when a key matched the prefix
    return 'Contents' in listing

6/30/2019

AWS S3 bucket, folder creation in python code

Basically, an S3 bucket has no concept of folders.
But this code creates a folder by key; the folder does not contain any objects.

--

#create boto3 instance
s3_client = boto3.client(
        's3',
        aws_access_key_id=ACCESS_KEY,
        aws_secret_access_key=SECRET_KEY,
    )

#create folder by key    
# The key ends with '/' and no Body is passed.
s3_client.put_object(Bucket='s3-bucket-name', Key=('folder-name'+'/'))

--

Thank you.

Pages