MareArts Computer Vision Study: Converting a PDF to OpenCV images page by page using the PyMuPDF library (Python example code)

4/02/2020

See the example code below 😊

pip install PyMuPDF
Documentation: https://pymupdf.readthedocs.io/en/latest/

I think this is a better library than PyPDF2 🤔
..

import fitz

import numpy as np

import cv2

# Render every page of a PDF to PNG and save each page twice: once straight
# from the PNG bytes produced by PyMuPDF, and once after decoding those bytes
# into an OpenCV image (NumPy array).
fname = 'information-10-00248-v2'
doc = fitz.open(fname+'.pdf')

# The scale matrix is the same for every page, so build it once.
# A zoom of 1 applies no scaling; raise it to render at a higher resolution.
zoom = 1
mat = fitz.Matrix(zoom, zoom)

# split pages
for i, page in enumerate(doc.pages()):
    print(i)

    # NOTE: get_pixmap() / tobytes() are the current PyMuPDF names. The old
    # camelCase aliases getPixmap() / getImageData() were deprecated and are
    # not available in recent releases.
    pix = page.get_pixmap(matrix=mat)
    imgData = pix.tobytes("png")

    # save image from byte
    # The context manager closes the file even if the write fails.
    with open('./save_by_byte_{}_{}.png'.format(fname, i), 'wb') as f:
        f.write(imgData)

    # save image from opencv
    # Wrap the PNG bytes in a uint8 array so OpenCV can decode them.
    nparr = np.frombuffer(imgData, np.uint8)
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    print(img.shape)
    cv2.imwrite('./save_by_opencv_{}_{}.png'.format(fname, i), img)