Before we start, we need to install PyMuPDF, a Python binding for MuPDF (current version 1.19.*), which is a lightweight PDF toolkit.
pip3 install --upgrade pip
pip3 install --upgrade pymupdf
Write a Python script that saves each page of a PDF as an image.
# Render every page of the PDF given as the first command-line argument
# to a PNG file under ./output (page-0.png, page-1.png, ...).
import os
import sys

import fitz  # import the bindings (PyMuPDF)

if __name__ == '__main__':
    if len(sys.argv) < 2:
        # Fail with a usage hint instead of a bare IndexError.
        sys.exit("usage: %s <file.pdf>" % sys.argv[0])

    # Make sure the target directory exists before any page is saved.
    os.makedirs("./output", exist_ok=True)

    zoom_x = 2.0  # horizontal zoom
    zoom_y = 2.0  # vertical zoom
    mat = fitz.Matrix(zoom_x, zoom_y)  # zoom factor 2 in each dimension

    doc = fitz.open(sys.argv[1])
    try:
        for page in doc:  # iterate through the pages
            pix = page.get_pixmap(matrix=mat)  # render page to an image
            # The file name is built from page.number, as in the original.
            pix.save("./output/page-%i.png" % page.number)  # store image as a PNG
    finally:
        # Release the document even if rendering or saving fails.
        doc.close()
We have to clear the uploads folder if it is larger than 500 MB before the Python script runs. Prepare another script, pre_process.py.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import os
import sys
import subprocess
import shutil
def getFolderSize( start_path ):
    """Return the combined size in bytes of the files below start_path.

    The tree is walked with os.walk. Symbolic links are not counted.
    A path that does not exist, or that is not a directory, yields 0
    because os.walk produces no entries for it.

    Args:
        start_path: Directory whose contents should be measured.

    Returns:
        Total size in bytes as an int.
    """
    total_size = 0
    for dirpath, _dirnames, filenames in os.walk(start_path):
        for f in filenames:
            filePath = os.path.join(dirpath, f)
            # skip if it is symbolic link
            if os.path.islink(filePath):
                continue
            try:
                total_size += os.path.getsize(filePath)
            except OSError:
                # The file was removed (or became unreadable) between the
                # directory listing and the size lookup; ignore it rather
                # than abort the whole measurement.
                continue
    return total_size
def get_process_id(name):
    """Run ``pgrep -f name`` and return its raw standard output.

    Args:
        name: Pattern handed to pgrep's -f option.

    Returns:
        The bytes pgrep wrote to stdout; empty bytes when it printed nothing.
    """
    completed = subprocess.run(
        ["pgrep", "-f", name],
        stdout=subprocess.PIPE,
        shell=False,
    )
    return completed.stdout
if __name__ == '__main__':
    # Upper bound for the uploads folder: 500 MiB.
    maxFolderBytes = 1024 * 1024 * 500

    # Refuse to continue while pgrep reports a pdfToPNGs.py process.
    pid1 = get_process_id("pdfToPNGs.py")
    if pid1:
        print( "error: we are handling, no more cpu resources to use" )
        # sys.exit instead of the interactive-only exit() helper.
        sys.exit( 1 )
    print( "no target pid to kill, go to work" )

    if len(sys.argv) < 2:
        # Fail with a usage hint instead of a bare IndexError.
        sys.exit("usage: %s <folder/file.pdf>" % sys.argv[0])
    filePath = sys.argv[1] #uploads/example.pdf

    # clear big folder: uploads
    # str.split always returns at least one element, so the first path
    # component can be taken directly.
    topFolder = filePath.split("/")[0]
    if getFolderSize( topFolder ) > maxFolderBytes:
        # NOTE(review): this removes everything in the folder, which
        # presumably includes the file named by filePath - confirm the
        # caller expects that.
        shutil.rmtree( topFolder )
        os.mkdir( topFolder )
GitHub: pdfToPNGs
[…] 참고: Create Service : Make Images From PDF […]