Download image files from a website with Python.
The requests module can help us fetch the contents of a web page. We can then use BeautifulSoup to parse the HTML string, collect all the elements we are interested in, and download the images from the relevant URLs selected by our script.

import requests, sys, webbrowser, bs4, urllib

if __name__ == "__main__":
    # Fetch a fixed page, collect the URLs of its <img> elements and save up
    # to MAX_IMAGES of them into the current working directory.

    # Script-local imports: the file-level import line only names the bare
    # `urllib` package, which does not by itself guarantee that the
    # `urllib.parse` submodule is loaded.
    import os
    from urllib.parse import urljoin, urlparse

    PAGE_URL = "http://www.nipic.com/index.html"
    MAX_IMAGES = 5  # upper bound on the number of images to download
    TIMEOUT = 30    # seconds; requests waits indefinitely without a timeout

    res = requests.get(PAGE_URL, timeout=TIMEOUT)
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, features='html.parser')

    # Resolve every usable src against the page URL so that relative and
    # scheme-relative ("//host/a.png") references become absolute.
    candidates = []
    for img in soup.select('img'):
        src = img.get('src')
        if not src:
            continue  # <img> without a src attribute (or with an empty one)
        absolute = urljoin(PAGE_URL, src)
        if urlparse(absolute).scheme not in ('http', 'https'):
            continue  # e.g. inline "data:" URIs cannot be fetched over HTTP
        candidates.append(absolute)

    # dict.fromkeys removes duplicates while keeping document order, so the
    # same images are selected on every run.
    image_urls = list(dict.fromkeys(candidates))

    for urlStr in image_urls[:MAX_IMAGES]:
        # Take the file name from the URL path only, which drops any query
        # string or fragment and keeps the full extension (.jpeg, .webp, ...).
        file = os.path.basename(urlparse(urlStr).path)
        if not file:
            continue  # the URL path ends with "/": no usable file name

        print("Download: " + urlStr + " => file: " + file)
        try:
            r = requests.get(urlStr, timeout=TIMEOUT)
            r.raise_for_status()
        except requests.RequestException as err:
            # Do not write an error page to disk; move on to the next image.
            print("Skipped: " + urlStr + " (" + str(err) + ")")
            continue

        with open(file, 'wb') as f:
            f.write(r.content)
        print("status code: ", r.status_code)
        print("content-type: " + r.headers.get('content-type', ''))
        print("encoding: ", r.encoding)


Make it more flexible by passing the URL on the command line:

C:\Users\Stephen\AppData\Local\Microsoft\WindowsApps\python3.10.exe D:\Images\fetch.py https://www.zhihu.com/question/65562234/answer/2830211890

Improved code:

import requests, sys, webbrowser, bs4, urllib

if __name__ == "__main__":
    # Fetch the page whose URL is given as the first command-line argument
    # and save every image referenced by its <img> elements into the current
    # working directory.

    # Script-local imports: the file-level import line only names the bare
    # `urllib` package, which does not by itself guarantee that the
    # `urllib.parse` submodule is loaded.
    import os
    from urllib.parse import urljoin, urlparse

    TIMEOUT = 30  # seconds; requests waits indefinitely without a timeout

    if len(sys.argv) < 2:
        print("usage: scriptPath URL")
        # sys.exit is always available; the bare exit() helper is only
        # injected by the `site` module.
        sys.exit(1)
    URL = sys.argv[1]

    res = requests.get(URL, timeout=TIMEOUT)
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, features='html.parser')

    # Resolve every usable src against the page URL so that relative and
    # scheme-relative ("//host/a.png") references become absolute.
    candidates = []
    for img in soup.select('img'):
        src = img.get('src')
        if not src:
            continue  # <img> without a src attribute (or with an empty one)
        absolute = urljoin(URL, src)
        if urlparse(absolute).scheme not in ('http', 'https'):
            continue  # e.g. inline "data:" URIs cannot be fetched over HTTP
        candidates.append(absolute)

    # dict.fromkeys removes duplicates while keeping document order; every
    # image is visited, including the first one.
    for urlStr in dict.fromkeys(candidates):
        # Take the file name from the URL path only, which drops any query
        # string or fragment and keeps the full extension (.jpeg, .webp, ...).
        file = os.path.basename(urlparse(urlStr).path)
        if not file:
            continue  # the URL path ends with "/": no usable file name

        try:
            r = requests.get(urlStr, timeout=TIMEOUT)
            r.raise_for_status()
        except requests.RequestException as err:
            # One broken image should not abort the remaining downloads.
            print("Skipped: " + urlStr + " (" + str(err) + ")")
            continue

        with open(file, 'wb') as f:
            f.write(r.content)
        print("Download: " + urlStr + " => file: " + file)
Categories: Python

0 0 votes
Article Rating
Subscribe
Notify of
guest

0 Comments
Inline Feedbacks
View all comments

Tex To PDF
: convert a LaTeX file (one with the .tex suffix) to a PDF file

X
0
Would love your thoughts, please comment.x
()
x