python爬取图片并创建文件夹保存

（截图占位符：原文此处为程序运行效果截图）

#coding=utf-8
import urllib
import urllib.request
from urllib.error import URLError, HTTPError
import string
import time
import re
import sys,os
from bs4 import BeautifulSoup
#获取连接内容
def get_imghtml(url):
try:
request=urllib.request.Request(url)
#伪装浏览器访问
request.add_header(“user-agent”,“Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36”)
response=urllib.request.urlopen(request)
except HTTPError as e:
print(‘http协议异常’, e.code)
return “false”
except URLError as e:
print(‘url地址异常’, e.reason)
return “false”
else:
data=response.read()
data=data.decode(‘gbk’)
return data
#获取连接每页连接内容
def download_page(html):
soup = BeautifulSoup(html,“html.parser”)
kdatas=[]
lables = soup.find(‘div’, class_=“lb_box”).find_all(“dd”)
for nm in lables:
tempurl = nm.find(‘a’)
imgurl=dict(tempurl.attrs)[‘href’]
title=dict(tempurl.attrs)[‘title’]
kpart=title.split("-")[0]
klastpart=kpart[0:5]
kdatas.append(imgurl)
downLoadpart(klastpart)
return kdatas

#创建文件夹
def downLoadpart(kname):
path=sys.path[0]
kpartname=kname.replace(’"’,"")
new_path = os.path.join(path, “images/”+kpartname)
if not os.path.isdir(new_path):
os.makedirs(new_path)
print(“创建文件夹:”+new_path)
#下载图片
def downLoadImg(url,new_path):
path=sys.path[0]
kpath = os.path.join(path, “images\”+new_path+"\")
try:
strimg=url.split("/")[-1]
print(strimg)
file=open(kpath+strimg,‘wb’)
img=urllib.request.urlopen(url)
buf=img.read()
file.write(buf)
except IOError:
print(“Error: 没有找到文件或读取文件失败”)
pass
else:
print(“内容写入文件成功”)
file.close()
return
#获取图片地址
def imput_img(url):
html=get_imghtml(url)
if html==“false”:
pass
else:
soup = BeautifulSoup(html,“html.parser”)
lables = soup.find(‘div’, class_=“l_effect_img_mid”).find_all(“img”)[0]
imgurl=dict(lables.attrs)[‘src’]
title=dict(lables.attrs)[‘alt’]
downLoadImg(imgurl,title)
return
#获取图片每页地址
def get_imgpic(html):
soup = BeautifulSoup(html,“html.parser”)
tempurl = soup.find(‘li’, class_=“bottom_show”)
pageno = tempurl.text
firstno=pageno.split("/")[0]
lastno=pageno.split("/")[-1]
knewurl=(tempurl.find(‘a’).attrs)[“href”]
newurl=knewurl.replace(".shtml","")
for x in range(int(firstno),10):
print(newurl+"-"+str(x)+".shtml")
if x == 1:
imput_img(knewurl)
else:
imput_img(newurl+"_"+str(x)+".shtml")
return

def download_imgpage(kurl):
html=get_imghtml(kurl)
data=download_page(html)
for x in data:
khtml=get_imghtml(x)
get_imgpic(khtml)
return
#执行入口
if name==‘main’:
kurl=“http://pic.yesky.com/c/6_18332.shtml”
download_imgpage(kurl)
#for x in range(1,5):
#khtml=kurl+str(x)+".shtml"