import os
import re
import urllib.error
import urllib.request
def craw(url, page, save_dir="D:/python/python37/myweb/part4/img1/"):
    """Download the product thumbnails found on one listing page.

    Fetches ``url``, isolates the product-list section of the HTML, extracts
    every lazily-loaded 220x220 ``.jpg`` thumbnail address from it and saves
    each image as ``<save_dir>/<page><n>.jpg``.

    Args:
        url: Address of the listing page to fetch.
        page: Page number; used as the prefix of every saved file name.
        save_dir: Directory the images are written to. It is created when
            missing, but only if there is at least one image to save.

    Returns:
        None. Returns early, without touching the disk, when the page has
        no product-list section or the section contains no matching images.

    Raises:
        urllib.error.URLError: If the listing page itself cannot be fetched.
            Failures on individual images are skipped, not raised.
    """
    # Close the response deterministically and search real text rather than
    # the repr of a bytes object.
    with urllib.request.urlopen(url) as response:
        html = response.read().decode("utf-8", errors="replace")

    # DOTALL lets '.' cross line breaks, which the section always contains.
    sections = re.findall(
        r'<div id="plist".+?<div class="page clearfix">', html, re.S
    )
    if not sections:
        return

    image_urls = re.findall(
        r'<img width="220" height="220" data-lazy-img="//(.+?\.jpg)">',
        sections[0],
        re.S,
    )
    if not image_urls:
        return

    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    x = 1
    for image_url in image_urls:
        # NOTE(review): page and index are joined without a separator, so
        # page 1 / image 11 and page 11 / image 1 both map to "111.jpg".
        # Kept as-is because existing consumers may rely on these names.
        image_name = os.path.join(save_dir, str(page) + str(x) + ".jpg")
        try:
            urllib.request.urlretrieve("http://" + image_url, filename=image_name)
        except urllib.error.URLError as e:
            # Best effort: a failed image is skipped. The counter advances
            # once per error attribute present, leaving a gap in the
            # numbering where the download failed.
            if hasattr(e, "code"):
                x += 1
            if hasattr(e, "reason"):
                x += 1
        x += 1
# Walk listing pages 1 through 78, handing each page's URL and number to craw().
list_url_prefix = "https://list.jd.com/list.html?cat=9987,653,655&page="
for i in range(1, 79):
    url = list_url_prefix + str(i)
    craw(url, i)