#!/usr/bin/python
#coding=utf-8
import os
from urllib.request import urlopen
from urllib.request import urlretrieve
import re
def getHtml(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Any URL accepted by ``urllib.request.urlopen``.

    Returns:
        The undecoded response body (``bytes``) exactly as read from the
        response object.

    Raises:
        Whatever ``urlopen`` raises for an unreachable or invalid URL
        (e.g. ``urllib.error.URLError``, ``ValueError``).
    """
    # The context manager closes the response (and its socket) even if
    # read() raises; the original left it open.
    with urlopen(url) as page:
        return page.read()
def getImg(html, id, page_num, dump_dir="/usr/lxp/python_test/getImg_Python"):
    """Download every matching forum image referenced in *html*.

    The page text is searched for ``http://imgsrc.baidu.com/forum/...jpg``
    URLs. Each hit is saved in the current working directory as
    ``topic_<id>_<page_num>_<index>.jpg`` (index starts at 0).

    Args:
        html: Page content, either ``bytes`` (as returned by ``getHtml``)
            or ``str``.
        id: Topic id string used in the saved file names. (The name shadows
            the builtin but is kept for interface compatibility.)
        page_num: Page number used in the saved file names and in the name
            of the debug dump.
        dump_dir: Directory that receives a debug copy of the page as
            ``out_<page_num>``. Pass ``None`` to skip the dump.

    Returns:
        The list of image URLs found, in order of appearance.

    Raises:
        Whatever ``urlretrieve`` raises when a download fails.
    """
    imgre = re.compile(r'http:\/\/imgsrc.baidu.com\/forum\/.{70,100}jpg')

    # str(bytes) would produce the "b'...'" repr with escaped characters;
    # decode so both the dump and the regex see the real page text.
    if isinstance(html, (bytes, bytearray)):
        html = html.decode("utf-8", errors="replace")
    else:
        html = str(html)

    if dump_dir is not None:
        dump_path = os.path.join(dump_dir, "out_" + str(page_num))
        try:
            with open(dump_path, "w+", encoding="utf-8") as f:
                f.write(html)
        except OSError as err:
            # The dump is only a debugging aid; an unwritable location must
            # not prevent the images from being downloaded.
            print('could not write page dump ' + dump_path + ': ' + str(err))

    imglist = imgre.findall(html)
    for x, imgurl in enumerate(imglist):
        save_name = 'topic_' + id + '_' + str(page_num) + '_%s.jpg' % x
        urlretrieve(imgurl, save_name)
        # Report only after the download actually completed.
        print('download' + save_name + ' sucessfully from ' + imgurl)
    return imglist
def getAllImg(topic_id):
    """Download the images of every page of a topic into ``topic_<topic_id>/``.

    Pages are requested one after another from
    ``http://tieba.baidu.com/p/<topic_id>?see_lz=1&pn=<page>``. After each
    page is processed by ``getImg``, the ``topic_<topic_id>*.jpg`` files in
    the current directory are moved into the ``topic_<topic_id>`` directory.
    The loop stops as soon as a page has the same byte length as the
    previously fetched page.

    Args:
        topic_id: Topic id as a string.

    Returns:
        The page number at which the loop stopped (the first page whose
        length equalled the previous page's length).
    """
    page_num = 1
    html_len = 0
    target_dir = 'topic_' + topic_id
    # Filesystem calls instead of shell `mkdir`/`mv`: topic_id comes from
    # user input and must never be interpolated into a shell command line.
    os.makedirs(target_dir, exist_ok=True)
    while True:
        html = getHtml("http://tieba.baidu.com/p/" + topic_id + '?see_lz=1&pn=' + str(page_num))
        print(str(html_len) + ' ' + str(len(html)))
        if html_len == len(html):
            break
        getImg(html, topic_id, page_num)
        # Equivalent of `mv topic_<id>*.jpg topic_<id>` for the current directory.
        for name in os.listdir('.'):
            if name.startswith(target_dir) and name.endswith('.jpg') and os.path.isfile(name):
                os.replace(name, os.path.join(target_dir, name))
        html_len = len(html)
        page_num = page_num + 1
    return page_num
# Script entry: prompt for a topic id and download all of its images.
# NOTE: these statements run at module level, so they also execute on import.
# Surrounding whitespace is stripped because the id is spliced directly into
# URLs and file names.
topic_id = input("topic id:").strip()
if not topic_id:
    raise SystemExit("topic id must not be empty")
getAllImg(topic_id)