Pythonクローラー学習(簡単なログインのシミュレーション(二))
3226 ワード
cookieを使って新浪微博(Sina Weibo)へのログインをシミュレートし、特定のユーザーの微博が更新されたかどうかをリアルタイムに監視して、メールで通知することができます。
#-*-coding:utf8-*-
import smtplib
from email.mime.text import MIMEText
import requests
from lxml import etree
import os
import time
import sys
# Python 2 only: sys.setdefaultencoding is removed from the sys namespace at
# interpreter start-up, so the module is reloaded to reach it again.  Setting
# the default to UTF-8 makes implicit str <-> unicode conversions use UTF-8
# instead of ASCII.  NOTE(review): this is a process-wide setting and the
# call does not exist on Python 3.
reload(sys)
sys.setdefaultencoding('utf-8')
#########################################################################################
class mailhelper(object):
    """Minimal SMTP helper that sends plain-text UTF-8 mails.

    The connection settings assigned in ``__init__`` are placeholders and
    have to be replaced with real values before the class can send mail.
    """

    def __init__(self):
        self.mail_host = "smtp.xxxx.com"  # SMTP server host name
        self.mail_user = "xxxxxx"         # login name; also local part of the sender
        self.mail_pass = "xxxxxx"         # password handed to SMTP.login()
        self.mail_postfix = "xxxx.com"    # domain part of the sender address

    def _build_message(self, to_list, sub, content):
        """Return ``(sender, message)`` for a mail addressed to *to_list*.

        ``sender`` is the display name ``xxoohelper`` followed by the address
        ``<mail_user@mail_postfix>``; ``message`` is a ``MIMEText`` instance
        with the Subject, From and To headers filled in.
        """
        # NOTE(review): the address part is rebuilt from mail_user and
        # mail_postfix; a bare "xxoohelper" is not a usable sender address.
        # Confirm that mail_user holds only the local part.
        sender = "xxoohelper" + "<" + self.mail_user + "@" + self.mail_postfix + ">"
        msg = MIMEText(content, _subtype='plain', _charset='utf-8')
        msg['Subject'] = sub
        msg['From'] = sender
        # Address lists in mail headers are comma-separated.
        msg['To'] = ", ".join(to_list)
        return sender, msg

    def send_mail(self, to_list, sub, content):
        """Send *content* with subject *sub* to every address in *to_list*.

        Returns ``True`` when the message was handed to the SMTP server and
        ``False`` when any exception occurred; the exception text is printed.
        """
        me, msg = self._build_message(to_list, sub, content)
        server = None
        try:
            server = smtplib.SMTP()
            server.connect(self.mail_host)
            server.login(self.mail_user, self.mail_pass)
            server.sendmail(me, to_list, msg.as_string())
            return True
        except Exception as e:
            # Best effort: report the failure and let the caller decide.
            print(str(e))
            return False
        finally:
            # Release the connection on the failure path as well.
            if server is not None:
                server.close()
############################################################################################################3
class xxoohelper(object):
    """Fetch a weibo.cn page and track which posts were already recorded.

    Recorded posts are kept one per line in ``weibo.txt`` in the current
    working directory.
    """

    def __init__(self):
        self.url = 'http://weibo.cn/xxxxxx'  # page to monitor (placeholder)

    def getSource(self):
        """Request ``self.url`` without cookies and return the response body."""
        html = requests.get(self.url).content
        return html

    def getData(self, html):
        """Return the cookie mapping passed to the authenticated request.

        *html* is accepted but not used.  The returned value is a placeholder.
        NOTE(review): presumably the value has to be replaced with the cookie
        string of a logged-in browser session - confirm how the server expects
        it, since ``cookies=`` sends it as a cookie named "Cookie".
        """
        data = {"Cookie": " Cookies"}
        return data

    def getContent(self, data):
        """Fetch the page with cookies *data* and return post text plus time.

        The text of the third ``span.ctt`` element (with every ``http://``
        removed) is concatenated with the first ``span.ct`` text node.

        Raises ``IndexError`` when the page has fewer than three ``span.ctt``
        elements or no ``span.ct`` text.
        """
        newhtml = requests.get(self.url, cookies=data).content
        new_selector = etree.HTML(newhtml)
        content = new_selector.xpath('//span[@class="ctt"]')
        # NOTE(review): index 2 presumably skips profile spans that precede
        # the newest post - verify against the actual page layout.
        newcontent = unicode(content[2].xpath('string(.)')).replace('http://', '')
        sendtime = new_selector.xpath('//span[@class="ct"]/text()')[0]
        sendtext = newcontent + sendtime
        return sendtext

    def tosave(self, text):
        """Append *text* as one line to ``weibo.txt``."""
        # NOTE(review): non-ASCII unicode text is encoded implicitly on
        # write, so this depends on the process default encoding.
        with open('weibo.txt', 'a') as f:
            f.write(text + '\n')

    def tocheck(self, data):
        """Return ``True`` when *data* is not yet recorded in ``weibo.txt``.

        A missing file counts as "not recorded".  The comparison is made per
        line, so *data* must match a whole stored line.
        """
        if not os.path.exists('weibo.txt'):
            return True
        with open('weibo.txt', 'r') as f:
            existweibo = f.readlines()
        return (data + '\n') not in existweibo
if __name__ == '__main__':
    # Poll the page forever.  A post that is not yet recorded is mailed,
    # stored and printed; a recorded one only prints 'pass'.
    # NOTE(review): the recipient below looks like an obfuscated placeholder
    # and must be replaced with a real address.
    mailto_list = ['[email protected]']  # notification recipients
    helper = xxoohelper()
    while True:
        cookies = helper.getData(helper.getSource())
        content = helper.getContent(cookies)
        if not helper.tocheck(content):
            print(u'pass')
        else:
            sent = mailhelper().send_mail(mailto_list, u" ", content)
            if sent:
                print(u" ")
            else:
                print(u" ")
            # The post is recorded whether or not the mail went out.
            helper.tosave(content)
            print(content)
        # Wait before the next poll.
        time.sleep(10)