Pythonクローラー学習(簡単なログインシミュレーション(二))

3226 ワード

Cookieを使って新浪微博(Sina Weibo)へのログインをシミュレートし、特定ユーザーの微博が更新されたかどうかをリアルタイムに監視して、更新があればメールで通知できます。
#-*-coding:utf8-*-

import smtplib
from email.mime.text import MIMEText
import requests
from lxml import etree
import os
import time
import sys
# Python 2 only: reload the sys module, then make UTF-8 the interpreter-wide
# default encoding used for implicit str/unicode conversions.
# NOTE(review): `reload` is not a builtin on Python 3, so these two lines pin
# the script to Python 2.
reload(sys)
sys.setdefaultencoding('utf-8')


#########################################################################################
class mailhelper(object):
    """Send plain-text, UTF-8 notification e-mails through one SMTP server.

    The connection settings assigned in ``__init__`` are placeholders and
    have to be replaced with real values before ``send_mail`` can succeed.
    """

    def __init__(self):
        self.mail_host = "smtp.xxxx.com"  # SMTP server passed to connect()
        self.mail_user = "xxxxxx"  # account name passed to login()
        self.mail_pass = "xxxxxx"  # password passed to login()
        self.mail_postfix = "xxxx.com"  # mail domain; not read by any method of this class

    def send_mail(self, to_list, sub, content):
        """Send ``content`` as a plain-text mail to every address in ``to_list``.

        Args:
            to_list: list of recipient addresses; used both for the ``To``
                header and as the SMTP envelope recipients.
            sub: subject line.
            content: message body, encoded as UTF-8.

        Returns:
            True when connect, login and sendmail all completed without
            raising; False otherwise (the error text is printed).
        """
        me = "xxoohelper"
        msg = MIMEText(content, _subtype='plain', _charset='utf-8')
        msg['Subject'] = sub
        msg['From'] = me
        # Address lists in mail headers are comma-separated, not ';'-separated.
        msg['To'] = ", ".join(to_list)
        try:
            server = smtplib.SMTP()
            try:
                server.connect(self.mail_host)
                server.login(self.mail_user, self.mail_pass)
                server.sendmail(me, to_list, msg.as_string())
            finally:
                # Release the socket even when connect/login/sendmail fails.
                server.close()
            return True
        except Exception as e:
            # Best-effort delivery: report the failure and let the caller go on.
            print(str(e))
            return False
############################################################################################################3



class xxoohelper(object):
    """Fetch one weibo.cn page and remember already-seen posts in ``weibo.txt``."""

    def __init__(self):
        self.url = 'http://weibo.cn/xxxxxx'  # page to poll (placeholder)

    def getSource(self):
        """Return the raw response body of ``self.url`` fetched without cookies."""
        html = requests.get(self.url).content
        return html

    def getData(self, html):
        """Return the cookie dict that ``getContent`` sends with its request.

        ``html`` is accepted but not used. The cookie value is a placeholder
        that has to be replaced with a real logged-in cookie string.
        """
        data = {"Cookie":"     Cookies"}
        return data

    def getContent(self, data):
        """Fetch ``self.url`` with cookies ``data`` and return one post as text.

        Returns the text of the third ``span.ctt`` element (with every
        ``http://`` removed) followed by the text of the first ``span.ct``.

        Raises:
            IndexError: when the page has fewer than three ``span.ctt``
                elements or no ``span.ct`` text.
        """
        newhtml = requests.get(self.url, cookies=data).content
        new_selector = etree.HTML(newhtml)
        content = new_selector.xpath('//span[@class="ctt"]')
        # NOTE(review): index 2 presumably skips two non-post "ctt" spans at
        # the top of the page -- confirm against the live markup.
        newcontent = unicode(content[2].xpath('string(.)')).replace('http://', '')
        sendtime = new_selector.xpath('//span[@class="ct"]/text()')[0]
        sendtext = newcontent + sendtime
        return sendtext

    def tosave(self, text):
        """Append ``text`` plus a newline to ``weibo.txt``."""
        # `with` closes the file even if the write raises.
        with open('weibo.txt', 'a') as f:
            f.write(text + '\n')

    def tocheck(self, data):
        """Return True when ``data`` is not yet recorded in ``weibo.txt``.

        A missing file counts as "not recorded". The match is against whole
        lines of the file, so ``data`` has to equal one complete line.
        """
        if not os.path.exists('weibo.txt'):
            return True
        with open('weibo.txt', 'r') as f:
            existweibo = f.readlines()
        return data + '\n' not in existweibo


if __name__ == '__main__':
    mailto_list = ['[email protected]']  # notification recipients
    helper = xxoohelper()
    while True:
        source = helper.getSource()
        data = helper.getData(source)
        content = helper.getContent(data)
        if helper.tocheck(content):
            # New post: mail it, then record it so it is not reported again.
            if mailhelper().send_mail(mailto_list, u" ", content):
                print(u" ")
            else:
                print(u" ")
            helper.tosave(content)
            print(content)
        else:
            print(u'pass')
        time.sleep(10)