Python Post and Getはwebバックグラウンドシステムにログインし、ページをキャプチャする

4140 ワード

#coding=utf8
#! /usr/bin/env python

import httplib
import re
import socket
import urllib

timeout = 60
socket.setdefaulttimeout(timeout)


def getTable():

    f = open('kvpage.html')
    page = f.readlines()
    f.close()
    pattern = re.compile(r'.*<tbody>(.*?)</tbody>.*')
    
    for line in page:
        #print line
        m = pattern.match(line.strip())
        if m is not None:
            return m.group(1)
    
    return None

def extractKvEvents(content):
    
    #init result
    table = []
    
    #init pattern
    patternTR = re.compile(r"<tr>(.*?)</tr>")
    patternTD = re.compile(r'<td class="confluenceTd">(.*?)</td>')
    
    #search all the rows
    allrows = patternTR.findall(content)
    if allrows is not None:
        for row in allrows:
            #print row
            cols = patternTD.findall(row)
            if cols is not None:
                
                table.append(cols)
            
    return table

def outputToExcel(table):
    for row in table:
        print row

def loginWiki():

    httpClient = None
    try:
        params = urllib.urlencode({'os_username': '[email protected]',
                                   'os_password': 'xxxx', 
                                   'login': 'Log In'})
        
        headers = {"Content-type": "application/x-www-form-urlencoded"
                        , "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"}
    
        httpClient = httplib.HTTPConnection("xxx.com", 8080, timeout=30)
        httpClient.request("POST", "/login.action", params, headers)
    
        response = httpClient.getresponse()
#         print response.status
#         print response.reason
#         print response.read()
#         print response.getheaders()
        print response.getheader('Set-Cookie')
        cookieFile = open('cookie.txt', 'w')
        cookieFile.write(response.getheader('Set-Cookie'))
        cookieFile.close()
    except Exception, e:
        print e
    finally:
        if httpClient:
            httpClient.close()

def catchPage():
    httpClient = None

    try:
        #read cookie
        f = open('cookie.txt')
        cookie = f.read().strip()
        print cookie
        f.close()
        
        #init headers
        headers = {"Content-type": "application/x-www-form-urlencoded",
                    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
                    'Cookie': cookie}
    
        #send request
        httpClient = httplib.HTTPConnection('xxx.com', 8080, timeout=30)
        httpClient.request('GET', '/xxxPath', headers=headers)
    
        #response HTTPResponse  
        response = httpClient.getresponse()
        print response.status
        print response.reason
        
        htmlPage = open('kvpage.html', 'w')
        htmlPage.write(response.read())
        htmlPage.close()
    except Exception, e:
        print e
    finally:
        if httpClient:
            httpClient.close()

if __name__ == '__main__':

    loginWiki()
    catchPage()
    tablecontent = getTable()
    table = extractKvEvents(tablecontent) 
    outputToExcel(table)