Python BeautifulSoup 4爬表
604 ワード
# -*-coding:utf-8-*-
import io
import os
import sys

from bs4 import BeautifulSoup
# Scrape the table rows out of <prefix>1.html, <prefix>2.html and
# <prefix>3.html and append them to txt.txt.
#
# Usage: python <script> <prefix>
#
# Output format: one line per <tr>; every <td> text is followed by a comma,
# so each line ends with a trailing comma.
#
# The script runs on both Python 2 and Python 3: the output encoding is given
# explicitly to io.open() instead of changing the interpreter-wide default
# with reload(sys) / sys.setdefaultencoding(), which only exists on Python 2.
if len(sys.argv) < 2:
    sys.exit("usage: %s <path-prefix>  (reads <path-prefix>1.html .. "
             "<path-prefix>3.html)" % sys.argv[0])

# The prefix does not change between iterations, so read it once.
path = sys.argv[1]

# 'with' guarantees both files are closed even if parsing raises.
with io.open('txt.txt', 'a', encoding='utf-8') as fp:
    for i in range(3):
        s = path + str(i + 1) + '.html'

        # Open in binary mode so BeautifulSoup receives the raw bytes and
        # performs its own character-set detection on every Python version.
        with io.open(s, 'rb') as html_doc:
            # Name the parser explicitly: without it bs4 warns and picks
            # whichever parser happens to be installed, so the resulting tree
            # could differ from machine to machine.
            soup = BeautifulSoup(html_doc, 'html.parser')

        for tabb in soup.find_all('tr'):
            # The original emitted one blank line on stdout per table row;
            # kept so the console output is unchanged.
            sys.stdout.write('\n')

            # u'' literals keep the joined text unicode on Python 2, which
            # the io text layer requires even when a row has no cells.
            cells = [tdd.get_text() + u',' for tdd in tabb.find_all('td')]
            fp.write(u''.join(cells) + u'\n')