Pythonによる感染症統計データとニュースデータの取得
46541 ワード
本記事は主に、研究者が新型コロナウイルスに関する情報を収集・分析するのを支援することを目的としています。著作権などの問題がある場合は、著者までご連絡いただければ削除します。本記事では、Pythonを使用して感染状況の統計データ(出典:テンセントニュース)とニュースデータ(出典:テンセントニュース、丁香園)を取得し、SQL Serverに書き込みます。
全国統計データを取得するコードは次のとおりです。
Excelファイルに書き込む場合は、openpyxlライブラリを使用します。ニュースデータ取得用のコードと本記事の完全なコードは、CSDNにアップロードしてあります。
#
import requests
import re
import json
import openpyxl
import time
import pymssql
import time
# Module-level state that GetCityData() fills in.
lastUpdateTime = ''  # replaced by the payload's 'lastUpdateTime' value
# Row buffers handed to cursor.executemany(): per-day rows and the totals row.
data_china, data_chinatimeline = [], []
# Endpoint the statistics are downloaded from (view.inews.qq.com).
Get_City_V2 = r"https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback"
def GetHtmlText(url):
    """Fetch *url* with HTTP GET and return the response body as text.

    The request uses a 30 second timeout, and the body is decoded with the
    encoding that ``requests`` detects from the content
    (``apparent_encoding``).

    Args:
        url: Address to request.

    Returns:
        The decoded response text, or the sentinel string ``"Error"`` when
        the request fails (connection problem, timeout, or an HTTP error
        status reported by ``raise_for_status``).
    """
    try:
        res = requests.get(url, timeout=30)
        res.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures map to the sentinel. A bare ``except``
        # would also hide programming errors and KeyboardInterrupt.
        return "Error"
    res.encoding = res.apparent_encoding
    return res.text
# Download the raw payload; GetHtmlText() returns the sentinel "Error" on failure.
City_json = GetHtmlText(Get_City_V2)
if City_json == "Error":
    # Fail with a clear message instead of the opaque JSON decode error that
    # json.loads("Error") would raise. ValueError is the parent class of
    # json.JSONDecodeError, so existing ``except ValueError`` handlers still match.
    raise ValueError("request to " + Get_City_V2 + " failed")
# Keep only the value stored under the envelope's "data" key; it is passed to
# GetCityData(), which runs json.loads() on it again.
City_Data = json.loads(City_json)["data"]
def GetCityData(CitysJson):
    """Parse the nationwide statistics payload into the module-level buffers.

    Args:
        CitysJson: JSON text containing the keys ``lastUpdateTime``,
            ``chinaDayList`` (a list of per-day objects) and ``chinaTotal``
            (a single object). Each object is read for ``confirm``,
            ``suspect``, ``dead`` and ``heal``; day objects also for ``date``.

    Side effects:
        * ``lastUpdateTime`` is rebound to the payload's ``lastUpdateTime``.
        * One ``(date, date, confirm, suspect, dead, heal)`` tuple per day is
          appended to ``data_china``.
        * One ``(confirm, suspect, dead, heal, lastUpdateTime, confirm,
          suspect, dead, heal)`` tuple is appended to ``data_chinatimeline``.

    Raises:
        KeyError / TypeError / ValueError: if an expected key is missing or a
            counter cannot be converted with ``int()``.
    """
    global lastUpdateTime
    global data_china
    global data_chinatimeline
    data = json.loads(CitysJson)
    lastUpdateTime = data.get('lastUpdateTime')
    chinaDayList = data.get('chinaDayList')
    chinaTotal = data.get('chinaTotal')

    for day in chinaDayList:
        # The date appears twice because the consuming statement is
        # "if not exists(... where sdate=%s) insert ... VALUES (%s, ...)":
        # the first value feeds the existence check, the second the insert.
        # Exactly one 6-tuple is appended per day; an extra 5-tuple would not
        # match the statement's six placeholders.
        data_china.append((
            day['date'],
            day['date'],
            int(day['confirm']),
            int(day['suspect']),
            int(day['dead']),
            int(day['heal']),
        ))

    # Totals are repeated for the same reason: four values for the existence
    # check, then lastUpdateTime plus the same four values for the insert.
    totals = (
        int(chinaTotal['confirm']),
        int(chinaTotal['suspect']),
        int(chinaTotal['dead']),
        int(chinaTotal['heal']),
    )
    data_chinatimeline.append(totals + (lastUpdateTime,) + totals)
# Parse the downloaded payload into data_china / data_chinatimeline.
GetCityData(City_Data)

# SQL Server connection settings; left blank here, fill them in before running.
server = ""
user = ""
password = ""
database = ""
conn = pymssql.connect(server, user, password, database)
try:
    cursor = conn.cursor()
    if not cursor:
        # Raise a real exception instance; raising a tuple is a TypeError on Python 3.
        raise NameError(" ")
    print('OK')
    # Insert a per-day row only when no row with the same sdate exists yet.
    sql_china="if not exists(select * from SARI_ChinaSta where sdate=%s) insert into SARI_ChinaSta ([sdate],[sconfirm],[ssuspect],[sdead],[sheal]) VALUES (%s,%d,%d,%d,%d)"
    cursor.executemany(sql_china, data_china)
    # Insert the totals row only when these four counters are not stored yet.
    sql_chinaLine="if not exists(select * from SARI_CTLine where sconfirm=%s and ssuspect=%s and sdead=%s and sheal=%s) insert into SARI_CTLine ([lastUpdateTime],[sconfirm],[ssuspect],[sdead],[sheal]) VALUES (%s,%d,%d,%d,%d)"
    cursor.executemany(sql_chinaLine, data_chinatimeline)
    # commit() is called explicitly rather than relying on autocommit.
    conn.commit()
    # Print the completion time (previously the ``time`` module object itself was printed).
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())), ' ')
finally:
    # Release the connection even when one of the statements above fails.
    conn.close()

# Append a timestamped line to the run log ("a" keeps earlier entries).
f = "log_getchina.txt"
with open(f, "a") as file:
    file.write(" :" + time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) + "\n")
都市別統計データを取得するコードは次のとおりです。
# v2.0
import requests
import re
import json
import openpyxl
import time
import pymssql
import time
# Module-level state that GetCityData() fills in.
lastUpdateTime = ''  # replaced by the payload's 'lastUpdateTime' value
# Parallel column lists: index n across all of them describes one row.
country, area, city = [], [], []
today_dead, today_confirm, today_suspect, today_heal = [], [], [], []
total_dead, total_confirm, total_suspect, total_heal = [], [], [], []
data_china = []
# Endpoint the statistics are downloaded from (view.inews.qq.com).
Get_City_V2 = r"https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback"
def GetHtmlText(url):
    """Fetch *url* with HTTP GET and return the response body as text.

    The request uses a 30 second timeout, and the body is decoded with the
    encoding that ``requests`` detects from the content
    (``apparent_encoding``).

    Args:
        url: Address to request.

    Returns:
        The decoded response text, or the sentinel string ``"Error"`` when
        the request fails (connection problem, timeout, or an HTTP error
        status reported by ``raise_for_status``).
    """
    try:
        res = requests.get(url, timeout=30)
        res.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures map to the sentinel. A bare ``except``
        # would also hide programming errors and KeyboardInterrupt.
        return "Error"
    res.encoding = res.apparent_encoding
    return res.text
# Download the raw payload; GetHtmlText() returns the sentinel "Error" on failure.
City_json = GetHtmlText(Get_City_V2)
if City_json == "Error":
    # Fail with a clear message instead of the opaque JSON decode error that
    # json.loads("Error") would raise. ValueError is the parent class of
    # json.JSONDecodeError, so existing ``except ValueError`` handlers still match.
    raise ValueError("request to " + Get_City_V2 + " failed")
# Keep only the value stored under the envelope's "data" key; it is passed to
# GetCityData(), which runs json.loads() on it again.
City_Data = json.loads(City_json)["data"]
def _append_stat_row(cname, fname, sname, node):
    """Append one row built from *node* to the module-level column lists.

    ``cname``/``fname``/``sname`` go to ``country``/``area``/``city``; the
    ``dead``, ``confirm``, ``suspect`` and ``heal`` values of ``node['today']``
    and ``node['total']`` go to the matching ``today_*`` / ``total_*`` lists.
    """
    country.append(cname)
    area.append(fname)
    city.append(sname)
    today = node['today']
    today_dead.append(today['dead'])
    today_confirm.append(today['confirm'])
    today_suspect.append(today['suspect'])
    today_heal.append(today['heal'])
    total = node['total']
    total_dead.append(total['dead'])
    total_confirm.append(total['confirm'])
    total_suspect.append(total['suspect'])
    total_heal.append(total['heal'])


def GetCityData(CitysJson):
    """Flatten the payload's ``areaTree`` into the module-level column lists.

    The tree is walked up to three levels deep (top-level entry, its
    ``children``, and their ``children``). One row is appended for every
    third-level node. A node whose ``children`` key is absent or ``None``
    contributes a row itself, with its own name repeated for the missing
    lower levels. A node with an empty ``children`` list contributes no row.

    Args:
        CitysJson: JSON text containing ``areaTree`` (a list of nodes with
            ``name``, ``today``, ``total`` and optionally ``children``) and
            ``lastUpdateTime``.

    Returns:
        The number of top-level entries in ``areaTree``.

    Side effects:
        Rebinds the global ``lastUpdateTime`` and appends to ``country``,
        ``area``, ``city``, ``today_*`` and ``total_*``.

    Raises:
        KeyError / TypeError: if ``areaTree`` is missing or a node lacks
            ``today`` / ``total`` or one of their counters.
    """
    # Only lastUpdateTime is rebound; the column lists are mutated in place
    # and therefore need no ``global`` declaration.
    global lastUpdateTime
    data = json.loads(CitysJson)
    areaTree = data.get('areaTree')
    lastUpdateTime = data.get('lastUpdateTime')

    for top_node in areaTree:
        cname = top_node.get('name')
        second_level = top_node.get('children')
        if second_level is None:
            # No breakdown below the top level: the entry is its own area and city.
            _append_stat_row(cname, cname, cname, top_node)
            continue
        for mid_node in second_level:
            fname = mid_node.get('name')
            third_level = mid_node.get('children')
            if third_level is None:
                # No breakdown below the second level: reuse its name as the city.
                _append_stat_row(cname, fname, fname, mid_node)
                continue
            for leaf_node in third_level:
                _append_stat_row(cname, fname, leaf_node.get('name'), leaf_node)
    return len(areaTree)
# Flatten the downloaded payload into the parallel column lists.
GetCityData(City_Data)
length = len(country)

# Build one parameter tuple per row. The leading (lastUpdateTime, country,
# area, city) feeds the "if not exists" check of sql_xj; the rest feeds the
# INSERT, whose last column is lastUpdateTime again.
data_xj = []
for row in zip(country, area, city,
               today_dead, today_confirm, today_suspect, today_heal,
               total_dead, total_confirm, total_suspect, total_heal):
    data_xj.append((lastUpdateTime,) + row[:3] + row + (lastUpdateTime,))

# SQL Server connection settings; left blank here, fill them in before running.
server = ""
user = ""
password = ""
database = ""
conn = pymssql.connect(server, user, password, database)
try:
    cursor = conn.cursor()
    if not cursor:
        # Raise a real exception instance; raising a tuple is a TypeError on Python 3.
        raise NameError(" ")
    print('OK')
    # Insert a row only when the same update time and place names are not stored yet.
    sql_xj = "if not exists(select * from SARI_detail where lastUpdateTime=%s and cname=%s and fname=%s and sname=%s) INSERT INTO SARI_detail ([cname],[fName],[sName],[today_dead],[today_confirm],[today_suspect],[today_heal],[total_dead],[total_confirm],[total_suspect],[total_heal],[lastUpdateTime]) VALUES (%s,%s,%s,%d,%d,%d,%d,%d,%d,%d,%d,%s)"
    cursor.executemany(sql_xj, data_xj)
    # commit() is called explicitly rather than relying on autocommit.
    conn.commit()
    # Print the completion time (previously the ``time`` module object itself was printed).
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())), ' ')
finally:
    # Release the connection even when one of the statements above fails.
    conn.close()

# Append a timestamped line to the run log ("a" keeps earlier entries).
f = "log_getcityv2.txt"
with open(f, "a") as file:
    file.write(" :" + time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) + "\n")
Excelファイルに書き込む場合は、openpyxlライブラリを使用します。ニュースデータ取得用のコードと本記事の完全なコードは、CSDNにアップロードしてあります。