Pythonに基づいて疫病統計とニュースデータを取得

46541 ワード

本記事は、主に研究者による新型コロナウイルス関連情報の収集と分析を支援することを目的としています。著作権などの問題がある場合は、著者に連絡していただければ削除します。本記事では、Pythonを使用して感染状況の統計データ(出典:テンセントニュース)とニュースデータ(出典:テンセントニュース、丁香園)を取得し、SQL Serverに書き込みます。
#             
import requests
import re
import json
import openpyxl
import time
import pymssql
import time 

lastUpdateTime=''# 'lastUpdateTime' value from the feed; assigned by GetCityData
data_china = []# parameter tuples built from 'chinaDayList'; bound to sql_china
data_chinatimeline=[]# parameter tuple built from 'chinaTotal'; bound to sql_chinaLine


Get_City_V2=r"https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback"# statistics endpoint (v2) queried by GetHtmlText

def GetHtmlText(url):
    """Download *url* with an HTTP GET and return the body as text.

    Args:
        url: Address to fetch; the request times out after 30 seconds.

    Returns:
        The decoded response body, or the sentinel string "Error" when the
        request fails (connection problem, timeout, or an error status
        reported by ``raise_for_status``).
    """
    try:
        res = requests.get(url,timeout = 30)
        res.raise_for_status()
        # Decode with the encoding guessed from the body itself.
        res.encoding = res.apparent_encoding
        return res.text
    except requests.RequestException:
        # Only request failures map to the sentinel; programming errors and
        # KeyboardInterrupt are no longer silently swallowed.
        return "Error"

# Download the feed, then keep only its "data" member (GetCityData parses
# that member as JSON a second time).
City_json = GetHtmlText(Get_City_V2)
City_Data = json.loads(City_json)["data"]

def GetCityData(CitysJson):
    """Parse the national statistics payload into the module-level row lists.

    Args:
        CitysJson: JSON text containing 'lastUpdateTime', 'chinaDayList'
            (a list of per-day dicts) and 'chinaTotal' (a dict of totals).

    Side effects:
        Assigns the module-level ``lastUpdateTime``, appends one tuple per
        daily entry to ``data_china`` and one tuple to
        ``data_chinatimeline``.

    Raises:
        KeyError, TypeError, ValueError: if an expected field is missing or
            is not convertible to ``int``.
    """
    global lastUpdateTime

    data = json.loads(CitysJson)
    lastUpdateTime = data.get('lastUpdateTime')
    chinaDayList = data.get('chinaDayList')
    chinaTotal = data.get('chinaTotal')

    # Every row needs six values: the date for the "if not exists" check
    # plus the five inserted columns (the date is therefore listed twice).
    # Previously only the last day was appended in that shape, while all
    # other days were appended as five-value tuples.
    for chinadata in chinaDayList:
        data_china.append((
            chinadata['date'],
            chinadata['date'],
            int(chinadata['confirm']),
            int(chinadata['suspect']),
            int(chinadata['dead']),
            int(chinadata['heal']),
        ))

    # Totals are listed twice around the timestamp: first for the
    # duplicate check, then for the inserted columns.
    totals = (
        int(chinaTotal['confirm']),
        int(chinaTotal['suspect']),
        int(chinaTotal['dead']),
        int(chinaTotal['heal']),
    )
    data_chinatimeline.append(totals + (lastUpdateTime,) + totals)

GetCityData(City_Data)

# SQL Server connection settings; fill these in before running.
server = ""# server address
user = ""# login name
password = ""# password
database = ""# database name
conn = pymssql.connect(server, user, password, database)
try:
    cursor = conn.cursor()
    if not cursor:
        # Raise an exception instance; raising a tuple is a TypeError.
        raise NameError("       ")
    print('OK')

    # Insert each daily row unless a row with the same date already exists.
    sql_china="if not exists(select * from SARI_ChinaSta where sdate=%s) insert into SARI_ChinaSta ([sdate],[sconfirm],[ssuspect],[sdead],[sheal]) VALUES (%s,%d,%d,%d,%d)"
    cursor.executemany(sql_china, data_china)

    # Insert the current totals unless an identical set of totals exists.
    sql_chinaLine="if not exists(select * from SARI_CTLine where sconfirm=%s and  ssuspect=%s and sdead=%s and sheal=%s) insert into SARI_CTLine ([lastUpdateTime],[sconfirm],[ssuspect],[sdead],[sheal]) VALUES (%s,%d,%d,%d,%d)"
    cursor.executemany(sql_chinaLine, data_chinatimeline)

    # The connection is not in autocommit mode, so commit explicitly.
    conn.commit()
    # Print the current time, not the ``time`` module object.
    print(time.strftime('%Y-%m-%d %H:%M:%S'),'        ')
finally:
    # Release the connection even when a statement above fails.
    conn.close()

# Append a timestamped line to the run log ("a" keeps earlier entries).
f = "log_getchina.txt"
with open(f,"a") as file:
    file.write("    :"+time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))+"\n")

都市別の統計データを取得するコードは次のとおりです。
#        v2.0
import requests
import re
import json
import openpyxl
import time
import pymssql
import time 

lastUpdateTime=''# 'lastUpdateTime' value from the feed; assigned by GetCityData
country =[] # name of the top-level areaTree node, one entry per output row
area = []# name of the second-level node (parent name when there is none)
city = []# name of the third-level node (parent name when there is none)
today_dead = []# node['today']['dead']
today_confirm = []# node['today']['confirm']
today_suspect = []# node['today']['suspect']
today_heal = []# node['today']['heal']
total_dead = []# node['total']['dead']
total_confirm = []# node['total']['confirm']
total_suspect = []# node['total']['suspect']
total_heal = []# node['total']['heal']

data_china = []# NOTE(review): not referenced by the rest of this script as shown - confirm before removing


Get_City_V2=r"https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback"# statistics endpoint (v2) queried by GetHtmlText

def GetHtmlText(url):
    """Fetch *url* via HTTP GET and return the response body as text.

    Args:
        url: Address to request; a 30 second timeout is applied.

    Returns:
        The decoded body on success, otherwise the sentinel string "Error"
        (connection failure, timeout, or an error status raised by
        ``raise_for_status``).
    """
    try:
        res = requests.get(url,timeout = 30)
        res.raise_for_status()
        # Decode using the encoding guessed from the response content.
        res.encoding = res.apparent_encoding
        return res.text
    except requests.RequestException:
        # Limit the sentinel to request failures so that unrelated errors
        # (and Ctrl-C) still surface instead of being hidden.
        return "Error"

# Download the feed and keep its "data" member, which GetCityData decodes
# as JSON once more.
City_json = GetHtmlText(Get_City_V2)
City_Data = json.loads(City_json)["data"]
#City_Data = re.findall(r"{[^}]+}",City_Data)#NewsJson

def GetCityData(CitysJson):
    """Flatten the 'areaTree' hierarchy into the module-level column lists.

    The tree is walked up to three levels deep. A node whose 'children'
    entry is None is recorded itself, with its own name repeated for the
    missing lower levels; otherwise its children are visited instead.

    Args:
        CitysJson: JSON text containing 'areaTree' and 'lastUpdateTime'.

    Returns:
        The number of top-level entries in 'areaTree'.

    Side effects:
        Assigns the module-level ``lastUpdateTime`` and appends one value
        per recorded node to each of the name and counter lists.
    """
    global lastUpdateTime

    def record(top_name, mid_name, leaf_name, node):
        # Names first, then the eight counters, in column order.
        country.append(top_name)
        area.append(mid_name)
        city.append(leaf_name)
        targets = (
            (today_dead, 'today', 'dead'),
            (today_confirm, 'today', 'confirm'),
            (today_suspect, 'today', 'suspect'),
            (today_heal, 'today', 'heal'),
            (total_dead, 'total', 'dead'),
            (total_confirm, 'total', 'confirm'),
            (total_suspect, 'total', 'suspect'),
            (total_heal, 'total', 'heal'),
        )
        for column, section, key in targets:
            column.append(node[section][key])

    payload = json.loads(CitysJson)
    tree = payload.get('areaTree')
    lastUpdateTime = payload.get('lastUpdateTime')
    top_count = len(tree)

    for top in tree:
        top_name = top.get('name')
        second_level = top.get('children')
        if second_level is None:
            # No subdivisions: the top-level node is the row.
            record(top_name, top_name, top_name, top)
            continue
        for mid in second_level:
            mid_name = mid.get('name')
            third_level = mid.get('children')
            if third_level is None:
                # No third level: the second-level node is the row.
                record(top_name, mid_name, mid_name, mid)
                continue
            for leaf in third_level:
                record(top_name, mid_name, leaf.get('name'), leaf)

    return top_count

GetCityData(City_Data)

# One parameter tuple per recorded region: timestamp and the three names
# for the duplicate check, followed by the twelve inserted columns.
data_xj = [
    (lastUpdateTime, c_name, a_name, s_name, c_name, a_name, s_name,
     t_dead, t_confirm, t_suspect, t_heal,
     all_dead, all_confirm, all_suspect, all_heal, lastUpdateTime)
    for (c_name, a_name, s_name,
         t_dead, t_confirm, t_suspect, t_heal,
         all_dead, all_confirm, all_suspect, all_heal)
    in zip(country, area, city,
           today_dead, today_confirm, today_suspect, today_heal,
           total_dead, total_confirm, total_suspect, total_heal)
]

# SQL Server connection settings; fill these in before running.
server = ""# server address
user = ""# login name
password = ""# password
database = ""# database name
conn = pymssql.connect(server, user, password, database)
try:
    cursor = conn.cursor()
    if not cursor:
        # Raise an exception instance; raising a tuple is a TypeError.
        raise NameError("       ")
    print('OK')

    # Insert a region row unless one with the same timestamp and names exists.
    sql_xj = "if not exists(select * from SARI_detail where lastUpdateTime=%s and cname=%s and fname=%s and sname=%s) INSERT INTO SARI_detail ([cname],[fName],[sName],[today_dead],[today_confirm],[today_suspect],[today_heal],[total_dead],[total_confirm],[total_suspect],[total_heal],[lastUpdateTime]) VALUES (%s,%s,%s,%d,%d,%d,%d,%d,%d,%d,%d,%s)"
    cursor.executemany(sql_xj, data_xj)

    # The connection is not in autocommit mode, so commit explicitly.
    conn.commit()
    # Print the current time, not the ``time`` module object.
    print(time.strftime('%Y-%m-%d %H:%M:%S'),'        ')
finally:
    # Release the connection even when a statement above fails.
    conn.close()

# Append a timestamped line to the run log ("a" keeps earlier entries).
f = "log_getcityv2.txt"
with open(f,"a") as file:
    file.write("    :"+time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))+"\n")

Excelファイルに書き込む場合は、openpyxlライブラリを使用します。ニュースデータを取得するコードと本記事の完全なコードは、CSDNにアップロードしてあります。