
Python及时数据采集-新型冠状病毒
源代码 起源:https://github.com/Progra妹妹ing-With-Love/2019-nCoV
疫情数据工夫为:2020.2.1

名目相干截图:
天下数据展现

国际数据展现

外洋数据展现

查看指定区域具体数据

源代码,留意装置所需模块(例如 pip install 模块名)
import requests
import re
from bs4 import BeautifulSoup
from time import sleep
import json
from prettytable import ALL
from prettytable import PrettyTable
hubei = {}
guangdong = {}
zhejiang = {}
beijing = {}
shanghai = {}
hunan = {}
anhui = {}
chongqing = {}
sichuan = {}
shandong = {}
guangxi = {}
fujian = {}
jiangsu = {}
henan = {}
hainan = {}
tianjin = {}
jiangxi = {}
shanxi1 = {} # 陕西
guizhou = {}
liaoning = {}
xianggang = {}
heilongjiang = {}
aomen = {}
xinjiang = {}
gansu = {}
yunnan = {}
taiwan = {}
shanxi2 = {} # 山西
jilin = {}
hebei = {}
ningxia = {}
neimenggu = {}
qinghai = {} # none
xizang = {} # none
provinces_idx = [hubei, guangdong, zhejiang, chongqing, hunan, anhui, beijing,
shanghai, henan, guangxi, shandong, jiangxi, jiangsu, sichuan,
liaoning, fujian, heilongjiang, hainan, tianjin, hebei, shanxi2,
yunnan, xianggang, shanxi1, guizhou, jilin, gansu, taiwan,
xinjiang, ningxia, aomen, neimenggu, qinghai, xizang]
map = {
'湖北':0, '广东':1, '浙江':2, '北京':3, '上海':4, '湖南':5, '安徽':6, '重庆':7,
'四川':8, '山东':9, '广西':10, '福建':11, '江苏':12, '河南':13, '海南':14,
'天津':15, '江西':16, '陕西':17, '贵州':18, '辽宁':19, '香港':20, '黑龙江':21,
'澳门':22, '新疆':23, '甘肃':24, '云南':25, '台湾':26, '山西':27, '吉林':28,
'河北':29, '宁夏':30, '内蒙古':31, '青海':32, '西藏':33
}
def getTime(text):
TitleTime = str(text)
TitleTime = re.findall('<span>(.*?)</span>', TitleTime)
return TitleTime[0]
def getAllCountry(text):
AllCountry = str(text)
AllCountry = AllCountry.replace("[<p class=\"confirmedNumber___3WrF5\"><span class=\"content___2hIPS\">", "")
AllCountry = AllCountry.replace("<span style=\"color: #4169e2\">", "")
AllCountry = re.sub("</span>", "", AllCountry)
AllCountry = AllCountry.replace("</p>]", "")
AllCountry = AllCountry.replace("<span style=\"color: rgb(65, 105, 226);\">", "")
AllCountry = re.sub("<span>", "", AllCountry)
AllCountry = re.sub("<p>", "", AllCountry)
AllCountry = re.sub("</p>", "", AllCountry)
return AllCountry
def query(province):
table = PrettyTable(['地域', '确诊', '殒命', '治愈'])
for (k, v) in province.items():
name = k
table.add_row([name, v[0] if v[0] != 0 else '-', v[1] if v[1] != 0 else '-', v[2] if v[2] != 0 else '-'])
if len(province.keys()) != 0:
print(table)
else:
print("暂无")
def getInfo(text):
text = str(text)
text = re.sub("<p class=\"descText___Ui3tV\">", "", text)
text = re.sub("</p>", "", text)
return text
def is_json(json_str):
try:
json.loads(json_str)
except ValueError:
return False
return True
def ff(str, num):
return str[:num] + str[num+1:]
def main():
url = "https://3g.dxy.cn/newh5/view/pneumonia"
try:
headers = {}
headers['user-agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36' #http头巨细写没有敏感
headers['accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
headers['Connection'] = 'keep-alive'
headers['Upgrade-Insecure-Requests'] = '1'
r = requests.get(url, headers=headers)
r.raise_for_status()
r.encoding = r.apparent_encoding
soup = BeautifulSoup(r.text,'lxml')
table = PrettyTable(['地域', '确诊', '殒命', '治愈'])
table.hrules = ALL
#### 截至工夫
# TitleTime = getTime(soup.select('.title___2d1_B'))
print()
# print(" ",TitleTime + "\n")
while True:
r = requests.get("https://service-f9fjwngp-1252021671.bj.apigw.tencentcs.com/release/pneumonia")
json_str = json.loads(r.text)
if json_str['error'] == 0:
break
print("==================================天下数据==================================")
print()
print(" 确诊 " + str(json_str['data']['statistics']['confirmedCount']) + " 例"
+ " " + "疑似 " + str(json_str['data']['statistics']['suspectedCount']) + " 例"
+ " " + "殒命" + str(json_str['data']['statistics']['deadCount']) + " 例"
+ " " + "治愈" + str(json_str['data']['statistics']['curedCount']) + " 例\n")
print("==================================相干状况==================================")
print()
print("感染源:" + json_str['data']['statistics']['infectSource'])
print("病毒:" + json_str['data']['statistics']['virus'])
print("流传路子:" + json_str['data']['statistics']['passWay'])
print(json_str['data']['statistics']['remark1'])
print(json_str['data']['statistics']['remark2'] + "\n")
print("==================================国际状况==================================")
print()
json_provinces = re.findall("{\"provinceName\":(.*?)]}", str(soup))
idx = 0
for province in json_provinces:
if is_json(province):
pass
else:
province = "{\"provinceName\":" + province + "]}"
province = json.loads(province)
province_name = province['provinceShortName'] if province['provinceShortName'] != 0 else '-'
confirmed = province['confirmedCount'] if province['confirmedCount'] != 0 else '-'
suspected = province['suspectedCount'] if province['suspectedCount'] != 0 else '-'
cured = province['curedCount'] if province['curedCount'] != 0 else '-'
dead = province['deadCount'] if province['deadCount'] != 0 else '-'
table.add_row([province_name, confirmed, dead, cured])
map[province_name] = idx
idx = idx + 1
for city in province['cities']:
provinces_idx[map[province_name]][city['cityName']] = [city['confirmedCount'], city['deadCount'], city['curedCount']]
print(table)
print()
print("==================================外洋状况==================================")
print()
json_provinces = str(re.findall("\"id\":949(.*?)]}", str(soup)))
json_provinces = json_provinces[:1] + "{\"id\":949" + json_provinces[2:]
json_provinces = json_provinces[:len(json_provinces) - 2] + json_provinces[len(json_provinces) - 1:]
provinces = json.loads(json_provinces)
table = PrettyTable(['地域', '确诊', '殒命', '治愈'])
for province in provinces:
confirmed = province['confirmedCount'] if province['confirmedCount'] != 0 else '-'
dead = province['deadCount'] if province['deadCount'] != 0 else '-'
cured = province['curedCount'] if province['curedCount'] != 0 else '-'
table.add_row([province['provinceName'], confirmed, dead, cured])
print(table)
print()
print("==================================最新音讯==================================")
print()
idx = 0
for news in json_str['data']['timeline']:
if idx == 5:
break
print(news['pubDateStr'] + " " + news['title'])
idx = idx + 1
print()
key = input("请输出您想查问具体信息的省分,例如 湖北\n")
print()
if key in map.keys():
query(provinces_idx[map[key]])
else:
print("暂无相干信息")
print("\n欢送提出各类定见")
except:
print("衔接失败")
if __name__ == '__main__':
main()
sleep(30)最初,祝各人百毒没有侵,中国加油!!肯定可以渡过难关!!
以上就是Python及时数据采集-新型冠状病毒的具体内容,更多请存眷资源魔其它相干文章!
标签: 采集 数据 Python python教程 python编程 python使用问题
版权声明:除非特别标注,否则均为本站原创文章,转载时请以链接形式注明文章出处。
抱歉,评论功能暂时关闭!