全部版块 我的主页
论坛 数据科学与人工智能 数据分析与数据科学 R语言论坛
1495 0
2020-07-15
说明:本示例代码仅供学习参考,不得用于商业目的。使用时请遵守互联网信息安全相关法规。

数据源:丁香园
实现:R
数据结果:当日获取前一日国内分省、地级市和全球各国确诊、治愈、未愈、病故数据
代码如下:

#上午11:30提取

#国内数据-----------------------------------------
library(xml2)
library(rvest)
library(magrittr)
library(dplyr)
library(tidyr)
library(stringr)
library(knitr)
library(lubridate)
library(data.table)
library(downloader)

# Dashboard page that embeds the statistics scraped below.
url <- "https://ncov.dxy.cn/ncovh5/view/pneumonia"

# Raw text of the node with id "getAreaStat". The regexes below treat it as
# JSON-like text with one record per province, each followed by a "cities" key.
domestic <- read_html(url) %>%
  html_nodes("#getAreaStat") %>%
  html_text()
if (length(domestic) == 0L) {
  # Fail with a clear message instead of "subscript out of bounds" further down.
  stop("Node '#getAreaStat' not found at ", url, call. = FALSE)
}

# One string per province: from `provinceShortName":` up to (not including)
# `,"cities`.
province_info <- str_extract_all(domestic,'(?=provinceShortName\\"\\:).+?(?=\\,\\"cities)')

# Province short names, in the same order as the records above.
province_name_info <- str_extract_all(province_info[[1]],'(?<=provinceShortName\\"\\:\\").+?(?=\\"\\,)') %>%
  unlist()

# Blank out every non-digit so only the numeric fields remain, then split them
# into columns by position. The record starts with non-digits, so the first
# column presumably just absorbs the empty piece before the first number
# (hence the placeholder name 'provinceName') -- TODO confirm against live data.
# NOTE(review): any minus sign is stripped too, so negative counts would lose
# their sign.
province_other_info <- data.frame(x = gsub("[^0-9]", " ", province_info[[1]])) %>%
  separate(
    x,
    c('provinceName', 'currentConfirmedCount', 'confirmedCount',
      'suspectedCount', 'curedCount', 'deadCount')
  ) %>%
  mutate(
    provincename = as.character(province_name_info),
    currentConfirmedCount = as.numeric(currentConfirmedCount)
  )

# Keep the reported columns and give them the Chinese headers used in the CSV
# (province, still active, confirmed, cured, dead).
temp_1 <- select(
  province_other_info,
  provincename, currentConfirmedCount, confirmedCount, curedCount, deadCount
)
colnames(temp_1) <- c("省份","尚未愈","确诊","治愈","病故")

# The file is stamped with yesterday's date as zero-padded month+day (MMDD).
# The previous hard-coded "07" prefix plus an unpadded day() produced wrong
# names outside July and for days 1-9.
write.csv(
  temp_1,
  file = paste0("e:/covid19/COVID19_", format(Sys.Date() - 1, "%m%d"), ".csv"),
  row.names = FALSE,
  quote = FALSE
)

# One string per city record: from `cityName":` up to (not including) the
# next `},`. `domestic` is the text of the #getAreaStat node read earlier in
# this script.
cities_all_info <- str_extract_all(domestic,'(?=cityName\\"\\:).+?(?=\\}\\,)')

# City names, in the same order as the records above.
city_name_info <- str_extract_all(cities_all_info[[1]],'(?<=cityName\\"\\:\\").+?(?=\\"\\,)') %>%
  unlist()

# Blank out every non-digit so only the numeric fields remain, then split them
# into columns by position. The first column presumably just absorbs the empty
# piece before the first number (hence the placeholder name 'cityName')
# -- TODO confirm against live data.
# NOTE(review): digits inside a city name, or a minus sign on a count, would
# shift or alter the parsed columns.
city_other_info <- data.frame(x = gsub("[^0-9]", " ", cities_all_info[[1]])) %>%
  separate(
    x,
    c('cityName', 'currentConfirmedCount', 'confirmedCount',
      'suspectedCount', 'curedCount', 'deadCount')
  ) %>%
  mutate(
    cityname = as.character(city_name_info),
    currentConfirmedCount = as.numeric(currentConfirmedCount)
  )

# Keep the reported columns and give them the Chinese headers used in the CSV
# (city, still active, confirmed, cured, dead).
temp_2 <- select(
  city_other_info,
  cityname, currentConfirmedCount, confirmedCount, curedCount, deadCount
)
colnames(temp_2) <- c("城市","尚未愈","确诊","治愈","病故")

# The file is stamped with yesterday's date as zero-padded month+day (MMDD).
# The previous hard-coded "07" prefix plus an unpadded day() produced wrong
# names outside July and for days 1-9.
write.csv(
  temp_2,
  file = paste0("e:/covid19/COVID19city_", format(Sys.Date() - 1, "%m%d"), ".csv"),
  row.names = FALSE,
  quote = FALSE
)


# Global data ----------------------------------------
# Raw text of the node with id "getListByCountryTypeService2true"; the page is
# fetched again from `url` (defined earlier in this script).
world <- read_html(url) %>%
  html_nodes("#getListByCountryTypeService2true") %>%
  html_text()
if (length(world) == 0L) {
  # Fail with a clear message instead of "subscript out of bounds" further down.
  stop("Node '#getListByCountryTypeService2true' not found at ", url, call. = FALSE)
}

# One string per country: from `provinceName":` through `deadCount":<digits>`.
country_info <- str_extract_all(world,'(?=provinceName\\"\\:).+?deadCount\\"\\:[0-9]+')

# Country names, in the same order as the records above.
country_name_info <- str_extract_all(country_info[[1]],'(?<=provinceName\\"\\:\\").+?(?=\\"\\,)') %>%
  unlist()

# Blank out every non-digit so only the numeric fields remain, then split them
# into columns by position. The first column presumably just absorbs the empty
# piece before the first number (hence the placeholder name 'provinceName')
# -- TODO confirm against live data.
country_other_info <- data.frame(y = gsub("[^0-9]", " ", country_info[[1]])) %>%
  separate(
    y,
    c('provinceName', 'currentConfirmedCount', 'confirmedCount',
      'confirmedCountRank', 'suspectedCount', 'curedCount', 'deadCount')
  ) %>%
  mutate(
    countryName = as.character(country_name_info),
    currentConfirmedCount = as.numeric(currentConfirmedCount)
  )

# Rows where all seven positional columns were filled: columns line up as named.
norm_1 <- filter(country_other_info, !(is.na(deadCount))) %>%
  select(countryName, currentConfirmedCount, confirmedCount, curedCount, deadCount)

# Rows with one number fewer (deadCount is NA). Presumably the record has no
# confirmedCountRank field, so every later value sits one column to the left:
# 'suspectedCount' holds the cured count and 'curedCount' holds the dead count.
# The rename puts them back under the right names.
norm_2 <- filter(country_other_info, is.na(deadCount)) %>%
  select(countryName, currentConfirmedCount, confirmedCount, suspectedCount, curedCount) %>%
  rename(deadCount = curedCount, curedCount = suspectedCount)

# Stack both groups and give them the Chinese headers used in the CSV
# (country, still active, confirmed, cured, dead).
normal <- rbind(norm_1, norm_2)
colnames(normal) <- c("国别","尚未愈","确诊","治愈","病故")

# The file is stamped with yesterday's date as zero-padded month+day (MMDD).
# The previous hard-coded "07" prefix plus an unpadded day() produced wrong
# names outside July and for days 1-9.
write.csv(
  normal,
  file = paste0("e:/covid19/COVID19country_", format(Sys.Date() - 1, "%m%d"), ".csv"),
  row.names = FALSE,
  quote = FALSE
)



二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

相关推荐
栏目导航
热门文章
推荐文章

说点什么

分享

扫码加好友,拉您进群
各岗位、行业、专业交流群