# 说明:本示例代码仅供学习参考,不得用于商业目的。使用时请遵守互联网信息安全相关法规。
# 数据源:丁香园
# 实现:R
# 数据结果:当日获取前一日国内分省、地级市和全球各国确诊、治愈、未愈、病故数据
# 代码如下:
#上午11:30提取
#国内数据-----------------------------------------
library(xml2)
library(rvest)
library(magrittr)
library(dplyr)
library(tidyr)
library(stringr)
library(knitr)
library(lubridate)
library(data.table)
library(downloader)
# Province-level data ----
# Download the page and take the text of the "#getAreaStat" node; the
# regular expressions below pull the per-province records out of that text.
url <- "https://ncov.dxy.cn/ncovh5/view/pneumonia"
domestic <- read_html(url) %>%
  html_nodes("#getAreaStat") %>%
  html_text()
if (length(domestic) == 0) {
  # Fail with a readable message instead of "subscript out of bounds" below.
  stop("Node '#getAreaStat' was not found at ", url, call. = FALSE)
}

# One string per province: from `provinceShortName":` up to (not including)
# the following `,"cities`.
province_info <- str_extract_all(
  domestic,
  '(?=provinceShortName\\"\\:).+?(?=\\,\\"cities)'
)

# The quoted value that follows `provinceShortName":` in each record.
province_name_info <- str_extract_all(
  province_info[[1]],
  '(?<=provinceShortName\\"\\:\\").+?(?=\\"\\,)'
) %>%
  unlist()
# mutate() would silently recycle a length-1 vector; require a 1:1 match.
stopifnot(length(province_name_info) == length(province_info[[1]]))

# Replace every non-digit with a space and split the remainder into columns.
# Each record starts with non-digit text, so the first piece is a placeholder
# (named provinceName here, never used afterwards). Only the first six pieces
# are kept; `extra = "drop"` discards the rest without the default warning.
province_other_info <- data.frame(
  x = gsub("[^0-9]", " ", province_info[[1]])
) %>%
  separate(
    x,
    c(
      "provinceName", "currentConfirmedCount", "confirmedCount",
      "suspectedCount", "curedCount", "deadCount"
    ),
    extra = "drop"
  ) %>%
  mutate(
    provincename = as.character(province_name_info),
    currentConfirmedCount = as.numeric(currentConfirmedCount)
  )

temp_1 <- select(
  province_other_info,
  provincename, currentConfirmedCount, confirmedCount, curedCount, deadCount
)
colnames(temp_1) <- c("省份", "尚未愈", "确诊", "治愈", "病故")

# The file is named after the previous day. The month part used to be the
# literal "07"; it is now taken from the date itself, so names are unchanged
# for July dates and no longer collide with July files in other months.
# The day is intentionally left unpadded, as before.
report_date <- Sys.Date() - 1
write.csv(
  temp_1,
  file = paste0(
    "e:/covid19/COVID19_",
    format(report_date, "%m"), day(report_date), ".csv"
  ),
  row.names = FALSE,
  quote = FALSE
)
# City-level data ----
# One string per city record, taken from the `domestic` text extracted above:
# from `cityName":` up to the `}` that closes the record. The closing brace
# may be followed by `,` (more records follow) or by `]` (last record of an
# array); the previous pattern only accepted `},` and therefore missed a
# record that closes an array when no later `},` exists in the text.
cities_all_info <- str_extract_all(
  domestic,
  '(?=cityName\\"\\:).+?(?=\\}[\\,\\]])'
)

# The quoted value that follows `cityName":` in each record.
city_name_info <- str_extract_all(
  cities_all_info[[1]],
  '(?<=cityName\\"\\:\\").+?(?=\\"\\,)'
) %>%
  unlist()
# mutate() would silently recycle a length-1 vector; require a 1:1 match.
stopifnot(length(city_name_info) == length(cities_all_info[[1]]))

# Replace every non-digit with a space and split the remainder into columns.
# Each record starts with non-digit text, so the first piece is a placeholder
# (named cityName here, never used afterwards). Only the first six pieces are
# kept; `extra = "drop"` discards the rest without the default warning.
city_other_info <- data.frame(
  x = gsub("[^0-9]", " ", cities_all_info[[1]])
) %>%
  separate(
    x,
    c(
      "cityName", "currentConfirmedCount", "confirmedCount",
      "suspectedCount", "curedCount", "deadCount"
    ),
    extra = "drop"
  ) %>%
  mutate(
    cityname = as.character(city_name_info),
    currentConfirmedCount = as.numeric(currentConfirmedCount)
  )

temp_2 <- select(
  city_other_info,
  cityname, currentConfirmedCount, confirmedCount, curedCount, deadCount
)
colnames(temp_2) <- c("城市", "尚未愈", "确诊", "治愈", "病故")

# The file is named after the previous day. The month part used to be the
# literal "07"; it is now taken from the date itself, so names are unchanged
# for July dates and no longer collide with July files in other months.
# The day is intentionally left unpadded, as before.
report_date <- Sys.Date() - 1
write.csv(
  temp_2,
  file = paste0(
    "e:/covid19/COVID19city_",
    format(report_date, "%m"), day(report_date), ".csv"
  ),
  row.names = FALSE,
  quote = FALSE
)
# Global data ----
# Fetch the page again and take the text of the per-country node.
world <- read_html(url) %>%
  html_nodes("#getListByCountryTypeService2true") %>%
  html_text()
if (length(world) == 0) {
  # Fail with a readable message instead of "subscript out of bounds" below.
  stop(
    "Node '#getListByCountryTypeService2true' was not found at ", url,
    call. = FALSE
  )
}

# One string per country: from `provinceName":` through the digits that
# follow `deadCount":`.
country_info <- str_extract_all(
  world,
  '(?=provinceName\\"\\:).+?deadCount\\"\\:[0-9]+'
)

# The quoted value that follows `provinceName":` in each record.
country_name_info <- str_extract_all(
  country_info[[1]],
  '(?<=provinceName\\"\\:\\").+?(?=\\"\\,)'
) %>%
  unlist()
# mutate() would silently recycle a length-1 vector; require a 1:1 match.
stopifnot(length(country_name_info) == length(country_info[[1]]))

# Replace every non-digit with a space and split the remainder into columns.
# Each record starts with non-digit text, so the first piece is a placeholder
# (named provinceName here, never used afterwards). Records with fewer numbers
# than columns are padded with NA on the right; `fill = "right"` makes that
# explicit because the norm_2 branch below depends on it.
country_other_info <- data.frame(
  y = gsub("[^0-9]", " ", country_info[[1]])
) %>%
  separate(
    y,
    c(
      "provinceName", "currentConfirmedCount", "confirmedCount",
      "confirmedCountRank", "suspectedCount", "curedCount", "deadCount"
    ),
    fill = "right"
  ) %>%
  mutate(
    countryName = as.character(country_name_info),
    currentConfirmedCount = as.numeric(currentConfirmedCount)
  )

# Records that filled every column: the column names line up with the values.
norm_1 <- filter(country_other_info, !(is.na(deadCount))) %>%
  select(
    countryName, currentConfirmedCount, confirmedCount, curedCount, deadCount
  )

# Records whose last column is NA: the values are treated as shifted one
# column to the left, so the columns named suspectedCount / curedCount are
# taken as cured / dead.
# NOTE(review): presumably these records have no confirmedCountRank field;
# confirm against the live payload.
norm_2 <- filter(country_other_info, is.na(deadCount)) %>%
  select(
    countryName, currentConfirmedCount, confirmedCount,
    suspectedCount, curedCount
  ) %>%
  rename(deadCount = curedCount, curedCount = suspectedCount)

normal <- rbind(norm_1, norm_2)
colnames(normal) <- c("国别", "尚未愈", "确诊", "治愈", "病故")

# The file is named after the previous day. The month part used to be the
# literal "07"; it is now taken from the date itself, so names are unchanged
# for July dates and no longer collide with July files in other months.
# The day is intentionally left unpadded, as before.
report_date <- Sys.Date() - 1
write.csv(
  normal,
  file = paste0(
    "e:/covid19/COVID19country_",
    format(report_date, "%m"), day(report_date), ".csv"
  ),
  row.names = FALSE,
  quote = FALSE
)