downloadGithubData <- function() {
download.file(
url= "https://github.com/CSSEGISandData/COVID-19/archive/master.zip",
destfile= "./covid19_data.zip")
data_path <- "COVID-19-master/csse_covid_19_data/
csse_covid_19_time_series/time_series_covid19_"
unzip(
zipfile= "./covid19_data.zip",
files= paste0(data_path, c("confirmed_global.csv",
"deaths_global.csv",
"recovered_global.csv",
"confirmed_US.csv",
"deaths_US.csv")),
exdir = ".",
junkpaths = T)
}
# get data
data_confirmed<- read.csv("./time_series_covid19_confirmed_global.csv")
data_deceased<- read.csv("./time_series_covid19_deaths_global.csv")
data_recovered<- read.csv("./time_series_covid19_recovered_global.csv")
data_confirmed_us<- read.csv("./time_series_covid19_confirmed_US.csv")
data_deceased_us<- read.csv("./time_series_covid19_deaths_US.csv")
# wide to long
data_confirmed_long <- data_confirmed %>%
pivot_longer(names_to = "date",
cols = 5:ncol(data_confirmed)) %>%
group_by(`Country/Region`, date) %>%
dplyr::summarise("confirmed" = sum(value, na.rm = T))%>%
dplyr::select(`Country/Region`, date, confirmed)
data_deceased_long <- data_deceased %>%
pivot_longer(names_to = "date",
cols = 5:ncol(data_deceased)) %>%
group_by( `Country/Region`, date) %>%
dplyr::summarise("deceased" = sum(value, na.rm = T))%>%
dplyr::select(`Country/Region`,
date, deceased)
data_recovered_long <- data_recovered %>%
pivot_longer(names_to = "date",
cols = 5:ncol(data_recovered)) %>%
group_by(`Country/Region`, date) %>%
dplyr::summarise("recovered" = sum(value, na.rm = T))%>%
dplyr::select(`Country/Region`,
date, recovered)
# US data
data_confirmed_us_long <- data_confirmed_us %>%
dplyr::select(Province_State,
Country_Region, Lat, Long_,
12:ncol(data_confirmed_us)) %>%
dplyr::rename(`Province/State` = Province_State,
`Country/Region` = Country_Region,
Long = Long_) %>%
pivot_longer(names_to = "date",
cols = 5:(ncol(data_confirmed_us) - 7)) %>%
group_by(`Country/Region`, date) %>%
mutate(Lat = na_if(Lat, 0),
Long = na_if(Long, 0))%>%
dplyr::summarise("confirmed" = sum(value, na.rm = T))%>%
dplyr::select(`Country/Region`,
date, confirmed)
data_deceased_us_long <- data_deceased_us %>%
dplyr::select(Province_State,
Country_Region,
13:(ncol(data_confirmed_us))) %>%
dplyr::rename(`Province/State` = Province_State,
`Country/Region` = Country_Region) %>%
pivot_longer(names_to = "date",
cols = 5:(ncol(data_deceased_us) - 11)) %>%
group_by( `Country/Region`, date) %>%
dplyr::summarise("deceased" = sum(value, na.rm = T))%>%
dplyr::select(`Country/Region`, date, deceased)
# combined us data
data_us <- data_confirmed_us_long %>%
full_join(data_deceased_us_long) %>%
tibble::add_column(recovered = NA) %>%
dplyr::select(`Country/Region`, date,
confirmed, recovered, deceased)
data_world <- data_confirmed_long %>%
full_join(data_recovered_long) %>%
full_join(data_deceased_long) %>%
rbind(data_us) %>%
ungroup() %>%
mutate(date = as.Date(date, "%m/%d/%y")) %>%
arrange(date) %>%
group_by( `Country/Region`) %>%
tidyr::fill(confirmed, recovered, deceased) %>%
tidyr::replace_na(list(deceased = 0, confirmed = 0)) %>%
mutate(
recovered_est = lag(confirmed, 14, default = 0) - deceased,
recovered_est = ifelse(recovered_est > 0, recovered_est, 0),
recovered = coalesce(recovered, recovered_est),
active = confirmed - recovered - deceased
) %>%
dplyr::select(-recovered_est) %>%ungroup()%>%
mutate(iso_code=countrycode(`Country/Region`, 'country.name', 'iso3c'))
# save as RDS file
saveRDS(data_world, "data_world.rds")
# demogrphic data
worldpopuage<-read.csv("./world population by age both sex 2019.csv",
header=T)
# filter country and 2015 population, modify structure
worldpopuage[ ,c(9:29)]<-as.numeric(unlist(worldpopuage[ ,c(9:29)]), digits=2)
worldpopuage.country<-worldpopuage%>%
filter(Type=="Country/Area",
Reference.date..as.of.1.July.==2015)%>%
mutate(age.group1=rowSums(.[ ,c(9,10)], na.rm=T),
age.group2=rowSums(.[ ,c(11,12)], na.rm=T),
age.group3=rowSums(.[ ,c(13,14)], na.rm=T),
age.group4=rowSums(.[ ,c(15,16)], na.rm=T),
age.group5=rowSums(.[ ,c(17,18)], na.rm=T),
age.group6=rowSums(.[ ,c(19,20)], na.rm=T),
age.group7=rowSums(.[ ,c(21,22)], na.rm=T),
age.group8=rowSums(.[ ,c(23,24)], na.rm=T),
age.group9=rowSums(.[ ,c(25,29)], na.rm=T))
worldpopuage.country1<-worldpopuage.country[ ,-c(9:29)]
worldpopuage.country2<-worldpopuage.country1%>%
mutate(Total.pop=rowSums(.[,c(9,17)], na.rm=T ),
f1=age.group1/Total.pop,
f2=age.group2/Total.pop,
f3=age.group3/Total.pop,
f4=age.group4/Total.pop,
f5=age.group5/Total.pop,
f6=age.group6/Total.pop,
f7=age.group7/Total.pop,
f8=age.group8/Total.pop,
f9=age.group9/Total.pop,
iso_code=countrycode(Country.code,
'un', 'iso3c' ))
# to assign iso code for Taiwan
worldpopuage.country2[93,28]<-"TWN"
#save as RDS
saveRDS(worldpopuage.country2,
"worldpopuage.country2.rds")
readRDS("worldpopuage.country2.rds")
## combined
world.count<-readRDS("data_world.rds")%>%
dplyr::group_by(iso_code)%>%
dplyr::mutate(confirmed.d=ifelse(row_number() == 1,
confirmed,
lag(diff(confirmed, 1),1)),
recovered.d=ifelse(row_number() == 1,
recovered,
lag(diff(recovered, 1),1)),
deceased.d=ifelse(row_number() == 1,
recovered,
lag(diff(deceased, 1),1)),
death.rate=deceased/confirmed)%>%ungroup()
# world population data
world.demo<-readRDS("worldpopuage.country2.rds")
## CFR south Korea #fatality rate in South Korea 3/11/2020
cfrsk<-c(0,0, 0, 0.0011, 0.0009,
0.0037, 0.0151, 0.0535,
0.1084)
bench<-world.demo%>%filter(iso_code=="KOR")%>%
mutate(bvt=rowSums(.[ ,c(19:27)]*cfrsk))%>%
dplyr::select(bvt)
world.popage<-world.demo%>%
mutate(vt=rowSums(.[ ,c(19:27)]*cfrsk),
vtb=vt/bench$bvt)%>%
dplyr::select(iso_code,vtb, Total.pop)
# merge vtb information into world daily cases
world.vtb<-merge(world.count,world.popage, by="iso_code")%>%
arrange(iso_code,date)
world.vtb$severe.c<-dplyr::case_when((world.vtb$confirmed<=1000)~"0-1",
(world.vtb$confirmed>1000 &
world.vtb$confirmed<=5000) ~"1-5",
(world.vtb$confirmed>5000 &
world.vtb$confirmed<=10000) ~"5-10",
(world.vtb$confirmed>10000 &
world.vtb$confirmed<=50000) ~"10-50",
(world.vtb$confirmed>50000 &
world.vtb$confirmed<=100000) ~"50-100",
(world.vtb$confirmed>100000 &
world.vtb$confirmed<=500000) ~"100-500",
(world.vtb$confirmed>500000 &
world.vtb$confirmed<=1000000) ~"500-1000",
(world.vtb$confirmed>1000000) ~">1000")
world.vtb$severe.d<-dplyr::case_when((world.vtb$deceased<=100)~"0-1",
(world.vtb$deceased>100 &
world.vtb$deceased<=500) ~"1-5",
(world.vtb$deceased>500 &
world.vtb$deceased<=1000) ~"5-10",
(world.vtb$deceased>1000 &
world.vtb$deceased<=5000) ~"10-50",
(world.vtb$deceased>5000 &
world.vtb$deceased<=10000) ~"50-100",
(world.vtb$deceased>10000 &
world.vtb$deceased<=50000) ~"100-500",
(world.vtb$deceased>500000 ) ~">500")
saveRDS(world.vtb, "world.vtb.rds")