Appendix document of estimation of under-reported COVID-19 cases number

appendix

Download time-series data from Github and update automatically

downloadGithubData <- function() {
  download.file(
  url= "https://github.com/CSSEGISandData/COVID-19/archive/master.zip",
  destfile= "./covid19_data.zip")
  data_path <- "COVID-19-master/csse_covid_19_data/
  csse_covid_19_time_series/time_series_covid19_"
  unzip(
    zipfile= "./covid19_data.zip",
    files= paste0(data_path, c("confirmed_global.csv", 
                               "deaths_global.csv", 
                               "recovered_global.csv", 
                               "confirmed_US.csv", 
                               "deaths_US.csv")),
    exdir     = ".",
    junkpaths = T)
}
# get data
data_confirmed<- read.csv("./time_series_covid19_confirmed_global.csv")
data_deceased<- read.csv("./time_series_covid19_deaths_global.csv")
data_recovered<- read.csv("./time_series_covid19_recovered_global.csv")
data_confirmed_us<- read.csv("./time_series_covid19_confirmed_US.csv")
data_deceased_us<- read.csv("./time_series_covid19_deaths_US.csv")
# wide to long
data_confirmed_long <- data_confirmed %>%
  pivot_longer(names_to = "date", 
               cols = 5:ncol(data_confirmed)) %>%
  group_by(`Country/Region`, date) %>%
  dplyr::summarise("confirmed" = sum(value, na.rm = T))%>%
  dplyr::select(`Country/Region`, date, confirmed)

data_deceased_long <- data_deceased %>%
  pivot_longer(names_to = "date", 
               cols = 5:ncol(data_deceased)) %>%
  group_by( `Country/Region`, date) %>%
  dplyr::summarise("deceased" = sum(value, na.rm = T))%>%
  dplyr::select(`Country/Region`, 
                date, deceased)

data_recovered_long <- data_recovered %>%
  pivot_longer(names_to = "date", 
               cols = 5:ncol(data_recovered)) %>%
  group_by(`Country/Region`, date) %>%
  dplyr::summarise("recovered" = sum(value, na.rm = T))%>%
  dplyr::select(`Country/Region`, 
                date, recovered)

# US data
data_confirmed_us_long <- data_confirmed_us %>%
  dplyr::select(Province_State, 
                Country_Region, Lat, Long_, 
                12:ncol(data_confirmed_us)) %>%
  dplyr::rename(`Province/State` = Province_State, 
                `Country/Region` = Country_Region, 
                Long = Long_) %>%
  pivot_longer(names_to = "date", 
               cols = 5:(ncol(data_confirmed_us) - 7)) %>%
  group_by(`Country/Region`, date) %>%
  mutate(Lat  = na_if(Lat, 0),
         Long = na_if(Long, 0))%>%
  dplyr::summarise("confirmed" = sum(value, na.rm = T))%>%
  dplyr::select(`Country/Region`, 
                date, confirmed)

data_deceased_us_long <- data_deceased_us %>%
  dplyr::select(Province_State, 
                Country_Region, 
                13:(ncol(data_confirmed_us))) %>%
  dplyr::rename(`Province/State` = Province_State, 
                `Country/Region` = Country_Region) %>%
  pivot_longer(names_to = "date", 
               cols = 5:(ncol(data_deceased_us) - 11)) %>%
  group_by( `Country/Region`, date) %>%
  dplyr::summarise("deceased" = sum(value, na.rm = T))%>%
  dplyr::select(`Country/Region`, date, deceased)

# combined us data
data_us <- data_confirmed_us_long %>%
  full_join(data_deceased_us_long) %>%
  tibble::add_column(recovered = NA) %>%
  dplyr::select(`Country/Region`, date, 
                confirmed, recovered, deceased)

data_world <- data_confirmed_long %>%
  full_join(data_recovered_long) %>%
  full_join(data_deceased_long) %>%
  rbind(data_us) %>%
  ungroup() %>%
  mutate(date = as.Date(date, "%m/%d/%y")) %>%
  arrange(date) %>%
  group_by( `Country/Region`) %>%
  tidyr::fill(confirmed, recovered, deceased) %>%
  tidyr::replace_na(list(deceased = 0, confirmed = 0)) %>%
  mutate(
    recovered_est = lag(confirmed, 14, default = 0) - deceased,
    recovered_est = ifelse(recovered_est > 0, recovered_est, 0),
    recovered     = coalesce(recovered, recovered_est),
    active        = confirmed - recovered - deceased
  ) %>%
  dplyr::select(-recovered_est) %>%ungroup()%>%
  mutate(iso_code=countrycode(`Country/Region`, 'country.name', 'iso3c'))
# save as RDS file
saveRDS(data_world, "data_world.rds")

combined COVID-19 data with population by age group worldwide data (source:United Nations)

# demogrphic data
worldpopuage<-read.csv("./world population by age both sex 2019.csv",
                       header=T)

# filter country and 2015 population, modify structure
worldpopuage[ ,c(9:29)]<-as.numeric(unlist(worldpopuage[ ,c(9:29)]), digits=2)
worldpopuage.country<-worldpopuage%>%
  filter(Type=="Country/Area", 
         Reference.date..as.of.1.July.==2015)%>%
  mutate(age.group1=rowSums(.[ ,c(9,10)], na.rm=T),
         age.group2=rowSums(.[ ,c(11,12)], na.rm=T),
         age.group3=rowSums(.[ ,c(13,14)], na.rm=T),
         age.group4=rowSums(.[ ,c(15,16)], na.rm=T),
         age.group5=rowSums(.[ ,c(17,18)], na.rm=T),
         age.group6=rowSums(.[ ,c(19,20)], na.rm=T),
         age.group7=rowSums(.[ ,c(21,22)], na.rm=T),
         age.group8=rowSums(.[ ,c(23,24)], na.rm=T),
         age.group9=rowSums(.[ ,c(25,29)], na.rm=T))
worldpopuage.country1<-worldpopuage.country[ ,-c(9:29)]
worldpopuage.country2<-worldpopuage.country1%>%
  mutate(Total.pop=rowSums(.[,c(9,17)], na.rm=T ),
         f1=age.group1/Total.pop,
         f2=age.group2/Total.pop,
         f3=age.group3/Total.pop,
         f4=age.group4/Total.pop,
         f5=age.group5/Total.pop,
         f6=age.group6/Total.pop,
         f7=age.group7/Total.pop,
         f8=age.group8/Total.pop,
         f9=age.group9/Total.pop, 
         iso_code=countrycode(Country.code, 
                              'un', 'iso3c' ))
# to assign iso code for Taiwan                                                    
worldpopuage.country2[93,28]<-"TWN"                                                    
#save as RDS
saveRDS(worldpopuage.country2, 
        "worldpopuage.country2.rds")

readRDS("worldpopuage.country2.rds")
## combined
world.count<-readRDS("data_world.rds")%>%
  dplyr::group_by(iso_code)%>%
  dplyr::mutate(confirmed.d=ifelse(row_number() == 1,
                                   confirmed, 
                                   lag(diff(confirmed, 1),1)),
                recovered.d=ifelse(row_number() == 1,
                                   recovered, 
                                   lag(diff(recovered, 1),1)),
                deceased.d=ifelse(row_number() == 1,
                                  recovered, 
                                  lag(diff(deceased, 1),1)),
                death.rate=deceased/confirmed)%>%ungroup()

# world population data
world.demo<-readRDS("worldpopuage.country2.rds")
## CFR south Korea #fatality rate in South Korea 3/11/2020
cfrsk<-c(0,0, 0, 0.0011, 0.0009, 
         0.0037, 0.0151, 0.0535, 
         0.1084)
bench<-world.demo%>%filter(iso_code=="KOR")%>%
  mutate(bvt=rowSums(.[ ,c(19:27)]*cfrsk))%>%
  dplyr::select(bvt)

world.popage<-world.demo%>%
  mutate(vt=rowSums(.[ ,c(19:27)]*cfrsk),
         vtb=vt/bench$bvt)%>%
  dplyr::select(iso_code,vtb, Total.pop)

# merge vtb information into world daily cases
world.vtb<-merge(world.count,world.popage, by="iso_code")%>%
  arrange(iso_code,date)
world.vtb$severe.c<-dplyr::case_when((world.vtb$confirmed<=1000)~"0-1",
                                     (world.vtb$confirmed>1000 & 
                                        world.vtb$confirmed<=5000) ~"1-5",
                                     (world.vtb$confirmed>5000 & 
                                        world.vtb$confirmed<=10000) ~"5-10",
                                     (world.vtb$confirmed>10000 & 
                                        world.vtb$confirmed<=50000) ~"10-50",
                                     (world.vtb$confirmed>50000 & 
                                        world.vtb$confirmed<=100000) ~"50-100",
                                     (world.vtb$confirmed>100000 & 
                                        world.vtb$confirmed<=500000) ~"100-500",
                                     (world.vtb$confirmed>500000 & 
                                        world.vtb$confirmed<=1000000) ~"500-1000",
                                     (world.vtb$confirmed>1000000)  ~">1000")
world.vtb$severe.d<-dplyr::case_when((world.vtb$deceased<=100)~"0-1",
                                     (world.vtb$deceased>100 & 
                                        world.vtb$deceased<=500) ~"1-5",
                                     (world.vtb$deceased>500 & 
                                        world.vtb$deceased<=1000) ~"5-10",
                                     (world.vtb$deceased>1000 & 
                                        world.vtb$deceased<=5000) ~"10-50",
                                     (world.vtb$deceased>5000 & 
                                        world.vtb$deceased<=10000) ~"50-100",
                                     (world.vtb$deceased>10000 & 
                                        world.vtb$deceased<=50000) ~"100-500",
                                     (world.vtb$deceased>500000 ) ~">500")  

saveRDS(world.vtb, "world.vtb.rds")