##
library(magrittr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages ------------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.3.0 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.4
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## Warning: package 'readr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts --------------------------------------------------------- tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date() masks base::date()
## x tidyr::extract() masks magrittr::extract()
## x dplyr::filter() masks stats::filter()
## x lubridate::intersect() masks base::intersect()
## x dplyr::lag() masks stats::lag()
## x purrr::set_names() masks magrittr::set_names()
## x lubridate::setdiff() masks base::setdiff()
## x lubridate::union() masks base::union()
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 3.6.3
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(plotly)
## Warning: package 'plotly' was built under R version 3.6.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
## source data files: one global time series for confirmed cases, one for deaths
filenames <- c(
  'time_series_covid19_confirmed_global.csv',
  'time_series_covid19_deaths_global.csv'
)
## base URL of the time-series folder in the CSSEGISandData/COVID-19 GitHub
## repository; it ends with a trailing "/"
url.path <- paste0(
  'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/',
  'master/csse_covid_19_data/csse_covid_19_time_series/'
)
## Download one source file into the local ./data directory.
##
## filename: name of a file located directly under `url.path`; the same name
##           is used for the local copy.
## Returns whatever download.file() returns for this transfer.
download <- function(filename) {
  ## `url.path` already ends in "/", so file.path() would produce "//" in the
  ## URL; strip any trailing slashes and join with exactly one separator.
  url <- paste0(sub('/+$', '', url.path), '/', filename)
  dest <- file.path('./data', filename)
  ## download.file() does not create missing directories, so make sure the
  ## destination folder exists (no-op, and no warning, when it already does).
  dir.create(dirname(dest), showWarnings = FALSE, recursive = TRUE)
  download.file(url, dest)
}
## fetch every source file; `bin` only collects the values returned by
## download() and is not used for the analysis itself
bin <- lapply(X = filenames, FUN = download)
## load the downloaded CSV files into R
data.confirmed <- read.csv(
  file = './data/time_series_covid19_confirmed_global.csv'
)
data.deaths <- read.csv(
  file = './data/time_series_covid19_deaths_global.csv'
)
## quick check of the table size (rows x columns)
dim(data.confirmed)
## [1] 256 74
## data cleaning and transformation
##
## Turn one wide time-series table (one row per province/country, one column
## per day) into a long data frame of daily totals per country.
##
## data: data frame with columns Province.State, Country.Region, Lat, Long and
##       one count column per date. The date columns are expected to be named
##       the way read.csv() renders "m/d/yy" headers, e.g. "X1.22.20".
## Returns a plain data.frame with columns `country`, `date` (Date) and
## `count` (sum over all rows of the same country and date).
cleanData <- function(data) {
  data %>%
    ## remove some columns
    select(-c(Province.State, Lat, Long)) %>%
    rename(country = Country.Region) %>%
    ## convert from wide to long format
    gather(key = date, value = count, -country) %>%
    ## convert from character to date: drop the leading "X" instead of taking
    ## a fixed-width substring, which truncated names such as "X10.22.20"
    ## (two-digit month and day) to "10.22.2" and corrupted the year
    mutate(date = mdy(sub("^X", "", date))) %>%
    ## aggregate by country
    group_by(country, date) %>%
    summarise(count = sum(count)) %>%
    ## back to a plain data.frame, which also drops the remaining grouping
    as.data.frame()
}
## clean the two datasets and give each count column a descriptive name
data.confirmed <- rename(cleanData(data.confirmed), confirmed = count)
data.deaths <- rename(cleanData(data.deaths), deaths = count)
## merge the two datasets into one; merge() joins on the columns they share,
## i.e. country and date
data <- merge(data.confirmed, data.deaths)
## distinct countries/regions present in the data, excl. cruise ships
countries <- setdiff(pull(data, country), 'Cruise Ship')
## Fit exponential-growth model - Chile
##
## Poisson GLM with log link: log(E[confirmed]) = b0 + b1 * t, i.e. expected
## confirmed cases grow exponentially in the day index t.
dataChile <- data[data$country == "Chile", ]
## t must count days in chronological order, so make the ordering explicit
## rather than relying on the row order produced upstream
dataChile <- dataChile[order(dataChile$date), ]
## seq_len() is safe for zero rows, unlike seq(from = 1, to = 0, by = 1)
t <- seq_len(nrow(dataChile))
dataChile$t <- t
fit <- glm(formula = confirmed ~ t, data = dataChile, family = "poisson")
## fitted values on the response (count) scale
y_hat <- predict(fit, newdata = dataChile, type = "response")
dataChile$y_hat <- y_hat
## observed counts as red points, fitted curve as a blue line; columns are
## mapped through `data =` instead of `dataChile$...` inside aes()
p <- ggplot(dataChile, aes(x = date)) +
  geom_point(aes(y = confirmed), color = 'red') +
  geom_line(aes(y = y_hat), color = 'blue') +
  ggtitle("Crecimiento Exponencial Chile") +
  xlab("Fecha") +
  ylab("Casos Confirmados")
ggplotly(p)
summary(fit)
##
## Call:
## glm(formula = confirmed ~ t, family = "poisson", data = dataChile)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -10.3057 -3.1725 -0.6511 -0.1085 5.9466
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.435953 0.103111 -62.42 <2e-16 ***
## t 0.207738 0.001566 132.66 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 59882.23 on 69 degrees of freedom
## Residual deviance: 703.16 on 68 degrees of freedom
## AIC: 896.4
##
## Number of Fisher Scoring iterations: 5