## 
library(magrittr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages ------------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.3.0     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.4
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## Warning: package 'readr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts --------------------------------------------------------- tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date()        masks base::date()
## x tidyr::extract()         masks magrittr::extract()
## x dplyr::filter()          masks stats::filter()
## x lubridate::intersect()   masks base::intersect()
## x dplyr::lag()             masks stats::lag()
## x purrr::set_names()       masks magrittr::set_names()
## x lubridate::setdiff()     masks base::setdiff()
## x lubridate::union()       masks base::union()
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 3.6.3
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(plotly)
## Warning: package 'plotly' was built under R version 3.6.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
## names of the time-series CSV files to fetch
filenames <- c(
  'time_series_covid19_confirmed_global.csv',
  'time_series_covid19_deaths_global.csv'
)
## base URL of the directory holding those files in the
## CSSEGISandData/COVID-19 GitHub repository (ends with a '/')
url.path <- paste0(
  'https://raw.githubusercontent.com/',
  'CSSEGISandData/COVID-19/master/',
  'csse_covid_19_data/csse_covid_19_time_series/'
)

## Download one source file into the local ./data directory.
##
## Args:
##   filename: name of a file located under `url.path`; the same name is
##             used for the local copy in ./data.
## Returns: the value of download.file() (its integer status code).
download <- function(filename) {
  ## Build the URL by plain concatenation. file.path() is meant for file
  ## system paths and would insert a second '/' after a base URL that
  ## already ends with one; trailing slashes are normalised here so the
  ## result is correct whether or not `url.path` ends with '/'.
  url <- paste0(sub('/+$', '', url.path), '/', filename)
  ## download.file() does not create missing directories
  dir.create('./data', showWarnings = FALSE, recursive = TRUE)
  dest <- file.path('./data', filename)
  download.file(url, dest)
}
## fetch every source file; `bin` just collects the values returned by download()
bin <- lapply(X = filenames, FUN = download)

## read the local copies into data frames
data.confirmed <- read.csv(file = './data/time_series_covid19_confirmed_global.csv')
data.deaths <- read.csv(file = './data/time_series_covid19_deaths_global.csv')
## number of rows and columns of the confirmed-cases table
dim(data.confirmed)
## [1] 256  74
## data cleaning and transformation
##
## Reshapes one wide time-series table (one count column per date) into a
## long table with one row per country and date.
##
## Args:
##   data: data frame with the columns Province.State, Country.Region, Lat
##         and Long, plus one count column per date whose name encodes the
##         date as month.day.year (e.g. 'X1.22.20').
## Returns: a plain data.frame with the columns country, date (Date) and
##          count, where count is summed over all rows of the same country.
cleanData <- function(data) {
  ## remove some columns
  data %<>%
    select(-c(Province.State, Lat, Long)) %>%
    rename(country = Country.Region)
  ## convert from wide to long format
  data %<>%
    pivot_longer(cols = -country, names_to = 'date', values_to = 'count')
  ## convert from character to date: read.csv() turns a header such as
  ## '1/22/20' into 'X1.22.20'. Strip that prefix instead of taking a
  ## fixed-width substring, which would cut the year of longer names such
  ## as 'X10.15.20'.
  data %<>% mutate(date = mdy(sub('^X', '', date)))
  ## aggregate by country; as.data.frame() also drops the grouping
  data %>%
    group_by(country, date) %>%
    summarise(count = sum(count)) %>%
    as.data.frame()
}

## clean the two datasets and give each count column a descriptive name
data.confirmed <- rename(cleanData(data.confirmed), confirmed = count)
data.deaths <- rename(cleanData(data.deaths), deaths = count)
## merge the two datasets into one, by country and date
data <- merge(data.confirmed, data.deaths, by = c('country', 'date'))
## countries/regions with confirmed cases, excl. cruise ships
countries <- setdiff(data[['country']], 'Cruise Ship')

## Fit exponential model - Chile
## Rows of the merged table that belong to Chile; which() keeps rows with
## a missing country from being selected as all-NA rows.
dataChile <- data[which(data$country == "Chile"), ]
if (nrow(dataChile) == 0) {
  stop("no rows found for country 'Chile'", call. = FALSE)
}
## time index 1..n, one step per row, used as the only regressor
t <- seq_len(nrow(dataChile))
dataChile <- cbind(dataChile, t)
## Poisson regression (log link by default): the fitted mean is
## exp(intercept + slope * t), i.e. exponential growth in t
fit <- glm(formula = confirmed ~ t, data = dataChile, family = "poisson")
## fitted values on the response (count) scale
y_hat <- predict(fit, newdata = dataChile, type = "response")
dataChile <- cbind(dataChile, y_hat)

## observed counts (red points) against the fitted curve (blue line);
## columns are referenced through `data =` rather than `dataChile$...`
## inside aes(), so the mappings carry the plain column names
p <- ggplot(data = dataChile, mapping = aes(x = date)) +
  geom_point(aes(y = confirmed), color = 'red') +
  geom_line(aes(y = y_hat), color = 'blue') +
  ggtitle("Crecimiento Exponencial Chile") +
  xlab("Fecha") +
  ylab("Casos Confirmados")
ggplotly(p)
summary(fit)
## 
## Call:
## glm(formula = confirmed ~ t, family = "poisson", data = dataChile)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -10.3057   -3.1725   -0.6511   -0.1085    5.9466  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -6.435953   0.103111  -62.42   <2e-16 ***
## t            0.207738   0.001566  132.66   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 59882.23  on 69  degrees of freedom
## Residual deviance:   703.16  on 68  degrees of freedom
## AIC: 896.4
## 
## Number of Fisher Scoring iterations: 5