Read the data for Italian's Province:
CV19_pro_backup<- read.csv("https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-province/dpc-covid19-ita-province.csv")
names(CV19_pro_backup)
## [1] "data" "stato"
## [3] "codice_regione" "denominazione_regione"
## [5] "codice_provincia" "denominazione_provincia"
## [7] "sigla_provincia" "lat"
## [9] "long" "totale_casi"
## [11] "note"
Create a new data set with selected variables:
data_pro<- CV19_pro_backup%>%
mutate(date= format(as.Date(data),"%d/%m/%Y"),
province=denominazione_provincia,
region=denominazione_regione,
cases=totale_casi)%>%
select(date,
region,
province,
cases)%>%
filter(!province=="In fase di definizione/aggiornamento")
head(data_pro);tail(data_pro)
## date region province cases
## 1 24/02/2020 Abruzzo L'Aquila 0
## 2 24/02/2020 Abruzzo Teramo 0
## 3 24/02/2020 Abruzzo Pescara 0
## 4 24/02/2020 Abruzzo Chieti 0
## 5 24/02/2020 Basilicata Potenza 0
## 6 24/02/2020 Basilicata Matera 0
## date region province cases
## 29561 31/10/2020 Veneto Belluno 3339
## 29562 31/10/2020 Veneto Treviso 11468
## 29563 31/10/2020 Veneto Venezia 8341
## 29564 31/10/2020 Veneto Padova 10972
## 29565 31/10/2020 Veneto Rovigo 1330
## 29566 31/10/2020 Veneto Fuori Regione / Provincia Autonoma 1057
Plot
#esquisser(data_pro)
require(ggplot2)
library(scales)
##
## Attaching package: 'scales'
## The following objects are masked from 'package:formattable':
##
## comma, percent, scientific
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
ggplot(data_pro) +
aes(x = date, y = cases, colour = region, group = province) +
geom_boxplot(fill = "#0c4c8a") +
scale_color_hue() +
theme_minimal()
ggplot(data_pro) +
aes(x = date, y = cases, colour = region, group = region) +
geom_boxplot(fill = "#0c4c8a") +
scale_color_hue() +
theme_minimal()
data_pro_rm<- data_pro %>%
filter(province==c("Roma"))%>%
mutate(time=seq(1,length(cases)),
new_cases=c(0,diff(cases)),
case_incidence=(new_cases/cases))%>%
select(time,cases,new_cases,case_incidence)
#formattable(head(data_pro_rm),)
require(zoo)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
ggplot(data_pro_rm, aes(time, new_cases)) +
geom_point(position=position_jitter(1,3), pch=21, fill="#FF0000AA") +
geom_line(aes(y=rollmean(new_cases, 7, na.pad=TRUE))) +
theme_bw()+labs(title="New Cases in Rome - Covid19 Italy",caption="DataSource=CivilProtection")+xlab("Time(days)")+ylab("Number of new cases")
data.frame(daily_cases = c("Avgerage","Variation"),
N.=round(c(mean(data_pro_rm$new_cases),sd(data_pro_rm$new_cases))))
## daily_cases N.
## 1 Avgerage 134
## 2 Variation 270
ggplot(data_pro_rm, aes(time, cases)) +
geom_point(position=position_jitter(1,3), pch=21, fill="#FF0000AA") +
geom_line(aes(y=rollmean(cases, 7, na.pad=TRUE))) +
theme_bw()+labs(title="Total Cases trend in Rome - Covid19 Italy",caption="DataSource=CivilProtection")+xlab("Time(days)")+ylab("Cumulate cases")
data_pro_rm$case_incidence<- is.nan(data_pro_rm$case_incidence)==0
ggplot(data_pro_rm, aes(time, case_incidence)) +
geom_point(position=position_jitter(1,3), pch=21, fill="#FF0000AA") +
geom_line(aes(y=rollmean(case_incidence, 7, na.pad=TRUE))) +
theme_bw()+labs(title="Case Incidence in Rome - Covid19 Italy",caption="DataSource=CivilProtection")+xlab("Time(days)")+ylab("Incidence")
incidence<-data_pro_rm$case_incidence
n_inc<-rnorm(incidence)
qqnorm(n_inc)
qqline(n_inc,col="red")
par(mfrow=c(1,2))
plot(density(data_pro_rm$new_cases), main="Covid19 New Cases density")
plot(density(data_pro_rm$cases), main="Covid19 Cases density")
require(forecast)
fit<- auto.arima(data_pro_rm$new_cases,trace=FALSE)
fit
## Series: data_pro_rm$new_cases
## ARIMA(2,2,2)
##
## Coefficients:
## ar1 ar2 ma1 ma2
## -1.4388 -0.6792 0.0349 -0.6831
## s.e. 0.0581 0.0507 0.0537 0.0479
##
## sigma^2 estimated as 1409: log likelihood=-1255.6
## AIC=2521.2 AICc=2521.44 BIC=2538.78
Bandwidth 17.3 changed since the begin of the second wave to 25.82
plot(forecast(fit,h=25.82),
main = paste("Covid19 RM second wave - ARIMA ")) #Bandwidth 17.3
accuracy(fit)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 3.411697 37.08992 19.77317 -Inf Inf 0.9103667 0.01758601
#autoplot(fit)
plot(data_pro_rm$new_cases,col="red",main = paste("Covid19 RM second wave - ARIMA "))
lines(fitted(fit),col="Navy")
legend("topleft", legend=c("Covid19 - Rome New Cases curve", "ARIMA-fit"), col=c("red", "Navy"), lty=1:2, cex=0.85,box.lty=0)