Covid19

dataECDC <- read.csv("https://opendata.ecdc.europa.eu/covid19/casedistribution/csv", na.strings = "", fileEncoding = "UTF-8-BOM")

names(dataECDC)

##  [1] "dateRep"                                                   
##  [2] "day"                                                       
##  [3] "month"                                                     
##  [4] "year"                                                      
##  [5] "cases"                                                     
##  [6] "deaths"                                                    
##  [7] "countriesAndTerritories"                                   
##  [8] "geoId"                                                     
##  [9] "countryterritoryCode"                                      
## [10] "popData2019"                                               
## [11] "continentExp"                                              
## [12] "Cumulative_number_for_14_days_of_COVID.19_cases_per_100000"

Table of the last 6 days of COVID19 UK data:

Date	Days	cases	survivors	deaths	Cum_14days	incidence	CFR_100	CMR_100
07/10/2020	282	14542	14466	76	189.899	0.99	0.52	11.40
06/10/2020	281	12593	12574	19	175.470	1.00	0.15	2.85
05/10/2020	280	22961	22928	33	163.129	1.00	0.14	4.95
04/10/2020	279	12871	12822	49	134.528	1.00	0.38	7.35
03/10/2020	278	6968	6902	66	121.851	0.99	0.95	9.90
02/10/2020	277	6914	6855	59	117.881	0.99	0.85	8.85

require(zoo)
require(ggplot2)

ggplot(Covid19_UK, aes(Days, cases)) + 
  geom_point(position=position_jitter(1,3), pch=21, fill="#FF0000AA") +
  geom_line(aes(y=rollmean(cases, 7, na.pad=TRUE))) +
  theme_bw()+
  labs(x="Time(days)",y="Number of Cases", title="UK Covid19 Cases distribution in time"
                  ,caption="Data Source: ECDC")+
  scale_x_continuous(breaks = seq(from = 0, to = length(Covid19_UK$Days), by = 30))

ggplot(Covid19_UK, aes(Days, incidence)) + 
  geom_point(position=position_jitter(1,3), pch=21, fill="#FF0000AA") +
  geom_line(aes(y=rollmean(incidence, 7, na.pad=TRUE))) +
  labs(x="Time(days)",y="Incidence proportion",title="Incidence proportion distribution",subtitle="on going infections on total cases")+
  scale_x_continuous(breaks = seq(from = 0, to = length(Covid19_UK$Days), by = 30))+
  theme_bw()

Data are sparse and it is necessary a further analysis to see if the behaviour is Normal. To do that:

Covid19_UK$incidence[is.nan(Covid19_UK$incidence)]<- 0   #elimination of NaN values

incidence<- Covid19_UK$incidence #creating a incidence variable

n<- length(incidence)
inc_avg<- mean(incidence)#0.7679004
inc_sd<- sd(incidence)# 0.363562
inc_se<- inc_sd/sqrt(n)#0.02168829
inc_med<- median(incidence)#0.93

CI_95<-qnorm(0.975)

error<- CI_95*inc_se #margin of error 0.04250827

CI_interv_1<-inc_avg+CI_95*inc_se
CI_interv_2<-inc_avg-CI_95*inc_se

round(CI_interv_2,2);round(CI_interv_1,2);round(error,2)

## [1] 0.73

## [1] 0.81

## [1] 0.04

A Normal distribution of the incicence:

inc_norm<-rnorm(incidence)

qqnorm(inc_norm)
qqline(inc_norm,col="blue")

The following graph show the difference in shape distribution for the right identification of the shape of Incidence proportion distribution:

qplot(inc_norm, geom="histogram",bins = 30,  
      main = "Histogram for Norm Incidence", 
      xlab = "Norm Inc",  
      fill=I("blue"), 
      col=I("red"),alpha=I(.2))+
    theme_bw()

While if the moving average of the observed incidence data is taken, the value are very difficult to distinguish, as well as the behavioural and so is the shape for further forecasting:

prev_Norm_inc<- Covid19_UK %>% 
  mutate(rolling_avg = slide_dbl(incidence, mean, .before = 3, .complete = F))


  ggplot()+
  geom_line(data=Covid19_UK,aes(x=Days,y=incidence),col="white")+
  geom_line(data=prev_Norm_inc,aes(x=Days,y=rolling_avg),col="red")+
    xlim(60,290)+ylim(0.75,1)+
    labs(title="Rolling Average for UK Incidence proportion of Infection",
         x="Time(days)",y="proportion",caption="Data Source: ECDC")+
    theme_bw()

The density shows a clear increase and prominence on the right tail of the observed distribution:

inc_norm_s<- pnorm((incidence-inc_avg)/inc_sd)
plot(density(inc_norm_s),main= "UK Incidence Density")

To make it clearer and to hazard an estimation, four samples are replicated to obtain sample distribution of the incidence proportion at 100,200,300 and 400 time-frame:

Ns<-seq(100,400,100)
B <- 1000
par(mfrow=c(2,2))
LIM <- c(-4,4)

for(incidence_s in Ns){
    ts <- replicate(B, {
      
    X <- rnorm(incidence_s)
    
    sqrt(incidence_s)*mean(X)/sd(X)
    })
    ps <- seq(1/(B+1),1-1/(B+1),len=B)
    
    qqplot(qt(ps,df=incidence_s-1),ts,main=incidence_s,
           
           xlab="Theoretical",ylab="Observed",
           xlim=LIM, ylim=LIM)
    abline(0,1)
}

A further 10 000 replications of the observed incidence distribution is taken and matched with the normal sample:

set.seed(711)#710
inc_samp<- sample(incidence,1000,replace=TRUE)

mean(inc_samp);sd(inc_samp);sd(inc_samp)/sqrt(10000)

## [1] 0.76299

## [1] 0.3671282

## [1] 0.003671282

samp_inc_norm<- rnorm(inc_samp)

par(mfrow=c(1,2))
qqnorm(samp_inc_norm)
qqline(samp_inc_norm,col="blue")

qqnorm(inc_norm)
qqline(inc_norm,col="red")

The updated histogram of the cases in UK shows some outliners:

ggplot()+
  geom_histogram(data=Covid19_UK, aes(x=cases),bins=35, fill = "darkgreen", 
                 col = "black")+
  labs(x="Cases", y="Frequency",title="UK Cases")#+

 # scale_x_continuous(breaks = seq(from = 0, to = 7000, by = 500)) + theme_bw()

Table of the last 6 days of COVID19 Italy data:

Date	Days	cases	survivors	deaths	Cum_14days	incidence	CFR_100	CMR_100
07/10/2020	282	2677	2649	28	49	0.99	1.05	4.64
06/10/2020	281	2257	2241	16	47	0.99	0.71	2.65
05/10/2020	280	2578	2560	18	45	0.99	0.70	2.98
04/10/2020	279	2843	2816	27	43	0.99	0.95	4.47
03/10/2020	278	2499	2476	23	41	0.99	0.92	3.81
02/10/2020	277	2548	2524	24	40	0.99	0.94	3.98

Plot of the two curves of cumulated cases for UK and Italy for the whole period:

ggplot()+
  geom_line(data=Covid19_UK,aes(x=Days,y=cases),col="blue",size = 0.3)+
  geom_line(data=Covid19_IT,aes(x=Days,y=cases),col="red",size = 0.3)+
  annotate(geom="text", x=50, y=6000, label="Italy",
              color="red")+
  annotate(geom="text", x=150, y=4000, label="UK",
              color="blue")+
    theme_bw(base_size = 12) + theme(legend.position = "bottom")+
  xlab("Time(days)")+ylab("Cases")+
  labs("UK and Italy cumulated cases trend",color = "Legend")

UK Cases, Deaths and Survivors trends:

ggplot()+
  geom_line(data=Covid19_UK,aes(x=Days,y=cases),col="green",size=0.3)+
  geom_line(data=Covid19_UK,aes(x=Days,y=deaths),col="red",size=0.3)+
  geom_line(data=Covid19_UK,aes(x=Days,y=survivors),col="blue",size=0.3)+
  theme_bw(base_size = 12) + theme(legend.position = "bottom")+
  labs(x="Time(days)", y="Cumulate numbers", title="UK Cases, deaths and CFR trends")

UK Crude Mortality rates (CMR) and Case Fatality Rates (CFR):

plot(density(Covid19_UK$CMR_100),col="green",xlim=c(-20,50),ylim=c(0,0.1),main="UK CMR and CFR")
lines(density(Covid19_UK$CFR_100,na.rm = T),col="red")
axis(side = 1, at = round(seq(0, 50, by = 10)))
text(x=median(Covid19_UK$CMR_100), y=0.04, labels="CMR")
text(x=mean(Covid19_UK$CFR_100,na.rm=T), y=0.07, labels="CFR")

The CFR curve shows a right(positive) skewed distribution of fatalities along the time:

ggplot()+
  geom_histogram(data=Covid19_UK,aes(x=CFR_100, main="UK CFR"),bins=30,
                 fill = "darkgreen", 
                 col = "black")+
  labs(x="CFR_100", y="Frequency",title="UK CFR_100")

In this section is represented the basic of the SIR model with given beta and gamma, it is expected a further calibration of the model considering other factors, such as: - effect of treatment - effect of social distancing - age impact - co-morbidity - symptomatic and asymptomatic behavior - vaccination strategies

To begin with a theoretical value of beta to calibrate the model using a simple linear regression model function: the sum of squares values (SSQ). The line that best predict the lowest distance between theoretical predictors and real data, to assess how far is the model from real data.

The prevalence of the theoretical model infection is represented in the graph below:

N<- max(Covid19_UK$cases)

initial_state_values<- c(S=N-1,I=1,R=0)

parameters<- c(beta= 1/3,gamma=1/22)#beta (infection rate) is hypothesized to be 3 days^-1, 
                                    #gamma (recovery rate) is 22 days^-1

times<- seq(0,length(Covid19_UK$Date),by=1)

SIR_model<- function(time,state,parameters){
  with(as.list(c(state,parameters)),{
       N<- S+I+R
       lambda<- beta*I/N
       dS<- -lambda*S
       dI<- lambda*S-gamma*I
       dR<- gamma*I
       return(list(c(dS,dI,dR)))
       })
}

output<- as.data.frame(ode(y=initial_state_values,
                           times=times,
                           func=SIR_model,
                           parms=parameters))

ggplot()+
  geom_line(data=output,aes(x=time,y=I))+
  labs(x="Time(days)",y="Infected", title="UK Prevalence of infection")

output_long<- melt(as.data.frame(output), id="time")
ggplot(data=output_long, aes(x=time, y=value,colour=variable,group=variable))+
  geom_line()+
 labs( x="Time(days)",y="proportions", title="UK Covid19 theoretical Prevalence proportion: basic SIR model")

Simple Linear Regression:

SIR_fun<- function(time,state,parameters){
  with(as.list(c(state,parameters)),{
       N<- S+I+R
      
       dS<- -beta*S*(I/N)
       dI<- beta*S*(I/N)
       dR<- gamma*I
       return(list(c(dS,dI,dR)))
       })}
  
SIR_SSQ <- function(parameters, dat) {  
            
    result <- as.data.frame(ode(y = initial_state_values  
                              , times = times            
                              , func = SIR_fun           
                              , parms = parameters)      
                            )
    dat <- na.omit(dat)  
    
    deltas2 <- (result$I[result$time %in% dat$time] - dat$I)^2 
    
    SSQ   <- sum(deltas2)
    
    return(SSQ)
  }

require(reshape2)

dat_UK<- Covid19_UK%>%
  mutate(number_infected = (cases-deaths),time=Days)%>%
  dplyr::select(time,number_infected)

head(formattable(dat_UK))

time	number_infected
282	14466
281	12574
280	22928
279	12822
278	6902
277	6855

initial_state_values <- c(S = 10500-1, I = 1, R = 0)

beta_start  <- 0.15
gamma_start <- 0.04

times <- seq(from = 0, to = 14, by = 0.1) 

optimised <- optim(par = c(beta = beta_start
                        , gamma = gamma_start)     
                        , fn = SIR_SSQ
                        , dat = dat_UK  
      )

round(optimised$par,2)#0.15  0.04

##  beta gamma 
##  0.15  0.04

times <- seq(from = 0, to = length(Covid19_UK$cases), by = 1) 

opt_mod <- as.data.frame(ode(y = initial_state_values,
                             times = times,
                             func = SIR_model,
                             parms = optimised$par))

require(ggplot2)


opt_plot <- ggplot()
opt_plot <- opt_plot + geom_point(aes(x = time, y = number_infected)
                                , colour = "red"
                                , shape  = "x" 
                                , data = dat_UK)
opt_plot <- opt_plot + geom_line(aes(x = time, y = I)
                                 , colour = "blue"
                                 , data   = opt_mod)
opt_plot

Covid19_Uk

FG

8/9/2020