#setwd("C:\Users\TOSHIBA\Desktop\hu\malaria")
getwd()
## [1] "C:/Users/TOSHIBA/Desktop/hu/malaria"
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.3.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1
library(deSolve)
## Warning: package 'deSolve' was built under R version 4.3.1
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.1
## Warning: package 'tibble' was built under R version 4.3.1
## Warning: package 'tidyr' was built under R version 4.3.1
## Warning: package 'readr' was built under R version 4.3.1
## Warning: package 'purrr' was built under R version 4.3.1
## Warning: package 'dplyr' was built under R version 4.3.1
## Warning: package 'stringr' was built under R version 4.3.1
## Warning: package 'forcats' was built under R version 4.3.1
## Warning: package 'lubridate' was built under R version 4.3.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(DiagrammeR)
## Warning: package 'DiagrammeR' was built under R version 4.3.2
#Question number 1: ##You have already introduction and statement of problem. #Question number 2: ##Set general objective and specific objectives for your problem #Question number 3: ##Write a small description of your the data.
getwd()
## [1] "C:/Users/TOSHIBA/Desktop/hu/malaria"
M<-read.csv("book MD.csv")
slices<-c(95392,95015,377,107188,24962,82226,95355,37)
lbls<-c("Infected","OPC", "IPC","Suspected","TM RDTP","TM RDTN","Recovery","Death")
pct <- round(slices/sum(slices)*100)
lbls <- paste(lbls, pct)
# add percents to labels
lbls <- paste(lbls,"%",sep="") # ad % to labels
pie(slices,labels = lbls, col=rainbow(length(lbls)),
main="Pie Chart of Malaria distribution on 2010")
#Simple Pie Chart
slices<-c(95392,160462,217397,74438,1487,1940,358,133,115,41)
lbls <- c("2010", "2011", "2012", "2013", "2014","2015","2016","2017","2018","2019")
pct <- round(slices/sum(slices)*100)
lbls <- paste(lbls, pct)
# add percents to labels
lbls <- paste(lbls,"%",sep="") # ad % to labels
pie(slices,labels = lbls, col=rainbow(length(lbls)),
main="Pie Chart of Infected per year")
summary(M)
## Region Year Total.Malaria.Confirmed.and.Clinical
## Length:10 Min. :2010 Min. : 41.0
## Class :character 1st Qu.:2012 1st Qu.: 189.2
## Mode :character Median :2014 Median : 1713.5
## Mean :2014 Mean : 55176.3
## 3rd Qu.:2017 3rd Qu.: 90153.5
## Max. :2019 Max. :217397.0
## TMalaria_OutP_Cases TMalaria_InP_Cases TMalaria_InP_Deaths
## Min. : 41.0 Min. : 0.0 Min. : 0.00
## 1st Qu.: 185.5 1st Qu.: 0.0 1st Qu.: 0.00
## Median : 1712.5 Median : 3.5 Median : 0.00
## Mean : 54942.8 Mean :233.5 Mean :11.30
## 3rd Qu.: 89823.5 3rd Qu.:330.0 3rd Qu.:24.75
## Max. :216429.0 Max. :968.0 Max. :37.00
## TMSuspected.Fever.Examined PosMalaria_RDT_or_Microscopy_PF_OutP_Cases
## Min. : 1328 Min. : 15.00
## 1st Qu.: 3221 1st Qu.: 71.75
## Median : 11652 Median : 333.50
## Mean : 61915 Mean :13720.60
## 3rd Qu.:104707 3rd Qu.:21976.50
## Max. :213745 Max. :51685.00
## PosMalaria_RDT_or_Microscopy_PV_OutP_Cases TMalariaFree_IOP_case
## Min. : 1313 Min. : 41.0
## 1st Qu.: 3135 1st Qu.: 189.2
## Median : 11346 Median : 1713.5
## Mean : 48194 Mean : 55165.0
## 3rd Qu.: 83738 3rd Qu.: 90119.2
## Max. :162060 Max. :217376.0
head(M)
## Region Year Total.Malaria.Confirmed.and.Clinical TMalaria_OutP_Cases
## 1 Sidama 2010 95392 95015
## 2 Sidama 2011 160462 159668
## 3 Sidama 2012 217397 216429
## 4 Sidama 2013 74438 74249
## 5 Sidama 2014 1487 1485
## 6 Sidama 2015 1940 1940
## TMalaria_InP_Cases TMalaria_InP_Deaths TMSuspected.Fever.Examined
## 1 377 37 107188
## 2 794 29 166352
## 3 968 21 213745
## 4 189 26 97262
## 5 2 0 13091
## 6 0 0 10214
## PosMalaria_RDT_or_Microscopy_PF_OutP_Cases
## 1 24962
## 2 46629
## 3 51685
## 4 13020
## 5 484
## 6 128
## PosMalaria_RDT_or_Microscopy_PV_OutP_Cases TMalariaFree_IOP_case
## 1 82226 95355
## 2 119723 160433
## 3 162060 217376
## 4 84242 74412
## 5 12607 1487
## 6 10086 1940
nrow(M)
## [1] 10
ncol(M)
## [1] 10
head(M)
## Region Year Total.Malaria.Confirmed.and.Clinical TMalaria_OutP_Cases
## 1 Sidama 2010 95392 95015
## 2 Sidama 2011 160462 159668
## 3 Sidama 2012 217397 216429
## 4 Sidama 2013 74438 74249
## 5 Sidama 2014 1487 1485
## 6 Sidama 2015 1940 1940
## TMalaria_InP_Cases TMalaria_InP_Deaths TMSuspected.Fever.Examined
## 1 377 37 107188
## 2 794 29 166352
## 3 968 21 213745
## 4 189 26 97262
## 5 2 0 13091
## 6 0 0 10214
## PosMalaria_RDT_or_Microscopy_PF_OutP_Cases
## 1 24962
## 2 46629
## 3 51685
## 4 13020
## 5 484
## 6 128
## PosMalaria_RDT_or_Microscopy_PV_OutP_Cases TMalariaFree_IOP_case
## 1 82226 95355
## 2 119723 160433
## 3 162060 217376
## 4 84242 74412
## 5 12607 1487
## 6 10086 1940
sum(is.na(M))
## [1] 0
data.frame(num_missing=colSums(is.na(M)))
## num_missing
## Region 0
## Year 0
## Total.Malaria.Confirmed.and.Clinical 0
## TMalaria_OutP_Cases 0
## TMalaria_InP_Cases 0
## TMalaria_InP_Deaths 0
## TMSuspected.Fever.Examined 0
## PosMalaria_RDT_or_Microscopy_PF_OutP_Cases 0
## PosMalaria_RDT_or_Microscopy_PV_OutP_Cases 0
## TMalariaFree_IOP_case 0
#Question 4 ##Elaborate your data using table and graph, and then give a short description of each values of the table or graph.
library(ggplot2)
ggplot(M,aes(Year,Total.Malaria.Confirmed.and.Clinical))+
geom_point()
ggplot(M,aes(Year,Total.Malaria.Confirmed.and.Clinical))+
geom_bar(stat="identity")
ggplot(M,aes(Year, TMalariaFree_IOP_case))+
geom_bar(stat="identity")
ggplot(M,aes(Year,TMSuspected.Fever.Examined ))+
geom_bar(stat="identity")
library(ggplot2)
ggplot(M,aes(Year,TMSuspected.Fever.Examined))+
geom_point()
#Question 5 ##What is the meaning of SIR and SEIR?
#Question 6 ##Explain the difference between SIR and SEIR model.
#Question 6 ##Write short discription about how to relate your model to SIR and SEIR model.
#Question 7 #Write short discription about how to relate your model to SIR and SEIR model. Let us assume a situation wherein we have a closed cohort of 1000 individuals who are infected as well as infective. The duration of infectiousness will remain for 07 days. Once recovered, they remain immune throughout their lifespan. — title: “SIR modeling” author: “Gezahegn” date: “2023-10-25” output: html_document: default word_document: default —
Let us assume a situation wherein we have a closed cohort of 1000 individuals who are infected as well as infective. The duration of infectiousness will remain for 07 days. Once recovered, they remain immune throughout their lifespan.
grViz("digraph flowchart {
graph [layout = dot,
rankdir = LR]
node [fontname = Helvetica, shape = rectangle,]
tab1 [label = '@@1']
tab2 [label = '@@2']
# edge definitions with the node IDs
tab1 -> tab2;
}
[1]: 'Infected'
[2]: 'Recovered'
")
#Evidence synthesis As a pre-requisite to model building, we need to undertake evidence synthesis exercise to extract required parameters from the literature or by conducting a study.
initial_number_infected <- 95392
initial_number_recovered <- 0
Duration_of_infectiousness <-7
recovery_rate <- 1/Duration_of_infectiousness
follow_up_duration <-21
Once parameters are obtained, they are placed into three major parameter inputs viz initial values for each compartment, transition parameters from one compartment to another, and the time stamps for which modelling is required.
initial_state_values <- c(I = initial_number_infected,
R = initial_number_recovered)
parameters <- c(gamma = recovery_rate)
times <- seq(from = 0, to = follow_up_duration, by = 1)
The model is created from the above mentioned parameters by specifying differential equations. In R, a function is an object so that the researcher is able to pass control to the function, along with arguments that may be necessary for the function to accomplish the actions. For performing differntial equations, we create a function with compartment as well as transition parameter at varied timestamps
cohort_model <- function(time, state, parameters) {
with(as.list(c(state, parameters)), {
dI <- -gamma * I
dR <- gamma * I
return(list(c(dI, dR)))
})
}
Return a dataframe as output using model equations A data frame is constructed by application of the differential model. Herein, Odinary Differential Equations (ode) function takes initial compartment values, timestamps, parameters and the model function as arguments.
output<- as.data.frame(ode(y = initial_state_values,
times = times,
func = cohort_model,
parms = parameters))
Since, more than one column is representing the state of the person (Infected or recovered), we need to transform the data into tidy format. A tidy data format is said to present when each column represents one variable, each row has one observation and each cell has one value. Since, more than one column is representing the state of the person (Infected or recovered), we need to transform the data into tidy format. A tidy data format is said to present when each column represents one variable, each row has one observation and each cell has one value.
dim(output)
## [1] 22 3
head(output,10)
## time I R
## 1 0 95392.00 0.00
## 2 1 82693.21 12698.79
## 3 2 71684.92 23707.08
## 4 3 62142.08 33249.92
## 5 4 53869.60 41522.40
## 6 5 46698.36 48693.64
## 7 6 40481.78 54910.22
## 8 7 35092.76 60299.24
## 9 8 30421.13 64970.87
## 10 9 26371.41 69020.59
dat <- output %>% pivot_longer(cols = c(I,R),
names_to = "state",
values_to = "value")
head(dat,10)
## # A tibble: 10 × 3
## time state value
## <dbl> <chr> <dbl>
## 1 0 I 95392
## 2 0 R 0
## 3 1 I 82693.
## 4 1 R 12699.
## 5 2 I 71685.
## 6 2 R 23707.
## 7 3 I 62142.
## 8 3 R 33250.
## 9 4 I 53870.
## 10 4 R 41522.
dat %>% ggplot(aes(x = time,
y = value,
color = state,
group = state))+
geom_line()+
xlab("Time(Days)")+
ylab("Number of persons")+
labs(title = "Changes Malaria Infected and Malaria Recovered individuals")
Increasing complexity To demonstrate increasing complexity, let us now
develop a model for a disease wherein three compartments viz
susceptible, infected, and recovered are present. There are no births
(inflows) or deaths (outflows) in the model. Further, all those who are
infected are also infectious and once recovered, immunity is long
lasting. The basic assumptions of compartment models such as homogenity
among others are also applicable.
grViz("digraph flowchart {
graph [layout = dot,
rankdir = LR]
node [fontname = Helvetica, shape = rectangle,]
tab1 [label = '@@1']
tab2 [label = '@@2']
tab3 [label = '@@3']
# edge definitions with the node IDs
tab1 -> tab2;
tab2 -> tab3;
}
[1]: 'Susceptible'
[2]: 'Infected'
[3]: 'Recovered'
")
#Exploratory Data Analysis of the output To understand the output let us first see the dimensions of our dataframe
## 1. Evidence synthesis
initial_number_susceptible <- 619147 # Adding susceptibles to IR model
initial_number_infected <- 1 # Considering one infected at start time
initial_number_recovered <- 0
Duration_of_infectiousness <- 7
recovery_rate <- 1/7
Reproduction_number <- 2.28 #R0== beta/gamma,
infection_rate <- Reproduction_number*(1/Duration_of_infectiousness) #beta
follow_up_duration <- 300
## 2. Create model input using parameters
initial_state_values <- c(S = initial_number_susceptible, # Adding susceptibles
I = initial_number_infected,
R = initial_number_recovered)
parameters <- c(beta = infection_rate, # Adding beta parameter
gamma = recovery_rate)
times <- seq(from = 0, to = follow_up_duration, by = 1)
## 3. Create a model using input values
cohort_model <- function(time, state, parameters) {
with(as.list(c(state, parameters)), {
N <- S+I+R # Total population
lambda <- beta * I/N #Calculating lambda as function of beta
dS <- -lambda * S
dI <- lambda * S - gamma * I
dR <- gamma * I
return(list(c(dS, dI, dR)))
})
}
## 4. Return a dataframe as output using model equations
output <- as.data.frame(ode(y = initial_state_values,
times = times,
func = cohort_model,
parms = parameters))
## 5. Exploratory Data Analysis (Visualisation) of the output
dat <- output %>% pivot_longer(cols = c(S,I,R),
names_to = "state",
values_to = "value")
dat %>% ggplot(aes(x = time,
y = value,
color = state,
group = state))+
geom_line()+
xlab("Time(Days)")+
ylab("Number of persons")+
labs(title = "Changes in Malaria Suceptible, Infected and Recovered")
## 1. Evidence synthesis
prop_vaccinated <- 0.1
initial_number_susceptible <- (1-prop_vaccinated)*619147
initial_number_infected <- 1
initial_number_recovered <- prop_vaccinated*619147
Duration_of_infectiousness <- 7
recovery_rate <- 1/7
Reproduction_number <- 2.28 #R0== beta/gamma,
infection_rate <- Reproduction_number*(1/Duration_of_infectiousness) #beta
follow_up_duration <- 300
## 2. Create model input using parameters
initial_state_values <- c(S = initial_number_susceptible,
I = initial_number_infected,
R = initial_number_recovered)
parameters <- c(beta = infection_rate,
gamma = recovery_rate)
times <- seq(from = 0, to = follow_up_duration, by = 1)
## 3. Create a model using input values
cohort_model <- function(time, state, parameters) {
with(as.list(c(state, parameters)), {
N <- S+I+R # Total population
lambda <- beta * I/N #Calculating lambda as function of beta
dS <- -lambda * S
dI <- lambda * S - gamma * I
dR <- gamma * I
return(list(c(dS, dI, dR)))
})
}
## 4. Return a dataframe as output using model equations
output <- as.data.frame(ode(y = initial_state_values,
times = times,
func = cohort_model,
parms = parameters))
## 5. Exploratory Data Analysis (Visualisation) of the output
dat <- output %>% pivot_longer(cols = c(S,I,R),
names_to = "state",
values_to = "value")
dat %>% ggplot(aes(x = time,
y = value,
color = state,
group = state))+
geom_line()+
xlab("Time(Days)")+
ylab("Number of persons")+
labs(title = "Changes in Malariam Suceptible, Infected and Recovered at 10% vaccine coverage")
## 1. Evidence synthesis
prop_vaccinated <- 0.2
initial_number_susceptible <- (1-prop_vaccinated)*619147
initial_number_infected <- 1
initial_number_recovered <- prop_vaccinated*619147
Duration_of_infectiousness <- 7
recovery_rate <- 1/7
Reproduction_number <- 2.5 #R0== beta/gamma,
infection_rate <- Reproduction_number*(1/Duration_of_infectiousness) #beta
follow_up_duration <- 210
## 2. Create model input using parameters
initial_state_values <- c(S = initial_number_susceptible, # Adding susceptibles
I = initial_number_infected,
R = initial_number_recovered)
parameters <- c(beta = infection_rate, # Adding beta parameter
gamma = recovery_rate)
times <- seq(from = 0, to = follow_up_duration, by = 1)
## 3. Create a model using input values
cohort_model <- function(time, state, parameters) {
with(as.list(c(state, parameters)), {
N <- S+I+R # Total population
lambda <- beta * I/N #Calculating lambda as function of beta
dS <- -lambda * S
dI <- lambda * S - gamma * I
dR <- gamma * I
return(list(c(dS, dI, dR)))
})
}
## 4. Return a dataframe as output using model equations
output <- as.data.frame(ode(y = initial_state_values,
times = times,
func = cohort_model,
parms = parameters))
## 5. Exploratory Data Analysis (Visualisation) of the output
dat <- output %>% pivot_longer(cols = c(S,I,R),
names_to = "state",
values_to = "value")
dat %>% ggplot(aes(x = time,
y = value,
color = state,
group = state))+
geom_line()+
xlab("Time(Days)")+
ylab("Number of persons")+
labs(title = "Changes in Suceptible, Infected and Recovered at 20% vaccine coverage")
## 1. Evidence synthesis
prop_vaccinated <- 0.2
initial_number_susceptible <- (1-prop_vaccinated)*619147
initial_number_infected <- 1
initial_number_recovered <- prop_vaccinated*619147
Duration_of_infectiousness <- 7
recovery_rate <- 1/7
Reproduction_number <- 2.5 #R0== beta/gamma,
infection_rate <- Reproduction_number*(1/Duration_of_infectiousness) #beta
follow_up_duration <- 300
## 2. Create model input using parameters
initial_state_values <- c(S = initial_number_susceptible, # Adding susceptibles
I = initial_number_infected,
R = initial_number_recovered)
parameters <- c(beta = infection_rate, # Adding beta parameter
gamma = recovery_rate)
times <- seq(from = 0, to = follow_up_duration, by = 1)
## 3. Create a model using input values
cohort_model <- function(time, state, parameters) {
with(as.list(c(state, parameters)), {
N <- S+I+R # Total population
lambda <- beta * I/N #Calculating lambda as function of beta
dS <- -lambda * S
dI <- lambda * S - gamma * I
dR <- gamma * I
return(list(c(dS, dI, dR)))
})
}
## 4. Return a dataframe as output using model equations
output <- as.data.frame(ode(y = initial_state_values,
times = times,
func = cohort_model,
parms = parameters))
## 5. Exploratory Data Analysis (Visualisation) of the output
dat <- output %>% pivot_longer(cols = c(S,I,R),
names_to = "state",
values_to = "value")
dat %>% ggplot(aes(x = time,
y = value,
color = state,
group = state))+
geom_line()+
xlab("Time(Days)")+
ylab("Number of persons")+
labs(title = "Changes in Malaria Suceptible, Infected and Recovered at 20% vaccine coverage")