#setwd("C:\Users\TOSHIBA\Desktop\hu\malaria")
getwd()
## [1] "C:/Users/TOSHIBA/Desktop/hu/malaria"
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.3.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1
library(deSolve)
## Warning: package 'deSolve' was built under R version 4.3.1
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.1
## Warning: package 'tibble' was built under R version 4.3.1
## Warning: package 'tidyr' was built under R version 4.3.1
## Warning: package 'readr' was built under R version 4.3.1
## Warning: package 'purrr' was built under R version 4.3.1
## Warning: package 'dplyr' was built under R version 4.3.1
## Warning: package 'stringr' was built under R version 4.3.1
## Warning: package 'forcats' was built under R version 4.3.1
## Warning: package 'lubridate' was built under R version 4.3.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(DiagrammeR)
## Warning: package 'DiagrammeR' was built under R version 4.3.2

#Question number 1: ##You have already introduction and statement of problem. #Question number 2: ##Set general objective and specific objectives for your problem #Question number 3: ##Write a small description of your the data.

getwd()
## [1] "C:/Users/TOSHIBA/Desktop/hu/malaria"
M<-read.csv("book MD.csv")
slices<-c(95392,95015,377,107188,24962,82226,95355,37)
lbls<-c("Infected","OPC", "IPC","Suspected","TM RDTP","TM RDTN","Recovery","Death")
pct <- round(slices/sum(slices)*100)
lbls <- paste(lbls, pct)
# add percents to labels
lbls <- paste(lbls,"%",sep="") # ad % to labels
pie(slices,labels = lbls, col=rainbow(length(lbls)),
   main="Pie Chart of Malaria distribution on 2010")

#Simple Pie Chart
slices<-c(95392,160462,217397,74438,1487,1940,358,133,115,41)
lbls <- c("2010", "2011", "2012", "2013", "2014","2015","2016","2017","2018","2019")
pct <- round(slices/sum(slices)*100)
lbls <- paste(lbls, pct)
# add percents to labels
lbls <- paste(lbls,"%",sep="") # ad % to labels
pie(slices,labels = lbls, col=rainbow(length(lbls)),
   main="Pie Chart of Infected per year")

summary(M)
##     Region               Year      Total.Malaria.Confirmed.and.Clinical
##  Length:10          Min.   :2010   Min.   :    41.0                    
##  Class :character   1st Qu.:2012   1st Qu.:   189.2                    
##  Mode  :character   Median :2014   Median :  1713.5                    
##                     Mean   :2014   Mean   : 55176.3                    
##                     3rd Qu.:2017   3rd Qu.: 90153.5                    
##                     Max.   :2019   Max.   :217397.0                    
##  TMalaria_OutP_Cases TMalaria_InP_Cases TMalaria_InP_Deaths
##  Min.   :    41.0    Min.   :  0.0      Min.   : 0.00      
##  1st Qu.:   185.5    1st Qu.:  0.0      1st Qu.: 0.00      
##  Median :  1712.5    Median :  3.5      Median : 0.00      
##  Mean   : 54942.8    Mean   :233.5      Mean   :11.30      
##  3rd Qu.: 89823.5    3rd Qu.:330.0      3rd Qu.:24.75      
##  Max.   :216429.0    Max.   :968.0      Max.   :37.00      
##  TMSuspected.Fever.Examined PosMalaria_RDT_or_Microscopy_PF_OutP_Cases
##  Min.   :  1328             Min.   :   15.00                          
##  1st Qu.:  3221             1st Qu.:   71.75                          
##  Median : 11652             Median :  333.50                          
##  Mean   : 61915             Mean   :13720.60                          
##  3rd Qu.:104707             3rd Qu.:21976.50                          
##  Max.   :213745             Max.   :51685.00                          
##  PosMalaria_RDT_or_Microscopy_PV_OutP_Cases TMalariaFree_IOP_case
##  Min.   :  1313                             Min.   :    41.0     
##  1st Qu.:  3135                             1st Qu.:   189.2     
##  Median : 11346                             Median :  1713.5     
##  Mean   : 48194                             Mean   : 55165.0     
##  3rd Qu.: 83738                             3rd Qu.: 90119.2     
##  Max.   :162060                             Max.   :217376.0
head(M)
##   Region Year Total.Malaria.Confirmed.and.Clinical TMalaria_OutP_Cases
## 1 Sidama 2010                                95392               95015
## 2 Sidama 2011                               160462              159668
## 3 Sidama 2012                               217397              216429
## 4 Sidama 2013                                74438               74249
## 5 Sidama 2014                                 1487                1485
## 6 Sidama 2015                                 1940                1940
##   TMalaria_InP_Cases TMalaria_InP_Deaths TMSuspected.Fever.Examined
## 1                377                  37                     107188
## 2                794                  29                     166352
## 3                968                  21                     213745
## 4                189                  26                      97262
## 5                  2                   0                      13091
## 6                  0                   0                      10214
##   PosMalaria_RDT_or_Microscopy_PF_OutP_Cases
## 1                                      24962
## 2                                      46629
## 3                                      51685
## 4                                      13020
## 5                                        484
## 6                                        128
##   PosMalaria_RDT_or_Microscopy_PV_OutP_Cases TMalariaFree_IOP_case
## 1                                      82226                 95355
## 2                                     119723                160433
## 3                                     162060                217376
## 4                                      84242                 74412
## 5                                      12607                  1487
## 6                                      10086                  1940
nrow(M)
## [1] 10
ncol(M)
## [1] 10
head(M)
##   Region Year Total.Malaria.Confirmed.and.Clinical TMalaria_OutP_Cases
## 1 Sidama 2010                                95392               95015
## 2 Sidama 2011                               160462              159668
## 3 Sidama 2012                               217397              216429
## 4 Sidama 2013                                74438               74249
## 5 Sidama 2014                                 1487                1485
## 6 Sidama 2015                                 1940                1940
##   TMalaria_InP_Cases TMalaria_InP_Deaths TMSuspected.Fever.Examined
## 1                377                  37                     107188
## 2                794                  29                     166352
## 3                968                  21                     213745
## 4                189                  26                      97262
## 5                  2                   0                      13091
## 6                  0                   0                      10214
##   PosMalaria_RDT_or_Microscopy_PF_OutP_Cases
## 1                                      24962
## 2                                      46629
## 3                                      51685
## 4                                      13020
## 5                                        484
## 6                                        128
##   PosMalaria_RDT_or_Microscopy_PV_OutP_Cases TMalariaFree_IOP_case
## 1                                      82226                 95355
## 2                                     119723                160433
## 3                                     162060                217376
## 4                                      84242                 74412
## 5                                      12607                  1487
## 6                                      10086                  1940
sum(is.na(M))
## [1] 0
data.frame(num_missing=colSums(is.na(M)))
##                                            num_missing
## Region                                               0
## Year                                                 0
## Total.Malaria.Confirmed.and.Clinical                 0
## TMalaria_OutP_Cases                                  0
## TMalaria_InP_Cases                                   0
## TMalaria_InP_Deaths                                  0
## TMSuspected.Fever.Examined                           0
## PosMalaria_RDT_or_Microscopy_PF_OutP_Cases           0
## PosMalaria_RDT_or_Microscopy_PV_OutP_Cases           0
## TMalariaFree_IOP_case                                0

#Question 4 ##Elaborate your data using table and graph, and then give a short description of each values of the table or graph.

library(ggplot2)
ggplot(M,aes(Year,Total.Malaria.Confirmed.and.Clinical))+
  geom_point()

ggplot(M,aes(Year,Total.Malaria.Confirmed.and.Clinical))+
  geom_bar(stat="identity")

ggplot(M,aes(Year, TMalariaFree_IOP_case))+
  geom_bar(stat="identity")

ggplot(M,aes(Year,TMSuspected.Fever.Examined  ))+
  geom_bar(stat="identity")

library(ggplot2)
ggplot(M,aes(Year,TMSuspected.Fever.Examined))+
  geom_point()

#Question 5 ##What is the meaning of SIR and SEIR?

#Question 6 ##Explain the difference between SIR and SEIR model.

#Question 6 ##Write short discription about how to relate your model to SIR and SEIR model.

#Question 7 #Write short discription about how to relate your model to SIR and SEIR model. Let us assume a situation wherein we have a closed cohort of 1000 individuals who are infected as well as infective. The duration of infectiousness will remain for 07 days. Once recovered, they remain immune throughout their lifespan. — title: “SIR modeling” author: “Gezahegn” date: “2023-10-25” output: html_document: default word_document: default —

Let us assume a situation wherein we have a closed cohort of 1000 individuals who are infected as well as infective. The duration of infectiousness will remain for 07 days. Once recovered, they remain immune throughout their lifespan.

grViz("digraph flowchart {
      graph [layout = dot,
       rankdir = LR]
      node [fontname = Helvetica, shape = rectangle,]
      tab1 [label = '@@1']
      tab2 [label = '@@2']
      # edge definitions with the node IDs
      tab1 -> tab2;
      
      }

      [1]: 'Infected'
      [2]: 'Recovered'
     
       ")

#Evidence synthesis As a pre-requisite to model building, we need to undertake evidence synthesis exercise to extract required parameters from the literature or by conducting a study.

initial_number_infected <- 95392
initial_number_recovered <- 0 
Duration_of_infectiousness <-7  
recovery_rate <- 1/Duration_of_infectiousness
follow_up_duration <-21

Create model input using parameters

Once parameters are obtained, they are placed into three major parameter inputs viz initial values for each compartment, transition parameters from one compartment to another, and the time stamps for which modelling is required.

initial_state_values <- c(I = initial_number_infected, 
                          R = initial_number_recovered) 

parameters <- c(gamma = recovery_rate) 
times <- seq(from = 0, to = follow_up_duration, by = 1)

The model is created from the above mentioned parameters by specifying differential equations. In R, a function is an object so that the researcher is able to pass control to the function, along with arguments that may be necessary for the function to accomplish the actions. For performing differntial equations, we create a function with compartment as well as transition parameter at varied timestamps

cohort_model <- function(time, state, parameters) {     
   with(as.list(c(state, parameters)), {  
   dI <- -gamma * I
   dR <- gamma * I
   return(list(c(dI, dR)))                             
   })
}

Return a dataframe as output using model equations A data frame is constructed by application of the differential model. Herein, Odinary Differential Equations (ode) function takes initial compartment values, timestamps, parameters and the model function as arguments.

output<- as.data.frame(ode(y = initial_state_values, 
                            times = times, 
                            func = cohort_model,
                            parms = parameters))

Since, more than one column is representing the state of the person (Infected or recovered), we need to transform the data into tidy format. A tidy data format is said to present when each column represents one variable, each row has one observation and each cell has one value. Since, more than one column is representing the state of the person (Infected or recovered), we need to transform the data into tidy format. A tidy data format is said to present when each column represents one variable, each row has one observation and each cell has one value.

dim(output)
## [1] 22  3
head(output,10)
##    time        I        R
## 1     0 95392.00     0.00
## 2     1 82693.21 12698.79
## 3     2 71684.92 23707.08
## 4     3 62142.08 33249.92
## 5     4 53869.60 41522.40
## 6     5 46698.36 48693.64
## 7     6 40481.78 54910.22
## 8     7 35092.76 60299.24
## 9     8 30421.13 64970.87
## 10    9 26371.41 69020.59
dat <- output %>% pivot_longer(cols = c(I,R),
                        names_to = "state",
                        values_to = "value")
head(dat,10) 
## # A tibble: 10 × 3
##     time state  value
##    <dbl> <chr>  <dbl>
##  1     0 I     95392 
##  2     0 R         0 
##  3     1 I     82693.
##  4     1 R     12699.
##  5     2 I     71685.
##  6     2 R     23707.
##  7     3 I     62142.
##  8     3 R     33250.
##  9     4 I     53870.
## 10     4 R     41522.
dat %>% ggplot(aes(x = time,
                   y = value,
                   color = state,
                   group = state))+
  geom_line()+
  xlab("Time(Days)")+
  ylab("Number of persons")+
  labs(title = "Changes Malaria Infected and Malaria Recovered individuals")

Increasing complexity To demonstrate increasing complexity, let us now develop a model for a disease wherein three compartments viz susceptible, infected, and recovered are present. There are no births (inflows) or deaths (outflows) in the model. Further, all those who are infected are also infectious and once recovered, immunity is long lasting. The basic assumptions of compartment models such as homogenity among others are also applicable.

grViz("digraph flowchart {
      graph [layout = dot,
       rankdir = LR]
      node [fontname = Helvetica, shape = rectangle,]
      tab1 [label = '@@1']
      tab2 [label = '@@2']
      tab3 [label = '@@3'] 
      # edge definitions with the node IDs
      tab1 -> tab2;
      tab2 -> tab3;
      }

      [1]: 'Susceptible'
      [2]: 'Infected'
      [3]: 'Recovered'
      ")

#Exploratory Data Analysis of the output To understand the output let us first see the dimensions of our dataframe

## 1. Evidence synthesis
initial_number_susceptible <- 619147 # Adding susceptibles to IR model
initial_number_infected <- 1 # Considering one infected at start time
initial_number_recovered <- 0 
Duration_of_infectiousness <- 7 
recovery_rate <- 1/7
Reproduction_number <- 2.28 #R0== beta/gamma,
infection_rate <- Reproduction_number*(1/Duration_of_infectiousness) #beta
follow_up_duration <- 300
## 2. Create model input using parameters
initial_state_values <- c(S = initial_number_susceptible, # Adding susceptibles
                          I = initial_number_infected, 
                          R = initial_number_recovered) 

parameters <- c(beta = infection_rate, # Adding beta parameter 
                gamma = recovery_rate) 

times <- seq(from = 0, to = follow_up_duration, by = 1) 

## 3. Create a model using input values

cohort_model <- function(time, state, parameters) {     
   with(as.list(c(state, parameters)), { 
     N <- S+I+R # Total population
     lambda <- beta * I/N #Calculating lambda as function of beta
     dS <- -lambda * S               
     dI <- lambda * S - gamma * I    
     dR <- gamma * I   
   return(list(c(dS, dI, dR)))                             
   })
}

## 4. Return a dataframe as output using model equations
output <- as.data.frame(ode(y = initial_state_values, 
                            times = times, 
                            func = cohort_model,
                            parms = parameters))
## 5. Exploratory Data Analysis (Visualisation) of the output
dat <- output %>% pivot_longer(cols = c(S,I,R),
                        names_to = "state",
                        values_to = "value")
dat %>% ggplot(aes(x = time,
                   y = value,
                   color = state,
                   group = state))+
  geom_line()+
  xlab("Time(Days)")+
  ylab("Number of persons")+
  labs(title = "Changes in Malaria Suceptible, Infected and Recovered")

## 1. Evidence synthesis
prop_vaccinated <- 0.1
initial_number_susceptible <- (1-prop_vaccinated)*619147
initial_number_infected <- 1 
initial_number_recovered <- prop_vaccinated*619147
Duration_of_infectiousness <- 7 
recovery_rate <- 1/7
Reproduction_number <- 2.28 #R0== beta/gamma,
infection_rate <- Reproduction_number*(1/Duration_of_infectiousness) #beta
follow_up_duration <- 300
## 2. Create model input using parameters
initial_state_values <- c(S = initial_number_susceptible, 
                          I = initial_number_infected, 
                          R = initial_number_recovered) 

parameters <- c(beta = infection_rate,  
                gamma = recovery_rate) 

times <- seq(from = 0, to = follow_up_duration, by = 1) 

## 3. Create a model using input values

cohort_model <- function(time, state, parameters) {     
   with(as.list(c(state, parameters)), { 
     N <- S+I+R # Total population
     lambda <- beta * I/N #Calculating lambda as function of beta
     dS <- -lambda * S               
     dI <- lambda * S - gamma * I    
     dR <- gamma * I   
   return(list(c(dS, dI, dR)))                             
   })
}
## 4. Return a dataframe as output using model equations
output <- as.data.frame(ode(y = initial_state_values, 
                            times = times, 
                            func = cohort_model,
                            parms = parameters))
## 5. Exploratory Data Analysis (Visualisation) of the output
dat <- output %>% pivot_longer(cols = c(S,I,R),
                        names_to = "state",
                        values_to = "value")
dat %>% ggplot(aes(x = time,
                   y = value,
                   color = state,
                   group = state))+
  geom_line()+
  xlab("Time(Days)")+
  ylab("Number of persons")+
  labs(title = "Changes in Malariam Suceptible, Infected and Recovered at 10% vaccine coverage")

## 1. Evidence synthesis
prop_vaccinated <- 0.2
initial_number_susceptible <- (1-prop_vaccinated)*619147 
initial_number_infected <- 1 
initial_number_recovered <- prop_vaccinated*619147
Duration_of_infectiousness <- 7 
recovery_rate <- 1/7
Reproduction_number <- 2.5 #R0== beta/gamma,
infection_rate <- Reproduction_number*(1/Duration_of_infectiousness) #beta
follow_up_duration <- 210
## 2. Create model input using parameters
initial_state_values <- c(S = initial_number_susceptible, # Adding susceptibles
                          I = initial_number_infected, 
                          R = initial_number_recovered) 

parameters <- c(beta = infection_rate, # Adding beta parameter 
                gamma = recovery_rate) 

times <- seq(from = 0, to = follow_up_duration, by = 1) 

## 3. Create a model using input values

cohort_model <- function(time, state, parameters) {     
   with(as.list(c(state, parameters)), { 
     N <- S+I+R # Total population
     lambda <- beta * I/N #Calculating lambda as function of beta
     dS <- -lambda * S               
     dI <- lambda * S - gamma * I    
     dR <- gamma * I   
   return(list(c(dS, dI, dR)))                             
   })
}
## 4. Return a dataframe as output using model equations
output <- as.data.frame(ode(y = initial_state_values, 
                            times = times, 
                            func = cohort_model,
                            parms = parameters))
## 5. Exploratory Data Analysis (Visualisation) of the output
dat <- output %>% pivot_longer(cols = c(S,I,R),
                        names_to = "state",
                        values_to = "value")
dat %>% ggplot(aes(x = time,
                   y = value,
                   color = state,
                   group = state))+
  geom_line()+
  xlab("Time(Days)")+
  ylab("Number of persons")+
  labs(title = "Changes in Suceptible, Infected and Recovered at 20% vaccine coverage")

## 1. Evidence synthesis
prop_vaccinated <- 0.2
initial_number_susceptible <- (1-prop_vaccinated)*619147 
initial_number_infected <- 1 
initial_number_recovered <- prop_vaccinated*619147
Duration_of_infectiousness <- 7 
recovery_rate <- 1/7
Reproduction_number <- 2.5 #R0== beta/gamma,
infection_rate <- Reproduction_number*(1/Duration_of_infectiousness) #beta
follow_up_duration <- 300
## 2. Create model input using parameters
initial_state_values <- c(S = initial_number_susceptible, # Adding susceptibles
                          I = initial_number_infected, 
                          R = initial_number_recovered) 

parameters <- c(beta = infection_rate, # Adding beta parameter 
                gamma = recovery_rate) 

times <- seq(from = 0, to = follow_up_duration, by = 1) 

## 3. Create a model using input values

cohort_model <- function(time, state, parameters) {     
   with(as.list(c(state, parameters)), { 
     N <- S+I+R # Total population
     lambda <- beta * I/N #Calculating lambda as function of beta
     dS <- -lambda * S               
     dI <- lambda * S - gamma * I    
     dR <- gamma * I   
   return(list(c(dS, dI, dR)))                             
   })
}
## 4. Return a dataframe as output using model equations
output <- as.data.frame(ode(y = initial_state_values, 
                            times = times, 
                            func = cohort_model,
                            parms = parameters))
## 5. Exploratory Data Analysis (Visualisation) of the output
dat <- output %>% pivot_longer(cols = c(S,I,R),
                        names_to = "state",
                        values_to = "value")
dat %>% ggplot(aes(x = time,
                   y = value,
                   color = state,
                   group = state))+
  geom_line()+
  xlab("Time(Days)")+
  ylab("Number of persons")+
  labs(title = "Changes in Malaria Suceptible, Infected and Recovered at 20% vaccine coverage")