0.1 Code Description

The following R script processes the results of the simulation. It takes in the simulation results(m_States, m_Costs and m_Effs) matrices and summarizes them into a data frame. The script also generates a plot of the simulation results.

1 Clear the workspace

rm(list = ls())

1.1 Load Libraries

The script begins by loading the necessary R libraries:

library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(reshape2) # For reshaping data
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(ggplot2) # For plotting
library(dampack) # For processing PSA results 

2 Processing results for patients with no intervention

2.1 Load Data

The script loads the simulation results from the rda file

# Load the simulation results from rda file
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_States_risk.rda') # No intervention
# load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_Costs.rda') # No intervention
# load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_Effs.rda') # No intervention
# # load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_States_UC.rda')

2.2 Renaming the dataframes

v_states_names <- c("NE", "ST", "PS", "MI", "PM", "D")

state_occupancy <- as.data.frame(m_States_risk)
# Add individual id variable taken from rownames without the prefix "ind_"
state_occupancy$id <- gsub("ind_", "", rownames(state_occupancy))

# Reshape the data to long format

state_occupancy_long <- melt(state_occupancy, id.vars = "id", variable.name = "cycle", value.name = "state")
# Convert cycle to numeric
state_occupancy_long$cycle <- as.numeric(gsub("cycle_", "", state_occupancy_long$cycle))
# Convert state to factor
state_occupancy_long$state <- factor(state_occupancy_long$state, levels = v_states_names)
head(state_occupancy_long)
##   id cycle state
## 1  1     0    NE
## 2  2     0    NE
## 3  3     0    NE
## 4  4     0    NE
## 5  5     0    NE
## 6  6     0    NE

3 Plot the state occupancy over cycles

ggplot(state_occupancy_long, aes(x = cycle, y = id, fill = state)) +
  geom_tile() +
  scale_fill_manual(values = c("NE" = "green", "ST" = "yellow", "PS" = "orange", "MI" = "red", "PM" = "purple", "D" = "black")) +
  labs(title = "Health State Occupancy Over Cycles",
       x = "Cycle",
       y = "Individual ID",
       fill = "Health State") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

# Plot distribution of state occupancy across the six states for each cycle # ———- 2. Compute proportions by state per cycle ———- # Compute the proportions of individuals in each state for each cycle

state_proportions <- state_occupancy_long %>%
  group_by(cycle, state) %>%
  summarise(proportion = n() / nrow(state_occupancy_long)) %>%
  ungroup()
## `summarise()` has grouped output by 'cycle'. You can override using the
## `.groups` argument.
state_proportions <- state_occupancy_long %>%
  group_by(cycle, state) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(cycle) %>%
  mutate(proportion = count / sum(count))
head(state_proportions)
## # A tibble: 6 × 4
## # Groups:   cycle [3]
##   cycle state  count proportion
##   <dbl> <fct>  <int>      <dbl>
## 1     0 NE    100000    1      
## 2     1 NE     96797    0.968  
## 3     1 ST       606    0.00606
## 4     1 MI       911    0.00911
## 5     1 D       1686    0.0169 
## 6     2 NE     93438    0.934

4 Plot the distribution of state occupancy across the six states for each cycle

ggplot(state_proportions, aes(x = cycle, y = proportion, fill = state)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_manual(values = c("NE" = "green", "ST" = "yellow", "PS" = "orange", "MI" = "red", "PM" = "purple", "D" = "black")) +
  labs(title = "Distribution of Health State Occupancy per Cycle",
       x = "Cycle",
       y = "Proportion",
       fill = "Health State") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

# ———- Plot stacked area chart ———-

# Plot stacked area chart
ggplot(state_proportions, aes(x = cycle, y = proportion, fill = state)) +
  geom_area() +
  scale_fill_manual(values = c("NE" = "green", "ST" = "yellow", "PS" = "orange", "MI" = "red", "PM" = "purple", "D" = "black")) +
  labs(title = "Health State Distribution Over Time",
       x = "Cycle",
       y = "Proportion",
       fill = "Health State") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

# Calculating Life Years Lived

# Calculate life years lived for each individual
life_years_lived <- state_occupancy_long %>%
  group_by(id) %>%
  summarise(life_years = sum(state != "D")) %>%
  ungroup()
# Print the first few rows of the life_years_lived data frame
head(life_years_lived)
## # A tibble: 6 × 2
##   id     life_years
##   <chr>       <int>
## 1 1              29
## 2 10              1
## 3 100            25
## 4 1000           32
## 5 10000          16
## 6 100000         22
# Plot the distribution of life years lived
ggplot(life_years_lived, aes(x = life_years)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black", alpha = 0.7) +
  labs(title = "Distribution of Life Years Lived",
       x = "Life Years Lived",
       y = "Frequency") +
  theme_minimal()

# Calculate the mean life years lived
mean_life_years_no_int <- mean(life_years_lived$life_years)
median_life_years_no_int <- median(life_years_lived$life_years)
cat("Mean life years lived (No Intervention):", mean_life_years_no_int, "\n")
## Mean life years lived (No Intervention): 20.94767
cat("Median life years lived (No Intervention):", median_life_years_no_int, "\n")
## Median life years lived (No Intervention): 20

5 Counting number of CVD events

# Count the number of CVD events(MI and Stroke) for each individual
cvd_events <- state_occupancy_long %>%
  filter(state %in% c("MI", "ST")) %>%
  group_by(id) %>%
  summarise(cvd_events = n()) %>%
  ungroup()
# Print the first few rows of the cvd_events data frame
head(cvd_events)
## # A tibble: 6 × 2
##   id    cvd_events
##   <chr>      <int>
## 1 1              1
## 2 10004          1
## 3 10006          2
## 4 10008          1
## 5 10013          1
## 6 10019          1

6 Plot the distribution of CVD events-To be fixed so as to define the maximum number of events

ggplot(cvd_events, aes(x = cvd_events)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black", alpha = 0.7) +
  labs(title = "Distribution of CVD Events",
       x = "Number of CVD Events",
       y = "Frequency") +
  theme_minimal()

# Count the number of CVD events(MI and Stroke) for each individual-separate stroke and MI events

# Count the number of MI events for each individual
mi_events <- state_occupancy_long %>%
  filter(state == "MI") %>%
  group_by(id) %>%
  summarise(mi_events = n()) %>%
  ungroup()
# Count the number of Stroke events for each individual
stroke_events <- state_occupancy_long %>%
  filter(state == "ST") %>%
  group_by(id) %>%
  summarise(stroke_events = n()) %>%
  ungroup()
# Count the number of deaths
deaths <- state_occupancy_long %>%
  filter(state == "D") %>%
  group_by(id) %>%
  summarise(deaths = n()) %>%
  ungroup()
# Print the first few rows of the mi_events and stroke_events data frames
head(mi_events)
## # A tibble: 6 × 2
##   id    mi_events
##   <chr>     <int>
## 1 1             1
## 2 10006         2
## 3 10008         1
## 4 10013         1
## 5 10019         1
## 6 10027         1
head(stroke_events)
## # A tibble: 6 × 2
##   id    stroke_events
##   <chr>         <int>
## 1 10004             1
## 2 10030             1
## 3 10031             1
## 4 10033             1
## 5 10036             1
## 6 10040             1
head(deaths)
## # A tibble: 6 × 2
##   id     deaths
##   <chr>   <int>
## 1 1          32
## 2 10         60
## 3 100        36
## 4 1000       29
## 5 10000      45
## 6 100000     39

7 Plot the distribution of MI events

ggplot(mi_events, aes(x = mi_events)) +
  geom_histogram(binwidth = 1, fill = "red", color = "black", alpha = 0.7) +
  labs(title = "Distribution of MI Events_No Intervention",
       x = "Number of MI Events",
       y = "Frequency") +
  theme_minimal()

# Plot the distribution of Stroke events

ggplot(stroke_events, aes(x = stroke_events)) +
  geom_histogram(binwidth = 1, fill = "green", color = "black", alpha = 0.7) +
  labs(title = "Distribution of Stroke Events_No Intervention",
       x = "Number of Stroke Events",
       y = "Frequency") +
  theme_minimal()

# Plot the distribution of MI and Stroke events-No intervention

# Combine the MI and Stroke events data frames
cvd_events_combined <- merge(mi_events, stroke_events, by = "id", all = TRUE)
# Replace NA values with 0
cvd_events_combined[is.na(cvd_events_combined)] <- 0
# Reshape the data to long format
cvd_events_long <- melt(cvd_events_combined, id.vars = "id", variable.name = "event_type", value.name = "event_count")
# Convert event_type to factor
cvd_events_long$event_type <- factor(cvd_events_long$event_type, levels = c("mi_events", "stroke_events"), labels = c("MI Events", "Stroke Events"))
# Plot the distribution of MI and Stroke events
ggplot(cvd_events_long, aes(x = event_count, fill = event_type)) +
  geom_histogram(binwidth = 1, position = "identity", alpha = 0.7) +
  labs(title = "Distribution of MI and Stroke Events (No Intervention)",
       x = "Number of Events",
       y = "Frequency") +
  scale_fill_manual(values = c("MI Events" = "red", "Stroke Events" = "green")) +
  theme_minimal()

# Plot the number of CVD events per person over their lifetime

# Combine the MI and Stroke events data frames
cvd_events_combined <- merge(mi_events, stroke_events, by = "id", all = TRUE)
# Replace NA values with 0
cvd_events_combined[is.na(cvd_events_combined)] <- 0
# Reshape the data to long format
cvd_events_long <- melt(cvd_events_combined, id.vars = "id", variable.name = "event_type", value.name = "event_count")
# Convert event_type to factor
cvd_events_long$event_type <- factor(cvd_events_long$event_type, levels = c("mi_events", "stroke_events"), labels = c("MI Events", "Stroke Events"))
# Summarise the data to get the total number of events per person-separate MI and Stroke events
cvd_events_summary <- cvd_events_long %>%
  group_by(id) %>%
  summarise(total_mi_events = sum(event_count[event_type == "MI Events"]),
            total_stroke_events = sum(event_count[event_type == "Stroke Events"])) %>%
  ungroup()
# Plot the number of CVD events per person over their lifetime
ggplot(cvd_events_summary, aes(x = id)) +
  geom_bar(aes(y = total_mi_events), stat = "identity", fill = "red", alpha = 0.7) +
  geom_bar(aes(y = total_stroke_events), stat = "identity", fill = "green", alpha = 0.7) +
  labs(title = "Number of CVD Events per Person Over Their Lifetime",
       x = "Individual ID",
       y = "Number of Events") +
  scale_y_continuous(sec.axis = sec_axis(~., name = "Number of Events")) +
  theme_minimal()

hist(cvd_events_summary$total_mi_events, breaks = 20, main = "Distribution of MI Events", xlab = "Number of MI Events", col = "red")

# Count the number of deaths

# Count the total number of deaths for all individuals- Only take the first instance of death per individual
deaths <- state_occupancy_long %>%
  filter(state == "D") %>%
  distinct(id) %>%
mutate(deaths = 1) 

# Compute the total number of deaths across all individuals
total_deaths <- nrow(deaths)
# Print the total number of deaths
cat("Total number of deaths:", total_deaths, "\n")
## Total number of deaths: 100000

8 Count only the first instance of MI and stroke events per individual

# Count only the first instance of MI events per individual
mi_first_instance <- state_occupancy_long %>%
  filter(state == "MI") %>%
  group_by(id) %>%
  summarise(mi_first_instance = n() > 0) %>%
  ungroup()
# Count only the first instance of Stroke events per individual
stroke_first_instance <- state_occupancy_long %>%
  filter(state == "ST") %>%
  group_by(id) %>%
  summarise(stroke_first_instance = n() > 0) %>%
  ungroup()

# View the first few rows of the mi_first_instance and stroke_first_instance data frames
head(mi_first_instance)
## # A tibble: 6 × 2
##   id    mi_first_instance
##   <chr> <lgl>            
## 1 1     TRUE             
## 2 10006 TRUE             
## 3 10008 TRUE             
## 4 10013 TRUE             
## 5 10019 TRUE             
## 6 10027 TRUE
head(stroke_first_instance)
## # A tibble: 6 × 2
##   id    stroke_first_instance
##   <chr> <lgl>                
## 1 10004 TRUE                 
## 2 10030 TRUE                 
## 3 10031 TRUE                 
## 4 10033 TRUE                 
## 5 10036 TRUE                 
## 6 10040 TRUE
# Count the number of individuals with MI events
mi_first_count <- nrow(mi_first_instance[mi_first_instance$mi_first_instance == TRUE, ])
# Count the number of individuals with Stroke events
stroke_first_count <- nrow(stroke_first_instance[stroke_first_instance$stroke_first_instance == TRUE, ])
# Print the counts
cat("Number of individuals with first instance of MI events:", mi_first_count, "\n")
## Number of individuals with first instance of MI events: 18844
cat("Number of individuals with first instance of Stroke events:", stroke_first_count, "\n")
## Number of individuals with first instance of Stroke events: 13951

9 Summarize people with each event and both events

# Count the number of individuals with MI events
mi_count <- nrow(cvd_events[cvd_events$cvd_events > 0, ])
# Count the number of individuals with Stroke events
stroke_count <- nrow(stroke_events[stroke_events$stroke_events > 0, ])
# Count the number of individuals with both MI and Stroke events
both_count <- nrow(cvd_events_combined[cvd_events_combined$mi_events > 0 & cvd_events_combined$stroke_events > 0, ])
# Print the counts
cat("Number of individuals with MI events:", mi_count, "\n")
## Number of individuals with MI events: 29388
cat("Number of individuals with Stroke events:", stroke_count, "\n")
## Number of individuals with Stroke events: 13951
cat("Number of individuals with both MI and Stroke events:", both_count, "\n")
## Number of individuals with both MI and Stroke events: 3407
cat("Number of individuals with first instance of MI events:", mi_first_count, "\n")
## Number of individuals with first instance of MI events: 18844
cat("Number of individuals with first instance of Stroke events:", stroke_first_count, "\n")
## Number of individuals with first instance of Stroke events: 13951
cat("Total number of deaths:", total_deaths, "\n")
## Total number of deaths: 100000

10 Plot the costs over cycles

# Clear the workspace from previous data frames
rm(m_States_risk)
# Load the simulation results from rda file
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_Costs_risk.rda') # No intervention
costs_df <- as.data.frame(m_Costs_risk)
# Add individual id variable taken from rownames without the prefix "ind_"
costs_df$id <- gsub("ind_", "", rownames(costs_df))
costs_long <- melt(costs_df, id.vars = "id", variable.name = "cycle", value.name = "cost")
# Convert cycle to numeric
costs_long$cycle <- as.numeric(gsub("cycle_", "", costs_long$cycle))
# Plot the costs over cycles
ggplot(costs_long, aes(x = cycle, y = id, fill = cost)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "black") +
  labs(title = "Costs Over Cycles",
       x = "Cycle",
       y = "Individual ID",
       fill = "Cost") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

# Plot mean costs per cycle

# Calculate mean costs per cycle
mean_costs <- costs_long %>%
  group_by(cycle) %>%
  summarise(mean_cost = mean(cost, na.rm = TRUE)) %>%
  ungroup()
# Plot mean costs per cycle
ggplot(mean_costs, aes(x = cycle, y = mean_cost)) +
  geom_line() +
  labs(title = "Mean Costs per Cycle",
       x = "Cycle",
       y = "Mean Cost") +
  theme_minimal()

# Plot the DALYs over cycles

# Clear the workspace from previous data frames
rm(m_States_risk, m_Costs_risk)
## Warning in rm(m_States_risk, m_Costs_risk): object 'm_States_risk' not found
# Load the simulation results from rda file
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_Effs_risk.rda') # No intervention
effs_df <- as.data.frame(m_Effs_risk)
# Add individual id variable taken from rownames without the prefix "ind_"
effs_df$id <- gsub("ind_", "", rownames(effs_df))
effs_long <- melt(effs_df, id.vars = "id", variable.name = "cycle", value.name = "eff")
# Convert cycle to numeric
effs_long$cycle <- as.numeric(gsub("cycle_", "", effs_long$cycle))
# Plot the DALYs over cycles
ggplot(effs_long, aes(x = cycle, y = id, fill = eff)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "black") +
  labs(title = "DALYs Over Cycles",
       x = "Cycle",
       y = "Individual ID",
       fill = "DALY") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

# Plot mean DALYs per cycle

# Calculate mean DALYs per cycle
mean_effs <- effs_long %>%
  group_by(cycle) %>%
  summarise(mean_eff = mean(eff, na.rm = TRUE)) %>%
  ungroup()
# Plot mean DALYs per cycle
ggplot(mean_effs, aes(x = cycle, y = mean_eff)) +
  geom_line() +
  labs(title = "Mean DALYs per Cycle",
       x = "Cycle",
       y = "Mean DALY") +
  theme_minimal()

## Repeat process for patients on Empower Health intervention

# Clear the workspace from previous data frames for no intervention
rm(m_Effs_risk)
# Load the simulation results from rda file for patients on Empower Health intervention
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_States_Emp_risk.rda') # On Empower Health intervention
# load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_Costs_Emp.rda') # On Empower Health intervention
# load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_Effs_Emp.rda') # On Empower Health intervention
# load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_MI_Risks_Emp.rda') # On Empower Health intervention

11 Renaming the dataframes for patients on Empower Health intervention

v_states_names <- c("NE", "ST", "PS", "MI", "PM", "D")
state_occupancy_emp <- as.data.frame(m_States_Emp_risk)
# Add individual id variable taken from rownames without the prefix "ind_"
state_occupancy_emp$id <- gsub("ind_", "", rownames(state_occupancy_emp))
# Reshape the data to long format
state_occupancy_emp_long <- melt(state_occupancy_emp, id.vars = "id", variable.name = "cycle", value.name = "state")
# Convert cycle to numeric
state_occupancy_emp_long$cycle <- as.numeric(gsub("cycle_", "", state_occupancy_emp_long$cycle))
# Convert state to factor
state_occupancy_emp_long$state <- factor(state_occupancy_emp_long$state, levels = v_states_names)
head(state_occupancy_emp_long)
##   id cycle state
## 1  1     0    NE
## 2  2     0    NE
## 3  3     0    NE
## 4  4     0    NE
## 5  5     0    NE
## 6  6     0    NE

12 Plot the state occupancy over cycles for patients on Empower Health intervention

ggplot(state_occupancy_emp_long, aes(x = cycle, y = id, fill = state)) +
  geom_tile() +
  scale_fill_manual(values = c("NE" = "green", "ST" = "yellow", "PS" = "orange", "MI" = "red", "PM" = "purple", "D" = "black")) +
  labs(title = "Health State Occupancy Over Cycles (Empower Health Intervention)",
       x = "Cycle",
       y = "Individual ID",
       fill = "Health State") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

# Plot distribution of state occupancy across the six states for each cycle for patients on Empower Health intervention

# Compute the proportions of individuals in each state for each cycle for patients on Empower Health intervention
state_proportions_emp <- state_occupancy_emp_long %>%
  group_by(cycle, state) %>%
  summarise(proportion = n() / nrow(state_occupancy_emp_long)) %>%
  ungroup()
## `summarise()` has grouped output by 'cycle'. You can override using the
## `.groups` argument.
state_proportions_emp <- state_occupancy_emp_long %>%
  group_by(cycle, state) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(cycle) %>%
  mutate(proportion = count / sum(count))
head(state_proportions_emp)
## # A tibble: 6 × 4
## # Groups:   cycle [3]
##   cycle state  count proportion
##   <dbl> <fct>  <int>      <dbl>
## 1     0 NE    100000    1      
## 2     1 NE     96942    0.969  
## 3     1 ST       560    0.0056 
## 4     1 MI       812    0.00812
## 5     1 D       1686    0.0169 
## 6     2 NE     93774    0.938
# Plot the distribution of state occupancy across the six states for each cycle for patients on Empower Health intervention
ggplot(state_proportions_emp, aes(x = cycle, y = proportion, fill = state)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_manual(values = c("NE" = "green", "ST" = "yellow", "PS" = "orange", "MI" = "red", "PM" = "purple", "D" = "black")) +
  labs(title = "Distribution of Health State Occupancy per Cycle (Empower Health Intervention)",
       x = "Cycle",
       y = "Proportion",
       fill = "Health State") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

# Plot stacked area chart for patients on Empower Health intervention

# Plot stacked area chart for patients on Empower Health intervention
ggplot(state_proportions_emp, aes(x = cycle, y = proportion, fill = state)) +
  geom_area() +
  scale_fill_manual(values = c("NE" = "green", "ST" = "yellow", "PS" = "orange", "MI" = "red", "PM" = "purple", "D" = "black")) +
  labs(title = "Health State Distribution Over Time (Empower Health Intervention)",
       x = "Cycle",
       y = "Proportion",
       fill = "Health State") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

# Calculating Life Years Lived for patients on Empower Health intervention

# Calculate life years lived for each individual for patients on Empower Health intervention
life_years_lived_emp <- state_occupancy_emp_long %>%
  group_by(id) %>%
  summarise(life_years = sum(state != "D")) %>%
  ungroup()
# Print the first few rows of the life_years_lived_emp data frame
head(life_years_lived_emp)
## # A tibble: 6 × 2
##   id     life_years
##   <chr>       <int>
## 1 1              29
## 2 10              1
## 3 100            25
## 4 1000           32
## 5 10000          16
## 6 100000         22
# Plot the distribution of life years lived for patients on Empower Health intervention
ggplot(life_years_lived_emp, aes(x = life_years)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black", alpha = 0.7) +
  labs(title = "Distribution of Life Years Lived (Empower Health Intervention)",
       x = "Life Years Lived",
       y = "Frequency") +
  theme_minimal()

# Calculate the mean life years lived for patients on Empower Health intervention
mean_life_years_emp <- mean(life_years_lived_emp$life_years)
median_life_years_emp <- median(life_years_lived_emp$life_years)
cat("Mean life years lived (Empower Health Intervention):", mean_life_years_emp, "\n")
## Mean life years lived (Empower Health Intervention): 21.13843
cat("Median life years lived (Empower Health Intervention):", median_life_years_emp, "\n")
## Median life years lived (Empower Health Intervention): 20

13 Counting number of CVD events for patients on Empower Health intervention

# Count the number of CVD events(MI and Stroke) for each individual for patients on Empower Health intervention
cvd_events_emp <- state_occupancy_emp_long %>%
  filter(state %in% c("MI", "ST")) %>%
  group_by(id) %>%
  summarise(cvd_events = n()) %>%
  ungroup()
# Print the first few rows of the cvd_events_emp data frame
head(cvd_events_emp)
## # A tibble: 6 × 2
##   id    cvd_events
##   <chr>      <int>
## 1 1              1
## 2 10004          1
## 3 10006          2
## 4 10008          1
## 5 10013          1
## 6 10019          1

14 Plot the distribution of CVD events for patients on Empower Health intervention

ggplot(cvd_events_emp, aes(x = cvd_events)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black", alpha = 0.7) +
  labs(title = "Distribution of CVD Events (Empower Health Intervention)",
       x = "Number of CVD Events",
       y = "Frequency") +
  theme_minimal()

# Count the number of CVD events(MI and Stroke) for each individual-separate stroke and MI events for patients on Empower Health intervention

# Count the number of MI events for each individual for patients on Empower Health intervention
mi_events_emp <- state_occupancy_emp_long %>%
  filter(state == "MI") %>%
  group_by(id) %>%
  summarise(mi_events = n()) %>%
  ungroup()
# Count the number of Stroke events for each individual for patients on Empower Health intervention
stroke_events_emp <- state_occupancy_emp_long %>%
  filter(state == "ST") %>%
  group_by(id) %>%
  summarise(stroke_events = n()) %>%
  ungroup()
# Print the first few rows of the mi_events_emp and stroke_events_emp data frames
head(mi_events_emp)
## # A tibble: 6 × 2
##   id    mi_events
##   <chr>     <int>
## 1 1             1
## 2 10006         2
## 3 10008         1
## 4 10013         1
## 5 10027         1
## 6 10033         1
head(stroke_events_emp)
## # A tibble: 6 × 2
##   id    stroke_events
##   <chr>         <int>
## 1 10004             1
## 2 10019             1
## 3 10030             1
## 4 10033             1
## 5 10036             1
## 6 10040             1

15 Plot the distribution of MI events for patients on Empower Health intervention

ggplot(mi_events_emp, aes(x = mi_events)) +
  geom_histogram(binwidth = 1, fill = "red", color = "black", alpha = 0.7) +
  labs(title = "Distribution of MI Events (Empower Health Intervention)",
       x = "Number of MI Events",
       y = "Frequency") +
  theme_minimal()

# Plot the distribution of Stroke events for patients on Empower Health intervention

ggplot(stroke_events_emp, aes(x = stroke_events)) +
  geom_histogram(binwidth = 1, fill = "green", color = "black", alpha = 0.7) +
  labs(title = "Distribution of Stroke Events (Empower Health Intervention)",
       x = "Number of Stroke Events",
       y = "Frequency") +
  theme_minimal()

# Plot the distribution of MI and Stroke events for patients on Empower Health intervention

# Combine the MI and Stroke events data frames for patients on Empower Health intervention
cvd_events_combined_emp <- merge(mi_events_emp, stroke_events_emp, by = "id", all = TRUE)
# Replace NA values with 0
cvd_events_combined_emp[is.na(cvd_events_combined_emp)] <- 0
# Reshape the data to long format
cvd_events_long_emp <- melt(cvd_events_combined_emp, id.vars = "id", variable.name = "event_type", value.name = "event_count")
# Convert event_type to factor
cvd_events_long_emp$event_type <- factor(cvd_events_long_emp$event_type, levels = c("mi_events", "stroke_events"), labels = c("MI Events", "Stroke Events"))
# Plot the distribution of MI and Stroke events for patients on Empower Health intervention
ggplot(cvd_events_long_emp, aes(x = event_count, fill = event_type)) +
  geom_histogram(binwidth = 1, position = "identity", alpha = 0.7) +
  labs(title = "Distribution of MI and Stroke Events (Empower Health Intervention)",
       x = "Number of Events",
       y = "Frequency") +
  scale_fill_manual(values = c("MI Events" = "red", "Stroke Events" = "green")) +
  theme_minimal()

# Plot the number of CVD events per person over their lifetime for patients on Empower Health intervention

# Combine the MI and Stroke events data frames for patients on Empower Health intervention
cvd_events_combined_emp <- merge(mi_events_emp, stroke_events_emp, by = "id", all = TRUE)
# Replace NA values with 0
cvd_events_combined_emp[is.na(cvd_events_combined_emp)] <- 0
# Reshape the data to long format
cvd_events_long_emp <- melt(cvd_events_combined_emp, id.vars = "id", variable.name = "event_type", value.name = "event_count")
# Convert event_type to factor
cvd_events_long_emp$event_type <- factor(cvd_events_long_emp$event_type, levels = c("mi_events", "stroke_events"), labels = c("MI Events", "Stroke Events"))
# Summarise the data to get the total number of events per person for patients on Empower Health intervention
cvd_events_summary_emp <- cvd_events_long_emp %>%
  group_by(id) %>%
  summarise(total_mi_events = sum(event_count[event_type == "MI Events"]),
            total_stroke_events = sum(event_count[event_type == "Stroke Events"])) %>%
  ungroup()
# Plot the number of CVD events per person over their lifetime for patients on Empower Health intervention
ggplot(cvd_events_summary_emp, aes(x = id)) +
  geom_bar(aes(y = total_mi_events), stat = "identity", fill = "red", alpha = 0.7) +
  geom_bar(aes(y = total_stroke_events), stat = "identity", fill = "green", alpha = 0.7) +
  labs(title = "Number of CVD Events per Person Over Their Lifetime (Empower Health Intervention)",
       x = "Individual ID",
       y = "Number of Events") +
  scale_y_continuous(sec.axis = sec_axis(~., name = "Number of Events")) +
  theme_minimal()

hist(cvd_events_summary_emp$total_mi_events, breaks = 20, main = "Distribution of MI Events (Empower Health Intervention)", xlab = "Number of MI Events", col = "red")

# Count the number of deaths for patients on Empower Health intervention

# Count the total number of deaths for all individuals for patients on Empower Health intervention- Only take the first instance of death per individual
deaths_emp <- state_occupancy_emp_long %>%
  filter(state == "D") %>%
  distinct(id) %>%
  mutate(deaths = 1)
# Compute the total number of deaths across all individuals for patients on Empower Health intervention
total_deaths_emp <- nrow(deaths_emp)
# Print the total number of deaths for patients on Empower Health intervention
cat("Total number of deaths (Empower Health Intervention):", total_deaths_emp, "\n")
## Total number of deaths (Empower Health Intervention): 100000

16 Count only the first instance of MI and stroke events per individual for patients on Empower Health intervention

# Count only the first instance of MI events per individual for patients on Empower Health intervention
mi_first_instance_emp <- state_occupancy_emp_long %>%
  filter(state == "MI") %>%
  group_by(id) %>%
  summarise(mi_first_instance = n() > 0) %>%
  ungroup()
# Count only the first instance of Stroke events per individual for patients on Empower Health intervention
stroke_first_instance_emp <- state_occupancy_emp_long %>%
  filter(state == "ST") %>%
  group_by(id) %>%
  summarise(stroke_first_instance = n() > 0) %>%
  ungroup()
# View the first few rows of the mi_first_instance_emp and stroke_first_instance_emp data frames
head(mi_first_instance_emp)
## # A tibble: 6 × 2
##   id    mi_first_instance
##   <chr> <lgl>            
## 1 1     TRUE             
## 2 10006 TRUE             
## 3 10008 TRUE             
## 4 10013 TRUE             
## 5 10027 TRUE             
## 6 10033 TRUE
head(stroke_first_instance_emp)
## # A tibble: 6 × 2
##   id    stroke_first_instance
##   <chr> <lgl>                
## 1 10004 TRUE                 
## 2 10019 TRUE                 
## 3 10030 TRUE                 
## 4 10033 TRUE                 
## 5 10036 TRUE                 
## 6 10040 TRUE
# Count the number of individuals with MI events for patients on Empower Health intervention
mi_first_count_emp <- nrow(mi_first_instance_emp[mi_first_instance_emp$mi_first_instance == TRUE, ])
# Count the number of individuals with Stroke events for patients on Empower Health intervention
stroke_first_count_emp <- nrow(stroke_first_instance_emp[stroke_first_instance_emp$stroke_first_instance == TRUE, ])
# Print the counts for patients on Empower Health intervention
cat("Number of individuals with first instance of MI events (Empower Health Intervention):", mi_first_count_emp, "\n")
## Number of individuals with first instance of MI events (Empower Health Intervention): 17129
cat("Number of individuals with first instance of Stroke events (Empower Health Intervention):", stroke_first_count_emp, "\n")
## Number of individuals with first instance of Stroke events (Empower Health Intervention): 12490

17 Summarize people with each event and both events for patients on Empower Health intervention

# Count the number of individuals with MI events for patients on Empower Health intervention
mi_count_emp <- nrow(cvd_events_emp[cvd_events_emp$cvd_events > 0, ])
# Count the number of individuals with Stroke events for patients on Empower Health intervention
stroke_count_emp <- nrow(stroke_events_emp[stroke_events_emp$stroke_events > 0, ])
# Count the number of individuals with both MI and Stroke events for patients on Empower Health intervention
both_count_emp <- nrow(cvd_events_combined_emp[cvd_events_combined_emp$mi_events > 0 & cvd_events_combined_emp$stroke_events > 0, ])
# Print the counts for patients on Empower Health intervention
cat("Number of individuals with MI events (Empower Health Intervention):", mi_count_emp, "\n")
## Number of individuals with MI events (Empower Health Intervention): 26551
cat("Number of individuals with Stroke events (Empower Health Intervention):", stroke_count_emp, "\n")
## Number of individuals with Stroke events (Empower Health Intervention): 12490
cat("Number of individuals with both MI and Stroke events (Empower Health Intervention):", both_count_emp, "\n")
## Number of individuals with both MI and Stroke events (Empower Health Intervention): 3068
cat("Number of individuals with first instance of MI events (Empower Health Intervention):", mi_first_count_emp, "\n")
## Number of individuals with first instance of MI events (Empower Health Intervention): 17129
cat("Number of individuals with first instance of Stroke events (Empower Health Intervention):", stroke_first_count_emp, "\n")
## Number of individuals with first instance of Stroke events (Empower Health Intervention): 12490
cat("Total number of deaths (Empower Health Intervention):", total_deaths_emp, "\n")
## Total number of deaths (Empower Health Intervention): 100000

18 Plot the costs over cycles for patients on Empower Health intervention

# Clear the workspace from previous data frames for patients on Empower Health intervention
rm(m_States_Emp_risk)
# Load the simulation results from rda file for patients on Empower Health intervention
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_Costs_Emp_risk.rda') # On Empower Health intervention
costs_emp_df <- as.data.frame(m_Costs_Emp_risk)
# Add individual id variable taken from rownames without the prefix "ind_"
costs_emp_df$id <- gsub("ind_", "", rownames(costs_emp_df))
costs_emp_long <- melt(costs_emp_df, id.vars = "id", variable.name = "cycle", value.name = "cost")
# Convert cycle to numeric
costs_emp_long$cycle <- as.numeric(gsub("cycle_", "", costs_emp_long$cycle))
# Plot the costs over cycles for patients on Empower Health intervention
ggplot(costs_emp_long, aes(x = cycle, y = id, fill = cost)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "black") +
  labs(title = "Costs Over Cycles (Empower Health Intervention)",
       x = "Cycle",
       y = "Individual ID",
       fill = "Cost") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

# Plot mean costs per cycle for patients on Empower Health intervention

# Calculate mean costs per cycle for patients on Empower Health intervention
mean_costs_emp <- costs_emp_long %>%
  group_by(cycle) %>%
  summarise(mean_cost = mean(cost, na.rm = TRUE)) %>%
  ungroup()
# Plot mean costs per cycle for patients on Empower Health intervention
ggplot(mean_costs_emp, aes(x = cycle, y = mean_cost)) +
  geom_line() +
  labs(title = "Mean Costs per Cycle (Empower Health Intervention)",
       x = "Cycle",
       y = "Mean Cost") +
  theme_minimal()

# Plot the DALYs over cycles for patients on Empower Health intervention

# Clear the workspace from previous data frames for patients on Empower Health intervention
rm(m_Costs_Emp_risk)
# Load the simulation results from rda file for patients on Empower Health intervention
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_Effs_Emp_risk.rda') # On Empower Health intervention
effs_emp_df <- as.data.frame(m_Effs_Emp_risk)
# Add individual id variable taken from rownames without the prefix "ind_"
effs_emp_df$id <- gsub("ind_", "", rownames(effs_emp_df))
effs_emp_long <- melt(effs_emp_df, id.vars = "id", variable.name = "cycle", value.name = "eff")
# Convert cycle to numeric
effs_emp_long$cycle <- as.numeric(gsub("cycle_", "", effs_emp_long$cycle))
# Plot the DALYs over cycles for patients on Empower Health intervention
ggplot(effs_emp_long, aes(x = cycle, y = id, fill = eff)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "black") +
  labs(title = "DALYs Over Cycles (Empower Health Intervention)",
       x = "Cycle",
       y = "Individual ID",
       fill = "DALY") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

# Plot mean DALYs per cycle for patients on Empower Health intervention

# Calculate mean DALYs per cycle for patients on Empower Health intervention
mean_effs_emp <- effs_emp_long %>%
  group_by(cycle) %>%
  summarise(mean_eff = mean(eff, na.rm = TRUE)) %>%
  ungroup()
# Plot mean DALYs per cycle for patients on Empower Health intervention
ggplot(mean_effs_emp, aes(x = cycle, y = mean_eff)) +
  geom_line() +
  labs(title = "Mean DALYs per Cycle (Empower Health Intervention)",
       x = "Cycle",
       y = "Mean DALY") +
  theme_minimal()

## Repeat process for patients on Usual Care intervention

# Clear the workspace from previous data frames for patients on Empower Health intervention
rm(m_Effs_Emp_risk)
# Load the simulation results from rda file for patients on Usual Care intervention
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_States_UC_risk.rda') # On Usual Care intervention
# load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_Costs_UC.rda') # On Usual Care intervention
# load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_Effs_UC.rda') # On Usual Care intervention
# # load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_MI_Risks_UC.rda') # On Usual Care intervention

19 Renaming the dataframes for patients on Usual Care intervention

v_states_names <- c("NE", "ST", "PS", "MI", "PM", "D")
state_occupancy_uc <- as.data.frame(m_States_UC_risk)
# Add individual id variable taken from rownames without the prefix "ind_"
state_occupancy_uc$id <- gsub("ind_", "", rownames(state_occupancy_uc))
# Reshape the data to long format
state_occupancy_uc_long <- melt(state_occupancy_uc, id.vars = "id", variable.name = "cycle", value.name = "state")
# Convert cycle to numeric
state_occupancy_uc_long$cycle <- as.numeric(gsub("cycle_", "", state_occupancy_uc_long$cycle))
# Convert state to factor
state_occupancy_uc_long$state <- factor(state_occupancy_uc_long$state, levels = v_states_names)
head(state_occupancy_uc_long)
##   id cycle state
## 1  1     0    NE
## 2  2     0    NE
## 3  3     0    NE
## 4  4     0    NE
## 5  5     0    NE
## 6  6     0    NE

20 Plot the state occupancy over cycles for patients on Usual Care intervention

ggplot(state_occupancy_uc_long, aes(x = cycle, y = id, fill = state)) +
  geom_tile() +
  scale_fill_manual(values = c("NE" = "green", "ST" = "yellow", "PS" = "orange", "MI" = "red", "PM" = "purple", "D" = "black")) +
  labs(title = "Health State Occupancy Over Cycles (Usual Care Intervention)",
       x = "Cycle",
       y = "Individual ID",
       fill = "Health State") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

# Plot distribution of state occupancy across the six states for each cycle for patients on Usual Care intervention

# Compute the proportions of individuals in each state for each cycle for patients on Usual Care intervention
state_proportions_uc <- state_occupancy_uc_long %>%
  group_by(cycle, state) %>%
  summarise(proportion = n() / nrow(state_occupancy_uc_long)) %>%
  ungroup()
## `summarise()` has grouped output by 'cycle'. You can override using the
## `.groups` argument.
state_proportions_uc <- state_occupancy_uc_long %>%
  group_by(cycle, state) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(cycle) %>%
  mutate(proportion = count / sum(count))
head(state_proportions_uc)
## # A tibble: 6 × 4
## # Groups:   cycle [3]
##   cycle state  count proportion
##   <dbl> <fct>  <int>      <dbl>
## 1     0 NE    100000    1      
## 2     1 NE     96840    0.968  
## 3     1 ST       593    0.00593
## 4     1 MI       881    0.00881
## 5     1 D       1686    0.0169 
## 6     2 NE     93549    0.935
# Plot the distribution of state occupancy across the six states for each cycle for patients on Usual Care intervention
ggplot(state_proportions_uc, aes(x = cycle, y = proportion, fill = state)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_manual(values = c("NE" = "green", "ST" = "yellow", "PS" = "orange", "MI" = "red", "PM" = "purple", "D" = "black")) +
  labs(title = "Distribution of Health State Occupancy per Cycle (Usual Care Intervention)",
       x = "Cycle",
       y = "Proportion",
       fill = "Health State") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

# Plot stacked area chart for patients on Usual Care intervention

# Plot stacked area chart for patients on Usual Care intervention
ggplot(state_proportions_uc, aes(x = cycle, y = proportion, fill = state)) +
  geom_area() +
  scale_fill_manual(values = c("NE" = "green", "ST" = "yellow", "PS" = "orange", "MI" = "red", "PM" = "purple", "D" = "black")) +
  labs(title = "Health State Distribution Over Time (Usual Care Intervention)",
       x = "Cycle",
       y = "Proportion",
       fill = "Health State") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

# Calculating Life Years Lived for patients on Usual Care intervention

# Calculate life years lived for each individual for patients on Usual Care intervention
life_years_lived_uc <- state_occupancy_uc_long %>%
  group_by(id) %>%
  summarise(life_years = sum(state != "D")) %>%
  ungroup()
# Print the first few rows of the life_years_lived_uc data frame
head(life_years_lived_uc)
## # A tibble: 6 × 2
##   id     life_years
##   <chr>       <int>
## 1 1              29
## 2 10              1
## 3 100            25
## 4 1000           32
## 5 10000          16
## 6 100000         22
# Plot the distribution of life years lived for patients on Usual Care intervention
ggplot(life_years_lived_uc, aes(x = life_years)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black", alpha = 0.7) +
  labs(title = "Distribution of Life Years Lived (Usual Care Intervention)",
       x = "Life Years Lived",
       y = "Frequency") +
  theme_minimal()

# Calculate the mean life years lived for patients on Usual Care intervention
mean_life_years_uc <- mean(life_years_lived_uc$life_years)
median_life_years_uc <- median(life_years_lived_uc$life_years)
cat("Mean life years lived (Usual Care Intervention):", mean_life_years_uc, "\n")
## Mean life years lived (Usual Care Intervention): 21.01021
cat("Median life years lived (Usual Care Intervention):", median_life_years_uc, "\n")
## Median life years lived (Usual Care Intervention): 20

21 Counting number of CVD events for patients on Usual Care intervention

# Count the number of CVD events(MI and Stroke) for each individual for patients on Usual Care intervention
cvd_events_uc <- state_occupancy_uc_long %>%
  filter(state %in% c("MI", "ST")) %>%
  group_by(id) %>%
  summarise(cvd_events = n()) %>%
  ungroup()
# Print the first few rows of the cvd_events_uc data frame
head(cvd_events_uc)
## # A tibble: 6 × 2
##   id    cvd_events
##   <chr>      <int>
## 1 1              1
## 2 10004          1
## 3 10006          2
## 4 10008          1
## 5 10013          1
## 6 10019          1

22 Plot the distribution of CVD events for patients on Usual Care intervention

ggplot(cvd_events_uc, aes(x = cvd_events)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black", alpha = 0.7) +
  labs(title = "Distribution of CVD Events (Usual Care Intervention)",
       x = "Number of CVD Events",
       y = "Frequency") +
  theme_minimal()

23 Count the number of CVD events(MI and Stroke) for each individual-separate stroke and MI events for patients on Usual Care intervention

# Count the number of MI events for each individual for patients on Usual Care intervention
mi_events_uc <- state_occupancy_uc_long %>%
  filter(state == "MI") %>%
  group_by(id) %>%
  summarise(mi_events = n()) %>%
  ungroup()
# Count the number of Stroke events for each individual for patients on Usual Care intervention
stroke_events_uc <- state_occupancy_uc_long %>%
  filter(state == "ST") %>%
  group_by(id) %>%
  summarise(stroke_events = n()) %>%
  ungroup()
# Print the first few rows of the mi_events_uc and stroke_events_uc data frames
head(mi_events_uc)
## # A tibble: 6 × 2
##   id    mi_events
##   <chr>     <int>
## 1 1             1
## 2 10006         2
## 3 10008         1
## 4 10013         1
## 5 10027         1
## 6 10033         1
head(stroke_events_uc)
## # A tibble: 6 × 2
##   id    stroke_events
##   <chr>         <int>
## 1 10004             1
## 2 10019             1
## 3 10030             1
## 4 10033             1
## 5 10036             1
## 6 10040             1

24 Plot the distribution of MI events for patients on Usual Care intervention

ggplot(mi_events_uc, aes(x = mi_events)) +
  geom_histogram(binwidth = 1, fill = "red", color = "black", alpha = 0.7) +
  labs(title = "Distribution of MI Events (Usual Care Intervention)",
       x = "Number of MI Events",
       y = "Frequency") +
  theme_minimal()

# Plot the distribution of Stroke events for patients on Usual Care intervention

ggplot(stroke_events_uc, aes(x = stroke_events)) +
  geom_histogram(binwidth = 1, fill = "green", color = "black", alpha = 0.7) +
  labs(title = "Distribution of Stroke Events (Usual Care Intervention)",
       x = "Number of Stroke Events",
       y = "Frequency") +
  theme_minimal()

# Plot the distribution of MI and Stroke events for patients on Usual Care intervention

# Combine the MI and Stroke events data frames for patients on Usual Care intervention
cvd_events_combined_uc <- merge(mi_events_uc, stroke_events_uc, by = "id", all = TRUE)
# Replace NA values with 0
cvd_events_combined_uc[is.na(cvd_events_combined_uc)] <- 0
# Reshape the data to long format
cvd_events_long_uc <- melt(cvd_events_combined_uc, id.vars = "id", variable.name = "event_type", value.name = "event_count")
# Convert event_type to factor
cvd_events_long_uc$event_type <- factor(cvd_events_long_uc$event_type, levels = c("mi_events", "stroke_events"), labels = c("MI Events", "Stroke Events"))
# Plot the distribution of MI and Stroke events for patients on Usual Care intervention
ggplot(cvd_events_long_uc, aes(x = event_count, fill = event_type)) +
  geom_histogram(binwidth = 1, position = "identity", alpha = 0.7) +
  labs(title = "Distribution of MI and Stroke Events (Usual Care Intervention)",
       x = "Number of Events",
       y = "Frequency") +
  scale_fill_manual(values = c("MI Events" = "red", "Stroke Events" = "green")) +
  theme_minimal()

# Plot the number of CVD events per person over their lifetime for patients on Usual Care intervention

# Combine the MI and Stroke events data frames for patients on Usual Care intervention
cvd_events_combined_uc <- merge(mi_events_uc, stroke_events_uc, by = "id", all = TRUE)
# Replace NA values with 0
cvd_events_combined_uc[is.na(cvd_events_combined_uc)] <- 0
# Reshape the data to long format
cvd_events_long_uc <- melt(cvd_events_combined_uc, id.vars = "id", variable.name = "event_type", value.name = "event_count")
# Convert event_type to factor
cvd_events_long_uc$event_type <- factor(cvd_events_long_uc$event_type, levels = c("mi_events", "stroke_events"), labels = c("MI Events", "Stroke Events"))
# Summarise the data to get the total number of events per person for patients on Usual Care intervention
cvd_events_summary_uc <- cvd_events_long_uc %>%
  group_by(id) %>%
  summarise(total_mi_events = sum(event_count[event_type == "MI Events"]),
            total_stroke_events = sum(event_count[event_type == "Stroke Events"])) %>%
  ungroup()
# Plot the number of CVD events per person over their lifetime for patients on Usual Care intervention
ggplot(cvd_events_summary_uc, aes(x = id)) +
  geom_bar(aes(y = total_mi_events), stat = "identity", fill = "red", alpha = 0.7) +
  geom_bar(aes(y = total_stroke_events), stat = "identity", fill = "green", alpha = 0.7) +
  labs(title = "Number of CVD Events per Person Over Their Lifetime (Usual Care Intervention)",
       x = "Individual ID",
       y = "Number of Events") +
  scale_y_continuous(sec.axis = sec_axis(~., name = "Number of Events")) +
  theme_minimal()

hist(cvd_events_summary_uc$total_mi_events, breaks = 20, main = "Distribution of MI Events (Usual Care Intervention)", xlab = "Number of MI Events", col = "red")

# Count the number of deaths for patients on Usual Care intervention

# Count the total number of deaths for all individuals for patients on Usual Care intervention- Only take the first instance of death per individual
deaths_uc <- state_occupancy_uc_long %>%
  filter(state == "D") %>%
  distinct(id) %>%
  mutate(deaths = 1)
# Compute the total number of deaths across all individuals for patients on Usual Care intervention
total_deaths_uc <- nrow(deaths_uc)
# Print the total number of deaths for patients on Usual Care intervention
cat("Total number of deaths (Usual Care Intervention):", total_deaths_uc, "\n")
## Total number of deaths (Usual Care Intervention): 100000

25 Count only the first instance of MI and stroke events per individual for patients on Usual Care intervention

# Count only the first instance of MI events per individual for patients on Usual Care intervention
mi_first_instance_uc <- state_occupancy_uc_long %>%
  filter(state == "MI") %>%
  group_by(id) %>%
  summarise(mi_first_instance = n() > 0) %>%
  ungroup()

# Count only the first instance of Stroke events per individual for patients on Usual Care intervention
stroke_first_instance_uc <- state_occupancy_uc_long %>%
  filter(state == "ST") %>%
  group_by(id) %>%
  summarise(stroke_first_instance = n() > 0) %>%
  ungroup()
# View the first few rows of the mi_first_instance_emp and stroke_first_instance_emp data frames
head(mi_first_instance_uc)
## # A tibble: 6 × 2
##   id    mi_first_instance
##   <chr> <lgl>            
## 1 1     TRUE             
## 2 10006 TRUE             
## 3 10008 TRUE             
## 4 10013 TRUE             
## 5 10027 TRUE             
## 6 10033 TRUE
head(stroke_first_instance_uc)
## # A tibble: 6 × 2
##   id    stroke_first_instance
##   <chr> <lgl>                
## 1 10004 TRUE                 
## 2 10019 TRUE                 
## 3 10030 TRUE                 
## 4 10033 TRUE                 
## 5 10036 TRUE                 
## 6 10040 TRUE
# Count the number of individuals with MI events for patients on Empower Health intervention
mi_first_count_uc <- nrow(mi_first_instance_uc[mi_first_instance_uc$mi_first_instance == TRUE, ])
# Count the number of individuals with Stroke events for patients on Empower Health intervention
stroke_first_count_uc <- nrow(stroke_first_instance_uc[stroke_first_instance_uc$stroke_first_instance == TRUE, ])
# Print the counts for patients on Empower Health intervention
cat("Number of individuals with first instance of MI events (Usual Care Intervention):", mi_first_count_uc, "\n")
## Number of individuals with first instance of MI events (Usual Care Intervention): 18246
cat("Number of individuals with first instance of Stroke events (Usual Care Intervention):", stroke_first_count_uc, "\n")
## Number of individuals with first instance of Stroke events (Usual Care Intervention): 13481

26 Summarize people with each event and both events for patients on Usual Care intervention

# Count the number of individuals with MI events for patients on Usual Care intervention
mi_count_uc <- nrow(cvd_events_uc[cvd_events_uc$cvd_events > 0, ])
# Count the number of individuals with Stroke events for patients on Usual Care intervention
stroke_count_uc <- nrow(stroke_events_uc[stroke_events_uc$stroke_events > 0, ])
# Count the number of individuals with both MI and Stroke events for patients on Usual Care intervention
both_count_uc <- nrow(cvd_events_combined_uc[cvd_events_combined_uc$mi_events > 0 & cvd_events_combined_uc$stroke_events > 0, ])  
# Print the counts for patients on Usual Care intervention
cat("Number of individuals with MI events (Usual Care Intervention):", mi_count_uc, "\n")
## Number of individuals with MI events (Usual Care Intervention): 28435
cat("Number of individuals with Stroke events (Usual Care Intervention):", stroke_count_uc, "\n")
## Number of individuals with Stroke events (Usual Care Intervention): 13481
cat("Number of individuals with both MI and Stroke events (Usual Care Intervention):", both_count_uc, "\n")
## Number of individuals with both MI and Stroke events (Usual Care Intervention): 3292
cat("Number of individuals with first instance of MI events (Usual Care Intervention):", mi_first_count_uc, "\n")
## Number of individuals with first instance of MI events (Usual Care Intervention): 18246
cat("Number of individuals with first instance of Stroke events (Usual Care Intervention):", stroke_first_count_uc, "\n")
## Number of individuals with first instance of Stroke events (Usual Care Intervention): 13481
cat("Total number of deaths (Usual Care Intervention):", total_deaths_uc, "\n")
## Total number of deaths (Usual Care Intervention): 100000

27 Plot the costs over cycles for patients on Usual Care intervention

# Clear the workspace from previous data frames for patients on Usual Care intervention
rm(m_States_UC_risk)
# Load the simulation results from rda file for patients on Usual Care intervention
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_Costs_UC_risk.rda') # On Usual Care intervention
costs_uc_df <- as.data.frame(m_Costs_UC_risk)
# Add individual id variable taken from rownames without the prefix "ind_"
costs_uc_df$id <- gsub("ind_", "", rownames(costs_uc_df))
costs_uc_long <- melt(costs_uc_df, id.vars = "id", variable.name = "cycle", value.name = "cost")
# Convert cycle to numeric
costs_uc_long$cycle <- as.numeric(gsub("cycle_", "", costs_uc_long$cycle))
# Plot the costs over cycles for patients on Usual Care intervention
ggplot(costs_uc_long, aes(x = cycle, y = id, fill = cost)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "black") +
  labs(title = "Costs Over Cycles (Usual Care Intervention)",
       x = "Cycle",
       y = "Individual ID",
       fill = "Cost") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

# Plot mean costs per cycle for patients on Usual Care intervention

# Calculate mean costs per cycle for patients on Usual Care intervention
mean_costs_uc <- costs_uc_long %>%
  group_by(cycle) %>%
  summarise(mean_cost = mean(cost, na.rm = TRUE)) %>%
  ungroup()
# Plot mean costs per cycle for patients on Usual Care intervention
ggplot(mean_costs_uc, aes(x = cycle, y = mean_cost)) +
  geom_line() +
  labs(title = "Mean Costs per Cycle (Usual Care Intervention)",
       x = "Cycle",
       y = "Mean Cost") +
  theme_minimal()

# Plot the DALYs over cycles for patients on Usual Care intervention

# Clear the workspace from previous data frames for patients on Usual Care intervention
rm(m_Costs_UC_risk)
# Load the simulation results from rda file for patients on Usual Care intervention
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/m_Effs_UC_risk.rda') # On Usual Care intervention
effs_uc_df <- as.data.frame(m_Effs_UC_risk)
# Add individual id variable taken from rownames without the prefix "ind_"
effs_uc_df$id <- gsub("ind_", "", rownames(effs_uc_df))
effs_uc_long <- melt(effs_uc_df, id.vars = "id", variable.name = "cycle", value.name = "eff")
# Convert cycle to numeric
effs_uc_long$cycle <- as.numeric(gsub("cycle_", "", effs_uc_long$cycle))
# Plot the DALYs over cycles for patients on Usual Care intervention
ggplot(effs_uc_long, aes(x = cycle, y = id, fill = eff)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "black") +
  labs(title = "DALYs Over Cycles (Usual Care Intervention)",
       x = "Cycle",
       y = "Individual ID",
       fill = "DALY") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

# Plot mean DALYs per cycle for patients on Usual Care intervention

# Calculate mean DALYs per cycle for patients on Usual Care intervention
mean_effs_uc <- effs_uc_long %>%
  group_by(cycle) %>%
  summarise(mean_eff = mean(eff, na.rm = TRUE)) %>%
  ungroup()
# Plot mean DALYs per cycle for patients on Usual Care intervention
ggplot(mean_effs_uc, aes(x = cycle, y = mean_eff)) +
  geom_line() +
  labs(title = "Mean DALYs per Cycle (Usual Care Intervention)",
       x = "Cycle",
       y = "Mean DALY") +
  theme_minimal()

27.1 Save the Plots

# Save the plots to files
# ggsave("state_occupancy_empower_health.png", width = 10, height = 6)
# ggsave("state_occupancy_usual_care.png", width = 10, height = 6)
# ggsave("cvd_events_empower_health.png", width = 10, height = 6)
# ggsave("cvd_events_usual_care.png", width = 10, height = 6)
# ggsave("costs_empower_health.png", width = 10, height = 6)
# ggsave("costs_usual_care.png", width = 10, height = 6)
# ggsave("effs_empower_health.png", width = 10, height = 6)
# ggsave("effs_usual_care.png", width = 10, height = 6)

28 Comparing the results between No intervention, Empower Health and Usual Care interventions

# Combine the state occupancy data for none and both interventions
state_occupancy_combined <- rbind(
  mutate(state_occupancy_emp_long, intervention = "Empower Health"),
  mutate(state_occupancy_uc_long, intervention = "Usual Care"),
  mutate(state_occupancy_long, intervention = "No Intervention")
)

# Plot the state occupancy over cycles for both interventions
ggplot(state_occupancy_combined, aes(x = cycle, y = id, fill = state)) +
  geom_tile() +
  scale_fill_manual(values = c("NE" = "green", "ST" = "yellow", "PS" = "orange", "MI" = "red", "PM" = "purple", "D" = "black")) +
  facet_wrap(~ intervention) +
  labs(title = "Health State Occupancy Over Cycles (All Interventions)",
       x = "Cycle",
       y = "Individual ID",
       fill = "Health State") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

# Plot the distribution of state occupancy across the six states for each cycle for none and both interventions

# Combine the state proportions data for none and both interventions
state_proportions_combined <- rbind(
  mutate(state_proportions_emp, intervention = "Empower Health"),
  mutate(state_proportions_uc, intervention = "Usual Care"),
  mutate(state_proportions, intervention = "No Intervention")
)
# Plot the distribution of state occupancy across the six states for each cycle for none and both interventions
state_proportions_all <- ggplot(state_proportions_combined, aes(x = cycle, y = proportion, fill = state)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_manual(values = c("NE" = "green", "ST" = "yellow", "PS" = "orange", "MI" = "red", "PM" = "purple", "D" = "black")) +
  facet_wrap(~ intervention) +
  labs(title = "Distribution of Health State Occupancy per Cycle (All Interventions)",
       x = "Cycle",
       y = "Proportion",
       fill = "Health State") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

# View the plot
print(state_proportions_all)

# Save the combined state proportions plot
ggsave("state_proportions_all_interventions.png", plot = state_proportions_all, width = 10, height = 6)

29 Plot the distribution of state occupancy across the six states for each cycle for none and both interventions-label proportions in each state for the last cycle

# Plot the distribution of state occupancy across the six states for each cycle for none and both interventions with labels for last cycle
state_proportions_labeled <- ggplot(state_proportions_combined, aes(x = cycle, y = proportion, fill = state)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_manual(values = c("NE" = "green", "ST" = "yellow", "PS" = "orange", "MI" = "red", "PM" = "purple", "D" = "black")) +
  facet_wrap(~ intervention) +
  labs(title = "Distribution of Health State Occupancy per Cycle (All Interventions)",
       x = "Cycle",
       y = "Proportion",
       fill = "Health State") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank()) +
  geom_text(data = subset(state_proportions_combined, cycle == max(cycle)),
            aes(label = paste0(state, ": ", round(proportion * 100, 1), "%")),
            position = position_stack(vjust = 0.5),
            size = 3,
            color = "white")
# View the plot
print(state_proportions_labeled)

# Save the labeled state proportions plot
ggsave("state_proportions_labeled_all_interventions.png", plot = state_proportions_labeled, width = 10, height = 6)

30 Summarise the proportion state occupancy at the final cycle for all interventions

# Summarise the proportion state occupancy at the final cycle for all interventions
final_cycle_proportions <- state_proportions_combined %>%
  filter(cycle == 60) %>%
  select(intervention, state, proportion)
## Adding missing grouping variables: `cycle`
# Print the final cycle proportions
print(final_cycle_proportions)
## # A tibble: 3 × 4
## # Groups:   cycle [1]
##   cycle intervention    state proportion
##   <dbl> <chr>           <fct>      <dbl>
## 1    60 Empower Health  D              1
## 2    60 Usual Care      D              1
## 3    60 No Intervention D              1
# Create a table of final cycle proportions-convert to percentage

final_cycle_table <- final_cycle_proportions %>%
  pivot_wider(names_from = state, values_from = proportion) %>%
  mutate(across(-intervention, ~ round(.x * 100, 1))) # Convert to percentage and round to 1 decimal place
# Add total column
final_cycle_table <- final_cycle_table %>%
  rowwise() %>%
  mutate(Total = sum(c_across(-intervention)))

# Print the final cycle table
print(final_cycle_table)
## # A tibble: 3 × 4
## # Rowwise:  cycle
##   cycle intervention        D Total
##   <dbl> <chr>           <dbl> <dbl>
## 1    60 Empower Health    100   100
## 2    60 Usual Care        100   100
## 3    60 No Intervention   100   100

31 Plot stacked area chart for all interventions

# Plot stacked area chart for both interventions
state_occupancy_final <- ggplot(state_proportions_combined, aes(x = cycle, y = proportion, fill = state)) +
  geom_area() +
  scale_fill_manual(values = c("NE" = "green", "ST" = "yellow", "PS" = "orange", "MI" = "red", "PM" = "purple", "D" = "black")) +
  facet_wrap(~ intervention) +
  labs(title = NULL,
       x = "Cycle",
       y = "Proportion",
       fill = "Health State") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())
# View the plot
print(state_occupancy_final)

# Save the combined stacked area chart plot
ggsave("state_occupancy_final_all_interventions.png", plot = state_occupancy_final, width = 10, height = 6)
# Save as PDF
ggsave("state_occupancy_final_all_interventions.pdf", plot = state_occupancy_final, width = 10, height = 6)

32 Plot the distribution of CVD events for all interventions

# Combine the CVD events data for none and both interventions
cvd_events_combined <- rbind(
  mutate(cvd_events_emp, intervention = "Empower Health"),
  mutate(cvd_events_uc, intervention = "Usual Care"),
  mutate(cvd_events, intervention = "No Intervention")
)
ggplot(cvd_events_combined, aes(x = cvd_events, fill = intervention)) +
  geom_histogram(binwidth = 1, position = "identity", alpha = 0.7) +
  labs(title = "Distribution of CVD Events (All Interventions)",
       x = "Number of CVD Events",
       y = "Frequency") +
  scale_fill_manual(values = c("Empower Health" = "green", "Usual Care" = "orange", "No Intervention" = "red")) +
  theme_minimal()

# Count the number of events for each intervention

# Count the total number of Mi events for each intervention
cat("Number of individuals with MI events (No Intervention):", mi_count, "\n")
## Number of individuals with MI events (No Intervention): 29388
cat("Number of individuals with MI events (Usual Care Intervention):", mi_count_uc, "\n")
## Number of individuals with MI events (Usual Care Intervention): 28435
cat("Number of individuals with MI events (Empower Health Intervention):", mi_count_emp, "\n")
## Number of individuals with MI events (Empower Health Intervention): 26551
# Count the total number of Stroke events for each intervention

cat("Number of individuals with Stroke events (No Intervention):", stroke_count, "\n")
## Number of individuals with Stroke events (No Intervention): 13951
cat("Number of individuals with Stroke events (Usual Care Intervention):", stroke_count_uc, "\n")
## Number of individuals with Stroke events (Usual Care Intervention): 13481
cat("Number of individuals with Stroke events (Empower Health Intervention):", stroke_count_emp, "\n")
## Number of individuals with Stroke events (Empower Health Intervention): 12490
# Count the total number of individuals with both MI and Stroke events for each intervention

cat("Number of individuals with both MI and Stroke events (No Intervention):", both_count, "\n")
## Number of individuals with both MI and Stroke events (No Intervention): 3407
cat("Number of individuals with both MI and Stroke events (Usual Care Intervention):", both_count_uc, "\n")
## Number of individuals with both MI and Stroke events (Usual Care Intervention): 3292
cat("Number of individuals with both MI and Stroke events (Empower Health Intervention):", both_count_emp, "\n")
## Number of individuals with both MI and Stroke events (Empower Health Intervention): 3068
# Count the number of individuals developing MI event (Only first event)
cat("Number of individuals with first instance of MI events (No Intervention):", mi_first_count, "\n")
## Number of individuals with first instance of MI events (No Intervention): 18844
cat("Number of individuals with first instance of MI events (Usual Care Intervention):", mi_first_count_uc, "\n")
## Number of individuals with first instance of MI events (Usual Care Intervention): 18246
cat("Number of individuals with first instance of MI events (Empower Health Intervention):", mi_first_count_emp, "\n")
## Number of individuals with first instance of MI events (Empower Health Intervention): 17129
# Count the number of individuals developing Stroke event (Only first event)
cat("Number of individuals with first instance of Stroke events (No Intervention):", stroke_first_count, "\n")
## Number of individuals with first instance of Stroke events (No Intervention): 13951
cat("Number of individuals with first instance of Stroke events (Usual Care Intervention):", stroke_first_count_uc, "\n")
## Number of individuals with first instance of Stroke events (Usual Care Intervention): 13481
cat("Number of individuals with first instance of Stroke events (Empower Health Intervention):", stroke_first_count_emp, "\n")
## Number of individuals with first instance of Stroke events (Empower Health Intervention): 12490
# Count the total number of deaths for each intervention
cat("Total number of deaths (No Intervention):", total_deaths, "\n")
## Total number of deaths (No Intervention): 100000
cat("Total number of deaths (Usual Care Intervention):", total_deaths_uc, "\n")
## Total number of deaths (Usual Care Intervention): 100000
cat("Total number of deaths (Empower Health Intervention):", total_deaths_emp, "\n")
## Total number of deaths (Empower Health Intervention): 100000

33 Draw graph of MI, stroke and deaths averted for Empower Health and Usual Care interventions compared to No intervention

# Calculate the number of events averted for each intervention
mi_averted_emp <- mi_count - mi_count_emp
mi_averted_uc <- mi_count - mi_count_uc
stroke_averted_emp <- stroke_count - stroke_count_emp
stroke_averted_uc <- stroke_count - stroke_count_uc
deaths_averted_emp <- total_deaths - total_deaths_emp
deaths_averted_uc <- total_deaths - total_deaths_uc
# Create a data frame for plotting
events_averted <- data.frame(
  intervention = rep(c("Empower Health", "Usual Care"), each = 3),
  event_type = rep(c("MI Events Averted", "Stroke Events Averted", "Deaths Averted"), times = 2),
  count = c(mi_averted_emp, stroke_averted_emp, deaths_averted_emp,
            mi_averted_uc, stroke_averted_uc, deaths_averted_uc)
)
# Plot the events averted for both interventions
ggplot(events_averted, aes(x = event_type, y = count, fill = intervention)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "CVD Events and Deaths Averted by Intervention",
       x = "Event Type",
       y = "Number of Events Averted",
       fill = "Intervention") +
  scale_fill_manual(values = c("Empower Health" = "blue", "Usual Care" = "orange")) +
  theme_minimal()

# Draw graph of MI, stroke and deaths averted for Empower Health and Usual Care interventions compared to No intervention-use onliy first instances of events

# Calculate the number of first instance events averted for each intervention
mi_first_averted_emp <- mi_first_count - mi_first_count_emp
mi_first_averted_uc <- mi_first_count - mi_first_count_uc
stroke_first_averted_emp <- stroke_first_count - stroke_first_count_emp
stroke_first_averted_uc <- stroke_first_count - stroke_first_count_uc
# Create a data frame for plotting
first_events_averted <- data.frame(
  intervention = rep(c("Empower Health", "Usual Care"), each = 2),
  event_type = rep(c("First MI Events Averted", "First Stroke Events Averted"), times = 2),
  count = c(mi_first_averted_emp, stroke_first_averted_emp,
            mi_first_averted_uc, stroke_first_averted_uc)
)
# Plot the first instance events averted for both interventions
ggplot(first_events_averted, aes(x = event_type, y = count, fill = intervention)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "First Instance CVD Events Averted by Intervention",
       x = "Event Type",
       y = "Number of First Instance Events Averted",
       fill = "Intervention") +
  scale_fill_manual(values = c("Empower Health" = "blue", "Usual Care" = "orange")) +
  theme_minimal()

# Draw graph of MI, stroke and deaths averted for Empower Health and Usual Care interventions compared to No intervention-use onliy first instances of events_add deaths averted and value labels-use green,red and purple colors

# Create a data frame for plotting with deaths averted included
first_events_averted_with_deaths <- data.frame(
  intervention = rep(c("Empower Health", "Usual Care"), each = 3),
  event_type = rep(c("First MI Events Averted", "First Stroke Events Averted", "Deaths Averted"), times = 2),
  count = c(mi_first_averted_emp, stroke_first_averted_emp, deaths_averted_emp,
            mi_first_averted_uc, stroke_first_averted_uc, deaths_averted_uc)
)
# Plot the first instance events averted with deaths for both interventions
first_events_deaths_averted <- ggplot(first_events_averted_with_deaths, aes(x = event_type, y = count, fill = intervention)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(aes(label = count), position = position_dodge(width = 0.9), vjust = -0.25) +
  labs(title = NULL,
       x = NULL,
       y = "Number of Events Averted",
       fill = "Intervention") +
  scale_fill_manual(values = c("Empower Health" = "black", "Usual Care" = "green")) +
  theme_minimal()
# View the plot
print(first_events_deaths_averted)

# save plot as png
ggsave("first_events_deaths_averted.png", plot = first_events_deaths_averted, width = 10, height = 6)
# save as pdf
ggsave("first_events_deaths_averted.pdf", plot = first_events_deaths_averted, width = 10, height = 6)

34 Plot the costs over cycles for all interventions

# Combine the costs data for none and both interventions
costs_combined <- rbind(
  mutate(costs_emp_long, intervention = "Empower Health"),
  mutate(costs_uc_long, intervention = "Usual Care"),
  mutate(costs_long, intervention = "No Intervention")
)
# Plot the costs over cycles for both interventions
ggplot(costs_combined, aes(x = cycle, y = id, fill = cost)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "black") +
  facet_wrap(~ intervention) +
  labs(title = "Costs Over Cycles (All Interventions)",
       x = "Cycle",
       y = "Individual ID",
       fill = "Cost") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

35 Plot mean costs per cycle for All interventions

mean_costs_combined <- rbind(
  mutate(mean_costs_emp, intervention = "Empower Health"),
  mutate(mean_costs_uc, intervention = "Usual Care"),
  mutate(mean_costs, intervention = "No Intervention")
)
# Plot mean costs per cycle for both interventions
ggplot(mean_costs_combined, aes(x = cycle, y = mean_cost, color = intervention)) +
  geom_line() +
  labs(title = "Mean Costs per Cycle (All Interventions)",
       x = "Cycle",
       y = "Mean Cost") +
  scale_color_manual(values = c("Empower Health" = "blue", "Usual Care" = "orange", "No Intervention" = "grey")) +
  theme_minimal()

# Plot the DALYs over cycles for both interventions

# Combine the DALYs data for none and both interventions
effs_combined <- rbind(
  mutate(effs_emp_long, intervention = "Empower Health"),
  mutate(effs_uc_long, intervention = "Usual Care"),
  mutate(effs_long, intervention = "No Intervention")
)
# Plot the DALYs over cycles for both interventions
ggplot(effs_combined, aes(x = cycle, y = id, fill = eff)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "black") +
  facet_wrap(~ intervention) +
  labs(title = "DALYs Over Cycles (All Interventions)",
       x = "Cycle",
       y = "Individual ID",
       fill = "DALY") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

36 Plot mean DALYs per cycle for both interventions

mean_effs_combined <- rbind(
  mutate(mean_effs_emp, intervention = "Empower Health"),
  mutate(mean_effs_uc, intervention = "Usual Care"),
  mutate(mean_effs, intervention = "No Intervention")
)
# Plot mean DALYs per cycle for both interventions
ggplot(mean_effs_combined, aes(x = cycle, y = mean_eff, color = intervention)) +
  geom_line() +
  labs(title = "Mean DALYs per Cycle (All Interventions)",
       x = "Cycle",
       y = "Mean DALY") +
  scale_color_manual(values = c("Empower Health" = "blue", "Usual Care" = "orange", "No Intervention" = "grey")) +
  theme_minimal()

# Load results lists

rm(list = ls()) # Clear the environment
# Load the lists from rda file
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/res_no_Trt_risk.rda')
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/res_Empower_Health_risk.rda')
load('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/data/res_Usual_Care_risk.rda')

37 Plot a cost effectiveness plane for the three interventions

# Extract the mean costs and effects from the results lists
mean_costs_no_trt <- res_no_Trt_risk$mean_Dcosts
mean_costs_empower_health <- res_Empower_Health_risk$mean_Dcosts
mean_costs_usual_care <- res_Usual_Care_risk$mean_Dcosts
mean_effects_no_trt <- res_no_Trt_risk$mean_Ddalys
mean_effects_empower_health <- res_Empower_Health_risk$mean_Ddalys
mean_effects_usual_care <- res_Usual_Care_risk$mean_Ddalys
dalys_averted_empower_health <- mean_effects_no_trt - mean_effects_empower_health
dalys_averted_usual_care <- mean_effects_no_trt - mean_effects_usual_care

# Print the mean costs and effects for each intervention
cat("Mean Costs and Effects for No Treatment Intervention:\n")
## Mean Costs and Effects for No Treatment Intervention:
mean_costs_no_trt 
## [1] 1899.251
mean_effects_no_trt
## [1] 7.837787
cat("Mean Costs and Effects for Empower Health Intervention:\n")
## Mean Costs and Effects for Empower Health Intervention:
mean_costs_empower_health 
## [1] 4638.662
mean_effects_empower_health
## [1] 7.747699
cat("Mean Costs and Effects for Usual Care Intervention:\n")
## Mean Costs and Effects for Usual Care Intervention:
mean_costs_usual_care 
## [1] 4222.971
mean_effects_usual_care
## [1] 7.8077
cat("DALYs Averted for Empower Health Intervention:\n")
## DALYs Averted for Empower Health Intervention:
dalys_averted_empower_health 
## [1] 0.09008786
cat("DALYs Averted for Usual Care Intervention:\n")
## DALYs Averted for Usual Care Intervention:
dalys_averted_usual_care 
## [1] 0.03008617

38 Plot the cost-effectiveness plane

# Create a data frame for the cost-effectiveness plane
cost_effectiveness_plane <- data.frame(
  costs = c(mean_costs_no_trt, mean_costs_empower_health, mean_costs_usual_care),
  effects = c(mean_effects_no_trt, mean_effects_empower_health, mean_effects_usual_care),
  intervention = c("No Treatment", "Empower Health", "Usual Care")
)
# Plot the cost-effectiveness plane
ggplot(cost_effectiveness_plane, aes(x = effects, y = costs, color = intervention)) +
  geom_point(size = 3) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "grey") +
  labs(title = NULL,
       x = "Mean Effects (DALYs)",
       y = "Mean Costs (Costs)") +
  scale_color_manual(values = c("No Treatment" = "red", "Empower Health" = "blue", "Usual Care" = "green")) +
  theme_minimal()

# Draw CE plane using dalys averted and costs comparing Empower Health and Usual Care interventions
# Create a data frame for the cost-effectiveness plane comparing Empower Health and Usual Care interventions
cost_effectiveness_plane_empower_vs_usual <- data.frame(
  costs = c(mean_costs_empower_health, mean_costs_usual_care),
  effects = c(dalys_averted_empower_health, dalys_averted_usual_care),
  intervention = c("Empower Health", "Usual Care")
)
# Plot the cost-effectiveness plane comparing Empower Health and Usual Care interventions
ce_plane_empower_vs_uc <- ggplot(cost_effectiveness_plane_empower_vs_usual, aes(x = effects, y = costs, color = intervention)) +
  geom_point(size = 3) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "grey") +
  labs(title = NULL,
       x = "DALYs Averted",
       y = "Costs") +
  scale_color_manual(values = c("Empower Health" = "blue", "Usual Care" = "green")) +
  theme_minimal()
# View the plot
print(ce_plane_empower_vs_uc)

# Save the cost-effectiveness plane plot
ggsave("cost_effectiveness_plane_empower_vs_usual.png", plot = ce_plane_empower_vs_uc, width = 10, height = 6)
ggsave("cost_effectiveness_plane_empower_vs_usual.pdf", plot = ce_plane_empower_vs_uc, width = 10, height = 6)

39 Plot the cost-effectiveness plane comparing Empower Health and Usual Care interventions using DALYs averted and costs-add willingness to pay (WTP) threshold line at $1000

# Define willingness to pay (WTP) threshold
wtp_threshold <- 1000
# Plot the cost-effectiveness plane comparing Empower Health and Usual Care interventions with WTP line
ggplot(cost_effectiveness_plane_empower_vs_usual, aes(x = effects, y = costs, color = intervention)) +
  geom_point(size = 3) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "grey") +
  geom_abline(slope = wtp_threshold, intercept = 0, linetype = "dotted", color = "red") +
  labs(title = NULL,
       x = "DALYs Averted",
       y = "Costs") +
  scale_color_manual(values = c("Empower Health" = "blue", "Usual Care" = "green")) +
  theme_minimal()

40 Compute the incremental cost-effectiveness ratio (ICER) for the two interventions

# Compute DALYs averted
dalys_averted_empower_health <- mean_effects_no_trt - mean_effects_empower_health
dalys_averted_usual_care <- mean_effects_no_trt - mean_effects_usual_care
# Compute the incremental costs and effects for the two interventions (Empower Health versus usual care)

# Compute the ICER for the two interventions
icer_empower_health <- (mean_costs_empower_health-mean_costs_usual_care) / (dalys_averted_empower_health-dalys_averted_usual_care)

icer_empower_health
## [1] 6927.997

41 Alternatively

# Compute the incremental cost-effectiveness ratio (ICER) for the two interventions
icer_empower_health_vs_none <- (mean_costs_empower_health - mean_costs_no_trt) / (mean_effects_no_trt-mean_effects_empower_health)
icer_usual_care_vs_none <- (mean_costs_usual_care - mean_costs_no_trt) / (mean_effects_no_trt-mean_effects_usual_care)
icer_empower_vs_usual_care <- (mean_costs_empower_health - mean_costs_usual_care) / (mean_effects_usual_care-mean_effects_empower_health )
# Print the ICER values
cat("ICER for Empower Health Intervention vs No Intervention:", icer_empower_health_vs_none, "\n")
## ICER for Empower Health Intervention vs No Intervention: 30408.22
cat("ICER for Usual Care Intervention vs No Intervention:", icer_usual_care_vs_none, "\n")
## ICER for Usual Care Intervention vs No Intervention: 77235.47
cat("ICER for Empower Health versus Usual Care Intervention:", icer_empower_vs_usual_care, "\n")
## ICER for Empower Health versus Usual Care Intervention: 6927.997