knitr::opts_chunk$set(echo = TRUE)
library(rmarkdown)
library(kableExtra)
library(ggplot2)
library(dplyr)##
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
##
## group_rows
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
#Clear the environment
# Load the steps_subset dataset
data_12mths <- read_csv('/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/inst/rmarkdown/templates/empowerhealthsummaries/skeleton/data_12mths_clean.csv')## Rows: 60968 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): country, facility, diagnosis, sex, county, enrollment_status, enr...
## dbl (22): patient_id, age, latest_bmi, baseline_systolic, baseline_diastoli...
## date (4): enrollment_date, baseline_assessment_date, anchor_date, followup_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Create a subset of patients with followup BP
medtronic_subset <- data_12mths %>%
filter(!is.na(followup_systolic))
# Display the first few rows of the subset
head(medtronic_subset)## # A tibble: 6 × 38
## patient_id country age facility diagnosis sex county enrollment_status
## <dbl> <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 1 Kenya 65 Matiliku su… "{Hypert… Fema… Makue… Direct Enrollment
## 2 5 Kenya 42 Tawa Sub-co… "{Hypert… Fema… Makue… Direct Enrollment
## 3 9 Kenya 43 Kibwezi Sub… "{Hypert… Fema… Makue… Direct Enrollment
## 4 11 Kenya 69 Kibwezi Sub… "{Hypert… Male Makue… Direct Enrollment
## 5 15 Kenya 65 Kilungu sub… "{Hypert… Fema… Makue… Direct Enrollment
## 6 29 Kenya 70 Kilungu sub… "{Hypert… Fema… Makue… Direct Enrollment
## # ℹ 30 more variables: enrollment_bmi <chr>, latest_bmi <dbl>,
## # baseline_systolic <dbl>, baseline_diastolic <dbl>,
## # baseline_control_status <chr>, baseline_grade <dbl>,
## # enrollment_date <date>, baseline_assessment_date <date>,
## # anchor_date <date>, followup_systolic <dbl>, followup_diastolic <dbl>,
## # followup_control_status <chr>, followup_grade <dbl>, followup_date <date>,
## # days_between_assessment_and_anchor <dbl>, …
## spc_tbl_ [17,409 × 38] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ patient_id : num [1:17409] 1 5 9 11 15 29 45 54 57 62 ...
## $ country : chr [1:17409] "Kenya" "Kenya" "Kenya" "Kenya" ...
## $ age : num [1:17409] 65 42 43 69 65 70 50 68 65 59 ...
## $ facility : chr [1:17409] "Matiliku sub county hospital" "Tawa Sub-county Hospital" "Kibwezi Sub-county Hospital" "Kibwezi Sub-county Hospital" ...
## $ diagnosis : chr [1:17409] "{Hypertension,\"Diabetes Mellitus Type 2\"}" "{Hypertension}" "{Hypertension}" "{Hypertension}" ...
## $ sex : chr [1:17409] "Female" "Female" "Female" "Male" ...
## $ county : chr [1:17409] "Makueni" "Makueni" "Makueni" "Makueni" ...
## $ enrollment_status : chr [1:17409] "Direct Enrollment" "Direct Enrollment" "Direct Enrollment" "Direct Enrollment" ...
## $ enrollment_bmi : chr [1:17409] "25.4" "37.29" "33.83" "23.24" ...
## $ latest_bmi : num [1:17409] 25.4 37.3 33.8 23.2 23.6 ...
## $ baseline_systolic : num [1:17409] 165 211 151 125 146 117 140 87 156 157 ...
## $ baseline_diastolic : num [1:17409] 74 124 87 81 72 70 90 59 91 85 ...
## $ baseline_control_status : chr [1:17409] "Uncontrolled" "Uncontrolled" "Uncontrolled" "Controlled" ...
## $ baseline_grade : num [1:17409] 2 3 1 0 1 0 1 0 1 1 ...
## $ enrollment_date : Date[1:17409], format: "2020-12-14" "2021-11-17" ...
## $ baseline_assessment_date : Date[1:17409], format: "2022-02-28" "2021-11-17" ...
## $ anchor_date : Date[1:17409], format: "2023-02-28" "2022-11-17" ...
## $ followup_systolic : num [1:17409] 137 136 146 119 113 123 136 109 149 120 ...
## $ followup_diastolic : num [1:17409] 56 65 78 70 73 77 76 64 75 72 ...
## $ followup_control_status : chr [1:17409] "Controlled" "Controlled" "Uncontrolled" "Controlled" ...
## $ followup_grade : num [1:17409] 0 0 1 0 0 0 0 0 1 0 ...
## $ followup_date : Date[1:17409], format: "2023-02-14" "2022-11-03" ...
## $ days_between_assessment_and_anchor : num [1:17409] 14 14 6 27 23 19 6 60 40 3 ...
## $ days_between_assessment_and_baseline: num [1:17409] 351 351 371 392 342 346 359 305 405 368 ...
## $ number_followup_assessments : num [1:17409] 3 1 0 3 0 11 4 3 1 5 ...
## $ total_followup_assessments : num [1:17409] 4 8 7 3 25 12 11 3 3 5 ...
## $ number_followup_by_hs : num [1:17409] 2 0 0 2 1 1 0 2 0 2 ...
## $ number_medical_reviews : num [1:17409] 0 0 0 0 0 0 0 0 0 0 ...
## $ controlled_followup_days : num [1:17409] 351 351 NA -1 342 -1 359 -1 NA 368 ...
## $ number_uncontrolled_followups : num [1:17409] 1 0 1 -1 0 -1 2 -1 2 2 ...
## $ number_community_assessments : num [1:17409] 0 0 0 0 0 1 0 0 0 2 ...
## $ number_facility_assessments : num [1:17409] 1 0 0 1 1 4 2 3 2 1 ...
## $ number_feeder_assessments : num [1:17409] 0 0 0 0 1 0 0 0 0 0 ...
## $ change_in_systolic : num [1:17409] -28 -75 -5 -6 -33 6 -4 22 -7 -37 ...
## $ change_in_diastolic : num [1:17409] -18 -59 -9 -11 1 7 -14 5 -16 -13 ...
## $ filter_col_str : chr [1:17409] "Yes" "Yes" "Yes" "Yes" ...
## $ diabetes : chr [1:17409] "Yes" "No" "No" "No" ...
## $ age_group : chr [1:17409] "65-69" "40-44" "40-44" "65-69" ...
## - attr(*, "spec")=
## .. cols(
## .. patient_id = col_double(),
## .. country = col_character(),
## .. age = col_double(),
## .. facility = col_character(),
## .. diagnosis = col_character(),
## .. sex = col_character(),
## .. county = col_character(),
## .. enrollment_status = col_character(),
## .. enrollment_bmi = col_character(),
## .. latest_bmi = col_double(),
## .. baseline_systolic = col_double(),
## .. baseline_diastolic = col_double(),
## .. baseline_control_status = col_character(),
## .. baseline_grade = col_double(),
## .. enrollment_date = col_date(format = ""),
## .. baseline_assessment_date = col_date(format = ""),
## .. anchor_date = col_date(format = ""),
## .. followup_systolic = col_double(),
## .. followup_diastolic = col_double(),
## .. followup_control_status = col_character(),
## .. followup_grade = col_double(),
## .. followup_date = col_date(format = ""),
## .. days_between_assessment_and_anchor = col_double(),
## .. days_between_assessment_and_baseline = col_double(),
## .. number_followup_assessments = col_double(),
## .. total_followup_assessments = col_double(),
## .. number_followup_by_hs = col_double(),
## .. number_medical_reviews = col_double(),
## .. controlled_followup_days = col_double(),
## .. number_uncontrolled_followups = col_double(),
## .. number_community_assessments = col_double(),
## .. number_facility_assessments = col_double(),
## .. number_feeder_assessments = col_double(),
## .. change_in_systolic = col_double(),
## .. change_in_diastolic = col_double(),
## .. filter_col_str = col_character(),
## .. diabetes = col_character(),
## .. age_group = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Summary of the dataset
medtronic_subset$enrollment_bmi <- as.numeric(medtronic_subset$enrollment_bmi)## Warning: NAs introduced by coercion
## patient_id country age facility
## Min. : 1 Length:17409 Min. : 5.00 Length:17409
## 1st Qu.: 47964 Class :character 1st Qu.: 54.00 Class :character
## Median :329310 Mode :character Median : 64.00 Mode :character
## Mean :241930 Mean : 62.72
## 3rd Qu.:370978 3rd Qu.: 72.00
## Max. :536853 Max. :100.00
##
## diagnosis sex county enrollment_status
## Length:17409 Length:17409 Length:17409 Length:17409
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## enrollment_bmi latest_bmi baseline_systolic baseline_diastolic
## Min. : 5.30 Min. : 5.13 Min. : 65.0 Min. : 36.00
## 1st Qu.: 23.06 1st Qu.: 23.03 1st Qu.:128.0 1st Qu.: 75.00
## Median : 26.03 Median : 26.02 Median :140.0 Median : 82.00
## Mean : 29.09 Mean : 28.83 Mean :142.4 Mean : 82.99
## 3rd Qu.: 29.76 3rd Qu.: 29.87 3rd Qu.:155.0 3rd Qu.: 90.00
## Max. :1440.44 Max. :632.00 Max. :285.0 Max. :163.00
## NA's :140 NA's :1
## baseline_control_status baseline_grade enrollment_date
## Length:17409 Min. :0.0000 Min. :2018-04-18
## Class :character 1st Qu.:0.0000 1st Qu.:2021-11-26
## Mode :character Median :1.0000 Median :2022-04-19
## Mean :0.8534 Mean :2022-02-19
## 3rd Qu.:1.0000 3rd Qu.:2022-09-02
## Max. :3.0000 Max. :2023-09-20
##
## baseline_assessment_date anchor_date followup_systolic
## Min. :2018-04-19 Min. :2019-04-19 Min. : 70.0
## 1st Qu.:2022-02-22 1st Qu.:2023-02-22 1st Qu.:124.0
## Median :2022-07-20 Median :2023-07-20 Median :134.0
## Mean :2022-06-01 Mean :2023-06-01 Mean :136.3
## 3rd Qu.:2022-12-02 3rd Qu.:2023-12-02 3rd Qu.:146.0
## Max. :2023-09-26 Max. :2024-09-26 Max. :240.0
##
## followup_diastolic followup_control_status followup_grade
## Min. : 30.00 Length:17409 Min. :0.0000
## 1st Qu.: 73.00 Class :character 1st Qu.:0.0000
## Median : 80.00 Mode :character Median :0.0000
## Mean : 80.49 Mean :0.5977
## 3rd Qu.: 87.00 3rd Qu.:1.0000
## Max. :153.00 Max. :3.0000
##
## followup_date days_between_assessment_and_anchor
## Min. :2019-03-18 Min. : 0.00
## 1st Qu.:2023-02-21 1st Qu.:10.00
## Median :2023-07-19 Median :23.00
## Mean :2023-05-31 Mean :25.66
## 3rd Qu.:2023-12-05 3rd Qu.:40.00
## Max. :2024-07-29 Max. :62.00
##
## days_between_assessment_and_baseline number_followup_assessments
## Min. :303.0 Min. : 0.000
## 1st Qu.:340.0 1st Qu.: 1.000
## Median :364.0 Median : 2.000
## Mean :363.8 Mean : 2.902
## 3rd Qu.:386.0 3rd Qu.: 4.000
## Max. :427.0 Max. :32.000
##
## total_followup_assessments number_followup_by_hs number_medical_reviews
## Min. : 1.000 Min. : 0.000 Min. :0.00000
## 1st Qu.: 3.000 1st Qu.: 0.000 1st Qu.:0.00000
## Median : 4.000 Median : 1.000 Median :0.00000
## Mean : 5.751 Mean : 1.305 Mean :0.03797
## 3rd Qu.: 8.000 3rd Qu.: 2.000 3rd Qu.:0.00000
## Max. :79.000 Max. :28.000 Max. :4.00000
##
## controlled_followup_days number_uncontrolled_followups
## Min. : -1.0 Min. :-1.0000
## 1st Qu.: -1.0 1st Qu.:-1.0000
## Median : -1.0 Median : 0.0000
## Mean :139.4 Mean : 0.4923
## 3rd Qu.:351.0 3rd Qu.: 1.0000
## Max. :427.0 Max. :18.0000
## NA's :4962
## number_community_assessments number_facility_assessments
## Min. : 0.000 Min. : 0.00
## 1st Qu.: 0.000 1st Qu.: 0.00
## Median : 0.000 Median : 1.00
## Mean : 1.001 Mean : 1.37
## 3rd Qu.: 1.000 3rd Qu.: 2.00
## Max. :21.000 Max. :19.00
##
## number_feeder_assessments change_in_systolic change_in_diastolic
## Min. : 0.00000 Min. :-146.00 Min. :-88.000
## 1st Qu.: 0.00000 1st Qu.: -20.00 1st Qu.:-11.000
## Median : 0.00000 Median : -5.00 Median : -2.000
## Mean : 0.05802 Mean : -6.09 Mean : -2.493
## 3rd Qu.: 0.00000 3rd Qu.: 9.00 3rd Qu.: 6.000
## Max. :26.00000 Max. : 109.00 Max. : 75.000
##
## filter_col_str diabetes age_group
## Length:17409 Length:17409 Length:17409
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## spc_tbl_ [17,409 × 38] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ patient_id : num [1:17409] 1 5 9 11 15 29 45 54 57 62 ...
## $ country : chr [1:17409] "Kenya" "Kenya" "Kenya" "Kenya" ...
## $ age : num [1:17409] 65 42 43 69 65 70 50 68 65 59 ...
## $ facility : chr [1:17409] "Matiliku sub county hospital" "Tawa Sub-county Hospital" "Kibwezi Sub-county Hospital" "Kibwezi Sub-county Hospital" ...
## $ diagnosis : chr [1:17409] "{Hypertension,\"Diabetes Mellitus Type 2\"}" "{Hypertension}" "{Hypertension}" "{Hypertension}" ...
## $ sex : chr [1:17409] "Female" "Female" "Female" "Male" ...
## $ county : chr [1:17409] "Makueni" "Makueni" "Makueni" "Makueni" ...
## $ enrollment_status : chr [1:17409] "Direct Enrollment" "Direct Enrollment" "Direct Enrollment" "Direct Enrollment" ...
## $ enrollment_bmi : num [1:17409] 25.4 37.3 33.8 23.2 23.6 ...
## $ latest_bmi : num [1:17409] 25.4 37.3 33.8 23.2 23.6 ...
## $ baseline_systolic : num [1:17409] 165 211 151 125 146 117 140 87 156 157 ...
## $ baseline_diastolic : num [1:17409] 74 124 87 81 72 70 90 59 91 85 ...
## $ baseline_control_status : chr [1:17409] "Uncontrolled" "Uncontrolled" "Uncontrolled" "Controlled" ...
## $ baseline_grade : num [1:17409] 2 3 1 0 1 0 1 0 1 1 ...
## $ enrollment_date : Date[1:17409], format: "2020-12-14" "2021-11-17" ...
## $ baseline_assessment_date : Date[1:17409], format: "2022-02-28" "2021-11-17" ...
## $ anchor_date : Date[1:17409], format: "2023-02-28" "2022-11-17" ...
## $ followup_systolic : num [1:17409] 137 136 146 119 113 123 136 109 149 120 ...
## $ followup_diastolic : num [1:17409] 56 65 78 70 73 77 76 64 75 72 ...
## $ followup_control_status : chr [1:17409] "Controlled" "Controlled" "Uncontrolled" "Controlled" ...
## $ followup_grade : num [1:17409] 0 0 1 0 0 0 0 0 1 0 ...
## $ followup_date : Date[1:17409], format: "2023-02-14" "2022-11-03" ...
## $ days_between_assessment_and_anchor : num [1:17409] 14 14 6 27 23 19 6 60 40 3 ...
## $ days_between_assessment_and_baseline: num [1:17409] 351 351 371 392 342 346 359 305 405 368 ...
## $ number_followup_assessments : num [1:17409] 3 1 0 3 0 11 4 3 1 5 ...
## $ total_followup_assessments : num [1:17409] 4 8 7 3 25 12 11 3 3 5 ...
## $ number_followup_by_hs : num [1:17409] 2 0 0 2 1 1 0 2 0 2 ...
## $ number_medical_reviews : num [1:17409] 0 0 0 0 0 0 0 0 0 0 ...
## $ controlled_followup_days : num [1:17409] 351 351 NA -1 342 -1 359 -1 NA 368 ...
## $ number_uncontrolled_followups : num [1:17409] 1 0 1 -1 0 -1 2 -1 2 2 ...
## $ number_community_assessments : num [1:17409] 0 0 0 0 0 1 0 0 0 2 ...
## $ number_facility_assessments : num [1:17409] 1 0 0 1 1 4 2 3 2 1 ...
## $ number_feeder_assessments : num [1:17409] 0 0 0 0 1 0 0 0 0 0 ...
## $ change_in_systolic : num [1:17409] -28 -75 -5 -6 -33 6 -4 22 -7 -37 ...
## $ change_in_diastolic : num [1:17409] -18 -59 -9 -11 1 7 -14 5 -16 -13 ...
## $ filter_col_str : chr [1:17409] "Yes" "Yes" "Yes" "Yes" ...
## $ diabetes : chr [1:17409] "Yes" "No" "No" "No" ...
## $ age_group : chr [1:17409] "65-69" "40-44" "40-44" "65-69" ...
## - attr(*, "spec")=
## .. cols(
## .. patient_id = col_double(),
## .. country = col_character(),
## .. age = col_double(),
## .. facility = col_character(),
## .. diagnosis = col_character(),
## .. sex = col_character(),
## .. county = col_character(),
## .. enrollment_status = col_character(),
## .. enrollment_bmi = col_character(),
## .. latest_bmi = col_double(),
## .. baseline_systolic = col_double(),
## .. baseline_diastolic = col_double(),
## .. baseline_control_status = col_character(),
## .. baseline_grade = col_double(),
## .. enrollment_date = col_date(format = ""),
## .. baseline_assessment_date = col_date(format = ""),
## .. anchor_date = col_date(format = ""),
## .. followup_systolic = col_double(),
## .. followup_diastolic = col_double(),
## .. followup_control_status = col_character(),
## .. followup_grade = col_double(),
## .. followup_date = col_date(format = ""),
## .. days_between_assessment_and_anchor = col_double(),
## .. days_between_assessment_and_baseline = col_double(),
## .. number_followup_assessments = col_double(),
## .. total_followup_assessments = col_double(),
## .. number_followup_by_hs = col_double(),
## .. number_medical_reviews = col_double(),
## .. controlled_followup_days = col_double(),
## .. number_uncontrolled_followups = col_double(),
## .. number_community_assessments = col_double(),
## .. number_facility_assessments = col_double(),
## .. number_feeder_assessments = col_double(),
## .. change_in_systolic = col_double(),
## .. change_in_diastolic = col_double(),
## .. filter_col_str = col_character(),
## .. diabetes = col_character(),
## .. age_group = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
## [1] 17409 38
## [1] "patient_id"
## [2] "country"
## [3] "age"
## [4] "facility"
## [5] "diagnosis"
## [6] "sex"
## [7] "county"
## [8] "enrollment_status"
## [9] "enrollment_bmi"
## [10] "latest_bmi"
## [11] "baseline_systolic"
## [12] "baseline_diastolic"
## [13] "baseline_control_status"
## [14] "baseline_grade"
## [15] "enrollment_date"
## [16] "baseline_assessment_date"
## [17] "anchor_date"
## [18] "followup_systolic"
## [19] "followup_diastolic"
## [20] "followup_control_status"
## [21] "followup_grade"
## [22] "followup_date"
## [23] "days_between_assessment_and_anchor"
## [24] "days_between_assessment_and_baseline"
## [25] "number_followup_assessments"
## [26] "total_followup_assessments"
## [27] "number_followup_by_hs"
## [28] "number_medical_reviews"
## [29] "controlled_followup_days"
## [30] "number_uncontrolled_followups"
## [31] "number_community_assessments"
## [32] "number_facility_assessments"
## [33] "number_feeder_assessments"
## [34] "change_in_systolic"
## [35] "change_in_diastolic"
## [36] "filter_col_str"
## [37] "diabetes"
## [38] "age_group"
## [1] 17409
# Comparing the mean systolic blood pressure (SBP) at enrollment and follow-up
# Calculate the mean SBP at enrollment and follow-up
mean_sbp_enrollment <- mean(medtronic_subset$baseline_systolic, na.rm = TRUE)
mean_sbp_followup <- mean(medtronic_subset$followup_systolic, na.rm = TRUE)
# Calculate the mean difference
mean_diff <- mean_sbp_followup - mean_sbp_enrollment
# Create a data frame for the mean SBP at enrollment and follow-up
mean_sbp_df <- data.frame(
Time = c("Enrollment", "Follow-up"),
Mean_SBP = c(mean_sbp_enrollment, mean_sbp_followup)
)
# Create a bar plot for the mean SBP at enrollment and follow-up
ggplot(mean_sbp_df, aes(x = Time, y = Mean_SBP)) +
geom_bar(stat = "identity", fill = "steelblue") +
labs(title = "Mean Systolic Blood Pressure at Enrollment and Follow-up",
x = "Time",
y = "Mean SBP (mmHg)") +
theme_minimal()
# Comparing the mean diastolic blood pressure (DBP) at enrollment and
follow-up
# Comparing the mean diastolic blood pressure (DBP) at enrollment and follow-up
# Calculate the mean DBP at enrollment and follow-up
mean_dbp_enrollment <- mean(medtronic_subset$baseline_diastolic, na.rm = TRUE)
mean_dbp_followup <- mean(medtronic_subset$followup_diastolic, na.rm = TRUE)
# Calculate the mean difference
mean_diff_dbp <- mean_dbp_followup - mean_dbp_enrollment
# Create a data frame for the mean DBP at enrollment and follow-up
mean_dbp_df <- data.frame(
Time = c("Enrollment", "Follow-up"),
Mean_DBP = c(mean_dbp_enrollment, mean_dbp_followup)
)
# Create a bar plot for the mean DBP at enrollment and follow-up
ggplot(mean_dbp_df, aes(x = Time, y = Mean_DBP)) +
geom_bar(stat = "identity", fill = "steelblue") +
labs(title = "Mean Diastolic Blood Pressure at Enrollment and Follow-up",
x = "Time",
y = "Mean DBP (mmHg)") +
theme_minimal()
# Comparing the mean body mass index (BMI) at enrollment and
follow-up
# Comparing the mean body mass index (BMI) at enrollment and follow-up
# Calculate the mean BMI at enrollment and follow-up
mean_bmi_enrollment <- mean(medtronic_subset$enrollment_bmi, na.rm = TRUE)
mean_bmi_followup <- mean(medtronic_subset$latest_bmi, na.rm = TRUE)
# Calculate the mean difference
mean_diff_bmi <- mean_bmi_followup - mean_bmi_enrollment
# Create a data frame for the mean BMI at enrollment and follow-up
mean_bmi_df <- data.frame(
Time = c("Enrollment", "Follow-up"),
Mean_BMI = c(mean_bmi_enrollment, mean_bmi_followup)
)
# Create a bar plot for the mean BMI at enrollment and follow-up
ggplot(mean_bmi_df, aes(x = Time, y = Mean_BMI)) +
geom_bar(stat = "identity", fill = "steelblue") +
labs(title = "Mean Body Mass Index at Enrollment and Follow-up",
x = "Time",
y = "Mean BMI (kg/m^2)") +
theme_minimal()
# Comparing the mean sbp at enrollment and follow-up by
enrollment_status
# Comparing the mean sbp at enrollment and follow-up by enrollment_status
# Calculate the mean sbp at enrollment and follow-up by enrollment_status-add number of patients
mean_sbp_enrollment_status <- medtronic_subset %>%
group_by(enrollment_status) %>%
summarise(
Mean_SBP_Enrollment = mean(baseline_systolic, na.rm = TRUE),
Mean_SBP_Followup = mean(followup_systolic, na.rm = TRUE),
Number_of_Patients = n()
)
# Calculate the mean difference
mean_diff_sbp_enrollment_status <- mean_sbp_enrollment_status %>%
mutate(Mean_Difference = Mean_SBP_Followup - Mean_SBP_Enrollment)
mean_diff_sbp_enrollment_status## # A tibble: 2 × 5
## enrollment_status Mean_SBP_Enrollment Mean_SBP_Followup Number_of_Patients
## <chr> <dbl> <dbl> <int>
## 1 Direct Enrollment 142. 136. 15376
## 2 From Screening 143. 135. 2033
## # ℹ 1 more variable: Mean_Difference <dbl>
# Comparing mean sbp at enrollment and follow-up by county
# Calculate the mean sbp at enrollment and follow-up by county add number of patients
mean_sbp_county <- medtronic_subset %>%
group_by(county) %>%
summarise(
Mean_SBP_Enrollment = mean(baseline_systolic, na.rm = TRUE),
Mean_SBP_Followup = mean(followup_systolic, na.rm = TRUE),
Number_of_Patients = n()
)
# Calculate the mean difference
mean_diff_sbp_county <- mean_sbp_county %>%
mutate(Mean_Difference = Mean_SBP_Followup - Mean_SBP_Enrollment)
mean_diff_sbp_county## # A tibble: 10 × 5
## county Mean_SBP_Enrollment Mean_SBP_Followup Number_of_Patients
## <chr> <dbl> <dbl> <int>
## 1 Kakamega 143. 140. 1193
## 2 Kilifi 138. 134. 1702
## 3 Makueni 142. 135. 8474
## 4 Meru 144. 143. 248
## 5 Mombasa 143. 135. 1632
## 6 Nairobi 145. 137. 166
## 7 Nakuru 146. 139. 2191
## 8 Nyandurua 143. 140. 329
## 9 Nyeri 141. 139. 1280
## 10 Siaya 141. 137. 194
## # ℹ 1 more variable: Mean_Difference <dbl>
# Drop non-binary sex category because of few observations (Only 2)
# Drop rows where sex is not "Male" or "Female"
medtronic_subset <- medtronic_subset[medtronic_subset$sex %in% c("Male", "Female"), ]
mean_sbp_sex <- medtronic_subset %>%
group_by(sex) %>%
summarise(
Mean_SBP_Enrollment = mean(baseline_systolic, na.rm = TRUE),
Mean_SBP_Followup = mean(followup_systolic, na.rm = TRUE),
Number_of_Patients = n()
)
# Calculate the mean difference
mean_diff_sbp_sex <- mean_sbp_sex %>%
mutate(Mean_Difference = Mean_SBP_Followup - Mean_SBP_Enrollment)
mean_diff_sbp_sex## # A tibble: 2 × 5
## sex Mean_SBP_Enrollment Mean_SBP_Followup Number_of_Patients Mean_Difference
## <chr> <dbl> <dbl> <int> <dbl>
## 1 Fema… 142. 136. 13010 -5.90
## 2 Male 145. 138. 4397 -6.63
# Testing whether the mean difference is statistically significant
t_test_result <- t.test(medtronic_subset$followup_systolic, medtronic_subset$baseline_systolic, paired = TRUE)
# Display the t-test result
t_test_result##
## Paired t-test
##
## data: medtronic_subset$followup_systolic and medtronic_subset$baseline_systolic
## t = -32.918, df = 17406, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -6.449306 -5.724418
## sample estimates:
## mean difference
## -6.086862
# Comparing the mean systolic blood pressure (SBP) at enrollment and follow-up by five-year age groups
# Create a new variable for age group
medtronic_subset$age_group <- cut(medtronic_subset$age,
breaks = seq(0, 100, by = 5),
right = FALSE,
labels = paste(seq(0, 95, by = 5), seq(4, 99, by = 5), sep = "-"))
# Calculate the mean SBP at enrollment and follow-up by age group-add patient counts
mean_sbp_age_group <- medtronic_subset %>%
group_by(age_group) %>%
summarise(
Mean_SBP_Enrollment = mean(baseline_systolic, na.rm = TRUE),
Mean_SBP_Followup = mean(followup_systolic, na.rm = TRUE),
Number_of_Patients = n()
)
# Calculate the mean difference
mean_diff_sbp_age_group <- mean_sbp_age_group %>%
mutate(Mean_Difference = Mean_SBP_Followup - Mean_SBP_Enrollment)
mean_diff_sbp_age_group## # A tibble: 20 × 5
## age_group Mean_SBP_Enrollment Mean_SBP_Followup Number_of_Patients
## <fct> <dbl> <dbl> <int>
## 1 5-9 134 130. 2
## 2 10-14 114. 130. 3
## 3 15-19 136. 129. 12
## 4 20-24 133. 126. 30
## 5 25-29 138. 133. 101
## 6 30-34 139. 133. 212
## 7 35-39 138. 133. 435
## 8 40-44 140. 133. 747
## 9 45-49 140. 134. 1202
## 10 50-54 141. 135. 1906
## 11 55-59 140. 135. 1880
## 12 60-64 142. 136. 2636
## 13 65-69 143. 137. 2492
## 14 70-74 144. 138. 2680
## 15 75-79 145. 138. 1364
## 16 80-84 146. 138. 994
## 17 85-89 147. 139. 412
## 18 90-94 145. 139. 215
## 19 95-99 149. 142. 75
## 20 <NA> 154. 138. 9
## # ℹ 1 more variable: Mean_Difference <dbl>
# Comparing the mean systolic blood pressure (SBP) at enrollment and follow-up by diabetes status
# Calculate the mean SBP at enrollment and follow-up by diabetes status
mean_sbp_diabetes <- medtronic_subset %>%
group_by(diabetes) %>%
summarise(
Mean_SBP_Enrollment = mean(baseline_systolic, na.rm = TRUE),
Mean_SBP_Followup = mean(followup_systolic, na.rm = TRUE),
Number_of_Patients = n()
)
# Calculate the mean difference
mean_diff_sbp_diabetes <- mean_sbp_diabetes %>%
mutate(Mean_Difference = Mean_SBP_Followup - Mean_SBP_Enrollment)
mean_diff_sbp_diabetes## # A tibble: 2 × 5
## diabetes Mean_SBP_Enrollment Mean_SBP_Followup Number_of_Patients
## <chr> <dbl> <dbl> <int>
## 1 No 142. 136. 12806
## 2 Yes 142. 138. 4601
## # ℹ 1 more variable: Mean_Difference <dbl>
# Summarise the total number of followup assessments
summary(medtronic_subset$total_followup_assessments)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 3.000 4.000 5.751 8.000 79.000
# Display the distribution of the total number of followup assessments-
# histogram
hist(medtronic_subset$total_followup_assessments,
main = "Distribution of Total Follow-up Assessments",
xlab = "Total Follow-up Assessments",
ylab = "Frequency",
col = "lightblue",
border = "black")# Display the distribution of the total number of followup assessments-boxplot
boxplot(medtronic_subset$total_followup_assessments,
main = "Boxplot of Total Follow-up Assessments",
ylab = "Total Follow-up Assessments",
col = "lightblue",
border = "black")# Comparing the mean systolic blood pressure (SBP) at enrollment and follow-up by number of followup assessments
mean_sbp_followup_assessments <- medtronic_subset %>%
group_by(num_followup_assess) %>%
summarise(
Mean_SBP_Enrollment = mean(baseline_systolic, na.rm = TRUE),
Mean_SBP_Followup = mean(followup_systolic, na.rm = TRUE),
Number_of_Patients = n()
)
# Calculate the mean difference
mean_diff_sbp_followup_assessments <- mean_sbp_followup_assessments %>%
mutate(Mean_Difference = Mean_SBP_Followup - Mean_SBP_Enrollment)
mean_diff_sbp_followup_assessments## # A tibble: 2 × 5
## num_followup_assess Mean_SBP_Enrollment Mean_SBP_Followup Number_of_Patients
## <chr> <dbl> <dbl> <int>
## 1 "< 5 " 142. 137. 9256
## 2 "Five or more" 143. 135. 8151
## # ℹ 1 more variable: Mean_Difference <dbl>
# Testing whether the differences between baseline and followup sbp between the groups is statistically significant
t.test(change_in_systolic ~ num_followup_assess, data = medtronic_subset)##
## Welch Two Sample t-test
##
## data: change_in_systolic by num_followup_assess
## t = 6.4316, df = 17128, p-value = 1.296e-10
## alternative hypothesis: true difference in means between group < 5 and group Five or more is not equal to 0
## 95 percent confidence interval:
## 1.655077 3.106106
## sample estimates:
## mean in group < 5 mean in group Five or more
## -4.972126 -7.352717
##
## Welch Two Sample t-test
##
## data: change_in_systolic by diabetes
## t = -6.148, df = 8260.7, p-value = 8.209e-10
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
## -3.366255 -1.738611
## sample estimates:
## mean in group No mean in group Yes
## -6.761518 -4.209085
##
## Welch Two Sample t-test
##
## data: change_in_systolic by enrollment_status
## t = 3.828, df = 2607.4, p-value = 0.0001322
## alternative hypothesis: true difference in means between group Direct Enrollment and group From Screening is not equal to 0
## 95 percent confidence interval:
## 1.068750 3.313623
## sample estimates:
## mean in group Direct Enrollment mean in group From Screening
## -5.830948 -8.022135
# Comparing the mean systolic blood pressure (SBP) at enrollment and follow-up by number of followup assessments
# Create a subset of patients with only two followup assessments
# Create number of followup assessment variable- Categorise d based on those with two or more followup assessments
medtronic_subset <- medtronic_subset %>%
mutate(
assess_count = case_when(
total_followup_assessments <= 1 ~ "One",
TRUE ~ "Two or more "
)
)
# Create number of followup assessment variable- Cutoff for two or more and those with less
medtronic_subset <- medtronic_subset %>%
mutate (
at_least_two_followup = case_when(
total_followup_assessments >= 2 ~ "Two or more followups",
TRUE ~ "Less than two followups"
)
)mean_sbp_one_followup_assessments <- medtronic_subset %>%
group_by(assess_count) %>%
summarise(
Mean_SBP_Enrollment = mean(baseline_systolic, na.rm = TRUE),
Mean_SBP_Followup = mean(followup_systolic, na.rm = TRUE),
Number_of_Patients = n()
)
# Calculate the mean difference
mean_diff_sbp_one_followup_assessments <- mean_sbp_one_followup_assessments %>%
mutate(Mean_Difference = Mean_SBP_Followup - Mean_SBP_Enrollment)
mean_diff_sbp_one_followup_assessments## # A tibble: 2 × 5
## assess_count Mean_SBP_Enrollment Mean_SBP_Followup Number_of_Patients
## <chr> <dbl> <dbl> <int>
## 1 "One" 142. 137. 1364
## 2 "Two or more " 142. 136. 16043
## # ℹ 1 more variable: Mean_Difference <dbl>
# Testing whether the differences between baseline and followup sbp between the groups is statistically significant
t.test(change_in_systolic ~ assess_count, data = medtronic_subset)##
## Welch Two Sample t-test
##
## data: change_in_systolic by assess_count
## t = 2.6913, df = 1618, p-value = 0.007191
## alternative hypothesis: true difference in means between group One and group Two or more is not equal to 0
## 95 percent confidence interval:
## 0.4904499 3.1265206
## sample estimates:
## mean in group One mean in group Two or more
## -4.420088 -6.228573
##
## Welch Two Sample t-test
##
## data: change_in_systolic by diabetes
## t = -6.148, df = 8260.7, p-value = 8.209e-10
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
## -3.366255 -1.738611
## sample estimates:
## mean in group No mean in group Yes
## -6.761518 -4.209085
##
## Welch Two Sample t-test
##
## data: change_in_systolic by enrollment_status
## t = 3.828, df = 2607.4, p-value = 0.0001322
## alternative hypothesis: true difference in means between group Direct Enrollment and group From Screening is not equal to 0
## 95 percent confidence interval:
## 1.068750 3.313623
## sample estimates:
## mean in group Direct Enrollment mean in group From Screening
## -5.830948 -8.022135
mean_sbp_at_least_two_followup_assessments <- medtronic_subset %>%
group_by(at_least_two_followup) %>%
summarise(
Mean_SBP_Enrollment = mean(baseline_systolic, na.rm = TRUE),
Mean_SBP_Followup = mean(followup_systolic, na.rm = TRUE),
Number_of_Patients = n()
)
# Calculate the mean difference
mean_diff_sbp_at_least_two_followup_assessments <- mean_sbp_at_least_two_followup_assessments %>%
mutate(Mean_Difference = Mean_SBP_Followup - Mean_SBP_Enrollment)
mean_diff_sbp_at_least_two_followup_assessments## # A tibble: 2 × 5
## at_least_two_followup Mean_SBP_Enrollment Mean_SBP_Followup Number_of_Patients
## <chr> <dbl> <dbl> <int>
## 1 Less than two follow… 142. 137. 1364
## 2 Two or more followups 142. 136. 16043
## # ℹ 1 more variable: Mean_Difference <dbl>
# Testing whether the differences between baseline and followup sbp between the groups is statistically significant
t.test(change_in_systolic ~ assess_count, data = medtronic_subset)##
## Welch Two Sample t-test
##
## data: change_in_systolic by assess_count
## t = 2.6913, df = 1618, p-value = 0.007191
## alternative hypothesis: true difference in means between group One and group Two or more is not equal to 0
## 95 percent confidence interval:
## 0.4904499 3.1265206
## sample estimates:
## mean in group One mean in group Two or more
## -4.420088 -6.228573
##
## Welch Two Sample t-test
##
## data: change_in_systolic by diabetes
## t = -6.148, df = 8260.7, p-value = 8.209e-10
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
## -3.366255 -1.738611
## sample estimates:
## mean in group No mean in group Yes
## -6.761518 -4.209085
##
## Welch Two Sample t-test
##
## data: change_in_systolic by enrollment_status
## t = 3.828, df = 2607.4, p-value = 0.0001322
## alternative hypothesis: true difference in means between group Direct Enrollment and group From Screening is not equal to 0
## 95 percent confidence interval:
## 1.068750 3.313623
## sample estimates:
## mean in group Direct Enrollment mean in group From Screening
## -5.830948 -8.022135
##
## Welch Two Sample t-test
##
## data: change_in_systolic by at_least_two_followup
## t = 2.6913, df = 1618, p-value = 0.007191
## alternative hypothesis: true difference in means between group Less than two followups and group Two or more followups is not equal to 0
## 95 percent confidence interval:
## 0.4904499 3.1265206
## sample estimates:
## mean in group Less than two followups mean in group Two or more followups
## -4.420088 -6.228573
# Create subsets of patients by enrollment status
medtronic_direct_enrollment <- medtronic_subset %>%
filter(enrollment_status == "Direct Enrollment")
medtronic_screening <- medtronic_subset %>%
filter(enrollment_status == "From Screening")
# Display the number of patients in each subset
nrow(medtronic_direct_enrollment)## [1] 15374
## [1] 2033
# Comparing the mean systolic blood pressure (SBP) at enrollment and follow-up by number of followup assessments
mean_sbp_one_followup_assessments_screening <- medtronic_screening %>%
group_by(assess_count) %>%
summarise(
Mean_SBP_Enrollment = mean(baseline_systolic, na.rm = TRUE),
Mean_SBP_Followup = mean(followup_systolic, na.rm = TRUE),
Number_of_Patients = n()
)
# Calculate the mean difference
mean_diff_sbp_one_followup_assessments_screening <- mean_sbp_one_followup_assessments_screening %>%
mutate(Mean_Difference = Mean_SBP_Followup - Mean_SBP_Enrollment)
mean_diff_sbp_one_followup_assessments_screening## # A tibble: 2 × 5
## assess_count Mean_SBP_Enrollment Mean_SBP_Followup Number_of_Patients
## <chr> <dbl> <dbl> <int>
## 1 "One" 141. 137. 169
## 2 "Two or more " 144. 135. 1864
## # ℹ 1 more variable: Mean_Difference <dbl>
# Testing whether the differences between baseline and followup sbp between the groups is statistically significant
t.test(change_in_systolic ~ assess_count, data = medtronic_screening)##
## Welch Two Sample t-test
##
## data: change_in_systolic by assess_count
## t = 2.1467, df = 200.23, p-value = 0.03302
## alternative hypothesis: true difference in means between group One and group Two or more is not equal to 0
## 95 percent confidence interval:
## 0.3378558 7.9581799
## sample estimates:
## mean in group One mean in group Two or more
## -4.218935 -8.366953
##
## Welch Two Sample t-test
##
## data: change_in_systolic by diabetes
## t = -3.5855, df = 381.4, p-value = 0.0003801
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
## -8.369783 -2.441307
## sample estimates:
## mean in group No mean in group Yes
## -8.763968 -3.358423
# Comparing the mean systolic blood pressure (SBP) at enrollment and follow-up by number of followup assessments
mean_sbp_one_followup_assessments_direct_enrollment <- medtronic_direct_enrollment %>%
group_by(assess_count) %>%
summarise(
Mean_SBP_Enrollment = mean(baseline_systolic, na.rm = TRUE),
Mean_SBP_Followup = mean(followup_systolic, na.rm = TRUE),
Number_of_Patients = n()
)
# Calculate the mean difference
mean_diff_sbp_one_followup_assessments_direct_enrollment <- mean_sbp_one_followup_assessments_direct_enrollment %>%
mutate(Mean_Difference = Mean_SBP_Followup - Mean_SBP_Enrollment)
mean_diff_sbp_one_followup_assessments_direct_enrollment## # A tibble: 2 × 5
## assess_count Mean_SBP_Enrollment Mean_SBP_Followup Number_of_Patients
## <chr> <dbl> <dbl> <int>
## 1 "One" 142. 137. 1195
## 2 "Two or more " 142. 136. 14179
## # ℹ 1 more variable: Mean_Difference <dbl>
# Testing whether the differences between baseline and followup sbp between the groups is statistically significant
t.test(change_in_systolic ~ assess_count, data = medtronic_direct_enrollment)##
## Welch Two Sample t-test
##
## data: change_in_systolic by assess_count
## t = 2.0907, df = 1416.1, p-value = 0.03673
## alternative hypothesis: true difference in means between group One and group Two or more is not equal to 0
## 95 percent confidence interval:
## 0.09255293 2.90529096
## sample estimates:
## mean in group One mean in group Two or more
## -4.448536 -5.947458
##
## Welch Two Sample t-test
##
## data: change_in_systolic by diabetes
## t = -5.02, df = 8014.9, p-value = 5.278e-07
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
## -3.030880 -1.328565
## sample estimates:
## mean in group No mean in group Yes
## -6.443721 -4.263998
# Comparing the mean systolic blood pressure (SBP) at enrollment and follow-up by number of followup assessments
mean_sbp_at_least_two_followup_assessments_screening <- medtronic_screening %>%
group_by(at_least_two_followup) %>%
summarise(
Mean_SBP_Enrollment = mean(baseline_systolic, na.rm = TRUE),
Mean_SBP_Followup = mean(followup_systolic, na.rm = TRUE),
Number_of_Patients = n()
)
# Calculate the mean difference
mean_diff_sbp_at_least_two_followup_assessments_screening <- mean_sbp_at_least_two_followup_assessments_screening %>%
mutate(Mean_Difference = Mean_SBP_Followup - Mean_SBP_Enrollment)
mean_diff_sbp_at_least_two_followup_assessments_screening## # A tibble: 2 × 5
## at_least_two_followup Mean_SBP_Enrollment Mean_SBP_Followup Number_of_Patients
## <chr> <dbl> <dbl> <int>
## 1 Less than two follow… 141. 137. 169
## 2 Two or more followups 144. 135. 1864
## # ℹ 1 more variable: Mean_Difference <dbl>
# Testing whether the differences between baseline and followup sbp between the groups is statistically significant
t.test(change_in_systolic ~ assess_count, data = medtronic_screening)##
## Welch Two Sample t-test
##
## data: change_in_systolic by assess_count
## t = 2.1467, df = 200.23, p-value = 0.03302
## alternative hypothesis: true difference in means between group One and group Two or more is not equal to 0
## 95 percent confidence interval:
## 0.3378558 7.9581799
## sample estimates:
## mean in group One mean in group Two or more
## -4.218935 -8.366953
##
## Welch Two Sample t-test
##
## data: change_in_systolic by diabetes
## t = -3.5855, df = 381.4, p-value = 0.0003801
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
## -8.369783 -2.441307
## sample estimates:
## mean in group No mean in group Yes
## -8.763968 -3.358423
##
## Welch Two Sample t-test
##
## data: change_in_systolic by at_least_two_followup
## t = 2.1467, df = 200.23, p-value = 0.03302
## alternative hypothesis: true difference in means between group Less than two followups and group Two or more followups is not equal to 0
## 95 percent confidence interval:
## 0.3378558 7.9581799
## sample estimates:
## mean in group Less than two followups mean in group Two or more followups
## -4.218935 -8.366953
# Comparing the mean systolic blood pressure (SBP) at enrollment and follow-up by number of followup assessments
mean_sbp_at_least_two_followup_assessments_direct_enrollment <- medtronic_direct_enrollment %>%
group_by(at_least_two_followup) %>%
summarise(
Mean_SBP_Enrollment = mean(baseline_systolic, na.rm = TRUE),
Mean_SBP_Followup = mean(followup_systolic, na.rm = TRUE),
Number_of_Patients = n()
)
# Calculate the mean difference
mean_diff_sbp_at_least_two_followup_assessments_direct_enrollment <- mean_sbp_at_least_two_followup_assessments_direct_enrollment %>%
mutate(Mean_Difference = Mean_SBP_Followup - Mean_SBP_Enrollment)
mean_diff_sbp_at_least_two_followup_assessments_direct_enrollment## # A tibble: 2 × 5
## at_least_two_followup Mean_SBP_Enrollment Mean_SBP_Followup Number_of_Patients
## <chr> <dbl> <dbl> <int>
## 1 Less than two follow… 142. 137. 1195
## 2 Two or more followups 142. 136. 14179
## # ℹ 1 more variable: Mean_Difference <dbl>
# Testing whether the differences between baseline and followup sbp between the groups is statistically significant
t.test(change_in_systolic ~ assess_count, data = medtronic_direct_enrollment)##
## Welch Two Sample t-test
##
## data: change_in_systolic by assess_count
## t = 2.0907, df = 1416.1, p-value = 0.03673
## alternative hypothesis: true difference in means between group One and group Two or more is not equal to 0
## 95 percent confidence interval:
## 0.09255293 2.90529096
## sample estimates:
## mean in group One mean in group Two or more
## -4.448536 -5.947458
##
## Welch Two Sample t-test
##
## data: change_in_systolic by diabetes
## t = -5.02, df = 8014.9, p-value = 5.278e-07
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
## -3.030880 -1.328565
## sample estimates:
## mean in group No mean in group Yes
## -6.443721 -4.263998
##
## Welch Two Sample t-test
##
## data: change_in_systolic by at_least_two_followup
## t = 2.0907, df = 1416.1, p-value = 0.03673
## alternative hypothesis: true difference in means between group Less than two followups and group Two or more followups is not equal to 0
## 95 percent confidence interval:
## 0.09255293 2.90529096
## sample estimates:
## mean in group Less than two followups mean in group Two or more followups
## -4.448536 -5.947458
# Comparing the mean systolic blood pressure (SBP) at enrollment and follow-up by number of followup assessments
mean_sbp_five_followup_assessments_screening <- medtronic_screening %>%
group_by(num_followup_assess) %>%
summarise(
Mean_SBP_Enrollment = mean(baseline_systolic, na.rm = TRUE),
Mean_SBP_Followup = mean(followup_systolic, na.rm = TRUE),
Number_of_Patients = n()
)
# Calculate the mean difference
mean_diff_sbp_five_followup_assessments_screening <- mean_sbp_five_followup_assessments_screening %>%
mutate(Mean_Difference = Mean_SBP_Followup - Mean_SBP_Enrollment)
mean_diff_sbp_five_followup_assessments_screening## # A tibble: 2 × 5
## num_followup_assess Mean_SBP_Enrollment Mean_SBP_Followup Number_of_Patients
## <chr> <dbl> <dbl> <int>
## 1 "< 5 " 142. 136. 1064
## 2 "Five or more" 145. 135. 969
## # ℹ 1 more variable: Mean_Difference <dbl>
# Testing whether the differences between baseline and followup sbp between the groups is statistically significant
t.test(change_in_systolic ~ num_followup_assess, data = medtronic_screening)##
## Welch Two Sample t-test
##
## data: change_in_systolic by num_followup_assess
## t = 4.4073, df = 2006.2, p-value = 1.102e-05
## alternative hypothesis: true difference in means between group < 5 and group Five or more is not equal to 0
## 95 percent confidence interval:
## 2.622849 6.828389
## sample estimates:
## mean in group < 5 mean in group Five or more
## -5.769737 -10.495356
##
## Welch Two Sample t-test
##
## data: change_in_systolic by diabetes
## t = -3.5855, df = 381.4, p-value = 0.0003801
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
## -8.369783 -2.441307
## sample estimates:
## mean in group No mean in group Yes
## -8.763968 -3.358423
##
## Welch Two Sample t-test
##
## data: change_in_systolic by assess_count
## t = 2.1467, df = 200.23, p-value = 0.03302
## alternative hypothesis: true difference in means between group One and group Two or more is not equal to 0
## 95 percent confidence interval:
## 0.3378558 7.9581799
## sample estimates:
## mean in group One mean in group Two or more
## -4.218935 -8.366953
# Comparing the mean systolic blood pressure (SBP) at enrollment and follow-up by number of followup assessments
mean_sbp_five_followup_assessments_direct_enrollment <- medtronic_direct_enrollment %>%
group_by(num_followup_assess) %>%
summarise(
Mean_SBP_Enrollment = mean(baseline_systolic, na.rm = TRUE),
Mean_SBP_Followup = mean(followup_systolic, na.rm = TRUE),
Number_of_Patients = n()
)
# Calculate the mean difference
mean_diff_sbp_five_followup_assessments_direct_enrollment <- mean_sbp_five_followup_assessments_direct_enrollment %>%
mutate(Mean_Difference = Mean_SBP_Followup - Mean_SBP_Enrollment)
mean_diff_sbp_five_followup_assessments_direct_enrollment## # A tibble: 2 × 5
## num_followup_assess Mean_SBP_Enrollment Mean_SBP_Followup Number_of_Patients
## <chr> <dbl> <dbl> <int>
## 1 "< 5 " 142. 137. 8192
## 2 "Five or more" 142. 136. 7182
## # ℹ 1 more variable: Mean_Difference <dbl>
# Testing whether the differences between baseline and followup sbp between the groups is statistically significant
t.test(change_in_systolic ~ num_followup_assess, data = medtronic_direct_enrollment)##
## Welch Two Sample t-test
##
## data: change_in_systolic by num_followup_assess
## t = 5.2274, df = 15123, p-value = 1.742e-07
## alternative hypothesis: true difference in means between group < 5 and group Five or more is not equal to 0
## 95 percent confidence interval:
## 1.287674 2.832687
## sample estimates:
## mean in group < 5 mean in group Five or more
## -4.868530 -6.928711
##
## Welch Two Sample t-test
##
## data: change_in_systolic by diabetes
## t = -5.02, df = 8014.9, p-value = 5.278e-07
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
## -3.030880 -1.328565
## sample estimates:
## mean in group No mean in group Yes
## -6.443721 -4.263998
##
## Welch Two Sample t-test
##
## data: change_in_systolic by assess_count
## t = 2.0907, df = 1416.1, p-value = 0.03673
## alternative hypothesis: true difference in means between group One and group Two or more is not equal to 0
## 95 percent confidence interval:
## 0.09255293 2.90529096
## sample estimates:
## mean in group One mean in group Two or more
## -4.448536 -5.947458
##
## Welch Two Sample t-test
##
## data: change_in_systolic by at_least_two_followup
## t = 2.0907, df = 1416.1, p-value = 0.03673
## alternative hypothesis: true difference in means between group Less than two followups and group Two or more followups is not equal to 0
## 95 percent confidence interval:
## 0.09255293 2.90529096
## sample estimates:
## mean in group Less than two followups mean in group Two or more followups
## -4.448536 -5.947458
###############Assessing the effect of followup assessments on systolic blood pressure change#######
# Fitting a linear regression model to predict change in systolic blood pressure per month
# Create a new variable for change in systolic blood pressure
medtronic_subset$change_in_systolic <- medtronic_subset$followup_systolic - medtronic_subset$baseline_systolic
# Fit a linear regression model to predict change in systolic blood pressure
lm_sbp_change <- lm(change_in_systolic~num_followup_assess, data = medtronic_screening)
# Display the summary of the linear regression model
summary(lm_sbp_change)##
## Call:
## lm(formula = change_in_systolic ~ num_followup_assess, data = medtronic_screening)
##
## Residuals:
## Min 1Q Median 3Q Max
## -93.50 -13.51 0.77 14.49 89.50
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.7697 0.7396 -7.801 9.76e-15 ***
## num_followup_assessFive or more -4.7256 1.0713 -4.411 1.08e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.13 on 2031 degrees of freedom
## Multiple R-squared: 0.009489, Adjusted R-squared: 0.009001
## F-statistic: 19.46 on 1 and 2031 DF, p-value: 1.083e-05
## (Intercept) num_followup_assessFive or more
## -5.769737 -4.725619
## Analysis of Variance Table
##
## Response: change_in_systolic
## Df Sum Sq Mean Sq F value Pr(>F)
## num_followup_assess 1 11325 11325.2 19.457 1.083e-05 ***
## Residuals 2031 1182193 582.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Display the diagnostic plots of the linear regression model
par(mfrow = c(2, 2))
plot(lm_sbp_change)
# Adding other covariates
# Adding other covariates
lm_sbp_change_2 <- lm(change_in_systolic~num_followup_assess+age+sex+latest_bmi+diabetes, data = medtronic_screening)
# Display the summary of the linear regression model
summary(lm_sbp_change_2)##
## Call:
## lm(formula = change_in_systolic ~ num_followup_assess + age +
## sex + latest_bmi + diabetes, data = medtronic_screening)
##
## Residuals:
## Min 1Q Median 3Q Max
## -93.412 -13.718 1.135 14.439 90.860
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.33270 2.65984 -0.877 0.38059
## num_followup_assessFive or more -4.86393 1.07055 -4.543 5.86e-06 ***
## age -0.06557 0.04049 -1.619 0.10551
## sexMale -1.19379 1.21762 -0.980 0.32699
## latest_bmi 0.01026 0.01821 0.563 0.57325
## diabetesYes 5.72624 1.55156 3.691 0.00023 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.04 on 2027 degrees of freedom
## Multiple R-squared: 0.01833, Adjusted R-squared: 0.01591
## F-statistic: 7.571 on 5 and 2027 DF, p-value: 4.681e-07
## (Intercept) num_followup_assessFive or more
## -2.33269794 -4.86392963
## age sexMale
## -0.06556565 -1.19379261
## latest_bmi diabetesYes
## 0.01025691 5.72623962
## Analysis of Variance Table
##
## Response: change_in_systolic
## Df Sum Sq Mean Sq F value Pr(>F)
## num_followup_assess 1 11325 11325.2 19.5932 1.009e-05 ***
## age 1 1900 1900.1 3.2872 0.0699685 .
## sex 1 574 573.8 0.9928 0.3191877
## latest_bmi 1 209 208.9 0.3614 0.5478166
## diabetes 1 7873 7873.0 13.6208 0.0002296 ***
## Residuals 2027 1171637 578.0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Display the diagnostic plots of the linear regression model
par(mfrow = c(2, 2))
plot(lm_sbp_change_2)
####################################################################################################
# Propensity score matching
library(MatchIt)
# Create a new variable for enrollment status
medtronic_screening$num_followup_assess <- as.factor(medtronic_screening$num_followup_assess)
# Fit a propensity score model
ps_model <- matchit(num_followup_assess ~ age + sex + baseline_grade + diabetes,
data = medtronic_screening, method = "nearest", distance = "logit")
# Display the summary of the propensity score model
summary(ps_model)##
## Call:
## matchit(formula = num_followup_assess ~ age + sex + baseline_grade +
## diabetes, data = medtronic_screening, method = "nearest",
## distance = "logit")
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.4844 0.4695 0.2414 1.0793 0.0629
## age 63.4964 62.3590 0.0893 0.8437 0.0159
## sexFemale 0.7430 0.7190 0.0550 . 0.0240
## sexMale 0.2570 0.2810 -0.0550 . 0.0240
## baseline_grade 0.9783 0.7970 0.1860 1.1205 0.0453
## diabetesNo 0.8452 0.8788 -0.0928 . 0.0336
## diabetesYes 0.1548 0.1212 0.0928 . 0.0336
## eCDF Max
## distance 0.1257
## age 0.0438
## sexFemale 0.0240
## sexMale 0.0240
## baseline_grade 0.0913
## diabetesNo 0.0336
## diabetesYes 0.0336
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.4844 0.4765 0.1291 1.1510 0.0350
## age 63.4964 63.3106 0.0146 0.8952 0.0084
## sexFemale 0.7430 0.7410 0.0047 . 0.0021
## sexMale 0.2570 0.2590 -0.0047 . 0.0021
## baseline_grade 0.9783 0.8648 0.1164 1.0916 0.0284
## diabetesNo 0.8452 0.8669 -0.0599 . 0.0217
## diabetesYes 0.1548 0.1331 0.0599 . 0.0217
## eCDF Max Std. Pair Dist.
## distance 0.0898 0.1294
## age 0.0310 0.9663
## sexFemale 0.0021 0.6896
## sexMale 0.0021 0.6896
## baseline_grade 0.0506 0.5503
## diabetesNo 0.0217 0.5392
## diabetesYes 0.0217 0.5392
##
## Sample Sizes:
## Control Treated
## All 1064 969
## Matched 969 969
## Unmatched 95 0
## Discarded 0 0
# Create a matched dataset
matched_data <- match.data(ps_model)
# Fit a linear regression model to predict change in systolic blood pressure
lm_sbp_change_matched <- lm(change_in_systolic ~ num_followup_assess, data = matched_data)
# Display the summary of the linear regression model
summary(lm_sbp_change_matched)##
## Call:
## lm(formula = change_in_systolic ~ num_followup_assess, data = matched_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -93.505 -13.505 0.809 14.809 89.495
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.8091 0.7775 -8.758 < 2e-16 ***
## num_followup_assessFive or more -3.6863 1.0995 -3.353 0.000816 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.2 on 1936 degrees of freedom
## Multiple R-squared: 0.005772, Adjusted R-squared: 0.005259
## F-statistic: 11.24 on 1 and 1936 DF, p-value: 0.000816
## (Intercept) num_followup_assessFive or more
## -6.809082 -3.686275
## Analysis of Variance Table
##
## Response: change_in_systolic
## Df Sum Sq Mean Sq F value Pr(>F)
## num_followup_assess 1 6584 6583.7 11.24 0.000816 ***
## Residuals 1936 1133990 585.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Display the diagnostic plots of the linear regression model
par(mfrow = c(2, 2))
plot(lm_sbp_change_matched)## cobalt (Version 4.6.0, Build Date: 2025-04-15)
##
## Attaching package: 'cobalt'
## The following object is masked from 'package:MatchIt':
##
## lalonde
## Balance Measures
## Type Diff.Adj
## distance Distance 0.1291
## age Contin. 0.0146
## sex_Male Binary -0.0021
## baseline_grade Contin. 0.1164
## diabetes_Yes Binary 0.0217
##
## Sample sizes
## Control Treated
## All 1064 969
## Matched 969 969
## Unmatched 95 0
## Balance Measures
## Type Diff.Adj
## distance Distance 0.1291
## age Contin. 0.0146
## sex_Male Binary -0.0021
## baseline_grade Contin. 0.1164
## diabetes_Yes Binary 0.0217
##
## Sample sizes
## Control Treated
## All 1064 969
## Matched 969 969
## Unmatched 95 0
# Create a plot comparing observed vs. counterfactual BP change for
matched data:
# Create a plot comparing observed vs. counterfactual BP change for matched sample
library(ggplot2)
ggplot(matched_data, aes(x = num_followup_assess, y = change_in_systolic)) +
geom_boxplot() +
labs(title = "Effect of Follow-up on Systolic BP Change (Matched Sample)",
x = "Follow-up Assessments",
y = "Change in Systolic BP (mmHg)")
# Create a plot comparing observed vs. counterfactual BP change for
matched sample
# Create a plot comparing observed vs. counterfactual BP change for control group
ggplot(matched_data, aes(x = num_followup_assess, y = change_in_systolic)) +
geom_boxplot() +
labs(title = "Effect of Follow-up on Systolic BP Change (Matched Sample)",
x = "Follow-up Assessments",
y = "Change in Systolic BP (mmHg)")
## Inverse Probability of Treatment Weighting (IPTW)
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
# Step 1: Estimate the propensity scores
ps <- glm(num_followup_assess ~ age + sex + baseline_grade + diabetes,
family = binomial(), data = medtronic_screening)$fitted.values
# Step 2: Create the weights
medtronic_screening$weights <- ifelse(medtronic_screening$num_followup_assess == "Five or more", 1/ps, 1/(1 - ps))
# Step 3: Create a survey design object
design <- svydesign(id = ~1, data = medtronic_screening, weights = ~weights)
# Step 4: Fit a linear regression model using the survey design object
iptw_model <- svyglm(change_in_systolic ~ num_followup_assess, design = design)
# Step 5: Display the summary of the linear regression model
summary(iptw_model)##
## Call:
## svyglm(formula = change_in_systolic ~ num_followup_assess, design = design)
##
## Survey design:
## svydesign(id = ~1, data = medtronic_screening, weights = ~weights)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.1008 0.7822 -9.078 <2e-16 ***
## num_followup_assessFive or more -1.9648 1.0879 -1.806 0.0711 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 585.9808)
##
## Number of Fisher Scoring iterations: 2
## (Intercept) num_followup_assessFive or more
## -7.100807 -1.964818
## NULL
# Step 8: Display the diagnostic plots of the linear regression model
par(mfrow = c(2, 2))
plot(iptw_model)
# IPTW Balance Diagnostics
# IPTW Balance Diagnostics
library(cobalt)
bal.tab(num_followup_assess ~ age + sex + latest_bmi + diabetes,
data = medtronic_screening,
weights = medtronic_screening$weights,
method = "weighting")## Note: `s.d.denom` not specified; assuming "pooled".
## Balance Measures
## Type Diff.Adj
## age Contin. 0.0002
## sex_Male Binary -0.0002
## latest_bmi Contin. -0.0267
## diabetes_Yes Binary -0.0001
##
## Effective sample sizes
## < 5 Five or more
## Unadjusted 1064. 969
## Adjusted 1047.88 954
love.plot(num_followup_assess ~ age + sex + baseline_grade + diabetes,
data = medtronic_screening,
weights = medtronic_screening$weights,
method = "weighting")## Note: `s.d.denom` not specified; assuming "pooled".
## Warning: Standardized mean differences and raw mean differences are present in
## the same plot. Use the `stars` argument to distinguish between them and
## appropriately label the x-axis. See `?love.plot` for details.
# Load the survey package
library(survey)
# --- Existing IPTW Steps ---
# Step 1: Estimate the propensity scores
ps <- glm(num_followup_assess ~ age + sex + baseline_grade + diabetes,
family = binomial(), data = medtronic_screening)$fitted.values
# Step 2: Create the weights
medtronic_screening$weights <- ifelse(medtronic_screening$num_followup_assess == "Five or more", 1/ps, 1/(1 - ps))
# Step 3: Create a survey design object
design <- svydesign(id = ~1, data = medtronic_screening, weights = ~weights)
# Step 4: Fit a linear regression model using the survey design object
iptw_model <- svyglm(change_in_systolic ~ num_followup_assess, design = design)
# --- Doubly Robust Estimation Step ---
# Step 4: Fit a linear regression model using the survey design object
# AND include the previously unbalanced covariates (age, diabetes)
# in the outcome model.
iptw_doubly_robust_model <- svyglm(change_in_systolic ~ num_followup_assess + age + diabetes,
design = design)
# Step 5: Display the summary of the doubly robust linear regression model
summary(iptw_doubly_robust_model)##
## Call:
## svyglm(formula = change_in_systolic ~ num_followup_assess + age +
## diabetes, design = design)
##
## Survey design:
## svydesign(id = ~1, data = medtronic_screening, weights = ~weights)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.04615 2.60254 -1.170 0.241956
## num_followup_assessFive or more -1.96429 1.08426 -1.812 0.070190 .
## age -0.07612 0.04008 -1.899 0.057663 .
## diabetesYes 5.34718 1.54922 3.452 0.000569 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 581.5351)
##
## Number of Fisher Scoring iterations: 2
# Step 6: Display the coefficients of the doubly robust linear regression model
coef(iptw_doubly_robust_model)## (Intercept) num_followup_assessFive or more
## -3.04615012 -1.96428747
## age diabetesYes
## -0.07612023 5.34718442
# Step 7: Display the ANOVA table of the doubly robust linear regression model
anova(iptw_doubly_robust_model)## Anova table: (Rao-Scott LRT)
## svyglm(formula = change_in_systolic ~ num_followup_assess, design = design)
## stats DEff df ddf p
## num_followup_assess 1962.1 601.53 1.00 2031 0.0731049 .
## age 2131.8 584.11 1.00 2030 0.0580155 .
## diabetes 6901.8 579.34 1.00 2029 0.0006081 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Step 8: Display the diagnostic plots of the doubly robust linear regression model
par(mfrow = c(2, 2))
plot(iptw_doubly_robust_model)36.1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.1
# Propensity score matching
library(MatchIt)
# Create a new variable for enrollment status
medtronic_screening$assess_count <- as.factor(medtronic_screening$assess_count)
# OR Option B: Remove weight variable
medtronic_screening$weights <- NULL
# Fit a propensity score model
ps_model_2 <- matchit(assess_count ~ age + sex + baseline_grade + diabetes,
data = medtronic_screening, method = "nearest", distance = "logit")## Warning: Fewer control units than treated units; not all treated units will get
## a match.
##
## Call:
## matchit(formula = assess_count ~ age + sex + baseline_grade +
## diabetes, data = medtronic_screening, method = "nearest",
## distance = "logit")
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.9172 0.9135 0.2220 1.0300 0.0488
## age 63.0397 61.3728 0.1264 0.7636 0.0241
## sexFemale 0.7318 0.7160 0.0356 . 0.0158
## sexMale 0.2682 0.2840 -0.0356 . 0.0158
## baseline_grade 0.8879 0.8343 0.0562 1.0201 0.0161
## diabetesNo 0.8584 0.9112 -0.1516 . 0.0529
## diabetesYes 0.1416 0.0888 0.1516 . 0.0529
## eCDF Max
## distance 0.0963
## age 0.0664
## sexFemale 0.0158
## sexMale 0.0158
## baseline_grade 0.0410
## diabetesNo 0.0529
## diabetesYes 0.0529
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.9502 0.9135 2.2311 0.0723 0.5148
## age 68.6272 61.3728 0.5503 0.3653 0.0904
## sexFemale 0.8225 0.7160 0.2404 . 0.1065
## sexMale 0.1775 0.2840 -0.2404 . 0.1065
## baseline_grade 0.9467 0.8343 0.1181 0.9810 0.0340
## diabetesNo 0.0059 0.9112 -2.5965 . 0.9053
## diabetesYes 0.9941 0.0888 2.5965 . 0.9053
## eCDF Max Std. Pair Dist.
## distance 0.9349 2.2311
## age 0.3018 0.8645
## sexFemale 0.1065 0.8548
## sexMale 0.1065 0.8548
## baseline_grade 0.1006 0.9879
## diabetesNo 0.9053 2.5965
## diabetesYes 0.9053 2.5965
##
## Sample Sizes:
## Control Treated
## All 169 1864
## Matched 169 169
## Unmatched 0 1695
## Discarded 0 0
# Create a matched dataset
matched_data_2 <- match.data(ps_model_2)
# Fit a linear regression model to predict change in systolic blood pressure
lm_sbp_change_matched_2 <- lm(change_in_systolic ~ assess_count, data = matched_data_2)
# Display the summary of the linear regression model
summary(lm_sbp_change_matched_2)##
## Call:
## lm(formula = change_in_systolic ~ assess_count, data = matched_data_2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -81.456 -16.287 -0.118 15.463 67.219
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.219 1.900 -2.220 0.0271 *
## assess_countTwo or more -4.325 2.688 -1.609 0.1085
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.71 on 336 degrees of freedom
## Multiple R-squared: 0.00765, Adjusted R-squared: 0.004697
## F-statistic: 2.59 on 1 and 336 DF, p-value: 0.1085
## (Intercept) assess_countTwo or more
## -4.218935 -4.325444
## Analysis of Variance Table
##
## Response: change_in_systolic
## Df Sum Sq Mean Sq F value Pr(>F)
## assess_count 1 1581 1580.95 2.5902 0.1085
## Residuals 336 205081 610.36
# Display the diagnostic plots of the linear regression model
par(mfrow = c(2, 2))
plot(lm_sbp_change_matched_2)## Balance Measures
## Type Diff.Adj
## distance Distance 2.2311
## age Contin. 0.5503
## sex_Male Binary -0.1065
## baseline_grade Contin. 0.1181
## diabetes_Yes Binary 0.9053
##
## Sample sizes
## Control Treated
## All 169 1864
## Matched 169 169
## Unmatched 0 1695
## Balance Measures
## Type Diff.Adj
## distance Distance 2.2311
## age Contin. 0.5503
## sex_Male Binary -0.1065
## baseline_grade Contin. 0.1181
## diabetes_Yes Binary 0.9053
##
## Sample sizes
## Control Treated
## All 169 1864
## Matched 169 169
## Unmatched 0 1695
# Create a plot comparing observed vs. counterfactual BP change for
matched data:
# Create a plot comparing observed vs. counterfactual BP change for matched sample
library(ggplot2)
ggplot(matched_data_2, aes(x = assess_count, y = change_in_systolic)) +
geom_boxplot() +
labs(title = "Effect of Follow-up on Systolic BP Change (Matched Sample)",
x = "Follow-up Assessments",
y = "Change in Systolic BP (mmHg)")
# Create a plot comparing observed vs. counterfactual BP change for
matched sample
# Create a plot comparing observed vs. counterfactual BP change for control group
ggplot(matched_data_2, aes(x = assess_count, y = change_in_systolic)) +
geom_boxplot() +
labs(title = "Effect of Follow-up on Systolic BP Change (Matched Sample)",
x = "Follow-up Assessments",
y = "Change in Systolic BP (mmHg)")
## Inverse Probability of Treatment Weighting (IPTW)
# Inverse Probability of Treatment Weighting (IPTW)
library(survey)
# Step 1: Estimate the propensity scores
ps_2 <- glm(assess_count ~ age + sex + latest_bmi + diabetes,
family = binomial(), data = medtronic_screening)$fitted.values
# Step 2: Create the weights
medtronic_screening$weights_2 <- ifelse(medtronic_screening$assess_count == "> 2", 1/ps_2, 1/(1 - ps_2))
# Step 3: Create a survey design object
design <- svydesign(id = ~1, data = medtronic_screening, weights = ~weights_2)
# Step 4: Fit a linear regression model using the survey design object
iptw_model_2 <- svyglm(change_in_systolic ~ assess_count, design = design)
# Step 5: Display the summary of the linear regression model
summary(iptw_model_2)##
## Call:
## svyglm(formula = change_in_systolic ~ assess_count, design = design)
##
## Survey design:
## svydesign(id = ~1, data = medtronic_screening, weights = ~weights_2)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.715 1.939 -1.916 0.0555 .
## assess_countTwo or more -4.404 2.024 -2.176 0.0297 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 590.4661)
##
## Number of Fisher Scoring iterations: 2
## (Intercept) assess_countTwo or more
## -3.714679 -4.403589
## NULL
# Step 8: Display the diagnostic plots of the linear regression model
par(mfrow = c(2, 2))
plot(iptw_model_2)
## IPTW Balance Diagnostics
# IPTW Balance Diagnostics
library(cobalt)
bal.tab(assess_count ~ age + sex + latest_bmi + diabetes,
data = medtronic_screening,
weights = medtronic_screening$weights_2,
method = "weighting")## Note: `s.d.denom` not specified; assuming "pooled".
## Balance Measures
## Type Diff.Adj
## age Contin. 0.0763
## sex_Male Binary -0.0185
## latest_bmi Contin. -0.0207
## diabetes_Yes Binary 0.0715
##
## Effective sample sizes
## One Two or more
## Unadjusted 169. 1864.
## Adjusted 160.67 1765.67
love.plot(assess_count ~ age + sex + latest_bmi + diabetes,
data = medtronic_screening,
weights = medtronic_screening$weights_2,
method = "weighting")## Note: `s.d.denom` not specified; assuming "pooled".
## Warning: Standardized mean differences and raw mean differences are present in
## the same plot. Use the `stars` argument to distinguish between them and
## appropriately label the x-axis. See `?love.plot` for details.
# Load the survey package
library(survey)
# --- Existing IPTW Steps ---
# Step 1: Estimate the propensity scores
# (Assuming 'num_followup_assess' is a factor with "Five or more" as one level)
ps_2 <- glm(assess_count ~ age + sex + latest_bmi + diabetes,
family = binomial(), data = medtronic_screening)$fitted.values
# Step 2: Create the weights
# Ensure 'num_followup_assess' is a factor and the levels are correctly identified
# For example, if "One" is the reference and "Five or more" is the treated group:
medtronic_screening$weights_2 <- ifelse(medtronic_screening$assess_count == "> 2", 1/ps_2, 1/(1 - ps_2))
# Step 3: Create a survey design object
design <- svydesign(id = ~1, data = medtronic_screening, weights = ~weights_2)
# Step 4: Fit a linear regression model using the survey design object
iptw_model_2 <- svyglm(change_in_systolic ~ assess_count, design = design)
# --- Doubly Robust Estimation Step ---
# Step 4: Fit a linear regression model using the survey design object
# AND include the previously unbalanced covariates (age, diabetes)
# in the outcome model.
iptw_doubly_robust_model <- svyglm(change_in_systolic ~ assess_count + age + diabetes,
design = design)
# Step 5: Display the summary of the doubly robust linear regression model
summary(iptw_doubly_robust_model)##
## Call:
## svyglm(formula = change_in_systolic ~ assess_count + age + diabetes,
## design = design)
##
## Survey design:
## svydesign(id = ~1, data = medtronic_screening, weights = ~weights_2)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.02141 3.25499 0.314 0.753706
## assess_countTwo or more -4.69669 2.00897 -2.338 0.019491 *
## age -0.08667 0.04219 -2.054 0.040081 *
## diabetesYes 5.41021 1.53042 3.535 0.000417 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 584.3363)
##
## Number of Fisher Scoring iterations: 2
# Step 6: Display the coefficients of the doubly robust linear regression model
coef(iptw_doubly_robust_model)## (Intercept) assess_countTwo or more age
## 1.02141345 -4.69668600 -0.08666987
## diabetesYes
## 5.41021430
# Step 7: Display the ANOVA table of the doubly robust linear regression model
anova(iptw_doubly_robust_model)## Anova table: (Rao-Scott LRT)
## svyglm(formula = change_in_systolic ~ assess_count, design = design)
## stats DEff df ddf p
## assess_count 2891.1 610.68 1.00 2031 0.0308532 *
## age 2768.2 625.31 1.00 2030 0.0368268 *
## diabetes 9687.5 775.19 1.00 2029 0.0004462 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Step 8: Display the diagnostic plots of the doubly robust linear regression model
par(mfrow = c(2, 2))
plot(iptw_doubly_robust_model)## drtmle: TMLE with doubly robust inference
## Version: 1.1.2
# Encode treatment as binary: 0 = "<5", 1 = "Five or more"
medtronic_screening$treat <- ifelse(medtronic_screening$assess_count == "Five or more", 1, 0)
# 2. Ensure covariates are numeric or factors (no characters!)
medtronic_screening$sex <- as.factor(medtronic_screening$sex)
medtronic_screening$diabetes <- as.factor(medtronic_screening$diabetes)
# 3. Subset covariates (W), treatment (A), and outcome (Y)
W <- medtronic_screening[, c("age", "sex", "latest_bmi", "diabetes")]
W <- as.data.frame(W)
A <- medtronic_screening$treat
Y <- medtronic_screening$change_in_systolic
# # Fiting doubly robust TMLE with GLMs for outcome and treatment models
# dr_result <- drtmle(
# W = W,
# A = A,
# Y = Y,
# family = gaussian(),
# glm_Q = ~ + age + sex + latest_bmi + diabetes,
# glm_g = "A ~ age + sex + latest_bmi + diabetes"
# )
#
# # Summarize the results
# summary(dr_result)# Create a subset of data with individuals with only one follow-up assessment and those with five or more follow-up assessments- Use Total follow-up assessments variable and assign the values-drop those with two to four follow-up assessments and assign new dataset name-use screening dataset
medtronic_screening_subset <- medtronic_screening %>%
filter(total_followup_assessments == 1 | total_followup_assessments >= 5) %>%
mutate(
num_followup_assess = case_when(
total_followup_assessments == 1 ~ "One",
total_followup_assessments >= 5 ~ "Five or more"
)
)
# Display the number of patients in the new subset
nrow(medtronic_screening_subset)## [1] 1138
# # Comparing the mean systolic blood pressure (SBP) at enrollment and follow-up by number of followup assessments in the new subset
mean_sbp_one_vs_five_screening <- medtronic_screening_subset %>%
group_by(num_followup_assess) %>%
summarise(
Mean_SBP_Enrollment = mean(baseline_systolic, na.rm = TRUE),
Mean_SBP_Followup = mean(followup_systolic, na.rm = TRUE),
Number_of_Patients = n()
)
# Calculate the mean difference
mean_diff_sbp_one_vs_five_followup_assessments_screening <- mean_sbp_one_vs_five_screening %>%
mutate(Mean_Difference = Mean_SBP_Followup - Mean_SBP_Enrollment)
mean_diff_sbp_one_vs_five_followup_assessments_screening## # A tibble: 2 × 5
## num_followup_assess Mean_SBP_Enrollment Mean_SBP_Followup Number_of_Patients
## <chr> <dbl> <dbl> <int>
## 1 Five or more 145. 135. 969
## 2 One 141. 137. 169
## # ℹ 1 more variable: Mean_Difference <dbl>
# Testing whether the differences between baseline and followup sbp between the groups is statistically significant
t.test(change_in_systolic ~ num_followup_assess, data = medtronic_screening_subset)##
## Welch Two Sample t-test
##
## data: change_in_systolic by num_followup_assess
## t = -3.1264, df = 232.21, p-value = 0.001996
## alternative hypothesis: true difference in means between group Five or more and group One is not equal to 0
## 95 percent confidence interval:
## -10.231804 -2.321039
## sample estimates:
## mean in group Five or more mean in group One
## -10.495356 -4.218935
# Fitting a linear regression model to predict change in systolic blood pressure
lm_sbp_change_screening_subset <- lm(change_in_systolic ~ num_followup_assess, data = medtronic_screening_subset)
# Display the summary of the linear regression model
summary(lm_sbp_change_screening_subset)##
## Call:
## lm(formula = change_in_systolic ~ num_followup_assess, data = medtronic_screening_subset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -93.505 -13.781 0.857 14.495 89.495
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.4954 0.7807 -13.444 < 2e-16 ***
## num_followup_assessOne 6.2764 2.0258 3.098 0.00199 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.3 on 1136 degrees of freedom
## Multiple R-squared: 0.008379, Adjusted R-squared: 0.007506
## F-statistic: 9.599 on 1 and 1136 DF, p-value: 0.001994
## (Intercept) num_followup_assessOne
## -10.495356 6.276421
## Analysis of Variance Table
##
## Response: change_in_systolic
## Df Sum Sq Mean Sq F value Pr(>F)
## num_followup_assess 1 5669 5668.8 9.5991 0.001994 **
## Residuals 1136 670871 590.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Display the diagnostic plots of the linear regression model
par(mfrow = c(2, 2))
plot(lm_sbp_change_screening_subset)
# Adding other covariates
# Adding other covariates
lm_sbp_change_2_screening_subset <- lm(change_in_systolic~num_followup_assess+age+sex+latest_bmi+diabetes, data = medtronic_screening_subset)
# Display the summary of the linear regression model
summary(lm_sbp_change_2_screening_subset)##
## Call:
## lm(formula = change_in_systolic ~ num_followup_assess + age +
## sex + latest_bmi + diabetes, data = medtronic_screening_subset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -93.25 -14.02 1.36 14.10 91.25
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.38721 3.62952 -2.035 0.042052 *
## num_followup_assessOne 6.58511 2.02396 3.254 0.001173 **
## age -0.07193 0.05538 -1.299 0.194244
## sexMale -1.31438 1.65298 -0.795 0.426687
## latest_bmi 0.02451 0.02178 1.125 0.260683
## diabetesYes 7.21262 2.03990 3.536 0.000423 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.17 on 1132 degrees of freedom
## Multiple R-squared: 0.02263, Adjusted R-squared: 0.01832
## F-statistic: 5.243 on 5 and 1132 DF, p-value: 9.124e-05
## (Intercept) num_followup_assessOne age
## -7.38721204 6.58511279 -0.07193071
## sexMale latest_bmi diabetesYes
## -1.31438445 0.02450555 7.21262220
## Analysis of Variance Table
##
## Response: change_in_systolic
## Df Sum Sq Mean Sq F value Pr(>F)
## num_followup_assess 1 5669 5668.8 9.7048 0.001884 **
## age 1 1247 1247.5 2.1356 0.144190
## sex 1 429 429.4 0.7351 0.391430
## latest_bmi 1 665 664.6 1.1377 0.286369
## diabetes 1 7303 7302.5 12.5017 0.000423 ***
## Residuals 1132 661227 584.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Display the diagnostic plots of the linear regression model
par(mfrow = c(2, 2))
plot(lm_sbp_change_2_screening_subset)
## Applying some Causal inference methods –> One vs Five or more
follow-up visits # Propensity score matching
# Propensity score matching
library(MatchIt)
# Create a new variable for enrollment status
medtronic_screening_subset$num_followup_assess <- as.factor(medtronic_screening_subset$num_followup_assess)
# Fit a propensity score model
ps_model_1_vs_5 <- matchit(num_followup_assess ~ age + sex + baseline_grade + diabetes,
data = medtronic_screening_subset, method = "nearest", distance = "logit")
# Display the summary of the propensity score model
summary(ps_model_1_vs_5)##
## Call:
## matchit(formula = num_followup_assess ~ age + sex + baseline_grade +
## diabetes, data = medtronic_screening_subset, method = "nearest",
## distance = "logit")
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.1590 0.1467 0.3167 1.0028 0.0755
## age 61.3728 63.4964 -0.1408 1.4015 0.0295
## sexFemale 0.7160 0.7430 -0.0600 . 0.0271
## sexMale 0.2840 0.2570 0.0600 . 0.0271
## baseline_grade 0.8343 0.9783 -0.1527 0.9349 0.0360
## diabetesNo 0.9112 0.8452 0.2322 . 0.0660
## diabetesYes 0.0888 0.1548 -0.2322 . 0.0660
## eCDF Max
## distance 0.1515
## age 0.0808
## sexFemale 0.0271
## sexMale 0.0271
## baseline_grade 0.0853
## diabetesNo 0.0660
## diabetesYes 0.0660
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.1590 0.1591 -0.0009 0.9988 0.0006
## age 61.3728 61.6627 -0.0192 1.1747 0.0137
## sexFemale 0.7160 0.7337 -0.0394 . 0.0178
## sexMale 0.2840 0.2663 0.0394 . 0.0178
## baseline_grade 0.8343 0.8343 0.0000 0.9614 0.0059
## diabetesNo 0.9112 0.9231 -0.0416 . 0.0118
## diabetesYes 0.0888 0.0769 0.0416 . 0.0118
## eCDF Max Std. Pair Dist.
## distance 0.0118 0.0037
## age 0.0355 0.3048
## sexFemale 0.0178 0.3805
## sexMale 0.0178 0.3805
## baseline_grade 0.0118 0.2604
## diabetesNo 0.0118 0.1248
## diabetesYes 0.0118 0.1248
##
## Sample Sizes:
## Control Treated
## All 969 169
## Matched 169 169
## Unmatched 800 0
## Discarded 0 0
# Create a matched dataset
matched_data_1_vs_5 <- match.data(ps_model_1_vs_5)
# Fit a linear regression model to predict change in systolic blood pressure
lm_sbp_change_matched_1_vs_5 <- lm(change_in_systolic ~ num_followup_assess, data = matched_data_1_vs_5)
# Display the summary of the linear regression model
summary(lm_sbp_change_matched_1_vs_5)##
## Call:
## lm(formula = change_in_systolic ~ num_followup_assess, data = matched_data_1_vs_5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -94.45 -13.78 -0.45 14.05 81.55
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.550 1.841 -4.102 5.15e-05 ***
## num_followup_assessOne 3.331 2.603 1.280 0.202
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.93 on 336 degrees of freedom
## Multiple R-squared: 0.004851, Adjusted R-squared: 0.001889
## F-statistic: 1.638 on 1 and 336 DF, p-value: 0.2015
## (Intercept) num_followup_assessOne
## -7.550296 3.331361
## Analysis of Variance Table
##
## Response: change_in_systolic
## Df Sum Sq Mean Sq F value Pr(>F)
## num_followup_assess 1 938 937.78 1.6378 0.2015
## Residuals 336 192389 572.59
# Display the diagnostic plots of the linear regression model
par(mfrow = c(2, 2))
plot(lm_sbp_change_matched_1_vs_5)# Display the balance of covariates before and after matching
library(cobalt)
bal.tab(ps_model_1_vs_5)## Balance Measures
## Type Diff.Adj
## distance Distance -0.0009
## age Contin. -0.0192
## sex_Male Binary 0.0178
## baseline_grade Contin. 0.0000
## diabetes_Yes Binary 0.0118
##
## Sample sizes
## Control Treated
## All 969 169
## Matched 169 169
## Unmatched 800 0
## Balance Measures
## Type Diff.Adj
## distance Distance -0.0009
## age Contin. -0.0192
## sex_Male Binary 0.0178
## baseline_grade Contin. 0.0000
## diabetes_Yes Binary 0.0118
##
## Sample sizes
## Control Treated
## All 969 169
## Matched 169 169
## Unmatched 800 0
# Create a plot comparing observed vs. counterfactual BP change for
matched data:
# Create a plot comparing observed vs. counterfactual BP change for matched sample
library(ggplot2)
ggplot(matched_data_1_vs_5, aes(x = num_followup_assess, y = change_in_systolic)) +
geom_boxplot() +
labs(title = "Effect of Follow-up on Systolic BP Change (Matched Sample)",
x = "Follow-up Assessments",
y = "Change in Systolic BP (mmHg)")# Inverse Probability of Treatment Weighting (IPTW)
library(survey)
# Step 1: Estimate the propensity scores
ps_1_vs_5 <- glm(num_followup_assess ~ age + sex + baseline_grade + diabetes,
family = binomial(), data = medtronic_screening_subset)$fitted.values
# Step 2: Create the weights
medtronic_screening_subset$weights_1_vs_5 <- ifelse(medtronic_screening_subset$num_followup_assess == "Five or more", 1/ps_1_vs_5, 1/(1 - ps_1_vs_5))
# Step 3: Create a survey design object
design <- svydesign(id = ~1, data = medtronic_screening_subset, weights = ~weights_1_vs_5)
# Step 4: Fit a linear regression model using the survey design object
iptw_model_1_vs_5 <- svyglm(change_in_systolic ~ num_followup_assess, design = design)
# Step 5: Display the summary of the linear regression model
summary(iptw_model_1_vs_5)##
## Call:
## svyglm(formula = change_in_systolic ~ num_followup_assess, design = design)
##
## Survey design:
## svydesign(id = ~1, data = medtronic_screening_subset, weights = ~weights_1_vs_5)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12.3649 0.8823 -14.014 < 2e-16 ***
## num_followup_assessOne 8.3747 2.0233 4.139 3.74e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 634.3074)
##
## Number of Fisher Scoring iterations: 2
## (Intercept) num_followup_assessOne
## -12.364858 8.374726
## NULL
# Step 8: Display the diagnostic plots of the linear regression model
par(mfrow = c(2, 2))
plot(iptw_model_1_vs_5)
# IPTW Balance Diagnostics
# IPTW Balance Diagnostics
library(cobalt)
bal.tab(num_followup_assess ~ age + sex + latest_bmi + diabetes,
data = medtronic_screening_subset,,
weights = medtronic_screening_subset$weights_1_vs_5,
method = "weighting")## Note: `s.d.denom` not specified; assuming "pooled".
## Balance Measures
## Type Diff.Adj
## age Contin. -0.3040
## sex_Male Binary 0.0543
## latest_bmi Contin. 0.0627
## diabetes_Yes Binary -0.1585
##
## Effective sample sizes
## Five or more One
## Unadjusted 969. 169.
## Adjusted 874.92 168.63
love.plot(num_followup_assess ~ age + sex + baseline_grade + diabetes,
data = medtronic_screening_subset,
weights = medtronic_screening_subset$weights_1_vs_5,
method = "weighting")## Note: `s.d.denom` not specified; assuming "pooled".
## Warning: Standardized mean differences and raw mean differences are present in
## the same plot. Use the `stars` argument to distinguish between them and
## appropriately label the x-axis. See `?love.plot` for details.
## Apply Doubly Robust Estimation Method
# Load the survey package
library(survey)
# --- Existing IPTW Steps ---
# Step 1: Estimate the propensity scores
# (Assuming 'num_followup_assess' is a factor with "Five or more" as one level)
ps_1_vs_5 <- glm(num_followup_assess ~ age + sex + baseline_grade + diabetes,
family = binomial(), data = medtronic_screening_subset)$fitted.values
# Step 2: Create the weights
# Ensure 'num_followup_assess' is a factor and the levels are correctly identified
# For example, if "One" is the reference and "Five or more" is the treated group:
medtronic_screening_subset$weights_1_vs_5 <- ifelse(
medtronic_screening_subset$num_followup_assess == "Five or more",
1 / ps_1_vs_5,
1 / (1 - ps_1_vs_5)
)
# Step 3: Create a survey design object
design <- svydesign(id = ~1, data = medtronic_screening_subset, weights = ~weights_1_vs_5)
# --- Doubly Robust Estimation Step ---
# Step 4: Fit a linear regression model using the survey design object
# AND include the previously unbalanced covariates (age, diabetes)
# in the outcome model.
iptw_doubly_robust_model <- svyglm(change_in_systolic ~ num_followup_assess + age + diabetes,
design = design)
# Step 5: Display the summary of the doubly robust linear regression model
summary(iptw_doubly_robust_model)##
## Call:
## svyglm(formula = change_in_systolic ~ num_followup_assess + age +
## diabetes, design = design)
##
## Survey design:
## svydesign(id = ~1, data = medtronic_screening_subset, weights = ~weights_1_vs_5)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.61669 4.43271 -1.267 0.205380
## num_followup_assessOne 8.99313 2.00876 4.477 8.34e-06 ***
## age -0.13084 0.07014 -1.866 0.062367 .
## diabetesYes 7.40757 2.24213 3.304 0.000984 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 621.6061)
##
## Number of Fisher Scoring iterations: 2
# Step 6: Display the coefficients of the doubly robust linear regression model
coef(iptw_doubly_robust_model)## (Intercept) num_followup_assessOne age
## -5.6166916 8.9931348 -0.1308387
## diabetesYes
## 7.4075721
# Step 7: Display the ANOVA table of the doubly robust linear regression model
anova(iptw_doubly_robust_model)## Anova table: (Rao-Scott LRT)
## svyglm(formula = change_in_systolic ~ num_followup_assess, design = design)
## stats DEff df ddf p
## num_followup_assess 2117.1 123.57 1.00 1136 4.047e-05 ***
## age 3220.6 927.71 1.00 1135 0.064605 .
## diabetes 11220.7 1027.99 1.00 1134 0.001049 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Step 8: Display the diagnostic plots of the doubly robust linear regression model
par(mfrow = c(2, 2))
plot(iptw_doubly_robust_model)# Assessing proportions of blood pressure control at follow-up by number of follow-up assessments
bp_control_proportions <- medtronic_screening %>%
group_by(assess_count) %>%
summarise(
Proportion_BP_Control = mean(followup_systolic < 140, na.rm = TRUE),
Number_of_Patients = n()
)
# Calculate the mean difference
bp_control_proportions <- bp_control_proportions %>%
mutate(Mean_Difference = Proportion_BP_Control - mean(medtronic_screening$followup_systolic < 140, na.rm = TRUE))
bp_control_proportions## # A tibble: 2 × 4
## assess_count Proportion_BP_Control Number_of_Patients Mean_Difference
## <fct> <dbl> <int> <dbl>
## 1 "One" 0.621 169 -0.0344
## 2 "Two or more " 0.659 1864 0.00312
# Alluvial for changes in systolic blood pressure by number of follow-up assessments
library(ggalluvial)
medtronic_screening %>%
mutate(
change_in_systolic = case_when(
change_in_systolic < -10 ~ "Decrease > 10",
change_in_systolic >= -10 & change_in_systolic <= 10 ~ "No Change",
change_in_systolic > 10 ~ "Increase > 10"
)
) %>%
ggplot(aes(axis1 = assess_count, axis2 = change_in_systolic)) +
geom_alluvium(aes(fill = change_in_systolic)) +
geom_stratum() +
geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
labs(title = "Changes in Systolic Blood Pressure by Number of Follow-up Assessments",
x = "Number of Follow-up Assessments",
y = "Change in Systolic Blood Pressure") +
theme_minimal()
# Alluvial for changes in systolic blood pressure by number of follow-up
assessments in direct enrollment dataset
# Alluvial for changes in systolic blood pressure by number of follow-up assessments in direct enrollment dataset
library(ggalluvial)
medtronic_direct_enrollment %>%
mutate(
change_in_systolic = case_when(
change_in_systolic < -10 ~ "Decrease > 10",
change_in_systolic >= -10 & change_in_systolic <= 10 ~ "No Change",
change_in_systolic > 10 ~ "Increase > 10"
)
) %>%
ggplot(aes(axis1 = assess_count, axis2 = change_in_systolic)) +
geom_alluvium(aes(fill = change_in_systolic)) +
geom_stratum() +
geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
labs(title = "Changes in Systolic Blood Pressure by Number of Follow-up Assessments (Direct Enrollment)",
x = "Number of Follow-up Assessments",
y = "Change in Systolic Blood Pressure") +
theme_minimal()# Alluvial for changes in systolic blood pressure by number of follow-up assessments in screening dataset# Alluvial for changes in systolic blood pressure by number of follow-up assessments in screening dataset
medtronic_screening %>%
mutate(
change_in_systolic = case_when(
change_in_systolic < -10 ~ "Decrease > 10",
change_in_systolic >= -10 & change_in_systolic <= 10 ~ "No Change",
change_in_systolic > 10 ~ "Increase > 10"
)
) %>%
ggplot(aes(axis1 = assess_count, axis2 = change_in_systolic)) +
geom_alluvium(aes(fill = change_in_systolic)) +
geom_stratum() +
geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
labs(title = "Changes in Systolic Blood Pressure by Number of Follow-up Assessments (Screening)",
x = "Number of Follow-up Assessments",
y = "Change in Systolic Blood Pressure") +
theme_minimal()
# Alluvial for transitions in blood pressure control from baseline and
follow-up -Add percentage of patients in each category
# Alluvial for transitions in blood pressure control from baseline and follow-up -Add percentage of patients in each category-use baseline control and follow-up control variables
library(dplyr)
library(ggplot2)
library(ggalluvial)
medtronic_screening %>%
mutate(
baseline_control = ifelse(baseline_systolic < 140, "Controlled", "Uncontrolled"),
followup_control = ifelse(followup_systolic < 140, "Controlled", "Uncontrolled")
) %>%
group_by(baseline_control, followup_control) %>%
summarise(Count = n(), .groups = "drop") %>%
ggplot(aes(axis1 = baseline_control, axis2 = followup_control, y = Count)) +
geom_alluvium(aes(fill = followup_control), width = 1/12) +
geom_stratum(width = 1/12, fill = "grey", color = "black") +
geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
scale_x_discrete(limits = c("Baseline", "Follow-up"), expand = c(.05, .05)) +
scale_fill_manual(
values = c("Controlled" = "green", "Uncontrolled" = "red"),
name = "Follow-up Control Status"
) +
labs(
title = "Transitions in Blood Pressure Control from Baseline to Follow-up",
x = "Timepoint",
y = "Number of Individuals"
) +
theme_minimal()## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
# Alluvial for transitions in blood pressure control from baseline and
follow-up -Add percentage of patients in each category
library(dplyr)
library(ggplot2)
library(ggalluvial)
# Summarized data
df_summary <- medtronic_screening %>%
mutate(
baseline_control = ifelse(baseline_systolic < 140, "Controlled", "Uncontrolled"),
followup_control = ifelse(followup_systolic < 140, "Controlled", "Uncontrolled")
) %>%
group_by(baseline_control, followup_control) %>%
summarise(Count = n(), .groups = "drop")
# Compute % for each axis
baseline_pct <- df_summary %>%
group_by(baseline_control) %>%
summarise(total = sum(Count)) %>%
mutate(pct_label = paste0(baseline_control, " (", round(100 * total / sum(total), 1), "%)"))
followup_pct <- df_summary %>%
group_by(followup_control) %>%
summarise(total = sum(Count)) %>%
mutate(pct_label = paste0(followup_control, " (", round(100 * total / sum(total), 1), "%)"))
# Named vectors to map stratum names to % labels
baseline_labels <- setNames(baseline_pct$pct_label, baseline_pct$baseline_control)
followup_labels <- setNames(followup_pct$pct_label, followup_pct$followup_control)
# Final plot
ggplot(df_summary, aes(axis1 = baseline_control, axis2 = followup_control, y = Count)) +
geom_alluvium(aes(fill = followup_control), width = 1/12) +
geom_stratum(width = 1/12, fill = "grey", color = "black") +
geom_text(
stat = "stratum",
aes(label = ifelse(
after_stat(x) == 1,
baseline_labels[after_stat(stratum)],
followup_labels[after_stat(stratum)]
)),
size = 4
) +
scale_x_discrete(limits = c("Baseline", "Follow-up"), expand = c(.05, .05)) +
scale_fill_manual(
values = c("Controlled" = "green", "Uncontrolled" = "orange"),
name = "Follow-up Control Status"
) +
labs(
title = "Transitions in Blood Pressure Control from Baseline to Follow-up_Enrolled from Screening",
x = "Timepoint",
y = "Number of Individuals"
) +
theme_minimal()## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
# Alluvial for transitions in blood pressure control from baseline and
follow-up -Add percentage of patients in each category-Enrollment
dataset
# Alluvial for transitions in blood pressure control from baseline and follow-up -Add percentage of patients in each category-Enrollment dataset
# Summarized data
df_summary <- medtronic_direct_enrollment %>%
mutate(
baseline_control = ifelse(baseline_systolic < 140, "Controlled", "Uncontrolled"),
followup_control = ifelse(followup_systolic < 140, "Controlled", "Uncontrolled")
) %>%
group_by(baseline_control, followup_control) %>%
summarise(Count = n(), .groups = "drop")
# Compute % for each axis
baseline_pct <- df_summary %>%
group_by(baseline_control) %>%
summarise(total = sum(Count)) %>%
mutate(pct_label = paste0(baseline_control, " (", round(100 * total / sum(total), 1), "%)"))
followup_pct <- df_summary %>%
group_by(followup_control) %>%
summarise(total = sum(Count)) %>%
mutate(pct_label = paste0(followup_control, " (", round(100 * total / sum(total), 1), "%)"))
# Named vectors to map stratum names to % labels
baseline_labels <- setNames(baseline_pct$pct_label, baseline_pct$baseline_control)
followup_labels <- setNames(followup_pct$pct_label, followup_pct$followup_control)
# Final plot
ggplot(df_summary, aes(axis1 = baseline_control, axis2 = followup_control, y = Count)) +
geom_alluvium(aes(fill = followup_control), width = 1/12) +
geom_stratum(width = 1/12, fill = "grey", color = "black") +
geom_text(
stat = "stratum",
aes(label = ifelse(
after_stat(x) == 1,
baseline_labels[after_stat(stratum)],
followup_labels[after_stat(stratum)]
)),
size = 4
) +
scale_x_discrete(limits = c("Baseline", "Follow-up"), expand = c(.05, .05)) +
scale_fill_manual(
values = c("Controlled" = "green", "Uncontrolled" = "orange"),
name = "Follow-up Control Status"
) +
labs(
title = "Transitions in Blood Pressure Control from Baseline to Follow-up_Direct Enrollment",
x = "Timepoint",
y = "Number of Individuals"
) +
theme_minimal()## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.