knitr::opts_chunk$set(echo = TRUE)
library(rmarkdown)
library(kableExtra)
library(ggplot2)
library(dplyr)##
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
##
## group_rows
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Load the 12-month blood-pressure extract (bp_result_12mnths.csv).
# The folder defaults to the original author's local path; set the
# MEDTRONIC_DATA_DIR environment variable to read the file from elsewhere
# without editing this script.
bp_data_dir <- Sys.getenv(
  "MEDTRONIC_DATA_DIR",
  unset = "/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/PhD Data Analysis-Modelling/Medtronic Dataset/Quantitative Data/Kenya/MEDTRONIC Dataset/Latest/Outputs"
)
bp_data_path <- file.path(bp_data_dir, "bp_result_12mnths.csv")
# Fail early with a readable message rather than a low-level reader error.
if (!file.exists(bp_data_path)) {
  stop("Input file not found: ", bp_data_path, call. = FALSE)
}
# Namespace-qualified so the call works whether or not readr is attached.
data_12mths <- readr::read_csv(bp_data_path)
## Rows: 61041 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): country, facility, diagnosis, sex, county, enrollment_status, enr...
## dbl (21): patient_id, age, latest_bmi, baseline_systolic, baseline_diastoli...
## date (4): enrollment_date, baseline_assessment_date, anchor_date, followup_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 6 × 36
## patient_id country age facility diagnosis sex county enrollment_status
## <dbl> <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 1 Kenya 65 Matiliku su… "{Hypert… Fema… Makue… Direct Enrollment
## 2 5 Kenya 42 Tawa Sub-co… "{Hypert… Fema… Makue… Direct Enrollment
## 3 6 Kenya 48 Kakamega Co… "{Hypert… Fema… Kakam… Direct Enrollment
## 4 9 Kenya 43 Kibwezi Sub… "{Hypert… Fema… Makue… Direct Enrollment
## 5 11 Kenya 69 Kibwezi Sub… "{Hypert… Male Makue… Direct Enrollment
## 6 15 Kenya 65 Kilungu sub… "{Hypert… Fema… Makue… Direct Enrollment
## # ℹ 28 more variables: enrollment_bmi <chr>, latest_bmi <dbl>,
## # baseline_systolic <dbl>, baseline_diastolic <dbl>,
## # baseline_control_status <chr>, baseline_grade <dbl>,
## # enrollment_date <date>, baseline_assessment_date <date>,
## # anchor_date <date>, followup_systolic <dbl>, followup_diastolic <dbl>,
## # followup_control_status <chr>, followup_grade <dbl>, followup_date <date>,
## # days_between_assessment_and_anchor <dbl>, …
## spc_tbl_ [61,041 × 36] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ patient_id : num [1:61041] 1 5 6 9 11 15 16 17 29 30 ...
## $ country : chr [1:61041] "Kenya" "Kenya" "Kenya" "Kenya" ...
## $ age : num [1:61041] 65 42 48 43 69 65 48 55 70 49 ...
## $ facility : chr [1:61041] "Matiliku sub county hospital" "Tawa Sub-county Hospital" "Kakamega County General Hospital" "Kibwezi Sub-county Hospital" ...
## $ diagnosis : chr [1:61041] "{Hypertension,\"Diabetes Mellitus Type 2\"}" "{Hypertension}" "{Hypertension,\"Diabetes Mellitus Type 2\"}" "{Hypertension}" ...
## $ sex : chr [1:61041] "Female" "Female" "Female" "Female" ...
## $ county : chr [1:61041] "Makueni" "Makueni" "Kakamega" "Makueni" ...
## $ enrollment_status : chr [1:61041] "Direct Enrollment" "Direct Enrollment" "Direct Enrollment" "Direct Enrollment" ...
## $ enrollment_bmi : chr [1:61041] "25.4" "37.29" "29.87" "33.83" ...
## $ latest_bmi : num [1:61041] 25.4 37.3 30.1 33.8 23.2 ...
## $ baseline_systolic : num [1:61041] 165 211 136 151 125 146 119 133 117 185 ...
## $ baseline_diastolic : num [1:61041] 74 124 83 87 81 72 75 86 70 86 ...
## $ baseline_control_status : chr [1:61041] "Uncontrolled" "Uncontrolled" "Controlled" "Uncontrolled" ...
## $ baseline_grade : num [1:61041] 2 3 0 1 0 1 0 0 0 3 ...
## $ enrollment_date : Date[1:61041], format: "2020-12-14" "2021-11-17" ...
## $ baseline_assessment_date : Date[1:61041], format: "2022-02-28" "2021-11-17" ...
## $ anchor_date : Date[1:61041], format: "2023-02-28" "2022-11-17" ...
## $ followup_systolic : num [1:61041] 137 136 NA 146 119 113 NA NA 123 NA ...
## $ followup_diastolic : num [1:61041] 56 65 NA 78 70 73 NA NA 77 NA ...
## $ followup_control_status : chr [1:61041] "Controlled" "Controlled" NA "Uncontrolled" ...
## $ followup_grade : num [1:61041] 0 0 NA 1 0 0 NA NA 0 NA ...
## $ followup_date : Date[1:61041], format: "2023-02-14" "2022-11-03" ...
## $ days_between_assessment_and_anchor : num [1:61041] 14 14 NA 6 27 23 NA NA 19 NA ...
## $ days_between_assessment_and_baseline: num [1:61041] 351 351 NA 371 392 342 NA NA 346 NA ...
## $ number_followup_assessments : num [1:61041] 3 1 NA 0 3 0 NA NA 11 NA ...
## $ number_followup_by_hs : num [1:61041] 2 0 NA 0 2 1 NA NA 1 NA ...
## $ number_medical_reviews : num [1:61041] 0 0 NA 0 0 0 NA NA 0 NA ...
## $ controlled_followup_days : num [1:61041] 351 351 -1 NA -1 342 -1 -1 -1 NA ...
## $ number_uncontrolled_followups : num [1:61041] 1 0 -1 1 -1 0 -1 -1 -1 NA ...
## $ number_community_assessments : num [1:61041] 0 0 NA 0 0 0 NA NA 1 NA ...
## $ number_facility_assessments : num [1:61041] 1 0 NA 0 1 1 NA NA 4 NA ...
## $ number_feeder_assessments : num [1:61041] 0 0 NA 0 0 1 NA NA 0 NA ...
## $ change_in_systolic : num [1:61041] -28 -75 NA -5 -6 -33 NA NA 6 NA ...
## $ change_in_diastolic : num [1:61041] -18 -59 NA -9 -11 1 NA NA 7 NA ...
## $ filter_col_str : chr [1:61041] "Yes" "Yes" "No" "Yes" ...
## $ diabetes : chr [1:61041] "Yes" "No" "Yes" "No" ...
## - attr(*, "spec")=
## .. cols(
## .. patient_id = col_double(),
## .. country = col_character(),
## .. age = col_double(),
## .. facility = col_character(),
## .. diagnosis = col_character(),
## .. sex = col_character(),
## .. county = col_character(),
## .. enrollment_status = col_character(),
## .. enrollment_bmi = col_character(),
## .. latest_bmi = col_double(),
## .. baseline_systolic = col_double(),
## .. baseline_diastolic = col_double(),
## .. baseline_control_status = col_character(),
## .. baseline_grade = col_double(),
## .. enrollment_date = col_date(format = ""),
## .. baseline_assessment_date = col_date(format = ""),
## .. anchor_date = col_date(format = ""),
## .. followup_systolic = col_double(),
## .. followup_diastolic = col_double(),
## .. followup_control_status = col_character(),
## .. followup_grade = col_double(),
## .. followup_date = col_date(format = ""),
## .. days_between_assessment_and_anchor = col_double(),
## .. days_between_assessment_and_baseline = col_double(),
## .. number_followup_assessments = col_double(),
## .. number_followup_by_hs = col_double(),
## .. number_medical_reviews = col_double(),
## .. controlled_followup_days = col_double(),
## .. number_uncontrolled_followups = col_double(),
## .. number_community_assessments = col_double(),
## .. number_facility_assessments = col_double(),
## .. number_feeder_assessments = col_double(),
## .. change_in_systolic = col_double(),
## .. change_in_diastolic = col_double(),
## .. filter_col_str = col_character(),
## .. diabetes = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
## [1] "patient_id"
## [2] "country"
## [3] "age"
## [4] "facility"
## [5] "diagnosis"
## [6] "sex"
## [7] "county"
## [8] "enrollment_status"
## [9] "enrollment_bmi"
## [10] "latest_bmi"
## [11] "baseline_systolic"
## [12] "baseline_diastolic"
## [13] "baseline_control_status"
## [14] "baseline_grade"
## [15] "enrollment_date"
## [16] "baseline_assessment_date"
## [17] "anchor_date"
## [18] "followup_systolic"
## [19] "followup_diastolic"
## [20] "followup_control_status"
## [21] "followup_grade"
## [22] "followup_date"
## [23] "days_between_assessment_and_anchor"
## [24] "days_between_assessment_and_baseline"
## [25] "number_followup_assessments"
## [26] "number_followup_by_hs"
## [27] "number_medical_reviews"
## [28] "controlled_followup_days"
## [29] "number_uncontrolled_followups"
## [30] "number_community_assessments"
## [31] "number_facility_assessments"
## [32] "number_feeder_assessments"
## [33] "change_in_systolic"
## [34] "change_in_diastolic"
## [35] "filter_col_str"
## [36] "diabetes"
## [1] 61041 36
## patient_id country
## 0 0
## age facility
## 0 0
## diagnosis sex
## 0 0
## county enrollment_status
## 0 0
## enrollment_bmi latest_bmi
## 0 18
## baseline_systolic baseline_diastolic
## 0 0
## baseline_control_status baseline_grade
## 0 0
## enrollment_date baseline_assessment_date
## 0 0
## anchor_date followup_systolic
## 0 43617
## followup_diastolic followup_control_status
## 43617 43617
## followup_grade followup_date
## 43617 43617
## days_between_assessment_and_anchor days_between_assessment_and_baseline
## 43617 43617
## number_followup_assessments number_followup_by_hs
## 43617 43617
## number_medical_reviews controlled_followup_days
## 43617 29403
## number_uncontrolled_followups number_community_assessments
## 24437 43617
## number_facility_assessments number_feeder_assessments
## 43617 43617
## change_in_systolic change_in_diastolic
## 43617 43617
## filter_col_str diabetes
## 0 0
## patient_id country age facility
## Min. : 1 Length:61041 Min. : 1.00 Length:61041
## 1st Qu.: 42991 Class :character 1st Qu.: 53.00 Class :character
## Median :329626 Mode :character Median : 63.00 Mode :character
## Mean :255672 Mean : 62.32
## 3rd Qu.:435102 3rd Qu.: 72.00
## Max. :594198 Max. :702.00
##
## diagnosis sex county enrollment_status
## Length:61041 Length:61041 Length:61041 Length:61041
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## enrollment_bmi latest_bmi baseline_systolic baseline_diastolic
## Length:61041 Min. : 3.12 Min. : 54.0 Min. : 34.00
## Class :character 1st Qu.: 23.07 1st Qu.:127.0 1st Qu.: 75.00
## Mode :character Median : 26.04 Median :140.0 Median : 82.00
## Mean : 29.31 Mean :142.2 Mean : 83.24
## 3rd Qu.: 29.95 3rd Qu.:155.0 3rd Qu.: 91.00
## Max. :2110.71 Max. :288.0 Max. :234.00
## NA's :18
## baseline_control_status baseline_grade enrollment_date
## Length:61041 Min. :0.0000 Min. :2018-04-18
## Class :character 1st Qu.:0.0000 1st Qu.:2021-07-09
## Mode :character Median :1.0000 Median :2022-04-08
## Mean :0.8605 Mean :2022-03-01
## 3rd Qu.:1.0000 3rd Qu.:2022-12-02
## Max. :3.0000 Max. :2024-07-29
##
## baseline_assessment_date anchor_date followup_systolic
## Min. :2018-04-18 Min. :2019-04-18 Min. : 70.0
## 1st Qu.:2021-12-01 1st Qu.:2022-12-01 1st Qu.:124.0
## Median :2022-09-02 Median :2023-09-02 Median :134.0
## Mean :2022-06-26 Mean :2023-06-27 Mean :136.3
## 3rd Qu.:2023-03-23 3rd Qu.:2024-03-23 3rd Qu.:146.0
## Max. :2024-07-29 Max. :2025-07-29 Max. :240.0
## NA's :43617
## followup_diastolic followup_control_status followup_grade
## Min. : 30.00 Length:61041 Min. :0.0
## 1st Qu.: 73.00 Class :character 1st Qu.:0.0
## Median : 80.00 Mode :character Median :0.0
## Mean : 80.49 Mean :0.6
## 3rd Qu.: 87.00 3rd Qu.:1.0
## Max. :153.00 Max. :3.0
## NA's :43617 NA's :43617
## followup_date days_between_assessment_and_anchor
## Min. :2019-03-18 Min. : 0.00
## 1st Qu.:2023-02-21 1st Qu.:10.00
## Median :2023-07-19 Median :23.00
## Mean :2023-05-31 Mean :25.66
## 3rd Qu.:2023-12-05 3rd Qu.:40.00
## Max. :2024-07-29 Max. :62.00
## NA's :43617 NA's :43617
## days_between_assessment_and_baseline number_followup_assessments
## Min. :303.0 Min. : 0.0
## 1st Qu.:340.0 1st Qu.: 1.0
## Median :364.0 Median : 2.0
## Mean :363.8 Mean : 2.9
## 3rd Qu.:386.0 3rd Qu.: 4.0
## Max. :427.0 Max. :32.0
## NA's :43617 NA's :43617
## number_followup_by_hs number_medical_reviews controlled_followup_days
## Min. : 0.00 Min. :0.00 Min. : -1.00
## 1st Qu.: 0.00 1st Qu.:0.00 1st Qu.: -1.00
## Median : 1.00 Median :0.00 Median : -1.00
## Mean : 1.31 Mean :0.04 Mean : 54.28
## 3rd Qu.: 2.00 3rd Qu.:0.00 3rd Qu.: -1.00
## Max. :28.00 Max. :4.00 Max. :427.00
## NA's :43617 NA's :43617 NA's :29403
## number_uncontrolled_followups number_community_assessments
## Min. :-1.00 Min. : 0
## 1st Qu.:-1.00 1st Qu.: 0
## Median :-1.00 Median : 0
## Mean :-0.29 Mean : 1
## 3rd Qu.: 0.00 3rd Qu.: 1
## Max. :18.00 Max. :21
## NA's :24437 NA's :43617
## number_facility_assessments number_feeder_assessments change_in_systolic
## Min. : 0.00 Min. : 0.00 Min. :-146.00
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: -20.00
## Median : 1.00 Median : 0.00 Median : -5.00
## Mean : 1.37 Mean : 0.06 Mean : -6.09
## 3rd Qu.: 2.00 3rd Qu.: 0.00 3rd Qu.: 9.00
## Max. :19.00 Max. :26.00 Max. : 109.00
## NA's :43617 NA's :43617 NA's :43617
## change_in_diastolic filter_col_str diabetes
## Min. :-88.00 Length:61041 Length:61041
## 1st Qu.:-11.00 Class :character Class :character
## Median : -2.00 Mode :character Mode :character
## Mean : -2.49
## 3rd Qu.: 6.00
## Max. : 75.00
## NA's :43617
# Descriptive statistics for patient age (mean, median, standard deviation,
# minimum, maximum), ignoring NA values. Returned as a one-row tibble.
age_summary <- summarise(
  data_12mths,
  mean_age   = mean(age, na.rm = TRUE),
  median_age = median(age, na.rm = TRUE),
  sd_age     = sd(age, na.rm = TRUE),
  min_age    = min(age, na.rm = TRUE),
  max_age    = max(age, na.rm = TRUE)
)
# Print the one-row summary
age_summary
## # A tibble: 1 × 5
## mean_age median_age sd_age min_age max_age
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 62.3 63 14.1 1 702
# Histogram of age using 5-unit-wide bins
data_12mths %>%
  ggplot(aes(age)) +
  geom_histogram(binwidth = 5, fill = "blue", colour = "black", alpha = 0.7) +
  ggtitle("Distribution of Age") +
  xlab("Age") +
  ylab("Frequency") +
  theme_minimal()

# Box plot of age, used to spot outlying values
data_12mths %>%
  ggplot(aes(y = age)) +
  geom_boxplot(fill = "blue", colour = "black", alpha = 0.7) +
  ggtitle("Box Plot of Age") +
  ylab("Age") +
  theme_minimal()

# Keep only rows with age <= 100 (filter() also drops rows where the
# condition evaluates to NA, i.e. a missing age)
data_12mths <- filter(data_12mths, age <= 100)
# Box plot of age, redrawn on the data that remains after the age filter
data_12mths %>%
  ggplot(aes(y = age)) +
  geom_boxplot(fill = "blue", colour = "black", alpha = 0.7) +
  ggtitle("Box Plot of Age (Excluding Patients Aged Above 100)") +
  ylab("Age") +
  theme_minimal()
# Creating summary statistics for age after excluding patients aged above 100.
# Recomputes the same five statistics as the earlier age summary (mean,
# median, standard deviation, minimum, maximum; NA values ignored) and
# overwrites `age_summary` with the result.
age_summary <- data_12mths %>%
  summarise(
    mean_age = mean(age, na.rm = TRUE),
    median_age = median(age, na.rm = TRUE),
    sd_age = sd(age, na.rm = TRUE),
    min_age = min(age, na.rm = TRUE),
    max_age = max(age, na.rm = TRUE)
  )
# Display the summary statistics
age_summary
## # A tibble: 1 × 5
## mean_age median_age sd_age min_age max_age
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 62.3 63 13.8 1 100
# Assign each patient to a five-year age band.
# Bands are left-closed and right-open: [20, 25) -> "20-24", ...,
# [80, 85) -> "80-84", with "<20" below and "85+" above.
# cut() leaves a missing age as NA; the previous `TRUE ~ "85+"` catch-all
# would have labelled a missing age as "85+".
# The column is kept as character, as before.
# NOTE(review): character labels sort alphabetically, which places "<20"
# after "85+" in grouped summaries.
age_band_lower <- seq(20, 80, by = 5)
age_band_labels <- c(
  "<20",
  paste0(age_band_lower, "-", age_band_lower + 4),
  "85+"
)
data_12mths <- data_12mths %>%
  mutate(
    age_group = as.character(
      cut(
        age,
        breaks = c(-Inf, age_band_lower, 85, Inf),
        labels = age_band_labels,
        right = FALSE
      )
    )
  )
# Display the first few rows of the dataset with age groups
head(data_12mths)
## # A tibble: 6 × 37
## patient_id country age facility diagnosis sex county enrollment_status
## <dbl> <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 1 Kenya 65 Matiliku su… "{Hypert… Fema… Makue… Direct Enrollment
## 2 5 Kenya 42 Tawa Sub-co… "{Hypert… Fema… Makue… Direct Enrollment
## 3 6 Kenya 48 Kakamega Co… "{Hypert… Fema… Kakam… Direct Enrollment
## 4 9 Kenya 43 Kibwezi Sub… "{Hypert… Fema… Makue… Direct Enrollment
## 5 11 Kenya 69 Kibwezi Sub… "{Hypert… Male Makue… Direct Enrollment
## 6 15 Kenya 65 Kilungu sub… "{Hypert… Fema… Makue… Direct Enrollment
## # ℹ 29 more variables: enrollment_bmi <chr>, latest_bmi <dbl>,
## # baseline_systolic <dbl>, baseline_diastolic <dbl>,
## # baseline_control_status <chr>, baseline_grade <dbl>,
## # enrollment_date <date>, baseline_assessment_date <date>,
## # anchor_date <date>, followup_systolic <dbl>, followup_diastolic <dbl>,
## # followup_control_status <chr>, followup_grade <dbl>, followup_date <date>,
## # days_between_assessment_and_anchor <dbl>, …
# Count the NA entries in every column (one integer per column, named by column)
missing_values <- vapply(
  data_12mths,
  function(column) sum(is.na(column)),
  integer(1)
)
# Two-column data frame: column name and its NA count
missing_values <- data.frame(
  variable = names(missing_values),
  missing = missing_values
)
# Keep only the columns that have at least one missing value
missing_values <- missing_values[missing_values[["missing"]] > 0, , drop = FALSE]
# Print the result
missing_values
## variable
## latest_bmi latest_bmi
## followup_systolic followup_systolic
## followup_diastolic followup_diastolic
## followup_control_status followup_control_status
## followup_grade followup_grade
## followup_date followup_date
## days_between_assessment_and_anchor days_between_assessment_and_anchor
## days_between_assessment_and_baseline days_between_assessment_and_baseline
## number_followup_assessments number_followup_assessments
## number_followup_by_hs number_followup_by_hs
## number_medical_reviews number_medical_reviews
## controlled_followup_days controlled_followup_days
## number_uncontrolled_followups number_uncontrolled_followups
## number_community_assessments number_community_assessments
## number_facility_assessments number_facility_assessments
## number_feeder_assessments number_feeder_assessments
## change_in_systolic change_in_systolic
## change_in_diastolic change_in_diastolic
## missing
## latest_bmi 17
## followup_systolic 43559
## followup_diastolic 43559
## followup_control_status 43559
## followup_grade 43559
## followup_date 43559
## days_between_assessment_and_anchor 43559
## days_between_assessment_and_baseline 43559
## number_followup_assessments 43559
## number_followup_by_hs 43559
## number_medical_reviews 43559
## controlled_followup_days 29371
## number_uncontrolled_followups 24409
## number_community_assessments 43559
## number_facility_assessments 43559
## number_feeder_assessments 43559
## change_in_systolic 43559
## change_in_diastolic 43559
# Render the missing-value counts as a striped HTML table.
# row.names = FALSE: the row names of `missing_values` are the variable names,
# so printing them repeated the `variable` column under an empty header.
missing_values_table <- kable(
  missing_values,
  format = "html",
  caption = "Missing Values by Variable",
  row.names = FALSE
) %>%
  kable_styling("striped", full_width = FALSE)
# Display the table
missing_values_table
## | variable | missing |
## |---|---|
## | latest_bmi | 17 |
## | followup_systolic | 43559 |
## | followup_diastolic | 43559 |
## | followup_control_status | 43559 |
## | followup_grade | 43559 |
## | followup_date | 43559 |
## | days_between_assessment_and_anchor | 43559 |
## | days_between_assessment_and_baseline | 43559 |
## | number_followup_assessments | 43559 |
## | number_followup_by_hs | 43559 |
## | number_medical_reviews | 43559 |
## | controlled_followup_days | 29371 |
## | number_uncontrolled_followups | 24409 |
## | number_community_assessments | 43559 |
## | number_facility_assessments | 43559 |
## | number_feeder_assessments | 43559 |
## | change_in_systolic | 43559 |
## | change_in_diastolic | 43559 |
# Frequency of each distinct baseline systolic reading, most common first
systolic_bp <- count(
  data_12mths,
  baseline_systolic,
  name = "count",
  sort = TRUE
)
# Print the frequency table
systolic_bp
## # A tibble: 190 × 2
## baseline_systolic count
## <dbl> <int>
## 1 135 1357
## 2 136 1308
## 3 130 1272
## 4 138 1270
## 5 133 1249
## 6 140 1244
## 7 142 1224
## 8 132 1203
## 9 134 1202
## 10 137 1198
## # ℹ 180 more rows
# Descriptive statistics for baseline systolic pressure over all rows
# (mean, median, standard deviation, minimum, maximum), ignoring NA values
systolic_bp_summary <- summarise(
  data_12mths,
  mean   = mean(baseline_systolic, na.rm = TRUE),
  median = median(baseline_systolic, na.rm = TRUE),
  sd     = sd(baseline_systolic, na.rm = TRUE),
  min    = min(baseline_systolic, na.rm = TRUE),
  max    = max(baseline_systolic, na.rm = TRUE)
)
# Print the one-row summary
systolic_bp_summary
## # A tibble: 1 × 5
## mean median sd min max
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 142. 140 21.7 54 288
# Render the overall systolic summary as a striped HTML table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
systolic_bp_summary_table <- kable(
  systolic_bp_summary,
  format = "html",
  caption = "Summary Statistics for Systolic Blood Pressure-All Patients"
) %>%
  kable_styling("striped", full_width = FALSE)
# Display the table
systolic_bp_summary_table
## | mean | median | sd | min | max |
## |---|---|---|---|---|
## | 142.1662 | 140 | 21.67617 | 54 | 288 |
# Histogram of baseline systolic pressure using 5-unit-wide bins
data_12mths %>%
  ggplot(aes(baseline_systolic)) +
  geom_histogram(binwidth = 5, fill = "blue", colour = "black", alpha = 0.7) +
  ggtitle("Distribution of Systolic Blood Pressure") +
  xlab("Systolic Blood Pressure") +
  ylab("Frequency") +
  theme_minimal()

# Box plot of baseline systolic pressure, used to check for outlying readings
data_12mths %>%
  ggplot(aes(y = baseline_systolic)) +
  geom_boxplot(fill = "blue", colour = "black", alpha = 0.7) +
  ggtitle("Box Plot of Systolic Blood Pressure") +
  ylab("Systolic Blood Pressure") +
  theme_minimal()
# Summarise the systolic blood pressure of patients by age group, including
# patient counts. One row per age_group with: number of rows, mean, median,
# standard deviation, minimum and maximum of baseline_systolic (NA ignored).
systolic_bp_medtronic_baseline <- data_12mths %>%
  group_by(age_group) %>%
  summarise(
    count = n(),
    mean = mean(baseline_systolic, na.rm = TRUE),
    median = median(baseline_systolic, na.rm = TRUE),
    sd = sd(baseline_systolic, na.rm = TRUE),
    min = min(baseline_systolic, na.rm = TRUE),
    max = max(baseline_systolic, na.rm = TRUE)
  )
# Display the summary of systolic blood pressure by age group
systolic_bp_medtronic_baseline
## # A tibble: 15 × 7
## age_group count mean median sd min max
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 20-24 175 136. 136 20.4 90 232
## 2 25-29 443 137. 135 20.1 69 216
## 3 30-34 942 139. 137 20.0 86 240
## 4 35-39 1797 138. 136 20.5 80 250
## 5 40-44 2953 139. 137 20.6 69 252
## 6 45-49 4314 140. 138 20.2 65 246
## 7 50-54 6577 141. 138 20.9 74 260
## 8 55-59 6580 141. 139 21.0 80 288
## 9 60-64 8761 142. 140 21.7 54 285
## 10 65-69 8120 143. 141 21.5 70 239
## 11 70-74 9065 144. 142 22.0 76 269
## 12 75-79 4953 145. 142 22.3 76 257
## 13 80-84 3466 145. 143 23.0 76 263
## 14 85+ 2695 145. 143 24.0 74 231
## 15 <20 127 131. 130 24.3 71 215
# Export the per-age-group summary to a CSV file in the working directory
write.csv(
  systolic_bp_medtronic_baseline,
  "systolic_bp_medtronic_baseline.csv",
  row.names = FALSE
)
# Render the same summary as a striped HTML table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
systolic_bp_medtronic_baseline_table <- kable(
  systolic_bp_medtronic_baseline,
  format = "html",
  caption = "Summary of Systolic Blood Pressure by Age Group"
) %>%
  kable_styling("striped", full_width = FALSE)
# Display the table
systolic_bp_medtronic_baseline_table
## | age_group | count | mean | median | sd | min | max |
## |---|---|---|---|---|---|---|
## | 20-24 | 175 | 136.0457 | 136 | 20.43375 | 90 | 232 |
## | 25-29 | 443 | 137.1919 | 135 | 20.14746 | 69 | 216 |
## | 30-34 | 942 | 138.8270 | 137 | 20.04360 | 86 | 240 |
## | 35-39 | 1797 | 138.2387 | 136 | 20.47420 | 80 | 250 |
## | 40-44 | 2953 | 139.2167 | 137 | 20.63008 | 69 | 252 |
## | 45-49 | 4314 | 139.6354 | 138 | 20.24455 | 65 | 246 |
## | 50-54 | 6577 | 140.5965 | 138 | 20.86798 | 74 | 260 |
## | 55-59 | 6580 | 140.9512 | 139 | 21.04281 | 80 | 288 |
## | 60-64 | 8761 | 142.0468 | 140 | 21.73070 | 54 | 285 |
## | 65-69 | 8120 | 142.9201 | 141 | 21.51000 | 70 | 239 |
## | 70-74 | 9065 | 144.0042 | 142 | 21.96746 | 76 | 269 |
## | 75-79 | 4953 | 144.5750 | 142 | 22.27816 | 76 | 257 |
## | 80-84 | 3466 | 145.2822 | 143 | 23.01210 | 76 | 263 |
## | 85+ | 2695 | 145.2935 | 143 | 23.99778 | 74 | 231 |
## | <20 | 127 | 130.5591 | 130 | 24.29568 | 71 | 215 |
# Display order for the age bands. age_group holds character labels, which a
# discrete axis would otherwise order alphabetically ("<20" after "85+").
# Passing the labels as scale limits fixes the order from youngest to oldest.
# NOTE(review): values absent from `limits` (e.g. an NA age_group) are not
# drawn; confirm that is acceptable if such rows can occur.
age_group_order <- c(
  "<20", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54",
  "55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "85+"
)

# Plot the systolic blood pressure by age group (bar height = group mean)
ggplot(systolic_bp_medtronic_baseline, aes(x = age_group, y = mean)) +
  geom_col(fill = "blue", color = "black", alpha = 0.7) +
  scale_x_discrete(limits = age_group_order) +
  labs(
    title = "Mean Systolic Blood Pressure by Age Group",
    x = "Age Group",
    y = "Mean Systolic Blood Pressure"
  ) +
  theme_minimal()

# Plot the systolic blood pressure by age group on box plot
ggplot(data_12mths, aes(x = age_group, y = baseline_systolic)) +
  geom_boxplot(fill = "blue", color = "black", alpha = 0.7) +
  scale_x_discrete(limits = age_group_order) +
  labs(
    title = "Box Plot of Systolic Blood Pressure by Age Group",
    x = "Age Group",
    y = "Systolic Blood Pressure"
  ) +
  theme_minimal()
# Create subsets of patients from the dataset-Selecting only id, baseline sbp (rename to systolic) and age-All patients
# Build one analysis subset: patient id, baseline systolic pressure (renamed
# to `systolic`), age and age group, tagged with a `dataset` label.
#
# data          : data frame containing patient_id, baseline_systolic, age
#                 and age_group.
# dataset_label : string stored in the `dataset` column of every row.
# ...           : optional row conditions forwarded to dplyr::filter(); when
#                 none are given, all rows are kept.
# Returns a data frame with columns patient_id, systolic, age, age_group,
# dataset (in that order).
make_sbp_subset <- function(data, dataset_label, ...) {
  data %>%
    filter(...) %>%
    select(patient_id, systolic = baseline_systolic, age, age_group) %>%
    mutate(dataset = dataset_label)
}

# All patients
medtronic_subset_all <- make_sbp_subset(data_12mths, "medtronic_subset_all")
write.csv(medtronic_subset_all, "medtronic_subset_all.csv", row.names = FALSE)

# Patients whose enrollment_status is "From Screening"
medtronic_subset_screening <- make_sbp_subset(
  data_12mths,
  "medtronic_subset_screening",
  enrollment_status == "From Screening"
)
write.csv(medtronic_subset_screening, "medtronic_subset_screening.csv", row.names = FALSE)

# Patients whose enrollment_status is "Direct Enrollment"
medtronic_subset_direct <- make_sbp_subset(
  data_12mths,
  "medtronic_subset_direct",
  enrollment_status == "Direct Enrollment"
)
write.csv(medtronic_subset_direct, "medtronic_subset_direct.csv", row.names = FALSE)

# Patients flagged diabetes == "Yes"
medtronic_subset_diabetes <- make_sbp_subset(
  data_12mths,
  "medtronic_subset_diabetes",
  diabetes == "Yes"
)
write.csv(medtronic_subset_diabetes, "medtronic_subset_diabetes.csv", row.names = FALSE)