knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(rmarkdown)
library(kableExtra)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
## 
##     group_rows
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(tidyr)

# NOTE: the former `rm(list = ls())` call was removed on purpose.
# It only deletes objects from the global environment - attached packages,
# options and the working directory are left untouched - so it never produced
# a truly clean session, while silently destroying the workspace of anyone who
# ran this chunk interactively. Restart R when a clean session is required.

1 Load the dataset

# Load the steps_subset dataset.
# The file location can be overridden through the STEPS_SUBSET_CSV environment
# variable so the analysis can run on other machines; when the variable is
# unset the original absolute path is used, so existing behaviour is unchanged.
steps_path <- Sys.getenv(
  "STEPS_SUBSET_CSV",
  unset = "/Users/jamesoguta/Documents/James Oguta/My PhD Folder-2023-2025/Trainings/KenyaCVDModel/steps_subset.csv"
)
# Fail early with an explicit message naming the path that was tried.
if (!file.exists(steps_path)) {
  stop("steps_subset.csv not found at: ", steps_path, call. = FALSE)
}
steps_subset <- read_csv(steps_path)
# Display the first few rows of the dataset
head(steps_subset)
## # A tibble: 6 × 19
##      id   psu stratum wealth    age age_group county sex   region smk   systolic
##   <dbl> <dbl>   <dbl> <chr>   <dbl> <chr>      <dbl> <chr> <chr>  <chr>    <dbl>
## 1  1001    32       1 4 Four…    39 35-39         19 Women Coast  No        125.
## 2  1003    32       1 2 Seco…    35 35-39         19 Men   Coast  Yes       132.
## 3  1008    32       1 3 Midd…    60 60-64         19 Men   Coast  No        213.
## 4  1011    32       1 2 Seco…    18 15-19         19 Men   Coast  No        114 
## 5  1013    32       1 4 Four…    51 50-54         19 Women Coast  No        175.
## 6  1016    32       1 5 Rich…    47 45-49         19 Women Coast  No        115 
## # ℹ 8 more variables: htn <dbl>, htn_treatment <dbl>, diastolic <dbl>,
## #   diabetes <dbl>, diabetes_treatment <dbl>, chol <dbl>, alcohol <dbl>,
## #   weights <dbl>

2 Summarize the dataset

# Print per-column summary statistics for the dataset
steps_subset %>%
  summary()
##        id              psu            stratum         wealth         
##  Min.   :  1001   Min.   :  1.00   Min.   :1.000   Length:4500       
##  1st Qu.: 51074   1st Qu.: 51.75   1st Qu.:1.000   Class :character  
##  Median :103013   Median :103.00   Median :1.000   Mode  :character  
##  Mean   :101783   Mean   :102.00   Mean   :1.488                     
##  3rd Qu.:153026   3rd Qu.:152.00   3rd Qu.:2.000                     
##  Max.   :200123   Max.   :200.00   Max.   :2.000                     
##                                                                      
##       age        age_group             county          sex           
##  Min.   :16.0   Length:4500        Min.   : 1.00   Length:4500       
##  1st Qu.:27.0   Class :character   1st Qu.:12.00   Class :character  
##  Median :35.0   Mode  :character   Median :23.00   Mode  :character  
##  Mean   :37.5                      Mean   :23.43                     
##  3rd Qu.:46.0                      3rd Qu.:35.00                     
##  Max.   :70.0                      Max.   :47.00                     
##                                                                      
##     region              smk               systolic           htn        
##  Length:4500        Length:4500        Min.   : 70.67   Min.   :0.0000  
##  Class :character   Class :character   1st Qu.:114.33   1st Qu.:1.0000  
##  Mode  :character   Mode  :character   Median :123.67   Median :1.0000  
##                                        Mean   :127.12   Mean   :0.8963  
##                                        3rd Qu.:135.00   3rd Qu.:1.0000  
##                                        Max.   :263.67   Max.   :1.0000  
##                                        NA's   :63       NA's   :3063    
##  htn_treatment     diastolic         diabetes      diabetes_treatment
##  Min.   :0.000   Min.   : 48.33   Min.   :0.0000   Min.   :0.000     
##  1st Qu.:0.000   1st Qu.: 74.33   1st Qu.:0.0000   1st Qu.:0.000     
##  Median :0.000   Median : 81.67   Median :0.0000   Median :1.000     
##  Mean   :0.244   Mean   : 82.44   Mean   :0.0356   Mean   :0.514     
##  3rd Qu.:0.000   3rd Qu.: 89.33   3rd Qu.:0.0000   3rd Qu.:1.000     
##  Max.   :1.000   Max.   :151.67   Max.   :1.0000   Max.   :1.000     
##  NA's   :4017    NA's   :59       NA's   :319      NA's   :4428      
##       chol           alcohol         weights       
##  Min.   : 2.000   Min.   :1.000   Min.   :  105.8  
##  1st Qu.: 2.830   1st Qu.:1.000   1st Qu.: 1130.1  
##  Median : 3.550   Median :1.000   Median : 2631.9  
##  Mean   : 3.714   Mean   :1.208   Mean   : 3839.8  
##  3rd Qu.: 4.300   3rd Qu.:1.000   3rd Qu.: 4969.9  
##  Max.   :10.300   Max.   :2.000   Max.   :55389.3  
##  NA's   :331      NA's   :1
# Inspect the structure of the dataset: column types and a preview of values
steps_subset %>%
  str()
## spc_tbl_ [4,500 × 19] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ id                : num [1:4500] 1001 1003 1008 1011 1013 ...
##  $ psu               : num [1:4500] 32 32 32 32 32 32 32 32 32 32 ...
##  $ stratum           : num [1:4500] 1 1 1 1 1 1 1 1 1 1 ...
##  $ wealth            : chr [1:4500] "4 Fourth" "2 Second" "3 Middle" "2 Second" ...
##  $ age               : num [1:4500] 39 35 60 18 51 47 38 45 41 48 ...
##  $ age_group         : chr [1:4500] "35-39" "35-39" "60-64" "15-19" ...
##  $ county            : num [1:4500] 19 19 19 19 19 19 19 19 19 19 ...
##  $ sex               : chr [1:4500] "Women" "Men" "Men" "Men" ...
##  $ region            : chr [1:4500] "Coast" "Coast" "Coast" "Coast" ...
##  $ smk               : chr [1:4500] "No" "Yes" "No" "No" ...
##  $ systolic          : num [1:4500] 125 132 213 114 175 ...
##  $ htn               : num [1:4500] NA NA 1 NA 1 NA 1 NA 1 NA ...
##  $ htn_treatment     : num [1:4500] NA NA NA NA NA NA NA NA 0 NA ...
##  $ diastolic         : num [1:4500] 85 81.3 94.7 69.3 101.3 ...
##  $ diabetes          : num [1:4500] 0 0 0 0 0 0 0 0 0 1 ...
##  $ diabetes_treatment: num [1:4500] NA NA NA NA NA NA NA NA NA 1 ...
##  $ chol              : num [1:4500] 3.68 4.49 3.79 2.59 3.31 4.14 3.72 3.72 4.49 4.26 ...
##  $ alcohol           : num [1:4500] 1 1 1 1 1 1 1 1 1 1 ...
##  $ weights           : num [1:4500] 4027 5714 3973 9097 3578 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   id = col_double(),
##   ..   psu = col_double(),
##   ..   stratum = col_double(),
##   ..   wealth = col_character(),
##   ..   age = col_double(),
##   ..   age_group = col_character(),
##   ..   county = col_double(),
##   ..   sex = col_character(),
##   ..   region = col_character(),
##   ..   smk = col_character(),
##   ..   systolic = col_double(),
##   ..   htn = col_double(),
##   ..   htn_treatment = col_double(),
##   ..   diastolic = col_double(),
##   ..   diabetes = col_double(),
##   ..   diabetes_treatment = col_double(),
##   ..   chol = col_double(),
##   ..   alcohol = col_double(),
##   ..   weights = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Report the dimensions (rows, columns) of the dataset
steps_subset %>%
  dim()
## [1] 4500   19
# List the column names of the dataset
names(steps_subset)
##  [1] "id"                 "psu"                "stratum"           
##  [4] "wealth"             "age"                "age_group"         
##  [7] "county"             "sex"                "region"            
## [10] "smk"                "systolic"           "htn"               
## [13] "htn_treatment"      "diastolic"          "diabetes"          
## [16] "diabetes_treatment" "chol"               "alcohol"           
## [19] "weights"
# Display the distinct rows of the dataset (`unique()` on a data frame
# de-duplicates whole rows rather than individual values)
steps_subset %>%
  unique()
## # A tibble: 4,500 × 19
##       id   psu stratum wealth   age age_group county sex   region smk   systolic
##    <dbl> <dbl>   <dbl> <chr>  <dbl> <chr>      <dbl> <chr> <chr>  <chr>    <dbl>
##  1  1001    32       1 4 Fou…    39 35-39         19 Women Coast  No        125.
##  2  1003    32       1 2 Sec…    35 35-39         19 Men   Coast  Yes       132.
##  3  1008    32       1 3 Mid…    60 60-64         19 Men   Coast  No        213.
##  4  1011    32       1 2 Sec…    18 15-19         19 Men   Coast  No        114 
##  5  1013    32       1 4 Fou…    51 50-54         19 Women Coast  No        175.
##  6  1016    32       1 5 Ric…    47 45-49         19 Women Coast  No        115 
##  7  1018    32       1 3 Mid…    38 35-39         19 Men   Coast  No        131.
##  8  1023    32       1 4 Fou…    45 45-49         19 Men   Coast  No        137 
##  9  1026    32       1 1 Poo…    41 40-44         19 Women Coast  No        219.
## 10  1028    32       1 2 Sec…    48 45-49         19 Men   Coast  Yes       115.
## # ℹ 4,490 more rows
## # ℹ 8 more variables: htn <dbl>, htn_treatment <dbl>, diastolic <dbl>,
## #   diabetes <dbl>, diabetes_treatment <dbl>, chol <dbl>, alcohol <dbl>,
## #   weights <dbl>
# Display the number of unique rows in the dataset.
# `length()` on a data frame counts its columns (it returned 19 here), so the
# de-duplicated data is measured with `nrow()` to count distinct observations.
nrow(unique(steps_subset))
## [1] 4500
# Count the missing cells across the whole dataset
steps_subset %>%
  is.na() %>%
  sum()
## [1] 12285
# Report the row count and the column count of the dataset separately
NROW(steps_subset)
## [1] 4500
NCOL(steps_subset)
## [1] 19
# Show the first six rows of the dataset
head(steps_subset, n = 6L)
## # A tibble: 6 × 19
##      id   psu stratum wealth    age age_group county sex   region smk   systolic
##   <dbl> <dbl>   <dbl> <chr>   <dbl> <chr>      <dbl> <chr> <chr>  <chr>    <dbl>
## 1  1001    32       1 4 Four…    39 35-39         19 Women Coast  No        125.
## 2  1003    32       1 2 Seco…    35 35-39         19 Men   Coast  Yes       132.
## 3  1008    32       1 3 Midd…    60 60-64         19 Men   Coast  No        213.
## 4  1011    32       1 2 Seco…    18 15-19         19 Men   Coast  No        114 
## 5  1013    32       1 4 Four…    51 50-54         19 Women Coast  No        175.
## 6  1016    32       1 5 Rich…    47 45-49         19 Women Coast  No        115 
## # ℹ 8 more variables: htn <dbl>, htn_treatment <dbl>, diastolic <dbl>,
## #   diabetes <dbl>, diabetes_treatment <dbl>, chol <dbl>, alcohol <dbl>,
## #   weights <dbl>
# Show the last six rows of the dataset
tail(steps_subset, n = 6L)
## # A tibble: 6 × 19
##       id   psu stratum wealth   age age_group county sex   region smk   systolic
##    <dbl> <dbl>   <dbl> <chr>  <dbl> <chr>      <dbl> <chr> <chr>  <chr>    <dbl>
## 1 200097   200       2 5 Ric…    27 25-29          4 Men   Weste… No        120 
## 2 200102   200       2 3 Mid…    46 45-49          4 Men   Weste… No        175.
## 3 200106   200       2 4 Fou…    23 20-24          4 Men   Weste… No        125 
## 4 200115   200       2 5 Ric…    61 60-64          4 Men   Weste… No        133.
## 5 200119   200       2 5 Ric…    27 25-29          4 Women Weste… No        118.
## 6 200123   200       2 4 Fou…    47 45-49          4 Women Weste… No        135.
## # ℹ 8 more variables: htn <dbl>, htn_treatment <dbl>, diastolic <dbl>,
## #   diabetes <dbl>, diabetes_treatment <dbl>, chol <dbl>, alcohol <dbl>,
## #   weights <dbl>

3 Display the missing observations by variable

# Count the missing observations in every column.
# `vapply()` with an integer(1) template is used instead of `sapply()` so the
# result is always a named integer vector, even for an empty data frame.
missing_values <- vapply(steps_subset, function(x) sum(is.na(x)), integer(1))
# Reshape to a two-column data frame: variable name and its NA count
missing_values <- data.frame(variable = names(missing_values), missing = missing_values)
# Keep only the variables that actually have missing observations
missing_values <- missing_values[missing_values$missing > 0, ]
# Display the missing values
missing_values
##                              variable missing
## age_group                   age_group       1
## smk                               smk       3
## systolic                     systolic      63
## htn                               htn    3063
## htn_treatment           htn_treatment    4017
## diastolic                   diastolic      59
## diabetes                     diabetes     319
## diabetes_treatment diabetes_treatment    4428
## chol                             chol     331
## alcohol                       alcohol       1
# Render the missing-value counts as a striped HTML table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, which would silently change the table width.
missing_values_table <- kable(missing_values, format = "html", caption = "Missing Values by Variable") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
# Display the table
missing_values_table
Missing Values by Variable
variable missing
age_group age_group 1
smk smk 3
systolic systolic 63
htn htn 3063
htn_treatment htn_treatment 4017
diastolic diastolic 59
diabetes diabetes 319
diabetes_treatment diabetes_treatment 4428
chol chol 331
alcohol alcohol 1

4 Summarize the systolic blood pressure of patients

# Frequency table of systolic blood pressure readings for all patients:
# one row per distinct value (NA included), most frequent values first
systolic_bp <- steps_subset %>%
  count(systolic, name = "count", sort = TRUE)
# Print the frequency table
systolic_bp
## # A tibble: 352 × 2
##    systolic count
##       <dbl> <int>
##  1      NA     63
##  2     124     60
##  3     123     52
##  4     118.    51
##  5     125     51
##  6     117.    48
##  7     115.    47
##  8     120.    47
##  9     117.    46
## 10     119.    45
## # ℹ 342 more rows
# Build a striped HTML table of the systolic blood pressure frequency counts.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
systolic_bp_table <- kable(systolic_bp, format = "html", caption = "Summary of Systolic Blood Pressure") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
# Display the table (left disabled, as in the original)
# systolic_bp_table

5 Generate summary statistics for the systolic blood pressure

# Overall summary statistics of systolic blood pressure (missing readings are
# dropped before each statistic is computed)
systolic_bp_summary <- steps_subset %>%
  summarise(
    across(
      systolic,
      list(
        mean = function(v) mean(v, na.rm = TRUE),
        median = function(v) median(v, na.rm = TRUE),
        sd = function(v) sd(v, na.rm = TRUE),
        min = function(v) min(v, na.rm = TRUE),
        max = function(v) max(v, na.rm = TRUE)
      ),
      .names = "{.fn}"
    )
  )
# Print the one-row summary
systolic_bp_summary
## # A tibble: 1 × 5
##    mean median    sd   min   max
##   <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1  127.   124.  19.9  70.7  264.
# Render the overall systolic blood pressure statistics as a striped HTML table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
systolic_bp_summary_table <- kable(systolic_bp_summary, format = "html", caption = "Summary Statistics for Systolic Blood Pressure-All Patients") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
# Display the table
systolic_bp_summary_table
Summary Statistics for Systolic Blood Pressure-All Patients
mean median sd min max
127.1154 123.6667 19.89775 70.66667 263.6667

6 Generate a histogram of the systolic blood pressure

# Histogram of systolic blood pressure for all patients (bins of width 5)
histogram <- steps_subset %>%
  ggplot(aes(x = systolic)) +
  geom_histogram(binwidth = 5, alpha = 0.7, color = "black", fill = "blue") +
  theme_minimal() +
  labs(
    title = "Histogram of Systolic Blood Pressure-All Patients",
    x = "Systolic Blood Pressure",
    y = "Frequency"
  )
# Render the histogram
histogram

# Boxplot of systolic blood pressure for all patients; the constant "" on the
# x axis yields a single, unlabelled box
boxplot <- steps_subset %>%
  ggplot(aes(x = "", y = systolic)) +
  geom_boxplot(alpha = 0.7, color = "black", fill = "blue") +
  theme_minimal() +
  labs(
    title = "Boxplot of Systolic Blood Pressure-All Patients",
    x = "",
    y = "Systolic Blood Pressure"
  )
# Render the boxplot
boxplot

# Systolic blood pressure by age group: number of patients plus summary
# statistics (missing readings are dropped before each statistic is computed)
systolic_bp_age_group <- steps_subset %>%
  group_by(age_group) %>%
  summarise(
    count = n(),
    across(
      systolic,
      list(
        mean = function(v) mean(v, na.rm = TRUE),
        median = function(v) median(v, na.rm = TRUE),
        sd = function(v) sd(v, na.rm = TRUE),
        min = function(v) min(v, na.rm = TRUE),
        max = function(v) max(v, na.rm = TRUE)
      ),
      .names = "{.fn}"
    )
  )
# Print the per-age-group summary
systolic_bp_age_group
## # A tibble: 12 × 7
##    age_group count  mean median    sd   min   max
##    <chr>     <int> <dbl>  <dbl> <dbl> <dbl> <dbl>
##  1 15-19       217  119.   119   13.0  92.3  167.
##  2 20-24       593  121.   119   12.9  85.3  175 
##  3 25-29       688  122.   120.  14.7  80.3  196.
##  4 30-34       666  122.   121   14.4  81.7  200.
##  5 35-39       588  125.   124   16.8  70.7  214 
##  6 40-44       459  127.   124   17.8  81.7  219.
##  7 45-49       334  133.   128.  24.2  83    258.
##  8 50-54       285  135.   131.  23.0  83.7  234.
##  9 55-59       256  137.   134.  25.1  86.3  249.
## 10 60-64       226  141.   137   26.9  95.7  264.
## 11 65-69       187  147.   144.  26.3  89.3  229.
## 12 <NA>          1  139    139   NA   139    139
# Export the per-age-group summary to a CSV file in the working directory
write.csv(systolic_bp_age_group, "Steps_sbp_age_all_pts.csv", row.names = FALSE)
# Render the same summary as a striped HTML table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
systolic_bp_age_group_table <- kable(systolic_bp_age_group, format = "html", caption = "Summary of Systolic Blood Pressure by Age Group-All patients") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
# Display the table
systolic_bp_age_group_table
Summary of Systolic Blood Pressure by Age Group-All patients
age_group count mean median sd min max
15-19 217 119.1312 119.0000 12.96394 92.33333 167.3333
20-24 593 120.8713 119.0000 12.92174 85.33333 175.0000
25-29 688 121.6706 119.6667 14.65747 80.33333 196.3333
30-34 666 122.2111 121.0000 14.43230 81.66667 199.6667
35-39 588 124.8330 124.0000 16.82695 70.66667 214.0000
40-44 459 127.4081 124.0000 17.78734 81.66667 218.6667
45-49 334 132.9939 127.5000 24.19028 83.00000 258.3333
50-54 285 134.6450 131.3333 23.01394 83.66667 234.3333
55-59 256 137.4196 134.3333 25.14245 86.33333 249.3333
60-64 226 141.3007 137.0000 26.85366 95.66667 263.6667
65-69 187 146.9604 144.3333 26.34179 89.33333 229.3333
NA 1 139.0000 139.0000 NA 139.00000 139.0000

7 Generate a histogram of the systolic blood pressure by age group

# Faceted histograms of systolic blood pressure, one panel (and fill colour)
# per age group, bins of width 5
histogram_age_group <- steps_subset %>%
  ggplot(aes(x = systolic, fill = age_group)) +
  geom_histogram(binwidth = 5, alpha = 0.7, color = "black") +
  facet_wrap(vars(age_group)) +
  theme_minimal() +
  labs(
    title = "Histogram of Systolic Blood Pressure by Age Group",
    x = "Systolic Blood Pressure",
    y = "Frequency"
  )
# Render the histograms
histogram_age_group

8 Generate a bar plot of the mean systolic blood pressure by age group

# Bar plot of the mean systolic blood pressure in each age group, drawn from
# the pre-computed summary table (bar height = the `mean` column)
bar_plot_age_group <- systolic_bp_age_group %>%
  ggplot(aes(x = age_group, y = mean)) +
  geom_col(alpha = 0.7, color = "black", fill = "blue") +
  theme_minimal() +
  labs(
    title = "Bar Plot of Systolic Blood Pressure by Age Group-All Patients",
    x = "Age Group",
    y = "Mean Systolic Blood Pressure"
  )
# Render the bar plot
bar_plot_age_group

9 Summarise systolic BP for patients with hypertension

# Summary statistics of systolic blood pressure restricted to rows with
# htn == 1 (rows where htn is NA are dropped by the filter)
systolic_bp_hypertension <- steps_subset %>%
  filter(htn == 1) %>%
  summarise(
    across(
      systolic,
      list(
        mean = function(v) mean(v, na.rm = TRUE),
        median = function(v) median(v, na.rm = TRUE),
        sd = function(v) sd(v, na.rm = TRUE),
        min = function(v) min(v, na.rm = TRUE),
        max = function(v) max(v, na.rm = TRUE)
      ),
      .names = "{.fn}"
    )
  )
# Print the one-row summary
systolic_bp_hypertension
## # A tibble: 1 × 5
##    mean median    sd   min   max
##   <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1  149.   144.  20.8   107  264.
# Render the hypertension summary as a striped HTML table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
systolic_bp_hypertension_table <- kable(systolic_bp_hypertension, format = "html", caption = "Summary of Systolic Blood Pressure for Patients with Hypertension") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
# Display the table
systolic_bp_hypertension_table
Summary of Systolic Blood Pressure for Patients with Hypertension
mean median sd min max
148.9481 144.3333 20.8193 107 263.6667

10 Generate a histogram of the systolic blood pressure for patients with hypertension

# Histogram of systolic blood pressure for rows with htn == 1 (bins of width 5)
histogram_hypertension <- steps_subset %>%
  filter(htn == 1) %>%
  ggplot(aes(x = systolic)) +
  geom_histogram(binwidth = 5, alpha = 0.7, color = "black", fill = "red") +
  theme_minimal() +
  labs(
    title = "Histogram of Systolic Blood Pressure for Patients with Hypertension",
    x = "Systolic Blood Pressure",
    y = "Frequency"
  )
# Render the histogram
histogram_hypertension

# Boxplot of systolic blood pressure for the same subset; the constant "" on
# the x axis yields a single, unlabelled box
boxplot_hypertension <- steps_subset %>%
  filter(htn == 1) %>%
  ggplot(aes(x = "", y = systolic)) +
  geom_boxplot(alpha = 0.7, color = "black", fill = "red") +
  theme_minimal() +
  labs(
    title = "Boxplot of Systolic Blood Pressure for Patients with Hypertension",
    x = "",
    y = "Systolic Blood Pressure"
  )
# Render the boxplot
boxplot_hypertension

# Systolic blood pressure for rows with htn == 1, broken down by age group,
# with the number of patients per group.
# Note: this reassigns `systolic_bp_hypertension`, replacing the overall
# (ungrouped) summary stored under the same name earlier in the file.
systolic_bp_hypertension <- steps_subset %>%
  filter(htn == 1) %>%
  group_by(age_group) %>%
  summarise(
    count = n(),
    across(
      systolic,
      list(
        mean = function(v) mean(v, na.rm = TRUE),
        median = function(v) median(v, na.rm = TRUE),
        sd = function(v) sd(v, na.rm = TRUE),
        min = function(v) min(v, na.rm = TRUE),
        max = function(v) max(v, na.rm = TRUE)
      ),
      .names = "{.fn}"
    )
  )
# Print the per-age-group summary
systolic_bp_hypertension
## # A tibble: 11 × 7
##    age_group count  mean median    sd   min   max
##    <chr>     <int> <dbl>  <dbl> <dbl> <dbl> <dbl>
##  1 15-19        34  137.   133.  12.8  114.  167.
##  2 20-24        88  139.   139.  12.0  108.  175 
##  3 25-29       109  143.   141   16.8  116.  196.
##  4 30-34       118  140.   138.  15.9  116.  200.
##  5 35-39       159  143.   141.  16.3  111.  214 
##  6 40-44       137  146.   142   17.7  117.  219.
##  7 45-49       139  152.   146.  25.0  111.  258.
##  8 50-54       132  152.   146   20.5  121.  234.
##  9 55-59       125  156.   150.  21.8  120.  249.
## 10 60-64       124  158.   154   24.2  120.  264.
## 11 65-69       123  160.   156   21.5  107   229.
# Export the per-age-group hypertension summary to a CSV file in the working
# directory
write.csv(systolic_bp_hypertension, "Steps_sbp_age_hypertension.csv", row.names = FALSE)
# Render the same summary as a striped HTML table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
# NOTE(review): the caption is identical to the one used for the overall
# hypertension table; consider adding "by Age Group" to tell them apart.
systolic_bp_hypertension_table <- kable(systolic_bp_hypertension, format = "html", caption = "Summary of Systolic Blood Pressure for Patients with Hypertension") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
# Display the table
systolic_bp_hypertension_table
Summary of Systolic Blood Pressure for Patients with Hypertension
age_group count mean median sd min max
15-19 34 136.7353 133.1667 12.79409 114.3333 167.3333
20-24 88 139.4713 139.3333 11.97840 108.3333 175.0000
25-29 109 143.1223 141.0000 16.78916 115.6667 196.3333
30-34 118 140.4548 137.6667 15.88537 115.6667 199.6667
35-39 159 143.0764 140.6667 16.27772 111.3333 214.0000
40-44 137 145.9611 142.0000 17.69162 117.3333 218.6667
45-49 139 152.1087 146.1667 25.02392 111.3333 258.3333
50-54 132 151.9389 146.0000 20.51457 121.3333 234.3333
55-59 125 155.8627 150.3333 21.82683 120.3333 249.3333
60-64 124 158.3442 154.0000 24.20506 119.6667 263.6667
65-69 123 160.0054 156.0000 21.50549 107.0000 229.3333

11 Plot a bar graph of the mean systolic blood pressure for patients with hypertension by age group

# Bar plot of the mean systolic blood pressure per age group, drawn from the
# pre-computed hypertension summary table (bar height = the `mean` column)
bar_plot_hypertension <- systolic_bp_hypertension %>%
  ggplot(aes(x = age_group, y = mean)) +
  geom_col(alpha = 0.7, color = "black", fill = "red") +
  theme_minimal() +
  labs(
    title = "Bar Plot of Systolic Blood Pressure for Patients with Hypertension by Age Group",
    x = "Age Group",
    y = "Mean Systolic Blood Pressure"
  )
# Render the bar plot
bar_plot_hypertension

# Faceted histograms of systolic blood pressure for rows with htn == 1, one
# panel (and fill colour) per age group, bins of width 5
histogram_hypertension_age_group <- steps_subset %>%
  filter(htn == 1) %>%
  ggplot(aes(x = systolic, fill = age_group)) +
  geom_histogram(binwidth = 5, alpha = 0.7, color = "black") +
  facet_wrap(vars(age_group)) +
  theme_minimal() +
  labs(
    title = "Histogram of Systolic Blood Pressure for Patients with Hypertension by Age Group",
    x = "Systolic Blood Pressure",
    y = "Frequency"
  )
# Render the histograms
histogram_hypertension_age_group

11.0.1 Generate summary statistics for systolic BP for patients on hypertension treatment

# Summary statistics of systolic blood pressure for rows with both htn == 1
# and htn_treatment == 1 (rows with NA in either column are dropped)
systolic_bp_treatment <- steps_subset %>%
  filter(htn == 1, htn_treatment == 1) %>%
  summarise(
    across(
      systolic,
      list(
        mean = function(v) mean(v, na.rm = TRUE),
        median = function(v) median(v, na.rm = TRUE),
        sd = function(v) sd(v, na.rm = TRUE),
        min = function(v) min(v, na.rm = TRUE),
        max = function(v) max(v, na.rm = TRUE)
      ),
      .names = "{.fn}"
    )
  )
# Print the one-row summary
systolic_bp_treatment
## # A tibble: 1 × 5
##    mean median    sd   min   max
##   <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1  148.   142.  27.0   107  258.
# Render the hypertension-treatment summary as a striped HTML table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
systolic_bp_treatment_table <- kable(systolic_bp_treatment, format = "html", caption = "Summary of Systolic Blood Pressure for Patients on Hypertension Treatment") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
# Display the table
systolic_bp_treatment_table
Summary of Systolic Blood Pressure for Patients on Hypertension Treatment
mean median sd min max
148.4195 142.1667 26.97177 107 258.3333

12 Generate a histogram of the systolic blood pressure for patients on hypertension treatment

# Histogram of systolic blood pressure for rows with htn == 1 and
# htn_treatment == 1 (bins of width 5)
histogram_treatment <- steps_subset %>%
  filter(htn == 1, htn_treatment == 1) %>%
  ggplot(aes(x = systolic)) +
  geom_histogram(binwidth = 5, alpha = 0.7, color = "black", fill = "green") +
  theme_minimal() +
  labs(
    title = "Histogram of Systolic Blood Pressure for Patients on Hypertension Treatment",
    x = "Systolic Blood Pressure",
    y = "Frequency"
  )
# Render the histogram
histogram_treatment

# Boxplot of systolic blood pressure for the same subset; the constant "" on
# the x axis yields a single, unlabelled box
boxplot_treatment <- steps_subset %>%
  filter(htn == 1, htn_treatment == 1) %>%
  ggplot(aes(x = "", y = systolic)) +
  geom_boxplot(alpha = 0.7, color = "black", fill = "green") +
  theme_minimal() +
  labs(
    title = "Boxplot of Systolic Blood Pressure for Patients on Hypertension Treatment",
    x = "",
    y = "Systolic Blood Pressure"
  )
# Render the boxplot
boxplot_treatment

# Systolic blood pressure for rows with htn == 1 and htn_treatment == 1,
# broken down by age group, with the number of patients per group.
# Note: this reassigns `systolic_bp_treatment`, replacing the overall
# (ungrouped) summary stored under the same name earlier in the file.
systolic_bp_treatment <- steps_subset %>%
  filter(htn == 1, htn_treatment == 1) %>%
  group_by(age_group) %>%
  summarise(
    count = n(),
    across(
      systolic,
      list(
        mean = function(v) mean(v, na.rm = TRUE),
        median = function(v) median(v, na.rm = TRUE),
        sd = function(v) sd(v, na.rm = TRUE),
        min = function(v) min(v, na.rm = TRUE),
        max = function(v) max(v, na.rm = TRUE)
      ),
      .names = "{.fn}"
    )
  )
# Print the per-age-group summary
systolic_bp_treatment
## # A tibble: 11 × 7
##    age_group count  mean median    sd   min   max
##    <chr>     <int> <dbl>  <dbl> <dbl> <dbl> <dbl>
##  1 15-19         4  129.   124.  17.2  114.  153.
##  2 20-24         2  118    118   13.7  108.  128.
##  3 25-29         1  128.   128.  NA    128.  128.
##  4 30-34         3  138.   133.  13.0  129.  153 
##  5 35-39         9  142.   135.  23.0  120.  192.
##  6 40-44        10  146.   138.  19.7  126.  181.
##  7 45-49        20  148.   137   38.9  111.  258.
##  8 50-54        11  151.   142.  24.3  121.  208.
##  9 55-59        18  159.   152   30.1  130.  249.
## 10 60-64        18  148.   146.  21.8  120.  186 
## 11 65-69        22  152.   148   23.6  107   192.
# Export the per-age-group treatment summary to a CSV file in the working
# directory
write.csv(systolic_bp_treatment, "Steps_sbp_age_hypertension_treatment.csv", row.names = FALSE)
# Render the same summary as a striped HTML table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
# NOTE(review): the caption is identical to the one used for the overall
# treatment table; consider adding "by Age Group" to tell them apart.
systolic_bp_treatment_table <- kable(systolic_bp_treatment, format = "html", caption = "Summary of Systolic Blood Pressure for Patients on Hypertension Treatment") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
# Display the table
systolic_bp_treatment_table
Summary of Systolic Blood Pressure for Patients on Hypertension Treatment
age_group count mean median sd min max
15-19 4 128.6667 123.5000 17.24121 114.3333 153.3333
20-24 2 118.0000 118.0000 13.67073 108.3333 127.6667
25-29 1 128.3333 128.3333 NA 128.3333 128.3333
30-34 3 138.1111 132.6667 13.04834 128.6667 153.0000
35-39 9 142.4583 134.8333 23.04236 120.3333 191.6667
40-44 10 145.7000 138.3333 19.66287 126.3333 180.6667
45-49 20 147.6140 137.0000 38.86384 111.3333 258.3333
50-54 11 150.5455 142.3333 24.25021 121.3333 207.6667
55-59 18 158.8519 152.0000 30.06990 130.3333 249.3333
60-64 18 147.7778 146.1667 21.75142 119.6667 186.0000
65-69 22 152.1212 148.0000 23.64431 107.0000 191.6667

13 Plot a bar graph of the mean systolic blood pressure for patients on hypertension treatment by age group

# Bar plot of the mean systolic blood pressure per age group, drawn from the
# pre-computed treatment summary table (bar height = the `mean` column)
bar_plot_treatment <- systolic_bp_treatment %>%
  ggplot(aes(x = age_group, y = mean)) +
  geom_col(alpha = 0.7, color = "black", fill = "green") +
  theme_minimal() +
  labs(
    title = "Bar Plot of Systolic Blood Pressure for Patients on Hypertension Treatment by Age Group",
    x = "Age Group",
    y = "Mean Systolic Blood Pressure"
  )
# Render the bar plot
bar_plot_treatment

# Faceted histograms of systolic blood pressure for rows with htn == 1 and
# htn_treatment == 1, one panel (and fill colour) per age group, bins of width 5
histogram_treatment_age_group <- steps_subset %>%
  filter(htn == 1, htn_treatment == 1) %>%
  ggplot(aes(x = systolic, fill = age_group)) +
  geom_histogram(binwidth = 5, alpha = 0.7, color = "black") +
  facet_wrap(vars(age_group)) +
  theme_minimal() +
  labs(
    title = "Histogram of Systolic Blood Pressure for Patients on Hypertension Treatment by Age Group",
    x = "Systolic Blood Pressure",
    y = "Frequency"
  )
# Render the histograms
histogram_treatment_age_group

# Summary statistics of systolic blood pressure restricted to rows with
# diabetes == 1 (rows where diabetes is NA are dropped by the filter)
systolic_bp_diabetes <- steps_subset %>%
  filter(diabetes == 1) %>%
  summarise(
    across(
      systolic,
      list(
        mean = function(v) mean(v, na.rm = TRUE),
        median = function(v) median(v, na.rm = TRUE),
        sd = function(v) sd(v, na.rm = TRUE),
        min = function(v) min(v, na.rm = TRUE),
        max = function(v) max(v, na.rm = TRUE)
      ),
      .names = "{.fn}"
    )
  )
# Print the one-row summary
systolic_bp_diabetes
## # A tibble: 1 × 5
##    mean median    sd   min   max
##   <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1  140.   134.  26.7  91.3  249.
# Render the diabetes summary as a striped HTML table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
systolic_bp_diabetes_table <- kable(systolic_bp_diabetes, format = "html", caption = "Summary of Systolic Blood Pressure for Patients with Diabetes") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
# Display the table
systolic_bp_diabetes_table
Summary of Systolic Blood Pressure for Patients with Diabetes
mean median sd min max
140.0685 133.6667 26.6901 91.33333 249.3333

14 Generate a histogram of the systolic blood pressure for patients with diabetes

# Histogram (binwidth 5) of systolic BP restricted to rows with diabetes == 1
histogram_diabetes <- steps_subset %>%
  filter(diabetes == 1) %>%
  ggplot(mapping = aes(x = systolic)) +
  geom_histogram(binwidth = 5, alpha = 0.7, colour = "black", fill = "purple") +
  theme_minimal() +
  labs(
    title = "Histogram of Systolic Blood Pressure for Patients with Diabetes",
    x = "Systolic Blood Pressure",
    y = "Frequency"
  )
# Render the histogram
histogram_diabetes

# Generate a boxplot of the systolic blood pressure for patients with diabetes

# Single boxplot of systolic BP for rows with diabetes == 1; the empty string
# mapped to x gives one unlabelled box
boxplot_diabetes <- steps_subset %>%
  filter(diabetes == 1) %>%
  ggplot(mapping = aes(x = "", y = systolic)) +
  geom_boxplot(alpha = 0.7, colour = "black", fill = "purple") +
  theme_minimal() +
  labs(
    title = "Boxplot of Systolic Blood Pressure for Patients with Diabetes",
    x = "",
    y = "Systolic Blood Pressure"
  )
# Render the boxplot
boxplot_diabetes

# Summarise systolic BP for patients with diabetes by age group

# Summarise systolic BP for patients with diabetes by age group.
#
# `count` is the number of rows in the group, including rows whose systolic
# value is missing. When a group has no non-missing systolic value,
# mean()/min()/max() with na.rm = TRUE would return NaN/Inf/-Inf (and min/max
# would warn), so those statistics are reported as NA for such groups instead.
# NOTE: this overwrites the overall `systolic_bp_diabetes` summary created
# earlier in the document.
systolic_bp_diabetes <- steps_subset %>%
  filter(diabetes == 1) %>%
  group_by(age_group) %>%
  summarise(count = n(),
            # TRUE when the group has at least one usable systolic reading
            any_measured = any(!is.na(systolic)),
            mean = if (any_measured) mean(systolic, na.rm = TRUE) else NA_real_,
            median = median(systolic, na.rm = TRUE),
            sd = sd(systolic, na.rm = TRUE),
            min = if (any_measured) min(systolic, na.rm = TRUE) else NA_real_,
            max = if (any_measured) max(systolic, na.rm = TRUE) else NA_real_) %>%
  # Drop the helper flag so the output columns match the original table
  select(-any_measured)
# Display the summary of systolic blood pressure for patients with diabetes
systolic_bp_diabetes
## # A tibble: 12 × 7
##    age_group count  mean median    sd   min   max
##    <chr>     <int> <dbl>  <dbl> <dbl> <dbl> <dbl>
##  1 15-19         1  117.   117.  NA   117.   117.
##  2 20-24         3  119.   119.  16.0 108    131.
##  3 25-29        10  124.   123.  11.5 110.   151.
##  4 30-34         9  113.   112.  10.1  99    129.
##  5 35-39        13  120.   124.  12.3  91.3  135 
##  6 40-44        19  134.   128.  20.1 105.   178.
##  7 45-49        17  142.   132   25.2 115.   211 
##  8 50-54        20  150.   145.  25.7 112    209 
##  9 55-59        20  157.   153.  35.1  97    249.
## 10 60-64        17  139.   133   25.4 115    219.
## 11 65-69        19  156.   155.  21.5 124.   204 
## 12 <NA>          1  139    139   NA   139    139
# Export the by-age-group diabetes summary to a CSV file in the working directory
write.csv(systolic_bp_diabetes, "Steps_sbp_age_diabetes.csv", row.names = FALSE)
# Format the same summary as a striped HTML table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
systolic_bp_diabetes_table <- kable(
  systolic_bp_diabetes,
  format = "html",
  caption = "Summary of Systolic Blood Pressure for Patients with Diabetes"
) %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
# Display the table
systolic_bp_diabetes_table
Summary of Systolic Blood Pressure for Patients with Diabetes
age_group count mean median sd min max
15-19 1 116.6667 116.6667 NA 116.66667 116.6667
20-24 3 119.3333 119.3333 16.02775 108.00000 130.6667
25-29 10 124.5000 122.6667 11.46250 110.33333 151.3333
30-34 9 112.9630 112.3333 10.11111 99.00000 128.6667
35-39 13 120.0513 123.6667 12.30390 91.33333 135.0000
40-44 19 133.8246 128.3333 20.13791 105.33333 178.3333
45-49 17 142.1961 132.0000 25.23722 114.66667 211.0000
50-54 20 150.3833 145.1667 25.65704 112.00000 209.0000
55-59 20 156.9825 152.6667 35.14493 97.00000 249.3333
60-64 17 138.5417 133.0000 25.38879 115.00000 218.6667
65-69 19 156.1228 154.6667 21.53757 123.66667 204.0000
NA 1 139.0000 139.0000 NA 139.00000 139.0000

15 Plot a bar graph of the systolic blood pressure for patients with diabetes by age group

# Bar chart of the mean systolic BP per age group for patients with diabetes.
# Reads the by-age-group summary table `systolic_bp_diabetes`, so bar heights
# are the precomputed `mean` column. geom_col() is the idiomatic way to draw
# bars whose heights are values already in the data (it is
# geom_bar(stat = "identity") under another name).
bar_plot_diabetes <- ggplot(systolic_bp_diabetes, aes(x = age_group, y = mean)) +
  geom_col(fill = "purple", color = "black", alpha = 0.7) +
  labs(title = "Bar Plot of Systolic Blood Pressure for Patients with Diabetes by Age Group",
       x = "Age Group",
       y = "Mean Systolic Blood Pressure") +
  theme_minimal()
# Display the bar plot
bar_plot_diabetes

# Generate a histogram of the systolic blood pressure for patients with diabetes by age group

# Faceted histograms (binwidth 5) of systolic BP for rows with diabetes == 1,
# one panel per age group, bars filled by age group
histogram_diabetes_age_group <- steps_subset %>%
  filter(diabetes == 1) %>%
  ggplot(mapping = aes(x = systolic, fill = age_group)) +
  geom_histogram(binwidth = 5, alpha = 0.7, colour = "black") +
  facet_wrap(vars(age_group)) +
  theme_minimal() +
  labs(
    title = "Histogram of Systolic Blood Pressure for Patients with Diabetes by Age Group",
    x = "Systolic Blood Pressure",
    y = "Frequency"
  )
# Render the faceted histogram
histogram_diabetes_age_group

# Generate a summary of the systolic blood pressure for diabetes patients with hypertension

# Overall systolic BP summary (mean, median, sd, min, max) for rows with both
# diabetes == 1 and htn == 1; missing systolic values are ignored
systolic_bp_diabetes_hypertension <- summarise(
  filter(steps_subset, diabetes == 1, htn == 1),
  mean = mean(systolic, na.rm = TRUE),
  median = median(systolic, na.rm = TRUE),
  sd = sd(systolic, na.rm = TRUE),
  min = min(systolic, na.rm = TRUE),
  max = max(systolic, na.rm = TRUE)
)
# Print the one-row summary tibble
systolic_bp_diabetes_hypertension
## # A tibble: 1 × 5
##    mean median    sd   min   max
##   <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1  155.   150.  25.3  121.  249.
# Format the diabetes + hypertension systolic BP summary as a striped HTML table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
systolic_bp_diabetes_hypertension_table <- kable(
  systolic_bp_diabetes_hypertension,
  format = "html",
  caption = "Summary of Systolic Blood Pressure for Patients with Diabetes and Hypertension"
) %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
# Display the table
systolic_bp_diabetes_hypertension_table
Summary of Systolic Blood Pressure for Patients with Diabetes and Hypertension
mean median sd min max
155.3049 150.3333 25.32885 120.6667 249.3333

16 Generate a histogram of the systolic blood pressure for patients with diabetes and hypertension

# Histogram (binwidth 5) of systolic BP for rows with diabetes == 1 and htn == 1
histogram_diabetes_hypertension <- steps_subset %>%
  filter(diabetes == 1, htn == 1) %>%
  ggplot(mapping = aes(x = systolic)) +
  geom_histogram(binwidth = 5, alpha = 0.7, colour = "black", fill = "orange") +
  theme_minimal() +
  labs(
    title = "Histogram of Systolic Blood Pressure for Patients with Diabetes and Hypertension",
    x = "Systolic Blood Pressure",
    y = "Frequency"
  )
# Render the histogram
histogram_diabetes_hypertension

# Generate a boxplot of the systolic blood pressure for patients with diabetes and hypertension

# Single boxplot of systolic BP for rows with diabetes == 1 and htn == 1; the
# empty string mapped to x gives one unlabelled box
boxplot_diabetes_hypertension <- steps_subset %>%
  filter(diabetes == 1, htn == 1) %>%
  ggplot(mapping = aes(x = "", y = systolic)) +
  geom_boxplot(alpha = 0.7, colour = "black", fill = "orange") +
  theme_minimal() +
  labs(
    title = "Boxplot of Systolic Blood Pressure for Patients with Diabetes and Hypertension",
    x = "",
    y = "Systolic Blood Pressure"
  )
# Render the boxplot
boxplot_diabetes_hypertension

# Summarise systolic BP for patients with diabetes and hypertension by age group

# Summarise systolic BP for patients with diabetes and hypertension by age group.
#
# `count` is the number of rows in the group, including rows whose systolic
# value is missing. At least one age group here has no non-missing systolic
# value; for such a group mean()/min()/max() with na.rm = TRUE return
# NaN/Inf/-Inf (and min/max warn), which then leak into the exported CSV and
# the bar plot. Those statistics are reported as NA for such groups instead.
# NOTE: this overwrites the overall `systolic_bp_diabetes_hypertension`
# summary created earlier in the document.
systolic_bp_diabetes_hypertension <- steps_subset %>%
  filter(diabetes == 1 & htn == 1) %>%
  group_by(age_group) %>%
  summarise(count = n(),
            # TRUE when the group has at least one usable systolic reading
            any_measured = any(!is.na(systolic)),
            mean = if (any_measured) mean(systolic, na.rm = TRUE) else NA_real_,
            median = median(systolic, na.rm = TRUE),
            sd = sd(systolic, na.rm = TRUE),
            min = if (any_measured) min(systolic, na.rm = TRUE) else NA_real_,
            max = if (any_measured) max(systolic, na.rm = TRUE) else NA_real_) %>%
  # Drop the helper flag so the output columns match the original table
  select(-any_measured)
# Display the summary of systolic blood pressure for patients with diabetes and hypertension
systolic_bp_diabetes_hypertension
## # A tibble: 10 × 7
##    age_group count  mean median    sd   min   max
##    <chr>     <int> <dbl>  <dbl> <dbl> <dbl> <dbl>
##  1 20-24         1  NaN     NA  NA     Inf  -Inf 
##  2 25-29         3  135.   133. 15.0   122.  151.
##  3 30-34         1  129.   129. NA     129.  129.
##  4 35-39         2  132.   132.  1.89  131   134.
##  5 40-44        10  146.   148. 19.3   121.  178.
##  6 45-49         8  158.   155. 29.3   124.  211 
##  7 50-54        16  157.   149. 23.7   133   209 
##  8 55-59        16  167.   158. 28.6   138.  249.
##  9 60-64         9  149.   140. 32.4   122   219.
## 10 65-69        18  158.   156. 21.3   124.  204
# Export the by-age-group diabetes + hypertension summary to a CSV file in the
# working directory
write.csv(systolic_bp_diabetes_hypertension, "Steps_sbp_age_diabetes_hypertension.csv", row.names = FALSE)
# Format the same summary as a striped HTML table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
systolic_bp_diabetes_hypertension_table <- kable(
  systolic_bp_diabetes_hypertension,
  format = "html",
  caption = "Summary of Systolic Blood Pressure for Patients with Diabetes and Hypertension"
) %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
# Display the table
systolic_bp_diabetes_hypertension_table
Summary of Systolic Blood Pressure for Patients with Diabetes and Hypertension
age_group count mean median sd min max
20-24 1 NaN NA NA Inf -Inf
25-29 3 135.2222 132.6667 14.997531 121.6667 151.3333
30-34 1 128.6667 128.6667 NA 128.6667 128.6667
35-39 2 132.3333 132.3333 1.885618 131.0000 133.6667
40-44 10 146.0667 148.5000 19.328735 120.6667 178.3333
45-49 8 157.9583 154.6667 29.261512 123.6667 211.0000
50-54 16 157.1250 149.1667 23.666471 133.0000 209.0000
55-59 16 166.8750 158.3333 28.560235 137.6667 249.3333
60-64 9 149.0417 139.5000 32.413224 122.0000 218.6667
65-69 18 157.5185 155.8333 21.259438 123.6667 204.0000

17 Plot a bar graph of the systolic blood pressure for patients with diabetes and hypertension by age group

# Bar chart of the mean systolic BP per age group for patients with diabetes
# and hypertension. Reads the by-age-group summary table
# `systolic_bp_diabetes_hypertension`, so bar heights are the precomputed
# `mean` column. geom_col() is the idiomatic way to draw bars whose heights are
# values already in the data (it is geom_bar(stat = "identity") under another
# name).
bar_plot_diabetes_hypertension <- ggplot(systolic_bp_diabetes_hypertension, aes(x = age_group, y = mean)) +
  geom_col(fill = "orange", color = "black", alpha = 0.7) +
  labs(title = "Bar Plot of Systolic Blood Pressure for Patients with Diabetes and Hypertension by Age Group",
       x = "Age Group",
       y = "Mean Systolic Blood Pressure") +
  theme_minimal()
# Display the bar plot
bar_plot_diabetes_hypertension

# Generate a histogram of the systolic blood pressure for patients with diabetes and hypertension by age group

# Faceted histograms (binwidth 5) of systolic BP for rows with diabetes == 1
# and htn == 1, one panel per age group, bars filled by age group
histogram_diabetes_hypertension_age_group <- steps_subset %>%
  filter(diabetes == 1, htn == 1) %>%
  ggplot(mapping = aes(x = systolic, fill = age_group)) +
  geom_histogram(binwidth = 5, alpha = 0.7, colour = "black") +
  facet_wrap(vars(age_group)) +
  theme_minimal() +
  labs(
    title = "Histogram of Systolic Blood Pressure for Patients with Diabetes and Hypertension by Age Group",
    x = "Systolic Blood Pressure",
    y = "Frequency"
  )
# Render the faceted histogram
histogram_diabetes_hypertension_age_group

# Generate a summary of the systolic blood pressure for diabetes patients on hypertension treatment

# Overall systolic BP summary (mean, median, sd, min, max) for rows with
# diabetes == 1, htn == 1 and htn_treatment == 1; missing systolic values are
# ignored
systolic_bp_diabetes_hypertension_treatment <- summarise(
  filter(steps_subset, diabetes == 1, htn == 1, htn_treatment == 1),
  mean = mean(systolic, na.rm = TRUE),
  median = median(systolic, na.rm = TRUE),
  sd = sd(systolic, na.rm = TRUE),
  min = min(systolic, na.rm = TRUE),
  max = max(systolic, na.rm = TRUE)
)
# Print the one-row summary tibble
systolic_bp_diabetes_hypertension_treatment
## # A tibble: 1 × 5
##    mean median    sd   min   max
##   <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1  158.   151.  32.0   122  249.
# Format the diabetes + hypertension-treatment systolic BP summary as a striped
# HTML table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
systolic_bp_diabetes_hypertension_treatment_table <- kable(
  systolic_bp_diabetes_hypertension_treatment,
  format = "html",
  caption = "Summary of Systolic Blood Pressure for Patients with Diabetes and Hypertension Treatment"
) %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
# Display the table
systolic_bp_diabetes_hypertension_treatment_table
Summary of Systolic Blood Pressure for Patients with Diabetes and Hypertension Treatment
mean median sd min max
158.3333 151.3333 31.96145 122 249.3333

18 Generate a histogram of the systolic blood pressure for patients with diabetes and hypertension treatment

# Histogram (binwidth 5) of systolic BP for rows with diabetes == 1, htn == 1
# and htn_treatment == 1
histogram_diabetes_hypertension_treatment <- steps_subset %>%
  filter(diabetes == 1, htn == 1, htn_treatment == 1) %>%
  ggplot(mapping = aes(x = systolic)) +
  geom_histogram(binwidth = 5, alpha = 0.7, colour = "black", fill = "pink") +
  theme_minimal() +
  labs(
    title = "Histogram of Systolic Blood Pressure for Patients with Diabetes and Hypertension Treatment",
    x = "Systolic Blood Pressure",
    y = "Frequency"
  )
# Render the histogram
histogram_diabetes_hypertension_treatment

# Generate a boxplot of the systolic blood pressure for patients with diabetes and hypertension treatment

# Single boxplot of systolic BP for rows with diabetes == 1, htn == 1 and
# htn_treatment == 1; the empty string mapped to x gives one unlabelled box
boxplot_diabetes_hypertension_treatment <- steps_subset %>%
  filter(diabetes == 1, htn == 1, htn_treatment == 1) %>%
  ggplot(mapping = aes(x = "", y = systolic)) +
  geom_boxplot(alpha = 0.7, colour = "black", fill = "pink") +
  theme_minimal() +
  labs(
    title = "Boxplot of Systolic Blood Pressure for Patients with Diabetes and Hypertension Treatment",
    x = "",
    y = "Systolic Blood Pressure"
  )
# Render the boxplot
boxplot_diabetes_hypertension_treatment

# Summarise systolic BP for patients with diabetes and hypertension treatment by age group

# Summarise systolic BP for patients with diabetes and hypertension treatment
# by age group.
#
# `count` is the number of rows in the group, including rows whose systolic
# value is missing. When a group has no non-missing systolic value,
# mean()/min()/max() with na.rm = TRUE would return NaN/Inf/-Inf (and min/max
# would warn), so those statistics are reported as NA for such groups instead.
# NOTE: this overwrites the overall
# `systolic_bp_diabetes_hypertension_treatment` summary created earlier in the
# document.
systolic_bp_diabetes_hypertension_treatment <- steps_subset %>%
  filter(diabetes == 1 & htn == 1 & htn_treatment == 1) %>%
  group_by(age_group) %>%
  summarise(count = n(),
            # TRUE when the group has at least one usable systolic reading
            any_measured = any(!is.na(systolic)),
            mean = if (any_measured) mean(systolic, na.rm = TRUE) else NA_real_,
            median = median(systolic, na.rm = TRUE),
            sd = sd(systolic, na.rm = TRUE),
            min = if (any_measured) min(systolic, na.rm = TRUE) else NA_real_,
            max = if (any_measured) max(systolic, na.rm = TRUE) else NA_real_) %>%
  # Drop the helper flag so the output columns match the original table
  select(-any_measured)
# Display the summary of systolic blood pressure for patients with diabetes and hypertension treatment
systolic_bp_diabetes_hypertension_treatment
## # A tibble: 6 × 7
##   age_group count  mean median    sd   min   max
##   <chr>     <int> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 40-44         2  154.   154. 34.6   129.  178.
## 2 45-49         2  154.   154. 43.1   124.  185.
## 3 50-54         3  175.   168. 29.5   150   208.
## 4 55-59         5  177.   153. 46.6   138.  249.
## 5 60-64         3  127.   124.  6.69  122   134.
## 6 65-69         7  154.   161  18.6   124.  175
# Export the by-age-group diabetes + hypertension-treatment summary to a CSV
# file in the working directory
write.csv(systolic_bp_diabetes_hypertension_treatment, "Steps_sbp_age_diabetes_hypertension_treatment.csv", row.names = FALSE)
# Format the same summary as a striped HTML table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
systolic_bp_diabetes_hypertension_treatment_table <- kable(
  systolic_bp_diabetes_hypertension_treatment,
  format = "html",
  caption = "Summary of Systolic Blood Pressure for Patients with Diabetes and Hypertension Treatment"
) %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
# Display the table
systolic_bp_diabetes_hypertension_treatment_table
Summary of Systolic Blood Pressure for Patients with Diabetes and Hypertension Treatment
age_group count mean median sd min max
40-44 2 153.8333 153.8333 34.64823 129.3333 178.3333
45-49 2 154.1667 154.1667 43.13351 123.6667 184.6667
50-54 3 175.3333 168.3333 29.46373 150.0000 207.6667
55-59 5 176.8667 152.6667 46.60150 138.3333 249.3333
60-64 3 126.6667 123.6667 6.69162 122.0000 134.3333
65-69 7 153.8571 161.0000 18.57603 123.6667 175.0000

19 Plot a bar graph of the systolic blood pressure for patients with diabetes and hypertension treatment by age group

# Bar chart of the mean systolic BP per age group for patients with diabetes
# and hypertension treatment. Reads the by-age-group summary table
# `systolic_bp_diabetes_hypertension_treatment`, so bar heights are the
# precomputed `mean` column. geom_col() is the idiomatic way to draw bars whose
# heights are values already in the data (it is geom_bar(stat = "identity")
# under another name).
bar_plot_diabetes_hypertension_treatment <- ggplot(systolic_bp_diabetes_hypertension_treatment, aes(x = age_group, y = mean)) +
  geom_col(fill = "pink", color = "black", alpha = 0.7) +
  labs(title = "Bar Plot of Systolic Blood Pressure for Patients with Diabetes and Hypertension Treatment by Age Group",
       x = "Age Group",
       y = "Mean Systolic Blood Pressure") +
  theme_minimal()
# Display the bar plot
bar_plot_diabetes_hypertension_treatment

# Generate a histogram of the systolic blood pressure for patients with diabetes and hypertension treatment by age group

# Faceted histograms (binwidth 5) of systolic BP for rows with diabetes == 1,
# htn == 1 and htn_treatment == 1, one panel per age group
histogram_diabetes_hypertension_treatment_age_group <- steps_subset %>%
  filter(diabetes == 1, htn == 1, htn_treatment == 1) %>%
  ggplot(mapping = aes(x = systolic, fill = age_group)) +
  geom_histogram(binwidth = 5, alpha = 0.7, colour = "black") +
  facet_wrap(vars(age_group)) +
  theme_minimal() +
  labs(
    title = "Histogram of Systolic Blood Pressure for Patients with Diabetes and Hypertension Treatment by Age Group",
    x = "Systolic Blood Pressure",
    y = "Frequency"
  )
# Render the faceted histogram
histogram_diabetes_hypertension_treatment_age_group

# Create subsets of the dataset for statistical analysis, keeping only id, systolic BP, age, and age group

# Analysis subset, all patients: keeps id, systolic, age and age_group, and
# tags every row with the name of the subset it belongs to
steps_subset_all <- steps_subset %>%
  mutate(dataset = "steps_subset_all") %>%
  select(id, systolic, age, age_group, dataset)
# Write the subset to CSV in the working directory
write.csv(steps_subset_all, file = "Steps_subset_all.csv", row.names = FALSE)
# Analysis subset, patients with hypertension (htn == 1): keeps id, systolic,
# age and age_group, and tags every row with the subset name
steps_subset_hypertension <- steps_subset %>%
  filter(htn == 1) %>%
  mutate(dataset = "steps_subset_hypertension") %>%
  select(id, systolic, age, age_group, dataset)
# Write the subset to CSV in the working directory
write.csv(steps_subset_hypertension, file = "Steps_subset_hypertension.csv", row.names = FALSE)
# Analysis subset, patients with hypertension who are on treatment
# (htn == 1 and htn_treatment == 1): keeps id, systolic, age and age_group,
# and tags every row with the subset name
steps_subset_hypertension_treatment <- steps_subset %>%
  filter(htn == 1, htn_treatment == 1) %>%
  mutate(dataset = "steps_subset_hypertension_treatment") %>%
  select(id, systolic, age, age_group, dataset)
# Write the subset to CSV in the working directory
write.csv(steps_subset_hypertension_treatment, file = "Steps_subset_hypertension_treatment.csv", row.names = FALSE)

# Analysis subset, patients with diabetes (diabetes == 1): keeps id, systolic,
# age and age_group, and tags every row with the subset name
steps_subset_diabetes <- steps_subset %>%
  filter(diabetes == 1) %>%
  mutate(dataset = "steps_subset_diabetes") %>%
  select(id, systolic, age, age_group, dataset)
# Write the subset to CSV in the working directory
write.csv(steps_subset_diabetes, file = "Steps_subset_diabetes.csv", row.names = FALSE)
# Analysis subset, patients with both diabetes and hypertension
# (diabetes == 1 and htn == 1): keeps id, systolic, age and age_group, and
# tags every row with the subset name
steps_subset_diabetes_hypertension <- steps_subset %>%
  filter(diabetes == 1, htn == 1) %>%
  mutate(dataset = "steps_subset_diabetes_hypertension") %>%
  select(id, systolic, age, age_group, dataset)
# Write the subset to CSV in the working directory
write.csv(steps_subset_diabetes_hypertension, file = "Steps_subset_diabetes_hypertension.csv", row.names = FALSE)
# Analysis subset, patients with diabetes and hypertension who are on
# hypertension treatment (diabetes == 1, htn == 1, htn_treatment == 1): keeps
# id, systolic, age and age_group, and tags every row with the subset name
steps_subset_diabetes_hypertension_treatment <- steps_subset %>%
  filter(diabetes == 1, htn == 1, htn_treatment == 1) %>%
  mutate(dataset = "steps_subset_diabetes_hypertension_treatment") %>%
  select(id, systolic, age, age_group, dataset)
# Write the subset to CSV in the working directory
write.csv(steps_subset_diabetes_hypertension_treatment, file = "Steps_subset_diabetes_hypertension_treatment.csv", row.names = FALSE)

# Analysis subset, rural patients: keeps id, systolic, age and age_group, and
# tags every row with the subset name.
# NOTE(review): stratum == 1 is taken to mean "rural" as in the original
# comment; the coding of `stratum` is not shown here -- confirm against the
# codebook.
steps_subset_rural <- steps_subset %>%
  filter(stratum == 1) %>%
  mutate(dataset = "steps_subset_rural") %>%
  select(id, systolic, age, age_group, dataset)
# Write the subset to CSV in the working directory
write.csv(steps_subset_rural, file = "Steps_subset_rural.csv", row.names = FALSE)
# Analysis subset, urban patients: keeps id, systolic, age and age_group, and
# tags every row with the subset name.
# NOTE(review): stratum == 2 is taken to mean "urban" as in the original
# comment; the coding of `stratum` is not shown here -- confirm against the
# codebook.
steps_subset_urban <- steps_subset %>%
  filter(stratum == 2) %>%
  mutate(dataset = "steps_subset_urban") %>%
  select(id, systolic, age, age_group, dataset)
# Write the subset to CSV in the working directory
write.csv(steps_subset_urban, file = "Steps_subset_urban.csv", row.names = FALSE)
# Analysis subset, rural patients with hypertension (stratum == 1 and
# htn == 1): keeps id, systolic, age and age_group, and tags every row with
# the subset name.
# NOTE(review): stratum == 1 is taken to mean "rural" as in the original
# comment -- confirm against the codebook.
steps_subset_rural_hypertension <- steps_subset %>%
  filter(stratum == 1, htn == 1) %>%
  mutate(dataset = "steps_subset_rural_hypertension") %>%
  select(id, systolic, age, age_group, dataset)
# Write the subset to CSV in the working directory
write.csv(steps_subset_rural_hypertension, file = "Steps_subset_rural_hypertension.csv", row.names = FALSE)
# Analysis subset, urban patients with hypertension (stratum == 2 and
# htn == 1): keeps id, systolic, age and age_group, and tags every row with
# the subset name.
# NOTE(review): stratum == 2 is taken to mean "urban" as in the original
# comment -- confirm against the codebook.
steps_subset_urban_hypertension <- steps_subset %>%
  filter(stratum == 2, htn == 1) %>%
  mutate(dataset = "steps_subset_urban_hypertension") %>%
  select(id, systolic, age, age_group, dataset)
# Write the subset to CSV in the working directory
write.csv(steps_subset_urban_hypertension, file = "Steps_subset_urban_hypertension.csv", row.names = FALSE)
# Create subsets of the datasets for statistical analysis - selecting only id,
# systolic BP, age and age group - patients in rural areas with diabetes.
# NOTE(review): stratum == 1 is taken to mean "rural" as in the original
# comment -- confirm against the codebook.
steps_subset_rural_diabetes <- steps_subset %>%
  filter(stratum == 1 & diabetes == 1) %>%
  select(id, systolic, age, age_group) %>%
  mutate(dataset = "steps_subset_rural_diabetes")
# Save the dataset. Every other subset in this section is written to CSV right
# after it is built; this one was created but never saved, so the save step is
# added here using the same file-naming pattern.
write.csv(steps_subset_rural_diabetes, "Steps_subset_rural_diabetes.csv", row.names = FALSE)

20 End of the R Markdown document

# End of the R Markdown document

21 Save the R Markdown document

# Render the R Markdown document to HTML (uncomment to run)
# rmarkdown::render("skeleton.Rmd", output_format = "html_document")

22 Save the R Markdown document as a PDF

# Render the R Markdown document to PDF (uncomment to run)
# rmarkdown::render("skeleton.Rmd", output_format = "pdf_document")

23 Save the R Markdown document as a Word document

# Render the R Markdown document to a Word document (uncomment to run)
# rmarkdown::render("skeleton.Rmd", output_format = "word_document")