Activity 6

library(riskCommunicator)
library(tidyverse)
library(skimr)
library(knitr)
library(ggthemes)
library(patchwork)

data(framingham, package = "riskCommunicator")

glimpse(framingham)

## Rows: 11,627
## Columns: 39
## $ RANDID   <dbl> 2448, 2448, 6238, 6238, 6238, 9428, 9428, 10552, 10552, 11252…
## $ SEX      <dbl> 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1…
## $ TOTCHOL  <dbl> 195, 209, 250, 260, 237, 245, 283, 225, 232, 285, 343, NA, 22…
## $ AGE      <dbl> 39, 52, 46, 52, 58, 48, 54, 61, 67, 46, 51, 58, 43, 49, 55, 6…
## $ SYSBP    <dbl> 106.0, 121.0, 121.0, 105.0, 108.0, 127.5, 141.0, 150.0, 183.0…
## $ DIABP    <dbl> 70.0, 66.0, 81.0, 69.5, 66.0, 80.0, 89.0, 95.0, 109.0, 84.0, …
## $ CURSMOKE <dbl> 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0…
## $ CIGPDAY  <dbl> 0, 0, 0, 0, 0, 20, 30, 30, 20, 23, 30, 30, 0, 0, 0, 0, 0, 20,…
## $ BMI      <dbl> 26.97, NA, 28.73, 29.43, 28.50, 25.34, 25.34, 28.58, 30.18, 2…
## $ DIABETES <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ BPMEDS   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0…
## $ HEARTRTE <dbl> 80, 69, 95, 80, 80, 75, 75, 65, 60, 85, 90, 74, 77, 120, 86, …
## $ GLUCOSE  <dbl> 77, 92, 76, 86, 71, 70, 87, 103, 89, 85, 72, NA, 99, 86, 81, …
## $ educ     <dbl> 4, 4, 2, 2, 2, 1, 1, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 2, 2, 2, 1…
## $ PREVCHD  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ PREVAP   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ PREVMI   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ PREVSTRK <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ PREVHYP  <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1…
## $ TIME     <dbl> 0, 4628, 0, 2156, 4344, 0, 2199, 0, 1977, 0, 2072, 4285, 0, 2…
## $ PERIOD   <dbl> 1, 3, 1, 2, 3, 1, 2, 1, 2, 1, 2, 3, 1, 2, 3, 1, 2, 1, 2, 3, 1…
## $ HDLC     <dbl> NA, 31, NA, NA, 54, NA, NA, NA, NA, NA, NA, NA, NA, NA, 46, N…
## $ LDLC     <dbl> NA, 178, NA, NA, 141, NA, NA, NA, NA, NA, NA, NA, NA, NA, 135…
## $ DEATH    <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ ANGINA   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0…
## $ HOSPMI   <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ MI_FCHD  <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0…
## $ ANYCHD   <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0…
## $ STROKE   <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CVD      <dbl> 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0…
## $ HYPERTEN <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ TIMEAP   <dbl> 8766, 8766, 8766, 8766, 8766, 8766, 8766, 2956, 2956, 8766, 8…
## $ TIMEMI   <dbl> 6438, 6438, 8766, 8766, 8766, 8766, 8766, 2956, 2956, 8766, 8…
## $ TIMEMIFC <dbl> 6438, 6438, 8766, 8766, 8766, 8766, 8766, 2956, 2956, 8766, 8…
## $ TIMECHD  <dbl> 6438, 6438, 8766, 8766, 8766, 8766, 8766, 2956, 2956, 8766, 8…
## $ TIMESTRK <dbl> 8766, 8766, 8766, 8766, 8766, 8766, 8766, 2089, 2089, 8766, 8…
## $ TIMECVD  <dbl> 6438, 6438, 8766, 8766, 8766, 8766, 8766, 2089, 2089, 8766, 8…
## $ TIMEDTH  <dbl> 8766, 8766, 8766, 8766, 8766, 8766, 8766, 2956, 2956, 8766, 8…
## $ TIMEHYP  <dbl> 8766, 8766, 8766, 8766, 8766, 8766, 8766, 0, 0, 4285, 4285, 4…

framinghamSub <-framingham %>% select(RANDID:DIABETES) %>% 
  mutate(SEX = case_when(SEX == 1 ~ "Male"
                         ,SEX == 2 ~ "Female",
                         TRUE ~ as.character(NA)),
         CURSMOKE = case_when(CURSMOKE == 0 ~ "No",
                              CURSMOKE == 1 ~ "Yes",
                              TRUE ~ as.character(NA)))

skim(framinghamSub)

Data summary
Name	framinghamSub
Number of rows	11627
Number of columns	10
_______________________
Column type frequency:
character	2
numeric	8
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
SEX	0	1	4	6	0	2	0
CURSMOKE	0	1	2	3	0	2	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
RANDID	0	1.00	5004740.92	2900877.44	2448.00	2474378.00	5006008.00	7472730.00	9999312.0	▇▇▇▇▇
TOTCHOL	409	0.96	241.16	45.37	107.00	210.00	238.00	268.00	696.0	▅▇▁▁▁
AGE	0	1.00	54.79	9.56	32.00	48.00	54.00	62.00	81.0	▂▇▇▅▁
SYSBP	0	1.00	136.32	22.80	83.50	120.00	132.00	149.00	295.0	▆▇▁▁▁
DIABP	0	1.00	83.04	11.66	30.00	75.00	82.00	90.00	150.0	▁▅▇▁▁
CIGPDAY	79	0.99	8.25	12.19	0.00	0.00	0.00	20.00	90.0	▇▂▁▁▁
BMI	52	1.00	25.88	4.10	14.43	23.09	25.48	28.07	56.8	▃▇▁▁▁
DIABETES	0	1.00	0.05	0.21	0.00	0.00	0.00	0.00	1.0	▇▁▁▁▁

framinghamSub %>% ggplot(aes(x = SYSBP,
                            y = DIABP)) + 
  geom_point(alpha = 0.2) + 
  facet_grid(.~SEX)

mapping cigarettes per day

scatter <- framinghamSub %>% ggplot(aes(x = SYSBP,
                            y = DIABP,
                            size = CIGPDAY,
                            color = SEX)) + 
  geom_point(alpha = 0.2) + 
  facet_grid(.~SEX) + 
  scale_color_colorblind() + 
  labs(title = "Systolic by diastolic blood pressure",
       x = "Systolic BP (mg/dL",
      y = "Diastolic BP (mg/dL)",
      caption = "Data source: Framingham Heart STudy & the reiskCommunicator package" ) + 
  guides(color = FALSE) +
  geom_smooth(se = FALSE, method = "lm", size = 1) +
  theme_bw() +
  theme(legend.position = "bottom")

side by side Box plot

next we create side by side box plots

ggplot(data = framinghamSub,
       aes(x = CURSMOKE,
           y = TOTCHOL,
           fill = CURSMOKE)) + 
  geom_boxplot() + 
  facet_grid(.~SEX) +
  scale_fill_viridis_d() +
  labs(title = "Total cholesterol by smoking status",
       x = "Curent smoker",
       y = "Serum Total Cholesterol",
       caption = "Data source: Framingham Heart Study & the riskCommunicater package") +
  theme_bw() +
  theme(text = element_text(face = "bold"), 
        legend.position = "none",
        axis.title = element_text(size = 14))

##line chart

linechart<-framinghamSub %>% ggplot() +
  stat_summary(aes(x = AGE,
                   y = CIGPDAY,
               group = SEX, color = SEX),
               geom = "line", size = 1, fun.y = mean) +
  labs(title = "Average cigarettes per day by age and sex",
       x = "Age",
       y = "Average cigarettes per day" ,
       color = "Sex") + 
  scale_y_continuous(breaks = c(0,4,8,12,16)) +
  scale_color_viridis_d() + 
  theme_bw()

combine plots

now combining scatter and line plots with patchwork package

linechart / scatter

##faceted bar chart creating a new variable based on the intervals defined by John Hopkins Hospital

framinghamSub <- framinghamSub %>% 
  mutate(CholesterolCat = case_when(TOTCHOL < 200 ~ "Normal",
                                    TOTCHOL >= 200 &  TOTCHOL < 240 ~ "Borderline high",
                                    TOTCHOL > 240 ~ "High",
                         TRUE ~ as.character(NA)))

framinghamSub %>% filter(AGE >= 40, !is.na(CholesterolCat)) %>% ggplot(aes(x = CholesterolCat)) + 
  geom_bar()

Now we order the bars based on the ordering of the categories.

framinghamSub %>% filter(AGE >= 40, !is.na(CholesterolCat)) %>% ggplot(aes(x = fct_relevel(CholesterolCat, "Normal", "Borderline high", "High"), fill = CholesterolCat)) + 
  facet_grid(.~SEX) +
  labs(title = "Distribution of cholestoerol levels",
       x = "Cholesterol level",
       y = "Count",
       caption = "Data source: Framingham Heart Study & the riskCommunicater package") +
  scale_fill_viridis_d() + 
  geom_bar(color = "black") +
  theme_bw() +
  theme(legend.position = "none")

flip to a horizontal bar chart

framinghamSub %>% filter(AGE >= 40, !is.na(CholesterolCat)) %>% ggplot(aes(x = fct_relevel(CholesterolCat, "Normal", "Borderline high", "High"), fill = CholesterolCat)) + 
  facet_grid(.~SEX) +
  labs(title = "Distribution of cholestoerol levels",
       x = "Cholesterol level",
       y = "Count",
       caption = "Data source: Framingham Heart Study & the riskCommunicater package") +
  scale_fill_viridis_d() + 
  geom_bar(color = "black") +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "none")

Activity 6

Sarah Stuck

2022-10-04

side by side Box plot

combine plots