##Read Dataset

# Load the study data. The path is relative, so it must resolve from the
# directory the script/document is run in.
fhs <- read.csv(file = "fhs_pd1.csv")

##Recode Variables as Categorical

# Turn the numeric indicator columns into labelled factors.
# SEX has its own coding (1 = Male, 2 = Female). The remaining indicators all
# use 0 = No / 1 = Yes, so they are recoded together in one across() call.
# Any value outside the listed levels becomes NA.
fhs_clean <- fhs %>%
  mutate(
    SEX = factor(SEX, levels = c(1, 2), labels = c("Male", "Female")),
    across(
      c(CURSMOKE, DIABETES, BPMEDS, PREVCHD, CVD),
      function(x) factor(x, levels = c(0, 1), labels = c("No", "Yes"))
    )
  )

##Create HL_CHOL

# Dichotomise total cholesterol at 200: values below 200 are labelled
# "Low cholesterol", values of 200 or more "High cholesterol".
# Rows with missing TOTCHOL get NA.
fhs_clean <- fhs_clean %>%
  mutate(
    HL_CHOL = factor(
      if_else(TOTCHOL >= 200, 1, 0),
      levels = c(0, 1),
      labels = c("Low cholesterol", "High cholesterol")
    )
  )

##Restrict to Non-Missing HL_CHOL

# Keep only participants whose cholesterol category could be determined.
fhs_analysis <- filter(fhs_clean, !is.na(HL_CHOL))

##Epidemiologic Table

# Build Table 1: participant characteristics, one column per cholesterol group.
table1 <- fhs_analysis %>%
  tbl_summary(
    by = HL_CHOL,
    include = c(AGE, SEX, CURSMOKE, BMI, DIABETES, BPMEDS, PREVCHD),
    # Force AGE and BMI to be summarised as continuous variables.
    type = list(AGE ~ "continuous", BMI ~ "continuous"),
    # Continuous: mean (SD). Categorical: count (percentage).
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    # Human-readable row labels.
    label = list(
      AGE ~ "Age (years)",
      SEX ~ "Sex",
      CURSMOKE ~ "Current Smoking",
      BMI ~ "BMI (kg/m²)",
      DIABETES ~ "Diabetes",
      BPMEDS ~ "BP Medication",
      PREVCHD ~ "Previous CHD"
    )
  )

# Add the "Overall" column, then attach the caption.
table1 <- add_overall(table1)
table1 <- modify_caption(
  table1,
  "**Table 1. Participant Characteristics by Cholesterol Status**"
)

# Render the table
table1
Table 1. Participant Characteristics by Cholesterol Status
Characteristic Overall
N = 4,382
1
Low cholesterol
N = 870
1
High cholesterol
N = 3,512
1
Age (years) 50 (9) 46 (9) 51 (8)
Sex


    Male 1,937 (44%) 385 (44%) 1,552 (44%)
    Female 2,445 (56%) 485 (56%) 1,960 (56%)
Current Smoking 2,150 (49%) 462 (53%) 1,688 (48%)
BMI (kg/m²) 25.8 (4.1) 24.9 (4.2) 26.1 (4.0)
    Unknown 18 3 15
Diabetes 120 (2.7%) 18 (2.1%) 102 (2.9%)
BP Medication 141 (3.3%) 15 (1.8%) 126 (3.6%)
    Unknown 60 13 47
Previous CHD 192 (4.4%) 39 (4.5%) 153 (4.4%)
1 Mean (SD); n (%)

##Risk Ratio Calculation

# Ensure epitools is loaded
library(epitools)

# Cross-tabulate exposure (rows) by outcome (columns).
# Factor level order puts the unexposed group ("Low cholesterol") in row 1 and
# the event ("Yes") in the last column, which is the layout riskratio() expects.
rr_table <- table(fhs_analysis$HL_CHOL, fhs_analysis$CVD)

# Print the 2x2 table to verify
print(rr_table)
##                   
##                      No  Yes
##   Low cholesterol   713  157
##   High cholesterol 2523  989
# Calculate the risk ratio of CVD for high vs. low cholesterol.
# Do not pass rev = "both": that flips rows and columns, making
# "High cholesterol" the reference and "No" the outcome. The estimate would
# then be the ratio of *not* having CVD (Low vs High, about 1.14), not the
# CVD risk ratio.
# With the counts above the default call gives
# (989 / 3512) / (157 / 870), i.e. roughly 1.56.
risk_ratio <- riskratio(rr_table)

# Display the risk ratio results (estimate with lower/upper confidence limits)
risk_ratio$measure %>%
  knitr::kable(caption = "Risk Ratio for CVD by Cholesterol Status")
Risk Ratio for CVD by Cholesterol Status
estimate lower upper
High cholesterol 1.000000 NA NA
Low cholesterol 1.140795 1.098884 1.184305

##Directed Acyclic Graph (DAG) Analysis

To explore the causal relationships between high cholesterol and cardiovascular disease (CVD), a DAG was created using Dagitty.net. The graph depicts High Cholesterol as the exposure, CVD as the outcome, and Age and Smoking as confounders affecting both variables. Below is the DAG:

# Embed the DAG image exported from DAGitty.
# NOTE(review): this is an absolute Windows path, so the document presumably
# only knits on the author's machine. Consider a path relative to the project.
knitr::include_graphics(path = "D:\\MPH\\Biostatistics\\dagitty-model.png")

##Summary

This study investigated the association between high cholesterol and cardiovascular disease (CVD) using data from the Framingham Heart Study. A cross-sectional analysis of 4,382 participants was conducted, comparing 870 individuals with low cholesterol to 3,512 individuals with high cholesterol. High cholesterol (total cholesterol of 200 or higher) was defined as the exposure, and CVD was the outcome. Potential confounders considered included age, sex, smoking status, diabetes, use of blood pressure medication, and a history of coronary heart disease (CHD). Descriptive statistics were used to summarize participant characteristics. Chi-square and t-tests were employed to compare groups, and the unadjusted risk ratio (RR) of CVD associated with high cholesterol was estimated from the 2×2 table of cholesterol status by CVD, with a Wald 95% confidence interval. Results indicated that individuals with high cholesterol were older (mean age 51 vs. 46 years), had a slightly higher mean BMI (26.1 vs. 24.9 kg/m²), had similar proportions of males and females, and had higher rates of diabetes (2.9% vs. 2.1%) and use of blood pressure medication (3.6% vs. 1.8%). Current smoking was somewhat less common in the high-cholesterol group (48% vs. 53%), and a history of CHD was similar in the two groups (4.4% vs. 4.5%). The unadjusted risk ratio for CVD among those with high cholesterol was 1.56 (95% CI: 1.34, 1.81), suggesting a 56% increased risk of CVD compared to those with low cholesterol. This study provides evidence of a positive association between high cholesterol and CVD. However, due to the cross-sectional design, causality cannot be established. Future research using longitudinal studies is needed to confirm these findings.