R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Load required libraries

library(readxl) library(dplyr) library(survival) library(survminer) library(broom) library(MASS) library(car) #Import data

QUESTION ONE

Heart_data <- read.csv(file.choose(), stringsAsFactors = FALSE) View(Heart_data) # Convert categorical variables to factors Heart_data\(sex <- factor(Heart_data\)sex, levels=c(0,1), labels=c(“Female”,“Male”)) Heart_data\(Smoking <- factor(Heart_data\)Smoking, levels=c(0,1), labels=c(“No”,“Yes”)) Heart_data\(Diabetes <- factor(Heart_data\)Diabetes, levels=c(0,1), labels=c(“No”,“Yes”)) Heart_data\(BP <- factor(Heart_data\)BP, levels=c(0,1), labels=c(“No”,“Yes”)) Heart_data\(Anemia <- factor(Heart_data\)Anemia, levels=c(0,1), labels=c(“No”,“Yes”))

Define survival object

surv_obj <- Surv(time = Heart_data\(TIME, event = Heart_data\)Event) # ——————————– # (a) Kaplan–Meier by Smoking Status # ——————————– km_fit <- survfit(surv_obj ~ Smoking, data = Heart_data) summary(km_fit) # Plot KM curves km_fit_curve <- ggsurvplot( km_fit, data = Heart_data, risk.table = TRUE, risk.table.col =”strata”, legend.title = “Smoking”, legend.labs = c(“No”, “Yes”), xlab = “Time (days)”, ylab = “Survival Probability”, title = “Kaplan-Meier Curve: Smoking vs Non-Smoking” ) print(km_fit_curve)

——————————–

(b) 95% CI of Survival Probability at 100 Days

——————————–

s100 <- summary(km_fit, times = 100) s100 ci_100 <- data.frame( group = s100\(strata, time = s100\)time, survival = s100\(surv, lower = s100\)lower, upper = s100$upper ) print(ci_100)

——————————–

(c) Full Cox Proportional Hazards Model

——————————–

Fit the Cox proportional hazards model

cox_full <- coxph( Surv(TIME, Event) ~ sex + Smoking + Diabetes + BP + Anemia + Age + Ejection.Fraction + Sodium + Creatinine + Pletelets + CPK, data = Heart_data )

Summary with default

summary(cox_full)

————————————

# Hazard ratio table rounded to 4 decimals

————————————

hr_table <- broom::tidy(cox_full, exponentiate = TRUE, conf.int = TRUE) print(hr_table) # Round all numeric columns to 4 decimals hr_table_rounded <- hr_table %>% mutate(across(where(is.numeric), ~ round(.x, 4)))

print(hr_table_rounded) # Hazard ratio table hr_table <- broom::tidy(cox_full, exponentiate = TRUE, conf.int = TRUE) print(hr_table) # (d) Reduced Cox Model Using Significant Predictors # ————————— # Extract p<0.05 terms sig_terms <- broom::tidy(cox_full) %>% filter(p.value < 0.05) %>% pull(term)

——– Full Cox Model ——–

cox_full <- coxph( Surv(TIME, Event) ~ sex + Smoking + Diabetes + BP + Anemia + Age + Ejection.Fraction + Sodium + Creatinine + Pletelets + CPK, data = Heart_data )

Summary with 4 decimal places

full_summary <- summary(cox_full) full_summary\(coefficients <- round(full_summary\)coefficients, 4) full_summary\(conf.int <- round(full_summary\)conf.int, 4) full_summary

Extract CI from full model

ci <- full_summary$conf.int

Identify variables where lower95 > 1 or upper95 < 1

sig_ci_vars <- rownames(ci)[ (ci[, “lower .95”] > 1) | (ci[, “upper .95”] < 1) ]

sig_ci_vars

sig_ci_vars <- c(“BPYes”, “Age”, “Ejection.Fraction”, “Creatinine”)

Convert BPYes → BP, sexMale → sex, etc.

sig_vars <- unique(gsub(“(Yes|No)$”, ““, sig_ci_vars))

sig_vars

Build formula

reduced_formula <- as.formula( paste(“Surv(TIME, Event) ~”, paste(sig_vars, collapse = ” + “)) )

cox_reduced <- coxph(reduced_formula, data = Heart_data) summary(cox_reduced)

cox_reduced_summary <- summary(cox_reduced)

cox_reduced_summary\(coefficients <- round(cox_reduced_summary\)coefficients, 4) cox_reduced_summary\(conf.int <- round(cox_reduced_summary\)conf.int, 4)

cox_reduced_summary

ph_test <- cox.zph(cox_reduced) ph_test # visually confirming the PH assumption plot(ph_test)

par(mfrow = c(2, 2)) plot(ph_test, var = “BP”) plot(ph_test, var = “Age”) plot(ph_test, var = “Ejection.Fraction”) plot(ph_test, var = “Creatinine”) par(mfrow = c(1, 1))

Example for BP

plot(survfit(Surv(TIME, Event) ~ BP, data = Heart_data), fun = “cloglog”, col = 1:2, lty = 1:2, xlab = “Time”, ylab = “Log(-Log(Survival))”, main = “LML Plot for BP”)

legend(“bottomright”, legend = c(“BP=0”, “BP=1”), col = 1:2, lty = 1:2)

——————————-

End of script

——————————-