This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(readxl) library(dplyr) library(survival) library(survminer) library(broom) library(MASS) library(car) #Import data
Heart_data <- read.csv(file.choose(), stringsAsFactors = FALSE) View(Heart_data) # Convert categorical variables to factors Heart_data\(sex <- factor(Heart_data\)sex, levels=c(0,1), labels=c(“Female”,“Male”)) Heart_data\(Smoking <- factor(Heart_data\)Smoking, levels=c(0,1), labels=c(“No”,“Yes”)) Heart_data\(Diabetes <- factor(Heart_data\)Diabetes, levels=c(0,1), labels=c(“No”,“Yes”)) Heart_data\(BP <- factor(Heart_data\)BP, levels=c(0,1), labels=c(“No”,“Yes”)) Heart_data\(Anemia <- factor(Heart_data\)Anemia, levels=c(0,1), labels=c(“No”,“Yes”))
surv_obj <- Surv(time = Heart_data\(TIME, event = Heart_data\)Event) # ——————————– # (a) Kaplanâ“Meier by Smoking Status # ——————————– km_fit <- survfit(surv_obj ~ Smoking, data = Heart_data) summary(km_fit) # Plot KM curves km_fit_curve <- ggsurvplot( km_fit, data = Heart_data, risk.table = TRUE, risk.table.col =”strata”, legend.title = “Smoking”, legend.labs = c(“No”, “Yes”), xlab = “Time (days)”, ylab = “Survival Probability”, title = “Kaplan-Meier Curve: Smoking vs Non-Smoking” ) print(km_fit_curve)
s100 <- summary(km_fit, times = 100) s100 ci_100 <- data.frame( group = s100\(strata, time = s100\)time, survival = s100\(surv, lower = s100\)lower, upper = s100$upper ) print(ci_100)
cox_full <- coxph( Surv(TIME, Event) ~ sex + Smoking + Diabetes + BP + Anemia + Age + Ejection.Fraction + Sodium + Creatinine + Pletelets + CPK, data = Heart_data )
summary(cox_full)
hr_table <- broom::tidy(cox_full, exponentiate = TRUE, conf.int = TRUE) print(hr_table) # Round all numeric columns to 4 decimals hr_table_rounded <- hr_table %>% mutate(across(where(is.numeric), ~ round(.x, 4)))
print(hr_table_rounded) # Hazard ratio table hr_table <- broom::tidy(cox_full, exponentiate = TRUE, conf.int = TRUE) print(hr_table) # (d) Reduced Cox Model Using Significant Predictors # ————————— # Extract p<0.05 terms sig_terms <- broom::tidy(cox_full) %>% filter(p.value < 0.05) %>% pull(term)
cox_full <- coxph( Surv(TIME, Event) ~ sex + Smoking + Diabetes + BP + Anemia + Age + Ejection.Fraction + Sodium + Creatinine + Pletelets + CPK, data = Heart_data )
full_summary <- summary(cox_full) full_summary\(coefficients <- round(full_summary\)coefficients, 4) full_summary\(conf.int <- round(full_summary\)conf.int, 4) full_summary
ci <- full_summary$conf.int
sig_ci_vars <- rownames(ci)[ (ci[, “lower .95”] > 1) | (ci[, “upper .95”] < 1) ]
sig_ci_vars
sig_ci_vars <- c(“BPYes”, “Age”, “Ejection.Fraction”, “Creatinine”)
sig_vars <- unique(gsub(“(Yes|No)$”, ““, sig_ci_vars))
sig_vars
reduced_formula <- as.formula( paste(“Surv(TIME, Event) ~”, paste(sig_vars, collapse = ” + “)) )
cox_reduced <- coxph(reduced_formula, data = Heart_data) summary(cox_reduced)
cox_reduced_summary <- summary(cox_reduced)
cox_reduced_summary\(coefficients <- round(cox_reduced_summary\)coefficients, 4) cox_reduced_summary\(conf.int <- round(cox_reduced_summary\)conf.int, 4)
cox_reduced_summary
ph_test <- cox.zph(cox_reduced) ph_test # visually confirming the PH assumption plot(ph_test)
par(mfrow = c(2, 2)) plot(ph_test, var = “BP”) plot(ph_test, var = “Age”) plot(ph_test, var = “Ejection.Fraction”) plot(ph_test, var = “Creatinine”) par(mfrow = c(1, 1))
plot(survfit(Surv(TIME, Event) ~ BP, data = Heart_data), fun = “cloglog”, col = 1:2, lty = 1:2, xlab = “Time”, ylab = “Log(-Log(Survival))”, main = “LML Plot for BP”)
legend(“bottomright”, legend = c(“BP=0”, “BP=1”), col = 1:2, lty = 1:2)