remove() #Remove objects from the workspace
rm(list=ls()) #removes all objects from the current workspace (R memory)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(readr)
data1<-read_csv("C:\\Users\\anami\\OneDrive\\Documents\\EHA\\MHAS0121.csv")
## Rows: 9636 Columns: 39
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): locsize01, died18, died21
## dbl (36): id, perwght01, mobirth, yrbirth, female, moint, yrint, schooling, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(survival)
library(ggsurvfit)
summary(data1$yrcensor)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 2001 2008 2018 2015 2021 2022 847
summary(data1$yrint)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2001 2001 2001 2001 2001 2001
summary(data1$mocensor)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 4.000 9.000 8.822 12.000 99.000 102
summary(data1$moint)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 7.000 7.000 7.494 8.000 11.000
table(data1$mocensor)
##
## 1 2 3 4 5 6 7 8 9 10 11 12 88 99
## 1387 327 385 333 343 546 874 435 387 481 1171 2755 12 98
data1 <- data1[!is.na(data1$yrcensor), ]
data1 <- data1[!is.na(data1$mocensor), ]
data1 <- data1[data1$yrcensor > data1$yrint | (data1$yrcensor == data1$yrint & data1$mocensor >= data1$moint), ]
data1$time_exposed <- with(data1, ((yrcensor + mocensor/12) - (yrint + moint/12)) * 12 + 1)
summary(data1$time_exposed)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 91.0 201.0 165.5 245.0 297.0
data1 <- data1[!is.na(data1$died0121), ]
data1$event_occurred <- data1$died0121
summary(data1$ageint)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 50.50 55.33 61.33 63.51 69.67 106.08
summary(data1$agecensor)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 51.17 72.75 77.67 78.30 83.75 114.42
data1$entry_time <- data1$ageint
data1$exit_time <-data1$agecensor
survobj <- with(data1, Surv(time = time_exposed, event = event_occurred))
fit <- survfit(survobj ~ 1, data = data1)
km_summary <- summary(fit)
surv_times <- km_summary$time
surv_prob <- km_summary$surv
std_error <- km_summary$std.err
cum_hazard <- -log(surv_prob)
km_results <- data.frame(
time = surv_times,
survival_prob = surv_prob,
std_error = std_error,
cumulative_hazard = cum_hazard
)
library(flextable)
km_table <- flextable(km_results)
library(officer)
library(dplyr)
doc <- read_docx()
doc <- doc %>%
body_add_par("Kaplan-Meier Survival Results") %>%
body_add_flextable(km_table)
print(doc, target = "Survival_Results.docx")
library(survival)
survobj <- with(data1, Surv(time = time_exposed, event = event_occurred))
fit <- survfit(survobj ~ 1)
1. Kaplan-Meier survival curve
plot(fit, xlab = "Time", ylab = "Survival Probability", main = "Kaplan-Meier Survival Curve")

km_summary <- summary(fit)
surv_times <- km_summary$time
surv_prob <- km_summary$surv
cum_hazard <- -log(surv_prob)
plot(surv_times, cum_hazard, type = "l", xlab = "Time", ylab = "Cumulative Hazard",
main = "Cumulative Hazard Curve")

library(ggplot2)
hazard_estimate <- c(0, diff(-log(surv_prob)) / diff(surv_times))
hazard_df <- data.frame(time = surv_times[-1], hazard = hazard_estimate[-1])
ggplot(hazard_df, aes(x = time, y = hazard)) +
geom_line() +
geom_smooth(color="red",method = "loess", se = FALSE) +
labs(x = "Time", y = "Hazard", title = "Smoothed Hazard Curve") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
