For execution on a local, multicore CPU with excess RAM we recommend calling
options(mc.cores = parallel::detectCores()).
To avoid recompilation of unchanged Stan programs, we recommend calling
rstan_options(auto_write = TRUE)
For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
change `threads_per_chain` option:
rstan_options(threads_per_chain = 1)
library(arrow)
Attaching package: 'arrow'
The following object is masked from 'package:utils':
timestamp
library(survival)library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(ggplot2)library(eha)
Loading Data
# Load Data df <-read_parquet("age-stage-survival.parquet")# Prepare data for Stan df$status <-1# All events occurred, no censoring stan_data <-list(N =nrow(df), # Number of observationsK =length(unique(df$grade)), # Number of unique stagestime = df$time, # Survival timesstatus = df$status, # Event indicators (1 = event, 0 = censored)age = df$age, # Age covariatestage = df$grade # Stage )
A single Weibull baseline hazard with a common age relationship across stage. 1. Write a mathematical expression of the model
h(t∣λ,k)= (λ/k) (t/ λ)^ k−1
df$stage <-as.factor(df$grade)fit1<-phreg(Surv(time,status) ~ age +I(age^2)+ stage, data =df, dist ="weibull")phreg(Surv(time,status) ~ age +I(age^2)+ stage, data =df, dist ="weibull")
Call:
phreg(formula = Surv(time, status) ~ age + I(age^2) + stage,
data = df, dist = "weibull")
Covariate W.mean Coef Exp(Coef) se(Coef) Wald p
age 45.301 0.005 1.005 0.019 0.785
I(age^2) 2198.323 0.000 1.000 0.000 0.111
stage
1 0.247 0 1 (reference)
2 0.515 -0.876 0.416 0.128 0.000
3 0.230 1.401 4.059 0.134 0.000
4 0.009 3.750 42.520 0.211 0.000
log(scale) 2.374 0.273 0.000
log(shape) 0.611 0.033 0.000
Events 500
Total time at risk 2560.4
Max. log. likelihood -1057.9
LR test statistic 492.29
Degrees of freedom 5
Overall p-value 0
2.Generate a partial effect plot of log relative hazard by age, stratified by stage. (Uncertainty interval not required for now.)
Generate a partial effect plot of median survival by age, stratified by stage. (Uncertainty interval not required for now.) There is no reason to think the log relative hazard is linear in age.
#stan only reads the data as a list, so we need to convert our df for stan to read itstan_data <-list(N =nrow(df), # Number of observationsage = df$age, # Agestage2 = df$stage2,stage3 = df$stage3,stage4 = df$stage4, # Disease stagestage = df$stage,time_til_death = df$time # Time till death)
Write the model
writeLines("data { int<lower=1> N; // Number of observations vector[N] age; // Age variable array[N] int<lower=0, upper=1> stage2; array[N] int<lower=0, upper=1> stage3; array[N] int<lower=0, upper=1> stage4; vector[N] time_til_death; // Time until death variable}parameters { real alpha_stage2; // Coefficients for stages 2, 3, and 4 (stage 1 is the baseline) real alpha_stage3; real alpha_stage4; real beta0; real beta_age; // Coefficient for age real beta_age_sq; // Coefficient for squared age real<lower=0> shape; // Shape parameter of the Weibull distribution real<lower=0> scale; // Scale parameter of the Weibull distribution}model { for (i in 1:N) { // Loop through each observation // Calculate the linear predictor based on age and stage // Add the stage coefficient; stage 1 is the baseline, so we only adjust for stages 2-4 real linear_pred = beta0 + beta_age * age[i] + beta_age_sq * square(age[i]) + alpha_stage2 * stage2[i] + alpha_stage3 * stage3[i] + alpha_stage4 * stage4[i] ; target += weibull_lpdf(time_til_death[i] | shape, scale) +linear_pred + (exp(linear_pred)-1) * weibull_lccdf(time_til_death[i] | shape, scale); }}", con ="model1.stan")
Fit the model
# Compile the Stan modelstan_model_1 <-stan_model(file ="model1.stan")
Trying to compile a simple C file
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 14.0.3 (clang-1403.0.22.14.1)’
using SDK: ‘MacOSX13.3.sdk’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
#include <cmath>
^~~~~~~
1 error generated.
make: *** [foo.o] Error 1
# Perform MLE using the optimizing functionfit_mle_1 <-optimizing(stan_model_1, data = stan_data, hessian =TRUE)
Warning in .local(object, ...): non-zero return code in optimizing
# View the optimized parameter estimatesprint(fit_mle_1)
# Load necessary librarieslibrary(ggplot2)library(rstan)# Load your fitted model (assuming it is called `fit`)# fit <- stan("model1.stan", data = your_data) # Example of fitting the model, if not done yet# Assume fit_mle is the result of your model fit using optimizing()# fit_mle <- optimizing(your_stan_model, data = your_data)# Extract coefficients from the fitted model (this will be a list)coef <- fit_mle_1$par # Adjust this line if your parameter names are different# Create a sequence of agesage_seq <-seq(min(stan_data$age, na.rm =TRUE), max(stan_data$age, na.rm =TRUE), length.out =100)# Create a new dataset with age and stagesnew_data <-expand.grid(age = age_seq, stage =factor(1:4, levels =1:4))new_data$age_sq <- new_data$age^2# Initialize log hazardnew_data$log_hazard <-0# Calculate log hazards for the new datasetfor (lvl inlevels(new_data$stage)) {# Adjust index for dummy variable stagesif (lvl =="1") {# For stage 1 (baseline), do not add any alpha coefficient new_data$log_hazard[new_data$stage == lvl] <- coef["beta0"] + coef["beta_age"] * new_data$age + coef["beta_age_sq"] * new_data$age_sq } elseif (lvl =="2") {# For stage 1 (baseline), do not add any alpha coefficient new_data$log_hazard[new_data$stage == lvl] <- coef["beta0"] + coef["alpha_stage2"] + coef["beta_age"] * new_data$age + coef["beta_age_sq"] * new_data$age_sq } elseif (lvl =="3") {# For stage 1 (baseline), do not add any alpha coefficient new_data$log_hazard[new_data$stage == lvl] <- coef["beta0"] + coef["alpha_stage3"] + coef["beta_age"] * new_data$age+ coef["beta_age_sq"] * new_data$age_sq } elseif (lvl =="4") {# For stage 1 (baseline), do not add any alpha coefficient new_data$log_hazard[new_data$stage == lvl] <- coef["beta0"] + coef["alpha_stage4"] + coef["beta_age"] * new_data$age + coef["beta_age_sq"] * new_data$age_sq }}
Warning in new_data$log_hazard[new_data$stage == lvl] <- coef["beta0"] + :
number of items to replace is not a multiple of replacement length
Warning in new_data$log_hazard[new_data$stage == lvl] <- coef["beta0"] + :
number of items to replace is not a multiple of replacement length
Warning in new_data$log_hazard[new_data$stage == lvl] <- coef["beta0"] + :
number of items to replace is not a multiple of replacement length
Warning in new_data$log_hazard[new_data$stage == lvl] <- coef["beta0"] + :
number of items to replace is not a multiple of replacement length
# Check the structure of new_data to ensure it is as expectedstr(new_data)
'data.frame': 400 obs. of 4 variables:
$ age : num 21 21.7 22.4 23.1 23.7 ...
$ stage : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
$ age_sq : num 441 470 501 532 564 ...
$ log_hazard: num -45.6 -50.1 -54.8 -59.6 -64.7 ...
- attr(*, "out.attrs")=List of 2
..$ dim : Named int [1:2] 100 4
.. ..- attr(*, "names")= chr [1:2] "age" "stage"
..$ dimnames:List of 2
.. ..$ age : chr [1:100] "age=21.00000" "age=21.68687" "age=22.37374" "age=23.06061" ...
.. ..$ stage: chr [1:4] "stage=1" "stage=2" "stage=3" "stage=4"
# Plot the resultsggplot(new_data, aes(x = age, y = log_hazard, color = stage)) +geom_line(linewidth =1) +# Increase line size for better visibilitylabs(title ="Partial Effect of Age on Log Relative Hazard by Stage",x ="Age",y ="Log Relative Hazard",color ="Stage" ) +theme_minimal() +theme(legend.position ="bottom")
Write the model
writeLines("data { int<lower=1> N; // Number of observations vector[N] age; // Age variable array[N] int<lower=0, upper=1> stage2; array[N] int<lower=0, upper=1> stage3; array[N] int<lower=0, upper=1> stage4; vector[N] time_til_death; // Time until death variable}parameters { real beta0; // constant coefficient stage1 real beta_age; // Coefficient for age stage 1 real beta_age_sq; // Coefficient for squared age stage 1 real gamma0; //coefficient for stage 2 constant real gamma1; //cofefficient for stage 2 age real gamma2; // coefficient for stage 2 age squared real nu0; //coefficient for stage 3 constant real nu1; //cofefficient for stage 3 age real nu2; // coefficient for stage 3 age squared real alpha0; //coefficient for stage 4 constant real alpha1; //cofefficient for stage 4 age real alpha2; // coefficient for stage 4 age squared real<lower=0> shape; // Shape parameter of the Weibull distribution real<lower=0> scale; // Scale parameter of the Weibull distribution}model { for (i in 1:N) { // Loop through each observation // Calculate the linear predictor based on age and stage // Add the stage coefficient; stage 1 is the baseline, so we only adjust for stages 2-4 real linear_pred = beta_age * age[i] + beta_age_sq * square(age[i]) + gamma0 * stage2[i] + gamma1 * stage2[i] * age[i] + gamma2 * stage2[i] * square(age[i]) + nu0 *stage3[i] + nu1 * stage3[i] * age[i] + nu2 * stage3[i] * square(age[i]) + alpha0 * stage4[i] + alpha1 * stage4[i] * age[i] + alpha2 * stage4[i] * square(age[i]) ; target += weibull_lpdf(time_til_death[i] | shape, scale) +linear_pred + (exp(linear_pred)-1) * weibull_lccdf(time_til_death[i] | shape, scale); }}", con ="model2.stan")
Fit the model
# Compile the Stan modelstan_model_2 <-stan_model(file ="model2.stan")
Trying to compile a simple C file
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 14.0.3 (clang-1403.0.22.14.1)’
using SDK: ‘MacOSX13.3.sdk’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
#include <cmath>
^~~~~~~
1 error generated.
make: *** [foo.o] Error 1
# Perform MLE using the optimizing functionfit_mle_2 <-optimizing(stan_model_2, data = stan_data, hessian =TRUE)# View the optimized parameter estimatesprint(fit_mle_2)
# Load necessary librarieslibrary(ggplot2)library(rstan)# Load your fitted model (assuming it is called `fit`)# fit <- stan("model1.stan", data = your_data) # Example of fitting the model, if not done yet# Assume fit_mle is the result of your model fit using optimizing()# fit_mle <- optimizing(your_stan_model, data = your_data)# Extract coefficients from the fitted model (this will be a list)coef <- fit_mle_2$par # Adjust this line if your parameter names are different# Create a sequence of agesage_seq <-seq(min(stan_data$age, na.rm =TRUE), max(stan_data$age, na.rm =TRUE), length.out =100)# Create a new dataset with age and stagesnew_data <-expand.grid(age = age_seq, stage =factor(1:4, levels =1:4))new_data$age_sq <- new_data$age^2# Initialize log hazardnew_data$log_hazard <-0# Calculate log hazards for the new datasetfor (lvl inlevels(new_data$stage)) {# Check the current level of stage and calculate log hazard accordinglyif (lvl =="1") {# For stage 1 (baseline), do not add any alpha coefficient new_data$log_hazard[new_data$stage == lvl] <- coef["beta0"] +# Assuming this is the baseline coefficient coef["beta_age"] * new_data$age[new_data$stage == lvl] + coef["beta_age_sq"] * new_data$age_sq[new_data$stage == lvl] } elseif (lvl =="2") {# For stage 2 new_data$log_hazard[new_data$stage == lvl] <- coef["gamma0"] + coef["gamma1"] * new_data$age[new_data$stage == lvl] + coef["gamma2"] * new_data$age_sq[new_data$stage == lvl] } elseif (lvl =="3") {# For stage 3 new_data$log_hazard[new_data$stage == lvl] <- coef["nu0"] + coef["nu1"] * new_data$age[new_data$stage == lvl] + coef["nu2"] * new_data$age_sq[new_data$stage == lvl] } elseif (lvl =="4") {# For stage 4 new_data$log_hazard[new_data$stage == lvl] <- coef["alpha0"] + coef["alpha1"] * new_data$age[new_data$stage == lvl] + coef["alpha2"] * new_data$age_sq[new_data$stage == lvl] }}# Check the structure of new_data to ensure it is as expectedstr(new_data)
'data.frame': 400 obs. of 4 variables:
$ age : num 21 21.7 22.4 23.1 23.7 ...
$ stage : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
$ age_sq : num 441 470 501 532 564 ...
$ log_hazard: num -2.19 -2.2 -2.21 -2.21 -2.22 ...
- attr(*, "out.attrs")=List of 2
..$ dim : Named int [1:2] 100 4
.. ..- attr(*, "names")= chr [1:2] "age" "stage"
..$ dimnames:List of 2
.. ..$ age : chr [1:100] "age=21.00000" "age=21.68687" "age=22.37374" "age=23.06061" ...
.. ..$ stage: chr [1:4] "stage=1" "stage=2" "stage=3" "stage=4"
# Plot the resultsggplot(new_data, aes(x = age, y = log_hazard, color = stage)) +geom_line(linewidth =1) +# Increase line size for better visibilitylabs(title ="Partial Effect of Age on Log Relative Hazard by Stage",x ="Age",y ="Log Relative Hazard",color ="Stage" ) +theme_minimal() +theme(legend.position ="bottom")