HW3a

      # Dependencies
      library(rstan)
Loading required package: StanHeaders

rstan version 2.35.0.9000 (Stan version 2.35.0)
For execution on a local, multicore CPU with excess RAM we recommend calling
options(mc.cores = parallel::detectCores()).
To avoid recompilation of unchanged Stan programs, we recommend calling
rstan_options(auto_write = TRUE)
For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
change `threads_per_chain` option:
rstan_options(threads_per_chain = 1)
      library(arrow)

Attaching package: 'arrow'
The following object is masked from 'package:utils':

    timestamp
      library(survival)
      library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
      library(ggplot2)
      library(eha)

Loading Data

# Load Data
      df <- read_parquet("age-stage-survival.parquet")

      # Prepare data for Stan
      df$status <- 1  # All events occurred, no censoring
      
      stan_data <- list(
        N = nrow(df),            # Number of observations
        K = length(unique(df$grade)),  # Number of unique stages
        time = df$time,          # Survival times
        status = df$status,      # Event indicators (1 = event, 0 = censored)
        age = df$age,            # Age covariate
        stage = df$grade         # Stage 
      )

A single Weibull baseline hazard with a common age relationship across stage. 1. Write a mathematical expression of the model

h(t∣λ,k)= (λ/k) ​ (t/ λ)^ k−1

df$stage <- as.factor(df$grade)
fit1<- phreg(Surv(time,status) ~ age + I(age^2)+ stage, data =df, dist = "weibull")

phreg(Surv(time,status) ~ age + I(age^2)+ stage, data =df, dist = "weibull")
Call:
phreg(formula = Surv(time, status) ~ age + I(age^2) + stage, 
    data = df, dist = "weibull")

Covariate          W.mean      Coef Exp(Coef)  se(Coef)    Wald p
age                45.301     0.005     1.005     0.019     0.785 
I(age^2)         2198.323     0.000     1.000     0.000     0.111 
stage 
               1    0.247     0         1           (reference)
               2    0.515    -0.876     0.416     0.128     0.000 
               3    0.230     1.401     4.059     0.134     0.000 
               4    0.009     3.750    42.520     0.211     0.000 

log(scale)                    2.374               0.273     0.000 
log(shape)                    0.611               0.033     0.000 

Events                    500 
Total time at risk        2560.4 
Max. log. likelihood      -1057.9 
LR test statistic         492.29 
Degrees of freedom        5 
Overall p-value           0

2.Generate a partial effect plot of log relative hazard by age, stratified by stage. (Uncertainty interval not required for now.)

age_seq <- seq(min(df$age, na.rm = TRUE), max(df$age, na.rm = TRUE), length.out = 100)
new_data <- expand.grid(age=age_seq, stage = factor(1:4, levels = 1:4))
new_data$age_sq <- new_data$age^2

coef <- coef(fit1)
new_data$log_hazard <- coef["age"] * new_data$age + coef["I(age^2)"] * new_data$age_sq

for (lvl in levels(new_data$stage)) {
  if (lvl != "1") {
    new_data$log_hazard[new_data$stage == lvl] <- new_data$log_hazard[new_data$stage == lvl] + coef[paste0("stage", lvl)]
  }
}

ggplot(new_data, aes(x=age, y=log_hazard, color = stage)) +
  geom_line() +
  labs(
    title = "Partial Effect of Age on Log Relative Hazard by Stage",
    x = "Age",
    y = "Log Relative Hazard",
    color = "Stage"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

  1. Generate a partial effect plot of median survival by age, stratified by stage. (Uncertainty interval not required for now.) There is no reason to think the log relative hazard is linear in age.
age_seq <- seq(min(df$age, na.rm = TRUE), max(df$age, na.rm = TRUE), length.out = 100)
new_data <- expand.grid(age= age_seq, stage = factor(1:4, levels = 1:4))
new_data$age_sq <- new_data$age^2

coef <- coef(fit1)
new_data$lp <- coef["age"] * new_data$age + coef["I(age^2)"] * new_data$age_sq


for (lvl in levels(new_data$stage)) {
  if (lvl != "1") {
    new_data$lp[new_data$stage == lvl] <- new_data$lp[new_data$stage == lvl] + coef[paste0("stage", lvl)]
  }
}

gamma <- fit1$shape
new_data$lambda <- exp(-new_data$lp)


new_data$median_survival <- (log(2) / new_data$lambda)^(1/gamma)
#stage3 <- subset(new_data, stage== "3")
#head(stage3)

ggplot(new_data, aes(x=age, y=median_survival, color = stage)) +
  geom_line() +
  labs(
    title = "Partial Effect of Age on Median Survival by Stage",
    x = "Age",
    y = "Median Survival Time",
    color = "Stage"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

Repeating Model 1 except using Stan

df$stage <- as.integer(df$grade)
df <- df %>% 
  mutate(
    stage2 = ifelse(stage ==2,1,0),
    stage3 = ifelse(stage ==3,1,0),
    stage4 = ifelse(stage ==4,1,0)
  )
#stan only reads the data as a list, so we need to convert our df for stan to read it
stan_data <- list(
  N = nrow(df),                 # Number of observations
  age = df$age,                 # Age
  stage2 = df$stage2,
  stage3 = df$stage3,
  stage4 = df$stage4,   # Disease stage
  stage = df$stage,
  time_til_death = df$time  # Time till death
)

Write the model

writeLines("

data {
  int<lower=1> N;  // Number of observations
  vector[N] age;  // Age variable
  array[N] int<lower=0, upper=1> stage2;
  array[N] int<lower=0, upper=1> stage3;
  array[N] int<lower=0, upper=1> stage4;
  vector[N] time_til_death;  // Time until death variable
}

parameters {
  real alpha_stage2;  // Coefficients for stages 2, 3, and 4 (stage 1 is the baseline)
  real alpha_stage3;
  real alpha_stage4;
  real beta0;
  real beta_age;  // Coefficient for age
  real beta_age_sq;  // Coefficient for squared age
  real<lower=0> shape;  // Shape parameter of the Weibull distribution
  real<lower=0> scale;  // Scale parameter of the Weibull distribution
}

model {
  for (i in 1:N) {  // Loop through each observation
    // Calculate the linear predictor based on age and stage
    // Add the stage coefficient; stage 1 is the baseline, so we only adjust for stages 2-4
    real linear_pred = beta0 + beta_age * age[i] + beta_age_sq * square(age[i])
                        + alpha_stage2 * stage2[i]
                        + alpha_stage3 * stage3[i]
                        + alpha_stage4 * stage4[i]
                        ;
    
    target += weibull_lpdf(time_til_death[i] | shape, scale) +linear_pred + (exp(linear_pred)-1) * weibull_lccdf(time_til_death[i] | shape, scale);  
  }
}
", con = "model1.stan")

Fit the model

# Compile the Stan model
stan_model_1 <- stan_model(file = "model1.stan")
Trying to compile a simple C file
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 14.0.3 (clang-1403.0.22.14.1)’
using SDK: ‘MacOSX13.3.sdk’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
#include <cmath>
         ^~~~~~~
1 error generated.
make: *** [foo.o] Error 1
# Perform MLE using the optimizing function
fit_mle_1 <- optimizing(stan_model_1, data = stan_data, hessian = TRUE)
Warning in .local(object, ...): non-zero return code in optimizing
# View the optimized parameter estimates
print(fit_mle_1)
$par
alpha_stage2 alpha_stage3 alpha_stage4        beta0     beta_age  beta_age_sq 
  0.06321457  -1.27169947   1.71657615   0.59049742   1.98029545  -0.19908379 
       shape        scale 
  0.40510284   1.79194992 

$value
[1] -195597.2

$return_code
[1] 70

$hessian
             alpha_stage2 alpha_stage3 alpha_stage4 beta0 beta_age beta_age_sq
alpha_stage2            0            0            0     0        0           0
alpha_stage3            0            0            0     0        0           0
alpha_stage4            0            0            0     0        0           0
beta0                   0            0            0     0        0           0
beta_age                0            0            0     0        0           0
beta_age_sq             0            0            0     0        0           0
shape                   0            0            0     0        0           0
scale                   0            0            0     0        0           0
                 shape         scale
alpha_stage2    0.0000  0.000000e+00
alpha_stage3    0.0000  0.000000e+00
alpha_stage4    0.0000  0.000000e+00
beta0           0.0000  0.000000e+00
beta_age        0.0000  0.000000e+00
beta_age_sq     0.0000  0.000000e+00
shape         125.8654 -2.025514e+02
scale        -202.5514  1.563194e-10

$theta_tilde
     alpha_stage2 alpha_stage3 alpha_stage4     beta0 beta_age beta_age_sq
[1,]   0.06321457    -1.271699     1.716576 0.5904974 1.980295  -0.1990838
         shape   scale
[1,] 0.4051028 1.79195
# Load necessary libraries
library(ggplot2)
library(rstan)

# Load your fitted model (assuming it is called `fit`)
# fit <- stan("model1.stan", data = your_data) # Example of fitting the model, if not done yet

# Assume fit_mle is the result of your model fit using optimizing()
# fit_mle <- optimizing(your_stan_model, data = your_data)

# Extract coefficients from the fitted model (this will be a list)
coef <- fit_mle_1$par  # Adjust this line if your parameter names are different

# Create a sequence of ages
age_seq <- seq(min(stan_data$age, na.rm = TRUE), max(stan_data$age, na.rm = TRUE), length.out = 100)

# Create a new dataset with age and stages
new_data <- expand.grid(age = age_seq, stage = factor(1:4, levels = 1:4))
new_data$age_sq <- new_data$age^2

# Initialize log hazard
new_data$log_hazard <- 0

# Calculate log hazards for the new dataset
for (lvl in levels(new_data$stage)) {
  # Adjust index for dummy variable stages
  if (lvl == "1") {
    # For stage 1 (baseline), do not add any alpha coefficient
    new_data$log_hazard[new_data$stage == lvl] <- 
      coef["beta0"] +
      coef["beta_age"] * new_data$age + 
      coef["beta_age_sq"] * new_data$age_sq
  } else if (lvl == "2") {
    # For stage 1 (baseline), do not add any alpha coefficient
    new_data$log_hazard[new_data$stage == lvl] <- 
      coef["beta0"] + coef["alpha_stage2"] +
      coef["beta_age"] * new_data$age + 
      coef["beta_age_sq"] * new_data$age_sq
  } else if (lvl == "3") {
    # For stage 1 (baseline), do not add any alpha coefficient
    new_data$log_hazard[new_data$stage == lvl] <- 
      coef["beta0"] + coef["alpha_stage3"] +
      coef["beta_age"] * new_data$age+ 
      coef["beta_age_sq"] * new_data$age_sq
  }    else if (lvl == "4") {
    # For stage 1 (baseline), do not add any alpha coefficient
    new_data$log_hazard[new_data$stage == lvl] <- 
      coef["beta0"] + coef["alpha_stage4"] +
      coef["beta_age"] * new_data$age + 
      coef["beta_age_sq"] * new_data$age_sq
  }
}
Warning in new_data$log_hazard[new_data$stage == lvl] <- coef["beta0"] + :
number of items to replace is not a multiple of replacement length
Warning in new_data$log_hazard[new_data$stage == lvl] <- coef["beta0"] + :
number of items to replace is not a multiple of replacement length
Warning in new_data$log_hazard[new_data$stage == lvl] <- coef["beta0"] + :
number of items to replace is not a multiple of replacement length
Warning in new_data$log_hazard[new_data$stage == lvl] <- coef["beta0"] + :
number of items to replace is not a multiple of replacement length
# Check the structure of new_data to ensure it is as expected
str(new_data)
'data.frame':   400 obs. of  4 variables:
 $ age       : num  21 21.7 22.4 23.1 23.7 ...
 $ stage     : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
 $ age_sq    : num  441 470 501 532 564 ...
 $ log_hazard: num  -45.6 -50.1 -54.8 -59.6 -64.7 ...
 - attr(*, "out.attrs")=List of 2
  ..$ dim     : Named int [1:2] 100 4
  .. ..- attr(*, "names")= chr [1:2] "age" "stage"
  ..$ dimnames:List of 2
  .. ..$ age  : chr [1:100] "age=21.00000" "age=21.68687" "age=22.37374" "age=23.06061" ...
  .. ..$ stage: chr [1:4] "stage=1" "stage=2" "stage=3" "stage=4"
# Plot the results
ggplot(new_data, aes(x = age, y = log_hazard, color = stage)) +
  geom_line(linewidth = 1) +  # Increase line size for better visibility
  labs(
    title = "Partial Effect of Age on Log Relative Hazard by Stage",
    x = "Age",
    y = "Log Relative Hazard",
    color = "Stage"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

Write the model

writeLines("

data {
  int<lower=1> N;  // Number of observations
  vector[N] age;  // Age variable
  array[N] int<lower=0, upper=1> stage2;
  array[N] int<lower=0, upper=1> stage3;
  array[N] int<lower=0, upper=1> stage4;
  vector[N] time_til_death;  // Time until death variable
}

parameters {
  real beta0; // constant coefficient stage1
  real beta_age;  // Coefficient for age stage 1
  real beta_age_sq;  // Coefficient for squared age stage 1
  real gamma0; //coefficient for stage 2 constant
  real gamma1; //cofefficient for stage 2 age
  real gamma2; // coefficient for stage 2 age squared
  real nu0; //coefficient for stage 3 constant
  real nu1; //cofefficient for stage 3 age
  real nu2; // coefficient for stage 3 age squared
  real alpha0; //coefficient for stage 4 constant
  real alpha1; //cofefficient for stage 4 age
  real alpha2; // coefficient for stage 4 age squared
  real<lower=0> shape;  // Shape parameter of the Weibull distribution
  real<lower=0> scale;  // Scale parameter of the Weibull distribution
}

model {
  for (i in 1:N) {  // Loop through each observation
    // Calculate the linear predictor based on age and stage
    // Add the stage coefficient; stage 1 is the baseline, so we only adjust for stages 2-4
    real linear_pred = beta_age * age[i] + beta_age_sq * square(age[i])
                        + gamma0 * stage2[i]  + gamma1 *  stage2[i] * age[i] + gamma2  * stage2[i] * square(age[i])
                        +  nu0  *stage3[i]  + nu1 *  stage3[i] * age[i] + nu2  * stage3[i] * square(age[i])
                        + alpha0  * stage4[i]  + alpha1 * stage4[i] * age[i] + alpha2 * stage4[i] * square(age[i])
                        ;
    
    target += weibull_lpdf(time_til_death[i] | shape, scale) +linear_pred + (exp(linear_pred)-1) * weibull_lccdf(time_til_death[i] | shape, scale);  
  }
}
", con = "model2.stan")

Fit the model

# Compile the Stan model
stan_model_2 <- stan_model(file = "model2.stan")
Trying to compile a simple C file
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 14.0.3 (clang-1403.0.22.14.1)’
using SDK: ‘MacOSX13.3.sdk’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
#include <cmath>
         ^~~~~~~
1 error generated.
make: *** [foo.o] Error 1
# Perform MLE using the optimizing function
fit_mle_2 <- optimizing(stan_model_2, data = stan_data, hessian = TRUE)

# View the optimized parameter estimates
print(fit_mle_2)
$par
        beta0      beta_age   beta_age_sq        gamma0        gamma1 
-1.609391e+00 -4.553109e-02  8.416523e-04  9.627998e-03 -9.211637e-03 
       gamma2           nu0           nu1           nu2        alpha0 
-1.694624e-04  7.997586e-01  1.338674e-02  9.804606e-05  1.754940e+00 
       alpha1        alpha2         shape         scale 
 2.010043e-01 -2.774556e-03  2.039551e+00  6.219822e+00 

$value
[1] -1023.398

$return_code
[1] 0

$hessian
            beta0      beta_age   beta_age_sq        gamma0        gamma1
beta0           0  0.000000e+00  0.000000e+00        0.0000  0.000000e+00
beta_age        0 -1.214210e+06 -4.903701e+08    -6570.3778 -3.375650e+05
beta_age_sq     0 -4.903701e+08 -6.890065e+10 -1751053.1615 -1.266246e+08
gamma0          0 -6.570378e+03 -1.751053e+06     -135.0242 -6.570378e+03
gamma1          0 -3.375650e+05 -1.266246e+08    -6570.3778 -3.375650e+05
gamma2          0 -1.266246e+08 -1.804873e+10 -1751053.1615 -1.266246e+08
nu0             0 -9.446574e+03 -3.444133e+06        0.0000  0.000000e+00
nu1             0 -4.813718e+05 -2.534964e+08        0.0000  0.000000e+00
nu2             0 -2.534964e+08 -3.658406e+10        0.0000  0.000000e+00
alpha0          0 -2.187222e+03 -3.088689e+05        0.0000  0.000000e+00
alpha1          0 -1.164504e+05 -1.819768e+07        0.0000  0.000000e+00
alpha2          0 -1.819768e+07 -1.841196e+09        0.0000  0.000000e+00
shape           0  1.030800e+04  1.167981e+07     -148.7711 -7.092734e+03
scale           0  4.817558e+04  1.392251e+07      275.3890  1.340063e+04
                   gamma2           nu0           nu1          nu2       alpha0
beta0        0.000000e+00        0.0000  0.000000e+00            0       0.0000
beta_age    -1.266246e+08    -9446.5740 -4.813718e+05   -253496425   -2187.2218
beta_age_sq -1.804873e+10 -3444133.4545 -2.534964e+08 -36584064932 -308868.9356
gamma0      -1.751053e+06        0.0000  0.000000e+00            0       0.0000
gamma1      -1.266246e+08        0.0000  0.000000e+00            0       0.0000
gamma2      -1.804873e+10        0.0000  0.000000e+00            0       0.0000
nu0          0.000000e+00     -210.2282 -9.446574e+03     -3444133       0.0000
nu1          0.000000e+00    -9446.5740 -4.813718e+05   -253496425       0.0000
nu2          0.000000e+00 -3444133.4545 -2.534964e+08 -36584064932       0.0000
alpha0       0.000000e+00        0.0000  0.000000e+00            0     -42.5339
alpha1       0.000000e+00        0.0000  0.000000e+00            0   -2187.2218
alpha2       0.000000e+00        0.0000  0.000000e+00            0 -308868.9356
shape       -6.891676e+04      207.5557  1.385518e+04      9578365     146.1959
scale        3.571363e+06      428.7713  1.926678e+04      7024488      86.7501
                   alpha1        alpha2         shape         scale
beta0               0.000           0.0        0.0000        0.0000
beta_age      -116450.381   -18197684.5    10307.9971    48175.5756
beta_age_sq -18197684.482 -1841196157.9 11679814.3376 13922510.5901
gamma0              0.000           0.0     -148.7711      275.3890
gamma1              0.000           0.0    -7092.7338    13400.6273
gamma2              0.000           0.0   -68916.7645  3571363.2318
nu0                 0.000           0.0      207.5557      428.7713
nu1                 0.000           0.0    13855.1767    19266.7789
nu2                 0.000           0.0  9578365.0790  7024487.7103
alpha0          -2187.222     -308868.9      146.1959       86.7501
alpha1        -116450.381   -18197684.5     7292.0492     4460.9526
alpha2      -18197684.482 -1841196157.9  1002562.2546   629954.1543
shape            7292.049     1002562.3    -2343.0011     -222.3579
scale            4460.953      629954.2     -222.3579    -2080.5424

$theta_tilde
         beta0    beta_age  beta_age_sq      gamma0       gamma1        gamma2
[1,] -1.609391 -0.04553109 0.0008416523 0.009627998 -0.009211637 -0.0001694624
           nu0        nu1          nu2  alpha0    alpha1       alpha2    shape
[1,] 0.7997586 0.01338674 9.804606e-05 1.75494 0.2010043 -0.002774556 2.039551
        scale
[1,] 6.219822
# Load necessary libraries
library(ggplot2)
library(rstan)

# Load your fitted model (assuming it is called `fit`)
# fit <- stan("model1.stan", data = your_data) # Example of fitting the model, if not done yet

# Assume fit_mle is the result of your model fit using optimizing()
# fit_mle <- optimizing(your_stan_model, data = your_data)

# Extract coefficients from the fitted model (this will be a list)
coef <- fit_mle_2$par  # Adjust this line if your parameter names are different

# Create a sequence of ages
age_seq <- seq(min(stan_data$age, na.rm = TRUE), max(stan_data$age, na.rm = TRUE), length.out = 100)

# Create a new dataset with age and stages
new_data <- expand.grid(age = age_seq, stage = factor(1:4, levels = 1:4))
new_data$age_sq <- new_data$age^2

# Initialize log hazard
new_data$log_hazard <- 0

# Calculate log hazards for the new dataset
for (lvl in levels(new_data$stage)) {
  # Check the current level of stage and calculate log hazard accordingly
  if (lvl == "1") {
    # For stage 1 (baseline), do not add any alpha coefficient
    new_data$log_hazard[new_data$stage == lvl] <- 
      coef["beta0"] +  # Assuming this is the baseline coefficient
      coef["beta_age"] * new_data$age[new_data$stage == lvl] + 
      coef["beta_age_sq"] * new_data$age_sq[new_data$stage == lvl]
  } else if (lvl == "2") {
    # For stage 2
    new_data$log_hazard[new_data$stage == lvl] <- 
      coef["gamma0"] + 
      coef["gamma1"] * new_data$age[new_data$stage == lvl] + 
      coef["gamma2"] * new_data$age_sq[new_data$stage == lvl]
  } else if (lvl == "3") {
    # For stage 3
    new_data$log_hazard[new_data$stage == lvl] <- 
      coef["nu0"] + 
      coef["nu1"] * new_data$age[new_data$stage == lvl] + 
      coef["nu2"] * new_data$age_sq[new_data$stage == lvl]
  } else if (lvl == "4") {
    # For stage 4
    new_data$log_hazard[new_data$stage == lvl] <- 
      coef["alpha0"] + 
      coef["alpha1"] * new_data$age[new_data$stage == lvl] + 
      coef["alpha2"] * new_data$age_sq[new_data$stage == lvl]
  }
}


# Check the structure of new_data to ensure it is as expected
str(new_data)
'data.frame':   400 obs. of  4 variables:
 $ age       : num  21 21.7 22.4 23.1 23.7 ...
 $ stage     : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
 $ age_sq    : num  441 470 501 532 564 ...
 $ log_hazard: num  -2.19 -2.2 -2.21 -2.21 -2.22 ...
 - attr(*, "out.attrs")=List of 2
  ..$ dim     : Named int [1:2] 100 4
  .. ..- attr(*, "names")= chr [1:2] "age" "stage"
  ..$ dimnames:List of 2
  .. ..$ age  : chr [1:100] "age=21.00000" "age=21.68687" "age=22.37374" "age=23.06061" ...
  .. ..$ stage: chr [1:4] "stage=1" "stage=2" "stage=3" "stage=4"
# Plot the results
ggplot(new_data, aes(x = age, y = log_hazard, color = stage)) +
  geom_line(linewidth = 1) +  # Increase line size for better visibility
  labs(
    title = "Partial Effect of Age on Log Relative Hazard by Stage",
    x = "Age",
    y = "Log Relative Hazard",
    color = "Stage"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")