データ整理

パッケージの読み込み

pacman::p_load(tidyverse,
              foreign,
              rms,
              tableone,
              ggplot2,
              gtsummary,
              summarytools,
              skimr,
              car,
              naniar,
              mice,
              survival,
              corrplot,
              survminer)

データの読み込み

df<-read.csv("frmgham2.csv")

列名の確認

colnames(df)
##  [1] "RANDID"   "SEX"      "TOTCHOL"  "AGE"      "SYSBP"    "DIABP"   
##  [7] "CURSMOKE" "CIGPDAY"  "BMI"      "DIABETES" "BPMEDS"   "HEARTRTE"
## [13] "GLUCOSE"  "educ"     "PREVCHD"  "PREVAP"   "PREVMI"   "PREVSTRK"
## [19] "PREVHYP"  "TIME"     "PERIOD"   "HDLC"     "LDLC"     "DEATH"   
## [25] "ANGINA"   "HOSPMI"   "MI_FCHD"  "ANYCHD"   "STROKE"   "CVD"     
## [31] "HYPERTEN" "TIMEAP"   "TIMEMI"   "TIMEMIFC" "TIMECHD"  "TIMESTRK"
## [37] "TIMECVD"  "TIMEDTH"  "TIMEHYP"

データの確認①

dfSummary(df) %>% view()
## Switching method to 'browser'
## Output file written: /var/folders/n9/tf_wmwpn3gl2t7l1cz4tqk000000gn/T//Rtmp7UONei/fileed821603a472.html

データの確認②

str(df)
## 'data.frame':    11627 obs. of  39 variables:
##  $ RANDID  : int  2448 2448 6238 6238 6238 9428 9428 10552 10552 11252 ...
##  $ SEX     : int  1 1 2 2 2 1 1 2 2 2 ...
##  $ TOTCHOL : int  195 209 250 260 237 245 283 225 232 285 ...
##  $ AGE     : int  39 52 46 52 58 48 54 61 67 46 ...
##  $ SYSBP   : num  106 121 121 105 108 ...
##  $ DIABP   : num  70 66 81 69.5 66 80 89 95 109 84 ...
##  $ CURSMOKE: int  0 0 0 0 0 1 1 1 1 1 ...
##  $ CIGPDAY : int  0 0 0 0 0 20 30 30 20 23 ...
##  $ BMI     : num  27 NA 28.7 29.4 28.5 ...
##  $ DIABETES: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ BPMEDS  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ HEARTRTE: int  80 69 95 80 80 75 75 65 60 85 ...
##  $ GLUCOSE : int  77 92 76 86 71 70 87 103 89 85 ...
##  $ educ    : int  4 4 2 2 2 1 1 3 3 3 ...
##  $ PREVCHD : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PREVAP  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PREVMI  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PREVSTRK: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PREVHYP : int  0 0 0 0 0 0 0 1 1 0 ...
##  $ TIME    : int  0 4628 0 2156 4344 0 2199 0 1977 0 ...
##  $ PERIOD  : int  1 3 1 2 3 1 2 1 2 1 ...
##  $ HDLC    : int  NA 31 NA NA 54 NA NA NA NA NA ...
##  $ LDLC    : int  NA 178 NA NA 141 NA NA NA NA NA ...
##  $ DEATH   : int  0 0 0 0 0 0 0 1 1 0 ...
##  $ ANGINA  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ HOSPMI  : int  1 1 0 0 0 0 0 0 0 0 ...
##  $ MI_FCHD : int  1 1 0 0 0 0 0 0 0 0 ...
##  $ ANYCHD  : int  1 1 0 0 0 0 0 0 0 0 ...
##  $ STROKE  : int  0 0 0 0 0 0 0 1 1 0 ...
##  $ CVD     : int  1 1 0 0 0 0 0 1 1 0 ...
##  $ HYPERTEN: int  0 0 0 0 0 0 0 1 1 1 ...
##  $ TIMEAP  : int  8766 8766 8766 8766 8766 8766 8766 2956 2956 8766 ...
##  $ TIMEMI  : int  6438 6438 8766 8766 8766 8766 8766 2956 2956 8766 ...
##  $ TIMEMIFC: int  6438 6438 8766 8766 8766 8766 8766 2956 2956 8766 ...
##  $ TIMECHD : int  6438 6438 8766 8766 8766 8766 8766 2956 2956 8766 ...
##  $ TIMESTRK: int  8766 8766 8766 8766 8766 8766 8766 2089 2089 8766 ...
##  $ TIMECVD : int  6438 6438 8766 8766 8766 8766 8766 2089 2089 8766 ...
##  $ TIMEDTH : int  8766 8766 8766 8766 8766 8766 8766 2956 2956 8766 ...
##  $ TIMEHYP : int  8766 8766 8766 8766 8766 8766 8766 0 0 4285 ...

因子変換

# 因子に変換したい列名のリスト
col_fact <- c("SEX", "CURSMOKE", "DIABETES", "BPMEDS", "PREVCHD", "PREVAP", "PREVMI", "PREVSTRK", "PREVHYP", "PERIOD", "educ","DEATH", "ANGINA", "HOSPMI", "MI_FCHD", "ANYCHD", "STROKE", "CVD", "HYPERTEN")

# forループで一度にすべての列を因子に変換
for(col in col_fact) {
  df[[col]] <- as.factor(df[[col]])
}

変換の確認

str(df)
## 'data.frame':    11627 obs. of  39 variables:
##  $ RANDID  : int  2448 2448 6238 6238 6238 9428 9428 10552 10552 11252 ...
##  $ SEX     : Factor w/ 2 levels "1","2": 1 1 2 2 2 1 1 2 2 2 ...
##  $ TOTCHOL : int  195 209 250 260 237 245 283 225 232 285 ...
##  $ AGE     : int  39 52 46 52 58 48 54 61 67 46 ...
##  $ SYSBP   : num  106 121 121 105 108 ...
##  $ DIABP   : num  70 66 81 69.5 66 80 89 95 109 84 ...
##  $ CURSMOKE: Factor w/ 2 levels "0","1": 1 1 1 1 1 2 2 2 2 2 ...
##  $ CIGPDAY : int  0 0 0 0 0 20 30 30 20 23 ...
##  $ BMI     : num  27 NA 28.7 29.4 28.5 ...
##  $ DIABETES: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ BPMEDS  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ HEARTRTE: int  80 69 95 80 80 75 75 65 60 85 ...
##  $ GLUCOSE : int  77 92 76 86 71 70 87 103 89 85 ...
##  $ educ    : Factor w/ 4 levels "1","2","3","4": 4 4 2 2 2 1 1 3 3 3 ...
##  $ PREVCHD : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ PREVAP  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ PREVMI  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ PREVSTRK: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ PREVHYP : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 2 1 ...
##  $ TIME    : int  0 4628 0 2156 4344 0 2199 0 1977 0 ...
##  $ PERIOD  : Factor w/ 3 levels "1","2","3": 1 3 1 2 3 1 2 1 2 1 ...
##  $ HDLC    : int  NA 31 NA NA 54 NA NA NA NA NA ...
##  $ LDLC    : int  NA 178 NA NA 141 NA NA NA NA NA ...
##  $ DEATH   : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 2 1 ...
##  $ ANGINA  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ HOSPMI  : Factor w/ 2 levels "0","1": 2 2 1 1 1 1 1 1 1 1 ...
##  $ MI_FCHD : Factor w/ 2 levels "0","1": 2 2 1 1 1 1 1 1 1 1 ...
##  $ ANYCHD  : Factor w/ 2 levels "0","1": 2 2 1 1 1 1 1 1 1 1 ...
##  $ STROKE  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 2 1 ...
##  $ CVD     : Factor w/ 2 levels "0","1": 2 2 1 1 1 1 1 2 2 1 ...
##  $ HYPERTEN: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 2 2 ...
##  $ TIMEAP  : int  8766 8766 8766 8766 8766 8766 8766 2956 2956 8766 ...
##  $ TIMEMI  : int  6438 6438 8766 8766 8766 8766 8766 2956 2956 8766 ...
##  $ TIMEMIFC: int  6438 6438 8766 8766 8766 8766 8766 2956 2956 8766 ...
##  $ TIMECHD : int  6438 6438 8766 8766 8766 8766 8766 2956 2956 8766 ...
##  $ TIMESTRK: int  8766 8766 8766 8766 8766 8766 8766 2089 2089 8766 ...
##  $ TIMECVD : int  6438 6438 8766 8766 8766 8766 8766 2089 2089 8766 ...
##  $ TIMEDTH : int  8766 8766 8766 8766 8766 8766 8766 2956 2956 8766 ...
##  $ TIMEHYP : int  8766 8766 8766 8766 8766 8766 8766 0 0 4285 ...
# データフレームdfの全ての列名を取得
all_cols <- names(df)

# all_colsからcol_factを除外したものをcol_contとする
col_cont <- setdiff(all_cols, col_fact)

period:1のみを予測モデル作成の対象とする。period:1にはLDL/HDL情報がないため削除

df %>% filter(PERIOD==1)->df_p1
str(df_p1)
## 'data.frame':    4434 obs. of  39 variables:
##  $ RANDID  : int  2448 6238 9428 10552 11252 11263 12629 12806 14367 16365 ...
##  $ SEX     : Factor w/ 2 levels "1","2": 1 2 1 2 2 2 2 2 1 1 ...
##  $ TOTCHOL : int  195 250 245 225 285 228 205 313 260 225 ...
##  $ AGE     : int  39 46 48 61 46 43 63 45 52 43 ...
##  $ SYSBP   : num  106 121 128 150 130 ...
##  $ DIABP   : num  70 81 80 95 84 110 71 71 89 107 ...
##  $ CURSMOKE: Factor w/ 2 levels "0","1": 1 1 2 2 2 1 1 2 1 2 ...
##  $ CIGPDAY : int  0 0 20 30 23 0 0 20 0 30 ...
##  $ BMI     : num  27 28.7 25.3 28.6 23.1 ...
##  $ DIABETES: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ BPMEDS  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ HEARTRTE: int  80 95 75 65 85 77 60 79 76 93 ...
##  $ GLUCOSE : int  77 76 70 103 85 99 85 78 79 88 ...
##  $ educ    : Factor w/ 4 levels "1","2","3","4": 4 2 1 3 3 2 1 2 1 1 ...
##  $ PREVCHD : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ PREVAP  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ PREVMI  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ PREVSTRK: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ PREVHYP : Factor w/ 2 levels "0","1": 1 1 1 2 1 2 1 1 2 2 ...
##  $ TIME    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PERIOD  : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ HDLC    : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ LDLC    : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ DEATH   : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 1 1 ...
##  $ ANGINA  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
##  $ HOSPMI  : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 1 1 ...
##  $ MI_FCHD : Factor w/ 2 levels "0","1": 2 1 1 1 1 2 1 1 1 1 ...
##  $ ANYCHD  : Factor w/ 2 levels "0","1": 2 1 1 1 1 2 2 1 1 1 ...
##  $ STROKE  : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 1 1 ...
##  $ CVD     : Factor w/ 2 levels "0","1": 2 1 1 2 1 2 1 1 1 1 ...
##  $ HYPERTEN: Factor w/ 2 levels "0","1": 1 1 1 2 2 2 2 2 2 2 ...
##  $ TIMEAP  : int  8766 8766 8766 2956 8766 8766 373 8766 8766 8766 ...
##  $ TIMEMI  : int  6438 8766 8766 2956 8766 8766 8766 8766 8766 8766 ...
##  $ TIMEMIFC: int  6438 8766 8766 2956 8766 5719 8766 8766 8766 8766 ...
##  $ TIMECHD : int  6438 8766 8766 2956 8766 5719 373 8766 8766 8766 ...
##  $ TIMESTRK: int  8766 8766 8766 2089 8766 8766 8766 8766 8766 8766 ...
##  $ TIMECVD : int  6438 8766 8766 2089 8766 5719 8766 8766 8766 8766 ...
##  $ TIMEDTH : int  8766 8766 8766 2956 8766 8766 8766 8766 8766 8766 ...
##  $ TIMEHYP : int  8766 8766 8766 0 4285 0 2212 8679 0 0 ...
sum(duplicated(df_p1$RANDID))
## [1] 0
summary(df_p1)
##      RANDID        SEX         TOTCHOL         AGE            SYSBP      
##  Min.   :   2448   1:1944   Min.   :107   Min.   :32.00   Min.   : 83.5  
##  1st Qu.:2440336   2:2490   1st Qu.:206   1st Qu.:42.00   1st Qu.:117.5  
##  Median :4972848            Median :234   Median :49.00   Median :129.0  
##  Mean   :4987278            Mean   :237   Mean   :49.93   Mean   :132.9  
##  3rd Qu.:7463577            3rd Qu.:264   3rd Qu.:57.00   3rd Qu.:144.0  
##  Max.   :9999312            Max.   :696   Max.   :70.00   Max.   :295.0  
##                             NA's   :52                                   
##      DIABP        CURSMOKE    CIGPDAY            BMI        DIABETES
##  Min.   : 48.00   0:2253   Min.   : 0.000   Min.   :15.54   0:4313  
##  1st Qu.: 75.00   1:2181   1st Qu.: 0.000   1st Qu.:23.09   1: 121  
##  Median : 82.00            Median : 0.000   Median :25.45           
##  Mean   : 83.08            Mean   : 8.966   Mean   :25.85           
##  3rd Qu.: 90.00            3rd Qu.:20.000   3rd Qu.:28.09           
##  Max.   :142.50            Max.   :70.000   Max.   :56.80           
##                            NA's   :32       NA's   :19              
##   BPMEDS        HEARTRTE         GLUCOSE         educ      PREVCHD  PREVAP  
##  0   :4229   Min.   : 44.00   Min.   : 40.00   1   :1822   0:4240   0:4287  
##  1   : 144   1st Qu.: 68.00   1st Qu.: 72.00   2   :1281   1: 194   1: 147  
##  NA's:  61   Median : 75.00   Median : 78.00   3   : 716                    
##              Mean   : 75.89   Mean   : 82.19   4   : 502                    
##              3rd Qu.: 83.00   3rd Qu.: 87.00   NA's: 113                    
##              Max.   :143.00   Max.   :394.00                                
##              NA's   :1        NA's   :397                                   
##  PREVMI   PREVSTRK PREVHYP       TIME   PERIOD        HDLC           LDLC     
##  0:4348   0:4402   0:3004   Min.   :0   1:4434   Min.   : NA    Min.   : NA   
##  1:  86   1:  32   1:1430   1st Qu.:0   2:   0   1st Qu.: NA    1st Qu.: NA   
##                             Median :0   3:   0   Median : NA    Median : NA   
##                             Mean   :0            Mean   :NaN    Mean   :NaN   
##                             3rd Qu.:0            3rd Qu.: NA    3rd Qu.: NA   
##                             Max.   :0            Max.   : NA    Max.   : NA   
##                                                  NA's   :4434   NA's   :4434  
##  DEATH    ANGINA   HOSPMI   MI_FCHD  ANYCHD   STROKE   CVD      HYPERTEN
##  0:2884   0:3709   0:3980   0:3703   0:3194   0:4019   0:3277   0:1182  
##  1:1550   1: 725   1: 454   1: 731   1:1240   1: 415   1:1157   1:3252  
##                                                                         
##                                                                         
##                                                                         
##                                                                         
##                                                                         
##      TIMEAP         TIMEMI        TIMEMIFC       TIMECHD        TIMESTRK   
##  Min.   :   0   Min.   :   0   Min.   :   0   Min.   :   0   Min.   :   0  
##  1st Qu.:5348   1st Qu.:6224   1st Qu.:6058   1st Qu.:4767   1st Qu.:6345  
##  Median :8766   Median :8766   Median :8766   Median :8766   Median :8766  
##  Mean   :6901   Mean   :7240   Mean   :7186   Mean   :6666   Mean   :7305  
##  3rd Qu.:8766   3rd Qu.:8766   3rd Qu.:8766   3rd Qu.:8766   3rd Qu.:8766  
##  Max.   :8766   Max.   :8766   Max.   :8766   Max.   :8766   Max.   :8766  
##                                                                            
##     TIMECVD        TIMEDTH        TIMEHYP    
##  Min.   :   0   Min.   :  26   Min.   :   0  
##  1st Qu.:5145   1st Qu.:6974   1st Qu.:   0  
##  Median :8766   Median :8766   Median :2188  
##  Mean   :6818   Mean   :7506   Mean   :3436  
##  3rd Qu.:8766   3rd Qu.:8766   3rd Qu.:6934  
##  Max.   :8766   Max.   :8766   Max.   :8766  
## 

変数をわけてtabbleoneの準備

ordered_vars <- c( "SEX","AGE","BMI","educ", "CURSMOKE","CIGPDAY","PREVCHD","PREVAP","PREVMI","PREVSTRK", "PREVHYP", "DIABETES","BPMEDS", "HEARTRTE","SYSBP","DIABP", "TOTCHOL","GLUCOSE","HDLC","LDLC")

continuous_vars<- c("AGE","BMI","CIGPDAY","HEARTRTE","SYSBP","DIABP", "TOTCHOL","GLUCOSE")
factor_vars<-c("SEX", "educ", "CURSMOKE","PREVCHD","PREVAP","PREVMI","PREVSTRK", "PREVHYP", "DIABETES","BPMEDS")

outcome_vars<-c("TIMEAP","TIMEMI","TIMEMIFC","TIMECHD","TIMESTRK","TIMECVD","TIMEDTH","TIMEHYP")

table1の作成と欠測割合を含めておく

CreateTableOne(vars = ordered_vars, factorVars = factor_vars, data = df_p1) -> tableone
## Warning in ModuleReturnVarsExist(vars, data): These variables only have NA/NaN:
## HDLC LDLC Dropped
print(tableone, missing =T , test = F, explain = T) 
##                       
##                        Overall        Missing
##   n                      4434                
##   SEX = 2 (%)            2490 (56.2)  0.0    
##   AGE (mean (SD))       49.93 (8.68)  0.0    
##   BMI (mean (SD))       25.85 (4.10)  0.4    
##   educ (%)                            2.5    
##      1                   1822 (42.2)         
##      2                   1281 (29.6)         
##      3                    716 (16.6)         
##      4                    502 (11.6)         
##   CURSMOKE = 1 (%)       2181 (49.2)  0.0    
##   CIGPDAY (mean (SD))    8.97 (11.93) 0.7    
##   PREVCHD = 1 (%)         194 ( 4.4)  0.0    
##   PREVAP = 1 (%)          147 ( 3.3)  0.0    
##   PREVMI = 1 (%)           86 ( 1.9)  0.0    
##   PREVSTRK = 1 (%)         32 ( 0.7)  0.0    
##   PREVHYP = 1 (%)        1430 (32.3)  0.0    
##   DIABETES = 1 (%)        121 ( 2.7)  0.0    
##   BPMEDS = 1 (%)          144 ( 3.3)  1.4    
##   HEARTRTE (mean (SD))  75.89 (12.11) 0.0    
##   SYSBP (mean (SD))    132.91 (22.42) 0.0    
##   DIABP (mean (SD))     83.08 (12.06) 0.0    
##   TOTCHOL (mean (SD))  236.98 (44.65) 1.2    
##   GLUCOSE (mean (SD))   82.19 (24.40) 9.0

アウトカムもテーブル評価しておく

CreateTableOne(vars = outcome_vars,  data = df_p1) -> tableone
print(tableone, missing =T , test = F, explain = T) 
##                       
##                        Overall           Missing
##   n                       4434                  
##   TIMEAP (mean (SD))   6901.45 (2749.24) 0.0    
##   TIMEMI (mean (SD))   7240.30 (2483.43) 0.0    
##   TIMEMIFC (mean (SD)) 7185.68 (2530.99) 0.0    
##   TIMECHD (mean (SD))  6666.20 (2878.82) 0.0    
##   TIMESTRK (mean (SD)) 7304.63 (2388.34) 0.0    
##   TIMECVD (mean (SD))  6817.95 (2804.32) 0.0    
##   TIMEDTH (mean (SD))  7505.63 (2213.96) 0.0    
##   TIMEHYP (mean (SD))  3436.21 (3451.65) 0.0

可視化もする

df_p1 |>  #全体
  select(col_cont) |> 
  pivot_longer(cols = col_cont, names_to = "name", values_to = "value") |> 
  ggplot()+
  geom_histogram(aes(x = value), color = "black")+
  facet_wrap(~ name, scales = "free", ncol = 5) +
  theme_bw()+
  theme(text = element_text(size = 12))
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(col_cont)
## 
##   # Now:
##   data %>% select(all_of(col_cont))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 9369 rows containing non-finite values (`stat_bin()`).

df_p1にはLDLC,HDLC,TIMEは不要なため削除する

df_p1 %>% select(-LDLC,-HDLC,-TIME)->df_p1_a
# 数値でない変数を数値に変換
numeric_columns <- df_p1_a %>% select_if(is.numeric) %>% na.omit()

# 相関行列を計算
cor_matrix <- cor(numeric_columns, use = "complete.obs")

print(cor_matrix)
##                 RANDID     TOTCHOL          AGE        SYSBP        DIABP
## RANDID    1.000000e+00 -0.01308338  0.006313416  0.005819597  0.003689941
## TOTCHOL  -1.308338e-02  1.00000000  0.248952601  0.207827142  0.169990333
## AGE       6.313416e-03  0.24895260  1.000000000  0.394686901  0.204824206
## SYSBP     5.819597e-03  0.20782714  0.394686901  1.000000000  0.784110388
## DIABP     3.689941e-03  0.16999033  0.204824206  0.784110388  1.000000000
## CIGPDAY   3.572220e-03 -0.03213282 -0.189860323 -0.102787247 -0.064904264
## BMI       1.122936e-02  0.12289991  0.132056066  0.324423267  0.375287572
## HEARTRTE  3.205149e-02  0.09839785 -0.007392307  0.182793863  0.185598973
## GLUCOSE   1.677001e-02  0.05079913  0.121411157  0.127689999  0.048410059
## TIMEAP   -3.161215e-03 -0.12218587 -0.371067853 -0.302404445 -0.203184415
## TIMEMI    1.914465e-05 -0.09953516 -0.325698882 -0.277340389 -0.181171284
## TIMEMIFC -7.421407e-03 -0.09871309 -0.328230944 -0.284447005 -0.191292224
## TIMECHD  -8.738654e-03 -0.13109915 -0.371606650 -0.306548726 -0.208769133
## TIMESTRK -1.525889e-03 -0.08098408 -0.373292755 -0.328781855 -0.224117724
## TIMECVD  -1.532707e-02 -0.11792654 -0.370852419 -0.321572262 -0.224405073
## TIMEDTH   3.336488e-03 -0.07848455 -0.365158050 -0.300897520 -0.191221828
## TIMEHYP  -1.133558e-02 -0.19293270 -0.369497019 -0.650485317 -0.588976780
##              CIGPDAY         BMI     HEARTRTE     GLUCOSE       TIMEAP
## RANDID    0.00357222  0.01122936  0.032051495  0.01677001 -0.003161215
## TOTCHOL  -0.03213282  0.12289991  0.098397854  0.05079913 -0.122185872
## AGE      -0.18986032  0.13205607 -0.007392307  0.12141116 -0.371067853
## SYSBP    -0.10278725  0.32442327  0.182793863  0.12769000 -0.302404445
## DIABP    -0.06490426  0.37528757  0.185598973  0.04841006 -0.203184415
## CIGPDAY   1.00000000 -0.09719796  0.070243043 -0.05842741 -0.040659875
## BMI      -0.09719796  1.00000000  0.076006974  0.09309522 -0.117851512
## HEARTRTE  0.07024304  0.07600697  1.000000000  0.09044531 -0.043404696
## GLUCOSE  -0.05842741  0.09309522  0.090445311  1.00000000 -0.134234759
## TIMEAP   -0.04065988 -0.11785151 -0.043404696 -0.13423476  1.000000000
## TIMEMI   -0.08753481 -0.07183089 -0.056520081 -0.14729791  0.800859028
## TIMEMIFC -0.08406198 -0.08356582 -0.058183143 -0.15508505  0.797737710
## TIMECHD  -0.05404097 -0.13288021 -0.032275716 -0.13878778  0.938535919
## TIMESTRK -0.04876332 -0.08200755 -0.058829481 -0.14913419  0.756876586
## TIMECVD  -0.06465571 -0.12008681 -0.037014664 -0.14984984  0.811524941
## TIMEDTH  -0.05896194 -0.07347200 -0.072000309 -0.16027186  0.780488617
## TIMEHYP   0.06714813 -0.31421471 -0.153834785 -0.10531437  0.370997490
##                 TIMEMI     TIMEMIFC      TIMECHD     TIMESTRK     TIMECVD
## RANDID    1.914465e-05 -0.007421407 -0.008738654 -0.001525889 -0.01532707
## TOTCHOL  -9.953516e-02 -0.098713093 -0.131099148 -0.080984080 -0.11792654
## AGE      -3.256989e-01 -0.328230944 -0.371606650 -0.373292755 -0.37085242
## SYSBP    -2.773404e-01 -0.284447005 -0.306548726 -0.328781855 -0.32157226
## DIABP    -1.811713e-01 -0.191292224 -0.208769133 -0.224117724 -0.22440507
## CIGPDAY  -8.753481e-02 -0.084061982 -0.054040974 -0.048763318 -0.06465571
## BMI      -7.183089e-02 -0.083565817 -0.132880214 -0.082007549 -0.12008681
## HEARTRTE -5.652008e-02 -0.058183143 -0.032275716 -0.058829481 -0.03701466
## GLUCOSE  -1.472979e-01 -0.155085049 -0.138787776 -0.149134188 -0.14984984
## TIMEAP    8.008590e-01  0.797737710  0.938535919  0.756876586  0.81152494
## TIMEMI    1.000000e+00  0.980071052  0.823383065  0.842341285  0.86046154
## TIMEMIFC  9.800711e-01  1.000000000  0.839538522  0.830842129  0.87891230
## TIMECHD   8.233831e-01  0.839538522  1.000000000  0.717491329  0.87090763
## TIMESTRK  8.423413e-01  0.830842129  0.717491329  1.000000000  0.84955559
## TIMECVD   8.604615e-01  0.878912299  0.870907634  0.849555590  1.00000000
## TIMEDTH   8.848760e-01  0.873081668  0.738669609  0.911272493  0.78077287
## TIMEHYP   3.405246e-01  0.344844868  0.370431832  0.385557488  0.37653448
##               TIMEDTH     TIMEHYP
## RANDID    0.003336488 -0.01133558
## TOTCHOL  -0.078484551 -0.19293270
## AGE      -0.365158050 -0.36949702
## SYSBP    -0.300897520 -0.65048532
## DIABP    -0.191221828 -0.58897678
## CIGPDAY  -0.058961940  0.06714813
## BMI      -0.073471995 -0.31421471
## HEARTRTE -0.072000309 -0.15383479
## GLUCOSE  -0.160271861 -0.10531437
## TIMEAP    0.780488617  0.37099749
## TIMEMI    0.884875951  0.34052462
## TIMEMIFC  0.873081668  0.34484487
## TIMECHD   0.738669609  0.37043183
## TIMESTRK  0.911272493  0.38555749
## TIMECVD   0.780772871  0.37653448
## TIMEDTH   1.000000000  0.35875185
## TIMEHYP   0.358751853  1.00000000
# 相関行列のプロット
corrplot(cor_matrix, method = "shade", shade.col = NA, tl.col = "black", tl.srt = 45)

time=0でのアウトカムがあり、これのせいでcox回帰コードが回らなくなるためこれも除いておく

df_p1_b <- subset(df_p1_a, TIMESTRK > 0)

この状況での背景テーブルの作成(欠測割合あり)

CreateTableOne(vars = ordered_vars, factorVars = factor_vars, data = df_p1_b) -> tableone
## Warning in ModuleReturnVarsExist(vars, data): The data frame does not have:
## HDLC LDLC Dropped
print(tableone, missing =T , test = F, explain = T) 
##                       
##                        Overall         Missing
##   n                      4402                 
##   SEX = 2 (%)            2472 ( 56.2)  0.0    
##   AGE (mean (SD))       49.88 (8.66)   0.0    
##   BMI (mean (SD))       25.83 (4.08)   0.4    
##   educ (%)                             2.6    
##      1                   1802 ( 42.0)         
##      2                   1273 ( 29.7)         
##      3                    713 ( 16.6)         
##      4                    501 ( 11.7)         
##   CURSMOKE = 1 (%)       2169 ( 49.3)  0.0    
##   CIGPDAY (mean (SD))    8.99 (11.95)  0.7    
##   PREVCHD = 1 (%)         187 (  4.2)  0.0    
##   PREVAP = 1 (%)          144 (  3.3)  0.0    
##   PREVMI = 1 (%)           82 (  1.9)  0.0    
##   PREVSTRK = 0 (%)       4402 (100.0)  0.0    
##   PREVHYP = 1 (%)        1404 ( 31.9)  0.0    
##   DIABETES = 1 (%)        119 (  2.7)  0.0    
##   BPMEDS = 1 (%)          136 (  3.1)  1.4    
##   HEARTRTE (mean (SD))  75.91 (12.13)  0.0    
##   SYSBP (mean (SD))    132.76 (22.32)  0.0    
##   DIABP (mean (SD))     83.03 (12.01)  0.0    
##   TOTCHOL (mean (SD))  237.01 (44.65)  1.2    
##   GLUCOSE (mean (SD))   82.13 (24.35)  9.0

欠測評価

#Hmiscパッケージ naclus関数が便利です 
na.patterns <- naclus(df_p1_b) 
plot(na.patterns, ylab="Fraction of NAs in common") 

naplot(na.patterns, which=c('na per var')) 

naplot(na.patterns, which=c('na per obs')) 

vis_miss(df_p1_b,cluster = T)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the visdat package.
##   Please report the issue at <]8;;https://github.com/ropensci/visdat/issueshttps://github.com/ropensci/visdat/issues]8;;>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

生存時間の準備

# ヒストグラムの作成
hist(df_p1_b$TIMESTRK, main="Histogram of TIMESTRK", xlab="TIMESTRK", col="lightblue", border="black")

# 総追跡人年の計算
total_person_years <- sum(df_p1_b$TIMESTRK) / 365.25
cat("Total person-years:", total_person_years, "\n")
## Total person-years: 88675.54
# 追跡期間の平均および中央値の計算
mean_follow_up <- mean(df_p1_b$TIMESTRK) / 365.25
median_follow_up <- median(df_p1_b$TIMESTRK) / 365.25
cat("Mean follow-up period:", mean_follow_up, "years\n")
## Mean follow-up period: 20.14437 years
cat("Median follow-up period:", median_follow_up, "years\n")
## Median follow-up period: 24 years
library(survival)
df_p1_b$STROKE <- as.numeric(df_p1_b$STROKE) #numericにしないとカプランマイヤーはかけない

fit1 <- survfit(Surv(TIMESTRK,STROKE) ~ 1, data = df_p1_b)
ggsurvplot(fit1, data = df_p1_b)

# 追跡期間を無視した累積罹患率の計算
cumulative_incidence_ignored <- sum(df_p1_b$STROKE) / length(df_p1_b$STROKE)
cat("Cumulative incidence (ignoring follow-up time):", cumulative_incidence_ignored, "\n")
## Cumulative incidence (ignoring follow-up time): 1.087006

多重代入

いきなりmethod=“pmm”では回らない。method=“cart”にしてなんとか回るが、患者IDを多重代入に入れない設定とすれば”pmm”でもコードは回る

#init = mice(df_p1_b, maxit=0) 
#predMatrix = init$predictorMatrix
#
#predMatrix[, c("RANDID")] = 0
#impa <- mice(df_p1_b, predictorMatrix = predMatrix,m=100,maxit =200,method="pmm", printFlag = F, seed = 10) 
#

一旦保存

#save(impa, file = "framingham_impa.RData")

ロードする場合はこちら

load("framingham_impa.RData")

収束の確認

plot(impa) 

元々のデータの性質を保っているのか?を確認 なかなか宜しくない結果。CIGPDAYは元データの性質が保たれていない、、、

densityplot(impa)

線形で変数全部ありの状態。cph関数では PREVSTRKがあるとまわらない。ほぼ0である点が原因とおもわれる。formulaからは抜くことで対応ができる。

formula_all <- Surv(TIMESTRK, STROKE) ~ SEX + AGE + BMI + educ + CURSMOKE + CIGPDAY + PREVCHD + PREVAP +PREVMI +  PREVHYP + DIABETES + BPMEDS + HEARTRTE + SYSBP + DIABP + TOTCHOL + GLUCOSE
  

fit_all <- fit.mult.impute(formula_all, cph, xtrans = impa, data = df_p1_b)
## 
## Variance Inflation Factors Due to Imputation:
## 
##      SEX=2        AGE        BMI     educ=2     educ=3     educ=4 CURSMOKE=1 
##       1.00       1.00       1.00       1.02       1.03       1.03       1.01 
##    CIGPDAY  PREVCHD=1   PREVAP=1   PREVMI=1  PREVHYP=1 DIABETES=1   BPMEDS=1 
##       1.02       1.00       1.00       1.00       1.00       1.01       1.04 
##   HEARTRTE      SYSBP      DIABP    TOTCHOL    GLUCOSE 
##       1.00       1.00       1.00       1.01       1.03 
## 
## Rate of Missing Information:
## 
##      SEX=2        AGE        BMI     educ=2     educ=3     educ=4 CURSMOKE=1 
##       0.00       0.00       0.00       0.02       0.03       0.03       0.01 
##    CIGPDAY  PREVCHD=1   PREVAP=1   PREVMI=1  PREVHYP=1 DIABETES=1   BPMEDS=1 
##       0.02       0.00       0.00       0.00       0.00       0.01       0.04 
##   HEARTRTE      SYSBP      DIABP    TOTCHOL    GLUCOSE 
##       0.00       0.00       0.00       0.01       0.03 
## 
## d.f. for t-distribution for Tests of Single Coefficients:
## 
##        SEX=2          AGE          BMI       educ=2       educ=3       educ=4 
## 2.017846e+07 7.076036e+07 1.138594e+07 2.230215e+05 1.221943e+05 9.957766e+04 
##   CURSMOKE=1      CIGPDAY    PREVCHD=1     PREVAP=1     PREVMI=1    PREVHYP=1 
## 1.531198e+06 4.120413e+05 2.463429e+09 6.390058e+08 2.233008e+09 4.842672e+08 
##   DIABETES=1     BPMEDS=1     HEARTRTE        SYSBP        DIABP      TOTCHOL 
## 5.536923e+05 7.434241e+04 1.124619e+07 8.201131e+07 1.372719e+08 6.747603e+05 
##      GLUCOSE 
## 8.981963e+04 
## 
## The following fit components were averaged over the 100 model fits:
## 
##   linear.predictors means stats center

###連続変数の線形性の確認

Age:直線

formula_age <- Surv(TIMESTRK, STROKE) ~  rcs(AGE, 4) 

fit_age <- fit.mult.impute(formula_age, cph, xtrans = impa, data = df_p1_b)
## 
## Variance Inflation Factors Due to Imputation:
## 
##   AGE  AGE' AGE'' 
##     1     1     1 
## 
## Rate of Missing Information:
## 
##   AGE  AGE' AGE'' 
##     0     0     0 
## 
## d.f. for t-distribution for Tests of Single Coefficients:
## 
##          AGE         AGE'        AGE'' 
## 8.638094e+30 4.328405e+31 1.253170e+32 
## 
## The following fit components were averaged over the 100 model fits:
## 
##   linear.predictors means stats center
distribution <- datadist(df_p1_b)
options(datadist = "distribution")
plot(rms::Predict(fit_age, AGE))  

BMI:直線

formula_bmi <- Surv(TIMESTRK, STROKE) ~ rcs(BMI, 3) 
  

fit_bmi <- fit.mult.impute(formula_bmi, cph, xtrans = impa, data = df_p1_b)
## 
## Variance Inflation Factors Due to Imputation:
## 
##  BMI BMI' 
##    1    1 
## 
## Rate of Missing Information:
## 
##  BMI BMI' 
##    0    0 
## 
## d.f. for t-distribution for Tests of Single Coefficients:
## 
##      BMI     BMI' 
##  8611177 13077923 
## 
## The following fit components were averaged over the 100 model fits:
## 
##   linear.predictors means stats center
distribution <- datadist(df_p1_b)
options(datadist = "distribution")
plot(rms::Predict(fit_bmi, BMI))  

CIGPDAY:変曲点がある。knot:3で対応

formula_cigp <- Surv(TIMESTRK, STROKE) ~ rcs(CIGPDAY, 3) 
  

fit_cigp <- fit.mult.impute(formula_cigp, cph, xtrans = impa, data = df_p1_b)
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## 
## Variance Inflation Factors Due to Imputation:
## 
##   CIGPDAY  CIGPDAY' CIGPDAY'' 
##      1.01      1.01      1.01 
## 
## Rate of Missing Information:
## 
##   CIGPDAY  CIGPDAY' CIGPDAY'' 
##      0.01      0.01      0.01 
## 
## d.f. for t-distribution for Tests of Single Coefficients:
## 
##   CIGPDAY  CIGPDAY' CIGPDAY'' 
##  690010.6  535926.7  533706.5 
## 
## The following fit components were averaged over the 100 model fits:
## 
##   linear.predictors means stats center
distribution <- datadist(df_p1_b)
options(datadist = "distribution")
plot(rms::Predict(fit_cigp, CIGPDAY))  

HEARTRTE:直線

formula_rate <- Surv(TIMESTRK, STROKE) ~ rcs(HEARTRTE, 3) 
  

fit_rate <- fit.mult.impute(formula_rate, cph, xtrans = impa, data = df_p1_b)
## 
## Variance Inflation Factors Due to Imputation:
## 
##  HEARTRTE HEARTRTE' 
##         1         1 
## 
## Rate of Missing Information:
## 
##  HEARTRTE HEARTRTE' 
##         0         0 
## 
## d.f. for t-distribution for Tests of Single Coefficients:
## 
##  HEARTRTE HEARTRTE' 
##  28138208  26981499 
## 
## The following fit components were averaged over the 100 model fits:
## 
##   linear.predictors means stats center
distribution <- datadist(df_p1_b)
options(datadist = "distribution")
plot(rms::Predict(fit_rate,HEARTRTE ))  

SYSBP:直線

formula_sys <- Surv(TIMESTRK, STROKE) ~ rcs(SYSBP, 3) 
  

fit_sys <- fit.mult.impute(formula_sys, cph, xtrans = impa, data = df_p1_b)
## 
## Variance Inflation Factors Due to Imputation:
## 
##  SYSBP SYSBP' 
##      1      1 
## 
## Rate of Missing Information:
## 
##  SYSBP SYSBP' 
##      0      0 
## 
## d.f. for t-distribution for Tests of Single Coefficients:
## 
##        SYSBP       SYSBP' 
## 7.440507e+27 1.822433e+30 
## 
## The following fit components were averaged over the 100 model fits:
## 
##   linear.predictors means stats center
distribution <- datadist(df_p1_b)
options(datadist = "distribution")
plot(rms::Predict(fit_sys,SYSBP ))  

DIABP:直線

formula_dia <- Surv(TIMESTRK, STROKE) ~ rcs(DIABP, 3) 
  

fit_dia <- fit.mult.impute(formula_dia, cph, xtrans = impa, data = df_p1_b)
## 
## Variance Inflation Factors Due to Imputation:
## 
##  DIABP DIABP' 
##      1      1 
## 
## Rate of Missing Information:
## 
##  DIABP DIABP' 
##      0      0 
## 
## d.f. for t-distribution for Tests of Single Coefficients:
## 
##        DIABP       DIABP' 
## 4.062593e+29 5.428838e+30 
## 
## The following fit components were averaged over the 100 model fits:
## 
##   linear.predictors means stats center
distribution <- datadist(df_p1_b)
options(datadist = "distribution")
plot(rms::Predict(fit_dia,DIABP ))  

TOTCHOL:直線

formula_chol <- Surv(TIMESTRK, STROKE) ~ rcs(TOTCHOL, 3) 
  

fit_chol <- fit.mult.impute(formula_chol, cph, xtrans = impa, data = df_p1_b)
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 2 ; coefficient may be infinite.

## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 2 ; coefficient may be infinite.

## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 2 ; coefficient may be infinite.

## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 2 ; coefficient may be infinite.

## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 2 ; coefficient may be infinite.

## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 2 ; coefficient may be infinite.

## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 2 ; coefficient may be infinite.

## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 2 ; coefficient may be infinite.

## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 2 ; coefficient may be infinite.

## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 2 ; coefficient may be infinite.

## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 2 ; coefficient may be infinite.

## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 2 ; coefficient may be infinite.

## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 2 ; coefficient may be infinite.

## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 2 ; coefficient may be infinite.
## 
## Variance Inflation Factors Due to Imputation:
## 
##  TOTCHOL TOTCHOL' 
##     1.01     1.01 
## 
## Rate of Missing Information:
## 
##  TOTCHOL TOTCHOL' 
##     0.01     0.01 
## 
## d.f. for t-distribution for Tests of Single Coefficients:
## 
##  TOTCHOL TOTCHOL' 
##  3197091  1991053 
## 
## The following fit components were averaged over the 100 model fits:
## 
##   linear.predictors means stats center
distribution <- datadist(df_p1_b)
options(datadist = "distribution")
plot(rms::Predict(fit_chol,TOTCHOL ))  

GLUCOSE:直線

formula_glu <- Surv(TIMESTRK, STROKE) ~ rcs(GLUCOSE, 3) 
  

fit_glu <- fit.mult.impute(formula_glu, cph, xtrans = impa, data = df_p1_b)
## 
## Variance Inflation Factors Due to Imputation:
## 
##  GLUCOSE GLUCOSE' 
##     1.09     1.09 
## 
## Rate of Missing Information:
## 
##  GLUCOSE GLUCOSE' 
##     0.08     0.08 
## 
## d.f. for t-distribution for Tests of Single Coefficients:
## 
##  GLUCOSE GLUCOSE' 
## 14729.00 15834.58 
## 
## The following fit components were averaged over the 100 model fits:
## 
##   linear.predictors means stats center
distribution <- datadist(df_p1_b)
options(datadist = "distribution")
plot(rms::Predict(fit_glu,GLUCOSE ))  

事前のformulaをformula_preとして作成する。脳卒中既往は重要な因子であるが、ほぼ0であり、抜いておかないとコードがまわらない。

formula_pre <- Surv(TIMESTRK, STROKE) ~ SEX + AGE + BMI + educ + CURSMOKE + rcs(CIGPDAY, 3)  + PREVCHD + PREVAP +PREVMI + PREVHYP + DIABETES + BPMEDS + rcs(HEARTRTE, 3) + SYSBP + DIABP + TOTCHOL + GLUCOSE

変数選択

Lasso (1 standard error rule)

λはimpの1つクロスバリデーションによって算出する

set.seed(10)
library(glmnet)
##  要求されたパッケージ Matrix をロード中です
## 
##  次のパッケージを付け加えます: 'Matrix'
##  以下のオブジェクトは 'package:tidyr' からマスクされています:
## 
##     expand, pack, unpack
## Loaded glmnet 4.1-7
# Prepare the data for glmnet (matrix form and Surv object)
x = model.matrix(formula_pre, data = complete(impa,1)) # Select the first imputed dataset
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
y = Surv(complete(impa,1)$TIMESTRK, complete(impa,1)$STROKE)

# Use cross-validation to find the optimal lambda
cvfit = cv.glmnet(x, y, family="cox", alpha=1)

# Get the lambda that gives minimum mean cross-validated error
lambda_min = cvfit$lambda.min

# Get the largest value of lambda such that error is within 1 standard error of the minimum
lambda_1se = cvfit$lambda.1se

# For illustration purposes, plot the cross-validation curve
plot(cvfit)

# Add vertical lines for both lambda.min and lambda.1se
abline(v = log(c(lambda_min, lambda_1se)), lty = c(1,2), col = c("red", "blue"))

# Add a legend
legend("topright", legend = c("Lambda.min", "Lambda.1se"), lty = c(1,2), col = c("red", "blue"))

# List to store model coefficients
coefs <- list()

# Now apply LASSO with lambda_1se to each of the imputed datasets and store the coefficients
for(i in 1:100) {
  x = model.matrix(formula_pre, data = complete(impa,i))
  y = Surv(complete(impa,i)$TIMESTRK, complete(impa,i)$STROKE)
  fit = glmnet(x, y, family="cox", alpha=1, lambda=lambda_1se)
  coefs[[i]] <- coef(fit)
}
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
# Now you have the coefficients for each of the imputed datasets
# Take the mean of these coefficients to get your final model parameters
final_coefs <- Reduce("+", coefs) / length(coefs)

# Print the final coefficients
print(final_coefs)
## 23 x 1 sparse Matrix of class "dgCMatrix"
##                                     s0
## (Intercept)                .          
## SEX2                      -0.134503663
## AGE                        0.068743231
## BMI                        .          
## educ2                      .          
## educ3                      .          
## educ4                      .          
## CURSMOKE1                  0.149458381
## rcs(CIGPDAY, 3)CIGPDAY     0.001308268
## rcs(CIGPDAY, 3)CIGPDAY'    .          
## rcs(CIGPDAY, 3)CIGPDAY''   .          
## PREVCHD1                   0.190219114
## PREVAP1                    .          
## PREVMI1                    0.062843518
## PREVHYP1                   0.380968025
## DIABETES1                  0.670302070
## BPMEDS1                    0.359382207
## rcs(HEARTRTE, 3)HEARTRTE   .          
## rcs(HEARTRTE, 3)HEARTRTE'  .          
## SYSBP                      0.007805229
## DIABP                      0.011771782
## TOTCHOL                    .          
## GLUCOSE                    .

Lassoによる変数

formula_final <- Surv(TIMESTRK, STROKE) ~ SEX + AGE + CURSMOKE+ rcs(CIGPDAY, 3) + PREVCHD+ PREVMI + PREVHYP +DIABETES  + BPMEDS + SYSBP + DIABP 

比例ハザード性の評価

# Coxモデルのフィット
fit_cox_l <- coxph(formula_final, data = df_p1_b)
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
# 比例ハザードの確認
zph_result_l <- cox.zph(fit_cox_l)

# 結果の表示
print(zph_result_l)
##                    chisq df       p
## SEX              1.62165  1 0.20286
## AGE              1.26422  1 0.26085
## CURSMOKE         0.04838  1 0.82591
## rcs(CIGPDAY, 3)  0.91015  3 0.82298
## PREVCHD          0.76260  1 0.38252
## PREVMI           0.00782  1 0.92952
## PREVHYP          2.12476  1 0.14494
## DIABETES         0.08571  1 0.76971
## BPMEDS           1.95031  1 0.16255
## SYSBP            8.12230  1 0.00437
## DIABP           10.93420  1 0.00094
## GLOBAL          18.80282 13 0.12935
# グラフ化
plot(zph_result_l)

変数決定後のsummaryとcoef値の確認

finalfit<- fit.mult.impute(formula_final,cph,xtrans=impa, data = df_p1_b) 
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## 
## Variance Inflation Factors Due to Imputation:
## 
##      SEX=2        AGE CURSMOKE=1    CIGPDAY   CIGPDAY'  CIGPDAY''  PREVCHD=1 
##       1.00       1.00       1.02       1.02       1.02       1.02       1.00 
##   PREVMI=1  PREVHYP=1 DIABETES=1   BPMEDS=1      SYSBP      DIABP 
##       1.00       1.00       1.00       1.04       1.00       1.00 
## 
## Rate of Missing Information:
## 
##      SEX=2        AGE CURSMOKE=1    CIGPDAY   CIGPDAY'  CIGPDAY''  PREVCHD=1 
##       0.00       0.00       0.02       0.02       0.02       0.02       0.00 
##   PREVMI=1  PREVHYP=1 DIABETES=1   BPMEDS=1      SYSBP      DIABP 
##       0.00       0.00       0.00       0.03       0.00       0.00 
## 
## d.f. for t-distribution for Tests of Single Coefficients:
## 
##        SEX=2          AGE   CURSMOKE=1      CIGPDAY     CIGPDAY'    CIGPDAY'' 
## 3.380902e+07 8.437459e+08 2.356003e+05 2.171501e+05 2.488660e+05 2.579729e+05 
##    PREVCHD=1     PREVMI=1    PREVHYP=1   DIABETES=1     BPMEDS=1        SYSBP 
## 9.467271e+07 5.006002e+08 5.954705e+08 1.724622e+09 8.361859e+04 3.505625e+08 
##        DIABP 
## 1.759530e+09 
## 
## The following fit components were averaged over the 100 model fits:
## 
##   linear.predictors means stats center
summary(finalfit)
##              Effects              Response : Surv(TIMESTRK, STROKE) 
## 
##  Factor         Low High Diff. Effect    S.E.     Lower 0.95 Upper 0.95
##  AGE             42  57  15     1.300200 0.111230  1.082200  1.51830   
##   Hazard Ratio   42  57  15     3.670200       NA  2.951200  4.56430   
##  CIGPDAY          0  20  20     0.680470 0.391680 -0.087214  1.44820   
##   Hazard Ratio    0  20  20     1.974800       NA  0.916480  4.25530   
##  SYSBP          117 144  27     0.184390 0.103080 -0.017634  0.38642   
##   Hazard Ratio  117 144  27     1.202500       NA  0.982520  1.47170   
##  DIABP           75  90  15     0.278270 0.097912  0.086369  0.47018   
##   Hazard Ratio   75  90  15     1.320800       NA  1.090200  1.60030   
##  SEX - 1:2        2   1  NA     0.305860 0.114250  0.081937  0.52978   
##   Hazard Ratio    2   1  NA     1.357800       NA  1.085400  1.69860   
##  CURSMOKE - 1:0   1   2  NA    -0.059581 0.393440 -0.830710  0.71154   
##   Hazard Ratio    1   2  NA     0.942160       NA  0.435740  2.03710   
##  PREVCHD - 1:0    1   2  NA     0.312450 0.249060 -0.175700  0.80061   
##   Hazard Ratio    1   2  NA     1.366800       NA  0.838870  2.22690   
##  PREVMI - 1:0     1   2  NA     0.504190 0.378000 -0.236680  1.24510   
##   Hazard Ratio    1   2  NA     1.655600       NA  0.789240  3.47320   
##  PREVHYP - 1:0    1   2  NA     0.483630 0.142330  0.204670  0.76258   
##   Hazard Ratio    1   2  NA     1.621900       NA  1.227100  2.14380   
##  DIABETES - 1:0   1   2  NA     0.992690 0.206000  0.588940  1.39640   
##   Hazard Ratio    1   2  NA     2.698500       NA  1.802100  4.04080   
##  BPMEDS - 1:0     1   2  NA     0.556720 0.193930  0.176640  0.93681   
##   Hazard Ratio    1   2  NA     1.744900       NA  1.193200  2.55180
# get coefficients from the final fit
coef_lasso <- finalfit$coefficients

# print coefficients
print(coef_lasso)
##        SEX=2          AGE   CURSMOKE=1      CIGPDAY     CIGPDAY'    CIGPDAY'' 
## -0.305859944  0.086682758 -0.059581425  0.057596808 -0.055246468  0.043645984 
##    PREVCHD=1     PREVMI=1    PREVHYP=1   DIABETES=1     BPMEDS=1        SYSBP 
##  0.312452631  0.504192163  0.483626279  0.992687777  0.556724883  0.006829378 
##        DIABP 
##  0.018551488

モデル式:Lasso ハザード比 = exp[-0.306(SEX=2) + 0.087(AGE) - 0.060(CURSMOKE=1) + 0.058(CIGPDAY) - 0.055(CIGPDAY’) + 0.044(CIGPDAY’’) + 0.312(PREVCHD=1) + 0.504(PREVMI=1) + 0.484(PREVHYP=1) + 0.993(DIABETES=1) + 0.557(BPMEDS=1) + 0.007(SYSBP) + 0.019*(DIABP)]

Lasso:c-index

print("apparent c-index") 
## [1] "apparent c-index"
print(finalfit$stats[13]*0.5 + 0.5) 
##       Dxy 
## 0.7827202

Lasso:calibration plot

# 作成データセット数
m <- 100

# 間隔を指定 (5年, 10年, 20年, 24年)
years <- c(5, 10, 20, 24)

# mice::completeデータをスタックする
dat_stack <- complete(impa, 1)

for (i in 2:m){
  dat_stack <- rbind(dat_stack, complete(impa, i))
}

# 作成したデータセット分重みづけする
dat_stack$weight <- 1/m

# miceデータ(をスタックしたデータ)でcph
# クラスターを考慮した分散ではないので、信頼区間は狭く推定される
# ここでは点推定値だけ検討している
cph_stack <- cph(formula = formula_final, 
                 data = dat_stack, 
                 weights = weight, 
                 x = TRUE, 
                 y = TRUE)
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
# 各時間で予測とプロット
for (year in years) {
  
  # タイムポイントを指定 (年を日に変換)
  u <- 365.25 * year
  
  # 予測
  estim_surv <- survest(cph_stack,
                        newdata = dat_stack,
                        times = u)$surv

  ext_val <- val.surv(cph_stack,
                      newdata = dat_stack, 
                      u = u,
                      est.surv = estim_surv,
                      S = Surv(dat_stack$TIMESTRK, dat_stack$STROKE))
  
  # プロット
  plot(ext_val, 
       main = paste(year, "Years"),
       xlab = paste("Predicted probability of Lasso at", year, "years"), 
       ylab = paste("Observed probability at", year, "years"), 
       las = 1)
}

cox回帰での変数選択ではcalibration in the largeやR2値、Brier scoreは算出できない、AICは出しておく。

# Coxモデルのフィット
fit_cox_lasso <- coxph(formula_final, data = df_p1_b)
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
# AIC
aic_lasso <- AIC(fit_cox_lasso)
print(aic_lasso)
## [1] 5705.458

Lasso:クロスバリデーション(rms)による内的検証

set.seed(10)
# Coxモデルのフィット
fit_cox_Lasso <- cph(formula_final, x = TRUE, y = TRUE, data = df_p1_b, surv = TRUE)
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
# クロスバリデーション
cv_lasso <- validate(fit_cox_Lasso, B = 5, method = "crossvalidation", dxy = TRUE)

# 結果の表示
print(cv_lasso)
##       index.orig training   test optimism index.corrected n
## Dxy       0.5625   0.5635 0.5418   0.0218          0.5408 5
## R2        0.1248   0.1278 0.1278   0.0000          0.1249 5
## Slope     1.0000   1.0000 0.9492   0.0508          0.9492 5
## D         0.0700   0.0726 0.0786  -0.0059          0.0759 5
## U        -0.0003  -0.0004 0.0030  -0.0035          0.0031 5
## Q         0.0703   0.0731 0.0755  -0.0025          0.0728 5
## g         1.2771   1.2821 1.2127   0.0693          1.2077 5
# AUCの計算
print("cv_AUC")
## [1] "cv_AUC"
print(cv_lasso["Dxy", "index.corrected"]*0.5 + 0.5)
## [1] 0.7703869

Lasso:ブートストラップ(rms)による内的検証

set.seed(10)
# Coxモデルのフィット
fit_cox_Lasso <- cph(formula_final, x = TRUE, y = TRUE, data = df_p1_b, surv = TRUE)
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
# ブートストラップによる内的検証
cv_Lasso_boot <- validate(fit_cox_Lasso, B = 1000, method = "boot", dxy = TRUE)
## Warning in fitter(..., strata = strata, rownames = rownames, offset = offset, :
## Loglik converged before variable 7 ; coefficient may be infinite.
## Warning in fitter(..., strata = strata, rownames = rownames, offset = offset, :
## Loglik converged before variable 3 ; coefficient may be infinite.
# 結果の表示
print(cv_Lasso_boot)
##       index.orig training   test optimism index.corrected    n
## Dxy       0.5625   0.5676 0.5566   0.0111          0.5515 1000
## R2        0.1248   0.1284 0.1212   0.0071          0.1177 1000
## Slope     1.0000   1.0000 0.9661   0.0339          0.9661 1000
## D         0.0700   0.0722 0.0678   0.0043          0.0656 1000
## U        -0.0003  -0.0003 0.0003  -0.0007          0.0003 1000
## Q         0.0703   0.0725 0.0675   0.0050          0.0653 1000
## g         1.2771   1.2924 1.2460   0.0464          1.2307 1000
# 補正済みAUCの計算
print("corrected_AUC")
## [1] "corrected_AUC"
print(cv_Lasso_boot["Dxy", "index.corrected"]*0.5 + 0.5)
## [1] 0.7757358

AICを基準にしたステップワイズによる変数選択

1. 多重代入データの1つで行う

# Get the first imputed dataset
imputed_dataset_1 <- complete(impa, 1)

# Fit the initial model using cph function from rms package
fit <- cph(formula_pre, data = imputed_dataset_1, x = TRUE, y = TRUE)
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
# Perform stepwise selection using fastbw function
stepwise_model <- fastbw(fit)

# Print the final model
print(stepwise_model)
## 
##  Deleted  Chi-Sq d.f. P      Residual d.f. P      AIC   
##  educ     2.87   3    0.4124  2.87     3   0.4124  -3.13
##  PREVCHD  0.00   1    0.9680  2.87     4   0.5799  -5.13
##  GLUCOSE  0.02   1    0.8774  2.89     5   0.7164  -7.11
##  CURSMOKE 0.06   1    0.8003  2.96     6   0.8142  -9.04
##  BMI      0.17   1    0.6830  3.12     7   0.8733 -10.88
##  HEARTRTE 2.77   2    0.2504  5.89     9   0.7505 -12.11
##  PREVAP   2.01   1    0.1559  7.91    10   0.6379 -12.09
##  TOTCHOL  2.31   1    0.1282 10.22    11   0.5106 -11.78
##  SYSBP    3.45   1    0.0633 13.67    12   0.3223 -10.33
##  SEX      5.73   1    0.0167 19.40    13   0.1113  -6.60
##  BPMEDS   7.18   1    0.0074 26.57    14   0.0219  -1.43
## 
## Approximate Estimates after Deleting Factors
## 
##                Coef     S.E.   Wald Z         P
## AGE         0.09270 0.007036 13.17432 0.000e+00
## CIGPDAY     0.04225 0.036222  1.16638 2.435e-01
## CIGPDAY'    0.01094 0.161776  0.06765 9.461e-01
## CIGPDAY''  -0.05517 0.248862 -0.22169 8.246e-01
## PREVMI=1    0.90633 0.296703  3.05466 2.253e-03
## PREVHYP=1   0.60906 0.133267  4.57021 4.872e-06
## DIABETES=1  0.99549 0.205057  4.85471 1.206e-06
## DIABP       0.02762 0.004814  5.73665 9.657e-09
## 
## Factors in Final Model
## 
## [1] AGE      CIGPDAY  PREVMI   PREVHYP  DIABETES DIABP

### ステップワイズ:100のデータセットの変数をステップワイズですべて確認していく。

set.seed(10)
# Number of imputed datasets
n_imputations <- impa$m  # 'm' stores the number of multiple imputations

# List to store final factors for each imputed dataset
final_factors <- list()

# Apply stepwise selection to each of the imputed datasets
for(i in 1:n_imputations) {
  # Get the i-th imputed dataset
  imputed_dataset <- complete(impa, i)

  # Fit the initial model using cph function from rms package
  fit <- cph(formula_pre, data = imputed_dataset, x = TRUE, y = TRUE)

  # Perform stepwise selection using fastbw function
  stepwise_model <- fastbw(fit)

  # Store the final factors
  final_factors[[i]] <- stepwise_model$names
}
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
## Loglik converged before variable 11 ; coefficient may be infinite.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots

## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
# Now you have the final factors for each of the imputed datasets
# You can examine each factors separately
for(i in 1:length(final_factors)) {
  print(paste("Factors for imputed dataset", i))
  print(final_factors[[i]])
}
## [1] "Factors for imputed dataset 1"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 2"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 3"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 4"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 5"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 6"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 7"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 8"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 9"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 10"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 11"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 12"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 13"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 14"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 15"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 16"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 17"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 18"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 19"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 20"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 21"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 22"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 23"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 24"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 25"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 26"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 27"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 28"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 29"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 30"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 31"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 32"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 33"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 34"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 35"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 36"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 37"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 38"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 39"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 40"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 41"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 42"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 43"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 44"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 45"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 46"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 47"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 48"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 49"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 50"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 51"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 52"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 53"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 54"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 55"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 56"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 57"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 58"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 59"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 60"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 61"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 62"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 63"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 64"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 65"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 66"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 67"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 68"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 69"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 70"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 71"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 72"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 73"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 74"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 75"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 76"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 77"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 78"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 79"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 80"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 81"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 82"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 83"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 84"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 85"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 86"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 87"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 88"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 89"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 90"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 91"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 92"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 93"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 94"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 95"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 96"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 97"
## [1] "AGE"      "CIGPDAY"  "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 98"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"   
## [1] "Factors for imputed dataset 99"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "BPMEDS"   "DIABP"   
## [1] "Factors for imputed dataset 100"
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"
# Collect all the factors in the final model
all_factors <- unique(unlist(final_factors))

# Print all the factors
print("All factors in final model across all imputations:")
## [1] "All factors in final model across all imputations:"
print(all_factors)
## [1] "AGE"      "CIGPDAY"  "PREVMI"   "PREVHYP"  "DIABETES" "DIABP"    "BPMEDS"

ステップワイズによる変数 100のデータセットのうち50以上を認めた変数を選択した。

formula_step <- Surv(TIMESTRK, STROKE) ~ AGE +  rcs(CIGPDAY, 3) + PREVMI+ PREVHYP + DIABETES + BPMEDS + DIABP 

比例ハザード性の評価

# Coxモデルのフィット
fit_cox_s <- coxph(formula_step, data = df_p1_b)
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
# 比例ハザードの確認
zph_result_s <- cox.zph(fit_cox_s)

# 結果の表示
print(zph_result_s)
##                   chisq df       p
## AGE              0.8912  1 0.34516
## rcs(CIGPDAY, 3)  0.7798  3 0.85429
## PREVMI           0.0466  1 0.82909
## PREVHYP          2.4937  1 0.11430
## DIABETES         0.0138  1 0.90640
## BPMEDS           2.0654  1 0.15068
## DIABP           11.5592  1 0.00067
## GLOBAL          14.4684  9 0.10661
# グラフ化
plot(zph_result_s)

変数決定後のsummaryとcoef値の確認

finalfit_step<- fit.mult.impute(formula_step ,cph,xtrans=impa, data = df_p1_b) 
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
## 
## Variance Inflation Factors Due to Imputation:
## 
##        AGE    CIGPDAY   CIGPDAY'  CIGPDAY''   PREVMI=1  PREVHYP=1 DIABETES=1 
##       1.00       1.01       1.01       1.01       1.00       1.00       1.00 
##   BPMEDS=1      DIABP 
##       1.03       1.00 
## 
## Rate of Missing Information:
## 
##        AGE    CIGPDAY   CIGPDAY'  CIGPDAY''   PREVMI=1  PREVHYP=1 DIABETES=1 
##       0.00       0.01       0.01       0.01       0.00       0.00       0.00 
##   BPMEDS=1      DIABP 
##       0.03       0.00 
## 
## d.f. for t-distribution for Tests of Single Coefficients:
## 
##          AGE      CIGPDAY     CIGPDAY'    CIGPDAY''     PREVMI=1    PREVHYP=1 
## 3.400633e+08 6.702230e+05 4.849830e+05 4.790674e+05 1.380157e+10 1.176096e+08 
##   DIABETES=1     BPMEDS=1        DIABP 
## 5.402780e+09 9.058512e+04 3.419442e+08 
## 
## The following fit components were averaged over the 100 model fits:
## 
##   linear.predictors means stats center
summary(finalfit_step)
##              Effects              Response : Surv(TIMESTRK, STROKE) 
## 
##  Factor         Low High Diff. Effect  S.E.     Lower 0.95 Upper 0.95
##  AGE            42  57   15    1.36310 0.106360 1.15460    1.57150   
##   Hazard Ratio  42  57   15    3.90820       NA 3.17280    4.81410   
##  CIGPDAY         0  20   20    0.70410 0.126760 0.45565    0.95255   
##   Hazard Ratio   0  20   20    2.02200       NA 1.57720    2.59230   
##  DIABP          75  90   15    0.39459 0.073299 0.25092    0.53825   
##   Hazard Ratio  75  90   15    1.48380       NA 1.28520    1.71300   
##  PREVMI - 1:0    1   2   NA    0.90121 0.296790 0.31951    1.48290   
##   Hazard Ratio   1   2   NA    2.46260       NA 1.37650    4.40580   
##  PREVHYP - 1:0   1   2   NA    0.55552 0.135120 0.29069    0.82036   
##   Hazard Ratio   1   2   NA    1.74280       NA 1.33730    2.27130   
##  DIABETES - 1:0  1   2   NA    1.01410 0.204700 0.61285    1.41530   
##   Hazard Ratio   1   2   NA    2.75680       NA 1.84570    4.11760   
##  BPMEDS - 1:0    1   2   NA    0.57285 0.188560 0.20327    0.94243   
##   Hazard Ratio   1   2   NA    1.77330       NA 1.22540    2.56620
# get coefficients from the final fit
coef_step <- finalfit_step$coefficients

# print coefficients
print(coef_step)
##          AGE      CIGPDAY     CIGPDAY'    CIGPDAY''     PREVMI=1    PREVHYP=1 
##  0.090871514  0.044002807  0.006480367 -0.047945888  0.901210567  0.555521527 
##   DIABETES=1     BPMEDS=1        DIABP 
##  1.014056505  0.572854349  0.026305844

モデル式:Stepwise ハザード比 = exp[0.091 * AGE + 0.044 * CIGPDAY + 0.006 * (CIGPDAY’) - 0.048 * (CIGPDAY’’) + 0.901 * (PREVMI=1) + 0.556 * (PREVHYP=1) + 1.014 * (DIABETES=1) + 0.573 * (BPMEDS=1) + 0.026 * DIABP]

ステップワイズ:c-index

print("apparent c-index stepwise") 
## [1] "apparent c-index stepwise"
print(finalfit_step$stats[13]*0.5 + 0.5) 
##       Dxy 
## 0.7805723

ステップワイズ:calibration plot

# 作成データセット数
m <- 100

# 間隔を指定 (5年, 10年, 20年, 24年)
years <- c(5, 10, 20, 24)


# mice::completeデータをスタックする
dat_stack <- complete(impa, 1)

for (i in 2:m){
  dat_stack <- rbind(dat_stack, complete(impa, i))
}

# 作成したデータセット分重みづけする
dat_stack$weight <- 1/m

# miceデータ(をスタックしたデータ)でcph
# クラスターを考慮した分散ではないので、信頼区間は狭く推定される
# ここでは点推定値だけ検討している
cph_stack <- cph(formula = formula_step, 
                 data = dat_stack, 
                 weights = weight, 
                 x = TRUE, 
                 y = TRUE)
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
# 各時間で予測とプロット
for (year in years) {
  
  # タイムポイントを指定 (年を日に変換)
  u <- 365.25 * year
  
  # 予測
  estim_surv <- survest(cph_stack,
                        newdata = dat_stack,
                        times = u)$surv

  ext_val <- val.surv(cph_stack,
                      newdata = dat_stack, 
                      u = u,
                      est.surv = estim_surv,
                      S = Surv(dat_stack$TIMESTRK, dat_stack$STROKE)) 
  
  # プロット
  plot(ext_val, 
       main = paste(year, "Years"),
       xlab = paste("Predicted probability of Stepwise at", year, "years"), 
       ylab = paste("Observed probability at", year, "years"), 
       las = 1)
}

cox回帰での変数選択ではcalibration in the largeやR2値、Brier scoreは算出できない、AICは出しておく。

# Coxモデルのフィット
fit_cox_Step <- coxph(formula_step, data = df_p1_b)
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
# AIC
aic_step <- AIC(fit_cox_Step)
print(aic_step)
## [1] 5707.76

ステップワイズ:クロスバリデーション(rms)による内的検証

set.seed(10)
# Coxモデルのフィット
fit_cox_step <- cph(formula_step, x = TRUE, y = TRUE, data = df_p1_b, surv = TRUE)
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
# クロスバリデーション
cv_step <- validate(fit_cox_step, B = 5, method = "crossvalidation", dxy = TRUE)

# 結果の表示
print(cv_step)
##       index.orig training   test optimism index.corrected n
## Dxy       0.5582   0.5592 0.5451   0.0142          0.5441 5
## R2        0.1220   0.1244 0.1290  -0.0046          0.1266 5
## Slope     1.0000   1.0000 0.9721   0.0279          0.9721 5
## D         0.0683   0.0706 0.0794  -0.0087          0.0770 5
## U        -0.0003  -0.0004 0.0027  -0.0031          0.0028 5
## Q         0.0686   0.0711 0.0767  -0.0057          0.0743 5
## g         1.2508   1.2542 1.2132   0.0410          1.2098 5
# AUCの計算
print("cv_AUC")
## [1] "cv_AUC"
print(cv_step["Dxy", "index.corrected"]*0.5 + 0.5)
## [1] 0.7720261

### ステップワイズ:ブートストラップ(rms)による内的検証

set.seed(10)
# Coxモデルのフィット
fit_cox_step <- cph(formula_step, x = TRUE, y = TRUE, data = df_p1_b, surv = TRUE)
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied = fractied): could not obtain 3 interior knots with default algorithm.
##  Used alternate algorithm to obtain 4 knots
# ブートストラップによる内的検証
cv_boot_step <- validate(fit_cox_step, B = 1000, method = "boot", dxy = TRUE)
## Warning in fitter(..., strata = strata, rownames = rownames, offset = offset, :
## Loglik converged before variable 2 ; coefficient may be infinite.
## Warning in fitter(..., strata = strata, rownames = rownames, offset = offset, :
## Loglik converged before variable 3 ; coefficient may be infinite.
## Warning in fitter(..., strata = strata, rownames = rownames, offset = offset, :
## Loglik converged before variable 3,4 ; coefficient may be infinite.
# 結果の表示
print(cv_boot_step)
##       index.orig training   test optimism index.corrected    n
## Dxy       0.5582   0.5621 0.5545   0.0075          0.5507 1000
## R2        0.1220   0.1242 0.1195   0.0047          0.1173 1000
## Slope     1.0000   1.0000 0.9763   0.0237          0.9763 1000
## D         0.0683   0.0697 0.0668   0.0029          0.0654 1000
## U        -0.0003  -0.0003 0.0004  -0.0007          0.0004 1000
## Q         0.0686   0.0700 0.0665   0.0036          0.0650 1000
## g         1.2508   1.2610 1.2285   0.0324          1.2184 1000
# 補正済みAUCの計算
print("corrected_AUC")
## [1] "corrected_AUC"
print(cv_boot_step["Dxy", "index.corrected"]*0.5 + 0.5)
## [1] 0.7753396
## Number of imputed datasets
#n_imputations <- impa$m  # 'm' stores the number of multiple imputations
#
## List to store final models for each imputed dataset
#final_models <- list()
#
## List to store final factors for each imputed dataset
#final_factors <- list()
#
## Apply stepwise selection to each of the imputed datasets
#for(i in 1:n_imputations) {
#  # Get the i-th imputed dataset
#  imputed_dataset <- complete(impa, i)
#
#  # Fit the initial model using cph function from rms package
#  fit <- cph(formula_pre, data = imputed_dataset, x = TRUE, y = TRUE)
#
#  # Perform stepwise selection using fastbw function
#  stepwise_model <- fastbw(fit)
#
#  # Store the final model
#  final_models[[i]] <- stepwise_model
#  
#  # Store the final factors
#  final_factors[[i]] <- stepwise_model$names
#}
#
## Now you have the final models for each of the imputed datasets
## You can examine each model separately
#for(i in 1:length(final_models)) {
#  print(paste("Model for imputed dataset", i))
#  print(final_models[[i]])
#}
#
## Collect all the factors in the final model
#all_factors <- unique(unlist(final_factors))
#
## Print all the factors
#print("All factors in final model across all imputations:")
#print(all_factors)
#

内的検証まとめ

# テーブルを作成
results_table <- data.frame(
  Metric = c("Apparent c-index", "AIC", "Corrected c-index CV", "Corrected c-index Boot"),
  Lasso = c(finalfit$stats[13]*0.5 + 0.5, aic_lasso, cv_lasso["Dxy", "index.corrected"]*0.5 + 0.5,
            cv_Lasso_boot["Dxy", "index.corrected"]*0.5 + 0.5),
  Stepwise = c(finalfit_step$stats[13]*0.5 + 0.5, aic_step, cv_step["Dxy", "index.corrected"]*0.5 + 0.5,
               cv_boot_step["Dxy", "index.corrected"]*0.5 + 0.5)
)

# テーブルを表示
print(results_table)
##                   Metric        Lasso     Stepwise
## 1       Apparent c-index    0.7827202    0.7805723
## 2                    AIC 5705.4577210 5707.7598315
## 3   Corrected c-index CV    0.7703869    0.7720261
## 4 Corrected c-index Boot    0.7757358    0.7753396