library(survminer)
## Warning: package 'survminer' was built under R version 4.3.1
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.1
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.3.1
library(survival)
## 
## Attaching package: 'survival'
## The following object is masked from 'package:survminer':
## 
##     myeloma
library(pec)
## Warning: package 'pec' was built under R version 4.3.1
## Loading required package: prodlim
## Warning: package 'prodlim' was built under R version 4.3.1
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.1
## Warning: package 'tibble' was built under R version 4.3.1
## Warning: package 'tidyr' was built under R version 4.3.1
## Warning: package 'readr' was built under R version 4.3.1
## Warning: package 'purrr' was built under R version 4.3.1
## Warning: package 'dplyr' was built under R version 4.3.1
## Warning: package 'stringr' was built under R version 4.3.1
## Warning: package 'forcats' was built under R version 4.3.1
## Warning: package 'lubridate' was built under R version 4.3.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.3.1
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(nortest)
library(NHANES)
## Warning: package 'NHANES' was built under R version 4.3.1
data(NHANESraw)
NHANES4 <- NHANESraw[c("Diabetes", 
                       "DiabetesAge", 
                       "SmokeNow", 
                       "BMI_WHO",
                       "ID",
                       "Alcohol12PlusYr",
                       "SleepTrouble")]
#NHANES2 <- NHANES1[!(is.na(NHANES1$Diabetes) | NHANES1$Diabetes == ""),]
#NHANES3 <- NHANES2[!(is.na(NHANES2$SmokeNow) | NHANES2$SmokeNow == ""),]
#NHANES4 <- NHANES3[!(is.na(NHANES3$DiabetesAge) | NHANES3$DiabetesAge == ""),]
dat <- select(NHANES4, Diabetes, DiabetesAge, SmokeNow, BMI_WHO, ID, Alcohol12PlusYr, SleepTrouble)
summary(dat)
##  Diabetes      DiabetesAge    SmokeNow             BMI_WHO           ID       
##  No  :17754   Min.   : 1.0    No  : 2779   12.0_18.5   :3641   Min.   :51624  
##  Yes : 1706   1st Qu.:40.0    Yes : 2454   18.5_to_24.9:5354   1st Qu.:56697  
##  NA's:  833   Median :50.0    NA's:15060   25.0_to_29.9:4387   Median :61770  
##               Mean   :49.5                 30.0_plus   :4565   Mean   :61770  
##               3rd Qu.:60.0                 NA's        :2346   3rd Qu.:66843  
##               Max.   :80.0                                     Max.   :71916  
##               NA's   :18856                                                   
##  Alcohol12PlusYr SleepTrouble
##  No  :2820       No  :10077  
##  Yes :7483       Yes : 2981  
##  NA's:9990       NA's: 7235  
##                              
##                              
##                              
## 
summary(NHANES4)
##  Diabetes      DiabetesAge    SmokeNow             BMI_WHO           ID       
##  No  :17754   Min.   : 1.0    No  : 2779   12.0_18.5   :3641   Min.   :51624  
##  Yes : 1706   1st Qu.:40.0    Yes : 2454   18.5_to_24.9:5354   1st Qu.:56697  
##  NA's:  833   Median :50.0    NA's:15060   25.0_to_29.9:4387   Median :61770  
##               Mean   :49.5                 30.0_plus   :4565   Mean   :61770  
##               3rd Qu.:60.0                 NA's        :2346   3rd Qu.:66843  
##               Max.   :80.0                                     Max.   :71916  
##               NA's   :18856                                                   
##  Alcohol12PlusYr SleepTrouble
##  No  :2820       No  :10077  
##  Yes :7483       Yes : 2981  
##  NA's:9990       NA's: 7235  
##                              
##                              
##                              
## 
#Histograms of our continuous variables
hist(NHANES4$DiabetesAge, xlab="DiabetesAge", main="Histogram of DiabetesAge")

#Boxplots of our binary variables
boxplot(DiabetesAge~SmokeNow, data=NHANES4, main="Boxplot of DiabetesAge and Smoking Status", ylab="Age of Diagnosis")

boxplot(DiabetesAge~SleepTrouble, data=NHANES4, main="Boxplot of DiabetesAge and Sleep Issues", ylab="Age of Diagnosis")

boxplot(DiabetesAge~Alcohol12PlusYr, data=NHANES4, main="Boxplot of DiabetesAge and Alcohol Consumption", ylab="Age of Diagnosis")

boxplot(DiabetesAge~BMI_WHO, data=NHANES4, main="Boxplot of DiabetesAge and BMI Catgeory", ylab="Age of Diagnosis")

boxplot(DiabetesAge~Diabetes, data=NHANES4, main="Boxplot of DiabetesAge and Diabetes", ylab="Age of Diagnosis")

# Convert Diabetes to a factor variable
dat$Diabetes <- factor(dat$Diabetes, levels = c("No", "Yes"))
# Data distribution by status only
table1 <- dat %>%
  group_by(Diabetes) %>%
  count()
kbl(table1, booktabs = T, caption = "Data distribution by Diabetes status only") %>%
  column_spec(2, width = "10em") %>%
  kable_styling(latex_options = "HOLD_positions")
Data distribution by Diabetes status only
Diabetes n
No 17754
Yes 1706
NA 833
# Data distribution by status and smoking
table2 <- dat %>%
  group_by(Diabetes, SmokeNow) %>%
  count()
kbl(table2, booktabs = T, caption = "Data distribution by Diabetes status and Smoking status") %>%
  column_spec(2, width = "10em") %>%
  kable_styling(latex_options = "HOLD_positions")
Data distribution by Diabetes status and Smoking status
Diabetes SmokeNow n
No No 2200
No Yes 2180
No NA 13374
Yes No 575
Yes Yes 273
Yes NA 858
NA No 4
NA Yes 1
NA NA 828
# Data distribution by status and sleep issues
table3 <- dat %>%
  group_by(Diabetes, SleepTrouble) %>%
  count()
kbl(table3, booktabs = T, caption = "Data distribution by Diabetes status and Sleep Issue status") %>%
  column_spec(2, width = "10em") %>%
  kable_styling(latex_options = "HOLD_positions")
Data distribution by Diabetes status and Sleep Issue status
Diabetes SleepTrouble n
No No 8981
No Yes 2388
No NA 6385
Yes No 1092
Yes Yes 589
Yes NA 25
NA No 4
NA Yes 4
NA NA 825
# Data distribution by status and alcohol consumption
table4 <- dat %>%
  group_by(Diabetes, Alcohol12PlusYr) %>%
  count()
kbl(table4, booktabs = T, caption = "Data distribution by Diabetes status and Alcohol Consumption status") %>%
  column_spec(2, width = "10em") %>%
  kable_styling(latex_options = "HOLD_positions")
Data distribution by Diabetes status and Alcohol Consumption status
Diabetes Alcohol12PlusYr n
No No 2297
No Yes 6519
No NA 8938
Yes No 522
Yes Yes 960
Yes NA 224
NA No 1
NA Yes 4
NA NA 828
# Data distribution by status and BMI Category
table5 <- dat %>%
  group_by(Diabetes, BMI_WHO) %>%
  count()
kbl(table5, booktabs = T, caption = "Data distribution by Diabetes status and BMI Category") %>%
  column_spec(2, width = "10em") %>%
  kable_styling(latex_options = "HOLD_positions")
Data distribution by Diabetes status and BMI Category
Diabetes BMI_WHO n
No 12.0_18.5 3625
No 18.5_to_24.9 5134
No 25.0_to_29.9 3936
No 30.0_plus 3639
No NA 1420
Yes 12.0_18.5 15
Yes 18.5_to_24.9 215
Yes 25.0_to_29.9 450
Yes 30.0_plus 925
Yes NA 101
NA 12.0_18.5 1
NA 18.5_to_24.9 5
NA 25.0_to_29.9 1
NA 30.0_plus 1
NA NA 825
#The descriptive statistics for group "Diabetes = Yes"
dat%>% filter(Diabetes=="Yes")%>%
  select(DiabetesAge,
         SmokeNow,
         SleepTrouble,
         Alcohol12PlusYr,
         BMI_WHO)%>%
  summary()
##   DiabetesAge   SmokeNow   SleepTrouble Alcohol12PlusYr         BMI_WHO   
##  Min.   : 1.0   No  :575   No  :1092    No  :522        12.0_18.5   : 15  
##  1st Qu.:40.0   Yes :273   Yes : 589    Yes :960        18.5_to_24.9:215  
##  Median :50.0   NA's:858   NA's:  25    NA's:224        25.0_to_29.9:450  
##  Mean   :49.5                                           30.0_plus   :925  
##  3rd Qu.:60.0                                           NA's        :101  
##  Max.   :80.0                                                             
##  NA's   :269
#Kaplan-Meier
#Fit and Plot of fitted model
km_fit <- survfit(Surv(DiabetesAge) ~ SmokeNow, data = NHANES4)
km_fit
## Call: survfit(formula = Surv(DiabetesAge) ~ SmokeNow, data = NHANES4)
## 
##    19585 observations deleted due to missingness 
##                n events median 0.95LCL 0.95UCL
## SmokeNow=No  479    479     55      53      56
## SmokeNow=Yes 229    229     49      46      50
km_fit1 <- survfit(Surv(DiabetesAge) ~ SleepTrouble, data = NHANES4)
km_fit1
## Call: survfit(formula = Surv(DiabetesAge) ~ SleepTrouble, data = NHANES4)
## 
##    18868 observations deleted due to missingness 
##                    n events median 0.95LCL 0.95UCL
## SleepTrouble=No  922    922     52      50      54
## SleepTrouble=Yes 503    503     50      48      50
km_fit2 <- survfit(Surv(DiabetesAge) ~ Alcohol12PlusYr, data = NHANES4)
km_fit2
## Call: survfit(formula = Surv(DiabetesAge) ~ Alcohol12PlusYr, data = NHANES4)
## 
##    19041 observations deleted due to missingness 
##                       n events median 0.95LCL 0.95UCL
## Alcohol12PlusYr=No  448    448     54      51      55
## Alcohol12PlusYr=Yes 804    804     50      49      51
km_fit3 <- survfit(Surv(DiabetesAge) ~ BMI_WHO, data = NHANES4)
km_fit3
## Call: survfit(formula = Surv(DiabetesAge) ~ BMI_WHO, data = NHANES4)
## 
##    18946 observations deleted due to missingness 
##                        n events median 0.95LCL 0.95UCL
## BMI_WHO=12.0_18.5     13     13   38.0      10      NA
## BMI_WHO=18.5_to_24.9 180    180   51.5      50      55
## BMI_WHO=25.0_to_29.9 375    375   52.0      50      55
## BMI_WHO=30.0_plus    779    779   50.0      49      51
km_fit4 <- survfit(Surv(DiabetesAge) ~ SmokeNow + SleepTrouble, data = NHANES4)
km_fit4
## Call: survfit(formula = Surv(DiabetesAge) ~ SmokeNow + SleepTrouble, 
##     data = NHANES4)
## 
##    19585 observations deleted due to missingness 
##                                  n events median 0.95LCL 0.95UCL
## SmokeNow=No, SleepTrouble=No   295    295     55      54      58
## SmokeNow=No, SleepTrouble=Yes  184    184     53      50      56
## SmokeNow=Yes, SleepTrouble=No  125    125     50      50      53
## SmokeNow=Yes, SleepTrouble=Yes 104    104     45      41      49
ggsurvplot(
  km_fit, 
  linetype = "strata",
  pval = TRUE,
  conf.int = TRUE,
  risk.table = TRUE,
  surv.median.line = "hv",
  ggtheme = theme_bw(),
  palette = c("#E7B800", "#2E9FDF"),
  title = "Kaplan-Meier Survival Function Estimate - Smoking"
)

palette.colors()
##         black        orange       skyblue   bluishgreen        yellow 
##     "#000000"     "#E69F00"     "#56B4E9"     "#009E73"     "#F0E442" 
##          blue    vermillion reddishpurple          gray 
##     "#0072B2"     "#D55E00"     "#CC79A7"     "#999999"
ggsurvplot(
  km_fit1, 
  linetype = "strata",
  pval = TRUE,
  conf.int = TRUE,
  risk.table = TRUE,
  surv.median.line = "hv",
  ggtheme = theme_bw(),
  palette = c("#E7B800", "#2E9FDF"),
  title = "Kaplan-Meier Survival Function Estimate - Sleep Issues"
)

ggsurvplot(
  km_fit2, 
  linetype = "strata",
  pval = TRUE,
  conf.int = TRUE,
  risk.table = TRUE,
  surv.median.line = "hv",
  ggtheme = theme_bw(),
  palette = c("#E7B800", "#2E9FDF"),
  title = "Kaplan-Meier Survival Function Estimate - Alcohol Consumption"
)

ggsurvplot(
  km_fit3, 
  linetype = "strata",
  pval = TRUE,
  conf.int = TRUE,
  risk.table = TRUE,
  surv.median.line = "hv",
  ggtheme = theme_bw(),
  palette = c("#E7B800", "#2E9FDF","#999999","#CC79A7"),
  title = "Kaplan-Meier Survival Function Estimate - BMI"
)

ggsurvplot(
  km_fit4, 
  linetype = "strata",
  pval = TRUE,
  conf.int = TRUE,
  risk.table = TRUE,
  surv.median.line = "hv",
  ggtheme = theme_bw(),
  palette = c("#E7B800", "#2E9FDF","#999999","#CC79A7"),
  title = "Kaplan-Meier Survival Function Estimate - Smoking and Sleep Trouble"
)

##KM - Cumulative Risk Function
ggsurvplot(
  km_fit, 
  fun = "event",
  linetype = "strata",
  pval = TRUE,
  conf.int = TRUE,
    ggtheme = theme_bw(),
  palette = c("#E7B800", "#2E9FDF"),
  title = "Kaplan-Meier Cumulative Risk Function Estimate - Smoking"
)

ggsurvplot(
  km_fit1, 
  fun = "event",
  linetype = "strata",
  pval = TRUE,
  conf.int = TRUE,
  ggtheme = theme_bw(),
  palette = c("#E7B800", "#2E9FDF"),
  title = "Kaplan-Meier Cumulative Risk Function Estimate - Sleep Issues"
)

ggsurvplot(
  km_fit2, 
  fun = "event",
  linetype = "strata",
  pval = TRUE,
  conf.int = TRUE,
  ggtheme = theme_bw(),
  palette = c("#E7B800", "#2E9FDF"),
  title = "Kaplan-Meier Cumulative Risk Function Estimate - Alcohol Consumption"
)

ggsurvplot(
  km_fit3, 
  fun = "event",
  linetype = "strata",
  pval = TRUE,
  conf.int = TRUE,
  ggtheme = theme_bw(),
  palette = c("#E7B800", "#2E9FDF","#999999","#CC79A7"),
  title = "Kaplan-Meier Cumulative Risk Function Estimate - BMI"
)

ggsurvplot(
  km_fit4, 
  fun = "event",
  linetype = "strata",
  pval = TRUE,
  conf.int = TRUE,
  ggtheme = theme_bw(),
  palette = c("#E7B800", "#2E9FDF","#999999","#CC79A7"),
  title = "Kaplan-Meier Cumulative Risk Function Estimate - Smoking and Sleep Trouble"
)

## KM-cumulative hazard function
ggsurvplot(
  km_fit, 
  fun="cumhaz",
  linetype = "strata",
  pval = TRUE,
  conf.int = TRUE,
  ggtheme = theme_bw(),
  palette = c("#E7B800", "#2E9FDF"),
  title = "Kaplan-Meier Cumulative Hazard Function Estimate - Smoking"
)

ggsurvplot(
  km_fit1, 
  fun="cumhaz",
  linetype = "strata",
  pval = TRUE,
  conf.int = TRUE,
  ggtheme = theme_bw(),
  palette = c("#E7B800", "#2E9FDF"),
  title = "Kaplan-Meier Cumulative Hazard Function Estimate - Sleep Issues"
)

ggsurvplot(
  km_fit2, 
  fun="cumhaz",
  linetype = "strata",
  pval = TRUE,
  conf.int = TRUE,
  ggtheme = theme_bw(),
  palette = c("#E7B800", "#2E9FDF"),
  title = "Kaplan-Meier Cumulative Hazard Function Estimate - Alcohol Consumption"
)

ggsurvplot(
  km_fit3, 
  fun="cumhaz",
  linetype = "strata",
  pval = TRUE,
  conf.int = TRUE,
  ggtheme = theme_bw(),
  palette = c("#E7B800", "#2E9FDF","#999999","#CC79A7"),
  title = "Kaplan-Meier Cumulative Hazard Function Estimate - BMI"
)

ggsurvplot(
  km_fit4, 
  fun="cumhaz",
  linetype = "strata",
  pval = TRUE,
  conf.int = TRUE,
  ggtheme = theme_bw(),
  palette = c("#E7B800", "#2E9FDF","#999999","#CC79A7"),
  title = "Kaplan-Meier Cumulative Hazard Function Estimate - Smoking and Sleep Trouble"
)

## Unstratified Log-rank test
km_diff <- survdiff(Surv(DiabetesAge)~SmokeNow, data=NHANES4)
km_diff
## Call:
## survdiff(formula = Surv(DiabetesAge) ~ SmokeNow, data = NHANES4)
## 
## n=708, 19585 observations deleted due to missingness.
## 
##                N Observed Expected (O-E)^2/E (O-E)^2/V
## SmokeNow=No  479      479      544      7.67      36.6
## SmokeNow=Yes 229      229      164     25.35      36.6
## 
##  Chisq= 36.6  on 1 degrees of freedom, p= 1e-09
km_diff2 <- survdiff(Surv(DiabetesAge)~SleepTrouble, data=NHANES4)
km_diff2
## Call:
## survdiff(formula = Surv(DiabetesAge) ~ SleepTrouble, data = NHANES4)
## 
## n=1425, 18868 observations deleted due to missingness.
## 
##                    N Observed Expected (O-E)^2/E (O-E)^2/V
## SleepTrouble=No  922      922      999      5.92      21.7
## SleepTrouble=Yes 503      503      426     13.88      21.7
## 
##  Chisq= 21.7  on 1 degrees of freedom, p= 3e-06
km_diff3 <- survdiff(Surv(DiabetesAge)~Alcohol12PlusYr, data=NHANES4)
km_diff3
## Call:
## survdiff(formula = Surv(DiabetesAge) ~ Alcohol12PlusYr, data = NHANES4)
## 
## n=1252, 19041 observations deleted due to missingness.
## 
##                       N Observed Expected (O-E)^2/E (O-E)^2/V
## Alcohol12PlusYr=No  448      448      497      4.89      8.94
## Alcohol12PlusYr=Yes 804      804      755      3.22      8.94
## 
##  Chisq= 8.9  on 1 degrees of freedom, p= 0.003
km_diff4 <- survdiff(Surv(DiabetesAge)~BMI_WHO, data=NHANES4)
km_diff4
## Call:
## survdiff(formula = Surv(DiabetesAge) ~ BMI_WHO, data = NHANES4)
## 
## n=1347, 18946 observations deleted due to missingness.
## 
##                        N Observed Expected (O-E)^2/E (O-E)^2/V
## BMI_WHO=12.0_18.5     13       13     17.5      1.16      1.39
## BMI_WHO=18.5_to_24.9 180      180    198.7      1.76      2.29
## BMI_WHO=25.0_to_29.9 375      375    433.5      7.90     12.99
## BMI_WHO=30.0_plus    779      779    697.3      9.58     22.16
## 
##  Chisq= 22.9  on 3 degrees of freedom, p= 4e-05
##2. Cox proportional hazards model
###Cox model with only SmokeNow as covariate
cox.fit<-coxph(Surv(DiabetesAge) ~ SmokeNow, id=ID, data = NHANES4)
summary(cox.fit)
## Call:
## coxph(formula = Surv(DiabetesAge) ~ SmokeNow, data = NHANES4, 
##     id = ID)
## 
##   n= 708, number of events= 708 
##    (19585 observations deleted due to missingness)
## 
##                coef exp(coef) se(coef)     z Pr(>|z|)    
## SmokeNowYes 0.49830   1.64592  0.08254 6.037 1.57e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##             exp(coef) exp(-coef) lower .95 upper .95
## SmokeNowYes     1.646     0.6076       1.4     1.935
## 
## Concordance= 0.552  (se = 0.01 )
## Likelihood ratio test= 34.42  on 1 df,   p=4e-09
## Wald test            = 36.44  on 1 df,   p=2e-09
## Score (logrank) test = 37.16  on 1 df,   p=1e-09
### Cox model with SmokeNow and BMI as covariates
cox.fit2<-coxph(Surv(DiabetesAge) ~ SmokeNow + SleepTrouble, id=ID, data = NHANES4)
summary(cox.fit2)
## Call:
## coxph(formula = Surv(DiabetesAge) ~ SmokeNow + SleepTrouble, 
##     data = NHANES4, id = ID)
## 
##   n= 708, number of events= 708 
##    (19585 observations deleted due to missingness)
## 
##                    coef exp(coef) se(coef)     z Pr(>|z|)    
## SmokeNowYes     0.50983   1.66500  0.08271 6.164 7.08e-10 ***
## SleepTroubleYes 0.36709   1.44353  0.07766 4.727 2.28e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## SmokeNowYes         1.665     0.6006     1.416     1.958
## SleepTroubleYes     1.444     0.6927     1.240     1.681
## 
## Concordance= 0.572  (se = 0.012 )
## Likelihood ratio test= 56.2  on 2 df,   p=6e-13
## Wald test            = 58.55  on 2 df,   p=2e-13
## Score (logrank) test = 59.52  on 2 df,   p=1e-13
### Cox model with SmokeNow, BMI, and Gender as covariates
cox.fit3<-coxph(Surv(DiabetesAge) ~ SmokeNow + SleepTrouble + Alcohol12PlusYr, id=ID, data = NHANES4)
summary(cox.fit3)
## Call:
## coxph(formula = Surv(DiabetesAge) ~ SmokeNow + SleepTrouble + 
##     Alcohol12PlusYr, data = NHANES4, id = ID)
## 
##   n= 636, number of events= 636 
##    (19657 observations deleted due to missingness)
## 
##                        coef exp(coef) se(coef)     z Pr(>|z|)    
## SmokeNowYes        0.490823  1.633660 0.087261 5.625 1.86e-08 ***
## SleepTroubleYes    0.340039  1.405003 0.082055 4.144 3.41e-05 ***
## Alcohol12PlusYrYes 0.007561  1.007590 0.098079 0.077    0.939    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                    exp(coef) exp(-coef) lower .95 upper .95
## SmokeNowYes            1.634     0.6121    1.3768     1.938
## SleepTroubleYes        1.405     0.7117    1.1963     1.650
## Alcohol12PlusYrYes     1.008     0.9925    0.8314     1.221
## 
## Concordance= 0.567  (se = 0.013 )
## Likelihood ratio test= 45.35  on 3 df,   p=8e-10
## Wald test            = 47.13  on 3 df,   p=3e-10
## Score (logrank) test = 47.83  on 3 df,   p=2e-10
### Cox-model with SmokeNow, BMI, Gender, Alcohol12PlusYr as covariates
cox.fit4<-coxph(Surv(DiabetesAge) ~ SmokeNow + SleepTrouble + Alcohol12PlusYr + BMI_WHO, id=ID, data = NHANES4)
summary(cox.fit4)
## Call:
## coxph(formula = Surv(DiabetesAge) ~ SmokeNow + SleepTrouble + 
##     Alcohol12PlusYr + BMI_WHO, data = NHANES4, id = ID)
## 
##   n= 607, number of events= 607 
##    (19686 observations deleted due to missingness)
## 
##                        coef exp(coef) se(coef)     z Pr(>|z|)    
## SmokeNowYes         0.50626   1.65908  0.09055 5.591 2.26e-08 ***
## SleepTroubleYes     0.32070   1.37810  0.08508 3.770 0.000163 ***
## Alcohol12PlusYrYes  0.02618   1.02652  0.10101 0.259 0.795526    
## BMI_WHO18.5_to_24.9 0.88139   2.41425  0.51689 1.705 0.088161 .  
## BMI_WHO25.0_to_29.9 0.76775   2.15492  0.50975 1.506 0.132033    
## BMI_WHO30.0_plus    1.05489   2.87167  0.50713 2.080 0.037514 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                     exp(coef) exp(-coef) lower .95 upper .95
## SmokeNowYes             1.659     0.6027    1.3893     1.981
## SleepTroubleYes         1.378     0.7256    1.1664     1.628
## Alcohol12PlusYrYes      1.027     0.9742    0.8422     1.251
## BMI_WHO18.5_to_24.9     2.414     0.4142    0.8766     6.649
## BMI_WHO25.0_to_29.9     2.155     0.4641    0.7935     5.852
## BMI_WHO30.0_plus        2.872     0.3482    1.0628     7.759
## 
## Concordance= 0.581  (se = 0.014 )
## Likelihood ratio test= 57.88  on 6 df,   p=1e-10
## Wald test            = 57.95  on 6 df,   p=1e-10
## Score (logrank) test = 58.99  on 6 df,   p=7e-11
## Cox-model with SmokeNow, BMI, Gender, Alcohol12PlusYr, SleepTrouble as covariates
cox.fit5<-coxph(Surv(DiabetesAge) ~ SmokeNow + SleepTrouble + BMI_WHO, id=ID, data = NHANES4)
summary(cox.fit5)
## Call:
## coxph(formula = Surv(DiabetesAge) ~ SmokeNow + SleepTrouble + 
##     BMI_WHO, data = NHANES4, id = ID)
## 
##   n= 662, number of events= 662 
##    (19631 observations deleted due to missingness)
## 
##                        coef exp(coef) se(coef)     z Pr(>|z|)    
## SmokeNowYes         0.53397   1.70569  0.08640 6.180 6.39e-10 ***
## SleepTroubleYes     0.31698   1.37297  0.08133 3.897 9.72e-05 ***
## BMI_WHO18.5_to_24.9 0.88806   2.43041  0.46067 1.928   0.0539 .  
## BMI_WHO25.0_to_29.9 0.75744   2.13280  0.45476 1.666   0.0958 .  
## BMI_WHO30.0_plus    1.05646   2.87618  0.45223 2.336   0.0195 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                     exp(coef) exp(-coef) lower .95 upper .95
## SmokeNowYes             1.706     0.5863    1.4400     2.020
## SleepTroubleYes         1.373     0.7283    1.1707     1.610
## BMI_WHO18.5_to_24.9     2.430     0.4115    0.9853     5.995
## BMI_WHO25.0_to_29.9     2.133     0.4689    0.8747     5.201
## BMI_WHO30.0_plus        2.876     0.3477    1.1854     6.978
## 
## Concordance= 0.588  (se = 0.013 )
## Likelihood ratio test= 67.39  on 5 df,   p=4e-13
## Wald test            = 67.73  on 5 df,   p=3e-13
## Score (logrank) test = 68.93  on 5 df,   p=2e-13
## Cox-model with SmokeNow, BMI, Gender, Alcohol12PlusYr, SleepTrouble, Gender as covariates
cox.fit6<-coxph(Surv(DiabetesAge) ~ SmokeNow + SleepTrouble  + SmokeNow*SleepTrouble, id=ID, data = NHANES4)
summary(cox.fit6)
## Call:
## coxph(formula = Surv(DiabetesAge) ~ SmokeNow + SleepTrouble + 
##     SmokeNow * SleepTrouble, data = NHANES4, id = ID)
## 
##   n= 708, number of events= 708 
##    (19585 observations deleted due to missingness)
## 
##                                coef exp(coef) se(coef)     z Pr(>|z|)    
## SmokeNowYes                 0.40274   1.49591  0.10887 3.699 0.000216 ***
## SleepTroubleYes             0.28316   1.32731  0.09514 2.976 0.002918 ** 
## SmokeNowYes:SleepTroubleYes 0.25351   1.28854  0.16329 1.553 0.120536    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                             exp(coef) exp(-coef) lower .95 upper .95
## SmokeNowYes                     1.496     0.6685    1.2085     1.852
## SleepTroubleYes                 1.327     0.7534    1.1015     1.599
## SmokeNowYes:SleepTroubleYes     1.289     0.7761    0.9356     1.775
## 
## Concordance= 0.572  (se = 0.012 )
## Likelihood ratio test= 58.6  on 3 df,   p=1e-12
## Wald test            = 64.55  on 3 df,   p=6e-14
## Score (logrank) test = 67.53  on 3 df,   p=1e-14
###Prediction of survival probability
ggsurvplot(survfit(cox.fit2),
           data=NHANES4, 
           palette="#239FDF", 
           ggtheme=theme_minimal(),
           title = "Survival Probability Plot - Cox PH")

##Check proportionality of hazard (PH) assumption
##Goodness of fit approach
test.ph<-cox.zph(cox.fit2)
test.ph
##                chisq df    p
## SmokeNow     0.78767  1 0.37
## SleepTrouble 0.00684  1 0.93
## GLOBAL       0.79796  2 0.67
#Graphical diagnostic using the function ggcoxzph for each covariate to show Schoenfeld residuals
ggcoxzph(test.ph)

##Testing influential observations
#This is about DFBETAS
ggcoxdiagnostics(cox.fit2, type = "dfbeta",
                 linear.predictions = FALSE, ggtheme = theme_bw())
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the survminer package.
##   Please report the issue at <https://github.com/kassambara/survminer/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'

#Deviance of residuals
ggcoxdiagnostics(cox.fit2, type = "deviance",
                 linear.predictions = FALSE, ggtheme = theme_bw())
## `geom_smooth()` using formula = 'y ~ x'

#Linearity - Isn't working, probably won't use
#ggcoxfunctional(Surv(DiabetesAge) ~ DiabetesAge + log(DiabetesAge) + sqrt(DiabetesAge), data = NHANES4)
##3. WEIBULL
weib.fit<-survreg(Surv(DiabetesAge)~1, data=NHANES4)
weib.fit
## Call:
## survreg(formula = Surv(DiabetesAge) ~ 1, data = NHANES4)
## 
## Coefficients:
## (Intercept) 
##    4.001838 
## 
## Scale= 0.2861537 
## 
## Loglik(model)= -6055.2   Loglik(intercept only)= -6055.2
## n=1437 (18856 observations deleted due to missingness)
#Age of Diabetes Diagnosis
surv<-seq(0.99, 0.1, by = -0.1)
t<-predict(weib.fit, type = "quantile", p = 1-surv, newdata=data.frame(1))
table6<-head(data.frame(time=t, surv=surv), n=20)
kbl(table6, booktabs = T, caption = "Age for Diabetes Diagnosis")%>%
  column_spec(2) %>%
  kable_styling(latex_options="HOLD_position")
Age for Diabetes Diagnosis
time surv
14.66537 0.99
29.56910 0.89
36.17316 0.79
41.18816 0.69
45.55338 0.59
49.65904 0.49
53.76495 0.39
58.14282 0.29
63.24349 0.19
#Median Age of Diabetes Diagnosis
predict(weib.fit, type = "quantile", p = 1 - 0.5, newdata = data.frame(1))
##        1 
## 49.25247
#Survival curve
surv.wb<-data.frame(time=t, surv=surv, upper = NA, lower = NA, std.err = NA)
ggsurvplot_df(fit=surv.wb, surv.geom=geom_line, title = "Survival Curve - Diabetes Diagnosis")

ggsurvplot(
  km_fit, 
  fun = "event",
  linetype = "strata",
  pval = TRUE,
  conf.int = TRUE,
  ggtheme = theme_bw(),
  palette = c("#E7B800", "#2E9FDF"),
  title = "Kaplan-Meier Cumulative Risk Function Estimate - Smoking"
)

# Weibull model 2
weib.fit2<- survreg(Surv(DiabetesAge) ~ SmokeNow + SleepTrouble, data = NHANES4)
coef(weib.fit2)
##     (Intercept)     SmokeNowYes SleepTroubleYes 
##      4.08072410     -0.10956400     -0.07921849
summary(weib.fit2)
## 
## Call:
## survreg(formula = Surv(DiabetesAge) ~ SmokeNow + SleepTrouble, 
##     data = NHANES4)
##                   Value Std. Error      z       p
## (Intercept)      4.0807     0.0143 285.41 < 2e-16
## SmokeNowYes     -0.1096     0.0204  -5.36 8.2e-08
## SleepTroubleYes -0.0792     0.0195  -4.07 4.7e-05
## Log(scale)      -1.3693     0.0310 -44.16 < 2e-16
## 
## Scale= 0.254 
## 
## Weibull distribution
## Loglik(model)= -2925.8   Loglik(intercept only)= -2946.8
##  Chisq= 41.97 on 2 degrees of freedom, p= 7.7e-10 
## Number of Newton-Raphson Iterations: 6 
## n=708 (19585 observations deleted due to missingness)
# Goodness of fit test for weibull model

# MLE Estimates
# Transformation: mu.hat = -log(lambda) , sigma.hat=1/alpha
theta.hat <- signif(exp(weib.fit2$coefficients),5)
tau.hat <- signif(1/weib.fit2$scale,5)
KS.test <- suppressWarnings(ks.test(NHANES4$DiabetesAge, "pweibull",shape=tau.hat, scale=theta.hat))
KS.test
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  NHANES4$DiabetesAge
## D = 0.99652, p-value < 2.2e-16
## alternative hypothesis: two-sided
library(goftest)
## 
## Attaching package: 'goftest'
## The following objects are masked from 'package:nortest':
## 
##     ad.test, cvm.test
# H0: Data follows specified distribution
DiabetesAge1 <- na.omit(NHANES4$DiabetesAge)
AD.test <- goftest::ad.test(DiabetesAge1, null="pweibull", shape=tau.hat, scale=theta.hat)
AD.test
## 
##  Anderson-Darling test of goodness-of-fit
##  Null hypothesis: Weibull distribution
##  with parameters shape = 3.9326, scale = 59.188 0.89622 0.92384
##  Parameters assumed to be fixed
## 
## data:  DiabetesAge1
## An = Inf, p-value = 4.175e-07
## Model comparisons
# Cox model
AIC.cox = signif(extractAIC(cox.fit2))
BIC.cox = -2*(logLik(cox.fit2)) + 2*log(228)

# Weibull model
AIC.Weibull = signif(extractAIC(weib.fit2),5)
BIC.Weibull = -2*(logLik(weib.fit2)) + 2*log(228)
Model <- c("Cox", "Weibull")
AIC <- c(AIC.cox[2], AIC.Weibull[2])
BIC = round(c(BIC.cox, BIC.Weibull),2)

#Table 7
table7 <- as.data.frame<- cbind(Model,AIC,BIC)
kbl(table7, booktabs = T, caption = "Model comparisons")%>%
  column_spec(3, width = "10em") %>%
  kable_styling(latex_options = "HOLD_position")
Model comparisons
Model AIC BIC
Cox 7832.62 7839.48
Weibull 5859.7 5862.53
library(penalized)
## Warning: package 'penalized' was built under R version 4.3.1
## Welcome to penalized. For extended examples, see vignette("penalized").
attach(NHANES4) 
NHANES5 = NHANES4[!is.na(DiabetesAge) & !is.na(SmokeNow) & !is.na(SleepTrouble),]
attach(NHANES5)
## The following objects are masked from NHANES4:
## 
##     Alcohol12PlusYr, BMI_WHO, Diabetes, DiabetesAge, ID, SleepTrouble,
##     SmokeNow
m1.pen <- penalized(Surv(DiabetesAge),
                    penalized=data.frame(cbind(SmokeNow,SleepTrouble, SmokeNow:SleepTrouble)),
                    standardize=T, 
                    lambda1=10)
## # nonzero coefficients: 3

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 3          

# nonzero coefficients: 2          

# nonzero coefficients: 2          

# nonzero coefficients: 2          

# nonzero coefficients: 2          

# nonzero coefficients: 2          

# nonzero coefficients: 2          
m1.pen
## Penalized cox regression object
## 3 regression coefficients of which 2 are non-zero
## 
## Loglikelihood =   -4660.119 
## L1 penalty =  2.9651     at lambda1 =  10
round(coef(m1.pen, standardize=T), 3)
## SleepTrouble           V3 
##        0.043        0.254
set.seed(34)
m2.pen <- profL1(Surv(DiabetesAge),
                      penalized=data.frame(cbind(SleepTrouble, SmokeNow:SleepTrouble)),
                      standardize=T, fold=10, minlambda1=1, maxlambda1=12)
## lambda= 12   12345678910cvl= -4623.498 
## lambda= 11.88889     12345678910cvl= -4623.495 
## lambda= 11.77778     12345678910cvl= -4623.492 
## lambda= 11.66667     12345678910cvl= -4623.489 
## lambda= 11.55556     12345678910cvl= -4623.486 
## lambda= 11.44444     12345678910cvl= -4623.483 
## lambda= 11.33333     12345678910cvl= -4623.48 
## lambda= 11.22222     12345678910cvl= -4623.477 
## lambda= 11.11111     12345678910cvl= -4623.475 
## lambda= 11   12345678910cvl= -4623.472 
## lambda= 10.88889     12345678910cvl= -4623.469 
## lambda= 10.77778     12345678910cvl= -4623.466 
## lambda= 10.66667     12345678910cvl= -4623.464 
## lambda= 10.55556     12345678910cvl= -4623.461 
## lambda= 10.44444     12345678910cvl= -4623.458 
## lambda= 10.33333     12345678910cvl= -4623.456 
## lambda= 10.22222     12345678910cvl= -4623.453 
## lambda= 10.11111     12345678910cvl= -4623.451 
## lambda= 10   12345678910cvl= -4623.448 
## lambda= 9.888889     12345678910cvl= -4623.446 
## lambda= 9.777778     12345678910cvl= -4623.443 
## lambda= 9.666667     12345678910cvl= -4623.441 
## lambda= 9.555556     12345678910cvl= -4623.439 
## lambda= 9.444444     12345678910cvl= -4623.436 
## lambda= 9.333333     12345678910cvl= -4623.434 
## lambda= 9.222222     12345678910cvl= -4623.432 
## lambda= 9.111111     12345678910cvl= -4623.43 
## lambda= 9    12345678910cvl= -4623.427 
## lambda= 8.888889     12345678910cvl= -4623.425 
## lambda= 8.777778     12345678910cvl= -4623.423 
## lambda= 8.666667     12345678910cvl= -4623.421 
## lambda= 8.555556     12345678910cvl= -4623.419 
## lambda= 8.444444     12345678910cvl= -4623.417 
## lambda= 8.333333     12345678910cvl= -4623.415 
## lambda= 8.222222     12345678910cvl= -4623.413 
## lambda= 8.111111     12345678910cvl= -4623.411 
## lambda= 8    12345678910cvl= -4623.409 
## lambda= 7.888889     12345678910cvl= -4623.407 
## lambda= 7.777778     12345678910cvl= -4623.406 
## lambda= 7.666667     12345678910cvl= -4623.404 
## lambda= 7.555556     12345678910cvl= -4623.402 
## lambda= 7.444444     12345678910cvl= -4623.4 
## lambda= 7.333333     12345678910cvl= -4623.399 
## lambda= 7.222222     12345678910cvl= -4623.397 
## lambda= 7.111111     12345678910cvl= -4623.395 
## lambda= 7    12345678910cvl= -4623.394 
## lambda= 6.888889     12345678910cvl= -4623.392 
## lambda= 6.777778     12345678910cvl= -4623.391 
## lambda= 6.666667     12345678910cvl= -4623.389 
## lambda= 6.555556     12345678910cvl= -4623.388 
## lambda= 6.444444     12345678910cvl= -4623.386 
## lambda= 6.333333     12345678910cvl= -4623.385 
## lambda= 6.222222     12345678910cvl= -4623.384 
## lambda= 6.111111     12345678910cvl= -4623.382 
## lambda= 6    12345678910cvl= -4623.381 
## lambda= 5.888889     12345678910cvl= -4623.38 
## lambda= 5.777778     12345678910cvl= -4623.378 
## lambda= 5.666667     12345678910cvl= -4623.377 
## lambda= 5.555556     12345678910cvl= -4623.376 
## lambda= 5.444444     12345678910cvl= -4623.375 
## lambda= 5.333333     12345678910cvl= -4623.374 
## lambda= 5.222222     12345678910cvl= -4623.373 
## lambda= 5.111111     12345678910cvl= -4623.372 
## lambda= 5    12345678910cvl= -4623.371 
## lambda= 4.888889     12345678910cvl= -4623.37 
## lambda= 4.777778     12345678910cvl= -4623.369 
## lambda= 4.666667     12345678910cvl= -4623.368 
## lambda= 4.555556     12345678910cvl= -4623.367 
## lambda= 4.444444     12345678910cvl= -4623.366 
## lambda= 4.333333     12345678910cvl= -4623.365 
## lambda= 4.222222     12345678910cvl= -4623.365 
## lambda= 4.111111     12345678910cvl= -4623.364 
## lambda= 4    12345678910cvl= -4623.363 
## lambda= 3.888889     12345678910cvl= -4623.362 
## lambda= 3.777778     12345678910cvl= -4623.362 
## lambda= 3.666667     12345678910cvl= -4623.361 
## lambda= 3.555556     12345678910cvl= -4623.361 
## lambda= 3.444444     12345678910cvl= -4623.36 
## lambda= 3.333333     12345678910cvl= -4623.36 
## lambda= 3.222222     12345678910cvl= -4623.359 
## lambda= 3.111111     12345678910cvl= -4623.359 
## lambda= 3    12345678910cvl= -4623.358 
## lambda= 2.888889     12345678910cvl= -4623.358 
## lambda= 2.777778     12345678910cvl= -4623.357 
## lambda= 2.666667     12345678910cvl= -4623.357 
## lambda= 2.555556     12345678910cvl= -4623.357 
## lambda= 2.444444     12345678910cvl= -4623.357 
## lambda= 2.333333     12345678910cvl= -4623.356 
## lambda= 2.222222     12345678910cvl= -4623.356 
## lambda= 2.111111     12345678910cvl= -4623.356 
## lambda= 2    12345678910cvl= -4623.356 
## lambda= 1.888889     12345678910cvl= -4623.356 
## lambda= 1.777778     12345678910cvl= -4623.356 
## lambda= 1.666667     12345678910cvl= -4623.356 
## lambda= 1.555556     12345678910cvl= -4623.356 
## lambda= 1.444444     12345678910cvl= -4623.356 
## lambda= 1.333333     12345678910cvl= -4623.356 
## lambda= 1.222222     12345678910cvl= -4623.356 
## lambda= 1.111111     12345678910cvl= -4623.356 
## lambda= 1    12345678910cvl= -4623.356
plot(m2.pen$cvl ~ m2.pen$lambda, type="l", log="x",
     xlab="lambda", ylab="Cross-validated log partial likelihood")
set.seed(34)
m3.pen <- optL1(Surv(DiabetesAge),
                      penalized=data.frame(cbind(SleepTrouble, SmokeNow:SleepTrouble)), standardize=T,
                      fold=10)
## lambda= 60.26345     12345678910cvl= -4626.695 
## lambda= 97.50831     12345678910cvl= -4632.821 
## lambda= 37.24486     12345678910cvl= -4624.719 
## lambda= 15.8574  12345678910cvl= -4623.624 
## lambda= 24.02668     12345678910cvl= -4623.945 
## lambda= 9.800409     12345678910cvl= -4623.444 
## lambda= 6.056986     12345678910cvl= -4623.382 
## lambda= 1.679194     12345678910cvl= -4623.356 
## lambda= 1.64624  12345678910cvl= -4623.356 
## lambda= 1.639796     12345678910cvl= -4623.356 
## lambda= 1.639765     12345678910cvl= -4623.356 
## lambda= 1.013431     12345678910cvl= -4623.356 
## lambda= 1.400527     12345678910cvl= -4623.356 
## lambda= 1.548384     12345678910cvl= -4623.356 
## lambda= 1.604861     12345678910cvl= -4623.356 
## lambda= 1.626433     12345678910cvl= -4623.356 
## lambda= 1.634673     12345678910cvl= -4623.356 
## lambda= 1.637254     12345678910cvl= -4623.356 
## lambda= 1.63853  12345678910cvl= -4623.356 
## lambda= 1.639164     12345678910cvl= -4623.356 
## lambda= 1.63948  12345678910cvl= -4623.356
m3.pen$lambda
## [1] 1.639765
abline(v=m3.pen$lambda, col="gray")

m4.pen <- penalized(Surv(DiabetesAge),
                        penalized=data.frame(cbind(SleepTrouble, SmokeNow:SleepTrouble)), standardize=T,
                        steps=10, lambda1=1)
## # nonzero coefficients: 2

# nonzero coefficients: 0          
## # nonzero coefficients: 2

# nonzero coefficients: 1          

# nonzero coefficients: 1          

# nonzero coefficients: 1          

# nonzero coefficients: 1          

# nonzero coefficients: 1          
## # nonzero coefficients: 2

# nonzero coefficients: 1          

# nonzero coefficients: 1          

# nonzero coefficients: 1          

# nonzero coefficients: 1          

# nonzero coefficients: 1          
## # nonzero coefficients: 2

# nonzero coefficients: 1          

# nonzero coefficients: 1          

# nonzero coefficients: 1          

# nonzero coefficients: 1          

# nonzero coefficients: 1          
## # nonzero coefficients: 2

# nonzero coefficients: 1          

# nonzero coefficients: 1          

# nonzero coefficients: 1          

# nonzero coefficients: 1          

# nonzero coefficients: 1          
## # nonzero coefficients: 2

# nonzero coefficients: 1          

# nonzero coefficients: 1          

# nonzero coefficients: 1          

# nonzero coefficients: 1          

# nonzero coefficients: 1          
## # nonzero coefficients: 2

# nonzero coefficients: 2          

# nonzero coefficients: 2          

# nonzero coefficients: 1          

# nonzero coefficients: 1          

# nonzero coefficients: 1          

# nonzero coefficients: 1          
## # nonzero coefficients: 2

# nonzero coefficients: 2          

# nonzero coefficients: 2          

# nonzero coefficients: 2          

# nonzero coefficients: 2          

# nonzero coefficients: 2          
## # nonzero coefficients: 2

# nonzero coefficients: 2          

# nonzero coefficients: 2          

# nonzero coefficients: 2          

# nonzero coefficients: 2          

# nonzero coefficients: 2          
## # nonzero coefficients: 2

# nonzero coefficients: 2          

# nonzero coefficients: 2          

# nonzero coefficients: 2          

# nonzero coefficients: 2          

# nonzero coefficients: 2          
plotpath(m4.pen, labelsize=0.9, standardize=T, log="x",
         lwd=2)
abline(v=m3.pen$lambda, col="gray", lwd=2)