library(survminer)
## Warning: package 'survminer' was built under R version 4.3.1
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.1
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.3.1
library(survival)
##
## Attaching package: 'survival'
## The following object is masked from 'package:survminer':
##
## myeloma
library(pec)
## Warning: package 'pec' was built under R version 4.3.1
## Loading required package: prodlim
## Warning: package 'prodlim' was built under R version 4.3.1
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.1
## Warning: package 'tibble' was built under R version 4.3.1
## Warning: package 'tidyr' was built under R version 4.3.1
## Warning: package 'readr' was built under R version 4.3.1
## Warning: package 'purrr' was built under R version 4.3.1
## Warning: package 'dplyr' was built under R version 4.3.1
## Warning: package 'stringr' was built under R version 4.3.1
## Warning: package 'forcats' was built under R version 4.3.1
## Warning: package 'lubridate' was built under R version 4.3.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.3.1
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(nortest)
library(NHANES)
## Warning: package 'NHANES' was built under R version 4.3.1
data(NHANESraw)
NHANES4 <- NHANESraw[c("Diabetes",
"DiabetesAge",
"SmokeNow",
"BMI_WHO",
"ID",
"Alcohol12PlusYr",
"SleepTrouble")]
#NHANES2 <- NHANES1[!(is.na(NHANES1$Diabetes) | NHANES1$Diabetes == ""),]
#NHANES3 <- NHANES2[!(is.na(NHANES2$SmokeNow) | NHANES2$SmokeNow == ""),]
#NHANES4 <- NHANES3[!(is.na(NHANES3$DiabetesAge) | NHANES3$DiabetesAge == ""),]
dat <- select(NHANES4, Diabetes, DiabetesAge, SmokeNow, BMI_WHO, ID, Alcohol12PlusYr, SleepTrouble)
summary(dat)
## Diabetes DiabetesAge SmokeNow BMI_WHO ID
## No :17754 Min. : 1.0 No : 2779 12.0_18.5 :3641 Min. :51624
## Yes : 1706 1st Qu.:40.0 Yes : 2454 18.5_to_24.9:5354 1st Qu.:56697
## NA's: 833 Median :50.0 NA's:15060 25.0_to_29.9:4387 Median :61770
## Mean :49.5 30.0_plus :4565 Mean :61770
## 3rd Qu.:60.0 NA's :2346 3rd Qu.:66843
## Max. :80.0 Max. :71916
## NA's :18856
## Alcohol12PlusYr SleepTrouble
## No :2820 No :10077
## Yes :7483 Yes : 2981
## NA's:9990 NA's: 7235
##
##
##
##
summary(NHANES4)
## Diabetes DiabetesAge SmokeNow BMI_WHO ID
## No :17754 Min. : 1.0 No : 2779 12.0_18.5 :3641 Min. :51624
## Yes : 1706 1st Qu.:40.0 Yes : 2454 18.5_to_24.9:5354 1st Qu.:56697
## NA's: 833 Median :50.0 NA's:15060 25.0_to_29.9:4387 Median :61770
## Mean :49.5 30.0_plus :4565 Mean :61770
## 3rd Qu.:60.0 NA's :2346 3rd Qu.:66843
## Max. :80.0 Max. :71916
## NA's :18856
## Alcohol12PlusYr SleepTrouble
## No :2820 No :10077
## Yes :7483 Yes : 2981
## NA's:9990 NA's: 7235
##
##
##
##
#Histograms of our continuous variables
hist(NHANES4$DiabetesAge, xlab="DiabetesAge", main="Histogram of DiabetesAge")

#Boxplots of our binary variables
boxplot(DiabetesAge~SmokeNow, data=NHANES4, main="Boxplot of DiabetesAge and Smoking Status", ylab="Age of Diagnosis")

boxplot(DiabetesAge~SleepTrouble, data=NHANES4, main="Boxplot of DiabetesAge and Sleep Issues", ylab="Age of Diagnosis")

boxplot(DiabetesAge~Alcohol12PlusYr, data=NHANES4, main="Boxplot of DiabetesAge and Alcohol Consumption", ylab="Age of Diagnosis")

boxplot(DiabetesAge~BMI_WHO, data=NHANES4, main="Boxplot of DiabetesAge and BMI Catgeory", ylab="Age of Diagnosis")

boxplot(DiabetesAge~Diabetes, data=NHANES4, main="Boxplot of DiabetesAge and Diabetes", ylab="Age of Diagnosis")

# Convert Diabetes to a factor variable
dat$Diabetes <- factor(dat$Diabetes, levels = c("No", "Yes"))
# Data distribution by status only
table1 <- dat %>%
group_by(Diabetes) %>%
count()
kbl(table1, booktabs = T, caption = "Data distribution by Diabetes status only") %>%
column_spec(2, width = "10em") %>%
kable_styling(latex_options = "HOLD_positions")
Data distribution by Diabetes status only
|
Diabetes
|
n
|
|
No
|
17754
|
|
Yes
|
1706
|
|
NA
|
833
|
# Data distribution by status and smoking
table2 <- dat %>%
group_by(Diabetes, SmokeNow) %>%
count()
kbl(table2, booktabs = T, caption = "Data distribution by Diabetes status and Smoking status") %>%
column_spec(2, width = "10em") %>%
kable_styling(latex_options = "HOLD_positions")
Data distribution by Diabetes status and Smoking status
|
Diabetes
|
SmokeNow
|
n
|
|
No
|
No
|
2200
|
|
No
|
Yes
|
2180
|
|
No
|
NA
|
13374
|
|
Yes
|
No
|
575
|
|
Yes
|
Yes
|
273
|
|
Yes
|
NA
|
858
|
|
NA
|
No
|
4
|
|
NA
|
Yes
|
1
|
|
NA
|
NA
|
828
|
# Data distribution by status and sleep issues
table3 <- dat %>%
group_by(Diabetes, SleepTrouble) %>%
count()
kbl(table3, booktabs = T, caption = "Data distribution by Diabetes status and Sleep Issue status") %>%
column_spec(2, width = "10em") %>%
kable_styling(latex_options = "HOLD_positions")
Data distribution by Diabetes status and Sleep Issue status
|
Diabetes
|
SleepTrouble
|
n
|
|
No
|
No
|
8981
|
|
No
|
Yes
|
2388
|
|
No
|
NA
|
6385
|
|
Yes
|
No
|
1092
|
|
Yes
|
Yes
|
589
|
|
Yes
|
NA
|
25
|
|
NA
|
No
|
4
|
|
NA
|
Yes
|
4
|
|
NA
|
NA
|
825
|
# Data distribution by status and alcohol consumption
table4 <- dat %>%
group_by(Diabetes, Alcohol12PlusYr) %>%
count()
kbl(table4, booktabs = T, caption = "Data distribution by Diabetes status and Alcohol Consumption status") %>%
column_spec(2, width = "10em") %>%
kable_styling(latex_options = "HOLD_positions")
Data distribution by Diabetes status and Alcohol Consumption status
|
Diabetes
|
Alcohol12PlusYr
|
n
|
|
No
|
No
|
2297
|
|
No
|
Yes
|
6519
|
|
No
|
NA
|
8938
|
|
Yes
|
No
|
522
|
|
Yes
|
Yes
|
960
|
|
Yes
|
NA
|
224
|
|
NA
|
No
|
1
|
|
NA
|
Yes
|
4
|
|
NA
|
NA
|
828
|
# Data distribution by status and BMI Category
table5 <- dat %>%
group_by(Diabetes, BMI_WHO) %>%
count()
kbl(table5, booktabs = T, caption = "Data distribution by Diabetes status and BMI Category") %>%
column_spec(2, width = "10em") %>%
kable_styling(latex_options = "HOLD_positions")
Data distribution by Diabetes status and BMI Category
|
Diabetes
|
BMI_WHO
|
n
|
|
No
|
12.0_18.5
|
3625
|
|
No
|
18.5_to_24.9
|
5134
|
|
No
|
25.0_to_29.9
|
3936
|
|
No
|
30.0_plus
|
3639
|
|
No
|
NA
|
1420
|
|
Yes
|
12.0_18.5
|
15
|
|
Yes
|
18.5_to_24.9
|
215
|
|
Yes
|
25.0_to_29.9
|
450
|
|
Yes
|
30.0_plus
|
925
|
|
Yes
|
NA
|
101
|
|
NA
|
12.0_18.5
|
1
|
|
NA
|
18.5_to_24.9
|
5
|
|
NA
|
25.0_to_29.9
|
1
|
|
NA
|
30.0_plus
|
1
|
|
NA
|
NA
|
825
|
#The descriptive statistics for group "Diabetes = Yes"
dat%>% filter(Diabetes=="Yes")%>%
select(DiabetesAge,
SmokeNow,
SleepTrouble,
Alcohol12PlusYr,
BMI_WHO)%>%
summary()
## DiabetesAge SmokeNow SleepTrouble Alcohol12PlusYr BMI_WHO
## Min. : 1.0 No :575 No :1092 No :522 12.0_18.5 : 15
## 1st Qu.:40.0 Yes :273 Yes : 589 Yes :960 18.5_to_24.9:215
## Median :50.0 NA's:858 NA's: 25 NA's:224 25.0_to_29.9:450
## Mean :49.5 30.0_plus :925
## 3rd Qu.:60.0 NA's :101
## Max. :80.0
## NA's :269
#Kaplan-Meier
#Fit and Plot of fitted model
km_fit <- survfit(Surv(DiabetesAge) ~ SmokeNow, data = NHANES4)
km_fit
## Call: survfit(formula = Surv(DiabetesAge) ~ SmokeNow, data = NHANES4)
##
## 19585 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## SmokeNow=No 479 479 55 53 56
## SmokeNow=Yes 229 229 49 46 50
km_fit1 <- survfit(Surv(DiabetesAge) ~ SleepTrouble, data = NHANES4)
km_fit1
## Call: survfit(formula = Surv(DiabetesAge) ~ SleepTrouble, data = NHANES4)
##
## 18868 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## SleepTrouble=No 922 922 52 50 54
## SleepTrouble=Yes 503 503 50 48 50
km_fit2 <- survfit(Surv(DiabetesAge) ~ Alcohol12PlusYr, data = NHANES4)
km_fit2
## Call: survfit(formula = Surv(DiabetesAge) ~ Alcohol12PlusYr, data = NHANES4)
##
## 19041 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## Alcohol12PlusYr=No 448 448 54 51 55
## Alcohol12PlusYr=Yes 804 804 50 49 51
km_fit3 <- survfit(Surv(DiabetesAge) ~ BMI_WHO, data = NHANES4)
km_fit3
## Call: survfit(formula = Surv(DiabetesAge) ~ BMI_WHO, data = NHANES4)
##
## 18946 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## BMI_WHO=12.0_18.5 13 13 38.0 10 NA
## BMI_WHO=18.5_to_24.9 180 180 51.5 50 55
## BMI_WHO=25.0_to_29.9 375 375 52.0 50 55
## BMI_WHO=30.0_plus 779 779 50.0 49 51
km_fit4 <- survfit(Surv(DiabetesAge) ~ SmokeNow + SleepTrouble, data = NHANES4)
km_fit4
## Call: survfit(formula = Surv(DiabetesAge) ~ SmokeNow + SleepTrouble,
## data = NHANES4)
##
## 19585 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## SmokeNow=No, SleepTrouble=No 295 295 55 54 58
## SmokeNow=No, SleepTrouble=Yes 184 184 53 50 56
## SmokeNow=Yes, SleepTrouble=No 125 125 50 50 53
## SmokeNow=Yes, SleepTrouble=Yes 104 104 45 41 49
ggsurvplot(
km_fit,
linetype = "strata",
pval = TRUE,
conf.int = TRUE,
risk.table = TRUE,
surv.median.line = "hv",
ggtheme = theme_bw(),
palette = c("#E7B800", "#2E9FDF"),
title = "Kaplan-Meier Survival Function Estimate - Smoking"
)

palette.colors()
## black orange skyblue bluishgreen yellow
## "#000000" "#E69F00" "#56B4E9" "#009E73" "#F0E442"
## blue vermillion reddishpurple gray
## "#0072B2" "#D55E00" "#CC79A7" "#999999"
ggsurvplot(
km_fit1,
linetype = "strata",
pval = TRUE,
conf.int = TRUE,
risk.table = TRUE,
surv.median.line = "hv",
ggtheme = theme_bw(),
palette = c("#E7B800", "#2E9FDF"),
title = "Kaplan-Meier Survival Function Estimate - Sleep Issues"
)

ggsurvplot(
km_fit2,
linetype = "strata",
pval = TRUE,
conf.int = TRUE,
risk.table = TRUE,
surv.median.line = "hv",
ggtheme = theme_bw(),
palette = c("#E7B800", "#2E9FDF"),
title = "Kaplan-Meier Survival Function Estimate - Alcohol Consumption"
)

ggsurvplot(
km_fit3,
linetype = "strata",
pval = TRUE,
conf.int = TRUE,
risk.table = TRUE,
surv.median.line = "hv",
ggtheme = theme_bw(),
palette = c("#E7B800", "#2E9FDF","#999999","#CC79A7"),
title = "Kaplan-Meier Survival Function Estimate - BMI"
)

ggsurvplot(
km_fit4,
linetype = "strata",
pval = TRUE,
conf.int = TRUE,
risk.table = TRUE,
surv.median.line = "hv",
ggtheme = theme_bw(),
palette = c("#E7B800", "#2E9FDF","#999999","#CC79A7"),
title = "Kaplan-Meier Survival Function Estimate - Smoking and Sleep Trouble"
)

##KM - Cumulative Risk Function
ggsurvplot(
km_fit,
fun = "event",
linetype = "strata",
pval = TRUE,
conf.int = TRUE,
ggtheme = theme_bw(),
palette = c("#E7B800", "#2E9FDF"),
title = "Kaplan-Meier Cumulative Risk Function Estimate - Smoking"
)

ggsurvplot(
km_fit1,
fun = "event",
linetype = "strata",
pval = TRUE,
conf.int = TRUE,
ggtheme = theme_bw(),
palette = c("#E7B800", "#2E9FDF"),
title = "Kaplan-Meier Cumulative Risk Function Estimate - Sleep Issues"
)

ggsurvplot(
km_fit2,
fun = "event",
linetype = "strata",
pval = TRUE,
conf.int = TRUE,
ggtheme = theme_bw(),
palette = c("#E7B800", "#2E9FDF"),
title = "Kaplan-Meier Cumulative Risk Function Estimate - Alcohol Consumption"
)

ggsurvplot(
km_fit3,
fun = "event",
linetype = "strata",
pval = TRUE,
conf.int = TRUE,
ggtheme = theme_bw(),
palette = c("#E7B800", "#2E9FDF","#999999","#CC79A7"),
title = "Kaplan-Meier Cumulative Risk Function Estimate - BMI"
)

ggsurvplot(
km_fit4,
fun = "event",
linetype = "strata",
pval = TRUE,
conf.int = TRUE,
ggtheme = theme_bw(),
palette = c("#E7B800", "#2E9FDF","#999999","#CC79A7"),
title = "Kaplan-Meier Cumulative Risk Function Estimate - Smoking and Sleep Trouble"
)

## KM-cumulative hazard function
ggsurvplot(
km_fit,
fun="cumhaz",
linetype = "strata",
pval = TRUE,
conf.int = TRUE,
ggtheme = theme_bw(),
palette = c("#E7B800", "#2E9FDF"),
title = "Kaplan-Meier Cumulative Hazard Function Estimate - Smoking"
)

ggsurvplot(
km_fit1,
fun="cumhaz",
linetype = "strata",
pval = TRUE,
conf.int = TRUE,
ggtheme = theme_bw(),
palette = c("#E7B800", "#2E9FDF"),
title = "Kaplan-Meier Cumulative Hazard Function Estimate - Sleep Issues"
)

ggsurvplot(
km_fit2,
fun="cumhaz",
linetype = "strata",
pval = TRUE,
conf.int = TRUE,
ggtheme = theme_bw(),
palette = c("#E7B800", "#2E9FDF"),
title = "Kaplan-Meier Cumulative Hazard Function Estimate - Alcohol Consumption"
)

ggsurvplot(
km_fit3,
fun="cumhaz",
linetype = "strata",
pval = TRUE,
conf.int = TRUE,
ggtheme = theme_bw(),
palette = c("#E7B800", "#2E9FDF","#999999","#CC79A7"),
title = "Kaplan-Meier Cumulative Hazard Function Estimate - BMI"
)

ggsurvplot(
km_fit4,
fun="cumhaz",
linetype = "strata",
pval = TRUE,
conf.int = TRUE,
ggtheme = theme_bw(),
palette = c("#E7B800", "#2E9FDF","#999999","#CC79A7"),
title = "Kaplan-Meier Cumulative Hazard Function Estimate - Smoking and Sleep Trouble"
)

## Unstratified Log-rank test
km_diff <- survdiff(Surv(DiabetesAge)~SmokeNow, data=NHANES4)
km_diff
## Call:
## survdiff(formula = Surv(DiabetesAge) ~ SmokeNow, data = NHANES4)
##
## n=708, 19585 observations deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## SmokeNow=No 479 479 544 7.67 36.6
## SmokeNow=Yes 229 229 164 25.35 36.6
##
## Chisq= 36.6 on 1 degrees of freedom, p= 1e-09
km_diff2 <- survdiff(Surv(DiabetesAge)~SleepTrouble, data=NHANES4)
km_diff2
## Call:
## survdiff(formula = Surv(DiabetesAge) ~ SleepTrouble, data = NHANES4)
##
## n=1425, 18868 observations deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## SleepTrouble=No 922 922 999 5.92 21.7
## SleepTrouble=Yes 503 503 426 13.88 21.7
##
## Chisq= 21.7 on 1 degrees of freedom, p= 3e-06
km_diff3 <- survdiff(Surv(DiabetesAge)~Alcohol12PlusYr, data=NHANES4)
km_diff3
## Call:
## survdiff(formula = Surv(DiabetesAge) ~ Alcohol12PlusYr, data = NHANES4)
##
## n=1252, 19041 observations deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## Alcohol12PlusYr=No 448 448 497 4.89 8.94
## Alcohol12PlusYr=Yes 804 804 755 3.22 8.94
##
## Chisq= 8.9 on 1 degrees of freedom, p= 0.003
km_diff4 <- survdiff(Surv(DiabetesAge)~BMI_WHO, data=NHANES4)
km_diff4
## Call:
## survdiff(formula = Surv(DiabetesAge) ~ BMI_WHO, data = NHANES4)
##
## n=1347, 18946 observations deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## BMI_WHO=12.0_18.5 13 13 17.5 1.16 1.39
## BMI_WHO=18.5_to_24.9 180 180 198.7 1.76 2.29
## BMI_WHO=25.0_to_29.9 375 375 433.5 7.90 12.99
## BMI_WHO=30.0_plus 779 779 697.3 9.58 22.16
##
## Chisq= 22.9 on 3 degrees of freedom, p= 4e-05
##2. Cox proportional hazards model
###Cox model with only SmokeNow as covariate
cox.fit<-coxph(Surv(DiabetesAge) ~ SmokeNow, id=ID, data = NHANES4)
summary(cox.fit)
## Call:
## coxph(formula = Surv(DiabetesAge) ~ SmokeNow, data = NHANES4,
## id = ID)
##
## n= 708, number of events= 708
## (19585 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## SmokeNowYes 0.49830 1.64592 0.08254 6.037 1.57e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## SmokeNowYes 1.646 0.6076 1.4 1.935
##
## Concordance= 0.552 (se = 0.01 )
## Likelihood ratio test= 34.42 on 1 df, p=4e-09
## Wald test = 36.44 on 1 df, p=2e-09
## Score (logrank) test = 37.16 on 1 df, p=1e-09
### Cox model with SmokeNow and BMI as covariates
cox.fit2<-coxph(Surv(DiabetesAge) ~ SmokeNow + SleepTrouble, id=ID, data = NHANES4)
summary(cox.fit2)
## Call:
## coxph(formula = Surv(DiabetesAge) ~ SmokeNow + SleepTrouble,
## data = NHANES4, id = ID)
##
## n= 708, number of events= 708
## (19585 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## SmokeNowYes 0.50983 1.66500 0.08271 6.164 7.08e-10 ***
## SleepTroubleYes 0.36709 1.44353 0.07766 4.727 2.28e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## SmokeNowYes 1.665 0.6006 1.416 1.958
## SleepTroubleYes 1.444 0.6927 1.240 1.681
##
## Concordance= 0.572 (se = 0.012 )
## Likelihood ratio test= 56.2 on 2 df, p=6e-13
## Wald test = 58.55 on 2 df, p=2e-13
## Score (logrank) test = 59.52 on 2 df, p=1e-13
### Cox model with SmokeNow, BMI, and Gender as covariates
cox.fit3<-coxph(Surv(DiabetesAge) ~ SmokeNow + SleepTrouble + Alcohol12PlusYr, id=ID, data = NHANES4)
summary(cox.fit3)
## Call:
## coxph(formula = Surv(DiabetesAge) ~ SmokeNow + SleepTrouble +
## Alcohol12PlusYr, data = NHANES4, id = ID)
##
## n= 636, number of events= 636
## (19657 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## SmokeNowYes 0.490823 1.633660 0.087261 5.625 1.86e-08 ***
## SleepTroubleYes 0.340039 1.405003 0.082055 4.144 3.41e-05 ***
## Alcohol12PlusYrYes 0.007561 1.007590 0.098079 0.077 0.939
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## SmokeNowYes 1.634 0.6121 1.3768 1.938
## SleepTroubleYes 1.405 0.7117 1.1963 1.650
## Alcohol12PlusYrYes 1.008 0.9925 0.8314 1.221
##
## Concordance= 0.567 (se = 0.013 )
## Likelihood ratio test= 45.35 on 3 df, p=8e-10
## Wald test = 47.13 on 3 df, p=3e-10
## Score (logrank) test = 47.83 on 3 df, p=2e-10
### Cox-model with SmokeNow, BMI, Gender, Alcohol12PlusYr as covariates
cox.fit4<-coxph(Surv(DiabetesAge) ~ SmokeNow + SleepTrouble + Alcohol12PlusYr + BMI_WHO, id=ID, data = NHANES4)
summary(cox.fit4)
## Call:
## coxph(formula = Surv(DiabetesAge) ~ SmokeNow + SleepTrouble +
## Alcohol12PlusYr + BMI_WHO, data = NHANES4, id = ID)
##
## n= 607, number of events= 607
## (19686 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## SmokeNowYes 0.50626 1.65908 0.09055 5.591 2.26e-08 ***
## SleepTroubleYes 0.32070 1.37810 0.08508 3.770 0.000163 ***
## Alcohol12PlusYrYes 0.02618 1.02652 0.10101 0.259 0.795526
## BMI_WHO18.5_to_24.9 0.88139 2.41425 0.51689 1.705 0.088161 .
## BMI_WHO25.0_to_29.9 0.76775 2.15492 0.50975 1.506 0.132033
## BMI_WHO30.0_plus 1.05489 2.87167 0.50713 2.080 0.037514 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## SmokeNowYes 1.659 0.6027 1.3893 1.981
## SleepTroubleYes 1.378 0.7256 1.1664 1.628
## Alcohol12PlusYrYes 1.027 0.9742 0.8422 1.251
## BMI_WHO18.5_to_24.9 2.414 0.4142 0.8766 6.649
## BMI_WHO25.0_to_29.9 2.155 0.4641 0.7935 5.852
## BMI_WHO30.0_plus 2.872 0.3482 1.0628 7.759
##
## Concordance= 0.581 (se = 0.014 )
## Likelihood ratio test= 57.88 on 6 df, p=1e-10
## Wald test = 57.95 on 6 df, p=1e-10
## Score (logrank) test = 58.99 on 6 df, p=7e-11
## Cox-model with SmokeNow, BMI, Gender, Alcohol12PlusYr, SleepTrouble as covariates
cox.fit5<-coxph(Surv(DiabetesAge) ~ SmokeNow + SleepTrouble + BMI_WHO, id=ID, data = NHANES4)
summary(cox.fit5)
## Call:
## coxph(formula = Surv(DiabetesAge) ~ SmokeNow + SleepTrouble +
## BMI_WHO, data = NHANES4, id = ID)
##
## n= 662, number of events= 662
## (19631 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## SmokeNowYes 0.53397 1.70569 0.08640 6.180 6.39e-10 ***
## SleepTroubleYes 0.31698 1.37297 0.08133 3.897 9.72e-05 ***
## BMI_WHO18.5_to_24.9 0.88806 2.43041 0.46067 1.928 0.0539 .
## BMI_WHO25.0_to_29.9 0.75744 2.13280 0.45476 1.666 0.0958 .
## BMI_WHO30.0_plus 1.05646 2.87618 0.45223 2.336 0.0195 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## SmokeNowYes 1.706 0.5863 1.4400 2.020
## SleepTroubleYes 1.373 0.7283 1.1707 1.610
## BMI_WHO18.5_to_24.9 2.430 0.4115 0.9853 5.995
## BMI_WHO25.0_to_29.9 2.133 0.4689 0.8747 5.201
## BMI_WHO30.0_plus 2.876 0.3477 1.1854 6.978
##
## Concordance= 0.588 (se = 0.013 )
## Likelihood ratio test= 67.39 on 5 df, p=4e-13
## Wald test = 67.73 on 5 df, p=3e-13
## Score (logrank) test = 68.93 on 5 df, p=2e-13
## Cox-model with SmokeNow, BMI, Gender, Alcohol12PlusYr, SleepTrouble, Gender as covariates
cox.fit6<-coxph(Surv(DiabetesAge) ~ SmokeNow + SleepTrouble + SmokeNow*SleepTrouble, id=ID, data = NHANES4)
summary(cox.fit6)
## Call:
## coxph(formula = Surv(DiabetesAge) ~ SmokeNow + SleepTrouble +
## SmokeNow * SleepTrouble, data = NHANES4, id = ID)
##
## n= 708, number of events= 708
## (19585 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## SmokeNowYes 0.40274 1.49591 0.10887 3.699 0.000216 ***
## SleepTroubleYes 0.28316 1.32731 0.09514 2.976 0.002918 **
## SmokeNowYes:SleepTroubleYes 0.25351 1.28854 0.16329 1.553 0.120536
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## SmokeNowYes 1.496 0.6685 1.2085 1.852
## SleepTroubleYes 1.327 0.7534 1.1015 1.599
## SmokeNowYes:SleepTroubleYes 1.289 0.7761 0.9356 1.775
##
## Concordance= 0.572 (se = 0.012 )
## Likelihood ratio test= 58.6 on 3 df, p=1e-12
## Wald test = 64.55 on 3 df, p=6e-14
## Score (logrank) test = 67.53 on 3 df, p=1e-14
###Prediction of survival probability
ggsurvplot(survfit(cox.fit2),
data=NHANES4,
palette="#239FDF",
ggtheme=theme_minimal(),
title = "Survival Probability Plot - Cox PH")

##Check proportionality of hazard (PH) assumption
##Goodness of fit approach
test.ph<-cox.zph(cox.fit2)
test.ph
## chisq df p
## SmokeNow 0.78767 1 0.37
## SleepTrouble 0.00684 1 0.93
## GLOBAL 0.79796 2 0.67
#Graphical diagnostic using the function ggcoxzph for each covariate to show Schoenfeld residuals
ggcoxzph(test.ph)

##Testing influential observations
#This is about DFBETAS
ggcoxdiagnostics(cox.fit2, type = "dfbeta",
linear.predictions = FALSE, ggtheme = theme_bw())
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the survminer package.
## Please report the issue at <https://github.com/kassambara/survminer/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'

#Deviance of residuals
ggcoxdiagnostics(cox.fit2, type = "deviance",
linear.predictions = FALSE, ggtheme = theme_bw())
## `geom_smooth()` using formula = 'y ~ x'

#Linearity - Isn't working, probably won't use
#ggcoxfunctional(Surv(DiabetesAge) ~ DiabetesAge + log(DiabetesAge) + sqrt(DiabetesAge), data = NHANES4)
##3. WEIBULL
weib.fit<-survreg(Surv(DiabetesAge)~1, data=NHANES4)
weib.fit
## Call:
## survreg(formula = Surv(DiabetesAge) ~ 1, data = NHANES4)
##
## Coefficients:
## (Intercept)
## 4.001838
##
## Scale= 0.2861537
##
## Loglik(model)= -6055.2 Loglik(intercept only)= -6055.2
## n=1437 (18856 observations deleted due to missingness)
#Age of Diabetes Diagnosis
surv<-seq(0.99, 0.1, by = -0.1)
t<-predict(weib.fit, type = "quantile", p = 1-surv, newdata=data.frame(1))
table6<-head(data.frame(time=t, surv=surv), n=20)
kbl(table6, booktabs = T, caption = "Age for Diabetes Diagnosis")%>%
column_spec(2) %>%
kable_styling(latex_options="HOLD_position")
Age for Diabetes Diagnosis
|
time
|
surv
|
|
14.66537
|
0.99
|
|
29.56910
|
0.89
|
|
36.17316
|
0.79
|
|
41.18816
|
0.69
|
|
45.55338
|
0.59
|
|
49.65904
|
0.49
|
|
53.76495
|
0.39
|
|
58.14282
|
0.29
|
|
63.24349
|
0.19
|
#Median Age of Diabetes Diagnosis
predict(weib.fit, type = "quantile", p = 1 - 0.5, newdata = data.frame(1))
## 1
## 49.25247
#Survival curve
surv.wb<-data.frame(time=t, surv=surv, upper = NA, lower = NA, std.err = NA)
ggsurvplot_df(fit=surv.wb, surv.geom=geom_line, title = "Survival Curve - Diabetes Diagnosis")

ggsurvplot(
km_fit,
fun = "event",
linetype = "strata",
pval = TRUE,
conf.int = TRUE,
ggtheme = theme_bw(),
palette = c("#E7B800", "#2E9FDF"),
title = "Kaplan-Meier Cumulative Risk Function Estimate - Smoking"
)

# Weibull model 2
weib.fit2<- survreg(Surv(DiabetesAge) ~ SmokeNow + SleepTrouble, data = NHANES4)
coef(weib.fit2)
## (Intercept) SmokeNowYes SleepTroubleYes
## 4.08072410 -0.10956400 -0.07921849
summary(weib.fit2)
##
## Call:
## survreg(formula = Surv(DiabetesAge) ~ SmokeNow + SleepTrouble,
## data = NHANES4)
## Value Std. Error z p
## (Intercept) 4.0807 0.0143 285.41 < 2e-16
## SmokeNowYes -0.1096 0.0204 -5.36 8.2e-08
## SleepTroubleYes -0.0792 0.0195 -4.07 4.7e-05
## Log(scale) -1.3693 0.0310 -44.16 < 2e-16
##
## Scale= 0.254
##
## Weibull distribution
## Loglik(model)= -2925.8 Loglik(intercept only)= -2946.8
## Chisq= 41.97 on 2 degrees of freedom, p= 7.7e-10
## Number of Newton-Raphson Iterations: 6
## n=708 (19585 observations deleted due to missingness)
# Goodness of fit test for weibull model
# MLE Estimates
# Transformation: mu.hat = -log(lambda) , sigma.hat=1/alpha
theta.hat <- signif(exp(weib.fit2$coefficients),5)
tau.hat <- signif(1/weib.fit2$scale,5)
KS.test <- suppressWarnings(ks.test(NHANES4$DiabetesAge, "pweibull",shape=tau.hat, scale=theta.hat))
KS.test
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: NHANES4$DiabetesAge
## D = 0.99652, p-value < 2.2e-16
## alternative hypothesis: two-sided
library(goftest)
##
## Attaching package: 'goftest'
## The following objects are masked from 'package:nortest':
##
## ad.test, cvm.test
# H0: Data follows specified distribution
DiabetesAge1 <- na.omit(NHANES4$DiabetesAge)
AD.test <- goftest::ad.test(DiabetesAge1, null="pweibull", shape=tau.hat, scale=theta.hat)
AD.test
##
## Anderson-Darling test of goodness-of-fit
## Null hypothesis: Weibull distribution
## with parameters shape = 3.9326, scale = 59.188 0.89622 0.92384
## Parameters assumed to be fixed
##
## data: DiabetesAge1
## An = Inf, p-value = 4.175e-07
## Model comparisons
# Cox model
AIC.cox = signif(extractAIC(cox.fit2))
BIC.cox = -2*(logLik(cox.fit2)) + 2*log(228)
# Weibull model
AIC.Weibull = signif(extractAIC(weib.fit2),5)
BIC.Weibull = -2*(logLik(weib.fit2)) + 2*log(228)
Model <- c("Cox", "Weibull")
AIC <- c(AIC.cox[2], AIC.Weibull[2])
BIC = round(c(BIC.cox, BIC.Weibull),2)
#Table 7
table7 <- as.data.frame<- cbind(Model,AIC,BIC)
kbl(table7, booktabs = T, caption = "Model comparisons")%>%
column_spec(3, width = "10em") %>%
kable_styling(latex_options = "HOLD_position")
Model comparisons
|
Model
|
AIC
|
BIC
|
|
Cox
|
7832.62
|
7839.48
|
|
Weibull
|
5859.7
|
5862.53
|
library(penalized)
## Warning: package 'penalized' was built under R version 4.3.1
## Welcome to penalized. For extended examples, see vignette("penalized").
attach(NHANES4)
NHANES5 = NHANES4[!is.na(DiabetesAge) & !is.na(SmokeNow) & !is.na(SleepTrouble),]
attach(NHANES5)
## The following objects are masked from NHANES4:
##
## Alcohol12PlusYr, BMI_WHO, Diabetes, DiabetesAge, ID, SleepTrouble,
## SmokeNow
m1.pen <- penalized(Surv(DiabetesAge),
penalized=data.frame(cbind(SmokeNow,SleepTrouble, SmokeNow:SleepTrouble)),
standardize=T,
lambda1=10)
## # nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 3
# nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 2
m1.pen
## Penalized cox regression object
## 3 regression coefficients of which 2 are non-zero
##
## Loglikelihood = -4660.119
## L1 penalty = 2.9651 at lambda1 = 10
round(coef(m1.pen, standardize=T), 3)
## SleepTrouble V3
## 0.043 0.254
set.seed(34)
m2.pen <- profL1(Surv(DiabetesAge),
penalized=data.frame(cbind(SleepTrouble, SmokeNow:SleepTrouble)),
standardize=T, fold=10, minlambda1=1, maxlambda1=12)
## lambda= 12 12345678910cvl= -4623.498
## lambda= 11.88889 12345678910cvl= -4623.495
## lambda= 11.77778 12345678910cvl= -4623.492
## lambda= 11.66667 12345678910cvl= -4623.489
## lambda= 11.55556 12345678910cvl= -4623.486
## lambda= 11.44444 12345678910cvl= -4623.483
## lambda= 11.33333 12345678910cvl= -4623.48
## lambda= 11.22222 12345678910cvl= -4623.477
## lambda= 11.11111 12345678910cvl= -4623.475
## lambda= 11 12345678910cvl= -4623.472
## lambda= 10.88889 12345678910cvl= -4623.469
## lambda= 10.77778 12345678910cvl= -4623.466
## lambda= 10.66667 12345678910cvl= -4623.464
## lambda= 10.55556 12345678910cvl= -4623.461
## lambda= 10.44444 12345678910cvl= -4623.458
## lambda= 10.33333 12345678910cvl= -4623.456
## lambda= 10.22222 12345678910cvl= -4623.453
## lambda= 10.11111 12345678910cvl= -4623.451
## lambda= 10 12345678910cvl= -4623.448
## lambda= 9.888889 12345678910cvl= -4623.446
## lambda= 9.777778 12345678910cvl= -4623.443
## lambda= 9.666667 12345678910cvl= -4623.441
## lambda= 9.555556 12345678910cvl= -4623.439
## lambda= 9.444444 12345678910cvl= -4623.436
## lambda= 9.333333 12345678910cvl= -4623.434
## lambda= 9.222222 12345678910cvl= -4623.432
## lambda= 9.111111 12345678910cvl= -4623.43
## lambda= 9 12345678910cvl= -4623.427
## lambda= 8.888889 12345678910cvl= -4623.425
## lambda= 8.777778 12345678910cvl= -4623.423
## lambda= 8.666667 12345678910cvl= -4623.421
## lambda= 8.555556 12345678910cvl= -4623.419
## lambda= 8.444444 12345678910cvl= -4623.417
## lambda= 8.333333 12345678910cvl= -4623.415
## lambda= 8.222222 12345678910cvl= -4623.413
## lambda= 8.111111 12345678910cvl= -4623.411
## lambda= 8 12345678910cvl= -4623.409
## lambda= 7.888889 12345678910cvl= -4623.407
## lambda= 7.777778 12345678910cvl= -4623.406
## lambda= 7.666667 12345678910cvl= -4623.404
## lambda= 7.555556 12345678910cvl= -4623.402
## lambda= 7.444444 12345678910cvl= -4623.4
## lambda= 7.333333 12345678910cvl= -4623.399
## lambda= 7.222222 12345678910cvl= -4623.397
## lambda= 7.111111 12345678910cvl= -4623.395
## lambda= 7 12345678910cvl= -4623.394
## lambda= 6.888889 12345678910cvl= -4623.392
## lambda= 6.777778 12345678910cvl= -4623.391
## lambda= 6.666667 12345678910cvl= -4623.389
## lambda= 6.555556 12345678910cvl= -4623.388
## lambda= 6.444444 12345678910cvl= -4623.386
## lambda= 6.333333 12345678910cvl= -4623.385
## lambda= 6.222222 12345678910cvl= -4623.384
## lambda= 6.111111 12345678910cvl= -4623.382
## lambda= 6 12345678910cvl= -4623.381
## lambda= 5.888889 12345678910cvl= -4623.38
## lambda= 5.777778 12345678910cvl= -4623.378
## lambda= 5.666667 12345678910cvl= -4623.377
## lambda= 5.555556 12345678910cvl= -4623.376
## lambda= 5.444444 12345678910cvl= -4623.375
## lambda= 5.333333 12345678910cvl= -4623.374
## lambda= 5.222222 12345678910cvl= -4623.373
## lambda= 5.111111 12345678910cvl= -4623.372
## lambda= 5 12345678910cvl= -4623.371
## lambda= 4.888889 12345678910cvl= -4623.37
## lambda= 4.777778 12345678910cvl= -4623.369
## lambda= 4.666667 12345678910cvl= -4623.368
## lambda= 4.555556 12345678910cvl= -4623.367
## lambda= 4.444444 12345678910cvl= -4623.366
## lambda= 4.333333 12345678910cvl= -4623.365
## lambda= 4.222222 12345678910cvl= -4623.365
## lambda= 4.111111 12345678910cvl= -4623.364
## lambda= 4 12345678910cvl= -4623.363
## lambda= 3.888889 12345678910cvl= -4623.362
## lambda= 3.777778 12345678910cvl= -4623.362
## lambda= 3.666667 12345678910cvl= -4623.361
## lambda= 3.555556 12345678910cvl= -4623.361
## lambda= 3.444444 12345678910cvl= -4623.36
## lambda= 3.333333 12345678910cvl= -4623.36
## lambda= 3.222222 12345678910cvl= -4623.359
## lambda= 3.111111 12345678910cvl= -4623.359
## lambda= 3 12345678910cvl= -4623.358
## lambda= 2.888889 12345678910cvl= -4623.358
## lambda= 2.777778 12345678910cvl= -4623.357
## lambda= 2.666667 12345678910cvl= -4623.357
## lambda= 2.555556 12345678910cvl= -4623.357
## lambda= 2.444444 12345678910cvl= -4623.357
## lambda= 2.333333 12345678910cvl= -4623.356
## lambda= 2.222222 12345678910cvl= -4623.356
## lambda= 2.111111 12345678910cvl= -4623.356
## lambda= 2 12345678910cvl= -4623.356
## lambda= 1.888889 12345678910cvl= -4623.356
## lambda= 1.777778 12345678910cvl= -4623.356
## lambda= 1.666667 12345678910cvl= -4623.356
## lambda= 1.555556 12345678910cvl= -4623.356
## lambda= 1.444444 12345678910cvl= -4623.356
## lambda= 1.333333 12345678910cvl= -4623.356
## lambda= 1.222222 12345678910cvl= -4623.356
## lambda= 1.111111 12345678910cvl= -4623.356
## lambda= 1 12345678910cvl= -4623.356
plot(m2.pen$cvl ~ m2.pen$lambda, type="l", log="x",
xlab="lambda", ylab="Cross-validated log partial likelihood")
set.seed(34)
m3.pen <- optL1(Surv(DiabetesAge),
penalized=data.frame(cbind(SleepTrouble, SmokeNow:SleepTrouble)), standardize=T,
fold=10)
## lambda= 60.26345 12345678910cvl= -4626.695
## lambda= 97.50831 12345678910cvl= -4632.821
## lambda= 37.24486 12345678910cvl= -4624.719
## lambda= 15.8574 12345678910cvl= -4623.624
## lambda= 24.02668 12345678910cvl= -4623.945
## lambda= 9.800409 12345678910cvl= -4623.444
## lambda= 6.056986 12345678910cvl= -4623.382
## lambda= 1.679194 12345678910cvl= -4623.356
## lambda= 1.64624 12345678910cvl= -4623.356
## lambda= 1.639796 12345678910cvl= -4623.356
## lambda= 1.639765 12345678910cvl= -4623.356
## lambda= 1.013431 12345678910cvl= -4623.356
## lambda= 1.400527 12345678910cvl= -4623.356
## lambda= 1.548384 12345678910cvl= -4623.356
## lambda= 1.604861 12345678910cvl= -4623.356
## lambda= 1.626433 12345678910cvl= -4623.356
## lambda= 1.634673 12345678910cvl= -4623.356
## lambda= 1.637254 12345678910cvl= -4623.356
## lambda= 1.63853 12345678910cvl= -4623.356
## lambda= 1.639164 12345678910cvl= -4623.356
## lambda= 1.63948 12345678910cvl= -4623.356
m3.pen$lambda
## [1] 1.639765
abline(v=m3.pen$lambda, col="gray")

m4.pen <- penalized(Surv(DiabetesAge),
penalized=data.frame(cbind(SleepTrouble, SmokeNow:SleepTrouble)), standardize=T,
steps=10, lambda1=1)
## # nonzero coefficients: 2
# nonzero coefficients: 0
## # nonzero coefficients: 2
# nonzero coefficients: 1
# nonzero coefficients: 1
# nonzero coefficients: 1
# nonzero coefficients: 1
# nonzero coefficients: 1
## # nonzero coefficients: 2
# nonzero coefficients: 1
# nonzero coefficients: 1
# nonzero coefficients: 1
# nonzero coefficients: 1
# nonzero coefficients: 1
## # nonzero coefficients: 2
# nonzero coefficients: 1
# nonzero coefficients: 1
# nonzero coefficients: 1
# nonzero coefficients: 1
# nonzero coefficients: 1
## # nonzero coefficients: 2
# nonzero coefficients: 1
# nonzero coefficients: 1
# nonzero coefficients: 1
# nonzero coefficients: 1
# nonzero coefficients: 1
## # nonzero coefficients: 2
# nonzero coefficients: 1
# nonzero coefficients: 1
# nonzero coefficients: 1
# nonzero coefficients: 1
# nonzero coefficients: 1
## # nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 1
# nonzero coefficients: 1
# nonzero coefficients: 1
# nonzero coefficients: 1
## # nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 2
## # nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 2
## # nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 2
# nonzero coefficients: 2
plotpath(m4.pen, labelsize=0.9, standardize=T, log="x",
lwd=2)
abline(v=m3.pen$lambda, col="gray", lwd=2)
