library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
y <- subset(ADDEP_3, ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN))
#Descriptives stats between included (FALSE) and excluded (TRUE) in LEMS baseline
table1(~SEX+AGE_BINARY+REVIEWASIAGRADEADM+ASIA_LEVEL_ADM+CRLOWALBUMIN|is.na(LOWER_MS_REHAB), data=y)
| FALSE (n=430) |
TRUE (n=10) |
Overall (n=440) |
|
|---|---|---|---|
| SEX | |||
| (1) MALE | 345 (80.2%) | 5 (50.0%) | 350 (79.5%) |
| (2) FEMALE | 85 (19.8%) | 5 (50.0%) | 90 (20.5%) |
| AGE_BINARY | |||
| 0 | 312 (72.6%) | 5 (50.0%) | 317 (72.0%) |
| 1 | 118 (27.4%) | 5 (50.0%) | 123 (28.0%) |
| REVIEWASIAGRADEADM | |||
| A | 219 (50.9%) | 1 (10.0%) | 220 (50.0%) |
| B | 73 (17.0%) | 1 (10.0%) | 74 (16.8%) |
| C | 75 (17.4%) | 3 (30.0%) | 78 (17.7%) |
| D | 63 (14.7%) | 5 (50.0%) | 68 (15.5%) |
| ASIA_LEVEL_ADM | |||
| C | 286 (66.5%) | 7 (70.0%) | 293 (66.6%) |
| T | 144 (33.5%) | 3 (30.0%) | 147 (33.4%) |
| Min Albumin Concentration | |||
| Mean (SD) | 2.73 (0.755) | 3.01 (0.448) | 2.74 (0.751) |
| Median [Min, Max] | 2.70 [0.700, 13.1] | 3.15 [2.00, 3.40] | 2.70 [0.700, 13.1] |
#Descriptives stats between included (FALSE) and excluded (TRUE) in AIS Grades baseline
table1(~SEX+AGE_BINARY+REVIEWASIAGRADEADM+ASIA_LEVEL_ADM+CRLOWALBUMIN|is.na(ASIAGRADE_WALK), data=y)
| FALSE (n=439) |
TRUE (n=1) |
Overall (n=440) |
|
|---|---|---|---|
| SEX | |||
| (1) MALE | 349 (79.5%) | 1 (100%) | 350 (79.5%) |
| (2) FEMALE | 90 (20.5%) | 0 (0%) | 90 (20.5%) |
| AGE_BINARY | |||
| 0 | 316 (72.0%) | 1 (100%) | 317 (72.0%) |
| 1 | 123 (28.0%) | 0 (0%) | 123 (28.0%) |
| REVIEWASIAGRADEADM | |||
| A | 220 (50.1%) | 0 (0%) | 220 (50.0%) |
| B | 74 (16.9%) | 0 (0%) | 74 (16.8%) |
| C | 78 (17.8%) | 0 (0%) | 78 (17.7%) |
| D | 67 (15.3%) | 1 (100%) | 68 (15.5%) |
| ASIA_LEVEL_ADM | |||
| C | 293 (66.7%) | 0 (0%) | 293 (66.6%) |
| T | 146 (33.3%) | 1 (100%) | 147 (33.4%) |
| Min Albumin Concentration | |||
| Mean (SD) | 2.73 (0.751) | 3.00 (NA) | 2.74 (0.751) |
| Median [Min, Max] | 2.70 [0.700, 13.1] | 3.00 [3.00, 3.00] | 2.70 [0.700, 13.1] |
These following two figures are to see if Albumin is associated with LEMS and AIS grades at baseline:
For baseline association, we included only C and T participants and excluded all NAs in Albumin. In addition, all NAs in the outcomes (LEMS baseline, and AIS grades baseline) are also excluded.
library("expss")
library("party")
ADDEP_3 = apply_labels(ADDEP_3, CRLOWALBUMIN = "Min Albumin Concentration",
LOWER_MS_REHAB = "LEMS at Admission", ASIAGRADE_WALK = "AIS Grades at Admission")
URP_LEMS_baseline <-use_labels(ADDEP_3, ctree(LOWER_MS_REHAB ~ CRLOWALBUMIN, controls=ctree_control(testtype = "Bonferroni",
mincriterion = 0.95), data=subset(..data, !is.na(LOWER_MS_REHAB)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN))))
plot(URP_LEMS_baseline)
URP for AIS grades baseline:
library("expss")
URP_AIS_baseline <-use_labels(ADDEP_3, ctree(ASIAGRADE_WALK ~ CRLOWALBUMIN,controls=ctree_control(testtype = "Bonferroni",
mincriterion = 0.95), data=subset(..data, !is.na(ASIAGRADE_WALK)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN))))
plot(URP_AIS_baseline, terminal_panel=node_barplot(URP_AIS_baseline))
#Shared dataset between 2 outcomes for 1 year
z<-subset(ADDEP_3,!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN))
#Descriptives stats between included (FALSE) and excluded (TRUE) in Change scores
table1(~SEX+AGE_BINARY+REVIEWASIAGRADEADM+ASIA_LEVEL_ADM+CRLOWALBUMIN|is.na(Change_Scores), data=z)
| FALSE (n=117) |
TRUE (n=265) |
Overall (n=382) |
|
|---|---|---|---|
| SEX | |||
| (1) MALE | 97 (82.9%) | 207 (78.1%) | 304 (79.6%) |
| (2) FEMALE | 20 (17.1%) | 58 (21.9%) | 78 (20.4%) |
| AGE_BINARY | |||
| 0 | 93 (79.5%) | 189 (71.3%) | 282 (73.8%) |
| 1 | 24 (20.5%) | 76 (28.7%) | 100 (26.2%) |
| REVIEWASIAGRADEADM | |||
| A | 67 (57.3%) | 152 (57.4%) | 219 (57.3%) |
| B | 22 (18.8%) | 52 (19.6%) | 74 (19.4%) |
| C | 23 (19.7%) | 49 (18.5%) | 72 (18.8%) |
| D | 5 (4.3%) | 12 (4.5%) | 17 (4.5%) |
| ASIA_LEVEL_ADM | |||
| C | 74 (63.2%) | 170 (64.2%) | 244 (63.9%) |
| T | 43 (36.8%) | 95 (35.8%) | 138 (36.1%) |
| Min Albumin Concentration | |||
| Mean (SD) | 2.69 (0.522) | 2.69 (0.857) | 2.69 (0.769) |
| Median [Min, Max] | 2.60 [1.60, 4.10] | 2.60 [0.700, 13.1] | 2.60 [0.700, 13.1] |
#Descriptives stats between included (FALSE) and excluded (TRUE) in Marked Recovery
table1(~SEX+AGE_BINARY+REVIEWASIAGRADEADM+ASIA_LEVEL_ADM+CRLOWALBUMIN|is.na(Marked_Recovery_Annual_2), data=z)
| FALSE (n=167) |
TRUE (n=215) |
Overall (n=382) |
|
|---|---|---|---|
| SEX | |||
| (1) MALE | 141 (84.4%) | 163 (75.8%) | 304 (79.6%) |
| (2) FEMALE | 26 (15.6%) | 52 (24.2%) | 78 (20.4%) |
| AGE_BINARY | |||
| 0 | 122 (73.1%) | 160 (74.4%) | 282 (73.8%) |
| 1 | 45 (26.9%) | 55 (25.6%) | 100 (26.2%) |
| REVIEWASIAGRADEADM | |||
| A | 71 (42.5%) | 148 (68.8%) | 219 (57.3%) |
| B | 22 (13.2%) | 52 (24.2%) | 74 (19.4%) |
| C | 61 (36.5%) | 11 (5.1%) | 72 (18.8%) |
| D | 13 (7.8%) | 4 (1.9%) | 17 (4.5%) |
| ASIA_LEVEL_ADM | |||
| C | 111 (66.5%) | 133 (61.9%) | 244 (63.9%) |
| T | 56 (33.5%) | 82 (38.1%) | 138 (36.1%) |
| Min Albumin Concentration | |||
| Mean (SD) | 2.78 (0.960) | 2.62 (0.572) | 2.69 (0.769) |
| Median [Min, Max] | 2.70 [1.40, 13.1] | 2.60 [0.700, 4.20] | 2.60 [0.700, 13.1] |
For 1 year, the inclusion criteria are: participants who were non-ambulatory at admission to rehabilitation were included, C & T level only, completed assessment of albumin, and neurological outcomes.
plot(use_labels(ADDEP_3,ctree(Change_Scores ~ CRLOWALBUMIN, data=subset(..data, !is.na(Change_Scores)&!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN)))))
Splitting data into 70% training, and 30% testing. The evaluation metrics for training sample is:
library("caret")
## Loading required package: lattice
ADDEP_CS_subset <- subset(ADDEP_3, !is.na(Change_Scores)&!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN))
set.seed(4567)
sample_CS <- ADDEP_CS_subset$Change_Scores %>%
createDataPartition(p = 0.7, list = FALSE)
train_ADDEP_CS <- ADDEP_CS_subset[sample_CS, ]
test_ADDEP_CS <- ADDEP_CS_subset[-sample_CS, ]
URP_CS_1y <- train(
Change_Scores ~ CRLOWALBUMIN, data = train_ADDEP_CS, method = "ctree", na.action = na.omit,
trControl = trainControl("cv", number = 10), tuneLength=.1
)
URP_CS_1y
## Conditional Inference Tree
##
## 84 samples
## 1 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 76, 76, 75, 76, 76, 76, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 8.816839 0.169552 6.484345
##
## Tuning parameter 'mincriterion' was held constant at a value of 0.99
Predicting on the 30% unseeen test data. The evaluation metrics for testing sample is:
predict_CS_1y <- cbind(as.data.frame(predict(URP_CS_1y, newdata=test_ADDEP_CS)), test_ADDEP_CS$Change_Scores)
colnames(predict_CS_1y) <- c("predicted","actuals")
postResample(pred = predict_CS_1y$predicted, obs = predict_CS_1y$actuals)
## RMSE Rsquared MAE
## 9.75554535 0.07645041 6.85788087
Our model is not overfitting since RMSE between test and train samples are not vastly different.
plot(use_labels(ADDEP_3,ctree(Change_Scores ~ CRLOWALBUMIN+LOWER_MS_REHAB, data=subset(..data, !is.na(Change_Scores)&!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN)))))
plot(use_labels(ADDEP_3,ctree(Marked_Recovery_Annual_2~CRLOWALBUMIN, data=subset(..data, !is.na(Marked_Recovery_Annual_2)&!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN)))))
Splitting data is similar to Change Score analysis above. The evaluation metrics for training sample is:
ADDEP_MR_subset <- subset(ADDEP_3, !is.na(Marked_Recovery_Annual_2)&!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN))
set.seed(4567)
sample_MR <- ADDEP_MR_subset$Marked_Recovery_Annual_2 %>%
createDataPartition(p = 0.7, list = FALSE)
train_ADDEP_MR <- ADDEP_MR_subset[sample_MR, ]
test_ADDEP_MR <- ADDEP_MR_subset[-sample_MR, ]
URP_MR_1y <- train(
Marked_Recovery_Annual_2 ~ CRLOWALBUMIN, data = train_ADDEP_MR, method = "ctree", na.action = na.omit,
trControl = trainControl("cv", number = 10), tuneLength=.1
)
URP_MR_1y
## Conditional Inference Tree
##
## 118 samples
## 1 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 106, 106, 106, 106, 107, 106, ...
## Resampling results:
##
## Accuracy Kappa
## 0.6833333 -0.05172414
##
## Tuning parameter 'mincriterion' was held constant at a value of 0.99
Predicting on the 30% unseeen test data. The AUC for ROC and PR curves are:
library("precrec")
library("ROCR")
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
predict_MR_1y <- predict(URP_MR_1y, newdata=test_ADDEP_MR, type='prob')
predict_MR_1y <- prediction(predict_MR_1y[,2], test_ADDEP_MR$Marked_Recovery_Annual_2)
predict_MR_1y <- cbind(as.data.frame(predict_MR_1y@predictions), as.data.frame(predict_MR_1y@labels))
colnames(predict_MR_1y) <- c("prob","actuals")
evalmod(scores = predict_MR_1y$prob, labels = predict_MR_1y$actuals)
##
## === AUCs ===
##
## Model name Dataset ID Curve type AUC
## 1 m1 1 ROC 0.5990991
## 2 m1 1 PRC 0.3436168
##
##
## === Input data ===
##
## Model name Dataset ID # of negatives # of positives
## 1 m1 1 37 12
The plot for ROC and PR curves:
autoplot(evalmod(scores = predict_MR_1y$prob, labels = predict_MR_1y$actuals))
Confusion matrix:
confusionMatrix(predict(URP_MR_1y, newdata=test_ADDEP_MR), test_ADDEP_MR$Marked_Recovery_Annual_2)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 37 12
## 1 0 0
##
## Accuracy : 0.7551
## 95% CI : (0.6113, 0.8666)
## No Information Rate : 0.7551
## P-Value [Acc > NIR] : 0.576719
##
## Kappa : 0
##
## Mcnemar's Test P-Value : 0.001496
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7551
## Neg Pred Value : NaN
## Prevalence : 0.7551
## Detection Rate : 0.7551
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : 0
##
Univariate URP for Marked Recovery has 75% accuracy, and it’s mostly accurate in predicting for cases who did not achieve marked recovery.
ADDEP_3$REVIEWASIAGRADEADM<- as.factor(ADDEP_3$REVIEWASIAGRADEADM)
plot(use_labels(ADDEP_3,ctree(Marked_Recovery_Annual_2~CRLOWALBUMIN+REVIEWASIAGRADEADM, data=subset(..data, !is.na(Marked_Recovery_Annual_2)&!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN)))))
In this ADDEP dataset, we have individuals who are from 12-19 years old. We excluded them to see if there is any difference in relation to outcomes:
ADDEP_3$AGE_INJ_NUM <- as.factor(ADDEP_3$AGE_INJ_NUM)
plot(use_labels(ADDEP_3,ctree(Change_Scores ~ CRLOWALBUMIN+AGE_INJ_NUM, data=subset(..data, !(AGE_INJ_NUM==1)&!is.na(Change_Scores)&!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN)))))
plot(use_labels(ADDEP_3,ctree(Marked_Recovery_Annual_2 ~ CRLOWALBUMIN+AGE_INJ_NUM, data=subset(..data, !(AGE_INJ_NUM==1)&!is.na(Marked_Recovery_Annual_2)&!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN)))))
The absence of age indicates that age is not significant variable assocating with outcome.
At admission to rehabiliation, lower serum albumin was associated with more severe spinal cord injury. However, at 1-year post-injury analysis, serum albumin was not significant after adjusting for injury characteristics. This suggest serum albumin is more useful when neurological details at admission (i.e., LEMS or AIS grade at baseline) are not available.