Analysis for ADDEP dataset

Data Cleaning has been done separately. Click here (right click to open in new tab) for the R code.

Descriptive statistics for Baseline Association

Lower Extremity Motor Scores at baseline analyis

library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
y <- subset(ADDEP_3, ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN))

#Descriptives stats between included (FALSE) and excluded (TRUE) in LEMS baseline
table1(~SEX+AGE_BINARY+REVIEWASIAGRADEADM+ASIA_LEVEL_ADM+CRLOWALBUMIN|is.na(LOWER_MS_REHAB), data=y)
FALSE
(n=430)
TRUE
(n=10)
Overall
(n=440)
SEX
(1) MALE 345 (80.2%) 5 (50.0%) 350 (79.5%)
(2) FEMALE 85 (19.8%) 5 (50.0%) 90 (20.5%)
AGE_BINARY
0 312 (72.6%) 5 (50.0%) 317 (72.0%)
1 118 (27.4%) 5 (50.0%) 123 (28.0%)
REVIEWASIAGRADEADM
A 219 (50.9%) 1 (10.0%) 220 (50.0%)
B 73 (17.0%) 1 (10.0%) 74 (16.8%)
C 75 (17.4%) 3 (30.0%) 78 (17.7%)
D 63 (14.7%) 5 (50.0%) 68 (15.5%)
ASIA_LEVEL_ADM
C 286 (66.5%) 7 (70.0%) 293 (66.6%)
T 144 (33.5%) 3 (30.0%) 147 (33.4%)
Min Albumin Concentration
Mean (SD) 2.73 (0.755) 3.01 (0.448) 2.74 (0.751)
Median [Min, Max] 2.70 [0.700, 13.1] 3.15 [2.00, 3.40] 2.70 [0.700, 13.1]

AIS Grades at baseline analysis

#Descriptives stats between included (FALSE) and excluded (TRUE) in AIS Grades baseline
table1(~SEX+AGE_BINARY+REVIEWASIAGRADEADM+ASIA_LEVEL_ADM+CRLOWALBUMIN|is.na(ASIAGRADE_WALK), data=y)
FALSE
(n=439)
TRUE
(n=1)
Overall
(n=440)
SEX
(1) MALE 349 (79.5%) 1 (100%) 350 (79.5%)
(2) FEMALE 90 (20.5%) 0 (0%) 90 (20.5%)
AGE_BINARY
0 316 (72.0%) 1 (100%) 317 (72.0%)
1 123 (28.0%) 0 (0%) 123 (28.0%)
REVIEWASIAGRADEADM
A 220 (50.1%) 0 (0%) 220 (50.0%)
B 74 (16.9%) 0 (0%) 74 (16.8%)
C 78 (17.8%) 0 (0%) 78 (17.7%)
D 67 (15.3%) 1 (100%) 68 (15.5%)
ASIA_LEVEL_ADM
C 293 (66.7%) 0 (0%) 293 (66.6%)
T 146 (33.3%) 1 (100%) 147 (33.4%)
Min Albumin Concentration
Mean (SD) 2.73 (0.751) 3.00 (NA) 2.74 (0.751)
Median [Min, Max] 2.70 [0.700, 13.1] 3.00 [3.00, 3.00] 2.70 [0.700, 13.1]

Baseline Association

These following two figures are to see if Albumin is associated with LEMS and AIS grades at baseline:

URP for Admission

For baseline association, we included only C and T participants and excluded all NAs in Albumin. In addition, all NAs in the outcomes (LEMS baseline, and AIS grades baseline) are also excluded.

URP for LEMS baseline:

library("expss")
library("party")
ADDEP_3 = apply_labels(ADDEP_3, CRLOWALBUMIN = "Min Albumin Concentration", 
                       LOWER_MS_REHAB = "LEMS at Admission", ASIAGRADE_WALK = "AIS Grades at Admission")

URP_LEMS_baseline <-use_labels(ADDEP_3, ctree(LOWER_MS_REHAB ~ CRLOWALBUMIN, controls=ctree_control(testtype = "Bonferroni",
                                                                                                    mincriterion = 0.95), data=subset(..data, !is.na(LOWER_MS_REHAB)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN))))

plot(URP_LEMS_baseline)

URP for AIS grades baseline:

library("expss")

URP_AIS_baseline <-use_labels(ADDEP_3, ctree(ASIAGRADE_WALK ~ CRLOWALBUMIN,controls=ctree_control(testtype = "Bonferroni",
                                                                                                  mincriterion = 0.95), data=subset(..data, !is.na(ASIAGRADE_WALK)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN))))

plot(URP_AIS_baseline, terminal_panel=node_barplot(URP_AIS_baseline))

Descriptive Statistics for 1-year Analysis

Change Scores Analysis

#Shared dataset between 2 outcomes for 1 year
z<-subset(ADDEP_3,!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN))

#Descriptives stats between included (FALSE) and excluded (TRUE) in Change scores
table1(~SEX+AGE_BINARY+REVIEWASIAGRADEADM+ASIA_LEVEL_ADM+CRLOWALBUMIN|is.na(Change_Scores), data=z)
FALSE
(n=117)
TRUE
(n=265)
Overall
(n=382)
SEX
(1) MALE 97 (82.9%) 207 (78.1%) 304 (79.6%)
(2) FEMALE 20 (17.1%) 58 (21.9%) 78 (20.4%)
AGE_BINARY
0 93 (79.5%) 189 (71.3%) 282 (73.8%)
1 24 (20.5%) 76 (28.7%) 100 (26.2%)
REVIEWASIAGRADEADM
A 67 (57.3%) 152 (57.4%) 219 (57.3%)
B 22 (18.8%) 52 (19.6%) 74 (19.4%)
C 23 (19.7%) 49 (18.5%) 72 (18.8%)
D 5 (4.3%) 12 (4.5%) 17 (4.5%)
ASIA_LEVEL_ADM
C 74 (63.2%) 170 (64.2%) 244 (63.9%)
T 43 (36.8%) 95 (35.8%) 138 (36.1%)
Min Albumin Concentration
Mean (SD) 2.69 (0.522) 2.69 (0.857) 2.69 (0.769)
Median [Min, Max] 2.60 [1.60, 4.10] 2.60 [0.700, 13.1] 2.60 [0.700, 13.1]

Marked Recovery Analysis

#Descriptives stats between included (FALSE) and excluded (TRUE) in Marked Recovery 
table1(~SEX+AGE_BINARY+REVIEWASIAGRADEADM+ASIA_LEVEL_ADM+CRLOWALBUMIN|is.na(Marked_Recovery_Annual_2), data=z)
FALSE
(n=167)
TRUE
(n=215)
Overall
(n=382)
SEX
(1) MALE 141 (84.4%) 163 (75.8%) 304 (79.6%)
(2) FEMALE 26 (15.6%) 52 (24.2%) 78 (20.4%)
AGE_BINARY
0 122 (73.1%) 160 (74.4%) 282 (73.8%)
1 45 (26.9%) 55 (25.6%) 100 (26.2%)
REVIEWASIAGRADEADM
A 71 (42.5%) 148 (68.8%) 219 (57.3%)
B 22 (13.2%) 52 (24.2%) 74 (19.4%)
C 61 (36.5%) 11 (5.1%) 72 (18.8%)
D 13 (7.8%) 4 (1.9%) 17 (4.5%)
ASIA_LEVEL_ADM
C 111 (66.5%) 133 (61.9%) 244 (63.9%)
T 56 (33.5%) 82 (38.1%) 138 (36.1%)
Min Albumin Concentration
Mean (SD) 2.78 (0.960) 2.62 (0.572) 2.69 (0.769)
Median [Min, Max] 2.70 [1.40, 13.1] 2.60 [0.700, 4.20] 2.60 [0.700, 13.1]

URP for 1 year

For 1 year, the inclusion criteria are: participants who were non-ambulatory at admission to rehabilitation were included, C & T level only, completed assessment of albumin, and neurological outcomes.

URP for change scores (univariate)

plot(use_labels(ADDEP_3,ctree(Change_Scores ~ CRLOWALBUMIN, data=subset(..data, !is.na(Change_Scores)&!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN)))))

Subset data for change scores then split

Splitting data into 70% training, and 30% testing. The evaluation metrics for training sample is:

library("caret")
## Loading required package: lattice
ADDEP_CS_subset <- subset(ADDEP_3, !is.na(Change_Scores)&!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN))

set.seed(4567)
sample_CS <- ADDEP_CS_subset$Change_Scores %>% 
  createDataPartition(p = 0.7, list = FALSE)
train_ADDEP_CS  <- ADDEP_CS_subset[sample_CS, ]
test_ADDEP_CS <- ADDEP_CS_subset[-sample_CS, ]

URP_CS_1y <- train(
  Change_Scores ~ CRLOWALBUMIN, data = train_ADDEP_CS, method = "ctree", na.action = na.omit, 
  trControl = trainControl("cv", number = 10), tuneLength=.1
)

URP_CS_1y
## Conditional Inference Tree 
## 
## 84 samples
##  1 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 76, 76, 75, 76, 76, 76, ... 
## Resampling results:
## 
##   RMSE      Rsquared  MAE     
##   8.816839  0.169552  6.484345
## 
## Tuning parameter 'mincriterion' was held constant at a value of 0.99

Predicting on the 30% unseeen test data. The evaluation metrics for testing sample is:

predict_CS_1y <- cbind(as.data.frame(predict(URP_CS_1y, newdata=test_ADDEP_CS)), test_ADDEP_CS$Change_Scores)

colnames(predict_CS_1y) <- c("predicted","actuals")
postResample(pred = predict_CS_1y$predicted, obs = predict_CS_1y$actuals)
##       RMSE   Rsquared        MAE 
## 9.75554535 0.07645041 6.85788087

Our model is not overfitting since RMSE between test and train samples are not vastly different.

URP for change scores (bivariate)

plot(use_labels(ADDEP_3,ctree(Change_Scores ~ CRLOWALBUMIN+LOWER_MS_REHAB, data=subset(..data, !is.na(Change_Scores)&!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN)))))

URP for Marked Recovery (univariate)

plot(use_labels(ADDEP_3,ctree(Marked_Recovery_Annual_2~CRLOWALBUMIN, data=subset(..data, !is.na(Marked_Recovery_Annual_2)&!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN)))))

Subset data for Marked Recovery then split

Splitting data is similar to Change Score analysis above. The evaluation metrics for training sample is:

ADDEP_MR_subset <- subset(ADDEP_3, !is.na(Marked_Recovery_Annual_2)&!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN))

set.seed(4567)
sample_MR <- ADDEP_MR_subset$Marked_Recovery_Annual_2 %>% 
  createDataPartition(p = 0.7, list = FALSE)
train_ADDEP_MR <- ADDEP_MR_subset[sample_MR, ]
test_ADDEP_MR <- ADDEP_MR_subset[-sample_MR, ]

URP_MR_1y <- train(
  Marked_Recovery_Annual_2 ~ CRLOWALBUMIN, data = train_ADDEP_MR, method = "ctree", na.action = na.omit, 
  trControl = trainControl("cv", number = 10), tuneLength=.1
)
URP_MR_1y
## Conditional Inference Tree 
## 
## 118 samples
##   1 predictor
##   2 classes: '0', '1' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 106, 106, 106, 106, 107, 106, ... 
## Resampling results:
## 
##   Accuracy   Kappa      
##   0.6833333  -0.05172414
## 
## Tuning parameter 'mincriterion' was held constant at a value of 0.99

Predicting on the 30% unseeen test data. The AUC for ROC and PR curves are:

library("precrec")
library("ROCR")
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
predict_MR_1y <- predict(URP_MR_1y, newdata=test_ADDEP_MR, type='prob')
predict_MR_1y <- prediction(predict_MR_1y[,2], test_ADDEP_MR$Marked_Recovery_Annual_2)
predict_MR_1y <- cbind(as.data.frame(predict_MR_1y@predictions), as.data.frame(predict_MR_1y@labels))
colnames(predict_MR_1y) <- c("prob","actuals")
evalmod(scores = predict_MR_1y$prob, labels = predict_MR_1y$actuals)
## 
##     === AUCs ===
## 
##      Model name Dataset ID Curve type       AUC
##    1         m1          1        ROC 0.5990991
##    2         m1          1        PRC 0.3436168
## 
## 
##     === Input data ===
## 
##      Model name Dataset ID # of negatives # of positives
##    1         m1          1             37             12

The plot for ROC and PR curves:

autoplot(evalmod(scores = predict_MR_1y$prob, labels = predict_MR_1y$actuals))

Confusion matrix:

confusionMatrix(predict(URP_MR_1y, newdata=test_ADDEP_MR), test_ADDEP_MR$Marked_Recovery_Annual_2)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 37 12
##          1  0  0
##                                           
##                Accuracy : 0.7551          
##                  95% CI : (0.6113, 0.8666)
##     No Information Rate : 0.7551          
##     P-Value [Acc > NIR] : 0.576719        
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : 0.001496        
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7551          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7551          
##          Detection Rate : 0.7551          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class : 0               
## 

Univariate URP for Marked Recovery has 75% accuracy, and it’s mostly accurate in predicting for cases who did not achieve marked recovery.

URP for Marked Recovery (bivariate)

ADDEP_3$REVIEWASIAGRADEADM<- as.factor(ADDEP_3$REVIEWASIAGRADEADM)

plot(use_labels(ADDEP_3,ctree(Marked_Recovery_Annual_2~CRLOWALBUMIN+REVIEWASIAGRADEADM, data=subset(..data, !is.na(Marked_Recovery_Annual_2)&!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN)))))

Sensitivity analysis (Would age matter?)

In this ADDEP dataset, we have individuals who are from 12-19 years old. We excluded them to see if there is any difference in relation to outcomes:

ADDEP_3$AGE_INJ_NUM <- as.factor(ADDEP_3$AGE_INJ_NUM)

plot(use_labels(ADDEP_3,ctree(Change_Scores ~ CRLOWALBUMIN+AGE_INJ_NUM, data=subset(..data, !(AGE_INJ_NUM==1)&!is.na(Change_Scores)&!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN)))))

plot(use_labels(ADDEP_3,ctree(Marked_Recovery_Annual_2 ~ CRLOWALBUMIN+AGE_INJ_NUM, data=subset(..data, !(AGE_INJ_NUM==1)&!is.na(Marked_Recovery_Annual_2)&!(Walk_Admission==1)&ASIA_LEVEL_ADM==c("C", "T")&!is.na(CRLOWALBUMIN)))))

The absence of age indicates that age is not significant variable assocating with outcome.

Summary

At admission to rehabiliation, lower serum albumin was associated with more severe spinal cord injury. However, at 1-year post-injury analysis, serum albumin was not significant after adjusting for injury characteristics. This suggest serum albumin is more useful when neurological details at admission (i.e., LEMS or AIS grade at baseline) are not available.