Boosted Regression Trees and Install Elith et al. additional BRT functions
# Set the working directory and load the additional BRT helper functions from
# brt.functions.R (presumably where gbm.step(), used further down, is defined).
# NOTE(review): both paths are absolute and specific to one user's machine, so
# the script only runs there as written — consider a project-relative path.
setwd("C:\\Users\\ajohal3\\Downloads")
source("C:\\Users\\ajohal3\\Downloads\\brt.functions.R")
Set up packages needed
library(gbm)
## Warning: package 'gbm' was built under R version 4.4.2
## Loaded gbm 2.2.2
## This version of gbm is no longer under development. Consider transitioning to gbm3, https://github.com/gbm-developers/gbm3
library(psych)
## Warning: package 'psych' was built under R version 4.4.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(reshape)
## Warning: package 'reshape' was built under R version 4.4.2
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
# Read the study data set; text columns are converted to factors.
# NOTE(review): the str() output shows a blank " " level in pain_duration,
# i.e. empty cells are kept as their own category rather than as NA — confirm
# that this is intended.
Amritdata <- read.csv(
  file = "C:\\Users\\ajohal3\\Downloads\\Research_Data_updated_Dec_9_2024_AJK.csv",
  stringsAsFactors = TRUE
)
# Per-column overview: counts for factors, quantiles and NA counts for numerics.
summary(Amritdata)
## prolific_id race hispanic
## 54d2c4e2fdf99b2c319a8cf6: 1 Min. : 1.000 Hispanic/Latinx : 48
## 55daf7b369dbc30005b68ac9: 1 1st Qu.: 6.000 I prefer not to disclose: 25
## 574d70b95e549100063c6533: 1 Median : 6.000 Non-Hispanic/Latinx :433
## 57d67fdf6598aa00019917c8: 1 Mean : 5.834
## 584e7a66e42c2a00013099ac: 1 3rd Qu.: 6.000
## 58dff341282ab00001e9ce26: 1 Max. :13.000
## (Other) :500
## assigned_sex_at_birth age employment
## Female :376 Min. :18.00 Employed full-time :174
## Intersex/DSD : 1 1st Qu.:23.00 Student :131
## Male :126 Median :29.00 Employed part-time : 76
## prefer not to disclose: 3 Mean :31.79 Unemployed : 53
## 3rd Qu.:38.00 Homemaker : 31
## Max. :70.00 Receiving disability: 26
## NA's :1 (Other) : 15
## income overlapping_pain_number pain_duration
## $0-$25,000 :168 Min. :1.000 >10 years :117
## $25,001-$50,000 :130 1st Qu.:2.000 5-7 years : 99
## $50,001-$75,000 : 83 Median :3.000 2-4 years : 86
## $75,001-$100,000 : 45 Mean :2.883 1-2 years : 59
## >$100,000 : 58 3rd Qu.:4.000 7-10 years: 55
## prefer not to disclose: 22 Max. :9.000 3-6 months: 50
## NA's :3 (Other) : 40
## gcpsr_2 gcpsr_3 gcpsr_4 gcpsr_5 gcpsr_6
## Every day:187 Min. : 0.000 Min. : 0.00 Min. : 0.000 No :400
## Some days:319 1st Qu.: 4.000 1st Qu.: 4.00 1st Qu.: 4.000 Yes:106
## Median : 5.000 Median : 6.00 Median : 5.000
## Mean : 5.255 Mean : 5.54 Mean : 5.316
## 3rd Qu.: 6.000 3rd Qu.: 7.00 3rd Qu.: 7.000
## Max. :10.000 Max. :10.00 Max. :10.000
##
## prodep_t proanx_t prorx_t RST_PQ_FFS
## Min. :41.00 Min. :40.30 Min. :36.30 Min. :1.000
## 1st Qu.:57.30 1st Qu.:59.50 1st Qu.:41.60 1st Qu.:2.000
## Median :63.90 Median :65.30 Median :43.70 Median :2.600
## Mean :62.89 Mean :63.96 Mean :44.72 Mean :2.523
## 3rd Qu.:69.40 3rd Qu.:69.30 3rd Qu.:48.20 3rd Qu.:3.000
## Max. :79.40 Max. :81.60 Max. :75.10 Max. :4.000
## NA's :320
## RST_PQ_BIS RST_PQ_BAS_RI RST_PQ_BAS_GDP RST_PQ_BAS_RR
## Min. :1.174 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.609 1st Qu.:2.000 1st Qu.:2.143 1st Qu.:2.300
## Median :3.065 Median :2.429 Median :2.714 Median :2.700
## Mean :2.980 Mean :2.388 Mean :2.643 Mean :2.672
## 3rd Qu.:3.478 3rd Qu.:2.857 3rd Qu.:3.143 3rd Qu.:3.100
## Max. :4.000 Max. :3.857 Max. :4.000 Max. :4.000
##
## RST_PQ_BAS_I SHAPS_tot bis_brief_tot cpaq8_tot
## Min. :1.000 Min. : 0.000 Min. :1.000 Min. : 2.00
## 1st Qu.:1.875 1st Qu.: 0.000 1st Qu.:1.875 1st Qu.:21.00
## Median :2.375 Median : 1.000 Median :2.125 Median :27.00
## Mean :2.383 Mean : 2.433 Mean :2.192 Mean :26.27
## 3rd Qu.:2.875 3rd Qu.: 4.000 3rd Qu.:2.500 3rd Qu.:31.75
## Max. :4.000 Max. :13.000 Max. :3.875 Max. :46.00
##
## pcs_tot isi_tot meq_tot audit_total
## Min. : 0.00 Min. : 0.00 Min. :36.00 Min. : 0.000
## 1st Qu.:17.00 1st Qu.:10.00 1st Qu.:45.00 1st Qu.: 1.000
## Median :26.00 Median :14.00 Median :48.00 Median : 2.000
## Mean :26.29 Mean :14.09 Mean :48.18 Mean : 2.502
## 3rd Qu.:36.00 3rd Qu.:19.00 3rd Qu.:52.00 3rd Qu.: 4.000
## Max. :52.00 Max. :28.00 Max. :64.00 Max. :11.000
##
## cuditr0 current_opioid_meds gcpsr_2_HICP gcpsr_2_HICP_num
## Min. :0.0000 No :436 No :319 Min. :0.0000
## 1st Qu.:0.0000 Yes: 70 Yes:187 1st Qu.:0.0000
## Median :0.0000 Median :0.0000
## Mean :0.3814 Mean :0.3696
## 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000
##
## income_num income_alpha hispanic_bin
## Min. :1.000 A:168 I prefer not to disclose: 25
## 1st Qu.:1.000 B:130 No :433
## Median :2.000 C: 83 Yes : 48
## Mean :2.528 D: 45
## 3rd Qu.:3.000 E: 58
## Max. :6.000 F: 22
##
# Check the storage type of every column (factor / int / num) and its first values.
str(Amritdata)
## 'data.frame': 506 obs. of 37 variables:
## $ prolific_id : Factor w/ 506 levels "54d2c4e2fdf99b2c319a8cf6",..: 1 2 4 5 6 7 10 13 14 18 ...
## $ race : int 6 6 6 6 6 6 6 6 6 6 ...
## $ hispanic : Factor w/ 3 levels "Hispanic/Latinx",..: 3 3 3 2 3 3 3 3 1 3 ...
## $ assigned_sex_at_birth : Factor w/ 4 levels "Female","Intersex/DSD",..: 3 3 3 1 1 3 3 1 1 3 ...
## $ age : int 43 37 53 34 42 37 46 22 55 24 ...
## $ employment : Factor w/ 9 levels "Employed full-time",..: 1 1 1 1 2 2 1 8 2 9 ...
## $ income : Factor w/ 6 levels "$0-$25,000","$25,001-$50,000",..: 3 2 5 1 1 1 2 1 2 1 ...
## $ overlapping_pain_number: int 5 4 2 1 3 3 3 3 3 1 ...
## $ pain_duration : Factor w/ 8 levels " ",">10 years",..: 2 6 7 2 6 4 7 4 4 6 ...
## $ gcpsr_2 : Factor w/ 2 levels "Every day","Some days": 1 2 1 2 2 2 2 2 2 2 ...
## $ gcpsr_3 : int 4 6 8 6 7 5 6 6 5 8 ...
## $ gcpsr_4 : int 6 8 9 5 6 7 5 5 4 5 ...
## $ gcpsr_5 : int 6 4 8 5 6 6 6 3 5 3 ...
## $ gcpsr_6 : Factor w/ 2 levels "No","Yes": 2 1 1 1 2 1 1 1 1 1 ...
## $ prodep_t : num 58.9 71.2 62.2 63.9 49 69.4 60.5 62.2 67.5 55.7 ...
## $ proanx_t : num 48 69.3 61.4 65.3 40.3 59.5 55.8 65.3 71.2 55.8 ...
## $ prorx_t : num NA NA 41.6 50.4 45.5 NA 36.3 NA NA NA ...
## $ RST_PQ_FFS : num 1.5 1.6 3.4 2.5 2.2 3.3 1.9 2.2 3.1 1.7 ...
## $ RST_PQ_BIS : num 1.65 3.43 2.65 2.96 2.17 ...
## $ RST_PQ_BAS_RI : num 3.43 1.71 2.86 2.57 2.71 ...
## $ RST_PQ_BAS_GDP : num 3.14 1.43 3.71 3 3.14 ...
## $ RST_PQ_BAS_RR : num 2 1.8 3.2 2.9 3 2.8 2.4 2.7 3.6 1.9 ...
## $ RST_PQ_BAS_I : num 2.25 2.62 2.5 2.25 2.62 ...
## $ SHAPS_tot : int 0 4 0 0 0 1 0 6 0 10 ...
## $ bis_brief_tot : num 2 2.88 1.75 2.62 2.12 ...
## $ cpaq8_tot : int 29 26 17 21 20 28 30 30 28 32 ...
## $ pcs_tot : int 5 25 23 31 32 7 16 35 21 20 ...
## $ isi_tot : int 14 17 11 14 15 15 1 21 13 0 ...
## $ meq_tot : int 48 44 43 50 43 57 53 50 45 53 ...
## $ audit_total : int 2 1 1 1 0 2 7 3 5 2 ...
## $ cuditr0 : int 0 1 0 0 0 0 0 1 0 0 ...
## $ current_opioid_meds : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 1 1 ...
## $ gcpsr_2_HICP : Factor w/ 2 levels "No","Yes": 2 1 2 1 1 1 1 1 1 1 ...
## $ gcpsr_2_HICP_num : int 1 0 1 0 0 0 0 0 0 0 ...
## $ income_num : int 3 2 5 1 1 1 2 1 2 1 ...
## $ income_alpha : Factor w/ 6 levels "A","B","C","D",..: 3 2 5 1 1 1 2 1 2 1 ...
## $ hispanic_bin : Factor w/ 3 levels "I prefer not to disclose",..: 2 2 2 1 2 2 2 2 3 2 ...
Clean data
# List the column names so the variables to drop can be picked out by name.
colnames(Amritdata)
## [1] "prolific_id" "race"
## [3] "hispanic" "assigned_sex_at_birth"
## [5] "age" "employment"
## [7] "income" "overlapping_pain_number"
## [9] "pain_duration" "gcpsr_2"
## [11] "gcpsr_3" "gcpsr_4"
## [13] "gcpsr_5" "gcpsr_6"
## [15] "prodep_t" "proanx_t"
## [17] "prorx_t" "RST_PQ_FFS"
## [19] "RST_PQ_BIS" "RST_PQ_BAS_RI"
## [21] "RST_PQ_BAS_GDP" "RST_PQ_BAS_RR"
## [23] "RST_PQ_BAS_I" "SHAPS_tot"
## [25] "bis_brief_tot" "cpaq8_tot"
## [27] "pcs_tot" "isi_tot"
## [29] "meq_tot" "audit_total"
## [31] "cuditr0" "current_opioid_meds"
## [33] "gcpsr_2_HICP" "gcpsr_2_HICP_num"
## [35] "income_num" "income_alpha"
## [37] "hispanic_bin"
# Drop the columns that are not carried into the models. The remaining columns
# keep their original order.
Amritdata <- select(
  Amritdata,
  !c(
    # Factor version of the outcome gcpsr_2_HICP_num, and the item it appears to
    # be derived from (same 187 / 319 split in summary()).
    "gcpsr_2_HICP", "gcpsr_2",
    # Missing for 320 of the 506 participants.
    "prorx_t",
    # NOTE(review): reason for dropping SHAPS_tot is not evident here — confirm.
    "SHAPS_tot",
    # Recoded duplicates of income and hispanic.
    "income_num", "income_alpha", "hispanic_bin"
  )
)
Now randomly sample the IDs so you have 404 (80% of your sample) unique IDs in the training set, and the other IDs will be used as a test set.
# Randomly draw 80% of the unique participant IDs for the training set.
# The seed is fixed so that the train/test split — and every model fitted on
# it — is identical each time the script is run; without it, every run (or
# re-knit) silently produces a different split and different results.
set.seed(1)
all_ids <- unique(Amritdata$prolific_id)
# floor(0.8 * 506) = 404, the training size used throughout this analysis.
samp <- sample(x = all_ids, size = floor(0.8 * length(all_ids)), replace = FALSE)
# Number of IDs drawn into the training set.
length(samp)
## [1] 404
# Keep only the rows whose participant ID was drawn into the training sample.
train <- Amritdata[is.element(Amritdata$prolific_id, samp), ]
# Sanity check: the number of distinct participants in the training rows.
length(unique(train[["prolific_id"]]))
## [1] 404
Re-ordering variables so that outcome is first followed by unused variables, followed by the predictors. We won’t use id, which will now be column 2. Remove repeated variables.
# Show the current column order; the outcome gcpsr_2_HICP_num is still last.
colnames(train)
## [1] "prolific_id" "race"
## [3] "hispanic" "assigned_sex_at_birth"
## [5] "age" "employment"
## [7] "income" "overlapping_pain_number"
## [9] "pain_duration" "gcpsr_3"
## [11] "gcpsr_4" "gcpsr_5"
## [13] "gcpsr_6" "prodep_t"
## [15] "proanx_t" "RST_PQ_FFS"
## [17] "RST_PQ_BIS" "RST_PQ_BAS_RI"
## [19] "RST_PQ_BAS_GDP" "RST_PQ_BAS_RR"
## [21] "RST_PQ_BAS_I" "bis_brief_tot"
## [23] "cpaq8_tot" "pcs_tot"
## [25] "isi_tot" "meq_tot"
## [27] "audit_total" "cuditr0"
## [29] "current_opioid_meds" "gcpsr_2_HICP_num"
# Move the outcome to column 1 and keep every other column in its current
# order, so prolific_id becomes column 2 and the predictors follow.
# The reorder is done by name rather than by position: positional indices
# break silently if the set of dropped columns changes, and re-running a
# positional reorder would rotate the columns again.
outcome_col <- "gcpsr_2_HICP_num"
stopifnot(outcome_col %in% colnames(train))
# base:: is explicit because dplyr masks setdiff().
train <- train[, c(outcome_col, base::setdiff(colnames(train), outcome_col))]
# Confirm the new order: outcome, ID, then the 28 predictors.
colnames(train)
## [1] "gcpsr_2_HICP_num" "prolific_id"
## [3] "race" "hispanic"
## [5] "assigned_sex_at_birth" "age"
## [7] "employment" "income"
## [9] "overlapping_pain_number" "pain_duration"
## [11] "gcpsr_3" "gcpsr_4"
## [13] "gcpsr_5" "gcpsr_6"
## [15] "prodep_t" "proanx_t"
## [17] "RST_PQ_FFS" "RST_PQ_BIS"
## [19] "RST_PQ_BAS_RI" "RST_PQ_BAS_GDP"
## [21] "RST_PQ_BAS_RR" "RST_PQ_BAS_I"
## [23] "bis_brief_tot" "cpaq8_tot"
## [25] "pcs_tot" "isi_tot"
## [27] "meq_tot" "audit_total"
## [29] "cuditr0" "current_opioid_meds"
We need to make the cross validation occur over people
# CV fold making: folds are assigned per participant ID (built in the lines below).
# Show the training columns once more for reference.
colnames(train)
## [1] "gcpsr_2_HICP_num" "prolific_id"
## [3] "race" "hispanic"
## [5] "assigned_sex_at_birth" "age"
## [7] "employment" "income"
## [9] "overlapping_pain_number" "pain_duration"
## [11] "gcpsr_3" "gcpsr_4"
## [13] "gcpsr_5" "gcpsr_6"
## [15] "prodep_t" "proanx_t"
## [17] "RST_PQ_FFS" "RST_PQ_BIS"
## [19] "RST_PQ_BAS_RI" "RST_PQ_BAS_GDP"
## [21] "RST_PQ_BAS_RR" "RST_PQ_BAS_I"
## [23] "bis_brief_tot" "cpaq8_tot"
## [25] "pcs_tot" "isi_tot"
## [27] "meq_tot" "audit_total"
## [29] "cuditr0" "current_opioid_meds"
# Assign every unique participant ID in the training set to one of `nfolds`
# cross-validation folds, so that folds are defined over people.
id <- unique(train$prolific_id)
#summary(id)
#describe(id)
nfolds <- 10
# Fix the seed so the fold assignment — and therefore the CV statistics that
# are compared across models — can be reproduced on a later run.
set.seed(2)
# rep() cycles 1..nfolds over the IDs, giving fold sizes that differ by at
# most one; sample() then shuffles which ID receives which fold.
cvtest <- sample(rep(seq_len(nfolds), length.out = length(id)), replace = FALSE)
idname <- "prolific_id"
foldname <- "fold"
# Lookup table with one row per participant: ID and assigned fold.
cvdf <- data.frame(id, cvtest)
names(cvdf) <- c(idname, foldname)
head(cvdf)
## prolific_id fold
## 1 55daf7b369dbc30005b68ac9 10
## 2 584e7a66e42c2a00013099ac 6
## 3 58dff341282ab00001e9ce26 5
## 4 5947f262de49a9000165ccd3 2
## 5 5a64bb3035f26b0001492e6a 7
## 6 5ac6852df69e940001d98f04 1
# Attach each participant's fold number to the training rows by joining on ID.
# merge() places the key column (prolific_id) first and appends `fold` last.
# NOTE(review): with sort = FALSE the row order of the result is documented as
# unspecified, so do not assume combcvdf rows line up with the rows of `train`.
combcvdf <- merge(x = train, y = cvdf, by = "prolific_id", sort = FALSE)
head(combcvdf, n = 6L)
## prolific_id gcpsr_2_HICP_num race hispanic
## 1 55daf7b369dbc30005b68ac9 0 6 Non-Hispanic/Latinx
## 2 584e7a66e42c2a00013099ac 0 6 I prefer not to disclose
## 3 58dff341282ab00001e9ce26 0 6 Non-Hispanic/Latinx
## 4 5947f262de49a9000165ccd3 0 6 Non-Hispanic/Latinx
## 5 5a64bb3035f26b0001492e6a 0 6 Non-Hispanic/Latinx
## 6 5ac6852df69e940001d98f04 0 6 Non-Hispanic/Latinx
## assigned_sex_at_birth age employment income
## 1 Male 37 Employed full-time $25,001-$50,000
## 2 Female 34 Employed full-time $0-$25,000
## 3 Female 42 Employed part-time $0-$25,000
## 4 Male 37 Employed part-time $0-$25,000
## 5 Male 46 Employed full-time $25,001-$50,000
## 6 Female 22 Student $0-$25,000
## overlapping_pain_number pain_duration gcpsr_3 gcpsr_4 gcpsr_5 gcpsr_6
## 1 4 5-7 years 6 8 4 No
## 2 1 >10 years 6 5 5 No
## 3 3 5-7 years 7 6 6 Yes
## 4 3 2-4 years 5 7 6 No
## 5 3 7-10 years 6 5 6 No
## 6 3 2-4 years 6 5 3 No
## prodep_t proanx_t RST_PQ_FFS RST_PQ_BIS RST_PQ_BAS_RI RST_PQ_BAS_GDP
## 1 71.2 69.3 1.6 3.434783 1.714286 1.428571
## 2 63.9 65.3 2.5 2.956522 2.571429 3.000000
## 3 49.0 40.3 2.2 2.173913 2.714286 3.142857
## 4 69.4 59.5 3.3 2.739130 1.714286 1.142857
## 5 60.5 55.8 1.9 2.565217 2.714286 2.571429
## 6 62.2 65.3 2.2 3.304348 1.571429 1.142857
## RST_PQ_BAS_RR RST_PQ_BAS_I bis_brief_tot cpaq8_tot pcs_tot isi_tot meq_tot
## 1 1.8 2.625 2.875 26 25 17 44
## 2 2.9 2.250 2.625 21 31 14 50
## 3 3.0 2.625 2.125 20 32 15 43
## 4 2.8 2.125 2.625 28 7 15 57
## 5 2.4 1.500 1.750 30 16 1 53
## 6 2.7 2.125 2.500 30 35 21 50
## audit_total cuditr0 current_opioid_meds fold
## 1 1 1 No 10
## 2 1 0 No 6
## 3 0 0 No 5
## 4 2 0 No 2
## 5 7 0 No 7
## 6 3 1 No 1
# Count the distinct participants in each fold.
# Both the IDs and the folds are taken from combcvdf so they are paired row by
# row. Pairing train$prolific_id with combcvdf$fold is only correct if merge()
# happened to keep the row order of `train`, which it does not guarantee.
#table(combcvdf$prolific_id, combcvdf$fold)
#check what's wrong with table
# Folds per participant: 1 for each training ID, 0 for factor levels that are
# not in the training set.
#rowSums(table(combcvdf$prolific_id, combcvdf$fold)>0)
colSums(table(combcvdf$prolific_id, combcvdf$fold) > 0)
## 1 2 3 4 5 6 7 8 9 10
## 41 41 41 41 40 40 40 40 40 40
# Build the fold vector in the row order of `train`, because it is passed to
# gbm.step() together with data frames derived from `train`.
# Looking each training row's ID up in cvdf guarantees the alignment; taking
# combcvdf$fold instead relies on merge(sort = FALSE) preserving row order,
# which is documented as unspecified.
fold.vector <- cvdf$fold[match(train$prolific_id, cvdf$prolific_id)]
# Every training row must have received a fold.
stopifnot(length(fold.vector) == nrow(train), !anyNA(fold.vector))
head(fold.vector)
## [1] 10 6 5 2 7 1
# Descriptive statistics of the fold numbers (n should equal the number of
# training rows; min and max should be 1 and nfolds).
describe(fold.vector)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 404 5.47 2.88 5 5.46 3.71 1 10 9 0.02 -1.24 0.14
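Now let’s run some models with our training data! Each setting is run twice: once on finaltrain_nocgpsr (gcpsr_3–gcpsr_6 left out of the predictors, gbm.x = 3:26) and once on finaltrain (all predictors, gbm.x = 3:30). First, tc 3, lr .0005 on finaltrain_nocgpsr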
# finaltrain: outcome in column 1, prolific_id in column 2, predictors in 3:30.
finaltrain <- as.data.frame(train)
# finaltrain_nocgpsr: the same columns with the four gcpsr items moved to the
# end (columns 27:30), so that gbm.x = 3:26 leaves them out of the predictors.
# The columns are selected by name rather than by position, so the layout stays
# correct even if the upstream column order changes.
# Every model is run with both data sets (separately):
#   finaltrain          -> gbm.x = 3:30
#   finaltrain_nocgpsr  -> gbm.x = 3:26
gcpsr_cols <- c("gcpsr_3", "gcpsr_4", "gcpsr_5", "gcpsr_6")
stopifnot(all(gcpsr_cols %in% colnames(finaltrain)))
# base:: is explicit because dplyr masks setdiff().
finaltrain_nocgpsr <- finaltrain[
  ,
  c(base::setdiff(colnames(finaltrain), gcpsr_cols), gcpsr_cols)
]
colnames(finaltrain)
## [1] "gcpsr_2_HICP_num" "prolific_id"
## [3] "race" "hispanic"
## [5] "assigned_sex_at_birth" "age"
## [7] "employment" "income"
## [9] "overlapping_pain_number" "pain_duration"
## [11] "gcpsr_3" "gcpsr_4"
## [13] "gcpsr_5" "gcpsr_6"
## [15] "prodep_t" "proanx_t"
## [17] "RST_PQ_FFS" "RST_PQ_BIS"
## [19] "RST_PQ_BAS_RI" "RST_PQ_BAS_GDP"
## [21] "RST_PQ_BAS_RR" "RST_PQ_BAS_I"
## [23] "bis_brief_tot" "cpaq8_tot"
## [25] "pcs_tot" "isi_tot"
## [27] "meq_tot" "audit_total"
## [29] "cuditr0" "current_opioid_meds"
# Confirm that gcpsr_3 to gcpsr_6 now sit in columns 27:30, after the predictors.
colnames(finaltrain_nocgpsr)
## [1] "gcpsr_2_HICP_num" "prolific_id"
## [3] "race" "hispanic"
## [5] "assigned_sex_at_birth" "age"
## [7] "employment" "income"
## [9] "overlapping_pain_number" "pain_duration"
## [11] "prodep_t" "proanx_t"
## [13] "RST_PQ_FFS" "RST_PQ_BIS"
## [15] "RST_PQ_BAS_RI" "RST_PQ_BAS_GDP"
## [17] "RST_PQ_BAS_RR" "RST_PQ_BAS_I"
## [19] "bis_brief_tot" "cpaq8_tot"
## [21] "pcs_tot" "isi_tot"
## [23] "meq_tot" "audit_total"
## [25] "cuditr0" "current_opioid_meds"
## [27] "gcpsr_3" "gcpsr_4"
## [29] "gcpsr_5" "gcpsr_6"
# Model: tree complexity 3, learning rate 0.0005, gcpsr_3-gcpsr_6 left out.
# The seed is reset before the fit so the result can be repeated (presumably
# needed because of the random bagging, bag.fraction = 0.5).
set.seed(1)
cvtc3.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,                  # outcome: gcpsr_2_HICP_num
  gbm.x = 3:26,               # predictors; skips prolific_id (2) and gcpsr items (27:30)
  family = "bernoulli",       # 0/1 outcome
  fold.vector = fold.vector,  # participant-level CV folds
  tree.complexity = 3,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01,
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.309
##
## now adding trees...
## 100 1.3029
## 150 1.2969
## 200 1.2913
## 250 1.2861
## 300 1.2811
## 350 1.2762
## 400 1.2717
## 450 1.2674
## 500 1.2631
## 550 1.259
## 600 1.2552
## 650 1.2514
## 700 1.2479
## 750 1.2448
## 800 1.2417
## 850 1.2387
## 900 1.2358
## 950 1.233
## 1000 1.2306
## 1050 1.2281
## 1100 1.2257
## 1150 1.2234
## 1200 1.221
## 1250 1.219
## 1300 1.2171
## 1350 1.2151
## 1400 1.2132
## 1450 1.2113
## 1500 1.2096
## 1550 1.2076
## 1600 1.206
## 1650 1.2045
## 1700 1.203
## 1750 1.2016
## 1800 1.2001
## 1850 1.1989
## 1900 1.1976
## 1950 1.1963
## 2000 1.1951
## 2050 1.1939
## 2100 1.1928
## 2150 1.1917
## 2200 1.1907
## 2250 1.1896
## 2300 1.1887
## 2350 1.188
## 2400 1.1871
## 2450 1.1863
## 2500 1.1856
## 2550 1.1846
## 2600 1.1841
## 2650 1.1834
## 2700 1.1826
## 2750 1.1818
## 2800 1.1813
## 2850 1.1809
## 2900 1.1804
## 2950 1.18
## 3000 1.1794
## 3050 1.1789
## 3100 1.1783
## 3150 1.1777
## 3200 1.1776
## 3250 1.1771
## 3300 1.1767
## 3350 1.1763
## 3400 1.1756
## 3450 1.1753
## 3500 1.1751
## 3550 1.1748
## 3600 1.1747
## 3650 1.1744
## 3700 1.1741
## 3750 1.174
## 3800 1.1738
## 3850 1.1735
## 3900 1.1731
## 3950 1.1728
## 4000 1.1725
## 4050 1.1723
## 4100 1.1724
## 4150 1.1723
## 4200 1.172
## 4250 1.1718
## 4300 1.1719
## 4350 1.1718
## 4400 1.1717
## 4450 1.1717
## 4500 1.1715
## 4550 1.1715
## 4600 1.1715
## 4650 1.1713
## fitting final gbm model with a fixed number of 4650 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.986
##
## estimated cv deviance = 1.171 ; se = 0.034
##
## training data correlation = 0.622
## cv correlation = 0.391 ; se = 0.058
##
## training data ROC score = 0.86
## cv ROC score = 0.706 ; se = 0.036
##
## elapsed time - 0.4 minutes
# Cross-validated performance (mean and SE): deviance, correlation,
# discrimination (the cv ROC score), calibration and threshold. The CV deviance
# here is the figure to use when comparing models.
cvtc3.lr0005$cv.statistics
## $deviance.mean
## [1] 1.171322
##
## $deviance.se
## [1] 0.03364006
##
## $correlation.mean
## [1] 0.3906834
##
## $correlation.se
## [1] 0.05777884
##
## $discrimination.mean
## [1] 0.70569
##
## $discrimination.se
## [1] 0.03593855
##
## $calibration.mean
## [1] 0.1794158 1.3127085 0.5359566 0.6478088 0.4201257
##
## $calibration.se
## [1] 0.17682185 0.25468034 0.12014084 0.07657072 0.11404526
##
## $cv.threshold
## [1] 0.3666053
##
## $cv.threshold.se
## [1] 0.01049579
# Relative influence (rel.inf) of each predictor, largest first.
# Note: the deviance measures used to compare models are in $cv.statistics,
# not in this summary.
summary(cvtc3.lr0005)
## var rel.inf
## cpaq8_tot cpaq8_tot 27.82978141
## isi_tot isi_tot 9.69371751
## overlapping_pain_number overlapping_pain_number 8.56608617
## pain_duration pain_duration 7.78797445
## pcs_tot pcs_tot 6.48536605
## employment employment 5.80991392
## age age 5.51490146
## current_opioid_meds current_opioid_meds 3.98671147
## RST_PQ_BIS RST_PQ_BIS 3.94473272
## RST_PQ_FFS RST_PQ_FFS 3.11774195
## prodep_t prodep_t 2.20941448
## proanx_t proanx_t 2.09782343
## RST_PQ_BAS_I RST_PQ_BAS_I 2.03223845
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.82437014
## audit_total audit_total 1.57514132
## meq_tot meq_tot 1.52053739
## income income 1.43357878
## bis_brief_tot bis_brief_tot 1.25966911
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.99271091
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 0.90068291
## race race 0.46976995
## hispanic hispanic 0.44930887
## cuditr0 cuditr0 0.41708262
## assigned_sex_at_birth assigned_sex_at_birth 0.08074453
THIS IS WHERE WE STOPPED WHILE CODING TOGETHER In all of the below code, you will have to change the gbm.x and data to match what we were doing (so gbm.x = 3:26, data= finaltrain_nocgpsr)
tc 3, lr .0005 — finaltrain (gcpsr_3–gcpsr_6 included as predictors)
# Model: tree complexity 3, learning rate 0.0005, all predictors including
# gcpsr_3-gcpsr_6. The seed is reset before the fit so the result can be
# repeated (presumably needed because of the random bagging, bag.fraction = 0.5).
set.seed(1)
cvtc3.lr0005_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,                  # outcome: gcpsr_2_HICP_num
  gbm.x = 3:30,               # every column except the outcome (1) and prolific_id (2)
  family = "bernoulli",       # 0/1 outcome
  fold.vector = fold.vector,  # participant-level CV folds
  tree.complexity = 3,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01,
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3058
##
## now adding trees...
## 100 1.2964
## 150 1.2874
## 200 1.2785
## 250 1.2702
## 300 1.2621
## 350 1.2542
## 400 1.2469
## 450 1.2396
## 500 1.2327
## 550 1.2262
## 600 1.2197
## 650 1.2133
## 700 1.207
## 750 1.2012
## 800 1.1956
## 850 1.19
## 900 1.1849
## 950 1.1798
## 1000 1.175
## 1050 1.1702
## 1100 1.1654
## 1150 1.161
## 1200 1.1565
## 1250 1.1524
## 1300 1.1483
## 1350 1.1443
## 1400 1.1406
## 1450 1.1367
## 1500 1.1331
## 1550 1.1296
## 1600 1.1262
## 1650 1.1228
## 1700 1.1196
## 1750 1.1162
## 1800 1.113
## 1850 1.1099
## 1900 1.1073
## 1950 1.1046
## 2000 1.1018
## 2050 1.0993
## 2100 1.0967
## 2150 1.0943
## 2200 1.0919
## 2250 1.0898
## 2300 1.0876
## 2350 1.0855
## 2400 1.0834
## 2450 1.0814
## 2500 1.0793
## 2550 1.0774
## 2600 1.0755
## 2650 1.0735
## 2700 1.0716
## 2750 1.0698
## 2800 1.0682
## 2850 1.0667
## 2900 1.0651
## 2950 1.0637
## 3000 1.0621
## 3050 1.0606
## 3100 1.0592
## 3150 1.0578
## 3200 1.0565
## 3250 1.055
## 3300 1.0538
## 3350 1.0526
## 3400 1.0514
## 3450 1.0501
## 3500 1.0493
## 3550 1.0482
## 3600 1.0473
## 3650 1.0463
## 3700 1.0453
## 3750 1.0444
## 3800 1.0436
## 3850 1.0426
## 3900 1.0416
## 3950 1.0407
## 4000 1.0397
## 4050 1.039
## 4100 1.0382
## 4150 1.0375
## 4200 1.0367
## 4250 1.0361
## 4300 1.0352
## 4350 1.0347
## 4400 1.0338
## 4450 1.0333
## 4500 1.0328
## 4550 1.0323
## 4600 1.032
## 4650 1.0314
## 4700 1.0309
## 4750 1.0304
## 4800 1.0299
## 4850 1.0295
## 4900 1.0289
## 4950 1.0284
## 5000 1.0279
## 5050 1.0276
## 5100 1.0271
## 5150 1.0269
## 5200 1.0269
## 5250 1.0266
## 5300 1.0262
## 5350 1.026
## 5400 1.0256
## 5450 1.0253
## 5500 1.0251
## 5550 1.0247
## 5600 1.0243
## 5650 1.024
## 5700 1.0236
## 5750 1.0233
## 5800 1.0232
## 5850 1.0228
## 5900 1.0226
## 5950 1.0224
## 6000 1.0222
## 6050 1.0221
## 6100 1.0219
## 6150 1.0219
## 6200 1.0217
## 6250 1.0215
## 6300 1.0214
## 6350 1.0212
## 6400 1.0213
## 6450 1.0212
## 6500 1.0212
## 6550 1.0212
## 6600 1.0211
## 6650 1.0211
## fitting final gbm model with a fixed number of 6600 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.785
##
## estimated cv deviance = 1.021 ; se = 0.037
##
## training data correlation = 0.727
## cv correlation = 0.518 ; se = 0.044
##
## training data ROC score = 0.919
## cv ROC score = 0.803 ; se = 0.023
##
## elapsed time - 0.66 minutes
# Cross-validated performance (mean and SE): deviance, correlation,
# discrimination (the cv ROC score), calibration and threshold.
cvtc3.lr0005_1$cv.statistics
## $deviance.mean
## [1] 1.021056
##
## $deviance.se
## [1] 0.03665463
##
## $correlation.mean
## [1] 0.5177228
##
## $correlation.se
## [1] 0.04409666
##
## $discrimination.mean
## [1] 0.80279
##
## $discrimination.se
## [1] 0.02306947
##
## $calibration.mean
## [1] 0.07249221 1.14620613 0.67694290 0.73456462 0.54558932
##
## $calibration.se
## [1] 0.08967059 0.12796313 0.08881519 0.04603232 0.10342900
##
## $cv.threshold
## [1] 0.4208433
##
## $cv.threshold.se
## [1] 0.02803775
# Relative influence (rel.inf) of each predictor, largest first.
summary(cvtc3.lr0005_1)
## var rel.inf
## gcpsr_5 gcpsr_5 19.44185938
## cpaq8_tot cpaq8_tot 11.37422726
## gcpsr_4 gcpsr_4 9.30770081
## gcpsr_6 gcpsr_6 7.61384404
## gcpsr_3 gcpsr_3 7.57266606
## pain_duration pain_duration 5.99255222
## isi_tot isi_tot 5.31752926
## overlapping_pain_number overlapping_pain_number 5.10854044
## age age 4.27560446
## employment employment 3.81494461
## RST_PQ_BIS RST_PQ_BIS 2.53249851
## RST_PQ_FFS RST_PQ_FFS 2.13939433
## pcs_tot pcs_tot 2.12746758
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 2.01311870
## meq_tot meq_tot 1.62762296
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.38674100
## current_opioid_meds current_opioid_meds 1.14071625
## bis_brief_tot bis_brief_tot 1.08993494
## income income 1.00103716
## RST_PQ_BAS_I RST_PQ_BAS_I 0.99829222
## proanx_t proanx_t 0.99021705
## prodep_t prodep_t 0.90863904
## audit_total audit_total 0.80745276
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.54445330
## hispanic hispanic 0.47685201
## race race 0.24062243
## cuditr0 cuditr0 0.09712249
## assigned_sex_at_birth assigned_sex_at_birth 0.05834872
tc 3, lr .00025 — finaltrain_nocgpsr (gcpsr_3–gcpsr_6 left out of the predictors)
# Model: tree complexity 3, learning rate 0.00025, gcpsr_3-gcpsr_6 left out.
# The seed is reset before the fit so the result can be repeated (presumably
# needed because of the random bagging, bag.fraction = 0.5).
set.seed(1)
cvtc3.lr00025 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,                  # outcome: gcpsr_2_HICP_num
  gbm.x = 3:26,               # predictors; skips prolific_id (2) and gcpsr items (27:30)
  family = "bernoulli",       # 0/1 outcome
  fold.vector = fold.vector,  # participant-level CV folds
  tree.complexity = 3,
  learning.rate = 0.00025,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01,
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3123
##
## now adding trees...
## 100 1.3091
## 150 1.3059
## 200 1.3028
## 250 1.2999
## 300 1.297
## 350 1.2942
## 400 1.2914
## 450 1.2888
## 500 1.2861
## 550 1.2835
## 600 1.281
## 650 1.2785
## 700 1.2761
## 750 1.2739
## 800 1.2716
## 850 1.2694
## 900 1.2672
## 950 1.2651
## 1000 1.2631
## 1050 1.2611
## 1100 1.2591
## 1150 1.2571
## 1200 1.2552
## 1250 1.2534
## 1300 1.2517
## 1350 1.2499
## 1400 1.2482
## 1450 1.2465
## 1500 1.2448
## 1550 1.2431
## 1600 1.2416
## 1650 1.2401
## 1700 1.2386
## 1750 1.2371
## 1800 1.2356
## 1850 1.2342
## 1900 1.2328
## 1950 1.2315
## 2000 1.2302
## 2050 1.2289
## 2100 1.2276
## 2150 1.2264
## 2200 1.2252
## 2250 1.224
## 2300 1.2228
## 2350 1.2217
## 2400 1.2207
## 2450 1.2196
## 2500 1.2185
## 2550 1.2173
## 2600 1.2164
## 2650 1.2155
## 2700 1.2145
## 2750 1.2135
## 2800 1.2126
## 2850 1.2118
## 2900 1.2109
## 2950 1.21
## 3000 1.2092
## 3050 1.2083
## 3100 1.2075
## 3150 1.2067
## 3200 1.2059
## 3250 1.2052
## 3300 1.2044
## 3350 1.2037
## 3400 1.2028
## 3450 1.2021
## 3500 1.2014
## 3550 1.2007
## 3600 1.2
## 3650 1.1993
## 3700 1.1986
## 3750 1.198
## 3800 1.1974
## 3850 1.1966
## 3900 1.196
## 3950 1.1954
## 4000 1.1949
## 4050 1.1942
## 4100 1.1937
## 4150 1.1932
## 4200 1.1925
## 4250 1.1919
## 4300 1.1915
## 4350 1.191
## 4400 1.1905
## 4450 1.1901
## 4500 1.1896
## 4550 1.1892
## 4600 1.1888
## 4650 1.1883
## 4700 1.1879
## 4750 1.1874
## 4800 1.1871
## 4850 1.1867
## 4900 1.1862
## 4950 1.1858
## 5000 1.1854
## 5050 1.185
## 5100 1.1847
## 5150 1.1844
## 5200 1.1841
## 5250 1.1837
## 5300 1.1834
## 5350 1.1831
## 5400 1.1828
## 5450 1.1825
## 5500 1.1822
## 5550 1.1819
## 5600 1.1816
## 5650 1.1813
## 5700 1.181
## 5750 1.1807
## 5800 1.1803
## 5850 1.1799
## 5900 1.1797
## 5950 1.1794
## 6000 1.1792
## 6050 1.179
## 6100 1.1788
## 6150 1.1785
## 6200 1.1783
## 6250 1.1781
## 6300 1.1778
## 6350 1.1777
## 6400 1.1774
## 6450 1.1772
## 6500 1.177
## 6550 1.1768
## 6600 1.1765
## 6650 1.1764
## 6700 1.1762
## 6750 1.176
## 6800 1.176
## 6850 1.1758
## 6900 1.1756
## 6950 1.1754
## 7000 1.1752
## 7050 1.1751
## 7100 1.175
## 7150 1.1748
## 7200 1.1746
## 7250 1.1744
## 7300 1.1743
## 7350 1.1743
## 7400 1.1741
## 7450 1.1739
## 7500 1.1738
## 7550 1.1737
## 7600 1.1735
## 7650 1.1734
## 7700 1.1733
## 7750 1.1731
## 7800 1.173
## 7850 1.1729
## 7900 1.1728
## fitting final gbm model with a fixed number of 7900 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 1.012
##
## estimated cv deviance = 1.173 ; se = 0.03
##
## training data correlation = 0.607
## cv correlation = 0.393 ; se = 0.056
##
## training data ROC score = 0.851
## cv ROC score = 0.705 ; se = 0.034
##
## elapsed time - 0.73 minutes
# Cross-validated performance (mean and SE): deviance, correlation,
# discrimination (the cv ROC score), calibration and threshold. The CV deviance
# here is the figure to use when comparing models.
cvtc3.lr00025$cv.statistics
## $deviance.mean
## [1] 1.172845
##
## $deviance.se
## [1] 0.03020946
##
## $correlation.mean
## [1] 0.3930418
##
## $correlation.se
## [1] 0.05638659
##
## $discrimination.mean
## [1] 0.70508
##
## $discrimination.se
## [1] 0.03419079
##
## $calibration.mean
## [1] 0.2526100 1.4447901 0.5168032 0.6541823 0.3953110
##
## $calibration.se
## [1] 0.18699847 0.27793351 0.12112806 0.07745088 0.11026702
##
## $cv.threshold
## [1] 0.3645858
##
## $cv.threshold.se
## [1] 0.009678874
# Relative influence (rel.inf) of each predictor, largest first.
# Note: the deviance measures used to compare models are in $cv.statistics,
# not in this summary.
summary(cvtc3.lr00025)
## var rel.inf
## cpaq8_tot cpaq8_tot 30.01409742
## isi_tot isi_tot 9.91374355
## overlapping_pain_number overlapping_pain_number 8.89285844
## pain_duration pain_duration 7.76753400
## pcs_tot pcs_tot 5.99635786
## employment employment 5.67962120
## age age 5.42794674
## current_opioid_meds current_opioid_meds 4.16158922
## RST_PQ_BIS RST_PQ_BIS 3.65971530
## RST_PQ_FFS RST_PQ_FFS 2.92143949
## proanx_t proanx_t 2.09448016
## prodep_t prodep_t 2.01090822
## RST_PQ_BAS_I RST_PQ_BAS_I 1.90353378
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.48645516
## audit_total audit_total 1.46661643
## income income 1.33444420
## meq_tot meq_tot 1.25227914
## bis_brief_tot bis_brief_tot 1.21393826
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 0.85460602
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.85120743
## cuditr0 cuditr0 0.42172575
## race race 0.32602180
## hispanic hispanic 0.29614309
## assigned_sex_at_birth assigned_sex_at_birth 0.05273735
tc 3, lr .00025 — finaltrain (gcpsr_3–gcpsr_6 included as predictors)
# Model: tree complexity 3, learning rate 0.00025, all predictors including
# gcpsr_3-gcpsr_6. The seed is reset before the fit so the result can be
# repeated (presumably needed because of the random bagging, bag.fraction = 0.5).
set.seed(1)
cvtc3.lr00025_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,                  # outcome: gcpsr_2_HICP_num
  gbm.x = 3:30,               # every column except the outcome (1) and prolific_id (2)
  family = "bernoulli",       # 0/1 outcome
  fold.vector = fold.vector,  # participant-level CV folds
  tree.complexity = 3,
  learning.rate = 0.00025,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01,
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3107
##
## now adding trees...
## 100 1.3058
## 150 1.3011
## 200 1.2963
## 250 1.2917
## 300 1.2873
## 350 1.2829
## 400 1.2786
## 450 1.2743
## 500 1.2702
## 550 1.2662
## 600 1.2622
## 650 1.2583
## 700 1.2542
## 750 1.2505
## 800 1.2468
## 850 1.2432
## 900 1.2398
## 950 1.2363
## 1000 1.2329
## 1050 1.2295
## 1100 1.2261
## 1150 1.2229
## 1200 1.2196
## 1250 1.2165
## 1300 1.2134
## 1350 1.2102
## 1400 1.2073
## 1450 1.2043
## 1500 1.2015
## 1550 1.1986
## 1600 1.1959
## 1650 1.1931
## 1700 1.1903
## 1750 1.1876
## 1800 1.185
## 1850 1.1824
## 1900 1.1799
## 1950 1.1773
## 2000 1.1749
## 2050 1.1725
## 2100 1.1701
## 2150 1.1678
## 2200 1.1655
## 2250 1.1632
## 2300 1.161
## 2350 1.1587
## 2400 1.1565
## 2450 1.1545
## 2500 1.1524
## 2550 1.1504
## 2600 1.1484
## 2650 1.1463
## 2700 1.1443
## 2750 1.1424
## 2800 1.1406
## 2850 1.1387
## 2900 1.1368
## 2950 1.135
## 3000 1.1333
## 3050 1.1314
## 3100 1.1296
## 3150 1.1278
## 3200 1.126
## 3250 1.1242
## 3300 1.1225
## 3350 1.1209
## 3400 1.1193
## 3450 1.1177
## 3500 1.1163
## 3550 1.1148
## 3600 1.1133
## 3650 1.1118
## 3700 1.1103
## 3750 1.1089
## 3800 1.1075
## 3850 1.106
## 3900 1.1046
## 3950 1.1032
## 4000 1.1018
## 4050 1.1005
## 4100 1.0992
## 4150 1.098
## 4200 1.0967
## 4250 1.0955
## 4300 1.0943
## 4350 1.0931
## 4400 1.0918
## 4450 1.0907
## 4500 1.0895
## 4550 1.0883
## 4600 1.0873
## 4650 1.0861
## 4700 1.085
## 4750 1.0839
## 4800 1.0828
## 4850 1.0817
## 4900 1.0808
## 4950 1.0797
## 5000 1.0787
## 5050 1.0777
## 5100 1.0766
## 5150 1.0757
## 5200 1.0748
## 5250 1.0738
## 5300 1.0729
## 5350 1.0721
## 5400 1.0711
## 5450 1.0702
## 5500 1.0693
## 5550 1.0685
## 5600 1.0676
## 5650 1.0668
## 5700 1.0659
## 5750 1.0652
## 5800 1.0643
## 5850 1.0635
## 5900 1.0626
## 5950 1.0619
## 6000 1.061
## 6050 1.0603
## 6100 1.0596
## 6150 1.0589
## 6200 1.0582
## 6250 1.0575
## 6300 1.0569
## 6350 1.0562
## 6400 1.0556
## 6450 1.0549
## 6500 1.0542
## 6550 1.0536
## 6600 1.0529
## 6650 1.0522
## 6700 1.0516
## 6750 1.051
## 6800 1.0504
## 6850 1.0498
## 6900 1.0493
## 6950 1.0487
## 7000 1.0482
## 7050 1.0476
## 7100 1.047
## 7150 1.0464
## 7200 1.0458
## 7250 1.0453
## 7300 1.0448
## 7350 1.0443
## 7400 1.0439
## 7450 1.0433
## 7500 1.0429
## 7550 1.0424
## 7600 1.0419
## 7650 1.0414
## 7700 1.041
## 7750 1.0405
## 7800 1.04
## 7850 1.0395
## 7900 1.0391
## 7950 1.0387
## 8000 1.0383
## 8050 1.0378
## 8100 1.0374
## 8150 1.0371
## 8200 1.0366
## 8250 1.0362
## 8300 1.0358
## 8350 1.0354
## 8400 1.035
## 8450 1.0347
## 8500 1.0342
## 8550 1.0338
## 8600 1.0335
## 8650 1.0332
## 8700 1.033
## 8750 1.0327
## 8800 1.0323
## 8850 1.032
## 8900 1.0317
## 8950 1.0314
## 9000 1.0311
## 9050 1.0309
## 9100 1.0307
## 9150 1.0303
## 9200 1.0301
## 9250 1.0299
## 9300 1.0297
## 9350 1.0293
## 9400 1.0292
## 9450 1.0289
## 9500 1.0287
## 9550 1.0285
## 9600 1.0283
## 9650 1.0281
## 9700 1.0279
## 9750 1.0277
## 9800 1.0274
## 9850 1.0273
## 9900 1.0269
## 9950 1.0267
## 10000 1.0265
## 10050 1.0262
## 10100 1.0261
## 10150 1.0259
## 10200 1.0256
## 10250 1.0255
## 10300 1.0253
## 10350 1.0251
## 10400 1.0249
## 10450 1.0247
## 10500 1.0246
## 10550 1.0243
## 10600 1.0242
## 10650 1.024
## 10700 1.0239
## 10750 1.0237
## 10800 1.0235
## 10850 1.0235
## 10900 1.0234
## 10950 1.0232
## 11000 1.023
## 11050 1.0229
## 11100 1.0228
## 11150 1.0227
## 11200 1.0225
## 11250 1.0226
## 11300 1.0224
## 11350 1.0222
## fitting final gbm model with a fixed number of 11350 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.817
##
## estimated cv deviance = 1.022 ; se = 0.034
##
## training data correlation = 0.715
## cv correlation = 0.52 ; se = 0.042
##
## training data ROC score = 0.912
## cv ROC score = 0.804 ; se = 0.022
##
## elapsed time - 1.21 minutes
# Cross-validated performance (mean and SE): deviance, correlation,
# discrimination (the cv ROC score), calibration and threshold.
cvtc3.lr00025_1$cv.statistics
## $deviance.mean
## [1] 1.022213
##
## $deviance.se
## [1] 0.03394584
##
## $correlation.mean
## [1] 0.5197211
##
## $correlation.se
## [1] 0.04215822
##
## $discrimination.mean
## [1] 0.80441
##
## $discrimination.se
## [1] 0.02243665
##
## $calibration.mean
## [1] 0.1104884 1.2274547 0.6741631 0.7426704 0.5184634
##
## $calibration.se
## [1] 0.08820524 0.12971225 0.09346869 0.04313088 0.09201566
##
## $cv.threshold
## [1] 0.4193172
##
## $cv.threshold.se
## [1] 0.02572326
# Relative influence (rel.inf) of each predictor, largest first.
summary(cvtc3.lr00025_1)
## var rel.inf
## gcpsr_5 gcpsr_5 20.47588441
## cpaq8_tot cpaq8_tot 11.57223275
## gcpsr_4 gcpsr_4 9.62650477
## gcpsr_3 gcpsr_3 8.11295004
## gcpsr_6 gcpsr_6 8.10383939
## pain_duration pain_duration 5.80011221
## isi_tot isi_tot 5.15929146
## overlapping_pain_number overlapping_pain_number 5.05791071
## age age 4.17947224
## employment employment 3.71533569
## RST_PQ_BIS RST_PQ_BIS 2.41780498
## pcs_tot pcs_tot 2.10603109
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.74680721
## RST_PQ_FFS RST_PQ_FFS 1.73596995
## meq_tot meq_tot 1.36831027
## current_opioid_meds current_opioid_meds 1.25774581
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.19963757
## bis_brief_tot bis_brief_tot 1.03864155
## proanx_t proanx_t 0.86352634
## income income 0.84787837
## prodep_t prodep_t 0.82022784
## RST_PQ_BAS_I RST_PQ_BAS_I 0.79072861
## audit_total audit_total 0.63730757
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.58308251
## hispanic hispanic 0.45127198
## race race 0.17871066
## assigned_sex_at_birth assigned_sex_at_birth 0.08240295
## cuditr0 cuditr0 0.07038109
tc 3, lr 0.005 — finaltrain_nocgpsr (gcpsr_3–gcpsr_6 left out of the predictors)
# Model: tree complexity 3, learning rate 0.005, gcpsr_3-gcpsr_6 left out.
# The seed is reset before the fit so the result can be repeated (presumably
# needed because of the random bagging, bag.fraction = 0.5).
set.seed(1)
cvtc3.lr005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,                  # outcome: gcpsr_2_HICP_num
  gbm.x = 3:26,               # predictors; skips prolific_id (2) and gcpsr items (27:30)
  family = "bernoulli",       # 0/1 outcome
  fold.vector = fold.vector,  # participant-level CV folds
  tree.complexity = 3,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01,
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2628
##
## now adding trees...
## 100 1.2302
## 150 1.2075
## 200 1.1938
## 250 1.185
## 300 1.1789
## 350 1.1752
## 400 1.1729
## 450 1.1715
## 500 1.1715
## 550 1.1715
## 600 1.174
## 650 1.1748
## 700 1.1761
## 750 1.1801
## 800 1.1822
## 850 1.1852
## 900 1.1892
## 950 1.1921
## 1000 1.1969
## 1050 1.202
## fitting final gbm model with a fixed number of 500 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.977
##
## estimated cv deviance = 1.171 ; se = 0.034
##
## training data correlation = 0.62
## cv correlation = 0.389 ; se = 0.057
##
## training data ROC score = 0.858
## cv ROC score = 0.704 ; se = 0.035
##
## elapsed time - 0.08 minutes
cvtc3.lr005$cv.statistics # cross-validated fit statistics (deviance, correlation, discrimination, calibration, threshold) - compare models on these
## $deviance.mean
## [1] 1.171458
##
## $deviance.se
## [1] 0.03422395
##
## $correlation.mean
## [1] 0.3894043
##
## $correlation.se
## [1] 0.05684963
##
## $discrimination.mean
## [1] 0.70384
##
## $discrimination.se
## [1] 0.03507343
##
## $calibration.mean
## [1] 0.1538879 1.2530781 0.5447699 0.6408557 0.4236808
##
## $calibration.se
## [1] 0.17104945 0.23246699 0.11820807 0.07529626 0.11151628
##
## $cv.threshold
## [1] 0.3693004
##
## $cv.threshold.se
## [1] 0.01098177
summary(cvtc3.lr005) # relative influence (rel.inf) of each predictor; the deviance measures for comparing models are in $cv.statistics, not here
## var rel.inf
## cpaq8_tot cpaq8_tot 28.00465037
## isi_tot isi_tot 9.36610092
## overlapping_pain_number overlapping_pain_number 8.18646207
## pain_duration pain_duration 7.66671138
## pcs_tot pcs_tot 5.98070838
## employment employment 5.89414809
## age age 5.64973592
## current_opioid_meds current_opioid_meds 4.42831221
## RST_PQ_BIS RST_PQ_BIS 3.96301221
## RST_PQ_FFS RST_PQ_FFS 3.71276558
## income income 2.13478252
## proanx_t proanx_t 2.10452750
## prodep_t prodep_t 2.09333291
## RST_PQ_BAS_RR RST_PQ_BAS_RR 2.03625966
## RST_PQ_BAS_I RST_PQ_BAS_I 1.64959886
## audit_total audit_total 1.53482118
## meq_tot meq_tot 1.52569922
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.02977917
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.78982337
## bis_brief_tot bis_brief_tot 0.78796265
## hispanic hispanic 0.59746129
## race race 0.41122562
## cuditr0 cuditr0 0.40640915
## assigned_sex_at_birth assigned_sex_at_birth 0.04570976
tc 3 lr 0.005 (finaltrain: 28 predictors, including gcpsr_3 to gcpsr_6)
# BRT fit: tree complexity 3, learning rate 0.005, on finaltrain
# (response in column 1, predictors in columns 3:30).
# Re-seed before the fit so the run can be repeated.
set.seed(1)
cvtc3.lr005_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector, # user-supplied cross-validation folds
  tree.complexity = 3,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2324
##
## now adding trees...
## 100 1.174
## 150 1.1318
## 200 1.1022
## 250 1.0801
## 300 1.064
## 350 1.0493
## 400 1.0387
## 450 1.0299
## 500 1.0237
## 550 1.0207
## 600 1.0194
## 650 1.018
## 700 1.0186
## 750 1.0193
## 800 1.0202
## 850 1.0223
## 900 1.0253
## 950 1.0268
## 1000 1.0298
## 1050 1.032
## 1100 1.0354
## 1150 1.0381
## 1200 1.0386
## 1250 1.0418
## fitting final gbm model with a fixed number of 650 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.788
##
## estimated cv deviance = 1.018 ; se = 0.035
##
## training data correlation = 0.728
## cv correlation = 0.522 ; se = 0.042
##
## training data ROC score = 0.919
## cv ROC score = 0.802 ; se = 0.022
##
## elapsed time - 0.11 minutes
cvtc3.lr005_1$cv.statistics # cross-validated fit statistics (deviance, correlation, discrimination, calibration, threshold) - compare models on these
## $deviance.mean
## [1] 1.018007
##
## $deviance.se
## [1] 0.03529081
##
## $correlation.mean
## [1] 0.5217357
##
## $correlation.se
## [1] 0.04163176
##
## $discrimination.mean
## [1] 0.80245
##
## $discrimination.se
## [1] 0.02247467
##
## $calibration.mean
## [1] 0.08096037 1.15053821 0.68377213 0.72054011 0.54885017
##
## $calibration.se
## [1] 0.08965561 0.12019131 0.08437965 0.04691495 0.09775994
##
## $cv.threshold
## [1] 0.4151979
##
## $cv.threshold.se
## [1] 0.02662583
summary(cvtc3.lr005_1) # relative influence (rel.inf) of each predictor
## var rel.inf
## gcpsr_5 gcpsr_5 18.85406655
## cpaq8_tot cpaq8_tot 11.67403213
## gcpsr_4 gcpsr_4 9.11050918
## gcpsr_6 gcpsr_6 7.63055040
## gcpsr_3 gcpsr_3 7.44701431
## pain_duration pain_duration 5.82593454
## overlapping_pain_number overlapping_pain_number 5.39295506
## isi_tot isi_tot 4.80550375
## age age 4.58891937
## employment employment 4.19653480
## RST_PQ_BIS RST_PQ_BIS 2.81638992
## RST_PQ_FFS RST_PQ_FFS 2.33880138
## pcs_tot pcs_tot 2.19684049
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.88907691
## meq_tot meq_tot 1.59984843
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.34485684
## bis_brief_tot bis_brief_tot 1.27531002
## income income 1.23261613
## current_opioid_meds current_opioid_meds 1.05876113
## proanx_t proanx_t 1.04680103
## prodep_t prodep_t 0.89875103
## RST_PQ_BAS_I RST_PQ_BAS_I 0.82105541
## hispanic hispanic 0.54662449
## audit_total audit_total 0.51500625
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.44320229
## race race 0.33822940
## cuditr0 cuditr0 0.08314228
## assigned_sex_at_birth assigned_sex_at_birth 0.02866647
tc 3 lr 0.0025 (finaltrain_nocgpsr: 24 predictors, gcpsr items excluded)
# BRT fit: tree complexity 3, learning rate 0.0025, on finaltrain_nocgpsr
# (response in column 1, predictors in columns 3:26).
# Re-seed before the fit so the run can be repeated.
set.seed(1)
cvtc3.lr0025 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector, # user-supplied cross-validation folds
  tree.complexity = 3,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2858
##
## now adding trees...
## 100 1.2634
## 150 1.2445
## 200 1.2302
## 250 1.2189
## 300 1.2096
## 350 1.2029
## 400 1.1961
## 450 1.1913
## 500 1.1859
## 550 1.1823
## 600 1.1797
## 650 1.1765
## 700 1.1751
## 750 1.1748
## 800 1.174
## 850 1.1739
## 900 1.1737
## 950 1.1736
## 1000 1.1733
## 1050 1.1743
## 1100 1.1742
## 1150 1.1754
## 1200 1.1756
## 1250 1.1765
## 1300 1.1775
## 1350 1.1782
## 1400 1.1783
## 1450 1.1796
## fitting final gbm model with a fixed number of 1000 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.974
##
## estimated cv deviance = 1.173 ; se = 0.034
##
## training data correlation = 0.625
## cv correlation = 0.388 ; se = 0.056
##
## training data ROC score = 0.863
## cv ROC score = 0.704 ; se = 0.033
##
## elapsed time - 0.11 minutes
cvtc3.lr0025$cv.statistics # cross-validated fit statistics (deviance, correlation, discrimination, calibration, threshold) - compare models on these
## $deviance.mean
## [1] 1.173344
##
## $deviance.se
## [1] 0.03377643
##
## $correlation.mean
## [1] 0.3880087
##
## $correlation.se
## [1] 0.05562241
##
## $discrimination.mean
## [1] 0.70442
##
## $discrimination.se
## [1] 0.03307114
##
## $calibration.mean
## [1] 0.1436493 1.2436209 0.5490280 0.6416393 0.4209040
##
## $calibration.se
## [1] 0.16669037 0.22944740 0.11556390 0.07734939 0.10613488
##
## $cv.threshold
## [1] 0.3675768
##
## $cv.threshold.se
## [1] 0.01046761
summary(cvtc3.lr0025) # relative influence (rel.inf) of each predictor; the deviance measures for comparing models are in $cv.statistics, not here
## var rel.inf
## cpaq8_tot cpaq8_tot 27.87157903
## isi_tot isi_tot 9.54340685
## overlapping_pain_number overlapping_pain_number 8.06856397
## pain_duration pain_duration 7.52742702
## pcs_tot pcs_tot 6.44823434
## employment employment 5.80975468
## age age 5.39437347
## current_opioid_meds current_opioid_meds 3.96253349
## RST_PQ_BIS RST_PQ_BIS 3.84015228
## RST_PQ_FFS RST_PQ_FFS 3.06852862
## proanx_t proanx_t 2.53627654
## RST_PQ_BAS_I RST_PQ_BAS_I 2.37674774
## RST_PQ_BAS_RR RST_PQ_BAS_RR 2.00443004
## prodep_t prodep_t 1.70943636
## audit_total audit_total 1.70140294
## income income 1.66332536
## meq_tot meq_tot 1.40296037
## bis_brief_tot bis_brief_tot 1.27803965
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.18709560
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.95378897
## cuditr0 cuditr0 0.59923051
## race race 0.58105550
## hispanic hispanic 0.38062038
## assigned_sex_at_birth assigned_sex_at_birth 0.09103627
tc 3 lr 0.0025 (finaltrain: 28 predictors, including gcpsr_3 to gcpsr_6)
# BRT fit: tree complexity 3, learning rate 0.0025, on finaltrain
# (response in column 1, predictors in columns 3:30).
# Re-seed before the fit so the run can be repeated.
set.seed(1)
cvtc3.lr0025_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector, # user-supplied cross-validation folds
  tree.complexity = 3,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.27
##
## now adding trees...
## 100 1.2323
## 150 1.2011
## 200 1.1745
## 250 1.1534
## 300 1.134
## 350 1.1163
## 400 1.1016
## 450 1.0894
## 500 1.0779
## 550 1.0691
## 600 1.0609
## 650 1.0535
## 700 1.0475
## 750 1.0429
## 800 1.0387
## 850 1.0348
## 900 1.0319
## 950 1.0288
## 1000 1.0278
## 1050 1.0254
## 1100 1.0242
## 1150 1.0226
## 1200 1.0208
## 1250 1.0196
## 1300 1.0194
## 1350 1.0189
## 1400 1.0192
## 1450 1.0197
## 1500 1.0197
## 1550 1.0201
## 1600 1.0203
## 1650 1.0209
## 1700 1.0221
## 1750 1.0225
## 1800 1.024
## 1850 1.0251
## 1900 1.0268
## fitting final gbm model with a fixed number of 1350 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.782
##
## estimated cv deviance = 1.019 ; se = 0.037
##
## training data correlation = 0.728
## cv correlation = 0.52 ; se = 0.043
##
## training data ROC score = 0.919
## cv ROC score = 0.8 ; se = 0.023
##
## elapsed time - 0.16 minutes
cvtc3.lr0025_1$cv.statistics # cross-validated fit statistics (deviance, correlation, discrimination, calibration, threshold) - compare models on these
## $deviance.mean
## [1] 1.018948
##
## $deviance.se
## [1] 0.03683464
##
## $correlation.mean
## [1] 0.5199933
##
## $correlation.se
## [1] 0.04336524
##
## $discrimination.mean
## [1] 0.80038
##
## $discrimination.se
## [1] 0.02330232
##
## $calibration.mean
## [1] 0.06469214 1.12831692 0.67459484 0.72687293 0.54363087
##
## $calibration.se
## [1] 0.08861472 0.12386696 0.08333148 0.05007122 0.10133761
##
## $cv.threshold
## [1] 0.4205499
##
## $cv.threshold.se
## [1] 0.0285933
summary(cvtc3.lr0025_1) # relative influence (rel.inf) of each predictor
## var rel.inf
## gcpsr_5 gcpsr_5 18.90757351
## cpaq8_tot cpaq8_tot 11.22525740
## gcpsr_4 gcpsr_4 9.23850766
## gcpsr_6 gcpsr_6 7.83380532
## gcpsr_3 gcpsr_3 7.73764770
## pain_duration pain_duration 6.00416433
## overlapping_pain_number overlapping_pain_number 5.23393844
## isi_tot isi_tot 4.89477563
## age age 4.32143558
## employment employment 4.00201786
## pcs_tot pcs_tot 2.57052901
## RST_PQ_BIS RST_PQ_BIS 2.55596157
## RST_PQ_FFS RST_PQ_FFS 1.92089003
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.91479568
## meq_tot meq_tot 1.71118716
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.34385022
## bis_brief_tot bis_brief_tot 1.25504747
## income income 1.17879328
## proanx_t proanx_t 1.12970451
## current_opioid_meds current_opioid_meds 1.04644788
## prodep_t prodep_t 0.90443500
## audit_total audit_total 0.84087462
## RST_PQ_BAS_I RST_PQ_BAS_I 0.82627074
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.63539374
## hispanic hispanic 0.32314157
## race race 0.23971822
## cuditr0 cuditr0 0.10646748
## assigned_sex_at_birth assigned_sex_at_birth 0.09736839
tc 4 lr 0.0005 (finaltrain_nocgpsr: 24 predictors, gcpsr items excluded)
# BRT fit: tree complexity 4, learning rate 0.0005, on finaltrain_nocgpsr
# (response in column 1, predictors in columns 3:26).
# Re-seed before the fit so the run can be repeated.
set.seed(1)
cvtc4.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector, # user-supplied cross-validation folds
  tree.complexity = 4,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3088
##
## now adding trees...
## 100 1.3026
## 150 1.2964
## 200 1.2908
## 250 1.2855
## 300 1.2804
## 350 1.2753
## 400 1.2706
## 450 1.2663
## 500 1.2618
## 550 1.2577
## 600 1.2539
## 650 1.25
## 700 1.2464
## 750 1.2431
## 800 1.2399
## 850 1.2369
## 900 1.234
## 950 1.2311
## 1000 1.2286
## 1050 1.2262
## 1100 1.2238
## 1150 1.2214
## 1200 1.219
## 1250 1.217
## 1300 1.2151
## 1350 1.2131
## 1400 1.2112
## 1450 1.2094
## 1500 1.2077
## 1550 1.2057
## 1600 1.2041
## 1650 1.2027
## 1700 1.2009
## 1750 1.1996
## 1800 1.1982
## 1850 1.1971
## 1900 1.1959
## 1950 1.1947
## 2000 1.1937
## 2050 1.1926
## 2100 1.1916
## 2150 1.1905
## 2200 1.1895
## 2250 1.1885
## 2300 1.1877
## 2350 1.1871
## 2400 1.1863
## 2450 1.1856
## 2500 1.1849
## 2550 1.1841
## 2600 1.1837
## 2650 1.1831
## 2700 1.1824
## 2750 1.1817
## 2800 1.1814
## 2850 1.181
## 2900 1.1806
## 2950 1.1803
## 3000 1.1799
## 3050 1.1795
## 3100 1.179
## 3150 1.1785
## 3200 1.1784
## 3250 1.1779
## 3300 1.1776
## 3350 1.1773
## 3400 1.1767
## 3450 1.1765
## 3500 1.1764
## 3550 1.1763
## 3600 1.1763
## 3650 1.1762
## 3700 1.1761
## 3750 1.176
## 3800 1.176
## 3850 1.1758
## 3900 1.1755
## 3950 1.1752
## 4000 1.175
## 4050 1.1748
## 4100 1.1749
## 4150 1.175
## 4200 1.1749
## 4250 1.1748
## fitting final gbm model with a fixed number of 4250 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.954
##
## estimated cv deviance = 1.175 ; se = 0.034
##
## training data correlation = 0.657
## cv correlation = 0.385 ; se = 0.058
##
## training data ROC score = 0.883
## cv ROC score = 0.705 ; se = 0.035
##
## elapsed time - 0.43 minutes
cvtc4.lr0005$cv.statistics # cross-validated fit statistics (deviance, correlation, discrimination, calibration, threshold) - compare models on these
## $deviance.mean
## [1] 1.174812
##
## $deviance.se
## [1] 0.03380357
##
## $correlation.mean
## [1] 0.3848734
##
## $correlation.se
## [1] 0.05818755
##
## $discrimination.mean
## [1] 0.70497
##
## $discrimination.se
## [1] 0.0349439
##
## $calibration.mean
## [1] 0.1508489 1.2638484 0.5443256 0.6443981 0.4218594
##
## $calibration.se
## [1] 0.17279876 0.24444831 0.11769606 0.07680342 0.11025376
##
## $cv.threshold
## [1] 0.3685767
##
## $cv.threshold.se
## [1] 0.01112321
summary(cvtc4.lr0005) # relative influence (rel.inf) of each predictor; the deviance measures for comparing models are in $cv.statistics, not here
## var rel.inf
## cpaq8_tot cpaq8_tot 24.96481629
## isi_tot isi_tot 9.15935125
## pain_duration pain_duration 8.35583424
## overlapping_pain_number overlapping_pain_number 8.18561362
## pcs_tot pcs_tot 6.09259859
## employment employment 5.99251898
## age age 5.50529227
## RST_PQ_BIS RST_PQ_BIS 4.42581835
## current_opioid_meds current_opioid_meds 3.60051346
## RST_PQ_FFS RST_PQ_FFS 3.40737612
## RST_PQ_BAS_I RST_PQ_BAS_I 2.58610171
## proanx_t proanx_t 2.50708286
## prodep_t prodep_t 2.32810799
## RST_PQ_BAS_RR RST_PQ_BAS_RR 2.16773450
## income income 2.09859322
## audit_total audit_total 1.94767208
## meq_tot meq_tot 1.70182979
## bis_brief_tot bis_brief_tot 1.41357799
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.11832649
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.06933996
## cuditr0 cuditr0 0.46540678
## hispanic hispanic 0.41315748
## race race 0.39960025
## assigned_sex_at_birth assigned_sex_at_birth 0.09373574
tc 4 lr 0.0005 (finaltrain: 28 predictors, including gcpsr_3 to gcpsr_6)
# BRT fit: tree complexity 4, learning rate 0.0005, on finaltrain
# (response in column 1, predictors in columns 3:30).
# Re-seed before the fit so the run can be repeated.
set.seed(1)
cvtc4.lr0005_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector, # user-supplied cross-validation folds
  tree.complexity = 4,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3052
##
## now adding trees...
## 100 1.2953
## 150 1.2857
## 200 1.2765
## 250 1.2676
## 300 1.2591
## 350 1.2509
## 400 1.2431
## 450 1.2355
## 500 1.2284
## 550 1.2217
## 600 1.2149
## 650 1.2084
## 700 1.2019
## 750 1.1958
## 800 1.1901
## 850 1.1844
## 900 1.1792
## 950 1.174
## 1000 1.1691
## 1050 1.1641
## 1100 1.1592
## 1150 1.1547
## 1200 1.1502
## 1250 1.1461
## 1300 1.1419
## 1350 1.1379
## 1400 1.1342
## 1450 1.1304
## 1500 1.1268
## 1550 1.1233
## 1600 1.1199
## 1650 1.1166
## 1700 1.1132
## 1750 1.1099
## 1800 1.1067
## 1850 1.1036
## 1900 1.101
## 1950 1.0983
## 2000 1.0956
## 2050 1.093
## 2100 1.0905
## 2150 1.0881
## 2200 1.0857
## 2250 1.0836
## 2300 1.0815
## 2350 1.0794
## 2400 1.0773
## 2450 1.0755
## 2500 1.0734
## 2550 1.0715
## 2600 1.0696
## 2650 1.0679
## 2700 1.066
## 2750 1.0643
## 2800 1.0629
## 2850 1.0615
## 2900 1.0599
## 2950 1.0585
## 3000 1.0571
## 3050 1.0556
## 3100 1.0543
## 3150 1.0531
## 3200 1.0519
## 3250 1.0505
## 3300 1.0492
## 3350 1.0481
## 3400 1.0468
## 3450 1.0457
## 3500 1.045
## 3550 1.0439
## 3600 1.0431
## 3650 1.0422
## 3700 1.0414
## 3750 1.0406
## 3800 1.0398
## 3850 1.0389
## 3900 1.038
## 3950 1.0373
## 4000 1.0365
## 4050 1.036
## 4100 1.0352
## 4150 1.0346
## 4200 1.034
## 4250 1.0334
## 4300 1.0328
## 4350 1.0323
## 4400 1.0317
## 4450 1.0313
## 4500 1.0308
## 4550 1.0304
## 4600 1.0303
## 4650 1.03
## 4700 1.0295
## 4750 1.0291
## 4800 1.0286
## 4850 1.0283
## 4900 1.0279
## 4950 1.0275
## 5000 1.0271
## 5050 1.0268
## 5100 1.0265
## 5150 1.0265
## 5200 1.0266
## 5250 1.0265
## 5300 1.0262
## 5350 1.026
## 5400 1.0257
## 5450 1.0255
## 5500 1.0255
## 5550 1.0253
## 5600 1.0251
## 5650 1.0248
## 5700 1.0246
## 5750 1.0244
## 5800 1.0243
## 5850 1.0241
## 5900 1.0239
## 5950 1.0239
## 6000 1.0237
## 6050 1.0238
## 6100 1.0237
## 6150 1.0239
## 6200 1.0239
## 6250 1.0239
## 6300 1.0239
## fitting final gbm model with a fixed number of 6100 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.741
##
## estimated cv deviance = 1.024 ; se = 0.038
##
## training data correlation = 0.76
## cv correlation = 0.516 ; se = 0.045
##
## training data ROC score = 0.936
## cv ROC score = 0.798 ; se = 0.023
##
## elapsed time - 0.74 minutes
cvtc4.lr0005_1$cv.statistics # cross-validated fit statistics (deviance, correlation, discrimination, calibration, threshold) - compare models on these
## $deviance.mean
## [1] 1.023654
##
## $deviance.se
## [1] 0.03787976
##
## $correlation.mean
## [1] 0.5159064
##
## $correlation.se
## [1] 0.04519974
##
## $discrimination.mean
## [1] 0.79816
##
## $discrimination.se
## [1] 0.02332668
##
## $calibration.mean
## [1] 0.05894484 1.12036177 0.66342652 0.72325331 0.54506963
##
## $calibration.se
## [1] 0.09060603 0.12725737 0.08703821 0.04879352 0.10945047
##
## $cv.threshold
## [1] 0.4188295
##
## $cv.threshold.se
## [1] 0.03089155
summary(cvtc4.lr0005_1) # relative influence (rel.inf) of each predictor; the deviance measures for comparing models are in $cv.statistics, not here
## var rel.inf
## gcpsr_5 gcpsr_5 17.7462820
## cpaq8_tot cpaq8_tot 10.3972987
## gcpsr_4 gcpsr_4 8.6004753
## gcpsr_3 gcpsr_3 7.1225212
## gcpsr_6 gcpsr_6 7.0692838
## pain_duration pain_duration 6.6669652
## isi_tot isi_tot 5.4021229
## overlapping_pain_number overlapping_pain_number 4.7815971
## age age 4.1823253
## employment employment 4.0093887
## RST_PQ_BIS RST_PQ_BIS 2.7178748
## pcs_tot pcs_tot 2.5171711
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 2.3811841
## RST_PQ_FFS RST_PQ_FFS 2.3497418
## meq_tot meq_tot 2.0125052
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.7954156
## bis_brief_tot bis_brief_tot 1.4725428
## income income 1.4614677
## proanx_t proanx_t 1.2247226
## RST_PQ_BAS_I RST_PQ_BAS_I 1.2081766
## current_opioid_meds current_opioid_meds 1.1072212
## prodep_t prodep_t 1.0650734
## audit_total audit_total 1.0280835
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.7772383
## hispanic hispanic 0.4117909
## race race 0.2005061
## cuditr0 cuditr0 0.1642944
## assigned_sex_at_birth assigned_sex_at_birth 0.1267294
tc 4 lr 0.0025 (finaltrain_nocgpsr: 24 predictors, gcpsr items excluded)
# BRT fit: tree complexity 4, learning rate 0.0025, on finaltrain_nocgpsr
# (response in column 1, predictors in columns 3:26).
# Re-seed before the fit so the run can be repeated.
set.seed(1)
cvtc4.lr0025 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector, # user-supplied cross-validation folds
  tree.complexity = 4,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2852
##
## now adding trees...
## 100 1.262
## 150 1.2433
## 200 1.2286
## 250 1.2176
## 300 1.2083
## 350 1.2022
## 400 1.1958
## 450 1.1914
## 500 1.186
## 550 1.1832
## 600 1.1817
## 650 1.1792
## 700 1.1782
## 750 1.1785
## 800 1.1784
## 850 1.1782
## 900 1.1789
## 950 1.1799
## 1000 1.1808
## 1050 1.1825
## 1100 1.183
## 1150 1.1841
## 1200 1.1844
## 1250 1.186
## 1300 1.1873
## 1350 1.189
## fitting final gbm model with a fixed number of 700 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.991
##
## estimated cv deviance = 1.178 ; se = 0.03
##
## training data correlation = 0.635
## cv correlation = 0.385 ; se = 0.056
##
## training data ROC score = 0.871
## cv ROC score = 0.702 ; se = 0.034
##
## elapsed time - 0.12 minutes
cvtc4.lr0025$cv.statistics # cross-validated fit statistics (deviance, correlation, discrimination, calibration, threshold) - compare models on these
## $deviance.mean
## [1] 1.17818
##
## $deviance.se
## [1] 0.0300742
##
## $correlation.mean
## [1] 0.3852169
##
## $correlation.se
## [1] 0.05632446
##
## $discrimination.mean
## [1] 0.70206
##
## $discrimination.se
## [1] 0.03388197
##
## $calibration.mean
## [1] 0.2264815 1.3977707 0.5226442 0.6615757 0.3998642
##
## $calibration.se
## [1] 0.18169998 0.26146277 0.11943434 0.07853788 0.11187414
##
## $cv.threshold
## [1] 0.3627988
##
## $cv.threshold.se
## [1] 0.01032532
summary(cvtc4.lr0025) # relative influence (rel.inf) of each predictor; the deviance measures for comparing models are in $cv.statistics, not here
## var rel.inf
## cpaq8_tot cpaq8_tot 27.56167967
## isi_tot isi_tot 9.26334916
## overlapping_pain_number overlapping_pain_number 8.43649971
## pain_duration pain_duration 7.52447674
## pcs_tot pcs_tot 5.88890519
## age age 5.71433516
## employment employment 5.41720721
## current_opioid_meds current_opioid_meds 4.02878380
## RST_PQ_BIS RST_PQ_BIS 3.92898660
## RST_PQ_FFS RST_PQ_FFS 2.93997241
## RST_PQ_BAS_I RST_PQ_BAS_I 2.49612532
## proanx_t proanx_t 2.43677288
## prodep_t prodep_t 2.24771264
## income income 1.85403933
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.84610774
## audit_total audit_total 1.72149558
## meq_tot meq_tot 1.70597803
## bis_brief_tot bis_brief_tot 1.66031175
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.11162922
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.87634997
## cuditr0 cuditr0 0.50935517
## hispanic hispanic 0.39396171
## race race 0.38401294
## assigned_sex_at_birth assigned_sex_at_birth 0.05195207
tc 4 lr 0.0025 (finaltrain: 28 predictors, including gcpsr_3 to gcpsr_6)
# BRT fit: tree complexity 4, learning rate 0.0025, on finaltrain
# (response in column 1, predictors in columns 3:30).
# Re-seed before the fit so the run can be repeated.
set.seed(1)
cvtc4.lr0025_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector, # user-supplied cross-validation folds
  tree.complexity = 4,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2676
##
## now adding trees...
## 100 1.2285
## 150 1.1957
## 200 1.1683
## 250 1.1465
## 300 1.127
## 350 1.1095
## 400 1.0953
## 450 1.0831
## 500 1.0722
## 550 1.0643
## 600 1.0568
## 650 1.05
## 700 1.0448
## 750 1.0409
## 800 1.0378
## 850 1.0349
## 900 1.0321
## 950 1.0299
## 1000 1.0298
## 1050 1.0281
## 1100 1.0274
## 1150 1.0269
## 1200 1.0256
## 1250 1.0255
## 1300 1.0251
## 1350 1.026
## 1400 1.0267
## 1450 1.0279
## 1500 1.0288
## 1550 1.0293
## 1600 1.0304
## 1650 1.0312
## 1700 1.0327
## 1750 1.034
## fitting final gbm model with a fixed number of 1300 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.727
##
## estimated cv deviance = 1.025 ; se = 0.039
##
## training data correlation = 0.765
## cv correlation = 0.515 ; se = 0.045
##
## training data ROC score = 0.938
## cv ROC score = 0.798 ; se = 0.023
##
## elapsed time - 0.18 minutes
cvtc4.lr0025_1$cv.statistics # cross-validated fit statistics (deviance, correlation, discrimination, calibration, threshold) - compare models on these
## $deviance.mean
## [1] 1.025138
##
## $deviance.se
## [1] 0.03930888
##
## $correlation.mean
## [1] 0.5149502
##
## $correlation.se
## [1] 0.04549456
##
## $discrimination.mean
## [1] 0.79797
##
## $discrimination.se
## [1] 0.02288092
##
## $calibration.mean
## [1] 0.03859933 1.07865202 0.65052401 0.70979742 0.52417358
##
## $calibration.se
## [1] 0.09144014 0.12448279 0.07973867 0.05271127 0.10361619
##
## $cv.threshold
## [1] 0.4173239
##
## $cv.threshold.se
## [1] 0.03144449
summary(cvtc4.lr0025_1) # relative influence (rel.inf) of each predictor
## var rel.inf
## gcpsr_5 gcpsr_5 17.01918517
## cpaq8_tot cpaq8_tot 10.18309066
## gcpsr_4 gcpsr_4 8.99823451
## gcpsr_3 gcpsr_3 7.01772634
## pain_duration pain_duration 6.85567544
## gcpsr_6 gcpsr_6 6.78372634
## isi_tot isi_tot 5.22408302
## overlapping_pain_number overlapping_pain_number 4.81306567
## employment employment 4.48735020
## age age 4.27329022
## RST_PQ_BIS RST_PQ_BIS 3.01888290
## RST_PQ_FFS RST_PQ_FFS 2.47565161
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 2.37384892
## pcs_tot pcs_tot 2.29463003
## meq_tot meq_tot 2.03945160
## income income 1.79352971
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.65033145
## proanx_t proanx_t 1.38284378
## bis_brief_tot bis_brief_tot 1.38174185
## prodep_t prodep_t 1.18219987
## RST_PQ_BAS_I RST_PQ_BAS_I 1.08610878
## current_opioid_meds current_opioid_meds 1.02963677
## audit_total audit_total 0.95129518
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.73134704
## hispanic hispanic 0.49211702
## race race 0.23318792
## cuditr0 cuditr0 0.13836966
## assigned_sex_at_birth assigned_sex_at_birth 0.08939836
tc 4 lr 0.005 (finaltrain_nocgpsr: 24 predictors, gcpsr items excluded)
# BRT fit: tree complexity 4, learning rate 0.005, on finaltrain_nocgpsr
# (response in column 1, predictors in columns 3:26).
# Re-seed before the fit so the run can be repeated.
set.seed(1)
cvtc4.lr005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector, # user-supplied cross-validation folds
  tree.complexity = 4,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2617
##
## now adding trees...
## 100 1.2289
## 150 1.2057
## 200 1.1937
## 250 1.1867
## 300 1.1806
## 350 1.179
## 400 1.1781
## 450 1.1789
## 500 1.1791
## 550 1.181
## 600 1.185
## 650 1.1874
## 700 1.1907
## 750 1.1951
## 800 1.1977
## 850 1.2021
## 900 1.2075
## 950 1.2121
## 1000 1.2169
## fitting final gbm model with a fixed number of 400 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.965
##
## estimated cv deviance = 1.178 ; se = 0.033
##
## training data correlation = 0.652
## cv correlation = 0.381 ; se = 0.058
##
## training data ROC score = 0.88
## cv ROC score = 0.704 ; se = 0.035
##
## elapsed time - 0.09 minutes
cvtc4.lr005$cv.statistics # cross-validated fit statistics (deviance, correlation, discrimination, calibration, threshold) - compare models on these
## $deviance.mean
## [1] 1.178145
##
## $deviance.se
## [1] 0.0329135
##
## $correlation.mean
## [1] 0.3807468
##
## $correlation.se
## [1] 0.05781186
##
## $discrimination.mean
## [1] 0.70391
##
## $discrimination.se
## [1] 0.03467864
##
## $calibration.mean
## [1] 0.1713711 1.2759530 0.5365084 0.6523755 0.4077083
##
## $calibration.se
## [1] 0.17601866 0.23955150 0.11572957 0.07639729 0.11051462
##
## $cv.threshold
## [1] 0.3658574
##
## $cv.threshold.se
## [1] 0.01048835
summary(cvtc4.lr005) # relative influence (rel.inf) of each predictor; the deviance measures for comparing models are in $cv.statistics, not here
## var rel.inf
## cpaq8_tot cpaq8_tot 26.2840671
## isi_tot isi_tot 9.4296163
## overlapping_pain_number overlapping_pain_number 8.2872354
## pain_duration pain_duration 7.8356509
## pcs_tot pcs_tot 5.4815365
## employment employment 5.0561635
## age age 4.9146848
## RST_PQ_BIS RST_PQ_BIS 3.9821734
## current_opioid_meds current_opioid_meds 3.9225084
## RST_PQ_FFS RST_PQ_FFS 3.4850991
## RST_PQ_BAS_RR RST_PQ_BAS_RR 2.8050911
## prodep_t prodep_t 2.4596222
## RST_PQ_BAS_I RST_PQ_BAS_I 2.2454212
## proanx_t proanx_t 2.2241249
## audit_total audit_total 2.1227502
## meq_tot meq_tot 1.9862854
## income income 1.8925231
## bis_brief_tot bis_brief_tot 1.4793891
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.4591011
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.4010677
## cuditr0 cuditr0 0.5013392
## hispanic hispanic 0.3429328
## race race 0.2146518
## assigned_sex_at_birth assigned_sex_at_birth 0.1869650
tc 4 lr 0.005 (finaltrain: 28 predictors, including gcpsr_3 to gcpsr_6)
# BRT fit: tree complexity 4, learning rate 0.005, on finaltrain
# (response in column 1, predictors in columns 3:30).
# Re-seed before the fit so the run can be repeated.
set.seed(1)
cvtc4.lr005_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector, # user-supplied cross-validation folds
  tree.complexity = 4,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2285
##
## now adding trees...
## 100 1.1687
## 150 1.1258
## 200 1.0982
## 250 1.0766
## 300 1.0607
## 350 1.0492
## 400 1.0403
## 450 1.0321
## 500 1.0273
## 550 1.0267
## 600 1.028
## 650 1.0276
## 700 1.0282
## 750 1.0286
## 800 1.0309
## 850 1.0342
## 900 1.0378
## 950 1.0407
## 1000 1.0455
## 1050 1.0488
## 1100 1.0522
## 1150 1.0547
## 1200 1.0574
## fitting final gbm model with a fixed number of 550 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.767
##
## estimated cv deviance = 1.027 ; se = 0.036
##
## training data correlation = 0.75
## cv correlation = 0.516 ; se = 0.043
##
## training data ROC score = 0.931
## cv ROC score = 0.798 ; se = 0.023
##
## elapsed time - 0.12 minutes
cvtc4.lr005_1$cv.statistics # cross-validated fit statistics (deviance, correlation, discrimination, calibration, threshold) - compare models on these
## $deviance.mean
## [1] 1.026706
##
## $deviance.se
## [1] 0.03611049
##
## $correlation.mean
## [1] 0.5162188
##
## $correlation.se
## [1] 0.04348085
##
## $discrimination.mean
## [1] 0.79781
##
## $discrimination.se
## [1] 0.0229387
##
## $calibration.mean
## [1] 0.08871788 1.16734149 0.65605108 0.71303688 0.51401193
##
## $calibration.se
## [1] 0.09261850 0.12843039 0.08660366 0.04813533 0.09606418
##
## $cv.threshold
## [1] 0.4147268
##
## $cv.threshold.se
## [1] 0.02682757
summary(cvtc4.lr005_1) # relative influence (rel.inf) of each predictor
## var rel.inf
## gcpsr_5 gcpsr_5 19.01962502
## cpaq8_tot cpaq8_tot 10.74712482
## gcpsr_4 gcpsr_4 8.00001133
## gcpsr_6 gcpsr_6 7.08829050
## gcpsr_3 gcpsr_3 6.91509563
## pain_duration pain_duration 6.86897080
## isi_tot isi_tot 5.47195965
## overlapping_pain_number overlapping_pain_number 5.09198803
## employment employment 4.36779526
## age age 4.13424532
## RST_PQ_BIS RST_PQ_BIS 3.01564445
## meq_tot meq_tot 2.39355267
## pcs_tot pcs_tot 2.02499560
## RST_PQ_FFS RST_PQ_FFS 1.92794921
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.83393971
## bis_brief_tot bis_brief_tot 1.63479260
## income income 1.46998213
## current_opioid_meds current_opioid_meds 1.41205137
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.38158154
## prodep_t prodep_t 1.23545302
## proanx_t proanx_t 1.02086647
## audit_total audit_total 0.95362422
## RST_PQ_BAS_I RST_PQ_BAS_I 0.87539735
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.58873955
## hispanic hispanic 0.31724981
## cuditr0 cuditr0 0.08464127
## race race 0.06279426
## assigned_sex_at_birth assigned_sex_at_birth 0.06163839
tc 7 lr 0.0005 (finaltrain_nocgpsr: 24 predictors, gcpsr items excluded)
# BRT fit: tree complexity 7, learning rate 0.0005, on finaltrain_nocgpsr
# (response in column 1, predictors in columns 3:26).
# Re-seed before the fit so the run can be repeated.
set.seed(1)
cvtc7.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector, # user-supplied cross-validation folds
  tree.complexity = 7,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3086
##
## now adding trees...
## 100 1.3021
## 150 1.2959
## 200 1.2902
## 250 1.2848
## 300 1.2797
## 350 1.2746
## 400 1.2698
## 450 1.2654
## 500 1.2609
## 550 1.2567
## 600 1.2528
## 650 1.249
## 700 1.2454
## 750 1.242
## 800 1.2388
## 850 1.2358
## 900 1.2329
## 950 1.2301
## 1000 1.2279
## 1050 1.2256
## 1100 1.2231
## 1150 1.221
## 1200 1.2186
## 1250 1.2165
## 1300 1.2147
## 1350 1.213
## 1400 1.2112
## 1450 1.2096
## 1500 1.208
## 1550 1.2062
## 1600 1.2047
## 1650 1.2034
## 1700 1.2017
## 1750 1.2004
## 1800 1.1992
## 1850 1.1983
## 1900 1.1971
## 1950 1.1961
## 2000 1.1952
## 2050 1.1942
## 2100 1.1935
## 2150 1.1925
## 2200 1.1918
## 2250 1.1908
## 2300 1.1903
## 2350 1.1899
## 2400 1.1891
## 2450 1.1888
## 2500 1.1884
## 2550 1.1877
## 2600 1.1876
## 2650 1.1872
## 2700 1.1866
## 2750 1.1861
## 2800 1.1859
## 2850 1.1858
## 2900 1.1856
## 2950 1.1854
## 3000 1.1851
## 3050 1.1849
## 3100 1.1846
## 3150 1.1844
## 3200 1.1843
## 3250 1.1843
## 3300 1.1842
## 3350 1.1841
## 3400 1.1837
## 3450 1.1837
## 3500 1.1839
## 3550 1.1841
## 3600 1.1844
## 3650 1.1845
## fitting final gbm model with a fixed number of 3400 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.896
##
## estimated cv deviance = 1.184 ; se = 0.032
##
## training data correlation = 0.73
## cv correlation = 0.374 ; se = 0.058
##
## training data ROC score = 0.929
## cv ROC score = 0.7 ; se = 0.035
##
## elapsed time - 0.55 minutes
cvtc7.lr0005$cv.statistics # cross-validation summary: deviance, correlation, discrimination, calibration and threshold (means and SEs)
## $deviance.mean
## [1] 1.183677
##
## $deviance.se
## [1] 0.03204527
##
## $correlation.mean
## [1] 0.3735429
##
## $correlation.se
## [1] 0.05756677
##
## $discrimination.mean
## [1] 0.69994
##
## $discrimination.se
## [1] 0.03451235
##
## $calibration.mean
## [1] 0.1435293 1.2494374 0.5545121 0.6393124 0.4386365
##
## $calibration.se
## [1] 0.17080188 0.23951765 0.11551781 0.07927105 0.11225110
##
## $cv.threshold
## [1] 0.3722637
##
## $cv.threshold.se
## [1] 0.01102345
summary(cvtc7.lr0005) # relative influence (rel.inf) of each predictor, largest first; the deviance measures used to compare models are printed by cvtc7.lr0005$cv.statistics
## var rel.inf
## cpaq8_tot cpaq8_tot 21.4802327
## pain_duration pain_duration 8.7570713
## isi_tot isi_tot 8.3914981
## overlapping_pain_number overlapping_pain_number 6.7332283
## employment employment 6.3353581
## pcs_tot pcs_tot 5.8214480
## age age 5.0204265
## RST_PQ_BIS RST_PQ_BIS 4.9334367
## RST_PQ_FFS RST_PQ_FFS 3.5101919
## income income 3.4152542
## RST_PQ_BAS_I RST_PQ_BAS_I 3.0885317
## prodep_t prodep_t 2.8672008
## RST_PQ_BAS_RR RST_PQ_BAS_RR 2.7066101
## proanx_t proanx_t 2.7023593
## current_opioid_meds current_opioid_meds 2.6500610
## meq_tot meq_tot 2.5755224
## audit_total audit_total 2.2503655
## bis_brief_tot bis_brief_tot 1.9426311
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.7699601
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.6124604
## cuditr0 cuditr0 0.7091166
## hispanic hispanic 0.2728453
## race race 0.2587092
## assigned_sex_at_birth assigned_sex_at_birth 0.1954807
tc 7, lr 0.0005 - finaltrain (28 predictors, GCPS-R items gcpsr_3 to gcpsr_6 included)
# Same settings as the tc 7 / lr 0.0005 fit, but on the full training set
# (predictor columns 3-30; column 1 is the response).
set.seed(1) # fixed seed so the run can be repeated
cvtc7.lr0005_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector, # user-supplied CV fold assignment
  tree.complexity = 7,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01 (left disabled, as in the original call)
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3044
##
## now adding trees...
## 100 1.2938
## 150 1.2835
## 200 1.2737
## 250 1.2645
## 300 1.2555
## 350 1.247
## 400 1.2389
## 450 1.2308
## 500 1.2234
## 550 1.2163
## 600 1.2095
## 650 1.2029
## 700 1.196
## 750 1.1898
## 800 1.1839
## 850 1.1783
## 900 1.173
## 950 1.1677
## 1000 1.1629
## 1050 1.1579
## 1100 1.153
## 1150 1.1486
## 1200 1.1439
## 1250 1.1399
## 1300 1.1358
## 1350 1.1317
## 1400 1.128
## 1450 1.1243
## 1500 1.1208
## 1550 1.1174
## 1600 1.1142
## 1650 1.1107
## 1700 1.1075
## 1750 1.1043
## 1800 1.1014
## 1850 1.0985
## 1900 1.0961
## 1950 1.0935
## 2000 1.0909
## 2050 1.0883
## 2100 1.0861
## 2150 1.0839
## 2200 1.0815
## 2250 1.0796
## 2300 1.0778
## 2350 1.0759
## 2400 1.0738
## 2450 1.0723
## 2500 1.0704
## 2550 1.0687
## 2600 1.0669
## 2650 1.0654
## 2700 1.0638
## 2750 1.0624
## 2800 1.0609
## 2850 1.0599
## 2900 1.0584
## 2950 1.0571
## 3000 1.0559
## 3050 1.0547
## 3100 1.0537
## 3150 1.0528
## 3200 1.0519
## 3250 1.0509
## 3300 1.0498
## 3350 1.0489
## 3400 1.0481
## 3450 1.0472
## 3500 1.0466
## 3550 1.0458
## 3600 1.0454
## 3650 1.0448
## 3700 1.0443
## 3750 1.0438
## 3800 1.0432
## 3850 1.0426
## 3900 1.042
## 3950 1.0415
## 4000 1.041
## 4050 1.0407
## 4100 1.0402
## 4150 1.0398
## 4200 1.0397
## 4250 1.0393
## 4300 1.0391
## 4350 1.0389
## 4400 1.0385
## 4450 1.0383
## 4500 1.0379
## 4550 1.0376
## 4600 1.0376
## 4650 1.0373
## 4700 1.037
## 4750 1.0371
## 4800 1.0369
## 4850 1.0368
## 4900 1.0367
## 4950 1.0365
## 5000 1.0362
## 5050 1.036
## 5100 1.0361
## 5150 1.0363
## 5200 1.0365
## 5250 1.0366
## 5300 1.0367
## fitting final gbm model with a fixed number of 5050 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.666
##
## estimated cv deviance = 1.036 ; se = 0.038
##
## training data correlation = 0.82
## cv correlation = 0.507 ; se = 0.046
##
## training data ROC score = 0.964
## cv ROC score = 0.789 ; se = 0.025
##
## elapsed time - 0.93 minutes
cvtc7.lr0005_1$cv.statistics # cross-validation summary: deviance, correlation, discrimination, calibration and threshold (means and SEs)
## $deviance.mean
## [1] 1.036001
##
## $deviance.se
## [1] 0.03786563
##
## $correlation.mean
## [1] 0.5072275
##
## $correlation.se
## [1] 0.04604393
##
## $discrimination.mean
## [1] 0.78889
##
## $discrimination.se
## [1] 0.02501377
##
## $calibration.mean
## [1] 0.05500414 1.09376000 0.64646320 0.69874566 0.52160042
##
## $calibration.se
## [1] 0.09349984 0.12339961 0.07951680 0.05195854 0.10265585
##
## $cv.threshold
## [1] 0.4186719
##
## $cv.threshold.se
## [1] 0.031679
summary(cvtc7.lr0005_1) # relative influence (rel.inf) of each predictor, largest first
## var rel.inf
## gcpsr_5 gcpsr_5 15.5652641
## cpaq8_tot cpaq8_tot 9.1917185
## pain_duration pain_duration 7.6887020
## gcpsr_4 gcpsr_4 7.2466827
## gcpsr_3 gcpsr_3 6.0423804
## gcpsr_6 gcpsr_6 5.9061624
## isi_tot isi_tot 5.2183183
## employment employment 4.6172084
## overlapping_pain_number overlapping_pain_number 4.5413599
## age age 4.0874978
## pcs_tot pcs_tot 3.0368260
## RST_PQ_BIS RST_PQ_BIS 2.9141104
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 2.7225697
## RST_PQ_FFS RST_PQ_FFS 2.6895609
## meq_tot meq_tot 2.5811279
## income income 2.3821865
## RST_PQ_BAS_RR RST_PQ_BAS_RR 2.2011815
## bis_brief_tot bis_brief_tot 1.9405536
## proanx_t proanx_t 1.6974620
## RST_PQ_BAS_I RST_PQ_BAS_I 1.6193291
## prodep_t prodep_t 1.4872161
## audit_total audit_total 1.4742663
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.3208316
## current_opioid_meds current_opioid_meds 0.9274296
## hispanic hispanic 0.3485474
## cuditr0 cuditr0 0.2265714
## assigned_sex_at_birth assigned_sex_at_birth 0.2000181
## race race 0.1249175
tc 7, lr 0.0025 - finaltrain_nocgpsr (24 predictors, GCPS-R items excluded)
# Boosted regression tree fit on the training set that omits the GCPS-R
# item predictors: tree.complexity 7, learning.rate 0.0025.
# Column 1 is the response; columns 3-26 are the predictors.
set.seed(1) # fixed seed so the run can be repeated
cvtc7.lr0025 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector, # user-supplied CV fold assignment
  tree.complexity = 7,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01 (left disabled, as in the original call)
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2844
##
## now adding trees...
## 100 1.2609
## 150 1.242
## 200 1.2274
## 250 1.2164
## 300 1.2067
## 350 1.2013
## 400 1.1956
## 450 1.192
## 500 1.1875
## 550 1.1851
## 600 1.1833
## 650 1.1833
## 700 1.1821
## 750 1.1837
## 800 1.185
## 850 1.1866
## 900 1.1875
## 950 1.1897
## 1000 1.1925
## 1050 1.1956
## 1100 1.1969
## 1150 1.1986
## 1200 1.1997
## fitting final gbm model with a fixed number of 700 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.888
##
## estimated cv deviance = 1.182 ; se = 0.034
##
## training data correlation = 0.734
## cv correlation = 0.375 ; se = 0.057
##
## training data ROC score = 0.932
## cv ROC score = 0.697 ; se = 0.034
##
## elapsed time - 0.17 minutes
cvtc7.lr0025$cv.statistics # cross-validation summary: deviance, correlation, discrimination, calibration and threshold (means and SEs)
## $deviance.mean
## [1] 1.182111
##
## $deviance.se
## [1] 0.03359732
##
## $correlation.mean
## [1] 0.3753678
##
## $correlation.se
## [1] 0.05729905
##
## $discrimination.mean
## [1] 0.69749
##
## $discrimination.se
## [1] 0.03415318
##
## $calibration.mean
## [1] 0.1254085 1.2133900 0.5557290 0.6476719 0.4420732
##
## $calibration.se
## [1] 0.16485902 0.22320871 0.11243833 0.08064683 0.11382093
##
## $cv.threshold
## [1] 0.369778
##
## $cv.threshold.se
## [1] 0.0118075
summary(cvtc7.lr0025) # relative influence (rel.inf) of each predictor, largest first; the deviance measures used to compare models are printed by cvtc7.lr0025$cv.statistics
## var rel.inf
## cpaq8_tot cpaq8_tot 20.9863040
## pain_duration pain_duration 8.9185981
## isi_tot isi_tot 8.0248895
## overlapping_pain_number overlapping_pain_number 6.8174959
## employment employment 6.1180188
## pcs_tot pcs_tot 5.9473523
## RST_PQ_BIS RST_PQ_BIS 5.1253684
## age age 5.0862529
## RST_PQ_FFS RST_PQ_FFS 3.7519670
## income income 3.2245599
## prodep_t prodep_t 3.1401372
## RST_PQ_BAS_I RST_PQ_BAS_I 2.9267532
## RST_PQ_BAS_RR RST_PQ_BAS_RR 2.7243910
## bis_brief_tot bis_brief_tot 2.6992574
## meq_tot meq_tot 2.5702763
## current_opioid_meds current_opioid_meds 2.5358785
## proanx_t proanx_t 2.5091523
## audit_total audit_total 2.4701486
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.6610988
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.3806745
## cuditr0 cuditr0 0.5584860
## hispanic hispanic 0.3353975
## assigned_sex_at_birth assigned_sex_at_birth 0.2444684
## race race 0.2430735
tc 7, lr 0.0025 - finaltrain (28 predictors, GCPS-R items gcpsr_3 to gcpsr_6 included)
# Same settings as the tc 7 / lr 0.0025 fit, but on the full training set
# (predictor columns 3-30; column 1 is the response).
set.seed(1) # fixed seed so the run can be repeated
cvtc7.lr0025_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector, # user-supplied CV fold assignment
  tree.complexity = 7,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01 (left disabled, as in the original call)
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2639
##
## now adding trees...
## 100 1.2222
## 150 1.1881
## 200 1.1606
## 250 1.1392
## 300 1.1198
## 350 1.1034
## 400 1.0895
## 450 1.0783
## 500 1.0682
## 550 1.0606
## 600 1.0549
## 650 1.0493
## 700 1.0448
## 750 1.042
## 800 1.0395
## 850 1.038
## 900 1.0374
## 950 1.0362
## 1000 1.0368
## 1050 1.0364
## 1100 1.0365
## 1150 1.0371
## 1200 1.0373
## 1250 1.0391
## 1300 1.0398
## 1350 1.0414
## 1400 1.0438
## 1450 1.0454
## 1500 1.0466
## 1550 1.0476
## fitting final gbm model with a fixed number of 950 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.685
##
## estimated cv deviance = 1.036 ; se = 0.037
##
## training data correlation = 0.811
## cv correlation = 0.507 ; se = 0.046
##
## training data ROC score = 0.961
## cv ROC score = 0.789 ; se = 0.024
##
## elapsed time - 0.24 minutes
cvtc7.lr0025_1$cv.statistics # cross-validation summary: deviance, correlation, discrimination, calibration and threshold (means and SEs)
## $deviance.mean
## [1] 1.036184
##
## $deviance.se
## [1] 0.03675316
##
## $correlation.mean
## [1] 0.5065879
##
## $correlation.se
## [1] 0.04569805
##
## $discrimination.mean
## [1] 0.78866
##
## $discrimination.se
## [1] 0.02379967
##
## $calibration.mean
## [1] 0.07789571 1.12883899 0.64448262 0.70036497 0.52279631
##
## $calibration.se
## [1] 0.09552980 0.12674983 0.07965691 0.05350939 0.10345193
##
## $cv.threshold
## [1] 0.4173048
##
## $cv.threshold.se
## [1] 0.02914774
summary(cvtc7.lr0025_1) # relative influence (rel.inf) of each predictor, largest first
## var rel.inf
## gcpsr_5 gcpsr_5 16.50917560
## cpaq8_tot cpaq8_tot 9.58021851
## pain_duration pain_duration 7.42728602
## gcpsr_4 gcpsr_4 7.32144404
## gcpsr_3 gcpsr_3 6.21385160
## gcpsr_6 gcpsr_6 5.82795595
## isi_tot isi_tot 4.75581212
## employment employment 4.60354458
## overlapping_pain_number overlapping_pain_number 4.36383772
## age age 4.30424772
## RST_PQ_BIS RST_PQ_BIS 2.98537818
## pcs_tot pcs_tot 2.83322973
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 2.81841125
## RST_PQ_FFS RST_PQ_FFS 2.41095373
## RST_PQ_BAS_RR RST_PQ_BAS_RR 2.31513786
## meq_tot meq_tot 2.29848254
## income income 2.04628762
## bis_brief_tot bis_brief_tot 2.00071018
## proanx_t proanx_t 1.80060628
## RST_PQ_BAS_I RST_PQ_BAS_I 1.65940413
## prodep_t prodep_t 1.53236380
## audit_total audit_total 1.40394901
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.13073458
## current_opioid_meds current_opioid_meds 0.93139849
## hispanic hispanic 0.38331893
## assigned_sex_at_birth assigned_sex_at_birth 0.24401814
## cuditr0 cuditr0 0.20001777
## race race 0.09822394
tc 7, lr 0.005 - finaltrain_nocgpsr (24 predictors, GCPS-R items excluded)
# Boosted regression tree fit on the training set that omits the GCPS-R
# item predictors: tree.complexity 7, learning.rate 0.005.
# Column 1 is the response; columns 3-26 are the predictors.
set.seed(1) # fixed seed so the run can be repeated
cvtc7.lr005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector, # user-supplied CV fold assignment
  tree.complexity = 7,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01 (left disabled, as in the original call)
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2605
##
## now adding trees...
## 100 1.2274
## 150 1.2046
## 200 1.1945
## 250 1.1883
## 300 1.1849
## 350 1.1853
## 400 1.1882
## 450 1.1913
## 500 1.1943
## 550 1.1963
## 600 1.2022
## 650 1.2073
## 700 1.2104
## 750 1.2161
## 800 1.2214
## 850 1.2284
## 900 1.2362
## 950 1.2408
## 1000 1.2471
## fitting final gbm model with a fixed number of 300 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.925
##
## estimated cv deviance = 1.185 ; se = 0.03
##
## training data correlation = 0.715
## cv correlation = 0.373 ; se = 0.056
##
## training data ROC score = 0.922
## cv ROC score = 0.698 ; se = 0.034
##
## elapsed time - 0.14 minutes
cvtc7.lr005$cv.statistics # cross-validation summary: deviance, correlation, discrimination, calibration and threshold (means and SEs)
## $deviance.mean
## [1] 1.184939
##
## $deviance.se
## [1] 0.02989794
##
## $correlation.mean
## [1] 0.3726517
##
## $correlation.se
## [1] 0.05628425
##
## $discrimination.mean
## [1] 0.69772
##
## $discrimination.se
## [1] 0.03352887
##
## $calibration.mean
## [1] 0.1714954 1.3168555 0.5567728 0.6576525 0.4483368
##
## $calibration.se
## [1] 0.17332535 0.25003073 0.11945301 0.07995939 0.12038951
##
## $cv.threshold
## [1] 0.3725912
##
## $cv.threshold.se
## [1] 0.01034632
summary(cvtc7.lr005) # relative influence (rel.inf) of each predictor, largest first; the deviance measures used to compare models are printed by cvtc7.lr005$cv.statistics
## var rel.inf
## cpaq8_tot cpaq8_tot 22.6327373
## pain_duration pain_duration 8.6811272
## isi_tot isi_tot 8.5523463
## overlapping_pain_number overlapping_pain_number 6.8348854
## employment employment 5.4729970
## pcs_tot pcs_tot 5.3216252
## RST_PQ_BIS RST_PQ_BIS 4.8788211
## age age 4.3953753
## RST_PQ_FFS RST_PQ_FFS 3.6462341
## current_opioid_meds current_opioid_meds 3.2293437
## prodep_t prodep_t 3.2003219
## RST_PQ_BAS_RR RST_PQ_BAS_RR 2.9774021
## income income 2.8951290
## meq_tot meq_tot 2.6264988
## audit_total audit_total 2.5985016
## RST_PQ_BAS_I RST_PQ_BAS_I 2.5092164
## proanx_t proanx_t 2.4068154
## bis_brief_tot bis_brief_tot 2.2661968
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.8055346
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.7556846
## cuditr0 cuditr0 0.6367187
## assigned_sex_at_birth assigned_sex_at_birth 0.2736726
## hispanic hispanic 0.2259074
## race race 0.1769076
tc 7, lr 0.005 - finaltrain (28 predictors, GCPS-R items gcpsr_3 to gcpsr_6 included)
# Same settings as the tc 7 / lr 0.005 fit, but on the full training set
# (predictor columns 3-30; column 1 is the response).
set.seed(1) # fixed seed so the run can be repeated
cvtc7.lr005_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector, # user-supplied CV fold assignment
  tree.complexity = 7,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01 (left disabled, as in the original call)
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2218
##
## now adding trees...
## 100 1.1597
## 150 1.1178
## 200 1.0917
## 250 1.0719
## 300 1.0567
## 350 1.0466
## 400 1.0383
## 450 1.0337
## 500 1.0307
## 550 1.0322
## 600 1.0354
## 650 1.0378
## 700 1.042
## 750 1.0443
## 800 1.0465
## 850 1.0516
## 900 1.0573
## 950 1.0608
## 1000 1.0672
## 1050 1.0724
## 1100 1.0764
## fitting final gbm model with a fixed number of 500 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.664
##
## estimated cv deviance = 1.031 ; se = 0.036
##
## training data correlation = 0.82
## cv correlation = 0.512 ; se = 0.043
##
## training data ROC score = 0.965
## cv ROC score = 0.793 ; se = 0.022
##
## elapsed time - 0.17 minutes
cvtc7.lr005_1$cv.statistics # cross-validation summary: deviance, correlation, discrimination, calibration and threshold (means and SEs)
## $deviance.mean
## [1] 1.030715
##
## $deviance.se
## [1] 0.03563275
##
## $correlation.mean
## [1] 0.5122601
##
## $correlation.se
## [1] 0.04341216
##
## $discrimination.mean
## [1] 0.79256
##
## $discrimination.se
## [1] 0.02175939
##
## $calibration.mean
## [1] 0.06567966 1.09990876 0.66420685 0.69198584 0.53284767
##
## $calibration.se
## [1] 0.08753195 0.11188223 0.07065612 0.05351112 0.09414945
##
## $cv.threshold
## [1] 0.4116784
##
## $cv.threshold.se
## [1] 0.03167422
summary(cvtc7.lr005_1) # relative influence (rel.inf) of each predictor, largest first
## var rel.inf
## gcpsr_5 gcpsr_5 15.47421663
## cpaq8_tot cpaq8_tot 10.16040072
## pain_duration pain_duration 7.55492935
## gcpsr_4 gcpsr_4 7.11868623
## gcpsr_3 gcpsr_3 5.73492216
## gcpsr_6 gcpsr_6 5.51540553
## isi_tot isi_tot 5.40727021
## employment employment 4.85587595
## overlapping_pain_number overlapping_pain_number 4.62348822
## age age 4.31039314
## RST_PQ_BIS RST_PQ_BIS 3.16444977
## pcs_tot pcs_tot 2.78907426
## income income 2.70007566
## RST_PQ_FFS RST_PQ_FFS 2.68164900
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 2.51944958
## meq_tot meq_tot 2.26801079
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.99436870
## bis_brief_tot bis_brief_tot 1.93001691
## RST_PQ_BAS_I RST_PQ_BAS_I 1.82910995
## prodep_t prodep_t 1.67621430
## audit_total audit_total 1.43696363
## proanx_t proanx_t 1.39317793
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.26964851
## current_opioid_meds current_opioid_meds 0.79152816
## cuditr0 cuditr0 0.32932418
## hispanic hispanic 0.22335467
## assigned_sex_at_birth assigned_sex_at_birth 0.20652491
## race race 0.04147096
tc 7, lr 0.00005 - finaltrain_nocgpsr (24 predictors, GCPS-R items excluded)
# Boosted regression tree fit on the training set that omits the GCPS-R
# item predictors: tree.complexity 7, learning.rate 0.00005.
# Column 1 is the response; columns 3-26 are the predictors.
# NOTE(review): in the recorded run this fit stopped at 16950 trees (well
# under max.trees) while the CV deviance was still falling, whereas the
# faster learning rates stopped only after deviance had started to rise.
# The tolerance rule may be ending the search early here - confirm before
# comparing this model's CV deviance with the others.
set.seed(1) # fixed seed so the run can be repeated
cvtc7.lr00005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector, # user-supplied CV fold assignment
  tree.complexity = 7,
  learning.rate = 0.00005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01 (left disabled, as in the original call)
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3149
##
## now adding trees...
## 100 1.3142
## 150 1.3135
## 200 1.3128
## 250 1.3121
## 300 1.3115
## 350 1.3108
## 400 1.3101
## 450 1.3094
## 500 1.3087
## 550 1.308
## 600 1.3074
## 650 1.3067
## 700 1.306
## 750 1.3054
## 800 1.3047
## 850 1.3041
## 900 1.3035
## 950 1.3028
## 1000 1.3022
## 1050 1.3016
## 1100 1.3009
## 1150 1.3003
## 1200 1.2997
## 1250 1.299
## 1300 1.2984
## 1350 1.2978
## 1400 1.2972
## 1450 1.2966
## 1500 1.296
## 1550 1.2954
## 1600 1.2948
## 1650 1.2942
## 1700 1.2936
## 1750 1.293
## 1800 1.2924
## 1850 1.2918
## 1900 1.2912
## 1950 1.2906
## 2000 1.29
## 2050 1.2894
## 2100 1.2888
## 2150 1.2883
## 2200 1.2877
## 2250 1.2872
## 2300 1.2866
## 2350 1.286
## 2400 1.2855
## 2450 1.2849
## 2500 1.2844
## 2550 1.2838
## 2600 1.2833
## 2650 1.2827
## 2700 1.2822
## 2750 1.2817
## 2800 1.2811
## 2850 1.2806
## 2900 1.2801
## 2950 1.2796
## 3000 1.279
## 3050 1.2785
## 3100 1.278
## 3150 1.2775
## 3200 1.277
## 3250 1.2765
## 3300 1.276
## 3350 1.2755
## 3400 1.275
## 3450 1.2745
## 3500 1.274
## 3550 1.2735
## 3600 1.2731
## 3650 1.2726
## 3700 1.2721
## 3750 1.2716
## 3800 1.2712
## 3850 1.2707
## 3900 1.2702
## 3950 1.2697
## 4000 1.2693
## 4050 1.2688
## 4100 1.2683
## 4150 1.2679
## 4200 1.2674
## 4250 1.2669
## 4300 1.2665
## 4350 1.266
## 4400 1.2656
## 4450 1.2651
## 4500 1.2647
## 4550 1.2643
## 4600 1.2638
## 4650 1.2634
## 4700 1.2629
## 4750 1.2625
## 4800 1.262
## 4850 1.2616
## 4900 1.2612
## 4950 1.2608
## 5000 1.2603
## 5050 1.2599
## 5100 1.2595
## 5150 1.2591
## 5200 1.2587
## 5250 1.2583
## 5300 1.2579
## 5350 1.2575
## 5400 1.2571
## 5450 1.2566
## 5500 1.2563
## 5550 1.2559
## 5600 1.2555
## 5650 1.2551
## 5700 1.2547
## 5750 1.2543
## 5800 1.2539
## 5850 1.2535
## 5900 1.2531
## 5950 1.2527
## 6000 1.2523
## 6050 1.2519
## 6100 1.2516
## 6150 1.2512
## 6200 1.2508
## 6250 1.2505
## 6300 1.2501
## 6350 1.2497
## 6400 1.2493
## 6450 1.249
## 6500 1.2486
## 6550 1.2482
## 6600 1.2479
## 6650 1.2475
## 6700 1.2472
## 6750 1.2468
## 6800 1.2465
## 6850 1.2461
## 6900 1.2458
## 6950 1.2454
## 7000 1.2451
## 7050 1.2447
## 7100 1.2444
## 7150 1.244
## 7200 1.2437
## 7250 1.2433
## 7300 1.243
## 7350 1.2426
## 7400 1.2423
## 7450 1.242
## 7500 1.2416
## 7550 1.2413
## 7600 1.241
## 7650 1.2407
## 7700 1.2403
## 7750 1.24
## 7800 1.2397
## 7850 1.2393
## 7900 1.239
## 7950 1.2387
## 8000 1.2384
## 8050 1.238
## 8100 1.2377
## 8150 1.2374
## 8200 1.2371
## 8250 1.2368
## 8300 1.2365
## 8350 1.2362
## 8400 1.2359
## 8450 1.2356
## 8500 1.2353
## 8550 1.235
## 8600 1.2347
## 8650 1.2344
## 8700 1.2342
## 8750 1.2339
## 8800 1.2336
## 8850 1.2333
## 8900 1.233
## 8950 1.2328
## 9000 1.2325
## 9050 1.2322
## 9100 1.2319
## 9150 1.2316
## 9200 1.2314
## 9250 1.2311
## 9300 1.2308
## 9350 1.2306
## 9400 1.2303
## 9450 1.23
## 9500 1.2297
## 9550 1.2294
## 9600 1.2292
## 9650 1.2289
## 9700 1.2287
## 9750 1.2284
## 9800 1.2281
## 9850 1.2279
## 9900 1.2276
## 9950 1.2274
## 10000 1.2271
## 10050 1.2268
## 10100 1.2266
## 10150 1.2263
## 10200 1.2261
## 10250 1.2258
## 10300 1.2256
## 10350 1.2253
## 10400 1.2251
## 10450 1.2248
## 10500 1.2246
## 10550 1.2243
## 10600 1.2241
## 10650 1.2239
## 10700 1.2236
## 10750 1.2234
## 10800 1.2231
## 10850 1.2229
## 10900 1.2227
## 10950 1.2225
## 11000 1.2222
## 11050 1.222
## 11100 1.2217
## 11150 1.2215
## 11200 1.2213
## 11250 1.2211
## 11300 1.2208
## 11350 1.2206
## 11400 1.2204
## 11450 1.2201
## 11500 1.2199
## 11550 1.2197
## 11600 1.2195
## 11650 1.2193
## 11700 1.2191
## 11750 1.2188
## 11800 1.2186
## 11850 1.2184
## 11900 1.2182
## 11950 1.218
## 12000 1.2178
## 12050 1.2176
## 12100 1.2173
## 12150 1.2172
## 12200 1.217
## 12250 1.2168
## 12300 1.2165
## 12350 1.2163
## 12400 1.2162
## 12450 1.2159
## 12500 1.2157
## 12550 1.2155
## 12600 1.2154
## 12650 1.2152
## 12700 1.215
## 12750 1.2148
## 12800 1.2146
## 12850 1.2144
## 12900 1.2142
## 12950 1.214
## 13000 1.2138
## 13050 1.2136
## 13100 1.2134
## 13150 1.2132
## 13200 1.213
## 13250 1.2128
## 13300 1.2126
## 13350 1.2124
## 13400 1.2123
## 13450 1.2121
## 13500 1.2119
## 13550 1.2117
## 13600 1.2115
## 13650 1.2114
## 13700 1.2112
## 13750 1.211
## 13800 1.2108
## 13850 1.2107
## 13900 1.2105
## 13950 1.2103
## 14000 1.2101
## 14050 1.21
## 14100 1.2098
## 14150 1.2097
## 14200 1.2095
## 14250 1.2093
## 14300 1.2092
## 14350 1.209
## 14400 1.2088
## 14450 1.2086
## 14500 1.2085
## 14550 1.2083
## 14600 1.2081
## 14650 1.208
## 14700 1.2078
## 14750 1.2077
## 14800 1.2075
## 14850 1.2074
## 14900 1.2072
## 14950 1.2071
## 15000 1.2069
## 15050 1.2068
## 15100 1.2066
## 15150 1.2065
## 15200 1.2063
## 15250 1.2061
## 15300 1.206
## 15350 1.2059
## 15400 1.2057
## 15450 1.2056
## 15500 1.2054
## 15550 1.2053
## 15600 1.2051
## 15650 1.205
## 15700 1.2048
## 15750 1.2047
## 15800 1.2046
## 15850 1.2045
## 15900 1.2043
## 15950 1.2042
## 16000 1.204
## 16050 1.2039
## 16100 1.2037
## 16150 1.2036
## 16200 1.2034
## 16250 1.2032
## 16300 1.2031
## 16350 1.203
## 16400 1.2029
## 16450 1.2027
## 16500 1.2026
## 16550 1.2024
## 16600 1.2023
## 16650 1.2021
## 16700 1.202
## 16750 1.2019
## 16800 1.2018
## 16850 1.2017
## 16900 1.2015
## 16950 1.2014
## fitting final gbm model with a fixed number of 16950 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 1.045
##
## estimated cv deviance = 1.201 ; se = 0.021
##
## training data correlation = 0.672
## cv correlation = 0.377 ; se = 0.056
##
## training data ROC score = 0.899
## cv ROC score = 0.698 ; se = 0.034
##
## elapsed time - 3.47 minutes
cvtc7.lr00005$cv.statistics # cross-validation summary: deviance, correlation, discrimination, calibration and threshold (means and SEs)
## $deviance.mean
## [1] 1.201419
##
## $deviance.se
## [1] 0.02096563
##
## $correlation.mean
## [1] 0.3773108
##
## $correlation.se
## [1] 0.05565321
##
## $discrimination.mean
## [1] 0.69784
##
## $discrimination.se
## [1] 0.03382687
##
## $calibration.mean
## [1] 0.5340504 2.0003737 0.4304614 0.6898166 0.2941006
##
## $calibration.se
## [1] 0.23016848 0.39381204 0.10842773 0.08264641 0.08691873
##
## $cv.threshold
## [1] 0.3691244
##
## $cv.threshold.se
## [1] 0.007563429
summary(cvtc7.lr00005) # relative influence (rel.inf) of each predictor, largest first; the deviance measures used to compare models are printed by cvtc7.lr00005$cv.statistics
## var rel.inf
## cpaq8_tot cpaq8_tot 27.7416003
## isi_tot isi_tot 8.5402283
## pain_duration pain_duration 8.1276618
## overlapping_pain_number overlapping_pain_number 6.9382441
## employment employment 5.6718507
## pcs_tot pcs_tot 5.2409966
## age age 4.9018880
## RST_PQ_BIS RST_PQ_BIS 4.4214933
## current_opioid_meds current_opioid_meds 3.5252575
## income income 2.8897918
## RST_PQ_FFS RST_PQ_FFS 2.8479092
## proanx_t proanx_t 2.6194077
## RST_PQ_BAS_I RST_PQ_BAS_I 2.5205005
## prodep_t prodep_t 2.4735597
## RST_PQ_BAS_RR RST_PQ_BAS_RR 2.1309925
## meq_tot meq_tot 1.9843152
## audit_total audit_total 1.9317429
## bis_brief_tot bis_brief_tot 1.8405628
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.3319265
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.2605309
## cuditr0 cuditr0 0.6019877
## race race 0.1711519
## hispanic hispanic 0.1662346
## assigned_sex_at_birth assigned_sex_at_birth 0.1201654
tc 7, lr 0.00005 - finaltrain (28 predictors, GCPS-R items gcpsr_3 to gcpsr_6 included)
# Same settings as the tc 7 / lr 0.00005 fit, but on the full training set
# (predictor columns 3-30; column 1 is the response).
set.seed(1) # fixed seed so the run can be repeated
cvtc7.lr00005_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector, # user-supplied CV fold assignment
  tree.complexity = 7,
  learning.rate = 0.00005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01 (left disabled, as in the original call)
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3145
##
## now adding trees...
## 100 1.3133
## 150 1.3122
## 200 1.3111
## 250 1.31
## 300 1.3089
## 350 1.3078
## 400 1.3067
## 450 1.3056
## 500 1.3045
## 550 1.3034
## 600 1.3024
## 650 1.3013
## 700 1.3002
## 750 1.2992
## 800 1.2981
## 850 1.297
## 900 1.296
## 950 1.2949
## 1000 1.2939
## 1050 1.2928
## 1100 1.2918
## 1150 1.2908
## 1200 1.2897
## 1250 1.2887
## 1300 1.2877
## 1350 1.2867
## 1400 1.2857
## 1450 1.2847
## 1500 1.2837
## 1550 1.2827
## 1600 1.2817
## 1650 1.2807
## 1700 1.2797
## 1750 1.2788
## 1800 1.2778
## 1850 1.2768
## 1900 1.2759
## 1950 1.2749
## 2000 1.274
## 2050 1.273
## 2100 1.272
## 2150 1.2711
## 2200 1.2701
## 2250 1.2692
## 2300 1.2683
## 2350 1.2674
## 2400 1.2664
## 2450 1.2655
## 2500 1.2646
## 2550 1.2637
## 2600 1.2627
## 2650 1.2618
## 2700 1.2609
## 2750 1.26
## 2800 1.2591
## 2850 1.2582
## 2900 1.2573
## 2950 1.2565
## 3000 1.2556
## 3050 1.2547
## 3100 1.2538
## 3150 1.2529
## 3200 1.252
## 3250 1.2511
## 3300 1.2503
## 3350 1.2494
## 3400 1.2485
## 3450 1.2477
## 3500 1.2468
## 3550 1.246
## 3600 1.2452
## 3650 1.2444
## 3700 1.2435
## 3750 1.2427
## 3800 1.2418
## 3850 1.241
## 3900 1.2402
## 3950 1.2394
## 4000 1.2386
## 4050 1.2377
## 4100 1.237
## 4150 1.2361
## 4200 1.2353
## 4250 1.2345
## 4300 1.2337
## 4350 1.2329
## 4400 1.2321
## 4450 1.2314
## 4500 1.2306
## 4550 1.2298
## 4600 1.2291
## 4650 1.2283
## 4700 1.2275
## 4750 1.2267
## 4800 1.226
## 4850 1.2252
## 4900 1.2244
## 4950 1.2237
## 5000 1.2229
## 5050 1.2222
## 5100 1.2214
## 5150 1.2207
## 5200 1.22
## 5250 1.2193
## 5300 1.2185
## 5350 1.2179
## 5400 1.2171
## 5450 1.2164
## 5500 1.2157
## 5550 1.215
## 5600 1.2143
## 5650 1.2136
## 5700 1.2129
## 5750 1.2122
## 5800 1.2115
## 5850 1.2108
## 5900 1.2101
## 5950 1.2095
## 6000 1.2088
## 6050 1.2081
## 6100 1.2074
## 6150 1.2067
## 6200 1.2061
## 6250 1.2054
## 6300 1.2047
## 6350 1.204
## 6400 1.2034
## 6450 1.2027
## 6500 1.2021
## 6550 1.2014
## 6600 1.2007
## 6650 1.2001
## 6700 1.1994
## 6750 1.1988
## 6800 1.1981
## 6850 1.1975
## 6900 1.1968
## 6950 1.1962
## 7000 1.1956
## 7050 1.1949
## 7100 1.1942
## 7150 1.1936
## 7200 1.1929
## 7250 1.1923
## 7300 1.1917
## 7350 1.1911
## 7400 1.1904
## 7450 1.1898
## 7500 1.1892
## 7550 1.1886
## 7600 1.188
## 7650 1.1874
## 7700 1.1868
## 7750 1.1862
## 7800 1.1856
## 7850 1.185
## 7900 1.1844
## 7950 1.1838
## 8000 1.1832
## 8050 1.1826
## 8100 1.182
## 8150 1.1814
## 8200 1.1808
## 8250 1.1803
## 8300 1.1797
## 8350 1.1791
## 8400 1.1786
## 8450 1.178
## 8500 1.1774
## 8550 1.1768
## 8600 1.1763
## 8650 1.1757
## 8700 1.1751
## 8750 1.1746
## 8800 1.174
## 8850 1.1734
## 8900 1.1729
## 8950 1.1723
## 9000 1.1718
## 9050 1.1712
## 9100 1.1707
## 9150 1.1701
## 9200 1.1696
## 9250 1.1691
## 9300 1.1685
## 9350 1.168
## 9400 1.1675
## 9450 1.167
## 9500 1.1665
## 9550 1.1659
## 9600 1.1654
## 9650 1.1649
## 9700 1.1643
## 9750 1.1638
## 9800 1.1633
## 9850 1.1628
## 9900 1.1623
## 9950 1.1617
## 10000 1.1612
## 10050 1.1607
## 10100 1.1602
## 10150 1.1597
## 10200 1.1592
## 10250 1.1587
## 10300 1.1582
## 10350 1.1577
## 10400 1.1572
## 10450 1.1567
## 10500 1.1562
## 10550 1.1557
## 10600 1.1553
## 10650 1.1548
## 10700 1.1543
## 10750 1.1538
## 10800 1.1533
## 10850 1.1528
## 10900 1.1523
## 10950 1.1519
## 11000 1.1514
## 11050 1.1509
## 11100 1.1504
## 11150 1.15
## 11200 1.1495
## 11250 1.1491
## 11300 1.1486
## 11350 1.1482
## 11400 1.1477
## 11450 1.1472
## 11500 1.1468
## 11550 1.1463
## 11600 1.1459
## 11650 1.1454
## 11700 1.145
## 11750 1.1445
## 11800 1.144
## 11850 1.1436
## 11900 1.1431
## 11950 1.1427
## 12000 1.1422
## 12050 1.1418
## 12100 1.1414
## 12150 1.1409
## 12200 1.1405
## 12250 1.1401
## 12300 1.1397
## 12350 1.1393
## 12400 1.1388
## 12450 1.1384
## 12500 1.138
## 12550 1.1376
## 12600 1.1371
## 12650 1.1367
## 12700 1.1362
## 12750 1.1358
## 12800 1.1354
## 12850 1.135
## 12900 1.1346
## 12950 1.1342
## 13000 1.1337
## 13050 1.1333
## 13100 1.1329
## 13150 1.1325
## 13200 1.1321
## 13250 1.1317
## 13300 1.1313
## 13350 1.1309
## 13400 1.1305
## 13450 1.1301
## 13500 1.1297
## 13550 1.1293
## 13600 1.1289
## 13650 1.1285
## 13700 1.1281
## 13750 1.1277
## 13800 1.1273
## 13850 1.1269
## 13900 1.1265
## 13950 1.1262
## 14000 1.1257
## 14050 1.1254
## 14100 1.125
## 14150 1.1246
## 14200 1.1243
## 14250 1.1239
## 14300 1.1235
## 14350 1.1231
## 14400 1.1227
## 14450 1.1224
## 14500 1.122
## 14550 1.1216
## 14600 1.1213
## 14650 1.1209
## 14700 1.1205
## 14750 1.1201
## 14800 1.1198
## 14850 1.1194
## 14900 1.1191
## 14950 1.1187
## 15000 1.1184
## 15050 1.118
## 15100 1.1177
## 15150 1.1174
## 15200 1.1171
## 15250 1.1167
## 15300 1.1164
## 15350 1.116
## 15400 1.1157
## 15450 1.1154
## 15500 1.115
## 15550 1.1146
## 15600 1.1143
## 15650 1.114
## 15700 1.1137
## 15750 1.1133
## 15800 1.113
## 15850 1.1126
## 15900 1.1123
## 15950 1.112
## 16000 1.1116
## 16050 1.1113
## 16100 1.1109
## 16150 1.1106
## 16200 1.1103
## 16250 1.1099
## 16300 1.1096
## 16350 1.1093
## 16400 1.109
## 16450 1.1087
## 16500 1.1083
## 16550 1.108
## 16600 1.1077
## 16650 1.1074
## 16700 1.1071
## 16750 1.1067
## 16800 1.1064
## 16850 1.1061
## 16900 1.1058
## 16950 1.1055
## 17000 1.1052
## 17050 1.1049
## 17100 1.1046
## 17150 1.1043
## 17200 1.104
## 17250 1.1037
## 17300 1.1034
## 17350 1.1031
## 17400 1.1028
## 17450 1.1025
## 17500 1.1022
## 17550 1.1019
## 17600 1.1016
## 17650 1.1013
## 17700 1.101
## 17750 1.1008
## 17800 1.1005
## 17850 1.1002
## 17900 1.0999
## 17950 1.0996
## 18000 1.0993
## 18050 1.099
## 18100 1.0987
## 18150 1.0984
## 18200 1.0981
## 18250 1.0978
## 18300 1.0976
## 18350 1.0973
## 18400 1.097
## 18450 1.0967
## 18500 1.0965
## 18550 1.0962
## 18600 1.0959
## 18650 1.0957
## 18700 1.0954
## 18750 1.0951
## 18800 1.0948
## 18850 1.0946
## 18900 1.0943
## 18950 1.094
## 19000 1.0938
## 19050 1.0935
## 19100 1.0932
## 19150 1.093
## 19200 1.0927
## 19250 1.0925
## 19300 1.0922
## 19350 1.0919
## 19400 1.0917
## 19450 1.0914
## 19500 1.0912
## 19550 1.0909
## 19600 1.0906
## 19650 1.0904
## 19700 1.0901
## 19750 1.0899
## 19800 1.0896
## 19850 1.0894
## 19900 1.0891
## 19950 1.0889
## 20000 1.0886
## 20050 1.0884
## 20100 1.0882
## 20150 1.0879
## 20200 1.0877
## 20250 1.0874
## 20300 1.0872
## 20350 1.0869
## 20400 1.0867
## 20450 1.0864
## 20500 1.0862
## 20550 1.086
## 20600 1.0857
## 20650 1.0854
## 20700 1.0852
## 20750 1.085
## 20800 1.0848
## 20850 1.0845
## 20900 1.0843
## 20950 1.0841
## 21000 1.0839
## 21050 1.0836
## 21100 1.0834
## 21150 1.0832
## 21200 1.0829
## 21250 1.0827
## 21300 1.0825
## 21350 1.0822
## 21400 1.082
## 21450 1.0818
## 21500 1.0816
## 21550 1.0813
## 21600 1.0811
## 21650 1.0809
## 21700 1.0807
## 21750 1.0805
## 21800 1.0802
## 21850 1.08
## 21900 1.0798
## 21950 1.0796
## 22000 1.0794
## 22050 1.0792
## 22100 1.0789
## 22150 1.0787
## 22200 1.0785
## 22250 1.0783
## 22300 1.0781
## 22350 1.0779
## 22400 1.0777
## 22450 1.0775
## 22500 1.0772
## 22550 1.0771
## 22600 1.0769
## 22650 1.0767
## 22700 1.0765
## 22750 1.0763
## 22800 1.0761
## 22850 1.0758
## 22900 1.0756
## 22950 1.0754
## 23000 1.0752
## 23050 1.075
## 23100 1.0748
## 23150 1.0746
## 23200 1.0744
## 23250 1.0742
## 23300 1.074
## 23350 1.0738
## 23400 1.0736
## 23450 1.0734
## 23500 1.0732
## 23550 1.073
## 23600 1.0728
## 23650 1.0726
## 23700 1.0724
## 23750 1.0723
## 23800 1.0721
## 23850 1.0719
## 23900 1.0717
## 23950 1.0715
## 24000 1.0713
## 24050 1.0712
## 24100 1.071
## 24150 1.0708
## 24200 1.0706
## 24250 1.0704
## 24300 1.0702
## 24350 1.0701
## 24400 1.0699
## 24450 1.0697
## 24500 1.0695
## 24550 1.0693
## 24600 1.0691
## 24650 1.069
## 24700 1.0688
## 24750 1.0686
## 24800 1.0684
## 24850 1.0682
## 24900 1.0681
## 24950 1.0679
## 25000 1.0677
## 25050 1.0675
## 25100 1.0673
## 25150 1.0671
## 25200 1.067
## 25250 1.0668
## 25300 1.0666
## 25350 1.0665
## 25400 1.0663
## 25450 1.0661
## 25500 1.066
## 25550 1.0658
## 25600 1.0656
## 25650 1.0655
## 25700 1.0653
## 25750 1.0651
## 25800 1.065
## 25850 1.0648
## 25900 1.0647
## 25950 1.0645
## 26000 1.0643
## 26050 1.0642
## 26100 1.064
## 26150 1.0638
## 26200 1.0637
## 26250 1.0635
## 26300 1.0633
## 26350 1.0632
## 26400 1.063
## 26450 1.0628
## 26500 1.0627
## 26550 1.0625
## 26600 1.0624
## 26650 1.0622
## 26700 1.0621
## 26750 1.0619
## 26800 1.0618
## 26850 1.0616
## 26900 1.0615
## 26950 1.0614
## 27000 1.0612
## 27050 1.0611
## 27100 1.0609
## 27150 1.0607
## 27200 1.0606
## 27250 1.0605
## 27300 1.0603
## 27350 1.0602
## 27400 1.06
## 27450 1.0599
## 27500 1.0597
## 27550 1.0596
## 27600 1.0594
## 27650 1.0593
## 27700 1.0591
## 27750 1.059
## 27800 1.0589
## 27850 1.0587
## 27900 1.0586
## 27950 1.0584
## 28000 1.0583
## 28050 1.0582
## 28100 1.058
## 28150 1.0579
## 28200 1.0578
## 28250 1.0576
## 28300 1.0575
## 28350 1.0574
## 28400 1.0572
## fitting final gbm model with a fixed number of 28400 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.829
##
## estimated cv deviance = 1.057 ; se = 0.027
##
## training data correlation = 0.771
## cv correlation = 0.508 ; se = 0.042
##
## training data ROC score = 0.943
## cv ROC score = 0.795 ; se = 0.022
##
## elapsed time - 7.69 minutes
cvtc7.lr00005_1$cv.statistics  # CV means and SEs: deviance, correlation, discrimination, calibration, threshold
## $deviance.mean
## [1] 1.057229
##
## $deviance.se
## [1] 0.0274483
##
## $correlation.mean
## [1] 0.5080948
##
## $correlation.se
## [1] 0.04167875
##
## $discrimination.mean
## [1] 0.79547
##
## $discrimination.se
## [1] 0.02216561
##
## $calibration.mean
## [1] 0.2707792 1.5324922 0.5407383 0.7416255 0.4107525
##
## $calibration.se
## [1] 0.09892171 0.15460285 0.10601916 0.04659420 0.10982681
##
## $cv.threshold
## [1] 0.4137747
##
## $cv.threshold.se
## [1] 0.02114323
summary(cvtc7.lr00005_1)  # relative influence (rel.inf) of each predictor, largest first
## var rel.inf
## gcpsr_5 gcpsr_5 18.75195628
## cpaq8_tot cpaq8_tot 10.34557229
## gcpsr_4 gcpsr_4 7.41270409
## gcpsr_6 gcpsr_6 7.16269684
## pain_duration pain_duration 7.00097570
## gcpsr_3 gcpsr_3 6.77281955
## isi_tot isi_tot 4.83797970
## employment employment 4.37569598
## overlapping_pain_number overlapping_pain_number 4.27665676
## age age 4.12391466
## pcs_tot pcs_tot 2.59872107
## RST_PQ_BIS RST_PQ_BIS 2.51528160
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 2.30550976
## income income 2.13572101
## RST_PQ_FFS RST_PQ_FFS 2.09932968
## meq_tot meq_tot 1.87363928
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.82571444
## bis_brief_tot bis_brief_tot 1.73148182
## proanx_t proanx_t 1.45085500
## RST_PQ_BAS_I RST_PQ_BAS_I 1.30238228
## prodep_t prodep_t 1.29512425
## audit_total audit_total 1.13528580
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.01187149
## current_opioid_meds current_opioid_meds 0.97431901
## hispanic hispanic 0.29802058
## cuditr0 cuditr0 0.18901641
## assigned_sex_at_birth assigned_sex_at_birth 0.14322918
## race race 0.05352549
tc 5 lr .0005 (finaltrain_nocgpsr: 24 predictors, no gcpsr items)
# Tree complexity 5, learning rate 0.0005, fitted to finaltrain_nocgpsr
# (columns 3:26 as predictors, column 1 as the binary response).
# Re-seeding before the call keeps the run reproducible.
set.seed(1)
cvtc5.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 5,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01  (explicit tolerance left disabled)
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3088
##
## now adding trees...
## 100 1.3024
## 150 1.2962
## 200 1.2905
## 250 1.2851
## 300 1.28
## 350 1.2749
## 400 1.27
## 450 1.2657
## 500 1.2613
## 550 1.257
## 600 1.2532
## 650 1.2493
## 700 1.2459
## 750 1.2426
## 800 1.2394
## 850 1.2363
## 900 1.2333
## 950 1.2305
## 1000 1.2282
## 1050 1.2258
## 1100 1.2234
## 1150 1.2211
## 1200 1.2188
## 1250 1.2168
## 1300 1.215
## 1350 1.213
## 1400 1.2112
## 1450 1.2095
## 1500 1.2079
## 1550 1.206
## 1600 1.2044
## 1650 1.203
## 1700 1.2012
## 1750 1.1999
## 1800 1.1986
## 1850 1.1975
## 1900 1.1963
## 1950 1.1952
## 2000 1.1942
## 2050 1.1932
## 2100 1.1924
## 2150 1.1913
## 2200 1.1905
## 2250 1.1895
## 2300 1.1887
## 2350 1.1881
## 2400 1.1873
## 2450 1.1867
## 2500 1.1861
## 2550 1.1854
## 2600 1.1851
## 2650 1.1845
## 2700 1.1839
## 2750 1.1832
## 2800 1.183
## 2850 1.1827
## 2900 1.1823
## 2950 1.182
## 3000 1.1818
## 3050 1.1814
## 3100 1.181
## 3150 1.1806
## 3200 1.1806
## 3250 1.1803
## 3300 1.18
## 3350 1.1798
## 3400 1.1792
## 3450 1.1791
## 3500 1.1791
## 3550 1.1792
## 3600 1.1795
## 3650 1.1795
## 3700 1.1794
## 3750 1.1793
## 3800 1.1795
## 3850 1.1793
## 3900 1.1792
## fitting final gbm model with a fixed number of 3500 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.953
##
## estimated cv deviance = 1.179 ; se = 0.031
##
## training data correlation = 0.674
## cv correlation = 0.382 ; se = 0.058
##
## training data ROC score = 0.896
## cv ROC score = 0.704 ; se = 0.035
##
## elapsed time - 0.47 minutes
cvtc5.lr0005$cv.statistics  # CV means and SEs: deviance, correlation, discrimination, calibration, threshold
## $deviance.mean
## [1] 1.179074
##
## $deviance.se
## [1] 0.03127977
##
## $correlation.mean
## [1] 0.3816371
##
## $correlation.se
## [1] 0.05754611
##
## $discrimination.mean
## [1] 0.70379
##
## $discrimination.se
## [1] 0.03517637
##
## $calibration.mean
## [1] 0.1905766 1.3362313 0.5365693 0.6476231 0.4240671
##
## $calibration.se
## [1] 0.17760136 0.25866132 0.11913839 0.07840703 0.11572572
##
## $cv.threshold
## [1] 0.3686573
##
## $cv.threshold.se
## [1] 0.01040199
summary(cvtc5.lr0005) # prints each predictor's relative influence (rel.inf); the CV deviance used to compare models is in $cv.statistics above
## var rel.inf
## cpaq8_tot cpaq8_tot 24.9282058
## pain_duration pain_duration 8.4884904
## isi_tot isi_tot 8.4414647
## overlapping_pain_number overlapping_pain_number 7.5405979
## employment employment 6.0404447
## pcs_tot pcs_tot 5.8540994
## age age 5.2486400
## RST_PQ_BIS RST_PQ_BIS 4.6045031
## current_opioid_meds current_opioid_meds 3.2408720
## RST_PQ_FFS RST_PQ_FFS 3.1987448
## prodep_t prodep_t 2.6494253
## RST_PQ_BAS_I RST_PQ_BAS_I 2.6097163
## proanx_t proanx_t 2.5981054
## income income 2.5050669
## RST_PQ_BAS_RR RST_PQ_BAS_RR 2.4256882
## meq_tot meq_tot 2.0308386
## audit_total audit_total 2.0113400
## bis_brief_tot bis_brief_tot 1.6667861
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.3529421
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.3474388
## cuditr0 cuditr0 0.5055121
## race race 0.2924086
## hispanic hispanic 0.2912166
## assigned_sex_at_birth assigned_sex_at_birth 0.1274524
tc 5 lr .0005 (finaltrain: 28 predictors, gcpsr_3 to gcpsr_6 included)
# Tree complexity 5, learning rate 0.0005, fitted to finaltrain
# (columns 3:30 as predictors, column 1 as the binary response).
# Re-seeding before the call keeps the run reproducible.
# NOTE(review): gcpsr_3 to gcpsr_6 are predictors here while the response is
# gcpsr_2_HICP_num; confirm these items are not used to define the response.
set.seed(1)
cvtc5.lr0005_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 5,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01  (explicit tolerance left disabled)
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3048
##
## now adding trees...
## 100 1.2945
## 150 1.2845
## 200 1.275
## 250 1.2658
## 300 1.2572
## 350 1.2487
## 400 1.2407
## 450 1.233
## 500 1.2258
## 550 1.2189
## 600 1.2121
## 650 1.2056
## 700 1.199
## 750 1.1929
## 800 1.1871
## 850 1.1814
## 900 1.1761
## 950 1.1709
## 1000 1.166
## 1050 1.1609
## 1100 1.1561
## 1150 1.1515
## 1200 1.1468
## 1250 1.1427
## 1300 1.1386
## 1350 1.1346
## 1400 1.1309
## 1450 1.127
## 1500 1.1235
## 1550 1.1201
## 1600 1.1167
## 1650 1.1134
## 1700 1.1101
## 1750 1.1068
## 1800 1.1038
## 1850 1.1007
## 1900 1.0982
## 1950 1.0955
## 2000 1.0927
## 2050 1.0901
## 2100 1.0877
## 2150 1.0853
## 2200 1.083
## 2250 1.0809
## 2300 1.079
## 2350 1.0769
## 2400 1.0747
## 2450 1.073
## 2500 1.071
## 2550 1.0693
## 2600 1.0676
## 2650 1.0658
## 2700 1.0641
## 2750 1.0625
## 2800 1.061
## 2850 1.0598
## 2900 1.0583
## 2950 1.0569
## 3000 1.0556
## 3050 1.0542
## 3100 1.053
## 3150 1.0519
## 3200 1.0508
## 3250 1.0495
## 3300 1.0482
## 3350 1.0471
## 3400 1.046
## 3450 1.0451
## 3500 1.0443
## 3550 1.0434
## 3600 1.0427
## 3650 1.0419
## 3700 1.0412
## 3750 1.0406
## 3800 1.0399
## 3850 1.0391
## 3900 1.0383
## 3950 1.0377
## 4000 1.0369
## 4050 1.0365
## 4100 1.0358
## 4150 1.0353
## 4200 1.0348
## 4250 1.0343
## 4300 1.0339
## 4350 1.0337
## 4400 1.0331
## 4450 1.0328
## 4500 1.0324
## 4550 1.0321
## 4600 1.0319
## 4650 1.0315
## 4700 1.0311
## 4750 1.0308
## 4800 1.0304
## 4850 1.0301
## 4900 1.0299
## 4950 1.0294
## 5000 1.0291
## 5050 1.0289
## 5100 1.0288
## 5150 1.0288
## 5200 1.0289
## 5250 1.029
## 5300 1.0289
## 5350 1.0288
## 5400 1.0286
## 5450 1.0286
## 5500 1.0287
## 5550 1.0286
## 5600 1.0286
## fitting final gbm model with a fixed number of 5450 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.72
##
## estimated cv deviance = 1.029 ; se = 0.037
##
## training data correlation = 0.781
## cv correlation = 0.513 ; se = 0.046
##
## training data ROC score = 0.947
## cv ROC score = 0.796 ; se = 0.024
##
## elapsed time - 0.78 minutes
cvtc5.lr0005_1$cv.statistics  # CV means and SEs: deviance, correlation, discrimination, calibration, threshold
## $deviance.mean
## [1] 1.02859
##
## $deviance.se
## [1] 0.03747852
##
## $correlation.mean
## [1] 0.5125266
##
## $correlation.se
## [1] 0.04578342
##
## $discrimination.mean
## [1] 0.79618
##
## $discrimination.se
## [1] 0.02400416
##
## $calibration.mean
## [1] 0.06466175 1.12604257 0.65583115 0.71663364 0.53657616
##
## $calibration.se
## [1] 0.09269590 0.12779639 0.08557712 0.04893467 0.10871764
##
## $cv.threshold
## [1] 0.4163356
##
## $cv.threshold.se
## [1] 0.03108364
summary(cvtc5.lr0005_1)  # relative influence (rel.inf) of each predictor, largest first
## var rel.inf
## gcpsr_5 gcpsr_5 16.7742295
## cpaq8_tot cpaq8_tot 10.0999776
## gcpsr_4 gcpsr_4 8.0941208
## pain_duration pain_duration 6.9475641
## gcpsr_3 gcpsr_3 6.8607362
## gcpsr_6 gcpsr_6 6.5210080
## isi_tot isi_tot 5.1306146
## overlapping_pain_number overlapping_pain_number 4.7534500
## employment employment 4.3591720
## age age 4.0747212
## RST_PQ_BIS RST_PQ_BIS 2.8247542
## pcs_tot pcs_tot 2.6769135
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 2.5223428
## RST_PQ_FFS RST_PQ_FFS 2.4987727
## meq_tot meq_tot 2.2526208
## income income 1.8880582
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.8669079
## bis_brief_tot bis_brief_tot 1.7372349
## proanx_t proanx_t 1.4332731
## prodep_t prodep_t 1.3640046
## RST_PQ_BAS_I RST_PQ_BAS_I 1.3057740
## audit_total audit_total 1.1977014
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.9565626
## current_opioid_meds current_opioid_meds 0.8918367
## hispanic hispanic 0.3934290
## race race 0.2168846
## cuditr0 cuditr0 0.1886804
## assigned_sex_at_birth assigned_sex_at_birth 0.1686547
tc 5 lr .0025 (finaltrain_nocgpsr: 24 predictors, no gcpsr items)
# Tree complexity 5, learning rate 0.0025, fitted to finaltrain_nocgpsr
# (columns 3:26 as predictors, column 1 as the binary response).
# Re-seeding before the call keeps the run reproducible.
# NOTE(review): the log below settles on 700 trees; check this against the
# minimum tree count you want before preferring this learning rate.
set.seed(1)
cvtc5.lr0025 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 5,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01  (explicit tolerance left disabled)
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2854
##
## now adding trees...
## 100 1.2619
## 150 1.2431
## 200 1.2286
## 250 1.2176
## 300 1.2086
## 350 1.202
## 400 1.1961
## 450 1.1922
## 500 1.1874
## 550 1.185
## 600 1.1837
## 650 1.1816
## 700 1.1805
## 750 1.1811
## 800 1.1811
## 850 1.1819
## 900 1.1827
## 950 1.1843
## 1000 1.1861
## 1050 1.1887
## 1100 1.1896
## 1150 1.1908
## 1200 1.1913
## 1250 1.1933
## 1300 1.1944
## fitting final gbm model with a fixed number of 700 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.953
##
## estimated cv deviance = 1.181 ; se = 0.031
##
## training data correlation = 0.675
## cv correlation = 0.38 ; se = 0.056
##
## training data ROC score = 0.895
## cv ROC score = 0.699 ; se = 0.034
##
## elapsed time - 0.14 minutes
cvtc5.lr0025$cv.statistics  # CV means and SEs: deviance, correlation, discrimination, calibration, threshold
## $deviance.mean
## [1] 1.180541
##
## $deviance.se
## [1] 0.03116531
##
## $correlation.mean
## [1] 0.3797401
##
## $correlation.se
## [1] 0.05639836
##
## $discrimination.mean
## [1] 0.69884
##
## $discrimination.se
## [1] 0.03362719
##
## $calibration.mean
## [1] 0.1757282 1.3087762 0.5355817 0.6562492 0.4171216
##
## $calibration.se
## [1] 0.17239395 0.24261537 0.11546833 0.07950215 0.11416712
##
## $cv.threshold
## [1] 0.3652963
##
## $cv.threshold.se
## [1] 0.01086483
summary(cvtc5.lr0025) # prints each predictor's relative influence (rel.inf); the CV deviance used to compare models is in $cv.statistics above
## var rel.inf
## cpaq8_tot cpaq8_tot 24.7810411
## pain_duration pain_duration 9.0073441
## isi_tot isi_tot 8.3599604
## overlapping_pain_number overlapping_pain_number 7.4434522
## employment employment 6.0033367
## pcs_tot pcs_tot 5.7857400
## age age 5.1832774
## RST_PQ_BIS RST_PQ_BIS 4.7081277
## RST_PQ_FFS RST_PQ_FFS 3.3382092
## current_opioid_meds current_opioid_meds 2.8861095
## proanx_t proanx_t 2.5498484
## income income 2.4959685
## RST_PQ_BAS_I RST_PQ_BAS_I 2.4597145
## prodep_t prodep_t 2.4183424
## RST_PQ_BAS_RR RST_PQ_BAS_RR 2.3472881
## audit_total audit_total 2.2274154
## meq_tot meq_tot 2.2179955
## bis_brief_tot bis_brief_tot 1.7376902
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.5035024
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.1309442
## cuditr0 cuditr0 0.6121982
## race race 0.4550115
## hispanic hispanic 0.1989830
## assigned_sex_at_birth assigned_sex_at_birth 0.1484993
tc 5 lr .0025 (finaltrain: 28 predictors, gcpsr_3 to gcpsr_6 included)
# Tree complexity 5, learning rate 0.0025, fitted to finaltrain
# (columns 3:30 as predictors, column 1 as the binary response).
# Re-seeding before the call keeps the run reproducible.
# NOTE(review): gcpsr_3 to gcpsr_6 are predictors here while the response is
# gcpsr_2_HICP_num; confirm these items are not used to define the response.
set.seed(1)
cvtc5.lr0025_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 5,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01  (explicit tolerance left disabled)
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2661
##
## now adding trees...
## 100 1.2257
## 150 1.1922
## 200 1.1644
## 250 1.1425
## 300 1.123
## 350 1.106
## 400 1.0923
## 450 1.0806
## 500 1.0699
## 550 1.062
## 600 1.0554
## 650 1.0495
## 700 1.0443
## 750 1.041
## 800 1.0386
## 850 1.0367
## 900 1.035
## 950 1.0331
## 1000 1.0333
## 1050 1.0323
## 1100 1.032
## 1150 1.0322
## 1200 1.0321
## 1250 1.0323
## 1300 1.0324
## 1350 1.033
## 1400 1.0347
## 1450 1.0353
## 1500 1.0365
## 1550 1.0372
## 1600 1.0386
## 1650 1.0402
## fitting final gbm model with a fixed number of 1100 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.722
##
## estimated cv deviance = 1.032 ; se = 0.038
##
## training data correlation = 0.779
## cv correlation = 0.51 ; se = 0.046
##
## training data ROC score = 0.945
## cv ROC score = 0.793 ; se = 0.024
##
## elapsed time - 0.21 minutes
cvtc5.lr0025_1$cv.statistics  # CV means and SEs: deviance, correlation, discrimination, calibration, threshold
## $deviance.mean
## [1] 1.032009
##
## $deviance.se
## [1] 0.0375305
##
## $correlation.mean
## [1] 0.5095567
##
## $correlation.se
## [1] 0.04566738
##
## $discrimination.mean
## [1] 0.79313
##
## $discrimination.se
## [1] 0.02404356
##
## $calibration.mean
## [1] 0.05226445 1.10718344 0.65601077 0.71405316 0.52708828
##
## $calibration.se
## [1] 0.09220384 0.12733837 0.08074928 0.05109581 0.10268058
##
## $cv.threshold
## [1] 0.4142599
##
## $cv.threshold.se
## [1] 0.02991937
summary(cvtc5.lr0025_1)  # relative influence (rel.inf) of each predictor, largest first
## var rel.inf
## gcpsr_5 gcpsr_5 17.5290273
## cpaq8_tot cpaq8_tot 9.7662840
## gcpsr_4 gcpsr_4 8.2190010
## pain_duration pain_duration 6.8928556
## gcpsr_3 gcpsr_3 6.4953900
## gcpsr_6 gcpsr_6 5.9369907
## isi_tot isi_tot 5.2910042
## overlapping_pain_number overlapping_pain_number 4.5980794
## employment employment 4.1867152
## age age 4.1279930
## RST_PQ_BIS RST_PQ_BIS 3.0229158
## pcs_tot pcs_tot 2.6771611
## RST_PQ_FFS RST_PQ_FFS 2.6163415
## RST_PQ_BAS_RR RST_PQ_BAS_RR 2.1909579
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 2.1689198
## bis_brief_tot bis_brief_tot 2.1188346
## meq_tot meq_tot 1.8759135
## income income 1.8522493
## RST_PQ_BAS_I RST_PQ_BAS_I 1.5118891
## audit_total audit_total 1.4138389
## proanx_t proanx_t 1.2463421
## prodep_t prodep_t 1.2457050
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.0085215
## current_opioid_meds current_opioid_meds 0.9763553
## hispanic hispanic 0.3737280
## cuditr0 cuditr0 0.2639232
## race race 0.2282603
## assigned_sex_at_birth assigned_sex_at_birth 0.1648030
tc 5 lr .005 (finaltrain_nocgpsr: 24 predictors, no gcpsr items)
# Tree complexity 5, learning rate 0.005, fitted to finaltrain_nocgpsr
# (columns 3:26 as predictors, column 1 as the binary response).
# Re-seeding before the call keeps the run reproducible.
# NOTE(review): the log below settles on only 400 trees; check this against
# the minimum tree count you want before preferring this learning rate.
set.seed(1)
cvtc5.lr005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 5,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01  (explicit tolerance left disabled)
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2618
##
## now adding trees...
## 100 1.2287
## 150 1.2063
## 200 1.1953
## 250 1.1891
## 300 1.1845
## 350 1.1835
## 400 1.1833
## 450 1.1848
## 500 1.1858
## 550 1.1887
## 600 1.1944
## 650 1.1977
## 700 1.2015
## 750 1.2065
## 800 1.2096
## 850 1.2151
## 900 1.2213
## 950 1.2264
## 1000 1.2324
## fitting final gbm model with a fixed number of 400 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.923
##
## estimated cv deviance = 1.183 ; se = 0.034
##
## training data correlation = 0.69
## cv correlation = 0.373 ; se = 0.057
##
## training data ROC score = 0.905
## cv ROC score = 0.697 ; se = 0.034
##
## elapsed time - 0.11 minutes
cvtc5.lr005$cv.statistics  # CV means and SEs: deviance, correlation, discrimination, calibration, threshold
## $deviance.mean
## [1] 1.183298
##
## $deviance.se
## [1] 0.03378967
##
## $correlation.mean
## [1] 0.3728082
##
## $correlation.se
## [1] 0.05718965
##
## $discrimination.mean
## [1] 0.69689
##
## $discrimination.se
## [1] 0.03373663
##
## $calibration.mean
## [1] 0.1145446 1.1777036 0.5561596 0.6448285 0.4322357
##
## $calibration.se
## [1] 0.16771648 0.21841431 0.11242734 0.07729361 0.11071291
##
## $cv.threshold
## [1] 0.3672349
##
## $cv.threshold.se
## [1] 0.009850221
summary(cvtc5.lr005) # prints each predictor's relative influence (rel.inf); the CV deviance used to compare models is in $cv.statistics above
## var rel.inf
## cpaq8_tot cpaq8_tot 23.6902872
## isi_tot isi_tot 8.7868767
## pain_duration pain_duration 8.2035344
## overlapping_pain_number overlapping_pain_number 7.4456500
## pcs_tot pcs_tot 5.6125706
## employment employment 5.2469153
## age age 4.8527184
## RST_PQ_BIS RST_PQ_BIS 4.7445089
## RST_PQ_FFS RST_PQ_FFS 3.7937757
## current_opioid_meds current_opioid_meds 3.5136110
## RST_PQ_BAS_RR RST_PQ_BAS_RR 2.8564319
## prodep_t prodep_t 2.6905914
## RST_PQ_BAS_I RST_PQ_BAS_I 2.5949630
## proanx_t proanx_t 2.4673339
## meq_tot meq_tot 2.3652439
## audit_total audit_total 2.3185876
## income income 2.2920255
## bis_brief_tot bis_brief_tot 1.7695939
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.6942837
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.5802445
## cuditr0 cuditr0 0.6480302
## race race 0.3001341
## hispanic hispanic 0.2895538
## assigned_sex_at_birth assigned_sex_at_birth 0.2425345
tc 5 lr .005 (finaltrain: 28 predictors, gcpsr_3 to gcpsr_6 included)
# Tree complexity 5, learning rate 0.005, fitted to finaltrain
# (columns 3:30 as predictors, column 1 as the binary response).
# Re-seeding before the call keeps the run reproducible.
# NOTE(review): the log below settles on only 550 trees; check this against
# the minimum tree count you want before preferring this learning rate.
# NOTE(review): gcpsr_3 to gcpsr_6 are predictors here while the response is
# gcpsr_2_HICP_num; confirm these items are not used to define the response.
set.seed(1)
cvtc5.lr005_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 5,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01  (explicit tolerance left disabled)
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.314
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2254
##
## now adding trees...
## 100 1.1639
## 150 1.121
## 200 1.0938
## 250 1.0729
## 300 1.0564
## 350 1.046
## 400 1.0378
## 450 1.0309
## 500 1.0272
## 550 1.0268
## 600 1.0283
## 650 1.0298
## 700 1.0308
## 750 1.0318
## 800 1.0336
## 850 1.0381
## 900 1.042
## 950 1.0451
## 1000 1.051
## 1050 1.0541
## 1100 1.0586
## 1150 1.063
## fitting final gbm model with a fixed number of 550 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.314
## mean residual deviance = 0.718
##
## estimated cv deviance = 1.027 ; se = 0.037
##
## training data correlation = 0.781
## cv correlation = 0.514 ; se = 0.044
##
## training data ROC score = 0.947
## cv ROC score = 0.797 ; se = 0.022
##
## elapsed time - 0.14 minutes
cvtc5.lr005_1$cv.statistics  # CV means and SEs: deviance, correlation, discrimination, calibration, threshold
## $deviance.mean
## [1] 1.026764
##
## $deviance.se
## [1] 0.03688992
##
## $correlation.mean
## [1] 0.5144144
##
## $correlation.se
## [1] 0.04428876
##
## $discrimination.mean
## [1] 0.79702
##
## $discrimination.se
## [1] 0.02238718
##
## $calibration.mean
## [1] 0.06331919 1.11749538 0.66765286 0.71906443 0.53765355
##
## $calibration.se
## [1] 0.08816397 0.12013595 0.08206896 0.05132381 0.10275860
##
## $cv.threshold
## [1] 0.411925
##
## $cv.threshold.se
## [1] 0.02932956
summary(cvtc5.lr005_1)  # relative influence (rel.inf) of each predictor, largest first
## var rel.inf
## gcpsr_5 gcpsr_5 18.1361390
## cpaq8_tot cpaq8_tot 9.9850628
## gcpsr_4 gcpsr_4 8.0736451
## pain_duration pain_duration 6.1306404
## isi_tot isi_tot 5.8115850
## gcpsr_3 gcpsr_3 5.7412784
## gcpsr_6 gcpsr_6 5.6426767
## employment employment 4.7255699
## overlapping_pain_number overlapping_pain_number 4.6974451
## age age 4.1489279
## RST_PQ_BIS RST_PQ_BIS 2.7355501
## RST_PQ_FFS RST_PQ_FFS 2.6501357
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 2.5406324
## pcs_tot pcs_tot 2.5259340
## meq_tot meq_tot 2.3394290
## income income 2.2702297
## RST_PQ_BAS_RR RST_PQ_BAS_RR 2.0561947
## RST_PQ_BAS_I RST_PQ_BAS_I 1.7271336
## prodep_t prodep_t 1.3955031
## proanx_t proanx_t 1.3812406
## bis_brief_tot bis_brief_tot 1.3689129
## audit_total audit_total 1.2553422
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.9994355
## current_opioid_meds current_opioid_meds 0.6846939
## hispanic hispanic 0.4106690
## assigned_sex_at_birth assigned_sex_at_birth 0.2301388
## cuditr0 cuditr0 0.1979390
## race race 0.1379155
Amrit paused here on Dec 20th, 2024