Boosted Regression Trees and
Load the additional BRT functions from Elith et al.
# Make the Downloads folder the working directory, then run the helper script
# brt.functions.R so its functions are available in this session (the
# gbm.step() calls later in this document presumably come from it — TODO confirm).
# NOTE(review): both paths are absolute and specific to one Windows account;
# the script will not run elsewhere without editing them.
setwd("C:\\Users\\ajohal3\\Downloads")
source("C:\\Users\\ajohal3\\Downloads\\brt.functions.R")
Set up packages needed
library(gbm)
## Warning: package 'gbm' was built under R version 4.4.2
## Loaded gbm 2.2.2
## This version of gbm is no longer under development. Consider transitioning to gbm3, https://github.com/gbm-developers/gbm3
library(psych)
## Warning: package 'psych' was built under R version 4.4.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(reshape)
## Warning: package 'reshape' was built under R version 4.4.2
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
# Load the survey export. Text columns are read as factors
# (stringsAsFactors = TRUE).
# na.strings also maps empty / single-space fields to NA; without it a blank
# answer becomes its own factor level (pain_duration otherwise gets a " "
# level) and would be treated by the models as a real category.
Amritdata <- read.csv(
  "C:\\Users\\ajohal3\\Downloads\\Research_Data_updated_Dec_9_2024_AJK.csv",
  stringsAsFactors = TRUE,
  na.strings = c("NA", "", " ")
)
# Per-column overview: level counts for factors, quartiles and NA counts for
# numeric columns.
summary(Amritdata)
## prolific_id race hispanic
## 54d2c4e2fdf99b2c319a8cf6: 1 Min. : 1.000 Hispanic/Latinx : 48
## 55daf7b369dbc30005b68ac9: 1 1st Qu.: 6.000 I prefer not to disclose: 25
## 574d70b95e549100063c6533: 1 Median : 6.000 Non-Hispanic/Latinx :433
## 57d67fdf6598aa00019917c8: 1 Mean : 5.834
## 584e7a66e42c2a00013099ac: 1 3rd Qu.: 6.000
## 58dff341282ab00001e9ce26: 1 Max. :13.000
## (Other) :500
## assigned_sex_at_birth age employment
## Female :376 Min. :18.00 Employed full-time :174
## Intersex/DSD : 1 1st Qu.:23.00 Student :131
## Male :126 Median :29.00 Employed part-time : 76
## prefer not to disclose: 3 Mean :31.79 Unemployed : 53
## 3rd Qu.:38.00 Homemaker : 31
## Max. :70.00 Receiving disability: 26
## NA's :1 (Other) : 15
## income overlapping_pain_number pain_duration
## $0-$25,000 :168 Min. :1.000 >10 years :117
## $25,001-$50,000 :130 1st Qu.:2.000 5-7 years : 99
## $50,001-$75,000 : 83 Median :3.000 2-4 years : 86
## $75,001-$100,000 : 45 Mean :2.883 1-2 years : 59
## >$100,000 : 58 3rd Qu.:4.000 7-10 years: 55
## prefer not to disclose: 22 Max. :9.000 3-6 months: 50
## NA's :3 (Other) : 40
## gcpsr_2 gcpsr_3 gcpsr_4 gcpsr_5 gcpsr_6
## Every day:187 Min. : 0.000 Min. : 0.00 Min. : 0.000 No :400
## Some days:319 1st Qu.: 4.000 1st Qu.: 4.00 1st Qu.: 4.000 Yes:106
## Median : 5.000 Median : 6.00 Median : 5.000
## Mean : 5.255 Mean : 5.54 Mean : 5.316
## 3rd Qu.: 6.000 3rd Qu.: 7.00 3rd Qu.: 7.000
## Max. :10.000 Max. :10.00 Max. :10.000
##
## prodep_t proanx_t prorx_t RST_PQ_FFS
## Min. :41.00 Min. :40.30 Min. :36.30 Min. :1.000
## 1st Qu.:57.30 1st Qu.:59.50 1st Qu.:41.60 1st Qu.:2.000
## Median :63.90 Median :65.30 Median :43.70 Median :2.600
## Mean :62.89 Mean :63.96 Mean :44.72 Mean :2.523
## 3rd Qu.:69.40 3rd Qu.:69.30 3rd Qu.:48.20 3rd Qu.:3.000
## Max. :79.40 Max. :81.60 Max. :75.10 Max. :4.000
## NA's :320
## RST_PQ_BIS RST_PQ_BAS_RI RST_PQ_BAS_GDP RST_PQ_BAS_RR
## Min. :1.174 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.609 1st Qu.:2.000 1st Qu.:2.143 1st Qu.:2.300
## Median :3.065 Median :2.429 Median :2.714 Median :2.700
## Mean :2.980 Mean :2.388 Mean :2.643 Mean :2.672
## 3rd Qu.:3.478 3rd Qu.:2.857 3rd Qu.:3.143 3rd Qu.:3.100
## Max. :4.000 Max. :3.857 Max. :4.000 Max. :4.000
##
## RST_PQ_BAS_I SHAPS_tot bis_brief_tot cpaq8_tot
## Min. :1.000 Min. : 0.000 Min. :1.000 Min. : 2.00
## 1st Qu.:1.875 1st Qu.: 0.000 1st Qu.:1.875 1st Qu.:21.00
## Median :2.375 Median : 1.000 Median :2.125 Median :27.00
## Mean :2.383 Mean : 2.433 Mean :2.192 Mean :26.27
## 3rd Qu.:2.875 3rd Qu.: 4.000 3rd Qu.:2.500 3rd Qu.:31.75
## Max. :4.000 Max. :13.000 Max. :3.875 Max. :46.00
##
## pcs_tot isi_tot meq_tot audit_total
## Min. : 0.00 Min. : 0.00 Min. :36.00 Min. : 0.000
## 1st Qu.:17.00 1st Qu.:10.00 1st Qu.:45.00 1st Qu.: 1.000
## Median :26.00 Median :14.00 Median :48.00 Median : 2.000
## Mean :26.29 Mean :14.09 Mean :48.18 Mean : 2.502
## 3rd Qu.:36.00 3rd Qu.:19.00 3rd Qu.:52.00 3rd Qu.: 4.000
## Max. :52.00 Max. :28.00 Max. :64.00 Max. :11.000
##
## cuditr0 current_opioid_meds gcpsr_2_HICP gcpsr_2_HICP_num
## Min. :0.0000 No :436 No :319 Min. :0.0000
## 1st Qu.:0.0000 Yes: 70 Yes:187 1st Qu.:0.0000
## Median :0.0000 Median :0.0000
## Mean :0.3814 Mean :0.3696
## 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000
##
## income_num income_alpha hispanic_bin
## Min. :1.000 A:168 I prefer not to disclose: 25
## 1st Qu.:1.000 B:130 No :433
## Median :2.000 C: 83 Yes : 48
## Mean :2.528 D: 45
## 3rd Qu.:3.000 E: 58
## Max. :6.000 F: 22
##
# Show every column's storage type and first few values to confirm how
# read.csv parsed the file (factor vs. int vs. num).
str(Amritdata)
## 'data.frame': 506 obs. of 37 variables:
## $ prolific_id : Factor w/ 506 levels "54d2c4e2fdf99b2c319a8cf6",..: 1 2 4 5 6 7 10 13 14 18 ...
## $ race : int 6 6 6 6 6 6 6 6 6 6 ...
## $ hispanic : Factor w/ 3 levels "Hispanic/Latinx",..: 3 3 3 2 3 3 3 3 1 3 ...
## $ assigned_sex_at_birth : Factor w/ 4 levels "Female","Intersex/DSD",..: 3 3 3 1 1 3 3 1 1 3 ...
## $ age : int 43 37 53 34 42 37 46 22 55 24 ...
## $ employment : Factor w/ 9 levels "Employed full-time",..: 1 1 1 1 2 2 1 8 2 9 ...
## $ income : Factor w/ 6 levels "$0-$25,000","$25,001-$50,000",..: 3 2 5 1 1 1 2 1 2 1 ...
## $ overlapping_pain_number: int 5 4 2 1 3 3 3 3 3 1 ...
## $ pain_duration : Factor w/ 8 levels " ",">10 years",..: 2 6 7 2 6 4 7 4 4 6 ...
## $ gcpsr_2 : Factor w/ 2 levels "Every day","Some days": 1 2 1 2 2 2 2 2 2 2 ...
## $ gcpsr_3 : int 4 6 8 6 7 5 6 6 5 8 ...
## $ gcpsr_4 : int 6 8 9 5 6 7 5 5 4 5 ...
## $ gcpsr_5 : int 6 4 8 5 6 6 6 3 5 3 ...
## $ gcpsr_6 : Factor w/ 2 levels "No","Yes": 2 1 1 1 2 1 1 1 1 1 ...
## $ prodep_t : num 58.9 71.2 62.2 63.9 49 69.4 60.5 62.2 67.5 55.7 ...
## $ proanx_t : num 48 69.3 61.4 65.3 40.3 59.5 55.8 65.3 71.2 55.8 ...
## $ prorx_t : num NA NA 41.6 50.4 45.5 NA 36.3 NA NA NA ...
## $ RST_PQ_FFS : num 1.5 1.6 3.4 2.5 2.2 3.3 1.9 2.2 3.1 1.7 ...
## $ RST_PQ_BIS : num 1.65 3.43 2.65 2.96 2.17 ...
## $ RST_PQ_BAS_RI : num 3.43 1.71 2.86 2.57 2.71 ...
## $ RST_PQ_BAS_GDP : num 3.14 1.43 3.71 3 3.14 ...
## $ RST_PQ_BAS_RR : num 2 1.8 3.2 2.9 3 2.8 2.4 2.7 3.6 1.9 ...
## $ RST_PQ_BAS_I : num 2.25 2.62 2.5 2.25 2.62 ...
## $ SHAPS_tot : int 0 4 0 0 0 1 0 6 0 10 ...
## $ bis_brief_tot : num 2 2.88 1.75 2.62 2.12 ...
## $ cpaq8_tot : int 29 26 17 21 20 28 30 30 28 32 ...
## $ pcs_tot : int 5 25 23 31 32 7 16 35 21 20 ...
## $ isi_tot : int 14 17 11 14 15 15 1 21 13 0 ...
## $ meq_tot : int 48 44 43 50 43 57 53 50 45 53 ...
## $ audit_total : int 2 1 1 1 0 2 7 3 5 2 ...
## $ cuditr0 : int 0 1 0 0 0 0 0 1 0 0 ...
## $ current_opioid_meds : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 1 1 ...
## $ gcpsr_2_HICP : Factor w/ 2 levels "No","Yes": 2 1 2 1 1 1 1 1 1 1 ...
## $ gcpsr_2_HICP_num : int 1 0 1 0 0 0 0 0 0 0 ...
## $ income_num : int 3 2 5 1 1 1 2 1 2 1 ...
## $ income_alpha : Factor w/ 6 levels "A","B","C","D",..: 3 2 5 1 1 1 2 1 2 1 ...
## $ hispanic_bin : Factor w/ 3 levels "I prefer not to disclose",..: 2 2 2 1 2 2 2 2 3 2 ...
Clean data
# List the column names with their positions before dropping unused variables.
colnames(Amritdata)
## [1] "prolific_id" "race"
## [3] "hispanic" "assigned_sex_at_birth"
## [5] "age" "employment"
## [7] "income" "overlapping_pain_number"
## [9] "pain_duration" "gcpsr_2"
## [11] "gcpsr_3" "gcpsr_4"
## [13] "gcpsr_5" "gcpsr_6"
## [15] "prodep_t" "proanx_t"
## [17] "prorx_t" "RST_PQ_FFS"
## [19] "RST_PQ_BIS" "RST_PQ_BAS_RI"
## [21] "RST_PQ_BAS_GDP" "RST_PQ_BAS_RR"
## [23] "RST_PQ_BAS_I" "SHAPS_tot"
## [25] "bis_brief_tot" "cpaq8_tot"
## [27] "pcs_tot" "isi_tot"
## [29] "meq_tot" "audit_total"
## [31] "cuditr0" "current_opioid_meds"
## [33] "gcpsr_2_HICP" "gcpsr_2_HICP_num"
## [35] "income_num" "income_alpha"
## [37] "hispanic_bin"
# Remove columns that must not enter the models.
# NOTE(review): gcpsr_2 and gcpsr_2_HICP have the same 187/319 split as the
# outcome gcpsr_2_HICP_num, so they appear to be alternative codings of it;
# income_num, income_alpha and hispanic_bin look like recodes of income and
# hispanic; prorx_t is mostly NA. The reason for dropping SHAPS_tot is not
# visible here — confirm.
Amritdata <- select(
  Amritdata,
  !c("gcpsr_2", "prorx_t", "SHAPS_tot", "gcpsr_2_HICP",
     "income_num", "income_alpha", "hispanic_bin")
)
Now randomly sample the IDs so you have 404 (80% of your sample) unique IDs in the training set, and the other IDs will be used as a test set.
# Fix the RNG state so the train/test split can be reproduced; when the script
# is run top to bottom this also pins every later random step.
set.seed(20241209)
# Draw 404 distinct participant IDs (without replacement) for the training set.
samp <- sample(x = unique(Amritdata$prolific_id), size = 404, replace = FALSE)
# Sanity check: should print 404.
length(samp)
## [1] 404
# Training set = every row whose participant ID was drawn into `samp`.
train <- subset(Amritdata, prolific_id %in% samp)
# Sanity check: number of distinct participants that ended up in `train`.
length(unique(train[["prolific_id"]]))
## [1] 404
Re-ordering variables so that outcome is first followed by unused variables, followed by the predictors. We won’t use id, which will now be column 2. Remove repeated variables.
# Column positions before reordering; the outcome gcpsr_2_HICP_num is last.
colnames(train)
## [1] "prolific_id" "race"
## [3] "hispanic" "assigned_sex_at_birth"
## [5] "age" "employment"
## [7] "income" "overlapping_pain_number"
## [9] "pain_duration" "gcpsr_3"
## [11] "gcpsr_4" "gcpsr_5"
## [13] "gcpsr_6" "prodep_t"
## [15] "proanx_t" "RST_PQ_FFS"
## [17] "RST_PQ_BIS" "RST_PQ_BAS_RI"
## [19] "RST_PQ_BAS_GDP" "RST_PQ_BAS_RR"
## [21] "RST_PQ_BAS_I" "bis_brief_tot"
## [23] "cpaq8_tot" "pcs_tot"
## [25] "isi_tot" "meq_tot"
## [27] "audit_total" "cuditr0"
## [29] "current_opioid_meds" "gcpsr_2_HICP_num"
# Move the outcome to column 1 and keep every other column in its existing
# order. Selecting by name (instead of c(30, 1:29)) keeps this correct if the
# column count changes and makes the step safe to re-run: a second run of the
# positional version would move a different column to the front.
outcome_col <- "gcpsr_2_HICP_num"
train <- train[, c(outcome_col, base::setdiff(colnames(train), outcome_col))]
colnames(train) # outcome first, then prolific_id, then the 28 predictors
## [1] "gcpsr_2_HICP_num" "prolific_id"
## [3] "race" "hispanic"
## [5] "assigned_sex_at_birth" "age"
## [7] "employment" "income"
## [9] "overlapping_pain_number" "pain_duration"
## [11] "gcpsr_3" "gcpsr_4"
## [13] "gcpsr_5" "gcpsr_6"
## [15] "prodep_t" "proanx_t"
## [17] "RST_PQ_FFS" "RST_PQ_BIS"
## [19] "RST_PQ_BAS_RI" "RST_PQ_BAS_GDP"
## [21] "RST_PQ_BAS_RR" "RST_PQ_BAS_I"
## [23] "bis_brief_tot" "cpaq8_tot"
## [25] "pcs_tot" "isi_tot"
## [27] "meq_tot" "audit_total"
## [29] "cuditr0" "current_opioid_meds"
We need the cross-validation folds to be assigned at the participant level, so that each person's data falls in exactly one fold.
#CV fold making
# Confirm the layout of `train` before building folds: outcome in column 1,
# prolific_id in column 2, predictors after that.
colnames(train)
## [1] "gcpsr_2_HICP_num" "prolific_id"
## [3] "race" "hispanic"
## [5] "assigned_sex_at_birth" "age"
## [7] "employment" "income"
## [9] "overlapping_pain_number" "pain_duration"
## [11] "gcpsr_3" "gcpsr_4"
## [13] "gcpsr_5" "gcpsr_6"
## [15] "prodep_t" "proanx_t"
## [17] "RST_PQ_FFS" "RST_PQ_BIS"
## [19] "RST_PQ_BAS_RI" "RST_PQ_BAS_GDP"
## [21] "RST_PQ_BAS_RR" "RST_PQ_BAS_I"
## [23] "bis_brief_tot" "cpaq8_tot"
## [25] "pcs_tot" "isi_tot"
## [27] "meq_tot" "audit_total"
## [29] "cuditr0" "current_opioid_meds"
# Build participant-level cross-validation folds: one row per unique
# prolific_id with a fold label from 1 to nfolds.
# Seed the RNG so fold membership (and the bagging in later fits, when run in
# sequence) is reproducible.
set.seed(4042)
id <- unique(train$prolific_id)
#summary(id)
#describe(id)
nfolds <- 10
# rep(..., length.out =) yields fold sizes that differ by at most one;
# sample() then shuffles which participant receives which label.
cvtest <- sample(rep(seq_len(nfolds), length.out = length(id)), replace = FALSE)
idname <- "prolific_id"
foldname <- "fold"
cvdf <- data.frame(id, cvtest)
names(cvdf) <- c(idname, foldname)
head(cvdf)
## prolific_id fold
## 1 55daf7b369dbc30005b68ac9 1
## 2 57d67fdf6598aa00019917c8 5
## 3 58dff341282ab00001e9ce26 3
## 4 5947f262de49a9000165ccd3 8
## 5 5a64bb3035f26b0001492e6a 7
## 6 5ac6852df69e940001d98f04 9
# Attach each participant's fold label to the training rows.
# merge(..., sort = FALSE) returns rows in an unspecified order, but later
# steps pair combcvdf$fold row-by-row with `train`. Tag the rows before the
# merge and restore that order afterwards so the pairing is guaranteed.
train_tagged <- cbind(train, .row_order = seq_len(nrow(train)))
combcvdf <- merge(train_tagged, cvdf, by = "prolific_id", sort = FALSE)
combcvdf <- combcvdf[order(combcvdf$.row_order), ]
combcvdf$.row_order <- NULL
rownames(combcvdf) <- NULL
head(combcvdf)
## prolific_id gcpsr_2_HICP_num race hispanic
## 1 55daf7b369dbc30005b68ac9 0 6 Non-Hispanic/Latinx
## 2 57d67fdf6598aa00019917c8 1 6 Non-Hispanic/Latinx
## 3 58dff341282ab00001e9ce26 0 6 Non-Hispanic/Latinx
## 4 5947f262de49a9000165ccd3 0 6 Non-Hispanic/Latinx
## 5 5a64bb3035f26b0001492e6a 0 6 Non-Hispanic/Latinx
## 6 5ac6852df69e940001d98f04 0 6 Non-Hispanic/Latinx
## assigned_sex_at_birth age employment income
## 1 Male 37 Employed full-time $25,001-$50,000
## 2 Male 53 Employed full-time >$100,000
## 3 Female 42 Employed part-time $0-$25,000
## 4 Male 37 Employed part-time $0-$25,000
## 5 Male 46 Employed full-time $25,001-$50,000
## 6 Female 22 Student $0-$25,000
## overlapping_pain_number pain_duration gcpsr_3 gcpsr_4 gcpsr_5 gcpsr_6
## 1 4 5-7 years 6 8 4 No
## 2 2 7-10 years 8 9 8 No
## 3 3 5-7 years 7 6 6 Yes
## 4 3 2-4 years 5 7 6 No
## 5 3 7-10 years 6 5 6 No
## 6 3 2-4 years 6 5 3 No
## prodep_t proanx_t RST_PQ_FFS RST_PQ_BIS RST_PQ_BAS_RI RST_PQ_BAS_GDP
## 1 71.2 69.3 1.6 3.434783 1.714286 1.428571
## 2 62.2 61.4 3.4 2.652174 2.857143 3.714286
## 3 49.0 40.3 2.2 2.173913 2.714286 3.142857
## 4 69.4 59.5 3.3 2.739130 1.714286 1.142857
## 5 60.5 55.8 1.9 2.565217 2.714286 2.571429
## 6 62.2 65.3 2.2 3.304348 1.571429 1.142857
## RST_PQ_BAS_RR RST_PQ_BAS_I bis_brief_tot cpaq8_tot pcs_tot isi_tot meq_tot
## 1 1.8 2.625 2.875 26 25 17 44
## 2 3.2 2.500 1.750 17 23 11 43
## 3 3.0 2.625 2.125 20 32 15 43
## 4 2.8 2.125 2.625 28 7 15 57
## 5 2.4 1.500 1.750 30 16 1 53
## 6 2.7 2.125 2.500 30 35 21 50
## audit_total cuditr0 current_opioid_meds fold
## 1 1 1 No 1
## 2 1 0 Yes 5
## 3 0 0 No 3
## 4 2 0 No 8
## 5 7 0 No 7
## 6 3 1 No 9
#table(train$prolific_id, combcvdf$fold)
#check what's wrong with table
#rowSums(table(train$prolific_id, combcvdf$fold)>0)
# For each fold, count how many participants have at least one row in it.
# NOTE(review): pairing train$prolific_id with combcvdf$fold assumes combcvdf
# rows are in the same order as train; merge(sort = FALSE) does not promise
# that — confirm.
colSums(table(train$prolific_id, combcvdf$fold)>0)
## 1 2 3 4 5 6 7 8 9 10
## 41 41 41 41 40 40 40 40 40 40
# One fold label per row of `train`, in `train`'s row order. Looking the label
# up by participant ID (rather than copying combcvdf$fold) guarantees the
# vector lines up with the data frames passed to gbm.step(), because merge()
# with sort = FALSE does not promise to keep the input row order.
fold.vector <- cvdf$fold[match(train$prolific_id, cvdf$prolific_id)]
# Every training row must have received a fold.
stopifnot(length(fold.vector) == nrow(train), !anyNA(fold.vector))
head(fold.vector)
## [1] 1 5 3 8 7 9
# Descriptive statistics for the fold labels (n should equal the number of
# training rows; min/max should be 1 and nfolds). describe() is presumably
# psych::describe, given library(psych) above.
describe(fold.vector)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 404 5.47 2.88 5 5.46 3.71 1 10 9 0.02 -1.24 0.14
Now let’s run some models with our finaltrain data set! First, tc 3, lr .0005
finaltrain <- as.data.frame(train)
# Same data with the four gcpsr_* items moved to the end, so a contiguous
# gbm.x range can leave them out. Columns are chosen by name so the result
# stays correct if the column order of `train` changes upstream.
# Run every model twice, once per data set:
#   finaltrain          -> gbm.x = 3:30 (all predictors, gcpsr items included)
#   finaltrain_nocgpsr  -> gbm.x = 3:26 (gcpsr items excluded)
gcpsr_cols <- c("gcpsr_3", "gcpsr_4", "gcpsr_5", "gcpsr_6")
finaltrain_nocgpsr <- finaltrain[
  , c(base::setdiff(colnames(finaltrain), gcpsr_cols), gcpsr_cols)
]
colnames(finaltrain)
## [1] "gcpsr_2_HICP_num" "prolific_id"
## [3] "race" "hispanic"
## [5] "assigned_sex_at_birth" "age"
## [7] "employment" "income"
## [9] "overlapping_pain_number" "pain_duration"
## [11] "gcpsr_3" "gcpsr_4"
## [13] "gcpsr_5" "gcpsr_6"
## [15] "prodep_t" "proanx_t"
## [17] "RST_PQ_FFS" "RST_PQ_BIS"
## [19] "RST_PQ_BAS_RI" "RST_PQ_BAS_GDP"
## [21] "RST_PQ_BAS_RR" "RST_PQ_BAS_I"
## [23] "bis_brief_tot" "cpaq8_tot"
## [25] "pcs_tot" "isi_tot"
## [27] "meq_tot" "audit_total"
## [29] "cuditr0" "current_opioid_meds"
# Verify the gcpsr_* items now occupy columns 27-30, so gbm.x = 3:26 excludes them.
colnames(finaltrain_nocgpsr)
## [1] "gcpsr_2_HICP_num" "prolific_id"
## [3] "race" "hispanic"
## [5] "assigned_sex_at_birth" "age"
## [7] "employment" "income"
## [9] "overlapping_pain_number" "pain_duration"
## [11] "prodep_t" "proanx_t"
## [13] "RST_PQ_FFS" "RST_PQ_BIS"
## [15] "RST_PQ_BAS_RI" "RST_PQ_BAS_GDP"
## [17] "RST_PQ_BAS_RR" "RST_PQ_BAS_I"
## [19] "bis_brief_tot" "cpaq8_tot"
## [21] "pcs_tot" "isi_tot"
## [23] "meq_tot" "audit_total"
## [25] "cuditr0" "current_opioid_meds"
## [27] "gcpsr_3" "gcpsr_4"
## [29] "gcpsr_5" "gcpsr_6"
# Fit: gcpsr items excluded, tree complexity 3, learning rate 0.0005.
# gbm.y = 1 is the outcome gcpsr_2_HICP_num; gbm.x = 3:26 are the 24 predictors
# of finaltrain_nocgpsr (prolific_id in column 2 and the gcpsr_* items in
# 27:30 are left out). fold.vector supplies the participant-level CV folds.
cvtc3.lr0005.nogcpsr <- gbm.step(data=finaltrain_nocgpsr,
                                 gbm.x = 3:26,
                                 gbm.y = 1,
                                 fold.vector = fold.vector,
                                 family = "bernoulli",
                                 tree.complexity = 3,
                                 learning.rate = 0.0005,
                                 bag.fraction = 0.5,
                                 tolerance.method = "auto",
                                 #tolerance = 0.01,
                                 max.trees = 50000)
# `cvtc3.lr0005` is reassigned by later fits in this document, which would
# discard this model. The uniquely named object above keeps it available for
# the model comparison; the original name is still bound for the calls below.
cvtc3.lr0005 <- cvtc3.lr0005.nogcpsr
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3089
##
## now adding trees...
## 100 1.3014
## 150 1.2945
## 200 1.2876
## 250 1.2809
## 300 1.2746
## 350 1.2687
## 400 1.2629
## 450 1.2575
## 500 1.2524
## 550 1.2476
## 600 1.2427
## 650 1.2382
## 700 1.2339
## 750 1.2298
## 800 1.2258
## 850 1.2222
## 900 1.2186
## 950 1.215
## 1000 1.2117
## 1050 1.2086
## 1100 1.2056
## 1150 1.2028
## 1200 1.1999
## 1250 1.1974
## 1300 1.195
## 1350 1.1926
## 1400 1.1901
## 1450 1.188
## 1500 1.1858
## 1550 1.184
## 1600 1.182
## 1650 1.1801
## 1700 1.1782
## 1750 1.1764
## 1800 1.1748
## 1850 1.1732
## 1900 1.1716
## 1950 1.1701
## 2000 1.1685
## 2050 1.1671
## 2100 1.1658
## 2150 1.1646
## 2200 1.1633
## 2250 1.1621
## 2300 1.161
## 2350 1.1599
## 2400 1.1588
## 2450 1.1579
## 2500 1.157
## 2550 1.1561
## 2600 1.1551
## 2650 1.1543
## 2700 1.1535
## 2750 1.1529
## 2800 1.1522
## 2850 1.1514
## 2900 1.151
## 2950 1.1504
## 3000 1.1499
## 3050 1.1493
## 3100 1.1489
## 3150 1.1484
## 3200 1.148
## 3250 1.1475
## 3300 1.1474
## 3350 1.1471
## 3400 1.1469
## 3450 1.1468
## 3500 1.1464
## 3550 1.1461
## 3600 1.1457
## 3650 1.1453
## 3700 1.145
## 3750 1.145
## 3800 1.1449
## 3850 1.1447
## 3900 1.1446
## 3950 1.1445
## 4000 1.1443
## 4050 1.1442
## 4100 1.1441
## 4150 1.144
## 4200 1.1441
## 4250 1.1442
## 4300 1.1441
## 4350 1.144
## 4400 1.1441
## fitting final gbm model with a fixed number of 4150 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.969
##
## estimated cv deviance = 1.144 ; se = 0.048
##
## training data correlation = 0.63
## cv correlation = 0.426 ; se = 0.055
##
## training data ROC score = 0.872
## cv ROC score = 0.745 ; se = 0.03
##
## elapsed time - 0.37 minutes
# Cross-validation summary for this fit: mean and SE of deviance, correlation,
# discrimination, calibration and threshold. deviance.mean is the figure used
# to compare models.
cvtc3.lr0005$cv.statistics
## $deviance.mean
## [1] 1.144017
##
## $deviance.se
## [1] 0.0475256
##
## $correlation.mean
## [1] 0.4257112
##
## $correlation.se
## [1] 0.05542095
##
## $discrimination.mean
## [1] 0.7446
##
## $discrimination.se
## [1] 0.03025443
##
## $calibration.mean
## [1] 0.3201415 1.5979503 0.4264567 0.4602101 0.3688215
##
## $calibration.se
## [1] 0.23821960 0.44100007 0.09847052 0.09685961 0.07601304
##
## $cv.threshold
## [1] 0.3781137
##
## $cv.threshold.se
## [1] 0.01345187
# Relative influence of each predictor in the fitted model (columns `var` and
# `rel.inf`). The deviance measures for comparing models are in
# `$cv.statistics`, not in this summary.
summary(cvtc3.lr0005)
## var rel.inf
## cpaq8_tot cpaq8_tot 34.28308891
## age age 8.52846699
## isi_tot isi_tot 7.84598339
## current_opioid_meds current_opioid_meds 7.04057701
## overlapping_pain_number overlapping_pain_number 6.44704211
## pain_duration pain_duration 5.66407621
## pcs_tot pcs_tot 5.38906926
## RST_PQ_BIS RST_PQ_BIS 5.23306327
## employment employment 3.29176647
## audit_total audit_total 3.28654962
## bis_brief_tot bis_brief_tot 2.41069309
## income income 2.15476205
## prodep_t prodep_t 1.67415831
## proanx_t proanx_t 1.42910599
## RST_PQ_FFS RST_PQ_FFS 1.13967206
## meq_tot meq_tot 0.95263740
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 0.78579175
## RST_PQ_BAS_I RST_PQ_BAS_I 0.72992404
## hispanic hispanic 0.51199642
## RST_PQ_BAS_RR RST_PQ_BAS_RR 0.49417388
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.46928338
## race race 0.15781572
## cuditr0 cuditr0 0.06233123
## assigned_sex_at_birth assigned_sex_at_birth 0.01797142
THIS IS WHERE WE STOPPED WHILE CODING TOGETHER. In all of the code below, you will have to change gbm.x and data to match what we were doing (so gbm.x = 3:26, data = finaltrain_nocgpsr).
tc 3, lr .0005 (data = finaltrain, gcpsr items included)
# Fit: all predictors (gcpsr items included), tree complexity 3, learning
# rate 0.0005. gbm.y = 1 is the outcome; gbm.x = 3:30 are the 28 predictors of
# finaltrain. fold.vector supplies the participant-level CV folds.
cvtc3.lr0005.gcpsr <- gbm.step(data=finaltrain,
                               gbm.x = 3:30,
                               gbm.y = 1,
                               fold.vector = fold.vector,
                               family = "bernoulli",
                               tree.complexity = 3,
                               learning.rate = 0.0005,
                               bag.fraction = 0.5,
                               tolerance.method = "auto",
                               #tolerance = 0.01,
                               max.trees = 50000)
# The original object name `cvtc3.lr005` reads as learning rate 0.005, but this
# fit uses 0.0005. The accurately named object above is the one to use for
# model comparison; the old name stays bound so the calls below still work.
cvtc3.lr005 <- cvtc3.lr0005.gcpsr
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3053
##
## now adding trees...
## 100 1.2943
## 150 1.2841
## 200 1.2742
## 250 1.2645
## 300 1.2554
## 350 1.2467
## 400 1.2381
## 450 1.2299
## 500 1.2221
## 550 1.2148
## 600 1.2077
## 650 1.2008
## 700 1.1939
## 750 1.1874
## 800 1.1811
## 850 1.1752
## 900 1.1696
## 950 1.164
## 1000 1.1588
## 1050 1.1537
## 1100 1.1486
## 1150 1.1438
## 1200 1.1394
## 1250 1.135
## 1300 1.1306
## 1350 1.1263
## 1400 1.1225
## 1450 1.1185
## 1500 1.1148
## 1550 1.111
## 1600 1.1076
## 1650 1.1044
## 1700 1.1011
## 1750 1.098
## 1800 1.095
## 1850 1.0919
## 1900 1.0892
## 1950 1.0865
## 2000 1.0836
## 2050 1.081
## 2100 1.0785
## 2150 1.0761
## 2200 1.0739
## 2250 1.0719
## 2300 1.0697
## 2350 1.0676
## 2400 1.0656
## 2450 1.0638
## 2500 1.062
## 2550 1.0602
## 2600 1.0584
## 2650 1.0567
## 2700 1.0548
## 2750 1.0534
## 2800 1.0518
## 2850 1.0503
## 2900 1.0489
## 2950 1.0476
## 3000 1.0461
## 3050 1.0449
## 3100 1.0436
## 3150 1.0424
## 3200 1.0413
## 3250 1.0401
## 3300 1.0391
## 3350 1.0382
## 3400 1.0372
## 3450 1.0362
## 3500 1.0353
## 3550 1.0345
## 3600 1.0338
## 3650 1.0331
## 3700 1.0321
## 3750 1.0316
## 3800 1.0309
## 3850 1.0301
## 3900 1.0296
## 3950 1.029
## 4000 1.0284
## 4050 1.028
## 4100 1.0275
## 4150 1.0271
## 4200 1.0267
## 4250 1.0262
## 4300 1.0258
## 4350 1.0258
## 4400 1.0253
## 4450 1.0248
## 4500 1.0246
## 4550 1.0242
## 4600 1.0237
## 4650 1.0234
## 4700 1.0231
## 4750 1.0228
## 4800 1.0226
## 4850 1.0227
## 4900 1.0224
## 4950 1.0222
## 5000 1.0221
## 5050 1.022
## 5100 1.0219
## 5150 1.0217
## 5200 1.0216
## 5250 1.0217
## 5300 1.0215
## 5350 1.0212
## 5400 1.0213
## 5450 1.0213
## 5500 1.0214
## fitting final gbm model with a fixed number of 5350 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.819
##
## estimated cv deviance = 1.021 ; se = 0.06
##
## training data correlation = 0.712
## cv correlation = 0.54 ; se = 0.055
##
## training data ROC score = 0.909
## cv ROC score = 0.805 ; se = 0.027
##
## elapsed time - 0.51 minutes
# Cross-validation summary for the finaltrain fit at learning rate 0.0005
# (note: the object name says "lr005", the fit itself used 0.0005).
cvtc3.lr005$cv.statistics
## $deviance.mean
## [1] 1.02125
##
## $deviance.se
## [1] 0.06019242
##
## $correlation.mean
## [1] 0.5396279
##
## $correlation.se
## [1] 0.05466285
##
## $discrimination.mean
## [1] 0.80513
##
## $discrimination.se
## [1] 0.02732153
##
## $calibration.mean
## [1] 0.2656818 1.4480633 0.4394217 0.5036994 0.3846411
##
## $calibration.se
## [1] 0.20992680 0.31935138 0.10728474 0.09275580 0.09629856
##
## $cv.threshold
## [1] 0.3950878
##
## $cv.threshold.se
## [1] 0.02209087
# Relative influence of each predictor (`var`, `rel.inf`) for the finaltrain
# fit at learning rate 0.0005.
summary(cvtc3.lr005)
## var rel.inf
## gcpsr_5 gcpsr_5 28.11781616
## cpaq8_tot cpaq8_tot 17.21535092
## gcpsr_3 gcpsr_3 6.55010805
## age age 6.23326778
## isi_tot isi_tot 4.68449160
## gcpsr_6 gcpsr_6 3.96021195
## pain_duration pain_duration 3.90936231
## RST_PQ_BIS RST_PQ_BIS 3.58507490
## gcpsr_4 gcpsr_4 3.18452015
## overlapping_pain_number overlapping_pain_number 3.06062070
## current_opioid_meds current_opioid_meds 2.97187506
## employment employment 2.69628289
## pcs_tot pcs_tot 1.91984333
## audit_total audit_total 1.55495271
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.51650831
## bis_brief_tot bis_brief_tot 1.30447828
## income income 1.23263277
## proanx_t proanx_t 1.16003137
## meq_tot meq_tot 0.87741060
## RST_PQ_BAS_RR RST_PQ_BAS_RR 0.84875439
## hispanic hispanic 0.80856748
## prodep_t prodep_t 0.69460200
## RST_PQ_FFS RST_PQ_FFS 0.66611477
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.59127031
## RST_PQ_BAS_I RST_PQ_BAS_I 0.48076482
## race race 0.11193943
## cuditr0 cuditr0 0.03561131
## assigned_sex_at_birth assigned_sex_at_birth 0.02753565
tc 3, lr .00025 (data = finaltrain_nocgpsr, gcpsr items excluded)
# Fit: gcpsr items excluded, tree complexity 3, learning rate 0.00025.
# Same predictors (gbm.x = 3:26), outcome (gbm.y = 1) and CV folds as the
# 0.0005 fit on finaltrain_nocgpsr; only the learning rate differs.
cvtc3.lr00025.nogcpsr <- gbm.step(data=finaltrain_nocgpsr,
                                  gbm.x = 3:26,
                                  gbm.y = 1,
                                  fold.vector = fold.vector,
                                  family = "bernoulli",
                                  tree.complexity = 3,
                                  learning.rate = 0.00025,
                                  bag.fraction = 0.5,
                                  tolerance.method = "auto",
                                  #tolerance = 0.01,
                                  max.trees = 50000)
# The original code stored this fit as `cvtc3.lr0005`, which names the wrong
# learning rate and replaces the earlier 0.0005 model. The accurately named
# object above is the one to use for model comparison; the old name is still
# rebound here so the inspection calls below show this fit.
cvtc3.lr0005 <- cvtc3.lr00025.nogcpsr
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3129
##
## now adding trees...
## 100 1.3088
## 150 1.3049
## 200 1.3013
## 250 1.2978
## 300 1.294
## 350 1.2907
## 400 1.2873
## 450 1.2839
## 500 1.2807
## 550 1.2775
## 600 1.2744
## 650 1.2715
## 700 1.2686
## 750 1.2659
## 800 1.2631
## 850 1.2603
## 900 1.2577
## 950 1.2552
## 1000 1.2526
## 1050 1.2501
## 1100 1.2478
## 1150 1.2455
## 1200 1.2432
## 1250 1.2409
## 1300 1.2386
## 1350 1.2366
## 1400 1.2345
## 1450 1.2325
## 1500 1.2304
## 1550 1.2284
## 1600 1.2264
## 1650 1.2245
## 1700 1.2226
## 1750 1.2208
## 1800 1.2191
## 1850 1.2173
## 1900 1.2156
## 1950 1.214
## 2000 1.2124
## 2050 1.2108
## 2100 1.2092
## 2150 1.2077
## 2200 1.2062
## 2250 1.2047
## 2300 1.2033
## 2350 1.202
## 2400 1.2006
## 2450 1.1993
## 2500 1.198
## 2550 1.1966
## 2600 1.1953
## 2650 1.1941
## 2700 1.1929
## 2750 1.1917
## 2800 1.1906
## 2850 1.1895
## 2900 1.1884
## 2950 1.1873
## 3000 1.1863
## 3050 1.1853
## 3100 1.1843
## 3150 1.1834
## 3200 1.1824
## 3250 1.1814
## 3300 1.1805
## 3350 1.1795
## 3400 1.1787
## 3450 1.1778
## 3500 1.1769
## 3550 1.176
## 3600 1.175
## 3650 1.1742
## 3700 1.1733
## 3750 1.1726
## 3800 1.1717
## 3850 1.1709
## 3900 1.1702
## 3950 1.1696
## 4000 1.1689
## 4050 1.1683
## 4100 1.1676
## 4150 1.167
## 4200 1.1663
## 4250 1.1657
## 4300 1.165
## 4350 1.1644
## 4400 1.1637
## 4450 1.1629
## 4500 1.1624
## 4550 1.1619
## 4600 1.1613
## 4650 1.1608
## 4700 1.1602
## 4750 1.1596
## 4800 1.1591
## 4850 1.1587
## 4900 1.1581
## 4950 1.1577
## 5000 1.1573
## 5050 1.1568
## 5100 1.1563
## 5150 1.1559
## 5200 1.1555
## 5250 1.155
## 5300 1.1547
## 5350 1.1542
## 5400 1.1537
## 5450 1.1533
## 5500 1.153
## 5550 1.1526
## 5600 1.1523
## 5650 1.1521
## 5700 1.1518
## 5750 1.1513
## 5800 1.151
## 5850 1.1508
## 5900 1.1505
## 5950 1.1502
## 6000 1.1499
## 6050 1.1497
## 6100 1.1494
## 6150 1.1491
## 6200 1.1489
## 6250 1.1487
## 6300 1.1484
## 6350 1.1482
## 6400 1.1479
## 6450 1.1476
## 6500 1.1475
## 6550 1.1473
## 6600 1.147
## 6650 1.1468
## 6700 1.1466
## 6750 1.1463
## 6800 1.1461
## 6850 1.1459
## 6900 1.1459
## 6950 1.1457
## 7000 1.1455
## 7050 1.1453
## 7100 1.1452
## 7150 1.1451
## 7200 1.1451
## 7250 1.1449
## 7300 1.1448
## 7350 1.1448
## 7400 1.1447
## 7450 1.1447
## 7500 1.1446
## fitting final gbm model with a fixed number of 7500 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.985
##
## estimated cv deviance = 1.145 ; se = 0.046
##
## training data correlation = 0.623
## cv correlation = 0.429 ; se = 0.056
##
## training data ROC score = 0.867
## cv ROC score = 0.745 ; se = 0.031
##
## elapsed time - 0.68 minutes
# Cross-validation summary. At this point `cvtc3.lr0005` holds the
# finaltrain_nocgpsr fit at learning rate 0.00025 (it was reassigned above).
cvtc3.lr0005$cv.statistics
## $deviance.mean
## [1] 1.144595
##
## $deviance.se
## [1] 0.04636747
##
## $correlation.mean
## [1] 0.4289955
##
## $correlation.se
## [1] 0.0563225
##
## $discrimination.mean
## [1] 0.7452
##
## $discrimination.se
## [1] 0.03106307
##
## $calibration.mean
## [1] 0.3937493 1.7368222 0.3956736 0.4577948 0.3118121
##
## $calibration.se
## [1] 0.26145664 0.49671625 0.08983369 0.09400341 0.05915848
##
## $cv.threshold
## [1] 0.3772412
##
## $cv.threshold.se
## [1] 0.01324371
# Relative influence of each predictor (`var`, `rel.inf`) for the
# learning-rate 0.00025 fit on finaltrain_nocgpsr. The deviance measures for
# comparing models are in `$cv.statistics`, not in this summary.
summary(cvtc3.lr0005)
## var rel.inf
## cpaq8_tot cpaq8_tot 35.42205801
## age age 8.45090923
## isi_tot isi_tot 8.04497580
## current_opioid_meds current_opioid_meds 7.33079585
## overlapping_pain_number overlapping_pain_number 6.29246025
## pain_duration pain_duration 5.66683322
## pcs_tot pcs_tot 5.21650067
## RST_PQ_BIS RST_PQ_BIS 5.16650378
## audit_total audit_total 3.07406116
## employment employment 3.02124418
## bis_brief_tot bis_brief_tot 2.40748262
## income income 2.14751680
## prodep_t prodep_t 1.55350874
## proanx_t proanx_t 1.36160302
## RST_PQ_FFS RST_PQ_FFS 0.92761085
## meq_tot meq_tot 0.92412345
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 0.71557865
## RST_PQ_BAS_I RST_PQ_BAS_I 0.70813123
## RST_PQ_BAS_RR RST_PQ_BAS_RR 0.51843122
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.47086999
## hispanic hispanic 0.38026145
## race race 0.13388751
## cuditr0 cuditr0 0.03560460
## assigned_sex_at_birth assigned_sex_at_birth 0.02904771
tc 3, lr .00025 (data = finaltrain, gcpsr items included)
# Fit: all predictors (gcpsr items included), tree complexity 3, learning
# rate 0.00025. gbm.y = 1 is the outcome; gbm.x = 3:30 are the 28 predictors
# of finaltrain. fold.vector supplies the participant-level CV folds.
cvtc3.lr00025.gcpsr <- gbm.step(data=finaltrain,
                                gbm.x = 3:30,
                                gbm.y = 1,
                                fold.vector = fold.vector,
                                family = "bernoulli",
                                tree.complexity = 3,
                                learning.rate = 0.00025,
                                bag.fraction = 0.5,
                                tolerance.method = "auto",
                                #tolerance = 0.01,
                                max.trees = 50000)
# The original object name `cvtc3.lr0025` reads as learning rate 0.0025, but
# this fit uses 0.00025. The accurately named object above is the one to use
# for model comparison; the old name stays bound so the calls below still work.
cvtc3.lr0025 <- cvtc3.lr00025.gcpsr
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3111
##
## now adding trees...
## 100 1.3054
## 150 1.2999
## 200 1.2946
## 250 1.2893
## 300 1.2842
## 350 1.2791
## 400 1.2741
## 450 1.2693
## 500 1.2646
## 550 1.26
## 600 1.2553
## 650 1.2509
## 700 1.2465
## 750 1.2423
## 800 1.2382
## 850 1.2341
## 900 1.2303
## 950 1.2264
## 1000 1.2226
## 1050 1.2189
## 1100 1.2152
## 1150 1.2117
## 1200 1.2082
## 1250 1.2048
## 1300 1.2014
## 1350 1.1979
## 1400 1.1946
## 1450 1.1914
## 1500 1.1883
## 1550 1.1852
## 1600 1.1822
## 1650 1.1793
## 1700 1.1763
## 1750 1.1734
## 1800 1.1706
## 1850 1.1679
## 1900 1.1651
## 1950 1.1625
## 2000 1.1599
## 2050 1.1572
## 2100 1.1547
## 2150 1.1521
## 2200 1.1498
## 2250 1.1473
## 2300 1.145
## 2350 1.1428
## 2400 1.1404
## 2450 1.1382
## 2500 1.136
## 2550 1.1338
## 2600 1.1318
## 2650 1.1297
## 2700 1.1277
## 2750 1.1257
## 2800 1.1236
## 2850 1.1217
## 2900 1.1198
## 2950 1.1179
## 3000 1.1161
## 3050 1.1142
## 3100 1.1124
## 3150 1.1107
## 3200 1.1089
## 3250 1.1072
## 3300 1.1055
## 3350 1.1038
## 3400 1.102
## 3450 1.1005
## 3500 1.099
## 3550 1.0973
## 3600 1.0959
## 3650 1.0944
## 3700 1.093
## 3750 1.0915
## 3800 1.0901
## 3850 1.0886
## 3900 1.0873
## 3950 1.0859
## 4000 1.0846
## 4050 1.0832
## 4100 1.082
## 4150 1.0807
## 4200 1.0794
## 4250 1.078
## 4300 1.0769
## 4350 1.0757
## 4400 1.0747
## 4450 1.0735
## 4500 1.0724
## 4550 1.0713
## 4600 1.0702
## 4650 1.0692
## 4700 1.0681
## 4750 1.0672
## 4800 1.0662
## 4850 1.0652
## 4900 1.0642
## 4950 1.0633
## 5000 1.0623
## 5050 1.0613
## 5100 1.0604
## 5150 1.0595
## 5200 1.0587
## 5250 1.058
## 5300 1.0571
## 5350 1.0561
## 5400 1.0554
## 5450 1.0546
## 5500 1.0538
## 5550 1.0531
## 5600 1.0524
## 5650 1.0516
## 5700 1.051
## 5750 1.0502
## 5800 1.0495
## 5850 1.0488
## 5900 1.0482
## 5950 1.0474
## 6000 1.0467
## 6050 1.046
## 6100 1.0454
## 6150 1.0447
## 6200 1.0441
## 6250 1.0435
## 6300 1.043
## 6350 1.0424
## 6400 1.0418
## 6450 1.0413
## 6500 1.0408
## 6550 1.0403
## 6600 1.0398
## 6650 1.0393
## 6700 1.0389
## 6750 1.0384
## 6800 1.0379
## 6850 1.0375
## 6900 1.0369
## 6950 1.0365
## 7000 1.0361
## 7050 1.0356
## 7100 1.0352
## 7150 1.0348
## 7200 1.0344
## 7250 1.0339
## 7300 1.0335
## 7350 1.0331
## 7400 1.0327
## 7450 1.0325
## 7500 1.0322
## 7550 1.0318
## 7600 1.0315
## 7650 1.0311
## 7700 1.0308
## 7750 1.0305
## 7800 1.0301
## 7850 1.0298
## 7900 1.0294
## 7950 1.0291
## 8000 1.0288
## 8050 1.0285
## 8100 1.0283
## 8150 1.0281
## 8200 1.0278
## 8250 1.0275
## 8300 1.0272
## 8350 1.0269
## 8400 1.0268
## 8450 1.0266
## 8500 1.0264
## 8550 1.0261
## 8600 1.026
## 8650 1.0258
## 8700 1.0256
## 8750 1.0254
## 8800 1.0252
## 8850 1.025
## 8900 1.0248
## 8950 1.0248
## 9000 1.0245
## 9050 1.0244
## 9100 1.0243
## 9150 1.0241
## 9200 1.024
## 9250 1.0239
## 9300 1.0239
## 9350 1.0237
## 9400 1.0235
## 9450 1.0235
## 9500 1.0233
## 9550 1.0232
## 9600 1.0231
## fitting final gbm model with a fixed number of 9600 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.84
##
## estimated cv deviance = 1.023 ; se = 0.058
##
## training data correlation = 0.704
## cv correlation = 0.54 ; se = 0.055
##
## training data ROC score = 0.905
## cv ROC score = 0.806 ; se = 0.028
##
## elapsed time - 0.99 minutes
# Cross-validation summary for the finaltrain fit at learning rate 0.00025
# (note: the object name says "lr0025", the fit itself used 0.00025).
cvtc3.lr0025$cv.statistics
## $deviance.mean
## [1] 1.023057
##
## $deviance.se
## [1] 0.05794706
##
## $correlation.mean
## [1] 0.5401421
##
## $correlation.se
## [1] 0.05476121
##
## $discrimination.mean
## [1] 0.8056
##
## $discrimination.se
## [1] 0.02785595
##
## $calibration.mean
## [1] 0.2803442 1.4993694 0.4273062 0.5031588 0.3397515
##
## $calibration.se
## [1] 0.19962522 0.31415119 0.10106043 0.09159738 0.07981578
##
## $cv.threshold
## [1] 0.3931912
##
## $cv.threshold.se
## [1] 0.02197686
# Relative influence of each predictor (`var`, `rel.inf`) for the finaltrain
# fit at learning rate 0.00025.
summary(cvtc3.lr0025)
## var rel.inf
## gcpsr_5 gcpsr_5 29.13310831
## cpaq8_tot cpaq8_tot 17.69426027
## gcpsr_3 gcpsr_3 6.62468542
## age age 6.09118569
## isi_tot isi_tot 4.62102012
## gcpsr_6 gcpsr_6 4.08236008
## pain_duration pain_duration 3.62987720
## RST_PQ_BIS RST_PQ_BIS 3.50996575
## current_opioid_meds current_opioid_meds 3.19113694
## overlapping_pain_number overlapping_pain_number 3.15846837
## gcpsr_4 gcpsr_4 3.13351585
## employment employment 2.54052239
## pcs_tot pcs_tot 1.88682445
## audit_total audit_total 1.31350153
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.29075696
## bis_brief_tot bis_brief_tot 1.25507709
## income income 1.22194709
## proanx_t proanx_t 1.11138997
## RST_PQ_BAS_RR RST_PQ_BAS_RR 0.84938023
## meq_tot meq_tot 0.77395428
## hispanic hispanic 0.69751503
## prodep_t prodep_t 0.65670009
## RST_PQ_FFS RST_PQ_FFS 0.56034895
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.46471149
## RST_PQ_BAS_I RST_PQ_BAS_I 0.36683593
## race race 0.08913563
## cuditr0 cuditr0 0.03087730
## assigned_sex_at_birth assigned_sex_at_birth 0.02093759
tc 3, lr 0.005 (data = finaltrain_nocgpsr, gcpsr items excluded)
# Fit: gcpsr items excluded, tree complexity 3, learning rate 0.005.
# Same predictors (gbm.x = 3:26), outcome (gbm.y = 1) and CV folds as the
# other finaltrain_nocgpsr fits; only the learning rate differs.
cvtc3.lr005.nogcpsr <- gbm.step(data=finaltrain_nocgpsr,
                                gbm.x = 3:26,
                                gbm.y = 1,
                                fold.vector = fold.vector,
                                family = "bernoulli",
                                tree.complexity = 3,
                                learning.rate = 0.005,
                                bag.fraction = 0.5,
                                tolerance.method = "auto",
                                #tolerance = 0.01,
                                max.trees = 50000)
# The original code stored this fit as `cvtc3.lr0005`, which names the wrong
# learning rate and replaces the model previously held under that name. The
# accurately named object above is the one to use for model comparison; the
# old name is still rebound here so the inspection calls below show this fit.
cvtc3.lr0005 <- cvtc3.lr005.nogcpsr
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2509
##
## now adding trees...
## 100 1.2116
## 150 1.1868
## 200 1.1672
## 250 1.1555
## 300 1.1489
## 350 1.1469
## 400 1.1452
## 450 1.1439
## 500 1.147
## 550 1.1495
## 600 1.152
## 650 1.1551
## 700 1.157
## 750 1.1602
## 800 1.1655
## 850 1.1712
## 900 1.1774
## 950 1.1817
## 1000 1.1878
## 1050 1.1928
## fitting final gbm model with a fixed number of 450 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.954
##
## estimated cv deviance = 1.144 ; se = 0.048
##
## training data correlation = 0.638
## cv correlation = 0.425 ; se = 0.054
##
## training data ROC score = 0.877
## cv ROC score = 0.745 ; se = 0.03
##
## elapsed time - 0.08 minutes
# Print the cross-validation statistics stored on the fit (deviance,
# correlation, discrimination, calibration and threshold, with standard errors).
cvtc3.lr0005[["cv.statistics"]]
## $deviance.mean
## [1] 1.143858
##
## $deviance.se
## [1] 0.04837856
##
## $correlation.mean
## [1] 0.4248425
##
## $correlation.se
## [1] 0.05403213
##
## $discrimination.mean
## [1] 0.74496
##
## $discrimination.se
## [1] 0.02967057
##
## $calibration.mean
## [1] 0.2495425 1.5043870 0.4422612 0.4668652 0.3921268
##
## $calibration.se
## [1] 0.20757040 0.38639905 0.10195605 0.10198461 0.07525061
##
## $cv.threshold
## [1] 0.3790469
##
## $cv.threshold.se
## [1] 0.01521988
# Relative influence of each predictor in this fit (printed table has the
# columns var and rel.inf). The deviance measures used to compare models are
# printed by $cv.statistics above, not by summary().
summary(cvtc3.lr0005)
## var rel.inf
## cpaq8_tot cpaq8_tot 32.4329171
## isi_tot isi_tot 7.8137361
## current_opioid_meds current_opioid_meds 7.6566140
## age age 7.5778058
## pain_duration pain_duration 5.9906613
## RST_PQ_BIS RST_PQ_BIS 5.8668207
## overlapping_pain_number overlapping_pain_number 5.8631029
## pcs_tot pcs_tot 5.8054731
## employment employment 3.7897738
## audit_total audit_total 3.3950920
## bis_brief_tot bis_brief_tot 2.4366810
## income income 2.2233721
## proanx_t proanx_t 1.6813452
## prodep_t prodep_t 1.6740108
## meq_tot meq_tot 0.9705514
## RST_PQ_FFS RST_PQ_FFS 0.9505188
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 0.9053882
## RST_PQ_BAS_I RST_PQ_BAS_I 0.8914243
## RST_PQ_BAS_RR RST_PQ_BAS_RR 0.8600870
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.4780161
## race race 0.3785031
## hispanic hispanic 0.3581050
## assigned_sex_at_birth assigned_sex_at_birth 0.0000000
## cuditr0 cuditr0 0.0000000
tc 3 lr 0.005 (data = finaltrain; includes gcpsr_3 to gcpsr_6 predictors)
# Cross-validated boosted regression tree fit on finaltrain:
# response in column 1, predictors in columns 3:30, folds supplied by
# fold.vector. Settings for this run: tree.complexity = 3, learning.rate = 0.005.
# NOTE(review): the object name reads as tree complexity 5 but the call uses
# tree.complexity = 3 -- confirm the intended name before comparing models.
cvtc5.lr005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  learning.rate = 0.005,
  tree.complexity = 3,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2208
##
## now adding trees...
## 100 1.1592
## 150 1.1134
## 200 1.0834
## 250 1.0622
## 300 1.0469
## 350 1.037
## 400 1.0281
## 450 1.0228
## 500 1.0202
## 550 1.0205
## 600 1.0215
## 650 1.0225
## 700 1.0248
## 750 1.0283
## 800 1.031
## 850 1.0326
## 900 1.036
## 950 1.0414
## 1000 1.0465
## 1050 1.0514
## 1100 1.0554
## 1150 1.0589
## fitting final gbm model with a fixed number of 500 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.832
##
## estimated cv deviance = 1.02 ; se = 0.059
##
## training data correlation = 0.706
## cv correlation = 0.543 ; se = 0.055
##
## training data ROC score = 0.905
## cv ROC score = 0.806 ; se = 0.028
##
## elapsed time - 0.09 minutes
# Print the cross-validation statistics stored on the fit (deviance,
# correlation, discrimination, calibration and threshold, with standard errors).
cvtc5.lr005[["cv.statistics"]]
## $deviance.mean
## [1] 1.020192
##
## $deviance.se
## [1] 0.05931665
##
## $correlation.mean
## [1] 0.542745
##
## $correlation.se
## [1] 0.05537704
##
## $discrimination.mean
## [1] 0.80615
##
## $discrimination.se
## [1] 0.02835754
##
## $calibration.mean
## [1] 0.3063210 1.5174696 0.4286507 0.5043258 0.3485032
##
## $calibration.se
## [1] 0.22593283 0.34471626 0.10361128 0.09312254 0.08310394
##
## $cv.threshold
## [1] 0.3885993
##
## $cv.threshold.se
## [1] 0.01882122
# Relative influence of each predictor in this fit (printed table has the
# columns var and rel.inf).
summary(cvtc5.lr005)
## var rel.inf
## gcpsr_5 gcpsr_5 28.85428279
## cpaq8_tot cpaq8_tot 17.27784217
## gcpsr_3 gcpsr_3 6.51880614
## age age 6.40939912
## isi_tot isi_tot 5.06610488
## gcpsr_6 gcpsr_6 4.33822478
## pain_duration pain_duration 3.78272220
## RST_PQ_BIS RST_PQ_BIS 3.64156788
## gcpsr_4 gcpsr_4 3.37720323
## overlapping_pain_number overlapping_pain_number 3.12190450
## current_opioid_meds current_opioid_meds 2.94641385
## employment employment 2.44501516
## pcs_tot pcs_tot 1.88350921
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.58279126
## bis_brief_tot bis_brief_tot 1.26430033
## income income 1.12726705
## audit_total audit_total 1.11928817
## proanx_t proanx_t 1.03632879
## prodep_t prodep_t 0.73708736
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.73574456
## hispanic hispanic 0.65610982
## meq_tot meq_tot 0.62258985
## RST_PQ_FFS RST_PQ_FFS 0.59359288
## RST_PQ_BAS_RR RST_PQ_BAS_RR 0.50556131
## RST_PQ_BAS_I RST_PQ_BAS_I 0.19732993
## cuditr0 cuditr0 0.08912396
## assigned_sex_at_birth assigned_sex_at_birth 0.03891400
## race race 0.03097481
tc 3 lr 0.0025 (data = finaltrain_nocgpsr; gcpsr_3 to gcpsr_6 predictors excluded)
# Cross-validated boosted regression tree fit on finaltrain_nocgpsr:
# response in column 1, predictors in columns 3:26, folds supplied by
# fold.vector. Settings for this run: tree.complexity = 3, learning.rate = 0.0025.
# NOTE(review): the object name reads as lr 0.0005 but the call uses 0.0025, and
# the same name is assigned by other gbm.step calls in this script, so only the
# most recent fit is kept -- confirm which fit later code expects.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  learning.rate = 0.0025,
  tree.complexity = 3,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2822
##
## now adding trees...
## 100 1.2538
## 150 1.2311
## 200 1.2125
## 250 1.1982
## 300 1.1864
## 350 1.1759
## 400 1.1676
## 450 1.1606
## 500 1.1554
## 550 1.152
## 600 1.1497
## 650 1.147
## 700 1.146
## 750 1.1449
## 800 1.1446
## 850 1.1441
## 900 1.1438
## 950 1.144
## 1000 1.1459
## 1050 1.1462
## 1100 1.1478
## 1150 1.1489
## 1200 1.1502
## 1250 1.152
## 1300 1.1529
## 1350 1.1548
## 1400 1.157
## fitting final gbm model with a fixed number of 900 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.956
##
## estimated cv deviance = 1.144 ; se = 0.048
##
## training data correlation = 0.636
## cv correlation = 0.425 ; se = 0.054
##
## training data ROC score = 0.875
## cv ROC score = 0.744 ; se = 0.03
##
## elapsed time - 0.1 minutes
# Print the cross-validation statistics stored on the fit (deviance,
# correlation, discrimination, calibration and threshold, with standard errors).
cvtc3.lr0005[["cv.statistics"]]
## $deviance.mean
## [1] 1.143781
##
## $deviance.se
## [1] 0.04848497
##
## $correlation.mean
## [1] 0.4246872
##
## $correlation.se
## [1] 0.0543302
##
## $discrimination.mean
## [1] 0.74382
##
## $discrimination.se
## [1] 0.03027563
##
## $calibration.mean
## [1] 0.2558462 1.4989751 0.4404392 0.4579218 0.4030266
##
## $calibration.se
## [1] 0.20991855 0.39415103 0.10250271 0.09675155 0.08158469
##
## $cv.threshold
## [1] 0.3769763
##
## $cv.threshold.se
## [1] 0.01315306
# Relative influence of each predictor in this fit (printed table has the
# columns var and rel.inf). The deviance measures used to compare models are
# printed by $cv.statistics above, not by summary().
summary(cvtc3.lr0005)
## var rel.inf
## cpaq8_tot cpaq8_tot 32.86669072
## age age 8.61310456
## isi_tot isi_tot 8.32826981
## current_opioid_meds current_opioid_meds 6.81577421
## pain_duration pain_duration 5.87172947
## overlapping_pain_number overlapping_pain_number 5.70567174
## pcs_tot pcs_tot 5.24868055
## RST_PQ_BIS RST_PQ_BIS 5.09671484
## audit_total audit_total 3.48603158
## employment employment 3.22403518
## bis_brief_tot bis_brief_tot 2.64234332
## income income 2.31456327
## proanx_t proanx_t 1.91836328
## prodep_t prodep_t 1.68074171
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.20657037
## RST_PQ_FFS RST_PQ_FFS 0.98209923
## RST_PQ_BAS_I RST_PQ_BAS_I 0.93303543
## meq_tot meq_tot 0.88684301
## RST_PQ_BAS_RR RST_PQ_BAS_RR 0.72457093
## hispanic hispanic 0.55792799
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.54657193
## race race 0.30579123
## cuditr0 cuditr0 0.04387561
## assigned_sex_at_birth assigned_sex_at_birth 0.00000000
tc 3 lr 0.0025 (data = finaltrain; includes gcpsr_3 to gcpsr_6 predictors)
# Cross-validated boosted regression tree fit on finaltrain:
# response in column 1, predictors in columns 3:30, folds supplied by
# fold.vector. Settings for this run: tree.complexity = 3, learning.rate = 0.0025.
# NOTE(review): the object name reads as tree complexity 5 / lr 0.0005 but the
# call uses tree.complexity = 3 and learning.rate = 0.0025 -- confirm the
# intended name before comparing models.
cvtc5.lr0005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  learning.rate = 0.0025,
  tree.complexity = 3,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2643
##
## now adding trees...
## 100 1.2219
## 150 1.1863
## 200 1.1579
## 250 1.1335
## 300 1.1131
## 350 1.0968
## 400 1.0828
## 450 1.0707
## 500 1.0612
## 550 1.0536
## 600 1.0469
## 650 1.0407
## 700 1.0358
## 750 1.0328
## 800 1.0297
## 850 1.0282
## 900 1.026
## 950 1.0244
## 1000 1.023
## 1050 1.0228
## 1100 1.0225
## 1150 1.0229
## 1200 1.0226
## 1250 1.0233
## 1300 1.0233
## 1350 1.0243
## 1400 1.0252
## 1450 1.0262
## 1500 1.0279
## 1550 1.0303
## 1600 1.0332
## 1650 1.0346
## fitting final gbm model with a fixed number of 1100 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.813
##
## estimated cv deviance = 1.022 ; se = 0.062
##
## training data correlation = 0.714
## cv correlation = 0.538 ; se = 0.055
##
## training data ROC score = 0.91
## cv ROC score = 0.807 ; se = 0.027
##
## elapsed time - 0.14 minutes
# Print the cross-validation statistics stored on the fit (deviance,
# correlation, discrimination, calibration and threshold, with standard errors).
cvtc5.lr0005[["cv.statistics"]]
## $deviance.mean
## [1] 1.022483
##
## $deviance.se
## [1] 0.06188085
##
## $correlation.mean
## [1] 0.5383629
##
## $correlation.se
## [1] 0.05545334
##
## $discrimination.mean
## [1] 0.80674
##
## $discrimination.se
## [1] 0.02711901
##
## $calibration.mean
## [1] 0.2562806 1.4183258 0.4374475 0.5102666 0.3792940
##
## $calibration.se
## [1] 0.20466788 0.30767427 0.10933206 0.09380486 0.09392956
##
## $cv.threshold
## [1] 0.3973466
##
## $cv.threshold.se
## [1] 0.02117932
# Relative influence of each predictor in this fit (printed table has the
# columns var and rel.inf).
summary(cvtc5.lr0005)
## var rel.inf
## gcpsr_5 gcpsr_5 27.71183079
## cpaq8_tot cpaq8_tot 16.86534350
## gcpsr_3 gcpsr_3 6.36354119
## age age 6.08918186
## isi_tot isi_tot 4.94450331
## pain_duration pain_duration 4.17923340
## gcpsr_6 gcpsr_6 4.06601089
## RST_PQ_BIS RST_PQ_BIS 3.86291801
## gcpsr_4 gcpsr_4 3.34887563
## overlapping_pain_number overlapping_pain_number 3.32118311
## current_opioid_meds current_opioid_meds 2.79416434
## employment employment 2.57229520
## pcs_tot pcs_tot 1.90340178
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.74395946
## audit_total audit_total 1.43862821
## bis_brief_tot bis_brief_tot 1.28608644
## income income 1.27489693
## proanx_t proanx_t 0.99126328
## meq_tot meq_tot 0.97398419
## hispanic hispanic 0.88084577
## RST_PQ_BAS_RR RST_PQ_BAS_RR 0.87404465
## prodep_t prodep_t 0.84877372
## RST_PQ_FFS RST_PQ_FFS 0.75832457
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.53412318
## RST_PQ_BAS_I RST_PQ_BAS_I 0.27388824
## race race 0.09869835
## assigned_sex_at_birth assigned_sex_at_birth 0.00000000
## cuditr0 cuditr0 0.00000000
tc 4 lr 0.0005 (data = finaltrain_nocgpsr; gcpsr_3 to gcpsr_6 predictors excluded)
# Cross-validated boosted regression tree fit on finaltrain_nocgpsr:
# response in column 1, predictors in columns 3:26, folds supplied by
# fold.vector. Settings for this run: tree.complexity = 4, learning.rate = 0.0005.
# NOTE(review): the object name reads as tree complexity 3 but the call uses
# tree.complexity = 4, and the same name is assigned by other gbm.step calls in
# this script, so only the most recent fit is kept -- confirm which fit later
# code expects.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  learning.rate = 0.0005,
  tree.complexity = 4,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3086
##
## now adding trees...
## 100 1.3007
## 150 1.2934
## 200 1.2866
## 250 1.2801
## 300 1.2737
## 350 1.2676
## 400 1.2618
## 450 1.2559
## 500 1.2507
## 550 1.2455
## 600 1.2407
## 650 1.2362
## 700 1.2318
## 750 1.2275
## 800 1.2236
## 850 1.2197
## 900 1.2161
## 950 1.2123
## 1000 1.2091
## 1050 1.206
## 1100 1.203
## 1150 1.2003
## 1200 1.1976
## 1250 1.195
## 1300 1.1925
## 1350 1.1901
## 1400 1.188
## 1450 1.1858
## 1500 1.1838
## 1550 1.1818
## 1600 1.18
## 1650 1.178
## 1700 1.1762
## 1750 1.1748
## 1800 1.1732
## 1850 1.1716
## 1900 1.1702
## 1950 1.1687
## 2000 1.1674
## 2050 1.166
## 2100 1.1648
## 2150 1.1633
## 2200 1.1621
## 2250 1.1612
## 2300 1.1603
## 2350 1.1592
## 2400 1.1583
## 2450 1.1576
## 2500 1.1566
## 2550 1.1558
## 2600 1.1554
## 2650 1.1548
## 2700 1.1541
## 2750 1.1536
## 2800 1.1532
## 2850 1.1525
## 2900 1.1523
## 2950 1.1518
## 3000 1.1512
## 3050 1.151
## 3100 1.1506
## 3150 1.1503
## 3200 1.1501
## 3250 1.1501
## 3300 1.15
## 3350 1.1498
## 3400 1.1498
## 3450 1.1494
## 3500 1.1491
## 3550 1.1489
## 3600 1.1488
## 3650 1.1489
## 3700 1.1487
## 3750 1.1486
## 3800 1.1485
## 3850 1.1487
## 3900 1.1488
## 3950 1.1488
## 4000 1.149
## fitting final gbm model with a fixed number of 3800 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.938
##
## estimated cv deviance = 1.148 ; se = 0.048
##
## training data correlation = 0.663
## cv correlation = 0.42 ; se = 0.055
##
## training data ROC score = 0.89
## cv ROC score = 0.74 ; se = 0.03
##
## elapsed time - 0.4 minutes
# Print the cross-validation statistics stored on the fit (deviance,
# correlation, discrimination, calibration and threshold, with standard errors).
cvtc3.lr0005[["cv.statistics"]]
## $deviance.mean
## [1] 1.148492
##
## $deviance.se
## [1] 0.04809471
##
## $correlation.mean
## [1] 0.4199143
##
## $correlation.se
## [1] 0.05513532
##
## $discrimination.mean
## [1] 0.7401
##
## $discrimination.se
## [1] 0.03047419
##
## $calibration.mean
## [1] 0.2752414 1.5072358 0.4354791 0.4645656 0.3847396
##
## $calibration.se
## [1] 0.21722143 0.38072586 0.10131593 0.09764534 0.07957158
##
## $cv.threshold
## [1] 0.3759294
##
## $cv.threshold.se
## [1] 0.01359232
# Relative influence of each predictor in this fit (printed table has the
# columns var and rel.inf). The deviance measures used to compare models are
# printed by $cv.statistics above, not by summary().
summary(cvtc3.lr0005)
## var rel.inf
## cpaq8_tot cpaq8_tot 30.98156534
## age age 7.82507714
## isi_tot isi_tot 7.46495257
## current_opioid_meds current_opioid_meds 6.42927289
## pain_duration pain_duration 6.25612545
## RST_PQ_BIS RST_PQ_BIS 6.17802994
## overlapping_pain_number overlapping_pain_number 5.60775797
## pcs_tot pcs_tot 5.39613288
## employment employment 3.66095462
## audit_total audit_total 3.50067471
## income income 2.89648541
## bis_brief_tot bis_brief_tot 2.74829376
## prodep_t prodep_t 1.87287593
## proanx_t proanx_t 1.86727271
## RST_PQ_FFS RST_PQ_FFS 1.45294354
## meq_tot meq_tot 1.25265643
## RST_PQ_BAS_I RST_PQ_BAS_I 1.16676808
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.07336727
## RST_PQ_BAS_RR RST_PQ_BAS_RR 0.87440076
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.70731673
## hispanic hispanic 0.45412736
## race race 0.20360342
## cuditr0 cuditr0 0.06521223
## assigned_sex_at_birth assigned_sex_at_birth 0.06413286
tc 4 lr 0.0005 (data = finaltrain; includes gcpsr_3 to gcpsr_6 predictors)
# Cross-validated boosted regression tree fit on finaltrain:
# response in column 1, predictors in columns 3:30, folds supplied by
# fold.vector. Settings for this run: tree.complexity = 4, learning.rate = 0.0005.
# NOTE(review): the object name reads as tree complexity 3 but the call uses
# tree.complexity = 4. This assignment also replaces the finaltrain_nocgpsr fit
# stored under the same name by the preceding gbm.step call -- confirm whether
# both fits need to be kept for the model comparison.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  learning.rate = 0.0005,
  tree.complexity = 4,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3049
##
## now adding trees...
## 100 1.2935
## 150 1.2824
## 200 1.2722
## 250 1.2622
## 300 1.2526
## 350 1.2434
## 400 1.2348
## 450 1.2264
## 500 1.2182
## 550 1.2106
## 600 1.2033
## 650 1.1963
## 700 1.1893
## 750 1.1825
## 800 1.1762
## 850 1.1701
## 900 1.1646
## 950 1.1589
## 1000 1.1537
## 1050 1.1486
## 1100 1.1433
## 1150 1.1386
## 1200 1.134
## 1250 1.1296
## 1300 1.1253
## 1350 1.121
## 1400 1.117
## 1450 1.1132
## 1500 1.1093
## 1550 1.1055
## 1600 1.1019
## 1650 1.0984
## 1700 1.0954
## 1750 1.0923
## 1800 1.0893
## 1850 1.0865
## 1900 1.0838
## 1950 1.081
## 2000 1.0783
## 2050 1.0757
## 2100 1.0732
## 2150 1.071
## 2200 1.0689
## 2250 1.0668
## 2300 1.0647
## 2350 1.0629
## 2400 1.061
## 2450 1.059
## 2500 1.057
## 2550 1.0554
## 2600 1.0536
## 2650 1.0522
## 2700 1.0507
## 2750 1.0492
## 2800 1.0478
## 2850 1.0464
## 2900 1.0453
## 2950 1.0441
## 3000 1.0429
## 3050 1.0417
## 3100 1.0407
## 3150 1.0399
## 3200 1.0389
## 3250 1.038
## 3300 1.037
## 3350 1.0359
## 3400 1.035
## 3450 1.0342
## 3500 1.0332
## 3550 1.0325
## 3600 1.0317
## 3650 1.0311
## 3700 1.0304
## 3750 1.0297
## 3800 1.0292
## 3850 1.0286
## 3900 1.0281
## 3950 1.0278
## 4000 1.0274
## 4050 1.027
## 4100 1.0266
## 4150 1.0261
## 4200 1.0258
## 4250 1.0253
## 4300 1.025
## 4350 1.0247
## 4400 1.0244
## 4450 1.0242
## 4500 1.024
## 4550 1.0239
## 4600 1.0238
## 4650 1.0236
## 4700 1.0235
## 4750 1.0235
## 4800 1.0233
## 4850 1.0234
## 4900 1.0233
## 4950 1.0233
## 5000 1.0233
## 5050 1.0233
## 5100 1.0234
## fitting final gbm model with a fixed number of 4800 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.789
##
## estimated cv deviance = 1.023 ; se = 0.06
##
## training data correlation = 0.739
## cv correlation = 0.538 ; se = 0.055
##
## training data ROC score = 0.923
## cv ROC score = 0.805 ; se = 0.028
##
## elapsed time - 0.57 minutes
# Print the cross-validation statistics stored on the fit (deviance,
# correlation, discrimination, calibration and threshold, with standard errors).
cvtc3.lr0005[["cv.statistics"]]
## $deviance.mean
## [1] 1.023258
##
## $deviance.se
## [1] 0.06038332
##
## $correlation.mean
## [1] 0.537729
##
## $correlation.se
## [1] 0.05507746
##
## $discrimination.mean
## [1] 0.80504
##
## $discrimination.se
## [1] 0.02768744
##
## $calibration.mean
## [1] 0.2776889 1.4505847 0.4491894 0.5118139 0.3918641
##
## $calibration.se
## [1] 0.21900549 0.32663941 0.10957562 0.09360325 0.09620300
##
## $cv.threshold
## [1] 0.3975718
##
## $cv.threshold.se
## [1] 0.02283161
# Relative influence of each predictor in this fit (printed table has the
# columns var and rel.inf). The deviance measures used to compare models are
# printed by $cv.statistics above, not by summary().
summary(cvtc3.lr0005)
## var rel.inf
## gcpsr_5 gcpsr_5 26.01678991
## cpaq8_tot cpaq8_tot 15.99500558
## gcpsr_3 gcpsr_3 6.15392479
## age age 5.77759062
## pain_duration pain_duration 4.73312684
## isi_tot isi_tot 4.72131191
## RST_PQ_BIS RST_PQ_BIS 3.95242780
## gcpsr_6 gcpsr_6 3.70247059
## gcpsr_4 gcpsr_4 3.05903308
## employment employment 3.05275911
## overlapping_pain_number overlapping_pain_number 2.90713716
## current_opioid_meds current_opioid_meds 2.77863394
## pcs_tot pcs_tot 2.39946065
## income income 1.94392141
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.72811983
## audit_total audit_total 1.61328392
## bis_brief_tot bis_brief_tot 1.61085171
## proanx_t proanx_t 1.59590073
## meq_tot meq_tot 1.08713152
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.07447132
## RST_PQ_FFS RST_PQ_FFS 1.00167029
## prodep_t prodep_t 0.86749729
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.68903968
## hispanic hispanic 0.66013523
## RST_PQ_BAS_I RST_PQ_BAS_I 0.64380391
## race race 0.10308851
## cuditr0 cuditr0 0.08830537
## assigned_sex_at_birth assigned_sex_at_birth 0.04310730
tc 4 lr 0.0025 (data = finaltrain_nocgpsr; gcpsr_3 to gcpsr_6 predictors excluded)
# Cross-validated boosted regression tree fit on finaltrain_nocgpsr:
# response in column 1, predictors in columns 3:26, folds supplied by
# fold.vector. Settings for this run: tree.complexity = 4, learning.rate = 0.0025.
# NOTE(review): the object name reads as tree complexity 3 / lr 0.0005 but the
# call uses tree.complexity = 4 and learning.rate = 0.0025, and the same name is
# assigned by other gbm.step calls in this script, so only the most recent fit
# is kept -- confirm which fit later code expects.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  learning.rate = 0.0025,
  tree.complexity = 4,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2802
##
## now adding trees...
## 100 1.2506
## 150 1.2272
## 200 1.2087
## 250 1.1936
## 300 1.1823
## 350 1.1731
## 400 1.1639
## 450 1.1584
## 500 1.1544
## 550 1.151
## 600 1.1488
## 650 1.1475
## 700 1.1473
## 750 1.1471
## 800 1.1485
## 850 1.1493
## 900 1.1502
## 950 1.1502
## 1000 1.1507
## 1050 1.1525
## 1100 1.1552
## 1150 1.1575
## 1200 1.1607
## 1250 1.1632
## 1300 1.1661
## fitting final gbm model with a fixed number of 750 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.941
##
## estimated cv deviance = 1.147 ; se = 0.048
##
## training data correlation = 0.664
## cv correlation = 0.421 ; se = 0.055
##
## training data ROC score = 0.891
## cv ROC score = 0.739 ; se = 0.03
##
## elapsed time - 0.12 minutes
# Print the cross-validation statistics stored on the fit (deviance,
# correlation, discrimination, calibration and threshold, with standard errors).
cvtc3.lr0005[["cv.statistics"]]
## $deviance.mean
## [1] 1.147096
##
## $deviance.se
## [1] 0.04796054
##
## $correlation.mean
## [1] 0.4206574
##
## $correlation.se
## [1] 0.05454263
##
## $discrimination.mean
## [1] 0.73888
##
## $discrimination.se
## [1] 0.02954265
##
## $calibration.mean
## [1] 0.2903537 1.5251900 0.4427980 0.4742805 0.3813984
##
## $calibration.se
## [1] 0.22677885 0.38983186 0.10211383 0.09889303 0.07834834
##
## $cv.threshold
## [1] 0.3790785
##
## $cv.threshold.se
## [1] 0.01401745
# Relative influence of each predictor in this fit (printed table has the
# columns var and rel.inf). The deviance measures used to compare models are
# printed by $cv.statistics above, not by summary().
summary(cvtc3.lr0005)
## var rel.inf
## cpaq8_tot cpaq8_tot 31.12430507
## age age 7.60662033
## isi_tot isi_tot 7.06264228
## overlapping_pain_number overlapping_pain_number 6.05371101
## current_opioid_meds current_opioid_meds 6.01750800
## RST_PQ_BIS RST_PQ_BIS 5.91648290
## pain_duration pain_duration 5.70664598
## pcs_tot pcs_tot 5.15328360
## employment employment 4.37321565
## audit_total audit_total 3.74963740
## income income 3.28484875
## bis_brief_tot bis_brief_tot 2.83084114
## prodep_t prodep_t 2.03113372
## proanx_t proanx_t 1.78815068
## RST_PQ_FFS RST_PQ_FFS 1.39744255
## meq_tot meq_tot 1.32282219
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.14105152
## RST_PQ_BAS_I RST_PQ_BAS_I 1.06562745
## RST_PQ_BAS_RR RST_PQ_BAS_RR 0.95488749
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.64609256
## hispanic hispanic 0.58490038
## race race 0.14712803
## cuditr0 cuditr0 0.04102133
## assigned_sex_at_birth assigned_sex_at_birth 0.00000000
tc 4 lr 0.0025 (data = finaltrain; includes gcpsr_3 to gcpsr_6 predictors)
# Cross-validated boosted regression tree fit on finaltrain:
# response in column 1, predictors in columns 3:30, folds supplied by
# fold.vector. Settings for this run: tree.complexity = 4, learning.rate = 0.0025.
# NOTE(review): the object name reads as tree complexity 7 / lr 0.005 but the
# call uses tree.complexity = 4 and learning.rate = 0.0025 -- confirm the
# intended name before comparing models.
cvtc7.lr005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  learning.rate = 0.0025,
  tree.complexity = 4,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2624
##
## now adding trees...
## 100 1.217
## 150 1.1818
## 200 1.1531
## 250 1.1285
## 300 1.1095
## 350 1.0938
## 400 1.0799
## 450 1.0688
## 500 1.0597
## 550 1.053
## 600 1.0457
## 650 1.0421
## 700 1.0377
## 750 1.0346
## 800 1.0315
## 850 1.0298
## 900 1.0292
## 950 1.028
## 1000 1.0277
## 1050 1.0275
## 1100 1.0295
## 1150 1.0304
## 1200 1.0309
## 1250 1.0326
## 1300 1.0345
## 1350 1.0364
## 1400 1.0381
## 1450 1.0411
## 1500 1.0424
## fitting final gbm model with a fixed number of 1050 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.769
##
## estimated cv deviance = 1.028 ; se = 0.063
##
## training data correlation = 0.746
## cv correlation = 0.532 ; se = 0.056
##
## training data ROC score = 0.926
## cv ROC score = 0.798 ; se = 0.029
##
## elapsed time - 0.15 minutes
# Print the cross-validation statistics stored on the fit (deviance,
# correlation, discrimination, calibration and threshold, with standard errors).
cvtc7.lr005[["cv.statistics"]]
## $deviance.mean
## [1] 1.027529
##
## $deviance.se
## [1] 0.06334221
##
## $correlation.mean
## [1] 0.5315409
##
## $correlation.se
## [1] 0.05591326
##
## $discrimination.mean
## [1] 0.79808
##
## $discrimination.se
## [1] 0.02863952
##
## $calibration.mean
## [1] 0.2283505 1.3769131 0.4587737 0.5071299 0.4302837
##
## $calibration.se
## [1] 0.21461140 0.32381790 0.11513024 0.09420365 0.10061066
##
## $cv.threshold
## [1] 0.4028558
##
## $cv.threshold.se
## [1] 0.02497462
# Relative influence of each predictor in this fit (printed table has the
# columns var and rel.inf).
summary(cvtc7.lr005)
## var rel.inf
## gcpsr_5 gcpsr_5 25.56134199
## cpaq8_tot cpaq8_tot 15.21656080
## age age 5.99112959
## gcpsr_3 gcpsr_3 5.79000540
## isi_tot isi_tot 4.85703600
## pain_duration pain_duration 4.46364353
## RST_PQ_BIS RST_PQ_BIS 3.87092952
## gcpsr_6 gcpsr_6 3.71956596
## employment employment 3.33744407
## gcpsr_4 gcpsr_4 3.01426775
## overlapping_pain_number overlapping_pain_number 3.01271469
## current_opioid_meds current_opioid_meds 2.65631121
## pcs_tot pcs_tot 2.58254461
## proanx_t proanx_t 2.02742074
## bis_brief_tot bis_brief_tot 1.89824212
## income income 1.86647508
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.85558643
## audit_total audit_total 1.70748851
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.20372989
## meq_tot meq_tot 1.14433995
## prodep_t prodep_t 0.93430420
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.89678013
## hispanic hispanic 0.85175512
## RST_PQ_FFS RST_PQ_FFS 0.74427700
## RST_PQ_BAS_I RST_PQ_BAS_I 0.51720976
## race race 0.15124324
## cuditr0 cuditr0 0.10444275
## assigned_sex_at_birth assigned_sex_at_birth 0.02320999
tc 4 lr 0.005 (data = finaltrain_nocgpsr; gcpsr_3 to gcpsr_6 predictors excluded)
# Cross-validated boosted regression tree fit on finaltrain_nocgpsr:
# response in column 1, predictors in columns 3:26, folds supplied by
# fold.vector. Settings for this run: tree.complexity = 4, learning.rate = 0.005.
# NOTE(review): the object name reads as tree complexity 3 / lr 0.0005 but the
# call uses tree.complexity = 4 and learning.rate = 0.005, and the same name is
# assigned by other gbm.step calls in this script, so only the most recent fit
# is kept -- confirm which fit later code expects.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  learning.rate = 0.005,
  tree.complexity = 4,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2499
##
## now adding trees...
## 100 1.2089
## 150 1.1825
## 200 1.1652
## 250 1.1543
## 300 1.1513
## 350 1.1488
## 400 1.1472
## 450 1.1477
## 500 1.1521
## 550 1.1549
## 600 1.1602
## 650 1.1666
## 700 1.1722
## 750 1.1765
## 800 1.1808
## 850 1.1861
## 900 1.1923
## 950 1.1987
## 1000 1.2033
## fitting final gbm model with a fixed number of 400 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.93
##
## estimated cv deviance = 1.147 ; se = 0.049
##
## training data correlation = 0.664
## cv correlation = 0.421 ; se = 0.055
##
## training data ROC score = 0.892
## cv ROC score = 0.741 ; se = 0.03
##
## elapsed time - 0.09 minutes
# Print the cross-validation statistics stored on the fit (deviance,
# correlation, discrimination, calibration and threshold, with standard errors).
cvtc3.lr0005[["cv.statistics"]]
## $deviance.mean
## [1] 1.147245
##
## $deviance.se
## [1] 0.04924865
##
## $correlation.mean
## [1] 0.4209709
##
## $correlation.se
## [1] 0.05525873
##
## $discrimination.mean
## [1] 0.74074
##
## $discrimination.se
## [1] 0.02974984
##
## $calibration.mean
## [1] 0.2669300 1.4894672 0.4501995 0.4636395 0.4073396
##
## $calibration.se
## [1] 0.22157644 0.39834344 0.10676133 0.09534053 0.08484106
##
## $cv.threshold
## [1] 0.3765187
##
## $cv.threshold.se
## [1] 0.01309959
# Relative influence of each predictor in this fit (printed table has the
# columns var and rel.inf). The deviance measures used to compare models are
# printed by $cv.statistics above, not by summary().
summary(cvtc3.lr0005)
## var rel.inf
## cpaq8_tot cpaq8_tot 30.67755162
## isi_tot isi_tot 8.15249505
## age age 7.61844175
## current_opioid_meds current_opioid_meds 6.39288701
## pain_duration pain_duration 6.17316282
## overlapping_pain_number overlapping_pain_number 5.75348708
## RST_PQ_BIS RST_PQ_BIS 5.56126854
## pcs_tot pcs_tot 5.02531520
## employment employment 4.19002394
## audit_total audit_total 3.72494258
## income income 2.98608333
## bis_brief_tot bis_brief_tot 2.83315871
## prodep_t prodep_t 1.78152300
## proanx_t proanx_t 1.68278180
## RST_PQ_FFS RST_PQ_FFS 1.48773298
## meq_tot meq_tot 1.38910507
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.21169376
## RST_PQ_BAS_I RST_PQ_BAS_I 0.92376034
## RST_PQ_BAS_RR RST_PQ_BAS_RR 0.89371253
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.72756846
## hispanic hispanic 0.43220674
## race race 0.23994602
## assigned_sex_at_birth assigned_sex_at_birth 0.09381367
## cuditr0 cuditr0 0.04733799
tc 4 lr 0.005 (data = finaltrain; includes gcpsr_3 to gcpsr_6 predictors)
# Cross-validated boosted regression tree fit on finaltrain:
# response in column 1, predictors in columns 3:30, folds supplied by
# fold.vector. Settings for this run: tree.complexity = 4, learning.rate = 0.005.
# NOTE(review): the object name reads as tree complexity 7 / lr 0.0005 but the
# call uses tree.complexity = 4 and learning.rate = 0.005 -- confirm the
# intended name before comparing models.
cvtc7.lr0005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  learning.rate = 0.005,
  tree.complexity = 4,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2179
##
## now adding trees...
## 100 1.1544
## 150 1.1093
## 200 1.0803
## 250 1.0587
## 300 1.0454
## 350 1.035
## 400 1.0285
## 450 1.0251
## 500 1.027
## 550 1.0293
## 600 1.0325
## 650 1.0349
## 700 1.0371
## 750 1.0423
## 800 1.0466
## 850 1.0513
## 900 1.0564
## 950 1.0604
## 1000 1.0675
## 1050 1.0725
## 1100 1.077
## fitting final gbm model with a fixed number of 450 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.802
##
## estimated cv deviance = 1.025 ; se = 0.06
##
## training data correlation = 0.733
## cv correlation = 0.537 ; se = 0.056
##
## training data ROC score = 0.92
## cv ROC score = 0.801 ; se = 0.028
##
## elapsed time - 0.11 minutes
# Print the cross-validation statistics stored on the fit (deviance,
# correlation, discrimination, calibration and threshold, with standard errors).
cvtc7.lr0005[["cv.statistics"]]
## $deviance.mean
## [1] 1.02507
##
## $deviance.se
## [1] 0.05989564
##
## $correlation.mean
## [1] 0.5373775
##
## $correlation.se
## [1] 0.05583585
##
## $discrimination.mean
## [1] 0.80093
##
## $discrimination.se
## [1] 0.02751183
##
## $calibration.mean
## [1] 0.2461241 1.4501025 0.4346719 0.5172998 0.3561608
##
## $calibration.se
## [1] 0.19337992 0.29653197 0.10630938 0.09256631 0.09122205
##
## $cv.threshold
## [1] 0.3930572
##
## $cv.threshold.se
## [1] 0.0235085
# Relative influence of each predictor in this fit (printed table has the
# columns var and rel.inf).
summary(cvtc7.lr0005)
## var rel.inf
## gcpsr_5 gcpsr_5 27.74246406
## cpaq8_tot cpaq8_tot 15.91991807
## gcpsr_3 gcpsr_3 5.74971237
## age age 5.51183863
## isi_tot isi_tot 4.80658662
## RST_PQ_BIS RST_PQ_BIS 4.38148251
## pain_duration pain_duration 4.32589904
## gcpsr_6 gcpsr_6 4.00541595
## employment employment 3.18993269
## overlapping_pain_number overlapping_pain_number 2.98291492
## current_opioid_meds current_opioid_meds 2.96568610
## gcpsr_4 gcpsr_4 2.52700696
## pcs_tot pcs_tot 2.28231061
## income income 1.87296316
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.75104008
## audit_total audit_total 1.66176294
## bis_brief_tot bis_brief_tot 1.41660210
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.16172891
## proanx_t proanx_t 1.12524636
## meq_tot meq_tot 0.98248058
## prodep_t prodep_t 0.90691330
## hispanic hispanic 0.75212621
## RST_PQ_FFS RST_PQ_FFS 0.69522520
## RST_PQ_BAS_I RST_PQ_BAS_I 0.54390286
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.45783764
## cuditr0 cuditr0 0.18189575
## race race 0.09910639
## assigned_sex_at_birth assigned_sex_at_birth 0.00000000
tc 7 lr 0.0005 (data = finaltrain_nocgpsr; gcpsr_3 to gcpsr_6 predictors excluded)
# Cross-validated boosted regression tree fit on finaltrain_nocgpsr:
# response in column 1, predictors in columns 3:26, folds supplied by
# fold.vector. Settings for this run: tree.complexity = 7, learning.rate = 0.0005.
# NOTE(review): the object name reads as tree complexity 3 but the call uses
# tree.complexity = 7, and the same name is assigned by other gbm.step calls in
# this script, so only the most recent fit is kept -- confirm which fit later
# code expects.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  learning.rate = 0.0005,
  tree.complexity = 7,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3087
##
## now adding trees...
## 100 1.3009
## 150 1.2932
## 200 1.2862
## 250 1.2796
## 300 1.2731
## 350 1.2666
## 400 1.2605
## 450 1.2548
## 500 1.2496
## 550 1.2444
## 600 1.2395
## 650 1.2353
## 700 1.2303
## 750 1.2265
## 800 1.2226
## 850 1.2187
## 900 1.2151
## 950 1.2118
## 1000 1.2086
## 1050 1.2058
## 1100 1.2029
## 1150 1.2001
## 1200 1.1973
## 1250 1.1947
## 1300 1.1924
## 1350 1.1902
## 1400 1.188
## 1450 1.1859
## 1500 1.1844
## 1550 1.1825
## 1600 1.1807
## 1650 1.1791
## 1700 1.1778
## 1750 1.1763
## 1800 1.1748
## 1850 1.1736
## 1900 1.1723
## 1950 1.1713
## 2000 1.1703
## 2050 1.1694
## 2100 1.1685
## 2150 1.1677
## 2200 1.167
## 2250 1.1663
## 2300 1.1659
## 2350 1.1654
## 2400 1.1651
## 2450 1.1645
## 2500 1.1641
## 2550 1.1638
## 2600 1.1633
## 2650 1.1628
## 2700 1.1624
## 2750 1.1622
## 2800 1.1618
## 2850 1.1615
## 2900 1.1615
## 2950 1.1614
## 3000 1.1615
## 3050 1.1615
## 3100 1.1614
## 3150 1.1615
## 3200 1.1618
## 3250 1.1618
## 3300 1.162
## 3350 1.1619
## 3400 1.1619
## fitting final gbm model with a fixed number of 2950 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.897
##
## estimated cv deviance = 1.161 ; se = 0.047
##
## training data correlation = 0.727
## cv correlation = 0.407 ; se = 0.056
##
## training data ROC score = 0.924
## cv ROC score = 0.735 ; se = 0.031
##
## elapsed time - 0.51 minutes
# Print the cross-validation statistics stored on the fitted object: mean and SE
# of CV deviance, correlation, discrimination, calibration, and the CV threshold.
cvtc3.lr0005$cv.statistics
## $deviance.mean
## [1] 1.16139
##
## $deviance.se
## [1] 0.04684134
##
## $correlation.mean
## [1] 0.4071974
##
## $correlation.se
## [1] 0.05562675
##
## $discrimination.mean
## [1] 0.73484
##
## $discrimination.se
## [1] 0.03072203
##
## $calibration.mean
## [1] 0.2809879 1.5054429 0.4314101 0.4703246 0.3791870
##
## $calibration.se
## [1] 0.22021350 0.37379909 0.10179825 0.09463116 0.08529679
##
## $cv.threshold
## [1] 0.3791723
##
## $cv.threshold.se
## [1] 0.01201753
# summary() prints each predictor's relative influence (rel.inf). The deviance
# measures used to compare models are printed by cvtc3.lr0005$cv.statistics
# (deviance.mean / deviance.se), not by this call.
summary(cvtc3.lr0005)
## var rel.inf
## cpaq8_tot cpaq8_tot 26.5442935
## age age 7.1837311
## pain_duration pain_duration 6.9825253
## isi_tot isi_tot 6.8556078
## RST_PQ_BIS RST_PQ_BIS 6.4798727
## pcs_tot pcs_tot 5.1657426
## current_opioid_meds current_opioid_meds 5.0548659
## overlapping_pain_number overlapping_pain_number 5.0071341
## employment employment 4.8584775
## income income 4.1340840
## audit_total audit_total 3.5189234
## bis_brief_tot bis_brief_tot 3.0002831
## proanx_t proanx_t 2.2667563
## prodep_t prodep_t 2.1181201
## RST_PQ_BAS_I RST_PQ_BAS_I 1.8981482
## RST_PQ_FFS RST_PQ_FFS 1.8937246
## meq_tot meq_tot 1.8704085
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.5822165
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.3974834
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.2897053
## hispanic hispanic 0.3980300
## cuditr0 cuditr0 0.2014803
## race race 0.1875670
## assigned_sex_at_birth assigned_sex_at_birth 0.1108189
tc 7 lr .0005 (finaltrain, 28 predictors)
# Cross-validated boosted regression tree fitted with gbm.step() on finaltrain:
# response in column 1, predictors in columns 3-30, Bernoulli family,
# tree complexity 7, learning rate 0.0005, CV folds from the existing
# `fold.vector`.
cvtc7.lr0005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,  (explicit tolerance override left disabled)
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3044
##
## now adding trees...
## 100 1.2929
## 150 1.2817
## 200 1.2709
## 250 1.2607
## 300 1.2508
## 350 1.2415
## 400 1.2325
## 450 1.224
## 500 1.2159
## 550 1.208
## 600 1.2003
## 650 1.1933
## 700 1.1864
## 750 1.1794
## 800 1.173
## 850 1.1667
## 900 1.1608
## 950 1.1549
## 1000 1.1495
## 1050 1.1447
## 1100 1.1397
## 1150 1.1352
## 1200 1.1307
## 1250 1.1263
## 1300 1.1224
## 1350 1.1181
## 1400 1.1142
## 1450 1.1102
## 1500 1.1067
## 1550 1.1031
## 1600 1.0998
## 1650 1.097
## 1700 1.094
## 1750 1.0912
## 1800 1.0884
## 1850 1.086
## 1900 1.0833
## 1950 1.0806
## 2000 1.0781
## 2050 1.0757
## 2100 1.0738
## 2150 1.0717
## 2200 1.0697
## 2250 1.0676
## 2300 1.0655
## 2350 1.0638
## 2400 1.0622
## 2450 1.0606
## 2500 1.0591
## 2550 1.0577
## 2600 1.0563
## 2650 1.055
## 2700 1.0538
## 2750 1.0526
## 2800 1.0513
## 2850 1.0504
## 2900 1.0494
## 2950 1.0483
## 3000 1.0473
## 3050 1.0465
## 3100 1.046
## 3150 1.0449
## 3200 1.0443
## 3250 1.0437
## 3300 1.043
## 3350 1.0424
## 3400 1.0416
## 3450 1.0411
## 3500 1.0407
## 3550 1.0405
## 3600 1.0402
## 3650 1.0403
## 3700 1.0398
## 3750 1.0396
## 3800 1.0393
## 3850 1.039
## 3900 1.0388
## 3950 1.0388
## 4000 1.0385
## 4050 1.0382
## 4100 1.0383
## 4150 1.038
## 4200 1.038
## 4250 1.038
## 4300 1.038
## 4350 1.0383
## 4400 1.0384
## 4450 1.0387
## 4500 1.039
## fitting final gbm model with a fixed number of 4200 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.715
##
## estimated cv deviance = 1.038 ; se = 0.061
##
## training data correlation = 0.799
## cv correlation = 0.525 ; se = 0.056
##
## training data ROC score = 0.954
## cv ROC score = 0.796 ; se = 0.028
##
## elapsed time - 0.77 minutes
# Print the cross-validation statistics stored on the fitted object: mean and SE
# of CV deviance, correlation, discrimination, calibration, and the CV threshold.
cvtc7.lr0005$cv.statistics
## $deviance.mean
## [1] 1.037973
##
## $deviance.se
## [1] 0.06129746
##
## $correlation.mean
## [1] 0.5248469
##
## $correlation.se
## [1] 0.05561748
##
## $discrimination.mean
## [1] 0.79556
##
## $discrimination.se
## [1] 0.0282093
##
## $calibration.mean
## [1] 0.2621396 1.4027645 0.4681123 0.5121292 0.4483848
##
## $calibration.se
## [1] 0.2258002 0.3334442 0.1155636 0.0907876 0.1093568
##
## $cv.threshold
## [1] 0.3999033
##
## $cv.threshold.se
## [1] 0.02290522
# Print each predictor's relative influence (rel.inf) in the fitted model.
summary(cvtc7.lr0005)
## var rel.inf
## gcpsr_5 gcpsr_5 22.16973059
## cpaq8_tot cpaq8_tot 13.51236916
## age age 5.89837089
## pain_duration pain_duration 5.74224751
## gcpsr_3 gcpsr_3 5.14195586
## isi_tot isi_tot 4.79831913
## RST_PQ_BIS RST_PQ_BIS 4.01671136
## employment employment 3.88327488
## income income 3.11436053
## pcs_tot pcs_tot 3.00490531
## gcpsr_4 gcpsr_4 2.98154881
## gcpsr_6 gcpsr_6 2.92347938
## overlapping_pain_number overlapping_pain_number 2.64568337
## current_opioid_meds current_opioid_meds 2.39815632
## bis_brief_tot bis_brief_tot 2.22881353
## audit_total audit_total 2.10407790
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 2.02253138
## proanx_t proanx_t 1.77972121
## RST_PQ_FFS RST_PQ_FFS 1.71242157
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.70352835
## meq_tot meq_tot 1.70330080
## RST_PQ_BAS_I RST_PQ_BAS_I 1.24979022
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.16192063
## prodep_t prodep_t 1.14341614
## hispanic hispanic 0.56210353
## cuditr0 cuditr0 0.22421511
## race race 0.08784938
## assigned_sex_at_birth assigned_sex_at_birth 0.08519716
tc 7 lr .0025 (finaltrain_nocgpsr, 24 predictors)
# Cross-validated boosted regression tree fitted with gbm.step() on
# finaltrain_nocgpsr: response in column 1, predictors in columns 3-26,
# Bernoulli family, CV folds taken from the existing `fold.vector`.
# NOTE(review): the object name cvtc3.lr0005 does not match the settings used
# here (tree.complexity = 7, learning.rate = 0.0025), and this assignment
# replaces the object of the same name fitted earlier in the file. The name is
# kept because the lines that follow refer to it.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,  (explicit tolerance override left disabled)
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2786
##
## now adding trees...
## 100 1.2485
## 150 1.2264
## 200 1.2073
## 250 1.1934
## 300 1.1827
## 350 1.1746
## 400 1.1675
## 450 1.1627
## 500 1.1591
## 550 1.1568
## 600 1.1552
## 650 1.1557
## 700 1.1561
## 750 1.1584
## 800 1.1595
## 850 1.162
## 900 1.164
## 950 1.1659
## 1000 1.1699
## 1050 1.1737
## 1100 1.1778
## 1150 1.1816
## 1200 1.1847
## fitting final gbm model with a fixed number of 600 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.894
##
## estimated cv deviance = 1.155 ; se = 0.046
##
## training data correlation = 0.728
## cv correlation = 0.414 ; se = 0.056
##
## training data ROC score = 0.927
## cv ROC score = 0.739 ; se = 0.032
##
## elapsed time - 0.17 minutes
# Print the cross-validation statistics stored on the fitted object: mean and SE
# of CV deviance, correlation, discrimination, calibration, and the CV threshold.
cvtc3.lr0005$cv.statistics
## $deviance.mean
## [1] 1.155226
##
## $deviance.se
## [1] 0.04646194
##
## $correlation.mean
## [1] 0.4143922
##
## $correlation.se
## [1] 0.05550615
##
## $discrimination.mean
## [1] 0.73861
##
## $discrimination.se
## [1] 0.03207811
##
## $calibration.mean
## [1] 0.3078190 1.5167046 0.4299294 0.4789588 0.3517599
##
## $calibration.se
## [1] 0.23073036 0.35688541 0.10188141 0.09541625 0.07913611
##
## $cv.threshold
## [1] 0.3792585
##
## $cv.threshold.se
## [1] 0.01294056
# summary() prints each predictor's relative influence (rel.inf). The deviance
# measures used to compare models are printed by cvtc3.lr0005$cv.statistics
# (deviance.mean / deviance.se), not by this call.
summary(cvtc3.lr0005)
## var rel.inf
## cpaq8_tot cpaq8_tot 26.4129801
## age age 7.0780196
## pain_duration pain_duration 6.8267298
## isi_tot isi_tot 6.7878472
## RST_PQ_BIS RST_PQ_BIS 6.0733974
## pcs_tot pcs_tot 5.2732816
## current_opioid_meds current_opioid_meds 5.2177667
## overlapping_pain_number overlapping_pain_number 4.8437692
## employment employment 4.5674573
## income income 4.0865064
## audit_total audit_total 3.6058221
## bis_brief_tot bis_brief_tot 3.0125247
## proanx_t proanx_t 2.7491836
## prodep_t prodep_t 2.5260201
## RST_PQ_FFS RST_PQ_FFS 2.1707597
## RST_PQ_BAS_I RST_PQ_BAS_I 1.9354572
## meq_tot meq_tot 1.6452369
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.5011247
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.4329772
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.2507375
## hispanic hispanic 0.4276314
## race race 0.2356098
## cuditr0 cuditr0 0.2053794
## assigned_sex_at_birth assigned_sex_at_birth 0.1337804
tc 7 lr .0025 (finaltrain, 28 predictors)
# Cross-validated boosted regression tree fitted with gbm.step() on finaltrain:
# response in column 1, predictors in columns 3-30, Bernoulli family,
# tree complexity 7, CV folds from the existing `fold.vector`.
# NOTE(review): the object name says lr0005 but learning.rate is 0.0025 here,
# and this assignment replaces the object of the same name fitted earlier in
# the file. The name is kept because the lines that follow refer to it.
cvtc7.lr0005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,  (explicit tolerance override left disabled)
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2611
##
## now adding trees...
## 100 1.2164
## 150 1.1798
## 200 1.1491
## 250 1.125
## 300 1.1058
## 350 1.0903
## 400 1.0759
## 450 1.065
## 500 1.0574
## 550 1.05
## 600 1.0455
## 650 1.0427
## 700 1.0394
## 750 1.0385
## 800 1.0366
## 850 1.0347
## 900 1.0366
## 950 1.0376
## 1000 1.039
## 1050 1.0413
## 1100 1.0437
## 1150 1.0457
## 1200 1.0477
## 1250 1.0502
## 1300 1.0525
## 1350 1.0548
## 1400 1.0578
## fitting final gbm model with a fixed number of 850 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.715
##
## estimated cv deviance = 1.035 ; se = 0.061
##
## training data correlation = 0.798
## cv correlation = 0.528 ; se = 0.055
##
## training data ROC score = 0.953
## cv ROC score = 0.796 ; se = 0.028
##
## elapsed time - 0.22 minutes
# Print the cross-validation statistics stored on the fitted object: mean and SE
# of CV deviance, correlation, discrimination, calibration, and the CV threshold.
cvtc7.lr0005$cv.statistics
## $deviance.mean
## [1] 1.034716
##
## $deviance.se
## [1] 0.06063829
##
## $correlation.mean
## [1] 0.5280598
##
## $correlation.se
## [1] 0.05488477
##
## $discrimination.mean
## [1] 0.79556
##
## $discrimination.se
## [1] 0.02828672
##
## $calibration.mean
## [1] 0.2567427 1.4087035 0.4775388 0.5134735 0.4583241
##
## $calibration.se
## [1] 0.2242488 0.3395576 0.1173525 0.0930757 0.1111588
##
## $cv.threshold
## [1] 0.395988
##
## $cv.threshold.se
## [1] 0.02165936
# Print each predictor's relative influence (rel.inf) in the fitted model.
summary(cvtc7.lr0005)
## var rel.inf
## gcpsr_5 gcpsr_5 22.5774936
## cpaq8_tot cpaq8_tot 13.6476402
## age age 5.7298820
## pain_duration pain_duration 5.5990992
## gcpsr_3 gcpsr_3 4.7117460
## isi_tot isi_tot 4.7019361
## employment employment 4.3393770
## RST_PQ_BIS RST_PQ_BIS 4.2646053
## gcpsr_6 gcpsr_6 3.1316767
## income income 3.1013219
## overlapping_pain_number overlapping_pain_number 2.7626837
## pcs_tot pcs_tot 2.7462558
## gcpsr_4 gcpsr_4 2.4214323
## current_opioid_meds current_opioid_meds 2.3188976
## bis_brief_tot bis_brief_tot 2.0544118
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 2.0008983
## meq_tot meq_tot 1.9082767
## audit_total audit_total 1.8661481
## RST_PQ_FFS RST_PQ_FFS 1.8498826
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.6537636
## proanx_t proanx_t 1.6491420
## RST_PQ_BAS_I RST_PQ_BAS_I 1.4423852
## prodep_t prodep_t 1.2851490
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.1293431
## hispanic hispanic 0.6392048
## cuditr0 cuditr0 0.2594975
## race race 0.1269996
## assigned_sex_at_birth assigned_sex_at_birth 0.0808501
tc 7 lr .005 (finaltrain_nocgpsr, 24 predictors)
# Cross-validated boosted regression tree fitted with gbm.step() on
# finaltrain_nocgpsr: response in column 1, predictors in columns 3-26,
# Bernoulli family, CV folds taken from the existing `fold.vector`.
# NOTE(review): the object name cvtc3.lr0005 does not match the settings used
# here (tree.complexity = 7, learning.rate = 0.005), and this assignment
# replaces the object of the same name fitted earlier in the file. The name is
# kept because the lines that follow refer to it.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,  (explicit tolerance override left disabled)
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2494
##
## now adding trees...
## 100 1.2079
## 150 1.1849
## 200 1.1697
## 250 1.1597
## 300 1.1592
## 350 1.1631
## 400 1.1668
## 450 1.171
## 500 1.1765
## 550 1.1835
## 600 1.1918
## 650 1.1974
## 700 1.2071
## 750 1.2154
## 800 1.2245
## 850 1.2352
## 900 1.245
## 950 1.2523
## 1000 1.2601
## fitting final gbm model with a fixed number of 300 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.897
##
## estimated cv deviance = 1.159 ; se = 0.046
##
## training data correlation = 0.729
## cv correlation = 0.406 ; se = 0.054
##
## training data ROC score = 0.926
## cv ROC score = 0.734 ; se = 0.031
##
## elapsed time - 0.13 minutes
# Print the cross-validation statistics stored on the fitted object: mean and SE
# of CV deviance, correlation, discrimination, calibration, and the CV threshold.
cvtc3.lr0005$cv.statistics
## $deviance.mean
## [1] 1.159242
##
## $deviance.se
## [1] 0.04610345
##
## $correlation.mean
## [1] 0.4058745
##
## $correlation.se
## [1] 0.0539592
##
## $discrimination.mean
## [1] 0.73402
##
## $discrimination.se
## [1] 0.0307536
##
## $calibration.mean
## [1] 0.2255389 1.4008628 0.4465147 0.4855912 0.3841016
##
## $calibration.se
## [1] 0.20233325 0.29685675 0.10623970 0.09625451 0.08661145
##
## $cv.threshold
## [1] 0.3807723
##
## $cv.threshold.se
## [1] 0.01105311
# summary() prints each predictor's relative influence (rel.inf). The deviance
# measures used to compare models are printed by cvtc3.lr0005$cv.statistics
# (deviance.mean / deviance.se), not by this call.
summary(cvtc3.lr0005)
## var rel.inf
## cpaq8_tot cpaq8_tot 25.87089485
## age age 7.35207468
## isi_tot isi_tot 7.26638975
## pain_duration pain_duration 7.04923322
## RST_PQ_BIS RST_PQ_BIS 6.41150893
## current_opioid_meds current_opioid_meds 5.15494716
## employment employment 5.01347305
## pcs_tot pcs_tot 4.86127806
## overlapping_pain_number overlapping_pain_number 4.58665933
## audit_total audit_total 4.12744889
## income income 3.95469326
## bis_brief_tot bis_brief_tot 2.99211036
## proanx_t proanx_t 2.50911743
## meq_tot meq_tot 2.05432321
## prodep_t prodep_t 2.04551993
## RST_PQ_FFS RST_PQ_FFS 1.99259209
## RST_PQ_BAS_I RST_PQ_BAS_I 1.71909655
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.65571755
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.41741659
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.37796060
## hispanic hispanic 0.17820937
## race race 0.17534186
## cuditr0 cuditr0 0.14552579
## assigned_sex_at_birth assigned_sex_at_birth 0.08846752
tc 7 lr .005 (finaltrain, 28 predictors)
# Cross-validated boosted regression tree fitted with gbm.step() on finaltrain:
# response in column 1, predictors in columns 3-30, Bernoulli family,
# tree complexity 7, CV folds from the existing `fold.vector`.
# NOTE(review): the object name says lr0005 but learning.rate is 0.005 here,
# and this assignment replaces the object of the same name fitted earlier in
# the file. The name is kept because the lines that follow refer to it.
cvtc7.lr0005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,  (explicit tolerance override left disabled)
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2144
##
## now adding trees...
## 100 1.1486
## 150 1.1072
## 200 1.0761
## 250 1.0549
## 300 1.0419
## 350 1.038
## 400 1.0349
## 450 1.0325
## 500 1.0346
## 550 1.0398
## 600 1.0442
## 650 1.0489
## 700 1.0562
## 750 1.0629
## 800 1.0717
## 850 1.0782
## 900 1.0857
## 950 1.0937
## 1000 1.1007
## 1050 1.1074
## fitting final gbm model with a fixed number of 450 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.695
##
## estimated cv deviance = 1.033 ; se = 0.064
##
## training data correlation = 0.806
## cv correlation = 0.529 ; se = 0.056
##
## training data ROC score = 0.957
## cv ROC score = 0.795 ; se = 0.026
##
## elapsed time - 0.16 minutes
# Print the cross-validation statistics stored on the fitted object: mean and SE
# of CV deviance, correlation, discrimination, calibration, and the CV threshold.
cvtc7.lr0005$cv.statistics
## $deviance.mean
## [1] 1.03255
##
## $deviance.se
## [1] 0.06406451
##
## $correlation.mean
## [1] 0.5288207
##
## $correlation.se
## [1] 0.05561399
##
## $discrimination.mean
## [1] 0.7954
##
## $discrimination.se
## [1] 0.02632992
##
## $calibration.mean
## [1] 0.2226074 1.3324316 0.4720496 0.5124822 0.4727824
##
## $calibration.se
## [1] 0.20992221 0.30515106 0.11368360 0.09287942 0.11105035
##
## $cv.threshold
## [1] 0.4043779
##
## $cv.threshold.se
## [1] 0.0254986
# Print each predictor's relative influence (rel.inf) in the fitted model.
summary(cvtc7.lr0005)
## var rel.inf
## gcpsr_5 gcpsr_5 21.46808660
## cpaq8_tot cpaq8_tot 13.23711933
## pain_duration pain_duration 5.83747382
## age age 5.83352167
## gcpsr_3 gcpsr_3 5.28297987
## isi_tot isi_tot 5.23846961
## RST_PQ_BIS RST_PQ_BIS 4.09308840
## income income 3.68897802
## employment employment 3.62925668
## gcpsr_6 gcpsr_6 3.01644085
## pcs_tot pcs_tot 2.98574240
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 2.57918863
## current_opioid_meds current_opioid_meds 2.52221181
## gcpsr_4 gcpsr_4 2.50736116
## overlapping_pain_number overlapping_pain_number 2.33878660
## audit_total audit_total 2.02455535
## proanx_t proanx_t 1.90276051
## meq_tot meq_tot 1.86961715
## bis_brief_tot bis_brief_tot 1.77421169
## RST_PQ_FFS RST_PQ_FFS 1.63711817
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.63571019
## RST_PQ_BAS_I RST_PQ_BAS_I 1.42496660
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.34708495
## prodep_t prodep_t 1.33338823
## hispanic hispanic 0.47160033
## cuditr0 cuditr0 0.17430402
## race race 0.08715639
## assigned_sex_at_birth assigned_sex_at_birth 0.05882097
tc 7 lr .00005 (finaltrain_nocgpsr, 24 predictors)
# Cross-validated boosted regression tree fitted with gbm.step() on
# finaltrain_nocgpsr: response in column 1, predictors in columns 3-26,
# Bernoulli family, CV folds taken from the existing `fold.vector`.
# NOTE(review): the object name cvtc3.lr0005 does not match the settings used
# here (tree.complexity = 7, learning.rate = 0.00005), and this assignment
# replaces the object of the same name fitted earlier in the file. The name is
# kept because the lines that follow refer to it.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.00005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,  (explicit tolerance override left disabled)
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.316
##
## now adding trees...
## 100 1.3152
## 150 1.3143
## 200 1.3135
## 250 1.3127
## 300 1.3119
## 350 1.3111
## 400 1.3102
## 450 1.3094
## 500 1.3086
## 550 1.3078
## 600 1.307
## 650 1.3062
## 700 1.3054
## 750 1.3046
## 800 1.3038
## 850 1.3031
## 900 1.3023
## 950 1.3015
## 1000 1.3007
## 1050 1.3
## 1100 1.2992
## 1150 1.2985
## 1200 1.2977
## 1250 1.2969
## 1300 1.2962
## 1350 1.2955
## 1400 1.2947
## 1450 1.294
## 1500 1.2932
## 1550 1.2925
## 1600 1.2918
## 1650 1.2911
## 1700 1.2904
## 1750 1.2896
## 1800 1.2889
## 1850 1.2882
## 1900 1.2875
## 1950 1.2868
## 2000 1.2861
## 2050 1.2854
## 2100 1.2847
## 2150 1.284
## 2200 1.2833
## 2250 1.2827
## 2300 1.282
## 2350 1.2813
## 2400 1.2806
## 2450 1.28
## 2500 1.2793
## 2550 1.2787
## 2600 1.278
## 2650 1.2774
## 2700 1.2768
## 2750 1.2761
## 2800 1.2755
## 2850 1.2749
## 2900 1.2742
## 2950 1.2736
## 3000 1.273
## 3050 1.2723
## 3100 1.2717
## 3150 1.2711
## 3200 1.2705
## 3250 1.2698
## 3300 1.2692
## 3350 1.2686
## 3400 1.268
## 3450 1.2674
## 3500 1.2668
## 3550 1.2662
## 3600 1.2656
## 3650 1.265
## 3700 1.2644
## 3750 1.2638
## 3800 1.2633
## 3850 1.2627
## 3900 1.2621
## 3950 1.2615
## 4000 1.261
## 4050 1.2604
## 4100 1.2598
## 4150 1.2593
## 4200 1.2588
## 4250 1.2582
## 4300 1.2576
## 4350 1.2571
## 4400 1.2565
## 4450 1.256
## 4500 1.2554
## 4550 1.2549
## 4600 1.2543
## 4650 1.2538
## 4700 1.2533
## 4750 1.2528
## 4800 1.2522
## 4850 1.2517
## 4900 1.2511
## 4950 1.2506
## 5000 1.2501
## 5050 1.2496
## 5100 1.249
## 5150 1.2485
## 5200 1.248
## 5250 1.2475
## 5300 1.2471
## 5350 1.2466
## 5400 1.2461
## 5450 1.2455
## 5500 1.2451
## 5550 1.2446
## 5600 1.2441
## 5650 1.2436
## 5700 1.2432
## 5750 1.2427
## 5800 1.2422
## 5850 1.2417
## 5900 1.2413
## 5950 1.2408
## 6000 1.2403
## 6050 1.2398
## 6100 1.2394
## 6150 1.2389
## 6200 1.2385
## 6250 1.238
## 6300 1.2375
## 6350 1.2371
## 6400 1.2366
## 6450 1.2362
## 6500 1.2358
## 6550 1.2353
## 6600 1.2349
## 6650 1.2345
## 6700 1.234
## 6750 1.2336
## 6800 1.2332
## 6850 1.2327
## 6900 1.2323
## 6950 1.2319
## 7000 1.2314
## 7050 1.231
## 7100 1.2306
## 7150 1.2302
## 7200 1.2298
## 7250 1.2294
## 7300 1.229
## 7350 1.2286
## 7400 1.2282
## 7450 1.2277
## 7500 1.2273
## 7550 1.227
## 7600 1.2266
## 7650 1.2262
## 7700 1.2258
## 7750 1.2254
## 7800 1.225
## 7850 1.2246
## 7900 1.2241
## 7950 1.2238
## 8000 1.2234
## 8050 1.2231
## 8100 1.2227
## 8150 1.2223
## 8200 1.2219
## 8250 1.2215
## 8300 1.2212
## 8350 1.2208
## 8400 1.2205
## 8450 1.2201
## 8500 1.2197
## 8550 1.2194
## 8600 1.219
## 8650 1.2186
## 8700 1.2182
## 8750 1.2179
## 8800 1.2176
## 8850 1.2172
## 8900 1.2169
## 8950 1.2165
## 9000 1.2162
## 9050 1.2158
## 9100 1.2155
## 9150 1.2151
## 9200 1.2148
## 9250 1.2144
## 9300 1.2141
## 9350 1.2138
## 9400 1.2135
## 9450 1.2131
## 9500 1.2128
## 9550 1.2125
## 9600 1.2121
## 9650 1.2118
## 9700 1.2115
## 9750 1.2112
## 9800 1.2109
## 9850 1.2106
## 9900 1.2102
## 9950 1.2099
## 10000 1.2096
## 10050 1.2093
## 10100 1.209
## 10150 1.2087
## 10200 1.2084
## 10250 1.2081
## 10300 1.2078
## 10350 1.2075
## 10400 1.2072
## 10450 1.2069
## 10500 1.2066
## 10550 1.2063
## 10600 1.206
## 10650 1.2057
## 10700 1.2054
## 10750 1.2051
## 10800 1.2048
## 10850 1.2045
## 10900 1.2043
## 10950 1.2039
## 11000 1.2036
## 11050 1.2034
## 11100 1.2031
## 11150 1.2028
## 11200 1.2025
## 11250 1.2022
## 11300 1.202
## 11350 1.2017
## 11400 1.2014
## 11450 1.2011
## 11500 1.2009
## 11550 1.2006
## 11600 1.2004
## 11650 1.2001
## 11700 1.1999
## 11750 1.1996
## 11800 1.1993
## 11850 1.199
## 11900 1.1988
## 11950 1.1985
## 12000 1.1983
## 12050 1.198
## 12100 1.1978
## 12150 1.1975
## 12200 1.1973
## 12250 1.197
## 12300 1.1968
## 12350 1.1965
## 12400 1.1963
## 12450 1.196
## 12500 1.1958
## 12550 1.1956
## 12600 1.1953
## 12650 1.1951
## 12700 1.1948
## 12750 1.1946
## 12800 1.1944
## 12850 1.1942
## 12900 1.1939
## 12950 1.1937
## 13000 1.1935
## 13050 1.1932
## 13100 1.193
## 13150 1.1928
## 13200 1.1926
## 13250 1.1923
## 13300 1.1921
## 13350 1.1919
## 13400 1.1917
## 13450 1.1915
## 13500 1.1913
## 13550 1.191
## 13600 1.1908
## 13650 1.1906
## 13700 1.1904
## 13750 1.1902
## 13800 1.19
## 13850 1.1897
## 13900 1.1895
## 13950 1.1893
## 14000 1.1891
## 14050 1.1889
## 14100 1.1887
## 14150 1.1885
## 14200 1.1883
## 14250 1.1881
## 14300 1.1879
## 14350 1.1877
## 14400 1.1874
## 14450 1.1872
## 14500 1.187
## 14550 1.1868
## 14600 1.1866
## 14650 1.1865
## 14700 1.1863
## 14750 1.1861
## 14800 1.1859
## 14850 1.1857
## 14900 1.1855
## 14950 1.1853
## 15000 1.1851
## 15050 1.1849
## 15100 1.1848
## 15150 1.1846
## 15200 1.1843
## 15250 1.1842
## 15300 1.184
## 15350 1.1838
## 15400 1.1836
## 15450 1.1835
## 15500 1.1833
## 15550 1.1831
## 15600 1.183
## 15650 1.1828
## 15700 1.1826
## 15750 1.1824
## 15800 1.1822
## 15850 1.182
## 15900 1.1819
## 15950 1.1817
## 16000 1.1815
## 16050 1.1813
## 16100 1.1812
## 16150 1.181
## 16200 1.1809
## 16250 1.1807
## 16300 1.1806
## 16350 1.1804
## 16400 1.1803
## 16450 1.1801
## 16500 1.18
## 16550 1.1798
## 16600 1.1796
## 16650 1.1795
## 16700 1.1793
## 16750 1.1792
## 16800 1.179
## 16850 1.1789
## 16900 1.1787
## 16950 1.1786
## 17000 1.1784
## 17050 1.1783
## 17100 1.1781
## 17150 1.178
## 17200 1.1778
## 17250 1.1777
## 17300 1.1775
## 17350 1.1774
## 17400 1.1773
## 17450 1.1771
## 17500 1.177
## 17550 1.1768
## 17600 1.1767
## 17650 1.1765
## 17700 1.1764
## 17750 1.1763
## 17800 1.1761
## 17850 1.176
## 17900 1.1759
## 17950 1.1757
## 18000 1.1756
## 18050 1.1755
## 18100 1.1753
## 18150 1.1752
## 18200 1.175
## 18250 1.1749
## fitting final gbm model with a fixed number of 18250 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 1.002
##
## estimated cv deviance = 1.175 ; se = 0.039
##
## training data correlation = 0.693
## cv correlation = 0.415 ; se = 0.056
##
## training data ROC score = 0.908
## cv ROC score = 0.737 ; se = 0.03
##
## elapsed time - 3.82 minutes
# Print the cross-validation statistics stored on the fitted object: mean and SE
# of CV deviance, correlation, discrimination, calibration, and the CV threshold.
cvtc3.lr0005$cv.statistics
## $deviance.mean
## [1] 1.174888
##
## $deviance.se
## [1] 0.03858339
##
## $correlation.mean
## [1] 0.4146352
##
## $correlation.se
## [1] 0.05575038
##
## $discrimination.mean
## [1] 0.73704
##
## $discrimination.se
## [1] 0.03041651
##
## $calibration.mean
## [1] 0.5894906 2.0976129 0.3221037 0.4589708 0.2204941
##
## $calibration.se
## [1] 0.27066276 0.49634444 0.07821405 0.08519943 0.04941401
##
## $cv.threshold
## [1] 0.375911
##
## $cv.threshold.se
## [1] 0.0101839
# summary() prints each predictor's relative influence (rel.inf). The deviance
# measures used to compare models are printed by cvtc3.lr0005$cv.statistics
# (deviance.mean / deviance.se), not by this call.
summary(cvtc3.lr0005)
## var rel.inf
## cpaq8_tot cpaq8_tot 31.01260821
## age age 7.41804589
## isi_tot isi_tot 7.03432474
## pain_duration pain_duration 6.38560387
## RST_PQ_BIS RST_PQ_BIS 6.27896129
## current_opioid_meds current_opioid_meds 5.60580686
## overlapping_pain_number overlapping_pain_number 5.06036427
## pcs_tot pcs_tot 4.54917783
## employment employment 4.25806532
## income income 3.64586471
## audit_total audit_total 3.21475327
## bis_brief_tot bis_brief_tot 2.55226299
## proanx_t proanx_t 2.00219547
## prodep_t prodep_t 1.95615355
## RST_PQ_FFS RST_PQ_FFS 1.78170026
## meq_tot meq_tot 1.62798334
## RST_PQ_BAS_I RST_PQ_BAS_I 1.53617012
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.22870709
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.19527173
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.02641023
## hispanic hispanic 0.21218915
## race race 0.16983123
## cuditr0 cuditr0 0.15566165
## assigned_sex_at_birth assigned_sex_at_birth 0.09188695
tc 7 lr .00005 (finaltrain, 28 predictors)
# Cross-validated boosted regression tree fitted with gbm.step() on finaltrain:
# response in column 1, predictors in columns 3-30, Bernoulli family,
# tree complexity 7, CV folds from the existing `fold.vector`.
# NOTE(review): the object name says lr0005 but learning.rate is 0.00005 here,
# and this assignment replaces the object of the same name fitted earlier in
# the file.
cvtc7.lr0005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.00005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01,  (explicit tolerance override left disabled)
  tolerance.method = "auto"
)
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3156
##
## now adding trees...
## 100 1.3143
## 150 1.3131
## 200 1.3118
## 250 1.3106
## 300 1.3094
## 350 1.3082
## 400 1.3069
## 450 1.3057
## 500 1.3045
## 550 1.3033
## 600 1.3021
## 650 1.3009
## 700 1.2997
## 750 1.2985
## 800 1.2973
## 850 1.2962
## 900 1.295
## 950 1.2938
## 1000 1.2926
## 1050 1.2915
## 1100 1.2903
## 1150 1.2892
## 1200 1.288
## 1250 1.2869
## 1300 1.2858
## 1350 1.2847
## 1400 1.2836
## 1450 1.2825
## 1500 1.2814
## 1550 1.2802
## 1600 1.2791
## 1650 1.278
## 1700 1.2769
## 1750 1.2758
## 1800 1.2748
## 1850 1.2737
## 1900 1.2727
## 1950 1.2716
## 2000 1.2705
## 2050 1.2695
## 2100 1.2684
## 2150 1.2674
## 2200 1.2664
## 2250 1.2653
## 2300 1.2643
## 2350 1.2632
## 2400 1.2622
## 2450 1.2612
## 2500 1.2602
## 2550 1.2592
## 2600 1.2582
## 2650 1.2572
## 2700 1.2562
## 2750 1.2552
## 2800 1.2542
## 2850 1.2532
## 2900 1.2523
## 2950 1.2513
## 3000 1.2503
## 3050 1.2494
## 3100 1.2484
## 3150 1.2475
## 3200 1.2465
## 3250 1.2456
## 3300 1.2447
## 3350 1.2437
## 3400 1.2428
## 3450 1.2419
## 3500 1.241
## 3550 1.24
## 3600 1.2391
## 3650 1.2382
## 3700 1.2373
## 3750 1.2364
## 3800 1.2355
## 3850 1.2346
## 3900 1.2338
## 3950 1.2329
## 4000 1.232
## 4050 1.2311
## 4100 1.2302
## 4150 1.2293
## 4200 1.2284
## 4250 1.2276
## 4300 1.2267
## 4350 1.2259
## 4400 1.225
## 4450 1.2242
## 4500 1.2234
## 4550 1.2225
## 4600 1.2217
## 4650 1.2209
## 4700 1.2201
## 4750 1.2193
## 4800 1.2185
## 4850 1.2177
## 4900 1.2169
## 4950 1.2161
## 5000 1.2153
## 5050 1.2145
## 5100 1.2137
## 5150 1.2129
## 5200 1.2121
## 5250 1.2113
## 5300 1.2105
## 5350 1.2098
## 5400 1.209
## 5450 1.2082
## 5500 1.2074
## 5550 1.2067
## 5600 1.2059
## 5650 1.2052
## 5700 1.2044
## 5750 1.2036
## 5800 1.2029
## 5850 1.2021
## 5900 1.2014
## 5950 1.2006
## 6000 1.1999
## 6050 1.1992
## 6100 1.1985
## 6150 1.1977
## 6200 1.197
## 6250 1.1963
## 6300 1.1956
## 6350 1.1949
## 6400 1.1942
## 6450 1.1935
## 6500 1.1927
## 6550 1.192
## 6600 1.1913
## 6650 1.1906
## 6700 1.1899
## 6750 1.1892
## 6800 1.1885
## 6850 1.1878
## 6900 1.1872
## 6950 1.1865
## 7000 1.1859
## 7050 1.1852
## 7100 1.1845
## 7150 1.1838
## 7200 1.1832
## 7250 1.1825
## 7300 1.1818
## 7350 1.1811
## 7400 1.1805
## 7450 1.1798
## 7500 1.1792
## 7550 1.1785
## 7600 1.1779
## 7650 1.1773
## 7700 1.1766
## 7750 1.176
## 7800 1.1754
## 7850 1.1747
## 7900 1.1741
## 7950 1.1735
## 8000 1.1729
## 8050 1.1723
## 8100 1.1717
## 8150 1.1711
## 8200 1.1705
## 8250 1.1699
## 8300 1.1693
## 8350 1.1687
## 8400 1.168
## 8450 1.1675
## 8500 1.1669
## 8550 1.1663
## 8600 1.1657
## 8650 1.1651
## 8700 1.1645
## 8750 1.1639
## 8800 1.1633
## 8850 1.1627
## 8900 1.1621
## 8950 1.1616
## 9000 1.161
## 9050 1.1604
## 9100 1.1599
## 9150 1.1593
## 9200 1.1587
## 9250 1.1582
## 9300 1.1576
## 9350 1.1571
## 9400 1.1565
## 9450 1.156
## 9500 1.1554
## 9550 1.1549
## 9600 1.1543
## 9650 1.1538
## 9700 1.1532
## 9750 1.1527
## 9800 1.1522
## 9850 1.1516
## 9900 1.1511
## 9950 1.1506
## 10000 1.15
## 10050 1.1495
## 10100 1.149
## 10150 1.1484
## 10200 1.1479
## 10250 1.1474
## 10300 1.1469
## 10350 1.1463
## 10400 1.1458
## 10450 1.1453
## 10500 1.1448
## 10550 1.1443
## 10600 1.1438
## 10650 1.1433
## 10700 1.1428
## 10750 1.1423
## 10800 1.1418
## 10850 1.1413
## 10900 1.1408
## 10950 1.1403
## 11000 1.1398
## 11050 1.1393
## 11100 1.1388
## 11150 1.1383
## 11200 1.1378
## 11250 1.1374
## 11300 1.1369
## 11350 1.1364
## 11400 1.1359
## 11450 1.1354
## 11500 1.1349
## 11550 1.1344
## 11600 1.134
## 11650 1.1335
## 11700 1.1331
## 11750 1.1326
## 11800 1.1321
## 11850 1.1317
## 11900 1.1312
## 11950 1.1308
## 12000 1.1303
## 12050 1.1299
## 12100 1.1294
## 12150 1.129
## 12200 1.1286
## 12250 1.1281
## 12300 1.1277
## 12350 1.1273
## 12400 1.1269
## 12450 1.1264
## 12500 1.126
## 12550 1.1256
## 12600 1.1251
## 12650 1.1247
## 12700 1.1243
## 12750 1.1238
## 12800 1.1234
## 12850 1.123
## 12900 1.1226
## 12950 1.1221
## 13000 1.1217
## 13050 1.1213
## 13100 1.1208
## 13150 1.1205
## 13200 1.12
## 13250 1.1196
## 13300 1.1192
## 13350 1.1188
## 13400 1.1184
## 13450 1.118
## 13500 1.1176
## 13550 1.1172
## 13600 1.1168
## 13650 1.1164
## 13700 1.116
## 13750 1.1157
## 13800 1.1153
## 13850 1.1149
## 13900 1.1145
## 13950 1.1141
## 14000 1.1137
## 14050 1.1134
## 14100 1.113
## 14150 1.1126
## 14200 1.1122
## 14250 1.1119
## 14300 1.1115
## 14350 1.1111
## 14400 1.1108
## 14450 1.1104
## 14500 1.11
## 14550 1.1096
## 14600 1.1093
## 14650 1.1089
## 14700 1.1085
## 14750 1.1082
## 14800 1.1078
## 14850 1.1074
## 14900 1.1071
## 14950 1.1067
## 15000 1.1063
## 15050 1.106
## 15100 1.1057
## 15150 1.1053
## 15200 1.105
## 15250 1.1046
## 15300 1.1043
## 15350 1.1039
## 15400 1.1035
## 15450 1.1032
## 15500 1.1028
## 15550 1.1025
## 15600 1.1022
## 15650 1.1019
## 15700 1.1015
## 15750 1.1012
## 15800 1.1009
## 15850 1.1005
## 15900 1.1002
## 15950 1.0999
## 16000 1.0995
## 16050 1.0992
## 16100 1.0989
## 16150 1.0986
## 16200 1.0983
## 16250 1.098
## 16300 1.0977
## 16350 1.0973
## 16400 1.097
## 16450 1.0967
## 16500 1.0964
## 16550 1.0961
## 16600 1.0958
## 16650 1.0955
## 16700 1.0952
## 16750 1.0948
## 16800 1.0945
## 16850 1.0942
## 16900 1.0939
## 16950 1.0936
## 17000 1.0933
## 17050 1.0931
## 17100 1.0928
## 17150 1.0924
## 17200 1.0922
## 17250 1.0919
## 17300 1.0916
## 17350 1.0913
## 17400 1.091
## 17450 1.0907
## 17500 1.0905
## 17550 1.0901
## 17600 1.0899
## 17650 1.0895
## 17700 1.0893
## 17750 1.089
## 17800 1.0887
## 17850 1.0884
## 17900 1.0881
## 17950 1.0879
## 18000 1.0876
## 18050 1.0873
## 18100 1.087
## 18150 1.0868
## 18200 1.0865
## 18250 1.0862
## 18300 1.0859
## 18350 1.0857
## 18400 1.0854
## 18450 1.0851
## 18500 1.0849
## 18550 1.0846
## 18600 1.0843
## 18650 1.084
## 18700 1.0838
## 18750 1.0835
## 18800 1.0833
## 18850 1.083
## 18900 1.0827
## 18950 1.0825
## 19000 1.0822
## 19050 1.082
## 19100 1.0817
## 19150 1.0815
## 19200 1.0813
## 19250 1.081
## 19300 1.0808
## 19350 1.0805
## 19400 1.0803
## 19450 1.08
## 19500 1.0798
## 19550 1.0796
## 19600 1.0793
## 19650 1.0791
## 19700 1.0788
## 19750 1.0786
## 19800 1.0783
## 19850 1.0781
## 19900 1.0778
## 19950 1.0776
## 20000 1.0773
## 20050 1.0771
## 20100 1.0769
## 20150 1.0767
## 20200 1.0764
## 20250 1.0762
## 20300 1.076
## 20350 1.0757
## 20400 1.0755
## 20450 1.0753
## 20500 1.0751
## 20550 1.0748
## 20600 1.0746
## 20650 1.0744
## 20700 1.0742
## 20750 1.074
## 20800 1.0738
## 20850 1.0735
## 20900 1.0733
## 20950 1.0731
## 21000 1.0729
## 21050 1.0727
## 21100 1.0724
## 21150 1.0723
## 21200 1.072
## 21250 1.0718
## 21300 1.0716
## 21350 1.0714
## 21400 1.0712
## 21450 1.071
## 21500 1.0708
## 21550 1.0706
## 21600 1.0704
## 21650 1.0702
## 21700 1.07
## 21750 1.0698
## 21800 1.0696
## 21850 1.0694
## 21900 1.0692
## 21950 1.069
## 22000 1.0688
## 22050 1.0686
## 22100 1.0684
## 22150 1.0682
## 22200 1.068
## 22250 1.0678
## 22300 1.0677
## 22350 1.0675
## 22400 1.0673
## 22450 1.0671
## 22500 1.0669
## 22550 1.0667
## 22600 1.0665
## 22650 1.0663
## 22700 1.0661
## 22750 1.0659
## 22800 1.0657
## 22850 1.0656
## 22900 1.0654
## 22950 1.0652
## 23000 1.0651
## 23050 1.0649
## 23100 1.0647
## 23150 1.0645
## 23200 1.0643
## 23250 1.0642
## 23300 1.064
## 23350 1.0638
## 23400 1.0637
## 23450 1.0635
## 23500 1.0633
## 23550 1.0632
## 23600 1.063
## 23650 1.0628
## 23700 1.0626
## 23750 1.0625
## 23800 1.0623
## 23850 1.0621
## 23900 1.062
## 23950 1.0618
## 24000 1.0617
## 24050 1.0615
## 24100 1.0614
## 24150 1.0612
## 24200 1.0611
## 24250 1.0609
## 24300 1.0607
## 24350 1.0605
## 24400 1.0604
## 24450 1.0602
## 24500 1.0601
## 24550 1.0599
## 24600 1.0598
## 24650 1.0596
## 24700 1.0595
## 24750 1.0593
## 24800 1.0591
## 24850 1.059
## 24900 1.0588
## 24950 1.0587
## 25000 1.0585
## 25050 1.0584
## 25100 1.0582
## 25150 1.0581
## 25200 1.0579
## 25250 1.0578
## 25300 1.0576
## 25350 1.0575
## 25400 1.0573
## 25450 1.0572
## 25500 1.0571
## 25550 1.0569
## 25600 1.0568
## 25650 1.0566
## 25700 1.0565
## 25750 1.0563
## 25800 1.0562
## 25850 1.0561
## 25900 1.0559
## 25950 1.0558
## 26000 1.0557
## 26050 1.0555
## 26100 1.0554
## 26150 1.0552
## 26200 1.0551
## 26250 1.055
## 26300 1.0548
## 26350 1.0547
## 26400 1.0546
## fitting final gbm model with a fixed number of 26400 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.84
##
## estimated cv deviance = 1.055 ; se = 0.049
##
## training data correlation = 0.762
## cv correlation = 0.53 ; se = 0.056
##
## training data ROC score = 0.935
## cv ROC score = 0.798 ; se = 0.028
##
## elapsed time - 6.76 minutes
# Cross-validation statistics for the tc = 7, lr = 0.00005 fit on finaltrain:
# mean and SE of CV deviance, correlation, discrimination (cv ROC score),
# calibration and threshold.
cvtc7.lr0005[["cv.statistics"]]
## $deviance.mean
## [1] 1.054553
##
## $deviance.se
## [1] 0.04930168
##
## $correlation.mean
## [1] 0.530333
##
## $correlation.se
## [1] 0.05550569
##
## $discrimination.mean
## [1] 0.79772
##
## $discrimination.se
## [1] 0.02825031
##
## $calibration.mean
## [1] 0.4660760 1.8455725 0.3772194 0.4916652 0.2584489
##
## $calibration.se
## [1] 0.24670873 0.40702300 0.09327762 0.08018175 0.06239416
##
## $cv.threshold
## [1] 0.3897129
##
## $cv.threshold.se
## [1] 0.01770522
# Relative influence (rel.inf, sums to 100) of each predictor in the
# tc = 7, lr = 0.00005 fit on finaltrain.
summary(cvtc7.lr0005)
## var rel.inf
## gcpsr_5 gcpsr_5 26.47704702
## cpaq8_tot cpaq8_tot 14.68806679
## age age 6.06210537
## gcpsr_3 gcpsr_3 5.30042091
## pain_duration pain_duration 5.27996291
## isi_tot isi_tot 4.47817356
## employment employment 3.69521043
## RST_PQ_BIS RST_PQ_BIS 3.60069219
## gcpsr_6 gcpsr_6 3.24779947
## income income 2.68496568
## overlapping_pain_number overlapping_pain_number 2.61510261
## pcs_tot pcs_tot 2.47780997
## gcpsr_4 gcpsr_4 2.42966846
## current_opioid_meds current_opioid_meds 2.40729410
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.78155341
## bis_brief_tot bis_brief_tot 1.73250802
## audit_total audit_total 1.62404592
## proanx_t proanx_t 1.62028940
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.43246247
## RST_PQ_FFS RST_PQ_FFS 1.42234397
## meq_tot meq_tot 1.25561555
## prodep_t prodep_t 1.07715459
## RST_PQ_BAS_I RST_PQ_BAS_I 1.00776179
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.94981940
## hispanic hispanic 0.38146283
## cuditr0 cuditr0 0.17539088
## assigned_sex_at_birth assigned_sex_at_birth 0.04866678
## race race 0.04660550
tc 5, lr .0005 (finaltrain_nocgpsr: 24 predictors, gcpsr items excluded)
# Cross-validated boosted regression tree on finaltrain_nocgpsr (response in
# column 1, predictors in columns 3-26): tree complexity 5, learning rate 0.0005.
# The old target name `cvtc3.lr0005` does not match these settings
# (tree.complexity is 5, not 3) and is reassigned by other chunks, so the fit
# is stored under a name that encodes the real settings and is kept for later
# comparison.
# NOTE(review): bag.fraction = 0.5 subsamples rows at random; confirm that a
# seed is set earlier in the file if the results must be reproducible.
cvtc5.lr0.0005.nogcpsr <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.x = 3:26,                # predictor columns, by position
  gbm.y = 1,                   # response column
  fold.vector = fold.vector,   # user-supplied CV fold assignment
  family = "bernoulli",
  tree.complexity = 5,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  tolerance.method = "auto",
  # tolerance = 0.01,
  max.trees = 50000
)
# Backward-compatible alias: the following lines still refer to the old name.
cvtc3.lr0005 <- cvtc5.lr0.0005.nogcpsr
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3084
##
## now adding trees...
## 100 1.3007
## 150 1.2931
## 200 1.2857
## 250 1.2787
## 300 1.2726
## 350 1.2665
## 400 1.2605
## 450 1.2547
## 500 1.2497
## 550 1.2451
## 600 1.2404
## 650 1.2356
## 700 1.2313
## 750 1.2269
## 800 1.2227
## 850 1.2191
## 900 1.2153
## 950 1.2121
## 1000 1.2085
## 1050 1.2053
## 1100 1.2022
## 1150 1.1994
## 1200 1.1964
## 1250 1.1941
## 1300 1.1916
## 1350 1.1893
## 1400 1.1871
## 1450 1.1851
## 1500 1.1832
## 1550 1.1812
## 1600 1.1796
## 1650 1.1776
## 1700 1.1758
## 1750 1.1741
## 1800 1.1726
## 1850 1.1712
## 1900 1.1699
## 1950 1.1686
## 2000 1.1674
## 2050 1.1661
## 2100 1.165
## 2150 1.1642
## 2200 1.1633
## 2250 1.1626
## 2300 1.1618
## 2350 1.161
## 2400 1.1604
## 2450 1.1595
## 2500 1.1587
## 2550 1.1583
## 2600 1.1577
## 2650 1.1573
## 2700 1.1569
## 2750 1.1562
## 2800 1.1559
## 2850 1.1555
## 2900 1.1553
## 2950 1.155
## 3000 1.1546
## 3050 1.1542
## 3100 1.1538
## 3150 1.1537
## 3200 1.1534
## 3250 1.1532
## 3300 1.153
## 3350 1.1526
## 3400 1.1523
## 3450 1.1524
## 3500 1.1524
## 3550 1.1524
## 3600 1.1529
## 3650 1.1529
## 3700 1.153
## 3750 1.1533
## 3800 1.1536
## fitting final gbm model with a fixed number of 3400 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.924
##
## estimated cv deviance = 1.152 ; se = 0.048
##
## training data correlation = 0.686
## cv correlation = 0.417 ; se = 0.055
##
## training data ROC score = 0.903
## cv ROC score = 0.738 ; se = 0.03
##
## elapsed time - 0.48 minutes
# Cross-validation statistics for the fit currently held in cvtc3.lr0005.
# Despite the name, this is the tc = 5, lr = 0.0005 model on finaltrain_nocgpsr.
cvtc3.lr0005[["cv.statistics"]]
## $deviance.mean
## [1] 1.152343
##
## $deviance.se
## [1] 0.04765349
##
## $correlation.mean
## [1] 0.4165447
##
## $correlation.se
## [1] 0.055271
##
## $discrimination.mean
## [1] 0.73818
##
## $discrimination.se
## [1] 0.02970235
##
## $calibration.mean
## [1] 0.3023157 1.5543152 0.4385018 0.4667890 0.3803662
##
## $calibration.se
## [1] 0.23089895 0.41981806 0.10168000 0.09480698 0.07937049
##
## $cv.threshold
## [1] 0.3808227
##
## $cv.threshold.se
## [1] 0.01283925
# Relative influence (rel.inf, sums to 100) of each predictor in the
# tc = 5, lr = 0.0005 fit on finaltrain_nocgpsr.
# NOTE: summary() does not report deviance; the deviance measures used to
# compare models are the ones printed from $cv.statistics.
summary(cvtc3.lr0005)
## var rel.inf
## cpaq8_tot cpaq8_tot 28.95237115
## age age 7.60796775
## isi_tot isi_tot 7.39147593
## RST_PQ_BIS RST_PQ_BIS 6.53780002
## pain_duration pain_duration 6.40217695
## current_opioid_meds current_opioid_meds 5.79631821
## pcs_tot pcs_tot 5.45310446
## overlapping_pain_number overlapping_pain_number 5.20444438
## employment employment 4.32952123
## audit_total audit_total 3.53390257
## income income 3.28608465
## bis_brief_tot bis_brief_tot 2.93996130
## proanx_t proanx_t 2.18749738
## prodep_t prodep_t 2.07839278
## RST_PQ_FFS RST_PQ_FFS 1.58502630
## RST_PQ_BAS_I RST_PQ_BAS_I 1.41543546
## meq_tot meq_tot 1.36737523
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.14293059
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.03196118
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.94951363
## hispanic hispanic 0.44574132
## race race 0.20569466
## cuditr0 cuditr0 0.08464924
## assigned_sex_at_birth assigned_sex_at_birth 0.07065364
tc 5, lr .0005 (finaltrain: all 28 predictors, gcpsr items included)
# Cross-validated boosted regression tree on finaltrain (response in column 1,
# predictors in columns 3-30): tree complexity 5, learning rate 0.0005.
# The old target name `cvtc7.lr0005` does not match these settings
# (tree.complexity is 5, not 7) and reusing it overwrites the earlier tc = 7
# fit, so this model is stored under a name that encodes the real settings.
# NOTE(review): bag.fraction = 0.5 subsamples rows at random; confirm that a
# seed is set earlier in the file if the results must be reproducible.
cvtc5.lr0.0005.full <- gbm.step(
  data = finaltrain,
  gbm.x = 3:30,                # predictor columns, by position
  gbm.y = 1,                   # response column
  fold.vector = fold.vector,   # user-supplied CV fold assignment
  family = "bernoulli",
  tree.complexity = 5,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  tolerance.method = "auto",
  # tolerance = 0.01,
  max.trees = 50000
)
# Backward-compatible alias: the following lines still refer to the old name.
cvtc7.lr0005 <- cvtc5.lr0.0005.full
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.3047
##
## now adding trees...
## 100 1.2928
## 150 1.2818
## 200 1.2714
## 250 1.2614
## 300 1.2514
## 350 1.2424
## 400 1.2339
## 450 1.2255
## 500 1.2176
## 550 1.2096
## 600 1.2022
## 650 1.1949
## 700 1.188
## 750 1.1814
## 800 1.1753
## 850 1.1692
## 900 1.1633
## 950 1.1575
## 1000 1.152
## 1050 1.1468
## 1100 1.142
## 1150 1.1374
## 1200 1.1329
## 1250 1.1286
## 1300 1.1244
## 1350 1.1204
## 1400 1.1166
## 1450 1.1127
## 1500 1.1089
## 1550 1.1051
## 1600 1.1017
## 1650 1.0981
## 1700 1.0948
## 1750 1.0919
## 1800 1.0888
## 1850 1.086
## 1900 1.0834
## 1950 1.0807
## 2000 1.0782
## 2050 1.076
## 2100 1.0735
## 2150 1.0713
## 2200 1.0692
## 2250 1.0669
## 2300 1.0649
## 2350 1.0632
## 2400 1.0611
## 2450 1.0593
## 2500 1.0579
## 2550 1.0564
## 2600 1.0548
## 2650 1.0534
## 2700 1.0523
## 2750 1.0508
## 2800 1.0496
## 2850 1.0483
## 2900 1.047
## 2950 1.0459
## 3000 1.0446
## 3050 1.0435
## 3100 1.0424
## 3150 1.0416
## 3200 1.0407
## 3250 1.0397
## 3300 1.0388
## 3350 1.038
## 3400 1.0372
## 3450 1.0366
## 3500 1.036
## 3550 1.0355
## 3600 1.035
## 3650 1.0344
## 3700 1.0339
## 3750 1.0336
## 3800 1.033
## 3850 1.0326
## 3900 1.0323
## 3950 1.0319
## 4000 1.0316
## 4050 1.0313
## 4100 1.0309
## 4150 1.0307
## 4200 1.0306
## 4250 1.0304
## 4300 1.0304
## 4350 1.0304
## 4400 1.0303
## 4450 1.0301
## 4500 1.0299
## 4550 1.03
## 4600 1.0302
## 4650 1.0302
## 4700 1.0302
## 4750 1.0304
## fitting final gbm model with a fixed number of 4500 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.763
##
## estimated cv deviance = 1.03 ; se = 0.061
##
## training data correlation = 0.762
## cv correlation = 0.532 ; se = 0.056
##
## training data ROC score = 0.935
## cv ROC score = 0.799 ; se = 0.028
##
## elapsed time - 0.65 minutes
# Cross-validation statistics for the fit currently held in cvtc7.lr0005.
# Despite the name, this is the tc = 5, lr = 0.0005 model on finaltrain.
cvtc7.lr0005[["cv.statistics"]]
## $deviance.mean
## [1] 1.029904
##
## $deviance.se
## [1] 0.06052019
##
## $correlation.mean
## [1] 0.5320984
##
## $correlation.se
## [1] 0.05580901
##
## $discrimination.mean
## [1] 0.79931
##
## $discrimination.se
## [1] 0.02780227
##
## $calibration.mean
## [1] 0.2727509 1.4394668 0.4584682 0.5111955 0.4120924
##
## $calibration.se
## [1] 0.22628993 0.33853335 0.11374904 0.09191772 0.10218536
##
## $cv.threshold
## [1] 0.3989254
##
## $cv.threshold.se
## [1] 0.02240444
# Relative influence (rel.inf, sums to 100) of each predictor in the
# tc = 5, lr = 0.0005 fit on finaltrain.
summary(cvtc7.lr0005)
## var rel.inf
## gcpsr_5 gcpsr_5 24.71836166
## cpaq8_tot cpaq8_tot 14.58809298
## age age 6.09903302
## gcpsr_3 gcpsr_3 5.61847863
## pain_duration pain_duration 4.98103376
## isi_tot isi_tot 4.78624690
## RST_PQ_BIS RST_PQ_BIS 4.02689864
## employment employment 3.53255422
## gcpsr_6 gcpsr_6 3.30843361
## gcpsr_4 gcpsr_4 3.02053608
## overlapping_pain_number overlapping_pain_number 2.83678271
## pcs_tot pcs_tot 2.61331618
## current_opioid_meds current_opioid_meds 2.57259051
## income income 2.46190210
## audit_total audit_total 1.90484351
## bis_brief_tot bis_brief_tot 1.84053784
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.79101379
## proanx_t proanx_t 1.70953052
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.37233977
## meq_tot meq_tot 1.29465581
## RST_PQ_FFS RST_PQ_FFS 1.21359212
## prodep_t prodep_t 0.99991650
## RST_PQ_BAS_I RST_PQ_BAS_I 0.86649202
## RST_PQ_BAS_RI RST_PQ_BAS_RI 0.85006084
## hispanic hispanic 0.73348653
## race race 0.10887060
## cuditr0 cuditr0 0.07793542
## assigned_sex_at_birth assigned_sex_at_birth 0.07246373
tc 5, lr .0025 (finaltrain_nocgpsr: 24 predictors, gcpsr items excluded)
# Cross-validated boosted regression tree on finaltrain_nocgpsr (response in
# column 1, predictors in columns 3-26): tree complexity 5, learning rate 0.0025.
# The old target name `cvtc3.lr0005` matches neither setting and reusing it
# overwrites the previous fit, so this model is stored under a name that
# encodes the real settings and is kept for later comparison.
# NOTE(review): bag.fraction = 0.5 subsamples rows at random; confirm that a
# seed is set earlier in the file if the results must be reproducible.
cvtc5.lr0.0025.nogcpsr <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.x = 3:26,                # predictor columns, by position
  gbm.y = 1,                   # response column
  fold.vector = fold.vector,   # user-supplied CV fold assignment
  family = "bernoulli",
  tree.complexity = 5,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  tolerance.method = "auto",
  # tolerance = 0.01,
  max.trees = 50000
)
# Backward-compatible alias: the following lines still refer to the old name.
cvtc3.lr0005 <- cvtc5.lr0.0025.nogcpsr
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli
##
## Using 404 observations and 24 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2796
##
## now adding trees...
## 100 1.249
## 150 1.2253
## 200 1.2072
## 250 1.1933
## 300 1.1829
## 350 1.1752
## 400 1.1685
## 450 1.1621
## 500 1.1577
## 550 1.155
## 600 1.1537
## 650 1.1533
## 700 1.1532
## 750 1.1533
## 800 1.1544
## 850 1.1569
## 900 1.1598
## 950 1.1614
## 1000 1.1642
## 1050 1.1656
## 1100 1.167
## 1150 1.1692
## 1200 1.1722
## 1250 1.1753
## fitting final gbm model with a fixed number of 700 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.921
##
## estimated cv deviance = 1.153 ; se = 0.047
##
## training data correlation = 0.686
## cv correlation = 0.414 ; se = 0.055
##
## training data ROC score = 0.903
## cv ROC score = 0.74 ; se = 0.029
##
## elapsed time - 0.14 minutes
# Cross-validation statistics for the fit currently held in cvtc3.lr0005.
# Despite the name, this is the tc = 5, lr = 0.0025 model on finaltrain_nocgpsr.
cvtc3.lr0005[["cv.statistics"]]
## $deviance.mean
## [1] 1.153182
##
## $deviance.se
## [1] 0.04716857
##
## $correlation.mean
## [1] 0.4135266
##
## $correlation.se
## [1] 0.05510049
##
## $discrimination.mean
## [1] 0.73962
##
## $discrimination.se
## [1] 0.0292763
##
## $calibration.mean
## [1] 0.2530776 1.4391522 0.4419874 0.4714546 0.3959977
##
## $calibration.se
## [1] 0.21224567 0.33267254 0.10410803 0.09706560 0.08950416
##
## $cv.threshold
## [1] 0.3810061
##
## $cv.threshold.se
## [1] 0.01359757
# Relative influence (rel.inf, sums to 100) of each predictor in the
# tc = 5, lr = 0.0025 fit on finaltrain_nocgpsr.
# NOTE: summary() does not report deviance; the deviance measures used to
# compare models are the ones printed from $cv.statistics.
summary(cvtc3.lr0005)
## var rel.inf
## cpaq8_tot cpaq8_tot 29.46570907
## age age 7.29866027
## isi_tot isi_tot 7.15591172
## RST_PQ_BIS RST_PQ_BIS 6.07041218
## pain_duration pain_duration 6.04310979
## current_opioid_meds current_opioid_meds 5.94944672
## pcs_tot pcs_tot 5.34746385
## overlapping_pain_number overlapping_pain_number 5.33324027
## employment employment 4.04735896
## income income 3.75650741
## audit_total audit_total 3.52440565
## bis_brief_tot bis_brief_tot 2.93143729
## prodep_t prodep_t 2.26620315
## proanx_t proanx_t 2.10790693
## RST_PQ_FFS RST_PQ_FFS 1.78187147
## meq_tot meq_tot 1.40009568
## RST_PQ_BAS_I RST_PQ_BAS_I 1.37157018
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.16808259
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.11954478
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.01429717
## hispanic hispanic 0.46250609
## race race 0.22822530
## assigned_sex_at_birth assigned_sex_at_birth 0.08192841
## cuditr0 cuditr0 0.07410507
tc 5, lr .0025 (finaltrain: all 28 predictors, gcpsr items included)
# Cross-validated boosted regression tree on finaltrain (response in column 1,
# predictors in columns 3-30): tree complexity 5, learning rate 0.0025.
# The old target name `cvtc7.lr0005` matches neither setting and reusing it
# overwrites the previous fit, so this model is stored under a name that
# encodes the real settings and is kept for later comparison.
# NOTE(review): bag.fraction = 0.5 subsamples rows at random; confirm that a
# seed is set earlier in the file if the results must be reproducible.
cvtc5.lr0.0025.full <- gbm.step(
  data = finaltrain,
  gbm.x = 3:30,                # predictor columns, by position
  gbm.y = 1,                   # response column
  fold.vector = fold.vector,   # user-supplied CV fold assignment
  family = "bernoulli",
  tree.complexity = 5,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  tolerance.method = "auto",
  # tolerance = 0.01,
  max.trees = 50000
)
# Backward-compatible alias: the following lines still refer to the old name.
cvtc7.lr0005 <- cvtc5.lr0.0025.full
## Length Class Mode
## 0 NULL NULL
##
##
## GBM STEP - version 2.9
##
## Performing cross-validation optimisation of a boosted regression tree model
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli
##
## Using 404 observations and 28 predictors
##
## loading user-supplied fold vector
##
## creating 10 initial models of 50 trees
##
## folds are stratified by prevalence
##
## total mean deviance = 1.3112
##
## tolerance is fixed at 0.0013
##
## ntrees resid. dev.
## 50 1.2618
##
## now adding trees...
## 100 1.2169
## 150 1.1813
## 200 1.1514
## 250 1.1277
## 300 1.1091
## 350 1.0926
## 400 1.079
## 450 1.067
## 500 1.0564
## 550 1.0492
## 600 1.043
## 650 1.0384
## 700 1.0338
## 750 1.033
## 800 1.0315
## 850 1.0301
## 900 1.0297
## 950 1.0293
## 1000 1.0303
## 1050 1.0321
## 1100 1.0319
## 1150 1.033
## 1200 1.0349
## 1250 1.0362
## 1300 1.0379
## 1350 1.0406
## 1400 1.0441
## 1450 1.0467
## fitting final gbm model with a fixed number of 950 trees for gcpsr_2_HICP_num
##
## mean total deviance = 1.311
## mean residual deviance = 0.749
##
## estimated cv deviance = 1.029 ; se = 0.061
##
## training data correlation = 0.766
## cv correlation = 0.532 ; se = 0.055
##
## training data ROC score = 0.938
## cv ROC score = 0.803 ; se = 0.026
##
## elapsed time - 0.18 minutes
# Cross-validation statistics for the fit currently held in cvtc7.lr0005.
# Despite the name, this is the tc = 5, lr = 0.0025 model on finaltrain.
cvtc7.lr0005[["cv.statistics"]]
## $deviance.mean
## [1] 1.029272
##
## $deviance.se
## [1] 0.06142857
##
## $correlation.mean
## [1] 0.531747
##
## $correlation.se
## [1] 0.05512385
##
## $discrimination.mean
## [1] 0.80257
##
## $discrimination.se
## [1] 0.02637095
##
## $calibration.mean
## [1] 0.2569699 1.4019267 0.4557784 0.5146885 0.4223676
##
## $calibration.se
## [1] 0.21994230 0.32234097 0.11366907 0.09399728 0.10275562
##
## $cv.threshold
## [1] 0.401829
##
## $cv.threshold.se
## [1] 0.02262023
# Relative influence (rel.inf, sums to 100) of each predictor in the
# tc = 5, lr = 0.0025 fit on finaltrain.
summary(cvtc7.lr0005)
## var rel.inf
## gcpsr_5 gcpsr_5 24.71647659
## cpaq8_tot cpaq8_tot 14.01333882
## age age 5.75450308
## isi_tot isi_tot 5.28679706
## gcpsr_3 gcpsr_3 5.04339385
## pain_duration pain_duration 4.92060525
## RST_PQ_BIS RST_PQ_BIS 4.22435147
## gcpsr_6 gcpsr_6 3.53087405
## employment employment 3.47134861
## gcpsr_4 gcpsr_4 2.98103233
## overlapping_pain_number overlapping_pain_number 2.93622536
## income income 2.59192761
## pcs_tot pcs_tot 2.52548063
## current_opioid_meds current_opioid_meds 2.43593237
## bis_brief_tot bis_brief_tot 2.02075732
## RST_PQ_BAS_GDP RST_PQ_BAS_GDP 1.93410369
## audit_total audit_total 1.80822723
## proanx_t proanx_t 1.78469313
## RST_PQ_FFS RST_PQ_FFS 1.60631743
## meq_tot meq_tot 1.38280230
## RST_PQ_BAS_RR RST_PQ_BAS_RR 1.12501219
## RST_PQ_BAS_I RST_PQ_BAS_I 1.11856188
## RST_PQ_BAS_RI RST_PQ_BAS_RI 1.00109996
## prodep_t prodep_t 0.86230612
## hispanic hispanic 0.63286889
## cuditr0 cuditr0 0.18160950
## assigned_sex_at_birth assigned_sex_at_birth 0.06340201
## race race 0.04595124
Amrit paused here on Dec 15th, 2024