BRTforAmrit.Rmd.knit

Boosted Regression Trees: install the additional BRT functions from Elith et al.

# Working directory and location of the helper script that is sourced here.
# NOTE(review): presumably brt.functions.R provides gbm.step() used further
# down; confirm against the script itself.
brt_dir <- "C:\\Users\\ajohal3\\Downloads"
brt_functions <- "C:\\Users\\ajohal3\\Downloads\\brt.functions.R"
setwd(brt_dir)
source(brt_functions)

Set up packages needed

library(gbm)

## Warning: package 'gbm' was built under R version 4.4.2

## Loaded gbm 2.2.2

## This version of gbm is no longer under development. Consider transitioning to gbm3, https://github.com/gbm-developers/gbm3

library(psych)

## Warning: package 'psych' was built under R version 4.4.2

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.4.2

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(reshape)

## Warning: package 'reshape' was built under R version 4.4.2

## 
## Attaching package: 'reshape'

## The following object is masked from 'package:dplyr':
## 
##     rename

# Load the research data; text columns are read in as factors.
data_path <- "C:\\Users\\ajohal3\\Downloads\\Research_Data_updated_Dec_9_2024_AJK.csv"
Amritdata <- read.csv(data_path, stringsAsFactors = TRUE)
# NOTE(review): pain_duration comes in with a blank " " factor level, which
# looks like missing data read as a category; confirm whether it should be NA.
summary(Amritdata)

##                    prolific_id       race                            hispanic  
##  54d2c4e2fdf99b2c319a8cf6:  1   Min.   : 1.000   Hispanic/Latinx         : 48  
##  55daf7b369dbc30005b68ac9:  1   1st Qu.: 6.000   I prefer not to disclose: 25  
##  574d70b95e549100063c6533:  1   Median : 6.000   Non-Hispanic/Latinx     :433  
##  57d67fdf6598aa00019917c8:  1   Mean   : 5.834                                 
##  584e7a66e42c2a00013099ac:  1   3rd Qu.: 6.000                                 
##  58dff341282ab00001e9ce26:  1   Max.   :13.000                                 
##  (Other)                 :500                                                  
##             assigned_sex_at_birth      age                       employment 
##  Female                :376       Min.   :18.00   Employed full-time  :174  
##  Intersex/DSD          :  1       1st Qu.:23.00   Student             :131  
##  Male                  :126       Median :29.00   Employed part-time  : 76  
##  prefer not to disclose:  3       Mean   :31.79   Unemployed          : 53  
##                                   3rd Qu.:38.00   Homemaker           : 31  
##                                   Max.   :70.00   Receiving disability: 26  
##                                   NA's   :1       (Other)             : 15  
##                     income    overlapping_pain_number    pain_duration
##  $0-$25,000            :168   Min.   :1.000           >10 years :117  
##  $25,001-$50,000       :130   1st Qu.:2.000           5-7 years : 99  
##  $50,001-$75,000       : 83   Median :3.000           2-4 years : 86  
##  $75,001-$100,000      : 45   Mean   :2.883           1-2 years : 59  
##  >$100,000             : 58   3rd Qu.:4.000           7-10 years: 55  
##  prefer not to disclose: 22   Max.   :9.000           3-6 months: 50  
##                               NA's   :3               (Other)   : 40  
##       gcpsr_2       gcpsr_3          gcpsr_4         gcpsr_5       gcpsr_6  
##  Every day:187   Min.   : 0.000   Min.   : 0.00   Min.   : 0.000   No :400  
##  Some days:319   1st Qu.: 4.000   1st Qu.: 4.00   1st Qu.: 4.000   Yes:106  
##                  Median : 5.000   Median : 6.00   Median : 5.000            
##                  Mean   : 5.255   Mean   : 5.54   Mean   : 5.316            
##                  3rd Qu.: 6.000   3rd Qu.: 7.00   3rd Qu.: 7.000            
##                  Max.   :10.000   Max.   :10.00   Max.   :10.000            
##                                                                             
##     prodep_t        proanx_t        prorx_t        RST_PQ_FFS   
##  Min.   :41.00   Min.   :40.30   Min.   :36.30   Min.   :1.000  
##  1st Qu.:57.30   1st Qu.:59.50   1st Qu.:41.60   1st Qu.:2.000  
##  Median :63.90   Median :65.30   Median :43.70   Median :2.600  
##  Mean   :62.89   Mean   :63.96   Mean   :44.72   Mean   :2.523  
##  3rd Qu.:69.40   3rd Qu.:69.30   3rd Qu.:48.20   3rd Qu.:3.000  
##  Max.   :79.40   Max.   :81.60   Max.   :75.10   Max.   :4.000  
##                                  NA's   :320                    
##    RST_PQ_BIS    RST_PQ_BAS_RI   RST_PQ_BAS_GDP  RST_PQ_BAS_RR  
##  Min.   :1.174   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.609   1st Qu.:2.000   1st Qu.:2.143   1st Qu.:2.300  
##  Median :3.065   Median :2.429   Median :2.714   Median :2.700  
##  Mean   :2.980   Mean   :2.388   Mean   :2.643   Mean   :2.672  
##  3rd Qu.:3.478   3rd Qu.:2.857   3rd Qu.:3.143   3rd Qu.:3.100  
##  Max.   :4.000   Max.   :3.857   Max.   :4.000   Max.   :4.000  
##                                                                 
##   RST_PQ_BAS_I     SHAPS_tot      bis_brief_tot     cpaq8_tot    
##  Min.   :1.000   Min.   : 0.000   Min.   :1.000   Min.   : 2.00  
##  1st Qu.:1.875   1st Qu.: 0.000   1st Qu.:1.875   1st Qu.:21.00  
##  Median :2.375   Median : 1.000   Median :2.125   Median :27.00  
##  Mean   :2.383   Mean   : 2.433   Mean   :2.192   Mean   :26.27  
##  3rd Qu.:2.875   3rd Qu.: 4.000   3rd Qu.:2.500   3rd Qu.:31.75  
##  Max.   :4.000   Max.   :13.000   Max.   :3.875   Max.   :46.00  
##                                                                  
##     pcs_tot         isi_tot         meq_tot       audit_total    
##  Min.   : 0.00   Min.   : 0.00   Min.   :36.00   Min.   : 0.000  
##  1st Qu.:17.00   1st Qu.:10.00   1st Qu.:45.00   1st Qu.: 1.000  
##  Median :26.00   Median :14.00   Median :48.00   Median : 2.000  
##  Mean   :26.29   Mean   :14.09   Mean   :48.18   Mean   : 2.502  
##  3rd Qu.:36.00   3rd Qu.:19.00   3rd Qu.:52.00   3rd Qu.: 4.000  
##  Max.   :52.00   Max.   :28.00   Max.   :64.00   Max.   :11.000  
##                                                                  
##     cuditr0       current_opioid_meds gcpsr_2_HICP gcpsr_2_HICP_num
##  Min.   :0.0000   No :436             No :319      Min.   :0.0000  
##  1st Qu.:0.0000   Yes: 70             Yes:187      1st Qu.:0.0000  
##  Median :0.0000                                    Median :0.0000  
##  Mean   :0.3814                                    Mean   :0.3696  
##  3rd Qu.:1.0000                                    3rd Qu.:1.0000  
##  Max.   :1.0000                                    Max.   :1.0000  
##                                                                    
##    income_num    income_alpha                   hispanic_bin
##  Min.   :1.000   A:168        I prefer not to disclose: 25  
##  1st Qu.:1.000   B:130        No                      :433  
##  Median :2.000   C: 83        Yes                     : 48  
##  Mean   :2.528   D: 45                                      
##  3rd Qu.:3.000   E: 58                                      
##  Max.   :6.000   F: 22                                      
##

# Show the type of every column (factor / int / num) and its first values.
str(Amritdata)

## 'data.frame':    506 obs. of  37 variables:
##  $ prolific_id            : Factor w/ 506 levels "54d2c4e2fdf99b2c319a8cf6",..: 1 2 4 5 6 7 10 13 14 18 ...
##  $ race                   : int  6 6 6 6 6 6 6 6 6 6 ...
##  $ hispanic               : Factor w/ 3 levels "Hispanic/Latinx",..: 3 3 3 2 3 3 3 3 1 3 ...
##  $ assigned_sex_at_birth  : Factor w/ 4 levels "Female","Intersex/DSD",..: 3 3 3 1 1 3 3 1 1 3 ...
##  $ age                    : int  43 37 53 34 42 37 46 22 55 24 ...
##  $ employment             : Factor w/ 9 levels "Employed full-time",..: 1 1 1 1 2 2 1 8 2 9 ...
##  $ income                 : Factor w/ 6 levels "$0-$25,000","$25,001-$50,000",..: 3 2 5 1 1 1 2 1 2 1 ...
##  $ overlapping_pain_number: int  5 4 2 1 3 3 3 3 3 1 ...
##  $ pain_duration          : Factor w/ 8 levels " ",">10 years",..: 2 6 7 2 6 4 7 4 4 6 ...
##  $ gcpsr_2                : Factor w/ 2 levels "Every day","Some days": 1 2 1 2 2 2 2 2 2 2 ...
##  $ gcpsr_3                : int  4 6 8 6 7 5 6 6 5 8 ...
##  $ gcpsr_4                : int  6 8 9 5 6 7 5 5 4 5 ...
##  $ gcpsr_5                : int  6 4 8 5 6 6 6 3 5 3 ...
##  $ gcpsr_6                : Factor w/ 2 levels "No","Yes": 2 1 1 1 2 1 1 1 1 1 ...
##  $ prodep_t               : num  58.9 71.2 62.2 63.9 49 69.4 60.5 62.2 67.5 55.7 ...
##  $ proanx_t               : num  48 69.3 61.4 65.3 40.3 59.5 55.8 65.3 71.2 55.8 ...
##  $ prorx_t                : num  NA NA 41.6 50.4 45.5 NA 36.3 NA NA NA ...
##  $ RST_PQ_FFS             : num  1.5 1.6 3.4 2.5 2.2 3.3 1.9 2.2 3.1 1.7 ...
##  $ RST_PQ_BIS             : num  1.65 3.43 2.65 2.96 2.17 ...
##  $ RST_PQ_BAS_RI          : num  3.43 1.71 2.86 2.57 2.71 ...
##  $ RST_PQ_BAS_GDP         : num  3.14 1.43 3.71 3 3.14 ...
##  $ RST_PQ_BAS_RR          : num  2 1.8 3.2 2.9 3 2.8 2.4 2.7 3.6 1.9 ...
##  $ RST_PQ_BAS_I           : num  2.25 2.62 2.5 2.25 2.62 ...
##  $ SHAPS_tot              : int  0 4 0 0 0 1 0 6 0 10 ...
##  $ bis_brief_tot          : num  2 2.88 1.75 2.62 2.12 ...
##  $ cpaq8_tot              : int  29 26 17 21 20 28 30 30 28 32 ...
##  $ pcs_tot                : int  5 25 23 31 32 7 16 35 21 20 ...
##  $ isi_tot                : int  14 17 11 14 15 15 1 21 13 0 ...
##  $ meq_tot                : int  48 44 43 50 43 57 53 50 45 53 ...
##  $ audit_total            : int  2 1 1 1 0 2 7 3 5 2 ...
##  $ cuditr0                : int  0 1 0 0 0 0 0 1 0 0 ...
##  $ current_opioid_meds    : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 1 1 ...
##  $ gcpsr_2_HICP           : Factor w/ 2 levels "No","Yes": 2 1 2 1 1 1 1 1 1 1 ...
##  $ gcpsr_2_HICP_num       : int  1 0 1 0 0 0 0 0 0 0 ...
##  $ income_num             : int  3 2 5 1 1 1 2 1 2 1 ...
##  $ income_alpha           : Factor w/ 6 levels "A","B","C","D",..: 3 2 5 1 1 1 2 1 2 1 ...
##  $ hispanic_bin           : Factor w/ 3 levels "I prefer not to disclose",..: 2 2 2 1 2 2 2 2 3 2 ...

Clean data

# List the column names before dropping the ones that will not be used.
names(Amritdata)

##  [1] "prolific_id"             "race"                   
##  [3] "hispanic"                "assigned_sex_at_birth"  
##  [5] "age"                     "employment"             
##  [7] "income"                  "overlapping_pain_number"
##  [9] "pain_duration"           "gcpsr_2"                
## [11] "gcpsr_3"                 "gcpsr_4"                
## [13] "gcpsr_5"                 "gcpsr_6"                
## [15] "prodep_t"                "proanx_t"               
## [17] "prorx_t"                 "RST_PQ_FFS"             
## [19] "RST_PQ_BIS"              "RST_PQ_BAS_RI"          
## [21] "RST_PQ_BAS_GDP"          "RST_PQ_BAS_RR"          
## [23] "RST_PQ_BAS_I"            "SHAPS_tot"              
## [25] "bis_brief_tot"           "cpaq8_tot"              
## [27] "pcs_tot"                 "isi_tot"                
## [29] "meq_tot"                 "audit_total"            
## [31] "cuditr0"                 "current_opioid_meds"    
## [33] "gcpsr_2_HICP"            "gcpsr_2_HICP_num"       
## [35] "income_num"              "income_alpha"           
## [37] "hispanic_bin"

# Columns removed before modelling.
# NOTE(review): gcpsr_2 / gcpsr_2_HICP appear to be factor versions of the
# outcome gcpsr_2_HICP_num, income_num / income_alpha / hispanic_bin appear to
# be recodes of income / hispanic, and prorx_t is mostly missing; confirm.
drop_cols <- c(
  "gcpsr_2", "prorx_t", "SHAPS_tot", "gcpsr_2_HICP",
  "income_num", "income_alpha", "hispanic_bin"
)
Amritdata <- select(Amritdata, !all_of(drop_cols))

Now randomly sample the IDs so you have 404 (80% of your sample) unique IDs in the training set, and the other IDs will be used as a test set.

# Draw the training IDs (80% of the unique participants, 404 of 506 here).
# The seed is set first so the train/test split is the same on every run;
# without it each knit produces a different training set.
set.seed(1)
all_ids <- unique(Amritdata$prolific_id)
n_train <- floor(0.8 * length(all_ids))
samp <- sample(x = all_ids, size = n_train, replace = FALSE)
length(samp)

## [1] 404

# Keep the rows whose participant ID was drawn into the training sample, then
# confirm the number of distinct participants in the training set.
in_train <- Amritdata$prolific_id %in% samp
train <- Amritdata[in_train, ]
length(unique(train$prolific_id))

## [1] 404

Re-ordering variables so that outcome is first followed by unused variables, followed by the predictors. We won’t use id, which will now be column 2. Remove repeated variables.

# Column order before moving the outcome to the front.
names(train)

##  [1] "prolific_id"             "race"                   
##  [3] "hispanic"                "assigned_sex_at_birth"  
##  [5] "age"                     "employment"             
##  [7] "income"                  "overlapping_pain_number"
##  [9] "pain_duration"           "gcpsr_3"                
## [11] "gcpsr_4"                 "gcpsr_5"                
## [13] "gcpsr_6"                 "prodep_t"               
## [15] "proanx_t"                "RST_PQ_FFS"             
## [17] "RST_PQ_BIS"              "RST_PQ_BAS_RI"          
## [19] "RST_PQ_BAS_GDP"          "RST_PQ_BAS_RR"          
## [21] "RST_PQ_BAS_I"            "bis_brief_tot"          
## [23] "cpaq8_tot"               "pcs_tot"                
## [25] "isi_tot"                 "meq_tot"                
## [27] "audit_total"             "cuditr0"                
## [29] "current_opioid_meds"     "gcpsr_2_HICP_num"

# Move the outcome to column 1 and keep every other column in its current
# order. Selecting by name (rather than c(30, 1:29)) keeps this correct if the
# column set changes and makes re-running the chunk harmless.
outcome_col <- "gcpsr_2_HICP_num"
train <- train[, c(outcome_col, names(train)[names(train) != outcome_col])]
colnames(train)

##  [1] "gcpsr_2_HICP_num"        "prolific_id"            
##  [3] "race"                    "hispanic"               
##  [5] "assigned_sex_at_birth"   "age"                    
##  [7] "employment"              "income"                 
##  [9] "overlapping_pain_number" "pain_duration"          
## [11] "gcpsr_3"                 "gcpsr_4"                
## [13] "gcpsr_5"                 "gcpsr_6"                
## [15] "prodep_t"                "proanx_t"               
## [17] "RST_PQ_FFS"              "RST_PQ_BIS"             
## [19] "RST_PQ_BAS_RI"           "RST_PQ_BAS_GDP"         
## [21] "RST_PQ_BAS_RR"           "RST_PQ_BAS_I"           
## [23] "bis_brief_tot"           "cpaq8_tot"              
## [25] "pcs_tot"                 "isi_tot"                
## [27] "meq_tot"                 "audit_total"            
## [29] "cuditr0"                 "current_opioid_meds"

We need the cross-validation folds to be assigned by person (participant ID), so that each person belongs to exactly one fold.

# Cross-validation fold construction: list the training columns for reference.
names(train)

##  [1] "gcpsr_2_HICP_num"        "prolific_id"            
##  [3] "race"                    "hispanic"               
##  [5] "assigned_sex_at_birth"   "age"                    
##  [7] "employment"              "income"                 
##  [9] "overlapping_pain_number" "pain_duration"          
## [11] "gcpsr_3"                 "gcpsr_4"                
## [13] "gcpsr_5"                 "gcpsr_6"                
## [15] "prodep_t"                "proanx_t"               
## [17] "RST_PQ_FFS"              "RST_PQ_BIS"             
## [19] "RST_PQ_BAS_RI"           "RST_PQ_BAS_GDP"         
## [21] "RST_PQ_BAS_RR"           "RST_PQ_BAS_I"           
## [23] "bis_brief_tot"           "cpaq8_tot"              
## [25] "pcs_tot"                 "isi_tot"                
## [27] "meq_tot"                 "audit_total"            
## [29] "cuditr0"                 "current_opioid_meds"

# Assign each participant ID to one of nfolds cross-validation folds.
# The seed is set before the shuffle so the fold assignment is reproducible;
# the only other set.seed() call comes later, just before the model fit.
set.seed(1)
id <- unique(train$prolific_id)
#summary(id)
#describe(id)
nfolds <- 10
# Fold labels 1..nfolds recycled to one per ID (near-equal fold sizes), then
# randomly permuted.
cvtest <- sample(rep(seq_len(nfolds), length.out = length(id)), replace = FALSE)
idname <- "prolific_id"
foldname <- "fold"
# Lookup table: one row per participant with its fold number.
cvdf <- data.frame(id, cvtest)
names(cvdf) <- c(idname, foldname)
head(cvdf)

##                prolific_id fold
## 1 55daf7b369dbc30005b68ac9   10
## 2 584e7a66e42c2a00013099ac    6
## 3 58dff341282ab00001e9ce26    5
## 4 5947f262de49a9000165ccd3    2
## 5 5a64bb3035f26b0001492e6a    7
## 6 5ac6852df69e940001d98f04    1

# Attach each row's fold number to the training data.
# merge(..., sort = FALSE) returns rows in an unspecified order, but the code
# below treats combcvdf as row-aligned with train. A temporary row index is
# carried through the merge and used to restore train's original row order.
train_indexed <- cbind(train, .row_order = seq_len(nrow(train)))
combcvdf <- merge(train_indexed, cvdf, by = "prolific_id", sort = FALSE)
combcvdf <- combcvdf[order(combcvdf$.row_order), ]
combcvdf$.row_order <- NULL
rownames(combcvdf) <- NULL
head(combcvdf)

##                prolific_id gcpsr_2_HICP_num race                 hispanic
## 1 55daf7b369dbc30005b68ac9                0    6      Non-Hispanic/Latinx
## 2 584e7a66e42c2a00013099ac                0    6 I prefer not to disclose
## 3 58dff341282ab00001e9ce26                0    6      Non-Hispanic/Latinx
## 4 5947f262de49a9000165ccd3                0    6      Non-Hispanic/Latinx
## 5 5a64bb3035f26b0001492e6a                0    6      Non-Hispanic/Latinx
## 6 5ac6852df69e940001d98f04                0    6      Non-Hispanic/Latinx
##   assigned_sex_at_birth age         employment          income
## 1                  Male  37 Employed full-time $25,001-$50,000
## 2                Female  34 Employed full-time      $0-$25,000
## 3                Female  42 Employed part-time      $0-$25,000
## 4                  Male  37 Employed part-time      $0-$25,000
## 5                  Male  46 Employed full-time $25,001-$50,000
## 6                Female  22            Student      $0-$25,000
##   overlapping_pain_number pain_duration gcpsr_3 gcpsr_4 gcpsr_5 gcpsr_6
## 1                       4     5-7 years       6       8       4      No
## 2                       1     >10 years       6       5       5      No
## 3                       3     5-7 years       7       6       6     Yes
## 4                       3     2-4 years       5       7       6      No
## 5                       3    7-10 years       6       5       6      No
## 6                       3     2-4 years       6       5       3      No
##   prodep_t proanx_t RST_PQ_FFS RST_PQ_BIS RST_PQ_BAS_RI RST_PQ_BAS_GDP
## 1     71.2     69.3        1.6   3.434783      1.714286       1.428571
## 2     63.9     65.3        2.5   2.956522      2.571429       3.000000
## 3     49.0     40.3        2.2   2.173913      2.714286       3.142857
## 4     69.4     59.5        3.3   2.739130      1.714286       1.142857
## 5     60.5     55.8        1.9   2.565217      2.714286       2.571429
## 6     62.2     65.3        2.2   3.304348      1.571429       1.142857
##   RST_PQ_BAS_RR RST_PQ_BAS_I bis_brief_tot cpaq8_tot pcs_tot isi_tot meq_tot
## 1           1.8        2.625         2.875        26      25      17      44
## 2           2.9        2.250         2.625        21      31      14      50
## 3           3.0        2.625         2.125        20      32      15      43
## 4           2.8        2.125         2.625        28       7      15      57
## 5           2.4        1.500         1.750        30      16       1      53
## 6           2.7        2.125         2.500        30      35      21      50
##   audit_total cuditr0 current_opioid_meds fold
## 1           1       1                  No   10
## 2           1       0                  No    6
## 3           0       0                  No    5
## 4           2       0                  No    2
## 5           7       0                  No    7
## 6           3       1                  No    1

# Sanity check: number of participants in each fold. IDs and folds are both
# taken from combcvdf so the cross-tabulation cannot be distorted by a row
# order mismatch between train and combcvdf.
#table(combcvdf$prolific_id, combcvdf$fold)
#rowSums(table(combcvdf$prolific_id, combcvdf$fold) > 0)
colSums(table(combcvdf$prolific_id, combcvdf$fold) > 0)

##  1  2  3  4  5  6  7  8  9 10 
## 41 41 41 41 40 40 40 40 40 40

# Fold number for every row of train, in train's own row order. The models
# below are fitted on data frames derived from train, so the folds are looked
# up by participant ID rather than read from the merged combcvdf, whose row
# order is not guaranteed by merge(sort = FALSE).
fold.vector <- cvdf$fold[match(train$prolific_id, cvdf$prolific_id)]
# Every training row must have received a fold.
stopifnot(length(fold.vector) == nrow(train), !anyNA(fold.vector))
head(fold.vector)

## [1] 10  6  5  2  7  1

# Descriptive statistics of the fold labels (n, min and max give a quick check).
psych::describe(fold.vector)

##    vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 404 5.47 2.88      5    5.46 3.71   1  10     9 0.02    -1.24 0.14

Now let’s run some models with our finaltrain data set! First, tc 3, lr .0005

# finaltrain: the training data as a plain data frame (all predictors).
finaltrain <- as.data.frame(train)

# finaltrain_nocgpsr: same columns, with the four gcpsr items moved to the end
# (positions 27:30) so that gbm.x = 3:26 leaves them out of the predictors.
# Run the models with both datasets separately: gbm.x = 3:30 for finaltrain,
# and gbm.x = 3:26 for finaltrain_nocgpsr.
gcpsr_cols <- 11:14 # gcpsr_3, gcpsr_4, gcpsr_5, gcpsr_6
other_cols <- c(1:10, 15:30)
finaltrain_nocgpsr <- finaltrain[, c(other_cols, gcpsr_cols)]
names(finaltrain)

##  [1] "gcpsr_2_HICP_num"        "prolific_id"            
##  [3] "race"                    "hispanic"               
##  [5] "assigned_sex_at_birth"   "age"                    
##  [7] "employment"              "income"                 
##  [9] "overlapping_pain_number" "pain_duration"          
## [11] "gcpsr_3"                 "gcpsr_4"                
## [13] "gcpsr_5"                 "gcpsr_6"                
## [15] "prodep_t"                "proanx_t"               
## [17] "RST_PQ_FFS"              "RST_PQ_BIS"             
## [19] "RST_PQ_BAS_RI"           "RST_PQ_BAS_GDP"         
## [21] "RST_PQ_BAS_RR"           "RST_PQ_BAS_I"           
## [23] "bis_brief_tot"           "cpaq8_tot"              
## [25] "pcs_tot"                 "isi_tot"                
## [27] "meq_tot"                 "audit_total"            
## [29] "cuditr0"                 "current_opioid_meds"

# Confirm that the gcpsr items now sit in the last four columns.
names(finaltrain_nocgpsr)

##  [1] "gcpsr_2_HICP_num"        "prolific_id"            
##  [3] "race"                    "hispanic"               
##  [5] "assigned_sex_at_birth"   "age"                    
##  [7] "employment"              "income"                 
##  [9] "overlapping_pain_number" "pain_duration"          
## [11] "prodep_t"                "proanx_t"               
## [13] "RST_PQ_FFS"              "RST_PQ_BIS"             
## [15] "RST_PQ_BAS_RI"           "RST_PQ_BAS_GDP"         
## [17] "RST_PQ_BAS_RR"           "RST_PQ_BAS_I"           
## [19] "bis_brief_tot"           "cpaq8_tot"              
## [21] "pcs_tot"                 "isi_tot"                
## [23] "meq_tot"                 "audit_total"            
## [25] "cuditr0"                 "current_opioid_meds"    
## [27] "gcpsr_3"                 "gcpsr_4"                
## [29] "gcpsr_5"                 "gcpsr_6"

# Seed the RNG before fitting so the run can be repeated.
set.seed(1)

# Tree complexity 3, learning rate 0.0005, gcpsr items excluded
# (predictors = columns 3:26 of finaltrain_nocgpsr, outcome = column 1).
cvtc3.lr0005 <- gbm.step(
  # data and variables
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  # user-supplied person-level CV folds
  fold.vector = fold.vector,
  # boosting settings
  tree.complexity = 3,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # stopping tolerance
  # tolerance = 0.01,
  tolerance.method = "auto"
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.309 
##  
## now adding trees... 
## 100   1.3029 
## 150   1.2969 
## 200   1.2913 
## 250   1.2861 
## 300   1.2811 
## 350   1.2762 
## 400   1.2717 
## 450   1.2674 
## 500   1.2631 
## 550   1.259 
## 600   1.2552 
## 650   1.2514 
## 700   1.2479 
## 750   1.2448 
## 800   1.2417 
## 850   1.2387 
## 900   1.2358 
## 950   1.233 
## 1000   1.2306 
## 1050   1.2281 
## 1100   1.2257 
## 1150   1.2234 
## 1200   1.221 
## 1250   1.219 
## 1300   1.2171 
## 1350   1.2151 
## 1400   1.2132 
## 1450   1.2113 
## 1500   1.2096 
## 1550   1.2076 
## 1600   1.206 
## 1650   1.2045 
## 1700   1.203 
## 1750   1.2016 
## 1800   1.2001 
## 1850   1.1989 
## 1900   1.1976 
## 1950   1.1963 
## 2000   1.1951 
## 2050   1.1939 
## 2100   1.1928 
## 2150   1.1917 
## 2200   1.1907 
## 2250   1.1896 
## 2300   1.1887 
## 2350   1.188 
## 2400   1.1871 
## 2450   1.1863 
## 2500   1.1856 
## 2550   1.1846 
## 2600   1.1841 
## 2650   1.1834 
## 2700   1.1826 
## 2750   1.1818 
## 2800   1.1813 
## 2850   1.1809 
## 2900   1.1804 
## 2950   1.18 
## 3000   1.1794 
## 3050   1.1789 
## 3100   1.1783 
## 3150   1.1777 
## 3200   1.1776 
## 3250   1.1771 
## 3300   1.1767 
## 3350   1.1763 
## 3400   1.1756 
## 3450   1.1753 
## 3500   1.1751 
## 3550   1.1748 
## 3600   1.1747 
## 3650   1.1744 
## 3700   1.1741 
## 3750   1.174 
## 3800   1.1738 
## 3850   1.1735 
## 3900   1.1731 
## 3950   1.1728 
## 4000   1.1725 
## 4050   1.1723 
## 4100   1.1724 
## 4150   1.1723 
## 4200   1.172 
## 4250   1.1718 
## 4300   1.1719 
## 4350   1.1718 
## 4400   1.1717 
## 4450   1.1717 
## 4500   1.1715 
## 4550   1.1715 
## 4600   1.1715 
## 4650   1.1713

## fitting final gbm model with a fixed number of  4650  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.986 
##  
## estimated cv deviance = 1.171 ; se = 0.034 
##  
## training data correlation = 0.622 
## cv correlation =  0.391 ; se = 0.058 
##  
## training data ROC score = 0.86 
## cv ROC score = 0.706 ; se = 0.036 
##  
## elapsed time -  0.4 minutes

# Cross-validated fit statistics (deviance, correlation, discrimination, ...).
cvtc3.lr0005[["cv.statistics"]]

## $deviance.mean
## [1] 1.171322
## 
## $deviance.se
## [1] 0.03364006
## 
## $correlation.mean
## [1] 0.3906834
## 
## $correlation.se
## [1] 0.05777884
## 
## $discrimination.mean
## [1] 0.70569
## 
## $discrimination.se
## [1] 0.03593855
## 
## $calibration.mean
## [1] 0.1794158 1.3127085 0.5359566 0.6478088 0.4201257
## 
## $calibration.se
## [1] 0.17682185 0.25468034 0.12014084 0.07657072 0.11404526
## 
## $cv.threshold
## [1] 0.3666053
## 
## $cv.threshold.se
## [1] 0.01049579

# Relative influence (rel.inf) of each predictor in the fitted model.
# The deviance measures used to compare models are not printed here; they are
# in cvtc3.lr0005$cv.statistics (deviance.mean and deviance.se).
summary(cvtc3.lr0005)

##                                             var     rel.inf
## cpaq8_tot                             cpaq8_tot 27.82978141
## isi_tot                                 isi_tot  9.69371751
## overlapping_pain_number overlapping_pain_number  8.56608617
## pain_duration                     pain_duration  7.78797445
## pcs_tot                                 pcs_tot  6.48536605
## employment                           employment  5.80991392
## age                                         age  5.51490146
## current_opioid_meds         current_opioid_meds  3.98671147
## RST_PQ_BIS                           RST_PQ_BIS  3.94473272
## RST_PQ_FFS                           RST_PQ_FFS  3.11774195
## prodep_t                               prodep_t  2.20941448
## proanx_t                               proanx_t  2.09782343
## RST_PQ_BAS_I                       RST_PQ_BAS_I  2.03223845
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.82437014
## audit_total                         audit_total  1.57514132
## meq_tot                                 meq_tot  1.52053739
## income                                   income  1.43357878
## bis_brief_tot                     bis_brief_tot  1.25966911
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.99271091
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  0.90068291
## race                                       race  0.46976995
## hispanic                               hispanic  0.44930887
## cuditr0                                 cuditr0  0.41708262
## assigned_sex_at_birth     assigned_sex_at_birth  0.08074453

THIS IS WHERE WE STOPPED WHILE CODING TOGETHER. In all of the code below, you will have to change `data` and `gbm.x` to match what we were doing (i.e., data = finaltrain_nocgpsr, gbm.x = 3:26).

tc 3, lr .0005 (finaltrain, with the gcpsr items included as predictors: gbm.x = 3:30)

# Seed the RNG before fitting so the run can be repeated.
set.seed(1)

# Tree complexity 3, learning rate 0.0005, all predictors including the gcpsr
# items (columns 3:30 of finaltrain, outcome = column 1).
cvtc3.lr0005_1 <- gbm.step(
  # data and variables
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  # user-supplied person-level CV folds
  fold.vector = fold.vector,
  # boosting settings
  tree.complexity = 3,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # stopping tolerance
  # tolerance = 0.01,
  tolerance.method = "auto"
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3058 
##  
## now adding trees... 
## 100   1.2964 
## 150   1.2874 
## 200   1.2785 
## 250   1.2702 
## 300   1.2621 
## 350   1.2542 
## 400   1.2469 
## 450   1.2396 
## 500   1.2327 
## 550   1.2262 
## 600   1.2197 
## 650   1.2133 
## 700   1.207 
## 750   1.2012 
## 800   1.1956 
## 850   1.19 
## 900   1.1849 
## 950   1.1798 
## 1000   1.175 
## 1050   1.1702 
## 1100   1.1654 
## 1150   1.161 
## 1200   1.1565 
## 1250   1.1524 
## 1300   1.1483 
## 1350   1.1443 
## 1400   1.1406 
## 1450   1.1367 
## 1500   1.1331 
## 1550   1.1296 
## 1600   1.1262 
## 1650   1.1228 
## 1700   1.1196 
## 1750   1.1162 
## 1800   1.113 
## 1850   1.1099 
## 1900   1.1073 
## 1950   1.1046 
## 2000   1.1018 
## 2050   1.0993 
## 2100   1.0967 
## 2150   1.0943 
## 2200   1.0919 
## 2250   1.0898 
## 2300   1.0876 
## 2350   1.0855 
## 2400   1.0834 
## 2450   1.0814 
## 2500   1.0793 
## 2550   1.0774 
## 2600   1.0755 
## 2650   1.0735 
## 2700   1.0716 
## 2750   1.0698 
## 2800   1.0682 
## 2850   1.0667 
## 2900   1.0651 
## 2950   1.0637 
## 3000   1.0621 
## 3050   1.0606 
## 3100   1.0592 
## 3150   1.0578 
## 3200   1.0565 
## 3250   1.055 
## 3300   1.0538 
## 3350   1.0526 
## 3400   1.0514 
## 3450   1.0501 
## 3500   1.0493 
## 3550   1.0482 
## 3600   1.0473 
## 3650   1.0463 
## 3700   1.0453 
## 3750   1.0444 
## 3800   1.0436 
## 3850   1.0426 
## 3900   1.0416 
## 3950   1.0407 
## 4000   1.0397 
## 4050   1.039 
## 4100   1.0382 
## 4150   1.0375 
## 4200   1.0367 
## 4250   1.0361 
## 4300   1.0352 
## 4350   1.0347 
## 4400   1.0338 
## 4450   1.0333 
## 4500   1.0328 
## 4550   1.0323 
## 4600   1.032 
## 4650   1.0314 
## 4700   1.0309 
## 4750   1.0304 
## 4800   1.0299 
## 4850   1.0295 
## 4900   1.0289 
## 4950   1.0284 
## 5000   1.0279 
## 5050   1.0276 
## 5100   1.0271 
## 5150   1.0269 
## 5200   1.0269 
## 5250   1.0266 
## 5300   1.0262 
## 5350   1.026 
## 5400   1.0256 
## 5450   1.0253 
## 5500   1.0251 
## 5550   1.0247 
## 5600   1.0243 
## 5650   1.024 
## 5700   1.0236 
## 5750   1.0233 
## 5800   1.0232 
## 5850   1.0228 
## 5900   1.0226 
## 5950   1.0224 
## 6000   1.0222 
## 6050   1.0221 
## 6100   1.0219 
## 6150   1.0219 
## 6200   1.0217 
## 6250   1.0215 
## 6300   1.0214 
## 6350   1.0212 
## 6400   1.0213 
## 6450   1.0212 
## 6500   1.0212 
## 6550   1.0212 
## 6600   1.0211 
## 6650   1.0211

## fitting final gbm model with a fixed number of  6600  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.785 
##  
## estimated cv deviance = 1.021 ; se = 0.037 
##  
## training data correlation = 0.727 
## cv correlation =  0.518 ; se = 0.044 
##  
## training data ROC score = 0.919 
## cv ROC score = 0.803 ; se = 0.023 
##  
## elapsed time -  0.66 minutes

# Cross-validated fit statistics for the model that includes the gcpsr items.
cvtc3.lr0005_1[["cv.statistics"]]

## $deviance.mean
## [1] 1.021056
## 
## $deviance.se
## [1] 0.03665463
## 
## $correlation.mean
## [1] 0.5177228
## 
## $correlation.se
## [1] 0.04409666
## 
## $discrimination.mean
## [1] 0.80279
## 
## $discrimination.se
## [1] 0.02306947
## 
## $calibration.mean
## [1] 0.07249221 1.14620613 0.67694290 0.73456462 0.54558932
## 
## $calibration.se
## [1] 0.08967059 0.12796313 0.08881519 0.04603232 0.10342900
## 
## $cv.threshold
## [1] 0.4208433
## 
## $cv.threshold.se
## [1] 0.02803775

# Relative influence (rel.inf) of each predictor in the model that includes
# the gcpsr items.
summary(cvtc3.lr0005_1)

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 19.44185938
## cpaq8_tot                             cpaq8_tot 11.37422726
## gcpsr_4                                 gcpsr_4  9.30770081
## gcpsr_6                                 gcpsr_6  7.61384404
## gcpsr_3                                 gcpsr_3  7.57266606
## pain_duration                     pain_duration  5.99255222
## isi_tot                                 isi_tot  5.31752926
## overlapping_pain_number overlapping_pain_number  5.10854044
## age                                         age  4.27560446
## employment                           employment  3.81494461
## RST_PQ_BIS                           RST_PQ_BIS  2.53249851
## RST_PQ_FFS                           RST_PQ_FFS  2.13939433
## pcs_tot                                 pcs_tot  2.12746758
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  2.01311870
## meq_tot                                 meq_tot  1.62762296
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.38674100
## current_opioid_meds         current_opioid_meds  1.14071625
## bis_brief_tot                     bis_brief_tot  1.08993494
## income                                   income  1.00103716
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.99829222
## proanx_t                               proanx_t  0.99021705
## prodep_t                               prodep_t  0.90863904
## audit_total                         audit_total  0.80745276
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.54445330
## hispanic                               hispanic  0.47685201
## race                                       race  0.24062243
## cuditr0                                 cuditr0  0.09712249
## assigned_sex_at_birth     assigned_sex_at_birth  0.05834872

tc 3 lr .00025 (finaltrain_nocgpsr: gcpsr items excluded, 24 predictors)

# Fit: tree complexity 3, learning rate 0.00025, on finaltrain_nocgpsr
# (response in column 1, predictors in columns 3:26).
# The seed is reset before the fit so the run can be repeated.
set.seed(1)

cvtc3.lr00025 <- gbm.step(
  # data, response and predictors
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  # model settings
  family = "bernoulli",
  tree.complexity = 3,
  learning.rate = 0.00025,
  bag.fraction = 0.5,
  # cross-validation and stopping settings
  fold.vector = fold.vector,
  tolerance.method = "auto",
  # tolerance = 0.01,
  max.trees = 50000
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3123 
##  
## now adding trees... 
## 100   1.3091 
## 150   1.3059 
## 200   1.3028 
## 250   1.2999 
## 300   1.297 
## 350   1.2942 
## 400   1.2914 
## 450   1.2888 
## 500   1.2861 
## 550   1.2835 
## 600   1.281 
## 650   1.2785 
## 700   1.2761 
## 750   1.2739 
## 800   1.2716 
## 850   1.2694 
## 900   1.2672 
## 950   1.2651 
## 1000   1.2631 
## 1050   1.2611 
## 1100   1.2591 
## 1150   1.2571 
## 1200   1.2552 
## 1250   1.2534 
## 1300   1.2517 
## 1350   1.2499 
## 1400   1.2482 
## 1450   1.2465 
## 1500   1.2448 
## 1550   1.2431 
## 1600   1.2416 
## 1650   1.2401 
## 1700   1.2386 
## 1750   1.2371 
## 1800   1.2356 
## 1850   1.2342 
## 1900   1.2328 
## 1950   1.2315 
## 2000   1.2302 
## 2050   1.2289 
## 2100   1.2276 
## 2150   1.2264 
## 2200   1.2252 
## 2250   1.224 
## 2300   1.2228 
## 2350   1.2217 
## 2400   1.2207 
## 2450   1.2196 
## 2500   1.2185 
## 2550   1.2173 
## 2600   1.2164 
## 2650   1.2155 
## 2700   1.2145 
## 2750   1.2135 
## 2800   1.2126 
## 2850   1.2118 
## 2900   1.2109 
## 2950   1.21 
## 3000   1.2092 
## 3050   1.2083 
## 3100   1.2075 
## 3150   1.2067 
## 3200   1.2059 
## 3250   1.2052 
## 3300   1.2044 
## 3350   1.2037 
## 3400   1.2028 
## 3450   1.2021 
## 3500   1.2014 
## 3550   1.2007 
## 3600   1.2 
## 3650   1.1993 
## 3700   1.1986 
## 3750   1.198 
## 3800   1.1974 
## 3850   1.1966 
## 3900   1.196 
## 3950   1.1954 
## 4000   1.1949 
## 4050   1.1942 
## 4100   1.1937 
## 4150   1.1932 
## 4200   1.1925 
## 4250   1.1919 
## 4300   1.1915 
## 4350   1.191 
## 4400   1.1905 
## 4450   1.1901 
## 4500   1.1896 
## 4550   1.1892 
## 4600   1.1888 
## 4650   1.1883 
## 4700   1.1879 
## 4750   1.1874 
## 4800   1.1871 
## 4850   1.1867 
## 4900   1.1862 
## 4950   1.1858 
## 5000   1.1854 
## 5050   1.185 
## 5100   1.1847 
## 5150   1.1844 
## 5200   1.1841 
## 5250   1.1837 
## 5300   1.1834 
## 5350   1.1831 
## 5400   1.1828 
## 5450   1.1825 
## 5500   1.1822 
## 5550   1.1819 
## 5600   1.1816 
## 5650   1.1813 
## 5700   1.181 
## 5750   1.1807 
## 5800   1.1803 
## 5850   1.1799 
## 5900   1.1797 
## 5950   1.1794 
## 6000   1.1792 
## 6050   1.179 
## 6100   1.1788 
## 6150   1.1785 
## 6200   1.1783 
## 6250   1.1781 
## 6300   1.1778 
## 6350   1.1777 
## 6400   1.1774 
## 6450   1.1772 
## 6500   1.177 
## 6550   1.1768 
## 6600   1.1765 
## 6650   1.1764 
## 6700   1.1762 
## 6750   1.176 
## 6800   1.176 
## 6850   1.1758 
## 6900   1.1756 
## 6950   1.1754 
## 7000   1.1752 
## 7050   1.1751 
## 7100   1.175 
## 7150   1.1748 
## 7200   1.1746 
## 7250   1.1744 
## 7300   1.1743 
## 7350   1.1743 
## 7400   1.1741 
## 7450   1.1739 
## 7500   1.1738 
## 7550   1.1737 
## 7600   1.1735 
## 7650   1.1734 
## 7700   1.1733 
## 7750   1.1731 
## 7800   1.173 
## 7850   1.1729 
## 7900   1.1728

## fitting final gbm model with a fixed number of  7900  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 1.012 
##  
## estimated cv deviance = 1.173 ; se = 0.03 
##  
## training data correlation = 0.607 
## cv correlation =  0.393 ; se = 0.056 
##  
## training data ROC score = 0.851 
## cv ROC score = 0.705 ; se = 0.034 
##  
## elapsed time -  0.73 minutes

# Cross-validation statistics for this fit: deviance, correlation,
# discrimination, calibration and threshold, each with its standard error.
cvtc3.lr00025[["cv.statistics"]]

## $deviance.mean
## [1] 1.172845
## 
## $deviance.se
## [1] 0.03020946
## 
## $correlation.mean
## [1] 0.3930418
## 
## $correlation.se
## [1] 0.05638659
## 
## $discrimination.mean
## [1] 0.70508
## 
## $discrimination.se
## [1] 0.03419079
## 
## $calibration.mean
## [1] 0.2526100 1.4447901 0.5168032 0.6541823 0.3953110
## 
## $calibration.se
## [1] 0.18699847 0.27793351 0.12112806 0.07745088 0.11026702
## 
## $cv.threshold
## [1] 0.3645858
## 
## $cv.threshold.se
## [1] 0.009678874

# Relative influence (rel.inf) of each predictor. The deviance measures used to
# compare models are printed by cvtc3.lr00025$cv.statistics, not by summary().
summary(cvtc3.lr00025)

##                                             var     rel.inf
## cpaq8_tot                             cpaq8_tot 30.01409742
## isi_tot                                 isi_tot  9.91374355
## overlapping_pain_number overlapping_pain_number  8.89285844
## pain_duration                     pain_duration  7.76753400
## pcs_tot                                 pcs_tot  5.99635786
## employment                           employment  5.67962120
## age                                         age  5.42794674
## current_opioid_meds         current_opioid_meds  4.16158922
## RST_PQ_BIS                           RST_PQ_BIS  3.65971530
## RST_PQ_FFS                           RST_PQ_FFS  2.92143949
## proanx_t                               proanx_t  2.09448016
## prodep_t                               prodep_t  2.01090822
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.90353378
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.48645516
## audit_total                         audit_total  1.46661643
## income                                   income  1.33444420
## meq_tot                                 meq_tot  1.25227914
## bis_brief_tot                     bis_brief_tot  1.21393826
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  0.85460602
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.85120743
## cuditr0                                 cuditr0  0.42172575
## race                                       race  0.32602180
## hispanic                               hispanic  0.29614309
## assigned_sex_at_birth     assigned_sex_at_birth  0.05273735

tc 3 lr .00025 (finaltrain: gcpsr items included, 28 predictors)

# Fit: tree complexity 3, learning rate 0.00025, on finaltrain
# (response in column 1, predictors in columns 3:30).
# The seed is reset before the fit so the run can be repeated.
set.seed(1)

cvtc3.lr00025_1 <- gbm.step(
  # data, response and predictors
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  # model settings
  family = "bernoulli",
  tree.complexity = 3,
  learning.rate = 0.00025,
  bag.fraction = 0.5,
  # cross-validation and stopping settings
  fold.vector = fold.vector,
  tolerance.method = "auto",
  # tolerance = 0.01,
  max.trees = 50000
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3107 
##  
## now adding trees... 
## 100   1.3058 
## 150   1.3011 
## 200   1.2963 
## 250   1.2917 
## 300   1.2873 
## 350   1.2829 
## 400   1.2786 
## 450   1.2743 
## 500   1.2702 
## 550   1.2662 
## 600   1.2622 
## 650   1.2583 
## 700   1.2542 
## 750   1.2505 
## 800   1.2468 
## 850   1.2432 
## 900   1.2398 
## 950   1.2363 
## 1000   1.2329 
## 1050   1.2295 
## 1100   1.2261 
## 1150   1.2229 
## 1200   1.2196 
## 1250   1.2165 
## 1300   1.2134 
## 1350   1.2102 
## 1400   1.2073 
## 1450   1.2043 
## 1500   1.2015 
## 1550   1.1986 
## 1600   1.1959 
## 1650   1.1931 
## 1700   1.1903 
## 1750   1.1876 
## 1800   1.185 
## 1850   1.1824 
## 1900   1.1799 
## 1950   1.1773 
## 2000   1.1749 
## 2050   1.1725 
## 2100   1.1701 
## 2150   1.1678 
## 2200   1.1655 
## 2250   1.1632 
## 2300   1.161 
## 2350   1.1587 
## 2400   1.1565 
## 2450   1.1545 
## 2500   1.1524 
## 2550   1.1504 
## 2600   1.1484 
## 2650   1.1463 
## 2700   1.1443 
## 2750   1.1424 
## 2800   1.1406 
## 2850   1.1387 
## 2900   1.1368 
## 2950   1.135 
## 3000   1.1333 
## 3050   1.1314 
## 3100   1.1296 
## 3150   1.1278 
## 3200   1.126 
## 3250   1.1242 
## 3300   1.1225 
## 3350   1.1209 
## 3400   1.1193 
## 3450   1.1177 
## 3500   1.1163 
## 3550   1.1148 
## 3600   1.1133 
## 3650   1.1118 
## 3700   1.1103 
## 3750   1.1089 
## 3800   1.1075 
## 3850   1.106 
## 3900   1.1046 
## 3950   1.1032 
## 4000   1.1018 
## 4050   1.1005 
## 4100   1.0992 
## 4150   1.098 
## 4200   1.0967 
## 4250   1.0955 
## 4300   1.0943 
## 4350   1.0931 
## 4400   1.0918 
## 4450   1.0907 
## 4500   1.0895 
## 4550   1.0883 
## 4600   1.0873 
## 4650   1.0861 
## 4700   1.085 
## 4750   1.0839 
## 4800   1.0828 
## 4850   1.0817 
## 4900   1.0808 
## 4950   1.0797 
## 5000   1.0787 
## 5050   1.0777 
## 5100   1.0766 
## 5150   1.0757 
## 5200   1.0748 
## 5250   1.0738 
## 5300   1.0729 
## 5350   1.0721 
## 5400   1.0711 
## 5450   1.0702 
## 5500   1.0693 
## 5550   1.0685 
## 5600   1.0676 
## 5650   1.0668 
## 5700   1.0659 
## 5750   1.0652 
## 5800   1.0643 
## 5850   1.0635 
## 5900   1.0626 
## 5950   1.0619 
## 6000   1.061 
## 6050   1.0603 
## 6100   1.0596 
## 6150   1.0589 
## 6200   1.0582 
## 6250   1.0575 
## 6300   1.0569 
## 6350   1.0562 
## 6400   1.0556 
## 6450   1.0549 
## 6500   1.0542 
## 6550   1.0536 
## 6600   1.0529 
## 6650   1.0522 
## 6700   1.0516 
## 6750   1.051 
## 6800   1.0504 
## 6850   1.0498 
## 6900   1.0493 
## 6950   1.0487 
## 7000   1.0482 
## 7050   1.0476 
## 7100   1.047 
## 7150   1.0464 
## 7200   1.0458 
## 7250   1.0453 
## 7300   1.0448 
## 7350   1.0443 
## 7400   1.0439 
## 7450   1.0433 
## 7500   1.0429 
## 7550   1.0424 
## 7600   1.0419 
## 7650   1.0414 
## 7700   1.041 
## 7750   1.0405 
## 7800   1.04 
## 7850   1.0395 
## 7900   1.0391 
## 7950   1.0387 
## 8000   1.0383 
## 8050   1.0378 
## 8100   1.0374 
## 8150   1.0371 
## 8200   1.0366 
## 8250   1.0362 
## 8300   1.0358 
## 8350   1.0354 
## 8400   1.035 
## 8450   1.0347 
## 8500   1.0342 
## 8550   1.0338 
## 8600   1.0335 
## 8650   1.0332 
## 8700   1.033 
## 8750   1.0327 
## 8800   1.0323 
## 8850   1.032 
## 8900   1.0317 
## 8950   1.0314 
## 9000   1.0311 
## 9050   1.0309 
## 9100   1.0307 
## 9150   1.0303 
## 9200   1.0301 
## 9250   1.0299 
## 9300   1.0297 
## 9350   1.0293 
## 9400   1.0292 
## 9450   1.0289 
## 9500   1.0287 
## 9550   1.0285 
## 9600   1.0283 
## 9650   1.0281 
## 9700   1.0279 
## 9750   1.0277 
## 9800   1.0274 
## 9850   1.0273 
## 9900   1.0269 
## 9950   1.0267 
## 10000   1.0265 
## 10050   1.0262 
## 10100   1.0261 
## 10150   1.0259 
## 10200   1.0256 
## 10250   1.0255 
## 10300   1.0253 
## 10350   1.0251 
## 10400   1.0249 
## 10450   1.0247 
## 10500   1.0246 
## 10550   1.0243 
## 10600   1.0242 
## 10650   1.024 
## 10700   1.0239 
## 10750   1.0237 
## 10800   1.0235 
## 10850   1.0235 
## 10900   1.0234 
## 10950   1.0232 
## 11000   1.023 
## 11050   1.0229 
## 11100   1.0228 
## 11150   1.0227 
## 11200   1.0225 
## 11250   1.0226 
## 11300   1.0224 
## 11350   1.0222

## fitting final gbm model with a fixed number of  11350  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.817 
##  
## estimated cv deviance = 1.022 ; se = 0.034 
##  
## training data correlation = 0.715 
## cv correlation =  0.52 ; se = 0.042 
##  
## training data ROC score = 0.912 
## cv ROC score = 0.804 ; se = 0.022 
##  
## elapsed time -  1.21 minutes

# Cross-validation statistics for this fit: deviance, correlation,
# discrimination, calibration and threshold, each with its standard error.
cvtc3.lr00025_1[["cv.statistics"]]

## $deviance.mean
## [1] 1.022213
## 
## $deviance.se
## [1] 0.03394584
## 
## $correlation.mean
## [1] 0.5197211
## 
## $correlation.se
## [1] 0.04215822
## 
## $discrimination.mean
## [1] 0.80441
## 
## $discrimination.se
## [1] 0.02243665
## 
## $calibration.mean
## [1] 0.1104884 1.2274547 0.6741631 0.7426704 0.5184634
## 
## $calibration.se
## [1] 0.08820524 0.12971225 0.09346869 0.04313088 0.09201566
## 
## $cv.threshold
## [1] 0.4193172
## 
## $cv.threshold.se
## [1] 0.02572326

# Relative influence (rel.inf) of each predictor in cvtc3.lr00025_1.
summary(cvtc3.lr00025_1)

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 20.47588441
## cpaq8_tot                             cpaq8_tot 11.57223275
## gcpsr_4                                 gcpsr_4  9.62650477
## gcpsr_3                                 gcpsr_3  8.11295004
## gcpsr_6                                 gcpsr_6  8.10383939
## pain_duration                     pain_duration  5.80011221
## isi_tot                                 isi_tot  5.15929146
## overlapping_pain_number overlapping_pain_number  5.05791071
## age                                         age  4.17947224
## employment                           employment  3.71533569
## RST_PQ_BIS                           RST_PQ_BIS  2.41780498
## pcs_tot                                 pcs_tot  2.10603109
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.74680721
## RST_PQ_FFS                           RST_PQ_FFS  1.73596995
## meq_tot                                 meq_tot  1.36831027
## current_opioid_meds         current_opioid_meds  1.25774581
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.19963757
## bis_brief_tot                     bis_brief_tot  1.03864155
## proanx_t                               proanx_t  0.86352634
## income                                   income  0.84787837
## prodep_t                               prodep_t  0.82022784
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.79072861
## audit_total                         audit_total  0.63730757
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.58308251
## hispanic                               hispanic  0.45127198
## race                                       race  0.17871066
## assigned_sex_at_birth     assigned_sex_at_birth  0.08240295
## cuditr0                                 cuditr0  0.07038109

tc 3 lr 0.005 (finaltrain_nocgpsr: gcpsr items excluded, 24 predictors)

# Fit: tree complexity 3, learning rate 0.005, on finaltrain_nocgpsr
# (response in column 1, predictors in columns 3:26).
# The seed is reset before the fit so the run can be repeated.
set.seed(1)

cvtc3.lr005 <- gbm.step(
  # data, response and predictors
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  # model settings
  family = "bernoulli",
  tree.complexity = 3,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  # cross-validation and stopping settings
  fold.vector = fold.vector,
  tolerance.method = "auto",
  # tolerance = 0.01,
  max.trees = 50000
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2628 
##  
## now adding trees... 
## 100   1.2302 
## 150   1.2075 
## 200   1.1938 
## 250   1.185 
## 300   1.1789 
## 350   1.1752 
## 400   1.1729 
## 450   1.1715 
## 500   1.1715 
## 550   1.1715 
## 600   1.174 
## 650   1.1748 
## 700   1.1761 
## 750   1.1801 
## 800   1.1822 
## 850   1.1852 
## 900   1.1892 
## 950   1.1921 
## 1000   1.1969 
## 1050   1.202

## fitting final gbm model with a fixed number of  500  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.977 
##  
## estimated cv deviance = 1.171 ; se = 0.034 
##  
## training data correlation = 0.62 
## cv correlation =  0.389 ; se = 0.057 
##  
## training data ROC score = 0.858 
## cv ROC score = 0.704 ; se = 0.035 
##  
## elapsed time -  0.08 minutes

# Cross-validation statistics for this fit: deviance, correlation,
# discrimination, calibration and threshold, each with its standard error.
cvtc3.lr005[["cv.statistics"]]

## $deviance.mean
## [1] 1.171458
## 
## $deviance.se
## [1] 0.03422395
## 
## $correlation.mean
## [1] 0.3894043
## 
## $correlation.se
## [1] 0.05684963
## 
## $discrimination.mean
## [1] 0.70384
## 
## $discrimination.se
## [1] 0.03507343
## 
## $calibration.mean
## [1] 0.1538879 1.2530781 0.5447699 0.6408557 0.4236808
## 
## $calibration.se
## [1] 0.17104945 0.23246699 0.11820807 0.07529626 0.11151628
## 
## $cv.threshold
## [1] 0.3693004
## 
## $cv.threshold.se
## [1] 0.01098177

# Relative influence (rel.inf) of each predictor. The deviance measures used to
# compare models are printed by cvtc3.lr005$cv.statistics, not by summary().
summary(cvtc3.lr005)

##                                             var     rel.inf
## cpaq8_tot                             cpaq8_tot 28.00465037
## isi_tot                                 isi_tot  9.36610092
## overlapping_pain_number overlapping_pain_number  8.18646207
## pain_duration                     pain_duration  7.66671138
## pcs_tot                                 pcs_tot  5.98070838
## employment                           employment  5.89414809
## age                                         age  5.64973592
## current_opioid_meds         current_opioid_meds  4.42831221
## RST_PQ_BIS                           RST_PQ_BIS  3.96301221
## RST_PQ_FFS                           RST_PQ_FFS  3.71276558
## income                                   income  2.13478252
## proanx_t                               proanx_t  2.10452750
## prodep_t                               prodep_t  2.09333291
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  2.03625966
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.64959886
## audit_total                         audit_total  1.53482118
## meq_tot                                 meq_tot  1.52569922
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.02977917
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.78982337
## bis_brief_tot                     bis_brief_tot  0.78796265
## hispanic                               hispanic  0.59746129
## race                                       race  0.41122562
## cuditr0                                 cuditr0  0.40640915
## assigned_sex_at_birth     assigned_sex_at_birth  0.04570976

tc 3 lr 0.005 (finaltrain: gcpsr items included, 28 predictors)

# Fit: tree complexity 3, learning rate 0.005, on finaltrain
# (response in column 1, predictors in columns 3:30).
# The seed is reset before the fit so the run can be repeated.
set.seed(1)

cvtc3.lr005_1 <- gbm.step(
  # data, response and predictors
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  # model settings
  family = "bernoulli",
  tree.complexity = 3,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  # cross-validation and stopping settings
  fold.vector = fold.vector,
  tolerance.method = "auto",
  # tolerance = 0.01,
  max.trees = 50000
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2324 
##  
## now adding trees... 
## 100   1.174 
## 150   1.1318 
## 200   1.1022 
## 250   1.0801 
## 300   1.064 
## 350   1.0493 
## 400   1.0387 
## 450   1.0299 
## 500   1.0237 
## 550   1.0207 
## 600   1.0194 
## 650   1.018 
## 700   1.0186 
## 750   1.0193 
## 800   1.0202 
## 850   1.0223 
## 900   1.0253 
## 950   1.0268 
## 1000   1.0298 
## 1050   1.032 
## 1100   1.0354 
## 1150   1.0381 
## 1200   1.0386 
## 1250   1.0418

## fitting final gbm model with a fixed number of  650  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.788 
##  
## estimated cv deviance = 1.018 ; se = 0.035 
##  
## training data correlation = 0.728 
## cv correlation =  0.522 ; se = 0.042 
##  
## training data ROC score = 0.919 
## cv ROC score = 0.802 ; se = 0.022 
##  
## elapsed time -  0.11 minutes

# Cross-validation statistics for this fit: deviance, correlation,
# discrimination, calibration and threshold, each with its standard error.
cvtc3.lr005_1[["cv.statistics"]]

## $deviance.mean
## [1] 1.018007
## 
## $deviance.se
## [1] 0.03529081
## 
## $correlation.mean
## [1] 0.5217357
## 
## $correlation.se
## [1] 0.04163176
## 
## $discrimination.mean
## [1] 0.80245
## 
## $discrimination.se
## [1] 0.02247467
## 
## $calibration.mean
## [1] 0.08096037 1.15053821 0.68377213 0.72054011 0.54885017
## 
## $calibration.se
## [1] 0.08965561 0.12019131 0.08437965 0.04691495 0.09775994
## 
## $cv.threshold
## [1] 0.4151979
## 
## $cv.threshold.se
## [1] 0.02662583

# Relative influence (rel.inf) of each predictor in cvtc3.lr005_1.
summary(cvtc3.lr005_1)

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 18.85406655
## cpaq8_tot                             cpaq8_tot 11.67403213
## gcpsr_4                                 gcpsr_4  9.11050918
## gcpsr_6                                 gcpsr_6  7.63055040
## gcpsr_3                                 gcpsr_3  7.44701431
## pain_duration                     pain_duration  5.82593454
## overlapping_pain_number overlapping_pain_number  5.39295506
## isi_tot                                 isi_tot  4.80550375
## age                                         age  4.58891937
## employment                           employment  4.19653480
## RST_PQ_BIS                           RST_PQ_BIS  2.81638992
## RST_PQ_FFS                           RST_PQ_FFS  2.33880138
## pcs_tot                                 pcs_tot  2.19684049
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.88907691
## meq_tot                                 meq_tot  1.59984843
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.34485684
## bis_brief_tot                     bis_brief_tot  1.27531002
## income                                   income  1.23261613
## current_opioid_meds         current_opioid_meds  1.05876113
## proanx_t                               proanx_t  1.04680103
## prodep_t                               prodep_t  0.89875103
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.82105541
## hispanic                               hispanic  0.54662449
## audit_total                         audit_total  0.51500625
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.44320229
## race                                       race  0.33822940
## cuditr0                                 cuditr0  0.08314228
## assigned_sex_at_birth     assigned_sex_at_birth  0.02866647

tc 3 lr .0025 (finaltrain_nocgpsr: gcpsr items excluded, 24 predictors)

# Fit: tree complexity 3, learning rate 0.0025, on finaltrain_nocgpsr
# (response in column 1, predictors in columns 3:26).
# The seed is reset before the fit so the run can be repeated.
set.seed(1)

cvtc3.lr0025 <- gbm.step(
  # data, response and predictors
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  # model settings
  family = "bernoulli",
  tree.complexity = 3,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  # cross-validation and stopping settings
  fold.vector = fold.vector,
  tolerance.method = "auto",
  # tolerance = 0.01,
  max.trees = 50000
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2858 
##  
## now adding trees... 
## 100   1.2634 
## 150   1.2445 
## 200   1.2302 
## 250   1.2189 
## 300   1.2096 
## 350   1.2029 
## 400   1.1961 
## 450   1.1913 
## 500   1.1859 
## 550   1.1823 
## 600   1.1797 
## 650   1.1765 
## 700   1.1751 
## 750   1.1748 
## 800   1.174 
## 850   1.1739 
## 900   1.1737 
## 950   1.1736 
## 1000   1.1733 
## 1050   1.1743 
## 1100   1.1742 
## 1150   1.1754 
## 1200   1.1756 
## 1250   1.1765 
## 1300   1.1775 
## 1350   1.1782 
## 1400   1.1783 
## 1450   1.1796

## fitting final gbm model with a fixed number of  1000  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.974 
##  
## estimated cv deviance = 1.173 ; se = 0.034 
##  
## training data correlation = 0.625 
## cv correlation =  0.388 ; se = 0.056 
##  
## training data ROC score = 0.863 
## cv ROC score = 0.704 ; se = 0.033 
##  
## elapsed time -  0.11 minutes

# Cross-validation statistics for this fit: deviance, correlation,
# discrimination, calibration and threshold, each with its standard error.
cvtc3.lr0025[["cv.statistics"]]

## $deviance.mean
## [1] 1.173344
## 
## $deviance.se
## [1] 0.03377643
## 
## $correlation.mean
## [1] 0.3880087
## 
## $correlation.se
## [1] 0.05562241
## 
## $discrimination.mean
## [1] 0.70442
## 
## $discrimination.se
## [1] 0.03307114
## 
## $calibration.mean
## [1] 0.1436493 1.2436209 0.5490280 0.6416393 0.4209040
## 
## $calibration.se
## [1] 0.16669037 0.22944740 0.11556390 0.07734939 0.10613488
## 
## $cv.threshold
## [1] 0.3675768
## 
## $cv.threshold.se
## [1] 0.01046761

# Relative influence (rel.inf) of each predictor. The deviance measures used to
# compare models are printed by cvtc3.lr0025$cv.statistics, not by summary().
summary(cvtc3.lr0025)

##                                             var     rel.inf
## cpaq8_tot                             cpaq8_tot 27.87157903
## isi_tot                                 isi_tot  9.54340685
## overlapping_pain_number overlapping_pain_number  8.06856397
## pain_duration                     pain_duration  7.52742702
## pcs_tot                                 pcs_tot  6.44823434
## employment                           employment  5.80975468
## age                                         age  5.39437347
## current_opioid_meds         current_opioid_meds  3.96253349
## RST_PQ_BIS                           RST_PQ_BIS  3.84015228
## RST_PQ_FFS                           RST_PQ_FFS  3.06852862
## proanx_t                               proanx_t  2.53627654
## RST_PQ_BAS_I                       RST_PQ_BAS_I  2.37674774
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  2.00443004
## prodep_t                               prodep_t  1.70943636
## audit_total                         audit_total  1.70140294
## income                                   income  1.66332536
## meq_tot                                 meq_tot  1.40296037
## bis_brief_tot                     bis_brief_tot  1.27803965
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.18709560
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.95378897
## cuditr0                                 cuditr0  0.59923051
## race                                       race  0.58105550
## hispanic                               hispanic  0.38062038
## assigned_sex_at_birth     assigned_sex_at_birth  0.09103627

tc 3 lr .0025 (finaltrain: gcpsr items included, 28 predictors)

# Fit: tree complexity 3, learning rate 0.0025, on finaltrain
# (response in column 1, predictors in columns 3:30).
# The seed is reset before the fit so the run can be repeated.
set.seed(1)

cvtc3.lr0025_1 <- gbm.step(
  # data, response and predictors
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  # model settings
  family = "bernoulli",
  tree.complexity = 3,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  # cross-validation and stopping settings
  fold.vector = fold.vector,
  tolerance.method = "auto",
  # tolerance = 0.01,
  max.trees = 50000
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.27 
##  
## now adding trees... 
## 100   1.2323 
## 150   1.2011 
## 200   1.1745 
## 250   1.1534 
## 300   1.134 
## 350   1.1163 
## 400   1.1016 
## 450   1.0894 
## 500   1.0779 
## 550   1.0691 
## 600   1.0609 
## 650   1.0535 
## 700   1.0475 
## 750   1.0429 
## 800   1.0387 
## 850   1.0348 
## 900   1.0319 
## 950   1.0288 
## 1000   1.0278 
## 1050   1.0254 
## 1100   1.0242 
## 1150   1.0226 
## 1200   1.0208 
## 1250   1.0196 
## 1300   1.0194 
## 1350   1.0189 
## 1400   1.0192 
## 1450   1.0197 
## 1500   1.0197 
## 1550   1.0201 
## 1600   1.0203 
## 1650   1.0209 
## 1700   1.0221 
## 1750   1.0225 
## 1800   1.024 
## 1850   1.0251 
## 1900   1.0268

## fitting final gbm model with a fixed number of  1350  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.782 
##  
## estimated cv deviance = 1.019 ; se = 0.037 
##  
## training data correlation = 0.728 
## cv correlation =  0.52 ; se = 0.043 
##  
## training data ROC score = 0.919 
## cv ROC score = 0.8 ; se = 0.023 
##  
## elapsed time -  0.16 minutes

# Cross-validation statistics for this fit: deviance, correlation,
# discrimination, calibration and threshold, each with its standard error.
cvtc3.lr0025_1[["cv.statistics"]]

## $deviance.mean
## [1] 1.018948
## 
## $deviance.se
## [1] 0.03683464
## 
## $correlation.mean
## [1] 0.5199933
## 
## $correlation.se
## [1] 0.04336524
## 
## $discrimination.mean
## [1] 0.80038
## 
## $discrimination.se
## [1] 0.02330232
## 
## $calibration.mean
## [1] 0.06469214 1.12831692 0.67459484 0.72687293 0.54363087
## 
## $calibration.se
## [1] 0.08861472 0.12386696 0.08333148 0.05007122 0.10133761
## 
## $cv.threshold
## [1] 0.4205499
## 
## $cv.threshold.se
## [1] 0.0285933

# Relative influence (rel.inf) of each predictor in cvtc3.lr0025_1.
summary(cvtc3.lr0025_1)

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 18.90757351
## cpaq8_tot                             cpaq8_tot 11.22525740
## gcpsr_4                                 gcpsr_4  9.23850766
## gcpsr_6                                 gcpsr_6  7.83380532
## gcpsr_3                                 gcpsr_3  7.73764770
## pain_duration                     pain_duration  6.00416433
## overlapping_pain_number overlapping_pain_number  5.23393844
## isi_tot                                 isi_tot  4.89477563
## age                                         age  4.32143558
## employment                           employment  4.00201786
## pcs_tot                                 pcs_tot  2.57052901
## RST_PQ_BIS                           RST_PQ_BIS  2.55596157
## RST_PQ_FFS                           RST_PQ_FFS  1.92089003
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.91479568
## meq_tot                                 meq_tot  1.71118716
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.34385022
## bis_brief_tot                     bis_brief_tot  1.25504747
## income                                   income  1.17879328
## proanx_t                               proanx_t  1.12970451
## current_opioid_meds         current_opioid_meds  1.04644788
## prodep_t                               prodep_t  0.90443500
## audit_total                         audit_total  0.84087462
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.82627074
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.63539374
## hispanic                               hispanic  0.32314157
## race                                       race  0.23971822
## cuditr0                                 cuditr0  0.10646748
## assigned_sex_at_birth     assigned_sex_at_birth  0.09736839

tc 4 lr .0005 (finaltrain_nocgpsr: gcpsr items excluded, 24 predictors)

# Fit: tree complexity 4, learning rate 0.0005, on finaltrain_nocgpsr
# (response in column 1, predictors in columns 3:26).
# The seed is reset before the fit so the run can be repeated.
set.seed(1)

cvtc4.lr0005 <- gbm.step(
  # data, response and predictors
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  # model settings
  family = "bernoulli",
  tree.complexity = 4,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  # cross-validation and stopping settings
  fold.vector = fold.vector,
  tolerance.method = "auto",
  # tolerance = 0.01,
  max.trees = 50000
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3088 
##  
## now adding trees... 
## 100   1.3026 
## 150   1.2964 
## 200   1.2908 
## 250   1.2855 
## 300   1.2804 
## 350   1.2753 
## 400   1.2706 
## 450   1.2663 
## 500   1.2618 
## 550   1.2577 
## 600   1.2539 
## 650   1.25 
## 700   1.2464 
## 750   1.2431 
## 800   1.2399 
## 850   1.2369 
## 900   1.234 
## 950   1.2311 
## 1000   1.2286 
## 1050   1.2262 
## 1100   1.2238 
## 1150   1.2214 
## 1200   1.219 
## 1250   1.217 
## 1300   1.2151 
## 1350   1.2131 
## 1400   1.2112 
## 1450   1.2094 
## 1500   1.2077 
## 1550   1.2057 
## 1600   1.2041 
## 1650   1.2027 
## 1700   1.2009 
## 1750   1.1996 
## 1800   1.1982 
## 1850   1.1971 
## 1900   1.1959 
## 1950   1.1947 
## 2000   1.1937 
## 2050   1.1926 
## 2100   1.1916 
## 2150   1.1905 
## 2200   1.1895 
## 2250   1.1885 
## 2300   1.1877 
## 2350   1.1871 
## 2400   1.1863 
## 2450   1.1856 
## 2500   1.1849 
## 2550   1.1841 
## 2600   1.1837 
## 2650   1.1831 
## 2700   1.1824 
## 2750   1.1817 
## 2800   1.1814 
## 2850   1.181 
## 2900   1.1806 
## 2950   1.1803 
## 3000   1.1799 
## 3050   1.1795 
## 3100   1.179 
## 3150   1.1785 
## 3200   1.1784 
## 3250   1.1779 
## 3300   1.1776 
## 3350   1.1773 
## 3400   1.1767 
## 3450   1.1765 
## 3500   1.1764 
## 3550   1.1763 
## 3600   1.1763 
## 3650   1.1762 
## 3700   1.1761 
## 3750   1.176 
## 3800   1.176 
## 3850   1.1758 
## 3900   1.1755 
## 3950   1.1752 
## 4000   1.175 
## 4050   1.1748 
## 4100   1.1749 
## 4150   1.175 
## 4200   1.1749 
## 4250   1.1748

## fitting final gbm model with a fixed number of  4250  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.954 
##  
## estimated cv deviance = 1.175 ; se = 0.034 
##  
## training data correlation = 0.657 
## cv correlation =  0.385 ; se = 0.058 
##  
## training data ROC score = 0.883 
## cv ROC score = 0.705 ; se = 0.035 
##  
## elapsed time -  0.43 minutes

# Cross-validated fit statistics for this model: deviance, correlation,
# discrimination, calibration and threshold, each with its standard error.
cvtc4.lr0005$cv.statistics

## $deviance.mean
## [1] 1.174812
## 
## $deviance.se
## [1] 0.03380357
## 
## $correlation.mean
## [1] 0.3848734
## 
## $correlation.se
## [1] 0.05818755
## 
## $discrimination.mean
## [1] 0.70497
## 
## $discrimination.se
## [1] 0.0349439
## 
## $calibration.mean
## [1] 0.1508489 1.2638484 0.5443256 0.6443981 0.4218594
## 
## $calibration.se
## [1] 0.17279876 0.24444831 0.11769606 0.07680342 0.11025376
## 
## $cv.threshold
## [1] 0.3685767
## 
## $cv.threshold.se
## [1] 0.01112321

summary(cvtc4.lr0005) # relative influence (rel.inf) of each predictor; the CV deviance used to compare models is in $cv.statistics

##                                             var     rel.inf
## cpaq8_tot                             cpaq8_tot 24.96481629
## isi_tot                                 isi_tot  9.15935125
## pain_duration                     pain_duration  8.35583424
## overlapping_pain_number overlapping_pain_number  8.18561362
## pcs_tot                                 pcs_tot  6.09259859
## employment                           employment  5.99251898
## age                                         age  5.50529227
## RST_PQ_BIS                           RST_PQ_BIS  4.42581835
## current_opioid_meds         current_opioid_meds  3.60051346
## RST_PQ_FFS                           RST_PQ_FFS  3.40737612
## RST_PQ_BAS_I                       RST_PQ_BAS_I  2.58610171
## proanx_t                               proanx_t  2.50708286
## prodep_t                               prodep_t  2.32810799
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  2.16773450
## income                                   income  2.09859322
## audit_total                         audit_total  1.94767208
## meq_tot                                 meq_tot  1.70182979
## bis_brief_tot                     bis_brief_tot  1.41357799
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.11832649
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.06933996
## cuditr0                                 cuditr0  0.46540678
## hispanic                               hispanic  0.41315748
## race                                       race  0.39960025
## assigned_sex_at_birth     assigned_sex_at_birth  0.09373574

tc 4 lr .0005 - finaltrain (28 predictors, including gcpsr_3 to gcpsr_6)

# BRT fit: tree complexity 4, learning rate 0.0005, on finaltrain.
# The seed is reset before every fit so each model starts from the same RNG state.
set.seed(1)

cvtc4.lr0005_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,                  # response column (gcpsr_2_HICP_num in the run log)
  gbm.x = 3:30,               # predictor columns (28 of them)
  family = "bernoulli",
  tree.complexity = 4,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto",  # explicit `tolerance = 0.01` is left disabled
  fold.vector = fold.vector   # user-supplied CV fold assignment shared by all fits
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3052 
##  
## now adding trees... 
## 100   1.2953 
## 150   1.2857 
## 200   1.2765 
## 250   1.2676 
## 300   1.2591 
## 350   1.2509 
## 400   1.2431 
## 450   1.2355 
## 500   1.2284 
## 550   1.2217 
## 600   1.2149 
## 650   1.2084 
## 700   1.2019 
## 750   1.1958 
## 800   1.1901 
## 850   1.1844 
## 900   1.1792 
## 950   1.174 
## 1000   1.1691 
## 1050   1.1641 
## 1100   1.1592 
## 1150   1.1547 
## 1200   1.1502 
## 1250   1.1461 
## 1300   1.1419 
## 1350   1.1379 
## 1400   1.1342 
## 1450   1.1304 
## 1500   1.1268 
## 1550   1.1233 
## 1600   1.1199 
## 1650   1.1166 
## 1700   1.1132 
## 1750   1.1099 
## 1800   1.1067 
## 1850   1.1036 
## 1900   1.101 
## 1950   1.0983 
## 2000   1.0956 
## 2050   1.093 
## 2100   1.0905 
## 2150   1.0881 
## 2200   1.0857 
## 2250   1.0836 
## 2300   1.0815 
## 2350   1.0794 
## 2400   1.0773 
## 2450   1.0755 
## 2500   1.0734 
## 2550   1.0715 
## 2600   1.0696 
## 2650   1.0679 
## 2700   1.066 
## 2750   1.0643 
## 2800   1.0629 
## 2850   1.0615 
## 2900   1.0599 
## 2950   1.0585 
## 3000   1.0571 
## 3050   1.0556 
## 3100   1.0543 
## 3150   1.0531 
## 3200   1.0519 
## 3250   1.0505 
## 3300   1.0492 
## 3350   1.0481 
## 3400   1.0468 
## 3450   1.0457 
## 3500   1.045 
## 3550   1.0439 
## 3600   1.0431 
## 3650   1.0422 
## 3700   1.0414 
## 3750   1.0406 
## 3800   1.0398 
## 3850   1.0389 
## 3900   1.038 
## 3950   1.0373 
## 4000   1.0365 
## 4050   1.036 
## 4100   1.0352 
## 4150   1.0346 
## 4200   1.034 
## 4250   1.0334 
## 4300   1.0328 
## 4350   1.0323 
## 4400   1.0317 
## 4450   1.0313 
## 4500   1.0308 
## 4550   1.0304 
## 4600   1.0303 
## 4650   1.03 
## 4700   1.0295 
## 4750   1.0291 
## 4800   1.0286 
## 4850   1.0283 
## 4900   1.0279 
## 4950   1.0275 
## 5000   1.0271 
## 5050   1.0268 
## 5100   1.0265 
## 5150   1.0265 
## 5200   1.0266 
## 5250   1.0265 
## 5300   1.0262 
## 5350   1.026 
## 5400   1.0257 
## 5450   1.0255 
## 5500   1.0255 
## 5550   1.0253 
## 5600   1.0251 
## 5650   1.0248 
## 5700   1.0246 
## 5750   1.0244 
## 5800   1.0243 
## 5850   1.0241 
## 5900   1.0239 
## 5950   1.0239 
## 6000   1.0237 
## 6050   1.0238 
## 6100   1.0237 
## 6150   1.0239 
## 6200   1.0239 
## 6250   1.0239 
## 6300   1.0239

## fitting final gbm model with a fixed number of  6100  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.741 
##  
## estimated cv deviance = 1.024 ; se = 0.038 
##  
## training data correlation = 0.76 
## cv correlation =  0.516 ; se = 0.045 
##  
## training data ROC score = 0.936 
## cv ROC score = 0.798 ; se = 0.023 
##  
## elapsed time -  0.74 minutes

# Cross-validated fit statistics for this model: deviance, correlation,
# discrimination, calibration and threshold, each with its standard error.
cvtc4.lr0005_1$cv.statistics

## $deviance.mean
## [1] 1.023654
## 
## $deviance.se
## [1] 0.03787976
## 
## $correlation.mean
## [1] 0.5159064
## 
## $correlation.se
## [1] 0.04519974
## 
## $discrimination.mean
## [1] 0.79816
## 
## $discrimination.se
## [1] 0.02332668
## 
## $calibration.mean
## [1] 0.05894484 1.12036177 0.66342652 0.72325331 0.54506963
## 
## $calibration.se
## [1] 0.09060603 0.12725737 0.08703821 0.04879352 0.10945047
## 
## $cv.threshold
## [1] 0.4188295
## 
## $cv.threshold.se
## [1] 0.03089155

summary(cvtc4.lr0005_1) # relative influence (rel.inf) of each predictor; the CV deviance used to compare models is in $cv.statistics

##                                             var    rel.inf
## gcpsr_5                                 gcpsr_5 17.7462820
## cpaq8_tot                             cpaq8_tot 10.3972987
## gcpsr_4                                 gcpsr_4  8.6004753
## gcpsr_3                                 gcpsr_3  7.1225212
## gcpsr_6                                 gcpsr_6  7.0692838
## pain_duration                     pain_duration  6.6669652
## isi_tot                                 isi_tot  5.4021229
## overlapping_pain_number overlapping_pain_number  4.7815971
## age                                         age  4.1823253
## employment                           employment  4.0093887
## RST_PQ_BIS                           RST_PQ_BIS  2.7178748
## pcs_tot                                 pcs_tot  2.5171711
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  2.3811841
## RST_PQ_FFS                           RST_PQ_FFS  2.3497418
## meq_tot                                 meq_tot  2.0125052
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.7954156
## bis_brief_tot                     bis_brief_tot  1.4725428
## income                                   income  1.4614677
## proanx_t                               proanx_t  1.2247226
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.2081766
## current_opioid_meds         current_opioid_meds  1.1072212
## prodep_t                               prodep_t  1.0650734
## audit_total                         audit_total  1.0280835
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.7772383
## hispanic                               hispanic  0.4117909
## race                                       race  0.2005061
## cuditr0                                 cuditr0  0.1642944
## assigned_sex_at_birth     assigned_sex_at_birth  0.1267294

tc 4 lr .0025 - finaltrain_nocgpsr (24 predictors, gcpsr items excluded)

# BRT fit: tree complexity 4, learning rate 0.0025, on finaltrain_nocgpsr.
# The seed is reset before every fit so each model starts from the same RNG state.
set.seed(1)

cvtc4.lr0025 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,                  # response column (gcpsr_2_HICP_num in the run log)
  gbm.x = 3:26,               # predictor columns (24 of them)
  family = "bernoulli",
  tree.complexity = 4,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto",  # explicit `tolerance = 0.01` is left disabled
  fold.vector = fold.vector   # user-supplied CV fold assignment shared by all fits
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2852 
##  
## now adding trees... 
## 100   1.262 
## 150   1.2433 
## 200   1.2286 
## 250   1.2176 
## 300   1.2083 
## 350   1.2022 
## 400   1.1958 
## 450   1.1914 
## 500   1.186 
## 550   1.1832 
## 600   1.1817 
## 650   1.1792 
## 700   1.1782 
## 750   1.1785 
## 800   1.1784 
## 850   1.1782 
## 900   1.1789 
## 950   1.1799 
## 1000   1.1808 
## 1050   1.1825 
## 1100   1.183 
## 1150   1.1841 
## 1200   1.1844 
## 1250   1.186 
## 1300   1.1873 
## 1350   1.189

## fitting final gbm model with a fixed number of  700  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.991 
##  
## estimated cv deviance = 1.178 ; se = 0.03 
##  
## training data correlation = 0.635 
## cv correlation =  0.385 ; se = 0.056 
##  
## training data ROC score = 0.871 
## cv ROC score = 0.702 ; se = 0.034 
##  
## elapsed time -  0.12 minutes

# Cross-validated fit statistics for this model: deviance, correlation,
# discrimination, calibration and threshold, each with its standard error.
cvtc4.lr0025$cv.statistics

## $deviance.mean
## [1] 1.17818
## 
## $deviance.se
## [1] 0.0300742
## 
## $correlation.mean
## [1] 0.3852169
## 
## $correlation.se
## [1] 0.05632446
## 
## $discrimination.mean
## [1] 0.70206
## 
## $discrimination.se
## [1] 0.03388197
## 
## $calibration.mean
## [1] 0.2264815 1.3977707 0.5226442 0.6615757 0.3998642
## 
## $calibration.se
## [1] 0.18169998 0.26146277 0.11943434 0.07853788 0.11187414
## 
## $cv.threshold
## [1] 0.3627988
## 
## $cv.threshold.se
## [1] 0.01032532

summary(cvtc4.lr0025) # relative influence (rel.inf) of each predictor; the CV deviance used to compare models is in $cv.statistics

##                                             var     rel.inf
## cpaq8_tot                             cpaq8_tot 27.56167967
## isi_tot                                 isi_tot  9.26334916
## overlapping_pain_number overlapping_pain_number  8.43649971
## pain_duration                     pain_duration  7.52447674
## pcs_tot                                 pcs_tot  5.88890519
## age                                         age  5.71433516
## employment                           employment  5.41720721
## current_opioid_meds         current_opioid_meds  4.02878380
## RST_PQ_BIS                           RST_PQ_BIS  3.92898660
## RST_PQ_FFS                           RST_PQ_FFS  2.93997241
## RST_PQ_BAS_I                       RST_PQ_BAS_I  2.49612532
## proanx_t                               proanx_t  2.43677288
## prodep_t                               prodep_t  2.24771264
## income                                   income  1.85403933
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.84610774
## audit_total                         audit_total  1.72149558
## meq_tot                                 meq_tot  1.70597803
## bis_brief_tot                     bis_brief_tot  1.66031175
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.11162922
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.87634997
## cuditr0                                 cuditr0  0.50935517
## hispanic                               hispanic  0.39396171
## race                                       race  0.38401294
## assigned_sex_at_birth     assigned_sex_at_birth  0.05195207

tc 4 lr .0025 - finaltrain (28 predictors, including gcpsr_3 to gcpsr_6)

# BRT fit: tree complexity 4, learning rate 0.0025, on finaltrain.
# The seed is reset before every fit so each model starts from the same RNG state.
set.seed(1)

cvtc4.lr0025_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,                  # response column (gcpsr_2_HICP_num in the run log)
  gbm.x = 3:30,               # predictor columns (28 of them)
  family = "bernoulli",
  tree.complexity = 4,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto",  # explicit `tolerance = 0.01` is left disabled
  fold.vector = fold.vector   # user-supplied CV fold assignment shared by all fits
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2676 
##  
## now adding trees... 
## 100   1.2285 
## 150   1.1957 
## 200   1.1683 
## 250   1.1465 
## 300   1.127 
## 350   1.1095 
## 400   1.0953 
## 450   1.0831 
## 500   1.0722 
## 550   1.0643 
## 600   1.0568 
## 650   1.05 
## 700   1.0448 
## 750   1.0409 
## 800   1.0378 
## 850   1.0349 
## 900   1.0321 
## 950   1.0299 
## 1000   1.0298 
## 1050   1.0281 
## 1100   1.0274 
## 1150   1.0269 
## 1200   1.0256 
## 1250   1.0255 
## 1300   1.0251 
## 1350   1.026 
## 1400   1.0267 
## 1450   1.0279 
## 1500   1.0288 
## 1550   1.0293 
## 1600   1.0304 
## 1650   1.0312 
## 1700   1.0327 
## 1750   1.034

## fitting final gbm model with a fixed number of  1300  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.727 
##  
## estimated cv deviance = 1.025 ; se = 0.039 
##  
## training data correlation = 0.765 
## cv correlation =  0.515 ; se = 0.045 
##  
## training data ROC score = 0.938 
## cv ROC score = 0.798 ; se = 0.023 
##  
## elapsed time -  0.18 minutes

# Cross-validated fit statistics for this model: deviance, correlation,
# discrimination, calibration and threshold, each with its standard error.
cvtc4.lr0025_1$cv.statistics

## $deviance.mean
## [1] 1.025138
## 
## $deviance.se
## [1] 0.03930888
## 
## $correlation.mean
## [1] 0.5149502
## 
## $correlation.se
## [1] 0.04549456
## 
## $discrimination.mean
## [1] 0.79797
## 
## $discrimination.se
## [1] 0.02288092
## 
## $calibration.mean
## [1] 0.03859933 1.07865202 0.65052401 0.70979742 0.52417358
## 
## $calibration.se
## [1] 0.09144014 0.12448279 0.07973867 0.05271127 0.10361619
## 
## $cv.threshold
## [1] 0.4173239
## 
## $cv.threshold.se
## [1] 0.03144449

summary(cvtc4.lr0025_1) # relative influence (rel.inf) of each predictor; the CV deviance used to compare models is in $cv.statistics

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 17.01918517
## cpaq8_tot                             cpaq8_tot 10.18309066
## gcpsr_4                                 gcpsr_4  8.99823451
## gcpsr_3                                 gcpsr_3  7.01772634
## pain_duration                     pain_duration  6.85567544
## gcpsr_6                                 gcpsr_6  6.78372634
## isi_tot                                 isi_tot  5.22408302
## overlapping_pain_number overlapping_pain_number  4.81306567
## employment                           employment  4.48735020
## age                                         age  4.27329022
## RST_PQ_BIS                           RST_PQ_BIS  3.01888290
## RST_PQ_FFS                           RST_PQ_FFS  2.47565161
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  2.37384892
## pcs_tot                                 pcs_tot  2.29463003
## meq_tot                                 meq_tot  2.03945160
## income                                   income  1.79352971
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.65033145
## proanx_t                               proanx_t  1.38284378
## bis_brief_tot                     bis_brief_tot  1.38174185
## prodep_t                               prodep_t  1.18219987
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.08610878
## current_opioid_meds         current_opioid_meds  1.02963677
## audit_total                         audit_total  0.95129518
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.73134704
## hispanic                               hispanic  0.49211702
## race                                       race  0.23318792
## cuditr0                                 cuditr0  0.13836966
## assigned_sex_at_birth     assigned_sex_at_birth  0.08939836

tc 4 lr .005 - finaltrain_nocgpsr (24 predictors, gcpsr items excluded)

# BRT fit: tree complexity 4, learning rate 0.005, on finaltrain_nocgpsr.
# The seed is reset before every fit so each model starts from the same RNG state.
set.seed(1)

cvtc4.lr005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,                  # response column (gcpsr_2_HICP_num in the run log)
  gbm.x = 3:26,               # predictor columns (24 of them)
  family = "bernoulli",
  tree.complexity = 4,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto",  # explicit `tolerance = 0.01` is left disabled
  fold.vector = fold.vector   # user-supplied CV fold assignment shared by all fits
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2617 
##  
## now adding trees... 
## 100   1.2289 
## 150   1.2057 
## 200   1.1937 
## 250   1.1867 
## 300   1.1806 
## 350   1.179 
## 400   1.1781 
## 450   1.1789 
## 500   1.1791 
## 550   1.181 
## 600   1.185 
## 650   1.1874 
## 700   1.1907 
## 750   1.1951 
## 800   1.1977 
## 850   1.2021 
## 900   1.2075 
## 950   1.2121 
## 1000   1.2169

## fitting final gbm model with a fixed number of  400  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.965 
##  
## estimated cv deviance = 1.178 ; se = 0.033 
##  
## training data correlation = 0.652 
## cv correlation =  0.381 ; se = 0.058 
##  
## training data ROC score = 0.88 
## cv ROC score = 0.704 ; se = 0.035 
##  
## elapsed time -  0.09 minutes

# Cross-validated fit statistics for this model: deviance, correlation,
# discrimination, calibration and threshold, each with its standard error.
cvtc4.lr005$cv.statistics

## $deviance.mean
## [1] 1.178145
## 
## $deviance.se
## [1] 0.0329135
## 
## $correlation.mean
## [1] 0.3807468
## 
## $correlation.se
## [1] 0.05781186
## 
## $discrimination.mean
## [1] 0.70391
## 
## $discrimination.se
## [1] 0.03467864
## 
## $calibration.mean
## [1] 0.1713711 1.2759530 0.5365084 0.6523755 0.4077083
## 
## $calibration.se
## [1] 0.17601866 0.23955150 0.11572957 0.07639729 0.11051462
## 
## $cv.threshold
## [1] 0.3658574
## 
## $cv.threshold.se
## [1] 0.01048835

summary(cvtc4.lr005) # relative influence (rel.inf) of each predictor; the CV deviance used to compare models is in $cv.statistics

##                                             var    rel.inf
## cpaq8_tot                             cpaq8_tot 26.2840671
## isi_tot                                 isi_tot  9.4296163
## overlapping_pain_number overlapping_pain_number  8.2872354
## pain_duration                     pain_duration  7.8356509
## pcs_tot                                 pcs_tot  5.4815365
## employment                           employment  5.0561635
## age                                         age  4.9146848
## RST_PQ_BIS                           RST_PQ_BIS  3.9821734
## current_opioid_meds         current_opioid_meds  3.9225084
## RST_PQ_FFS                           RST_PQ_FFS  3.4850991
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  2.8050911
## prodep_t                               prodep_t  2.4596222
## RST_PQ_BAS_I                       RST_PQ_BAS_I  2.2454212
## proanx_t                               proanx_t  2.2241249
## audit_total                         audit_total  2.1227502
## meq_tot                                 meq_tot  1.9862854
## income                                   income  1.8925231
## bis_brief_tot                     bis_brief_tot  1.4793891
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.4591011
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.4010677
## cuditr0                                 cuditr0  0.5013392
## hispanic                               hispanic  0.3429328
## race                                       race  0.2146518
## assigned_sex_at_birth     assigned_sex_at_birth  0.1869650

tc 4 lr .005 - finaltrain (28 predictors, including gcpsr_3 to gcpsr_6)

# BRT fit: tree complexity 4, learning rate 0.005, on finaltrain.
# The seed is reset before every fit so each model starts from the same RNG state.
set.seed(1)

cvtc4.lr005_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,                  # response column (gcpsr_2_HICP_num in the run log)
  gbm.x = 3:30,               # predictor columns (28 of them)
  family = "bernoulli",
  tree.complexity = 4,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto",  # explicit `tolerance = 0.01` is left disabled
  fold.vector = fold.vector   # user-supplied CV fold assignment shared by all fits
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2285 
##  
## now adding trees... 
## 100   1.1687 
## 150   1.1258 
## 200   1.0982 
## 250   1.0766 
## 300   1.0607 
## 350   1.0492 
## 400   1.0403 
## 450   1.0321 
## 500   1.0273 
## 550   1.0267 
## 600   1.028 
## 650   1.0276 
## 700   1.0282 
## 750   1.0286 
## 800   1.0309 
## 850   1.0342 
## 900   1.0378 
## 950   1.0407 
## 1000   1.0455 
## 1050   1.0488 
## 1100   1.0522 
## 1150   1.0547 
## 1200   1.0574

## fitting final gbm model with a fixed number of  550  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.767 
##  
## estimated cv deviance = 1.027 ; se = 0.036 
##  
## training data correlation = 0.75 
## cv correlation =  0.516 ; se = 0.043 
##  
## training data ROC score = 0.931 
## cv ROC score = 0.798 ; se = 0.023 
##  
## elapsed time -  0.12 minutes

# Cross-validated fit statistics for this model: deviance, correlation,
# discrimination, calibration and threshold, each with its standard error.
cvtc4.lr005_1$cv.statistics

## $deviance.mean
## [1] 1.026706
## 
## $deviance.se
## [1] 0.03611049
## 
## $correlation.mean
## [1] 0.5162188
## 
## $correlation.se
## [1] 0.04348085
## 
## $discrimination.mean
## [1] 0.79781
## 
## $discrimination.se
## [1] 0.0229387
## 
## $calibration.mean
## [1] 0.08871788 1.16734149 0.65605108 0.71303688 0.51401193
## 
## $calibration.se
## [1] 0.09261850 0.12843039 0.08660366 0.04813533 0.09606418
## 
## $cv.threshold
## [1] 0.4147268
## 
## $cv.threshold.se
## [1] 0.02682757

summary(cvtc4.lr005_1) # relative influence (rel.inf) of each predictor; the CV deviance used to compare models is in $cv.statistics

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 19.01962502
## cpaq8_tot                             cpaq8_tot 10.74712482
## gcpsr_4                                 gcpsr_4  8.00001133
## gcpsr_6                                 gcpsr_6  7.08829050
## gcpsr_3                                 gcpsr_3  6.91509563
## pain_duration                     pain_duration  6.86897080
## isi_tot                                 isi_tot  5.47195965
## overlapping_pain_number overlapping_pain_number  5.09198803
## employment                           employment  4.36779526
## age                                         age  4.13424532
## RST_PQ_BIS                           RST_PQ_BIS  3.01564445
## meq_tot                                 meq_tot  2.39355267
## pcs_tot                                 pcs_tot  2.02499560
## RST_PQ_FFS                           RST_PQ_FFS  1.92794921
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.83393971
## bis_brief_tot                     bis_brief_tot  1.63479260
## income                                   income  1.46998213
## current_opioid_meds         current_opioid_meds  1.41205137
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.38158154
## prodep_t                               prodep_t  1.23545302
## proanx_t                               proanx_t  1.02086647
## audit_total                         audit_total  0.95362422
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.87539735
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.58873955
## hispanic                               hispanic  0.31724981
## cuditr0                                 cuditr0  0.08464127
## race                                       race  0.06279426
## assigned_sex_at_birth     assigned_sex_at_birth  0.06163839

tc 7 lr .0005 - finaltrain_nocgpsr (24 predictors, gcpsr items excluded)

# BRT fit: tree complexity 7, learning rate 0.0005, on finaltrain_nocgpsr.
# The seed is reset before every fit so each model starts from the same RNG state.
set.seed(1)

cvtc7.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,                  # response column (gcpsr_2_HICP_num in the run log)
  gbm.x = 3:26,               # predictor columns (24 of them)
  family = "bernoulli",
  tree.complexity = 7,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto",  # explicit `tolerance = 0.01` is left disabled
  fold.vector = fold.vector   # user-supplied CV fold assignment shared by all fits
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3086 
##  
## now adding trees... 
## 100   1.3021 
## 150   1.2959 
## 200   1.2902 
## 250   1.2848 
## 300   1.2797 
## 350   1.2746 
## 400   1.2698 
## 450   1.2654 
## 500   1.2609 
## 550   1.2567 
## 600   1.2528 
## 650   1.249 
## 700   1.2454 
## 750   1.242 
## 800   1.2388 
## 850   1.2358 
## 900   1.2329 
## 950   1.2301 
## 1000   1.2279 
## 1050   1.2256 
## 1100   1.2231 
## 1150   1.221 
## 1200   1.2186 
## 1250   1.2165 
## 1300   1.2147 
## 1350   1.213 
## 1400   1.2112 
## 1450   1.2096 
## 1500   1.208 
## 1550   1.2062 
## 1600   1.2047 
## 1650   1.2034 
## 1700   1.2017 
## 1750   1.2004 
## 1800   1.1992 
## 1850   1.1983 
## 1900   1.1971 
## 1950   1.1961 
## 2000   1.1952 
## 2050   1.1942 
## 2100   1.1935 
## 2150   1.1925 
## 2200   1.1918 
## 2250   1.1908 
## 2300   1.1903 
## 2350   1.1899 
## 2400   1.1891 
## 2450   1.1888 
## 2500   1.1884 
## 2550   1.1877 
## 2600   1.1876 
## 2650   1.1872 
## 2700   1.1866 
## 2750   1.1861 
## 2800   1.1859 
## 2850   1.1858 
## 2900   1.1856 
## 2950   1.1854 
## 3000   1.1851 
## 3050   1.1849 
## 3100   1.1846 
## 3150   1.1844 
## 3200   1.1843 
## 3250   1.1843 
## 3300   1.1842 
## 3350   1.1841 
## 3400   1.1837 
## 3450   1.1837 
## 3500   1.1839 
## 3550   1.1841 
## 3600   1.1844 
## 3650   1.1845

## fitting final gbm model with a fixed number of  3400  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.896 
##  
## estimated cv deviance = 1.184 ; se = 0.032 
##  
## training data correlation = 0.73 
## cv correlation =  0.374 ; se = 0.058 
##  
## training data ROC score = 0.929 
## cv ROC score = 0.7 ; se = 0.035 
##  
## elapsed time -  0.55 minutes

# Cross-validated fit statistics for this model: deviance, correlation,
# discrimination, calibration and threshold, each with its standard error.
cvtc7.lr0005$cv.statistics

## $deviance.mean
## [1] 1.183677
## 
## $deviance.se
## [1] 0.03204527
## 
## $correlation.mean
## [1] 0.3735429
## 
## $correlation.se
## [1] 0.05756677
## 
## $discrimination.mean
## [1] 0.69994
## 
## $discrimination.se
## [1] 0.03451235
## 
## $calibration.mean
## [1] 0.1435293 1.2494374 0.5545121 0.6393124 0.4386365
## 
## $calibration.se
## [1] 0.17080188 0.23951765 0.11551781 0.07927105 0.11225110
## 
## $cv.threshold
## [1] 0.3722637
## 
## $cv.threshold.se
## [1] 0.01102345

summary(cvtc7.lr0005) # relative influence (rel.inf) of each predictor; the CV deviance used to compare models is in $cv.statistics

##                                             var    rel.inf
## cpaq8_tot                             cpaq8_tot 21.4802327
## pain_duration                     pain_duration  8.7570713
## isi_tot                                 isi_tot  8.3914981
## overlapping_pain_number overlapping_pain_number  6.7332283
## employment                           employment  6.3353581
## pcs_tot                                 pcs_tot  5.8214480
## age                                         age  5.0204265
## RST_PQ_BIS                           RST_PQ_BIS  4.9334367
## RST_PQ_FFS                           RST_PQ_FFS  3.5101919
## income                                   income  3.4152542
## RST_PQ_BAS_I                       RST_PQ_BAS_I  3.0885317
## prodep_t                               prodep_t  2.8672008
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  2.7066101
## proanx_t                               proanx_t  2.7023593
## current_opioid_meds         current_opioid_meds  2.6500610
## meq_tot                                 meq_tot  2.5755224
## audit_total                         audit_total  2.2503655
## bis_brief_tot                     bis_brief_tot  1.9426311
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.7699601
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.6124604
## cuditr0                                 cuditr0  0.7091166
## hispanic                               hispanic  0.2728453
## race                                       race  0.2587092
## assigned_sex_at_birth     assigned_sex_at_birth  0.1954807

tc 7 lr .0005 - finaltrain (28 predictors, including gcpsr_3 to gcpsr_6)

# BRT fit: tree complexity 7, learning rate 0.0005, on finaltrain.
# The seed is reset before every fit so each model starts from the same RNG state.
set.seed(1)

cvtc7.lr0005_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,                  # response column (gcpsr_2_HICP_num in the run log)
  gbm.x = 3:30,               # predictor columns (28 of them)
  family = "bernoulli",
  tree.complexity = 7,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto",  # explicit `tolerance = 0.01` is left disabled
  fold.vector = fold.vector   # user-supplied CV fold assignment shared by all fits
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3044 
##  
## now adding trees... 
## 100   1.2938 
## 150   1.2835 
## 200   1.2737 
## 250   1.2645 
## 300   1.2555 
## 350   1.247 
## 400   1.2389 
## 450   1.2308 
## 500   1.2234 
## 550   1.2163 
## 600   1.2095 
## 650   1.2029 
## 700   1.196 
## 750   1.1898 
## 800   1.1839 
## 850   1.1783 
## 900   1.173 
## 950   1.1677 
## 1000   1.1629 
## 1050   1.1579 
## 1100   1.153 
## 1150   1.1486 
## 1200   1.1439 
## 1250   1.1399 
## 1300   1.1358 
## 1350   1.1317 
## 1400   1.128 
## 1450   1.1243 
## 1500   1.1208 
## 1550   1.1174 
## 1600   1.1142 
## 1650   1.1107 
## 1700   1.1075 
## 1750   1.1043 
## 1800   1.1014 
## 1850   1.0985 
## 1900   1.0961 
## 1950   1.0935 
## 2000   1.0909 
## 2050   1.0883 
## 2100   1.0861 
## 2150   1.0839 
## 2200   1.0815 
## 2250   1.0796 
## 2300   1.0778 
## 2350   1.0759 
## 2400   1.0738 
## 2450   1.0723 
## 2500   1.0704 
## 2550   1.0687 
## 2600   1.0669 
## 2650   1.0654 
## 2700   1.0638 
## 2750   1.0624 
## 2800   1.0609 
## 2850   1.0599 
## 2900   1.0584 
## 2950   1.0571 
## 3000   1.0559 
## 3050   1.0547 
## 3100   1.0537 
## 3150   1.0528 
## 3200   1.0519 
## 3250   1.0509 
## 3300   1.0498 
## 3350   1.0489 
## 3400   1.0481 
## 3450   1.0472 
## 3500   1.0466 
## 3550   1.0458 
## 3600   1.0454 
## 3650   1.0448 
## 3700   1.0443 
## 3750   1.0438 
## 3800   1.0432 
## 3850   1.0426 
## 3900   1.042 
## 3950   1.0415 
## 4000   1.041 
## 4050   1.0407 
## 4100   1.0402 
## 4150   1.0398 
## 4200   1.0397 
## 4250   1.0393 
## 4300   1.0391 
## 4350   1.0389 
## 4400   1.0385 
## 4450   1.0383 
## 4500   1.0379 
## 4550   1.0376 
## 4600   1.0376 
## 4650   1.0373 
## 4700   1.037 
## 4750   1.0371 
## 4800   1.0369 
## 4850   1.0368 
## 4900   1.0367 
## 4950   1.0365 
## 5000   1.0362 
## 5050   1.036 
## 5100   1.0361 
## 5150   1.0363 
## 5200   1.0365 
## 5250   1.0366 
## 5300   1.0367

## fitting final gbm model with a fixed number of  5050  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.666 
##  
## estimated cv deviance = 1.036 ; se = 0.038 
##  
## training data correlation = 0.82 
## cv correlation =  0.507 ; se = 0.046 
##  
## training data ROC score = 0.964 
## cv ROC score = 0.789 ; se = 0.025 
##  
## elapsed time -  0.93 minutes

# Cross-validation statistics for this fit: deviance, correlation,
# discrimination, calibration and threshold (means and standard errors).
cvtc7.lr0005_1$cv.statistics

## $deviance.mean
## [1] 1.036001
## 
## $deviance.se
## [1] 0.03786563
## 
## $correlation.mean
## [1] 0.5072275
## 
## $correlation.se
## [1] 0.04604393
## 
## $discrimination.mean
## [1] 0.78889
## 
## $discrimination.se
## [1] 0.02501377
## 
## $calibration.mean
## [1] 0.05500414 1.09376000 0.64646320 0.69874566 0.52160042
## 
## $calibration.se
## [1] 0.09349984 0.12339961 0.07951680 0.05195854 0.10265585
## 
## $cv.threshold
## [1] 0.4186719
## 
## $cv.threshold.se
## [1] 0.031679

# Relative influence (rel.inf) of each predictor for this fit.
summary(cvtc7.lr0005_1)

##                                             var    rel.inf
## gcpsr_5                                 gcpsr_5 15.5652641
## cpaq8_tot                             cpaq8_tot  9.1917185
## pain_duration                     pain_duration  7.6887020
## gcpsr_4                                 gcpsr_4  7.2466827
## gcpsr_3                                 gcpsr_3  6.0423804
## gcpsr_6                                 gcpsr_6  5.9061624
## isi_tot                                 isi_tot  5.2183183
## employment                           employment  4.6172084
## overlapping_pain_number overlapping_pain_number  4.5413599
## age                                         age  4.0874978
## pcs_tot                                 pcs_tot  3.0368260
## RST_PQ_BIS                           RST_PQ_BIS  2.9141104
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  2.7225697
## RST_PQ_FFS                           RST_PQ_FFS  2.6895609
## meq_tot                                 meq_tot  2.5811279
## income                                   income  2.3821865
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  2.2011815
## bis_brief_tot                     bis_brief_tot  1.9405536
## proanx_t                               proanx_t  1.6974620
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.6193291
## prodep_t                               prodep_t  1.4872161
## audit_total                         audit_total  1.4742663
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.3208316
## current_opioid_meds         current_opioid_meds  0.9274296
## hispanic                               hispanic  0.3485474
## cuditr0                                 cuditr0  0.2265714
## assigned_sex_at_birth     assigned_sex_at_birth  0.2000181
## race                                       race  0.1249175

tc 7 lr .0025 (finaltrain_nocgpsr: 24 predictors, gcpsr items excluded)

# Fix the RNG seed before fitting; the fit uses bag.fraction = 0.5.
set.seed(1)

# Boosted regression tree on finaltrain_nocgpsr with tree complexity 7 and
# learning rate 0.0025, cross-validated on the user-supplied fold vector.
cvtc7.lr0025 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,     # response column (reported in the log as gcpsr_2_HICP_num)
  gbm.x = 3:26,  # 24 predictor columns, selected by position
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  # No explicit tolerance (e.g. tolerance = 0.01) is passed; "auto" is used.
  tolerance.method = "auto",
  max.trees = 50000
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2844 
##  
## now adding trees... 
## 100   1.2609 
## 150   1.242 
## 200   1.2274 
## 250   1.2164 
## 300   1.2067 
## 350   1.2013 
## 400   1.1956 
## 450   1.192 
## 500   1.1875 
## 550   1.1851 
## 600   1.1833 
## 650   1.1833 
## 700   1.1821 
## 750   1.1837 
## 800   1.185 
## 850   1.1866 
## 900   1.1875 
## 950   1.1897 
## 1000   1.1925 
## 1050   1.1956 
## 1100   1.1969 
## 1150   1.1986 
## 1200   1.1997

## fitting final gbm model with a fixed number of  700  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.888 
##  
## estimated cv deviance = 1.182 ; se = 0.034 
##  
## training data correlation = 0.734 
## cv correlation =  0.375 ; se = 0.057 
##  
## training data ROC score = 0.932 
## cv ROC score = 0.697 ; se = 0.034 
##  
## elapsed time -  0.17 minutes

# Cross-validation statistics for this fit: deviance, correlation,
# discrimination, calibration and threshold (means and standard errors).
cvtc7.lr0025$cv.statistics

## $deviance.mean
## [1] 1.182111
## 
## $deviance.se
## [1] 0.03359732
## 
## $correlation.mean
## [1] 0.3753678
## 
## $correlation.se
## [1] 0.05729905
## 
## $discrimination.mean
## [1] 0.69749
## 
## $discrimination.se
## [1] 0.03415318
## 
## $calibration.mean
## [1] 0.1254085 1.2133900 0.5557290 0.6476719 0.4420732
## 
## $calibration.se
## [1] 0.16485902 0.22320871 0.11243833 0.08064683 0.11382093
## 
## $cv.threshold
## [1] 0.369778
## 
## $cv.threshold.se
## [1] 0.0118075

# Relative influence (rel.inf) of each predictor for this fit.
# The deviance measures used to compare models are printed by
# cvtc7.lr0025$cv.statistics, not by this call.
summary(cvtc7.lr0025)

##                                             var    rel.inf
## cpaq8_tot                             cpaq8_tot 20.9863040
## pain_duration                     pain_duration  8.9185981
## isi_tot                                 isi_tot  8.0248895
## overlapping_pain_number overlapping_pain_number  6.8174959
## employment                           employment  6.1180188
## pcs_tot                                 pcs_tot  5.9473523
## RST_PQ_BIS                           RST_PQ_BIS  5.1253684
## age                                         age  5.0862529
## RST_PQ_FFS                           RST_PQ_FFS  3.7519670
## income                                   income  3.2245599
## prodep_t                               prodep_t  3.1401372
## RST_PQ_BAS_I                       RST_PQ_BAS_I  2.9267532
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  2.7243910
## bis_brief_tot                     bis_brief_tot  2.6992574
## meq_tot                                 meq_tot  2.5702763
## current_opioid_meds         current_opioid_meds  2.5358785
## proanx_t                               proanx_t  2.5091523
## audit_total                         audit_total  2.4701486
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.6610988
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.3806745
## cuditr0                                 cuditr0  0.5584860
## hispanic                               hispanic  0.3353975
## assigned_sex_at_birth     assigned_sex_at_birth  0.2444684
## race                                       race  0.2430735

tc 7 lr .0025 (finaltrain: 28 predictors, gcpsr items included)

# Fix the RNG seed before fitting; the fit uses bag.fraction = 0.5.
set.seed(1)

# Boosted regression tree on finaltrain with tree complexity 7 and
# learning rate 0.0025, cross-validated on the user-supplied fold vector.
cvtc7.lr0025_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,     # response column (reported in the log as gcpsr_2_HICP_num)
  gbm.x = 3:30,  # 28 predictor columns, selected by position
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  # No explicit tolerance (e.g. tolerance = 0.01) is passed; "auto" is used.
  tolerance.method = "auto",
  max.trees = 50000
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2639 
##  
## now adding trees... 
## 100   1.2222 
## 150   1.1881 
## 200   1.1606 
## 250   1.1392 
## 300   1.1198 
## 350   1.1034 
## 400   1.0895 
## 450   1.0783 
## 500   1.0682 
## 550   1.0606 
## 600   1.0549 
## 650   1.0493 
## 700   1.0448 
## 750   1.042 
## 800   1.0395 
## 850   1.038 
## 900   1.0374 
## 950   1.0362 
## 1000   1.0368 
## 1050   1.0364 
## 1100   1.0365 
## 1150   1.0371 
## 1200   1.0373 
## 1250   1.0391 
## 1300   1.0398 
## 1350   1.0414 
## 1400   1.0438 
## 1450   1.0454 
## 1500   1.0466 
## 1550   1.0476

## fitting final gbm model with a fixed number of  950  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.685 
##  
## estimated cv deviance = 1.036 ; se = 0.037 
##  
## training data correlation = 0.811 
## cv correlation =  0.507 ; se = 0.046 
##  
## training data ROC score = 0.961 
## cv ROC score = 0.789 ; se = 0.024 
##  
## elapsed time -  0.24 minutes

# Cross-validation statistics for this fit: deviance, correlation,
# discrimination, calibration and threshold (means and standard errors).
cvtc7.lr0025_1$cv.statistics

## $deviance.mean
## [1] 1.036184
## 
## $deviance.se
## [1] 0.03675316
## 
## $correlation.mean
## [1] 0.5065879
## 
## $correlation.se
## [1] 0.04569805
## 
## $discrimination.mean
## [1] 0.78866
## 
## $discrimination.se
## [1] 0.02379967
## 
## $calibration.mean
## [1] 0.07789571 1.12883899 0.64448262 0.70036497 0.52279631
## 
## $calibration.se
## [1] 0.09552980 0.12674983 0.07965691 0.05350939 0.10345193
## 
## $cv.threshold
## [1] 0.4173048
## 
## $cv.threshold.se
## [1] 0.02914774

# Relative influence (rel.inf) of each predictor for this fit.
summary(cvtc7.lr0025_1)

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 16.50917560
## cpaq8_tot                             cpaq8_tot  9.58021851
## pain_duration                     pain_duration  7.42728602
## gcpsr_4                                 gcpsr_4  7.32144404
## gcpsr_3                                 gcpsr_3  6.21385160
## gcpsr_6                                 gcpsr_6  5.82795595
## isi_tot                                 isi_tot  4.75581212
## employment                           employment  4.60354458
## overlapping_pain_number overlapping_pain_number  4.36383772
## age                                         age  4.30424772
## RST_PQ_BIS                           RST_PQ_BIS  2.98537818
## pcs_tot                                 pcs_tot  2.83322973
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  2.81841125
## RST_PQ_FFS                           RST_PQ_FFS  2.41095373
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  2.31513786
## meq_tot                                 meq_tot  2.29848254
## income                                   income  2.04628762
## bis_brief_tot                     bis_brief_tot  2.00071018
## proanx_t                               proanx_t  1.80060628
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.65940413
## prodep_t                               prodep_t  1.53236380
## audit_total                         audit_total  1.40394901
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.13073458
## current_opioid_meds         current_opioid_meds  0.93139849
## hispanic                               hispanic  0.38331893
## assigned_sex_at_birth     assigned_sex_at_birth  0.24401814
## cuditr0                                 cuditr0  0.20001777
## race                                       race  0.09822394

tc 7 lr .005 (finaltrain_nocgpsr: 24 predictors, gcpsr items excluded)

# Fix the RNG seed before fitting; the fit uses bag.fraction = 0.5.
set.seed(1)

# Boosted regression tree on finaltrain_nocgpsr with tree complexity 7 and
# learning rate 0.005, cross-validated on the user-supplied fold vector.
cvtc7.lr005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,     # response column (reported in the log as gcpsr_2_HICP_num)
  gbm.x = 3:26,  # 24 predictor columns, selected by position
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  # No explicit tolerance (e.g. tolerance = 0.01) is passed; "auto" is used.
  tolerance.method = "auto",
  max.trees = 50000
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2605 
##  
## now adding trees... 
## 100   1.2274 
## 150   1.2046 
## 200   1.1945 
## 250   1.1883 
## 300   1.1849 
## 350   1.1853 
## 400   1.1882 
## 450   1.1913 
## 500   1.1943 
## 550   1.1963 
## 600   1.2022 
## 650   1.2073 
## 700   1.2104 
## 750   1.2161 
## 800   1.2214 
## 850   1.2284 
## 900   1.2362 
## 950   1.2408 
## 1000   1.2471

## fitting final gbm model with a fixed number of  300  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.925 
##  
## estimated cv deviance = 1.185 ; se = 0.03 
##  
## training data correlation = 0.715 
## cv correlation =  0.373 ; se = 0.056 
##  
## training data ROC score = 0.922 
## cv ROC score = 0.698 ; se = 0.034 
##  
## elapsed time -  0.14 minutes

# Cross-validation statistics for this fit: deviance, correlation,
# discrimination, calibration and threshold (means and standard errors).
cvtc7.lr005$cv.statistics

## $deviance.mean
## [1] 1.184939
## 
## $deviance.se
## [1] 0.02989794
## 
## $correlation.mean
## [1] 0.3726517
## 
## $correlation.se
## [1] 0.05628425
## 
## $discrimination.mean
## [1] 0.69772
## 
## $discrimination.se
## [1] 0.03352887
## 
## $calibration.mean
## [1] 0.1714954 1.3168555 0.5567728 0.6576525 0.4483368
## 
## $calibration.se
## [1] 0.17332535 0.25003073 0.11945301 0.07995939 0.12038951
## 
## $cv.threshold
## [1] 0.3725912
## 
## $cv.threshold.se
## [1] 0.01034632

# Relative influence (rel.inf) of each predictor for this fit.
# The deviance measures used to compare models are printed by
# cvtc7.lr005$cv.statistics, not by this call.
summary(cvtc7.lr005)

##                                             var    rel.inf
## cpaq8_tot                             cpaq8_tot 22.6327373
## pain_duration                     pain_duration  8.6811272
## isi_tot                                 isi_tot  8.5523463
## overlapping_pain_number overlapping_pain_number  6.8348854
## employment                           employment  5.4729970
## pcs_tot                                 pcs_tot  5.3216252
## RST_PQ_BIS                           RST_PQ_BIS  4.8788211
## age                                         age  4.3953753
## RST_PQ_FFS                           RST_PQ_FFS  3.6462341
## current_opioid_meds         current_opioid_meds  3.2293437
## prodep_t                               prodep_t  3.2003219
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  2.9774021
## income                                   income  2.8951290
## meq_tot                                 meq_tot  2.6264988
## audit_total                         audit_total  2.5985016
## RST_PQ_BAS_I                       RST_PQ_BAS_I  2.5092164
## proanx_t                               proanx_t  2.4068154
## bis_brief_tot                     bis_brief_tot  2.2661968
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.8055346
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.7556846
## cuditr0                                 cuditr0  0.6367187
## assigned_sex_at_birth     assigned_sex_at_birth  0.2736726
## hispanic                               hispanic  0.2259074
## race                                       race  0.1769076

tc 7 lr .005 (finaltrain: 28 predictors, gcpsr items included)

# Fix the RNG seed before fitting; the fit uses bag.fraction = 0.5.
set.seed(1)

# Boosted regression tree on finaltrain with tree complexity 7 and
# learning rate 0.005, cross-validated on the user-supplied fold vector.
cvtc7.lr005_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,     # response column (reported in the log as gcpsr_2_HICP_num)
  gbm.x = 3:30,  # 28 predictor columns, selected by position
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  # No explicit tolerance (e.g. tolerance = 0.01) is passed; "auto" is used.
  tolerance.method = "auto",
  max.trees = 50000
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2218 
##  
## now adding trees... 
## 100   1.1597 
## 150   1.1178 
## 200   1.0917 
## 250   1.0719 
## 300   1.0567 
## 350   1.0466 
## 400   1.0383 
## 450   1.0337 
## 500   1.0307 
## 550   1.0322 
## 600   1.0354 
## 650   1.0378 
## 700   1.042 
## 750   1.0443 
## 800   1.0465 
## 850   1.0516 
## 900   1.0573 
## 950   1.0608 
## 1000   1.0672 
## 1050   1.0724 
## 1100   1.0764

## fitting final gbm model with a fixed number of  500  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.664 
##  
## estimated cv deviance = 1.031 ; se = 0.036 
##  
## training data correlation = 0.82 
## cv correlation =  0.512 ; se = 0.043 
##  
## training data ROC score = 0.965 
## cv ROC score = 0.793 ; se = 0.022 
##  
## elapsed time -  0.17 minutes

# Cross-validation statistics for this fit: deviance, correlation,
# discrimination, calibration and threshold (means and standard errors).
cvtc7.lr005_1$cv.statistics

## $deviance.mean
## [1] 1.030715
## 
## $deviance.se
## [1] 0.03563275
## 
## $correlation.mean
## [1] 0.5122601
## 
## $correlation.se
## [1] 0.04341216
## 
## $discrimination.mean
## [1] 0.79256
## 
## $discrimination.se
## [1] 0.02175939
## 
## $calibration.mean
## [1] 0.06567966 1.09990876 0.66420685 0.69198584 0.53284767
## 
## $calibration.se
## [1] 0.08753195 0.11188223 0.07065612 0.05351112 0.09414945
## 
## $cv.threshold
## [1] 0.4116784
## 
## $cv.threshold.se
## [1] 0.03167422

# Relative influence (rel.inf) of each predictor for this fit.
summary(cvtc7.lr005_1)

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 15.47421663
## cpaq8_tot                             cpaq8_tot 10.16040072
## pain_duration                     pain_duration  7.55492935
## gcpsr_4                                 gcpsr_4  7.11868623
## gcpsr_3                                 gcpsr_3  5.73492216
## gcpsr_6                                 gcpsr_6  5.51540553
## isi_tot                                 isi_tot  5.40727021
## employment                           employment  4.85587595
## overlapping_pain_number overlapping_pain_number  4.62348822
## age                                         age  4.31039314
## RST_PQ_BIS                           RST_PQ_BIS  3.16444977
## pcs_tot                                 pcs_tot  2.78907426
## income                                   income  2.70007566
## RST_PQ_FFS                           RST_PQ_FFS  2.68164900
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  2.51944958
## meq_tot                                 meq_tot  2.26801079
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.99436870
## bis_brief_tot                     bis_brief_tot  1.93001691
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.82910995
## prodep_t                               prodep_t  1.67621430
## audit_total                         audit_total  1.43696363
## proanx_t                               proanx_t  1.39317793
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.26964851
## current_opioid_meds         current_opioid_meds  0.79152816
## cuditr0                                 cuditr0  0.32932418
## hispanic                               hispanic  0.22335467
## assigned_sex_at_birth     assigned_sex_at_birth  0.20652491
## race                                       race  0.04147096

tc 7 lr .00005 (finaltrain_nocgpsr: 24 predictors, gcpsr items excluded)

# Fix the RNG seed before fitting; the fit uses bag.fraction = 0.5.
set.seed(1)

# Boosted regression tree on finaltrain_nocgpsr with tree complexity 7 and
# learning rate 0.00005, cross-validated on the user-supplied fold vector.
# NOTE(review): in the knitted trace the holdout deviance is still falling
# at the last printed step, and that step's tree count is the one used for
# the final model. The run may have ended on the stopping tolerance rather
# than at a deviance minimum -- confirm before comparing this fit with the
# faster learning rates.
cvtc7.lr00005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,     # response column (reported in the log as gcpsr_2_HICP_num)
  gbm.x = 3:26,  # 24 predictor columns, selected by position
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.00005,
  bag.fraction = 0.5,
  # No explicit tolerance (e.g. tolerance = 0.01) is passed; "auto" is used.
  tolerance.method = "auto",
  max.trees = 50000
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3149 
##  
## now adding trees... 
## 100   1.3142 
## 150   1.3135 
## 200   1.3128 
## 250   1.3121 
## 300   1.3115 
## 350   1.3108 
## 400   1.3101 
## 450   1.3094 
## 500   1.3087 
## 550   1.308 
## 600   1.3074 
## 650   1.3067 
## 700   1.306 
## 750   1.3054 
## 800   1.3047 
## 850   1.3041 
## 900   1.3035 
## 950   1.3028 
## 1000   1.3022 
## 1050   1.3016 
## 1100   1.3009 
## 1150   1.3003 
## 1200   1.2997 
## 1250   1.299 
## 1300   1.2984 
## 1350   1.2978 
## 1400   1.2972 
## 1450   1.2966 
## 1500   1.296 
## 1550   1.2954 
## 1600   1.2948 
## 1650   1.2942 
## 1700   1.2936 
## 1750   1.293 
## 1800   1.2924 
## 1850   1.2918 
## 1900   1.2912 
## 1950   1.2906 
## 2000   1.29 
## 2050   1.2894 
## 2100   1.2888 
## 2150   1.2883 
## 2200   1.2877 
## 2250   1.2872 
## 2300   1.2866 
## 2350   1.286 
## 2400   1.2855 
## 2450   1.2849 
## 2500   1.2844 
## 2550   1.2838 
## 2600   1.2833 
## 2650   1.2827 
## 2700   1.2822 
## 2750   1.2817 
## 2800   1.2811 
## 2850   1.2806 
## 2900   1.2801 
## 2950   1.2796 
## 3000   1.279 
## 3050   1.2785 
## 3100   1.278 
## 3150   1.2775 
## 3200   1.277 
## 3250   1.2765 
## 3300   1.276 
## 3350   1.2755 
## 3400   1.275 
## 3450   1.2745 
## 3500   1.274 
## 3550   1.2735 
## 3600   1.2731 
## 3650   1.2726 
## 3700   1.2721 
## 3750   1.2716 
## 3800   1.2712 
## 3850   1.2707 
## 3900   1.2702 
## 3950   1.2697 
## 4000   1.2693 
## 4050   1.2688 
## 4100   1.2683 
## 4150   1.2679 
## 4200   1.2674 
## 4250   1.2669 
## 4300   1.2665 
## 4350   1.266 
## 4400   1.2656 
## 4450   1.2651 
## 4500   1.2647 
## 4550   1.2643 
## 4600   1.2638 
## 4650   1.2634 
## 4700   1.2629 
## 4750   1.2625 
## 4800   1.262 
## 4850   1.2616 
## 4900   1.2612 
## 4950   1.2608 
## 5000   1.2603 
## 5050   1.2599 
## 5100   1.2595 
## 5150   1.2591 
## 5200   1.2587 
## 5250   1.2583 
## 5300   1.2579 
## 5350   1.2575 
## 5400   1.2571 
## 5450   1.2566 
## 5500   1.2563 
## 5550   1.2559 
## 5600   1.2555 
## 5650   1.2551 
## 5700   1.2547 
## 5750   1.2543 
## 5800   1.2539 
## 5850   1.2535 
## 5900   1.2531 
## 5950   1.2527 
## 6000   1.2523 
## 6050   1.2519 
## 6100   1.2516 
## 6150   1.2512 
## 6200   1.2508 
## 6250   1.2505 
## 6300   1.2501 
## 6350   1.2497 
## 6400   1.2493 
## 6450   1.249 
## 6500   1.2486 
## 6550   1.2482 
## 6600   1.2479 
## 6650   1.2475 
## 6700   1.2472 
## 6750   1.2468 
## 6800   1.2465 
## 6850   1.2461 
## 6900   1.2458 
## 6950   1.2454 
## 7000   1.2451 
## 7050   1.2447 
## 7100   1.2444 
## 7150   1.244 
## 7200   1.2437 
## 7250   1.2433 
## 7300   1.243 
## 7350   1.2426 
## 7400   1.2423 
## 7450   1.242 
## 7500   1.2416 
## 7550   1.2413 
## 7600   1.241 
## 7650   1.2407 
## 7700   1.2403 
## 7750   1.24 
## 7800   1.2397 
## 7850   1.2393 
## 7900   1.239 
## 7950   1.2387 
## 8000   1.2384 
## 8050   1.238 
## 8100   1.2377 
## 8150   1.2374 
## 8200   1.2371 
## 8250   1.2368 
## 8300   1.2365 
## 8350   1.2362 
## 8400   1.2359 
## 8450   1.2356 
## 8500   1.2353 
## 8550   1.235 
## 8600   1.2347 
## 8650   1.2344 
## 8700   1.2342 
## 8750   1.2339 
## 8800   1.2336 
## 8850   1.2333 
## 8900   1.233 
## 8950   1.2328 
## 9000   1.2325 
## 9050   1.2322 
## 9100   1.2319 
## 9150   1.2316 
## 9200   1.2314 
## 9250   1.2311 
## 9300   1.2308 
## 9350   1.2306 
## 9400   1.2303 
## 9450   1.23 
## 9500   1.2297 
## 9550   1.2294 
## 9600   1.2292 
## 9650   1.2289 
## 9700   1.2287 
## 9750   1.2284 
## 9800   1.2281 
## 9850   1.2279 
## 9900   1.2276 
## 9950   1.2274 
## 10000   1.2271 
## 10050   1.2268 
## 10100   1.2266 
## 10150   1.2263 
## 10200   1.2261 
## 10250   1.2258 
## 10300   1.2256 
## 10350   1.2253 
## 10400   1.2251 
## 10450   1.2248 
## 10500   1.2246 
## 10550   1.2243 
## 10600   1.2241 
## 10650   1.2239 
## 10700   1.2236 
## 10750   1.2234 
## 10800   1.2231 
## 10850   1.2229 
## 10900   1.2227 
## 10950   1.2225 
## 11000   1.2222 
## 11050   1.222 
## 11100   1.2217 
## 11150   1.2215 
## 11200   1.2213 
## 11250   1.2211 
## 11300   1.2208 
## 11350   1.2206 
## 11400   1.2204 
## 11450   1.2201 
## 11500   1.2199 
## 11550   1.2197 
## 11600   1.2195 
## 11650   1.2193 
## 11700   1.2191 
## 11750   1.2188 
## 11800   1.2186 
## 11850   1.2184 
## 11900   1.2182 
## 11950   1.218 
## 12000   1.2178 
## 12050   1.2176 
## 12100   1.2173 
## 12150   1.2172 
## 12200   1.217 
## 12250   1.2168 
## 12300   1.2165 
## 12350   1.2163 
## 12400   1.2162 
## 12450   1.2159 
## 12500   1.2157 
## 12550   1.2155 
## 12600   1.2154 
## 12650   1.2152 
## 12700   1.215 
## 12750   1.2148 
## 12800   1.2146 
## 12850   1.2144 
## 12900   1.2142 
## 12950   1.214 
## 13000   1.2138 
## 13050   1.2136 
## 13100   1.2134 
## 13150   1.2132 
## 13200   1.213 
## 13250   1.2128 
## 13300   1.2126 
## 13350   1.2124 
## 13400   1.2123 
## 13450   1.2121 
## 13500   1.2119 
## 13550   1.2117 
## 13600   1.2115 
## 13650   1.2114 
## 13700   1.2112 
## 13750   1.211 
## 13800   1.2108 
## 13850   1.2107 
## 13900   1.2105 
## 13950   1.2103 
## 14000   1.2101 
## 14050   1.21 
## 14100   1.2098 
## 14150   1.2097 
## 14200   1.2095 
## 14250   1.2093 
## 14300   1.2092 
## 14350   1.209 
## 14400   1.2088 
## 14450   1.2086 
## 14500   1.2085 
## 14550   1.2083 
## 14600   1.2081 
## 14650   1.208 
## 14700   1.2078 
## 14750   1.2077 
## 14800   1.2075 
## 14850   1.2074 
## 14900   1.2072 
## 14950   1.2071 
## 15000   1.2069 
## 15050   1.2068 
## 15100   1.2066 
## 15150   1.2065 
## 15200   1.2063 
## 15250   1.2061 
## 15300   1.206 
## 15350   1.2059 
## 15400   1.2057 
## 15450   1.2056 
## 15500   1.2054 
## 15550   1.2053 
## 15600   1.2051 
## 15650   1.205 
## 15700   1.2048 
## 15750   1.2047 
## 15800   1.2046 
## 15850   1.2045 
## 15900   1.2043 
## 15950   1.2042 
## 16000   1.204 
## 16050   1.2039 
## 16100   1.2037 
## 16150   1.2036 
## 16200   1.2034 
## 16250   1.2032 
## 16300   1.2031 
## 16350   1.203 
## 16400   1.2029 
## 16450   1.2027 
## 16500   1.2026 
## 16550   1.2024 
## 16600   1.2023 
## 16650   1.2021 
## 16700   1.202 
## 16750   1.2019 
## 16800   1.2018 
## 16850   1.2017 
## 16900   1.2015 
## 16950   1.2014

## fitting final gbm model with a fixed number of  16950  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 1.045 
##  
## estimated cv deviance = 1.201 ; se = 0.021 
##  
## training data correlation = 0.672 
## cv correlation =  0.377 ; se = 0.056 
##  
## training data ROC score = 0.899 
## cv ROC score = 0.698 ; se = 0.034 
##  
## elapsed time -  3.47 minutes

# Cross-validation statistics for this fit: deviance, correlation,
# discrimination, calibration and threshold (means and standard errors).
cvtc7.lr00005$cv.statistics

## $deviance.mean
## [1] 1.201419
## 
## $deviance.se
## [1] 0.02096563
## 
## $correlation.mean
## [1] 0.3773108
## 
## $correlation.se
## [1] 0.05565321
## 
## $discrimination.mean
## [1] 0.69784
## 
## $discrimination.se
## [1] 0.03382687
## 
## $calibration.mean
## [1] 0.5340504 2.0003737 0.4304614 0.6898166 0.2941006
## 
## $calibration.se
## [1] 0.23016848 0.39381204 0.10842773 0.08264641 0.08691873
## 
## $cv.threshold
## [1] 0.3691244
## 
## $cv.threshold.se
## [1] 0.007563429

# Relative influence (rel.inf) of each predictor for this fit.
# The deviance measures used to compare models are printed by
# cvtc7.lr00005$cv.statistics, not by this call.
summary(cvtc7.lr00005)

##                                             var    rel.inf
## cpaq8_tot                             cpaq8_tot 27.7416003
## isi_tot                                 isi_tot  8.5402283
## pain_duration                     pain_duration  8.1276618
## overlapping_pain_number overlapping_pain_number  6.9382441
## employment                           employment  5.6718507
## pcs_tot                                 pcs_tot  5.2409966
## age                                         age  4.9018880
## RST_PQ_BIS                           RST_PQ_BIS  4.4214933
## current_opioid_meds         current_opioid_meds  3.5252575
## income                                   income  2.8897918
## RST_PQ_FFS                           RST_PQ_FFS  2.8479092
## proanx_t                               proanx_t  2.6194077
## RST_PQ_BAS_I                       RST_PQ_BAS_I  2.5205005
## prodep_t                               prodep_t  2.4735597
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  2.1309925
## meq_tot                                 meq_tot  1.9843152
## audit_total                         audit_total  1.9317429
## bis_brief_tot                     bis_brief_tot  1.8405628
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.3319265
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.2605309
## cuditr0                                 cuditr0  0.6019877
## race                                       race  0.1711519
## hispanic                               hispanic  0.1662346
## assigned_sex_at_birth     assigned_sex_at_birth  0.1201654

tc 7 lr .00005 (finaltrain: 28 predictors, gcpsr items included)

# Fix the RNG seed before fitting; the fit uses bag.fraction = 0.5.
set.seed(1)

# Boosted regression tree on finaltrain with tree complexity 7 and
# learning rate 0.00005, cross-validated on the user-supplied fold vector.
# NOTE(review): at this learning rate the trace drops by roughly 0.001 or
# less per 50 trees; confirm the run ends at a deviance minimum rather than
# on the stopping tolerance before comparing it with other learning rates.
cvtc7.lr00005_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,     # response column (reported in the log as gcpsr_2_HICP_num)
  gbm.x = 3:30,  # 28 predictor columns, selected by position
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.00005,
  bag.fraction = 0.5,
  # No explicit tolerance (e.g. tolerance = 0.01) is passed; "auto" is used.
  tolerance.method = "auto",
  max.trees = 50000
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3145 
##  
## now adding trees... 
## 100   1.3133 
## 150   1.3122 
## 200   1.3111 
## 250   1.31 
## 300   1.3089 
## 350   1.3078 
## 400   1.3067 
## 450   1.3056 
## 500   1.3045 
## 550   1.3034 
## 600   1.3024 
## 650   1.3013 
## 700   1.3002 
## 750   1.2992 
## 800   1.2981 
## 850   1.297 
## 900   1.296 
## 950   1.2949 
## 1000   1.2939 
## 1050   1.2928 
## 1100   1.2918 
## 1150   1.2908 
## 1200   1.2897 
## 1250   1.2887 
## 1300   1.2877 
## 1350   1.2867 
## 1400   1.2857 
## 1450   1.2847 
## 1500   1.2837 
## 1550   1.2827 
## 1600   1.2817 
## 1650   1.2807 
## 1700   1.2797 
## 1750   1.2788 
## 1800   1.2778 
## 1850   1.2768 
## 1900   1.2759 
## 1950   1.2749 
## 2000   1.274 
## 2050   1.273 
## 2100   1.272 
## 2150   1.2711 
## 2200   1.2701 
## 2250   1.2692 
## 2300   1.2683 
## 2350   1.2674 
## 2400   1.2664 
## 2450   1.2655 
## 2500   1.2646 
## 2550   1.2637 
## 2600   1.2627 
## 2650   1.2618 
## 2700   1.2609 
## 2750   1.26 
## 2800   1.2591 
## 2850   1.2582 
## 2900   1.2573 
## 2950   1.2565 
## 3000   1.2556 
## 3050   1.2547 
## 3100   1.2538 
## 3150   1.2529 
## 3200   1.252 
## 3250   1.2511 
## 3300   1.2503 
## 3350   1.2494 
## 3400   1.2485 
## 3450   1.2477 
## 3500   1.2468 
## 3550   1.246 
## 3600   1.2452 
## 3650   1.2444 
## 3700   1.2435 
## 3750   1.2427 
## 3800   1.2418 
## 3850   1.241 
## 3900   1.2402 
## 3950   1.2394 
## 4000   1.2386 
## 4050   1.2377 
## 4100   1.237 
## 4150   1.2361 
## 4200   1.2353 
## 4250   1.2345 
## 4300   1.2337 
## 4350   1.2329 
## 4400   1.2321 
## 4450   1.2314 
## 4500   1.2306 
## 4550   1.2298 
## 4600   1.2291 
## 4650   1.2283 
## 4700   1.2275 
## 4750   1.2267 
## 4800   1.226 
## 4850   1.2252 
## 4900   1.2244 
## 4950   1.2237 
## 5000   1.2229 
## 5050   1.2222 
## 5100   1.2214 
## 5150   1.2207 
## 5200   1.22 
## 5250   1.2193 
## 5300   1.2185 
## 5350   1.2179 
## 5400   1.2171 
## 5450   1.2164 
## 5500   1.2157 
## 5550   1.215 
## 5600   1.2143 
## 5650   1.2136 
## 5700   1.2129 
## 5750   1.2122 
## 5800   1.2115 
## 5850   1.2108 
## 5900   1.2101 
## 5950   1.2095 
## 6000   1.2088 
## 6050   1.2081 
## 6100   1.2074 
## 6150   1.2067 
## 6200   1.2061 
## 6250   1.2054 
## 6300   1.2047 
## 6350   1.204 
## 6400   1.2034 
## 6450   1.2027 
## 6500   1.2021 
## 6550   1.2014 
## 6600   1.2007 
## 6650   1.2001 
## 6700   1.1994 
## 6750   1.1988 
## 6800   1.1981 
## 6850   1.1975 
## 6900   1.1968 
## 6950   1.1962 
## 7000   1.1956 
## 7050   1.1949 
## 7100   1.1942 
## 7150   1.1936 
## 7200   1.1929 
## 7250   1.1923 
## 7300   1.1917 
## 7350   1.1911 
## 7400   1.1904 
## 7450   1.1898 
## 7500   1.1892 
## 7550   1.1886 
## 7600   1.188 
## 7650   1.1874 
## 7700   1.1868 
## 7750   1.1862 
## 7800   1.1856 
## 7850   1.185 
## 7900   1.1844 
## 7950   1.1838 
## 8000   1.1832 
## 8050   1.1826 
## 8100   1.182 
## 8150   1.1814 
## 8200   1.1808 
## 8250   1.1803 
## 8300   1.1797 
## 8350   1.1791 
## 8400   1.1786 
## 8450   1.178 
## 8500   1.1774 
## 8550   1.1768 
## 8600   1.1763 
## 8650   1.1757 
## 8700   1.1751 
## 8750   1.1746 
## 8800   1.174 
## 8850   1.1734 
## 8900   1.1729 
## 8950   1.1723 
## 9000   1.1718 
## 9050   1.1712 
## 9100   1.1707 
## 9150   1.1701 
## 9200   1.1696 
## 9250   1.1691 
## 9300   1.1685 
## 9350   1.168 
## 9400   1.1675 
## 9450   1.167 
## 9500   1.1665 
## 9550   1.1659 
## 9600   1.1654 
## 9650   1.1649 
## 9700   1.1643 
## 9750   1.1638 
## 9800   1.1633 
## 9850   1.1628 
## 9900   1.1623 
## 9950   1.1617 
## 10000   1.1612 
## 10050   1.1607 
## 10100   1.1602 
## 10150   1.1597 
## 10200   1.1592 
## 10250   1.1587 
## 10300   1.1582 
## 10350   1.1577 
## 10400   1.1572 
## 10450   1.1567 
## 10500   1.1562 
## 10550   1.1557 
## 10600   1.1553 
## 10650   1.1548 
## 10700   1.1543 
## 10750   1.1538 
## 10800   1.1533 
## 10850   1.1528 
## 10900   1.1523 
## 10950   1.1519 
## 11000   1.1514 
## 11050   1.1509 
## 11100   1.1504 
## 11150   1.15 
## 11200   1.1495 
## 11250   1.1491 
## 11300   1.1486 
## 11350   1.1482 
## 11400   1.1477 
## 11450   1.1472 
## 11500   1.1468 
## 11550   1.1463 
## 11600   1.1459 
## 11650   1.1454 
## 11700   1.145 
## 11750   1.1445 
## 11800   1.144 
## 11850   1.1436 
## 11900   1.1431 
## 11950   1.1427 
## 12000   1.1422 
## 12050   1.1418 
## 12100   1.1414 
## 12150   1.1409 
## 12200   1.1405 
## 12250   1.1401 
## 12300   1.1397 
## 12350   1.1393 
## 12400   1.1388 
## 12450   1.1384 
## 12500   1.138 
## 12550   1.1376 
## 12600   1.1371 
## 12650   1.1367 
## 12700   1.1362 
## 12750   1.1358 
## 12800   1.1354 
## 12850   1.135 
## 12900   1.1346 
## 12950   1.1342 
## 13000   1.1337 
## 13050   1.1333 
## 13100   1.1329 
## 13150   1.1325 
## 13200   1.1321 
## 13250   1.1317 
## 13300   1.1313 
## 13350   1.1309 
## 13400   1.1305 
## 13450   1.1301 
## 13500   1.1297 
## 13550   1.1293 
## 13600   1.1289 
## 13650   1.1285 
## 13700   1.1281 
## 13750   1.1277 
## 13800   1.1273 
## 13850   1.1269 
## 13900   1.1265 
## 13950   1.1262 
## 14000   1.1257 
## 14050   1.1254 
## 14100   1.125 
## 14150   1.1246 
## 14200   1.1243 
## 14250   1.1239 
## 14300   1.1235 
## 14350   1.1231 
## 14400   1.1227 
## 14450   1.1224 
## 14500   1.122 
## 14550   1.1216 
## 14600   1.1213 
## 14650   1.1209 
## 14700   1.1205 
## 14750   1.1201 
## 14800   1.1198 
## 14850   1.1194 
## 14900   1.1191 
## 14950   1.1187 
## 15000   1.1184 
## 15050   1.118 
## 15100   1.1177 
## 15150   1.1174 
## 15200   1.1171 
## 15250   1.1167 
## 15300   1.1164 
## 15350   1.116 
## 15400   1.1157 
## 15450   1.1154 
## 15500   1.115 
## 15550   1.1146 
## 15600   1.1143 
## 15650   1.114 
## 15700   1.1137 
## 15750   1.1133 
## 15800   1.113 
## 15850   1.1126 
## 15900   1.1123 
## 15950   1.112 
## 16000   1.1116 
## 16050   1.1113 
## 16100   1.1109 
## 16150   1.1106 
## 16200   1.1103 
## 16250   1.1099 
## 16300   1.1096 
## 16350   1.1093 
## 16400   1.109 
## 16450   1.1087 
## 16500   1.1083 
## 16550   1.108 
## 16600   1.1077 
## 16650   1.1074 
## 16700   1.1071 
## 16750   1.1067 
## 16800   1.1064 
## 16850   1.1061 
## 16900   1.1058 
## 16950   1.1055 
## 17000   1.1052 
## 17050   1.1049 
## 17100   1.1046 
## 17150   1.1043 
## 17200   1.104 
## 17250   1.1037 
## 17300   1.1034 
## 17350   1.1031 
## 17400   1.1028 
## 17450   1.1025 
## 17500   1.1022 
## 17550   1.1019 
## 17600   1.1016 
## 17650   1.1013 
## 17700   1.101 
## 17750   1.1008 
## 17800   1.1005 
## 17850   1.1002 
## 17900   1.0999 
## 17950   1.0996 
## 18000   1.0993 
## 18050   1.099 
## 18100   1.0987 
## 18150   1.0984 
## 18200   1.0981 
## 18250   1.0978 
## 18300   1.0976 
## 18350   1.0973 
## 18400   1.097 
## 18450   1.0967 
## 18500   1.0965 
## 18550   1.0962 
## 18600   1.0959 
## 18650   1.0957 
## 18700   1.0954 
## 18750   1.0951 
## 18800   1.0948 
## 18850   1.0946 
## 18900   1.0943 
## 18950   1.094 
## 19000   1.0938 
## 19050   1.0935 
## 19100   1.0932 
## 19150   1.093 
## 19200   1.0927 
## 19250   1.0925 
## 19300   1.0922 
## 19350   1.0919 
## 19400   1.0917 
## 19450   1.0914 
## 19500   1.0912 
## 19550   1.0909 
## 19600   1.0906 
## 19650   1.0904 
## 19700   1.0901 
## 19750   1.0899 
## 19800   1.0896 
## 19850   1.0894 
## 19900   1.0891 
## 19950   1.0889 
## 20000   1.0886 
## 20050   1.0884 
## 20100   1.0882 
## 20150   1.0879 
## 20200   1.0877 
## 20250   1.0874 
## 20300   1.0872 
## 20350   1.0869 
## 20400   1.0867 
## 20450   1.0864 
## 20500   1.0862 
## 20550   1.086 
## 20600   1.0857 
## 20650   1.0854 
## 20700   1.0852 
## 20750   1.085 
## 20800   1.0848 
## 20850   1.0845 
## 20900   1.0843 
## 20950   1.0841 
## 21000   1.0839 
## 21050   1.0836 
## 21100   1.0834 
## 21150   1.0832 
## 21200   1.0829 
## 21250   1.0827 
## 21300   1.0825 
## 21350   1.0822 
## 21400   1.082 
## 21450   1.0818 
## 21500   1.0816 
## 21550   1.0813 
## 21600   1.0811 
## 21650   1.0809 
## 21700   1.0807 
## 21750   1.0805 
## 21800   1.0802 
## 21850   1.08 
## 21900   1.0798 
## 21950   1.0796 
## 22000   1.0794 
## 22050   1.0792 
## 22100   1.0789 
## 22150   1.0787 
## 22200   1.0785 
## 22250   1.0783 
## 22300   1.0781 
## 22350   1.0779 
## 22400   1.0777 
## 22450   1.0775 
## 22500   1.0772 
## 22550   1.0771 
## 22600   1.0769 
## 22650   1.0767 
## 22700   1.0765 
## 22750   1.0763 
## 22800   1.0761 
## 22850   1.0758 
## 22900   1.0756 
## 22950   1.0754 
## 23000   1.0752 
## 23050   1.075 
## 23100   1.0748 
## 23150   1.0746 
## 23200   1.0744 
## 23250   1.0742 
## 23300   1.074 
## 23350   1.0738 
## 23400   1.0736 
## 23450   1.0734 
## 23500   1.0732 
## 23550   1.073 
## 23600   1.0728 
## 23650   1.0726 
## 23700   1.0724 
## 23750   1.0723 
## 23800   1.0721 
## 23850   1.0719 
## 23900   1.0717 
## 23950   1.0715 
## 24000   1.0713 
## 24050   1.0712 
## 24100   1.071 
## 24150   1.0708 
## 24200   1.0706 
## 24250   1.0704 
## 24300   1.0702 
## 24350   1.0701 
## 24400   1.0699 
## 24450   1.0697 
## 24500   1.0695 
## 24550   1.0693 
## 24600   1.0691 
## 24650   1.069 
## 24700   1.0688 
## 24750   1.0686 
## 24800   1.0684 
## 24850   1.0682 
## 24900   1.0681 
## 24950   1.0679 
## 25000   1.0677 
## 25050   1.0675 
## 25100   1.0673 
## 25150   1.0671 
## 25200   1.067 
## 25250   1.0668 
## 25300   1.0666 
## 25350   1.0665 
## 25400   1.0663 
## 25450   1.0661 
## 25500   1.066 
## 25550   1.0658 
## 25600   1.0656 
## 25650   1.0655 
## 25700   1.0653 
## 25750   1.0651 
## 25800   1.065 
## 25850   1.0648 
## 25900   1.0647 
## 25950   1.0645 
## 26000   1.0643 
## 26050   1.0642 
## 26100   1.064 
## 26150   1.0638 
## 26200   1.0637 
## 26250   1.0635 
## 26300   1.0633 
## 26350   1.0632 
## 26400   1.063 
## 26450   1.0628 
## 26500   1.0627 
## 26550   1.0625 
## 26600   1.0624 
## 26650   1.0622 
## 26700   1.0621 
## 26750   1.0619 
## 26800   1.0618 
## 26850   1.0616 
## 26900   1.0615 
## 26950   1.0614 
## 27000   1.0612 
## 27050   1.0611 
## 27100   1.0609 
## 27150   1.0607 
## 27200   1.0606 
## 27250   1.0605 
## 27300   1.0603 
## 27350   1.0602 
## 27400   1.06 
## 27450   1.0599 
## 27500   1.0597 
## 27550   1.0596 
## 27600   1.0594 
## 27650   1.0593 
## 27700   1.0591 
## 27750   1.059 
## 27800   1.0589 
## 27850   1.0587 
## 27900   1.0586 
## 27950   1.0584 
## 28000   1.0583 
## 28050   1.0582 
## 28100   1.058 
## 28150   1.0579 
## 28200   1.0578 
## 28250   1.0576 
## 28300   1.0575 
## 28350   1.0574 
## 28400   1.0572

## fitting final gbm model with a fixed number of  28400  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.829 
##  
## estimated cv deviance = 1.057 ; se = 0.027 
##  
## training data correlation = 0.771 
## cv correlation =  0.508 ; se = 0.042 
##  
## training data ROC score = 0.943 
## cv ROC score = 0.795 ; se = 0.022 
##  
## elapsed time -  7.69 minutes

# Cross-validation summary stored on the fit: mean and SE of deviance,
# correlation, discrimination, calibration and threshold.
cvtc7.lr00005_1$cv.statistics

## $deviance.mean
## [1] 1.057229
## 
## $deviance.se
## [1] 0.0274483
## 
## $correlation.mean
## [1] 0.5080948
## 
## $correlation.se
## [1] 0.04167875
## 
## $discrimination.mean
## [1] 0.79547
## 
## $discrimination.se
## [1] 0.02216561
## 
## $calibration.mean
## [1] 0.2707792 1.5324922 0.5407383 0.7416255 0.4107525
## 
## $calibration.se
## [1] 0.09892171 0.15460285 0.10601916 0.04659420 0.10982681
## 
## $cv.threshold
## [1] 0.4137747
## 
## $cv.threshold.se
## [1] 0.02114323

# Relative influence (rel.inf) of each predictor, largest first.
summary(cvtc7.lr00005_1)

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 18.75195628
## cpaq8_tot                             cpaq8_tot 10.34557229
## gcpsr_4                                 gcpsr_4  7.41270409
## gcpsr_6                                 gcpsr_6  7.16269684
## pain_duration                     pain_duration  7.00097570
## gcpsr_3                                 gcpsr_3  6.77281955
## isi_tot                                 isi_tot  4.83797970
## employment                           employment  4.37569598
## overlapping_pain_number overlapping_pain_number  4.27665676
## age                                         age  4.12391466
## pcs_tot                                 pcs_tot  2.59872107
## RST_PQ_BIS                           RST_PQ_BIS  2.51528160
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  2.30550976
## income                                   income  2.13572101
## RST_PQ_FFS                           RST_PQ_FFS  2.09932968
## meq_tot                                 meq_tot  1.87363928
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.82571444
## bis_brief_tot                     bis_brief_tot  1.73148182
## proanx_t                               proanx_t  1.45085500
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.30238228
## prodep_t                               prodep_t  1.29512425
## audit_total                         audit_total  1.13528580
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.01187149
## current_opioid_meds         current_opioid_meds  0.97431901
## hispanic                               hispanic  0.29802058
## cuditr0                                 cuditr0  0.18901641
## assigned_sex_at_birth     assigned_sex_at_birth  0.14322918
## race                                       race  0.05352549

tc 5 lr .0005 (finaltrain_nocgpsr: 24 predictors, gcpsr items excluded)

# Seed the RNG immediately before fitting so this run can be repeated.
set.seed(1)

# Cross-validated boosted regression tree on finaltrain_nocgpsr:
# response in column 1, predictors in columns 3-26,
# tree complexity 5, learning rate 0.0005.
cvtc5.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  tree.complexity = 5,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  fold.vector = fold.vector,
  tolerance.method = "auto"
  # tolerance = 0.01 (explicit override, left disabled)
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3088 
##  
## now adding trees... 
## 100   1.3024 
## 150   1.2962 
## 200   1.2905 
## 250   1.2851 
## 300   1.28 
## 350   1.2749 
## 400   1.27 
## 450   1.2657 
## 500   1.2613 
## 550   1.257 
## 600   1.2532 
## 650   1.2493 
## 700   1.2459 
## 750   1.2426 
## 800   1.2394 
## 850   1.2363 
## 900   1.2333 
## 950   1.2305 
## 1000   1.2282 
## 1050   1.2258 
## 1100   1.2234 
## 1150   1.2211 
## 1200   1.2188 
## 1250   1.2168 
## 1300   1.215 
## 1350   1.213 
## 1400   1.2112 
## 1450   1.2095 
## 1500   1.2079 
## 1550   1.206 
## 1600   1.2044 
## 1650   1.203 
## 1700   1.2012 
## 1750   1.1999 
## 1800   1.1986 
## 1850   1.1975 
## 1900   1.1963 
## 1950   1.1952 
## 2000   1.1942 
## 2050   1.1932 
## 2100   1.1924 
## 2150   1.1913 
## 2200   1.1905 
## 2250   1.1895 
## 2300   1.1887 
## 2350   1.1881 
## 2400   1.1873 
## 2450   1.1867 
## 2500   1.1861 
## 2550   1.1854 
## 2600   1.1851 
## 2650   1.1845 
## 2700   1.1839 
## 2750   1.1832 
## 2800   1.183 
## 2850   1.1827 
## 2900   1.1823 
## 2950   1.182 
## 3000   1.1818 
## 3050   1.1814 
## 3100   1.181 
## 3150   1.1806 
## 3200   1.1806 
## 3250   1.1803 
## 3300   1.18 
## 3350   1.1798 
## 3400   1.1792 
## 3450   1.1791 
## 3500   1.1791 
## 3550   1.1792 
## 3600   1.1795 
## 3650   1.1795 
## 3700   1.1794 
## 3750   1.1793 
## 3800   1.1795 
## 3850   1.1793 
## 3900   1.1792

## fitting final gbm model with a fixed number of  3500  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.953 
##  
## estimated cv deviance = 1.179 ; se = 0.031 
##  
## training data correlation = 0.674 
## cv correlation =  0.382 ; se = 0.058 
##  
## training data ROC score = 0.896 
## cv ROC score = 0.704 ; se = 0.035 
##  
## elapsed time -  0.47 minutes

# Cross-validation summary stored on the fit: mean and SE of deviance,
# correlation, discrimination, calibration and threshold.
cvtc5.lr0005$cv.statistics

## $deviance.mean
## [1] 1.179074
## 
## $deviance.se
## [1] 0.03127977
## 
## $correlation.mean
## [1] 0.3816371
## 
## $correlation.se
## [1] 0.05754611
## 
## $discrimination.mean
## [1] 0.70379
## 
## $discrimination.se
## [1] 0.03517637
## 
## $calibration.mean
## [1] 0.1905766 1.3362313 0.5365693 0.6476231 0.4240671
## 
## $calibration.se
## [1] 0.17760136 0.25866132 0.11913839 0.07840703 0.11572572
## 
## $cv.threshold
## [1] 0.3686573
## 
## $cv.threshold.se
## [1] 0.01040199

# Deviance measures for comparing models are in cvtc5.lr0005$cv.statistics.
summary(cvtc5.lr0005) #relative influence of each predictor, largest first

##                                             var    rel.inf
## cpaq8_tot                             cpaq8_tot 24.9282058
## pain_duration                     pain_duration  8.4884904
## isi_tot                                 isi_tot  8.4414647
## overlapping_pain_number overlapping_pain_number  7.5405979
## employment                           employment  6.0404447
## pcs_tot                                 pcs_tot  5.8540994
## age                                         age  5.2486400
## RST_PQ_BIS                           RST_PQ_BIS  4.6045031
## current_opioid_meds         current_opioid_meds  3.2408720
## RST_PQ_FFS                           RST_PQ_FFS  3.1987448
## prodep_t                               prodep_t  2.6494253
## RST_PQ_BAS_I                       RST_PQ_BAS_I  2.6097163
## proanx_t                               proanx_t  2.5981054
## income                                   income  2.5050669
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  2.4256882
## meq_tot                                 meq_tot  2.0308386
## audit_total                         audit_total  2.0113400
## bis_brief_tot                     bis_brief_tot  1.6667861
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.3529421
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.3474388
## cuditr0                                 cuditr0  0.5055121
## race                                       race  0.2924086
## hispanic                               hispanic  0.2912166
## assigned_sex_at_birth     assigned_sex_at_birth  0.1274524

tc 5 lr .0005 (finaltrain: 28 predictors, gcpsr items included)

# Seed the RNG immediately before fitting so this run can be repeated.
set.seed(1)

# Cross-validated boosted regression tree on finaltrain:
# response in column 1, predictors in columns 3-30,
# tree complexity 5, learning rate 0.0005.
cvtc5.lr0005_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  tree.complexity = 5,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  fold.vector = fold.vector,
  tolerance.method = "auto"
  # tolerance = 0.01 (explicit override, left disabled)
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3048 
##  
## now adding trees... 
## 100   1.2945 
## 150   1.2845 
## 200   1.275 
## 250   1.2658 
## 300   1.2572 
## 350   1.2487 
## 400   1.2407 
## 450   1.233 
## 500   1.2258 
## 550   1.2189 
## 600   1.2121 
## 650   1.2056 
## 700   1.199 
## 750   1.1929 
## 800   1.1871 
## 850   1.1814 
## 900   1.1761 
## 950   1.1709 
## 1000   1.166 
## 1050   1.1609 
## 1100   1.1561 
## 1150   1.1515 
## 1200   1.1468 
## 1250   1.1427 
## 1300   1.1386 
## 1350   1.1346 
## 1400   1.1309 
## 1450   1.127 
## 1500   1.1235 
## 1550   1.1201 
## 1600   1.1167 
## 1650   1.1134 
## 1700   1.1101 
## 1750   1.1068 
## 1800   1.1038 
## 1850   1.1007 
## 1900   1.0982 
## 1950   1.0955 
## 2000   1.0927 
## 2050   1.0901 
## 2100   1.0877 
## 2150   1.0853 
## 2200   1.083 
## 2250   1.0809 
## 2300   1.079 
## 2350   1.0769 
## 2400   1.0747 
## 2450   1.073 
## 2500   1.071 
## 2550   1.0693 
## 2600   1.0676 
## 2650   1.0658 
## 2700   1.0641 
## 2750   1.0625 
## 2800   1.061 
## 2850   1.0598 
## 2900   1.0583 
## 2950   1.0569 
## 3000   1.0556 
## 3050   1.0542 
## 3100   1.053 
## 3150   1.0519 
## 3200   1.0508 
## 3250   1.0495 
## 3300   1.0482 
## 3350   1.0471 
## 3400   1.046 
## 3450   1.0451 
## 3500   1.0443 
## 3550   1.0434 
## 3600   1.0427 
## 3650   1.0419 
## 3700   1.0412 
## 3750   1.0406 
## 3800   1.0399 
## 3850   1.0391 
## 3900   1.0383 
## 3950   1.0377 
## 4000   1.0369 
## 4050   1.0365 
## 4100   1.0358 
## 4150   1.0353 
## 4200   1.0348 
## 4250   1.0343 
## 4300   1.0339 
## 4350   1.0337 
## 4400   1.0331 
## 4450   1.0328 
## 4500   1.0324 
## 4550   1.0321 
## 4600   1.0319 
## 4650   1.0315 
## 4700   1.0311 
## 4750   1.0308 
## 4800   1.0304 
## 4850   1.0301 
## 4900   1.0299 
## 4950   1.0294 
## 5000   1.0291 
## 5050   1.0289 
## 5100   1.0288 
## 5150   1.0288 
## 5200   1.0289 
## 5250   1.029 
## 5300   1.0289 
## 5350   1.0288 
## 5400   1.0286 
## 5450   1.0286 
## 5500   1.0287 
## 5550   1.0286 
## 5600   1.0286

## fitting final gbm model with a fixed number of  5450  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.72 
##  
## estimated cv deviance = 1.029 ; se = 0.037 
##  
## training data correlation = 0.781 
## cv correlation =  0.513 ; se = 0.046 
##  
## training data ROC score = 0.947 
## cv ROC score = 0.796 ; se = 0.024 
##  
## elapsed time -  0.78 minutes

# Cross-validation summary stored on the fit: mean and SE of deviance,
# correlation, discrimination, calibration and threshold.
cvtc5.lr0005_1$cv.statistics

## $deviance.mean
## [1] 1.02859
## 
## $deviance.se
## [1] 0.03747852
## 
## $correlation.mean
## [1] 0.5125266
## 
## $correlation.se
## [1] 0.04578342
## 
## $discrimination.mean
## [1] 0.79618
## 
## $discrimination.se
## [1] 0.02400416
## 
## $calibration.mean
## [1] 0.06466175 1.12604257 0.65583115 0.71663364 0.53657616
## 
## $calibration.se
## [1] 0.09269590 0.12779639 0.08557712 0.04893467 0.10871764
## 
## $cv.threshold
## [1] 0.4163356
## 
## $cv.threshold.se
## [1] 0.03108364

# Relative influence (rel.inf) of each predictor, largest first.
summary(cvtc5.lr0005_1)

##                                             var    rel.inf
## gcpsr_5                                 gcpsr_5 16.7742295
## cpaq8_tot                             cpaq8_tot 10.0999776
## gcpsr_4                                 gcpsr_4  8.0941208
## pain_duration                     pain_duration  6.9475641
## gcpsr_3                                 gcpsr_3  6.8607362
## gcpsr_6                                 gcpsr_6  6.5210080
## isi_tot                                 isi_tot  5.1306146
## overlapping_pain_number overlapping_pain_number  4.7534500
## employment                           employment  4.3591720
## age                                         age  4.0747212
## RST_PQ_BIS                           RST_PQ_BIS  2.8247542
## pcs_tot                                 pcs_tot  2.6769135
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  2.5223428
## RST_PQ_FFS                           RST_PQ_FFS  2.4987727
## meq_tot                                 meq_tot  2.2526208
## income                                   income  1.8880582
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.8669079
## bis_brief_tot                     bis_brief_tot  1.7372349
## proanx_t                               proanx_t  1.4332731
## prodep_t                               prodep_t  1.3640046
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.3057740
## audit_total                         audit_total  1.1977014
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.9565626
## current_opioid_meds         current_opioid_meds  0.8918367
## hispanic                               hispanic  0.3934290
## race                                       race  0.2168846
## cuditr0                                 cuditr0  0.1886804
## assigned_sex_at_birth     assigned_sex_at_birth  0.1686547

tc 5 lr .0025 (finaltrain_nocgpsr: 24 predictors, gcpsr items excluded)

# Seed the RNG immediately before fitting so this run can be repeated.
set.seed(1)

# Cross-validated boosted regression tree on finaltrain_nocgpsr:
# response in column 1, predictors in columns 3-26,
# tree complexity 5, learning rate 0.0025.
cvtc5.lr0025 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  tree.complexity = 5,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  fold.vector = fold.vector,
  tolerance.method = "auto"
  # tolerance = 0.01 (explicit override, left disabled)
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2854 
##  
## now adding trees... 
## 100   1.2619 
## 150   1.2431 
## 200   1.2286 
## 250   1.2176 
## 300   1.2086 
## 350   1.202 
## 400   1.1961 
## 450   1.1922 
## 500   1.1874 
## 550   1.185 
## 600   1.1837 
## 650   1.1816 
## 700   1.1805 
## 750   1.1811 
## 800   1.1811 
## 850   1.1819 
## 900   1.1827 
## 950   1.1843 
## 1000   1.1861 
## 1050   1.1887 
## 1100   1.1896 
## 1150   1.1908 
## 1200   1.1913 
## 1250   1.1933 
## 1300   1.1944

## fitting final gbm model with a fixed number of  700  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.953 
##  
## estimated cv deviance = 1.181 ; se = 0.031 
##  
## training data correlation = 0.675 
## cv correlation =  0.38 ; se = 0.056 
##  
## training data ROC score = 0.895 
## cv ROC score = 0.699 ; se = 0.034 
##  
## elapsed time -  0.14 minutes

# Cross-validation summary stored on the fit: mean and SE of deviance,
# correlation, discrimination, calibration and threshold.
cvtc5.lr0025$cv.statistics

## $deviance.mean
## [1] 1.180541
## 
## $deviance.se
## [1] 0.03116531
## 
## $correlation.mean
## [1] 0.3797401
## 
## $correlation.se
## [1] 0.05639836
## 
## $discrimination.mean
## [1] 0.69884
## 
## $discrimination.se
## [1] 0.03362719
## 
## $calibration.mean
## [1] 0.1757282 1.3087762 0.5355817 0.6562492 0.4171216
## 
## $calibration.se
## [1] 0.17239395 0.24261537 0.11546833 0.07950215 0.11416712
## 
## $cv.threshold
## [1] 0.3652963
## 
## $cv.threshold.se
## [1] 0.01086483

# Deviance measures for comparing models are in cvtc5.lr0025$cv.statistics.
summary(cvtc5.lr0025) #relative influence of each predictor, largest first

##                                             var    rel.inf
## cpaq8_tot                             cpaq8_tot 24.7810411
## pain_duration                     pain_duration  9.0073441
## isi_tot                                 isi_tot  8.3599604
## overlapping_pain_number overlapping_pain_number  7.4434522
## employment                           employment  6.0033367
## pcs_tot                                 pcs_tot  5.7857400
## age                                         age  5.1832774
## RST_PQ_BIS                           RST_PQ_BIS  4.7081277
## RST_PQ_FFS                           RST_PQ_FFS  3.3382092
## current_opioid_meds         current_opioid_meds  2.8861095
## proanx_t                               proanx_t  2.5498484
## income                                   income  2.4959685
## RST_PQ_BAS_I                       RST_PQ_BAS_I  2.4597145
## prodep_t                               prodep_t  2.4183424
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  2.3472881
## audit_total                         audit_total  2.2274154
## meq_tot                                 meq_tot  2.2179955
## bis_brief_tot                     bis_brief_tot  1.7376902
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.5035024
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.1309442
## cuditr0                                 cuditr0  0.6121982
## race                                       race  0.4550115
## hispanic                               hispanic  0.1989830
## assigned_sex_at_birth     assigned_sex_at_birth  0.1484993

tc 5 lr .0025 (finaltrain: 28 predictors, gcpsr items included)

# Seed the RNG immediately before fitting so this run can be repeated.
set.seed(1)

# Cross-validated boosted regression tree on finaltrain:
# response in column 1, predictors in columns 3-30,
# tree complexity 5, learning rate 0.0025.
cvtc5.lr0025_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  tree.complexity = 5,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  fold.vector = fold.vector,
  tolerance.method = "auto"
  # tolerance = 0.01 (explicit override, left disabled)
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2661 
##  
## now adding trees... 
## 100   1.2257 
## 150   1.1922 
## 200   1.1644 
## 250   1.1425 
## 300   1.123 
## 350   1.106 
## 400   1.0923 
## 450   1.0806 
## 500   1.0699 
## 550   1.062 
## 600   1.0554 
## 650   1.0495 
## 700   1.0443 
## 750   1.041 
## 800   1.0386 
## 850   1.0367 
## 900   1.035 
## 950   1.0331 
## 1000   1.0333 
## 1050   1.0323 
## 1100   1.032 
## 1150   1.0322 
## 1200   1.0321 
## 1250   1.0323 
## 1300   1.0324 
## 1350   1.033 
## 1400   1.0347 
## 1450   1.0353 
## 1500   1.0365 
## 1550   1.0372 
## 1600   1.0386 
## 1650   1.0402

## fitting final gbm model with a fixed number of  1100  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.722 
##  
## estimated cv deviance = 1.032 ; se = 0.038 
##  
## training data correlation = 0.779 
## cv correlation =  0.51 ; se = 0.046 
##  
## training data ROC score = 0.945 
## cv ROC score = 0.793 ; se = 0.024 
##  
## elapsed time -  0.21 minutes

# Cross-validation summary stored on the fit: mean and SE of deviance,
# correlation, discrimination, calibration and threshold.
cvtc5.lr0025_1$cv.statistics

## $deviance.mean
## [1] 1.032009
## 
## $deviance.se
## [1] 0.0375305
## 
## $correlation.mean
## [1] 0.5095567
## 
## $correlation.se
## [1] 0.04566738
## 
## $discrimination.mean
## [1] 0.79313
## 
## $discrimination.se
## [1] 0.02404356
## 
## $calibration.mean
## [1] 0.05226445 1.10718344 0.65601077 0.71405316 0.52708828
## 
## $calibration.se
## [1] 0.09220384 0.12733837 0.08074928 0.05109581 0.10268058
## 
## $cv.threshold
## [1] 0.4142599
## 
## $cv.threshold.se
## [1] 0.02991937

# Relative influence (rel.inf) of each predictor, largest first.
summary(cvtc5.lr0025_1)

##                                             var    rel.inf
## gcpsr_5                                 gcpsr_5 17.5290273
## cpaq8_tot                             cpaq8_tot  9.7662840
## gcpsr_4                                 gcpsr_4  8.2190010
## pain_duration                     pain_duration  6.8928556
## gcpsr_3                                 gcpsr_3  6.4953900
## gcpsr_6                                 gcpsr_6  5.9369907
## isi_tot                                 isi_tot  5.2910042
## overlapping_pain_number overlapping_pain_number  4.5980794
## employment                           employment  4.1867152
## age                                         age  4.1279930
## RST_PQ_BIS                           RST_PQ_BIS  3.0229158
## pcs_tot                                 pcs_tot  2.6771611
## RST_PQ_FFS                           RST_PQ_FFS  2.6163415
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  2.1909579
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  2.1689198
## bis_brief_tot                     bis_brief_tot  2.1188346
## meq_tot                                 meq_tot  1.8759135
## income                                   income  1.8522493
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.5118891
## audit_total                         audit_total  1.4138389
## proanx_t                               proanx_t  1.2463421
## prodep_t                               prodep_t  1.2457050
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.0085215
## current_opioid_meds         current_opioid_meds  0.9763553
## hispanic                               hispanic  0.3737280
## cuditr0                                 cuditr0  0.2639232
## race                                       race  0.2282603
## assigned_sex_at_birth     assigned_sex_at_birth  0.1648030

tc 5 lr .005 (finaltrain_nocgpsr: 24 predictors, gcpsr items excluded)

# Seed the RNG immediately before fitting so this run can be repeated.
set.seed(1)

# Cross-validated boosted regression tree on finaltrain_nocgpsr:
# response in column 1, predictors in columns 3-26,
# tree complexity 5, learning rate 0.005.
cvtc5.lr005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  tree.complexity = 5,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  fold.vector = fold.vector,
  tolerance.method = "auto"
  # tolerance = 0.01 (explicit override, left disabled)
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2618 
##  
## now adding trees... 
## 100   1.2287 
## 150   1.2063 
## 200   1.1953 
## 250   1.1891 
## 300   1.1845 
## 350   1.1835 
## 400   1.1833 
## 450   1.1848 
## 500   1.1858 
## 550   1.1887 
## 600   1.1944 
## 650   1.1977 
## 700   1.2015 
## 750   1.2065 
## 800   1.2096 
## 850   1.2151 
## 900   1.2213 
## 950   1.2264 
## 1000   1.2324

## fitting final gbm model with a fixed number of  400  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.923 
##  
## estimated cv deviance = 1.183 ; se = 0.034 
##  
## training data correlation = 0.69 
## cv correlation =  0.373 ; se = 0.057 
##  
## training data ROC score = 0.905 
## cv ROC score = 0.697 ; se = 0.034 
##  
## elapsed time -  0.11 minutes

# Print the cross-validation statistics stored on the fitted model.
cvtc5.lr005[["cv.statistics"]]

## $deviance.mean
## [1] 1.183298
## 
## $deviance.se
## [1] 0.03378967
## 
## $correlation.mean
## [1] 0.3728082
## 
## $correlation.se
## [1] 0.05718965
## 
## $discrimination.mean
## [1] 0.69689
## 
## $discrimination.se
## [1] 0.03373663
## 
## $calibration.mean
## [1] 0.1145446 1.1777036 0.5561596 0.6448285 0.4322357
## 
## $calibration.se
## [1] 0.16771648 0.21841431 0.11242734 0.07729361 0.11071291
## 
## $cv.threshold
## [1] 0.3672349
## 
## $cv.threshold.se
## [1] 0.009850221

# Relative influence (rel.inf) of each predictor in this model.
# NOTE(review): the earlier comment here said this call gives the deviance
# measures for comparing models; those are reported in
# cvtc5.lr005$cv.statistics (deviance.mean / deviance.se), not in summary().
summary(cvtc5.lr005)

##                                             var    rel.inf
## cpaq8_tot                             cpaq8_tot 23.6902872
## isi_tot                                 isi_tot  8.7868767
## pain_duration                     pain_duration  8.2035344
## overlapping_pain_number overlapping_pain_number  7.4456500
## pcs_tot                                 pcs_tot  5.6125706
## employment                           employment  5.2469153
## age                                         age  4.8527184
## RST_PQ_BIS                           RST_PQ_BIS  4.7445089
## RST_PQ_FFS                           RST_PQ_FFS  3.7937757
## current_opioid_meds         current_opioid_meds  3.5136110
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  2.8564319
## prodep_t                               prodep_t  2.6905914
## RST_PQ_BAS_I                       RST_PQ_BAS_I  2.5949630
## proanx_t                               proanx_t  2.4673339
## meq_tot                                 meq_tot  2.3652439
## audit_total                         audit_total  2.3185876
## income                                   income  2.2920255
## bis_brief_tot                     bis_brief_tot  1.7695939
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.6942837
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.5802445
## cuditr0                                 cuditr0  0.6480302
## race                                       race  0.3001341
## hispanic                               hispanic  0.2895538
## assigned_sex_at_birth     assigned_sex_at_birth  0.2425345

tc 5 lr .005 (finaltrain: 28 predictors, including gcpsr_3 to gcpsr_6)

# Fix the RNG seed before fitting so this run can be repeated.
set.seed(1)

# Cross-validated boosted regression tree fit on finaltrain:
# response in column 1, predictors in columns 3 to 30, with the
# user-supplied fold.vector defining the CV folds.
cvtc5.lr005_1 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 5,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance = 0.01 (left disabled)
  tolerance.method = "auto"
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.314 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2254 
##  
## now adding trees... 
## 100   1.1639 
## 150   1.121 
## 200   1.0938 
## 250   1.0729 
## 300   1.0564 
## 350   1.046 
## 400   1.0378 
## 450   1.0309 
## 500   1.0272 
## 550   1.0268 
## 600   1.0283 
## 650   1.0298 
## 700   1.0308 
## 750   1.0318 
## 800   1.0336 
## 850   1.0381 
## 900   1.042 
## 950   1.0451 
## 1000   1.051 
## 1050   1.0541 
## 1100   1.0586 
## 1150   1.063

## fitting final gbm model with a fixed number of  550  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.314 
## mean residual deviance = 0.718 
##  
## estimated cv deviance = 1.027 ; se = 0.037 
##  
## training data correlation = 0.781 
## cv correlation =  0.514 ; se = 0.044 
##  
## training data ROC score = 0.947 
## cv ROC score = 0.797 ; se = 0.022 
##  
## elapsed time -  0.14 minutes

# Print the cross-validation statistics stored on the fitted model.
cvtc5.lr005_1[["cv.statistics"]]

## $deviance.mean
## [1] 1.026764
## 
## $deviance.se
## [1] 0.03688992
## 
## $correlation.mean
## [1] 0.5144144
## 
## $correlation.se
## [1] 0.04428876
## 
## $discrimination.mean
## [1] 0.79702
## 
## $discrimination.se
## [1] 0.02238718
## 
## $calibration.mean
## [1] 0.06331919 1.11749538 0.66765286 0.71906443 0.53765355
## 
## $calibration.se
## [1] 0.08816397 0.12013595 0.08206896 0.05132381 0.10275860
## 
## $cv.threshold
## [1] 0.411925
## 
## $cv.threshold.se
## [1] 0.02932956

# Relative influence (rel.inf) of each predictor in the model fitted on
# finaltrain.
summary(cvtc5.lr005_1)

##                                             var    rel.inf
## gcpsr_5                                 gcpsr_5 18.1361390
## cpaq8_tot                             cpaq8_tot  9.9850628
## gcpsr_4                                 gcpsr_4  8.0736451
## pain_duration                     pain_duration  6.1306404
## isi_tot                                 isi_tot  5.8115850
## gcpsr_3                                 gcpsr_3  5.7412784
## gcpsr_6                                 gcpsr_6  5.6426767
## employment                           employment  4.7255699
## overlapping_pain_number overlapping_pain_number  4.6974451
## age                                         age  4.1489279
## RST_PQ_BIS                           RST_PQ_BIS  2.7355501
## RST_PQ_FFS                           RST_PQ_FFS  2.6501357
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  2.5406324
## pcs_tot                                 pcs_tot  2.5259340
## meq_tot                                 meq_tot  2.3394290
## income                                   income  2.2702297
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  2.0561947
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.7271336
## prodep_t                               prodep_t  1.3955031
## proanx_t                               proanx_t  1.3812406
## bis_brief_tot                     bis_brief_tot  1.3689129
## audit_total                         audit_total  1.2553422
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.9994355
## current_opioid_meds         current_opioid_meds  0.6846939
## hispanic                               hispanic  0.4106690
## assigned_sex_at_birth     assigned_sex_at_birth  0.2301388
## cuditr0                                 cuditr0  0.1979390
## race                                       race  0.1379155

Amrit paused here on Dec 20th, 2024