BRTforAmrit.knit

Boosted Regression Trees and

Load the additional BRT functions from Elith et al.

# Folder that holds the helper script; it is also made the working directory.
brt_dir <- "C:\\Users\\ajohal3\\Downloads"
setwd(brt_dir)
# Load the Elith et al. BRT helper functions from that folder.
source(paste0(brt_dir, "\\brt.functions.R"))

Set up packages needed

library(gbm)

## Warning: package 'gbm' was built under R version 4.4.2

## Loaded gbm 2.2.2

## This version of gbm is no longer under development. Consider transitioning to gbm3, https://github.com/gbm-developers/gbm3

library(psych)

## Warning: package 'psych' was built under R version 4.4.2

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.4.2

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(reshape)

## Warning: package 'reshape' was built under R version 4.4.2

## 
## Attaching package: 'reshape'

## The following object is masked from 'package:dplyr':
## 
##     rename

# Load the survey data; text columns are read in as factors.
# NOTE(review): the str() output lists a blank " " level for pain_duration --
# consider whether blanks should be read as NA instead.
Amritdata <- read.csv(
  file = "C:\\Users\\ajohal3\\Downloads\\Research_Data_updated_Dec_9_2024_AJK.csv",
  stringsAsFactors = TRUE
)
# Per-column overview: quantiles for numeric columns, level counts for factors.
summary(Amritdata)

##                    prolific_id       race                            hispanic  
##  54d2c4e2fdf99b2c319a8cf6:  1   Min.   : 1.000   Hispanic/Latinx         : 48  
##  55daf7b369dbc30005b68ac9:  1   1st Qu.: 6.000   I prefer not to disclose: 25  
##  574d70b95e549100063c6533:  1   Median : 6.000   Non-Hispanic/Latinx     :433  
##  57d67fdf6598aa00019917c8:  1   Mean   : 5.834                                 
##  584e7a66e42c2a00013099ac:  1   3rd Qu.: 6.000                                 
##  58dff341282ab00001e9ce26:  1   Max.   :13.000                                 
##  (Other)                 :500                                                  
##             assigned_sex_at_birth      age                       employment 
##  Female                :376       Min.   :18.00   Employed full-time  :174  
##  Intersex/DSD          :  1       1st Qu.:23.00   Student             :131  
##  Male                  :126       Median :29.00   Employed part-time  : 76  
##  prefer not to disclose:  3       Mean   :31.79   Unemployed          : 53  
##                                   3rd Qu.:38.00   Homemaker           : 31  
##                                   Max.   :70.00   Receiving disability: 26  
##                                   NA's   :1       (Other)             : 15  
##                     income    overlapping_pain_number    pain_duration
##  $0-$25,000            :168   Min.   :1.000           >10 years :117  
##  $25,001-$50,000       :130   1st Qu.:2.000           5-7 years : 99  
##  $50,001-$75,000       : 83   Median :3.000           2-4 years : 86  
##  $75,001-$100,000      : 45   Mean   :2.883           1-2 years : 59  
##  >$100,000             : 58   3rd Qu.:4.000           7-10 years: 55  
##  prefer not to disclose: 22   Max.   :9.000           3-6 months: 50  
##                               NA's   :3               (Other)   : 40  
##       gcpsr_2       gcpsr_3          gcpsr_4         gcpsr_5       gcpsr_6  
##  Every day:187   Min.   : 0.000   Min.   : 0.00   Min.   : 0.000   No :400  
##  Some days:319   1st Qu.: 4.000   1st Qu.: 4.00   1st Qu.: 4.000   Yes:106  
##                  Median : 5.000   Median : 6.00   Median : 5.000            
##                  Mean   : 5.255   Mean   : 5.54   Mean   : 5.316            
##                  3rd Qu.: 6.000   3rd Qu.: 7.00   3rd Qu.: 7.000            
##                  Max.   :10.000   Max.   :10.00   Max.   :10.000            
##                                                                             
##     prodep_t        proanx_t        prorx_t        RST_PQ_FFS   
##  Min.   :41.00   Min.   :40.30   Min.   :36.30   Min.   :1.000  
##  1st Qu.:57.30   1st Qu.:59.50   1st Qu.:41.60   1st Qu.:2.000  
##  Median :63.90   Median :65.30   Median :43.70   Median :2.600  
##  Mean   :62.89   Mean   :63.96   Mean   :44.72   Mean   :2.523  
##  3rd Qu.:69.40   3rd Qu.:69.30   3rd Qu.:48.20   3rd Qu.:3.000  
##  Max.   :79.40   Max.   :81.60   Max.   :75.10   Max.   :4.000  
##                                  NA's   :320                    
##    RST_PQ_BIS    RST_PQ_BAS_RI   RST_PQ_BAS_GDP  RST_PQ_BAS_RR  
##  Min.   :1.174   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.609   1st Qu.:2.000   1st Qu.:2.143   1st Qu.:2.300  
##  Median :3.065   Median :2.429   Median :2.714   Median :2.700  
##  Mean   :2.980   Mean   :2.388   Mean   :2.643   Mean   :2.672  
##  3rd Qu.:3.478   3rd Qu.:2.857   3rd Qu.:3.143   3rd Qu.:3.100  
##  Max.   :4.000   Max.   :3.857   Max.   :4.000   Max.   :4.000  
##                                                                 
##   RST_PQ_BAS_I     SHAPS_tot      bis_brief_tot     cpaq8_tot    
##  Min.   :1.000   Min.   : 0.000   Min.   :1.000   Min.   : 2.00  
##  1st Qu.:1.875   1st Qu.: 0.000   1st Qu.:1.875   1st Qu.:21.00  
##  Median :2.375   Median : 1.000   Median :2.125   Median :27.00  
##  Mean   :2.383   Mean   : 2.433   Mean   :2.192   Mean   :26.27  
##  3rd Qu.:2.875   3rd Qu.: 4.000   3rd Qu.:2.500   3rd Qu.:31.75  
##  Max.   :4.000   Max.   :13.000   Max.   :3.875   Max.   :46.00  
##                                                                  
##     pcs_tot         isi_tot         meq_tot       audit_total    
##  Min.   : 0.00   Min.   : 0.00   Min.   :36.00   Min.   : 0.000  
##  1st Qu.:17.00   1st Qu.:10.00   1st Qu.:45.00   1st Qu.: 1.000  
##  Median :26.00   Median :14.00   Median :48.00   Median : 2.000  
##  Mean   :26.29   Mean   :14.09   Mean   :48.18   Mean   : 2.502  
##  3rd Qu.:36.00   3rd Qu.:19.00   3rd Qu.:52.00   3rd Qu.: 4.000  
##  Max.   :52.00   Max.   :28.00   Max.   :64.00   Max.   :11.000  
##                                                                  
##     cuditr0       current_opioid_meds gcpsr_2_HICP gcpsr_2_HICP_num
##  Min.   :0.0000   No :436             No :319      Min.   :0.0000  
##  1st Qu.:0.0000   Yes: 70             Yes:187      1st Qu.:0.0000  
##  Median :0.0000                                    Median :0.0000  
##  Mean   :0.3814                                    Mean   :0.3696  
##  3rd Qu.:1.0000                                    3rd Qu.:1.0000  
##  Max.   :1.0000                                    Max.   :1.0000  
##                                                                    
##    income_num    income_alpha                   hispanic_bin
##  Min.   :1.000   A:168        I prefer not to disclose: 25  
##  1st Qu.:1.000   B:130        No                      :433  
##  Median :2.000   C: 83        Yes                     : 48  
##  Mean   :2.528   D: 45                                      
##  3rd Qu.:3.000   E: 58                                      
##  Max.   :6.000   F: 22                                      
##

# Check the type of every column (factor vs. int vs. num) before modelling.
str(Amritdata)

## 'data.frame':    506 obs. of  37 variables:
##  $ prolific_id            : Factor w/ 506 levels "54d2c4e2fdf99b2c319a8cf6",..: 1 2 4 5 6 7 10 13 14 18 ...
##  $ race                   : int  6 6 6 6 6 6 6 6 6 6 ...
##  $ hispanic               : Factor w/ 3 levels "Hispanic/Latinx",..: 3 3 3 2 3 3 3 3 1 3 ...
##  $ assigned_sex_at_birth  : Factor w/ 4 levels "Female","Intersex/DSD",..: 3 3 3 1 1 3 3 1 1 3 ...
##  $ age                    : int  43 37 53 34 42 37 46 22 55 24 ...
##  $ employment             : Factor w/ 9 levels "Employed full-time",..: 1 1 1 1 2 2 1 8 2 9 ...
##  $ income                 : Factor w/ 6 levels "$0-$25,000","$25,001-$50,000",..: 3 2 5 1 1 1 2 1 2 1 ...
##  $ overlapping_pain_number: int  5 4 2 1 3 3 3 3 3 1 ...
##  $ pain_duration          : Factor w/ 8 levels " ",">10 years",..: 2 6 7 2 6 4 7 4 4 6 ...
##  $ gcpsr_2                : Factor w/ 2 levels "Every day","Some days": 1 2 1 2 2 2 2 2 2 2 ...
##  $ gcpsr_3                : int  4 6 8 6 7 5 6 6 5 8 ...
##  $ gcpsr_4                : int  6 8 9 5 6 7 5 5 4 5 ...
##  $ gcpsr_5                : int  6 4 8 5 6 6 6 3 5 3 ...
##  $ gcpsr_6                : Factor w/ 2 levels "No","Yes": 2 1 1 1 2 1 1 1 1 1 ...
##  $ prodep_t               : num  58.9 71.2 62.2 63.9 49 69.4 60.5 62.2 67.5 55.7 ...
##  $ proanx_t               : num  48 69.3 61.4 65.3 40.3 59.5 55.8 65.3 71.2 55.8 ...
##  $ prorx_t                : num  NA NA 41.6 50.4 45.5 NA 36.3 NA NA NA ...
##  $ RST_PQ_FFS             : num  1.5 1.6 3.4 2.5 2.2 3.3 1.9 2.2 3.1 1.7 ...
##  $ RST_PQ_BIS             : num  1.65 3.43 2.65 2.96 2.17 ...
##  $ RST_PQ_BAS_RI          : num  3.43 1.71 2.86 2.57 2.71 ...
##  $ RST_PQ_BAS_GDP         : num  3.14 1.43 3.71 3 3.14 ...
##  $ RST_PQ_BAS_RR          : num  2 1.8 3.2 2.9 3 2.8 2.4 2.7 3.6 1.9 ...
##  $ RST_PQ_BAS_I           : num  2.25 2.62 2.5 2.25 2.62 ...
##  $ SHAPS_tot              : int  0 4 0 0 0 1 0 6 0 10 ...
##  $ bis_brief_tot          : num  2 2.88 1.75 2.62 2.12 ...
##  $ cpaq8_tot              : int  29 26 17 21 20 28 30 30 28 32 ...
##  $ pcs_tot                : int  5 25 23 31 32 7 16 35 21 20 ...
##  $ isi_tot                : int  14 17 11 14 15 15 1 21 13 0 ...
##  $ meq_tot                : int  48 44 43 50 43 57 53 50 45 53 ...
##  $ audit_total            : int  2 1 1 1 0 2 7 3 5 2 ...
##  $ cuditr0                : int  0 1 0 0 0 0 0 1 0 0 ...
##  $ current_opioid_meds    : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 1 1 ...
##  $ gcpsr_2_HICP           : Factor w/ 2 levels "No","Yes": 2 1 2 1 1 1 1 1 1 1 ...
##  $ gcpsr_2_HICP_num       : int  1 0 1 0 0 0 0 0 0 0 ...
##  $ income_num             : int  3 2 5 1 1 1 2 1 2 1 ...
##  $ income_alpha           : Factor w/ 6 levels "A","B","C","D",..: 3 2 5 1 1 1 2 1 2 1 ...
##  $ hispanic_bin           : Factor w/ 3 levels "I prefer not to disclose",..: 2 2 2 1 2 2 2 2 3 2 ...

Clean data

# List the column names so the columns to drop can be picked by name.
colnames(Amritdata)

##  [1] "prolific_id"             "race"                   
##  [3] "hispanic"                "assigned_sex_at_birth"  
##  [5] "age"                     "employment"             
##  [7] "income"                  "overlapping_pain_number"
##  [9] "pain_duration"           "gcpsr_2"                
## [11] "gcpsr_3"                 "gcpsr_4"                
## [13] "gcpsr_5"                 "gcpsr_6"                
## [15] "prodep_t"                "proanx_t"               
## [17] "prorx_t"                 "RST_PQ_FFS"             
## [19] "RST_PQ_BIS"              "RST_PQ_BAS_RI"          
## [21] "RST_PQ_BAS_GDP"          "RST_PQ_BAS_RR"          
## [23] "RST_PQ_BAS_I"            "SHAPS_tot"              
## [25] "bis_brief_tot"           "cpaq8_tot"              
## [27] "pcs_tot"                 "isi_tot"                
## [29] "meq_tot"                 "audit_total"            
## [31] "cuditr0"                 "current_opioid_meds"    
## [33] "gcpsr_2_HICP"            "gcpsr_2_HICP_num"       
## [35] "income_num"              "income_alpha"           
## [37] "hispanic_bin"

# Columns excluded from modelling: the alternative codings of the outcome
# (gcpsr_2, gcpsr_2_HICP), the recoded copies of income and hispanic,
# prorx_t (320 missing values in the summary above) and SHAPS_tot.
drop_cols <- c(
  "gcpsr_2", "prorx_t", "SHAPS_tot", "gcpsr_2_HICP",
  "income_num", "income_alpha", "hispanic_bin"
)
Amritdata <- Amritdata %>%
  select(!all_of(drop_cols))

Now randomly sample the IDs so you have 404 (80% of your sample) unique IDs in the training set, and the other IDs will be used as a test set.

# Fix the RNG seed so the train/test split is identical on every run.
set.seed(2024)
# Draw 80% of the unique participant IDs for the training set
# (floor(0.8 * 506) = 404 with the current data).
all_ids <- unique(Amritdata$prolific_id)
n_train <- floor(0.8 * length(all_ids))
samp <- sample(x = all_ids, size = n_train, replace = FALSE)
length(samp)

## [1] 404

# Keep only the rows whose participant ID was drawn into the training sample.
in_train <- Amritdata$prolific_id %in% samp
train <- Amritdata[in_train, ]
# Sanity check: number of distinct participants in the training set.
n_distinct(train$prolific_id)

## [1] 404

Re-ordering variables so that outcome is first followed by unused variables, followed by the predictors. We won’t use id, which will now be column 2. Remove repeated variables.

# Column names and positions of the training set after the unused columns were dropped.
colnames(train)

##  [1] "prolific_id"             "race"                   
##  [3] "hispanic"                "assigned_sex_at_birth"  
##  [5] "age"                     "employment"             
##  [7] "income"                  "overlapping_pain_number"
##  [9] "pain_duration"           "gcpsr_3"                
## [11] "gcpsr_4"                 "gcpsr_5"                
## [13] "gcpsr_6"                 "prodep_t"               
## [15] "proanx_t"                "RST_PQ_FFS"             
## [17] "RST_PQ_BIS"              "RST_PQ_BAS_RI"          
## [19] "RST_PQ_BAS_GDP"          "RST_PQ_BAS_RR"          
## [21] "RST_PQ_BAS_I"            "bis_brief_tot"          
## [23] "cpaq8_tot"               "pcs_tot"                
## [25] "isi_tot"                 "meq_tot"                
## [27] "audit_total"             "cuditr0"                
## [29] "current_opioid_meds"     "gcpsr_2_HICP_num"

# Move the outcome to column 1 and keep every other column in its existing
# order. Selecting by name (rather than by position 30) keeps this correct
# even if columns are added or dropped upstream.
outcome_col <- "gcpsr_2_HICP_num"
train <- train[, c(outcome_col, setdiff(names(train), outcome_col))]
colnames(train)

##  [1] "gcpsr_2_HICP_num"        "prolific_id"            
##  [3] "race"                    "hispanic"               
##  [5] "assigned_sex_at_birth"   "age"                    
##  [7] "employment"              "income"                 
##  [9] "overlapping_pain_number" "pain_duration"          
## [11] "gcpsr_3"                 "gcpsr_4"                
## [13] "gcpsr_5"                 "gcpsr_6"                
## [15] "prodep_t"                "proanx_t"               
## [17] "RST_PQ_FFS"              "RST_PQ_BIS"             
## [19] "RST_PQ_BAS_RI"           "RST_PQ_BAS_GDP"         
## [21] "RST_PQ_BAS_RR"           "RST_PQ_BAS_I"           
## [23] "bis_brief_tot"           "cpaq8_tot"              
## [25] "pcs_tot"                 "isi_tot"                
## [27] "meq_tot"                 "audit_total"            
## [29] "cuditr0"                 "current_opioid_meds"

We need to make the cross validation occur over people

# CV fold making
# Show the training columns again; prolific_id (column 2) is the ID that folds are assigned by.
colnames(train)

##  [1] "gcpsr_2_HICP_num"        "prolific_id"            
##  [3] "race"                    "hispanic"               
##  [5] "assigned_sex_at_birth"   "age"                    
##  [7] "employment"              "income"                 
##  [9] "overlapping_pain_number" "pain_duration"          
## [11] "gcpsr_3"                 "gcpsr_4"                
## [13] "gcpsr_5"                 "gcpsr_6"                
## [15] "prodep_t"                "proanx_t"               
## [17] "RST_PQ_FFS"              "RST_PQ_BIS"             
## [19] "RST_PQ_BAS_RI"           "RST_PQ_BAS_GDP"         
## [21] "RST_PQ_BAS_RR"           "RST_PQ_BAS_I"           
## [23] "bis_brief_tot"           "cpaq8_tot"              
## [25] "pcs_tot"                 "isi_tot"                
## [27] "meq_tot"                 "audit_total"            
## [29] "cuditr0"                 "current_opioid_meds"

# Assign every participant (not every row) to one of nfolds cross-validation
# folds, so that all rows of one person always land in the same fold.
# Fix the RNG seed so the fold assignment is reproducible.
set.seed(2025)
id <- unique(train$prolific_id)
#summary(id)
#describe(id)
nfolds <- 10
# Balanced fold labels (1..nfolds recycled to one label per ID), then shuffled.
cvtest <- sample(rep(seq_len(nfolds), length.out = length(id)), replace = FALSE)
idname <- "prolific_id"
foldname <- "fold"
# Lookup table: one row per participant with the fold they belong to.
cvdf <- data.frame(id, cvtest)
names(cvdf) <- c(idname, foldname)
head(cvdf)

##                prolific_id fold
## 1 55daf7b369dbc30005b68ac9    1
## 2 57d67fdf6598aa00019917c8    5
## 3 58dff341282ab00001e9ce26    3
## 4 5947f262de49a9000165ccd3    8
## 5 5a64bb3035f26b0001492e6a    7
## 6 5ac6852df69e940001d98f04    9

# Attach each participant's fold to the training rows.
# merge(..., sort = FALSE) returns its rows in an unspecified order, so the
# fold is looked up with match() instead: combcvdf has exactly one row per row
# of train, in the same order as train.
combcvdf <- train
combcvdf$fold <- cvdf$fold[match(train$prolific_id, cvdf$prolific_id)]
# Keep the column layout merge() produced: key column first, fold last.
combcvdf <- combcvdf[, c("prolific_id", setdiff(names(combcvdf), "prolific_id"))]
rownames(combcvdf) <- NULL
head(combcvdf)

##                prolific_id gcpsr_2_HICP_num race            hispanic
## 1 55daf7b369dbc30005b68ac9                0    6 Non-Hispanic/Latinx
## 2 57d67fdf6598aa00019917c8                1    6 Non-Hispanic/Latinx
## 3 58dff341282ab00001e9ce26                0    6 Non-Hispanic/Latinx
## 4 5947f262de49a9000165ccd3                0    6 Non-Hispanic/Latinx
## 5 5a64bb3035f26b0001492e6a                0    6 Non-Hispanic/Latinx
## 6 5ac6852df69e940001d98f04                0    6 Non-Hispanic/Latinx
##   assigned_sex_at_birth age         employment          income
## 1                  Male  37 Employed full-time $25,001-$50,000
## 2                  Male  53 Employed full-time       >$100,000
## 3                Female  42 Employed part-time      $0-$25,000
## 4                  Male  37 Employed part-time      $0-$25,000
## 5                  Male  46 Employed full-time $25,001-$50,000
## 6                Female  22            Student      $0-$25,000
##   overlapping_pain_number pain_duration gcpsr_3 gcpsr_4 gcpsr_5 gcpsr_6
## 1                       4     5-7 years       6       8       4      No
## 2                       2    7-10 years       8       9       8      No
## 3                       3     5-7 years       7       6       6     Yes
## 4                       3     2-4 years       5       7       6      No
## 5                       3    7-10 years       6       5       6      No
## 6                       3     2-4 years       6       5       3      No
##   prodep_t proanx_t RST_PQ_FFS RST_PQ_BIS RST_PQ_BAS_RI RST_PQ_BAS_GDP
## 1     71.2     69.3        1.6   3.434783      1.714286       1.428571
## 2     62.2     61.4        3.4   2.652174      2.857143       3.714286
## 3     49.0     40.3        2.2   2.173913      2.714286       3.142857
## 4     69.4     59.5        3.3   2.739130      1.714286       1.142857
## 5     60.5     55.8        1.9   2.565217      2.714286       2.571429
## 6     62.2     65.3        2.2   3.304348      1.571429       1.142857
##   RST_PQ_BAS_RR RST_PQ_BAS_I bis_brief_tot cpaq8_tot pcs_tot isi_tot meq_tot
## 1           1.8        2.625         2.875        26      25      17      44
## 2           3.2        2.500         1.750        17      23      11      43
## 3           3.0        2.625         2.125        20      32      15      43
## 4           2.8        2.125         2.625        28       7      15      57
## 5           2.4        1.500         1.750        30      16       1      53
## 6           2.7        2.125         2.500        30      35      21      50
##   audit_total cuditr0 current_opioid_meds fold
## 1           1       1                  No    1
## 2           1       0                 Yes    5
## 3           0       0                  No    3
## 4           2       0                  No    8
## 5           7       0                  No    7
## 6           3       1                  No    9

# Number of distinct participants in each fold. Both columns are taken from
# combcvdf so that IDs and folds are paired row by row; crossing
# train$prolific_id with combcvdf$fold is only valid if the two data frames
# happen to share the same row order.
#table(combcvdf$prolific_id, combcvdf$fold)
#rowSums(table(combcvdf$prolific_id, combcvdf$fold) > 0)
colSums(table(combcvdf$prolific_id, combcvdf$fold) > 0)

##  1  2  3  4  5  6  7  8  9 10 
## 41 41 41 41 40 40 40 40 40 40

# One fold label per row of train (and therefore of finaltrain and
# finaltrain_nocgpsr, which are column re-orderings of train). The fold is
# looked up by participant ID so the alignment with the modelling data does
# not depend on the row order of combcvdf.
fold.vector <- cvdf$fold[match(train$prolific_id, cvdf$prolific_id)]
# Every training row must have received a fold.
stopifnot(length(fold.vector) == nrow(train), !anyNA(fold.vector))
head(fold.vector)

## [1] 1 5 3 8 7 9

# Quick check of the fold labels: n should equal the number of training rows,
# and min/max should be 1 and nfolds.
describe(fold.vector)

##    vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 404 5.47 2.88      5    5.46 3.71   1  10     9 0.02    -1.24 0.14

Now let’s run some models with our finaltrain data set! First, tc 3, lr .0005

# Two modelling data sets are built from train; run every model on both, separately:
#   finaltrain          - all predictors,                  use gbm.x = 3:30
#   finaltrain_nocgpsr  - gcpsr items moved to the end,    use gbm.x = 3:26
finaltrain <- as.data.frame(train)
# Move the four gcpsr items to the last columns by name (not by position), so
# that gbm.x = 3:26 is guaranteed to exclude exactly these columns.
gcpsr_cols <- c("gcpsr_3", "gcpsr_4", "gcpsr_5", "gcpsr_6")
finaltrain_nocgpsr <- finaltrain[, c(setdiff(names(finaltrain), gcpsr_cols), gcpsr_cols)]
colnames(finaltrain)

##  [1] "gcpsr_2_HICP_num"        "prolific_id"            
##  [3] "race"                    "hispanic"               
##  [5] "assigned_sex_at_birth"   "age"                    
##  [7] "employment"              "income"                 
##  [9] "overlapping_pain_number" "pain_duration"          
## [11] "gcpsr_3"                 "gcpsr_4"                
## [13] "gcpsr_5"                 "gcpsr_6"                
## [15] "prodep_t"                "proanx_t"               
## [17] "RST_PQ_FFS"              "RST_PQ_BIS"             
## [19] "RST_PQ_BAS_RI"           "RST_PQ_BAS_GDP"         
## [21] "RST_PQ_BAS_RR"           "RST_PQ_BAS_I"           
## [23] "bis_brief_tot"           "cpaq8_tot"              
## [25] "pcs_tot"                 "isi_tot"                
## [27] "meq_tot"                 "audit_total"            
## [29] "cuditr0"                 "current_opioid_meds"

# Confirm the gcpsr items now occupy the last four columns (27:30).
colnames(finaltrain_nocgpsr)

##  [1] "gcpsr_2_HICP_num"        "prolific_id"            
##  [3] "race"                    "hispanic"               
##  [5] "assigned_sex_at_birth"   "age"                    
##  [7] "employment"              "income"                 
##  [9] "overlapping_pain_number" "pain_duration"          
## [11] "prodep_t"                "proanx_t"               
## [13] "RST_PQ_FFS"              "RST_PQ_BIS"             
## [15] "RST_PQ_BAS_RI"           "RST_PQ_BAS_GDP"         
## [17] "RST_PQ_BAS_RR"           "RST_PQ_BAS_I"           
## [19] "bis_brief_tot"           "cpaq8_tot"              
## [21] "pcs_tot"                 "isi_tot"                
## [23] "meq_tot"                 "audit_total"            
## [25] "cuditr0"                 "current_opioid_meds"    
## [27] "gcpsr_3"                 "gcpsr_4"                
## [29] "gcpsr_5"                 "gcpsr_6"

# BRT without the gcpsr items: tree complexity 3, learning rate 0.0005.
# Predictors are columns 3:26 of finaltrain_nocgpsr, i.e. everything except the
# outcome (column 1), the participant ID (column 2) and the four gcpsr items
# (columns 27:30). Cross-validation uses the person-level folds in fold.vector.
# NOTE(review): bag.fraction = 0.5 presumably makes each fit stochastic, so
# results may differ between runs unless a seed is set -- confirm.
cvtc3.lr0005 <- gbm.step(data=finaltrain_nocgpsr, 
         gbm.x = 3:26,
         gbm.y = 1,
         fold.vector = fold.vector,
         family = "bernoulli",
         tree.complexity = 3,
         learning.rate = 0.0005,
         bag.fraction = 0.5,
         tolerance.method = "auto",
         #tolerance = 0.01,
         max.trees = 50000)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3089 
##  
## now adding trees... 
## 100   1.3014 
## 150   1.2945 
## 200   1.2876 
## 250   1.2809 
## 300   1.2746 
## 350   1.2687 
## 400   1.2629 
## 450   1.2575 
## 500   1.2524 
## 550   1.2476 
## 600   1.2427 
## 650   1.2382 
## 700   1.2339 
## 750   1.2298 
## 800   1.2258 
## 850   1.2222 
## 900   1.2186 
## 950   1.215 
## 1000   1.2117 
## 1050   1.2086 
## 1100   1.2056 
## 1150   1.2028 
## 1200   1.1999 
## 1250   1.1974 
## 1300   1.195 
## 1350   1.1926 
## 1400   1.1901 
## 1450   1.188 
## 1500   1.1858 
## 1550   1.184 
## 1600   1.182 
## 1650   1.1801 
## 1700   1.1782 
## 1750   1.1764 
## 1800   1.1748 
## 1850   1.1732 
## 1900   1.1716 
## 1950   1.1701 
## 2000   1.1685 
## 2050   1.1671 
## 2100   1.1658 
## 2150   1.1646 
## 2200   1.1633 
## 2250   1.1621 
## 2300   1.161 
## 2350   1.1599 
## 2400   1.1588 
## 2450   1.1579 
## 2500   1.157 
## 2550   1.1561 
## 2600   1.1551 
## 2650   1.1543 
## 2700   1.1535 
## 2750   1.1529 
## 2800   1.1522 
## 2850   1.1514 
## 2900   1.151 
## 2950   1.1504 
## 3000   1.1499 
## 3050   1.1493 
## 3100   1.1489 
## 3150   1.1484 
## 3200   1.148 
## 3250   1.1475 
## 3300   1.1474 
## 3350   1.1471 
## 3400   1.1469 
## 3450   1.1468 
## 3500   1.1464 
## 3550   1.1461 
## 3600   1.1457 
## 3650   1.1453 
## 3700   1.145 
## 3750   1.145 
## 3800   1.1449 
## 3850   1.1447 
## 3900   1.1446 
## 3950   1.1445 
## 4000   1.1443 
## 4050   1.1442 
## 4100   1.1441 
## 4150   1.144 
## 4200   1.1441 
## 4250   1.1442 
## 4300   1.1441 
## 4350   1.144 
## 4400   1.1441

## fitting final gbm model with a fixed number of  4150  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.969 
##  
## estimated cv deviance = 1.144 ; se = 0.048 
##  
## training data correlation = 0.63 
## cv correlation =  0.426 ; se = 0.055 
##  
## training data ROC score = 0.872 
## cv ROC score = 0.745 ; se = 0.03 
##  
## elapsed time -  0.37 minutes

# Cross-validated performance of this model: deviance, correlation,
# discrimination (ROC), calibration and threshold, each with its standard error.
cvtc3.lr0005$cv.statistics

## $deviance.mean
## [1] 1.144017
## 
## $deviance.se
## [1] 0.0475256
## 
## $correlation.mean
## [1] 0.4257112
## 
## $correlation.se
## [1] 0.05542095
## 
## $discrimination.mean
## [1] 0.7446
## 
## $discrimination.se
## [1] 0.03025443
## 
## $calibration.mean
## [1] 0.3201415 1.5979503 0.4264567 0.4602101 0.3688215
## 
## $calibration.se
## [1] 0.23821960 0.44100007 0.09847052 0.09685961 0.07601304
## 
## $cv.threshold
## [1] 0.3781137
## 
## $cv.threshold.se
## [1] 0.01345187

# Relative influence of each predictor in this model. The CV deviance used to
# compare models is in $cv.statistics$deviance.mean, not in this summary.
summary(cvtc3.lr0005)

##                                             var     rel.inf
## cpaq8_tot                             cpaq8_tot 34.28308891
## age                                         age  8.52846699
## isi_tot                                 isi_tot  7.84598339
## current_opioid_meds         current_opioid_meds  7.04057701
## overlapping_pain_number overlapping_pain_number  6.44704211
## pain_duration                     pain_duration  5.66407621
## pcs_tot                                 pcs_tot  5.38906926
## RST_PQ_BIS                           RST_PQ_BIS  5.23306327
## employment                           employment  3.29176647
## audit_total                         audit_total  3.28654962
## bis_brief_tot                     bis_brief_tot  2.41069309
## income                                   income  2.15476205
## prodep_t                               prodep_t  1.67415831
## proanx_t                               proanx_t  1.42910599
## RST_PQ_FFS                           RST_PQ_FFS  1.13967206
## meq_tot                                 meq_tot  0.95263740
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  0.78579175
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.72992404
## hispanic                               hispanic  0.51199642
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  0.49417388
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.46928338
## race                                       race  0.15781572
## cuditr0                                 cuditr0  0.06233123
## assigned_sex_at_birth     assigned_sex_at_birth  0.01797142

THIS IS WHERE WE STOPPED WHILE CODING TOGETHER. In all of the code below, you will have to change gbm.x and data to match what we were doing (so gbm.x = 3:26, data = finaltrain_nocgpsr).

tc 3, lr .0005

# Same settings as the previous model (tree complexity 3, learning rate 0.0005)
# but fitted to finaltrain with all 28 predictors (columns 3:30), i.e. including
# the four gcpsr items.
# NOTE(review): the object is named "lr005" although learning.rate is 0.0005;
# the name is kept because the lines below refer to it.
cvtc3.lr005 <- gbm.step(data=finaltrain, 
         gbm.x = 3:30,
         gbm.y = 1,
         fold.vector = fold.vector,
         family = "bernoulli",
         tree.complexity = 3,
         learning.rate = 0.0005,
         bag.fraction = 0.5,
         tolerance.method = "auto",
         #tolerance = 0.01,
         max.trees = 50000)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3053 
##  
## now adding trees... 
## 100   1.2943 
## 150   1.2841 
## 200   1.2742 
## 250   1.2645 
## 300   1.2554 
## 350   1.2467 
## 400   1.2381 
## 450   1.2299 
## 500   1.2221 
## 550   1.2148 
## 600   1.2077 
## 650   1.2008 
## 700   1.1939 
## 750   1.1874 
## 800   1.1811 
## 850   1.1752 
## 900   1.1696 
## 950   1.164 
## 1000   1.1588 
## 1050   1.1537 
## 1100   1.1486 
## 1150   1.1438 
## 1200   1.1394 
## 1250   1.135 
## 1300   1.1306 
## 1350   1.1263 
## 1400   1.1225 
## 1450   1.1185 
## 1500   1.1148 
## 1550   1.111 
## 1600   1.1076 
## 1650   1.1044 
## 1700   1.1011 
## 1750   1.098 
## 1800   1.095 
## 1850   1.0919 
## 1900   1.0892 
## 1950   1.0865 
## 2000   1.0836 
## 2050   1.081 
## 2100   1.0785 
## 2150   1.0761 
## 2200   1.0739 
## 2250   1.0719 
## 2300   1.0697 
## 2350   1.0676 
## 2400   1.0656 
## 2450   1.0638 
## 2500   1.062 
## 2550   1.0602 
## 2600   1.0584 
## 2650   1.0567 
## 2700   1.0548 
## 2750   1.0534 
## 2800   1.0518 
## 2850   1.0503 
## 2900   1.0489 
## 2950   1.0476 
## 3000   1.0461 
## 3050   1.0449 
## 3100   1.0436 
## 3150   1.0424 
## 3200   1.0413 
## 3250   1.0401 
## 3300   1.0391 
## 3350   1.0382 
## 3400   1.0372 
## 3450   1.0362 
## 3500   1.0353 
## 3550   1.0345 
## 3600   1.0338 
## 3650   1.0331 
## 3700   1.0321 
## 3750   1.0316 
## 3800   1.0309 
## 3850   1.0301 
## 3900   1.0296 
## 3950   1.029 
## 4000   1.0284 
## 4050   1.028 
## 4100   1.0275 
## 4150   1.0271 
## 4200   1.0267 
## 4250   1.0262 
## 4300   1.0258 
## 4350   1.0258 
## 4400   1.0253 
## 4450   1.0248 
## 4500   1.0246 
## 4550   1.0242 
## 4600   1.0237 
## 4650   1.0234 
## 4700   1.0231 
## 4750   1.0228 
## 4800   1.0226 
## 4850   1.0227 
## 4900   1.0224 
## 4950   1.0222 
## 5000   1.0221 
## 5050   1.022 
## 5100   1.0219 
## 5150   1.0217 
## 5200   1.0216 
## 5250   1.0217 
## 5300   1.0215 
## 5350   1.0212 
## 5400   1.0213 
## 5450   1.0213 
## 5500   1.0214

## fitting final gbm model with a fixed number of  5350  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.819 
##  
## estimated cv deviance = 1.021 ; se = 0.06 
##  
## training data correlation = 0.712 
## cv correlation =  0.54 ; se = 0.055 
##  
## training data ROC score = 0.909 
## cv ROC score = 0.805 ; se = 0.027 
##  
## elapsed time -  0.51 minutes

# Cross-validated performance of the all-predictor model (compare
# deviance.mean and discrimination.mean with the model without gcpsr items).
cvtc3.lr005$cv.statistics

## $deviance.mean
## [1] 1.02125
## 
## $deviance.se
## [1] 0.06019242
## 
## $correlation.mean
## [1] 0.5396279
## 
## $correlation.se
## [1] 0.05466285
## 
## $discrimination.mean
## [1] 0.80513
## 
## $discrimination.se
## [1] 0.02732153
## 
## $calibration.mean
## [1] 0.2656818 1.4480633 0.4394217 0.5036994 0.3846411
## 
## $calibration.se
## [1] 0.20992680 0.31935138 0.10728474 0.09275580 0.09629856
## 
## $cv.threshold
## [1] 0.3950878
## 
## $cv.threshold.se
## [1] 0.02209087

# Relative influence of each predictor in the all-predictor model.
summary(cvtc3.lr005)

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 28.11781616
## cpaq8_tot                             cpaq8_tot 17.21535092
## gcpsr_3                                 gcpsr_3  6.55010805
## age                                         age  6.23326778
## isi_tot                                 isi_tot  4.68449160
## gcpsr_6                                 gcpsr_6  3.96021195
## pain_duration                     pain_duration  3.90936231
## RST_PQ_BIS                           RST_PQ_BIS  3.58507490
## gcpsr_4                                 gcpsr_4  3.18452015
## overlapping_pain_number overlapping_pain_number  3.06062070
## current_opioid_meds         current_opioid_meds  2.97187506
## employment                           employment  2.69628289
## pcs_tot                                 pcs_tot  1.91984333
## audit_total                         audit_total  1.55495271
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.51650831
## bis_brief_tot                     bis_brief_tot  1.30447828
## income                                   income  1.23263277
## proanx_t                               proanx_t  1.16003137
## meq_tot                                 meq_tot  0.87741060
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  0.84875439
## hispanic                               hispanic  0.80856748
## prodep_t                               prodep_t  0.69460200
## RST_PQ_FFS                           RST_PQ_FFS  0.66611477
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.59127031
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.48076482
## race                                       race  0.11193943
## cuditr0                                 cuditr0  0.03561131
## assigned_sex_at_birth     assigned_sex_at_birth  0.02753565

tc 3 lr .00025

# BRT without the gcpsr items: tree complexity 3, learning rate 0.00025.
# NOTE(review): this assigns to cvtc3.lr0005, the same name used earlier for the
# learning.rate = 0.0005 model, so that earlier fit is overwritten. A distinct
# name (e.g. cvtc3.lr00025) would keep both models available for comparison --
# confirm, and update any later references if renamed.
cvtc3.lr0005 <- gbm.step(data=finaltrain_nocgpsr, 
         gbm.x = 3:26,
         gbm.y = 1,
         fold.vector = fold.vector,
         family = "bernoulli",
         tree.complexity = 3,
         learning.rate = 0.00025,
         bag.fraction = 0.5,
         tolerance.method = "auto",
         #tolerance = 0.01,
         max.trees = 50000)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3129 
##  
## now adding trees... 
## 100   1.3088 
## 150   1.3049 
## 200   1.3013 
## 250   1.2978 
## 300   1.294 
## 350   1.2907 
## 400   1.2873 
## 450   1.2839 
## 500   1.2807 
## 550   1.2775 
## 600   1.2744 
## 650   1.2715 
## 700   1.2686 
## 750   1.2659 
## 800   1.2631 
## 850   1.2603 
## 900   1.2577 
## 950   1.2552 
## 1000   1.2526 
## 1050   1.2501 
## 1100   1.2478 
## 1150   1.2455 
## 1200   1.2432 
## 1250   1.2409 
## 1300   1.2386 
## 1350   1.2366 
## 1400   1.2345 
## 1450   1.2325 
## 1500   1.2304 
## 1550   1.2284 
## 1600   1.2264 
## 1650   1.2245 
## 1700   1.2226 
## 1750   1.2208 
## 1800   1.2191 
## 1850   1.2173 
## 1900   1.2156 
## 1950   1.214 
## 2000   1.2124 
## 2050   1.2108 
## 2100   1.2092 
## 2150   1.2077 
## 2200   1.2062 
## 2250   1.2047 
## 2300   1.2033 
## 2350   1.202 
## 2400   1.2006 
## 2450   1.1993 
## 2500   1.198 
## 2550   1.1966 
## 2600   1.1953 
## 2650   1.1941 
## 2700   1.1929 
## 2750   1.1917 
## 2800   1.1906 
## 2850   1.1895 
## 2900   1.1884 
## 2950   1.1873 
## 3000   1.1863 
## 3050   1.1853 
## 3100   1.1843 
## 3150   1.1834 
## 3200   1.1824 
## 3250   1.1814 
## 3300   1.1805 
## 3350   1.1795 
## 3400   1.1787 
## 3450   1.1778 
## 3500   1.1769 
## 3550   1.176 
## 3600   1.175 
## 3650   1.1742 
## 3700   1.1733 
## 3750   1.1726 
## 3800   1.1717 
## 3850   1.1709 
## 3900   1.1702 
## 3950   1.1696 
## 4000   1.1689 
## 4050   1.1683 
## 4100   1.1676 
## 4150   1.167 
## 4200   1.1663 
## 4250   1.1657 
## 4300   1.165 
## 4350   1.1644 
## 4400   1.1637 
## 4450   1.1629 
## 4500   1.1624 
## 4550   1.1619 
## 4600   1.1613 
## 4650   1.1608 
## 4700   1.1602 
## 4750   1.1596 
## 4800   1.1591 
## 4850   1.1587 
## 4900   1.1581 
## 4950   1.1577 
## 5000   1.1573 
## 5050   1.1568 
## 5100   1.1563 
## 5150   1.1559 
## 5200   1.1555 
## 5250   1.155 
## 5300   1.1547 
## 5350   1.1542 
## 5400   1.1537 
## 5450   1.1533 
## 5500   1.153 
## 5550   1.1526 
## 5600   1.1523 
## 5650   1.1521 
## 5700   1.1518 
## 5750   1.1513 
## 5800   1.151 
## 5850   1.1508 
## 5900   1.1505 
## 5950   1.1502 
## 6000   1.1499 
## 6050   1.1497 
## 6100   1.1494 
## 6150   1.1491 
## 6200   1.1489 
## 6250   1.1487 
## 6300   1.1484 
## 6350   1.1482 
## 6400   1.1479 
## 6450   1.1476 
## 6500   1.1475 
## 6550   1.1473 
## 6600   1.147 
## 6650   1.1468 
## 6700   1.1466 
## 6750   1.1463 
## 6800   1.1461 
## 6850   1.1459 
## 6900   1.1459 
## 6950   1.1457 
## 7000   1.1455 
## 7050   1.1453 
## 7100   1.1452 
## 7150   1.1451 
## 7200   1.1451 
## 7250   1.1449 
## 7300   1.1448 
## 7350   1.1448 
## 7400   1.1447 
## 7450   1.1447 
## 7500   1.1446

## fitting final gbm model with a fixed number of  7500  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.985 
##  
## estimated cv deviance = 1.145 ; se = 0.046 
##  
## training data correlation = 0.623 
## cv correlation =  0.429 ; se = 0.056 
##  
## training data ROC score = 0.867 
## cv ROC score = 0.745 ; se = 0.031 
##  
## elapsed time -  0.68 minutes

# Cross-validation summary for this fit: deviance, correlation,
# discrimination, calibration and threshold estimates.
cvtc3.lr0005[["cv.statistics"]]

## $deviance.mean
## [1] 1.144595
## 
## $deviance.se
## [1] 0.04636747
## 
## $correlation.mean
## [1] 0.4289955
## 
## $correlation.se
## [1] 0.0563225
## 
## $discrimination.mean
## [1] 0.7452
## 
## $discrimination.se
## [1] 0.03106307
## 
## $calibration.mean
## [1] 0.3937493 1.7368222 0.3956736 0.4577948 0.3118121
## 
## $calibration.se
## [1] 0.26145664 0.49671625 0.08983369 0.09400341 0.05915848
## 
## $cv.threshold
## [1] 0.3772412
## 
## $cv.threshold.se
## [1] 0.01324371

# Relative influence of each predictor (var / rel.inf table). The CV deviance
# used to compare models is reported in cv.statistics, not by summary().
summary(cvtc3.lr0005)

##                                             var     rel.inf
## cpaq8_tot                             cpaq8_tot 35.42205801
## age                                         age  8.45090923
## isi_tot                                 isi_tot  8.04497580
## current_opioid_meds         current_opioid_meds  7.33079585
## overlapping_pain_number overlapping_pain_number  6.29246025
## pain_duration                     pain_duration  5.66683322
## pcs_tot                                 pcs_tot  5.21650067
## RST_PQ_BIS                           RST_PQ_BIS  5.16650378
## audit_total                         audit_total  3.07406116
## employment                           employment  3.02124418
## bis_brief_tot                     bis_brief_tot  2.40748262
## income                                   income  2.14751680
## prodep_t                               prodep_t  1.55350874
## proanx_t                               proanx_t  1.36160302
## RST_PQ_FFS                           RST_PQ_FFS  0.92761085
## meq_tot                                 meq_tot  0.92412345
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  0.71557865
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.70813123
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  0.51843122
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.47086999
## hispanic                               hispanic  0.38026145
## race                                       race  0.13388751
## cuditr0                                 cuditr0  0.03560460
## assigned_sex_at_birth     assigned_sex_at_birth  0.02904771

tc 3 lr .00025 (finaltrain: 28 predictors, gcpsr items included)

# Cross-validated boosted regression tree fit on finaltrain with tree
# complexity 3 and learning rate 0.00025. The run log reports the response
# (column 1) as gcpsr_2_HICP_num and columns 3:30 as 28 predictors.
# The fit is stored under a name that matches its settings so it can be told
# apart from the other candidate models.
cvtc3.lr00025.full <- gbm.step(
  data = finaltrain,
  gbm.x = 3:30,               # predictor columns, selected by position
  gbm.y = 1,                  # response column
  fold.vector = fold.vector,  # user-supplied CV fold assignment
  family = "bernoulli",
  tree.complexity = 3,
  learning.rate = 0.00025,
  bag.fraction = 0.5,
  tolerance.method = "auto",
  # tolerance = 0.01,         # disabled, so gbm.step's default tolerance applies
  max.trees = 50000
)
# Legacy alias: the statements that follow still use the old name, whose
# "lr0025" suffix reads as 0.0025 rather than the 0.00025 used here.
cvtc3.lr0025 <- cvtc3.lr00025.full

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3111 
##  
## now adding trees... 
## 100   1.3054 
## 150   1.2999 
## 200   1.2946 
## 250   1.2893 
## 300   1.2842 
## 350   1.2791 
## 400   1.2741 
## 450   1.2693 
## 500   1.2646 
## 550   1.26 
## 600   1.2553 
## 650   1.2509 
## 700   1.2465 
## 750   1.2423 
## 800   1.2382 
## 850   1.2341 
## 900   1.2303 
## 950   1.2264 
## 1000   1.2226 
## 1050   1.2189 
## 1100   1.2152 
## 1150   1.2117 
## 1200   1.2082 
## 1250   1.2048 
## 1300   1.2014 
## 1350   1.1979 
## 1400   1.1946 
## 1450   1.1914 
## 1500   1.1883 
## 1550   1.1852 
## 1600   1.1822 
## 1650   1.1793 
## 1700   1.1763 
## 1750   1.1734 
## 1800   1.1706 
## 1850   1.1679 
## 1900   1.1651 
## 1950   1.1625 
## 2000   1.1599 
## 2050   1.1572 
## 2100   1.1547 
## 2150   1.1521 
## 2200   1.1498 
## 2250   1.1473 
## 2300   1.145 
## 2350   1.1428 
## 2400   1.1404 
## 2450   1.1382 
## 2500   1.136 
## 2550   1.1338 
## 2600   1.1318 
## 2650   1.1297 
## 2700   1.1277 
## 2750   1.1257 
## 2800   1.1236 
## 2850   1.1217 
## 2900   1.1198 
## 2950   1.1179 
## 3000   1.1161 
## 3050   1.1142 
## 3100   1.1124 
## 3150   1.1107 
## 3200   1.1089 
## 3250   1.1072 
## 3300   1.1055 
## 3350   1.1038 
## 3400   1.102 
## 3450   1.1005 
## 3500   1.099 
## 3550   1.0973 
## 3600   1.0959 
## 3650   1.0944 
## 3700   1.093 
## 3750   1.0915 
## 3800   1.0901 
## 3850   1.0886 
## 3900   1.0873 
## 3950   1.0859 
## 4000   1.0846 
## 4050   1.0832 
## 4100   1.082 
## 4150   1.0807 
## 4200   1.0794 
## 4250   1.078 
## 4300   1.0769 
## 4350   1.0757 
## 4400   1.0747 
## 4450   1.0735 
## 4500   1.0724 
## 4550   1.0713 
## 4600   1.0702 
## 4650   1.0692 
## 4700   1.0681 
## 4750   1.0672 
## 4800   1.0662 
## 4850   1.0652 
## 4900   1.0642 
## 4950   1.0633 
## 5000   1.0623 
## 5050   1.0613 
## 5100   1.0604 
## 5150   1.0595 
## 5200   1.0587 
## 5250   1.058 
## 5300   1.0571 
## 5350   1.0561 
## 5400   1.0554 
## 5450   1.0546 
## 5500   1.0538 
## 5550   1.0531 
## 5600   1.0524 
## 5650   1.0516 
## 5700   1.051 
## 5750   1.0502 
## 5800   1.0495 
## 5850   1.0488 
## 5900   1.0482 
## 5950   1.0474 
## 6000   1.0467 
## 6050   1.046 
## 6100   1.0454 
## 6150   1.0447 
## 6200   1.0441 
## 6250   1.0435 
## 6300   1.043 
## 6350   1.0424 
## 6400   1.0418 
## 6450   1.0413 
## 6500   1.0408 
## 6550   1.0403 
## 6600   1.0398 
## 6650   1.0393 
## 6700   1.0389 
## 6750   1.0384 
## 6800   1.0379 
## 6850   1.0375 
## 6900   1.0369 
## 6950   1.0365 
## 7000   1.0361 
## 7050   1.0356 
## 7100   1.0352 
## 7150   1.0348 
## 7200   1.0344 
## 7250   1.0339 
## 7300   1.0335 
## 7350   1.0331 
## 7400   1.0327 
## 7450   1.0325 
## 7500   1.0322 
## 7550   1.0318 
## 7600   1.0315 
## 7650   1.0311 
## 7700   1.0308 
## 7750   1.0305 
## 7800   1.0301 
## 7850   1.0298 
## 7900   1.0294 
## 7950   1.0291 
## 8000   1.0288 
## 8050   1.0285 
## 8100   1.0283 
## 8150   1.0281 
## 8200   1.0278 
## 8250   1.0275 
## 8300   1.0272 
## 8350   1.0269 
## 8400   1.0268 
## 8450   1.0266 
## 8500   1.0264 
## 8550   1.0261 
## 8600   1.026 
## 8650   1.0258 
## 8700   1.0256 
## 8750   1.0254 
## 8800   1.0252 
## 8850   1.025 
## 8900   1.0248 
## 8950   1.0248 
## 9000   1.0245 
## 9050   1.0244 
## 9100   1.0243 
## 9150   1.0241 
## 9200   1.024 
## 9250   1.0239 
## 9300   1.0239 
## 9350   1.0237 
## 9400   1.0235 
## 9450   1.0235 
## 9500   1.0233 
## 9550   1.0232 
## 9600   1.0231

## fitting final gbm model with a fixed number of  9600  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.84 
##  
## estimated cv deviance = 1.023 ; se = 0.058 
##  
## training data correlation = 0.704 
## cv correlation =  0.54 ; se = 0.055 
##  
## training data ROC score = 0.905 
## cv ROC score = 0.806 ; se = 0.028 
##  
## elapsed time -  0.99 minutes

# Cross-validation summary for this fit: deviance, correlation,
# discrimination, calibration and threshold estimates.
cvtc3.lr0025[["cv.statistics"]]

## $deviance.mean
## [1] 1.023057
## 
## $deviance.se
## [1] 0.05794706
## 
## $correlation.mean
## [1] 0.5401421
## 
## $correlation.se
## [1] 0.05476121
## 
## $discrimination.mean
## [1] 0.8056
## 
## $discrimination.se
## [1] 0.02785595
## 
## $calibration.mean
## [1] 0.2803442 1.4993694 0.4273062 0.5031588 0.3397515
## 
## $calibration.se
## [1] 0.19962522 0.31415119 0.10106043 0.09159738 0.07981578
## 
## $cv.threshold
## [1] 0.3931912
## 
## $cv.threshold.se
## [1] 0.02197686

# Relative influence of each predictor in this fit (var / rel.inf table).
summary(cvtc3.lr0025)

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 29.13310831
## cpaq8_tot                             cpaq8_tot 17.69426027
## gcpsr_3                                 gcpsr_3  6.62468542
## age                                         age  6.09118569
## isi_tot                                 isi_tot  4.62102012
## gcpsr_6                                 gcpsr_6  4.08236008
## pain_duration                     pain_duration  3.62987720
## RST_PQ_BIS                           RST_PQ_BIS  3.50996575
## current_opioid_meds         current_opioid_meds  3.19113694
## overlapping_pain_number overlapping_pain_number  3.15846837
## gcpsr_4                                 gcpsr_4  3.13351585
## employment                           employment  2.54052239
## pcs_tot                                 pcs_tot  1.88682445
## audit_total                         audit_total  1.31350153
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.29075696
## bis_brief_tot                     bis_brief_tot  1.25507709
## income                                   income  1.22194709
## proanx_t                               proanx_t  1.11138997
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  0.84938023
## meq_tot                                 meq_tot  0.77395428
## hispanic                               hispanic  0.69751503
## prodep_t                               prodep_t  0.65670009
## RST_PQ_FFS                           RST_PQ_FFS  0.56034895
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.46471149
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.36683593
## race                                       race  0.08913563
## cuditr0                                 cuditr0  0.03087730
## assigned_sex_at_birth     assigned_sex_at_birth  0.02093759

tc 3 lr 0.005 (finaltrain_nocgpsr: 24 predictors, gcpsr items excluded)

# Cross-validated boosted regression tree fit on finaltrain_nocgpsr with
# tree complexity 3 and learning rate 0.005. The run log reports the response
# (column 1) as gcpsr_2_HICP_num and columns 3:26 as 24 predictors.
# The fit is stored under a name that matches its settings so that it neither
# overwrites nor is overwritten by the other candidate models.
cvtc3.lr005.nogcpsr <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.x = 3:26,               # predictor columns, selected by position
  gbm.y = 1,                  # response column
  fold.vector = fold.vector,  # user-supplied CV fold assignment
  family = "bernoulli",
  tree.complexity = 3,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  tolerance.method = "auto",
  # tolerance = 0.01,         # disabled, so gbm.step's default tolerance applies
  max.trees = 50000
)
# Legacy alias: the statements that follow still use the old name, whose
# "lr0005" suffix does not match the learning rate used here.
cvtc3.lr0005 <- cvtc3.lr005.nogcpsr

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2509 
##  
## now adding trees... 
## 100   1.2116 
## 150   1.1868 
## 200   1.1672 
## 250   1.1555 
## 300   1.1489 
## 350   1.1469 
## 400   1.1452 
## 450   1.1439 
## 500   1.147 
## 550   1.1495 
## 600   1.152 
## 650   1.1551 
## 700   1.157 
## 750   1.1602 
## 800   1.1655 
## 850   1.1712 
## 900   1.1774 
## 950   1.1817 
## 1000   1.1878 
## 1050   1.1928

## fitting final gbm model with a fixed number of  450  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.954 
##  
## estimated cv deviance = 1.144 ; se = 0.048 
##  
## training data correlation = 0.638 
## cv correlation =  0.425 ; se = 0.054 
##  
## training data ROC score = 0.877 
## cv ROC score = 0.745 ; se = 0.03 
##  
## elapsed time -  0.08 minutes

# Cross-validation summary for this fit: deviance, correlation,
# discrimination, calibration and threshold estimates.
cvtc3.lr0005[["cv.statistics"]]

## $deviance.mean
## [1] 1.143858
## 
## $deviance.se
## [1] 0.04837856
## 
## $correlation.mean
## [1] 0.4248425
## 
## $correlation.se
## [1] 0.05403213
## 
## $discrimination.mean
## [1] 0.74496
## 
## $discrimination.se
## [1] 0.02967057
## 
## $calibration.mean
## [1] 0.2495425 1.5043870 0.4422612 0.4668652 0.3921268
## 
## $calibration.se
## [1] 0.20757040 0.38639905 0.10195605 0.10198461 0.07525061
## 
## $cv.threshold
## [1] 0.3790469
## 
## $cv.threshold.se
## [1] 0.01521988

# Relative influence of each predictor (var / rel.inf table). The CV deviance
# used to compare models is reported in cv.statistics, not by summary().
summary(cvtc3.lr0005)

##                                             var    rel.inf
## cpaq8_tot                             cpaq8_tot 32.4329171
## isi_tot                                 isi_tot  7.8137361
## current_opioid_meds         current_opioid_meds  7.6566140
## age                                         age  7.5778058
## pain_duration                     pain_duration  5.9906613
## RST_PQ_BIS                           RST_PQ_BIS  5.8668207
## overlapping_pain_number overlapping_pain_number  5.8631029
## pcs_tot                                 pcs_tot  5.8054731
## employment                           employment  3.7897738
## audit_total                         audit_total  3.3950920
## bis_brief_tot                     bis_brief_tot  2.4366810
## income                                   income  2.2233721
## proanx_t                               proanx_t  1.6813452
## prodep_t                               prodep_t  1.6740108
## meq_tot                                 meq_tot  0.9705514
## RST_PQ_FFS                           RST_PQ_FFS  0.9505188
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  0.9053882
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.8914243
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  0.8600870
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.4780161
## race                                       race  0.3785031
## hispanic                               hispanic  0.3581050
## assigned_sex_at_birth     assigned_sex_at_birth  0.0000000
## cuditr0                                 cuditr0  0.0000000

tc 3 lr 0.005 (finaltrain: 28 predictors, gcpsr items included)

# Cross-validated boosted regression tree fit on finaltrain with tree
# complexity 3 and learning rate 0.005. The run log reports the response
# (column 1) as gcpsr_2_HICP_num and columns 3:30 as 28 predictors.
# The fit is stored under a name that matches its settings so it can be told
# apart from the other candidate models.
cvtc3.lr005.full <- gbm.step(
  data = finaltrain,
  gbm.x = 3:30,               # predictor columns, selected by position
  gbm.y = 1,                  # response column
  fold.vector = fold.vector,  # user-supplied CV fold assignment
  family = "bernoulli",
  tree.complexity = 3,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  tolerance.method = "auto",
  # tolerance = 0.01,         # disabled, so gbm.step's default tolerance applies
  max.trees = 50000
)
# Legacy alias: the statements that follow still use the old name, whose
# "tc5" prefix does not match the tree complexity of 3 used here.
cvtc5.lr005 <- cvtc3.lr005.full

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2208 
##  
## now adding trees... 
## 100   1.1592 
## 150   1.1134 
## 200   1.0834 
## 250   1.0622 
## 300   1.0469 
## 350   1.037 
## 400   1.0281 
## 450   1.0228 
## 500   1.0202 
## 550   1.0205 
## 600   1.0215 
## 650   1.0225 
## 700   1.0248 
## 750   1.0283 
## 800   1.031 
## 850   1.0326 
## 900   1.036 
## 950   1.0414 
## 1000   1.0465 
## 1050   1.0514 
## 1100   1.0554 
## 1150   1.0589

## fitting final gbm model with a fixed number of  500  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.832 
##  
## estimated cv deviance = 1.02 ; se = 0.059 
##  
## training data correlation = 0.706 
## cv correlation =  0.543 ; se = 0.055 
##  
## training data ROC score = 0.905 
## cv ROC score = 0.806 ; se = 0.028 
##  
## elapsed time -  0.09 minutes

# Cross-validation summary for this fit: deviance, correlation,
# discrimination, calibration and threshold estimates.
cvtc5.lr005[["cv.statistics"]]

## $deviance.mean
## [1] 1.020192
## 
## $deviance.se
## [1] 0.05931665
## 
## $correlation.mean
## [1] 0.542745
## 
## $correlation.se
## [1] 0.05537704
## 
## $discrimination.mean
## [1] 0.80615
## 
## $discrimination.se
## [1] 0.02835754
## 
## $calibration.mean
## [1] 0.3063210 1.5174696 0.4286507 0.5043258 0.3485032
## 
## $calibration.se
## [1] 0.22593283 0.34471626 0.10361128 0.09312254 0.08310394
## 
## $cv.threshold
## [1] 0.3885993
## 
## $cv.threshold.se
## [1] 0.01882122

# Relative influence of each predictor in this fit (var / rel.inf table).
summary(cvtc5.lr005)

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 28.85428279
## cpaq8_tot                             cpaq8_tot 17.27784217
## gcpsr_3                                 gcpsr_3  6.51880614
## age                                         age  6.40939912
## isi_tot                                 isi_tot  5.06610488
## gcpsr_6                                 gcpsr_6  4.33822478
## pain_duration                     pain_duration  3.78272220
## RST_PQ_BIS                           RST_PQ_BIS  3.64156788
## gcpsr_4                                 gcpsr_4  3.37720323
## overlapping_pain_number overlapping_pain_number  3.12190450
## current_opioid_meds         current_opioid_meds  2.94641385
## employment                           employment  2.44501516
## pcs_tot                                 pcs_tot  1.88350921
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.58279126
## bis_brief_tot                     bis_brief_tot  1.26430033
## income                                   income  1.12726705
## audit_total                         audit_total  1.11928817
## proanx_t                               proanx_t  1.03632879
## prodep_t                               prodep_t  0.73708736
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.73574456
## hispanic                               hispanic  0.65610982
## meq_tot                                 meq_tot  0.62258985
## RST_PQ_FFS                           RST_PQ_FFS  0.59359288
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  0.50556131
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.19732993
## cuditr0                                 cuditr0  0.08912396
## assigned_sex_at_birth     assigned_sex_at_birth  0.03891400
## race                                       race  0.03097481

tc 3 lr .0025 (finaltrain_nocgpsr: 24 predictors, gcpsr items excluded)

# Cross-validated boosted regression tree fit on finaltrain_nocgpsr with
# tree complexity 3 and learning rate 0.0025. The run log reports the response
# (column 1) as gcpsr_2_HICP_num and columns 3:26 as 24 predictors.
# The fit is stored under a name that matches its settings so that it neither
# overwrites nor is overwritten by the other candidate models.
cvtc3.lr0025.nogcpsr <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.x = 3:26,               # predictor columns, selected by position
  gbm.y = 1,                  # response column
  fold.vector = fold.vector,  # user-supplied CV fold assignment
  family = "bernoulli",
  tree.complexity = 3,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  tolerance.method = "auto",
  # tolerance = 0.01,         # disabled, so gbm.step's default tolerance applies
  max.trees = 50000
)
# Legacy alias: the statements that follow still use the old name, whose
# "lr0005" suffix does not match the learning rate used here.
cvtc3.lr0005 <- cvtc3.lr0025.nogcpsr

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2822 
##  
## now adding trees... 
## 100   1.2538 
## 150   1.2311 
## 200   1.2125 
## 250   1.1982 
## 300   1.1864 
## 350   1.1759 
## 400   1.1676 
## 450   1.1606 
## 500   1.1554 
## 550   1.152 
## 600   1.1497 
## 650   1.147 
## 700   1.146 
## 750   1.1449 
## 800   1.1446 
## 850   1.1441 
## 900   1.1438 
## 950   1.144 
## 1000   1.1459 
## 1050   1.1462 
## 1100   1.1478 
## 1150   1.1489 
## 1200   1.1502 
## 1250   1.152 
## 1300   1.1529 
## 1350   1.1548 
## 1400   1.157

## fitting final gbm model with a fixed number of  900  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.956 
##  
## estimated cv deviance = 1.144 ; se = 0.048 
##  
## training data correlation = 0.636 
## cv correlation =  0.425 ; se = 0.054 
##  
## training data ROC score = 0.875 
## cv ROC score = 0.744 ; se = 0.03 
##  
## elapsed time -  0.1 minutes

# Cross-validation summary for this fit: deviance, correlation,
# discrimination, calibration and threshold estimates.
cvtc3.lr0005[["cv.statistics"]]

## $deviance.mean
## [1] 1.143781
## 
## $deviance.se
## [1] 0.04848497
## 
## $correlation.mean
## [1] 0.4246872
## 
## $correlation.se
## [1] 0.0543302
## 
## $discrimination.mean
## [1] 0.74382
## 
## $discrimination.se
## [1] 0.03027563
## 
## $calibration.mean
## [1] 0.2558462 1.4989751 0.4404392 0.4579218 0.4030266
## 
## $calibration.se
## [1] 0.20991855 0.39415103 0.10250271 0.09675155 0.08158469
## 
## $cv.threshold
## [1] 0.3769763
## 
## $cv.threshold.se
## [1] 0.01315306

# Relative influence of each predictor (var / rel.inf table). The CV deviance
# used to compare models is reported in cv.statistics, not by summary().
summary(cvtc3.lr0005)

##                                             var     rel.inf
## cpaq8_tot                             cpaq8_tot 32.86669072
## age                                         age  8.61310456
## isi_tot                                 isi_tot  8.32826981
## current_opioid_meds         current_opioid_meds  6.81577421
## pain_duration                     pain_duration  5.87172947
## overlapping_pain_number overlapping_pain_number  5.70567174
## pcs_tot                                 pcs_tot  5.24868055
## RST_PQ_BIS                           RST_PQ_BIS  5.09671484
## audit_total                         audit_total  3.48603158
## employment                           employment  3.22403518
## bis_brief_tot                     bis_brief_tot  2.64234332
## income                                   income  2.31456327
## proanx_t                               proanx_t  1.91836328
## prodep_t                               prodep_t  1.68074171
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.20657037
## RST_PQ_FFS                           RST_PQ_FFS  0.98209923
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.93303543
## meq_tot                                 meq_tot  0.88684301
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  0.72457093
## hispanic                               hispanic  0.55792799
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.54657193
## race                                       race  0.30579123
## cuditr0                                 cuditr0  0.04387561
## assigned_sex_at_birth     assigned_sex_at_birth  0.00000000

tc 3 lr .0025 (finaltrain: 28 predictors, gcpsr items included)

# Cross-validated boosted regression tree fit on finaltrain with tree
# complexity 3 and learning rate 0.0025. The run log reports the response
# (column 1) as gcpsr_2_HICP_num and columns 3:30 as 28 predictors.
# The fit is stored under a name that matches its settings so it can be told
# apart from the other candidate models.
cvtc3.lr0025.full <- gbm.step(
  data = finaltrain,
  gbm.x = 3:30,               # predictor columns, selected by position
  gbm.y = 1,                  # response column
  fold.vector = fold.vector,  # user-supplied CV fold assignment
  family = "bernoulli",
  tree.complexity = 3,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  tolerance.method = "auto",
  # tolerance = 0.01,         # disabled, so gbm.step's default tolerance applies
  max.trees = 50000
)
# Legacy alias: the statements that follow still use the old name, which
# matches neither the tree complexity (3) nor the learning rate (0.0025).
cvtc5.lr0005 <- cvtc3.lr0025.full

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2643 
##  
## now adding trees... 
## 100   1.2219 
## 150   1.1863 
## 200   1.1579 
## 250   1.1335 
## 300   1.1131 
## 350   1.0968 
## 400   1.0828 
## 450   1.0707 
## 500   1.0612 
## 550   1.0536 
## 600   1.0469 
## 650   1.0407 
## 700   1.0358 
## 750   1.0328 
## 800   1.0297 
## 850   1.0282 
## 900   1.026 
## 950   1.0244 
## 1000   1.023 
## 1050   1.0228 
## 1100   1.0225 
## 1150   1.0229 
## 1200   1.0226 
## 1250   1.0233 
## 1300   1.0233 
## 1350   1.0243 
## 1400   1.0252 
## 1450   1.0262 
## 1500   1.0279 
## 1550   1.0303 
## 1600   1.0332 
## 1650   1.0346

## fitting final gbm model with a fixed number of  1100  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.813 
##  
## estimated cv deviance = 1.022 ; se = 0.062 
##  
## training data correlation = 0.714 
## cv correlation =  0.538 ; se = 0.055 
##  
## training data ROC score = 0.91 
## cv ROC score = 0.807 ; se = 0.027 
##  
## elapsed time -  0.14 minutes

# Cross-validation summary for this fit: deviance, correlation,
# discrimination, calibration and threshold estimates.
cvtc5.lr0005[["cv.statistics"]]

## $deviance.mean
## [1] 1.022483
## 
## $deviance.se
## [1] 0.06188085
## 
## $correlation.mean
## [1] 0.5383629
## 
## $correlation.se
## [1] 0.05545334
## 
## $discrimination.mean
## [1] 0.80674
## 
## $discrimination.se
## [1] 0.02711901
## 
## $calibration.mean
## [1] 0.2562806 1.4183258 0.4374475 0.5102666 0.3792940
## 
## $calibration.se
## [1] 0.20466788 0.30767427 0.10933206 0.09380486 0.09392956
## 
## $cv.threshold
## [1] 0.3973466
## 
## $cv.threshold.se
## [1] 0.02117932

# Relative influence of each predictor in this fit (var / rel.inf table).
summary(cvtc5.lr0005)

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 27.71183079
## cpaq8_tot                             cpaq8_tot 16.86534350
## gcpsr_3                                 gcpsr_3  6.36354119
## age                                         age  6.08918186
## isi_tot                                 isi_tot  4.94450331
## pain_duration                     pain_duration  4.17923340
## gcpsr_6                                 gcpsr_6  4.06601089
## RST_PQ_BIS                           RST_PQ_BIS  3.86291801
## gcpsr_4                                 gcpsr_4  3.34887563
## overlapping_pain_number overlapping_pain_number  3.32118311
## current_opioid_meds         current_opioid_meds  2.79416434
## employment                           employment  2.57229520
## pcs_tot                                 pcs_tot  1.90340178
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.74395946
## audit_total                         audit_total  1.43862821
## bis_brief_tot                     bis_brief_tot  1.28608644
## income                                   income  1.27489693
## proanx_t                               proanx_t  0.99126328
## meq_tot                                 meq_tot  0.97398419
## hispanic                               hispanic  0.88084577
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  0.87404465
## prodep_t                               prodep_t  0.84877372
## RST_PQ_FFS                           RST_PQ_FFS  0.75832457
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.53412318
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.27388824
## race                                       race  0.09869835
## assigned_sex_at_birth     assigned_sex_at_birth  0.00000000
## cuditr0                                 cuditr0  0.00000000

tc 4 lr .0005 (finaltrain_nocgpsr: 24 predictors, gcpsr items excluded)

# Cross-validated boosted regression tree fit on finaltrain_nocgpsr with
# tree complexity 4 and learning rate 0.0005. The run log reports the response
# (column 1) as gcpsr_2_HICP_num and columns 3:26 as 24 predictors.
# The fit is stored under a name that matches its settings so that it neither
# overwrites nor is overwritten by the other candidate models.
cvtc4.lr0005.nogcpsr <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.x = 3:26,               # predictor columns, selected by position
  gbm.y = 1,                  # response column
  fold.vector = fold.vector,  # user-supplied CV fold assignment
  family = "bernoulli",
  tree.complexity = 4,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  tolerance.method = "auto",
  # tolerance = 0.01,         # disabled, so gbm.step's default tolerance applies
  max.trees = 50000
)
# Legacy alias: the statements that follow still use the old name, whose
# "tc3" prefix does not match the tree complexity of 4 used here.
cvtc3.lr0005 <- cvtc4.lr0005.nogcpsr

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3086 
##  
## now adding trees... 
## 100   1.3007 
## 150   1.2934 
## 200   1.2866 
## 250   1.2801 
## 300   1.2737 
## 350   1.2676 
## 400   1.2618 
## 450   1.2559 
## 500   1.2507 
## 550   1.2455 
## 600   1.2407 
## 650   1.2362 
## 700   1.2318 
## 750   1.2275 
## 800   1.2236 
## 850   1.2197 
## 900   1.2161 
## 950   1.2123 
## 1000   1.2091 
## 1050   1.206 
## 1100   1.203 
## 1150   1.2003 
## 1200   1.1976 
## 1250   1.195 
## 1300   1.1925 
## 1350   1.1901 
## 1400   1.188 
## 1450   1.1858 
## 1500   1.1838 
## 1550   1.1818 
## 1600   1.18 
## 1650   1.178 
## 1700   1.1762 
## 1750   1.1748 
## 1800   1.1732 
## 1850   1.1716 
## 1900   1.1702 
## 1950   1.1687 
## 2000   1.1674 
## 2050   1.166 
## 2100   1.1648 
## 2150   1.1633 
## 2200   1.1621 
## 2250   1.1612 
## 2300   1.1603 
## 2350   1.1592 
## 2400   1.1583 
## 2450   1.1576 
## 2500   1.1566 
## 2550   1.1558 
## 2600   1.1554 
## 2650   1.1548 
## 2700   1.1541 
## 2750   1.1536 
## 2800   1.1532 
## 2850   1.1525 
## 2900   1.1523 
## 2950   1.1518 
## 3000   1.1512 
## 3050   1.151 
## 3100   1.1506 
## 3150   1.1503 
## 3200   1.1501 
## 3250   1.1501 
## 3300   1.15 
## 3350   1.1498 
## 3400   1.1498 
## 3450   1.1494 
## 3500   1.1491 
## 3550   1.1489 
## 3600   1.1488 
## 3650   1.1489 
## 3700   1.1487 
## 3750   1.1486 
## 3800   1.1485 
## 3850   1.1487 
## 3900   1.1488 
## 3950   1.1488 
## 4000   1.149

## fitting final gbm model with a fixed number of  3800  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.938 
##  
## estimated cv deviance = 1.148 ; se = 0.048 
##  
## training data correlation = 0.663 
## cv correlation =  0.42 ; se = 0.055 
##  
## training data ROC score = 0.89 
## cv ROC score = 0.74 ; se = 0.03 
##  
## elapsed time -  0.4 minutes

# Cross-validation summary for this fit: deviance, correlation,
# discrimination, calibration and threshold estimates.
cvtc3.lr0005[["cv.statistics"]]

## $deviance.mean
## [1] 1.148492
## 
## $deviance.se
## [1] 0.04809471
## 
## $correlation.mean
## [1] 0.4199143
## 
## $correlation.se
## [1] 0.05513532
## 
## $discrimination.mean
## [1] 0.7401
## 
## $discrimination.se
## [1] 0.03047419
## 
## $calibration.mean
## [1] 0.2752414 1.5072358 0.4354791 0.4645656 0.3847396
## 
## $calibration.se
## [1] 0.21722143 0.38072586 0.10131593 0.09764534 0.07957158
## 
## $cv.threshold
## [1] 0.3759294
## 
## $cv.threshold.se
## [1] 0.01359232

summary(cvtc3.lr0005) # prints each predictor's relative influence (rel.inf); NOTE(review): the deviance measures for comparing models are in $cv.statistics, not in this table

##                                             var     rel.inf
## cpaq8_tot                             cpaq8_tot 30.98156534
## age                                         age  7.82507714
## isi_tot                                 isi_tot  7.46495257
## current_opioid_meds         current_opioid_meds  6.42927289
## pain_duration                     pain_duration  6.25612545
## RST_PQ_BIS                           RST_PQ_BIS  6.17802994
## overlapping_pain_number overlapping_pain_number  5.60775797
## pcs_tot                                 pcs_tot  5.39613288
## employment                           employment  3.66095462
## audit_total                         audit_total  3.50067471
## income                                   income  2.89648541
## bis_brief_tot                     bis_brief_tot  2.74829376
## prodep_t                               prodep_t  1.87287593
## proanx_t                               proanx_t  1.86727271
## RST_PQ_FFS                           RST_PQ_FFS  1.45294354
## meq_tot                                 meq_tot  1.25265643
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.16676808
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.07336727
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  0.87440076
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.70731673
## hispanic                               hispanic  0.45412736
## race                                       race  0.20360342
## cuditr0                                 cuditr0  0.06521223
## assigned_sex_at_birth     assigned_sex_at_birth  0.06413286

tc 4 lr .0005 (finaltrain: gcpsr predictors included)

# Cross-validated BRT fit on finaltrain: response in column 1, predictors in
# columns 3-30, tree.complexity = 4, learning.rate = 0.0005.
# NOTE(review): the object name reads "tc3" but this fit uses
# tree.complexity = 4, and the assignment replaces the model previously
# stored in cvtc3.lr0005 - confirm the name reuse is intended.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 4,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance is chosen by the "auto" method; an explicit tolerance = 0.01
  # override is deliberately left disabled
  tolerance.method = "auto"
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3049 
##  
## now adding trees... 
## 100   1.2935 
## 150   1.2824 
## 200   1.2722 
## 250   1.2622 
## 300   1.2526 
## 350   1.2434 
## 400   1.2348 
## 450   1.2264 
## 500   1.2182 
## 550   1.2106 
## 600   1.2033 
## 650   1.1963 
## 700   1.1893 
## 750   1.1825 
## 800   1.1762 
## 850   1.1701 
## 900   1.1646 
## 950   1.1589 
## 1000   1.1537 
## 1050   1.1486 
## 1100   1.1433 
## 1150   1.1386 
## 1200   1.134 
## 1250   1.1296 
## 1300   1.1253 
## 1350   1.121 
## 1400   1.117 
## 1450   1.1132 
## 1500   1.1093 
## 1550   1.1055 
## 1600   1.1019 
## 1650   1.0984 
## 1700   1.0954 
## 1750   1.0923 
## 1800   1.0893 
## 1850   1.0865 
## 1900   1.0838 
## 1950   1.081 
## 2000   1.0783 
## 2050   1.0757 
## 2100   1.0732 
## 2150   1.071 
## 2200   1.0689 
## 2250   1.0668 
## 2300   1.0647 
## 2350   1.0629 
## 2400   1.061 
## 2450   1.059 
## 2500   1.057 
## 2550   1.0554 
## 2600   1.0536 
## 2650   1.0522 
## 2700   1.0507 
## 2750   1.0492 
## 2800   1.0478 
## 2850   1.0464 
## 2900   1.0453 
## 2950   1.0441 
## 3000   1.0429 
## 3050   1.0417 
## 3100   1.0407 
## 3150   1.0399 
## 3200   1.0389 
## 3250   1.038 
## 3300   1.037 
## 3350   1.0359 
## 3400   1.035 
## 3450   1.0342 
## 3500   1.0332 
## 3550   1.0325 
## 3600   1.0317 
## 3650   1.0311 
## 3700   1.0304 
## 3750   1.0297 
## 3800   1.0292 
## 3850   1.0286 
## 3900   1.0281 
## 3950   1.0278 
## 4000   1.0274 
## 4050   1.027 
## 4100   1.0266 
## 4150   1.0261 
## 4200   1.0258 
## 4250   1.0253 
## 4300   1.025 
## 4350   1.0247 
## 4400   1.0244 
## 4450   1.0242 
## 4500   1.024 
## 4550   1.0239 
## 4600   1.0238 
## 4650   1.0236 
## 4700   1.0235 
## 4750   1.0235 
## 4800   1.0233 
## 4850   1.0234 
## 4900   1.0233 
## 4950   1.0233 
## 5000   1.0233 
## 5050   1.0233 
## 5100   1.0234

## fitting final gbm model with a fixed number of  4800  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.789 
##  
## estimated cv deviance = 1.023 ; se = 0.06 
##  
## training data correlation = 0.739 
## cv correlation =  0.538 ; se = 0.055 
##  
## training data ROC score = 0.923 
## cv ROC score = 0.805 ; se = 0.028 
##  
## elapsed time -  0.57 minutes

cvtc3.lr0005$cv.statistics # cross-validation statistics for this fit: deviance, correlation, discrimination, calibration, threshold (means and SEs)

## $deviance.mean
## [1] 1.023258
## 
## $deviance.se
## [1] 0.06038332
## 
## $correlation.mean
## [1] 0.537729
## 
## $correlation.se
## [1] 0.05507746
## 
## $discrimination.mean
## [1] 0.80504
## 
## $discrimination.se
## [1] 0.02768744
## 
## $calibration.mean
## [1] 0.2776889 1.4505847 0.4491894 0.5118139 0.3918641
## 
## $calibration.se
## [1] 0.21900549 0.32663941 0.10957562 0.09360325 0.09620300
## 
## $cv.threshold
## [1] 0.3975718
## 
## $cv.threshold.se
## [1] 0.02283161

summary(cvtc3.lr0005) # prints each predictor's relative influence (rel.inf); NOTE(review): the deviance measures for comparing models are in $cv.statistics, not in this table

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 26.01678991
## cpaq8_tot                             cpaq8_tot 15.99500558
## gcpsr_3                                 gcpsr_3  6.15392479
## age                                         age  5.77759062
## pain_duration                     pain_duration  4.73312684
## isi_tot                                 isi_tot  4.72131191
## RST_PQ_BIS                           RST_PQ_BIS  3.95242780
## gcpsr_6                                 gcpsr_6  3.70247059
## gcpsr_4                                 gcpsr_4  3.05903308
## employment                           employment  3.05275911
## overlapping_pain_number overlapping_pain_number  2.90713716
## current_opioid_meds         current_opioid_meds  2.77863394
## pcs_tot                                 pcs_tot  2.39946065
## income                                   income  1.94392141
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.72811983
## audit_total                         audit_total  1.61328392
## bis_brief_tot                     bis_brief_tot  1.61085171
## proanx_t                               proanx_t  1.59590073
## meq_tot                                 meq_tot  1.08713152
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.07447132
## RST_PQ_FFS                           RST_PQ_FFS  1.00167029
## prodep_t                               prodep_t  0.86749729
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.68903968
## hispanic                               hispanic  0.66013523
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.64380391
## race                                       race  0.10308851
## cuditr0                                 cuditr0  0.08830537
## assigned_sex_at_birth     assigned_sex_at_birth  0.04310730

tc 4 lr .0025 (finaltrain_nocgpsr: gcpsr predictors excluded)

# Cross-validated BRT fit on finaltrain_nocgpsr: response in column 1,
# predictors in columns 3-26, tree.complexity = 4, learning.rate = 0.0025.
# NOTE(review): the object name reads "tc3 / lr0005" but this fit uses
# tree.complexity = 4 and learning.rate = 0.0025, and the assignment replaces
# the model previously stored in cvtc3.lr0005 - confirm this is intended.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 4,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance is chosen by the "auto" method; an explicit tolerance = 0.01
  # override is deliberately left disabled
  tolerance.method = "auto"
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2802 
##  
## now adding trees... 
## 100   1.2506 
## 150   1.2272 
## 200   1.2087 
## 250   1.1936 
## 300   1.1823 
## 350   1.1731 
## 400   1.1639 
## 450   1.1584 
## 500   1.1544 
## 550   1.151 
## 600   1.1488 
## 650   1.1475 
## 700   1.1473 
## 750   1.1471 
## 800   1.1485 
## 850   1.1493 
## 900   1.1502 
## 950   1.1502 
## 1000   1.1507 
## 1050   1.1525 
## 1100   1.1552 
## 1150   1.1575 
## 1200   1.1607 
## 1250   1.1632 
## 1300   1.1661

## fitting final gbm model with a fixed number of  750  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.941 
##  
## estimated cv deviance = 1.147 ; se = 0.048 
##  
## training data correlation = 0.664 
## cv correlation =  0.421 ; se = 0.055 
##  
## training data ROC score = 0.891 
## cv ROC score = 0.739 ; se = 0.03 
##  
## elapsed time -  0.12 minutes

cvtc3.lr0005$cv.statistics # cross-validation statistics for this fit: deviance, correlation, discrimination, calibration, threshold (means and SEs)

## $deviance.mean
## [1] 1.147096
## 
## $deviance.se
## [1] 0.04796054
## 
## $correlation.mean
## [1] 0.4206574
## 
## $correlation.se
## [1] 0.05454263
## 
## $discrimination.mean
## [1] 0.73888
## 
## $discrimination.se
## [1] 0.02954265
## 
## $calibration.mean
## [1] 0.2903537 1.5251900 0.4427980 0.4742805 0.3813984
## 
## $calibration.se
## [1] 0.22677885 0.38983186 0.10211383 0.09889303 0.07834834
## 
## $cv.threshold
## [1] 0.3790785
## 
## $cv.threshold.se
## [1] 0.01401745

summary(cvtc3.lr0005) # prints each predictor's relative influence (rel.inf); NOTE(review): the deviance measures for comparing models are in $cv.statistics, not in this table

##                                             var     rel.inf
## cpaq8_tot                             cpaq8_tot 31.12430507
## age                                         age  7.60662033
## isi_tot                                 isi_tot  7.06264228
## overlapping_pain_number overlapping_pain_number  6.05371101
## current_opioid_meds         current_opioid_meds  6.01750800
## RST_PQ_BIS                           RST_PQ_BIS  5.91648290
## pain_duration                     pain_duration  5.70664598
## pcs_tot                                 pcs_tot  5.15328360
## employment                           employment  4.37321565
## audit_total                         audit_total  3.74963740
## income                                   income  3.28484875
## bis_brief_tot                     bis_brief_tot  2.83084114
## prodep_t                               prodep_t  2.03113372
## proanx_t                               proanx_t  1.78815068
## RST_PQ_FFS                           RST_PQ_FFS  1.39744255
## meq_tot                                 meq_tot  1.32282219
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.14105152
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.06562745
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  0.95488749
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.64609256
## hispanic                               hispanic  0.58490038
## race                                       race  0.14712803
## cuditr0                                 cuditr0  0.04102133
## assigned_sex_at_birth     assigned_sex_at_birth  0.00000000

tc 4 lr .0025 (finaltrain: gcpsr predictors included)

# Cross-validated BRT fit on finaltrain: response in column 1, predictors in
# columns 3-30, tree.complexity = 4, learning.rate = 0.0025.
# NOTE(review): the object name reads "tc7 / lr005" but this fit uses
# tree.complexity = 4 and learning.rate = 0.0025 - confirm the name.
cvtc7.lr005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 4,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance is chosen by the "auto" method; an explicit tolerance = 0.01
  # override is deliberately left disabled
  tolerance.method = "auto"
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2624 
##  
## now adding trees... 
## 100   1.217 
## 150   1.1818 
## 200   1.1531 
## 250   1.1285 
## 300   1.1095 
## 350   1.0938 
## 400   1.0799 
## 450   1.0688 
## 500   1.0597 
## 550   1.053 
## 600   1.0457 
## 650   1.0421 
## 700   1.0377 
## 750   1.0346 
## 800   1.0315 
## 850   1.0298 
## 900   1.0292 
## 950   1.028 
## 1000   1.0277 
## 1050   1.0275 
## 1100   1.0295 
## 1150   1.0304 
## 1200   1.0309 
## 1250   1.0326 
## 1300   1.0345 
## 1350   1.0364 
## 1400   1.0381 
## 1450   1.0411 
## 1500   1.0424

## fitting final gbm model with a fixed number of  1050  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.769 
##  
## estimated cv deviance = 1.028 ; se = 0.063 
##  
## training data correlation = 0.746 
## cv correlation =  0.532 ; se = 0.056 
##  
## training data ROC score = 0.926 
## cv ROC score = 0.798 ; se = 0.029 
##  
## elapsed time -  0.15 minutes

cvtc7.lr005$cv.statistics # cross-validation statistics for this fit: deviance, correlation, discrimination, calibration, threshold (means and SEs)

## $deviance.mean
## [1] 1.027529
## 
## $deviance.se
## [1] 0.06334221
## 
## $correlation.mean
## [1] 0.5315409
## 
## $correlation.se
## [1] 0.05591326
## 
## $discrimination.mean
## [1] 0.79808
## 
## $discrimination.se
## [1] 0.02863952
## 
## $calibration.mean
## [1] 0.2283505 1.3769131 0.4587737 0.5071299 0.4302837
## 
## $calibration.se
## [1] 0.21461140 0.32381790 0.11513024 0.09420365 0.10061066
## 
## $cv.threshold
## [1] 0.4028558
## 
## $cv.threshold.se
## [1] 0.02497462

summary(cvtc7.lr005) # prints each predictor's relative influence (rel.inf) for this fit

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 25.56134199
## cpaq8_tot                             cpaq8_tot 15.21656080
## age                                         age  5.99112959
## gcpsr_3                                 gcpsr_3  5.79000540
## isi_tot                                 isi_tot  4.85703600
## pain_duration                     pain_duration  4.46364353
## RST_PQ_BIS                           RST_PQ_BIS  3.87092952
## gcpsr_6                                 gcpsr_6  3.71956596
## employment                           employment  3.33744407
## gcpsr_4                                 gcpsr_4  3.01426775
## overlapping_pain_number overlapping_pain_number  3.01271469
## current_opioid_meds         current_opioid_meds  2.65631121
## pcs_tot                                 pcs_tot  2.58254461
## proanx_t                               proanx_t  2.02742074
## bis_brief_tot                     bis_brief_tot  1.89824212
## income                                   income  1.86647508
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.85558643
## audit_total                         audit_total  1.70748851
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.20372989
## meq_tot                                 meq_tot  1.14433995
## prodep_t                               prodep_t  0.93430420
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.89678013
## hispanic                               hispanic  0.85175512
## RST_PQ_FFS                           RST_PQ_FFS  0.74427700
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.51720976
## race                                       race  0.15124324
## cuditr0                                 cuditr0  0.10444275
## assigned_sex_at_birth     assigned_sex_at_birth  0.02320999

tc 4 lr .005 (finaltrain_nocgpsr: gcpsr predictors excluded)

# Cross-validated BRT fit on finaltrain_nocgpsr: response in column 1,
# predictors in columns 3-26, tree.complexity = 4, learning.rate = 0.005.
# NOTE(review): the object name reads "tc3 / lr0005" but this fit uses
# tree.complexity = 4 and learning.rate = 0.005, and the assignment replaces
# the model previously stored in cvtc3.lr0005 - confirm this is intended.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 4,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance is chosen by the "auto" method; an explicit tolerance = 0.01
  # override is deliberately left disabled
  tolerance.method = "auto"
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2499 
##  
## now adding trees... 
## 100   1.2089 
## 150   1.1825 
## 200   1.1652 
## 250   1.1543 
## 300   1.1513 
## 350   1.1488 
## 400   1.1472 
## 450   1.1477 
## 500   1.1521 
## 550   1.1549 
## 600   1.1602 
## 650   1.1666 
## 700   1.1722 
## 750   1.1765 
## 800   1.1808 
## 850   1.1861 
## 900   1.1923 
## 950   1.1987 
## 1000   1.2033

## fitting final gbm model with a fixed number of  400  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.93 
##  
## estimated cv deviance = 1.147 ; se = 0.049 
##  
## training data correlation = 0.664 
## cv correlation =  0.421 ; se = 0.055 
##  
## training data ROC score = 0.892 
## cv ROC score = 0.741 ; se = 0.03 
##  
## elapsed time -  0.09 minutes

cvtc3.lr0005$cv.statistics # cross-validation statistics for this fit: deviance, correlation, discrimination, calibration, threshold (means and SEs)

## $deviance.mean
## [1] 1.147245
## 
## $deviance.se
## [1] 0.04924865
## 
## $correlation.mean
## [1] 0.4209709
## 
## $correlation.se
## [1] 0.05525873
## 
## $discrimination.mean
## [1] 0.74074
## 
## $discrimination.se
## [1] 0.02974984
## 
## $calibration.mean
## [1] 0.2669300 1.4894672 0.4501995 0.4636395 0.4073396
## 
## $calibration.se
## [1] 0.22157644 0.39834344 0.10676133 0.09534053 0.08484106
## 
## $cv.threshold
## [1] 0.3765187
## 
## $cv.threshold.se
## [1] 0.01309959

summary(cvtc3.lr0005) # prints each predictor's relative influence (rel.inf); NOTE(review): the deviance measures for comparing models are in $cv.statistics, not in this table

##                                             var     rel.inf
## cpaq8_tot                             cpaq8_tot 30.67755162
## isi_tot                                 isi_tot  8.15249505
## age                                         age  7.61844175
## current_opioid_meds         current_opioid_meds  6.39288701
## pain_duration                     pain_duration  6.17316282
## overlapping_pain_number overlapping_pain_number  5.75348708
## RST_PQ_BIS                           RST_PQ_BIS  5.56126854
## pcs_tot                                 pcs_tot  5.02531520
## employment                           employment  4.19002394
## audit_total                         audit_total  3.72494258
## income                                   income  2.98608333
## bis_brief_tot                     bis_brief_tot  2.83315871
## prodep_t                               prodep_t  1.78152300
## proanx_t                               proanx_t  1.68278180
## RST_PQ_FFS                           RST_PQ_FFS  1.48773298
## meq_tot                                 meq_tot  1.38910507
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.21169376
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.92376034
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  0.89371253
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.72756846
## hispanic                               hispanic  0.43220674
## race                                       race  0.23994602
## assigned_sex_at_birth     assigned_sex_at_birth  0.09381367
## cuditr0                                 cuditr0  0.04733799

tc 4 lr .005 (finaltrain: gcpsr predictors included)

# Cross-validated BRT fit on finaltrain: response in column 1, predictors in
# columns 3-30, tree.complexity = 4, learning.rate = 0.005.
# NOTE(review): the object name reads "tc7 / lr0005" but this fit uses
# tree.complexity = 4 and learning.rate = 0.005; the same name is assigned
# again later for the tree.complexity = 7, learning.rate = .0005 fit, which
# will replace this model - confirm the name.
cvtc7.lr0005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 4,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance is chosen by the "auto" method; an explicit tolerance = 0.01
  # override is deliberately left disabled
  tolerance.method = "auto"
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2179 
##  
## now adding trees... 
## 100   1.1544 
## 150   1.1093 
## 200   1.0803 
## 250   1.0587 
## 300   1.0454 
## 350   1.035 
## 400   1.0285 
## 450   1.0251 
## 500   1.027 
## 550   1.0293 
## 600   1.0325 
## 650   1.0349 
## 700   1.0371 
## 750   1.0423 
## 800   1.0466 
## 850   1.0513 
## 900   1.0564 
## 950   1.0604 
## 1000   1.0675 
## 1050   1.0725 
## 1100   1.077

## fitting final gbm model with a fixed number of  450  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.802 
##  
## estimated cv deviance = 1.025 ; se = 0.06 
##  
## training data correlation = 0.733 
## cv correlation =  0.537 ; se = 0.056 
##  
## training data ROC score = 0.92 
## cv ROC score = 0.801 ; se = 0.028 
##  
## elapsed time -  0.11 minutes

cvtc7.lr0005$cv.statistics # cross-validation statistics for this fit: deviance, correlation, discrimination, calibration, threshold (means and SEs)

## $deviance.mean
## [1] 1.02507
## 
## $deviance.se
## [1] 0.05989564
## 
## $correlation.mean
## [1] 0.5373775
## 
## $correlation.se
## [1] 0.05583585
## 
## $discrimination.mean
## [1] 0.80093
## 
## $discrimination.se
## [1] 0.02751183
## 
## $calibration.mean
## [1] 0.2461241 1.4501025 0.4346719 0.5172998 0.3561608
## 
## $calibration.se
## [1] 0.19337992 0.29653197 0.10630938 0.09256631 0.09122205
## 
## $cv.threshold
## [1] 0.3930572
## 
## $cv.threshold.se
## [1] 0.0235085

summary(cvtc7.lr0005) # prints each predictor's relative influence (rel.inf) for this fit

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 27.74246406
## cpaq8_tot                             cpaq8_tot 15.91991807
## gcpsr_3                                 gcpsr_3  5.74971237
## age                                         age  5.51183863
## isi_tot                                 isi_tot  4.80658662
## RST_PQ_BIS                           RST_PQ_BIS  4.38148251
## pain_duration                     pain_duration  4.32589904
## gcpsr_6                                 gcpsr_6  4.00541595
## employment                           employment  3.18993269
## overlapping_pain_number overlapping_pain_number  2.98291492
## current_opioid_meds         current_opioid_meds  2.96568610
## gcpsr_4                                 gcpsr_4  2.52700696
## pcs_tot                                 pcs_tot  2.28231061
## income                                   income  1.87296316
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.75104008
## audit_total                         audit_total  1.66176294
## bis_brief_tot                     bis_brief_tot  1.41660210
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.16172891
## proanx_t                               proanx_t  1.12524636
## meq_tot                                 meq_tot  0.98248058
## prodep_t                               prodep_t  0.90691330
## hispanic                               hispanic  0.75212621
## RST_PQ_FFS                           RST_PQ_FFS  0.69522520
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.54390286
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.45783764
## cuditr0                                 cuditr0  0.18189575
## race                                       race  0.09910639
## assigned_sex_at_birth     assigned_sex_at_birth  0.00000000

tc 7 lr .0005 (finaltrain_nocgpsr: gcpsr predictors excluded)

# Cross-validated BRT fit on finaltrain_nocgpsr: response in column 1,
# predictors in columns 3-26, tree.complexity = 7, learning.rate = 0.0005.
# NOTE(review): the object name reads "tc3" but this fit uses
# tree.complexity = 7, and the assignment replaces the model previously
# stored in cvtc3.lr0005 - confirm the name reuse is intended.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance is chosen by the "auto" method; an explicit tolerance = 0.01
  # override is deliberately left disabled
  tolerance.method = "auto"
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3087 
##  
## now adding trees... 
## 100   1.3009 
## 150   1.2932 
## 200   1.2862 
## 250   1.2796 
## 300   1.2731 
## 350   1.2666 
## 400   1.2605 
## 450   1.2548 
## 500   1.2496 
## 550   1.2444 
## 600   1.2395 
## 650   1.2353 
## 700   1.2303 
## 750   1.2265 
## 800   1.2226 
## 850   1.2187 
## 900   1.2151 
## 950   1.2118 
## 1000   1.2086 
## 1050   1.2058 
## 1100   1.2029 
## 1150   1.2001 
## 1200   1.1973 
## 1250   1.1947 
## 1300   1.1924 
## 1350   1.1902 
## 1400   1.188 
## 1450   1.1859 
## 1500   1.1844 
## 1550   1.1825 
## 1600   1.1807 
## 1650   1.1791 
## 1700   1.1778 
## 1750   1.1763 
## 1800   1.1748 
## 1850   1.1736 
## 1900   1.1723 
## 1950   1.1713 
## 2000   1.1703 
## 2050   1.1694 
## 2100   1.1685 
## 2150   1.1677 
## 2200   1.167 
## 2250   1.1663 
## 2300   1.1659 
## 2350   1.1654 
## 2400   1.1651 
## 2450   1.1645 
## 2500   1.1641 
## 2550   1.1638 
## 2600   1.1633 
## 2650   1.1628 
## 2700   1.1624 
## 2750   1.1622 
## 2800   1.1618 
## 2850   1.1615 
## 2900   1.1615 
## 2950   1.1614 
## 3000   1.1615 
## 3050   1.1615 
## 3100   1.1614 
## 3150   1.1615 
## 3200   1.1618 
## 3250   1.1618 
## 3300   1.162 
## 3350   1.1619 
## 3400   1.1619

## fitting final gbm model with a fixed number of  2950  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.897 
##  
## estimated cv deviance = 1.161 ; se = 0.047 
##  
## training data correlation = 0.727 
## cv correlation =  0.407 ; se = 0.056 
##  
## training data ROC score = 0.924 
## cv ROC score = 0.735 ; se = 0.031 
##  
## elapsed time -  0.51 minutes

cvtc3.lr0005$cv.statistics # cross-validation statistics for this fit: deviance, correlation, discrimination, calibration, threshold (means and SEs)

## $deviance.mean
## [1] 1.16139
## 
## $deviance.se
## [1] 0.04684134
## 
## $correlation.mean
## [1] 0.4071974
## 
## $correlation.se
## [1] 0.05562675
## 
## $discrimination.mean
## [1] 0.73484
## 
## $discrimination.se
## [1] 0.03072203
## 
## $calibration.mean
## [1] 0.2809879 1.5054429 0.4314101 0.4703246 0.3791870
## 
## $calibration.se
## [1] 0.22021350 0.37379909 0.10179825 0.09463116 0.08529679
## 
## $cv.threshold
## [1] 0.3791723
## 
## $cv.threshold.se
## [1] 0.01201753

summary(cvtc3.lr0005) # prints each predictor's relative influence (rel.inf); NOTE(review): the deviance measures for comparing models are in $cv.statistics, not in this table

##                                             var    rel.inf
## cpaq8_tot                             cpaq8_tot 26.5442935
## age                                         age  7.1837311
## pain_duration                     pain_duration  6.9825253
## isi_tot                                 isi_tot  6.8556078
## RST_PQ_BIS                           RST_PQ_BIS  6.4798727
## pcs_tot                                 pcs_tot  5.1657426
## current_opioid_meds         current_opioid_meds  5.0548659
## overlapping_pain_number overlapping_pain_number  5.0071341
## employment                           employment  4.8584775
## income                                   income  4.1340840
## audit_total                         audit_total  3.5189234
## bis_brief_tot                     bis_brief_tot  3.0002831
## proanx_t                               proanx_t  2.2667563
## prodep_t                               prodep_t  2.1181201
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.8981482
## RST_PQ_FFS                           RST_PQ_FFS  1.8937246
## meq_tot                                 meq_tot  1.8704085
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.5822165
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.3974834
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.2897053
## hispanic                               hispanic  0.3980300
## cuditr0                                 cuditr0  0.2014803
## race                                       race  0.1875670
## assigned_sex_at_birth     assigned_sex_at_birth  0.1108189

tc 7 lr .0005 (finaltrain: gcpsr predictors included)

# Cross-validated BRT fit on finaltrain: response in column 1, predictors in
# columns 3-30, tree.complexity = 7, learning.rate = 0.0005.
# NOTE(review): cvtc7.lr0005 was already assigned by the earlier
# tree.complexity = 4, learning.rate = 0.005 fit, so this assignment replaces
# that model - confirm the earlier results are no longer needed.
cvtc7.lr0005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  max.trees = 50000,
  # tolerance is chosen by the "auto" method; an explicit tolerance = 0.01
  # override is deliberately left disabled
  tolerance.method = "auto"
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3044 
##  
## now adding trees... 
## 100   1.2929 
## 150   1.2817 
## 200   1.2709 
## 250   1.2607 
## 300   1.2508 
## 350   1.2415 
## 400   1.2325 
## 450   1.224 
## 500   1.2159 
## 550   1.208 
## 600   1.2003 
## 650   1.1933 
## 700   1.1864 
## 750   1.1794 
## 800   1.173 
## 850   1.1667 
## 900   1.1608 
## 950   1.1549 
## 1000   1.1495 
## 1050   1.1447 
## 1100   1.1397 
## 1150   1.1352 
## 1200   1.1307 
## 1250   1.1263 
## 1300   1.1224 
## 1350   1.1181 
## 1400   1.1142 
## 1450   1.1102 
## 1500   1.1067 
## 1550   1.1031 
## 1600   1.0998 
## 1650   1.097 
## 1700   1.094 
## 1750   1.0912 
## 1800   1.0884 
## 1850   1.086 
## 1900   1.0833 
## 1950   1.0806 
## 2000   1.0781 
## 2050   1.0757 
## 2100   1.0738 
## 2150   1.0717 
## 2200   1.0697 
## 2250   1.0676 
## 2300   1.0655 
## 2350   1.0638 
## 2400   1.0622 
## 2450   1.0606 
## 2500   1.0591 
## 2550   1.0577 
## 2600   1.0563 
## 2650   1.055 
## 2700   1.0538 
## 2750   1.0526 
## 2800   1.0513 
## 2850   1.0504 
## 2900   1.0494 
## 2950   1.0483 
## 3000   1.0473 
## 3050   1.0465 
## 3100   1.046 
## 3150   1.0449 
## 3200   1.0443 
## 3250   1.0437 
## 3300   1.043 
## 3350   1.0424 
## 3400   1.0416 
## 3450   1.0411 
## 3500   1.0407 
## 3550   1.0405 
## 3600   1.0402 
## 3650   1.0403 
## 3700   1.0398 
## 3750   1.0396 
## 3800   1.0393 
## 3850   1.039 
## 3900   1.0388 
## 3950   1.0388 
## 4000   1.0385 
## 4050   1.0382 
## 4100   1.0383 
## 4150   1.038 
## 4200   1.038 
## 4250   1.038 
## 4300   1.038 
## 4350   1.0383 
## 4400   1.0384 
## 4450   1.0387 
## 4500   1.039

## fitting final gbm model with a fixed number of  4200  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.715 
##  
## estimated cv deviance = 1.038 ; se = 0.061 
##  
## training data correlation = 0.799 
## cv correlation =  0.525 ; se = 0.056 
##  
## training data ROC score = 0.954 
## cv ROC score = 0.796 ; se = 0.028 
##  
## elapsed time -  0.77 minutes

cvtc7.lr0005$cv.statistics # cross-validation statistics for this fit: deviance, correlation, discrimination, calibration, threshold (means and SEs)

## $deviance.mean
## [1] 1.037973
## 
## $deviance.se
## [1] 0.06129746
## 
## $correlation.mean
## [1] 0.5248469
## 
## $correlation.se
## [1] 0.05561748
## 
## $discrimination.mean
## [1] 0.79556
## 
## $discrimination.se
## [1] 0.0282093
## 
## $calibration.mean
## [1] 0.2621396 1.4027645 0.4681123 0.5121292 0.4483848
## 
## $calibration.se
## [1] 0.2258002 0.3334442 0.1155636 0.0907876 0.1093568
## 
## $cv.threshold
## [1] 0.3999033
## 
## $cv.threshold.se
## [1] 0.02290522

summary(cvtc7.lr0005) # prints each predictor's relative influence (rel.inf) for this fit

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 22.16973059
## cpaq8_tot                             cpaq8_tot 13.51236916
## age                                         age  5.89837089
## pain_duration                     pain_duration  5.74224751
## gcpsr_3                                 gcpsr_3  5.14195586
## isi_tot                                 isi_tot  4.79831913
## RST_PQ_BIS                           RST_PQ_BIS  4.01671136
## employment                           employment  3.88327488
## income                                   income  3.11436053
## pcs_tot                                 pcs_tot  3.00490531
## gcpsr_4                                 gcpsr_4  2.98154881
## gcpsr_6                                 gcpsr_6  2.92347938
## overlapping_pain_number overlapping_pain_number  2.64568337
## current_opioid_meds         current_opioid_meds  2.39815632
## bis_brief_tot                     bis_brief_tot  2.22881353
## audit_total                         audit_total  2.10407790
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  2.02253138
## proanx_t                               proanx_t  1.77972121
## RST_PQ_FFS                           RST_PQ_FFS  1.71242157
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.70352835
## meq_tot                                 meq_tot  1.70330080
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.24979022
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.16192063
## prodep_t                               prodep_t  1.14341614
## hispanic                               hispanic  0.56210353
## cuditr0                                 cuditr0  0.22421511
## race                                       race  0.08784938
## assigned_sex_at_birth     assigned_sex_at_birth  0.08519716

tc 7 lr .0025 (finaltrain_nocgpsr: 24 predictors, no gcpsr_3 to gcpsr_6)

# Cross-validated boosted regression tree on finaltrain_nocgpsr:
# response in column 1, predictors in columns 3-26, user-supplied folds,
# tree complexity 7, learning rate 0.0025.
# NOTE(review): the object name still reads "tc3.lr0005" although this fit
# uses tree.complexity = 7 and learning.rate = 0.0025; the name is kept
# because the statements that follow refer to it.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01 (left disabled)
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2786 
##  
## now adding trees... 
## 100   1.2485 
## 150   1.2264 
## 200   1.2073 
## 250   1.1934 
## 300   1.1827 
## 350   1.1746 
## 400   1.1675 
## 450   1.1627 
## 500   1.1591 
## 550   1.1568 
## 600   1.1552 
## 650   1.1557 
## 700   1.1561 
## 750   1.1584 
## 800   1.1595 
## 850   1.162 
## 900   1.164 
## 950   1.1659 
## 1000   1.1699 
## 1050   1.1737 
## 1100   1.1778 
## 1150   1.1816 
## 1200   1.1847

## fitting final gbm model with a fixed number of  600  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.894 
##  
## estimated cv deviance = 1.155 ; se = 0.046 
##  
## training data correlation = 0.728 
## cv correlation =  0.414 ; se = 0.056 
##  
## training data ROC score = 0.927 
## cv ROC score = 0.739 ; se = 0.032 
##  
## elapsed time -  0.17 minutes

# Cross-validation statistics stored on the fitted object
# (deviance, correlation, discrimination, calibration, threshold).
cvtc3.lr0005[["cv.statistics"]]

## $deviance.mean
## [1] 1.155226
## 
## $deviance.se
## [1] 0.04646194
## 
## $correlation.mean
## [1] 0.4143922
## 
## $correlation.se
## [1] 0.05550615
## 
## $discrimination.mean
## [1] 0.73861
## 
## $discrimination.se
## [1] 0.03207811
## 
## $calibration.mean
## [1] 0.3078190 1.5167046 0.4299294 0.4789588 0.3517599
## 
## $calibration.se
## [1] 0.23073036 0.35688541 0.10188141 0.09541625 0.07913611
## 
## $cv.threshold
## [1] 0.3792585
## 
## $cv.threshold.se
## [1] 0.01294056

summary(cvtc3.lr0005) # prints each predictor's relative influence; the CV deviance used to compare models is in $cv.statistics

##                                             var    rel.inf
## cpaq8_tot                             cpaq8_tot 26.4129801
## age                                         age  7.0780196
## pain_duration                     pain_duration  6.8267298
## isi_tot                                 isi_tot  6.7878472
## RST_PQ_BIS                           RST_PQ_BIS  6.0733974
## pcs_tot                                 pcs_tot  5.2732816
## current_opioid_meds         current_opioid_meds  5.2177667
## overlapping_pain_number overlapping_pain_number  4.8437692
## employment                           employment  4.5674573
## income                                   income  4.0865064
## audit_total                         audit_total  3.6058221
## bis_brief_tot                     bis_brief_tot  3.0125247
## proanx_t                               proanx_t  2.7491836
## prodep_t                               prodep_t  2.5260201
## RST_PQ_FFS                           RST_PQ_FFS  2.1707597
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.9354572
## meq_tot                                 meq_tot  1.6452369
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.5011247
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.4329772
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.2507375
## hispanic                               hispanic  0.4276314
## race                                       race  0.2356098
## cuditr0                                 cuditr0  0.2053794
## assigned_sex_at_birth     assigned_sex_at_birth  0.1337804

tc 7 lr .0025 (finaltrain: 28 predictors, including gcpsr_3 to gcpsr_6)

# Cross-validated boosted regression tree on finaltrain:
# response in column 1, predictors in columns 3-30, user-supplied folds,
# tree complexity 7, learning rate 0.0025.
# NOTE(review): the "lr0005" part of the object name does not match
# learning.rate = 0.0025; the name is kept because the statements that
# follow refer to it.
cvtc7.lr0005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01 (left disabled)
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2611 
##  
## now adding trees... 
## 100   1.2164 
## 150   1.1798 
## 200   1.1491 
## 250   1.125 
## 300   1.1058 
## 350   1.0903 
## 400   1.0759 
## 450   1.065 
## 500   1.0574 
## 550   1.05 
## 600   1.0455 
## 650   1.0427 
## 700   1.0394 
## 750   1.0385 
## 800   1.0366 
## 850   1.0347 
## 900   1.0366 
## 950   1.0376 
## 1000   1.039 
## 1050   1.0413 
## 1100   1.0437 
## 1150   1.0457 
## 1200   1.0477 
## 1250   1.0502 
## 1300   1.0525 
## 1350   1.0548 
## 1400   1.0578

## fitting final gbm model with a fixed number of  850  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.715 
##  
## estimated cv deviance = 1.035 ; se = 0.061 
##  
## training data correlation = 0.798 
## cv correlation =  0.528 ; se = 0.055 
##  
## training data ROC score = 0.953 
## cv ROC score = 0.796 ; se = 0.028 
##  
## elapsed time -  0.22 minutes

# Cross-validation statistics stored on the fitted object
# (deviance, correlation, discrimination, calibration, threshold).
cvtc7.lr0005[["cv.statistics"]]

## $deviance.mean
## [1] 1.034716
## 
## $deviance.se
## [1] 0.06063829
## 
## $correlation.mean
## [1] 0.5280598
## 
## $correlation.se
## [1] 0.05488477
## 
## $discrimination.mean
## [1] 0.79556
## 
## $discrimination.se
## [1] 0.02828672
## 
## $calibration.mean
## [1] 0.2567427 1.4087035 0.4775388 0.5134735 0.4583241
## 
## $calibration.se
## [1] 0.2242488 0.3395576 0.1173525 0.0930757 0.1111588
## 
## $cv.threshold
## [1] 0.395988
## 
## $cv.threshold.se
## [1] 0.02165936

# Relative influence (rel.inf) of each predictor in the fitted model.
summary(cvtc7.lr0005)

##                                             var    rel.inf
## gcpsr_5                                 gcpsr_5 22.5774936
## cpaq8_tot                             cpaq8_tot 13.6476402
## age                                         age  5.7298820
## pain_duration                     pain_duration  5.5990992
## gcpsr_3                                 gcpsr_3  4.7117460
## isi_tot                                 isi_tot  4.7019361
## employment                           employment  4.3393770
## RST_PQ_BIS                           RST_PQ_BIS  4.2646053
## gcpsr_6                                 gcpsr_6  3.1316767
## income                                   income  3.1013219
## overlapping_pain_number overlapping_pain_number  2.7626837
## pcs_tot                                 pcs_tot  2.7462558
## gcpsr_4                                 gcpsr_4  2.4214323
## current_opioid_meds         current_opioid_meds  2.3188976
## bis_brief_tot                     bis_brief_tot  2.0544118
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  2.0008983
## meq_tot                                 meq_tot  1.9082767
## audit_total                         audit_total  1.8661481
## RST_PQ_FFS                           RST_PQ_FFS  1.8498826
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.6537636
## proanx_t                               proanx_t  1.6491420
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.4423852
## prodep_t                               prodep_t  1.2851490
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.1293431
## hispanic                               hispanic  0.6392048
## cuditr0                                 cuditr0  0.2594975
## race                                       race  0.1269996
## assigned_sex_at_birth     assigned_sex_at_birth  0.0808501

tc 7 lr .005 (finaltrain_nocgpsr: 24 predictors, no gcpsr_3 to gcpsr_6)

# Cross-validated boosted regression tree on finaltrain_nocgpsr:
# response in column 1, predictors in columns 3-26, user-supplied folds,
# tree complexity 7, learning rate 0.005.
# NOTE(review): the object name still reads "tc3.lr0005" although this fit
# uses tree.complexity = 7 and learning.rate = 0.005; the name is kept
# because the statements that follow refer to it.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01 (left disabled)
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2494 
##  
## now adding trees... 
## 100   1.2079 
## 150   1.1849 
## 200   1.1697 
## 250   1.1597 
## 300   1.1592 
## 350   1.1631 
## 400   1.1668 
## 450   1.171 
## 500   1.1765 
## 550   1.1835 
## 600   1.1918 
## 650   1.1974 
## 700   1.2071 
## 750   1.2154 
## 800   1.2245 
## 850   1.2352 
## 900   1.245 
## 950   1.2523 
## 1000   1.2601

## fitting final gbm model with a fixed number of  300  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.897 
##  
## estimated cv deviance = 1.159 ; se = 0.046 
##  
## training data correlation = 0.729 
## cv correlation =  0.406 ; se = 0.054 
##  
## training data ROC score = 0.926 
## cv ROC score = 0.734 ; se = 0.031 
##  
## elapsed time -  0.13 minutes

# Cross-validation statistics stored on the fitted object
# (deviance, correlation, discrimination, calibration, threshold).
cvtc3.lr0005[["cv.statistics"]]

## $deviance.mean
## [1] 1.159242
## 
## $deviance.se
## [1] 0.04610345
## 
## $correlation.mean
## [1] 0.4058745
## 
## $correlation.se
## [1] 0.0539592
## 
## $discrimination.mean
## [1] 0.73402
## 
## $discrimination.se
## [1] 0.0307536
## 
## $calibration.mean
## [1] 0.2255389 1.4008628 0.4465147 0.4855912 0.3841016
## 
## $calibration.se
## [1] 0.20233325 0.29685675 0.10623970 0.09625451 0.08661145
## 
## $cv.threshold
## [1] 0.3807723
## 
## $cv.threshold.se
## [1] 0.01105311

summary(cvtc3.lr0005) # prints each predictor's relative influence; the CV deviance used to compare models is in $cv.statistics

##                                             var     rel.inf
## cpaq8_tot                             cpaq8_tot 25.87089485
## age                                         age  7.35207468
## isi_tot                                 isi_tot  7.26638975
## pain_duration                     pain_duration  7.04923322
## RST_PQ_BIS                           RST_PQ_BIS  6.41150893
## current_opioid_meds         current_opioid_meds  5.15494716
## employment                           employment  5.01347305
## pcs_tot                                 pcs_tot  4.86127806
## overlapping_pain_number overlapping_pain_number  4.58665933
## audit_total                         audit_total  4.12744889
## income                                   income  3.95469326
## bis_brief_tot                     bis_brief_tot  2.99211036
## proanx_t                               proanx_t  2.50911743
## meq_tot                                 meq_tot  2.05432321
## prodep_t                               prodep_t  2.04551993
## RST_PQ_FFS                           RST_PQ_FFS  1.99259209
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.71909655
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.65571755
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.41741659
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.37796060
## hispanic                               hispanic  0.17820937
## race                                       race  0.17534186
## cuditr0                                 cuditr0  0.14552579
## assigned_sex_at_birth     assigned_sex_at_birth  0.08846752

tc 7 lr .005 (finaltrain: 28 predictors, including gcpsr_3 to gcpsr_6)

# Cross-validated boosted regression tree on finaltrain:
# response in column 1, predictors in columns 3-30, user-supplied folds,
# tree complexity 7, learning rate 0.005.
# NOTE(review): the "lr0005" part of the object name does not match
# learning.rate = 0.005; the name is kept because the statements that
# follow refer to it.
cvtc7.lr0005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01 (left disabled)
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2144 
##  
## now adding trees... 
## 100   1.1486 
## 150   1.1072 
## 200   1.0761 
## 250   1.0549 
## 300   1.0419 
## 350   1.038 
## 400   1.0349 
## 450   1.0325 
## 500   1.0346 
## 550   1.0398 
## 600   1.0442 
## 650   1.0489 
## 700   1.0562 
## 750   1.0629 
## 800   1.0717 
## 850   1.0782 
## 900   1.0857 
## 950   1.0937 
## 1000   1.1007 
## 1050   1.1074

## fitting final gbm model with a fixed number of  450  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.695 
##  
## estimated cv deviance = 1.033 ; se = 0.064 
##  
## training data correlation = 0.806 
## cv correlation =  0.529 ; se = 0.056 
##  
## training data ROC score = 0.957 
## cv ROC score = 0.795 ; se = 0.026 
##  
## elapsed time -  0.16 minutes

# Cross-validation statistics stored on the fitted object
# (deviance, correlation, discrimination, calibration, threshold).
cvtc7.lr0005[["cv.statistics"]]

## $deviance.mean
## [1] 1.03255
## 
## $deviance.se
## [1] 0.06406451
## 
## $correlation.mean
## [1] 0.5288207
## 
## $correlation.se
## [1] 0.05561399
## 
## $discrimination.mean
## [1] 0.7954
## 
## $discrimination.se
## [1] 0.02632992
## 
## $calibration.mean
## [1] 0.2226074 1.3324316 0.4720496 0.5124822 0.4727824
## 
## $calibration.se
## [1] 0.20992221 0.30515106 0.11368360 0.09287942 0.11105035
## 
## $cv.threshold
## [1] 0.4043779
## 
## $cv.threshold.se
## [1] 0.0254986

# Relative influence (rel.inf) of each predictor in the fitted model.
summary(cvtc7.lr0005)

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 21.46808660
## cpaq8_tot                             cpaq8_tot 13.23711933
## pain_duration                     pain_duration  5.83747382
## age                                         age  5.83352167
## gcpsr_3                                 gcpsr_3  5.28297987
## isi_tot                                 isi_tot  5.23846961
## RST_PQ_BIS                           RST_PQ_BIS  4.09308840
## income                                   income  3.68897802
## employment                           employment  3.62925668
## gcpsr_6                                 gcpsr_6  3.01644085
## pcs_tot                                 pcs_tot  2.98574240
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  2.57918863
## current_opioid_meds         current_opioid_meds  2.52221181
## gcpsr_4                                 gcpsr_4  2.50736116
## overlapping_pain_number overlapping_pain_number  2.33878660
## audit_total                         audit_total  2.02455535
## proanx_t                               proanx_t  1.90276051
## meq_tot                                 meq_tot  1.86961715
## bis_brief_tot                     bis_brief_tot  1.77421169
## RST_PQ_FFS                           RST_PQ_FFS  1.63711817
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.63571019
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.42496660
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.34708495
## prodep_t                               prodep_t  1.33338823
## hispanic                               hispanic  0.47160033
## cuditr0                                 cuditr0  0.17430402
## race                                       race  0.08715639
## assigned_sex_at_birth     assigned_sex_at_birth  0.05882097

tc 7 lr .00005 (finaltrain_nocgpsr: 24 predictors, no gcpsr_3 to gcpsr_6)

# Cross-validated boosted regression tree on finaltrain_nocgpsr:
# response in column 1, predictors in columns 3-26, user-supplied folds,
# tree complexity 7, learning rate 0.00005.
# NOTE(review): the object name still reads "tc3.lr0005" although this fit
# uses tree.complexity = 7 and learning.rate = 0.00005; the name is kept
# because the statements that follow refer to it.
# NOTE(review): in the run recorded below, this learning rate needed 18250
# trees and ended with a higher CV deviance (1.175) than the 0.0025 and
# 0.005 fits on the same data (1.155 and 1.159) - confirm it is worth keeping.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.00005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01 (left disabled)
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.316 
##  
## now adding trees... 
## 100   1.3152 
## 150   1.3143 
## 200   1.3135 
## 250   1.3127 
## 300   1.3119 
## 350   1.3111 
## 400   1.3102 
## 450   1.3094 
## 500   1.3086 
## 550   1.3078 
## 600   1.307 
## 650   1.3062 
## 700   1.3054 
## 750   1.3046 
## 800   1.3038 
## 850   1.3031 
## 900   1.3023 
## 950   1.3015 
## 1000   1.3007 
## 1050   1.3 
## 1100   1.2992 
## 1150   1.2985 
## 1200   1.2977 
## 1250   1.2969 
## 1300   1.2962 
## 1350   1.2955 
## 1400   1.2947 
## 1450   1.294 
## 1500   1.2932 
## 1550   1.2925 
## 1600   1.2918 
## 1650   1.2911 
## 1700   1.2904 
## 1750   1.2896 
## 1800   1.2889 
## 1850   1.2882 
## 1900   1.2875 
## 1950   1.2868 
## 2000   1.2861 
## 2050   1.2854 
## 2100   1.2847 
## 2150   1.284 
## 2200   1.2833 
## 2250   1.2827 
## 2300   1.282 
## 2350   1.2813 
## 2400   1.2806 
## 2450   1.28 
## 2500   1.2793 
## 2550   1.2787 
## 2600   1.278 
## 2650   1.2774 
## 2700   1.2768 
## 2750   1.2761 
## 2800   1.2755 
## 2850   1.2749 
## 2900   1.2742 
## 2950   1.2736 
## 3000   1.273 
## 3050   1.2723 
## 3100   1.2717 
## 3150   1.2711 
## 3200   1.2705 
## 3250   1.2698 
## 3300   1.2692 
## 3350   1.2686 
## 3400   1.268 
## 3450   1.2674 
## 3500   1.2668 
## 3550   1.2662 
## 3600   1.2656 
## 3650   1.265 
## 3700   1.2644 
## 3750   1.2638 
## 3800   1.2633 
## 3850   1.2627 
## 3900   1.2621 
## 3950   1.2615 
## 4000   1.261 
## 4050   1.2604 
## 4100   1.2598 
## 4150   1.2593 
## 4200   1.2588 
## 4250   1.2582 
## 4300   1.2576 
## 4350   1.2571 
## 4400   1.2565 
## 4450   1.256 
## 4500   1.2554 
## 4550   1.2549 
## 4600   1.2543 
## 4650   1.2538 
## 4700   1.2533 
## 4750   1.2528 
## 4800   1.2522 
## 4850   1.2517 
## 4900   1.2511 
## 4950   1.2506 
## 5000   1.2501 
## 5050   1.2496 
## 5100   1.249 
## 5150   1.2485 
## 5200   1.248 
## 5250   1.2475 
## 5300   1.2471 
## 5350   1.2466 
## 5400   1.2461 
## 5450   1.2455 
## 5500   1.2451 
## 5550   1.2446 
## 5600   1.2441 
## 5650   1.2436 
## 5700   1.2432 
## 5750   1.2427 
## 5800   1.2422 
## 5850   1.2417 
## 5900   1.2413 
## 5950   1.2408 
## 6000   1.2403 
## 6050   1.2398 
## 6100   1.2394 
## 6150   1.2389 
## 6200   1.2385 
## 6250   1.238 
## 6300   1.2375 
## 6350   1.2371 
## 6400   1.2366 
## 6450   1.2362 
## 6500   1.2358 
## 6550   1.2353 
## 6600   1.2349 
## 6650   1.2345 
## 6700   1.234 
## 6750   1.2336 
## 6800   1.2332 
## 6850   1.2327 
## 6900   1.2323 
## 6950   1.2319 
## 7000   1.2314 
## 7050   1.231 
## 7100   1.2306 
## 7150   1.2302 
## 7200   1.2298 
## 7250   1.2294 
## 7300   1.229 
## 7350   1.2286 
## 7400   1.2282 
## 7450   1.2277 
## 7500   1.2273 
## 7550   1.227 
## 7600   1.2266 
## 7650   1.2262 
## 7700   1.2258 
## 7750   1.2254 
## 7800   1.225 
## 7850   1.2246 
## 7900   1.2241 
## 7950   1.2238 
## 8000   1.2234 
## 8050   1.2231 
## 8100   1.2227 
## 8150   1.2223 
## 8200   1.2219 
## 8250   1.2215 
## 8300   1.2212 
## 8350   1.2208 
## 8400   1.2205 
## 8450   1.2201 
## 8500   1.2197 
## 8550   1.2194 
## 8600   1.219 
## 8650   1.2186 
## 8700   1.2182 
## 8750   1.2179 
## 8800   1.2176 
## 8850   1.2172 
## 8900   1.2169 
## 8950   1.2165 
## 9000   1.2162 
## 9050   1.2158 
## 9100   1.2155 
## 9150   1.2151 
## 9200   1.2148 
## 9250   1.2144 
## 9300   1.2141 
## 9350   1.2138 
## 9400   1.2135 
## 9450   1.2131 
## 9500   1.2128 
## 9550   1.2125 
## 9600   1.2121 
## 9650   1.2118 
## 9700   1.2115 
## 9750   1.2112 
## 9800   1.2109 
## 9850   1.2106 
## 9900   1.2102 
## 9950   1.2099 
## 10000   1.2096 
## 10050   1.2093 
## 10100   1.209 
## 10150   1.2087 
## 10200   1.2084 
## 10250   1.2081 
## 10300   1.2078 
## 10350   1.2075 
## 10400   1.2072 
## 10450   1.2069 
## 10500   1.2066 
## 10550   1.2063 
## 10600   1.206 
## 10650   1.2057 
## 10700   1.2054 
## 10750   1.2051 
## 10800   1.2048 
## 10850   1.2045 
## 10900   1.2043 
## 10950   1.2039 
## 11000   1.2036 
## 11050   1.2034 
## 11100   1.2031 
## 11150   1.2028 
## 11200   1.2025 
## 11250   1.2022 
## 11300   1.202 
## 11350   1.2017 
## 11400   1.2014 
## 11450   1.2011 
## 11500   1.2009 
## 11550   1.2006 
## 11600   1.2004 
## 11650   1.2001 
## 11700   1.1999 
## 11750   1.1996 
## 11800   1.1993 
## 11850   1.199 
## 11900   1.1988 
## 11950   1.1985 
## 12000   1.1983 
## 12050   1.198 
## 12100   1.1978 
## 12150   1.1975 
## 12200   1.1973 
## 12250   1.197 
## 12300   1.1968 
## 12350   1.1965 
## 12400   1.1963 
## 12450   1.196 
## 12500   1.1958 
## 12550   1.1956 
## 12600   1.1953 
## 12650   1.1951 
## 12700   1.1948 
## 12750   1.1946 
## 12800   1.1944 
## 12850   1.1942 
## 12900   1.1939 
## 12950   1.1937 
## 13000   1.1935 
## 13050   1.1932 
## 13100   1.193 
## 13150   1.1928 
## 13200   1.1926 
## 13250   1.1923 
## 13300   1.1921 
## 13350   1.1919 
## 13400   1.1917 
## 13450   1.1915 
## 13500   1.1913 
## 13550   1.191 
## 13600   1.1908 
## 13650   1.1906 
## 13700   1.1904 
## 13750   1.1902 
## 13800   1.19 
## 13850   1.1897 
## 13900   1.1895 
## 13950   1.1893 
## 14000   1.1891 
## 14050   1.1889 
## 14100   1.1887 
## 14150   1.1885 
## 14200   1.1883 
## 14250   1.1881 
## 14300   1.1879 
## 14350   1.1877 
## 14400   1.1874 
## 14450   1.1872 
## 14500   1.187 
## 14550   1.1868 
## 14600   1.1866 
## 14650   1.1865 
## 14700   1.1863 
## 14750   1.1861 
## 14800   1.1859 
## 14850   1.1857 
## 14900   1.1855 
## 14950   1.1853 
## 15000   1.1851 
## 15050   1.1849 
## 15100   1.1848 
## 15150   1.1846 
## 15200   1.1843 
## 15250   1.1842 
## 15300   1.184 
## 15350   1.1838 
## 15400   1.1836 
## 15450   1.1835 
## 15500   1.1833 
## 15550   1.1831 
## 15600   1.183 
## 15650   1.1828 
## 15700   1.1826 
## 15750   1.1824 
## 15800   1.1822 
## 15850   1.182 
## 15900   1.1819 
## 15950   1.1817 
## 16000   1.1815 
## 16050   1.1813 
## 16100   1.1812 
## 16150   1.181 
## 16200   1.1809 
## 16250   1.1807 
## 16300   1.1806 
## 16350   1.1804 
## 16400   1.1803 
## 16450   1.1801 
## 16500   1.18 
## 16550   1.1798 
## 16600   1.1796 
## 16650   1.1795 
## 16700   1.1793 
## 16750   1.1792 
## 16800   1.179 
## 16850   1.1789 
## 16900   1.1787 
## 16950   1.1786 
## 17000   1.1784 
## 17050   1.1783 
## 17100   1.1781 
## 17150   1.178 
## 17200   1.1778 
## 17250   1.1777 
## 17300   1.1775 
## 17350   1.1774 
## 17400   1.1773 
## 17450   1.1771 
## 17500   1.177 
## 17550   1.1768 
## 17600   1.1767 
## 17650   1.1765 
## 17700   1.1764 
## 17750   1.1763 
## 17800   1.1761 
## 17850   1.176 
## 17900   1.1759 
## 17950   1.1757 
## 18000   1.1756 
## 18050   1.1755 
## 18100   1.1753 
## 18150   1.1752 
## 18200   1.175 
## 18250   1.1749

## fitting final gbm model with a fixed number of  18250  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 1.002 
##  
## estimated cv deviance = 1.175 ; se = 0.039 
##  
## training data correlation = 0.693 
## cv correlation =  0.415 ; se = 0.056 
##  
## training data ROC score = 0.908 
## cv ROC score = 0.737 ; se = 0.03 
##  
## elapsed time -  3.82 minutes

# Cross-validation statistics stored on the fitted object
# (deviance, correlation, discrimination, calibration, threshold).
cvtc3.lr0005[["cv.statistics"]]

## $deviance.mean
## [1] 1.174888
## 
## $deviance.se
## [1] 0.03858339
## 
## $correlation.mean
## [1] 0.4146352
## 
## $correlation.se
## [1] 0.05575038
## 
## $discrimination.mean
## [1] 0.73704
## 
## $discrimination.se
## [1] 0.03041651
## 
## $calibration.mean
## [1] 0.5894906 2.0976129 0.3221037 0.4589708 0.2204941
## 
## $calibration.se
## [1] 0.27066276 0.49634444 0.07821405 0.08519943 0.04941401
## 
## $cv.threshold
## [1] 0.375911
## 
## $cv.threshold.se
## [1] 0.0101839

summary(cvtc3.lr0005) # prints each predictor's relative influence; the CV deviance used to compare models is in $cv.statistics

##                                             var     rel.inf
## cpaq8_tot                             cpaq8_tot 31.01260821
## age                                         age  7.41804589
## isi_tot                                 isi_tot  7.03432474
## pain_duration                     pain_duration  6.38560387
## RST_PQ_BIS                           RST_PQ_BIS  6.27896129
## current_opioid_meds         current_opioid_meds  5.60580686
## overlapping_pain_number overlapping_pain_number  5.06036427
## pcs_tot                                 pcs_tot  4.54917783
## employment                           employment  4.25806532
## income                                   income  3.64586471
## audit_total                         audit_total  3.21475327
## bis_brief_tot                     bis_brief_tot  2.55226299
## proanx_t                               proanx_t  2.00219547
## prodep_t                               prodep_t  1.95615355
## RST_PQ_FFS                           RST_PQ_FFS  1.78170026
## meq_tot                                 meq_tot  1.62798334
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.53617012
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.22870709
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.19527173
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.02641023
## hispanic                               hispanic  0.21218915
## race                                       race  0.16983123
## cuditr0                                 cuditr0  0.15566165
## assigned_sex_at_birth     assigned_sex_at_birth  0.09188695

tc 7 lr .00005 (finaltrain: 28 predictors, including gcpsr_3 to gcpsr_6)

# Cross-validated boosted regression tree on finaltrain:
# response in column 1, predictors in columns 3-30, user-supplied folds,
# tree complexity 7, learning rate 0.00005.
# NOTE(review): the "lr0005" part of the object name does not match
# learning.rate = 0.00005; the name is kept because later statements may
# refer to it.
cvtc7.lr0005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 7,
  learning.rate = 0.00005,
  bag.fraction = 0.5,
  max.trees = 50000,
  tolerance.method = "auto"
  # tolerance = 0.01 (left disabled)
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3156 
##  
## now adding trees... 
## 100   1.3143 
## 150   1.3131 
## 200   1.3118 
## 250   1.3106 
## 300   1.3094 
## 350   1.3082 
## 400   1.3069 
## 450   1.3057 
## 500   1.3045 
## 550   1.3033 
## 600   1.3021 
## 650   1.3009 
## 700   1.2997 
## 750   1.2985 
## 800   1.2973 
## 850   1.2962 
## 900   1.295 
## 950   1.2938 
## 1000   1.2926 
## 1050   1.2915 
## 1100   1.2903 
## 1150   1.2892 
## 1200   1.288 
## 1250   1.2869 
## 1300   1.2858 
## 1350   1.2847 
## 1400   1.2836 
## 1450   1.2825 
## 1500   1.2814 
## 1550   1.2802 
## 1600   1.2791 
## 1650   1.278 
## 1700   1.2769 
## 1750   1.2758 
## 1800   1.2748 
## 1850   1.2737 
## 1900   1.2727 
## 1950   1.2716 
## 2000   1.2705 
## 2050   1.2695 
## 2100   1.2684 
## 2150   1.2674 
## 2200   1.2664 
## 2250   1.2653 
## 2300   1.2643 
## 2350   1.2632 
## 2400   1.2622 
## 2450   1.2612 
## 2500   1.2602 
## 2550   1.2592 
## 2600   1.2582 
## 2650   1.2572 
## 2700   1.2562 
## 2750   1.2552 
## 2800   1.2542 
## 2850   1.2532 
## 2900   1.2523 
## 2950   1.2513 
## 3000   1.2503 
## 3050   1.2494 
## 3100   1.2484 
## 3150   1.2475 
## 3200   1.2465 
## 3250   1.2456 
## 3300   1.2447 
## 3350   1.2437 
## 3400   1.2428 
## 3450   1.2419 
## 3500   1.241 
## 3550   1.24 
## 3600   1.2391 
## 3650   1.2382 
## 3700   1.2373 
## 3750   1.2364 
## 3800   1.2355 
## 3850   1.2346 
## 3900   1.2338 
## 3950   1.2329 
## 4000   1.232 
## 4050   1.2311 
## 4100   1.2302 
## 4150   1.2293 
## 4200   1.2284 
## 4250   1.2276 
## 4300   1.2267 
## 4350   1.2259 
## 4400   1.225 
## 4450   1.2242 
## 4500   1.2234 
## 4550   1.2225 
## 4600   1.2217 
## 4650   1.2209 
## 4700   1.2201 
## 4750   1.2193 
## 4800   1.2185 
## 4850   1.2177 
## 4900   1.2169 
## 4950   1.2161 
## 5000   1.2153 
## 5050   1.2145 
## 5100   1.2137 
## 5150   1.2129 
## 5200   1.2121 
## 5250   1.2113 
## 5300   1.2105 
## 5350   1.2098 
## 5400   1.209 
## 5450   1.2082 
## 5500   1.2074 
## 5550   1.2067 
## 5600   1.2059 
## 5650   1.2052 
## 5700   1.2044 
## 5750   1.2036 
## 5800   1.2029 
## 5850   1.2021 
## 5900   1.2014 
## 5950   1.2006 
## 6000   1.1999 
## 6050   1.1992 
## 6100   1.1985 
## 6150   1.1977 
## 6200   1.197 
## 6250   1.1963 
## 6300   1.1956 
## 6350   1.1949 
## 6400   1.1942 
## 6450   1.1935 
## 6500   1.1927 
## 6550   1.192 
## 6600   1.1913 
## 6650   1.1906 
## 6700   1.1899 
## 6750   1.1892 
## 6800   1.1885 
## 6850   1.1878 
## 6900   1.1872 
## 6950   1.1865 
## 7000   1.1859 
## 7050   1.1852 
## 7100   1.1845 
## 7150   1.1838 
## 7200   1.1832 
## 7250   1.1825 
## 7300   1.1818 
## 7350   1.1811 
## 7400   1.1805 
## 7450   1.1798 
## 7500   1.1792 
## 7550   1.1785 
## 7600   1.1779 
## 7650   1.1773 
## 7700   1.1766 
## 7750   1.176 
## 7800   1.1754 
## 7850   1.1747 
## 7900   1.1741 
## 7950   1.1735 
## 8000   1.1729 
## 8050   1.1723 
## 8100   1.1717 
## 8150   1.1711 
## 8200   1.1705 
## 8250   1.1699 
## 8300   1.1693 
## 8350   1.1687 
## 8400   1.168 
## 8450   1.1675 
## 8500   1.1669 
## 8550   1.1663 
## 8600   1.1657 
## 8650   1.1651 
## 8700   1.1645 
## 8750   1.1639 
## 8800   1.1633 
## 8850   1.1627 
## 8900   1.1621 
## 8950   1.1616 
## 9000   1.161 
## 9050   1.1604 
## 9100   1.1599 
## 9150   1.1593 
## 9200   1.1587 
## 9250   1.1582 
## 9300   1.1576 
## 9350   1.1571 
## 9400   1.1565 
## 9450   1.156 
## 9500   1.1554 
## 9550   1.1549 
## 9600   1.1543 
## 9650   1.1538 
## 9700   1.1532 
## 9750   1.1527 
## 9800   1.1522 
## 9850   1.1516 
## 9900   1.1511 
## 9950   1.1506 
## 10000   1.15 
## 10050   1.1495 
## 10100   1.149 
## 10150   1.1484 
## 10200   1.1479 
## 10250   1.1474 
## 10300   1.1469 
## 10350   1.1463 
## 10400   1.1458 
## 10450   1.1453 
## 10500   1.1448 
## 10550   1.1443 
## 10600   1.1438 
## 10650   1.1433 
## 10700   1.1428 
## 10750   1.1423 
## 10800   1.1418 
## 10850   1.1413 
## 10900   1.1408 
## 10950   1.1403 
## 11000   1.1398 
## 11050   1.1393 
## 11100   1.1388 
## 11150   1.1383 
## 11200   1.1378 
## 11250   1.1374 
## 11300   1.1369 
## 11350   1.1364 
## 11400   1.1359 
## 11450   1.1354 
## 11500   1.1349 
## 11550   1.1344 
## 11600   1.134 
## 11650   1.1335 
## 11700   1.1331 
## 11750   1.1326 
## 11800   1.1321 
## 11850   1.1317 
## 11900   1.1312 
## 11950   1.1308 
## 12000   1.1303 
## 12050   1.1299 
## 12100   1.1294 
## 12150   1.129 
## 12200   1.1286 
## 12250   1.1281 
## 12300   1.1277 
## 12350   1.1273 
## 12400   1.1269 
## 12450   1.1264 
## 12500   1.126 
## 12550   1.1256 
## 12600   1.1251 
## 12650   1.1247 
## 12700   1.1243 
## 12750   1.1238 
## 12800   1.1234 
## 12850   1.123 
## 12900   1.1226 
## 12950   1.1221 
## 13000   1.1217 
## 13050   1.1213 
## 13100   1.1208 
## 13150   1.1205 
## 13200   1.12 
## 13250   1.1196 
## 13300   1.1192 
## 13350   1.1188 
## 13400   1.1184 
## 13450   1.118 
## 13500   1.1176 
## 13550   1.1172 
## 13600   1.1168 
## 13650   1.1164 
## 13700   1.116 
## 13750   1.1157 
## 13800   1.1153 
## 13850   1.1149 
## 13900   1.1145 
## 13950   1.1141 
## 14000   1.1137 
## 14050   1.1134 
## 14100   1.113 
## 14150   1.1126 
## 14200   1.1122 
## 14250   1.1119 
## 14300   1.1115 
## 14350   1.1111 
## 14400   1.1108 
## 14450   1.1104 
## 14500   1.11 
## 14550   1.1096 
## 14600   1.1093 
## 14650   1.1089 
## 14700   1.1085 
## 14750   1.1082 
## 14800   1.1078 
## 14850   1.1074 
## 14900   1.1071 
## 14950   1.1067 
## 15000   1.1063 
## 15050   1.106 
## 15100   1.1057 
## 15150   1.1053 
## 15200   1.105 
## 15250   1.1046 
## 15300   1.1043 
## 15350   1.1039 
## 15400   1.1035 
## 15450   1.1032 
## 15500   1.1028 
## 15550   1.1025 
## 15600   1.1022 
## 15650   1.1019 
## 15700   1.1015 
## 15750   1.1012 
## 15800   1.1009 
## 15850   1.1005 
## 15900   1.1002 
## 15950   1.0999 
## 16000   1.0995 
## 16050   1.0992 
## 16100   1.0989 
## 16150   1.0986 
## 16200   1.0983 
## 16250   1.098 
## 16300   1.0977 
## 16350   1.0973 
## 16400   1.097 
## 16450   1.0967 
## 16500   1.0964 
## 16550   1.0961 
## 16600   1.0958 
## 16650   1.0955 
## 16700   1.0952 
## 16750   1.0948 
## 16800   1.0945 
## 16850   1.0942 
## 16900   1.0939 
## 16950   1.0936 
## 17000   1.0933 
## 17050   1.0931 
## 17100   1.0928 
## 17150   1.0924 
## 17200   1.0922 
## 17250   1.0919 
## 17300   1.0916 
## 17350   1.0913 
## 17400   1.091 
## 17450   1.0907 
## 17500   1.0905 
## 17550   1.0901 
## 17600   1.0899 
## 17650   1.0895 
## 17700   1.0893 
## 17750   1.089 
## 17800   1.0887 
## 17850   1.0884 
## 17900   1.0881 
## 17950   1.0879 
## 18000   1.0876 
## 18050   1.0873 
## 18100   1.087 
## 18150   1.0868 
## 18200   1.0865 
## 18250   1.0862 
## 18300   1.0859 
## 18350   1.0857 
## 18400   1.0854 
## 18450   1.0851 
## 18500   1.0849 
## 18550   1.0846 
## 18600   1.0843 
## 18650   1.084 
## 18700   1.0838 
## 18750   1.0835 
## 18800   1.0833 
## 18850   1.083 
## 18900   1.0827 
## 18950   1.0825 
## 19000   1.0822 
## 19050   1.082 
## 19100   1.0817 
## 19150   1.0815 
## 19200   1.0813 
## 19250   1.081 
## 19300   1.0808 
## 19350   1.0805 
## 19400   1.0803 
## 19450   1.08 
## 19500   1.0798 
## 19550   1.0796 
## 19600   1.0793 
## 19650   1.0791 
## 19700   1.0788 
## 19750   1.0786 
## 19800   1.0783 
## 19850   1.0781 
## 19900   1.0778 
## 19950   1.0776 
## 20000   1.0773 
## 20050   1.0771 
## 20100   1.0769 
## 20150   1.0767 
## 20200   1.0764 
## 20250   1.0762 
## 20300   1.076 
## 20350   1.0757 
## 20400   1.0755 
## 20450   1.0753 
## 20500   1.0751 
## 20550   1.0748 
## 20600   1.0746 
## 20650   1.0744 
## 20700   1.0742 
## 20750   1.074 
## 20800   1.0738 
## 20850   1.0735 
## 20900   1.0733 
## 20950   1.0731 
## 21000   1.0729 
## 21050   1.0727 
## 21100   1.0724 
## 21150   1.0723 
## 21200   1.072 
## 21250   1.0718 
## 21300   1.0716 
## 21350   1.0714 
## 21400   1.0712 
## 21450   1.071 
## 21500   1.0708 
## 21550   1.0706 
## 21600   1.0704 
## 21650   1.0702 
## 21700   1.07 
## 21750   1.0698 
## 21800   1.0696 
## 21850   1.0694 
## 21900   1.0692 
## 21950   1.069 
## 22000   1.0688 
## 22050   1.0686 
## 22100   1.0684 
## 22150   1.0682 
## 22200   1.068 
## 22250   1.0678 
## 22300   1.0677 
## 22350   1.0675 
## 22400   1.0673 
## 22450   1.0671 
## 22500   1.0669 
## 22550   1.0667 
## 22600   1.0665 
## 22650   1.0663 
## 22700   1.0661 
## 22750   1.0659 
## 22800   1.0657 
## 22850   1.0656 
## 22900   1.0654 
## 22950   1.0652 
## 23000   1.0651 
## 23050   1.0649 
## 23100   1.0647 
## 23150   1.0645 
## 23200   1.0643 
## 23250   1.0642 
## 23300   1.064 
## 23350   1.0638 
## 23400   1.0637 
## 23450   1.0635 
## 23500   1.0633 
## 23550   1.0632 
## 23600   1.063 
## 23650   1.0628 
## 23700   1.0626 
## 23750   1.0625 
## 23800   1.0623 
## 23850   1.0621 
## 23900   1.062 
## 23950   1.0618 
## 24000   1.0617 
## 24050   1.0615 
## 24100   1.0614 
## 24150   1.0612 
## 24200   1.0611 
## 24250   1.0609 
## 24300   1.0607 
## 24350   1.0605 
## 24400   1.0604 
## 24450   1.0602 
## 24500   1.0601 
## 24550   1.0599 
## 24600   1.0598 
## 24650   1.0596 
## 24700   1.0595 
## 24750   1.0593 
## 24800   1.0591 
## 24850   1.059 
## 24900   1.0588 
## 24950   1.0587 
## 25000   1.0585 
## 25050   1.0584 
## 25100   1.0582 
## 25150   1.0581 
## 25200   1.0579 
## 25250   1.0578 
## 25300   1.0576 
## 25350   1.0575 
## 25400   1.0573 
## 25450   1.0572 
## 25500   1.0571 
## 25550   1.0569 
## 25600   1.0568 
## 25650   1.0566 
## 25700   1.0565 
## 25750   1.0563 
## 25800   1.0562 
## 25850   1.0561 
## 25900   1.0559 
## 25950   1.0558 
## 26000   1.0557 
## 26050   1.0555 
## 26100   1.0554 
## 26150   1.0552 
## 26200   1.0551 
## 26250   1.055 
## 26300   1.0548 
## 26350   1.0547 
## 26400   1.0546

## fitting final gbm model with a fixed number of  26400  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.84 
##  
## estimated cv deviance = 1.055 ; se = 0.049 
##  
## training data correlation = 0.762 
## cv correlation =  0.53 ; se = 0.056 
##  
## training data ROC score = 0.935 
## cv ROC score = 0.798 ; se = 0.028 
##  
## elapsed time -  6.76 minutes

# Cross-validation statistics stored on the fitted model: deviance,
# correlation, discrimination (ROC), calibration and threshold, with SEs.
cvtc7.lr0005[["cv.statistics"]]

## $deviance.mean
## [1] 1.054553
## 
## $deviance.se
## [1] 0.04930168
## 
## $correlation.mean
## [1] 0.530333
## 
## $correlation.se
## [1] 0.05550569
## 
## $discrimination.mean
## [1] 0.79772
## 
## $discrimination.se
## [1] 0.02825031
## 
## $calibration.mean
## [1] 0.4660760 1.8455725 0.3772194 0.4916652 0.2584489
## 
## $calibration.se
## [1] 0.24670873 0.40702300 0.09327762 0.08018175 0.06239416
## 
## $cv.threshold
## [1] 0.3897129
## 
## $cv.threshold.se
## [1] 0.01770522

# Relative influence (rel.inf) of each predictor in the fitted model,
# listed from most to least influential.
summary(cvtc7.lr0005)

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 26.47704702
## cpaq8_tot                             cpaq8_tot 14.68806679
## age                                         age  6.06210537
## gcpsr_3                                 gcpsr_3  5.30042091
## pain_duration                     pain_duration  5.27996291
## isi_tot                                 isi_tot  4.47817356
## employment                           employment  3.69521043
## RST_PQ_BIS                           RST_PQ_BIS  3.60069219
## gcpsr_6                                 gcpsr_6  3.24779947
## income                                   income  2.68496568
## overlapping_pain_number overlapping_pain_number  2.61510261
## pcs_tot                                 pcs_tot  2.47780997
## gcpsr_4                                 gcpsr_4  2.42966846
## current_opioid_meds         current_opioid_meds  2.40729410
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.78155341
## bis_brief_tot                     bis_brief_tot  1.73250802
## audit_total                         audit_total  1.62404592
## proanx_t                               proanx_t  1.62028940
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.43246247
## RST_PQ_FFS                           RST_PQ_FFS  1.42234397
## meq_tot                                 meq_tot  1.25561555
## prodep_t                               prodep_t  1.07715459
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.00776179
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.94981940
## hispanic                               hispanic  0.38146283
## cuditr0                                 cuditr0  0.17539088
## assigned_sex_at_birth     assigned_sex_at_birth  0.04866678
## race                                       race  0.04660550

tc 5, lr .0005 (finaltrain_nocgpsr: 24 predictors, no gcpsr items)

# Cross-validated boosted regression tree for the response in column 1
# (gcpsr_2_HICP_num) using the 24 predictors in columns 3:26 of
# finaltrain_nocgpsr and the pre-built fold assignment in fold.vector.
# NOTE(review): the object name reads "tc3", but the call uses
# tree.complexity = 5 -- confirm the intended name before comparing models.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 5,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  # tolerance is derived automatically; an explicit tolerance = 0.01 is
  # intentionally left disabled.
  tolerance.method = "auto",
  max.trees = 50000
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3084 
##  
## now adding trees... 
## 100   1.3007 
## 150   1.2931 
## 200   1.2857 
## 250   1.2787 
## 300   1.2726 
## 350   1.2665 
## 400   1.2605 
## 450   1.2547 
## 500   1.2497 
## 550   1.2451 
## 600   1.2404 
## 650   1.2356 
## 700   1.2313 
## 750   1.2269 
## 800   1.2227 
## 850   1.2191 
## 900   1.2153 
## 950   1.2121 
## 1000   1.2085 
## 1050   1.2053 
## 1100   1.2022 
## 1150   1.1994 
## 1200   1.1964 
## 1250   1.1941 
## 1300   1.1916 
## 1350   1.1893 
## 1400   1.1871 
## 1450   1.1851 
## 1500   1.1832 
## 1550   1.1812 
## 1600   1.1796 
## 1650   1.1776 
## 1700   1.1758 
## 1750   1.1741 
## 1800   1.1726 
## 1850   1.1712 
## 1900   1.1699 
## 1950   1.1686 
## 2000   1.1674 
## 2050   1.1661 
## 2100   1.165 
## 2150   1.1642 
## 2200   1.1633 
## 2250   1.1626 
## 2300   1.1618 
## 2350   1.161 
## 2400   1.1604 
## 2450   1.1595 
## 2500   1.1587 
## 2550   1.1583 
## 2600   1.1577 
## 2650   1.1573 
## 2700   1.1569 
## 2750   1.1562 
## 2800   1.1559 
## 2850   1.1555 
## 2900   1.1553 
## 2950   1.155 
## 3000   1.1546 
## 3050   1.1542 
## 3100   1.1538 
## 3150   1.1537 
## 3200   1.1534 
## 3250   1.1532 
## 3300   1.153 
## 3350   1.1526 
## 3400   1.1523 
## 3450   1.1524 
## 3500   1.1524 
## 3550   1.1524 
## 3600   1.1529 
## 3650   1.1529 
## 3700   1.153 
## 3750   1.1533 
## 3800   1.1536

## fitting final gbm model with a fixed number of  3400  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.924 
##  
## estimated cv deviance = 1.152 ; se = 0.048 
##  
## training data correlation = 0.686 
## cv correlation =  0.417 ; se = 0.055 
##  
## training data ROC score = 0.903 
## cv ROC score = 0.738 ; se = 0.03 
##  
## elapsed time -  0.48 minutes

# Cross-validation statistics for this fit; deviance.mean and deviance.se
# are the figures to use when comparing candidate models.
cvtc3.lr0005[["cv.statistics"]]

## $deviance.mean
## [1] 1.152343
## 
## $deviance.se
## [1] 0.04765349
## 
## $correlation.mean
## [1] 0.4165447
## 
## $correlation.se
## [1] 0.055271
## 
## $discrimination.mean
## [1] 0.73818
## 
## $discrimination.se
## [1] 0.02970235
## 
## $calibration.mean
## [1] 0.3023157 1.5543152 0.4385018 0.4667890 0.3803662
## 
## $calibration.se
## [1] 0.23089895 0.41981806 0.10168000 0.09480698 0.07937049
## 
## $cv.threshold
## [1] 0.3808227
## 
## $cv.threshold.se
## [1] 0.01283925

# Relative influence (rel.inf) of each predictor, from most to least
# influential. NOTE: this summary does not report deviance; the deviance
# measures for comparing models are in cvtc3.lr0005$cv.statistics.
summary(cvtc3.lr0005)

##                                             var     rel.inf
## cpaq8_tot                             cpaq8_tot 28.95237115
## age                                         age  7.60796775
## isi_tot                                 isi_tot  7.39147593
## RST_PQ_BIS                           RST_PQ_BIS  6.53780002
## pain_duration                     pain_duration  6.40217695
## current_opioid_meds         current_opioid_meds  5.79631821
## pcs_tot                                 pcs_tot  5.45310446
## overlapping_pain_number overlapping_pain_number  5.20444438
## employment                           employment  4.32952123
## audit_total                         audit_total  3.53390257
## income                                   income  3.28608465
## bis_brief_tot                     bis_brief_tot  2.93996130
## proanx_t                               proanx_t  2.18749738
## prodep_t                               prodep_t  2.07839278
## RST_PQ_FFS                           RST_PQ_FFS  1.58502630
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.41543546
## meq_tot                                 meq_tot  1.36737523
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.14293059
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.03196118
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.94951363
## hispanic                               hispanic  0.44574132
## race                                       race  0.20569466
## cuditr0                                 cuditr0  0.08464924
## assigned_sex_at_birth     assigned_sex_at_birth  0.07065364

tc 5, lr .0005 (finaltrain: 28 predictors, including gcpsr items)

# Cross-validated boosted regression tree for the response in column 1
# (gcpsr_2_HICP_num) using the 28 predictors in columns 3:30 of finaltrain
# and the pre-built fold assignment in fold.vector.
# NOTE(review): the object name reads "tc7", but the call uses
# tree.complexity = 5 -- confirm the intended name before comparing models.
cvtc7.lr0005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 5,
  learning.rate = 0.0005,
  bag.fraction = 0.5,
  # tolerance is derived automatically; an explicit tolerance = 0.01 is
  # intentionally left disabled.
  tolerance.method = "auto",
  max.trees = 50000
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.3047 
##  
## now adding trees... 
## 100   1.2928 
## 150   1.2818 
## 200   1.2714 
## 250   1.2614 
## 300   1.2514 
## 350   1.2424 
## 400   1.2339 
## 450   1.2255 
## 500   1.2176 
## 550   1.2096 
## 600   1.2022 
## 650   1.1949 
## 700   1.188 
## 750   1.1814 
## 800   1.1753 
## 850   1.1692 
## 900   1.1633 
## 950   1.1575 
## 1000   1.152 
## 1050   1.1468 
## 1100   1.142 
## 1150   1.1374 
## 1200   1.1329 
## 1250   1.1286 
## 1300   1.1244 
## 1350   1.1204 
## 1400   1.1166 
## 1450   1.1127 
## 1500   1.1089 
## 1550   1.1051 
## 1600   1.1017 
## 1650   1.0981 
## 1700   1.0948 
## 1750   1.0919 
## 1800   1.0888 
## 1850   1.086 
## 1900   1.0834 
## 1950   1.0807 
## 2000   1.0782 
## 2050   1.076 
## 2100   1.0735 
## 2150   1.0713 
## 2200   1.0692 
## 2250   1.0669 
## 2300   1.0649 
## 2350   1.0632 
## 2400   1.0611 
## 2450   1.0593 
## 2500   1.0579 
## 2550   1.0564 
## 2600   1.0548 
## 2650   1.0534 
## 2700   1.0523 
## 2750   1.0508 
## 2800   1.0496 
## 2850   1.0483 
## 2900   1.047 
## 2950   1.0459 
## 3000   1.0446 
## 3050   1.0435 
## 3100   1.0424 
## 3150   1.0416 
## 3200   1.0407 
## 3250   1.0397 
## 3300   1.0388 
## 3350   1.038 
## 3400   1.0372 
## 3450   1.0366 
## 3500   1.036 
## 3550   1.0355 
## 3600   1.035 
## 3650   1.0344 
## 3700   1.0339 
## 3750   1.0336 
## 3800   1.033 
## 3850   1.0326 
## 3900   1.0323 
## 3950   1.0319 
## 4000   1.0316 
## 4050   1.0313 
## 4100   1.0309 
## 4150   1.0307 
## 4200   1.0306 
## 4250   1.0304 
## 4300   1.0304 
## 4350   1.0304 
## 4400   1.0303 
## 4450   1.0301 
## 4500   1.0299 
## 4550   1.03 
## 4600   1.0302 
## 4650   1.0302 
## 4700   1.0302 
## 4750   1.0304

## fitting final gbm model with a fixed number of  4500  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.763 
##  
## estimated cv deviance = 1.03 ; se = 0.061 
##  
## training data correlation = 0.762 
## cv correlation =  0.532 ; se = 0.056 
##  
## training data ROC score = 0.935 
## cv ROC score = 0.799 ; se = 0.028 
##  
## elapsed time -  0.65 minutes

# Cross-validation statistics for this fit; deviance.mean and deviance.se
# are the figures to use when comparing candidate models.
cvtc7.lr0005[["cv.statistics"]]

## $deviance.mean
## [1] 1.029904
## 
## $deviance.se
## [1] 0.06052019
## 
## $correlation.mean
## [1] 0.5320984
## 
## $correlation.se
## [1] 0.05580901
## 
## $discrimination.mean
## [1] 0.79931
## 
## $discrimination.se
## [1] 0.02780227
## 
## $calibration.mean
## [1] 0.2727509 1.4394668 0.4584682 0.5111955 0.4120924
## 
## $calibration.se
## [1] 0.22628993 0.33853335 0.11374904 0.09191772 0.10218536
## 
## $cv.threshold
## [1] 0.3989254
## 
## $cv.threshold.se
## [1] 0.02240444

# Relative influence (rel.inf) of each predictor in the fitted model,
# listed from most to least influential.
summary(cvtc7.lr0005)

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 24.71836166
## cpaq8_tot                             cpaq8_tot 14.58809298
## age                                         age  6.09903302
## gcpsr_3                                 gcpsr_3  5.61847863
## pain_duration                     pain_duration  4.98103376
## isi_tot                                 isi_tot  4.78624690
## RST_PQ_BIS                           RST_PQ_BIS  4.02689864
## employment                           employment  3.53255422
## gcpsr_6                                 gcpsr_6  3.30843361
## gcpsr_4                                 gcpsr_4  3.02053608
## overlapping_pain_number overlapping_pain_number  2.83678271
## pcs_tot                                 pcs_tot  2.61331618
## current_opioid_meds         current_opioid_meds  2.57259051
## income                                   income  2.46190210
## audit_total                         audit_total  1.90484351
## bis_brief_tot                     bis_brief_tot  1.84053784
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.79101379
## proanx_t                               proanx_t  1.70953052
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.37233977
## meq_tot                                 meq_tot  1.29465581
## RST_PQ_FFS                           RST_PQ_FFS  1.21359212
## prodep_t                               prodep_t  0.99991650
## RST_PQ_BAS_I                       RST_PQ_BAS_I  0.86649202
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  0.85006084
## hispanic                               hispanic  0.73348653
## race                                       race  0.10887060
## cuditr0                                 cuditr0  0.07793542
## assigned_sex_at_birth     assigned_sex_at_birth  0.07246373

tc 5, lr .0025 (finaltrain_nocgpsr: 24 predictors, no gcpsr items)

# Cross-validated boosted regression tree for the response in column 1
# (gcpsr_2_HICP_num) using the 24 predictors in columns 3:26 of
# finaltrain_nocgpsr, this time with the faster learning rate of 0.0025.
# NOTE(review): the object name reads "tc3 / lr .0005", but the call uses
# tree.complexity = 5 and learning.rate = 0.0025, and the assignment replaces
# the earlier lr .0005 fit stored under the same name -- confirm the intended
# name before comparing models.
cvtc3.lr0005 <- gbm.step(
  data = finaltrain_nocgpsr,
  gbm.y = 1,
  gbm.x = 3:26,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 5,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  # tolerance is derived automatically; an explicit tolerance = 0.01 is
  # intentionally left disabled.
  tolerance.method = "auto",
  max.trees = 50000
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain_nocgpsr and using a family of bernoulli 
## 
## Using 404 observations and 24 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2796 
##  
## now adding trees... 
## 100   1.249 
## 150   1.2253 
## 200   1.2072 
## 250   1.1933 
## 300   1.1829 
## 350   1.1752 
## 400   1.1685 
## 450   1.1621 
## 500   1.1577 
## 550   1.155 
## 600   1.1537 
## 650   1.1533 
## 700   1.1532 
## 750   1.1533 
## 800   1.1544 
## 850   1.1569 
## 900   1.1598 
## 950   1.1614 
## 1000   1.1642 
## 1050   1.1656 
## 1100   1.167 
## 1150   1.1692 
## 1200   1.1722 
## 1250   1.1753

## fitting final gbm model with a fixed number of  700  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.921 
##  
## estimated cv deviance = 1.153 ; se = 0.047 
##  
## training data correlation = 0.686 
## cv correlation =  0.414 ; se = 0.055 
##  
## training data ROC score = 0.903 
## cv ROC score = 0.74 ; se = 0.029 
##  
## elapsed time -  0.14 minutes

# Cross-validation statistics for this fit; deviance.mean and deviance.se
# are the figures to use when comparing candidate models.
cvtc3.lr0005[["cv.statistics"]]

## $deviance.mean
## [1] 1.153182
## 
## $deviance.se
## [1] 0.04716857
## 
## $correlation.mean
## [1] 0.4135266
## 
## $correlation.se
## [1] 0.05510049
## 
## $discrimination.mean
## [1] 0.73962
## 
## $discrimination.se
## [1] 0.0292763
## 
## $calibration.mean
## [1] 0.2530776 1.4391522 0.4419874 0.4714546 0.3959977
## 
## $calibration.se
## [1] 0.21224567 0.33267254 0.10410803 0.09706560 0.08950416
## 
## $cv.threshold
## [1] 0.3810061
## 
## $cv.threshold.se
## [1] 0.01359757

# Relative influence (rel.inf) of each predictor, from most to least
# influential. NOTE: this summary does not report deviance; the deviance
# measures for comparing models are in cvtc3.lr0005$cv.statistics.
summary(cvtc3.lr0005)

##                                             var     rel.inf
## cpaq8_tot                             cpaq8_tot 29.46570907
## age                                         age  7.29866027
## isi_tot                                 isi_tot  7.15591172
## RST_PQ_BIS                           RST_PQ_BIS  6.07041218
## pain_duration                     pain_duration  6.04310979
## current_opioid_meds         current_opioid_meds  5.94944672
## pcs_tot                                 pcs_tot  5.34746385
## overlapping_pain_number overlapping_pain_number  5.33324027
## employment                           employment  4.04735896
## income                                   income  3.75650741
## audit_total                         audit_total  3.52440565
## bis_brief_tot                     bis_brief_tot  2.93143729
## prodep_t                               prodep_t  2.26620315
## proanx_t                               proanx_t  2.10790693
## RST_PQ_FFS                           RST_PQ_FFS  1.78187147
## meq_tot                                 meq_tot  1.40009568
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.37157018
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.16808259
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.11954478
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.01429717
## hispanic                               hispanic  0.46250609
## race                                       race  0.22822530
## assigned_sex_at_birth     assigned_sex_at_birth  0.08192841
## cuditr0                                 cuditr0  0.07410507

tc 5, lr .0025 (finaltrain: 28 predictors, including gcpsr items)

# Cross-validated boosted regression tree for the response in column 1
# (gcpsr_2_HICP_num) using the 28 predictors in columns 3:30 of finaltrain,
# this time with the faster learning rate of 0.0025.
# NOTE(review): the object name reads "tc7 / lr .0005", but the call uses
# tree.complexity = 5 and learning.rate = 0.0025, and the assignment replaces
# the earlier lr .0005 fit stored under the same name -- confirm the intended
# name before comparing models.
cvtc7.lr0005 <- gbm.step(
  data = finaltrain,
  gbm.y = 1,
  gbm.x = 3:30,
  family = "bernoulli",
  fold.vector = fold.vector,
  tree.complexity = 5,
  learning.rate = 0.0025,
  bag.fraction = 0.5,
  # tolerance is derived automatically; an explicit tolerance = 0.01 is
  # intentionally left disabled.
  tolerance.method = "auto",
  max.trees = 50000
)

## Length  Class   Mode 
##      0   NULL   NULL 
## 
##  
##  GBM STEP - version 2.9 
##  
## Performing cross-validation optimisation of a boosted regression tree model 
## for gcpsr_2_HICP_num with dataframe finaltrain and using a family of bernoulli 
## 
## Using 404 observations and 28 predictors 
## 
## loading user-supplied fold vector 
## 
## creating 10 initial models of 50 trees 
## 
##  folds are stratified by prevalence 
##  
## total mean deviance =  1.3112 
##  
## tolerance is fixed at  0.0013 
##  
## ntrees resid. dev. 
## 50    1.2618 
##  
## now adding trees... 
## 100   1.2169 
## 150   1.1813 
## 200   1.1514 
## 250   1.1277 
## 300   1.1091 
## 350   1.0926 
## 400   1.079 
## 450   1.067 
## 500   1.0564 
## 550   1.0492 
## 600   1.043 
## 650   1.0384 
## 700   1.0338 
## 750   1.033 
## 800   1.0315 
## 850   1.0301 
## 900   1.0297 
## 950   1.0293 
## 1000   1.0303 
## 1050   1.0321 
## 1100   1.0319 
## 1150   1.033 
## 1200   1.0349 
## 1250   1.0362 
## 1300   1.0379 
## 1350   1.0406 
## 1400   1.0441 
## 1450   1.0467

## fitting final gbm model with a fixed number of  950  trees for  gcpsr_2_HICP_num 
## 
## mean total deviance = 1.311 
## mean residual deviance = 0.749 
##  
## estimated cv deviance = 1.029 ; se = 0.061 
##  
## training data correlation = 0.766 
## cv correlation =  0.532 ; se = 0.055 
##  
## training data ROC score = 0.938 
## cv ROC score = 0.803 ; se = 0.026 
##  
## elapsed time -  0.18 minutes

# Cross-validation statistics for this fit; deviance.mean and deviance.se
# are the figures to use when comparing candidate models.
cvtc7.lr0005[["cv.statistics"]]

## $deviance.mean
## [1] 1.029272
## 
## $deviance.se
## [1] 0.06142857
## 
## $correlation.mean
## [1] 0.531747
## 
## $correlation.se
## [1] 0.05512385
## 
## $discrimination.mean
## [1] 0.80257
## 
## $discrimination.se
## [1] 0.02637095
## 
## $calibration.mean
## [1] 0.2569699 1.4019267 0.4557784 0.5146885 0.4223676
## 
## $calibration.se
## [1] 0.21994230 0.32234097 0.11366907 0.09399728 0.10275562
## 
## $cv.threshold
## [1] 0.401829
## 
## $cv.threshold.se
## [1] 0.02262023

# Relative influence (rel.inf) of each predictor in the fitted model,
# listed from most to least influential.
summary(cvtc7.lr0005)

##                                             var     rel.inf
## gcpsr_5                                 gcpsr_5 24.71647659
## cpaq8_tot                             cpaq8_tot 14.01333882
## age                                         age  5.75450308
## isi_tot                                 isi_tot  5.28679706
## gcpsr_3                                 gcpsr_3  5.04339385
## pain_duration                     pain_duration  4.92060525
## RST_PQ_BIS                           RST_PQ_BIS  4.22435147
## gcpsr_6                                 gcpsr_6  3.53087405
## employment                           employment  3.47134861
## gcpsr_4                                 gcpsr_4  2.98103233
## overlapping_pain_number overlapping_pain_number  2.93622536
## income                                   income  2.59192761
## pcs_tot                                 pcs_tot  2.52548063
## current_opioid_meds         current_opioid_meds  2.43593237
## bis_brief_tot                     bis_brief_tot  2.02075732
## RST_PQ_BAS_GDP                   RST_PQ_BAS_GDP  1.93410369
## audit_total                         audit_total  1.80822723
## proanx_t                               proanx_t  1.78469313
## RST_PQ_FFS                           RST_PQ_FFS  1.60631743
## meq_tot                                 meq_tot  1.38280230
## RST_PQ_BAS_RR                     RST_PQ_BAS_RR  1.12501219
## RST_PQ_BAS_I                       RST_PQ_BAS_I  1.11856188
## RST_PQ_BAS_RI                     RST_PQ_BAS_RI  1.00109996
## prodep_t                               prodep_t  0.86230612
## hispanic                               hispanic  0.63286889
## cuditr0                                 cuditr0  0.18160950
## assigned_sex_at_birth     assigned_sex_at_birth  0.06340201
## race                                       race  0.04595124

Amrit paused here on Dec 15th, 2024