# Global knitr chunk defaults: echo the source code and cache chunk results.
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)

# Packages this analysis attaches, in attach order.
Libraries <- c("readr", "doMC", "caret", "randomForest", "ggplot2")

# Install any package that is not yet available, then attach it.
# requireNamespace() only probes for availability (no attaching, no warning
# when the package is missing); library() does the attaching and stops with an
# error if the installation did not succeed.
for (p in Libraries) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
# NOTE(review): looks like a knitr chunk option that leaked into the code
# during extraction; kept as a plain assignment -- confirm before removing.
cache <- TRUE

# The CSV path below is resolved relative to this project directory.
setwd("~/Dropbox/Oxy-RF/5_RF_Tuning_w_ntree")

# Load the data set, dropping the `id` and `TotalAA` columns at read time.
test_harness_paa <- read_csv(
  file = "test_harness_paa.csv",
  col_types = cols(
    id = col_skip(),
    TotalAA = col_skip()
  )
)
# Convert Class (numerical) to a factor of 7 protein classes (Prot_Class)
# NOTE(review): looks like a knitr chunk option that leaked into the code
# during extraction; kept as a plain assignment -- confirm before removing.
cache <- TRUE

# Factor copy of the Class column, stored in a standalone variable (the column
# inside test_harness_paa itself is left untouched).
Class <- as.factor(test_harness_paa[["Class"]])

# Storage type of the whole data object (not of the Class column).
typeof(test_harness_paa)
## [1] "list"

# Confirm that the standalone copy is a factor.
class(Class)
## [1] "factor"
# NOTE(review): looks like a knitr chunk option that leaked into the code
# during extraction; kept as a plain assignment -- confirm before removing.
cache <- TRUE

# Fixed seed so the train/test split is reproducible.
set.seed(1000)

# Sample 80% of the rows for training, partitioning on the Class column;
# the remaining rows form the held-out test set.
index <- createDataPartition(test_harness_paa$Class, p = 0.8, list = FALSE)
training_set <- test_harness_paa[index, ]
testing_set <- test_harness_paa[-index, ]

# Estimate the centering/scaling parameters from the training rows only ...
preProcValues <- preProcess(training_set, method = c("center", "scale"))
train_transformed <- predict(preProcValues, training_set)

# ... and apply those same parameters to the held-out rows, so that they are on
# the scale the model is trained on when it is later evaluated.
test_transformed <- predict(preProcValues, testing_set)
# NOTE(review): looks like a knitr chunk option that leaked into the code
# during extraction; kept as a plain assignment -- confirm before removing.
cache <- TRUE

# Register a 3-core doMC parallel backend.
registerDoMC(cores = 3)

start_time <- Sys.time() # Start timer

# mtry: number of predictors randomly sampled as split candidates at each
# tree node. Held at a single value, so the tuning grid has one row.
mtry_def <- 2
t_grid <- expand.grid(mtry = mtry_def)

# Single seed set immediately before training; this is the one that governs
# the resampling (an earlier seed would be overridden here anyway).
set.seed(1234)
model.rf <- train(
  Class ~ .,
  data = train_transformed,
  method = "rf",
  ntree = 50, # How many trees to grow in total?
  tuneGrid = t_grid
)

end_time <- Sys.time() # End timer
end_time - start_time # Display time
## Time difference of 5.529183 secs

# Resampling summary of the fitted model.
print(model.rf)
## Random Forest
##
## 2800 samples
## 20 predictors
## 7 classes: 'Ctrl', 'Ery', 'Hcy', 'Hgb', 'Hhe', 'Lgb', 'Mgb'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 2800, 2800, 2800, 2800, 2800, 2800, ...
## Resampling results:
##
## Accuracy Kappa
## 0.8757229 0.8549459
##
## Tuning parameter 'mtry' was held constant at a value of 2
# Machine settings:
# Operating-system details of the machine, selected by field name.
Sys.info()[c("sysname", "release", "version", "machine")]
## sysname
## "Linux"
## release
## "4.15.0-47-generic"
## version
## "#50~16.04.1-Ubuntu SMP Fri Mar 15 16:06:21 UTC 2019"
## machine
## "x86_64"

# R version, platform and package versions used for this run.
sessionInfo()
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 18.3
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] randomForest_4.6-14 caret_6.0-81 ggplot2_3.1.0
## [4] lattice_0.20-38 doMC_1.3.5 iterators_1.0.10
## [7] foreach_1.4.4 readr_1.3.1
##
## loaded via a namespace (and not attached):
## [1] tidyselect_0.2.5 xfun_0.4 purrr_0.2.5
## [4] reshape2_1.4.3 splines_3.4.4 colorspace_1.3-2
## [7] generics_0.0.2 stats4_3.4.4 htmltools_0.3.6
## [10] yaml_2.2.0 prodlim_2018.04.18 survival_2.43-3
## [13] rlang_0.3.0.1 e1071_1.7-0.1 ModelMetrics_1.2.2
## [16] pillar_1.3.1 glue_1.3.0 withr_2.1.2
## [19] bindrcpp_0.2.2 bindr_0.1.1 plyr_1.8.4
## [22] lava_1.6.4 stringr_1.3.1 timeDate_3043.102
## [25] munsell_0.5.0 gtable_0.2.0 recipes_0.1.4
## [28] codetools_0.2-16 evaluate_0.12 knitr_1.21
## [31] class_7.3-14 Rcpp_1.0.0 scales_1.0.0
## [34] ipred_0.9-8 hms_0.4.2 digest_0.6.18
## [37] stringi_1.2.4 dplyr_0.7.8 grid_3.4.4
## [40] tools_3.4.4 magrittr_1.5 lazyeval_0.2.1
## [43] tibble_1.4.2 crayon_1.3.4 pkgconfig_2.0.2
## [46] MASS_7.3-51.1 Matrix_1.2-15 data.table_1.11.8
## [49] lubridate_1.7.4 gower_0.1.2 assertthat_0.2.0
## [52] rmarkdown_1.11 R6_2.3.0 rpart_4.1-13
## [55] nnet_7.3-12 nlme_3.1-137 compiler_3.4.4
EOF