# Set the knitr chunk option `echo` to TRUE for the chunks that follow.
knitr::opts_chunk$set(echo = TRUE)

# Packages this script attaches, in the order they are attached.
Libraries <- c("readr", "doMC", "caret")

# Install any package that is not present, then attach it.
# requireNamespace() only tests whether the package can be loaded; it does not
# attach it, so attaching is left to the single library() call below.
# library() raises an error if the package is still unavailable after the
# install attempt, so a failed install stops the script here.
for (p in Libraries) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
# Read "test_harness_paa.csv" (a relative path, so it is resolved against the
# working directory) into `test_harness_paa`. The lines prefixed with "##"
# that follow are the column-specification message recorded from a run.
test_harness_paa <- read_csv(file = "test_harness_paa.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## Class = col_character(),
## id = col_character()
## )
## See spec(...) for full column specifications.
Convert Class (read in as character) to a factor of the 7 protein classes (Prot_Class)
# Build a standalone factor from the Class column. The Class column inside
# `test_harness_paa` itself is not modified by this assignment.
# NOTE(review): `class_factor` is not referenced again in the visible part of
# this script - confirm whether it is still needed.
class_factor <- as.factor(test_harness_paa[["Class"]])

# Show the storage type of the whole data frame.
typeof(test_harness_paa)
## [1] "list"

# Show the class of the newly built factor.
class(class_factor)
## [1] "factor"

# Drop the second and third columns by position (original note: "More later").
test_harness_paa <- test_harness_paa[, -(2:3)]
# Seed the RNG so the partition drawn below can be repeated.
set.seed(1000)

# Ask createDataPartition() for row indices based on the Class column:
# p = 0.8 requests 80% of the rows, and list = FALSE asks for the indices in
# non-list form so they can be used directly for row subsetting.
index <- createDataPartition(
  y = test_harness_paa$Class,
  p = 0.8,
  list = FALSE
)

# Selected rows form the training set; all remaining rows form the test set.
training_set <- test_harness_paa[index, ]
testing_set <- test_harness_paa[-index, ]
# Model 1: fit train() with method = "rf" using every other column of
# `training_set` as a predictor of Class, with no preProcess argument.
# The step is timed with Sys.time() before and after.
# NOTE(review): no seed is set immediately before train(), so the resamples
# depend on the RNG state left by the earlier partitioning step - confirm
# whether repeat runs need to reproduce the recorded numbers.
start_time <- Sys.time()

# Register the doMC parallel backend with 3 cores.
registerDoMC(cores = 3)

modFit <- train(
  Class ~ .,
  data = training_set,
  method = "rf"
)

end_time <- Sys.time()

# Elapsed time; the interval includes the backend registration above.
end_time - start_time
## Time difference of 2.594309 mins

# Print the fitted object; the recorded summary follows.
modFit
## Random Forest
##
## 2800 samples
## 20 predictors
## 7 classes: 'Ctrl', 'Ery', 'Hcy', 'Hgb', 'Hhe', 'Lgb', 'Mgb'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 2800, 2800, 2800, 2800, 2800, 2800, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.8888620 0.8702890
## 11 0.8710888 0.8495418
## 20 0.8460415 0.8203050
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
# Model 2: same train() call as for `modFit` (method = "rf", Class ~ . on
# `training_set`) but with the predictors centered and scaled via preProcess.
# The step is timed with Sys.time() before and after.
# NOTE(review): no seed is set immediately before train(), so this fit may use
# different resamples than `modFit` - confirm before comparing accuracies.
start_time <- Sys.time()

# Register the doMC parallel backend with 3 cores.
# NOTE(review): the same registration already happens before the `modFit`
# fit; this repeat looks redundant - confirm.
registerDoMC(cores = 3)

modFit2 <- train(
  Class ~ .,
  data = training_set,
  method = "rf",
  preProcess = c("center", "scale")
)

end_time <- Sys.time()

# Elapsed time; the interval includes the backend registration above.
end_time - start_time
## Time difference of 2.557944 mins

# Print the fitted object; the recorded summary follows.
modFit2
## Random Forest
##
## 2800 samples
## 20 predictors
## 7 classes: 'Ctrl', 'Ery', 'Hcy', 'Hgb', 'Hhe', 'Lgb', 'Mgb'
##
## Pre-processing: centered (20), scaled (20)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 2800, 2800, 2800, 2800, 2800, 2800, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.8890704 0.8705152
## 11 0.8695372 0.8477239
## 20 0.8477447 0.8222883
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
Machine Settings:
# Report the operating-system fields of Sys.info(): sysname, release, version
# and machine (elements 1, 2, 3 and 5 of its result), selected here by name.
Sys.info()[c("sysname", "release", "version", "machine")]
## sysname
## "Linux"
## release
## "4.15.0-46-generic"
## version
## "#49~16.04.1-Ubuntu SMP Tue Feb 12 17:45:24 UTC 2019"
## machine
## "x86_64"

# Print the R version, platform, locale and package versions for this session.
sessionInfo()
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 18.3
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] caret_6.0-81 ggplot2_3.1.0 lattice_0.20-38 doMC_1.3.5
## [5] iterators_1.0.10 foreach_1.4.4 readr_1.3.1
##
## loaded via a namespace (and not attached):
## [1] tidyselect_0.2.5 xfun_0.4 purrr_0.2.5
## [4] reshape2_1.4.3 splines_3.4.4 colorspace_1.3-2
## [7] generics_0.0.2 stats4_3.4.4 htmltools_0.3.6
## [10] yaml_2.2.0 prodlim_2018.04.18 survival_2.43-3
## [13] rlang_0.3.0.1 e1071_1.7-0.1 ModelMetrics_1.2.2
## [16] pillar_1.3.1 glue_1.3.0 withr_2.1.2
## [19] bindrcpp_0.2.2 bindr_0.1.1 plyr_1.8.4
## [22] lava_1.6.4 stringr_1.3.1 timeDate_3043.102
## [25] munsell_0.5.0 gtable_0.2.0 recipes_0.1.4
## [28] codetools_0.2-16 evaluate_0.12 knitr_1.21
## [31] class_7.3-14 Rcpp_1.0.0 scales_1.0.0
## [34] ipred_0.9-8 hms_0.4.2 digest_0.6.18
## [37] stringi_1.2.4 dplyr_0.7.8 grid_3.4.4
## [40] tools_3.4.4 magrittr_1.5 lazyeval_0.2.1
## [43] tibble_1.4.2 randomForest_4.6-14 crayon_1.3.4
## [46] pkgconfig_2.0.2 MASS_7.3-51.1 Matrix_1.2-15
## [49] data.table_1.11.8 lubridate_1.7.4 gower_0.1.2
## [52] assertthat_0.2.0 rmarkdown_1.11 R6_2.3.0
## [55] rpart_4.1-13 nnet_7.3-12 nlme_3.1-137
## [58] compiler_3.4.4
EOF