R Markdown

# Data environment: holds the cleaned weather data, the model formula,
# the column selector and a reproducible 70% training sample.
weatherDS <- new.env()
evalq({
  # NOTE: data() loads into .GlobalEnv by default, so `weather` itself is
  # created globally and is found from here through the parent environment.
  # The package providing `weather` must already be attached.
  data(weather)
  data <- na.omit(weather) # keep only rows without missing values
  nobs <- nrow(data)
  form <- formula(RainTomorrow ~ .)
  target <- all.vars(form)[1] # name of the response variable
  # Column selector dropping every column whose name starts with Date,
  # Location or RISK_. Negative indices are kept when something matches;
  # when nothing matches, -integer(0) would select zero columns, so fall
  # back to selecting every column instead.
  vars <- local({
    excluded <- grep("^(Date|Location|RISK_)", names(data))
    if (length(excluded) > 0L) -excluded else seq_along(data)
  })
  set.seed(42) # fixed seed so the split is reproducible
  # sample() truncates a fractional size; floor() makes that explicit.
  train <- sample(nobs, floor(0.7 * nobs))
}, weatherDS)
# Model environment: its parent is weatherDS, so `data`, `train` and `vars`
# resolve through the enclosing data environment.
weatherRF <- new.env(parent = weatherDS)
evalq({
  # Training rows restricted to the selected columns.
  Tdata <- data[train, vars]
  form <- RainTomorrow ~ .
  # Argument order is kept as-is so the printed model call stays the same.
  model <- randomForest(
    formula = form,
    data = Tdata,
    ntree = 500, # number of trees grown
    mtry = 4, # variables tried at each split
    importance = TRUE,
    localImp = TRUE, # also record per-observation importance
    na.action = na.roughfix,
    replace = FALSE
  )
}, weatherRF)
# Auto-print the fitted forest: call, forest type, number of trees,
# variables tried per split, OOB error estimate and confusion matrix.
weatherRF$model
## 
## Call:
##  randomForest(formula = form, data = Tdata, ntree = 500, mtry = 4,      importance = TRUE, localImp = TRUE, replace = FALSE, na.action = na.roughfix) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 4
## 
##         OOB estimate of  error rate: 15.28%
## Confusion matrix:
##      No Yes class.error
## No  184   7  0.03664921
## Yes  28  10  0.73684211
# Show the internal structure (list components) of the fitted model object.
str(weatherRF$model)
## List of 19
##  $ call           : language randomForest(formula = form, data = Tdata, ntree = 500, mtry = 4, importance = TRUE,      localImp = TRUE, replac| __truncated__
##  $ type           : chr "classification"
##  $ predicted      : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "names")= chr [1:229] "1" "2" "3" "4" ...
##  $ err.rate       : num [1:500, 1:3] 0.214 0.191 0.185 0.161 0.163 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:3] "OOB" "No" "Yes"
##  $ confusion      : num [1:2, 1:3] 184 28 7 10 0.0366 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "No" "Yes"
##   .. ..$ : chr [1:3] "No" "Yes" "class.error"
##  $ votes          : 'matrix' num [1:229, 1:2] 0.647 0.97 0.972 0.686 0.875 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:229] "1" "2" "3" "4" ...
##   .. ..$ : chr [1:2] "No" "Yes"
##  $ oob.times      : num [1:229] 184 200 176 185 176 182 176 186 193 180 ...
##  $ classes        : chr [1:2] "No" "Yes"
##  $ importance     : num [1:20, 1:4] 0.008 0.01341 0.00223 0.00259 0.01914 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:20] "MinTemp" "MaxTemp" "Rainfall" "Evaporation" ...
##   .. ..$ : chr [1:4] "No" "Yes" "MeanDecreaseAccuracy" "MeanDecreaseGini"
##  $ importanceSD   : num [1:20, 1:3] 0.001221 0.001423 0.000561 0.000892 0.001609 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:20] "MinTemp" "MaxTemp" "Rainfall" "Evaporation" ...
##   .. ..$ : chr [1:3] "No" "Yes" "MeanDecreaseAccuracy"
##  $ localImportance: num [1:20, 1:229] -0.02717 0 -0.00543 -0.01087 0.06522 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:20] "MinTemp" "MaxTemp" "Rainfall" "Evaporation" ...
##   .. ..$ : chr [1:229] "1" "2" "3" "4" ...
##  $ proximity      : NULL
##  $ ntree          : num 500
##  $ mtry           : num 4
##  $ forest         :List of 14
##   ..$ ndbigtree : int [1:500] 43 39 57 45 47 41 51 47 45 41 ...
##   ..$ nodestatus: int [1:65, 1:500] 1 1 1 1 1 1 -1 1 1 -1 ...
##   ..$ bestvar   : int [1:65, 1:500] 7 18 12 14 4 1 0 19 17 0 ...
##   ..$ treemap   : int [1:65, 1:2, 1:500] 2 4 6 8 10 12 0 14 16 0 ...
##   ..$ nodepred  : int [1:65, 1:500] 0 0 0 0 0 0 2 0 0 2 ...
##   ..$ xbestsplit: num [1:65, 1:500] 63 20.4 49.5 1016.2 7.1 ...
##   ..$ pid       : num [1:2] 1 1
##   ..$ cutoff    : num [1:2] 0.5 0.5
##   ..$ ncat      : Named int [1:20] 1 1 1 1 1 1 1 1 1 1 ...
##   .. ..- attr(*, "names")= chr [1:20] "MinTemp" "MaxTemp" "Rainfall" "Evaporation" ...
##   ..$ maxcat    : int 2
##   ..$ nrnodes   : int 65
##   ..$ ntree     : num 500
##   ..$ nclass    : int 2
##   ..$ xlevels   :List of 20
##   .. ..$ MinTemp      : num 0
##   .. ..$ MaxTemp      : num 0
##   .. ..$ Rainfall     : num 0
##   .. ..$ Evaporation  : num 0
##   .. ..$ Sunshine     : num 0
##   .. ..$ WindGustDir  : num 0
##   .. ..$ WindGustSpeed: num 0
##   .. ..$ WindDir9am   : num 0
##   .. ..$ WindDir3pm   : num 0
##   .. ..$ WindSpeed9am : num 0
##   .. ..$ WindSpeed3pm : num 0
##   .. ..$ Humidity9am  : num 0
##   .. ..$ Humidity3pm  : num 0
##   .. ..$ Pressure9am  : num 0
##   .. ..$ Pressure3pm  : num 0
##   .. ..$ Cloud9am     : num 0
##   .. ..$ Cloud3pm     : num 0
##   .. ..$ Temp9am      : num 0
##   .. ..$ Temp3pm      : num 0
##   .. ..$ RainToday    : chr [1:2] "No" "Yes"
##  $ y              : Factor w/ 2 levels "No","Yes": 2 1 1 1 1 1 1 1 1 2 ...
##   ..- attr(*, "names")= chr [1:229] "1" "2" "3" "4" ...
##  $ test           : NULL
##  $ inbag          : NULL
##  $ terms          :Classes 'terms', 'formula'  language RainTomorrow ~ MinTemp + MaxTemp + Rainfall + Evaporation + Sunshine +      WindGustDir + WindGustSpeed + WindDir| __truncated__ ...
##   .. ..- attr(*, "variables")= language list(RainTomorrow, MinTemp, MaxTemp, Rainfall, Evaporation, Sunshine, WindGustDir,      WindGustSpeed, WindDir9am| __truncated__ ...
##   .. ..- attr(*, "factors")= int [1:21, 1:20] 0 1 0 0 0 0 0 0 0 0 ...
##   .. .. ..- attr(*, "dimnames")=List of 2
##   .. .. .. ..$ : chr [1:21] "RainTomorrow" "MinTemp" "MaxTemp" "Rainfall" ...
##   .. .. .. ..$ : chr [1:20] "MinTemp" "MaxTemp" "Rainfall" "Evaporation" ...
##   .. ..- attr(*, "term.labels")= chr [1:20] "MinTemp" "MaxTemp" "Rainfall" "Evaporation" ...
##   .. ..- attr(*, "order")= int [1:20] 1 1 1 1 1 1 1 1 1 1 ...
##   .. ..- attr(*, "intercept")= num 0
##   .. ..- attr(*, "response")= int 1
##   .. ..- attr(*, ".Environment")=<environment: 0x000001e7cc158388> 
##   .. ..- attr(*, "predvars")= language list(RainTomorrow, MinTemp, MaxTemp, Rainfall, Evaporation, Sunshine, WindGustDir,      WindGustSpeed, WindDir9am| __truncated__ ...
##   .. ..- attr(*, "dataClasses")= Named chr [1:21] "factor" "numeric" "numeric" "numeric" ...
##   .. .. ..- attr(*, "names")= chr [1:21] "RainTomorrow" "MinTemp" "MaxTemp" "Rainfall" ...
##  - attr(*, "class")= chr [1:2] "randomForest.formula" "randomForest"
# First six entries of the class labels stored in the model's `predicted`
# factor (names are the row labels of the training observations).
head(weatherRF$model$predicted)
##  1  2  3  4  5  6 
## No No No No No No 
## Levels: No Yes
# Same as above, but the first ten entries.
head(weatherRF$model$predicted, 10)
##  1  2  3  4  5  6  7  8  9 10 
## No No No No No No No No No No 
## Levels: No Yes
# First six rows of the variable-importance matrix: one row per predictor,
# with per-class columns plus MeanDecreaseAccuracy and MeanDecreaseGini.
head(weatherRF$model$importance)
##                      No          Yes MeanDecreaseAccuracy MeanDecreaseGini
## MinTemp     0.008001176  0.002800146         0.0071428571        2.3031681
## MaxTemp     0.013409119 -0.006438784         0.0101904762        1.9157680
## Rainfall    0.002225895 -0.010962177         0.0000952381        0.9946594
## Evaporation 0.002585936 -0.001352748         0.0018095238        1.2982645
## Sunshine    0.019138256  0.013873650         0.0181190476        3.3679873
## WindGustDir 0.002722396  0.003097635         0.0028095238        1.1188261
# Local (per-observation) importance: rows are predictors, columns are
# training observations. Show the first six predictors for the first four
# observations.
head(weatherRF$model$localImportance)[, 1:4]
##                        1      2            3            4
## MinTemp     -0.027173913  0.025  0.000000000 -0.043243243
## MaxTemp      0.000000000 -0.010  0.000000000  0.016216216
## Rainfall    -0.005434783  0.000  0.000000000 -0.005405405
## Evaporation -0.010869565  0.000 -0.005681818  0.005405405
## Sunshine     0.065217391  0.020  0.045454545  0.162162162
## WindGustDir -0.038043478  0.015  0.005681818 -0.005405405
# First six rows of the error-rate matrix (columns: OOB, No, Yes).
# NOTE(review): 500 rows with ntree = 500, so presumably one row per tree
# added to the forest - confirm against the randomForest documentation.
head(weatherRF$model$err.rate)
##            OOB         No       Yes
## [1,] 0.2142857 0.13043478 0.6000000
## [2,] 0.1914894 0.11206897 0.5600000
## [3,] 0.1849711 0.12413793 0.5000000
## [4,] 0.1606218 0.10493827 0.4516129
## [5,] 0.1626794 0.09142857 0.5294118
## [6,] 0.1712963 0.08888889 0.5833333
# First fifteen rows of the error-rate matrix, rounded to four decimals
# for easier reading.
round(head(weatherRF$model$err.rate,15),4)
##          OOB     No    Yes
##  [1,] 0.2143 0.1304 0.6000
##  [2,] 0.1915 0.1121 0.5600
##  [3,] 0.1850 0.1241 0.5000
##  [4,] 0.1606 0.1049 0.4516
##  [5,] 0.1627 0.0914 0.5294
##  [6,] 0.1713 0.0889 0.5833
##  [7,] 0.1855 0.0924 0.6486
##  [8,] 0.1867 0.1070 0.5789
##  [9,] 0.1504 0.0745 0.5263
## [10,] 0.1630 0.0899 0.5263
## [11,] 0.1703 0.0942 0.5526
## [12,] 0.1528 0.0733 0.5526
## [13,] 0.1572 0.0733 0.5789
## [14,] 0.1572 0.0576 0.6579
## [15,] 0.1354 0.0524 0.5526
# Find the smallest OOB error rate and every row of err.rate where it occurs;
# both results are stored in weatherRF.
evalq({
  min.err <- min(model$err.rate[, "OOB"])
  min.err.index <- which(model$err.rate[, "OOB"] == min.err)
}, weatherRF)
# Row index (or indices, if tied) of err.rate with the minimal OOB error.
weatherRF$min.err.index
## [1] 18
# OOB and per-class error rates at the row where the OOB error is minimal.
weatherRF$model$err.rate[weatherRF$min.err.index,]
##        OOB         No        Yes 
## 0.12663755 0.03664921 0.57894737
# First six rows of the votes matrix: one row per training observation,
# one column per class.
# NOTE(review): rows appear to sum to 1, so these look like vote fractions
# rather than raw counts - confirm.
head(weatherRF$model$votes)
##          No        Yes
## 1 0.6467391 0.35326087
## 2 0.9700000 0.03000000
## 3 0.9715909 0.02840909
## 4 0.6864865 0.31351351
## 5 0.8750000 0.12500000
## 6 0.7252747 0.27472527