R Markdown
# Dataset container: the cleaned observations, model formula, target name,
# column selection and training-row sample are kept together in one
# environment so later steps can share them.
weatherDS <- new.env()
evalq({
  data(weather)
  # Keep complete cases only.
  data <- na.omit(weather)
  nobs <- nrow(data)
  form <- formula(RainTomorrow ~ .)
  # The response is the first variable named in the formula.
  target <- all.vars(form)[1]
  # Columns excluded from modelling, expressed as negative indices.
  # Negating an empty grep() result gives integer(0), which would select
  # *no* columns rather than all of them, so fall back to every column.
  vars <- -grep("^(Date|Location|RISK_)", names(data))
  if (length(vars) == 0) {
    vars <- seq_len(ncol(data))
  }
  set.seed(42)
  # 70% of the rows, rounded down explicitly, form the training sample.
  train <- sample(nobs, floor(0.7 * nobs))
}, weatherDS)
# Random-forest workspace: a child environment of weatherDS, so `data`,
# `train` and `vars` resolve through the parent while the training frame
# and the fitted model are stored here.
weatherRF <- new.env(parent = weatherDS)
evalq(
  {
    # Training rows restricted to the selected columns.
    Tdata <- data[train, vars]
    form <- formula(RainTomorrow ~ .)
    # importance / localImp are switched on because the per-variable and
    # per-case importance tables are inspected further down.
    model <- randomForest(
      formula = form,
      data = Tdata,
      ntree = 500,
      mtry = 4,
      importance = TRUE,
      localImp = TRUE,
      na.action = na.roughfix,
      replace = FALSE
    )
  },
  weatherRF
)
# Print the fitted forest: call, OOB error estimate and confusion matrix.
weatherRF[["model"]]
##
## Call:
## randomForest(formula = form, data = Tdata, ntree = 500, mtry = 4, importance = TRUE, localImp = TRUE, replace = FALSE, na.action = na.roughfix)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 4
##
## OOB estimate of error rate: 15.28%
## Confusion matrix:
## No Yes class.error
## No 184 7 0.03664921
## Yes 28 10 0.73684211
# Dump the internal structure of the fitted randomForest object.
str(weatherRF[["model"]])
## List of 19
## $ call : language randomForest(formula = form, data = Tdata, ntree = 500, mtry = 4, importance = TRUE, localImp = TRUE, replac| __truncated__
## $ type : chr "classification"
## $ predicted : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "names")= chr [1:229] "1" "2" "3" "4" ...
## $ err.rate : num [1:500, 1:3] 0.214 0.191 0.185 0.161 0.163 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:3] "OOB" "No" "Yes"
## $ confusion : num [1:2, 1:3] 184 28 7 10 0.0366 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "No" "Yes"
## .. ..$ : chr [1:3] "No" "Yes" "class.error"
## $ votes : 'matrix' num [1:229, 1:2] 0.647 0.97 0.972 0.686 0.875 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:229] "1" "2" "3" "4" ...
## .. ..$ : chr [1:2] "No" "Yes"
## $ oob.times : num [1:229] 184 200 176 185 176 182 176 186 193 180 ...
## $ classes : chr [1:2] "No" "Yes"
## $ importance : num [1:20, 1:4] 0.008 0.01341 0.00223 0.00259 0.01914 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:20] "MinTemp" "MaxTemp" "Rainfall" "Evaporation" ...
## .. ..$ : chr [1:4] "No" "Yes" "MeanDecreaseAccuracy" "MeanDecreaseGini"
## $ importanceSD : num [1:20, 1:3] 0.001221 0.001423 0.000561 0.000892 0.001609 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:20] "MinTemp" "MaxTemp" "Rainfall" "Evaporation" ...
## .. ..$ : chr [1:3] "No" "Yes" "MeanDecreaseAccuracy"
## $ localImportance: num [1:20, 1:229] -0.02717 0 -0.00543 -0.01087 0.06522 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:20] "MinTemp" "MaxTemp" "Rainfall" "Evaporation" ...
## .. ..$ : chr [1:229] "1" "2" "3" "4" ...
## $ proximity : NULL
## $ ntree : num 500
## $ mtry : num 4
## $ forest :List of 14
## ..$ ndbigtree : int [1:500] 43 39 57 45 47 41 51 47 45 41 ...
## ..$ nodestatus: int [1:65, 1:500] 1 1 1 1 1 1 -1 1 1 -1 ...
## ..$ bestvar : int [1:65, 1:500] 7 18 12 14 4 1 0 19 17 0 ...
## ..$ treemap : int [1:65, 1:2, 1:500] 2 4 6 8 10 12 0 14 16 0 ...
## ..$ nodepred : int [1:65, 1:500] 0 0 0 0 0 0 2 0 0 2 ...
## ..$ xbestsplit: num [1:65, 1:500] 63 20.4 49.5 1016.2 7.1 ...
## ..$ pid : num [1:2] 1 1
## ..$ cutoff : num [1:2] 0.5 0.5
## ..$ ncat : Named int [1:20] 1 1 1 1 1 1 1 1 1 1 ...
## .. ..- attr(*, "names")= chr [1:20] "MinTemp" "MaxTemp" "Rainfall" "Evaporation" ...
## ..$ maxcat : int 2
## ..$ nrnodes : int 65
## ..$ ntree : num 500
## ..$ nclass : int 2
## ..$ xlevels :List of 20
## .. ..$ MinTemp : num 0
## .. ..$ MaxTemp : num 0
## .. ..$ Rainfall : num 0
## .. ..$ Evaporation : num 0
## .. ..$ Sunshine : num 0
## .. ..$ WindGustDir : num 0
## .. ..$ WindGustSpeed: num 0
## .. ..$ WindDir9am : num 0
## .. ..$ WindDir3pm : num 0
## .. ..$ WindSpeed9am : num 0
## .. ..$ WindSpeed3pm : num 0
## .. ..$ Humidity9am : num 0
## .. ..$ Humidity3pm : num 0
## .. ..$ Pressure9am : num 0
## .. ..$ Pressure3pm : num 0
## .. ..$ Cloud9am : num 0
## .. ..$ Cloud3pm : num 0
## .. ..$ Temp9am : num 0
## .. ..$ Temp3pm : num 0
## .. ..$ RainToday : chr [1:2] "No" "Yes"
## $ y : Factor w/ 2 levels "No","Yes": 2 1 1 1 1 1 1 1 1 2 ...
## ..- attr(*, "names")= chr [1:229] "1" "2" "3" "4" ...
## $ test : NULL
## $ inbag : NULL
## $ terms :Classes 'terms', 'formula' language RainTomorrow ~ MinTemp + MaxTemp + Rainfall + Evaporation + Sunshine + WindGustDir + WindGustSpeed + WindDir| __truncated__ ...
## .. ..- attr(*, "variables")= language list(RainTomorrow, MinTemp, MaxTemp, Rainfall, Evaporation, Sunshine, WindGustDir, WindGustSpeed, WindDir9am| __truncated__ ...
## .. ..- attr(*, "factors")= int [1:21, 1:20] 0 1 0 0 0 0 0 0 0 0 ...
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:21] "RainTomorrow" "MinTemp" "MaxTemp" "Rainfall" ...
## .. .. .. ..$ : chr [1:20] "MinTemp" "MaxTemp" "Rainfall" "Evaporation" ...
## .. ..- attr(*, "term.labels")= chr [1:20] "MinTemp" "MaxTemp" "Rainfall" "Evaporation" ...
## .. ..- attr(*, "order")= int [1:20] 1 1 1 1 1 1 1 1 1 1 ...
## .. ..- attr(*, "intercept")= num 0
## .. ..- attr(*, "response")= int 1
## .. ..- attr(*, ".Environment")=<environment: 0x000001e7cc158388>
## .. ..- attr(*, "predvars")= language list(RainTomorrow, MinTemp, MaxTemp, Rainfall, Evaporation, Sunshine, WindGustDir, WindGustSpeed, WindDir9am| __truncated__ ...
## .. ..- attr(*, "dataClasses")= Named chr [1:21] "factor" "numeric" "numeric" "numeric" ...
## .. .. ..- attr(*, "names")= chr [1:21] "RainTomorrow" "MinTemp" "MaxTemp" "Rainfall" ...
## - attr(*, "class")= chr [1:2] "randomForest.formula" "randomForest"
# First six predicted classes stored on the model.
head(weatherRF$model[["predicted"]], n = 6L)
## 1 2 3 4 5 6
## No No No No No No
## Levels: No Yes
# Same predictions, first ten entries.
head(weatherRF$model[["predicted"]], n = 10)
## 1 2 3 4 5 6 7 8 9 10
## No No No No No No No No No No
## Levels: No Yes
# Variable-importance table (per-class, MeanDecreaseAccuracy,
# MeanDecreaseGini) for the first six predictors.
head(weatherRF$model[["importance"]])
## No Yes MeanDecreaseAccuracy MeanDecreaseGini
## MinTemp 0.008001176 0.002800146 0.0071428571 2.3031681
## MaxTemp 0.013409119 -0.006438784 0.0101904762 1.9157680
## Rainfall 0.002225895 -0.010962177 0.0000952381 0.9946594
## Evaporation 0.002585936 -0.001352748 0.0018095238 1.2982645
## Sunshine 0.019138256 0.013873650 0.0181190476 3.3679873
## WindGustDir 0.002722396 0.003097635 0.0028095238 1.1188261
# Case-wise importance: first six predictors (rows) for the first four
# training cases (columns).
head(weatherRF$model[["localImportance"]])[, seq_len(4)]
## 1 2 3 4
## MinTemp -0.027173913 0.025 0.000000000 -0.043243243
## MaxTemp 0.000000000 -0.010 0.000000000 0.016216216
## Rainfall -0.005434783 0.000 0.000000000 -0.005405405
## Evaporation -0.010869565 0.000 -0.005681818 0.005405405
## Sunshine 0.065217391 0.020 0.045454545 0.162162162
## WindGustDir -0.038043478 0.015 0.005681818 -0.005405405
# Error rates (OOB and per class) for the first six rows of err.rate.
head(weatherRF$model[["err.rate"]])
## OOB No Yes
## [1,] 0.2142857 0.13043478 0.6000000
## [2,] 0.1914894 0.11206897 0.5600000
## [3,] 0.1849711 0.12413793 0.5000000
## [4,] 0.1606218 0.10493827 0.4516129
## [5,] 0.1626794 0.09142857 0.5294118
## [6,] 0.1712963 0.08888889 0.5833333
# First fifteen rows of the error-rate matrix, shown to four decimals.
round(head(weatherRF$model$err.rate, n = 15), digits = 4)
## OOB No Yes
## [1,] 0.2143 0.1304 0.6000
## [2,] 0.1915 0.1121 0.5600
## [3,] 0.1850 0.1241 0.5000
## [4,] 0.1606 0.1049 0.4516
## [5,] 0.1627 0.0914 0.5294
## [6,] 0.1713 0.0889 0.5833
## [7,] 0.1855 0.0924 0.6486
## [8,] 0.1867 0.1070 0.5789
## [9,] 0.1504 0.0745 0.5263
## [10,] 0.1630 0.0899 0.5263
## [11,] 0.1703 0.0942 0.5526
## [12,] 0.1528 0.0733 0.5526
## [13,] 0.1572 0.0733 0.5789
## [14,] 0.1572 0.0576 0.6579
## [15,] 0.1354 0.0524 0.5526
# Find the lowest out-of-bag error and the row of err.rate where it occurs.
# The "OOB" column is read straight from the matrix (no data.frame copy),
# and which.min() always yields a single index -- the first one when the
# minimum is tied -- so indexing err.rate with it returns exactly one row.
evalq({
  min.err <- min(model$err.rate[, "OOB"])
  min.err.index <- which.min(model$err.rate[, "OOB"])
}, weatherRF)
# Row index of the minimum OOB error.
weatherRF[["min.err.index"]]
## [1] 18
# OOB and per-class error rates at the minimum-OOB row.
with(weatherRF, model$err.rate[min.err.index, ])
## OOB No Yes
## 0.12663755 0.03664921 0.57894737
# Vote fractions per class for the first six training cases.
with(weatherRF, head(model$votes))
## No Yes
## 1 0.6467391 0.35326087
## 2 0.9700000 0.03000000
## 3 0.9715909 0.02840909
## 4 0.6864865 0.31351351
## 5 0.8750000 0.12500000
## 6 0.7252747 0.27472527