# Install IsolationForest from the R-Forge source tarball only when it is not
# already available, so re-running the script does not hit the network and
# rebuild the package every time.
# `repos = NULL` and `type = "source"` are passed explicitly because the
# argument is a direct tarball URL, not a package name (the original call
# relied on R inferring 'repos = NULL' from 'pkgs').
if (!requireNamespace("IsolationForest", quietly = TRUE)) {
  install.packages(
    "http://download.r-forge.r-project.org/src/contrib/IsolationForest_0.0-26.tar.gz",
    repos = NULL,
    type = "source"
  )
}
library(IsolationForest)
## IsolationForest 0.0-26
## Air.Flow Water.Temp Acid.Conc. stack.loss
## 1 80 27 89 42
## 2 80 27 88 37
## 3 75 25 90 37
## 4 62 24 87 28
## 5 62 22 87 18
## 6 62 23 87 18
## 7 62 24 93 19
## 8 62 24 93 20
## 9 58 23 87 15
## 10 58 18 80 14
## 11 58 18 89 14
## 12 58 17 88 13
## 13 58 18 82 11
## 14 58 19 93 12
## 15 50 18 89 8
## 16 50 18 86 7
## 17 50 19 72 8
## 18 50 19 79 8
## 19 50 20 80 9
## 20 56 20 82 15
## 21 70 20 91 15
# Scatter plots of Water.Temp and Acid.Conc. against Air.Flow.
# The formula interface references columns by name, so the plots do not depend
# on column order and the axes are labelled with the variable names instead of
# the deparsed index expressions ("stackloss[, 1]", ...).
plot(Water.Temp ~ Air.Flow, data = stackloss)
plot(Acid.Conc. ~ Air.Flow, data = stackloss)
# Entrenando un modelo
# Fit the isolation trees on the complete stackloss data frame.
# rFactor is the randomisation factor, ranging from 0 to 1:
# 0 = fully deterministic, 1 = fully random.
# Reference: https://rdrr.io/rforge/IsolationForest/man/IsolationTrees.html
modelo <- IsolationTrees(
  stackloss,
  rFactor = 0
)
# Obtención del score de anomalía
# Score every row of the training data with the fitted model.
score <- AnomalyScore(stackloss, modelo)
# Show the anomaly scores held in the `outF` element (one value per row).
score[["outF"]]
## [1] 0.7520695 0.7520695 0.7675875 0.5162021 0.4040806 0.4040806 0.4040806
## [8] 0.4040806 0.3608785 0.3608785 0.3608785 0.3608785 0.3608785 0.3608785
## [15] 0.6589037 0.6589037 0.5772792 0.5772792 0.5891906 0.5162021 0.6724994
# Print the full data set so each row can be compared with its anomaly score.
stackloss
## Air.Flow Water.Temp Acid.Conc. stack.loss
## 1 80 27 89 42
## 2 80 27 88 37
## 3 75 25 90 37
## 4 62 24 87 28
## 5 62 22 87 18
## 6 62 23 87 18
## 7 62 24 93 19
## 8 62 24 93 20
## 9 58 23 87 15
## 10 58 18 80 14
## 11 58 18 89 14
## 12 58 17 88 13
## 13 58 18 82 11
## 14 58 19 93 12
## 15 50 18 89 8
## 16 50 18 86 7
## 17 50 19 72 8
## 18 50 19 79 8
## 19 50 20 80 9
## 20 56 20 82 15
## 21 70 20 91 15
# Adaptando el modelo al nuevo set de datos
# Build a single new observation to score with the fitted model.
# Column names must match the training data exactly: the last column of
# `stackloss` is `stack.loss` (lower-case "s"), not `Stack.loss`.
prueba <- data.frame(
  Air.Flow = 55,
  Water.Temp = 21,
  Acid.Conc. = 88,
  stack.loss = 25
)
# Fail early if the new data does not follow the training schema.
stopifnot(identical(names(prueba), names(stackloss)))
prueba
## Air.Flow Water.Temp Acid.Conc. stack.loss
## 1 55 21 88 25
# Run the fitted model on the new observation and show its anomaly score.
score2 <- AnomalyScore(prueba, modelo)
score2[["outF"]]
## [1] 0.5162021