Detección de anomalías con Isolation Forest

# Install IsolationForest from the R-Forge source tarball only when it is not
# already available, so re-running the script does not reinstall it each time.
# `repos = NULL` and `type = "source"` are spelled out because `pkgs` is a URL
# to a source archive rather than a package name (otherwise R has to infer it).
if (!requireNamespace("IsolationForest", quietly = TRUE)) {
  install.packages(
    "http://download.r-forge.r-project.org/src/contrib/IsolationForest_0.0-26.tar.gz",
    repos = NULL,
    type = "source"
  )
}
library(IsolationForest)
## IsolationForest 0.0-26

Dataset

##    Air.Flow Water.Temp Acid.Conc. stack.loss
## 1        80         27         89         42
## 2        80         27         88         37
## 3        75         25         90         37
## 4        62         24         87         28
## 5        62         22         87         18
## 6        62         23         87         18
## 7        62         24         93         19
## 8        62         24         93         20
## 9        58         23         87         15
## 10       58         18         80         14
## 11       58         18         89         14
## 12       58         17         88         13
## 13       58         18         82         11
## 14       58         19         93         12
## 15       50         18         89          8
## 16       50         18         86          7
## 17       50         19         72          8
## 18       50         19         79          8
## 19       50         20         80          9
## 20       56         20         82         15
## 21       70         20         91         15
# Scatter plots of air flow against the other two predictors.
# Columns are referenced by name (formula interface) instead of by position,
# so the plots stay correct if the column order changes and the axes are
# labelled with the variable names rather than `stackloss[, 1]` etc.
plot(Water.Temp ~ Air.Flow, data = stackloss)

plot(Acid.Conc. ~ Air.Flow, data = stackloss)

Entrenando un modelo

# Fit the isolation trees on the full stackloss data frame.
# Reference: https://rdrr.io/rforge/IsolationForest/man/IsolationTrees.html
# rFactor is the randomisation factor, ranging from 0 to 1:
# 0 for fully deterministic, 1 for fully random. It is set to 0 here.
modelo <- IsolationTrees(stackloss, rFactor = 0)

Obtención del score de anomalía

# Compute anomaly scores for the training rows using the fitted model.
score <- AnomalyScore(stackloss, modelo)
# Display the scores held in the `outF` component of the result.
print(score$outF)
##  [1] 0.7520695 0.7520695 0.7675875 0.5162021 0.4040806 0.4040806 0.4040806
##  [8] 0.4040806 0.3608785 0.3608785 0.3608785 0.3608785 0.3608785 0.3608785
## [15] 0.6589037 0.6589037 0.5772792 0.5772792 0.5891906 0.5162021 0.6724994
# Show the data set again so the scores can be read against the rows.
print(stackloss)
##    Air.Flow Water.Temp Acid.Conc. stack.loss
## 1        80         27         89         42
## 2        80         27         88         37
## 3        75         25         90         37
## 4        62         24         87         28
## 5        62         22         87         18
## 6        62         23         87         18
## 7        62         24         93         19
## 8        62         24         93         20
## 9        58         23         87         15
## 10       58         18         80         14
## 11       58         18         89         14
## 12       58         17         88         13
## 13       58         18         82         11
## 14       58         19         93         12
## 15       50         18         89          8
## 16       50         18         86          7
## 17       50         19         72          8
## 18       50         19         79          8
## 19       50         20         80          9
## 20       56         20         82         15
## 21       70         20         91         15

Aplicando el modelo a un nuevo set de datos

# New observation to score. Column names must match the training data
# (`stackloss`) exactly; R names are case-sensitive, so the last column is
# `stack.loss`, not `Stack.loss`.
prueba <- data.frame(
  Air.Flow = 55,
  Water.Temp = 21,
  Acid.Conc. = 88,
  stack.loss = 25
)
# Fail early if the new data ever drifts from the training columns.
stopifnot(identical(names(prueba), names(stackloss)))
prueba
##   Air.Flow Water.Temp Acid.Conc. stack.loss
## 1       55         21         88         25
# Score the new observation with the model fitted earlier.
score2 <- AnomalyScore(prueba, modelo)
# Display the resulting anomaly score.
print(score2$outF)
## [1] 0.5162021