Loading data
library(data.table)
## Warning: package 'data.table' was built under R version 3.3.1
library(bitops)
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
# Session options: have sqldf use its SQLite backend, and keep warnings in
# R's default deferred mode (warn = 0).
options(sqldf.driver = "SQLite", warn = 0)

# Read the raw CSV into a data.table.
# NOTE(review): fread reports that columns 3 and 31 are bumped to character
# part-way through the file; if those columns matter, consider passing
# `colClasses` for them as the warning suggests -- confirm against the CSV.
Data <- fread("PI.csv")
## Warning in fread("PI.csv"): Bumped column 31 to type character on data row
## 24, field contains '30/04/2014 15:10:00'. Coercing previously read values
## in this column from logical, integer or numeric back to character which
## may not be lossless; e.g., if '00' and '000' occurred before they will
## now be just '0', and there may be inconsistencies with treatment of ',,'
## and ',NA,' too (if they occurred in this column before the bump). If this
## matters please rerun and set 'colClasses' to 'character' for this column.
## Please note that column type detection uses the first 5 rows, the middle
## 5 rows and the last 5 rows, so hopefully this message should be very rare.
## If reporting to datatable-help, please rerun and include the output from
## verbose=TRUE.
## Warning in fread("PI.csv"): Bumped column 3 to type character on data
## row 49, field contains 'HFE'. Coercing previously read values in this
## column from logical, integer or numeric back to character which may not
## be lossless; e.g., if '00' and '000' occurred before they will now be just
## '0', and there may be inconsistencies with treatment of ',,' and ',NA,' too
## (if they occurred in this column before the bump). If this matters please
## rerun and set 'colClasses' to 'character' for this column. Please note
## that column type detection uses the first 5 rows, the middle 5 rows and the
## last 5 rows, so hopefully this message should be very rare. If reporting to
## datatable-help, please rerun and include the output from verbose=TRUE.
# Remove, by reference, every column that is not needed for the model
# (identifiers, raw timestamps and schedule fields).
# NOTE(review): "Comapny" looks like a misspelling of "Company" (which is
# also listed); presumably it mirrors a header that is misspelled in the
# CSV itself -- confirm before changing it.
Data[, c(
  "Comapny", "Flight number", "Destination", "SID", "Aircraft",
  "Origin", "Situation", "Stand", "TOBT", "TSAT",
  "ASRT", "EXOT", "AXOT", "AOBT", "ATOT",
  "Company", "Nav. Flight number", "CTOT", "TTOT", "SOBT",
  "ASAT"
) := NULL]
# Segregate by runway: keep only the rows recorded for runway 36R
# and preview the first ten of them.
Data36R <- Data[Runway == "36R", ]
head(Data36R, n = 10)
## Runway AOBT-ASAT AOBT-SOBT ASAT-ASRT ASAT-TSAT ASRT-TSAT AXOT-EXOT
## 1: 36R 5 3 0 -5 -5 -5
## 2: 36R 5 3 0 -6 -6 -5
## 3: 36R 5 5 0 -3 -3 0
## 4: 36R 8 16 0 -5 -5 -1
## 5: 36R 5 19 0 -4 -4 -5
## 6: 36R 5 21 0 1 1 -5
## 7: 36R 5 13 0 -1 -1 -7
## 8: 36R 5 1 0 -4 -4 -6
## 9: 36R 5 29 0 -1 -1 -8
## 10: 36R 5 4 0 -1 -1 -7
## TSAT-TOBT TOBT-SOBT ASRT-TOBT AOBT-TOBT
## 1: 3 0 -2 3
## 2: 4 0 -2 3
## 3: 3 0 0 5
## 4: 13 0 8 16
## 5: 3 15 -1 4
## 6: 0 15 1 6
## 7: 1 8 0 5
## 8: 0 0 -4 1
## 9: 0 25 -1 4
## 10: 0 0 -1 4
# Same split for runway 36L; this subset is the one used for the
# autoencoder below. Preview the first ten rows.
Data36L <- Data[Runway == "36L", ]
head(Data36L, n = 10)
## Runway AOBT-ASAT AOBT-SOBT ASAT-ASRT ASAT-TSAT ASRT-TSAT AXOT-EXOT
## 1: 36L 4 9 0 1 1 0
## 2: 36L 7 6 0 -5 -5 4
## 3: 36L 5 0 0 -6 -6 -5
## 4: 36L 5 4 0 -1 -1 -3
## 5: 36L 5 25 2 -1 -3 -3
## 6: 36L 5 1 0 -4 -4 -6
## 7: 36L 5 11 0 1 1 -5
## 8: 36L 5 7 0 2 2 0
## 9: 36L 5 3 0 -4 -4 -5
## 10: 36L 6 20 0 -2 -2 -4
## TSAT-TOBT TOBT-SOBT ASRT-TOBT AOBT-TOBT
## 1: 4 0 5 9
## 2: 4 0 -1 6
## 3: 1 0 -5 0
## 4: 0 0 -1 4
## 5: 1 20 -2 5
## 6: 0 0 -4 1
## 7: 5 0 6 11
## 8: 0 0 2 7
## 9: 2 0 -2 3
## 10: 0 16 -2 4
Using an autoencoder model for anomaly detection
library(h2o)
## Warning: package 'h2o' was built under R version 3.3.1
## Loading required package: statmod
## Warning: package 'statmod' was built under R version 3.3.1
##
## ----------------------------------------------------------------------
##
## Your next step is to start H2O:
## > h2o.init()
##
## For H2O package documentation, ask for help:
## > ??h2o
##
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
##
## ----------------------------------------------------------------------
##
## Attaching package: 'h2o'
## The following objects are masked from 'package:data.table':
##
## hour, month, week, year
## The following objects are masked from 'package:stats':
##
## cor, sd, var
## The following objects are masked from 'package:base':
##
## %*%, %in%, &&, ||, apply, as.factor, as.numeric, colnames,
## colnames<-, ifelse, is.character, is.factor, is.numeric, log,
## log10, log1p, log2, round, signif, trunc
# Start (or connect to) a local H2O cluster with default settings.
# As the startup message notes, a cluster started this way is limited to
# 2 CPUs; shutting down and calling h2o.init(nthreads = -1) uses all cores.
h2o.init()
##
## H2O is not running yet, starting it now...
##
## Note: In case of errors look at the following log files:
## C:\Users\m00864\AppData\Local\Temp\RtmpGQ5pz0/h2o_M00864_started_from_r.out
## C:\Users\m00864\AppData\Local\Temp\RtmpGQ5pz0/h2o_M00864_started_from_r.err
##
##
## Starting H2O JVM and connecting: . Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 6 seconds 727 milliseconds
## H2O cluster version: 3.8.3.3
## H2O cluster name: H2O_started_from_R_M00864_xtm842
## H2O cluster total nodes: 1
## H2O cluster total memory: 3.54 GB
## H2O cluster total cores: 0
## H2O cluster allowed cores: 0
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## R Version: R version 3.3.0 (2016-05-03)
##
## Note: As started, H2O is limited to the CRAN default of 2 CPUs.
## Shut down and restart H2O as shown below to use all your CPUs.
## > h2o.shutdown()
## > h2o.init(nthreads = -1)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# Fix the R-level RNG so the train/test partition below is repeatable.
set.seed(3456)

# Draw a single 60% training partition of the 36L rows. `list = FALSE`
# returns the row indices as a one-column matrix rather than a list.
# NOTE(review): Runway is constant within Data36L, so presumably this
# amounts to a plain random 60/40 split -- confirm that is intended.
trainIndex <- createDataPartition(
  y = Data36L$Runway,
  times = 1,
  p = .6,
  list = FALSE
)

# Rows selected by the partition form the training set; the rest are held
# out for scoring.
Data36LTrain <- Data36L[trainIndex, ]
Data36LTest <- Data36L[-trainIndex, ]

# Upload the training rows to the H2O cluster.
Data36LTrainH2o <- as.h2o(Data36LTrain)
##
|
| | 0%
|
|=================================================================| 100%
# Upload the held-out rows to the H2O cluster for anomaly scoring.
Data36LTestH2o <- as.h2o(Data36LTest)
##
|
| | 0%
|
|=================================================================| 100%
# Fit a deep-learning autoencoder on the training frame: inputs are columns
# 2 through 11, with two hidden layers of 10 units each, trained for 5 epochs.
# NOTE(review): column 1 appears to be Runway per the head() output above,
# so 2:11 presumably selects the ten time-difference columns -- confirm.
# NOTE(review): no `seed` is passed here; the earlier set.seed() presumably
# does not govern H2O's own RNG, so results may vary between runs -- verify.
M1 <- h2o.deeplearning(
  x = 2:11,
  training_frame = Data36LTrainH2o,
  autoencoder = TRUE,
  hidden = c(10, 10),
  epochs = 5
)
##
|
| | 0%
|
|=================================================================| 100%
# Score the held-out rows with the autoencoder, then pull the per-row
# scores back into an ordinary data.frame for plotting; the column plotted
# below is `Reconstruction.MSE`.
m1.anomaly <- h2o.anomaly(M1, Data36LTestH2o)
runway1 <- as.data.frame(m1.anomaly)

# Plot reconstruction error against row position in the test set.
# seq_len() is used instead of 1:nrow(): for an empty frame 1:0 would yield
# c(1, 0), which does not match the (zero-row) data and breaks the plot.
ggplot(
  data = runway1,
  aes(x = seq_len(nrow(runway1)), y = Reconstruction.MSE)
) +
  geom_line() +
  labs(x = "Test-set row index")
