Loading data

library(data.table)
## Warning: package 'data.table' was built under R version 3.3.1
library(bitops)
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
# Session options: have sqldf use its SQLite backend, and set the warning
# level to 0 (R's default handling of warnings).
options(
  sqldf.driver = "SQLite",
  warn = 0
)

# Read PI.csv from the working directory into a data.table; column types are
# auto-detected by fread().
# NOTE(review): fread warns that columns 3 and 31 were bumped to character
# part-way through the file -- pass colClasses if those columns ever matter.
Data <- fread("PI.csv")
## Warning in fread("PI.csv"): Bumped column 31 to type character on data row
## 24, field contains '30/04/2014 15:10:00'. Coercing previously read values
## in this column from logical, integer or numeric back to character which
## may not be lossless; e.g., if '00' and '000' occurred before they will
## now be just '0', and there may be inconsistencies with treatment of ',,'
## and ',NA,' too (if they occurred in this column before the bump). If this
## matters please rerun and set 'colClasses' to 'character' for this column.
## Please note that column type detection uses the first 5 rows, the middle
## 5 rows and the last 5 rows, so hopefully this message should be very rare.
## If reporting to datatable-help, please rerun and include the output from
## verbose=TRUE.
## Warning in fread("PI.csv"): Bumped column 3 to type character on data
## row 49, field contains 'HFE'. Coercing previously read values in this
## column from logical, integer or numeric back to character which may not
## be lossless; e.g., if '00' and '000' occurred before they will now be just
## '0', and there may be inconsistencies with treatment of ',,' and ',NA,' too
## (if they occurred in this column before the bump). If this matters please
## rerun and set 'colClasses' to 'character' for this column. Please note
## that column type detection uses the first 5 rows, the middle 5 rows and the
## last 5 rows, so hopefully this message should be very rare. If reporting to
## datatable-help, please rerun and include the output from verbose=TRUE.
# Remove, by reference, every column that is not used further on.
# NOTE(review): "Comapny" is listed alongside "Company" and looks like a
# misspelling -- presumably it matches a header in PI.csv; confirm.
Data[, c(
  "Comapny", "Flight number", "Destination", "SID", "Aircraft", "Origin",
  "Situation", "Stand", "TOBT", "TSAT", "ASRT", "EXOT", "AXOT", "AOBT",
  "ATOT", "Company", "Nav. Flight number", "CTOT", "TTOT", "SOBT",
  "ASAT"
) := NULL]

# segregate by runway

# Keep only the rows for runway 36R, then show the first ten of them.
Data36R <- Data[Runway == "36R", ]

head(Data36R, n = 10)
##     Runway AOBT-ASAT AOBT-SOBT ASAT-ASRT ASAT-TSAT ASRT-TSAT AXOT-EXOT
##  1:    36R         5         3         0        -5        -5        -5
##  2:    36R         5         3         0        -6        -6        -5
##  3:    36R         5         5         0        -3        -3         0
##  4:    36R         8        16         0        -5        -5        -1
##  5:    36R         5        19         0        -4        -4        -5
##  6:    36R         5        21         0         1         1        -5
##  7:    36R         5        13         0        -1        -1        -7
##  8:    36R         5         1         0        -4        -4        -6
##  9:    36R         5        29         0        -1        -1        -8
## 10:    36R         5         4         0        -1        -1        -7
##     TSAT-TOBT TOBT-SOBT ASRT-TOBT AOBT-TOBT
##  1:         3         0        -2         3
##  2:         4         0        -2         3
##  3:         3         0         0         5
##  4:        13         0         8        16
##  5:         3        15        -1         4
##  6:         0        15         1         6
##  7:         1         8         0         5
##  8:         0         0        -4         1
##  9:         0        25        -1         4
## 10:         0         0        -1         4
# Keep only the rows for runway 36L, then show the first ten of them.
Data36L <- Data[Runway == "36L", ]

head(Data36L, n = 10)
##     Runway AOBT-ASAT AOBT-SOBT ASAT-ASRT ASAT-TSAT ASRT-TSAT AXOT-EXOT
##  1:    36L         4         9         0         1         1         0
##  2:    36L         7         6         0        -5        -5         4
##  3:    36L         5         0         0        -6        -6        -5
##  4:    36L         5         4         0        -1        -1        -3
##  5:    36L         5        25         2        -1        -3        -3
##  6:    36L         5         1         0        -4        -4        -6
##  7:    36L         5        11         0         1         1        -5
##  8:    36L         5         7         0         2         2         0
##  9:    36L         5         3         0        -4        -4        -5
## 10:    36L         6        20         0        -2        -2        -4
##     TSAT-TOBT TOBT-SOBT ASRT-TOBT AOBT-TOBT
##  1:         4         0         5         9
##  2:         4         0        -1         6
##  3:         1         0        -5         0
##  4:         0         0        -1         4
##  5:         1        20        -2         5
##  6:         0         0        -4         1
##  7:         5         0         6        11
##  8:         0         0         2         7
##  9:         2         0        -2         3
## 10:         0        16        -2         4

Using an autoencoder model for anomaly detection

library(h2o)
## Warning: package 'h2o' was built under R version 3.3.1
## Loading required package: statmod
## Warning: package 'statmod' was built under R version 3.3.1
## 
## ----------------------------------------------------------------------
## 
## Your next step is to start H2O:
##     > h2o.init()
## 
## For H2O package documentation, ask for help:
##     > ??h2o
## 
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
## 
## ----------------------------------------------------------------------
## 
## Attaching package: 'h2o'
## The following objects are masked from 'package:data.table':
## 
##     hour, month, week, year
## The following objects are masked from 'package:stats':
## 
##     cor, sd, var
## The following objects are masked from 'package:base':
## 
##     %*%, %in%, &&, ||, apply, as.factor, as.numeric, colnames,
##     colnames<-, ifelse, is.character, is.factor, is.numeric, log,
##     log10, log1p, log2, round, signif, trunc
# Start a local H2O cluster with the default settings (or connect to one that
# is already running); the h2o.* calls further down talk to this cluster.
h2o.init()
## 
## H2O is not running yet, starting it now...
## 
## Note:  In case of errors look at the following log files:
##     C:\Users\m00864\AppData\Local\Temp\RtmpGQ5pz0/h2o_M00864_started_from_r.out
##     C:\Users\m00864\AppData\Local\Temp\RtmpGQ5pz0/h2o_M00864_started_from_r.err
## 
## 
## Starting H2O JVM and connecting: . Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         6 seconds 727 milliseconds 
##     H2O cluster version:        3.8.3.3 
##     H2O cluster name:           H2O_started_from_R_M00864_xtm842 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   3.54 GB 
##     H2O cluster total cores:    0 
##     H2O cluster allowed cores:  0 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     R Version:                  R version 3.3.0 (2016-05-03) 
## 
## Note:  As started, H2O is limited to the CRAN default of 2 CPUs.
##        Shut down and restart H2O as shown below to use all your CPUs.
##            > h2o.shutdown()
##            > h2o.init(nthreads = -1)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# Fix R's RNG so the random train/test split below is repeatable.
set.seed(3456)

# Sample 60% of the 36L rows for training. Every row of Data36L carries the
# same Runway value, so the stratification done by createDataPartition()
# reduces to a simple random sample here.
trainIndex <- createDataPartition(
  Data36L$Runway,
  p = 0.6,
  list = FALSE,
  times = 1
)

# With list = FALSE the result is a one-column matrix. Index with that single
# column as a plain vector: a matrix in `i` has been rejected by some
# data.table releases ("i is invalid type (matrix)"), whereas an integer
# vector is always accepted.
Data36LTrain <- Data36L[trainIndex[, 1], ]
Data36LTest <- Data36L[-trainIndex[, 1], ]

# Upload the training split to the H2O cluster.
Data36LTrainH2o <- as.h2o(Data36LTrain)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# Upload the held-out split to the H2O cluster as well.
Data36LTestH2o <- as.h2o(x = Data36LTest)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# Train a deep-learning autoencoder on columns 2:11 of the training frame
# (the ten numeric difference columns; column 1 is Runway), using two hidden
# layers of 10 units and 5 epochs.
# set.seed() only seeds R's own RNG and does not reach the H2O cluster, so the
# model is given its own seed; reproducible = TRUE makes H2O train
# deterministically (single-threaded), which is what lets the seed take effect.
M1 <- h2o.deeplearning(
  x = 2:11,
  training_frame = Data36LTrainH2o,
  autoencoder = TRUE,
  hidden = c(10, 10),
  epochs = 5,
  seed = 3456,
  reproducible = TRUE
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# Score the held-out rows with the autoencoder; the result holds one
# reconstruction error (Reconstruction.MSE) per test row.
m1.anomaly <- h2o.anomaly(M1, Data36LTestH2o)

# Pull the scores back from the H2O cluster into a plain R data.frame.
runway1 <- as.data.frame(m1.anomaly)

# Plot reconstruction error against row position in the test set.
# seq_len() is used instead of 1:nrow() so that an empty result yields an
# empty index rather than c(1, 0); the x label is set explicitly so the axis
# title is not the raw indexing expression.
ggplot(
  data = runway1,
  aes(x = seq_len(nrow(runway1)), y = Reconstruction.MSE)
) +
  geom_line() +
  labs(x = "Test-set row")