# Work from the folder that holds the export; "PI.csv" below is resolved
# relative to this directory.
setwd("C:/Users/m00864/Desktop/CDM")
library(data.table)

# Full table with every column of the export. The descriptive columns
# (company, flight number, destination, ...) are read back from this table
# once the clustering is done.
CDM_new <- fread("PI.csv")
## Warning in fread("PI.csv"): Bumped column 31 to type character on data row
## 24, field contains '30/04/2014 15:10:00'. Coercing previously read values
## in this column from logical, integer or numeric back to character which
## may not be lossless; e.g., if '00' and '000' occurred before they will
## now be just '0', and there may be inconsistencies with treatment of ',,'
## and ',NA,' too (if they occurred in this column before the bump). If this
## matters please rerun and set 'colClasses' to 'character' for this column.
## Please note that column type detection uses the first 5 rows, the middle
## 5 rows and the last 5 rows, so hopefully this message should be very rare.
## If reporting to datatable-help, please rerun and include the output from
## verbose=TRUE.
## Warning in fread("PI.csv"): Bumped column 3 to type character on data
## row 49, field contains 'HFE'. Coercing previously read values in this
## column from logical, integer or numeric back to character which may not
## be lossless; e.g., if '00' and '000' occurred before they will now be just
## '0', and there may be inconsistencies with treatment of ',,' and ',NA,' too
## (if they occurred in this column before the bump). If this matters please
## rerun and set 'colClasses' to 'character' for this column. Please note
## that column type detection uses the first 5 rows, the middle 5 rows and the
## last 5 rows, so hopefully this message should be very rare. If reporting to
## datatable-help, please rerun and include the output from verbose=TRUE.
# Working table for the clustering. Take a deep copy of the table that was
# already read instead of parsing PI.csv a second time (which also repeats
# every fread warning). copy() is required rather than plain assignment:
# the `:=` deletions applied to CDM below modify it by reference and must
# not touch CDM_new.
CDM <- copy(CDM_new)
## Warning in fread("PI.csv"): Bumped column 31 to type character on data row
## 24, field contains '30/04/2014 15:10:00'. Coercing previously read values
## in this column from logical, integer or numeric back to character which
## may not be lossless; e.g., if '00' and '000' occurred before they will
## now be just '0', and there may be inconsistencies with treatment of ',,'
## and ',NA,' too (if they occurred in this column before the bump). If this
## matters please rerun and set 'colClasses' to 'character' for this column.
## Please note that column type detection uses the first 5 rows, the middle
## 5 rows and the last 5 rows, so hopefully this message should be very rare.
## If reporting to datatable-help, please rerun and include the output from
## verbose=TRUE.
## Warning in fread("PI.csv"): Bumped column 3 to type character on data
## row 49, field contains 'HFE'. Coercing previously read values in this
## column from logical, integer or numeric back to character which may not
## be lossless; e.g., if '00' and '000' occurred before they will now be just
## '0', and there may be inconsistencies with treatment of ',,' and ',NA,' too
## (if they occurred in this column before the bump). If this matters please
## rerun and set 'colClasses' to 'character' for this column. Please note
## that column type detection uses the first 5 rows, the middle 5 rows and the
## last 5 rows, so hopefully this message should be very rare. If reporting to
## datatable-help, please rerun and include the output from verbose=TRUE.
# Drop the columns that are not used for clustering.
# The previous list also named "Flightnumber" and "Nav. Flightnumber", which
# do not exist in the table (the real headers are "Flight number" and
# "Nav. Flight number"); data.table then created and immediately deleted
# them, emitting a warning each time. Those two entries are removed here;
# the correctly spelled columns are dropped in the next deletion step.
# "Comapny" is spelled as it appears in the CSV header.
CDM[, c(
  "Comapny", "Destination", "SID", "Aircraft", "Origin", "Situation",
  "Stand", "TOBT", "TSAT", "ASRT", "EXOT", "AXOT", "AOBT", "ATOT",
  "Company", "CTOT", "TTOT", "SOBT", "Runway"
) := NULL]
## Warning in `[.data.table`(CDM, , `:=`(c("Comapny", "Flightnumber",
## "Destination", : Adding new column 'Flightnumber' then assigning NULL
## (deleting it).
## Warning in `[.data.table`(CDM, , `:=`(c("Comapny", "Flightnumber",
## "Destination", : Adding new column 'Nav. Flightnumber' then assigning NULL
## (deleting it).
## Flight number ASAT AOBT-ASAT AOBT-SOBT ASAT-ASRT
## 1: 8524 30/04/2014 14:08:00 5 3 0
## 2: 1610 30/04/2014 14:08:00 5 3 0
## 3: 8754 30/04/2014 14:10:00 5 5 0
## 4: 1012 30/04/2014 14:08:00 8 16 0
## 5: 3564 30/04/2014 14:14:00 5 19 0
## ---
## 510: 3966 30/04/2014 13:49:00 5 9 0
## 511: 3912 30/04/2014 13:50:00 5 10 0
## 512: 2006 30/04/2014 13:51:00 5 11 0
## 513: 1546 30/04/2014 13:51:00 5 11 0
## 514: 3620 30/04/2014 13:52:00 5 2 0
## ASAT-TSAT ASRT-TSAT AXOT-EXOT TSAT-TOBT TOBT-SOBT ASRT-TOBT AOBT-TOBT
## 1: -5 -5 -5 3 0 -2 3
## 2: -6 -6 -5 4 0 -2 3
## 3: -3 -3 0 3 0 0 5
## 4: -5 -5 -1 13 0 8 16
## 5: -4 -4 -5 3 15 -1 4
## ---
## 510: 3 3 -6 1 0 4 9
## 511: -6 -6 -4 6 5 0 5
## 512: -6 -6 -3 12 0 6 11
## 513: -5 -5 -7 11 0 6 11
## 514: -4 -4 -3 1 0 -3 2
## Nav. Flight number
## 1: 8524
## 2: 16SP
## 3: 8754
## 4: 1012
## 5: 3564
## ---
## 510: 3966
## 511: 3912
## 512: 2006
## 513: 1546
## 514: 3620
# Remove the remaining non-difference columns so that only the
# "<A>-<B>" difference columns are left in CDM.
id_columns <- c("Flight number", "ASAT", "Nav. Flight number")
CDM[, (id_columns) := NULL]
## AOBT-ASAT AOBT-SOBT ASAT-ASRT ASAT-TSAT ASRT-TSAT AXOT-EXOT TSAT-TOBT
## 1: 5 3 0 -5 -5 -5 3
## 2: 5 3 0 -6 -6 -5 4
## 3: 5 5 0 -3 -3 0 3
## 4: 8 16 0 -5 -5 -1 13
## 5: 5 19 0 -4 -4 -5 3
## ---
## 510: 5 9 0 3 3 -6 1
## 511: 5 10 0 -6 -6 -4 6
## 512: 5 11 0 -6 -6 -3 12
## 513: 5 11 0 -5 -5 -7 11
## 514: 5 2 0 -4 -4 -3 1
## TOBT-SOBT ASRT-TOBT AOBT-TOBT
## 1: 0 -2 3
## 2: 0 -2 3
## 3: 0 0 5
## 4: 0 8 16
## 5: 15 -1 4
## ---
## 510: 0 4 9
## 511: 5 0 5
## 512: 0 6 11
## 513: 0 6 11
## 514: 0 -3 2
# Plotting the values
library(ggplot2)
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:data.table':
##
## melt
# Plot every remaining column of CDM as one line over the row index.
# The index is derived from the table itself instead of a hard-coded 514,
# so the plot keeps working when the export has a different number of rows.
data_plot <- data.frame(sample = seq_len(nrow(CDM)), CDM)

# Long format: one row per (sample, variable) pair. melt is qualified with
# its namespace because both data.table and reshape export a `melt`, and
# which one is found otherwise depends on the attach order.
Molten <- reshape::melt(data_plot, id.vars = "sample")

ggplot(Molten, aes(x = sample, y = value, colour = variable)) +
  geom_line(size = 0.8)

library(cluster)
# Total within-cluster sum of squares for k = 1..max_k (elbow analysis).
# kmeans() picks random starting centres, so the seed is fixed to make the
# curve reproducible between runs.
set.seed(123)

# Matrix of complete rows only; rows containing NA are removed.
cdm <- na.omit(as.matrix(CDM))

max_k <- 15L
# vapply returns a preallocated numeric vector of length max_k, replacing
# the previous unbraced loop that grew `wss` one element at a time.
wss <- vapply(
  seq_len(max_k),
  function(k) kmeans(cdm, centers = k)$tot.withinss,
  numeric(1)
)
library(ggvis)
##
## Attaching package: 'ggvis'
## The following object is masked from 'package:ggplot2':
##
## resolution
# Elbow plot: number of clusters against total within-cluster sum of squares.
sse <- data.frame(Clusters = 1:15, SSE = wss)

sse %>%
  ggvis(~Clusters, ~SSE) %>%
  layer_points(fill := "blue") %>%
  layer_lines() %>%
  set_options(height = 300, width = 400)
# Final k-means fit with 6 clusters.
# kmeans() starts from randomly chosen centres, so without a fixed seed the
# cluster numbering and sizes change between runs, while the cluster ids are
# attached to the flights further down. Fixing the seed makes the
# assignment reproducible.
set.seed(123)
clusters <- kmeans(cdm, centers = 6)
clusters
## K-means clustering with 6 clusters of sizes 20, 7, 165, 8, 198, 97
##
## Cluster means:
## AOBT-ASAT AOBT-SOBT ASAT-ASRT ASAT-TSAT ASRT-TSAT AXOT-EXOT
## 1 5.000000 42.350000 0.0000000 -1.1000000 -1.1000000 -4.850000
## 2 5.714286 107.142857 0.0000000 -0.2857143 -0.2857143 -3.571429
## 3 5.151515 7.369697 0.1090909 -1.0727273 -1.1818182 -3.266667
## 4 5.000000 25.500000 14.3750000 -9.7500000 -24.1250000 -0.875000
## 5 5.050505 1.171717 0.3030303 -3.9949495 -4.2979798 -3.792929
## 6 5.226804 17.721649 0.3092784 -1.7938144 -2.1030928 -3.453608
## TSAT-TOBT TOBT-SOBT ASRT-TOBT AOBT-TOBT
## 1 1.2500000 37.2000000 0.150000 5.150000
## 2 -1.4285714 103.1428571 -1.714286 4.000000
## 3 2.5090909 0.7818182 1.327273 6.587879
## 4 29.0000000 1.2500000 4.875000 24.250000
## 5 0.7020202 -0.5858586 -3.595960 1.757576
## 6 0.3298969 13.9587629 -1.773196 3.762887
##
## Clustering vector:
## [1] 5 5 3 3 6 3 3 6 6 5 3 6 5 5 3 3 5 6 6 2 3 5 6 5 5 5 3 5 1 3 5 1 6 3 5
## [36] 3 5 5 5 3 3 3 5 4 6 5 6 6 5 6 3 2 5 3 4 3 3 6 5 6 6 5 3 6 3 6 5 3 6 3
## [71] 6 3 6 6 3 5 6 3 3 3 3 6 6 5 6 5 3 3 5 6 2 5 5 1 5 5 3 5 3 5 5 5 5 6 1
## [106] 1 3 5 5 1 6 6 6 6 5 3 6 3 3 3 5 5 5 3 3 3 3 3 3 6 3 5 5 3 5 3 3 3 5 3
## [141] 5 3 2 5 3 3 3 6 5 6 3 3 6 1 6 6 3 6 6 5 1 6 5 3 5 6 5 6 6 6 1 6 6 5 5
## [176] 6 3 6 1 3 3 5 5 5 3 5 3 5 5 1 5 5 6 5 5 6 3 1 5 6 1 2 5 6 3 6 5 5 1 3
## [211] 6 3 5 5 4 3 3 3 5 3 3 5 5 6 3 3 3 3 5 6 3 3 4 5 3 5 5 5 5 3 3 3 5 5 5
## [246] 6 2 5 5 6 5 5 6 5 5 5 3 5 5 5 6 6 5 3 5 5 3 3 5 3 3 3 6 3 6 6 3 3 3 5
## [281] 5 5 5 5 5 6 3 3 3 3 5 3 3 5 5 3 6 3 5 3 5 3 5 5 5 5 3 5 5 5 5 5 5 5 3
## [316] 3 5 5 5 5 5 5 3 3 5 5 5 5 3 5 3 5 5 5 3 3 5 3 5 3 3 5 6 5 5 5 5 3 3 5
## [351] 5 1 5 3 3 4 5 3 5 6 5 5 5 5 5 5 3 1 3 3 5 3 5 5 5 5 3 3 5 6 5 5 3 3 3
## [386] 5 6 3 5 3 3 5 5 6 5 3 5 3 3 5 6 3 5 3 6 6 3 5 6 6 5 1 3 5 3 3 1 5 5 4
## [421] 6 6 5 5 6 6 3 5 3 1 6 3 5 3 3 3 5 6 3 5 3 5 3 5 6 5 6 5 3 5 3 5 3 6 5
## [456] 5 6 6 6 5 5 2 5 4 5 5 3 6 6 6 3 3 6 3 6 3 5 6 3 5 5 1 6 6 5 4 3 5 5 3
## [491] 3 3 3 3 5
##
## Within cluster sum of squares by cluster:
## [1] 6920.750 10158.857 12439.321 11891.000 9229.899 9383.938
## (between_SS / total_SS = 79.9 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss"
## [5] "tot.withinss" "betweenss" "size" "iter"
## [9] "ifault"
# Keep only the complete rows of CDM, mirroring the NA removal applied to
# the matrix `cdm` that was clustered, then attach each row's cluster id.
CDM <- na.omit(CDM)
CDM$Cluster <- clusters$cluster

# Show the first five rows together with their cluster id.
head(CDM, n = 5)
## AOBT-ASAT AOBT-SOBT ASAT-ASRT ASAT-TSAT ASRT-TSAT AXOT-EXOT TSAT-TOBT
## 1: 5 3 0 -5 -5 -5 3
## 2: 5 3 0 -6 -6 -5 4
## 3: 5 5 0 -3 -3 0 3
## 4: 8 16 0 -5 -5 -1 13
## 5: 5 19 0 -4 -4 -5 3
## TOBT-SOBT ASRT-TOBT AOBT-TOBT Cluster
## 1: 0 -2 3 5
## 2: 0 -2 3 5
## 3: 0 0 5 3
## 4: 0 8 16 3
## 5: 15 -1 4 6
# Plot the clustered observations, coloured by cluster, without shading,
# point labels or connecting lines.
clusplot(
  cdm,
  clusters$cluster,
  color = TRUE,
  shade = FALSE,
  labels = 0,
  lines = 0,
  main = "k-Means Cluster Analysis"
)

# Combine each flight's cluster id with its descriptive columns.
#
# CDM$Cluster has one entry per row that was complete in the clustered
# columns. The full table must therefore be filtered on exactly those
# columns: na.omit(CDM_new) would also drop rows that are NA in any other
# column, which either makes cbind fail or silently shifts the cluster ids
# onto the wrong flights.
timing_cols <- setdiff(names(CDM), "Cluster")
complete_rows <- complete.cases(CDM_new[, timing_cols, with = FALSE])

# Guard against misalignment: the filtered table must have exactly one row
# per cluster assignment.
stopifnot(sum(complete_rows) == nrow(CDM))
CDM_new <- CDM_new[complete_rows]

Analysis_Data <- cbind.data.frame(
  CDM$Cluster,
  CDM_new$Comapny,
  CDM_new$`Flight number`,
  CDM_new$Destination,
  CDM_new$Aircraft,
  CDM_new$Runway,
  CDM_new$Stand
)
head(Analysis_Data, 10)
## CDM$Cluster CDM_new$Comapny CDM_new$`Flight number` CDM_new$Destination
## 1 5 ANE 8524 PNA
## 2 5 IBE 1610 BCN
## 3 3 ANE 8754 NCE
## 4 3 VLG 1012 BCN
## 5 6 IBE 3564 MUC
## 6 3 ANE 8026 XRY
## 7 3 VLG 8204 CDG
## 8 6 ANE 8786 BLQ
## 9 6 ANE 8976 VLC
## 10 5 AVA 17 MDE
## CDM_new$Aircraft CDM_new$Runway CDM_new$Stand
## 1 CRJ9 36R 408
## 2 A320 36R 346
## 3 CRJX 36R 329
## 4 A32A 36R 434
## 5 A320 36R 564
## 6 CRJX 36L 606
## 7 A320 36L 428
## 8 CRJX 36R 312
## 9 CRJ9 36R 328
## 10 A332 36L 585