Association Rules for “Flight Delay” dataset: Generate the rules and identify the patterns.
- Read the data into R
# Load the flight-delay data set.
# The workspace wipe (rm(list = ls())) and setwd() were dropped because both
# mutate the caller's session; the file is located through an explicit
# directory instead.
data_dir <- "C:\\Users\\C5215696\\Desktop\\Data Science\\Association-Rules"
# stringsAsFactors = TRUE keeps the text columns (CARRIER, DEST, ORIGIN) as
# factors on R >= 4.0, where the default became FALSE; this matches the Factor
# columns reported by str() further down.
flight_data <- read.csv(
  file = file.path(data_dir, "FlightDelays.csv"),
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)
# Total number of missing values across the whole data frame.
sum(is.na(flight_data))
## [1] 0
colSums(is.na(flight_data))
## CRS_DEP_TIME CARRIER DEST ORIGIN Weather
## 0 0 0 0 0
## DAY_WEEK Flight.Status
## 0 0
- Look at the summary of all the variables and convert the following variables to factors: Weather, DAY_WEEK, and Flight.Status
str(flight_data)
## 'data.frame': 2201 obs. of 7 variables:
## $ CRS_DEP_TIME : int 600 600 600 600 600 600 600 600 600 600 ...
## $ CARRIER : Factor w/ 8 levels "CO","DH","DL",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ DEST : Factor w/ 3 levels "EWR","JFK","LGA": 2 2 2 2 2 2 2 2 2 2 ...
## $ ORIGIN : Factor w/ 3 levels "BWI","DCA","IAD": 2 2 2 2 2 2 2 2 2 2 ...
## $ Weather : int 0 0 0 0 0 0 0 0 0 0 ...
## $ DAY_WEEK : int 4 5 6 7 1 2 3 4 5 6 ...
## $ Flight.Status: int 0 0 0 0 0 0 0 0 0 1 ...
# Recode the three integer-coded categorical columns as factors in a single
# step, then print the structure again to confirm the new column types.
flight_data <- transform(
  flight_data,
  Weather = as.factor(Weather),
  DAY_WEEK = as.factor(DAY_WEEK),
  Flight.Status = as.factor(Flight.Status)
)
str(flight_data)
## 'data.frame': 2201 obs. of 7 variables:
## $ CRS_DEP_TIME : int 600 600 600 600 600 600 600 600 600 600 ...
## $ CARRIER : Factor w/ 8 levels "CO","DH","DL",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ DEST : Factor w/ 3 levels "EWR","JFK","LGA": 2 2 2 2 2 2 2 2 2 2 ...
## $ ORIGIN : Factor w/ 3 levels "BWI","DCA","IAD": 2 2 2 2 2 2 2 2 2 2 ...
## $ Weather : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ DAY_WEEK : Factor w/ 7 levels "1","2","3","4",..: 4 5 6 7 1 2 3 4 5 6 ...
## $ Flight.Status: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
- Bin the numeric variable ‘CRS_DEP_TIME’ into 4 bins as follows: if the time is between 6 AM and 12 noon, code it as 1; if the time falls in the next interval, code it as 2; and so on.
# The data start at 6 AM, so only three bins are needed; no level is created
# for 0000-0559. Intervals are right-closed:
#   (559, 1159] -> "1", (1159, 1759] -> "2", (1759, 2359] -> "3".
# Any value outside (559, 2359] would become NA.
flight_data$CRS_DEP_TIME <- cut(
  flight_data$CRS_DEP_TIME,
  breaks = c(559, 1159, 1759, 2359),
  labels = c("1", "2", "3")
)
unique(flight_data$CRS_DEP_TIME)
## [1] 1 2 3
## Levels: 1 2 3
table(flight_data$CRS_DEP_TIME)
##
## 1 2 3
## 699 1108 394
summary(flight_data)
## CRS_DEP_TIME CARRIER DEST ORIGIN Weather DAY_WEEK
## 1: 699 DH :551 EWR: 665 BWI: 145 0:2169 1:308
## 2:1108 RU :408 JFK: 386 DCA:1370 1: 32 2:307
## 3: 394 US :404 LGA:1150 IAD: 686 3:320
## DL :388 4:372
## MQ :295 5:391
## CO : 94 6:250
## (Other): 61 7:253
## Flight.Status
## 0:1773
## 1: 428
##
##
##
##
##
head(flight_data)
## CRS_DEP_TIME CARRIER DEST ORIGIN Weather DAY_WEEK Flight.Status
## 1 1 MQ JFK DCA 0 4 0
## 2 1 MQ JFK DCA 0 5 0
## 3 1 MQ JFK DCA 0 6 0
## 4 1 MQ JFK DCA 0 7 0
## 5 1 MQ JFK DCA 0 1 0
## 6 1 MQ JFK DCA 0 2 0
dim(flight_data)
## [1] 2201 7
- Convert the data frame into a transactions object. Look at the first 6 transactions to understand how the ‘arules’ library requires the data to be transformed. Use the R code below to view the transactions data.
library(arules)
## Warning: package 'arules' was built under R version 3.3.3
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
# Transactions object: coerce the data frame (all columns are factors at this
# point) into the arules "transactions" class. Each row becomes one
# transaction and each column=level pair becomes one item, as the item labels
# printed by summary() and itemInfo() show.
flight_trans <- as(flight_data, "transactions")
summary(flight_trans)
## transactions as itemMatrix in sparse format with
## 2201 rows (elements/itemsets/transactions) and
## 28 columns (items) and a density of 0.25
##
## most frequent items:
## Weather=0 Flight.Status=0 ORIGIN=DCA DEST=LGA
## 2169 1773 1370 1150
## CRS_DEP_TIME=2 (Other)
## 1108 7837
##
## element (itemset/transaction) length distribution:
## sizes
## 7
## 2201
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7 7 7 7 7 7
##
## includes extended item information - examples:
## labels variables levels
## 1 CRS_DEP_TIME=1 CRS_DEP_TIME 1
## 2 CRS_DEP_TIME=2 CRS_DEP_TIME 2
## 3 CRS_DEP_TIME=3 CRS_DEP_TIME 3
##
## includes extended transaction information - examples:
## transactionID
## 1 1
## 2 2
## 3 3
itemInfo(flight_trans)
## labels variables levels
## 1 CRS_DEP_TIME=1 CRS_DEP_TIME 1
## 2 CRS_DEP_TIME=2 CRS_DEP_TIME 2
## 3 CRS_DEP_TIME=3 CRS_DEP_TIME 3
## 4 CARRIER=CO CARRIER CO
## 5 CARRIER=DH CARRIER DH
## 6 CARRIER=DL CARRIER DL
## 7 CARRIER=MQ CARRIER MQ
## 8 CARRIER=OH CARRIER OH
## 9 CARRIER=RU CARRIER RU
## 10 CARRIER=UA CARRIER UA
## 11 CARRIER=US CARRIER US
## 12 DEST=EWR DEST EWR
## 13 DEST=JFK DEST JFK
## 14 DEST=LGA DEST LGA
## 15 ORIGIN=BWI ORIGIN BWI
## 16 ORIGIN=DCA ORIGIN DCA
## 17 ORIGIN=IAD ORIGIN IAD
## 18 Weather=0 Weather 0
## 19 Weather=1 Weather 1
## 20 DAY_WEEK=1 DAY_WEEK 1
## 21 DAY_WEEK=2 DAY_WEEK 2
## 22 DAY_WEEK=3 DAY_WEEK 3
## 23 DAY_WEEK=4 DAY_WEEK 4
## 24 DAY_WEEK=5 DAY_WEEK 5
## 25 DAY_WEEK=6 DAY_WEEK 6
## 26 DAY_WEEK=7 DAY_WEEK 7
## 27 Flight.Status=0 Flight.Status 0
## 28 Flight.Status=1 Flight.Status 1
inspect(flight_trans[2000:2010])
## items transactionID
## [1] {CRS_DEP_TIME=3,
## CARRIER=DL,
## DEST=LGA,
## ORIGIN=DCA,
## Weather=0,
## DAY_WEEK=3,
## Flight.Status=1} 2000
## [2] {CRS_DEP_TIME=3,
## CARRIER=DL,
## DEST=LGA,
## ORIGIN=DCA,
## Weather=0,
## DAY_WEEK=4,
## Flight.Status=1} 2001
## [3] {CRS_DEP_TIME=3,
## CARRIER=DL,
## DEST=LGA,
## ORIGIN=DCA,
## Weather=0,
## DAY_WEEK=5,
## Flight.Status=0} 2002
## [4] {CRS_DEP_TIME=3,
## CARRIER=DL,
## DEST=LGA,
## ORIGIN=DCA,
## Weather=0,
## DAY_WEEK=1,
## Flight.Status=0} 2003
## [5] {CRS_DEP_TIME=3,
## CARRIER=DL,
## DEST=LGA,
## ORIGIN=DCA,
## Weather=0,
## DAY_WEEK=2,
## Flight.Status=0} 2004
## [6] {CRS_DEP_TIME=3,
## CARRIER=DL,
## DEST=LGA,
## ORIGIN=DCA,
## Weather=0,
## DAY_WEEK=3,
## Flight.Status=0} 2005
## [7] {CRS_DEP_TIME=3,
## CARRIER=DL,
## DEST=LGA,
## ORIGIN=DCA,
## Weather=0,
## DAY_WEEK=4,
## Flight.Status=0} 2006
## [8] {CRS_DEP_TIME=3,
## CARRIER=DL,
## DEST=LGA,
## ORIGIN=DCA,
## Weather=0,
## DAY_WEEK=5,
## Flight.Status=0} 2007
## [9] {CRS_DEP_TIME=3,
## CARRIER=DL,
## DEST=LGA,
## ORIGIN=DCA,
## Weather=0,
## DAY_WEEK=7,
## Flight.Status=0} 2008
## [10] {CRS_DEP_TIME=3,
## CARRIER=DL,
## DEST=LGA,
## ORIGIN=DCA,
## Weather=0,
## DAY_WEEK=1,
## Flight.Status=0} 2009
## [11] {CRS_DEP_TIME=3,
## CARRIER=DL,
## DEST=LGA,
## ORIGIN=DCA,
## Weather=0,
## DAY_WEEK=4,
## Flight.Status=0} 2010
image(flight_trans)

- Apply ‘arules’ algorithm and play with various support, lift and confidence values.
# Rule set 1: minimum support 0.1, minimum confidence 0.1, and at least three
# items per rule (lhs and rhs together).
rules1 <- apriori(
  flight_trans,
  parameter = list(
    support = 0.1,
    confidence = 0.1,
    minlen = 3,
    target = "rules"
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.1 0.1 1 none FALSE TRUE 5 0.1 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 220
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[28 item(s), 2201 transaction(s)] done [0.00s].
## sorting and recoding items ... [23 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 done [0.00s].
## writing ... [314 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
rules1
## set of 314 rules
inspect(rules1[1:10])
## lhs rhs support
## [1] {DAY_WEEK=6,Flight.Status=0} => {Weather=0} 0.1026806
## [2] {Weather=0,DAY_WEEK=6} => {Flight.Status=0} 0.1026806
## [3] {Weather=0,Flight.Status=0} => {DAY_WEEK=6} 0.1026806
## [4] {CARRIER=MQ,ORIGIN=DCA} => {Weather=0} 0.1294866
## [5] {CARRIER=MQ,Weather=0} => {ORIGIN=DCA} 0.1294866
## [6] {ORIGIN=DCA,Weather=0} => {CARRIER=MQ} 0.1294866
## [7] {DAY_WEEK=2,Flight.Status=0} => {Weather=0} 0.1108587
## [8] {Weather=0,DAY_WEEK=2} => {Flight.Status=0} 0.1108587
## [9] {Weather=0,Flight.Status=0} => {DAY_WEEK=2} 0.1108587
## [10] {DAY_WEEK=1,Flight.Status=0} => {Weather=0} 0.1017719
## confidence lift
## [1] 1.0000000 1.0147533
## [2] 0.9040000 1.1222245
## [3] 0.1274676 1.1222245
## [4] 0.9661017 0.9803549
## [5] 1.0000000 1.6065693
## [6] 0.2106430 1.5716111
## [7] 1.0000000 1.0147533
## [8] 0.8356164 1.0373332
## [9] 0.1376199 0.9866492
## [10] 1.0000000 1.0147533
head(quality(rules1))
## support confidence lift
## 1 0.1026806 1.0000000 1.0147533
## 2 0.1026806 0.9040000 1.1222245
## 3 0.1026806 0.1274676 1.1222245
## 4 0.1294866 0.9661017 0.9803549
## 5 0.1294866 1.0000000 1.6065693
## 6 0.1294866 0.2106430 1.5716111
# Rule set 2: minimum support 0.1, minimum confidence 0.2, and at least three
# items per rule (lhs and rhs together).
rules2 <- apriori(
  flight_trans,
  parameter = list(
    support = 0.1,
    confidence = 0.2,
    minlen = 3,
    target = "rules"
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.2 0.1 1 none FALSE TRUE 5 0.1 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 220
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[28 item(s), 2201 transaction(s)] done [0.00s].
## sorting and recoding items ... [23 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 done [0.02s].
## writing ... [301 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
head(quality(rules2))
## support confidence lift
## 1 0.1026806 1.0000000 1.0147533
## 2 0.1026806 0.9040000 1.1222245
## 3 0.1294866 0.9661017 0.9803549
## 4 0.1294866 1.0000000 1.6065693
## 5 0.1294866 0.2106430 1.5716111
## 6 0.1108587 1.0000000 1.0147533
rules2
## set of 301 rules
# Rule set 3: minimum support 0.1, minimum confidence 0.5, and at least three
# items per rule (lhs and rhs together).
rules3 <- apriori(
  flight_trans,
  parameter = list(
    support = 0.1,
    confidence = 0.5,
    minlen = 3,
    target = "rules"
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 5 0.1 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 220
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[28 item(s), 2201 transaction(s)] done [0.00s].
## sorting and recoding items ... [23 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 done [0.00s].
## writing ... [233 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
rules3
## set of 233 rules
- Inspect all the rules.
# NOTE: rules1[1:20] takes the first 20 rules in the order apriori() returned
# them and only then sorts that subset by lift. This is not the 20
# highest-lift rules of the whole set; for that, sort first and take the head
# afterwards, e.g. head(sort(rules1, by = "lift"), n = 20).
inspect(sort(rules1[1:20], by = "lift"))
## lhs rhs support
## [1] {CARRIER=MQ,Weather=0} => {ORIGIN=DCA} 0.1294866
## [2] {ORIGIN=DCA,Weather=0} => {CARRIER=MQ} 0.1294866
## [3] {Weather=0,DAY_WEEK=6} => {Flight.Status=0} 0.1026806
## [4] {Weather=0,Flight.Status=0} => {DAY_WEEK=6} 0.1026806
## [5] {Weather=0,DAY_WEEK=4} => {Flight.Status=0} 0.1431168
## [6] {Weather=0,DAY_WEEK=2} => {Flight.Status=0} 0.1108587
## [7] {ORIGIN=DCA,Weather=0} => {DAY_WEEK=4} 0.1076783
## [8] {Weather=0,DAY_WEEK=4} => {ORIGIN=DCA} 0.1076783
## [9] {Weather=0,DAY_WEEK=3} => {Flight.Status=0} 0.1194911
## [10] {Weather=0,Flight.Status=0} => {DAY_WEEK=3} 0.1194911
## [11] {DAY_WEEK=6,Flight.Status=0} => {Weather=0} 0.1026806
## [12] {DAY_WEEK=2,Flight.Status=0} => {Weather=0} 0.1108587
## [13] {DAY_WEEK=1,Flight.Status=0} => {Weather=0} 0.1017719
## [14] {DAY_WEEK=3,Flight.Status=0} => {Weather=0} 0.1194911
## [15] {ORIGIN=DCA,DAY_WEEK=4} => {Weather=0} 0.1076783
## [16] {DAY_WEEK=4,Flight.Status=0} => {Weather=0} 0.1431168
## [17] {Weather=0,Flight.Status=0} => {DAY_WEEK=2} 0.1108587
## [18] {CARRIER=MQ,ORIGIN=DCA} => {Weather=0} 0.1294866
## [19] {Weather=0,DAY_WEEK=1} => {Flight.Status=0} 0.1017719
## [20] {Weather=0,Flight.Status=0} => {DAY_WEEK=1} 0.1017719
## confidence lift
## [1] 1.0000000 1.6065693
## [2] 0.2106430 1.5716111
## [3] 0.9040000 1.1222245
## [4] 0.1274676 1.1222245
## [5] 0.8467742 1.0511844
## [6] 0.8356164 1.0373332
## [7] 0.1751663 1.0364006
## [8] 0.6370968 1.0235401
## [9] 0.8218750 1.0202746
## [10] 0.1483362 1.0202746
## [11] 1.0000000 1.0147533
## [12] 1.0000000 1.0147533
## [13] 1.0000000 1.0147533
## [14] 1.0000000 1.0147533
## [15] 1.0000000 1.0147533
## [16] 1.0000000 1.0147533
## [17] 0.1376199 0.9866492
## [18] 0.9661017 0.9803549
## [19] 0.7619048 0.9458276
## [20] 0.1263395 0.9028355
# Same first-20 subset of rules1, ordered by support and then by confidence.
# NOTE: the subset is taken before sorting, so these are not the top 20 rules
# of the whole set.
inspect(sort(rules1[1:20], by = c("support", "confidence")))
## lhs rhs support
## [1] {DAY_WEEK=4,Flight.Status=0} => {Weather=0} 0.1431168
## [2] {Weather=0,DAY_WEEK=4} => {Flight.Status=0} 0.1431168
## [3] {CARRIER=MQ,Weather=0} => {ORIGIN=DCA} 0.1294866
## [4] {CARRIER=MQ,ORIGIN=DCA} => {Weather=0} 0.1294866
## [5] {ORIGIN=DCA,Weather=0} => {CARRIER=MQ} 0.1294866
## [6] {DAY_WEEK=3,Flight.Status=0} => {Weather=0} 0.1194911
## [7] {Weather=0,DAY_WEEK=3} => {Flight.Status=0} 0.1194911
## [8] {Weather=0,Flight.Status=0} => {DAY_WEEK=3} 0.1194911
## [9] {DAY_WEEK=2,Flight.Status=0} => {Weather=0} 0.1108587
## [10] {Weather=0,DAY_WEEK=2} => {Flight.Status=0} 0.1108587
## [11] {Weather=0,Flight.Status=0} => {DAY_WEEK=2} 0.1108587
## [12] {ORIGIN=DCA,DAY_WEEK=4} => {Weather=0} 0.1076783
## [13] {Weather=0,DAY_WEEK=4} => {ORIGIN=DCA} 0.1076783
## [14] {ORIGIN=DCA,Weather=0} => {DAY_WEEK=4} 0.1076783
## [15] {DAY_WEEK=6,Flight.Status=0} => {Weather=0} 0.1026806
## [16] {Weather=0,DAY_WEEK=6} => {Flight.Status=0} 0.1026806
## [17] {Weather=0,Flight.Status=0} => {DAY_WEEK=6} 0.1026806
## [18] {DAY_WEEK=1,Flight.Status=0} => {Weather=0} 0.1017719
## [19] {Weather=0,DAY_WEEK=1} => {Flight.Status=0} 0.1017719
## [20] {Weather=0,Flight.Status=0} => {DAY_WEEK=1} 0.1017719
## confidence lift
## [1] 1.0000000 1.0147533
## [2] 0.8467742 1.0511844
## [3] 1.0000000 1.6065693
## [4] 0.9661017 0.9803549
## [5] 0.2106430 1.5716111
## [6] 1.0000000 1.0147533
## [7] 0.8218750 1.0202746
## [8] 0.1483362 1.0202746
## [9] 1.0000000 1.0147533
## [10] 0.8356164 1.0373332
## [11] 0.1376199 0.9866492
## [12] 1.0000000 1.0147533
## [13] 0.6370968 1.0235401
## [14] 0.1751663 1.0364006
## [15] 1.0000000 1.0147533
## [16] 0.9040000 1.1222245
## [17] 0.1274676 1.1222245
## [18] 1.0000000 1.0147533
## [19] 0.7619048 0.9458276
## [20] 0.1263395 0.9028355
# First 20 rules of rules2 (in mining order), sorted by lift.
# NOTE: the subset is taken before sorting, so these are not the 20
# highest-lift rules of the whole set.
inspect(sort(rules2[1:20], by = "lift"))
## lhs rhs support
## [1] {DEST=JFK,Weather=0} => {CARRIER=DH} 0.1044980
## [2] {CARRIER=DH,Weather=0} => {DEST=JFK} 0.1044980
## [3] {CARRIER=MQ,Weather=0} => {ORIGIN=DCA} 0.1294866
## [4] {ORIGIN=DCA,Weather=0} => {CARRIER=MQ} 0.1294866
## [5] {DEST=JFK,Weather=0} => {CRS_DEP_TIME=2} 0.1163108
## [6] {Weather=0,DAY_WEEK=6} => {Flight.Status=0} 0.1026806
## [7] {Weather=0,DAY_WEEK=4} => {Flight.Status=0} 0.1431168
## [8] {Weather=0,DAY_WEEK=2} => {Flight.Status=0} 0.1108587
## [9] {Weather=0,DAY_WEEK=4} => {ORIGIN=DCA} 0.1076783
## [10] {Weather=0,DAY_WEEK=3} => {Flight.Status=0} 0.1194911
## [11] {DAY_WEEK=6,Flight.Status=0} => {Weather=0} 0.1026806
## [12] {DAY_WEEK=2,Flight.Status=0} => {Weather=0} 0.1108587
## [13] {DAY_WEEK=1,Flight.Status=0} => {Weather=0} 0.1017719
## [14] {DAY_WEEK=3,Flight.Status=0} => {Weather=0} 0.1194911
## [15] {ORIGIN=DCA,DAY_WEEK=4} => {Weather=0} 0.1076783
## [16] {DAY_WEEK=4,Flight.Status=0} => {Weather=0} 0.1431168
## [17] {CRS_DEP_TIME=2,DEST=JFK} => {Weather=0} 0.1163108
## [18] {CARRIER=DH,DEST=JFK} => {Weather=0} 0.1044980
## [19] {CARRIER=MQ,ORIGIN=DCA} => {Weather=0} 0.1294866
## [20] {Weather=0,DAY_WEEK=1} => {Flight.Status=0} 0.1017719
## confidence lift
## [1] 0.6084656 2.4305496
## [2] 0.4259259 2.4286605
## [3] 1.0000000 1.6065693
## [4] 0.2106430 1.5716111
## [5] 0.6772487 1.3453288
## [6] 0.9040000 1.1222245
## [7] 0.8467742 1.0511844
## [8] 0.8356164 1.0373332
## [9] 0.6370968 1.0235401
## [10] 0.8218750 1.0202746
## [11] 1.0000000 1.0147533
## [12] 1.0000000 1.0147533
## [13] 1.0000000 1.0147533
## [14] 1.0000000 1.0147533
## [15] 1.0000000 1.0147533
## [16] 1.0000000 1.0147533
## [17] 0.9846154 0.9991418
## [18] 0.9829060 0.9974071
## [19] 0.9661017 0.9803549
## [20] 0.7619048 0.9458276
# Same first-20 subset of rules2, ordered by support and then by confidence.
# NOTE: the subset is taken before sorting, so these are not the top 20 rules
# of the whole set.
inspect(sort(rules2[1:20], by = c("support", "confidence")))
## lhs rhs support
## [1] {DAY_WEEK=4,Flight.Status=0} => {Weather=0} 0.1431168
## [2] {Weather=0,DAY_WEEK=4} => {Flight.Status=0} 0.1431168
## [3] {CARRIER=MQ,Weather=0} => {ORIGIN=DCA} 0.1294866
## [4] {CARRIER=MQ,ORIGIN=DCA} => {Weather=0} 0.1294866
## [5] {ORIGIN=DCA,Weather=0} => {CARRIER=MQ} 0.1294866
## [6] {DAY_WEEK=3,Flight.Status=0} => {Weather=0} 0.1194911
## [7] {Weather=0,DAY_WEEK=3} => {Flight.Status=0} 0.1194911
## [8] {CRS_DEP_TIME=2,DEST=JFK} => {Weather=0} 0.1163108
## [9] {DEST=JFK,Weather=0} => {CRS_DEP_TIME=2} 0.1163108
## [10] {DAY_WEEK=2,Flight.Status=0} => {Weather=0} 0.1108587
## [11] {Weather=0,DAY_WEEK=2} => {Flight.Status=0} 0.1108587
## [12] {ORIGIN=DCA,DAY_WEEK=4} => {Weather=0} 0.1076783
## [13] {Weather=0,DAY_WEEK=4} => {ORIGIN=DCA} 0.1076783
## [14] {CARRIER=DH,DEST=JFK} => {Weather=0} 0.1044980
## [15] {DEST=JFK,Weather=0} => {CARRIER=DH} 0.1044980
## [16] {CARRIER=DH,Weather=0} => {DEST=JFK} 0.1044980
## [17] {DAY_WEEK=6,Flight.Status=0} => {Weather=0} 0.1026806
## [18] {Weather=0,DAY_WEEK=6} => {Flight.Status=0} 0.1026806
## [19] {DAY_WEEK=1,Flight.Status=0} => {Weather=0} 0.1017719
## [20] {Weather=0,DAY_WEEK=1} => {Flight.Status=0} 0.1017719
## confidence lift
## [1] 1.0000000 1.0147533
## [2] 0.8467742 1.0511844
## [3] 1.0000000 1.6065693
## [4] 0.9661017 0.9803549
## [5] 0.2106430 1.5716111
## [6] 1.0000000 1.0147533
## [7] 0.8218750 1.0202746
## [8] 0.9846154 0.9991418
## [9] 0.6772487 1.3453288
## [10] 1.0000000 1.0147533
## [11] 0.8356164 1.0373332
## [12] 1.0000000 1.0147533
## [13] 0.6370968 1.0235401
## [14] 0.9829060 0.9974071
## [15] 0.6084656 2.4305496
## [16] 0.4259259 2.4286605
## [17] 1.0000000 1.0147533
## [18] 0.9040000 1.1222245
## [19] 1.0000000 1.0147533
## [20] 0.7619048 0.9458276
# First 20 rules of rules3 (in mining order), sorted by lift.
# NOTE: the subset is taken before sorting, so these are not the 20
# highest-lift rules of the whole set.
inspect(sort(rules3[1:20], by = "lift"))
## lhs rhs support
## [1] {DEST=JFK,Weather=0} => {CARRIER=DH} 0.1044980
## [2] {CARRIER=MQ,Weather=0} => {ORIGIN=DCA} 0.1294866
## [3] {DEST=JFK,Weather=0} => {CRS_DEP_TIME=2} 0.1163108
## [4] {Weather=0,DAY_WEEK=6} => {Flight.Status=0} 0.1026806
## [5] {Weather=0,DAY_WEEK=4} => {Flight.Status=0} 0.1431168
## [6] {Weather=0,DAY_WEEK=2} => {Flight.Status=0} 0.1108587
## [7] {Weather=0,DAY_WEEK=4} => {ORIGIN=DCA} 0.1076783
## [8] {Weather=0,DAY_WEEK=3} => {Flight.Status=0} 0.1194911
## [9] {DAY_WEEK=6,Flight.Status=0} => {Weather=0} 0.1026806
## [10] {DAY_WEEK=2,Flight.Status=0} => {Weather=0} 0.1108587
## [11] {DAY_WEEK=1,Flight.Status=0} => {Weather=0} 0.1017719
## [12] {DAY_WEEK=3,Flight.Status=0} => {Weather=0} 0.1194911
## [13] {ORIGIN=DCA,DAY_WEEK=4} => {Weather=0} 0.1076783
## [14] {DAY_WEEK=4,Flight.Status=0} => {Weather=0} 0.1431168
## [15] {DEST=JFK,Flight.Status=0} => {Weather=0} 0.1372104
## [16] {CRS_DEP_TIME=2,DEST=JFK} => {Weather=0} 0.1163108
## [17] {CARRIER=DH,DEST=JFK} => {Weather=0} 0.1044980
## [18] {DEST=JFK,Weather=0} => {Flight.Status=0} 0.1372104
## [19] {CARRIER=MQ,ORIGIN=DCA} => {Weather=0} 0.1294866
## [20] {Weather=0,DAY_WEEK=1} => {Flight.Status=0} 0.1017719
## confidence lift
## [1] 0.6084656 2.4305496
## [2] 1.0000000 1.6065693
## [3] 0.6772487 1.3453288
## [4] 0.9040000 1.1222245
## [5] 0.8467742 1.0511844
## [6] 0.8356164 1.0373332
## [7] 0.6370968 1.0235401
## [8] 0.8218750 1.0202746
## [9] 1.0000000 1.0147533
## [10] 1.0000000 1.0147533
## [11] 1.0000000 1.0147533
## [12] 1.0000000 1.0147533
## [13] 1.0000000 1.0147533
## [14] 1.0000000 1.0147533
## [15] 1.0000000 1.0147533
## [16] 0.9846154 0.9991418
## [17] 0.9829060 0.9974071
## [18] 0.7989418 0.9918054
## [19] 0.9661017 0.9803549
## [20] 0.7619048 0.9458276
# Same first-20 subset of rules3, ordered by support and then by confidence.
# NOTE: the subset is taken before sorting, so these are not the top 20 rules
# of the whole set.
inspect(sort(rules3[1:20], by = c("support", "confidence")))
## lhs rhs support
## [1] {DAY_WEEK=4,Flight.Status=0} => {Weather=0} 0.1431168
## [2] {Weather=0,DAY_WEEK=4} => {Flight.Status=0} 0.1431168
## [3] {DEST=JFK,Flight.Status=0} => {Weather=0} 0.1372104
## [4] {DEST=JFK,Weather=0} => {Flight.Status=0} 0.1372104
## [5] {CARRIER=MQ,Weather=0} => {ORIGIN=DCA} 0.1294866
## [6] {CARRIER=MQ,ORIGIN=DCA} => {Weather=0} 0.1294866
## [7] {DAY_WEEK=3,Flight.Status=0} => {Weather=0} 0.1194911
## [8] {Weather=0,DAY_WEEK=3} => {Flight.Status=0} 0.1194911
## [9] {CRS_DEP_TIME=2,DEST=JFK} => {Weather=0} 0.1163108
## [10] {DEST=JFK,Weather=0} => {CRS_DEP_TIME=2} 0.1163108
## [11] {DAY_WEEK=2,Flight.Status=0} => {Weather=0} 0.1108587
## [12] {Weather=0,DAY_WEEK=2} => {Flight.Status=0} 0.1108587
## [13] {ORIGIN=DCA,DAY_WEEK=4} => {Weather=0} 0.1076783
## [14] {Weather=0,DAY_WEEK=4} => {ORIGIN=DCA} 0.1076783
## [15] {CARRIER=DH,DEST=JFK} => {Weather=0} 0.1044980
## [16] {DEST=JFK,Weather=0} => {CARRIER=DH} 0.1044980
## [17] {DAY_WEEK=6,Flight.Status=0} => {Weather=0} 0.1026806
## [18] {Weather=0,DAY_WEEK=6} => {Flight.Status=0} 0.1026806
## [19] {DAY_WEEK=1,Flight.Status=0} => {Weather=0} 0.1017719
## [20] {Weather=0,DAY_WEEK=1} => {Flight.Status=0} 0.1017719
## confidence lift
## [1] 1.0000000 1.0147533
## [2] 0.8467742 1.0511844
## [3] 1.0000000 1.0147533
## [4] 0.7989418 0.9918054
## [5] 1.0000000 1.6065693
## [6] 0.9661017 0.9803549
## [7] 1.0000000 1.0147533
## [8] 0.8218750 1.0202746
## [9] 0.9846154 0.9991418
## [10] 0.6772487 1.3453288
## [11] 1.0000000 1.0147533
## [12] 0.8356164 1.0373332
## [13] 1.0000000 1.0147533
## [14] 0.6370968 1.0235401
## [15] 0.9829060 0.9974071
## [16] 0.6084656 2.4305496
## [17] 1.0000000 1.0147533
## [18] 0.9040000 1.1222245
## [19] 1.0000000 1.0147533
## [20] 0.7619048 0.9458276
- Filter the rules with specific LHS and RHS conditions, e.g., filter the rules with Flight.Status=0
# Keep only the rules whose consequent (rhs) is a Flight.Status item, so the
# antecedents show what is associated with each flight status. Every rule has
# at least three items because the rule sets were mined with minlen = 3.
# The surviving rules are ordered by support, then confidence.
filter_status_rules <- function(rule_set) {
  # The item labels are kept as a literal inside subset(): the expression is
  # evaluated against the rule set's own lhs/rhs, not this function's locals.
  status_only <- subset(
    rule_set,
    subset = rhs %in% c("Flight.Status=0", "Flight.Status=1")
  )
  sort(status_only, by = c("support", "confidence"))
}
rules.filter1 <- filter_status_rules(rules1)
rules.filter2 <- filter_status_rules(rules2)
rules.filter3 <- filter_status_rules(rules3)
- Filter redundant rules if any
rules.filter1
## set of 57 rules
rules.filter2
## set of 57 rules
rules.filter3
## set of 56 rules
# Count how often each item appears on the left-hand side of the filtered
# rules. The lhs() accessor covers every rule in the set, so no hard-coded
# index range (previously 01:57) has to be kept in sync with the rule count.
itemFrequency(lhs(rules.filter1), type = "absolute")
## CRS_DEP_TIME=1 CRS_DEP_TIME=2 CRS_DEP_TIME=3 CARRIER=CO
## 7 16 1 0
## CARRIER=DH CARRIER=DL CARRIER=MQ CARRIER=OH
## 7 7 0 0
## CARRIER=RU CARRIER=UA CARRIER=US DEST=EWR
## 3 0 7 5
## DEST=JFK DEST=LGA ORIGIN=BWI ORIGIN=DCA
## 1 19 0 19
## ORIGIN=IAD Weather=0 Weather=1 DAY_WEEK=1
## 7 38 0 1
## DAY_WEEK=2 DAY_WEEK=3 DAY_WEEK=4 DAY_WEEK=5
## 1 1 1 1
## DAY_WEEK=6 DAY_WEEK=7 Flight.Status=0 Flight.Status=1
## 1 0 0 0
# Count how often each item appears on the left-hand side of the filtered
# rules. The lhs() accessor covers every rule in the set, so no hard-coded
# index range (previously 01:57) has to be kept in sync with the rule count.
itemFrequency(lhs(rules.filter2), type = "absolute")
## CRS_DEP_TIME=1 CRS_DEP_TIME=2 CRS_DEP_TIME=3 CARRIER=CO
## 7 16 1 0
## CARRIER=DH CARRIER=DL CARRIER=MQ CARRIER=OH
## 7 7 0 0
## CARRIER=RU CARRIER=UA CARRIER=US DEST=EWR
## 3 0 7 5
## DEST=JFK DEST=LGA ORIGIN=BWI ORIGIN=DCA
## 1 19 0 19
## ORIGIN=IAD Weather=0 Weather=1 DAY_WEEK=1
## 7 38 0 1
## DAY_WEEK=2 DAY_WEEK=3 DAY_WEEK=4 DAY_WEEK=5
## 1 1 1 1
## DAY_WEEK=6 DAY_WEEK=7 Flight.Status=0 Flight.Status=1
## 1 0 0 0
# Count how often each item appears on the left-hand side of the filtered
# rules. The lhs() accessor covers every rule in the set, so no hard-coded
# index range (previously 01:56) has to be kept in sync with the rule count.
itemFrequency(lhs(rules.filter3), type = "absolute")
## CRS_DEP_TIME=1 CRS_DEP_TIME=2 CRS_DEP_TIME=3 CARRIER=CO
## 7 15 1 0
## CARRIER=DH CARRIER=DL CARRIER=MQ CARRIER=OH
## 7 7 0 0
## CARRIER=RU CARRIER=UA CARRIER=US DEST=EWR
## 3 0 7 5
## DEST=JFK DEST=LGA ORIGIN=BWI ORIGIN=DCA
## 1 19 0 19
## ORIGIN=IAD Weather=0 Weather=1 DAY_WEEK=1
## 7 37 0 1
## DAY_WEEK=2 DAY_WEEK=3 DAY_WEEK=4 DAY_WEEK=5
## 1 1 1 1
## DAY_WEEK=6 DAY_WEEK=7 Flight.Status=0 Flight.Status=1
## 1 0 0 0
table(duplicated(rules.filter1))
##
## FALSE
## 57
table(is.redundant(rules.filter1))
##
## FALSE TRUE
## 39 18
table(duplicated(rules.filter2))
##
## FALSE
## 57
table(is.redundant(rules.filter2))
##
## FALSE TRUE
## 39 18
table(duplicated(rules.filter3))
##
## FALSE
## 56
table(is.redundant(rules.filter3))
##
## FALSE TRUE
## 38 18
# After filtering on Flight.Status, rules.filter1 and rules.filter2 each hold
# 57 rules and rules.filter3 holds 56 (see the counts printed above), so the
# three sets are nearly the same. rules.filter1 is used for the rest of the
# analysis.
# Positions of the redundant rules within rules.filter1:
which(is.redundant(rules.filter1))
## [1] 7 14 16 21 22 23 26 30 33 35 37 38 41 45 48 51 56 57
# Remove the redundant rules from rules.filter1 by selecting the positions of
# the non-redundant ones, then check the storage type of the result.
rules1_upd <- rules.filter1[which(!is.redundant(rules.filter1))]
typeof(rules1_upd)
## [1] "S4"
table(is.redundant(rules1_upd))
##
## FALSE
## 39
- Plot and visualize the rules
library(arulesViz)
## Warning: package 'arulesViz' was built under R version 3.3.3
## Loading required package: grid
plot(rules1_upd)
