Let’s analyze flyers by the ratings groups of 1-5
customer satisfaction rating = 1
# Mine association rules for the lowest rating bin, (0,1]; the appearance
# argument pins that satisfaction item to the rule antecedent (lhs).
ruleset <- apriori(
  dfX,
  parameter = list(
    support = 0.01,
    confidence = 0.5,
    minlen = 2,
    maxlen = 6,
    maxtime = 10
  ),
  appearance = list(lhs = "satisfaction=(0,1]")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 10 0.01 2
## maxlen target ext
## 6 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 1294
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[896 item(s), 129435 transaction(s)] done [0.42s].
## sorting and recoding items ... [282 item(s)] done [0.05s].
## creating transaction tree ... done [0.10s].
## checking subsets of size 1 2 done [0.01s].
## writing ... [10 rule(s)] done [0.00s].
## creating S4 object ... done [0.04s].
plot(ruleset)

# Keep only rules whose antecedent contains the rating-1 item and whose lift
# exceeds 1, then plot what is left.
ruleset_filter <- subset(
  ruleset,
  subset = lhs %ain% "satisfaction=(0,1]" & lift > 1
)
plot(ruleset_filter)

summary(ruleset_filter)
## set of 8 rules
##
## rule length distribution (lhs + rhs):sizes
## 2
## 8
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 2 2 2 2 2
##
## summary of quality measures:
## support confidence lift count
## Min. :0.01313 Min. :0.5688 Min. :1.000 Min. :1700
## 1st Qu.:0.01380 1st Qu.:0.5977 1st Qu.:1.018 1st Qu.:1786
## Median :0.01447 Median :0.6265 Median :1.136 Median :1872
## Mean :0.01639 Mean :0.7099 Mean :1.278 Mean :2122
## 3rd Qu.:0.01941 3rd Qu.:0.8405 3rd Qu.:1.363 3rd Qu.:2512
## Max. :0.02267 Max. :0.9816 Max. :2.001 Max. :2934
##
## mining info:
## data ntransactions support confidence
## dfX 129435 0.01 0.5
# Order the filtered rules by absolute count, largest first, and list them.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
ruleset_filter <- sort(ruleset_filter, decreasing = TRUE, by = "count")
inspect(ruleset_filter)
## lhs rhs support confidence lift count
## [1] {satisfaction=(0,1]} => {flight_cancelled=No} 0.02266775 0.9815992 1.000065 2934
## [2] {satisfaction=(0,1]} => {airline_status=Blue} 0.01999459 0.8658414 1.263959 2588
## [3] {satisfaction=(0,1]} => {class=Economy} 0.01921428 0.8320509 1.022516 2487
## [4] {satisfaction=(0,1]} => {num_loyalty_cards=(-1,0]} 0.01464828 0.6343259 1.197200 1896
## [5] {satisfaction=(0,1]} => {type_of_travel=Personal} 0.01428516 0.6186015 2.001167 1849
## [6] {satisfaction=(0,1]} => {gender=Female} 0.01399930 0.6062228 1.073941 1812
## [7] {satisfaction=(0,1]} => {airport_shopping=(-1,0]} 0.01321126 0.5720977 1.006367 1710
## [8] {satisfaction=(0,1]} => {arrival_delay_greater_5_mins=yes} 0.01313401 0.5687521 1.659485 1700
- There are relatively few customers who give a customer satisfaction rating of 1, but those who do have one or more of the following attributes: female, personal travel, arrival delay > 5 mins, blue status, economy class, 0 loyalty cards, and 0 airport_shopping
customer satisfaction rating = 2
# Mine association rules for the (1,2] rating bin; the appearance argument
# pins that satisfaction item to the rule antecedent (lhs). maxlen is left at
# the package default here.
ruleset <- apriori(
  dfX,
  parameter = list(
    support = 0.1,
    confidence = 0.5,
    minlen = 2,
    maxtime = 10
  ),
  appearance = list(lhs = "satisfaction=(1,2]")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 10 0.1 2
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 12943
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[896 item(s), 129435 transaction(s)] done [0.42s].
## sorting and recoding items ... [73 item(s)] done [0.04s].
## creating transaction tree ... done [0.09s].
## checking subsets of size 1 2 done [0.02s].
## writing ... [9 rule(s)] done [0.00s].
## creating S4 object ... done [0.03s].
# Keep only rules whose antecedent contains the rating-2 item and whose lift
# exceeds 1, then plot what is left.
ruleset_filter <- subset(
  ruleset,
  subset = lhs %ain% "satisfaction=(1,2]" & lift > 1
)
plot(ruleset_filter)

summary(ruleset_filter)
## set of 6 rules
##
## rule length distribution (lhs + rhs):sizes
## 2
## 6
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 2 2 2 2 2
##
## summary of quality measures:
## support confidence lift count
## Min. :0.1117 Min. :0.6150 Min. :1.013 Min. :14452
## 1st Qu.:0.1137 1st Qu.:0.6264 1st Qu.:1.086 1st Qu.:14719
## Median :0.1311 Median :0.7220 Median :1.158 Median :16967
## Mean :0.1330 Mean :0.7325 Mean :1.383 Mean :17212
## 3rd Qu.:0.1485 3rd Qu.:0.8181 3rd Qu.:1.280 3rd Qu.:19224
## Max. :0.1618 Max. :0.8912 Max. :2.583 Max. :20942
##
## mining info:
## data ntransactions support confidence
## dfX 129435 0.1 0.5
# Order the filtered rules by absolute count, largest first, and list them.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
ruleset_filter <- sort(ruleset_filter, decreasing = TRUE, by = "count")
inspect(ruleset_filter)
## lhs rhs support
## [1] {satisfaction=(1,2]} => {airline_status=Blue} 0.1617955
## [2] {satisfaction=(1,2]} => {class=Economy} 0.1497122
## [3] {satisfaction=(1,2]} => {type_of_travel=Personal} 0.1449608
## [4] {satisfaction=(1,2]} => {num_loyalty_cards=(-1,0]} 0.1172094
## [5] {satisfaction=(1,2]} => {gender=Female} 0.1125507
## [6] {satisfaction=(1,2]} => {airport_shopping=(-1,0]} 0.1116545
## confidence lift count
## [1] 0.8911869 1.300958 20942
## [2] 0.8246308 1.013398 19378
## [3] 0.7984595 2.583005 18763
## [4] 0.6456019 1.218482 15171
## [5] 0.6199413 1.098244 14568
## [6] 0.6150049 1.081844 14452
- Virtually the same result as for customers with rating=1: female, personal travel, blue status, economy class, 0 loyalty cards, and 0 airport_shopping
customer satisfaction rating = 3
# Mine association rules for the (2,3] rating bin; the appearance argument
# pins that satisfaction item to the rule antecedent (lhs).
ruleset <- apriori(
  dfX,
  parameter = list(
    support = 0.1,
    confidence = 0.5,
    minlen = 2,
    maxlen = 6,
    maxtime = 10
  ),
  appearance = list(lhs = "satisfaction=(2,3]")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 10 0.1 2
## maxlen target ext
## 6 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 12943
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[896 item(s), 129435 transaction(s)] done [0.45s].
## sorting and recoding items ... [73 item(s)] done [0.04s].
## creating transaction tree ... done [0.08s].
## checking subsets of size 1 2 done [0.04s].
## writing ... [9 rule(s)] done [0.00s].
## creating S4 object ... done [0.03s].
plot(ruleset)

# Keep only rules whose antecedent contains the rating-3 item and whose lift
# exceeds 1, then plot what is left.
ruleset_filter <- subset(
  ruleset,
  subset = lhs %ain% "satisfaction=(2,3]" & lift > 1
)
plot(ruleset_filter)

summary(ruleset_filter)
## set of 7 rules
##
## rule length distribution (lhs + rhs):sizes
## 2
## 7
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 2 2 2 2 2
##
## summary of quality measures:
## support confidence lift count
## Min. :0.1548 Min. :0.5434 Min. :1.005 Min. :20033
## 1st Qu.:0.1726 1st Qu.:0.6060 1st Qu.:1.022 1st Qu.:22342
## Median :0.1925 Median :0.6760 Median :1.027 Median :24920
## Mean :0.1955 Mean :0.6865 Mean :1.047 Mean :25310
## 3rd Qu.:0.2217 3rd Qu.:0.7784 3rd Qu.:1.071 3rd Qu.:28696
## Max. :0.2328 Max. :0.8175 Max. :1.115 Max. :30138
##
## mining info:
## data ntransactions support confidence
## dfX 129435 0.1 0.5
# Order the filtered rules by absolute count, largest first, and list them.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
ruleset_filter <- sort(ruleset_filter, decreasing = TRUE, by = "count")
inspect(ruleset_filter)
## lhs rhs support confidence lift count
## [1] {satisfaction=(2,3]} => {class=Economy} 0.2328427 0.8175012 1.004636 30138
## [2] {satisfaction=(2,3]} => {departure_delay_greater_5_mins=no} 0.2257658 0.7926545 1.017900 29222
## [3] {satisfaction=(2,3]} => {airline_status=Blue} 0.2176382 0.7641187 1.115464 28170
## [4] {satisfaction=(2,3]} => {arrival_delay_greater_5_mins=no} 0.1925291 0.6759616 1.028435 24920
## [5] {satisfaction=(2,3]} => {gender=Female} 0.1789161 0.6281669 1.112816 23158
## [6] {satisfaction=(2,3]} => {airport_shopping=(-1,0]} 0.1663151 0.5839256 1.027173 21527
## [7] {satisfaction=(2,3]} => {num_loyalty_cards=(-1,0]} 0.1547727 0.5434004 1.025591 20033
- Similar assessment to customers with rating=2 except for one key difference: arrival and departure delays are NOT greater than 5 mins
customer satisfaction rating = 4
# Mine association rules for the (3,4] rating bin; the appearance argument
# pins that satisfaction item to the rule antecedent (lhs). maxlen is left at
# the package default here.
ruleset <- apriori(
  dfX,
  parameter = list(
    support = 0.1,
    confidence = 0.5,
    minlen = 2,
    maxtime = 10
  ),
  appearance = list(lhs = "satisfaction=(3,4]")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 10 0.1 2
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 12943
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[896 item(s), 129435 transaction(s)] done [0.43s].
## sorting and recoding items ... [73 item(s)] done [0.04s].
## creating transaction tree ... done [0.09s].
## checking subsets of size 1 2 done [0.04s].
## writing ... [9 rule(s)] done [0.00s].
## creating S4 object ... done [0.03s].
plot(ruleset)

# Keep only rules whose antecedent contains the rating-4 item and whose lift
# exceeds 1, then plot what is left.
ruleset_filter <- subset(
  ruleset,
  subset = lhs %ain% "satisfaction=(3,4]" & lift > 1
)
plot(ruleset_filter)

summary(ruleset_filter)
## set of 5 rules
##
## rule length distribution (lhs + rhs):sizes
## 2
## 5
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 2 2 2 2 2
##
## summary of quality measures:
## support confidence lift count
## Min. :0.2900 Min. :0.7007 Min. :1.004 Min. :37539
## 1st Qu.:0.3055 1st Qu.:0.7381 1st Qu.:1.034 1st Qu.:39540
## Median :0.3415 Median :0.8251 Median :1.060 Median :44202
## Mean :0.3379 Mean :0.8164 Mean :1.116 Mean :43735
## 3rd Qu.:0.3446 3rd Qu.:0.8325 3rd Qu.:1.123 3rd Qu.:44602
## Max. :0.4079 Max. :0.9854 Max. :1.357 Max. :52792
##
## mining info:
## data ntransactions support confidence
## dfX 129435 0.1 0.5
# Order the filtered rules by absolute count, largest first, and list them.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
ruleset_filter <- sort(ruleset_filter, decreasing = TRUE, by = "count")
inspect(ruleset_filter)
## lhs rhs support confidence lift count
## [1] {satisfaction=(3,4]} => {flight_cancelled=No} 0.4078650 0.9854218 1.003960 52792
## [2] {satisfaction=(3,4]} => {type_of_travel=Business} 0.3445899 0.8325462 1.357443 44602
## [3] {satisfaction=(3,4]} => {departure_delay_greater_5_mins=no} 0.3414996 0.8250798 1.059540 44202
## [4] {satisfaction=(3,4]} => {arrival_delay_greater_5_mins=no} 0.3054815 0.7380584 1.122912 39540
## [5] {satisfaction=(3,4]} => {price_sensitivity=(0,1]} 0.2900220 0.7007074 1.033727 37539
- Customers with one or more of the following attributes (business travel, no departure/arrival delay greater than 5 mins, flight not cancelled, price_sensitivity=1) give ratings=4
customer satisfaction rating = 5
# Mine association rules for the top rating bin, (4,5]; the appearance
# argument pins that satisfaction item to the rule antecedent (lhs).
ruleset <- apriori(
  dfX,
  parameter = list(
    support = 0.01,
    confidence = 0.5,
    minlen = 2,
    maxlen = 6,
    maxtime = 10
  ),
  appearance = list(lhs = "satisfaction=(4,5]")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 10 0.01 2
## maxlen target ext
## 6 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 1294
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[896 item(s), 129435 transaction(s)] done [0.46s].
## sorting and recoding items ... [282 item(s)] done [0.05s].
## creating transaction tree ... done [0.09s].
## checking subsets of size 1 2 done [0.02s].
## writing ... [8 rule(s)] done [0.00s].
## creating S4 object ... done [0.04s].
# Keep only rules whose antecedent contains the rating-5 item and whose lift
# exceeds 1, then plot what is left.
ruleset_filter <- subset(
  ruleset,
  subset = lhs %ain% "satisfaction=(4,5]" & lift > 1
)
plot(ruleset_filter)

summary(ruleset_filter)
## set of 5 rules
##
## rule length distribution (lhs + rhs):sizes
## 2
## 5
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 2 2 2 2 2
##
## summary of quality measures:
## support confidence lift count
## Min. :0.06416 Min. :0.6640 Min. :1.005 Min. : 8305
## 1st Qu.:0.07536 1st Qu.:0.7798 1st Qu.:1.016 1st Qu.: 9754
## Median :0.07901 Median :0.8176 Median :1.150 Median :10227
## Mean :0.08101 Mean :0.8383 Mean :1.243 Mean :10486
## 3rd Qu.:0.09017 3rd Qu.:0.9331 3rd Qu.:1.521 3rd Qu.:11671
## Max. :0.09636 Max. :0.9971 Max. :1.525 Max. :12472
##
## mining info:
## data ntransactions support confidence
## dfX 129435 0.01 0.5
# Order the filtered rules by lift (not count, unlike the other rating groups),
# largest first, and list them.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
ruleset_filter <- sort(ruleset_filter, decreasing = TRUE, by = "lift")
inspect(ruleset_filter)
## lhs rhs support
## [1] {satisfaction=(4,5]} => {gender=Male} 0.06416348
## [2] {satisfaction=(4,5]} => {type_of_travel=Business} 0.09016881
## [3] {satisfaction=(4,5]} => {price_sensitivity=(0,1]} 0.07535829
## [4] {satisfaction=(4,5]} => {flight_cancelled=No} 0.09635724
## [5] {satisfaction=(4,5]} => {class=Economy} 0.07901263
## confidence lift count
## [1] 0.6639751 1.524571 8305
## [2] 0.9330828 1.521365 11671
## [3] 0.7798209 1.150440 9754
## [4] 0.9971218 1.015880 12472
## [5] 0.8176367 1.004802 10227
- Customers with one or more of the following attributes give a rating=5: male, business trip, economy class, price sensitivity=1, flight not cancelled
Let’s dig deeper into why females give lower ratings
# Mine rules over all items (no appearance constraint) with a high confidence
# floor; the gender/rating restriction is applied afterwards with subset().
female_ruleset <- apriori(
  dfX,
  parameter = list(
    support = 0.01,
    confidence = 0.8,
    minlen = 2,
    maxlen = 6,
    maxtime = 15
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 15 0.01 2
## maxlen target ext
## 6 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 1294
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[896 item(s), 129435 transaction(s)] done [0.44s].
## sorting and recoding items ... [282 item(s)] done [0.05s].
## creating transaction tree ... done [0.09s].
## checking subsets of size 1 2 3 4 5 6
## Warning in apriori(dfX, parameter = list(support = 0.01, confidence =
## 0.8, : Mining stopped (maxlen reached). Only patterns up to a length of 6
## returned!
## done [29.46s].
## writing ... [1158073 rule(s)] done [0.20s].
## creating S4 object ... done [0.24s].
# Keep rules whose antecedent contains both the Female item and the rating-1
# item and whose lift exceeds 1, then plot them.
female_filter <- subset(
  female_ruleset,
  subset = lhs %ain% c("gender=Female", "satisfaction=(0,1]") & lift > 1
)
plot(female_filter)

# Order the filtered rules by absolute count, largest first, and summarise them.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
female_filter <- sort(female_filter, decreasing = TRUE, by = "count")
summary(female_filter)
## set of 7 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4 5
## 1 4 2
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 4.000 4.000 4.143 4.500 5.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.01031 Min. :0.8153 Min. :1.001 Min. :1334
## 1st Qu.:0.01040 1st Qu.:0.9209 1st Qu.:1.002 1st Qu.:1346
## Median :0.01119 Median :0.9260 Median :1.344 Median :1448
## Mean :0.01131 Mean :0.9250 Mean :1.200 Mean :1463
## 3rd Qu.:0.01195 3rd Qu.:0.9543 3rd Qu.:1.348 3rd Qu.:1546
## Max. :0.01296 Max. :0.9831 Max. :1.352 Max. :1678
##
## mining info:
## data ntransactions support confidence
## dfX 129435 0.01 0.8
inspect(female_filter)
## lhs rhs support confidence lift count
## [1] {satisfaction=(0,1],
## gender=Female} => {airline_status=Blue} 0.01296404 0.9260486 1.351850 1678
## [2] {satisfaction=(0,1],
## gender=Female,
## flight_cancelled=No} => {airline_status=Blue} 0.01270908 0.9262387 1.352127 1645
## [3] {satisfaction=(0,1],
## gender=Female,
## class=Economy} => {flight_cancelled=No} 0.01118708 0.9823609 1.000841 1448
## [4] {satisfaction=(0,1],
## gender=Female,
## flight_cancelled=No} => {class=Economy} 0.01118708 0.8153153 1.001950 1448
## [5] {satisfaction=(0,1],
## gender=Female,
## class=Economy} => {airline_status=Blue} 0.01048403 0.9206242 1.343931 1357
## [6] {satisfaction=(0,1],
## airline_status=Blue,
## gender=Female,
## class=Economy} => {flight_cancelled=No} 0.01030633 0.9830508 1.001544 1334
## [7] {satisfaction=(0,1],
## gender=Female,
## class=Economy,
## flight_cancelled=No} => {airline_status=Blue} 0.01030633 0.9212707 1.344875 1334
- 7 rules (sorted by decreasing count, confidence > .8, lift > 1) were generated for female travelers (rating = 1) with the following attributes: flight not cancelled, blue status, economy class
# Mine rules over all items (no appearance constraint) with a high confidence
# floor; the gender/rating restriction is applied afterwards with subset().
female_ruleset <- apriori(
  dfX,
  parameter = list(
    support = 0.01,
    confidence = 0.8,
    minlen = 2,
    maxlen = 6,
    maxtime = 10
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 10 0.01 2
## maxlen target ext
## 6 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 1294
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[896 item(s), 129435 transaction(s)] done [0.40s].
## sorting and recoding items ... [282 item(s)] done [0.05s].
## creating transaction tree ... done [0.10s].
## checking subsets of size 1 2 3 4 5
## Warning in apriori(dfX, parameter = list(support = 0.01, confidence =
## 0.8, : Mining stopped (time limit reached). Only patterns up to a length of
## 5 returned!
## done [13.04s].
## writing ... [533271 rule(s)] done [0.08s].
## creating S4 object ... done [0.12s].
# Keep rules whose antecedent contains both the Female item and the rating-2
# item and whose lift exceeds 8, then plot them.
female_filter <- subset(
  female_ruleset,
  subset = lhs %ain% c("gender=Female", "satisfaction=(1,2]") & lift > 8
)
plot(female_filter)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

# Order the filtered rules by absolute count, largest first, and summarise them.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
female_filter <- sort(female_filter, decreasing = TRUE, by = "count")
summary(female_filter)
## set of 18 rules
##
## rule length distribution (lhs + rhs):sizes
## 4 5
## 4 14
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.000 5.000 5.000 4.778 5.000 5.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.01002 Min. :1 Min. :8.432 Min. :1297
## 1st Qu.:0.01143 1st Qu.:1 1st Qu.:8.432 1st Qu.:1479
## Median :0.01169 Median :1 Median :8.432 Median :1513
## Mean :0.01214 Mean :1 Mean :8.854 Mean :1571
## 3rd Qu.:0.01308 3rd Qu.:1 3rd Qu.:9.382 3rd Qu.:1693
## Max. :0.01458 Max. :1 Max. :9.382 Max. :1887
##
## mining info:
## data ntransactions support confidence
## dfX 129435 0.01 0.8
inspect(female_filter)
## lhs rhs support confidence lift count
## [1] {satisfaction=(1,2],
## gender=Female,
## airline_code=EV} => {airline_name=FlyFast} 0.01457875 1 8.432248 1887
## [2] {satisfaction=(1,2],
## gender=Female,
## airline_name=FlyFast} => {airline_code=EV} 0.01457875 1 8.432248 1887
## [3] {satisfaction=(1,2],
## gender=Female,
## airline_code=EV,
## flight_cancelled=No} => {airline_name=FlyFast} 0.01386796 1 8.432248 1795
## [4] {satisfaction=(1,2],
## gender=Female,
## airline_name=FlyFast,
## flight_cancelled=No} => {airline_code=EV} 0.01386796 1 8.432248 1795
## [5] {satisfaction=(1,2],
## airline_status=Blue,
## gender=Female,
## airline_code=EV} => {airline_name=FlyFast} 0.01307992 1 8.432248 1693
## [6] {satisfaction=(1,2],
## airline_status=Blue,
## gender=Female,
## airline_name=FlyFast} => {airline_code=EV} 0.01307992 1 8.432248 1693
## [7] {satisfaction=(1,2],
## gender=Female,
## type_of_travel=Personal,
## airline_code=EV} => {airline_name=FlyFast} 0.01249276 1 8.432248 1617
## [8] {satisfaction=(1,2],
## gender=Female,
## type_of_travel=Personal,
## airline_name=FlyFast} => {airline_code=EV} 0.01249276 1 8.432248 1617
## [9] {satisfaction=(1,2],
## gender=Female,
## class=Economy,
## airline_code=EV} => {airline_name=FlyFast} 0.01168926 1 8.432248 1513
## [10] {satisfaction=(1,2],
## gender=Female,
## class=Economy,
## airline_name=FlyFast} => {airline_code=EV} 0.01168926 1 8.432248 1513
## [11] {satisfaction=(1,2],
## gender=Female,
## airline_name=Northwest} => {airline_code=OO} 0.01161973 1 9.382067 1504
## [12] {satisfaction=(1,2],
## gender=Female,
## airline_code=OO} => {airline_name=Northwest} 0.01161973 1 9.382067 1504
## [13] {satisfaction=(1,2],
## gender=Female,
## airline_name=Northwest,
## flight_cancelled=No} => {airline_code=OO} 0.01142658 1 9.382067 1479
## [14] {satisfaction=(1,2],
## gender=Female,
## airline_code=OO,
## flight_cancelled=No} => {airline_name=Northwest} 0.01142658 1 9.382067 1479
## [15] {satisfaction=(1,2],
## airline_status=Blue,
## gender=Female,
## airline_name=Northwest} => {airline_code=OO} 0.01047630 1 9.382067 1356
## [16] {satisfaction=(1,2],
## airline_status=Blue,
## gender=Female,
## airline_code=OO} => {airline_name=Northwest} 0.01047630 1 9.382067 1356
## [17] {satisfaction=(1,2],
## gender=Female,
## type_of_travel=Personal,
## airline_name=Northwest} => {airline_code=OO} 0.01002047 1 9.382067 1297
## [18] {satisfaction=(1,2],
## gender=Female,
## type_of_travel=Personal,
## airline_code=OO} => {airline_name=Northwest} 0.01002047 1 9.382067 1297
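- 18 rules (sorted by decreasing count, minimum confidence = .8, lift > 8) were generated for female travelers (rating = 2)
- all 18 rules pair an airline name with its airline code: FlyFast with EV and Northwest with OO. Each pairing holds with confidence 1 in both directions, so these rules most likely reflect the name/code correspondence rather than a driver of the low rating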
Let’s dig deeper into why females give higher ratings (4)
# Mine rules over all items (no appearance constraint) at a 10% support floor;
# the gender/rating restriction is applied afterwards with subset().
female_ruleset <- apriori(
  dfX,
  parameter = list(
    support = 0.1,
    confidence = 0.5,
    minlen = 2,
    maxlen = 6,
    maxtime = 20
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 20 0.1 2
## maxlen target ext
## 6 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 12943
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[896 item(s), 129435 transaction(s)] done [0.42s].
## sorting and recoding items ... [73 item(s)] done [0.04s].
## creating transaction tree ... done [0.09s].
## checking subsets of size 1 2 3 4 5 6
## Warning in apriori(dfX, parameter = list(support = 0.1, confidence =
## 0.5, : Mining stopped (maxlen reached). Only patterns up to a length of 6
## returned!
## done [1.10s].
## writing ... [13955 rule(s)] done [0.00s].
## creating S4 object ... done [0.03s].
# Keep rules whose antecedent contains both the Female item and the rating-4
# item and whose lift exceeds 1.1, then plot them.
female_filter <- subset(
  female_ruleset,
  subset = lhs %ain% c("gender=Female", "satisfaction=(3,4]") & lift > 1.1
)
plot(female_filter)

# Order the filtered rules by absolute count, largest first, and summarise them.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
female_filter <- sort(female_filter, decreasing = TRUE, by = "count")
summary(female_filter)
## set of 53 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4 5 6
## 2 13 24 14
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 4.000 5.000 4.943 6.000 6.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.1006 Min. :0.7295 Min. :1.110 Min. :13017
## 1st Qu.:0.1136 1st Qu.:0.7930 1st Qu.:1.256 1st Qu.:14698
## Median :0.1289 Median :0.8210 Median :1.303 Median :16689
## Mean :0.1287 Mean :0.8423 Mean :1.273 Mean :16656
## 3rd Qu.:0.1365 3rd Qu.:0.8811 3rd Qu.:1.336 3rd Qu.:17662
## Max. :0.1861 Max. :0.9792 Max. :1.399 Max. :24090
##
## mining info:
## data ntransactions support confidence
## dfX 129435 0.1 0.5
# Show at most the first 20 rules. head() returns fewer rows when fewer rules
# survive the filter, whereas the fixed index range 1:20 would fail in that case.
inspect(head(female_filter, n = 20))
## lhs rhs support confidence lift count
## [1] {satisfaction=(3,4],
## gender=Female} => {type_of_travel=Business} 0.1861166 0.8215394 1.339497 24090
## [2] {satisfaction=(3,4],
## gender=Female,
## flight_cancelled=No} => {type_of_travel=Business} 0.1836057 0.8242006 1.343836 23765
## [3] {satisfaction=(3,4],
## gender=Female} => {arrival_delay_greater_5_mins=no} 0.1707421 0.7536746 1.146671 22100
## [4] {satisfaction=(3,4],
## gender=Female,
## arrival_delay_greater_5_mins=no} => {departure_delay_greater_5_mins=no} 0.1671186 0.9787783 1.256914 21631
## [5] {satisfaction=(3,4],
## gender=Female,
## departure_delay_greater_5_mins=no} => {arrival_delay_greater_5_mins=no} 0.1671186 0.8817822 1.341579 21631
## [6] {satisfaction=(3,4],
## gender=Female,
## flight_cancelled=No} => {arrival_delay_greater_5_mins=no} 0.1669641 0.7494971 1.140315 21611
## [7] {satisfaction=(3,4],
## gender=Female,
## flight_cancelled=No,
## arrival_delay_greater_5_mins=no} => {departure_delay_greater_5_mins=no} 0.1633639 0.9784369 1.256476 21145
## [8] {satisfaction=(3,4],
## gender=Female,
## departure_delay_greater_5_mins=no,
## flight_cancelled=No} => {arrival_delay_greater_5_mins=no} 0.1633639 0.8793928 1.337944 21145
## [9] {satisfaction=(3,4],
## gender=Female,
## departure_delay_greater_5_mins=no} => {type_of_travel=Business} 0.1534361 0.8095879 1.320010 19860
## [10] {satisfaction=(3,4],
## gender=Female,
## departure_delay_greater_5_mins=no,
## flight_cancelled=No} => {type_of_travel=Business} 0.1509484 0.8125598 1.324856 19538
## [11] {satisfaction=(3,4],
## gender=Female,
## class=Economy} => {type_of_travel=Business} 0.1421640 0.8159365 1.330361 18401
## [12] {satisfaction=(3,4],
## gender=Female,
## class=Economy,
## flight_cancelled=No} => {type_of_travel=Business} 0.1400626 0.8189827 1.335328 18129
## [13] {satisfaction=(3,4],
## gender=Female,
## type_of_travel=Business} => {arrival_delay_greater_5_mins=no} 0.1364546 0.7331673 1.115470 17662
## [14] {satisfaction=(3,4],
## gender=Female,
## arrival_delay_greater_5_mins=no} => {type_of_travel=Business} 0.1364546 0.7991855 1.303049 17662
## [15] {satisfaction=(3,4],
## gender=Female,
## type_of_travel=Business,
## flight_cancelled=No} => {arrival_delay_greater_5_mins=no} 0.1339437 0.7295182 1.109918 17337
## [16] {satisfaction=(3,4],
## gender=Female,
## flight_cancelled=No,
## arrival_delay_greater_5_mins=no} => {type_of_travel=Business} 0.1339437 0.8022303 1.308014 17337
## [17] {satisfaction=(3,4],
## gender=Female,
## type_of_travel=Business,
## arrival_delay_greater_5_mins=no} => {departure_delay_greater_5_mins=no} 0.1335806 0.9789378 1.257119 17290
## [18] {satisfaction=(3,4],
## gender=Female,
## type_of_travel=Business,
## departure_delay_greater_5_mins=no} => {arrival_delay_greater_5_mins=no} 0.1335806 0.8705942 1.324557 17290
## [19] {satisfaction=(3,4],
## gender=Female,
## departure_delay_greater_5_mins=no,
## arrival_delay_greater_5_mins=no} => {type_of_travel=Business} 0.1335806 0.7993158 1.303262 17290
## [20] {satisfaction=(3,4],
## gender=Female,
## class=Economy} => {arrival_delay_greater_5_mins=no} 0.1318345 0.7566513 1.151200 17064
- 53 rules were generated; the top 20 rules (confidence > .7, lift > 1.1) were mostly for female business travelers and/or flights with no delay greater than 5 mins.
- No personal travel was observed in the top 20 rules shown
- Business travel was the number 1 rule with 24090 observations
Let’s dig deeper into why females give higher ratings (5)
# Mine rules over all items (no appearance constraint), requiring at least
# three items per rule; the gender/rating restriction is applied afterwards.
female_ruleset <- apriori(
  dfX,
  parameter = list(
    support = 0.01,
    confidence = 0.5,
    minlen = 3,
    maxlen = 6,
    maxtime = 10
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 10 0.01 3
## maxlen target ext
## 6 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 1294
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[896 item(s), 129435 transaction(s)] done [0.44s].
## sorting and recoding items ... [282 item(s)] done [0.05s].
## creating transaction tree ... done [0.11s].
## checking subsets of size 1 2 3 4 5
## Warning in apriori(dfX, parameter = list(support = 0.01, confidence =
## 0.5, : Mining stopped (time limit reached). Only patterns up to a length of
## 5 returned!
## done [12.94s].
## writing ... [1335284 rule(s)] done [0.12s].
## creating S4 object ... done [0.22s].
# Keep rules whose antecedent contains both the Female item and the rating-5
# item and whose lift exceeds 1.5, then plot them.
female_filter <- subset(
  female_ruleset,
  subset = lhs %ain% c("gender=Female", "satisfaction=(4,5]") & lift > 1.5
)
plot(female_filter)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

# Order the filtered rules by absolute count, largest first.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
female_filter <- sort(female_filter, decreasing = TRUE, by = "count")
# Show at most the first 20 rules. head() returns fewer rows when fewer rules
# survive the filter, whereas the fixed index range 1:20 would fail in that case.
inspect(head(female_filter, n = 20))
## lhs rhs support confidence lift count
## [1] {satisfaction=(4,5],
## gender=Female} => {type_of_travel=Business} 0.03025457 0.9317154 1.519136 3916
## [2] {satisfaction=(4,5],
## gender=Female,
## flight_cancelled=No} => {type_of_travel=Business} 0.03013868 0.9316933 1.519100 3901
## [3] {satisfaction=(4,5],
## gender=Female,
## class=Economy} => {type_of_travel=Business} 0.02320856 0.9291680 1.514982 3004
## [4] {satisfaction=(4,5],
## gender=Female,
## class=Economy,
## flight_cancelled=No} => {type_of_travel=Business} 0.02311585 0.9291925 1.515022 2992
## [5] {satisfaction=(4,5],
## gender=Female,
## price_sensitivity=(0,1]} => {type_of_travel=Business} 0.02309267 0.9308627 1.517745 2989
## [6] {satisfaction=(4,5],
## gender=Female,
## departure_delay_greater_5_mins=no} => {type_of_travel=Business} 0.02304632 0.9269733 1.511404 2983
## [7] {satisfaction=(4,5],
## gender=Female,
## price_sensitivity=(0,1],
## flight_cancelled=No} => {type_of_travel=Business} 0.02299996 0.9308943 1.517797 2977
## [8] {satisfaction=(4,5],
## gender=Female,
## departure_delay_greater_5_mins=no,
## flight_cancelled=No} => {type_of_travel=Business} 0.02293043 0.9269207 1.511318 2968
## [9] {satisfaction=(4,5],
## gender=Female,
## arrival_delay_greater_5_mins=no} => {type_of_travel=Business} 0.01952331 0.9259802 1.509785 2527
## [10] {satisfaction=(4,5],
## gender=Female,
## flight_cancelled=No,
## arrival_delay_greater_5_mins=no} => {type_of_travel=Business} 0.01940742 0.9259123 1.509674 2512
## [11] {satisfaction=(4,5],
## gender=Female,
## departure_delay_greater_5_mins=no,
## arrival_delay_greater_5_mins=no} => {type_of_travel=Business} 0.01909839 0.9254961 1.508995 2472
## [12] {satisfaction=(4,5],
## gender=Female,
## class=Economy,
## departure_delay_greater_5_mins=no} => {type_of_travel=Business} 0.01783134 0.9257922 1.509478 2308
## [13] {satisfaction=(4,5],
## gender=Female,
## price_sensitivity=(0,1],
## departure_delay_greater_5_mins=no} => {type_of_travel=Business} 0.01764592 0.9265720 1.510749 2284
## [14] {satisfaction=(4,5],
## gender=Female,
## price_sensitivity=(0,1],
## class=Economy} => {type_of_travel=Business} 0.01750686 0.9290693 1.514821 2266
## [15] {satisfaction=(4,5],
## gender=Female,
## class=Economy,
## arrival_delay_greater_5_mins=no} => {type_of_travel=Business} 0.01511183 0.9256981 1.509325 1956
## [16] {satisfaction=(4,5],
## gender=Female,
## num_loyalty_cards=(-1,0]} => {type_of_travel=Business} 0.01508865 0.9335564 1.522137 1953
## [17] {satisfaction=(4,5],
## gender=Female,
## num_loyalty_cards=(-1,0],
## flight_cancelled=No} => {type_of_travel=Business} 0.01503457 0.9337812 1.522504 1946
## [18] {satisfaction=(4,5],
## gender=Female,
## price_sensitivity=(0,1],
## arrival_delay_greater_5_mins=no} => {type_of_travel=Business} 0.01502685 0.9253092 1.508691 1945
## [19] {satisfaction=(4,5],
## gender=Female,
## airport_shopping=(-1,0]} => {type_of_travel=Business} 0.01453239 0.9238703 1.506344 1881
## [20] {satisfaction=(4,5],
## gender=Female,
## airport_shopping=(-1,0],
## flight_cancelled=No} => {type_of_travel=Business} 0.01448604 0.9236453 1.505978 1875
- Of the 54 rules returned (confidence > .9, lift > 1.5), all of them include female travelers with the following characteristics: type_of_travel=Business
- Surprisingly, class=Economy shows up as the highest association with/without price_sensitivity=1 and satisfaction=5
- Unsurprisingly, arrival_delay_greater_5_mins = no is a common pairing in the rules
- This is the first ruleset where age is a common pairing. Female, age groups of 34-46 and 46-58 are associated with business travel and rating=5
- Silver status was observed in two rules. No Blue status were observed in any of the rules
Restrict the ruleset to associations for females aged 46-58 with a rating of 4
# Mine rules over all items (no appearance constraint); the gender, age and
# rating restriction is applied afterwards with subset().
ruleset <- apriori(
  dfX,
  parameter = list(
    support = 0.01,
    confidence = 0.5,
    minlen = 2,
    maxlen = 6,
    maxtime = 20
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 20 0.01 2
## maxlen target ext
## 6 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 1294
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[896 item(s), 129435 transaction(s)] done [0.41s].
## sorting and recoding items ... [282 item(s)] done [0.05s].
## creating transaction tree ... done [0.10s].
## checking subsets of size 1 2 3 4 5 6
## Warning in apriori(dfX, parameter = list(support = 0.01, confidence =
## 0.5, : Mining stopped (maxlen reached). Only patterns up to a length of 6
## returned!
## done [30.43s].
## writing ... [2741159 rule(s)] done [0.31s].
## creating S4 object ... done [0.70s].
# Keep rules whose LHS contains all three items (female, rating in (3,4],
# age in (46,58]) and whose lift exceeds 1.5. Filter the `ruleset` mined
# immediately above rather than `female_ruleset`, which is not assigned
# anywhere in this section.
female_rules <- subset(
  ruleset,
  subset = lhs %ain% c("gender=Female", "satisfaction=(3,4]", "age=(46,58]") &
    lift > 1.5
)
plot(female_rules)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

# Order the filtered rules from highest to lowest count, then print them.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
female_rules <- sort(female_rules, decreasing = TRUE, by = "count")
inspect(female_rules)
## lhs rhs support confidence lift count
## [1] {satisfaction=(3,4],
## age=(46,58],
## gender=Female,
## percent_flight_other_airlines=(0,4]} => {num_loyalty_cards=(-1,0]} 0.01551358 0.8243021 1.555753 2008
## [2] {satisfaction=(3,4],
## age=(46,58],
## gender=Female,
## flight_distance=(32,363]} => {flight_time_in_minutes=(7,61]} 0.01333488 0.9239829 3.487874 1726
## [3] {satisfaction=(3,4],
## age=(46,58],
## gender=Female,
## flight_time_in_minutes=(7,61]} => {flight_distance=(32,363]} 0.01333488 0.8621379 3.428816 1726
## [4] {satisfaction=(3,4],
## age=(46,58],
## gender=Female,
## flight_time_in_minutes=(142,670]} => {flight_distance=(1.02e+03,4.98e+03]} 0.01219917 0.8733407 3.509387 1579
## [5] {satisfaction=(3,4],
## age=(46,58],
## gender=Female,
## flight_distance=(1.02e+03,4.98e+03]} => {flight_time_in_minutes=(142,670]} 0.01219917 0.8675824 3.565956 1579
## [6] {satisfaction=(3,4],
## age=(46,58],
## gender=Female,
## airline_code=WN} => {airline_name=Cheapseats} 0.01093985 1.0000000 4.983828 1416
## [7] {satisfaction=(3,4],
## age=(46,58],
## gender=Female,
## airline_name=Cheapseats} => {airline_code=WN} 0.01093985 1.0000000 4.983828 1416
## [8] {satisfaction=(3,4],
## age=(46,58],
## gender=Female,
## flight_time_in_minutes=(61,95]} => {flight_distance=(363,630]} 0.01039132 0.7756632 3.103684 1345
## [9] {satisfaction=(3,4],
## age=(46,58],
## gender=Female,
## flight_distance=(363,630]} => {flight_time_in_minutes=(61,95]} 0.01039132 0.7509771 3.063047 1345
- 4 rules show with high confidence (1.0) and lift (4.98) that females, age 46-58, flying with Cheapseats on airline code WN give a satisfaction rating of 4
- Surprisingly, the top 3 of the 32 rules returned were for female business travelers, age 46-58, and 0 loyalty cards
- These travelers also seem to either take shorter flights that are about an hour in length, or longer flights that are up to 12 hours
get ruleset for females age(34-46) with rating 4
# Re-mine dfX with a higher support floor (0.05); confidence >= 0.5,
# rule length between 2 and 6 items, maxtime = 20.
ruleset <- apriori(
  dfX,
  parameter = list(
    support = 0.05,
    confidence = 0.5,
    minlen = 2,
    maxlen = 6,
    maxtime = 20
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 20 0.05 2
## maxlen target ext
## 6 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 6471
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[896 item(s), 129435 transaction(s)] done [0.41s].
## sorting and recoding items ... [98 item(s)] done [0.05s].
## creating transaction tree ... done [0.09s].
## checking subsets of size 1 2 3 4 5 6
## Warning in apriori(dfX, parameter = list(support = 0.05, confidence =
## 0.5, : Mining stopped (maxlen reached). Only patterns up to a length of 6
## returned!
## done [3.75s].
## writing ... [87030 rule(s)] done [0.01s].
## creating S4 object ... done [0.05s].
# Keep rules whose LHS contains all three items (female, rating in (3,4],
# age in (34,46]) and whose lift is above 1, then plot the result.
female_filter <- subset(
  ruleset,
  subset = lhs %ain% c("gender=Female", "satisfaction=(3,4]", "age=(34,46]") &
    lift > 1
)
plot(female_filter)

# Order the filtered rules from highest to lowest lift, then summarise them.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
female_filter <- sort(female_filter, decreasing = TRUE, by = "lift")
summary(female_filter)
## set of 19 rules
##
## rule length distribution (lhs + rhs):sizes
## 4 5 6
## 5 11 3
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.000 4.500 5.000 4.895 5.000 6.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.05023 Min. :0.7167 Min. :1.000 Min. :6501
## 1st Qu.:0.05086 1st Qu.:0.8211 1st Qu.:1.006 1st Qu.:6582
## Median :0.05140 Median :0.8712 Median :1.057 Median :6653
## Mean :0.05481 Mean :0.8858 Mean :1.127 Mean :7095
## 3rd Qu.:0.05826 3rd Qu.:0.9831 3rd Qu.:1.255 3rd Qu.:7540
## Max. :0.07048 Max. :0.9904 Max. :1.388 Max. :9123
##
## mining info:
## data ntransactions support confidence
## dfX 129435 0.05 0.5
# Print each filtered rule (already sorted by lift) with its quality measures.
inspect(female_filter)
## lhs rhs support confidence lift count
## [1] {satisfaction=(3,4],
## age=(34,46],
## gender=Female,
## flight_cancelled=No} => {type_of_travel=Business} 0.05999150 0.8511455 1.387769 7765
## [2] {satisfaction=(3,4],
## age=(34,46],
## gender=Female} => {type_of_travel=Business} 0.06057094 0.8481177 1.382832 7840
## [3] {satisfaction=(3,4],
## age=(34,46],
## gender=Female,
## departure_delay_greater_5_mins=no} => {arrival_delay_greater_5_mins=no} 0.05116081 0.8712012 1.325480 6622
## [4] {satisfaction=(3,4],
## age=(34,46],
## gender=Female,
## departure_delay_greater_5_mins=no,
## flight_cancelled=No} => {arrival_delay_greater_5_mins=no} 0.05022598 0.8691176 1.322310 6501
## [5] {satisfaction=(3,4],
## age=(34,46],
## gender=Female,
## arrival_delay_greater_5_mins=no} => {departure_delay_greater_5_mins=no} 0.05116081 0.9775613 1.255351 6622
## [6] {satisfaction=(3,4],
## age=(34,46],
## gender=Female,
## flight_cancelled=No,
## arrival_delay_greater_5_mins=no} => {departure_delay_greater_5_mins=no} 0.05022598 0.9771532 1.254827 6501
## [7] {satisfaction=(3,4],
## age=(34,46],
## gender=Female} => {arrival_delay_greater_5_mins=no} 0.05233515 0.7327997 1.114911 6774
## [8] {satisfaction=(3,4],
## age=(34,46],
## gender=Female,
## flight_cancelled=No} => {arrival_delay_greater_5_mins=no} 0.05140032 0.7292557 1.109519 6653
## [9] {satisfaction=(3,4],
## age=(34,46],
## gender=Female,
## flight_cancelled=No} => {price_sensitivity=(0,1]} 0.05055047 0.7171983 1.058055 6543
## [10] {satisfaction=(3,4],
## age=(34,46],
## gender=Female} => {price_sensitivity=(0,1]} 0.05118399 0.7166811 1.057292 6625
## [11] {satisfaction=(3,4],
## age=(34,46],
## gender=Female} => {departure_delay_greater_5_mins=no} 0.05872446 0.8222631 1.055923 7601
## [12] {satisfaction=(3,4],
## age=(34,46],
## gender=Female,
## flight_cancelled=No} => {departure_delay_greater_5_mins=no} 0.05778962 0.8199057 1.052896 7480
## [13] {satisfaction=(3,4],
## age=(34,46],
## gender=Female,
## type_of_travel=Business} => {flight_cancelled=No} 0.05999150 0.9904337 1.009066 7765
## [14] {satisfaction=(3,4],
## age=(34,46],
## gender=Female,
## price_sensitivity=(0,1]} => {flight_cancelled=No} 0.05055047 0.9876226 1.006202 6543
## [15] {satisfaction=(3,4],
## age=(34,46],
## gender=Female} => {flight_cancelled=No} 0.07048325 0.9869104 1.005476 9123
## [16] {satisfaction=(3,4],
## age=(34,46],
## gender=Female,
## class=Economy} => {flight_cancelled=No} 0.05569591 0.9853745 1.003912 7209
## [17] {satisfaction=(3,4],
## age=(34,46],
## gender=Female,
## departure_delay_greater_5_mins=no} => {flight_cancelled=No} 0.05778962 0.9840810 1.002594 7480
## [18] {satisfaction=(3,4],
## age=(34,46],
## gender=Female,
## arrival_delay_greater_5_mins=no} => {flight_cancelled=No} 0.05140032 0.9821376 1.000614 6653
## [19] {satisfaction=(3,4],
## age=(34,46],
## gender=Female,
## departure_delay_greater_5_mins=no,
## arrival_delay_greater_5_mins=no} => {flight_cancelled=No} 0.05022598 0.9817276 1.000196 6501
- No surprises here. This analysis shows that female business travelers, age 34-46, with no delays or cancellations, and price_sensitivity=1 give ratings=4
get ruleset to only show associations for females age(46-58) with ratings of 5
# Mine association rules over dfX: support >= 0.01, confidence >= 0.5,
# rule length between 2 and 6 items, maxtime = 20.
female_ruleset <- apriori(
  dfX,
  parameter = list(
    support = 0.01,
    confidence = 0.5,
    minlen = 2,
    maxlen = 6,
    maxtime = 20
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 20 0.01 2
## maxlen target ext
## 6 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 1294
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[896 item(s), 129435 transaction(s)] done [0.46s].
## sorting and recoding items ... [282 item(s)] done [0.06s].
## creating transaction tree ... done [0.11s].
## checking subsets of size 1 2 3 4 5 6
## Warning in apriori(dfX, parameter = list(support = 0.01, confidence =
## 0.5, : Mining stopped (maxlen reached). Only patterns up to a length of 6
## returned!
## done [30.85s].
## writing ... [2741159 rule(s)] done [0.26s].
## creating S4 object ... done [0.48s].
# Keep rules whose LHS contains all three items (female, rating in (4,5],
# age in (46,58]) and whose lift is above 1.
female_filter <- subset(
  female_ruleset,
  subset = lhs %ain% c("gender=Female", "satisfaction=(4,5]", "age=(46,58]") &
    lift > 1
)
# Order from highest to lowest lift. `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
female_filter <- sort(female_filter, decreasing = TRUE, by = "lift")
summary(female_filter)
## set of 4 rules
##
## rule length distribution (lhs + rhs):sizes
## 4 5
## 2 2
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.0 4.0 4.5 4.5 5.0 5.0
##
## summary of quality measures:
## support confidence lift count
## Min. :0.01018 Min. :0.9341 Min. :1.014 Min. :1318
## 1st Qu.:0.01018 1st Qu.:0.9343 1st Qu.:1.014 1st Qu.:1318
## Median :0.01021 Median :0.9649 Median :1.269 Median :1321
## Mean :0.01037 Mean :0.9649 Mean :1.269 Mean :1343
## 3rd Qu.:0.01040 3rd Qu.:0.9955 3rd Qu.:1.523 3rd Qu.:1346
## Max. :0.01090 Max. :0.9958 Max. :1.523 Max. :1411
##
## mining info:
## data ntransactions support confidence
## dfX 129435 0.01 0.5
# Print each filtered rule (already sorted by lift) with its quality measures.
inspect(female_filter)
## lhs rhs support confidence lift count
## [1] {satisfaction=(4,5],
## age=(46,58],
## gender=Female} => {type_of_travel=Business} 0.01022907 0.9343684 1.523461 1324
## [2] {satisfaction=(4,5],
## age=(46,58],
## gender=Female,
## flight_cancelled=No} => {type_of_travel=Business} 0.01018272 0.9340893 1.523006 1318
## [3] {satisfaction=(4,5],
## age=(46,58],
## gender=Female} => {flight_cancelled=No} 0.01090122 0.9957657 1.014498 1411
## [4] {satisfaction=(4,5],
## age=(46,58],
## gender=Female,
## type_of_travel=Business} => {flight_cancelled=No} 0.01018272 0.9954683 1.014195 1318
- This analysis again shows that female business travelers, age 46-58, on flights that are on-time give high ratings
get ruleset for females age(34-46) with rating 5
# Mine association rules over dfX: support >= 0.01, confidence >= 0.5,
# rule length between 2 and 6 items, maxtime = 20.
# NOTE(review): these parameters are identical to the previous
# `female_ruleset` mining call, so this run looks redundant -- confirm.
female_ruleset <- apriori(
  dfX,
  parameter = list(
    support = 0.01,
    confidence = 0.5,
    minlen = 2,
    maxlen = 6,
    maxtime = 20
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 20 0.01 2
## maxlen target ext
## 6 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 1294
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[896 item(s), 129435 transaction(s)] done [0.42s].
## sorting and recoding items ... [282 item(s)] done [0.05s].
## creating transaction tree ... done [0.11s].
## checking subsets of size 1 2 3 4 5 6
## Warning in apriori(dfX, parameter = list(support = 0.01, confidence =
## 0.5, : Mining stopped (maxlen reached). Only patterns up to a length of 6
## returned!
## done [30.59s].
## writing ... [2741159 rule(s)] done [0.32s].
## creating S4 object ... done [0.51s].
# Keep rules whose LHS contains all three items (female, rating in (4,5],
# age in (34,46]) and whose lift is above 1.
female_filter <- subset(
  female_ruleset,
  subset = lhs %ain% c("gender=Female", "satisfaction=(4,5]", "age=(34,46]") &
    lift > 1
)
# Order from highest to lowest lift. `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
female_filter <- sort(female_filter, decreasing = TRUE, by = "lift")
summary(female_filter)
## set of 9 rules
##
## rule length distribution (lhs + rhs):sizes
## 4 5
## 3 6
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.000 4.000 5.000 4.667 5.000 5.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.01045 Min. :0.7822 Min. :1.014 Min. :1352
## 1st Qu.:0.01062 1st Qu.:0.9257 1st Qu.:1.015 1st Qu.:1375
## Median :0.01065 Median :0.9956 Median :1.016 Median :1379
## Mean :0.01158 Mean :0.9331 Mean :1.156 Mean :1499
## 3rd Qu.:0.01257 3rd Qu.:0.9966 3rd Qu.:1.155 3rd Qu.:1627
## Max. :0.01357 Max. :0.9971 Max. :1.510 Max. :1757
##
## mining info:
## data ntransactions support confidence
## dfX 129435 0.01 0.5
# Print each filtered rule (already sorted by lift) with its quality measures.
inspect(female_filter)
## lhs rhs support confidence lift count
## [1] {satisfaction=(4,5],
## age=(34,46],
## gender=Female,
## flight_cancelled=No} => {type_of_travel=Business} 0.01257002 0.9260102 1.509834 1627
## [2] {satisfaction=(4,5],
## age=(34,46],
## gender=Female} => {type_of_travel=Business} 0.01260865 0.9256948 1.509319 1632
## [3] {satisfaction=(4,5],
## age=(34,46],
## gender=Female,
## flight_cancelled=No} => {price_sensitivity=(0,1]} 0.01062309 0.7825839 1.154516 1375
## [4] {satisfaction=(4,5],
## age=(34,46],
## gender=Female} => {price_sensitivity=(0,1]} 0.01065400 0.7821894 1.153934 1379
## [5] {satisfaction=(4,5],
## age=(34,46],
## gender=Female,
## price_sensitivity=(0,1]} => {flight_cancelled=No} 0.01062309 0.9970993 1.015857 1375
## [6] {satisfaction=(4,5],
## age=(34,46],
## gender=Female,
## type_of_travel=Business} => {flight_cancelled=No} 0.01257002 0.9969363 1.015691 1627
## [7] {satisfaction=(4,5],
## age=(34,46],
## gender=Female} => {flight_cancelled=No} 0.01357438 0.9965967 1.015345 1757
## [8] {satisfaction=(4,5],
## age=(34,46],
## gender=Female,
## class=Economy} => {flight_cancelled=No} 0.01054583 0.9956236 1.014354 1365
## [9] {satisfaction=(4,5],
## age=(34,46],
## gender=Female,
## departure_delay_greater_5_mins=no} => {flight_cancelled=No} 0.01044540 0.9955817 1.014311 1352
- Very similar to preceding analysis: female business travelers, economy class, price_sensitivity=1 give rating=5