## Discretise the numeric columns into ordered categories so they can be used
## as items in the market basket analysis.
## cut() builds right-closed intervals, e.g. (0, 66] and (66, 120] for weight;
## values outside the outer breaks become NA.
## The labels are handed to cut() itself (rather than relabelling its result
## with ordered()) so that an empty bin cannot cause a mismatch between the
## number of labels and the number of levels actually present.
## NOTE(review): units are not visible here (presumably kg and metres) - confirm.
rest[["weight"]] <- cut(
  rest[["weight"]],
  breaks = c(0, 66, 120),
  labels = c("Small", "Large"),
  ordered_result = TRUE
)
rest[["height"]] <- cut(
  rest[["height"]],
  breaks = c(0, 1.65, 2),
  labels = c("short", "tall"),
  ordered_result = TRUE
)
## Birth-year bands: (1930, 1986] = old, (1986, 1989] = middle,
## (1989, 1994] = young.
rest[["birth_year"]] <- cut(
  rest[["birth_year"]],
  breaks = c(1930, 1986, 1989, 1994),
  labels = c("old", "middle", "young"),
  ordered_result = TRUE
)
## Drop columns 18 to 20 by position.
## NOTE(review): which variables these are is not visible here - confirm.
rest <- rest[-(18:20)]
## Replace the overall rating codes 0/1/2 with readable labels (recode() takes
## the whole mapping as one specification string) and store it as a factor.
rest[["rating"]] <- as.factor(
  recode(rest[["rating"]], "'0' = 'bad';'1'='normal';'2'='good'")
)
## The two remaining rating columns keep their codes but become factors too.
rest[c("food_rating", "service_rating")] <-
  lapply(rest[c("food_rating", "service_rating")], as.factor)
## Coerce the table to an arules "transactions" object and summarise it.
rest1 <- as(rest, "transactions")
summary(rest1)
## transactions as itemMatrix in sparse format with
## 4090 rows (elements/itemsets/transactions) and
## 313 columns (items) and a density of 0.06727232
##
## most frequent items:
## marital_status=single activity=student
## 3919 3655
## religion=Catholic Upayment=cash
## 3606 3352
## dress_preference=informal (Other)
## 2651 68937
##
## element (itemset/transaction) length distribution:
## sizes
## 20 21 22
## 68 3724 298
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 20.00 21.00 21.00 21.06 21.00 22.00
##
## includes extended item information - examples:
## labels variables levels
## 1 userID=U1001 userID U1001
## 2 userID=U1002 userID U1002
## 3 userID=U1003 userID U1003
##
## includes extended transaction information - examples:
## transactionID
## 1 1
## 2 2
## 3 3
## Expand the transactions into a logical incidence matrix (one row per
## transaction, one column per item) and print its first two rows.
aa <- as(rest1, "matrix")
aa[seq_len(2), ]
## userID=U1001 userID=U1002 userID=U1003 userID=U1004 userID=U1005
## 1 TRUE FALSE FALSE FALSE FALSE
## 2 TRUE FALSE FALSE FALSE FALSE
## userID=U1006 userID=U1007 userID=U1008 userID=U1009 userID=U1010
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1011 userID=U1012 userID=U1013 userID=U1014 userID=U1015
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1016 userID=U1017 userID=U1018 userID=U1019 userID=U1020
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1021 userID=U1022 userID=U1023 userID=U1026 userID=U1027
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1028 userID=U1029 userID=U1030 userID=U1031 userID=U1032
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1033 userID=U1034 userID=U1035 userID=U1036 userID=U1037
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1038 userID=U1039 userID=U1040 userID=U1041 userID=U1042
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1043 userID=U1044 userID=U1045 userID=U1046 userID=U1047
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1048 userID=U1049 userID=U1050 userID=U1051 userID=U1052
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1053 userID=U1054 userID=U1055 userID=U1056 userID=U1057
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1058 userID=U1059 userID=U1060 userID=U1061 userID=U1062
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1063 userID=U1064 userID=U1065 userID=U1066 userID=U1067
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1068 userID=U1069 userID=U1070 userID=U1071 userID=U1072
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1073 userID=U1074 userID=U1075 userID=U1076 userID=U1077
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1078 userID=U1079 userID=U1080 userID=U1081 userID=U1082
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1083 userID=U1084 userID=U1085 userID=U1086 userID=U1087
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1089 userID=U1090 userID=U1091 userID=U1092 userID=U1093
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1094 userID=U1095 userID=U1096 userID=U1097 userID=U1098
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1099 userID=U1100 userID=U1101 userID=U1102 userID=U1103
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1104 userID=U1105 userID=U1106 userID=U1107 userID=U1108
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1109 userID=U1110 userID=U1111 userID=U1112 userID=U1113
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1114 userID=U1115 userID=U1116 userID=U1117 userID=U1118
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1119 userID=U1120 userID=U1121 userID=U1123 userID=U1124
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1125 userID=U1126 userID=U1127 userID=U1128 userID=U1129
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1131 userID=U1132 userID=U1133 userID=U1134 userID=U1135
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1136 userID=U1137 userID=U1138 smoker drink_level=abstemious
## 1 FALSE FALSE FALSE FALSE TRUE
## 2 FALSE FALSE FALSE FALSE TRUE
## drink_level=casual drinker drink_level=social drinker dress_preference=?
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## dress_preference=elegant dress_preference=formal
## 1 FALSE FALSE
## 2 FALSE FALSE
## dress_preference=informal dress_preference=no preference ambience=?
## 1 TRUE FALSE FALSE
## 2 TRUE FALSE FALSE
## ambience=family ambience=friends ambience=solitary transport=?
## 1 TRUE FALSE FALSE FALSE
## 2 TRUE FALSE FALSE FALSE
## transport=car owner transport=on foot transport=public marital_status=?
## 1 FALSE TRUE FALSE FALSE
## 2 FALSE TRUE FALSE FALSE
## marital_status=married marital_status=single marital_status=widow
## 1 FALSE TRUE FALSE
## 2 FALSE TRUE FALSE
## hijos=? hijos=dependent hijos=independent hijos=kids birth_year=old
## 1 FALSE FALSE TRUE FALSE FALSE
## 2 FALSE FALSE TRUE FALSE FALSE
## birth_year=middle birth_year=young interest=eco-friendly interest=none
## 1 TRUE FALSE FALSE FALSE
## 2 TRUE FALSE FALSE FALSE
## interest=retro interest=technology interest=variety
## 1 FALSE FALSE TRUE
## 2 FALSE FALSE TRUE
## personality=conformist personality=hard-worker
## 1 FALSE FALSE
## 2 FALSE FALSE
## personality=hunter-ostentatious personality=thrifty-protector
## 1 FALSE TRUE
## 2 FALSE TRUE
## religion=Catholic religion=Christian religion=Jewish religion=Mormon
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## religion=none activity=? activity=professional activity=student
## 1 TRUE FALSE FALSE TRUE
## 2 TRUE FALSE FALSE TRUE
## activity=unemployed activity=working-class color=black color=blue
## 1 FALSE FALSE TRUE FALSE
## 2 FALSE FALSE TRUE FALSE
## color=green color=orange color=purple color=red color=white color=yellow
## 1 FALSE FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE FALSE
## weight=Small weight=Large budget=? budget=high budget=low budget=medium
## 1 FALSE TRUE FALSE FALSE FALSE TRUE
## 2 FALSE TRUE FALSE FALSE FALSE TRUE
## height=short height=tall rating=bad rating=good rating=normal
## 1 FALSE TRUE FALSE FALSE TRUE
## 2 FALSE TRUE FALSE FALSE TRUE
## food_rating=0 food_rating=1 food_rating=2 service_rating=0
## 1 FALSE TRUE FALSE FALSE
## 2 FALSE TRUE FALSE FALSE
## service_rating=1 service_rating=2 Rcuisine=Afghan Rcuisine=African
## 1 TRUE FALSE FALSE FALSE
## 2 TRUE FALSE FALSE FALSE
## Rcuisine=American Rcuisine=Armenian Rcuisine=Asian Rcuisine=Australian
## 1 TRUE FALSE FALSE FALSE
## 2 TRUE FALSE FALSE FALSE
## Rcuisine=Austrian Rcuisine=Bagels Rcuisine=Bakery Rcuisine=Bar
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Bar_Pub_Brewery Rcuisine=Barbecue Rcuisine=Basque
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Brazilian Rcuisine=Breakfast-Brunch Rcuisine=British
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Burgers Rcuisine=Burmese Rcuisine=Cafe-Coffee_Shop
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Cafeteria Rcuisine=Cajun-Creole Rcuisine=California
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Cambodian Rcuisine=Canadian Rcuisine=Caribbean Rcuisine=Chilean
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Chinese Rcuisine=Contemporary Rcuisine=Continental-European
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Cuban Rcuisine=Deli-Sandwiches Rcuisine=Dessert-Ice_Cream
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Dim_Sum Rcuisine=Diner Rcuisine=Doughnuts
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Dutch-Belgian Rcuisine=Eastern_European Rcuisine=Eclectic
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Ethiopian Rcuisine=Family Rcuisine=Fast_Food Rcuisine=Filipino
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Fine_Dining Rcuisine=French Rcuisine=Fusion Rcuisine=Game
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=German Rcuisine=Greek Rcuisine=Hawaiian Rcuisine=Hot_Dogs
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Hungarian Rcuisine=Indian-Pakistani Rcuisine=Indigenous
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Indonesian Rcuisine=International Rcuisine=Irish
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Israeli Rcuisine=Italian Rcuisine=Jamaican Rcuisine=Japanese
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Juice Rcuisine=Korean Rcuisine=Kosher Rcuisine=Latin_American
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Lebanese Rcuisine=Malaysian Rcuisine=Mediterranean
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Mexican Rcuisine=Middle_Eastern Rcuisine=Mongolian
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Moroccan Rcuisine=North_African Rcuisine=Organic-Healthy
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Pacific_Northwest Rcuisine=Pacific_Rim Rcuisine=Persian
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Peruvian Rcuisine=Pizzeria Rcuisine=Polish Rcuisine=Polynesian
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Portuguese Rcuisine=Regional Rcuisine=Romanian
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Russian-Ukrainian Rcuisine=Scandinavian Rcuisine=Seafood
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Soup Rcuisine=Southeast_Asian Rcuisine=Southern
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Southwestern Rcuisine=Spanish Rcuisine=Steaks Rcuisine=Sushi
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Swiss Rcuisine=Tapas Rcuisine=Tea_House Rcuisine=Tex-Mex
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Thai Rcuisine=Tibetan Rcuisine=Tunisian Rcuisine=Turkish
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Vegetarian Rcuisine=Vietnamese Upayment=American_Express
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Upayment=bank_debit_cards Upayment=cash Upayment=MasterCard-Eurocard
## 1 FALSE TRUE FALSE
## 2 FALSE TRUE FALSE
## Upayment=VISA
## 1 FALSE
## 2 FALSE
## Plot the frequency of the items whose itemFrequency() exceeds 0.4.
itemFrequencyPlot(x = rest1[, itemFrequency(rest1) > 0.4], cex.names = 1)
## Mine association rules with minimum support 0.05 and minimum confidence
## 0.6; all other apriori() parameters are left at their defaults.
rules <- apriori(
  data = rest1,
  parameter = list(support = 0.05, confidence = 0.6)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.6 0.1 1 none FALSE TRUE 5 0.05 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 204
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[313 item(s), 4090 transaction(s)] done [0.00s].
## sorting and recoding items ... [55 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 10 done [0.57s].
## writing ... [8543938 rule(s)] done [1.81s].
## creating S4 object ... done [5.43s].
## Overview of the mined rule set: rule length distribution and quality measures.
summary(rules)
## set of 8543938 rules
##
## rule length distribution (lhs + rhs):sizes
## 1 2 3 4 5 6 7 8 9
## 6 640 8986 50545 177902 470514 995145 1708171 2391765
## 10
## 2740264
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 8.000 9.000 8.603 10.000 10.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.05012 Min. :0.6000 Min. : 0.6714 Min. : 205
## 1st Qu.:0.07922 1st Qu.:1.0000 1st Qu.: 1.5847 1st Qu.: 324
## Median :0.35257 Median :1.0000 Median : 2.0328 Median :1442
## Mean :0.24601 Mean :0.9817 Mean : 2.2350 Mean :1006
## 3rd Qu.:0.35257 3rd Qu.:1.0000 3rd Qu.: 2.5138 3rd Qu.:1442
## Max. :0.95819 Max. :1.0000 Max. :10.2262 Max. :3919
##
## mining info:
## data ntransactions support confidence
## rest1 4090 0.05 0.6
## Rules with a bad overall rating on the right-hand side and lift above 1.2.
rulesrating0 <- subset(rules, subset = rhs %in% "rating=bad" & lift > 1.2)
## Show up to ten rules with the highest support. head() is used instead of
## [1:10] so the call still works when fewer than ten rules pass the filter.
inspect(head(sort(rulesrating0, by = "support"), n = 10))
## lhs rhs support confidence lift count
## [1] {service_rating=0} => {rating=bad} 0.4283619 0.8897918 2.012859 1752
## [2] {food_rating=0} => {rating=bad} 0.4261614 0.9467681 2.141749 1743
## [3] {food_rating=0,
## service_rating=0} => {rating=bad} 0.4217604 0.9862779 2.231126 1725
## [4] {marital_status=single,
## service_rating=0} => {rating=bad} 0.4207824 0.8921721 2.018243 1721
## [5] {marital_status=single,
## food_rating=0} => {rating=bad} 0.4193154 0.9464680 2.141070 1715
## [6] {service_rating=0,
## Upayment=cash} => {rating=bad} 0.4185819 0.9164882 2.073250 1712
## [7] {food_rating=0,
## Upayment=cash} => {rating=bad} 0.4166259 0.9514238 2.152281 1704
## [8] {activity=student,
## service_rating=0} => {rating=bad} 0.4156479 0.8975713 2.030457 1700
## [9] {marital_status=single,
## food_rating=0,
## service_rating=0} => {rating=bad} 0.4149144 0.9866279 2.231918 1697
## [10] {marital_status=single,
## activity=student,
## service_rating=0} => {rating=bad} 0.4139364 0.8976670 2.030674 1693
## Rules with a good overall rating on the right-hand side and lift above 1.2.
## (The variable name keeps its original spelling because later code uses it.)
rulesrationg2 <- subset(rules, subset = rhs %in% "rating=good" & lift > 1.2)
## Show up to ten rules with the highest support. head() is used instead of
## [1:10] so the call still works when fewer than ten rules pass the filter.
inspect(head(sort(rulesrationg2, by = "support"), n = 10))
## lhs rhs support confidence lift count
## [1] {food_rating=2} => {rating=good} 0.2447433 0.7170487 2.331263 1001
## [2] {marital_status=single,
## food_rating=2} => {rating=good} 0.2242054 0.7158470 2.327356 917
## [3] {hijos=independent,
## food_rating=2} => {rating=good} 0.2202934 0.7208000 2.343459 901
## [4] {service_rating=2} => {rating=good} 0.2166259 0.8635478 2.807560 886
## [5] {marital_status=single,
## hijos=independent,
## food_rating=2} => {rating=good} 0.2156479 0.7194127 2.338949 882
## [6] {budget=medium,
## food_rating=2} => {rating=good} 0.2019560 0.7108434 2.311089 826
## [7] {marital_status=single,
## service_rating=2} => {rating=good} 0.1990220 0.8724544 2.836517 814
## [8] {hijos=independent,
## service_rating=2} => {rating=good} 0.1948655 0.8865406 2.882314 797
## [9] {marital_status=single,
## hijos=independent,
## service_rating=2} => {rating=good} 0.1916870 0.8868778 2.883410 784
## [10] {religion=Catholic,
## food_rating=2} => {rating=good} 0.1909535 0.6948399 2.259058 781
## Rules with food_rating=0 on the right-hand side and lift above 1.2.
foodrating0 <- subset(rules, subset = rhs %in% "food_rating=0" & lift > 1.2)
## Show up to ten rules with the highest support. head() is used instead of
## [1:10] so the call still works when fewer than ten rules pass the filter.
inspect(head(sort(foodrating0, by = "support"), n = 10))
## lhs rhs support confidence lift count
## [1] {service_rating=0} => {food_rating=0} 0.4276284 0.8882682 1.973393 1749
## [2] {rating=bad} => {food_rating=0} 0.4261614 0.9640487 2.141749 1743
## [3] {rating=bad,
## service_rating=0} => {food_rating=0} 0.4217604 0.9845890 2.187381 1725
## [4] {marital_status=single,
## service_rating=0} => {food_rating=0} 0.4205379 0.8916537 1.980915 1720
## [5] {marital_status=single,
## rating=bad} => {food_rating=0} 0.4193154 0.9716714 2.158683 1715
## [6] {weight=Small} => {food_rating=0} 0.4188264 0.6636962 1.474480 1713
## [7] {service_rating=0,
## Upayment=cash} => {food_rating=0} 0.4178484 0.9148822 2.032519 1709
## [8] {rating=bad,
## Upayment=cash} => {food_rating=0} 0.4166259 0.9726027 2.160752 1704
## [9] {marital_status=single,
## weight=Small} => {food_rating=0} 0.4158924 0.6845070 1.520714 1701
## [10] {marital_status=single,
## rating=bad,
## service_rating=0} => {food_rating=0} 0.4149144 0.9860546 2.190637 1697
## Rules with food_rating=2 on the right-hand side and lift above 1.2.
foodrating2 <- subset(rules, subset = rhs %in% "food_rating=2" & lift > 1.2)
## Show up to ten rules with the highest support. head() is used instead of
## [1:10] so the call still works when fewer than ten rules pass the filter.
inspect(head(sort(foodrating2, by = "support"), n = 10))
## lhs rhs support confidence lift count
## [1] {hijos=independent,
## budget=medium} => {food_rating=2} 0.2528117 0.6032672 1.767452 1034
## [2] {marital_status=single,
## hijos=independent,
## budget=medium} => {food_rating=2} 0.2469438 0.6026253 1.765571 1010
## [3] {rating=good} => {food_rating=2} 0.2447433 0.7957075 2.331263 1001
## [4] {marital_status=single,
## rating=good} => {food_rating=2} 0.2242054 0.7851027 2.300194 917
## [5] {hijos=independent,
## rating=good} => {food_rating=2} 0.2202934 0.8066249 2.363249 901
## [6] {marital_status=single,
## hijos=independent,
## rating=good} => {food_rating=2} 0.2156479 0.8047445 2.357740 882
## [7] {service_rating=2} => {food_rating=2} 0.2039120 0.8128655 2.381533 834
## [8] {religion=Catholic,
## budget=medium,
## height=tall} => {food_rating=2} 0.2029340 0.6120944 1.793314 830
## [9] {budget=medium,
## rating=good} => {food_rating=2} 0.2019560 0.8227092 2.410373 826
## [10] {personality=thrifty-protector,
## activity=student} => {food_rating=2} 0.1997555 0.6069837 1.778340 817
## Rules with service_rating=0 on the right-hand side and lift above 1.2.
servicerating0 <- subset(
  rules,
  subset = rhs %in% "service_rating=0" & lift > 1.2
)
## Show up to ten rules with the highest support. head() is used instead of
## [1:10] so the call still works when fewer than ten rules pass the filter.
inspect(head(sort(servicerating0, by = "support"), n = 10))
## lhs rhs support confidence lift count
## [1] {marital_status=single,
## religion=Catholic,
## Upayment=cash} => {service_rating=0} 0.4339853 0.6045640 1.255798 1775
## [2] {religion=Catholic,
## activity=student,
## Upayment=cash} => {service_rating=0} 0.4288509 0.6083940 1.263754 1754
## [3] {marital_status=single,
## religion=Catholic,
## activity=student,
## Upayment=cash} => {service_rating=0} 0.4286064 0.6146564 1.276762 1753
## [4] {rating=bad} => {service_rating=0} 0.4283619 0.9690265 2.012859 1752
## [5] {food_rating=0} => {service_rating=0} 0.4276284 0.9500272 1.973393 1749
## [6] {rating=bad,
## food_rating=0} => {service_rating=0} 0.4217604 0.9896730 2.055745 1725
## [7] {marital_status=single,
## rating=bad} => {service_rating=0} 0.4207824 0.9750708 2.025414 1721
## [8] {marital_status=single,
## food_rating=0} => {service_rating=0} 0.4205379 0.9492274 1.971732 1720
## [9] {weight=Small} => {service_rating=0} 0.4198044 0.6652460 1.381847 1717
## [10] {rating=bad,
## Upayment=cash} => {service_rating=0} 0.4185819 0.9771689 2.029772 1712
## Rules with service_rating=2 on the right-hand side and lift above 1.2.
servicerating2 <- subset(
  rules,
  subset = rhs %in% "service_rating=2" & lift > 1.2
)
## Show up to ten rules with the highest support. head() is used instead of
## [1:10] so the call still works when fewer than ten rules pass the filter.
inspect(head(sort(servicerating2, by = "support"), n = 10))
## lhs rhs support confidence lift count
## [1] {rating=good} => {service_rating=2} 0.2166259 0.7042925 2.807560 886
## [2] {marital_status=single,
## rating=good} => {service_rating=2} 0.1990220 0.6969178 2.778162 814
## [3] {hijos=independent,
## rating=good} => {service_rating=2} 0.1948655 0.7135184 2.844337 797
## [4] {marital_status=single,
## hijos=independent,
## rating=good} => {service_rating=2} 0.1916870 0.7153285 2.851553 784
## [5] {rating=good,
## food_rating=2} => {service_rating=2} 0.1841076 0.7522478 2.998726 753
## [6] {budget=medium,
## rating=good} => {service_rating=2} 0.1841076 0.7500000 2.989766 753
## [7] {budget=medium,
## food_rating=2} => {service_rating=2} 0.1706601 0.6006885 2.394557 698
## [8] {religion=Catholic,
## rating=good} => {service_rating=2} 0.1689487 0.6861966 2.735423 691
## [9] {marital_status=single,
## budget=medium,
## rating=good} => {service_rating=2} 0.1674817 0.7453754 2.971331 685
## [10] {marital_status=single,
## rating=good,
## food_rating=2} => {service_rating=2} 0.1667482 0.7437296 2.964770 682
## Plot the two overall-rating rule subsets with plotly_arules().
## Rules whose right-hand side is a bad restaurant rating:
plotly_arules(x = rulesrating0)
## Rules whose right-hand side is a good restaurant rating, plotted and then
## passed to inspectDT() for inspection:
plotly_arules(x = rulesrationg2)
inspectDT(x = rulesrationg2)
## Remove the food and service ratings so the second round of rule mining is
## not dominated by their association with the overall rating.
## The columns are dropped by name rather than by position (19 and 20), so
## re-running this step cannot silently remove other columns.
rest <- rest[setdiff(names(rest), c("food_rating", "service_rating"))]
## Rebuild the transactions from the reduced table and print a short overview.
rest1 <- as(rest, "transactions")
rest1
## transactions in sparse format with
## 4090 transactions (rows) and
## 307 items (columns)
## Summary of the rebuilt transactions: most frequent items and transaction sizes.
summary(rest1)
## transactions as itemMatrix in sparse format with
## 4090 rows (elements/itemsets/transactions) and
## 307 columns (items) and a density of 0.06207243
##
## most frequent items:
## marital_status=single activity=student
## 3919 3655
## religion=Catholic Upayment=cash
## 3606 3352
## dress_preference=informal (Other)
## 2651 60757
##
## element (itemset/transaction) length distribution:
## sizes
## 18 19 20
## 68 3724 298
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.00 19.00 19.00 19.06 19.00 20.00
##
## includes extended item information - examples:
## labels variables levels
## 1 userID=U1001 userID U1001
## 2 userID=U1002 userID U1002
## 3 userID=U1003 userID U1003
##
## includes extended transaction information - examples:
## transactionID
## 1 1
## 2 2
## 3 3
## Expand the rebuilt transactions into a logical incidence matrix (one row
## per transaction, one column per item) and print its first two rows.
aa <- as(rest1, "matrix")
aa[seq_len(2), ]
## userID=U1001 userID=U1002 userID=U1003 userID=U1004 userID=U1005
## 1 TRUE FALSE FALSE FALSE FALSE
## 2 TRUE FALSE FALSE FALSE FALSE
## userID=U1006 userID=U1007 userID=U1008 userID=U1009 userID=U1010
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1011 userID=U1012 userID=U1013 userID=U1014 userID=U1015
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1016 userID=U1017 userID=U1018 userID=U1019 userID=U1020
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1021 userID=U1022 userID=U1023 userID=U1026 userID=U1027
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1028 userID=U1029 userID=U1030 userID=U1031 userID=U1032
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1033 userID=U1034 userID=U1035 userID=U1036 userID=U1037
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1038 userID=U1039 userID=U1040 userID=U1041 userID=U1042
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1043 userID=U1044 userID=U1045 userID=U1046 userID=U1047
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1048 userID=U1049 userID=U1050 userID=U1051 userID=U1052
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1053 userID=U1054 userID=U1055 userID=U1056 userID=U1057
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1058 userID=U1059 userID=U1060 userID=U1061 userID=U1062
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1063 userID=U1064 userID=U1065 userID=U1066 userID=U1067
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1068 userID=U1069 userID=U1070 userID=U1071 userID=U1072
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1073 userID=U1074 userID=U1075 userID=U1076 userID=U1077
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1078 userID=U1079 userID=U1080 userID=U1081 userID=U1082
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1083 userID=U1084 userID=U1085 userID=U1086 userID=U1087
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1089 userID=U1090 userID=U1091 userID=U1092 userID=U1093
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1094 userID=U1095 userID=U1096 userID=U1097 userID=U1098
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1099 userID=U1100 userID=U1101 userID=U1102 userID=U1103
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1104 userID=U1105 userID=U1106 userID=U1107 userID=U1108
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1109 userID=U1110 userID=U1111 userID=U1112 userID=U1113
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1114 userID=U1115 userID=U1116 userID=U1117 userID=U1118
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1119 userID=U1120 userID=U1121 userID=U1123 userID=U1124
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1125 userID=U1126 userID=U1127 userID=U1128 userID=U1129
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1131 userID=U1132 userID=U1133 userID=U1134 userID=U1135
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## userID=U1136 userID=U1137 userID=U1138 smoker drink_level=abstemious
## 1 FALSE FALSE FALSE FALSE TRUE
## 2 FALSE FALSE FALSE FALSE TRUE
## drink_level=casual drinker drink_level=social drinker dress_preference=?
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## dress_preference=elegant dress_preference=formal
## 1 FALSE FALSE
## 2 FALSE FALSE
## dress_preference=informal dress_preference=no preference ambience=?
## 1 TRUE FALSE FALSE
## 2 TRUE FALSE FALSE
## ambience=family ambience=friends ambience=solitary transport=?
## 1 TRUE FALSE FALSE FALSE
## 2 TRUE FALSE FALSE FALSE
## transport=car owner transport=on foot transport=public marital_status=?
## 1 FALSE TRUE FALSE FALSE
## 2 FALSE TRUE FALSE FALSE
## marital_status=married marital_status=single marital_status=widow
## 1 FALSE TRUE FALSE
## 2 FALSE TRUE FALSE
## hijos=? hijos=dependent hijos=independent hijos=kids birth_year=old
## 1 FALSE FALSE TRUE FALSE FALSE
## 2 FALSE FALSE TRUE FALSE FALSE
## birth_year=middle birth_year=young interest=eco-friendly interest=none
## 1 TRUE FALSE FALSE FALSE
## 2 TRUE FALSE FALSE FALSE
## interest=retro interest=technology interest=variety
## 1 FALSE FALSE TRUE
## 2 FALSE FALSE TRUE
## personality=conformist personality=hard-worker
## 1 FALSE FALSE
## 2 FALSE FALSE
## personality=hunter-ostentatious personality=thrifty-protector
## 1 FALSE TRUE
## 2 FALSE TRUE
## religion=Catholic religion=Christian religion=Jewish religion=Mormon
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## religion=none activity=? activity=professional activity=student
## 1 TRUE FALSE FALSE TRUE
## 2 TRUE FALSE FALSE TRUE
## activity=unemployed activity=working-class color=black color=blue
## 1 FALSE FALSE TRUE FALSE
## 2 FALSE FALSE TRUE FALSE
## color=green color=orange color=purple color=red color=white color=yellow
## 1 FALSE FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE FALSE
## weight=Small weight=Large budget=? budget=high budget=low budget=medium
## 1 FALSE TRUE FALSE FALSE FALSE TRUE
## 2 FALSE TRUE FALSE FALSE FALSE TRUE
## height=short height=tall rating=bad rating=good rating=normal
## 1 FALSE TRUE FALSE FALSE TRUE
## 2 FALSE TRUE FALSE FALSE TRUE
## Rcuisine=Afghan Rcuisine=African Rcuisine=American Rcuisine=Armenian
## 1 FALSE FALSE TRUE FALSE
## 2 FALSE FALSE TRUE FALSE
## Rcuisine=Asian Rcuisine=Australian Rcuisine=Austrian Rcuisine=Bagels
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Bakery Rcuisine=Bar Rcuisine=Bar_Pub_Brewery Rcuisine=Barbecue
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Basque Rcuisine=Brazilian Rcuisine=Breakfast-Brunch
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=British Rcuisine=Burgers Rcuisine=Burmese
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Cafe-Coffee_Shop Rcuisine=Cafeteria Rcuisine=Cajun-Creole
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=California Rcuisine=Cambodian Rcuisine=Canadian
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Caribbean Rcuisine=Chilean Rcuisine=Chinese
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Contemporary Rcuisine=Continental-European Rcuisine=Cuban
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Deli-Sandwiches Rcuisine=Dessert-Ice_Cream Rcuisine=Dim_Sum
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Diner Rcuisine=Doughnuts Rcuisine=Dutch-Belgian
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Eastern_European Rcuisine=Eclectic Rcuisine=Ethiopian
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Family Rcuisine=Fast_Food Rcuisine=Filipino
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Fine_Dining Rcuisine=French Rcuisine=Fusion Rcuisine=Game
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=German Rcuisine=Greek Rcuisine=Hawaiian Rcuisine=Hot_Dogs
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Hungarian Rcuisine=Indian-Pakistani Rcuisine=Indigenous
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Indonesian Rcuisine=International Rcuisine=Irish
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Israeli Rcuisine=Italian Rcuisine=Jamaican Rcuisine=Japanese
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Juice Rcuisine=Korean Rcuisine=Kosher Rcuisine=Latin_American
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Lebanese Rcuisine=Malaysian Rcuisine=Mediterranean
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Mexican Rcuisine=Middle_Eastern Rcuisine=Mongolian
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Moroccan Rcuisine=North_African Rcuisine=Organic-Healthy
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Pacific_Northwest Rcuisine=Pacific_Rim Rcuisine=Persian
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Peruvian Rcuisine=Pizzeria Rcuisine=Polish Rcuisine=Polynesian
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Portuguese Rcuisine=Regional Rcuisine=Romanian
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Russian-Ukrainian Rcuisine=Scandinavian Rcuisine=Seafood
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Soup Rcuisine=Southeast_Asian Rcuisine=Southern
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Rcuisine=Southwestern Rcuisine=Spanish Rcuisine=Steaks Rcuisine=Sushi
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Swiss Rcuisine=Tapas Rcuisine=Tea_House Rcuisine=Tex-Mex
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Thai Rcuisine=Tibetan Rcuisine=Tunisian Rcuisine=Turkish
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## Rcuisine=Vegetarian Rcuisine=Vietnamese Upayment=American_Express
## 1 FALSE FALSE FALSE
## 2 FALSE FALSE FALSE
## Upayment=bank_debit_cards Upayment=cash Upayment=MasterCard-Eurocard
## 1 FALSE TRUE FALSE
## 2 FALSE TRUE FALSE
## Upayment=VISA
## 1 FALSE
## 2 FALSE
## Plot the frequency of the items whose itemFrequency() exceeds 0.4.
itemFrequencyPlot(x = rest1[, itemFrequency(rest1) > 0.4], cex.names = 1)
## Mine association rules again on the current rest1, with minimum support
## 0.05 and minimum confidence 0.6; other apriori() parameters stay at their
## defaults.
rules <- apriori(
  data = rest1,
  parameter = list(support = 0.05, confidence = 0.6)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.6 0.1 1 none FALSE TRUE 5 0.05 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 204
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[307 item(s), 4090 transaction(s)] done [0.00s].
## sorting and recoding items ... [49 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 10 done [0.17s].
## writing ... [2927841 rule(s)] done [0.67s].
## creating S4 object ... done [1.60s].
## Print the rule set, which reports how many rules were mined.
rules
## set of 2927841 rules
## Overview of the re-mined rule set: rule length distribution and quality measures.
summary(rules)
## set of 2927841 rules
##
## rule length distribution (lhs + rhs):sizes
## 1 2 3 4 5 6 7 8 9 10
## 6 528 6451 32093 99211 229506 424879 635081 763754 736332
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 7.000 9.000 8.304 10.000 10.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.05012 Min. :0.6000 Min. : 0.6714 Min. : 205.0
## 1st Qu.:0.07922 1st Qu.:1.0000 1st Qu.: 1.5428 1st Qu.: 324.0
## Median :0.35257 Median :1.0000 Median : 2.0328 Median :1442.0
## Mean :0.23523 Mean :0.9846 Mean : 2.2743 Mean : 962.1
## 3rd Qu.:0.35257 3rd Qu.:1.0000 3rd Qu.: 2.6576 3rd Qu.:1442.0
## Max. :0.95819 Max. :1.0000 Max. :10.2262 Max. :3919.0
##
## mining info:
## data ntransactions support confidence
## rest1 4090 0.05 0.6
## Rules with a bad overall rating on the right-hand side and lift above 1.2,
## taken from the current rule set.
rulesrating0 <- subset(rules, subset = rhs %in% "rating=bad" & lift > 1.2)
## Show up to ten rules with the highest support. head() is used instead of
## [1:10] so the call still works when fewer than ten rules pass the filter.
inspect(head(sort(rulesrating0, by = "support"), n = 10))
## lhs rhs support confidence lift count
## [1] {weight=Small} => {rating=bad} 0.4129584 0.6543975 1.480357 1689
## [2] {marital_status=single,
## weight=Small} => {rating=bad} 0.4070905 0.6700201 1.515698 1665
## [3] {weight=Small,
## Upayment=cash} => {rating=bad} 0.4051345 0.7066098 1.598470 1657
## [4] {activity=student,
## weight=Small} => {rating=bad} 0.4044010 0.7241681 1.638190 1654
## [5] {drink_level=casual drinker} => {rating=bad} 0.4026895 0.7005530 1.584769 1647
## [6] {drink_level=casual drinker,
## marital_status=single} => {rating=bad} 0.4026895 0.7005530 1.584769 1647
## [7] {religion=Catholic,
## weight=Small} => {rating=bad} 0.4026895 0.7245930 1.639151 1647
## [8] {marital_status=single,
## weight=Small,
## Upayment=cash} => {rating=bad} 0.4024450 0.7175240 1.623160 1646
## [9] {activity=student,
## weight=Small,
## Upayment=cash} => {rating=bad} 0.4007335 0.7463570 1.688385 1639
## [10] {marital_status=single,
## activity=student,
## weight=Small} => {rating=bad} 0.4002445 0.7430776 1.680967 1637
## Rules with a good overall rating on the right-hand side and lift above 1.2,
## taken from the current rule set.
## (The variable name keeps its original spelling because later code uses it.)
rulesrationg2 <- subset(rules, subset = rhs %in% "rating=good" & lift > 1.2)
## Show up to ten rules with the highest support. head() is used instead of
## [1:10] so the call still works when fewer than ten rules pass the filter.
inspect(head(sort(rulesrationg2, by = "support"), n = 10))
## lhs rhs support confidence lift count
## [1] {birth_year=young,
## religion=Catholic,
## budget=medium} => {rating=good} 0.09975550 0.6089552 1.979831 408
## [2] {marital_status=single,
## birth_year=young,
## religion=Catholic,
## budget=medium} => {rating=good} 0.09975550 0.6144578 1.997721 408
## [3] {birth_year=young,
## activity=student,
## budget=medium} => {rating=good} 0.09804401 0.6039157 1.963446 401
## [4] {marital_status=single,
## birth_year=young,
## activity=student,
## budget=medium} => {rating=good} 0.09731051 0.6048632 1.966527 398
## [5] {hijos=independent,
## birth_year=young,
## budget=medium} => {rating=good} 0.09462103 0.6231884 2.026105 387
## [6] {marital_status=single,
## hijos=independent,
## birth_year=young,
## budget=medium} => {rating=good} 0.09462103 0.6231884 2.026105 387
## [7] {birth_year=young,
## height=tall} => {rating=good} 0.09388753 0.6104928 1.984830 384
## [8] {birth_year=young,
## budget=medium,
## Upayment=cash} => {rating=good} 0.09364303 0.6031496 1.960955 383
## [9] {birth_year=young,
## religion=Catholic,
## activity=student,
## budget=medium} => {rating=good} 0.09364303 0.6197411 2.014898 383
## [10] {marital_status=single,
## birth_year=young,
## religion=Catholic,
## activity=student,
## budget=medium} => {rating=good} 0.09364303 0.6197411 2.014898 383
## Plot the bad-rating and good-rating rule subsets with plotly_arules().
plotly_arules(rulesrating0)
plotly_arules(rulesrationg2)
## Plot of the good-rating rules. The title names the overall rating because
## rulesrationg2 holds rules with rating=good on the right-hand side, not food
## ratings.
plot(rulesrationg2, main = "rating=good rules: confidence vs. support")
## Pass the same rules to inspectDT() for inspection.
inspectDT(rulesrationg2)