First, load the required packages and the data set.
library(textir)
## Loading required package: distrom
## Loading required package: Matrix
## Loading required package: gamlr
## Loading required package: parallel
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:distrom':
##
## collapse
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(tidytext)
library(wordcloud)
## Loading required package: RColorBrewer
library(reshape2)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
## The following object is masked from 'package:Matrix':
##
## expand
library(arules)
##
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
##
## recode
## The following objects are masked from 'package:base':
##
## abbreviate, write
# Load the restaurant-rater survey data.
# stringsAsFactors = TRUE keeps text columns as factors on every R version
# (the read.csv default became FALSE in R 4.0.0); later steps call levels()
# on these columns and would get NULL from plain character vectors.
# NOTE(review): absolute, machine-specific path; consider a project-relative
# path so the script runs on other machines.
rrc <- read.csv(
  "/Users/kimberlyhatlestad/Data Mining/RestaurantRatersComplete.csv",
  stringsAsFactors = TRUE
)
head(rrc)
## userID drink_level dress_preference ambience transport marital_status
## 1 U1001 abstemious informal family on foot single
## 2 U1001 abstemious informal family on foot single
## 3 U1001 abstemious informal family on foot single
## 4 U1001 abstemious informal family on foot single
## 5 U1001 abstemious informal family on foot single
## 6 U1001 abstemious informal family on foot single
## hijos birth_year interest personality religion activity
## 1 independent 1989 variety thrifty-protector none student
## 2 independent 1989 variety thrifty-protector none student
## 3 independent 1989 variety thrifty-protector none student
## 4 independent 1989 variety thrifty-protector none student
## 5 independent 1989 variety thrifty-protector none student
## 6 independent 1989 variety thrifty-protector none student
## weight budget placeID rating food_rating service_rating Rcuisine
## 1 69 medium 135045 1 1 1 American
## 2 69 medium 135033 1 1 1 American
## 3 69 medium 135025 2 2 2 American
## 4 69 medium 135051 1 1 2 American
## 5 69 medium 135039 1 1 1 American
## 6 69 medium 135040 1 1 1 American
## Upayment
## 1 cash
## 2 cash
## 3 cash
## 4 cash
## 5 cash
## 6 cash
Plotting food rating against the different drink levels
# Food rating by drink level. Passing `data = rrc` keeps `$` out of the
# formula, so the axes are labelled with the bare column names.
plot(food_rating ~ drink_level, data = rrc)

# Preview the first 19 rows; unlike rrc[1:19, ], head() does not pad the
# result with NA rows when the data have fewer rows than requested.
head(rrc, 19)
## userID drink_level dress_preference ambience transport marital_status
## 1 U1001 abstemious informal family on foot single
## 2 U1001 abstemious informal family on foot single
## 3 U1001 abstemious informal family on foot single
## 4 U1001 abstemious informal family on foot single
## 5 U1001 abstemious informal family on foot single
## 6 U1001 abstemious informal family on foot single
## 7 U1001 abstemious informal family on foot single
## 8 U1001 abstemious informal family on foot single
## 9 U1001 abstemious informal family on foot single
## 10 U1002 abstemious informal family public single
## 11 U1002 abstemious informal family public single
## 12 U1002 abstemious informal family public single
## 13 U1002 abstemious informal family public single
## 14 U1002 abstemious informal family public single
## 15 U1002 abstemious informal family public single
## 16 U1002 abstemious informal family public single
## 17 U1002 abstemious informal family public single
## 18 U1002 abstemious informal family public single
## 19 U1002 abstemious informal family public single
## hijos birth_year interest personality religion activity
## 1 independent 1989 variety thrifty-protector none student
## 2 independent 1989 variety thrifty-protector none student
## 3 independent 1989 variety thrifty-protector none student
## 4 independent 1989 variety thrifty-protector none student
## 5 independent 1989 variety thrifty-protector none student
## 6 independent 1989 variety thrifty-protector none student
## 7 independent 1989 variety thrifty-protector none student
## 8 independent 1989 variety thrifty-protector none student
## 9 independent 1989 variety thrifty-protector none student
## 10 independent 1990 technology hunter-ostentatious Catholic student
## 11 independent 1990 technology hunter-ostentatious Catholic student
## 12 independent 1990 technology hunter-ostentatious Catholic student
## 13 independent 1990 technology hunter-ostentatious Catholic student
## 14 independent 1990 technology hunter-ostentatious Catholic student
## 15 independent 1990 technology hunter-ostentatious Catholic student
## 16 independent 1990 technology hunter-ostentatious Catholic student
## 17 independent 1990 technology hunter-ostentatious Catholic student
## 18 independent 1990 technology hunter-ostentatious Catholic student
## 19 independent 1990 technology hunter-ostentatious Catholic student
## weight budget placeID rating food_rating service_rating Rcuisine
## 1 69 medium 135045 1 1 1 American
## 2 69 medium 135033 1 1 1 American
## 3 69 medium 135025 2 2 2 American
## 4 69 medium 135051 1 1 2 American
## 5 69 medium 135039 1 1 1 American
## 6 69 medium 135040 1 1 1 American
## 7 69 medium 135085 0 1 1 American
## 8 69 medium 132825 2 2 1 American
## 9 69 medium 132830 1 1 1 American
## 10 40 low 135085 1 1 1 Mexican
## 11 40 low 132925 1 1 2 Mexican
## 12 40 low 135041 2 1 0 Mexican
## 13 40 low 135059 1 2 1 Mexican
## 14 40 low 132862 2 2 2 Mexican
## 15 40 low 135106 1 1 1 Mexican
## 16 40 low 132825 2 2 0 Mexican
## 17 40 low 132921 2 1 1 Mexican
## 18 40 low 135052 1 1 1 Mexican
## 19 40 low 135062 1 2 1 Mexican
## Upayment
## 1 cash
## 2 cash
## 3 cash
## 4 cash
## 5 cash
## 6 cash
## 7 cash
## 8 cash
## 9 cash
## 10 cash
## 11 cash
## 12 cash
## 13 cash
## 14 cash
## 15 cash
## 16 cash
## 17 cash
## 18 cash
## 19 cash
nrow(rrc) ## 4090 records in the file
## [1] 4090
rrc$userID <- factor(rrc$userID)
levels(rrc$userID) ## 133 distinct users (IDs run up to U1138, with gaps)
## [1] "U1001" "U1002" "U1003" "U1004" "U1005" "U1006" "U1007" "U1008"
## [9] "U1009" "U1010" "U1011" "U1012" "U1013" "U1014" "U1015" "U1016"
## [17] "U1017" "U1018" "U1019" "U1020" "U1021" "U1022" "U1023" "U1026"
## [25] "U1027" "U1028" "U1029" "U1030" "U1031" "U1032" "U1033" "U1034"
## [33] "U1035" "U1036" "U1037" "U1038" "U1039" "U1040" "U1041" "U1042"
## [41] "U1043" "U1044" "U1045" "U1046" "U1047" "U1048" "U1049" "U1050"
## [49] "U1051" "U1052" "U1053" "U1054" "U1055" "U1056" "U1057" "U1058"
## [57] "U1059" "U1060" "U1061" "U1062" "U1063" "U1064" "U1065" "U1066"
## [65] "U1067" "U1068" "U1069" "U1070" "U1071" "U1072" "U1073" "U1074"
## [73] "U1075" "U1076" "U1077" "U1078" "U1079" "U1080" "U1081" "U1082"
## [81] "U1083" "U1084" "U1085" "U1086" "U1087" "U1089" "U1090" "U1091"
## [89] "U1092" "U1093" "U1094" "U1095" "U1096" "U1097" "U1098" "U1099"
## [97] "U1100" "U1101" "U1102" "U1103" "U1104" "U1105" "U1106" "U1107"
## [105] "U1108" "U1109" "U1110" "U1111" "U1112" "U1113" "U1114" "U1115"
## [113] "U1116" "U1117" "U1118" "U1119" "U1120" "U1121" "U1123" "U1124"
## [121] "U1125" "U1126" "U1127" "U1128" "U1129" "U1131" "U1132" "U1133"
## [129] "U1134" "U1135" "U1136" "U1137" "U1138"
# factor() makes this work whether Rcuisine was read as a factor or as
# character (levels() on a character vector returns NULL).
levels(factor(rrc$Rcuisine)) ## 103 cuisine types
## [1] "Afghan" "African" "American"
## [4] "Armenian" "Asian" "Australian"
## [7] "Austrian" "Bagels" "Bakery"
## [10] "Bar" "Bar_Pub_Brewery" "Barbecue"
## [13] "Basque" "Brazilian" "Breakfast-Brunch"
## [16] "British" "Burgers" "Burmese"
## [19] "Cafe-Coffee_Shop" "Cafeteria" "Cajun-Creole"
## [22] "California" "Cambodian" "Canadian"
## [25] "Caribbean" "Chilean" "Chinese"
## [28] "Contemporary" "Continental-European" "Cuban"
## [31] "Deli-Sandwiches" "Dessert-Ice_Cream" "Dim_Sum"
## [34] "Diner" "Doughnuts" "Dutch-Belgian"
## [37] "Eastern_European" "Eclectic" "Ethiopian"
## [40] "Family" "Fast_Food" "Filipino"
## [43] "Fine_Dining" "French" "Fusion"
## [46] "Game" "German" "Greek"
## [49] "Hawaiian" "Hot_Dogs" "Hungarian"
## [52] "Indian-Pakistani" "Indigenous" "Indonesian"
## [55] "International" "Irish" "Israeli"
## [58] "Italian" "Jamaican" "Japanese"
## [61] "Juice" "Korean" "Kosher"
## [64] "Latin_American" "Lebanese" "Malaysian"
## [67] "Mediterranean" "Mexican" "Middle_Eastern"
## [70] "Mongolian" "Moroccan" "North_African"
## [73] "Organic-Healthy" "Pacific_Northwest" "Pacific_Rim"
## [76] "Persian" "Peruvian" "Pizzeria"
## [79] "Polish" "Polynesian" "Portuguese"
## [82] "Regional" "Romanian" "Russian-Ukrainian"
## [85] "Scandinavian" "Seafood" "Soup"
## [88] "Southeast_Asian" "Southern" "Southwestern"
## [91] "Spanish" "Steaks" "Sushi"
## [94] "Swiss" "Tapas" "Tea_House"
## [97] "Tex-Mex" "Thai" "Tibetan"
## [100] "Tunisian" "Turkish" "Vegetarian"
## [103] "Vietnamese"
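Found that there were 133 different users (the IDs run up to U1138, but five of them are absent), and factored the responses by user ID.
Next, remove the duplicate drink levels so that each user contributes their drink level only once.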
# Group every row's drink level by user, then collapse each user's values
# to the distinct ones so repeated rows are not counted more than once.
drink <- lapply(
  split(rrc[["drink_level"]], rrc[["userID"]]),
  unique
)
# Peek at the first two users.
drink[c(1, 2)]
## $U1001
## [1] abstemious
## Levels: abstemious casual drinker social drinker
##
## $U1002
## [1] abstemious
## Levels: abstemious casual drinker social drinker
# Coerce the per-user list into an arules "transactions" object, then
# report the relative frequency of each item.
drink <- as(object = drink, Class = "transactions")
itemFrequency(x = drink, type = "relative")
## abstemious casual drinker social drinker
## 0.3609023 0.3533835 0.2857143
# Keep the per-item relative frequencies as a 1-D table so that a single
# item's support can be looked up by its label.
support <- as.table(itemFrequency(x = drink, type = "relative"))
support[c("social drinker")]
## social drinker
## 0.2857143
# Bar chart of item frequencies, limited to items with support >= 0.05.
itemFrequencyPlot(
  x = drink,
  support = 0.05,
  cex.names = 1
)

I like that there is a relatively even distribution of the three different drinker-types.
# Every transaction in `drink` holds exactly one item (the user's drink
# level), so no rule with a non-empty left-hand side can be found there and
# apriori() reports 0 rules. To relate drink level to food, mine rules over
# per-user baskets that pair the drink level with the cuisine values
# recorded for that user.
basket_cols <- c("drink_level", "Rcuisine")
user_baskets <- lapply(
  split(rrc[basket_cols], rrc$userID, drop = TRUE),
  function(user_rows) {
    # Prefix each value with its column name so items stay unambiguous.
    unlist(lapply(basket_cols, function(col) {
      paste(col, unique(as.character(user_rows[[col]])), sep = "=")
    }))
  }
)
# minlen = 2 drops rules whose left-hand side is empty.
drinkrules <- apriori(
  as(user_baskets, "transactions"),
  parameter = list(support = .05, confidence = .5, minlen = 2)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 5 0.05 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 6
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[3 item(s), 133 transaction(s)] done [0.00s].
## sorting and recoding items ... [3 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 done [0.00s].
## writing ... [0 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Show the mined rules, then the same rules ordered by confidence.
# An empty rule set would otherwise print nothing at all, so report that
# case explicitly.
if (length(drinkrules) == 0L) {
  message("No association rules met the support/confidence thresholds.")
} else {
  inspect(drinkrules)
  inspect(sort(
    subset(drinkrules, subset = confidence > .1),
    by = "confidence"
  ))
}
I would like to have a little more knowledge of R so that I could look further into whether the food more accurately predicts an individual's drink level or whether it is the other way around. I think it would also be interesting to see whether there is a significant relationship with the individuals' locations, and whether their communities play a part in the types of food they eat and in their habits.