First, load the required packages and the data set.

library(textir)
## Loading required package: distrom
## Loading required package: Matrix
## Loading required package: gamlr
## Loading required package: parallel
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:distrom':
## 
##     collapse
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(tidytext)
library(wordcloud)
## Loading required package: RColorBrewer
library(reshape2)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
## 
##     smiths
## The following object is masked from 'package:Matrix':
## 
##     expand
library(arules) 
## 
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
# Read the complete restaurant-rater data set (one row per user/restaurant
# rating). `stringsAsFactors = TRUE` is set explicitly: the rest of this
# analysis calls levels() on text columns and expects factor output, but since
# R 4.0.0 read.csv() leaves strings as character unless told otherwise.
# NOTE(review): the path is machine-specific; adjust it when running elsewhere.
rrc <- read.csv(
  "/Users/kimberlyhatlestad/Data Mining/RestaurantRatersComplete.csv",
  stringsAsFactors = TRUE
)
# Preview the first six rows to check the columns were parsed as expected.
head(rrc)
##   userID drink_level dress_preference ambience transport marital_status
## 1  U1001  abstemious         informal   family   on foot         single
## 2  U1001  abstemious         informal   family   on foot         single
## 3  U1001  abstemious         informal   family   on foot         single
## 4  U1001  abstemious         informal   family   on foot         single
## 5  U1001  abstemious         informal   family   on foot         single
## 6  U1001  abstemious         informal   family   on foot         single
##         hijos birth_year interest       personality religion activity
## 1 independent       1989  variety thrifty-protector     none  student
## 2 independent       1989  variety thrifty-protector     none  student
## 3 independent       1989  variety thrifty-protector     none  student
## 4 independent       1989  variety thrifty-protector     none  student
## 5 independent       1989  variety thrifty-protector     none  student
## 6 independent       1989  variety thrifty-protector     none  student
##   weight budget placeID rating food_rating service_rating Rcuisine
## 1     69 medium  135045      1           1              1 American
## 2     69 medium  135033      1           1              1 American
## 3     69 medium  135025      2           2              2 American
## 4     69 medium  135051      1           1              2 American
## 5     69 medium  135039      1           1              1 American
## 6     69 medium  135040      1           1              1 American
##   Upayment
## 1     cash
## 2     cash
## 3     cash
## 4     cash
## 5     cash
## 6     cash

Plotting the food rating for each of the different drink levels.

# Box plot of food_rating for each drink level. Using `data = rrc` keeps the
# `rrc$` prefix out of the formula, and the explicit factor() keeps this a
# per-level box plot even if drink_level was read in as plain character data.
# Axis labels are set by hand so they show the bare column names.
plot(
  food_rating ~ factor(drink_level),
  data = rrc,
  xlab = "drink_level",
  ylab = "food_rating"
)

# Print the first 19 rating records; in the listing these cover all of user
# U1001's rows and the first rows of user U1002.
rrc[seq_len(19), ]
##    userID drink_level dress_preference ambience transport marital_status
## 1   U1001  abstemious         informal   family   on foot         single
## 2   U1001  abstemious         informal   family   on foot         single
## 3   U1001  abstemious         informal   family   on foot         single
## 4   U1001  abstemious         informal   family   on foot         single
## 5   U1001  abstemious         informal   family   on foot         single
## 6   U1001  abstemious         informal   family   on foot         single
## 7   U1001  abstemious         informal   family   on foot         single
## 8   U1001  abstemious         informal   family   on foot         single
## 9   U1001  abstemious         informal   family   on foot         single
## 10  U1002  abstemious         informal   family    public         single
## 11  U1002  abstemious         informal   family    public         single
## 12  U1002  abstemious         informal   family    public         single
## 13  U1002  abstemious         informal   family    public         single
## 14  U1002  abstemious         informal   family    public         single
## 15  U1002  abstemious         informal   family    public         single
## 16  U1002  abstemious         informal   family    public         single
## 17  U1002  abstemious         informal   family    public         single
## 18  U1002  abstemious         informal   family    public         single
## 19  U1002  abstemious         informal   family    public         single
##          hijos birth_year   interest         personality religion activity
## 1  independent       1989    variety   thrifty-protector     none  student
## 2  independent       1989    variety   thrifty-protector     none  student
## 3  independent       1989    variety   thrifty-protector     none  student
## 4  independent       1989    variety   thrifty-protector     none  student
## 5  independent       1989    variety   thrifty-protector     none  student
## 6  independent       1989    variety   thrifty-protector     none  student
## 7  independent       1989    variety   thrifty-protector     none  student
## 8  independent       1989    variety   thrifty-protector     none  student
## 9  independent       1989    variety   thrifty-protector     none  student
## 10 independent       1990 technology hunter-ostentatious Catholic  student
## 11 independent       1990 technology hunter-ostentatious Catholic  student
## 12 independent       1990 technology hunter-ostentatious Catholic  student
## 13 independent       1990 technology hunter-ostentatious Catholic  student
## 14 independent       1990 technology hunter-ostentatious Catholic  student
## 15 independent       1990 technology hunter-ostentatious Catholic  student
## 16 independent       1990 technology hunter-ostentatious Catholic  student
## 17 independent       1990 technology hunter-ostentatious Catholic  student
## 18 independent       1990 technology hunter-ostentatious Catholic  student
## 19 independent       1990 technology hunter-ostentatious Catholic  student
##    weight budget placeID rating food_rating service_rating Rcuisine
## 1      69 medium  135045      1           1              1 American
## 2      69 medium  135033      1           1              1 American
## 3      69 medium  135025      2           2              2 American
## 4      69 medium  135051      1           1              2 American
## 5      69 medium  135039      1           1              1 American
## 6      69 medium  135040      1           1              1 American
## 7      69 medium  135085      0           1              1 American
## 8      69 medium  132825      2           2              1 American
## 9      69 medium  132830      1           1              1 American
## 10     40    low  135085      1           1              1  Mexican
## 11     40    low  132925      1           1              2  Mexican
## 12     40    low  135041      2           1              0  Mexican
## 13     40    low  135059      1           2              1  Mexican
## 14     40    low  132862      2           2              2  Mexican
## 15     40    low  135106      1           1              1  Mexican
## 16     40    low  132825      2           2              0  Mexican
## 17     40    low  132921      2           1              1  Mexican
## 18     40    low  135052      1           1              1  Mexican
## 19     40    low  135062      1           2              1  Mexican
##    Upayment
## 1      cash
## 2      cash
## 3      cash
## 4      cash
## 5      cash
## 6      cash
## 7      cash
## 8      cash
## 9      cash
## 10     cash
## 11     cash
## 12     cash
## 13     cash
## 14     cash
## 15     cash
## 16     cash
## 17     cash
## 18     cash
## 19     cash
# Number of rating records (rows) in the data set.
nrow(rrc)   ## 4090 records in the file
## [1] 4090
# Treat the user ID as a factor so the ratings can be grouped per user.
rrc$userID <- factor(rrc$userID)
# The IDs run up to U1138, but some numbers are skipped: the listing contains
# 133 distinct users.
levels(rrc$userID)   ## 133 users
##   [1] "U1001" "U1002" "U1003" "U1004" "U1005" "U1006" "U1007" "U1008"
##   [9] "U1009" "U1010" "U1011" "U1012" "U1013" "U1014" "U1015" "U1016"
##  [17] "U1017" "U1018" "U1019" "U1020" "U1021" "U1022" "U1023" "U1026"
##  [25] "U1027" "U1028" "U1029" "U1030" "U1031" "U1032" "U1033" "U1034"
##  [33] "U1035" "U1036" "U1037" "U1038" "U1039" "U1040" "U1041" "U1042"
##  [41] "U1043" "U1044" "U1045" "U1046" "U1047" "U1048" "U1049" "U1050"
##  [49] "U1051" "U1052" "U1053" "U1054" "U1055" "U1056" "U1057" "U1058"
##  [57] "U1059" "U1060" "U1061" "U1062" "U1063" "U1064" "U1065" "U1066"
##  [65] "U1067" "U1068" "U1069" "U1070" "U1071" "U1072" "U1073" "U1074"
##  [73] "U1075" "U1076" "U1077" "U1078" "U1079" "U1080" "U1081" "U1082"
##  [81] "U1083" "U1084" "U1085" "U1086" "U1087" "U1089" "U1090" "U1091"
##  [89] "U1092" "U1093" "U1094" "U1095" "U1096" "U1097" "U1098" "U1099"
##  [97] "U1100" "U1101" "U1102" "U1103" "U1104" "U1105" "U1106" "U1107"
## [105] "U1108" "U1109" "U1110" "U1111" "U1112" "U1113" "U1114" "U1115"
## [113] "U1116" "U1117" "U1118" "U1119" "U1120" "U1121" "U1123" "U1124"
## [121] "U1125" "U1126" "U1127" "U1128" "U1129" "U1131" "U1132" "U1133"
## [129] "U1134" "U1135" "U1136" "U1137" "U1138"
# List the distinct cuisine types. Wrapping the column in factor() makes this
# work whether Rcuisine was read in as a factor or as plain character strings
# (levels() on a character vector returns NULL).
levels(factor(rrc$Rcuisine)) ##  103 cuisine types
##   [1] "Afghan"               "African"              "American"            
##   [4] "Armenian"             "Asian"                "Australian"          
##   [7] "Austrian"             "Bagels"               "Bakery"              
##  [10] "Bar"                  "Bar_Pub_Brewery"      "Barbecue"            
##  [13] "Basque"               "Brazilian"            "Breakfast-Brunch"    
##  [16] "British"              "Burgers"              "Burmese"             
##  [19] "Cafe-Coffee_Shop"     "Cafeteria"            "Cajun-Creole"        
##  [22] "California"           "Cambodian"            "Canadian"            
##  [25] "Caribbean"            "Chilean"              "Chinese"             
##  [28] "Contemporary"         "Continental-European" "Cuban"               
##  [31] "Deli-Sandwiches"      "Dessert-Ice_Cream"    "Dim_Sum"             
##  [34] "Diner"                "Doughnuts"            "Dutch-Belgian"       
##  [37] "Eastern_European"     "Eclectic"             "Ethiopian"           
##  [40] "Family"               "Fast_Food"            "Filipino"            
##  [43] "Fine_Dining"          "French"               "Fusion"              
##  [46] "Game"                 "German"               "Greek"               
##  [49] "Hawaiian"             "Hot_Dogs"             "Hungarian"           
##  [52] "Indian-Pakistani"     "Indigenous"           "Indonesian"          
##  [55] "International"        "Irish"                "Israeli"             
##  [58] "Italian"              "Jamaican"             "Japanese"            
##  [61] "Juice"                "Korean"               "Kosher"              
##  [64] "Latin_American"       "Lebanese"             "Malaysian"           
##  [67] "Mediterranean"        "Mexican"              "Middle_Eastern"      
##  [70] "Mongolian"            "Moroccan"             "North_African"       
##  [73] "Organic-Healthy"      "Pacific_Northwest"    "Pacific_Rim"         
##  [76] "Persian"              "Peruvian"             "Pizzeria"            
##  [79] "Polish"               "Polynesian"           "Portuguese"          
##  [82] "Regional"             "Romanian"             "Russian-Ukrainian"   
##  [85] "Scandinavian"         "Seafood"              "Soup"                
##  [88] "Southeast_Asian"      "Southern"             "Southwestern"        
##  [91] "Spanish"              "Steaks"               "Sushi"               
##  [94] "Swiss"                "Tapas"                "Tea_House"           
##  [97] "Tex-Mex"              "Thai"                 "Tibetan"             
## [100] "Tunisian"             "Turkish"              "Vegetarian"          
## [103] "Vietnamese"

Found that there are 133 different users (the IDs run up to U1138, but five numbers are skipped), and converted the user ID to a factor so the responses can be grouped by user.

Remove the duplicate drink levels so that each user's drink level is listed only once.

# Group the drink_level entries by user, then collapse the repeated values so
# each user keeps only their distinct drink level(s).
drink <- lapply(
  split(rrc[["drink_level"]], rrc[["userID"]]),
  unique
)
# Peek at the entries for the first two users.
drink[seq_len(2)]
## $U1001
## [1] abstemious
## Levels: abstemious casual drinker social drinker
## 
## $U1002
## [1] abstemious
## Levels: abstemious casual drinker social drinker
# Convert the per-user list into an arules "transactions" object, one
# transaction per user.
drink <- as(drink, Class = "transactions")
# Share of users (relative support) reporting each drink level.
itemFrequency(drink, type = "relative")
##     abstemious casual drinker social drinker 
##      0.3609023      0.3533835      0.2857143
# Keep the supports as a named table so a single level can be looked up by name.
support <- as.table(itemFrequency(drink, type = "relative"))
support["social drinker"]
## social drinker 
##      0.2857143
# Bar chart of every drink level whose support is at least 5%.
itemFrequencyPlot(drink, support = 0.05, cex.names = 1)

I like that there is a relatively even distribution of the three different drinker-types.

# Mine association rules with minimum support 5% and minimum confidence 50%.
# The log reports 3 items, 133 transactions and 0 rules: the transactions hold
# only drink levels, and no single level reaches 50% support.
# NOTE(review): presumably each user has exactly one drink level, in which case
# no multi-item rule can exist either; confirm before adding other attributes.
drinkrules <- apriori(
  drink,
  parameter = list(support = 0.05, confidence = 0.5)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.5    0.1    1 none FALSE            TRUE       5    0.05      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 6 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[3 item(s), 133 transaction(s)] done [0.00s].
## sorting and recoding items ... [3 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 done [0.00s].
## writing ... [0 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Show every mined rule (prints nothing when the rule set is empty).
inspect(drinkrules)

# Show the rules again, ordered by confidence. The rules were mined with a
# minimum confidence of 0.5, so the `confidence > 0.1` filter cannot exclude
# any of them; it is kept as written.
inspect(
  sort(
    subset(drinkrules, subset = confidence > 0.1),
    by = "confidence"
  )
)

I would like to gain more experience with R so that I could look further into whether food preferences more accurately predict an individual's drink level, or whether it is the other way around. I think it would also be interesting to see whether the individuals' locations are significant, and whether their communities play a part in the types of food they eat and in their habits.