#install.packages("MASS")
library(MASS)
# Read the survey responses from the exported spreadsheet (first row = headers).
# NOTE(review): this absolute path only exists on the author's machine; adjust
# it before running elsewhere.
chi_data <- read.csv(
  file = "/Users/yahavmanor/Desktop/ENP164/chi_square_data - Sheet1.csv",
  header = TRUE
)

# Echo the data frame so the raw responses can be inspected.
chi_data
##         Year Preference
## 1     Senior   Smoothie
## 2     Senior   Smoothie
## 3     Senior   Smoothie
## 4     Junior      Salad
## 5     Senior   Smoothie
## 6   Freshman      Salad
## 7   Freshman   Smoothie
## 8  Sophomore      Salad
## 9   Freshman   Smoothie
## 10 Sophomore      Salad
# Cross-tabulate class year (rows) against food preference (columns).
# The result gets its own name rather than `table`, so the base function
# table() is not masked by a variable of the same name.
year_by_preference <- table(chi_data$Year, chi_data$Preference)

year_by_preference  # prints the contingency table
##            
##             Salad Smoothie
##   Freshman      1        2
##   Junior        1        0
##   Senior        0        4
##   Sophomore     2        0

# Pearson's chi-squared test of independence between year and preference.
# NOTE(review): R emits the warning below because the expected cell counts are
# under 5 (only 10 responses), so the chi-squared approximation is unreliable
# here. Consider fisher.test(year_by_preference) or
# chisq.test(year_by_preference, simulate.p.value = TRUE) as a cross-check.
chisq.test(year_by_preference)
## Warning in chisq.test(year_by_preference): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  year_by_preference
## X-squared = 7.2222, df = 3, p-value = 0.06514
#Analysis: 
# Expected results: we expected lowerclassmen to prefer the to-go salads at
# Kindlevan more than upperclassmen do, because Kindlevan started introducing
# more variety in its to-go salads (like the vegan buddha bowl or the sweet
# potato bowl) in 2023, when the current upperclassmen became upperclassmen and
# generally stopped getting food from Kindlevan (no more meal swipes, eating
# more from home). We would therefore expect upperclassmen to tend to prefer
# smoothies over salads.
# Observed results: the chi-square test gives a p-value of 0.065, which is
# greater than 0.05, so we fail to reject the null hypothesis; there is not
# enough evidence to suggest a statistically significant association between
# the two variables. (With only 10 responses, R also warns that the chi-squared
# approximation may be incorrect, so this result should be interpreted with
# caution.) The expected and observed results therefore differ: we expected to
# see a relationship between Kindlevan preference and class year at Tufts, but
# no statistically significant relationship was found.