Load Required Packages

library(readxl)
library(ggplot2)

Import Dataset

# Read the survey workbook from disk; read_excel() returns it as a tibble
DatasetA2 <- read_excel(path = "/Users/karim/Desktop/DatasetA2.xlsx")
# Preview the first six rows to confirm the columns were read as expected
head(DatasetA2, n = 6L)
## # A tibble: 6 × 2
##   StudentID FavoriteDrink
##       <dbl> <chr>        
## 1         1 Soda         
## 2         2 Soda         
## 3         3 Soda         
## 4         4 Coffee       
## 5         5 Soda         
## 6         6 Coffee

Create Frequency Table

# Count how many students picked each drink (one cell per category)
chi_table <- table(DatasetA2[["FavoriteDrink"]])
chi_table
## 
## Coffee   Soda    Tea  Water 
##     26     29     28     17

Create Bar Chart

# Bar chart of the number of students per drink, one coloured bar per category
ggplot(
  data = DatasetA2,
  mapping = aes(x = FavoriteDrink, fill = FavoriteDrink)
) +
  geom_bar() +
  labs(
    title = "Distribution of Beverage Preferences",
    x = "Favorite Drink",
    y = "Frequency"
  ) +
  theme(
    # The fill colours map to the same variable as the x axis, so the legend
    # would only repeat the axis labels
    legend.position = "none",
    # Use one font size (14) for every text element of the plot
    plot.title = element_text(size = 14),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 14),
    text = element_text(size = 14)
  )

Chi-Square Goodness-of-Fit Test

# Observed counts per drink category as a plain numeric vector
observed <- as.numeric(chi_table)

# Expected proportions under the null hypothesis: an equal share for every
# category. Derived from the number of observed categories so the vector always
# has the same length as `observed` (0.25 each for the four drinks here).
expected <- rep(1 / length(observed), times = length(observed))

# Chi-square goodness-of-fit test of the observed counts against the
# equal-proportion null hypothesis
chi_result <- chisq.test(x = observed, p = expected)
chi_result
## 
##  Chi-squared test for given probabilities
## 
## data:  observed
## X-squared = 3.6, df = 3, p-value = 0.308

Effect Size (Cohen’s W)

# Cohen's w = sqrt(chi-square statistic / total number of observations)
sqrt(chi_result[["statistic"]] / sum(chi_table))
## X-squared 
## 0.1897367

Interpretation

A chi-square goodness-of-fit test showed no significant difference between the observed and expected frequencies, χ²(3, N = 100) = 3.60, p = .308. Therefore, we fail to reject the null hypothesis. There is no statistically significant evidence that students' beverage preferences differ from an equal split; the observed distribution is consistent with equal preference for tea, coffee, soda, and water. The effect size was small (Cohen’s W = .19).