Load the Required Packages

library(readxl)
library(ggplot2)

Import and Name Dataset

# Read the survey workbook into `DatasetA2`.
# NOTE: the path is absolute and specific to one machine; it must be adjusted
# before this script is run anywhere else.
DatasetA2 <- read_excel(
  path = "C:/Users/cniti/Documents/AA-5221 Applied Analytics/DatasetA2.xlsx"
)

Create a Frequency Table

# Count how many respondents chose each drink (one cell per category).
table(DatasetA2[["FavoriteDrink"]])
## 
## Coffee   Soda    Tea  Water 
##     26     29     28     17

Create a Bar Chart

# Bar chart of the FavoriteDrink counts: one bar per category, coloured by
# category. geom_bar() does the counting itself, so the raw data is passed in.
ggplot(
  data = DatasetA2,
  mapping = aes(x = FavoriteDrink, fill = FavoriteDrink)
) +
  geom_bar() +
  labs(
    title = "Distribution of FavoriteDrink",
    x = "FavoriteDrink",
    y = "Frequency"
  ) +
  theme(
    # The x-axis already names each category, so the fill legend is redundant.
    legend.position = "none",
    # Use one uniform 14-pt size for every text element of the plot.
    text = element_text(size = 14),
    plot.title = element_text(size = 14),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 14)
  )

Conduct the Chi-Square Goodness-of-Fit Test

# Observed counts, in the order printed by the frequency table:
# Coffee, Soda, Tea, Water.
observed <- c(26, 29, 28, 17)

# Hypothesized population proportions for the same four categories, in the
# same order. They sum to 1, as chisq.test() requires for `p`.
expected <- c(0.25, 0.10, 0.30, 0.35)

# Goodness-of-fit test of the observed counts against those proportions.
chisq.test(observed, p = expected)
## 
##  Chi-squared test for given probabilities
## 
## data:  observed
## X-squared = 45.53, df = 3, p-value = 7.137e-10

Calculate Cohen’s W (Effect Size)

# Cohen's w for a goodness-of-fit test is sqrt(chi-square / N), where the
# chi-square statistic comes from the goodness-of-fit test itself (observed
# counts vs. hypothesized proportions) and N is the total number of
# observations.
#
# The statistic must NOT come from a StudentID x FavoriteDrink cross-tab: that
# table has one observation per row (assuming each StudentID is unique), so
# its chi-square is N * (k - 1) and w collapses to sqrt(k - 1) = 1.732 no
# matter what the data look like. It also triggers the "approximation may be
# incorrect" warning.
chi_result <- chisq.test(x = observed, p = expected)

# unname() drops the "X-squared" label so `w` prints as a plain number.
w <- sqrt(unname(chi_result$statistic) / sum(observed))
w
## [1] 0.6747627

A chi-square goodness-of-fit test indicated that the observed frequencies of favorite drinks differed significantly from the expected frequencies, χ²(3, N = 100) = 45.53, p < .001 (p = 7.137e-10). The effect size was large (Cohen’s W = √(45.53 / 100) = 0.675).