# Step 1: Install the Required Packages
# Packages only need to be installed once. Do not run the install code more than once after successful installation.
#install.packages("readxl")
#install.packages("ggplot2")
#install.packages("rcompanion")
# Step 2: Open the Installed Packages
# Packages must be loaded every time you open a new R session.
library(readxl)
library(ggplot2)
library(rcompanion)
# Step 3: Import and Name Dataset
# Read the Excel workbook from disk and keep the resulting data frame in
# `DrinkData` so the later steps can refer to it by name.
dataset_path <- "/Users/ha113ab/Desktop/datasets/DatasetA2.xlsx"
DrinkData <- read_excel(path = dataset_path)
# Step 4: Create a Frequency Table
# Count how many rows fall into each FavoriteDrink category; these counts are
# the observed frequencies used by the goodness-of-fit test.
table(DrinkData[["FavoriteDrink"]])
##
## Coffee Soda Tea Water
## 26 29 28 17
# Step 5: Create a Bar Chart
# Draw one bar per drink category, with bar height equal to the number of rows
# in that category. A bar chart is only needed if the variable contains three
# or more levels.

# Axis labels and plot title.
drink_labels <- labs(
  title = "Distribution of Favorite Drinks",
  x = "Favorite Drink",
  y = "Frequency"
)

# Uniform 14-point text everywhere; the legend is hidden because the fill
# colour simply repeats the x-axis categories.
drink_theme <- theme(
  text = element_text(size = 14),
  axis.title = element_text(size = 14),
  axis.text = element_text(size = 14),
  plot.title = element_text(size = 14),
  legend.position = "none"
)

ggplot(
  data = DrinkData,
  mapping = aes(x = FavoriteDrink, fill = FavoriteDrink)
) +
  geom_bar() +
  drink_labels +
  drink_theme

# Step 6: Conduct the Chi-Square Goodness-of-Fit Test
# Enter the observed counts exactly as printed by the Step 4 frequency table
# (Coffee = 26, Soda = 29, Tea = 28, Water = 17), and the expected proportions
# from the research scenario in the same category order.
# The counts are named so a mismatch with the frequency table is easy to spot.
observed <- c(Coffee = 26, Soda = 29, Tea = 28, Water = 17)
# Expected proportions: each of the four drinks is assumed equally popular,
# so every category gets 0.25 (the proportions must sum to 1).
expected <- c(0.25, 0.25, 0.25, 0.25)
chisq.test(x = observed, p = expected)
##
## Chi-squared test for given probabilities
##
## data: observed
## X-squared = 3.6, df = 3, p-value = 0.308
# Step 7: Calculate Cohen's W (Effect Size)
# Calculate the effect size only if the p-value was statistically significant.
# Note: p-value is 0.308 > 0.05, so effect size is NOT calculated because result is not significant.
# Step 8: Interpret and Report the Results
# A chi-square goodness-of-fit test indicated that the observed
# frequencies did not differ significantly from the expected frequencies,
# χ²(3) = 3.6, p = .308. Since the result was not statistically significant, effect size is not reported.
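# For reference only (this wording does NOT apply to the analysis in this script; the numbers are from a different example):
# if a result is statistically significant, the report would instead follow this pattern:
# "The observed frequencies were different from the expected frequencies, χ²(3) = 3.68, p = .298. The association between the two variables was weak (Cohen's W = .19)."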