# Step 1: Install the Required Packages
# Packages only need to be installed once. Do not run the install code more than once after successful installation.

#install.packages("readxl")
#install.packages("ggplot2")
#install.packages("rcompanion")



# Step 2: Open the Installed Packages
# Packages must be loaded every time you open a new R session.

library(readxl)
library(ggplot2)
library(rcompanion)


# Step 3: Import and Name Dataset
# This code imports an Excel dataset and stores it as an object in R.

# Read the Excel workbook from disk and keep the result as `DrinkData`.
DrinkData <- read_excel(
  path = "/Users/ha113ab/Desktop/datasets/DatasetA2.xlsx"
)

# Step 4: Create a Frequency Table
# A frequency table shows how many observations fall into each category of the variable.

# Count how many rows fall under each FavoriteDrink category.
table(DrinkData[["FavoriteDrink"]])
## 
## Coffee   Soda    Tea  Water 
##     26     29     28     17
# Step 5: Create a Bar Chart
# Bar charts are used to visually display the data. A bar chart is only needed if the variable contains three or more levels.

# Bar chart of the number of rows per FavoriteDrink category. Each bar is
# filled by its own category, so the legend would only repeat the x-axis
# labels and is therefore hidden. All text elements are set to size 14.
ggplot(data = DrinkData) +
  geom_bar(mapping = aes(x = FavoriteDrink, fill = FavoriteDrink)) +
  xlab("Favorite Drink") +
  ylab("Frequency") +
  ggtitle("Distribution of Favorite Drinks") +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 14),
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14),
    text = element_text(size = 14)
  )

# Step 6: Conduct the Chi-Square Goodness-of-Fit Test
# Provide the observed values from your frequency table and provide the expected values from the research scenario.

# Observed counts are taken directly from the data so that they always agree
# with the frequency table of FavoriteDrink; hand-typed counts can silently
# drift out of sync with the dataset. Categories appear in table() order.
observed <- table(DrinkData$FavoriteDrink)

# Hypothesised proportions from the research scenario: one entry per
# category, in the same order as `observed`, summing to 1.
expected <- c(0.25, 0.25, 0.25, 0.25)

# Goodness-of-fit test of the observed counts against the given proportions.
chisq.test(x = observed, p = expected)
## 
##  Chi-squared test for given probabilities
## 
## data:  observed
## X-squared = 3.6, df = 3, p-value = 0.308
# Step 7: Calculate Cohen's W (Effect Size)
# Calculate the effect size only if the p-value was statistically significant.
# Note: p-value is 0.308 > 0.05, so effect size is NOT calculated because result is not significant.



# Step 8: Interpret and Report the Results

# A chi-square goodness-of-fit test indicated that the observed
# frequencies did not differ significantly from the expected frequencies,
# χ²(3, N = 100) = 3.60, p = .308. Because the result was not statistically
# significant, no effect size is reported.
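# (Unused reporting template for a statistically significant result; it does
# NOT apply to this analysis and its numbers are placeholders: "... the
# observed frequencies were different from the expected frequencies,
# χ²(3) = 3.68, p = .298. The association between the two variables was
# weak (Cohen's W = .19).")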