# Setup ----

# Echo the source of every chunk in the knitted report.
knitr::opts_chunk$set(echo = TRUE)
# read_excel() is used below to load the .xlsx data set.
library(readxl)
# ggplot2 draws the bar chart of drink preferences.
library(ggplot2)
# NOTE(review): no rcompanion function is called in the visible code --
# confirm this package is still needed.
library(rcompanion)

# Loading Dataset ----

# NOTE(review): absolute, machine-specific path -- the script only runs where
# this exact file exists. Consider a project-relative path.
DatasetA2 <- read_excel("C:/Users/Student/Documents/Assignment5_AA/DatasetA2.xlsx")

# Frequency of each favorite drink. Tabulating the category alone gives the
# counts the goodness-of-fit test needs; crossing it with StudentID (unique per
# row) only produced a 100-row matrix of 0/1 indicators.
table(DatasetA2$FavoriteDrink)
## 
## Coffee   Soda    Tea  Water 
##     26     29     28     17 

# Bar Chart ----

# One bar per drink, filled by drink. The legend is hidden because it would
# only repeat the x-axis labels. Every text element is pinned to the same size.
label_style <- element_text(size = 14)

drink_plot <- ggplot(
  data = DatasetA2,
  mapping = aes(x = FavoriteDrink, fill = FavoriteDrink)
) +
  geom_bar() +
  xlab("Favorite Drink") +
  ylab("Frequency") +
  ggtitle("Distribution of Favorite Drink Preference") +
  theme(
    legend.position = "none",
    plot.title = label_style,
    axis.text = label_style,
    axis.title = label_style,
    text = label_style
  )

print(drink_plot)

# Chi-square goodness-of-fit test ----

# Observed frequencies, counted from the data instead of typed in by hand so
# they cannot drift out of sync with the spreadsheet. table() orders the
# categories by sorted label (Coffee, Soda, Tea, Water).
observed <- table(DatasetA2$FavoriteDrink)

# Null hypothesis: every drink is equally likely. chisq.test()'s `p` argument
# takes probabilities (not expected counts), one per category.
expected <- rep(1 / length(observed), length(observed))

chisq.test(x = observed, p = expected)
## 
##  Chi-squared test for given probabilities
## 
## data:  observed
## X-squared = 3.6, df = 3, p-value = 0.308

# Reporting the results ----

# Build the write-up from the computed test result rather than from numbers
# copied by hand, so the text can never disagree with the analysis.
gof_test <- chisq.test(x = observed, p = expected)
chi_sq <- unname(gof_test$statistic)
gof_df <- unname(gof_test$parameter)
p_value <- gof_test$p.value

# Cohen's W for a goodness-of-fit test: sqrt(chi-square / N).
cohen_w <- sqrt(chi_sq / sum(observed))

# APA style omits the leading zero (0.308 -> .308).
drop_leading_zero <- function(value, digits) {
  sub("^0\\.", ".", sprintf(paste0("%.", digits, "f"), value))
}

p_text <- if (p_value < 0.001) {
  "p < .001"
} else {
  paste0("p = ", drop_leading_zero(p_value, 3))
}

# Verbal conclusion follows the conventional .05 significance level.
difference_text <- if (p_value < 0.05) {
  "were significantly different"
} else {
  "were not different"
}

# Cohen's (1988) benchmarks for W: .10 small, .30 medium, .50 large.
effect_label <- as.character(cut(
  cohen_w,
  breaks = c(0, 0.1, 0.3, 0.5, Inf),
  labels = c("negligible", "weak", "moderate", "strong"),
  right = FALSE
))

cat(
  "A chi-square goodness-of-fit test indicated that the observed frequencies\n",
  "were ", difference_text, " from the expected frequencies, ",
  "χ²(", gof_df, ") = ", sprintf("%.2f", chi_sq), ", ", p_text, ".\n",
  "The association between the variable and the expected distribution was ",
  effect_label, " (Cohen's W = ", drop_leading_zero(cohen_w, 2), ").\n",
  sep = ""
)
## A chi-square goodness-of-fit test indicated that the observed frequencies
## were not different from the expected frequencies, χ²(3) = 3.60, p = .308.
## The association between the variable and the expected distribution was weak (Cohen's W = .19).