# This analysis addresses Research Scenario 1 from Assignment 4. It tests whether sales are distributed equally across three desserts (chocolate cake, vanilla cheesecake, and tiramisu).
# INSTALL REQUIRED PACKAGE
# install.packages("readxl")
# LOAD THE PACKAGE
library(readxl)
# READ THE DESSERT SALES WORKBOOK
# The path is absolute and machine-specific; update it before running this
# script on another computer.
dataset <- read_excel(path = "/Users/guminhe/Downloads/RQ1.xlsx")
# =========================
# TABULATE THE DESSERT DATA
# =========================
# COUNT HOW OFTEN EACH DESSERT APPEARS
# One cell per distinct value in the Dessert column; these counts are the
# observed frequencies for the goodness-of-fit test.
observed <- table(dataset[["Dessert"]])
# SHOW THE COUNTS
print(observed)
##
## Cheesecake ChocoCake Tiramisu
## 171 258 119
# CONFIRM THE CATEGORY ORDER
# Any vector of expected proportions must follow this same order.
names(observed)
## [1] "Cheesecake" "ChocoCake" "Tiramisu"
# ===============================
# CHI-SQUARE GOODNESS OF FIT CODE
# ===============================
# PURPOSE
# Test the null hypothesis that the three desserts sell equally often
# against the alternative that at least one dessert's share differs.
# EXPECTED PROPORTIONS UNDER THE NULL: ONE THIRD PER DESSERT
expected <- rep(1 / 3, times = 3)
# RUN THE TEST AND DISPLAY THE RESULT
chisq_gfit <- chisq.test(x = observed, p = expected)
print(chisq_gfit)
##
## Chi-squared test for given probabilities
##
## data: observed
## X-squared = 54.004, df = 2, p-value = 1.876e-12
# ================
# EFFECT SIZE CODE
# ================
# PURPOSE
# Quantify how far the observed counts depart from the expected counts
# using Cohen's w = sqrt(chi-squared / N), where N is the total count.
# Conventional benchmarks (Cohen, 1988): 0.1 small, 0.3 medium, 0.5 large.
# unname() drops the "X-squared" label inherited from the test statistic,
# which would otherwise mislabel the effect size when it is printed.
W <- sqrt(unname(chisq_gfit$statistic) / sum(observed))
W
## [1] 0.3139217