############################################################
# MANN-WHITNEY U TEST & NORMALITY CHECK
############################################################

# NULL HYPOTHESIS (H0):
# There is no difference in satisfaction scores between customers 
# who used AI service and customers who used human service.

# ALTERNATE HYPOTHESIS (H1):
# There is a difference in satisfaction scores between customers 
# who used AI service and customers who used human service.

############################################################
# DESCRIPTIVE STATISTICS & NORMALITY CHECK
############################################################

library(readxl)

# Load the Excel workbook; `dataset` is the data frame used by every later
# section of this script.
dataset <- read_excel(path = "A6R2.xlsx")

# Convenience copies of the grouping and outcome columns as plain vectors.
group <- dataset$ServiceType
score <- dataset$SatisfactionScore

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Descriptive statistics of the satisfaction scores, one row per service
# type: mean, median, standard deviation, and number of observations.
summarise(
  group_by(dataset, ServiceType),
  Mean   = mean(SatisfactionScore),
  Median = median(SatisfactionScore),
  SD     = sd(SatisfactionScore),
  N      = n()
)
## # A tibble: 2 × 5
##   ServiceType  Mean Median    SD     N
##   <chr>       <dbl>  <dbl> <dbl> <int>
## 1 AI           3.6       3  1.60   100
## 2 Human        7.42      8  1.44   100
############################################################
# HISTOGRAMS FOR NORMALITY (REQUIRED BY TEMPLATE)
############################################################

# One histogram per service type, used to judge the shape of each group's
# score distribution. `xlab` is set explicitly; otherwise hist() labels the
# x-axis with the deparsed subsetting expression, which is unreadable.
hist(dataset$SatisfactionScore[dataset$ServiceType == "Human"],
     main = "Histogram – Human Satisfaction Scores",
     xlab = "Satisfaction Score",
     col = "lightblue")

hist(dataset$SatisfactionScore[dataset$ServiceType == "AI"],
     main = "Histogram – AI Satisfaction Scores",
     xlab = "Satisfaction Score",
     col = "lightgreen")

# QUESTIONS (as required in full sentence format)
# Q1) Based on the histogram for the Human group, does the distribution appear symmetrical, positively skewed, or negatively skewed?
# ANSWER: The Human histogram is not symmetrical; it appears negatively skewed, which is consistent with the group mean (7.42) falling below the median (8).

# Q2) Based on the histogram for the AI group, does the distribution appear symmetrical, positively skewed, or negatively skewed?
# ANSWER: The AI histogram is not symmetrical; it appears positively skewed, which is consistent with the group mean (3.6) lying above the median (3).

# Q3) Do either of the histograms resemble a proper bell curve?
# ANSWER: No, neither histogram resembles a proper bell curve.


############################################################
# SHAPIRO-WILK NORMALITY TESTS
############################################################

# Shapiro-Wilk test on the Human group's scores. The null hypothesis is that
# the sample comes from a normal distribution, so p < .05 indicates a
# departure from normality.
shapiro.test(dataset$SatisfactionScore[dataset$ServiceType == "Human"])
## 
##  Shapiro-Wilk normality test
## 
## data:  dataset$SatisfactionScore[dataset$ServiceType == "Human"]
## W = 0.93741, p-value = 0.0001344
# Human: W = 0.93741, p-value = 0.0001344

# Same test on the AI group's scores, interpreted the same way.
shapiro.test(dataset$SatisfactionScore[dataset$ServiceType == "AI"])
## 
##  Shapiro-Wilk normality test
## 
## data:  dataset$SatisfactionScore[dataset$ServiceType == "AI"]
## W = 0.91143, p-value = 5.083e-06
# AI: W = 0.91143, p-value = 5.083e-06
# QUESTIONS 
# Q4) Was the Human group normally distributed based on the Shapiro-Wilk test?
# ANSWER: No. The p-value for the Human group was 0.0001344, which is below 0.05, indicating that the Human scores were not normally distributed.

# Q5) Was the AI group normally distributed based on the Shapiro-Wilk test?
# ANSWER: No. The p-value for the AI group was 0.000005083, which is below 0.05, indicating that the AI scores were not normally distributed.

# Q6) Based on all of the above normality results (histograms, Shapiro tests, and boxplots), should you use an Independent t-test or a Mann-Whitney U test?
# ANSWER: Because neither group was normally distributed, the appropriate inferential test is the Mann-Whitney U test.


############################################################
# BOXPLOT FOR OUTLIERS 
############################################################

library(ggplot2)
library(ggpubr)

# Box plot of satisfaction score by service type, coloured per group with the
# "jco" palette. `add = "jitter"` overlays the individual observations as
# jittered points on top of the boxes.
ggboxplot(
  data    = dataset,
  x       = "ServiceType",
  y       = "SatisfactionScore",
  color   = "ServiceType",
  palette = "jco",
  add     = "jitter"
)

# Q7) How many dots (outliers) appear in the boxplot?
# ANSWER: Multiple outlier points appear in the plot.

# Q8) Are the dots close to the whiskers or far away from the whiskers?
# ANSWER: Several outliers appear far from the whiskers, indicating strong deviation from normality.

# Q9) Do the outliers further confirm the decision to use the Mann-Whitney U test?
# ANSWER: Yes. The presence and distance of the outliers from the whiskers clearly reinforce that the data are not normally distributed.


############################################################
# MANN-WHITNEY U TEST
############################################################

# Two-sided Mann-Whitney U (Wilcoxon rank-sum) test comparing satisfaction
# scores between the two service types.
# The formula names the columns of `dataset` directly, so `data = dataset` is
# what actually supplies the values instead of same-valued vectors in the
# global environment that could go stale if `dataset` is later modified.
# `exact = FALSE` requests the normal approximation (with continuity
# correction) rather than an exact p-value.
wilcox.test(SatisfactionScore ~ ServiceType, data = dataset, exact = FALSE)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  SatisfactionScore by ServiceType
## W = 497, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
# W = 497, p-value < 2.2e-16


############################################################
# EFFECT SIZE 
############################################################

library(effectsize)

# Cohen's d (pooled SD) for AI vs. Human. It is kept because the answers and
# the written report below cite it, but it is a mean/SD-based measure and the
# scores were found to be non-normal.
cohens_d(SatisfactionScore ~ ServiceType, data = dataset, pooled_sd = TRUE)
## Cohen's d |         95% CI
## --------------------------
## -2.52     | [-2.89, -2.14]
## 
## - Estimated using pooled SD.
# Cohen's d = -2.52, 95% CI = [-2.89, -2.14]

# Rank-biserial correlation: the rank-based effect size that matches the
# Mann-Whitney U test, so it does not rely on the normality assumption.
# NOTE(review): the point estimate should follow from the test statistic,
# r = 2 * W / (n1 * n2) - 1 = 2 * 497 / (100 * 100) - 1, i.e. about -0.90;
# re-run to confirm the value and obtain its confidence interval.
rank_biserial(SatisfactionScore ~ ServiceType, data = dataset)

# Q10) What is the size of the effect?
# ANSWER: Cohen's d = -2.52 (|d| = 2.52) represents a very large effect, well beyond the ±1.30 threshold for a very large effect; the negative sign reflects that the AI group scored lower than the Human group.

# Q11) Which group had the higher average rank?
# ANSWER: The Human service group had higher satisfaction scores (Human median = 8.00; AI median = 3.00), meaning Human service had the higher average rank.


############################################################
# WRITTEN REPORT 
############################################################

# A Mann-Whitney U test was conducted to compare 
# satisfaction scores between customers who used AI service (n = 100) 
# and customers who used human service (n = 100). 
# Customers who interacted with human service had significantly higher median scores (Mdn = 8.00) than 
# customers who interacted with AI service (Mdn = 3.00), U = 497, p < .001.
# The effect size was very large (d = -2.52, 95% CI [-2.89, -2.14]), indicating a substantial difference between satisfaction levels.
# Overall, customers reported much higher satisfaction with human service.