# Point the session at the CRAN cloud mirror; install.packages() reads its
# default repository from this option.
options(
  repos = c(CRAN = "https://cloud.r-project.org")
)

# INDEPENDENT T-TEST & MANN-WHITNEY U TEST

# HYPOTHESIS 
# H0: There is no difference in the average satisfaction scores of the two groups.
# H1: There is a difference in the average satisfaction scores of the two groups.


# IMPORT EXCEL FILE

# INSTALL Packages
# install.packages("readxl")

# LOAD THE PACKAGE
 
library(readxl)

# IMPORT EXCEL FILE INTO R STUDIO
# The workbook location defaults to the path used when this script was
# written. Set the A6R2_PATH environment variable to read the file from
# another location without editing the script.
a6r2_path <- Sys.getenv(
  "A6R2_PATH",
  unset = "C:/Users/chris/Downloads/A6R2.xlsx"
)

# Fail early with an actionable message when the workbook is missing.
if (!file.exists(a6r2_path)) {
  stop(
    "Excel file not found: ", a6r2_path,
    ". Set the A6R2_PATH environment variable to its location.",
    call. = FALSE
  )
}

A6R2 <- read_excel(a6r2_path)

# Every later step reads these two columns, so verify them once up front.
stopifnot(all(c("ServiceType", "SatisfactionScore") %in% names(A6R2)))


# DESCRIPTIVE STATISTICS
# mean, median, SD, and sample size for each group.

# INSTALL REQUIRED PACKAGE

# install.packages("dplyr")

# LOAD THE PACKAGE
 
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# CALCULATE THE DESCRIPTIVE STATISTICS


# Per-group descriptive statistics for SatisfactionScore.
# Mean, Median, and SD drop missing scores (na.rm = TRUE), so N counts only
# the non-missing scores as well. n() would also count rows whose score is NA
# and could overstate the sample size behind the other three statistics.
A6R2 %>%
  group_by(ServiceType) %>%
  summarise(
    Mean = mean(SatisfactionScore, na.rm = TRUE),
    Median = median(SatisfactionScore, na.rm = TRUE),
    SD = sd(SatisfactionScore, na.rm = TRUE),
    N = sum(!is.na(SatisfactionScore))
  )
## # A tibble: 2 × 5
##   ServiceType  Mean Median    SD     N
##   <chr>       <dbl>  <dbl> <dbl> <int>
## 1 AI           3.6       3  1.60   100
## 2 Human        7.42      8  1.44   100
# HISTOGRAMS


# CREATE THE HISTOGRAMS 
# Replace "dataset" with your dataset name (without .xlsx)
# Replace "score" with your dependent variable R code name (example: USD)
# Replace "group" with your independent variable R code name (example: Country)
# Replace "Group1" with the R code name for your first group (example: USA)
# Replace "Group2" with the R code name for your second group (example: India)

# Histogram of the satisfaction scores for the "Human" service group
# (titled "Group 1" on the plot).
hist(
  A6R2[["SatisfactionScore"]][A6R2[["ServiceType"]] == "Human"],
  breaks = 20,
  col = "lightblue",
  border = "black",
  main = "Histogram of Group 1 Scores",
  xlab = "Value",
  ylab = "Frequency"
)

# Histogram of the satisfaction scores for the "AI" service group
# (titled "Group 2" on the plot).
hist(
  A6R2[["SatisfactionScore"]][A6R2[["ServiceType"]] == "AI"],
  breaks = 20,
  col = "lightgreen",
  border = "black",
  main = "Histogram of Group 2 Scores",
  xlab = "Value",
  ylab = "Frequency"
)

# QUESTIONS
# Answer the questions below as comments within the R script:

# Group 1 Scores
# The histogram is symmetrical but slightly negatively skewed.
# The histogram appears to have a proper bell shape.
# Group 2 Scores
# The histogram is positively skewed.
# The histogram appears too flat.

# THE SHAPIRO-WILK TEST

# Shapiro-Wilk normality test on the "Human" group's satisfaction scores.
# The subsetting expression is written inline because shapiro.test() echoes it
# on the "data:" line of the printed result (see the output below).
shapiro.test(A6R2$SatisfactionScore[A6R2$ServiceType == "Human"])
## 
##  Shapiro-Wilk normality test
## 
## data:  A6R2$SatisfactionScore[A6R2$ServiceType == "Human"]
## W = 0.93741, p-value = 0.0001344
# Same test on the "AI" group's satisfaction scores.
shapiro.test(A6R2$SatisfactionScore[A6R2$ServiceType == "AI"])
## 
##  Shapiro-Wilk normality test
## 
## data:  A6R2$SatisfactionScore[A6R2$ServiceType == "AI"]
## W = 0.91143, p-value = 5.083e-06
# The data was not normally distributed for Variable 1 (Human)
# The data was not normally distributed for Variable 2 (AI)


# BOXPLOT

# INSTALL REQUIRED PACKAGE

# Install the plotting packages only when they are missing. Calling
# install.packages() unconditionally re-downloads and reinstalls them on
# every run of the script.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
if (!requireNamespace("ggpubr", quietly = TRUE)) {
  install.packages("ggpubr")
}

# LOAD THE PACKAGE

library(ggplot2)
library(ggpubr)

# CREATE THE BOXPLOT

# Boxplot of SatisfactionScore for each ServiceType, colored by group with
# the "jco" palette and with the observations overlaid as jittered points.
ggboxplot(
  data = A6R2,
  x = "ServiceType",
  y = "SatisfactionScore",
  add = "jitter",
  color = "ServiceType",
  palette = "jco"
)

# There are dots outside of the boxplots and they might change the mean.

# MANN-WHITNEY U TEST

# Two-sided Mann-Whitney U (Wilcoxon rank-sum) test of SatisfactionScore
# across the ServiceType groups. exact = FALSE skips the exact p-value
# computation and uses the normal approximation instead.
wilcox.test(
  SatisfactionScore ~ ServiceType,
  data = A6R2,
  alternative = "two.sided",
  exact = FALSE
)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  SatisfactionScore by ServiceType
## W = 497, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
# STATISTICAL SIGNIFICANCE
# P-value < 2.2e-16

# EFFECT-SIZE

# INSTALL REQUIRED PACKAGE

# Install the effect-size packages only when they are missing instead of
# reinstalling on every run. rstatix is attached below but was never
# installed by this script, and rstatix::wilcox_effsize() takes its Z
# statistic from the coin package, so both are checked here as well.
if (!requireNamespace("effectsize", quietly = TRUE)) {
  install.packages("effectsize")
}
if (!requireNamespace("rstatix", quietly = TRUE)) {
  install.packages("rstatix")
}
if (!requireNamespace("coin", quietly = TRUE)) {
  install.packages("coin")
}

# LOAD THE PACKAGE

# NOTE(review): no effectsize function is called in the visible part of this
# script; the attach is kept in case other code relies on it.
library(effectsize)

# EFFECT SIZE (R VALUE)

# Attaching rstatix after effectsize masks effectsize::cohens_d and
# effectsize::eta_squared, as well as stats::filter.
library(rstatix)

# Wilcoxon effect size r = Z / sqrt(N) for SatisfactionScore by ServiceType.
rstatix::wilcox_effsize(A6R2, SatisfactionScore ~ ServiceType)
## # A tibble: 1 × 7
##   .y.               group1 group2 effsize    n1    n2 magnitude
## * <chr>             <chr>  <chr>    <dbl> <int> <int> <ord>    
## 1 SatisfactionScore AI     Human    0.784   100   100 large
# The size of the effect

# An r of ± 0.50 or beyond = large; the observed r = 0.784 is a large effect.

# Human had the higher average rank.

# WRITTEN REPORT FOR MANN-WHITNEY U TEST

# A Mann-Whitney U test was conducted to compare satisfaction scores between
# participants who interacted with a human service provider (n = 100) and
# those who interacted with an AI service provider (n = 100). The results
# showed a statistically significant difference between the two groups
# (U = 497, p < .001). Participants in the human service condition reported
# higher median satisfaction scores (Mdn = 8) than participants in the AI
# service condition (Mdn = 3). The effect size computed above with
# rstatix::wilcox_effsize() was large (r = 0.78). The corresponding
# rank-biserial correlation, 2U / (n1 * n2) - 1 with AI as the first group,
# was -0.90, indicating that AI satisfaction scores were consistently lower
# than human satisfaction scores. Overall, these findings suggest that
# participants were significantly more satisfied with human service providers
# than with AI service providers.