# Point CRAN at the cloud mirror so install.packages() can run without an
# interactive mirror prompt (for example when the script is knitted).
# Only the CRAN entry is replaced; any other repositories that are already
# configured (e.g. in .Rprofile) are kept instead of being overwritten.
local({
  repos <- getOption("repos")
  repos["CRAN"] <- "https://cloud.r-project.org"
  options(repos = repos)
})

# INDEPENDENT T-TEST & MANN-WHITNEY U TEST

# HYPOTHESIS TESTED:
# Used to test if there is a difference between the means of two groups.

# NULL HYPOTHESIS (H0)
# The null hypothesis below is ALWAYS used.
# There is no difference between the scores of Group A and Group B.

# ALTERNATE HYPOTHESIS (H1)
# Choose ONE of the three options below (based on your research scenario):
# 1) NON-DIRECTIONAL ALTERNATE HYPOTHESIS: There is a difference between the scores of Group A and Group B.
# 2) DIRECTIONAL ALTERNATE HYPOTHESIS ONE: Group A has higher scores than Group B.
# 3) DIRECTIONAL ALTERNATE HYPOTHESIS TWO: Group B has higher scores than Group A.

# QUESTION
# What are the null and alternate hypotheses for YOUR research scenario?
# H0: There is no difference in the average satisfaction scores between customers served by human agents and those served by the AI chatbot.
# H1: There is a difference in the average satisfaction scores between the two groups.


# IMPORT EXCEL FILE


# INSTALL PACKAGES
# install.packages("readxl")

# LOAD THE PACKAGE
 

library(readxl)

# Read the assignment workbook into the data frame A6R2.
# No sheet is named, so read_excel() falls back to its default sheet.
A6R2 <- read_excel(
  path = "C:/Users/dubet/OneDrive/Documents/AA5221/A6R2.xlsx"
)


# DESCRIPTIVE STATISTICS
# mean, median, SD, and sample size for each group.

# INSTALL REQUIRED PACKAGE

# install.packages("dplyr")

# LOAD THE PACKAGE

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# CALCULATE THE DESCRIPTIVE STATISTICS
# One row per ServiceType with the mean, median, standard deviation and
# sample size of SatisfactionScore.
# Mean, Median and SD skip missing scores (na.rm = TRUE), so N counts only the
# non-missing scores as well; n() would also count rows whose score is NA and
# could report a larger sample than the one the statistics are based on.

A6R2 %>%
  group_by(ServiceType) %>%
  summarise(
    Mean = mean(SatisfactionScore, na.rm = TRUE),
    Median = median(SatisfactionScore, na.rm = TRUE),
    SD = sd(SatisfactionScore, na.rm = TRUE),
    N = sum(!is.na(SatisfactionScore))
  )
## Output recorded from the original run:
## # A tibble: 2 × 5
##   ServiceType  Mean Median    SD     N
##   <chr>       <dbl>  <dbl> <dbl> <int>
## 1 AI           3.6       3  1.60   100
## 2 Human        7.42      8  1.44   100


# HISTOGRAMS


# CREATE THE HISTOGRAMS 
# Replace "dataset" with your dataset name (without .xlsx)
# Replace "score" with your dependent variable R code name (example: USD)
# Replace "group" with your independent variable R code name (example: Country)
# Replace "Group1" with the R code name for your first group (example: USA)
# Replace "Group2" with the R code name for your second group (example: India)

# Histogram of the satisfaction scores for the Human group.
with(
  A6R2,
  hist(
    SatisfactionScore[ServiceType == "Human"],
    breaks = 20,
    col = "lightblue",
    border = "black",
    main = "Histogram of Human Scores",
    xlab = "Value",
    ylab = "Frequency"
  )
)

# Histogram of the satisfaction scores for the AI group.
with(
  A6R2,
  hist(
    SatisfactionScore[ServiceType == "AI"],
    breaks = 20,
    col = "lightgreen",
    border = "black",
    main = "Histogram of AI Scores",
    xlab = "Value",
    ylab = "Frequency"
  )
)

# QUESTIONS
# Answer the questions below as comments within the R script:

# Group 1 Scores
# The histogram is symmetrical but slightly negatively skewed.
# The histogram appears to have a proper bell shape.
# Group 2 Scores
# The histogram is positively skewed.
# The histogram appears too flat.

# THE SHAPIRO-WILK TEST

# Shapiro-Wilk normality test on the Human group's satisfaction scores.
# By convention, a p-value below .05 is taken as evidence that the scores
# are not normally distributed.
shapiro.test(A6R2$SatisfactionScore[A6R2$ServiceType == "Human"])
## 
##  Shapiro-Wilk normality test
## 
## data:  A6R2$SatisfactionScore[A6R2$ServiceType == "Human"]
## W = 0.93741, p-value = 0.0001344

# Same test on the AI group's satisfaction scores.
 shapiro.test(A6R2$SatisfactionScore[A6R2$ServiceType == "AI"])
## 
##  Shapiro-Wilk normality test
## 
## data:  A6R2$SatisfactionScore[A6R2$ServiceType == "AI"]
## W = 0.91143, p-value = 5.083e-06


# Was the data normally distributed for Variable 1 (Human)? No: p = 0.0001344, which is below .05.
# Was the data normally distributed for Variable 2 (AI)? No: p = 5.083e-06, which is below .05.


# BOXPLOT

# INSTALL REQUIRED PACKAGES
# Install the plotting packages only when they are missing, so that re-running
# or knitting the script does not download them again every time.

if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
if (!requireNamespace("ggpubr", quietly = TRUE)) {
  install.packages("ggpubr")
}

# LOAD THE PACKAGES

library(ggplot2)
library(ggpubr)

# CREATE THE BOXPLOT
# One box per ServiceType for SatisfactionScore, coloured by group with the
# "jco" palette, with the individual observations overlaid as jittered points.

ggboxplot(
  A6R2,
  x = "ServiceType",
  y = "SatisfactionScore",
  color = "ServiceType",
  palette = "jco",
  add = "jitter"
)

# There are dots outside of the boxplots, and they might change the mean so much that it no longer accurately represents the average score.

# MANN-WHITNEY U TEST

# Two-sided Mann-Whitney U (Wilcoxon rank-sum) test comparing
# SatisfactionScore between the two ServiceType groups.
# exact = FALSE asks for the normal approximation rather than an exact p-value;
# `alternative` and `correct` are written out at their default values so the
# non-directional hypothesis and the continuity correction are visible.
wilcox.test(
  SatisfactionScore ~ ServiceType,
  data = A6R2,
  alternative = "two.sided",
  correct = TRUE,
  exact = FALSE
)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  SatisfactionScore by ServiceType
## W = 497, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
#  STATISTICAL SIGNIFICANCE
#  P-value < 2.2e-16

# EFFECT-SIZE

# INSTALL REQUIRED PACKAGES
# Install only what is missing, so that re-running or knitting the script does
# not download the packages again. rstatix is loaded below and is therefore
# checked too; coin is installed because rstatix::wilcox_effsize() relies on it.

if (!requireNamespace("effectsize", quietly = TRUE)) {
  install.packages("effectsize")
}
if (!requireNamespace("rstatix", quietly = TRUE)) {
  install.packages("rstatix")
}
if (!requireNamespace("coin", quietly = TRUE)) {
  install.packages("coin")
}

# LOAD THE PACKAGES

library(effectsize)
library(rstatix)
## 
## Attaching package: 'rstatix'
## The following objects are masked from 'package:effectsize':
## 
##     cohens_d, eta_squared
## The following object is masked from 'package:stats':
## 
##     filter

# CALCULATE EFFECT SIZE (R VALUE)
# wilcox_effsize() reports r together with a magnitude label.

rstatix::wilcox_effsize(A6R2, SatisfactionScore ~ ServiceType)
## # A tibble: 1 × 7
##   .y.               group1 group2 effsize    n1    n2 magnitude
## * <chr>             <chr>  <chr>    <dbl> <int> <int> <ord>    
## 1 SatisfactionScore AI     Human    0.784   100   100 large

# CALCULATE EFFECT SIZE (RANK-BISERIAL CORRELATION)
# The written report quotes a rank-biserial correlation, which is a different
# statistic from the r above, so it is computed here explicitly.
# With W = 497 and 100 scores per group the value should be about
# 2 * 497 / (100 * 100) - 1 = -0.90 (AI relative to Human).

effectsize::rank_biserial(SatisfactionScore ~ ServiceType, data = A6R2)
# The size of the effect

# ± 0.50 and above = large

# Human Group had the higher average rank.

# The Mann-Whitney U test does not compare means directly. 


# WRITTEN REPORT FOR MANN-WHITNEY U TEST

# A Mann-Whitney U test was conducted to compare satisfaction scores between participants who interacted with a human service provider (n = 100) and those who interacted with an AI service provider (n = 100). The results showed a statistically significant difference between the two groups (U = 497, p < .001). Participants in the human service condition reported higher median satisfaction scores (Mdn = 8) compared to participants in the AI service condition (Mdn = 3). The effect size, calculated using the rank-biserial correlation, was very large (r = -0.90), indicating a strong negative association between AI service type and satisfaction scores. Overall, these findings suggest that participants were significantly more satisfied with human service providers than with AI service providers.