# A6R2

# Pin the CRAN mirror used by the install.packages() calls in this script.
options(
  repos = c(CRAN = "https://cloud.r-project.org")
)

# INDEPENDENT T-TEST & MANN-WHITNEY U TEST

# HYPOTHESIS TESTED:
# Used to test if there is a difference between the means of two groups.

# NULL HYPOTHESIS (H0)
# The null hypothesis below is ALWAYS used.
# There is no difference between the scores of Group A and Group B.

# ALTERNATE HYPOTHESIS (H1)
# Choose ONE of the three options below (based on your research scenario):
# 1) NON-DIRECTIONAL ALTERNATE HYPOTHESIS: There is a difference between the scores of Group A and Group B.
# 2) DIRECTIONAL ALTERNATE HYPOTHESIS ONE: Group A has higher scores than Group B.
# 3) DIRECTIONAL ALTERNATE HYPOTHESIS TWO: Group B has higher scores than Group A.

# QUESTION
# What are the null and alternate hypotheses for YOUR research scenario?
# H0: There is no difference in the average satisfaction scores between customers served by human agents and those served by the AI chatbot.
# H1: There is a difference in the average satisfaction scores between the two groups.


# IMPORT EXCEL FILE


# INSTALL PACKAGES
# install.packages("readxl")

# LOAD THE PACKAGE
 

library(readxl)

# IMPORT EXCEL FILE INTO R STUDIO

# Load the assignment workbook into the A6R2 data set used by every
# analysis step below.
A6R2 <- read_excel(
  path = "C:/Users/dubet/OneDrive/Documents/AA5221/A6R2.xlsx"
)


# DESCRIPTIVE STATISTICS
# mean, median, SD, and sample size for each group.

# INSTALL REQUIRED PACKAGE

# install.packages("dplyr")

# LOAD THE PACKAGE

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

##
##Attaching package: ‘dplyr’

##The following objects are masked from ‘package:stats’:
##  
##  filter, lag

##The following objects are masked from ‘package:base’:
##  
## intersect, setdiff, setequal, union

# CALCULATE THE DESCRIPTIVE STATISTICS

# Per-group descriptive statistics for SatisfactionScore, one row per
# ServiceType.
# Mean, Median and SD all drop missing scores (na.rm = TRUE), so N counts
# only the non-missing scores as well; n() would count every row and could
# disagree with the statistics reported next to it.
A6R2 %>%
  group_by(ServiceType) %>%
  summarise(
    Mean = mean(SatisfactionScore, na.rm = TRUE),
    Median = median(SatisfactionScore, na.rm = TRUE),
    SD = sd(SatisfactionScore, na.rm = TRUE),
    N = sum(!is.na(SatisfactionScore))
  )

## # A tibble: 2 × 5
##   ServiceType  Mean Median    SD     N
##   <chr>       <dbl>  <dbl> <dbl> <int>
## 1 AI           3.6       3  1.60   100
## 2 Human        7.42      8  1.44   100

## # A tibble: 2 × 5
##  ServiceType  Mean Median    SD     N
##  <chr>       <dbl>  <dbl> <dbl> <int>
##  1 AI           3.6       3  1.60   100
##  2 Human        7.42      8  1.44   100


# HISTOGRAMS


# CREATE THE HISTOGRAMS 
# Replace "dataset" with your dataset name (without .xlsx)
# Replace "score" with your dependent variable R code name (example: USD)
# Replace "group" with your independent variable R code name (example: Country)
# Replace "Group1" with the R code name for your first group (example: USA)
# Replace "Group2" with the R code name for your second group (example: India)

# Histogram of the satisfaction scores recorded for the "Human" service type.
hist(
  A6R2[["SatisfactionScore"]][A6R2[["ServiceType"]] == "Human"],
  breaks = 20,
  col = "lightblue",
  border = "black",
  main = "Histogram of Human Scores",
  xlab = "Value",
  ylab = "Frequency"
)

# Histogram of the satisfaction scores recorded for the "AI" service type.
hist(
  A6R2[["SatisfactionScore"]][A6R2[["ServiceType"]] == "AI"],
  breaks = 20,
  col = "lightgreen",
  border = "black",
  main = "Histogram of AI Scores",
  xlab = "Value",
  ylab = "Frequency"
)

# QUESTIONS
# Answer the questions below as comments within the R script:

# Group 1 Scores
# The histogram is symmetrical but slightly negatively skewed.
# The histogram appears to have a proper bell shape.
# Group 2 Scores
# The histogram is positively skewed.
# The histogram appears too flat.

# THE SHAPIRO-WILK TEST

# Shapiro-Wilk normality test on the satisfaction scores of the "Human" group.
# The subset expression is passed inline on purpose: the printed "data:" label
# echoes this exact expression.
shapiro.test(A6R2$SatisfactionScore[A6R2$ServiceType == "Human"])

## 
##  Shapiro-Wilk normality test
## 
## data:  A6R2$SatisfactionScore[A6R2$ServiceType == "Human"]
## W = 0.93741, p-value = 0.0001344

##
##Shapiro-Wilk normality test

##data:  A6R2$SatisfactionScore[A6R2$ServiceType == "Human"]
##W = 0.93741, p-value = 0.0001344

 # Shapiro-Wilk normality test on the satisfaction scores of the "AI" group.
 # The subset expression is passed inline on purpose: the printed "data:" label
 # echoes this exact expression.
 shapiro.test(A6R2$SatisfactionScore[A6R2$ServiceType == "AI"])

## 
##  Shapiro-Wilk normality test
## 
## data:  A6R2$SatisfactionScore[A6R2$ServiceType == "AI"]
## W = 0.91143, p-value = 5.083e-06

##Shapiro-Wilk normality test
##
##data:  A6R2$SatisfactionScore[A6R2$ServiceType == "AI"]
##W = 0.91143, p-value = 5.083e-06


# Is the data normally distributed for Variable 1 (Human)? No: p = 0.0001344, which is below .05.
# Is the data normally distributed for Variable 2 (AI)? No: p = 5.083e-06, which is below .05.


# BOXPLOT

# INSTALL REQUIRED PACKAGE

# Install ggplot2 only when it is not already available, so re-running the
# script does not download and reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}

## Installing package into 'C:/Users/dubet/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'ggplot2' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\dubet\AppData\Local\Temp\RtmpYxaOJx\downloaded_packages

 # Install ggpubr only when it is not already available, so re-running the
 # script does not download and reinstall the package every time.
 if (!requireNamespace("ggpubr", quietly = TRUE)) {
   install.packages("ggpubr")
 }

## Installing package into 'C:/Users/dubet/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'ggpubr' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\dubet\AppData\Local\Temp\RtmpYxaOJx\downloaded_packages

# LOAD THE PACKAGE
 
library(ggplot2)
library(ggpubr)

# CREATE THE BOXPLOT

# Boxplot of SatisfactionScore for each ServiceType, coloured by group with
# the "jco" palette and with the individual observations overlaid as
# jittered points.
ggboxplot(
  data = A6R2,
  x = "ServiceType",
  y = "SatisfactionScore",
  add = "jitter",
  palette = "jco",
  color = "ServiceType"
)

# There are dots outside of the boxplots, and they might change the mean so much that the mean no longer accurately represents the average score.

# MANN-WHITNEY U TEST

# Mann-Whitney U (Wilcoxon rank-sum) test comparing SatisfactionScore between
# the two ServiceType groups in A6R2.
# exact = FALSE skips the exact p-value computation; the printed result below
# shows the test run "with continuity correction" and with a two-sided
# alternative (location shift not equal to 0).
wilcox.test(SatisfactionScore ~ ServiceType, data = A6R2, exact = FALSE)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  SatisfactionScore by ServiceType
## W = 497, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0

#  STATISTICAL SIGNIFICANCE
#  P-value < 2.2e-16

# EFFECT-SIZE

# Install effectsize only when it is not already available, so re-running the
# script does not download and reinstall the package every time.
if (!requireNamespace("effectsize", quietly = TRUE)) {
  install.packages("effectsize")
}

## Installing package into 'C:/Users/dubet/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'effectsize' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\dubet\AppData\Local\Temp\RtmpYxaOJx\downloaded_packages

# LOAD THE PACKAGE

library(effectsize)

# CALCULATE THE EFFECT SIZE (R VALUE)
 
library(rstatix)

## 
## Attaching package: 'rstatix'

## The following objects are masked from 'package:effectsize':
## 
##     cohens_d, eta_squared

## The following object is masked from 'package:stats':
## 
##     filter

# Install coin only when it is not already available, so re-running the
# script does not download and reinstall the package every time.
# NOTE(review): coin is never attached in this script; presumably it is
# needed internally by the rstatix effect-size call below -- confirm.
if (!requireNamespace("coin", quietly = TRUE)) {
  install.packages("coin")
}

## Installing package into 'C:/Users/dubet/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'coin' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\dubet\AppData\Local\Temp\RtmpYxaOJx\downloaded_packages

# Effect size for the Mann-Whitney comparison of SatisfactionScore across
# ServiceType; the result is a one-row tibble with the effect size, the two
# group sizes and a magnitude label.
rstatix::wilcox_effsize(
  data = A6R2,
  formula = SatisfactionScore ~ ServiceType
)

## # A tibble: 1 × 7
##   .y.               group1 group2 effsize    n1    n2 magnitude
## * <chr>             <chr>  <chr>    <dbl> <int> <int> <ord>    
## 1 SatisfactionScore AI     Human    0.784   100   100 large

# The size of the effect

# ± 0.50 and above = large

# Human Group had the higher average rank.

# The Mann-Whitney U test does not compare means directly. 


# WRITTEN REPORT FOR MANN-WHITNEY U TEST

# A Mann-Whitney U test was conducted to compare satisfaction scores between participants who interacted with a human service provider (n = 100) and those who interacted with an AI service provider (n = 100). The results showed a statistically significant difference between the two groups (U = 497, p < .001). Participants in the human service condition reported higher median satisfaction scores (Mdn = 8) compared to participants in the AI service condition (Mdn = 3). The effect size, calculated as r = Z / sqrt(N) with rstatix::wilcox_effsize, was large (r = 0.78), indicating a substantial difference in satisfaction scores between the two service types. Overall, these findings suggest that participants were significantly more satisfied with human service providers than with AI service providers.

# A6R2

# GROUP 11

# 2025-09-30