# Use the CRAN cloud mirror for the install.packages() calls in this script.
options(
  repos = c(CRAN = "https://cloud.r-project.org")
)
# DEPENDENT T-TEST & WILCOXON SIGNED-RANK TEST


# HYPOTHESIS
# H(0): The professional communication training program has no effect on employees’ communication skills.
# H(1): The professional communication training program improves employees’ communication skills.
# Result: The alternative hypothesis was supported; communication scores improved after the professional training.

# IMPORT EXCEL FILE

# INSTALL REQUIRED PACKAGE
# Install readxl only when it is not already available, so re-running the
# script does not download and reinstall the package every time.

if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}

# LOAD THE PACKAGE

library(readxl)

# READ THE DATA
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this file exists. Consider a project-relative path.

A6R3 <- read_excel("C:/Users/chris/Downloads/A6R3.xlsx")

# Fail early if the two score columns used by the analysis are missing.
stopifnot(
  "A6R3 must contain PreTraining and PostTraining columns" =
    all(c("PreTraining", "PostTraining") %in% names(A6R3))
)


# THE DIFFERENCE SCORES
# Pull the two score columns out of the data and subtract the pre-training
# score from the post-training score, so a positive difference means a higher
# score after training.

After <- A6R3$PostTraining
Before <- A6R3$PreTraining

Differences <- After - Before


# HISTOGRAM
# Distribution of the difference scores; breaks = 20 is the suggested number
# of cells passed to hist().

hist(
  x = Differences,
  breaks = 20,
  col = "blue",
  border = "black",
  main = "Histogram of Difference Scores",
  xlab = "Value",
  ylab = "Frequency"
)

# REPORT

# The histogram of the difference scores is approximately symmetrical.
# Its shape is approximately bell-shaped, which is consistent with a normal distribution.


# SHAPIRO-WILK TEST
# Normality check on the paired difference scores.

shapiro.test(x = Differences)
## 
##  Shapiro-Wilk normality test
## 
## data:  Differences
## W = 0.98773, p-value = 0.21
# The difference scores were normally distributed (W = 0.99, p = .21).
# p > .05, so the normality assumption was not violated.

# BOXPLOT
# Side-by-side boxplots of the Before and After score vectors.

boxplot(
  Before, After,
  main = "Boxplot of Before and After Scores",
  names = c("Before", "After"),
  col = c("lightblue", "lightgreen")
)

# There are a few outliers (dots beyond the whiskers) in the boxplots.
# They are not extreme enough to distort the means.
# Continue with the dependent t-test.

# DESCRIPTIVE STATISTICS
# Mean, median, standard deviation and sample size for each condition.
# The sample size counts only non-missing scores so that n matches the values
# actually used by mean(), median() and sd() with na.rm = TRUE
# (length() would also count NA entries).

# DESCRIPTIVES FOR BEFORE SCORES

mean(Before, na.rm = TRUE)
## [1] 59.73333
median(Before, na.rm = TRUE)
## [1] 60
sd(Before, na.rm = TRUE)
## [1] 7.966091
sum(!is.na(Before))
## [1] 150
# DESCRIPTIVES FOR AFTER SCORES

mean(After, na.rm = TRUE)
## [1] 69.24
median(After, na.rm = TRUE)
## [1] 69.5
sd(After, na.rm = TRUE)
## [1] 9.481653
sum(!is.na(After))
## [1] 150
# DEPENDENT (PAIRED-SAMPLES) T-TEST
# Two-sided test on the mean of the paired differences, computed as
# Before minus After (so a negative t means the After scores are higher).

t.test(x = Before, y = After, paired = TRUE)
## 
##  Paired t-test
## 
## data:  Before and After
## t = -23.285, df = 149, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -10.313424  -8.699909
## sample estimates:
## mean difference 
##       -9.506667
# STATISTICAL SIGNIFICANCE
# Results were statistically significant (p < .05).

# EFFECT SIZE FOR DEPENDENT T-TEST

# INSTALL REQUIRED PACKAGE
# Install effectsize only when it is not already available, so re-running the
# script does not download and reinstall the package every time.

if (!requireNamespace("effectsize", quietly = TRUE)) {
  install.packages("effectsize")
}

# LOAD THE PACKAGE

library(effectsize)

# CALCULATE COHEN’S D
# Arguments are given as (Before, After), so a negative d means the After
# scores are higher than the Before scores.

cohens_d(Before, After, paired = TRUE)
## For paired samples, 'repeated_measures_d()' provides more options.
## Cohen's d |         95% CI
## --------------------------
## -1.90     | [-2.17, -1.63]
# |d| of 1.30 or greater = very large
# The difference between the two conditions' averages is very large.
# Group B (PostTraining) had the higher average score.

# Research Report on Results: Dependent t-test

# Output
# 1. The name of the inferential test used (Dependent t-test or Paired Samples t-test)
# 2. The names of the two related conditions or time points you analyzed: PreTraining and PostTraining.
# 3. The sample size (150)
# 4. The test was statistically significant (p < .05).
# 5. The mean (M) and standard deviation (SD) for each condition.
# PreTraining Mean = 59.73 SD = 7.97  PostTraining Mean = 69.24 SD = 9.48
# 6. The scores significantly increased.
# 7. Degrees of freedom (df = 149)
# 8. t-value = -23.285
# 9. EXACT p-value: p < .001
# 10. The size of the significant difference was very large (Cohen's d = -1.90).

# SUMMARY
# A dependent t-test was conducted to compare the communication skills of 150 employees before and after they completed a professional training program. Post-training scores (M = 69.24, SD = 9.48) were significantly higher than pre-training scores (M = 59.73, SD = 7.97), t(149) = -23.29, p < .001. This suggests that employees' communication abilities improved after the training. The effect size was very large (Cohen's d = -1.90, 95% CI [-2.17, -1.63]), indicating that the training program appears to have had a substantial impact on the development of communication skills.