# DEPENDENT T-TEST & WILCOXON SIGN RANK
# Used to test if there is a difference between Before scores and After scores (comparing the means).

# NULL HYPOTHESIS (H0)
# The null hypothesis is ALWAYS used.
# There is no difference between the Before and After scores.

# ALTERNATE HYPOTHESIS (H1)
# Choose ONE of the three options below (based on your research scenario):


# 3) DIRECTIONAL ALTERNATE HYPOTHESIS TWO: PostCampaignSales are higher than PreCampaignSales.


# IMPORT EXCEL FILE
# Purpose: Import your Excel dataset into R to conduct analyses.

# INSTALL REQUIRED PACKAGE
# If never installed, remove the hashtag before the install code.
# If previously installed, leave the hashtag in front of the code.

# install.packages("readxl")

# LOAD THE PACKAGE
# Always reload the package you want to use. 

library(readxl)
## Warning: package 'readxl' was built under R version 4.5.1
# IMPORT EXCEL FILE INTO R STUDIO
# Download the Excel file from One Drive and save it to your desktop.
# Right-click the Excel file and click “Copy as path” from the menu.
# In RStudio, replace the example path below with your actual path.
# Replace backslashes \ with forward slashes / or double the backslashes \\:
# ✘ WRONG   "C:\Users\Joseph\Desktop\mydata.xlsx"
# ✔ CORRECT "C:/Users/Joseph/Desktop/mydata.xlsx"
# ✔ CORRECT "C:\\Users\\Joseph\\Desktop\\mydata.xlsx"
# Replace "dataset" with the name of your excel data (without the .xlsx)

# Read the workbook once; parsing the same file twice is redundant I/O.
# `dataset` is kept as an alias so any code that still uses the template's
# generic name continues to work.
A6R4 <- read_excel("C:/Users/User/Downloads/A6R4.xlsx")
dataset <- A6R4


# CALCULATE THE DIFFERENCE SCORES
# Calculate the difference between the Before scores versus the after scores.

# RENAME THE VARIABLES
# Replace "dataset" with your dataset name (without .xlsx)
# Replace "pre" with name of your variable for before scores.
# Replace "post" with name of your variable for after scores.

# Fail fast if the expected columns are missing: `$` on a misnamed column
# yields NULL, which would silently turn Differences into numeric(0) and
# surface later as an unrelated error in hist()/shapiro.test().
stopifnot(
  all(c("PreCampaignSales", "PostCampaignSales") %in% names(A6R4))
)

Before <- A6R4[["PreCampaignSales"]]
After <- A6R4[["PostCampaignSales"]]

# Arithmetic on the scores only makes sense for numeric columns.
stopifnot(is.numeric(Before), is.numeric(After))

# Positive values mean the After score is higher than the Before score.
Differences <- After - Before


# HISTOGRAM
# Create a histogram for difference scores to visually check skewness and kurtosis.


# CREATE THE HISTOGRAMS
# You do not need to edit this code.

# Plot the distribution of the difference scores using 20 suggested bins.
hist(
  x = Differences,
  breaks = 20,
  col = "blue",
  border = "black",
  main = "Histogram of Difference Scores",
  xlab = "Value",
  ylab = "Frequency"
)

# WRITE THE REPORT
# Answer the questions below as a comment within the R script:
# Q1) Is the histogram symmetrical, positively skewed, or negatively skewed?
# The histogram is positively skewed
# Q2) Did the histogram look too flat, too tall, or did it have a proper bell curve?
# The histogram is too tall

# SHAPIRO-WILK TEST
# Check the normality for the difference between the groups.

# CONDUCT SHAPIRO-WILK TEST
# You do not need to edit the code.

# Test the difference scores (not the raw Before/After scores) for normality.
shapiro.test(x = Differences)
## 
##  Shapiro-Wilk normality test
## 
## data:  Differences
## W = 0.94747, p-value = 0.01186
# QUESTIONS
# Answer the questions below as a comment within the R script:
# Q1)Was the data normally distributed or abnormally distributed?
# The data is NOT normally distributed
# If p > 0.05 (P-value is GREATER than .05) this means the data is NORMAL (continue with Dependent t-test).
# If p < 0.05 (P-value is LESS than .05) this means the data is NOT normal (switch to Wilcoxon Sign Rank).

# BOXPLOT
# Check for any outliers impacting the mean. 

# CREATE THE BOXPLOT
# You do not need to edit this code

# Side-by-side boxplots of the two score vectors; points drawn beyond the
# whiskers are the potential outliers to inspect.
boxplot(
  Before, After,
  main = "Boxplot of Before and After Scores",
  names = c("Before", "After"),
  col = c("tan", "navy")
)

# QUESTIONS
# Answer the questions below as a comment within the R script:
# Q1) Were there any dots outside of the boxplots? These dots represent participants with extreme scores.
# There were a few dots outside of the boxplot

# Q2) If there are outliers, are they changing the mean so much that the mean no longer accurately represents the average score?
# Q3) Make a decision. If the outliers are extreme, you will need to switch to a Wilcoxon Sign Rank. 
# The outliers do not change the mean

# If there are not outliers, or the outliers are not extreme, continue with Dependent t-test.

# DESCRIPTIVE STATISTICS
# Calculate the mean, median, SD, and sample size for each group.

# DESCRIPTIVES FOR BEFORE SCORES
# You do not need to edit this code

# Centre and spread of the Before scores, ignoring missing values.
mean(Before, na.rm = TRUE)
## [1] 25154.53
median(Before, na.rm = TRUE)
## [1] 24624
sd(Before, na.rm = TRUE)
## [1] 12184.4
# Sample size: count only the non-missing scores so that n matches the values
# the statistics above were computed from (length() would also count NAs).
sum(!is.na(Before))
## [1] 60
# DESCRIPTIVES FOR AFTER SCORES
# You do not need to edit this code

# Centre and spread of the After scores, ignoring missing values.
mean(After, na.rm = TRUE)
## [1] 26873.45
median(After, na.rm = TRUE)
## [1] 25086
sd(After, na.rm = TRUE)
## [1] 14434.37
# Sample size: count only the non-missing scores so that n matches the values
# the statistics above were computed from (length() would also count NAs).
sum(!is.na(After))
## [1] 60
# WILCOXON SIGN RANK TEST
# Remove the hashtag to use the code
# There are no other edits you need to make to the code.

# Paired (signed rank) comparison of the Before and After scores. No
# `alternative` is given, so the test is two-sided.
# NOTE(review): the alternate hypothesis listed at the top of this script is
# directional (PostCampaignSales higher than PreCampaignSales) - confirm
# whether a one-sided test (alternative = "less") is intended instead.
wilcox.test(x = Before, y = After, paired = TRUE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  Before and After
## V = 640, p-value = 0.0433
## alternative hypothesis: true location shift is not equal to 0
# DETERMINE STATISTICAL SIGNIFICANCE
# If results were statistically significant (p < .05), continue to effect size section below.
# If results were NOT statistically significant (p > .05), skip to reporting section below.
# NOTE: Getting results that are not statistically significant does NOT mean you switch to Wilcoxon Sign Rank.
# The Wilcoxon Sign Rank test is only for abnormally distributed data — not based on outcome significance.


# EFFECT SIZE FOR WILCOXON SIGN RANK TEST
# Purpose: Determine how big of a difference there was between the group means.

# INSTALL REQUIRED PACKAGE
# If never installed, remove the hashtag before the install code.
# If previously installed, leave the hashtag in front of the code.

# install.packages("rstatix")

# LOAD THE PACKAGE
# Always reload the package you want to use. 

#library(rstatix)

# CALCULATE RANK BISERIAL CORRELATION (EFFECT SIZE)
# You do not need to edit this code, just remove the hashtags

# df_long <- data.frame(
#   id = rep(1:length(Before), 2),
#   time = rep(c("Before", "After"), each = length(Before)),
#   score = c(Before, After)
# )

# WILCOXON SIGN RANK TEST
# Remove the hashtag to use the code
# There are no other edits you need to make to the code.

# Paired (signed rank) comparison of the Before and After scores, two-sided.
# NOTE(review): this call uses the same arguments as the Wilcoxon test run
# earlier in the script, so it reproduces that result - confirm whether the
# repeat is intentional.
wilcox.test(x = Before, y = After, paired = TRUE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  Before and After
## V = 640, p-value = 0.0433
## alternative hypothesis: true location shift is not equal to 0
# DETERMINE STATISTICAL SIGNIFICANCE
# If results were statistically significant (p < .05), continue to effect size section below.
# If results were NOT statistically significant (p > .05), skip to reporting section below.
# NOTE: Getting results that are not statistically significant does NOT mean you switch to Wilcoxon Sign Rank.
# The Wilcoxon Sign Rank test is only for abnormally distributed data — not based on outcome significance.


# EFFECT SIZE FOR WILCOXON SIGN RANK TEST
# Purpose: Determine how big of a difference there was between the group means.

# INSTALL REQUIRED PACKAGE
# If never installed, remove the hashtag before the install code.
# If previously installed, leave the hashtag in front of the code.

# install.packages("rstatix")

# LOAD THE PACKAGE
# Always reload the package you want to use. 

library(rstatix)
## Warning: package 'rstatix' was built under R version 4.5.1
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter
# CALCULATE RANK BISERIAL CORRELATION (EFFECT SIZE)
# You do not need to edit this code, just remove the hashtags

# install.packages("coin")
# Reshape the paired scores to long format (one row per participant per time
# point), because wilcox_effsize() takes a data frame plus a formula.
# seq_along() is used instead of 1:length() so an empty input cannot produce
# the bogus index sequence c(1, 0).
df_long <- data.frame(
  id = rep(seq_along(Before), times = 2),
  time = rep(c("Before", "After"), each = length(Before)),
  score = c(Before, After)
)

# Effect size for the paired comparison; the result includes a magnitude label.
wilcox_effsize(df_long, score ~ time, paired = TRUE)
## # A tibble: 1 × 7
##   .y.   group1 group2 effsize    n1    n2 magnitude
## * <chr> <chr>  <chr>    <dbl> <int> <int> <ord>    
## 1 score After  Before   0.261    60    60 small