# DEPENDENT T-TEST & WILCOXON SIGN RANK
# Used to test if there is a difference between Before scores and After scores (comparing the means).
# NULL HYPOTHESIS (H0)
# There is no difference between the Before scores and After scores.
# ALTERNATE HYPOTHESIS (H1)
# There is a difference between the Before scores and After scores.
# IMPORT EXCEL FILE
# Import your Excel dataset into R to conduct analyses.
# INSTALL REQUIRED PACKAGE
# If never installed, remove the hashtag before the install code.
# If previously installed, leave the hashtag in front of the code.
# install.packages("readxl")
# LOAD THE PACKAGE
# Always reload the package you want to use. Remove the hashtag to use the code.
library(readxl)
# IMPORT EXCEL FILE INTO R STUDIO
# Download the Excel file from One Drive and save it to your desktop.
# Right-click the Excel file and click “Copy as path” from the menu.
# In RStudio, replace the example path below with your actual path.
# Replace each backslash \ with a forward slash /, or double it \\:
# ✘ WRONG "C:\Users\Joseph\Desktop\mydata.xlsx"
# ✔ CORRECT "C:/Users/Joseph/Desktop/mydata.xlsx"
# ✔ CORRECT "C:\\Users\\Joseph\\Desktop\\mydata.xlsx"
# Read the Excel workbook into `dataset4`.
# Replace the path below with the location of your own .xlsx file, and rename
# `dataset4` if you want the object to match your file name (without .xlsx).
dataset4 <- read_excel(path = "/Users/saitejadasari/Downloads/A6R4.xlsx")
# CALCULATE THE DIFFERENCE SCORES
# Purpose: compute the paired difference, After score minus Before score.
# RENAME THE VARIABLES
# Replace "dataset4" with your dataset name (without .xlsx).
# Replace "PreCampaignSales" with the name of your variable for before scores.
# Replace "PostCampaignSales" with the name of your variable for after scores.
After <- dataset4$PostCampaignSales
Before <- dataset4$PreCampaignSales
# Element-wise subtraction: a positive value means After is higher than Before.
Differences <- After - Before
# HISTOGRAM
# Plot the difference scores to visually check skewness and kurtosis.
# CREATE THE HISTOGRAM
# You do not need to edit this code.
hist(
  Differences,
  main = "Histogram of Difference Scores",
  xlab = "Value",
  ylab = "Frequency",
  col = "blue",
  border = "black",
  breaks = 20
)
# DIRECTIONS: Answer the questions below directly in your code.
# Keep every question and answer behind a "#": bare text is not valid R and
# stops the whole script from parsing.
# QUESTION 1: Is the histogram symmetrical, positively skewed, or negatively
# skewed?
# ANSWER: The histogram is positively skewed.
# QUESTION 2: Did the histogram look too flat, too tall, or did it have a
# proper bell curve?
# ANSWER: The histogram is too tall.
# SHAPIRO-WILK TEST
# Check whether the difference scores are normally distributed.
# You do not need to edit the code.
shapiro.test(Differences)
##
## Shapiro-Wilk normality test
##
## data: Differences
## W = 0.94747, p-value = 0.01186
# Keep the question and answer behind a "#": bare text is not valid R and
# stops the whole script from parsing.
# QUESTION 1: Was the data normally distributed or abnormally distributed?
# If p > 0.05 (p-value is GREATER than .05) this means the data is NORMAL
# (continue with Dependent t-test).
# If p < 0.05 (p-value is LESS than .05) this means the data is NOT normal
# (switch to Wilcoxon Sign Rank).
# ANSWER: The data is abnormally distributed because p < .05.
# BOXPLOT
# Check for any outliers impacting the mean.
# You do not need to edit this code.
boxplot(
  Differences,
  main = "Distribution of Score Differences (After - Before)",
  ylab = "Difference in Scores",
  col = "blue",
  border = "darkblue"
)
# DIRECTIONS: Answer the questions below directly in your code.
# Keep every question and answer behind a "#": bare text is not valid R and
# stops the whole script from parsing.
# QUESTION 1: How many dots are in your boxplot?
#   A) No dots.
#   B) One or two dots.
#   C) Many dots.
# ANSWER: B) One or two dots.
# QUESTION 2: Where are the dots in your boxplot?
#   A) There are no dots.
#   B) Very close to the whiskers (lines of the boxplot).
#   C) Far from the whiskers (lines of the boxplot).
# ANSWER: B) Very close to the whiskers (lines of the boxplot).
# QUESTION 3: Based on the dots and their location, is the data normal?
# ANSWER: The data is not normal.
# NOTE(review): by the first two rules below, answers B and B would point to
# "normal"; the "not normal" conclusion matches the Shapiro-Wilk result
# (p < .05) rather than the boxplot rule. Confirm which criterion is intended.
#   - If there are no dots, the data is normal.
#   - If there are one or two dots and they are CLOSE to the whiskers, the
#     data is normal.
#   - If there are many dots (more than one or two) and they are FAR AWAY from
#     the whiskers, this means data is NOT normal. Switch to a Wilcoxon Sign
#     Rank.
#   - Anything else could be normal or abnormal. Check if there is a big
#     difference between the median and the mean. If there is a big
#     difference, the data is not normal. If there is a small difference, the
#     data is normal.
# DESCRIPTIVES FOR BEFORE SCORES
# Prints, in order: mean, median, standard deviation, and number of scores.
# Missing values are dropped for the first three; length() counts every
# element, including any NA.
# You do not need to edit this code.
mean(x = Before, na.rm = TRUE)
## [1] 25154.53
median(x = Before, na.rm = TRUE)
## [1] 24624
sd(x = Before, na.rm = TRUE)
## [1] 12184.4
length(Before)
## [1] 60
# DESCRIPTIVES FOR AFTER SCORES
# Prints, in order: mean, median, standard deviation, and number of scores.
# Missing values are dropped for the first three; length() counts every
# element, including any NA.
# You do not need to edit this code.
mean(x = After, na.rm = TRUE)
## [1] 26873.45
median(x = After, na.rm = TRUE)
## [1] 25086
sd(x = After, na.rm = TRUE)
## [1] 14434.37
length(After)
## [1] 60