# INDEPENDENT T-TEST & MANN-WHITNEY U TEST

# HYPOTHESIS TESTED:

# NULL HYPOTHESIS (H0): There is no difference in the effectiveness of Medication A and Medication B in reducing the number of headaches reported by participants.

# ALTERNATE HYPOTHESIS (H1), NON-DIRECTIONAL: There is a difference in the effectiveness of Medication A and Medication B in reducing the number of headaches reported by participants.

# IMPORT EXCEL FILE

# INSTALL REQUIRED PACKAGE

# Install readxl only when it is not already available, so re-running the
# script does not reinstall the package every time. The package name must be
# wrapped in straight quotes: typographic quotes are not valid R string
# delimiters and make the line fail to parse.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}

# LOAD THE PACKAGE

library(readxl)

# IMPORT EXCEL FILE INTO R STUDIO

# Read the workbook into the data frame `A6R1`, which every later step uses.
# NOTE(review): the path is absolute and specific to one Windows machine;
# adjust it when running the script elsewhere.
A6R1 <- readxl::read_excel(
  path = "C:\\users\\OP-PC\\Downloads\\A6R1.xlsx"
)

# DESCRIPTIVE STATISTICS

# INSTALL REQUIRED PACKAGE

# Install dplyr only when it is not already available, so re-running the
# script does not reinstall the package every time. The package name must be
# wrapped in straight quotes: typographic quotes are not valid R string
# delimiters and make the line fail to parse.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}

# LOAD THE PACKAGE

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# CALCULATE THE DESCRIPTIVE STATISTICS

# Per-medication summary of HeadacheDays: mean, median, standard deviation
# and group size. Mean, Median and SD ignore missing values (na.rm = TRUE);
# N is the number of rows in the group (n()), so it also counts rows where
# HeadacheDays is missing.
dplyr::summarise(
  dplyr::group_by(A6R1, Medication),
  Mean = mean(HeadacheDays, na.rm = TRUE),
  Median = median(HeadacheDays, na.rm = TRUE),
  SD = sd(HeadacheDays, na.rm = TRUE),
  N = dplyr::n()
)
## # A tibble: 2 × 5
##   Medication  Mean Median    SD     N
##   <chr>      <dbl>  <dbl> <dbl> <int>
## 1 A            8.1    8    2.81    50
## 2 B           12.6   12.5  3.59    50

# HISTOGRAMS

# CREATE THE HISTOGRAMS

# One histogram per medication group, used to judge the shape (skewness and
# kurtosis) of the HeadacheDays distribution by eye.

# Group A
hist(
  x = A6R1[["HeadacheDays"]][A6R1[["Medication"]] == "A"],
  breaks = 20,
  col = "lightblue",
  border = "black",
  main = "Histogram of A Scores",
  xlab = "Value",
  ylab = "Frequency"
)

# Group B
hist(
  x = A6R1[["HeadacheDays"]][A6R1[["Medication"]] == "B"],
  breaks = 20,
  col = "lightgreen",
  border = "black",
  main = "Histogram of B Scores",
  xlab = "Value",
  ylab = "Frequency"
)

# QUESTIONS

#Q1) Check the SKEWNESS of the VARIABLE 1 histogram. In your opinion, does the histogram look symmetrical, positively skewed, or negatively skewed?
#Q2) Check the KURTOSIS of the VARIABLE 1 histogram. In your opinion, does the histogram look too flat, too tall, or does it have a proper bell curve?
#Q3) Check the SKEWNESS of the VARIABLE 2 histogram. In your opinion, does the histogram look symmetrical, positively skewed, or negatively skewed?
#Q4) Check the KURTOSIS of the VARIABLE 2 histogram. In your opinion, does the histogram look too flat, too tall, or does it have a proper bell curve?

# SHAPIRO-WILK TEST

# CONDUCT THE SHAPIRO-WILK TEST

# Shapiro-Wilk normality test on the HeadacheDays values of the rows where
# Medication is "A". The call is kept on one line because the expression is
# echoed verbatim in the "data:" line of the printed result.
shapiro.test(A6R1$HeadacheDays[A6R1$Medication == "A"])
## 
##  Shapiro-Wilk normality test
## 
## data:  A6R1$HeadacheDays[A6R1$Medication == "A"]
## W = 0.97852, p-value = 0.4913
# Shapiro-Wilk normality test on the HeadacheDays values of the rows where
# Medication is "B". The call is kept on one line because the expression is
# echoed verbatim in the "data:" line of the printed result.
shapiro.test(A6R1$HeadacheDays[A6R1$Medication == "B"])
## 
##  Shapiro-Wilk normality test
## 
## data:  A6R1$HeadacheDays[A6R1$Medication == "B"]
## W = 0.98758, p-value = 0.8741
# QUESTION
# Was the data normally distributed for Variable 1?
# Was the data normally distributed for Variable 2?

# BOXPLOT

# INSTALL REQUIRED PACKAGE

# Install the plotting packages only when they are not already available.
# Each install is its own statement (two calls on one line without a
# separator do not parse), and the package names use straight quotes because
# typographic quotes are not valid R string delimiters.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
if (!requireNamespace("ggpubr", quietly = TRUE)) {
  install.packages("ggpubr")
}

# LOAD THE PACKAGE

library(ggplot2)
library(ggpubr)

# CREATE THE BOXPLOT

# Box plot of HeadacheDays for each medication group, coloured by group with
# the "jco" palette; individual observations are overlaid as jittered points
# so extreme scores are easy to spot.
ggpubr::ggboxplot(
  data = A6R1,
  x = "Medication",
  y = "HeadacheDays",
  add = "jitter",
  color = "Medication",
  palette = "jco"
)

# QUESTION
#Q1) Were there any dots outside of the boxplots? These dots represent participants with extreme scores.
#Q2) If there are outliers, in your opinion are the scores of those dots changing the mean so much that the mean no longer accurately represents the average score?

# INDEPENDENT T-TEST

# Two-sample t-test comparing HeadacheDays between the two Medication groups.
# var.equal = TRUE requests the pooled-variance (Student) test rather than
# the default Welch test.
stats::t.test(
  HeadacheDays ~ Medication,
  data = A6R1,
  var.equal = TRUE
)
## 
##  Two Sample t-test
## 
## data:  HeadacheDays by Medication
## t = -6.9862, df = 98, p-value = 3.431e-10
## alternative hypothesis: true difference in means between group A and group B is not equal to 0
## 95 percent confidence interval:
##  -5.778247 -3.221753
## sample estimates:
## mean in group A mean in group B 
##             8.1            12.6

# DETERMINE STATISTICAL SIGNIFICANCE

# EFFECT-SIZE

# INSTALL REQUIRED PACKAGE

# Install effectsize only when it is not already available, so re-running the
# script does not reinstall the package every time. The package name must be
# wrapped in straight quotes: typographic quotes are not valid R string
# delimiters and make the line fail to parse.
if (!requireNamespace("effectsize", quietly = TRUE)) {
  install.packages("effectsize")
}

# LOAD THE PACKAGE

library(effectsize)

# CALCULATE COHEN'S D

# Cohen's d for HeadacheDays between the two Medication groups, standardised
# by the pooled standard deviation. The result is stored in `cohen_d_result`
# and then printed.
cohen_d_result <- effectsize::cohens_d(
  HeadacheDays ~ Medication,
  data = A6R1,
  pooled_sd = TRUE
)
print(cohen_d_result)
## Cohen's d |         95% CI
## --------------------------
## -1.40     | [-1.83, -0.96]
## 
## - Estimated using pooled SD.
# QUESTIONS

# Q1) What is the size of the effect?

# Q2) Which group had the higher average score?