############################################################
# INDEPENDENT T-TEST & NORMALITY CHECKS
############################################################

# NULL HYPOTHESIS (H0):
# There is no difference in headache days between patients 
# taking Medication A and patients taking Medication B.

# ALTERNATIVE HYPOTHESIS (H1):
# There is a difference in headache days between patients 
# taking Medication A and patients taking Medication B.



############################################################
# DESCRIPTIVE STATISTICS & NORMALITY CHECK
############################################################

library(readxl)
# Load the headache-days data from the Excel workbook; the relative path is
# resolved against the current working directory.
dataset <- read_excel("A6R1.xlsx")

# Fail fast if a column used by the rest of the script is absent. Without this
# check, `$` on a missing column yields NULL and the later tests fail with an
# unrelated-looking error.
stopifnot(all(c("HeadacheDays", "Medication") %in% names(dataset)))

# Stand-alone copies of the outcome and grouping columns.
score <- dataset$HeadacheDays
group <- dataset$Medication

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Descriptive statistics for headache days within each medication group:
# mean, median, standard deviation and number of rows. The result is
# auto-printed at top level.
summarise(
  group_by(dataset, Medication),
  Mean   = mean(HeadacheDays),
  Median = median(HeadacheDays),
  SD     = sd(HeadacheDays),
  N      = n()
)
## # A tibble: 2 × 5
##   Medication  Mean Median    SD     N
##   <chr>      <dbl>  <dbl> <dbl> <int>
## 1 A            8.1    8    2.81    50
## 2 B           12.6   12.5  3.59    50
# Medication A: Mean = 8.10,  SD = 2.81, n = 50
# Medication B: Mean = 12.60, SD = 3.59, n = 50


############################################################
# HISTOGRAMS
############################################################

# One histogram of headache days per medication group, used as a visual check
# of distribution shape. `xlab` is set explicitly because hist() otherwise
# labels the x-axis with the raw subsetting expression passed as its argument.
hist(dataset$HeadacheDays[dataset$Medication == "A"],
     main = "Medication A", xlab = "Headache days", col = "lightblue")

hist(dataset$HeadacheDays[dataset$Medication == "B"],
     main = "Medication B", xlab = "Headache days", col = "lightgreen")

# Q1) Based on the histograms, does either group appear skewed?
# ANSWER: Both histograms appear reasonably symmetrical.

# Q2) Do the histograms resemble a bell curve?
# ANSWER: Yes, both groups resemble a normal distribution pattern.


############################################################
# SHAPIRO-WILK NORMALITY TESTS
############################################################

# Shapiro-Wilk normality test on headache days, run separately for each
# medication group. The argument expression is kept exactly as written because
# the printed "data:" line echoes it verbatim.
shapiro.test(dataset$HeadacheDays[dataset$Medication == "A"])
## 
##  Shapiro-Wilk normality test
## 
## data:  dataset$HeadacheDays[dataset$Medication == "A"]
## W = 0.97852, p-value = 0.4913
# Medication A: p = 0.4913 → NORMAL

# Same test for the Medication B group.
shapiro.test(dataset$HeadacheDays[dataset$Medication == "B"])
## 
##  Shapiro-Wilk normality test
## 
## data:  dataset$HeadacheDays[dataset$Medication == "B"]
## W = 0.98758, p-value = 0.8741
# Medication B: p = 0.8741 → NORMAL

# Q3) Was the Medication A group normally distributed?
# ANSWER: Yes. p = 0.4913 (greater than .05), so the Shapiro-Wilk test does not reject normality.

# Q4) Was the Medication B group normally distributed?
# ANSWER: Yes. p = 0.8741 (greater than .05), so the Shapiro-Wilk test does not reject normality.

# Q5) Based on histograms and Shapiro-Wilk, which test should you use?
# ANSWER: Independent t-test (because both groups are normally distributed).


############################################################
# INDEPENDENT T-TEST
############################################################

# Two-sided independent-samples t-test of headache days by medication group.
# The formula names columns of `dataset`, so `data =` is what actually supplies
# the values; a formula built from free-standing workspace vectors would keep
# using those copies even if `dataset` were later filtered or modified.
# NOTE(review): var.equal = TRUE assumes equal variances in the two groups; no
# homogeneity-of-variance check is visible in this script — confirm.
t.test(HeadacheDays ~ Medication, data = dataset, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  HeadacheDays by Medication
## t = -6.9862, df = 98, p-value = 3.431e-10
## alternative hypothesis: true difference in means between group A and group B is not equal to 0
## 95 percent confidence interval:
##  -5.778247 -3.221753
## sample estimates:
## mean in group A mean in group B 
##             8.1            12.6
# OUTPUT:
# t = -6.9862
# df = 98
# p = 3.431e-10 (p < .001)
# Medication A Mean = 8.1
# Medication B Mean = 12.6


############################################################
# EFFECT SIZE — COHEN’S D
############################################################

library(effectsize)
# Cohen's d for headache days by medication group, standardised by the pooled
# SD. The formula names columns of `dataset`, so `data =` supplies the values
# instead of free-standing workspace vectors that could go stale.
cohens_d(HeadacheDays ~ Medication, data = dataset, pooled_sd = TRUE)
## Cohen's d |         95% CI
## --------------------------
## -1.40     | [-1.83, -0.96]
## 
## - Estimated using pooled SD.
# OUTPUT:
# Cohen’s d = -1.40

# Q1) What is the size of the effect?
# ANSWER: |d| = 1.40 (printed as -1.40 because Medication A's mean is lower than Medication B's), which is considered a VERY LARGE effect (|d| greater than 1.30).

# Q2) Which group had the higher average score?
# ANSWER: Medication B had the higher average headache days (12.6 vs 8.1).


############################################################
# WRITTEN REPORT FOR INDEPENDENT T-TEST
############################################################

# An independent-samples t-test was conducted to compare
# headache days between patients taking Medication A (n = 50) and Medication B (n = 50).
# Patients taking Medication B reported significantly more headache days (M = 12.60, SD = 3.59)
# than patients taking Medication A (M = 8.10, SD = 2.81), t(98) = -6.99, p < .001.
# The effect size was very large (Cohen's d = 1.40 in absolute value), indicating a substantial difference between the two medications.
# Overall, patients taking Medication B experienced more headache days.