Background

This is the code that Mark demonstrates in the lecture videos. I highly recommend using this code instead of the code from the lesson plan, as word processors muck up code. If you copy and paste this code from this webpage, don’t forget to place it within a chunk. I have numbered each section as chunks so you can follow along with your own code and replicate my file as it appears in the lecture video.

Example chunk:


``` r
# YOUR AWESOME CODE GOES IN THIS SPACE
```

Chunk 1: Load libraries

packages <- c(
  'psych', 'summarytools', 'Hmisc', 'dplyr', 'stats', 'rstatix', 
  'ggplot2', 'apaTables', 'effectsize', 'car', 'rcompanion',  
  'vcd', 'tukeytrend', 'lme4', 'lmtest', 'broom', 'modelr', 
  'tidyverse', 'lubridate', 'janitor', 'data.table', 'readr', 'knitr', 'mosaicData')

lapply(packages, library, character.only = TRUE)
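
If any of these packages are missing on your machine, the `library()` calls above will fail. Here is a small optional sketch (not shown in the lecture video) that installs whatever is missing before you load everything:

``` r
# Optional: install any of the packages above that you don't already have
missing <- setdiff(packages, rownames(installed.packages()))
if (length(missing) > 0) install.packages(missing)
```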

Chunk 2: Import the data

You may need to install.packages(‘readxl’)

The XLSX file is called ‘Practice_Math_Planet_Data.xlsx’ and is posted on our course shell

When you download the XLSX file, save it in the same folder as your RMarkdown file (.Rmd)
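
If R cannot find the file in Chunk 2, it is almost always a working-directory problem. A quick optional check, assuming the file name above:

``` r
# Confirm R can see the data file from the current working directory
getwd()
file.exists('Practice_Math_Planet_Data.xlsx')  # should return TRUE
```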

# Load this library in case it is not already loaded
library(readxl)

# Import the Practice Math Planet Data and name it "planetmath"
planetmath <- readxl::read_xlsx('Practice_Math_Planet_Data.xlsx')

Chunk 3: Recode numbers to values

However, notice that we DID NOT RECODE MATH EFFICACY (Matheff1, etc.) because these items need to be NUMBERS to calculate reliability
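
A quick way to confirm those items are still numeric before the reliability analysis later on (a small check of my own, assuming the column names below):

``` r
# All three should report TRUE (i.e., still numeric)
sapply(planetmath[, c("Matheff1", "Matheff2", "Matheff3")], is.numeric)
```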

library(tidyverse)
library(dplyr)

planetmath <- planetmath %>%
  mutate(
    Planet = case_when(
      Planet == 1 ~ "Earth",
      Planet == 2 ~ "Venus",
      Planet == 3 ~ "Mars",
      TRUE ~ as.character(Planet)
    ),
    Eye_Color = case_when(
      Eye_Color == 1 ~ "Blue",
      Eye_Color == 2 ~ "Green",
      Eye_Color == 3 ~ "Brown",
      TRUE ~ as.character(Eye_Color)
    ),
    Limb = case_when(
      Limb == 1 ~ "Legs",
      Limb == 2 ~ "Fins",
      TRUE ~ as.character(Limb)
    ),
    Earth_Grade = case_when(
      Earth_Grade == 1 ~ "6th_Grade",
      Earth_Grade == 2 ~ "7th_Grade",
      Earth_Grade == 3 ~ "8th_Grade",
      TRUE ~ as.character(Earth_Grade)
    ),
    Instruction_Type = case_when(
      Instruction_Type == 1 ~ "Face_to_Face",
      Instruction_Type == 2 ~ "Purely_online_asynchronous",
      Instruction_Type == 3 ~ "Online_synchronous",
      TRUE ~ as.character(Instruction_Type)
    )
  )%>%
  # Total of the three raw efficacy items (the reverse-coded item is created below)
  mutate(Math_Eff_Tot = Matheff1 + Matheff2 + Matheff3)

# Reverse coding math efficacy question 3
planetmath <- planetmath %>%
  mutate(REVMatheff3 = case_when(
    Matheff3 == 6 ~ 1,
    Matheff3 == 5 ~ 2,
    Matheff3 == 4 ~ 3,
    Matheff3 == 3 ~ 4,
    Matheff3 == 2 ~ 5,
    Matheff3 == 1 ~ 6,
    TRUE ~ NA_real_
  ))
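
Since the items run from 1 to 6, an equivalent one-liner is (max + min) - score. This sketch should produce the same values as the case_when() above (the `REVMatheff3_alt` name is just mine for comparison):

``` r
# Equivalent reverse coding for a 1-6 scale: (6 + 1) - score
planetmath <- planetmath %>%
  mutate(REVMatheff3_alt = 7 - Matheff3)
```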

Chunk 4: Calculating frequencies on limb and planet in R

library(dplyr)
library(tidyr)

# Frequency tables
limb_table <- table(planetmath$Limb)
planet_table <- table(planetmath$Planet)

# Convert to data frames
df_limb <- as.data.frame(limb_table) %>%
  rename(Limb = Var1, Count = Freq) %>%
  mutate(Proportion = Count / sum(Count))

df_planet <- as.data.frame(planet_table) %>%
  rename(Planet = Var1, Count = Freq) %>%
  mutate(Proportion = Count / sum(Count))

# Print results
df_limb
##   Limb Count Proportion
## 1 Fins  1475  0.5241649
## 2 Legs  1339  0.4758351
df_planet
##   Planet Count Proportion
## 1  Earth   203  0.0721393
## 2   Mars   256  0.0909737
## 3  Venus  2355  0.8368870
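
Because janitor was loaded in Chunk 1, tabyl() can produce the same counts and proportions in one step; a brief alternative sketch:

``` r
# One-step frequency tables with janitor (counts plus proportions)
planetmath %>% janitor::tabyl(Limb)
planetmath %>% janitor::tabyl(Planet)
```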

Chunk 5: Chi-square test of limbs given planet

# Create a contingency table for Limb and Planet (Planet as rows)
limb_planet_table <- table(planetmath$Planet, planetmath$Limb)

# Compute row-wise proportions (percent of limbs given planet)
prop_table <- prop.table(limb_planet_table, margin = 1)

# Combine counts and row-wise proportions into a formatted table
formatted_table <- paste0(limb_planet_table, " (", round(100 * prop_table, 2), "%)")

# Convert to matrix with appropriate dimensions
dim(formatted_table) <- dim(limb_planet_table)
dimnames(formatted_table) <- dimnames(limb_planet_table)

# Print the contingency table with row-wise proportions and write to a CSV
print("Contingency Table with Row-Wise Proportions:")
## [1] "Contingency Table with Row-Wise Proportions:"
print(formatted_table)
##        
##         Fins            Legs           
##   Earth "114 (56.16%)"  "89 (43.84%)"  
##   Mars  "131 (51.17%)"  "125 (48.83%)" 
##   Venus "1230 (52.23%)" "1125 (47.77%)"
write.csv(formatted_table, 'chi_table.csv', row.names = TRUE)

# Perform the Chi-Square Test
chi_test <- chisq.test(limb_planet_table)

# Print the Chi-Square test results
print("Chi-Square Test Results:")
## [1] "Chi-Square Test Results:"
print(chi_test)
## 
##  Pearson's Chi-squared test
## 
## data:  limb_planet_table
## X-squared = 1.3312, df = 2, p-value = 0.514
print("Effect size metrics, this uses V because it's 3x2")
## [1] "Effect size metrics, this uses V because it's 3x2"
assocstats(limb_planet_table)
##                     X^2 df P(> X^2)
## Likelihood Ratio 1.3350  2  0.51300
## Pearson          1.3312  2  0.51396
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.022 
## Cramer's V        : 0.022
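
Chi-square results are only trustworthy when the expected cell counts are large enough (a common rule of thumb is at least 5 per cell). If you want to verify that here, the test object already stores them; a quick optional check:

``` r
# Expected counts under independence; the chi-square approximation assumes
# these are not too small (rule of thumb: at least 5 per cell)
chi_test$expected
```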

Chunk 6: Descriptives of Pre overall and by Limb, then an independent samples t-test

library(psych)
library(car)

# Reduce the dataset
planetprelimb<- planetmath%>%
  dplyr::select(Limb,Pre)%>%
  na.omit()

# Descriptives of pre
descpre<-psych::describe(planetprelimb$Pre)
descpre
##    vars    n  mean  sd median trimmed mad min max range  skew kurtosis   se
## X1    1 2814 73.45 7.4     73   73.56 8.9  50  87    37 -0.19    -0.39 0.14
write.csv(descpre, "pre_descriptives.csv")

# Get descriptive statistics by limb
descriptives_limb <- psych::describeBy(planetprelimb$Pre, group = planetprelimb$Limb, mat = TRUE)

# Convert the matrix into a data frame
descriptives_limb<- as.data.frame(descriptives_limb)

write.csv(descriptives_limb, "descriptives_limb.csv")

descriptives_limb
##     item group1 vars    n     mean       sd median  trimmed    mad min max
## X11    1   Fins    1 1475 73.52746 7.351314     73 73.65792 8.8956  50  87
## X12    2   Legs    1 1339 73.35922 7.452100     73 73.44921 8.8956  50  87
##     range       skew   kurtosis        se
## X11    37 -0.1999649 -0.3622542 0.1914119
## X12    37 -0.1693996 -0.4261442 0.2036519
# Run Levene's test for homogeneity of variances
levene_test_result <- leveneTest(Pre ~ Limb, data = planetprelimb)

# Print Levene's test result
print("Levene's Test for Homogeneity of Variance:")
## [1] "Levene's Test for Homogeneity of Variance:"
print(levene_test_result)
## Levene's Test for Homogeneity of Variance (center = median)
##         Df F value Pr(>F)
## group    1  0.5077 0.4762
##       2812
# Run a t-test for Pre by Limb
ttest_result<- t.test(Pre ~ Limb, data = planetprelimb, var.equal = TRUE)
# Print the t-test result
print(ttest_result)
## 
##  Two Sample t-test
## 
## data:  Pre by Limb
## t = 0.60234, df = 2812, p-value = 0.547
## alternative hypothesis: true difference in means between group Fins and group Legs is not equal to 0
## 95 percent confidence interval:
##  -0.3794240  0.7158927
## sample estimates:
## mean in group Fins mean in group Legs 
##           73.52746           73.35922
#Run a Cohen's d effect size
cohen_d_result <- planetprelimb %>%
  rstatix::cohens_d(Pre ~ Limb)

# View the result
cohen_d_result
## # A tibble: 1 × 7
##   .y.   group1 group2 effsize    n1    n2 magnitude 
## * <chr> <chr>  <chr>    <dbl> <int> <int> <ord>     
## 1 Pre   Fins   Legs    0.0227  1475  1339 negligible
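
Levene's test was non-significant here, so the equal-variance (Student) t-test was appropriate. Had it been significant, the usual move is Welch's t-test, which is actually R's default; a minimal sketch:

``` r
# Welch's t-test (R's default) does not assume equal variances
t.test(Pre ~ Limb, data = planetprelimb)
```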

Chunk 7: Non-parametric Mann-Whitney U

# Mann Whitney U Non-Parametric
mann_whitney_result <- wilcox.test(Pre ~ Limb, data = planetprelimb, exact = FALSE)

# View the result
print(mann_whitney_result)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Pre by Limb
## W = 1002323, p-value = 0.491
## alternative hypothesis: true location shift is not equal to 0
# R calculates W and SPSS calculates U. This is how we know we match:
# Custom function to compute Mann-Whitney U statistic as SPSS would
compute_U <- function(group1, group2) {
  # Combine data to rank it
  combined_data <- c(group1, group2)
  ranks <- rank(combined_data)
  
  # Sum of ranks for each group
  rank_sum1 <- sum(ranks[1:length(group1)])
  rank_sum2 <- sum(ranks[(length(group1)+1):length(combined_data)])
  
  # Calculate U for each group
  U1 <- rank_sum1 - length(group1)*(length(group1) + 1)/2
  U2 <- rank_sum2 - length(group2)*(length(group2) + 1)/2
  
  # Return the smaller U value (the one that SPSS reports)
  return(min(U1, U2))
}

# Apply the custom function to your groups
group1 <- planetprelimb$Pre[planetprelimb$Limb == "Fins"]
group2 <- planetprelimb$Pre[planetprelimb$Limb == "Legs"]

# Compute U
U_value <- compute_U(group1, group2)

# Print U value
print(paste("Mann-Whitney U (Custom Calculation):", U_value))
## [1] "Mann-Whitney U (Custom Calculation): 972702"
# Effect size calculation (r = Z / sqrt(n))
# First, calculate Z and n for effect size
Z <- qnorm(1 - mann_whitney_result$p.value / 2) # Assuming two-tailed test
n1 <- length(group1)
n2 <- length(group2)
n <- n1 + n2

# Calculate effect size r
effect_size_r <- Z / sqrt(n)

# Print effect size
print(paste("Effect size (r):", effect_size_r))
## [1] "Effect size (r): 0.0129816945906126"

Chunk 8: Descriptives of pre and post with paired samples

library(rstatix)
library(dplyr)
library(psych)
library(ggplot2)
library(tidyr)

# Generate the dataset
prepost<- planetmath%>%
  dplyr::select(Pre,Post)%>%
  na.omit()%>%
  mutate(Difference = Post - Pre)

# We already have descriptives of pre, but we'll do it again
predescriptives<- psych::describe(prepost$Pre)
postdescriptives<- psych::describe(prepost$Post)
differencedescr<- psych::describe(prepost$Difference)

as.data.frame(predescriptives)
##    vars    n     mean       sd median  trimmed    mad min max range       skew
## X1    1 2814 73.44741 7.398603     73 73.56172 8.8956  50  87    37 -0.1856475
##      kurtosis        se
## X1 -0.3916062 0.1394722
as.data.frame(postdescriptives)
##    vars    n     mean       sd median  trimmed    mad min max range       skew
## X1    1 2814 88.60981 4.979288     89 88.62655 5.9304  72 100    28 -0.2140512
##     kurtosis         se
## X1 0.1700273 0.09386532
as.data.frame(differencedescr)
##    vars    n    mean       sd median  trimmed    mad min max range      skew
## X1    1 2814 15.1624 8.724091     13 14.56927 8.8956   0  41    41 0.5777075
##      kurtosis        se
## X1 -0.5178854 0.1644592
predescriptives$Variable <- "Pre"
postdescriptives$Variable <- "Post"
differencedescr$Variable <- "Difference"

# Combine all into one dataset
desc_combined <- rbind(predescriptives, postdescriptives, differencedescr)%>%
  dplyr::select(Variable, mean, median, sd, skew, kurtosis)

write.csv(desc_combined, "dependent_desc.csv")


# Generate the dataset
prepost2 <- planetmath %>%
  dplyr::select(Pre, Post) %>%
  na.omit()

# Melt the data for plotting
prepost_melted <- prepost2 %>%
  pivot_longer(cols = c(Pre, Post), names_to = "Test", values_to = "Score")

# Histogram Plot for Pre and Post with Legend
hist_plot <- ggplot(prepost_melted, aes(x = Score, fill = Test)) +
  geom_histogram(alpha = 0.6, position = "identity", bins = 30, color = "black", aes(y = ..density..)) +
  scale_fill_manual(values = c("Pre" = "lightblue", "Post" = "lightgreen")) +
  labs(title = "Histograms for Pre and Post Tests",
       x = "Score", y = "Density") +
  theme_minimal() +
  theme(legend.title = element_blank(), # Remove legend title
        legend.position = "top")  # Show legend at the top

# Boxplot Plot for Pre and Post with Pre on the left
boxplot_plot <- ggplot(prepost_melted, aes(x = factor(Test, levels = c("Pre", "Post")), y = Score, fill = Test)) +
  geom_boxplot(alpha = 0.5, color = "black") +
  scale_fill_manual(values = c("Pre" = "lightblue", "Post" = "lightgreen")) +
  labs(title = "Boxplots for Pre and Post Tests",
       x = "Test", y = "Score") +
  theme_minimal() +
  theme(legend.position = "none")  # Remove the legend for boxplots

# Print both plots
print(hist_plot)

print(boxplot_plot)

# We now want to check the assumption of normality
# Plot a histogram of the Difference variable
hist(prepost$Difference, main = "Histogram of Difference", 
     xlab = "Difference (Post - Pre)", col = "lightblue", 
     border = "black")

# Q-Q plot to check for normality
qqnorm(prepost$Difference)
qqline(prepost$Difference, col = "red")

# Perform Shapiro-Wilk test for normality on the Difference variable
shapiro_test <- shapiro.test(prepost$Difference)
shapiro_test
## 
##  Shapiro-Wilk normality test
## 
## data:  prepost$Difference
## W = 0.94281, p-value < 2.2e-16
# Run the t-test
paired_result <- t.test(prepost$Pre, prepost$Post, paired = TRUE)
paired_result
## 
##  Paired t-test
## 
## data:  prepost$Pre and prepost$Post
## t = -92.196, df = 2813, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -15.48488 -14.83993
## sample estimates:
## mean difference 
##        -15.1624
# Calculate Cohen's d
mean_dif<- mean(prepost$Difference)
sd_dif<- sd(prepost$Difference)
cohen_d<- mean_dif/sd_dif
cohen_d
## [1] 1.737992
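
As a cross-check, rstatix can compute the same paired d from long-format data; a sketch (the sign may flip depending on group order, so compare absolute values):

``` r
# Paired Cohen's d via rstatix; |d| should match the manual value above
prepost %>%
  pivot_longer(cols = c(Pre, Post), names_to = "Test", values_to = "Score") %>%
  rstatix::cohens_d(Score ~ Test, paired = TRUE)
```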

Chunk 9: Non-parametric Wilcoxon Signed Rank Test

# Load necessary libraries
library(tidyr)
library(rstatix)
library(dplyr)

prepostsubset <- planetmath %>%
  dplyr::select(Pre, Post) %>%
  na.omit()

# Perform Wilcoxon Signed-Rank Test
test_result <- wilcox.test(prepostsubset$Pre, prepostsubset$Post, paired = TRUE)

# Calculate Z to nearly match SPSS (R and SPSS round a bit differently)
# Extract the sum of ranks (V) from the test result
V <- test_result$statistic

# Get the number of pairs (n); note that zero-difference pairs are not removed here
n <- length(prepostsubset$Pre)

# Calculate mean (mu) and standard deviation (sigma) for the ranks
mu <- n * (n + 1) / 4
sigma <- sqrt(n * (n + 1) * (2 * n + 1) / 24)

# Calculate Z statistic
Z <- (V - mu) / sigma

# Print Z statistic
print(test_result)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  prepostsubset$Pre and prepostsubset$Post
## V = 0, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
cat("Z statistic:", Z, "\n")
## Z statistic: -45.94426

Chunk 10: Internal consistency reliability

# Load the psych package
library(psych)

# Subset the relevant columns (Matheff1, Matheff2, and Matheff3)
math_eff_columns <- planetmath %>% 
  dplyr::select(Matheff1, Matheff2, Matheff3)%>%
  na.omit()

# Run McDonald's omega with psych (the printed output also reports Cronbach's alpha)
reliability_result <- psych::omega(math_eff_columns)

# Print the result
print(reliability_result)
## Omega 
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip, 
##     digits = digits, title = title, sl = sl, labels = labels, 
##     plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option, 
##     covar = covar)
## Alpha:                 0.9 
## G.6:                   0.87 
## Omega Hierarchical:    0.03 
## Omega H asymptotic:    0.03 
## Omega Total            0.91 
## 
## Schmid Leiman Factor loadings greater than  0.2 
##              g  F1*   F2*   F3*   h2   h2   u2   p2  com
## Matheff1       0.93             0.89 0.89 0.11 0.03 1.06
## Matheff2       0.88             0.80 0.80 0.20 0.03 1.07
## Matheff3       0.77             0.62 0.62 0.38 0.04 1.10
## 
## With Sums of squares  of:
##    g  F1*  F2*  F3*   h2 
## 0.07 2.23 0.01 0.00 1.82 
## 
## general/max  0.03   max/min =   Inf
## mean percent general =  0.03    with sd =  0.01 and cv of  0.17 
## Explained Common Variance of the general factor =  0.03 
## 
## The degrees of freedom are -3  and the fit is  0 
## The number of observations was  377  with Chi Square =  0  with prob <  NA
## The root mean square of the residuals is  0 
## The df corrected root mean square of the residuals is  NA
## 
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 0  and the fit is  1.96 
## The number of observations was  377  with Chi Square =  731.67  with prob <  NA
## The root mean square of the residuals is  0.74 
## The df corrected root mean square of the residuals is  NA 
## 
## Measures of factor score adequacy             
##                                                   g  F1*   F2* F3*
## Correlation of scores with factors             0.17 0.95  0.20   0
## Multiple R square of scores with factors       0.03 0.90  0.04   0
## Minimum correlation of factor score estimates -0.94 0.81 -0.92  -1
## 
##  Total, General and Subset omega for each subset
##                                                  g  F1* F2* F3*
## Omega total for total scores and subscales    0.91 0.91  NA  NA
## Omega general for total scores and subscales  0.03 0.03  NA  NA
## Omega group for total scores and subscales    0.88 0.88  NA  NA
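
If you want Cronbach's alpha by itself, with item-level statistics, psych also has a dedicated function; a brief sketch, whose raw alpha should match the "Alpha: 0.9" reported in the omega output above:

``` r
# Cronbach's alpha with item statistics for the three efficacy items
psych::alpha(math_eff_columns)
```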

Chunk 11: Descriptives for the one-way ANOVA on Planet and PostPost

Be sure to re-load your libraries and to re-load and recode your data from Chunks 1, 2 and 3

Don’t forget to chunk it!


``` r
# YOUR AWESOME CODE GOES IN THIS SPACE
```
# Load necessary libraries
library(tidyr)
library(rstatix)
library(dplyr)

# Filter for Planet and PostPost
planetanova <- planetmath%>%
  dplyr::select(Planet, PostPost)%>%
  na.omit()


# Get descriptive statistics by planet
descriptives_planet <- psych::describeBy(planetanova$PostPost, group = planetanova$Planet, mat = TRUE)
overall_desc_planet<- psych::describe(planetanova$PostPost)


# Convert the matrix into a data frame
descriptives_planet<- as.data.frame(descriptives_planet)%>%
  dplyr::select(-item)

overall_desc_planet<- as.data.frame(overall_desc_planet)%>%
  mutate(group1 = "Total")%>%
  relocate(group1, .before = everything())
  
descriptives_planet<- rbind(descriptives_planet, overall_desc_planet)

write.csv(descriptives_planet, "descriptives_planet.csv")

descriptives_planet
##     group1 vars    n     mean       sd median  trimmed    mad min max range
## X11  Earth    1  203 83.22660 4.368868     81 83.08589 2.9652  70  96    26
## X12   Mars    1  256 83.38672 4.766186     82 83.33981 4.4478  68  96    28
## X13  Venus    1 2355 82.97410 4.580602     81 83.03342 4.4478  68  97    29
## X1   Total    1 2814 83.02985 4.583138     81 83.06661 4.4478  68  97    29
##            skew  kurtosis         se
## X11  0.30816625 0.3107537 0.30663445
## X12  0.07640464 0.4721368 0.29788662
## X13 -0.05626483 0.5086031 0.09439024
## X1  -0.01908987 0.5095194 0.08639744
# Create a boxplot combined with a means plot of the groups
library(ggplot2)

ggplot(planetanova, aes(x = Planet, y = PostPost)) +
  geom_boxplot(fill = "blue3", alpha = 0.3, color = "black") + 
  stat_summary(fun = mean, geom = "point", color = "red", size = 3) + 
  stat_summary(fun = mean, geom = "line", aes(group = 1), color = "red", linewidth = 1) +
  scale_y_continuous(limits = c(0, 100)) +
  theme_minimal() +  
  labs(title = "Boxplot with Mean of PostPost by Planet",
       x = "Planet",
       y = "PostPost")

Chunk 12: Checking Statistical Assumptions of ANOVA

# Shapiro-Wilk test for normality
aov_model <- aov(PostPost ~ Planet, data = planetanova)
shapiro.test(residuals(aov_model))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(aov_model)
## W = 0.96078, p-value < 2.2e-16
# QQ Plot for normality
qqnorm(residuals(aov_model))
qqline(residuals(aov_model), col = "red")

# Levene's test for homogeneity of variance
library(car)
leveneTest(PostPost ~ Planet, data = planetanova)
## Levene's Test for Homogeneity of Variance (center = median)
##         Df F value Pr(>F)
## group    2  1.1356 0.3214
##       2811
#Bartlett's test of homogeneity of variance
bartlett.test(PostPost ~ Planet, data = planetanova)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  PostPost by Planet
## Bartlett's K-squared = 1.6992, df = 2, p-value = 0.4276

Chunk 13: Running ANOVA Omnibus on R

This will write an APA ANOVA table in your RMarkdown folder

Install apaTables if you have not already: install.packages(‘apaTables’)

# Be sure to load the library
library(apaTables)
library(dplyr)
library(tidyverse)
library(rstatix)

# Run the ANOVA
anova_results <- aov(PostPost ~ Planet, data = planetanova)

apa.aov.table(anova_results, filename = "anova_results.doc")
## 
## 
## ANOVA results using PostPost as the dependent variable
##  
## 
##    Predictor         SS   df         MS        F    p partial_eta2
##  (Intercept) 1406113.42    1 1406113.42 66947.90 .000             
##       Planet      47.78    2      23.89     1.14 .321          .00
##        Error   59039.71 2811      21.00                           
##  CI_90_partial_eta2
##                    
##          [.00, .00]
##                    
## 
## Note: Values in square brackets indicate the bounds of the 90% confidence interval for partial eta-squared
summary(anova_results)
##               Df Sum Sq Mean Sq F value Pr(>F)
## Planet         2     48   23.89   1.137  0.321
## Residuals   2811  59040   21.00

Chunk 14: Post-Hoc results with Tukey

Tukey if variances are equal

Games Howell if variances are not equal

library(rstatix)

# Perform Tukey's HSD test if variances are assumed equal
Tukey<- TukeyHSD(anova_results)

# Perform Games-Howell if variances were not equal
GamesHowell<-planetanova%>%
games_howell_test(PostPost~Planet)

Tukey
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = PostPost ~ Planet, data = planetanova)
## 
## $Planet
##                   diff        lwr       upr     p adj
## Mars-Earth   0.1601178 -0.8498621 1.1700977 0.9266575
## Venus-Earth -0.2525033 -1.0386094 0.5336028 0.7317040
## Venus-Mars  -0.4126211 -1.1198539 0.2946117 0.3578727
GamesHowell
## # A tibble: 3 × 8
##   .y.      group1 group2 estimate conf.low conf.high p.adj p.adj.signif
## * <chr>    <chr>  <chr>     <dbl>    <dbl>     <dbl> <dbl> <chr>       
## 1 PostPost Earth  Mars      0.160   -0.845     1.17  0.926 ns          
## 2 PostPost Earth  Venus    -0.253   -1.01      0.504 0.711 ns          
## 3 PostPost Mars   Venus    -0.413   -1.15      0.323 0.385 ns

Chunk 15: Calculate Cohen’s d for effect sizes

This calculates them for all three pairs

# Subset your data for Cohen's d
cohen_data_venus_earth<- planetanova%>%
  filter(Planet %in% c("Venus", "Earth"))

cohen_data_venus_mars<- planetanova%>%
  filter(Planet %in% c("Venus", "Mars"))

cohen_data_earth_mars<- planetanova%>%
  filter(Planet %in% c("Earth", "Mars"))

# Run Cohen's d on the subset data
 rstatix::cohens_d(cohen_data_venus_earth, PostPost ~ Planet)
## # A tibble: 1 × 7
##   .y.      group1 group2 effsize    n1    n2 magnitude 
## * <chr>    <chr>  <chr>    <dbl> <int> <int> <ord>     
## 1 PostPost Earth  Venus   0.0564   203  2355 negligible
 rstatix::cohens_d(cohen_data_venus_mars, PostPost ~ Planet)
## # A tibble: 1 × 7
##   .y.      group1 group2 effsize    n1    n2 magnitude 
## * <chr>    <chr>  <chr>    <dbl> <int> <int> <ord>     
## 1 PostPost Mars   Venus   0.0883   256  2355 negligible
 rstatix::cohens_d(cohen_data_earth_mars, PostPost ~ Planet)
## # A tibble: 1 × 7
##   .y.      group1 group2 effsize    n1    n2 magnitude 
## * <chr>    <chr>  <chr>    <dbl> <int> <int> <ord>     
## 1 PostPost Earth  Mars   -0.0350   203   256 negligible

Chunk 16: Now it is time to run the non-parametric omnibus counterpart to ANOVA, the Kruskal-Wallis test

library(dplyr)

# Re-generate dataset
planetanova <- planetmath%>%
  dplyr::select(Planet, PostPost)%>%
  na.omit()

# Run the omnibus KW test
kwtest<-kruskal.test(PostPost ~ Planet, data = planetanova)

print(kwtest)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  PostPost by Planet
## Kruskal-Wallis chi-squared = 0.76484, df = 2, p-value = 0.6822
# Create ranks and summarize by group
planet_ranks <- planetanova %>%
  mutate(Rank = rank(PostPost)) %>%
  group_by(Planet) %>%
  summarise(N = n(), Mean_Rank = mean(Rank), Sum_Rank = sum(Rank))

# Print the ranks
print(planet_ranks)
## # A tibble: 3 × 4
##   Planet     N Mean_Rank Sum_Rank
##   <chr>  <int>     <dbl>    <dbl>
## 1 Earth    203     1416.  287496.
## 2 Mars     256     1448.  370582.
## 3 Venus   2355     1402. 3302626

Chunk 17: We will run an eta-squared effect size for the KW test

# Run eta-squared for omnibus effect size
H <- kwtest$statistic
k <- length(unique(planetanova$Planet))
N <- nrow(planetanova)

eta_squared <- (H - k + 1) / (N - k)

# Rename the output explicitly
names(eta_squared) <- "eta_squared"

# Print the result
eta_squared
##   eta_squared 
## -0.0004394035
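
Note that this formula can come out slightly negative when H is small, as it is here; treat it as effectively zero. Since rcompanion was loaded in Chunk 1, its epsilonSquared() helper offers a packaged alternative effect size for the KW test; a sketch:

``` r
# Epsilon-squared effect size for the Kruskal-Wallis test via rcompanion
rcompanion::epsilonSquared(x = planetanova$PostPost, g = planetanova$Planet)
```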

Chunk 18: We will run a non-parametric post-hoc test for the KW called the Dunn test (cannot do in SPSS exactly like this)

#Load the library
library(dunn.test)

# Run post-hoc dunn test
dt<- dunn.test(planetanova$PostPost, planetanova$Planet, kw = TRUE, label = TRUE)
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 0.7648, df = 2, p-value = 0.68
## 
## 
##                            Comparison of x by group                            
##                                 (No adjustment)                                
## Col Mean-|
## Row Mean |      Earth       Mars
## ---------+----------------------
##     Mars |  -0.417417
##          |     0.3382
##          |
##    Venus |   0.236934   0.859460
##          |     0.4064     0.1950
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
print(dt)
## $chi2
## [1] 0.7648368
## 
## $Z
## [1] -0.4174178  0.2369341  0.8594609
## 
## $P
## [1] 0.3381864 0.4063540 0.1950431
## 
## $P.adjusted
## [1] 0.3381864 0.4063540 0.1950431
## 
## $comparisons
## [1] "Earth - Mars"  "Earth - Venus" "Mars - Venus"

Chunk 19: We will run Wilcoxon signed rank correlations on the pairs after the post-hoc

We have to use the output from our post-hoc dunn test to fill in the correct values

# Signed rank correlation for pairwise tests
# Z-values from Dunn's test results
z_mars_earth <- 0.417417
z_venus_earth <- 0.236934
z_mars_venus <- 0.859460

# Calculate observations
n <- nrow(planetanova)  # Total number of observations

# Signed rank correlation formula (r = Z / sqrt(n))
r_mars_earth <- z_mars_earth / sqrt(n)
r_venus_earth <- z_venus_earth / sqrt(n)
r_mars_venus <- z_mars_venus / sqrt(n)

# Print effect sizes
r_mars_earth 
## [1] 0.007868792
r_venus_earth
## [1] 0.004466479
r_mars_venus
## [1] 0.01620181

Chunk 20: Bivariate correlation

library(ggplot2)
library(ggpubr)
library(Hmisc)
library(bannerCommenter)
## Warning: package 'bannerCommenter' was built under R version 4.4.2
library(readxl)

# Import the Practice Math Planet Data and name it "planetmath"
planetmath<- readxl:: read_xlsx('Practice_Math_Planet_Data.xlsx')


data <- planetmath%>%
  dplyr::select(Pre, Post)

# descriptive statistics
desc_post<- psych::describe(data$Post)%>%
  as.data.frame()%>%
  mutate(Variable = "Post", group1 = "Overall")%>%
  dplyr::select(Variable, group1, n, mean, sd, skew, kurtosis)

desc_pre <- psych::describe(data$Pre) %>%
  as.data.frame() %>%
  mutate(Variable = "Pre", group1 = "Overall")%>%
  dplyr::select(Variable, group1, n, mean, sd, skew, kurtosis)

bivariatedesc<- rbind(desc_pre, desc_post)

# Homoscedasticity Check (Residuals vs Fitted Plot)
model <- lm(Pre ~ Post, data = data)
plot(model, which = 1)  
title("Residuals vs Fitted Plot")

# Normality Check (Q-Q plots)
# Q-Q plot for Pre
ggqqplot(data$Pre, main = "Q-Q Plot for Pre", ylab = "Quantiles")

# Q-Q plot for Post
ggqqplot(data$Post, main = "Q-Q Plot for Post", ylab = "Quantiles")

# Shapiro-Wilk test for normality
shapiro_test_pre <- shapiro.test(data$Pre)
shapiro_test_post <- shapiro.test(data$Post)

banner("Normality test for Pre and Post shows violation of the assumption")
## 
## #########################################################################
## ##  Normality test for Pre and Post shows violation of the assumption  ##
## #########################################################################
shapiro_test_pre
## 
##  Shapiro-Wilk normality test
## 
## data:  data$Pre
## W = 0.97911, p-value < 2.2e-16
shapiro_test_post
## 
##  Shapiro-Wilk normality test
## 
## data:  data$Post
## W = 0.97452, p-value < 2.2e-16
# Compute Spearman correlation manually
cor_test_result <- cor.test(data$Pre, data$Post, method = "spearman")
## Warning in cor.test.default(data$Pre, data$Post, method = "spearman"): Cannot
## compute exact p-value with ties
# Scatterplot with linear fit (Linearity check); we use a spearman correlation due to violation of normality
ggplot(data, aes(x = Pre, y = Post)) +
  geom_point(alpha = 0.6) +  # Scatterplot with transparency
  geom_smooth(method = "lm", se = FALSE, color = "blue") +  # Linear fit line
  annotate("text", x = min(data$Pre), y = max(data$Post), 
           label = paste("Spearman ρ =", formatC(cor_test_result$estimate,
                                        format = "f", digits = 4)),
           hjust = 0, vjust = 1) +  # Manually add the correlation label
  labs(title = "Scatterplot of Pre and Post",
       x = "Pre",
       y = "Post") +
  theme_minimal()

banner("spearman Correlation")
## 
## ##################################################################
## ##                     Spearman Correlation                     ##
## ##################################################################
# Correlation results using spearman since normality is violated
cor_results <- rcorr(as.matrix(data[, c("Pre", "Post")]), type = "spearman")
print(cor_results)
##      Pre Post
## Pre    1    0
## Post   0    1
## 
## n= 2814 
## 
## 
## P
##      Pre    Post  
## Pre         0.9747
## Post 0.9747
banner("model results")
## 
## #################################################################
## ##                        model results                        ##
## #################################################################
summary(model)
## 
## Call:
## lm(formula = Pre ~ Post, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.6124  -5.6460  -0.1982   5.8270  13.3876 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 67.33086    2.48412  27.105   <2e-16 ***
## Post         0.06903    0.02799   2.466   0.0137 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.392 on 2812 degrees of freedom
## Multiple R-squared:  0.002158,   Adjusted R-squared:  0.001803 
## F-statistic: 6.082 on 1 and 2812 DF,  p-value: 0.01372
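
If you also want a correlation table formatted for APA style, apaTables can write one to Word. Note that apa.cor.table() reports Pearson correlations, not the Spearman values used above, so treat this as a supplementary sketch:

``` r
# APA-style correlation table (Pearson) written to a Word file
apaTables::apa.cor.table(as.data.frame(data[, c("Pre", "Post")]),
                         filename = "cor_table.doc")
```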

Chunk 21: Multiple regression

library(ggplot2)
library(lmtest)
library(car)
library(bannerCommenter)
library(gridExtra)
library(dplyr)
library(psych)

# Select relevant data
data <- planetmath %>%
  dplyr::select(PostPost, Instruction_Type, Pre)

# Overall descriptive statistics
desc_postpost <- psych::describe(data$PostPost)%>%
  as.data.frame()%>%
  mutate(Variable = "PostPost", group1 = "Overall")%>%
  dplyr::select(Variable, group1, n, mean, sd, skew, kurtosis)

desc_pre <- psych::describe(data$Pre) %>%
  as.data.frame() %>%
  mutate(Variable = "Pre", group1 = "Overall")%>%
  dplyr::select(Variable, group1, n, mean, sd, skew, kurtosis)

# Descriptive statistics by instruction type
desc_postpost_inst <- psych::describeBy(data$PostPost, group = data$Instruction_Type, mat = TRUE)%>%
  as.data.frame()%>%
  mutate(Variable = "PostPost_Inst")%>%
  dplyr::select(Variable, group1, n, mean, sd, skew, kurtosis)

desc_pre_inst <- psych::describeBy(data$Pre, group = data$Instruction_Type, mat = TRUE)%>%
  as.data.frame()%>%
  mutate(Variable = "Pre_Inst")%>%
  dplyr::select(Variable, group1, n, mean, sd, skew, kurtosis)

# Combine all results
final_results <- rbind(desc_postpost, desc_pre, desc_postpost_inst, desc_pre_inst)

# Save to CSV
write.csv(final_results, "hw_mult_reg_desc.csv", row.names = FALSE)

Chunk 22: Statistical assumptions of the multiple regression model

banner("Section 1:", "Model 1 Before Transformation to DV", emph = TRUE)
## 
## ###########################################################################
## ###########################################################################
## ###                                                                     ###
## ###                              SECTION 1:                             ###
## ###                 MODEL 1 BEFORE TRANSFORMATION TO DV                 ###
## ###                                                                     ###
## ###########################################################################
## ###########################################################################
#Check normality for first model
model1<- lm(PostPost~Pre + Instruction_Type, data = data)

# Check linearity assumption
p1 <- ggplot(data, aes(x = Pre, y = PostPost)) + 
  geom_point() + 
  geom_smooth(method = "lm", col = "blue") + 
  ggtitle("Pre vs PostPost no Trans")

# Check normality of residuals
p2 <- ggplot(data.frame(resid = residuals(model1)), aes(sample = resid)) +
  stat_qq() +
  stat_qq_line(color = "red") +
  labs(title = "Q-Q Plot of Residuals no Transf") +
  theme_minimal()

# Residuals vs. Fitted plot for Homoscedasticity
p3 <- ggplot(data.frame(Fitted = fitted(model1), Residuals = residuals(model1)), 
             aes(x = Fitted, y = Residuals)) + 
  geom_point() + 
  geom_smooth(method = "lm", col = "blue", se = FALSE) + 
  labs(title = "Residuals vs. Fitted no Tansf") + 
  theme_minimal()

# Histogram of Residuals
p4 <- ggplot(data.frame(resid = residuals(model1)), aes(x = resid)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black", alpha = 0.7) +
  labs(title = "Histogram of Residuals no Transf") +
  theme_minimal()

p5<- ggplot(data, aes(x = Instruction_Type, y = PostPost)) +
  geom_jitter() +  # Adds some noise to prevent overplotting
  labs(title = "Inst. Type vs. PostPost no Transf")

p6<- ggplot(data, aes(x = Instruction_Type, y = PostPost)) +
  geom_boxplot() +
  labs(title = "Inst. Type vs. PostPost no Transf")

# Arrange the plots using grid.arrange
grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 2)

banner("Homoscedasticity Test Model 1 (Untransformed)")
## 
## #################################################################
## ##        Homoscedasticity Test Model 1 (Untransformed)        ##
## #################################################################
# Breusch-Pagan test (p > 0.05 suggests homoscedasticity)
bptest(model1)  
## 
##  studentized Breusch-Pagan test
## 
## data:  model1
## BP = 201.28, df = 2, p-value < 2.2e-16
banner("Independence  of errors (Untransformed")
## 
## ##################################################################
## ##            Independence of errors (Untransformed)            ##
## ##################################################################
# Durbin-Watson test of independence of errors
dwtest(model1)
## 
##  Durbin-Watson test
## 
## data:  model1
## DW = 2.0491, p-value = 0.9036
## alternative hypothesis: true autocorrelation is greater than 0
banner("VIF for Model 1 (Untransformed)")
## 
## #################################################################
## ##               VIF for Model 1 (Untransformed)               ##
## #################################################################
# Multicolinearity
vif(model1)
##              Pre Instruction_Type 
##         1.000096         1.000096
banner("Multiple regression results untransformed data")
## 
## ##################################################################
## ##        Multiple regression results untransformed data        ##
## ##################################################################
# Print the model
summary(model1)
## 
## Call:
## lm(formula = PostPost ~ Pre + Instruction_Type, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.5459  -2.5816  -0.8238   2.6638  13.7020 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      71.76519    0.86817  82.663   <2e-16 ***
## Pre               0.15806    0.01129  13.997   <2e-16 ***
## Instruction_Type -0.17714    0.12063  -1.468    0.142    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.431 on 2811 degrees of freedom
## Multiple R-squared:  0.06596,    Adjusted R-squared:  0.06529 
## F-statistic: 99.25 on 2 and 2811 DF,  p-value: < 2.2e-16

Chunk 23: Multiple regression after transforming the DV with a logarithm

banner("Section 2:", "Model 2 after Logarithm Transformation to DV", emph = TRUE)
## 
## ############################################################################
## ############################################################################
## ###                                                                      ###
## ###                              SECTION 2:                              ###
## ###             MODEL 2 AFTER LOGARITHM TRANSFORMATION TO DV             ###
## ###                                                                      ###
## ############################################################################
## ############################################################################
# Transform dependent variable and run model

data$log_PostPost <- log(data$PostPost)

# Build the model
model2 <- lm(log_PostPost~Pre + Instruction_Type, data = data)

# Check linearity assumption
p1 <- ggplot(data, aes(x = Pre, y = log_PostPost)) + 
  geom_point() + 
  geom_smooth(method = "lm", col = "red") + 
  ggtitle("Pre vs log PostPost after Transf")

# Check normality of residuals
p2 <- ggplot(data.frame(resid = residuals(model2)), aes(sample = resid)) +
  stat_qq() +
  stat_qq_line(color = "blue") +
  labs(title = "Q-Q Plot of Residuals after Transf") +
  theme_minimal()

# Residuals vs. Fitted plot for Homoscedasticity
p3 <- ggplot(data.frame(Fitted = fitted(model2), Residuals = residuals(model2)),
             aes(x = Fitted, y = Residuals)) + 
  geom_point() + 
  geom_smooth(method = "lm", col = "red", se = FALSE) + 
  labs(title = "Residuals vs. Fitted after Tansf") + 
  theme_minimal()

# Histogram of Residuals
p4 <- ggplot(data.frame(resid = residuals(model2)), aes(x = resid)) +
  geom_histogram( fill = "red", color = "black", alpha = 0.7) +
  labs(title = "Histogram of Residuals after Transf") +
  theme_minimal()

p5<- ggplot(data, aes(x = Instruction_Type, y = log_PostPost)) +
  geom_jitter() +  # Adds some noise to prevent overplotting
  labs(title = "Inst. Type vs. PostPost after Transf")

p6<- ggplot(data, aes(x = Instruction_Type, y = log_PostPost)) +
  geom_boxplot() +
  labs(title = "Inst. Type vs. PostPost after Transf")

# Arrange the plots using grid.arrange
grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 2)

banner("Homoscedasticity Test Model 2 (Transformed)")
## 
## #################################################################
## ##         Homoscedasticity Test Model 2 (Transformed)         ##
## #################################################################
# Breusch-Pagan test (p > 0.05 suggests homoscedasticity)
bptest(model2)  
## 
##  studentized Breusch-Pagan test
## 
## data:  model2
## BP = 235.04, df = 2, p-value < 2.2e-16
banner("Independence  of errors (Transformed)")
## 
## #################################################################
## ##            Independence  of errors (Transformed)            ##
## #################################################################
# Durbin-Watson test of independence of errors
dwtest(model2)
## 
##  Durbin-Watson test
## 
## data:  model2
## DW = 2.0476, p-value = 0.8966
## alternative hypothesis: true autocorrelation is greater than 0
banner("VIF for Model 1 (Transformed)")
## 
## #################################################################
## ##                VIF for Model 2 (Transformed)                ##
## #################################################################
# Multicolinearity
vif(model2)
##              Pre Instruction_Type 
##         1.000096         1.000096
banner("Multiple regression results transformed data")
## 
## ##################################################################
## ##         Multiple regression results transformed data         ##
## ##################################################################
# Print the model
summary(model2)
## 
## Call:
## lm(formula = log_PostPost ~ Pre + Instruction_Type, data = data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.179727 -0.030037 -0.008455  0.033337  0.155590 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.2783646  0.0104962 407.610   <2e-16 ***
## Pre               0.0019514  0.0001365  14.293   <2e-16 ***
## Instruction_Type -0.0020680  0.0014584  -1.418    0.156    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05357 on 2811 degrees of freedom
## Multiple R-squared:  0.0685, Adjusted R-squared:  0.06784 
## F-statistic: 103.4 on 2 and 2811 DF,  p-value: < 2.2e-16
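
Because the DV is on the log scale, the slopes in model2 can be read as approximate proportional changes in PostPost. A small sketch of my own for converting them to percent changes:

``` r
# Convert log-scale slopes to approximate percent change in PostPost
# per one-unit increase in each predictor (intercept dropped)
(exp(coef(model2)[-1]) - 1) * 100
```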

Chunk 24: Repeated measures ANOVA on Pre, Post and PostPost

Be sure to run chunks 2 and 3 again to reload and recode the data

library(car)
library(tidyverse)
library(dplyr)
library(psych)
library(gridExtra)
library(rstatix)
library(nlme)
library(purrr)

testdata <- planetmath%>%
  dplyr::select(ID, Pre, Post, PostPost)%>%
  mutate(DifferencePostPost_Pre = PostPost - Pre)%>%
  mutate(DifferencePost_Pre = Post- Pre)%>%
  mutate(DifferencePostPost_Post = PostPost- Post)

# Compute descriptive statistics
desc_Pre <- psych::describe(testdata$Pre)
desc_Post <- psych::describe(testdata$Post)
desc_PostPost <- psych::describe(testdata$PostPost)
desc_differencePostPost_Pre <- psych::describe(testdata$DifferencePostPost_Pre)
desc_differencePost_Pre <- psych::describe(testdata$DifferencePost_Pre)
desc_differencePostPost_Post <- psych::describe(testdata$DifferencePostPost_Post)


as.data.frame(desc_Pre)
##    vars    n     mean       sd median  trimmed    mad min max range       skew
## X1    1 2814 73.44741 7.398603     73 73.56172 8.8956  50  87    37 -0.1856475
##      kurtosis        se
## X1 -0.3916062 0.1394722
as.data.frame(desc_Post)
##    vars    n     mean       sd median  trimmed    mad min max range       skew
## X1    1 2814 88.60981 4.979288     89 88.62655 5.9304  72 100    28 -0.2140512
##     kurtosis         se
## X1 0.1700273 0.09386532
as.data.frame(desc_PostPost)
##    vars    n     mean       sd median  trimmed    mad min max range        skew
## X1    1 2814 83.02985 4.583138     81 83.06661 4.4478  68  97    29 -0.01908987
##     kurtosis         se
## X1 0.5095194 0.08639744
as.data.frame(desc_differencePostPost_Pre)
##    vars    n     mean       sd median  trimmed   mad min max range      skew
## X1    1 2814 9.582445 7.643458      8 8.830373 7.413  -4  38    42 0.9501486
##    kurtosis       se
## X1 0.715229 0.144088
as.data.frame(desc_differencePost_Pre)
##    vars    n    mean       sd median  trimmed    mad min max range      skew
## X1    1 2814 15.1624 8.724091     13 14.56927 8.8956   0  41    41 0.5777075
##      kurtosis        se
## X1 -0.5178854 0.1644592
as.data.frame(desc_differencePostPost_Post)
##    vars    n      mean       sd median   trimmed    mad min max range      skew
## X1    1 2814 -5.579957 2.980891     -4 -5.092806 1.4826 -13  -3    10 -1.256129
##     kurtosis         se
## X1 0.1073983 0.05619324
desc_Pre$Variable <- "Pre"
desc_Post$Variable<- "Post"
desc_PostPost$Variable <- "PostPost"
desc_differencePostPost_Pre$Variable <- "Difference_PostPost_Pre"
desc_differencePost_Pre$Variable<- "Difference_Post_Pre"
desc_differencePostPost_Post$Variable<- "Difference_PostPost_Post"

# Combine all into one dataset
desc_combined <- rbind(desc_Pre, desc_Post, desc_PostPost, desc_differencePostPost_Pre, desc_differencePost_Pre, desc_differencePostPost_Post)%>%
  mutate(upper = mean+(se*2))%>%
  mutate(lower = mean-(se*2))%>%
  dplyr::select(Variable, mean, upper, lower, se, sd, skew, kurtosis)


write.csv(desc_combined, "repeated_desc.csv")

# We now want to check the assumption of normality
# Plot a histogram of the Difference variable
hist_PostPost_Pre <- ggplot(testdata, aes(x = DifferencePostPost_Pre)) +
  geom_histogram(fill = "lightblue", color = "black", bins = 10) +
  ggtitle("Histogram of Difference PostPost and Pre") +
  xlab("Difference (PostPost - Pre)") +
  theme_minimal()

hist_PostPost_Post <- ggplot(testdata, aes(x = DifferencePostPost_Post)) +
  geom_histogram(fill = "lightblue", color = "black", bins = 10) +
  ggtitle("Histogram of Difference PostPost and Post") +
  xlab("Difference (PostPost - Post)") +
  theme_minimal()

hist_Post_Pre <- ggplot(testdata, aes(x = DifferencePost_Pre)) +
  geom_histogram(fill = "lightblue", color = "black", bins = 10) +
  ggtitle("Histogram of Difference Post and Pre") +
  xlab("Difference (Post - Pre)") +
  theme_minimal()

# Create Q-Q plots
qq_PostPost_Pre <- ggplot(testdata, aes(sample = DifferencePostPost_Pre)) +
  stat_qq() + stat_qq_line(color = "red") +
  ggtitle("Q-Q Plot: Difference PostPost-Pre") +
  theme_minimal()

qq_PostPost_Post <- ggplot(testdata, aes(sample = DifferencePostPost_Post)) +
  stat_qq() + stat_qq_line(color = "red") +
  ggtitle("Q-Q Plot: Difference PostPost-Post") +
  theme_minimal()

qq_Post_Pre <- ggplot(testdata, aes(sample = DifferencePost_Pre)) +
  stat_qq() + stat_qq_line(color = "red") +
  ggtitle("Q-Q Plot: Difference Post-Pre") +
  theme_minimal()

# Arrange plots in a grid
grid.arrange(hist_PostPost_Pre, qq_PostPost_Pre,
             hist_PostPost_Post, qq_PostPost_Post,
             hist_Post_Pre, qq_Post_Pre,
             ncol = 2)

# Perform Shapiro-Wilk test for normality on the Difference variable
shapiro_testPostPost_Pre <- shapiro.test(testdata$DifferencePostPost_Pre)
shapiro_testPostPost_Pre
## 
##  Shapiro-Wilk normality test
## 
## data:  testdata$DifferencePostPost_Pre
## W = 0.92258, p-value < 2.2e-16
# Perform Shapiro-Wilk test for normality on the Difference variable
shapiro_testPostPost_Post <- shapiro.test(testdata$DifferencePostPost_Post)
shapiro_testPostPost_Post
## 
##  Shapiro-Wilk normality test
## 
## data:  testdata$DifferencePostPost_Post
## W = 0.7415, p-value < 2.2e-16
# Perform Shapiro-Wilk test for normality on the Difference variable
shapiro_testPost_Pre <- shapiro.test(testdata$DifferencePost_Pre)
shapiro_testPost_Pre
## 
##  Shapiro-Wilk normality test
## 
## data:  testdata$DifferencePost_Pre
## W = 0.94281, p-value < 2.2e-16
# Convert to long format and add an ID
testdata_long <- testdata %>%
  dplyr::select(ID, Pre, Post, PostPost) %>%
  pivot_longer(cols = c(Pre, Post, PostPost),
               names_to = "Time", 
               values_to = "Score") %>%
  mutate(Time = factor(Time, levels = c("Pre", "Post", "PostPost")))


# The Greenhouse-Geisser correction is applied automatically by this ANOVA test; "ges" is the generalized eta-squared effect size.
anova_lm <- anova_test(dv = Score, wid = ID, within = Time, data = testdata_long)
anova_lm
## ANOVA Table (type III tests)
## 
## $ANOVA
##   Effect DFn  DFd        F p p<.05   ges
## 1   Time   2 5626 6923.411 0     * 0.539
## 
## $`Mauchly's Test for Sphericity`
##   Effect     W p p<.05
## 1   Time 0.292 0     *
## 
## $`Sphericity Corrections`
##   Effect   GGe        DF[GG] p[GG] p[GG]<.05   HFe        DF[HF] p[HF]
## 1   Time 0.585 1.17, 3293.07     0         * 0.585 1.17, 3293.61     0
##   p[HF]<.05
## 1         *
get_anova_table(anova_lm)
## ANOVA Table (type III tests)
## 
##   Effect  DFn     DFd        F p p<.05   ges
## 1   Time 1.17 3293.07 6923.411 0     * 0.539
#Plot the Difference
ggplot(testdata_long, aes(x = Time, y= Score, fill=Time))+
  geom_boxplot()

# Post-hoc with a bonferroni correction
pwc <- testdata_long %>%
  pairwise_t_test(Score ~ Time, paired = TRUE, p.adjust.method = "bonferroni")

pwc
## # A tibble: 3 × 10
##   .y.   group1 group2      n1    n2 statistic    df     p p.adj p.adj.signif
## * <chr> <chr>  <chr>    <int> <int>     <dbl> <dbl> <dbl> <dbl> <chr>       
## 1 Score Pre    Post      2814  2814     -92.2  2813     0     0 ****        
## 2 Score Pre    PostPost  2814  2814     -66.5  2813     0     0 ****        
## 3 Score Post   PostPost  2814  2814      99.3  2813     0     0 ****
# Cohen's d effect size into a table
# Note: `==` with a length-2 vector recycles row-by-row, so only half the
# subjects are kept in the first two pairs (n1 = n2 = 1407 below);
# `%in%` (used for the third pair) keeps every subject.
testdata_prepost_d<- testdata_long%>%
  filter(Time == c("Pre", "Post"))%>%
  mutate(Time = as.character(Time))%>%
  rstatix::cohens_d(Score ~ Time)%>%
  mutate(Model = "Pre to Post")

testdata_prepost_Post_d<- testdata_long%>%
  filter(Time == c("Pre", "PostPost"))%>%
  mutate(Time = as.character(Time))%>%
  rstatix::cohens_d(Score ~ Time)%>%
  mutate(Model = "Pre to PostPost")

testdata_PostPost_Post_d <- testdata_long %>%
  filter(Time %in% c("Post", "PostPost")) %>%
  mutate(Time = as.character(Time)) %>%
  rstatix::cohens_d(Score ~ Time) %>%
  mutate(Model = "Post to PostPost")


effectsize<- rbind(testdata_prepost_d, testdata_prepost_Post_d, testdata_PostPost_Post_d)

effectsize
## # A tibble: 3 × 8
##   .y.   group1   group2   effsize    n1    n2 magnitude Model           
##   <chr> <chr>    <chr>      <dbl> <int> <int> <ord>     <chr>           
## 1 Score Post     Pre         2.46  1407  1407 large     Pre to Post     
## 2 Score PostPost Pre         1.55  1407  1407 large     Pre to PostPost 
## 3 Score Post     PostPost    1.17  2814  2814 large     Post to PostPost
write.csv(effectsize, "repeated_pair_cohen.csv")

Chunk 25: Non-parametric repeated measures Friedman Test

Use the testdata_long dataset from the last chunk

library(dplyr)
library(stats)
library(tidyverse)
library(ggpubr)
library(rstatix)
library(tidyr)
library(purrr)
library(tibble)

#Use the testdata_long dataset from the last chunk

testdata_long$ID <- factor(testdata_long$ID, ordered = FALSE)
testdata_long$Time <- factor(testdata_long$Time, ordered = FALSE)

# Run the omnibus test
friedtest<- friedman_test(testdata_long, Score ~ Time | ID)

friedtest
## # A tibble: 1 × 6
##   .y.       n statistic    df     p method       
## * <chr> <int>     <dbl> <dbl> <dbl> <chr>        
## 1 Score  2814     5464.     2     0 Friedman test
# Get the omnibus effect size
friedman_effsize_result <- friedman_effsize(testdata_long, Score ~ Time | ID)

friedman_effsize_result
## # A tibble: 1 × 5
##   .y.       n effsize method    magnitude
## * <chr> <int>   <dbl> <chr>     <ord>    
## 1 Score  2814   0.971 Kendall W large
# Create planet_test_wide from planetmath
planet_test_wide <- planetmath %>%
  dplyr::select(Pre, Post, PostPost)

# Define pairwise comparisons
comparisons <- list(
  c("Post", "Pre"),
  c("PostPost", "Pre"),
  c("PostPost", "Post")
)

# Function to perform Wilcoxon test and compute statistics
perform_test <- function(pair) {
  test <- wilcox.test(planet_test_wide[[pair[1]]], planet_test_wide[[pair[2]]], paired = TRUE)
  
  # Extract the test statistic (V)
  V_stat <- test$statistic
  
  # Sample size (n) for effect size and Z-statistic calculations
  n <- sum(!is.na(planet_test_wide[[pair[1]]]) & !is.na(planet_test_wide[[pair[2]]]))  # Correct for NAs
  
  # Expected value of V
  mu_V <- n * (n + 1) / 4
  
  # Standard deviation of V
  sigma_V <- sqrt((n * (n + 1) * (2 * n + 1)) / 24)
  
  # Compute the Z-statistic
  Zstat <- (V_stat - mu_V) / sigma_V
  
  # Flip the sign when V falls below its expected value so Z is reported as positive
  if (V_stat < mu_V) {
    Zstat <- -Zstat
  }
  
  # Compute effect size (r)
  r_effect_size <- abs(Zstat) / sqrt(n)  # You can choose to use absolute or raw Z value
  
  # Return a tibble with results
  tibble(
    Group1 = pair[1],
    Group2 = pair[2],
    n = n,
    V = V_stat,
    Z = Zstat,  # Z-statistic (sign adjusted above)
    p = test$p.value,
    r = r_effect_size
  )
}

# Apply function to all comparisons and store results in a table
wilcoxon_results <- purrr::map_dfr(comparisons, perform_test)

# Print results
print(wilcoxon_results)
## # A tibble: 3 × 7
##   Group1   Group2     n        V     Z     p     r
##   <chr>    <chr>  <int>    <dbl> <dbl> <dbl> <dbl>
## 1 Post     Pre     2814 3935415   45.4     0 0.855
## 2 PostPost Pre     2814 3881884.  44.1     0 0.832
## 3 PostPost Post    2814       0   45.9     0 0.866
# Save results to CSV
write.csv(wilcoxon_results, "wilcoxon_repeated.csv")

Chunk 26: Factorial ANOVA descriptives

The data are already set up, so you just need to load them.

library(psych)
library(dplyr)
library(ggplot2)


factorial<- read.csv("Factorial_for_Practice_and_HW.csv")
factorial$TeachReading<- as.numeric(factorial$TeachReading)
factorial$ResistsWriting<- as.numeric(factorial$ResistsWriting)
factorial$ID<- as.character(factorial$ID)

# Create a new variable that concatenates school and content
factorial <- factorial %>%
  mutate(School_Cont = paste(School, "_", Content_Area))


# Get descriptive statistics by school, content, and school_content combined
resist_school <- as.data.frame(psych::describeBy(factorial$ResistsWriting, group = factorial$School, mat = TRUE))%>%
  mutate(Category = "Resist_School")%>%
  dplyr::select(Category, group1, n, mean, sd, median, kurtosis, skew)

resist_content<- as.data.frame(psych::describeBy(factorial$ResistsWriting, group = factorial$Content_Area, mat = TRUE))%>%
  mutate(Category = "Resist_Content")%>%
  dplyr::select(Category, group1, n, mean, sd, median, kurtosis, skew)

resist_school_content<- as.data.frame(psych::describeBy(factorial$ResistsWriting, group = factorial$School_Cont, mat = TRUE))%>%
  mutate(Category = "Resist_School_Content")%>%
  dplyr::select(Category, group1, n, mean, sd, median, kurtosis, skew)

overall_resist<- as.data.frame(psych::describe(factorial$ResistsWriting))%>%
  mutate(Category = "Resist_Overall")%>%
  mutate(group1 = "Overall")%>%
  dplyr::select(Category, group1, n, mean, sd, median, kurtosis, skew)

Factorial_Desc<- rbind(resist_school, resist_content, resist_school_content, overall_resist)

Factorial_Desc
##                   Category                    group1   n     mean       sd
## X11          Resist_School                  School 1  61 25.68852 6.682168
## X12          Resist_School                  School 2 111 28.45045 7.699751
## X111        Resist_Content             Language Arts  47 22.72340 6.195027
## X121        Resist_Content                      Math  41 31.46341 8.772393
## X13         Resist_Content                   Science  46 28.93478 6.416653
## X14         Resist_Content            Social Studies  38 27.26316 5.176267
## X112 Resist_School_Content  School 1 _ Language Arts  10 19.00000 6.377042
## X122 Resist_School_Content           School 1 _ Math  15 26.80000 7.589466
## X131 Resist_School_Content        School 1 _ Science  19 28.00000 5.228129
## X141 Resist_School_Content School 1 _ Social Studies  17 26.05882 5.273407
## X15  Resist_School_Content  School 2 _ Language Arts  37 23.72973 5.829278
## X16  Resist_School_Content           School 2 _ Math  26 34.15385 8.384234
## X17  Resist_School_Content        School 2 _ Science  27 29.59259 7.158960
## X18  Resist_School_Content School 2 _ Social Studies  21 28.23810 5.009039
## X1          Resist_Overall                   Overall 172 27.47093 7.453895
##      median    kurtosis        skew
## X11    27.0 -0.29753346 -0.20723409
## X12    28.0  0.77700815  0.38944058
## X111   25.0 -1.09614916 -0.19590131
## X121   31.0 -0.11750874 -0.07083268
## X13    29.0  2.17206354  0.70398419
## X14    27.0  0.96021548 -0.15470751
## X112   17.5 -1.62605482  0.47892084
## X122   28.0 -0.62307485 -0.05841430
## X131   29.0 -0.29399793  0.22098294
## X141   27.0  0.06378897 -0.70016856
## X15    25.0 -0.70547202 -0.34894247
## X16    34.0  0.28254322 -0.22613311
## X17    30.0  1.84260784  0.67769262
## X18    28.0  0.65709388  0.40931170
## X1     28.0  0.75885027  0.28860860
write.csv(Factorial_Desc, "Factorial_Desc.csv")

Chunk 27: Check statistical assumptions of the factorial ANOVA

library(rstatix)
library(car)
library(ggplot2)

# Shapiro-Wilk test for normality
shapiro.test(factorial$ResistsWriting)  # Assesses overall normality
## 
##  Shapiro-Wilk normality test
## 
## data:  factorial$ResistsWriting
## W = 0.96647, p-value = 0.000366
# QQ Plots
qqnorm(factorial$ResistsWriting)
qqline(factorial$ResistsWriting, col = "red")

# Normality within groups
factorial %>%
  group_by(School, Content_Area) %>%
  shapiro_test(ResistsWriting)
## # A tibble: 8 × 5
##   School   Content_Area   variable       statistic      p
##   <chr>    <chr>          <chr>              <dbl>  <dbl>
## 1 School 1 Language Arts  ResistsWriting     0.831 0.0344
## 2 School 1 Math           ResistsWriting     0.976 0.937 
## 3 School 1 Science        ResistsWriting     0.970 0.774 
## 4 School 1 Social Studies ResistsWriting     0.932 0.232 
## 5 School 2 Language Arts  ResistsWriting     0.943 0.0566
## 6 School 2 Math           ResistsWriting     0.973 0.703 
## 7 School 2 Science        ResistsWriting     0.934 0.0888
## 8 School 2 Social Studies ResistsWriting     0.943 0.244
# Equality of variance
leveneTest(ResistsWriting ~ School * Content_Area, data = factorial)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   7  0.9423 0.4756
##       164
hist(factorial$ResistsWriting)

library(ggplot2)

ggplot(factorial, aes(x = ResistsWriting)) +
  geom_histogram(fill = "blue", color = "black") +
  facet_wrap(~ School_Cont) +
  theme_minimal()
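The checks above test the raw scores. Another common approach is to test the residuals of the fitted ANOVA model itself; here is a minimal sketch that refits the same model Chunk 28 builds and tests its residuals:

# Fit the factorial model and check normality of its residuals
resid_model <- aov(ResistsWriting ~ School * Content_Area, data = factorial)
shapiro.test(residuals(resid_model))

# Visual check of the residuals
qqnorm(residuals(resid_model))
qqline(residuals(resid_model), col = "red")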

Chunk 28: Run the factorial ANOVA omnibus test with post-hoc Tukey and pairwise Bonferroni

Partial eta-squared is also included

library(effectsize)
library(bannerCommenter)
library(car)

banner("Omnibus")
## 
## #################################################################
## ##                           Omnibus                           ##
## #################################################################
#Omnibus
factorial_anova <- aov(ResistsWriting ~ School * Content_Area, data = factorial)
summary(factorial_anova)
##                      Df Sum Sq Mean Sq F value   Pr(>F)    
## School                1    300   300.3   7.112  0.00842 ** 
## Content_Area          3   2070   690.1  16.345 2.43e-09 ***
## School:Content_Area   3    206    68.6   1.626  0.18542    
## Residuals           164   6924    42.2                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
banner("Copy SPSS")
## 
## #################################################################
## ##                          Copy SPSS                          ##
## #################################################################
factorial$School <- factor(factorial$School)
factorial$Content_Area <- factor(factorial$Content_Area)
model <- lm(ResistsWriting ~ School * Content_Area, data = factorial)
spsstable<- Anova(model, type = 3)
spsstable<- as.data.frame(spsstable)

spsstable <- spsstable %>%
  rownames_to_column("Variable") %>%
  mutate(`Mean Sq` = `Sum Sq` / Df) %>%
  dplyr::select(Variable, Df, `Mean Sq`, `Pr(>F)`)
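One caveat worth flagging: Type III sums of squares only reproduce SPSS when the factors use sum-to-zero contrasts; with R's default treatment contrasts, Anova(model, type = 3) can return different main-effect tests. A hedged sketch of the contrast-aware refit (same model and data as above):

# Refit with sum-to-zero contrasts so the Type III tests follow SPSS conventions
model_sum <- lm(ResistsWriting ~ School * Content_Area, data = factorial,
                contrasts = list(School = contr.sum, Content_Area = contr.sum))
Anova(model_sum, type = 3)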



# Eta-squared effect size
banner("eta-squared")
## 
## #################################################################
## ##                         eta-squared                         ##
## #################################################################
eta_squared(factorial_anova)
## # Effect Size for ANOVA (Type I)
## 
## Parameter           | Eta2 (partial) |       95% CI
## ---------------------------------------------------
## School              |           0.04 | [0.01, 1.00]
## Content_Area        |           0.23 | [0.13, 1.00]
## School:Content_Area |           0.03 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].
banner("partial eta-squared")
## 
## #################################################################
## ##                     partial eta-squared                     ##
## #################################################################
partialeta <- partial_eta_squared(factorial_anova) %>%
  as.data.frame() %>%
  rownames_to_column("Variable")

spsstable <- left_join(spsstable, partialeta, by = 'Variable') %>%
  as.data.frame()

# The piped as.data.frame() names the effect-size column '.';
# give it a readable name (it holds the partial eta-squared values)
names(spsstable)[names(spsstable) == '.'] <- 'Eta'

write.csv(spsstable, "spsstable.csv")

banner("Tukey post-hoc")
## 
## ##################################################################
## ##                        Tukey post-hoc                        ##
## ##################################################################
# Tukey post-hoc analysis
TukeyHSD(factorial_anova, which = "School")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = ResistsWriting ~ School * Content_Area, data = factorial)
## 
## $School
##                       diff       lwr      upr     p adj
## School 2-School 1 2.761926 0.7170348 4.806817 0.0084227
TukeyHSD(factorial_anova, which = "Content_Area")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = ResistsWriting ~ School * Content_Area, data = factorial)
## 
## $Content_Area
##                                   diff       lwr        upr     p adj
## Math-Language Arts            9.162827  5.558698 12.7669566 0.0000000
## Science-Language Arts         6.764530  3.266581 10.2624794 0.0000079
## Social Studies-Language Arts  5.187708  1.508379  8.8670373 0.0019143
## Science-Math                 -2.398297 -6.020633  1.2240383 0.3174021
## Social Studies-Math          -3.975119 -7.772898 -0.1773398 0.0363535
## Social Studies-Science       -1.576822 -5.273987  2.1203431 0.6857484
TukeyHSD(factorial_anova, which = "School:Content_Area")  # If interaction is significant
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = ResistsWriting ~ School * Content_Area, data = factorial)
## 
## $`School:Content_Area`
##                                                       diff         lwr
## School 2:Language Arts-School 1:Language Arts    4.7297297  -2.3811067
## School 1:Math-School 1:Language Arts             7.8000000  -0.3451108
## School 2:Math-School 1:Language Arts            15.1538462   7.7298592
## School 1:Science-School 1:Language Arts          9.0000000   1.2053823
## School 2:Science-School 1:Language Arts         10.5925926   3.2068936
## School 1:Social Studies-School 1:Language Arts   7.0588235  -0.8923282
## School 2:Social Studies-School 1:Language Arts   9.2380952   1.5725364
## School 1:Math-School 2:Language Arts             3.0702703  -3.0367281
## School 2:Math-School 2:Language Arts            10.4241164   5.3184156
## School 1:Science-School 2:Language Arts          4.2702703  -1.3607744
## School 2:Science-School 2:Language Arts          5.8628629   0.8129967
## School 1:Social Studies-School 2:Language Arts   2.3290938  -3.5167092
## School 2:Social Studies-School 2:Language Arts   4.5083655  -0.9426336
## School 2:Math-School 1:Math                      7.3538462   0.8849192
## School 1:Science-School 1:Math                   1.2000000  -5.6911174
## School 2:Science-School 1:Math                   2.7925926  -3.6323574
## School 1:Social Studies-School 1:Math           -0.7411765  -7.8088669
## School 2:Social Studies-School 1:Math            1.4380952  -5.3066973
## School 1:Science-School 2:Math                  -6.1538462 -12.1754947
## School 2:Science-School 2:Math                  -4.5612536 -10.0432910
## School 1:Social Studies-School 2:Math           -8.0950226 -14.3179641
## School 2:Social Studies-School 2:Math           -5.9157509 -11.7693804
## School 2:Science-School 1:Science                1.5925926  -4.3817876
## School 1:Social Studies-School 1:Science        -1.9411765  -8.6019185
## School 2:Social Studies-School 1:Science         0.2380952  -6.0789817
## School 1:Social Studies-School 2:Science        -3.5337691  -9.7109826
## School 2:Social Studies-School 2:Science        -1.3544974  -7.1594905
## School 2:Social Studies-School 1:Social Studies  2.1792717  -4.3299687
##                                                         upr     p adj
## School 2:Language Arts-School 1:Language Arts   11.84056615 0.4569118
## School 1:Math-School 1:Language Arts            15.94511079 0.0712488
## School 2:Math-School 1:Language Arts            22.57783307 0.0000001
## School 1:Science-School 1:Language Arts         16.79461771 0.0117403
## School 2:Science-School 1:Language Arts         17.97829154 0.0004990
## School 1:Social Studies-School 1:Language Arts  15.00997529 0.1222725
## School 2:Social Studies-School 1:Language Arts  16.90365407 0.0069635
## School 1:Math-School 2:Language Arts             9.17726861 0.7825143
## School 2:Math-School 2:Language Arts            15.52981721 0.0000001
## School 1:Science-School 2:Language Arts          9.90131494 0.2846939
## School 2:Science-School 2:Language Arts         10.91272898 0.0110078
## School 1:Social Studies-School 2:Language Arts   8.17489684 0.9239350
## School 2:Social Studies-School 2:Language Arts   9.95936459 0.1865633
## School 2:Math-School 1:Math                     13.82277309 0.0140426
## School 1:Science-School 1:Math                   8.09111742 0.9994533
## School 2:Science-School 1:Math                   9.21754263 0.8843167
## School 1:Social Studies-School 1:Math            6.32651398 0.9999820
## School 2:Social Studies-School 1:Math            8.18288776 0.9979704
## School 1:Science-School 2:Math                  -0.13219761 0.0413041
## School 2:Science-School 2:Math                   0.92078391 0.1805325
## School 1:Social Studies-School 2:Math           -1.87208118 0.0024361
## School 2:Social Studies-School 2:Math           -0.06212144 0.0456178
## School 2:Science-School 1:Science                7.56697276 0.9918708
## School 1:Social Studies-School 1:Science         4.71956551 0.9861833
## School 2:Social Studies-School 1:Science         6.55517222 1.0000000
## School 1:Social Studies-School 2:Science         2.64344451 0.6501607
## School 2:Social Studies-School 2:Science         4.45049578 0.9964143
## School 2:Social Studies-School 1:Social Studies  8.68851211 0.9695548
banner("Bonferroni post-hoc pairwise")
## 
## ##################################################################
## ##                 Bonferroni post-hoc pairwise                 ##
## ##################################################################
# Pairwise t-tests with Bonferroni correction (alternative to Tukey)
pairwise.t.test(factorial$ResistsWriting, factorial$School, p.adjust.method = "bonferroni")
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  factorial$ResistsWriting and factorial$School 
## 
##          School 1
## School 2 0.02    
## 
## P value adjustment method: bonferroni
pairwise.t.test(factorial$ResistsWriting, factorial$Content_Area, p.adjust.method = "bonferroni")
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  factorial$ResistsWriting and factorial$Content_Area 
## 
##                Language Arts Math   Science
## Math           5.6e-08       -      -      
## Science        0.0001        0.5017 -      
## Social Studies 0.0147        0.0388 1.0000 
## 
## P value adjustment method: bonferroni
interaction_variable <- interaction(factorial$School, factorial$Content_Area)
pairwise.t.test(factorial$ResistsWriting, interaction_variable, p.adjust.method = "bonferroni")
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  factorial$ResistsWriting and interaction_variable 
## 
##                         School 1.Language Arts School 2.Language Arts
## School 2.Language Arts  1.00000                -                     
## School 1.Math           0.10506                1.00000               
## School 2.Math           8.7e-08                8.7e-08               
## School 1.Science        0.01432                0.59101               
## School 2.Science        0.00054                0.01336               
## School 1.Social Studies 0.19908                1.00000               
## School 2.Social Studies 0.00822                0.33687               
##                         School 1.Math School 2.Math School 1.Science
## School 2.Language Arts  -             -             -               
## School 1.Math           -             -             -               
## School 2.Math           0.01734       -             -               
## School 1.Science        1.00000       0.05650       -               
## School 2.Science        1.00000       0.32302       1.00000         
## School 1.Social Studies 1.00000       0.00274       1.00000         
## School 2.Social Studies 1.00000       0.06317       1.00000         
##                         School 2.Science School 1.Social Studies
## School 2.Language Arts  -                -                      
## School 1.Math           -                -                      
## School 2.Math           -                -                      
## School 1.Science        -                -                      
## School 2.Science        -                -                      
## School 1.Social Studies 1.00000          -                      
## School 2.Social Studies 1.00000          1.00000                
## 
## P value adjustment method: bonferroni
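If you want the Tukey results in a spreadsheet like the other tables, broom::tidy() flattens TukeyHSD output into a data frame. A small sketch (the file name is only a suggestion):

library(broom)

# Flatten all three Tukey comparisons into one data frame and save it
tukey_tidy <- tidy(TukeyHSD(factorial_anova))
write.csv(tukey_tidy, "tukey_posthoc.csv")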

Chunk 29: Plot all the IVs and interactions given DV

library(ggplot2)
library(gridExtra)

p1 <- ggplot(factorial, aes(x = School, y = ResistsWriting)) +
  geom_boxplot(fill = "skyblue", alpha = 0.7) +
  stat_summary(fun = mean, geom = "point", shape = 20, size = 3, color = "red") +
  labs(title = "Resistance to Writing by School", x = "School", y = "Resistance to Writing") +
  theme_minimal()

p2 <- ggplot(factorial, aes(x = Content_Area, y = ResistsWriting)) +
  geom_boxplot(fill = "lightgreen", alpha = 0.7) +
  stat_summary(fun = mean, geom = "point", shape = 20, size = 3, color = "red") +
  labs(title = "Resistance to Writing by Content Area", x = "Content Area", y = "Resistance to Writing") +
  theme_minimal()

p3 <- ggplot(factorial, aes(x = School_Cont, y = ResistsWriting)) +
  geom_boxplot(fill = "lightcoral", alpha = 0.7) +
  stat_summary(fun = mean, geom = "point", shape = 20, size = 3, color = "red") +
  labs(title = "Resistance to Writing by School-Content", x = "School & Content", y = "Resistance to Writing") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))  # Rotate x-axis labels

grid.arrange(p3, p2, p1, ncol = 2)
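Alongside the boxplots, an interaction (means) plot helps you see whether the School lines cross over content areas, which is what the School:Content_Area test is probing. A minimal sketch with the same variables:

ggplot(factorial, aes(x = Content_Area, y = ResistsWriting,
                      color = School, group = School)) +
  stat_summary(fun = mean, geom = "point", size = 3) +
  stat_summary(fun = mean, geom = "line", linewidth = 1) +
  labs(title = "Mean Resistance to Writing by School and Content Area",
       x = "Content Area", y = "Mean Resistance to Writing") +
  theme_minimal()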

Chunk 30: Cohen’s d for all significant effects

Only run these effect sizes for effects that were significant in the omnibus test

library(rstatix)

# Cohen's d between schools

factorial_school_cohen <- factorial%>%
  rstatix::cohens_d(ResistsWriting ~ School)

# Cohen's d between content areas

factorial_math_la <- factorial %>%
  # use %in% rather than ==: comparing a column to a length-2 vector with ==
  # recycles the vector and silently drops rows
  dplyr::filter(Content_Area %in% c("Math", "Language Arts")) %>%
  rstatix::cohens_d(ResistsWriting ~ Content_Area)

factorial_sci_la <- factorial %>%
  dplyr::filter(Content_Area %in% c("Science", "Language Arts")) %>%
  rstatix::cohens_d(ResistsWriting ~ Content_Area)

factorial_sss_la <- factorial %>%
  dplyr::filter(Content_Area %in% c("Social Studies", "Language Arts")) %>%
  rstatix::cohens_d(ResistsWriting ~ Content_Area)

factorial_ss_math <- factorial %>%
  filter(Content_Area %in% c("Math", "Social Studies"))%>%
  mutate(Content_Area = factor(Content_Area))%>%
  rstatix::cohens_d(ResistsWriting ~ Content_Area)

# School content combinations
library(dplyr)
library(rstatix)

# School-by-content pairwise comparisons
S2M_S2S <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Math", "School 2 _ Social Studies")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2M_S2L <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Math", "School 2 _ Language Arts")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2M_S2Sc <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Math", "School 2 _ Science")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2M_S1SS <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Math", "School 1 _ Social Studies")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2M_S1L <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Math", "School 1 _ Language Arts")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2M_S1M <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Math", "School 1 _ Math")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2M_S1Sc <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Math", "School 1 _ Science")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2S_S2L <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Social Studies", "School 2 _ Language Arts")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2S_S2Sc <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Social Studies", "School 2 _ Science")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2S_S1SS <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Social Studies", "School 1 _ Social Studies")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2S_S1L <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Social Studies", "School 1 _ Language Arts")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2S_S1M <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Social Studies", "School 1 _ Math")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2S_S1Sc <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Social Studies", "School 1 _ Science")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2L_S2Sc <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Language Arts", "School 2 _ Science")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2L_S1SS <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Language Arts", "School 1 _ Social Studies")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2L_S1L <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Language Arts", "School 1 _ Language Arts")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2L_S1M <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Language Arts", "School 1 _ Math")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2L_S1Sc <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Language Arts", "School 1 _ Science")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2Sc_S1SS <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Science", "School 1 _ Social Studies")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2Sc_S1L <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Science", "School 1 _ Language Arts")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2Sc_S1M <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Science", "School 1 _ Math")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S2Sc_S1Sc <- factorial %>%
  filter(School_Cont %in% c("School 2 _ Science", "School 1 _ Science")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S1SS_S1L <- factorial %>%
  filter(School_Cont %in% c("School 1 _ Social Studies", "School 1 _ Language Arts")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S1SS_S1M <- factorial %>%
  filter(School_Cont %in% c("School 1 _ Social Studies", "School 1 _ Math")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S1SS_S1Sc <- factorial %>%
  filter(School_Cont %in% c("School 1 _ Social Studies", "School 1 _ Science")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S1L_S1M <- factorial %>%
  filter(School_Cont %in% c("School 1 _ Language Arts", "School 1 _ Math")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S1L_S1Sc <- factorial %>%
  filter(School_Cont %in% c("School 1 _ Language Arts", "School 1 _ Science")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

S1M_S1Sc <- factorial %>%
  filter(School_Cont %in% c("School 1 _ Math", "School 1 _ Science")) %>%
  rstatix::cohens_d(ResistsWriting ~ School_Cont)

factorial_d <- rbind(
  factorial_school_cohen, factorial_math_la, factorial_sci_la, factorial_sss_la,
  factorial_ss_math, S2M_S2S, S2M_S2L, S2M_S2Sc, S2M_S1SS, S2M_S1L, S2M_S1M,
  S2M_S1Sc, S2S_S2L, S2S_S2Sc, S2S_S1SS, S2S_S1L, S2S_S1M, S2S_S1Sc,
  S2L_S2Sc, S2L_S1SS, S2L_S1L, S2L_S1M, S2L_S1Sc, S2Sc_S1SS, S2Sc_S1L,
  S2Sc_S1M, S2Sc_S1Sc, S1SS_S1L, S1SS_S1M, S1SS_S1Sc, S1L_S1M, S1L_S1Sc,
  S1M_S1Sc
)

factorial_d
## # A tibble: 33 × 7
##    .y.            group1                    group2 effsize    n1    n2 magnitude
##    <chr>          <chr>                     <chr>    <dbl> <int> <int> <ord>    
##  1 ResistsWriting School 1                  Schoo…  -0.383    61   111 small    
##  2 ResistsWriting Language Arts             Math    -1.04     26    25 large    
##  3 ResistsWriting Language Arts             Math     3.50     26     0 large    
##  4 ResistsWriting Language Arts             Math     3.50     26     0 large    
##  5 ResistsWriting Math                      Socia…   0.583    41    38 moderate 
##  6 ResistsWriting School 2 _ Math           Schoo…   0.857    26    21 large    
##  7 ResistsWriting School 2 _ Language Arts  Schoo…  -1.44     37    26 large    
##  8 ResistsWriting School 2 _ Math           Schoo…   0.585    26    27 moderate 
##  9 ResistsWriting School 1 _ Social Studies Schoo…  -1.16     17    26 large    
## 10 ResistsWriting School 1 _ Language Arts  Schoo…  -2.03     10    26 large    
## # ℹ 23 more rows
write.csv(factorial_d, "factorial_d_effect.csv")
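For reference, the 28 School_Cont blocks above can be generated in one pass. A minimal sketch using combn() to enumerate every pair of levels (it assumes the same factorial data and stacks the same comparisons):

library(purrr)

# Every unordered pair of School_Cont levels
pairs <- combn(unique(factorial$School_Cont), 2, simplify = FALSE)

# Cohen's d for each pair, stacked into one data frame
factorial_d_pairs <- map_dfr(pairs, function(p) {
  factorial %>%
    filter(School_Cont %in% p) %>%
    rstatix::cohens_d(ResistsWriting ~ School_Cont)
})

factorial_d_pairs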

Chunk 31: Capture the ANCOVA data and run descriptive statistics

library(tidyverse)
library(ggpubr)
library(ggplot2)
library(rstatix)
library(broom)
library(bannerCommenter)
library(emmeans)
library(tidyr)
library(dplyr)

# Read in data
ancovadata <- read.csv("Factorial_for_Practice_and_HW.csv")

# Get descriptive statistics by school, content, and school_content combined for resist
resist_content<- as.data.frame(psych::describeBy(ancovadata$ResistsWriting, group = ancovadata$Content_Area, mat = TRUE))%>%
  mutate(Category = "Resist_Content")%>%
  dplyr::select(Category, group1, n, mean, sd, median, kurtosis, skew)

overall_resist<- as.data.frame(psych::describe(ancovadata$ResistsWriting))%>%
  mutate(Category = "Resist_Overall")%>%
  mutate(group1 = "Overall")%>%
  dplyr::select(Category, group1, n, mean, sd, median, kurtosis, skew)

# Get descriptives for teach reading by content and overall
TeachReading_content<- as.data.frame(psych::describeBy(ancovadata$TeachReading, group = ancovadata$Content_Area, mat = TRUE))%>%
  mutate(Category = "TeachReading_Content")%>%
  dplyr::select(Category, group1, n, mean, sd, median, kurtosis, skew)

overall_TeachReading<- as.data.frame(psych::describe(ancovadata$TeachReading))%>%
  mutate(Category = "TeachReading_Overall")%>%
  mutate(group1 = "Overall")%>%
  dplyr::select(Category, group1, n, mean, sd, median, kurtosis, skew)

descriptive_stats_all <- bind_rows(
  resist_content,
  overall_resist,
  TeachReading_content,
  overall_TeachReading
)

write.csv(descriptive_stats_all, "descriptive_ancova.csv")

descriptive_stats_all
##                     Category         group1   n     mean       sd median
## X11...1       Resist_Content  Language Arts  47 22.72340 6.195027   25.0
## X12...2       Resist_Content           Math  41 31.46341 8.772393   31.0
## X13...3       Resist_Content        Science  46 28.93478 6.416653   29.0
## X14...4       Resist_Content Social Studies  38 27.26316 5.176267   27.0
## X1...5        Resist_Overall        Overall 172 27.47093 7.453895   28.0
## X11...6 TeachReading_Content  Language Arts  47 17.44681 4.595880   16.0
## X12...7 TeachReading_Content           Math  41 23.68293 5.583185   24.0
## X13...8 TeachReading_Content        Science  46 20.39130 4.509143   21.5
## X14...9 TeachReading_Content Social Studies  38 20.36842 5.429753   20.0
## X1...10 TeachReading_Overall        Overall 172 20.36628 5.448281   21.0
##           kurtosis        skew
## X11...1 -1.0961492 -0.19590131
## X12...2 -0.1175087 -0.07083268
## X13...3  2.1720635  0.70398419
## X14...4  0.9602155 -0.15470751
## X1...5   0.7588503  0.28860860
## X11...6 -0.9284476  0.53094187
## X12...7 -0.3699153 -0.20792093
## X13...8 -0.7847273 -0.18704527
## X14...9  1.1047640  0.69741463
## X1...10 -0.2401019  0.31243481
# Boxplots of ResistsWriting and TeachReading by content area, after pivoting to long format
ancova_long <- ancovadata %>%
  pivot_longer(cols = c(ResistsWriting, TeachReading),
               names_to = "Variable",
               values_to = "Score")

ggplot(ancova_long, aes(x = Content_Area, y = Score, fill = Variable)) +
  geom_boxplot(position = position_dodge(width = 0.8)) +
  theme_minimal() +
  labs(title = "Boxplots of ResistsWriting and TeachReading by Content Area",
       x = "Content Area", y = "Score", fill = "Variable")

Chunk 32: Check the statistical assumptions of the ANCOVA

# Check linearity by group (covariate vs DV by group)
ggplot(ancovadata, aes(x = TeachReading, y = ResistsWriting, color = Content_Area)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  stat_regline_equation(
    aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~~")),
    show.legend = FALSE
  ) +
  facet_wrap(~ Content_Area) +
  theme_minimal()

# Homogeneity of regression slopes
banner("Check interaction: Content_Area * TeachReading (Homogeneity of Regression Slopes)")
## 
## #########################################################################################
## ##  Check interaction: Content_Area * TeachReading (Homogeneity of Regression Slopes)  ##
## #########################################################################################
interaction_test <- ancovadata %>%
  anova_test(ResistsWriting ~ Content_Area * TeachReading)
get_anova_table(interaction_test)
## ANOVA Table (type II tests)
## 
##                      Effect DFn DFd      F        p p<.05   ges
## 1              Content_Area   3 164  4.482 5.00e-03     * 0.076
## 2              TeachReading   1 164 94.719 5.93e-18     * 0.366
## 3 Content_Area:TeachReading   3 164  2.493 6.20e-02       0.044
# Fit models for residuals
model_resist <- lm(ResistsWriting ~ TeachReading + Content_Area, data = ancovadata)
model_teach  <- lm(TeachReading ~ ResistsWriting + Content_Area , data = ancovadata)
model.metrics_resist <- augment(model_resist, data = ancovadata)
model.metrics_teach  <- augment(model_teach, data = ancovadata)


# Assess normality of residuals - Shapiro-Wilk
shapiro_teach <- model.metrics_teach %>%
  group_by(Content_Area) %>%
  shapiro_test(.resid) %>%
  mutate(Test = "Shapiro - TeachReading") %>%
  dplyr::select(Test, Content_Area, everything())

shapiro_resist <- model.metrics_resist %>%
  group_by(Content_Area) %>%
  shapiro_test(.resid) %>%
  mutate(Test = "Shapiro - ResistsWriting") %>%
  dplyr::select(Test, Content_Area, everything())

shapiro_all <- bind_rows(shapiro_teach, shapiro_resist) %>%
  dplyr::select(Test, Content_Area, statistic, p)

shapiro_all
## # A tibble: 8 × 4
##   Test                     Content_Area   statistic          p
##   <chr>                    <chr>              <dbl>      <dbl>
## 1 Shapiro - TeachReading   Language Arts      0.982 0.656     
## 2 Shapiro - TeachReading   Math               0.891 0.000939  
## 3 Shapiro - TeachReading   Science            0.893 0.000491  
## 4 Shapiro - TeachReading   Social Studies     0.983 0.816     
## 5 Shapiro - ResistsWriting Language Arts      0.939 0.0161    
## 6 Shapiro - ResistsWriting Math               0.925 0.00967   
## 7 Shapiro - ResistsWriting Science            0.800 0.00000190
## 8 Shapiro - ResistsWriting Social Studies     0.966 0.291
# Run the test on Raw variables
shapiro_resist_raw <- ancovadata %>%
  group_by(Content_Area) %>%
  shapiro_test(ResistsWriting) %>%
  mutate(Test = "Raw ResistsWriting")

shapiro_teach_raw <- ancovadata %>%
  group_by(Content_Area) %>%
  shapiro_test(TeachReading) %>%
  mutate(Test = "Raw TeachReading")

shapiro_raw <- bind_rows(shapiro_teach_raw, shapiro_resist_raw) %>%
  dplyr::select(Test, Content_Area, statistic, p)

shapiro_raw
## # A tibble: 8 × 4
##   Test               Content_Area   statistic       p
##   <chr>              <chr>              <dbl>   <dbl>
## 1 Raw TeachReading   Language Arts      0.905 0.00102
## 2 Raw TeachReading   Math               0.959 0.150  
## 3 Raw TeachReading   Science            0.952 0.0556 
## 4 Raw TeachReading   Social Studies     0.948 0.0783 
## 5 Raw ResistsWriting Language Arts      0.930 0.00785
## 6 Raw ResistsWriting Math               0.980 0.666  
## 7 Raw ResistsWriting Science            0.946 0.0340 
## 8 Raw ResistsWriting Social Studies     0.949 0.0815
# QQ plot for visual check of normality
ggqqplot(model.metrics_resist$.resid)

# Homogeneity of variance - residuals
model.metrics_resist %>%
  levene_test(.resid ~ Content_Area) %>%
  as.data.frame() %>%
  mutate(Test = "Levene's on Residuals") %>%
  dplyr::select(Test, everything())
##                    Test df1 df2 statistic         p
## 1 Levene's on Residuals   3 168  1.723983 0.1639861
# Optional: Levene's on raw outcome (less common in ANCOVA but sometimes requested)
levene_test(ResistsWriting ~ Content_Area, data = ancovadata) %>%
  as.data.frame() %>%
  mutate(Test = "Levene's on Raw Outcome") %>%
  dplyr::select(Test, everything())
##                      Test df1 df2 statistic          p
## 1 Levene's on Raw Outcome   3 168  2.958167 0.03396039
# Outlier check: standardized residuals > |3|
model.metrics_resist %>%
  filter(abs(.std.resid) > 3) %>%
  as.data.frame() %>%
  mutate(Test = "Residual Outlier Check") %>%
  dplyr::select(Test, everything())
##                     Test       ID   School Content_Area TeachReading
## 1 Residual Outlier Check 45435431 School 2         Math           19
## 2 Residual Outlier Check 45487317 School 2      Science           12
##   ResistsWriting  .fitted   .resid       .hat   .sigma   .cooksd .std.resid
## 1             52 27.69503 24.30497 0.02958160 5.115280 0.1251505   4.530748
## 2             52 22.18223 29.81777 0.03840795 4.925774 0.2490739   5.583853
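Because two cases exceed |3| standardized residuals, a quick sensitivity check is to refit the omnibus ANCOVA without them and see whether the conclusions hold. A sketch assuming the objects created above:

# Drop the flagged cases and rerun the omnibus test
keep <- abs(model.metrics_resist$.std.resid) <= 3
ancova_no_outliers <- ancovadata[keep, ]

ancova_no_outliers %>%
  anova_test(ResistsWriting ~ TeachReading + Content_Area)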

Chunk 33: ANCOVA test with post-hoc

Estimated marginal means are included, along with a plot

# ANCOVA omnibus test (Type II SS by default in rstatix)
banner("ANCOVA Omnibus Results")
## 
## ##################################################################
## ##                    ANCOVA Omnibus Results                    ##
## ##################################################################
res.aov <- ancovadata %>%
  anova_test(ResistsWriting ~ TeachReading + Content_Area)

anova_table <- as.data.frame(get_anova_table(res.aov))  

anova_table
##         Effect DFn DFd      F        p p<.05   ges
## 1 TeachReading   1 167 92.244 1.15e-17     * 0.356
## 2 Content_Area   3 167  4.365 5.00e-03     * 0.073
# Mean Squares
banner("Mean Squares Between")
## 
## ##################################################################
## ##                     Mean Squares Between                     ##
## ##################################################################
anova_table_df <- get_anova_table(res.aov)

# Step 1: Extract the F values and numerator degrees of freedom from the table above
F_values <- anova_table_df$F
DFn_values <- anova_table_df$DFn  # Degrees of freedom for between-group effects

# Step 2: Compute MSW (Mean Square Within) from the model residuals
# Fit a linear model to your ANCOVA
model <- lm(ResistsWriting ~ TeachReading + Content_Area, data = ancovadata)

# Get the residual sum of squares
SSResiduals <- sum(model$residuals^2)

# Get the residual degrees of freedom (observations minus the number of estimated coefficients, including the intercept)
DFResiduals <- length(model$residuals) - length(coef(model))

# Compute MSW
MSW <- SSResiduals / DFResiduals

# Step 3: Compute MSB for each effect (TeachReading, Content_Area)
MSB_values <- F_values * MSW

# Step 4: Combine the results into a new table
anova_table_df$MSB <- MSB_values

# Step 5: Display the updated table
print(anova_table_df)
## ANOVA Table (type II tests)
## 
##         Effect DFn DFd      F        p p<.05   ges       MSB
## 1 TeachReading   1 167 92.244 1.15e-17     * 0.356 2735.4546
## 2 Content_Area   3 167  4.365 5.00e-03     * 0.073  129.4421
# Post-hoc tests adjusted for covariate (Bonferroni)
pwc <- ancovadata %>%
  emmeans_test(
    ResistsWriting ~ Content_Area, covariate = TeachReading,
    p.adjust.method = "bonferroni"
  ) %>%
  mutate(Test = "Post-Hoc") %>%
  dplyr::select(Test, everything())

pwc
## # A tibble: 6 × 10
##   Test    term  .y.   group1 group2    df statistic       p   p.adj p.adj.signif
##   <chr>   <chr> <chr> <chr>  <chr>  <dbl>     <dbl>   <dbl>   <dbl> <chr>       
## 1 Post-H… Teac… Resi… Langu… Math     167    -2.92  0.00401 0.0241  *           
## 2 Post-H… Teac… Resi… Langu… Scien…   167    -3.32  0.00109 0.00656 **          
## 3 Post-H… Teac… Resi… Langu… Socia…   167    -1.80  0.0730  0.438   ns          
## 4 Post-H… Teac… Resi… Math   Scien…   167    -0.100 0.920   1       ns          
## 5 Post-H… Teac… Resi… Math   Socia…   167     1.22  0.224   1       ns          
## 6 Post-H… Teac… Resi… Scien… Socia…   167     1.38  0.168   1       ns
write.csv(pwc, "ancova_posth.csv")

# Extract estimated marginal means (EMMs)
emm <- get_emmeans(pwc) %>%
  as_tibble() %>%
  mutate(Adjusted_SD = se * sqrt(df)) %>%  # Using 'se' as the standard error column
  mutate(Test = "EMM") %>%
  dplyr::select(Test, everything())


emm
## # A tibble: 4 × 10
##   Test  TeachReading Content_Area   emmean    se    df conf.low conf.high method
##   <chr>        <dbl> <fct>           <dbl> <dbl> <dbl>    <dbl>     <dbl> <chr> 
## 1 EMM           20.4 Language Arts    25.1 0.831   167     23.4      26.7 Emmea…
## 2 EMM           20.4 Math             28.8 0.895   167     27.0      30.6 Emmea…
## 3 EMM           20.4 Science          28.9 0.803   167     27.3      30.5 Emmea…
## 4 EMM           20.4 Social Studies   27.3 0.883   167     25.5      29.0 Emmea…
## # ℹ 1 more variable: Adjusted_SD <dbl>
# Visualize the EMMs with confidence intervals
emm <- emm %>%
  mutate(Content_Area = factor(Content_Area, levels = unique(Content_Area)))



ggplot(emm, aes(x = Content_Area, y = emmean, group = 1)) +
  geom_line(color = "black", linewidth = 1) +
  geom_point(size = 3, color = "black", fill = "lightblue", shape = 21) +
  geom_errorbar(
    aes(ymin = emmean - 1.96 * se, ymax = emmean + 1.96 * se),
    width = 0.2, color = "black", linewidth = 1
  ) +
  labs(
    title = "Estimated Marginal Means by Content Area",
    y = "Estimated Marginal Mean (ResistsWriting)",
    x = "Content Area"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  ylim(0, 40)
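Note that the emm tibble already carries model-based confidence limits (conf.low and conf.high), so the error bars can use those t-based intervals directly rather than the normal approximation 1.96 * se. A minimal variant of the same plot:

ggplot(emm, aes(x = Content_Area, y = emmean, group = 1)) +
  geom_line(color = "black", linewidth = 1) +
  geom_point(size = 3, color = "black", fill = "lightblue", shape = 21) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                width = 0.2, color = "black", linewidth = 1) +
  labs(title = "Estimated Marginal Means by Content Area",
       y = "Estimated Marginal Mean (ResistsWriting)",
       x = "Content Area") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))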