packages <- c(
'psych', 'summarytools', 'Hmisc', 'dplyr', 'stats', 'rstatix',
'ggplot2', 'apaTables', 'effectsize', 'car', 'rcompanion',
'vcd', 'tukeytrend', 'lme4', 'lmtest', 'broom', 'modelr',
'tidyverse', 'lubridate', 'janitor', 'data.table', 'readr', 'knitr', 'mosaicData')
lapply(packages, library, character.only = TRUE)
# Load readxl in case it was not attached above
library(readxl)
# Import the Practice Math Planet Data and name it "planetmath"
planetmath <- readxl::read_xlsx('Practice_Math_Planet_Data.xlsx')
library(tidyverse)
library(dplyr)
planetmath <- planetmath %>%
mutate(
Planet = case_when(
Planet == 1 ~ "Earth",
Planet == 2 ~ "Venus",
Planet == 3 ~ "Mars",
TRUE ~ as.character(Planet)
),
Eye_Color = case_when(
Eye_Color == 1 ~ "Blue",
Eye_Color == 2 ~ "Green",
Eye_Color == 3 ~ "Brown",
TRUE ~ as.character(Eye_Color)
),
Limb = case_when(
Limb == 1 ~ "Legs",
Limb == 2 ~ "Fins",
TRUE ~ as.character(Limb)
),
Earth_Grade = case_when(
Earth_Grade == 1 ~ "6th_Grade",
Earth_Grade == 2 ~ "7th_Grade",
Earth_Grade == 3 ~ "8th_Grade",
TRUE ~ as.character(Earth_Grade)
),
Instruction_Type = case_when(
Instruction_Type == 1 ~ "Face_to_Face",
Instruction_Type == 2 ~ "Purely_online_asynchronous",
Instruction_Type == 3 ~ "Online_synchronous",
TRUE ~ as.character(Instruction_Type)
)
)%>%
mutate(Math_Eff_Tot = Matheff1+Matheff2+Matheff3)
# Reverse coding math efficacy question 3
planetmath <- planetmath %>%
mutate(REVMatheff3 = case_when(
Matheff3 == 6 ~ 1,
Matheff3 == 5 ~ 2,
Matheff3 == 4 ~ 3,
Matheff3 == 3 ~ 4,
Matheff3 == 2 ~ 5,
Matheff3 == 1 ~ 6,
TRUE ~ NA_real_
))
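# A more compact equivalent for a 1-6 scale (assuming Matheff3 takes no values
# outside 1-6): reverse coding is simply 7 minus the item score. If the
# reversed item is meant to enter the total, the hypothetical Math_Eff_Tot_Rev
# below shows that version alongside the raw-item total computed earlier.
planetmath <- planetmath %>%
  mutate(
    REVMatheff3_alt = ifelse(Matheff3 %in% 1:6, 7 - Matheff3, NA_real_),
    Math_Eff_Tot_Rev = Matheff1 + Matheff2 + REVMatheff3_alt
  )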
library(dplyr)
library(tidyr)
# Frequency tables
limb_table <- table(planetmath$Limb)
planet_table <- table(planetmath$Planet)
# Convert to data frames
df_limb <- as.data.frame(limb_table) %>%
rename(Limb = Var1, Count = Freq) %>%
mutate(Proportion = Count / sum(Count))
df_planet <- as.data.frame(planet_table) %>%
rename(Planet = Var1, Count = Freq) %>%
mutate(Proportion = Count / sum(Count))
# Print results
df_limb
## Limb Count Proportion
## 1 Fins 1475 0.5241649
## 2 Legs 1339 0.4758351
df_planet
## Planet Count Proportion
## 1 Earth 203 0.0721393
## 2 Mars 256 0.0909737
## 3 Venus 2355 0.8368870
# Create a contingency table for Limb and Planet (Planet as rows)
limb_planet_table <- table(planetmath$Planet, planetmath$Limb)
# Compute row-wise proportions (percent of limbs given planet)
prop_table <- prop.table(limb_planet_table, margin = 1)
# Combine counts and row-wise proportions into a formatted table
formatted_table <- paste0(limb_planet_table, " (", round(100 * prop_table, 2), "%)")
# Convert to matrix with appropriate dimensions
dim(formatted_table) <- dim(limb_planet_table)
dimnames(formatted_table) <- dimnames(limb_planet_table)
# Print the contingency table with row-wise proportions and write to a CSV
print("Contingency Table with Row-Wise Proportions:")
## [1] "Contingency Table with Row-Wise Proportions:"
print(formatted_table)
##
## Fins Legs
## Earth "114 (56.16%)" "89 (43.84%)"
## Mars "131 (51.17%)" "125 (48.83%)"
## Venus "1230 (52.23%)" "1125 (47.77%)"
write.csv(formatted_table, 'chi_table.csv', row.names = TRUE)
# Perform the Chi-Square Test
chi_test <- chisq.test(limb_planet_table)
# Print the Chi-Square test results
print("Chi-Square Test Results:")
## [1] "Chi-Square Test Results:"
print(chi_test)
##
## Pearson's Chi-squared test
##
## data: limb_planet_table
## X-squared = 1.3312, df = 2, p-value = 0.514
print("Effect size metrics, this uses V because it's 3x2")
## [1] "Effect size metrics, this uses V because it's 3x2"
assocstats(limb_planet_table)
## X^2 df P(> X^2)
## Likelihood Ratio 1.3350 2 0.51300
## Pearson 1.3312 2 0.51396
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.022
## Cramer's V : 0.022
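# As a hand check, Cramer's V follows from the Pearson chi-square:
# V = sqrt(X^2 / (n * (min(rows, cols) - 1))); for a 3x2 table,
# min(rows, cols) - 1 = 1, so this should reproduce the 0.022 above.
n_total <- sum(limb_planet_table)
sqrt(chi_test$statistic / (n_total * (min(dim(limb_planet_table)) - 1)))
# Expected cell counts should all be >= 5 for the chi-square to be valid
chi_test$expected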
library(psych)
library(car)
# Reduce the dataset
planetprelimb<- planetmath%>%
dplyr::select(Limb,Pre)%>%
na.omit()
# Descriptives of pre
descpre<-psych::describe(planetprelimb$Pre)
descpre
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 2814 73.45 7.4 73 73.56 8.9 50 87 37 -0.19 -0.39 0.14
write.csv(descpre, "pre_descriptives.csv")
# Get descriptive statistics by limb
descriptives_limb <- psych::describeBy(planetprelimb$Pre, group = planetprelimb$Limb, mat = TRUE)
# Convert the matrix into a data frame
descriptives_limb<- as.data.frame(descriptives_limb)
write.csv(descriptives_limb, "descriptives_limb.csv")
descriptives_limb
## item group1 vars n mean sd median trimmed mad min max
## X11 1 Fins 1 1475 73.52746 7.351314 73 73.65792 8.8956 50 87
## X12 2 Legs 1 1339 73.35922 7.452100 73 73.44921 8.8956 50 87
## range skew kurtosis se
## X11 37 -0.1999649 -0.3622542 0.1914119
## X12 37 -0.1693996 -0.4261442 0.2036519
# Run Levene's test for homogeneity of variances
levene_test_result <- leveneTest(Pre ~ Limb, data = planetprelimb)
# Print Levene's test result
print("Levene's Test for Homogeneity of Variance:")
## [1] "Levene's Test for Homogeneity of Variance:"
print(levene_test_result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.5077 0.4762
## 2812
# Run a t-test for Pre by Limb
ttest_result<- t.test(Pre ~ Limb, data = planetprelimb, var.equal = TRUE)
# Print the t-test result
print(ttest_result)
##
## Two Sample t-test
##
## data: Pre by Limb
## t = 0.60234, df = 2812, p-value = 0.547
## alternative hypothesis: true difference in means between group Fins and group Legs is not equal to 0
## 95 percent confidence interval:
## -0.3794240 0.7158927
## sample estimates:
## mean in group Fins mean in group Legs
## 73.52746 73.35922
# Compute Cohen's d effect size
cohen_d_result <- planetprelimb %>%
rstatix::cohens_d(Pre ~ Limb)
# View the result
cohen_d_result
## # A tibble: 1 × 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 Pre Fins Legs 0.0227 1475 1339 negligible
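# Cross-check: for an independent-samples t-test with equal variances,
# Cohen's d can be recovered as t * sqrt(1/n1 + 1/n2), reproducing the
# 0.0227 above.
ttest_result$statistic * sqrt(1/1475 + 1/1339)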
# Mann Whitney U Non-Parametric
mann_whitney_result <- wilcox.test(Pre ~ Limb, data = planetprelimb, exact = FALSE)
# View the result
print(mann_whitney_result)
##
## Wilcoxon rank sum test with continuity correction
##
## data: Pre by Limb
## W = 1002323, p-value = 0.491
## alternative hypothesis: true location shift is not equal to 0
# R reports W while SPSS reports U. The custom function below shows the two match:
# Custom function to compute the Mann-Whitney U statistic as SPSS reports it
compute_U <- function(group1, group2) {
# Combine data to rank it
combined_data <- c(group1, group2)
ranks <- rank(combined_data)
# Sum of ranks for each group
rank_sum1 <- sum(ranks[1:length(group1)])
rank_sum2 <- sum(ranks[(length(group1)+1):length(combined_data)])
# Calculate U for each group
U1 <- rank_sum1 - length(group1)*(length(group1) + 1)/2
U2 <- rank_sum2 - length(group2)*(length(group2) + 1)/2
# Return the smaller U value (the one that SPSS reports)
return(min(U1, U2))
}
# Apply the custom function to your groups
group1 <- planetprelimb$Pre[planetprelimb$Limb == "Fins"]
group2 <- planetprelimb$Pre[planetprelimb$Limb == "Legs"]
# Compute U
U_value <- compute_U(group1, group2)
# Print U value
print(paste("Mann-Whitney U (Custom Calculation):", U_value))
## [1] "Mann-Whitney U (Custom Calculation): 972702"
# Effect size calculation (r = Z / sqrt(n))
# First, calculate Z and n for effect size
Z <- qnorm(1 - mann_whitney_result$p.value / 2) # Assuming two-tailed test
n1 <- length(group1)
n2 <- length(group2)
n <- n1 + n2
# Calculate effect size r
effect_size_r <- Z / sqrt(n)
# Print effect size
print(paste("Effect size (r):", effect_size_r))
## [1] "Effect size (r): 0.0129816945906126"
library(rstatix)
library(dplyr)
library(psych)
library(ggplot2)
library(tidyr)
# Generate the dataset
prepost<- planetmath%>%
dplyr::select(Pre,Post)%>%
na.omit()%>%
mutate(Difference = Post - Pre)
# We already have descriptives for Pre, but we recompute them on the paired subset
predescriptives<- psych::describe(prepost$Pre)
postdescriptives<- psych::describe(prepost$Post)
differencedescr<- psych::describe(prepost$Difference)
as.data.frame(predescriptives)
## vars n mean sd median trimmed mad min max range skew
## X1 1 2814 73.44741 7.398603 73 73.56172 8.8956 50 87 37 -0.1856475
## kurtosis se
## X1 -0.3916062 0.1394722
as.data.frame(postdescriptives)
## vars n mean sd median trimmed mad min max range skew
## X1 1 2814 88.60981 4.979288 89 88.62655 5.9304 72 100 28 -0.2140512
## kurtosis se
## X1 0.1700273 0.09386532
as.data.frame(differencedescr)
## vars n mean sd median trimmed mad min max range skew
## X1 1 2814 15.1624 8.724091 13 14.56927 8.8956 0 41 41 0.5777075
## kurtosis se
## X1 -0.5178854 0.1644592
predescriptives$Variable <- "Pre"
postdescriptives$Variable <- "Post"
differencedescr$Variable <- "Difference"
# Combine all into one dataset
desc_combined <- rbind(predescriptives, postdescriptives, differencedescr)%>%
dplyr::select(Variable, mean, median, sd, skew, kurtosis)
write.csv(desc_combined, "dependent_desc.csv")
# Generate the dataset
prepost2 <- planetmath %>%
dplyr::select(Pre, Post) %>%
na.omit()
# Melt the data for plotting
prepost_melted <- prepost2 %>%
pivot_longer(cols = c(Pre, Post), names_to = "Test", values_to = "Score")
# Histogram Plot for Pre and Post with Legend
hist_plot <- ggplot(prepost_melted, aes(x = Score, fill = Test)) +
geom_histogram(alpha = 0.6, position = "identity", bins = 30, color = "black", aes(y = after_stat(density))) +
scale_fill_manual(values = c("Pre" = "lightblue", "Post" = "lightgreen")) +
labs(title = "Histograms for Pre and Post Tests",
x = "Score", y = "Density") +
theme_minimal() +
theme(legend.title = element_blank(), # Remove legend title
legend.position = "top") # Show legend at the top
# Boxplot Plot for Pre and Post with Pre on the left
boxplot_plot <- ggplot(prepost_melted, aes(x = factor(Test, levels = c("Pre", "Post")), y = Score, fill = Test)) +
geom_boxplot(alpha = 0.5, color = "black") +
scale_fill_manual(values = c("Pre" = "lightblue", "Post" = "lightgreen")) +
labs(title = "Boxplots for Pre and Post Tests",
x = "Test", y = "Score") +
theme_minimal() +
theme(legend.position = "none") # Remove the legend for boxplots
# Print both plots
print(hist_plot)
print(boxplot_plot)
# We now want to check the assumption of normality
# Plot a histogram of the Difference variable
hist(prepost$Difference, main = "Histogram of Difference",
xlab = "Difference (Post - Pre)", col = "lightblue",
border = "black")
# Q-Q plot to check for normality
qqnorm(prepost$Difference)
qqline(prepost$Difference, col = "red")
# Perform Shapiro-Wilk test for normality on the Difference variable
shapiro_test <- shapiro.test(prepost$Difference)
shapiro_test
##
## Shapiro-Wilk normality test
##
## data: prepost$Difference
## W = 0.94281, p-value < 2.2e-16
# Run the t-test
paired_result <- t.test(prepost$Pre, prepost$Post, paired = TRUE)
paired_result
##
## Paired t-test
##
## data: prepost$Pre and prepost$Post
## t = -92.196, df = 2813, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -15.48488 -14.83993
## sample estimates:
## mean difference
## -15.1624
# Calculate Cohen's d
mean_dif<- mean(prepost$Difference)
sd_dif<- sd(prepost$Difference)
cohen_d<- mean_dif/sd_dif
cohen_d
## [1] 1.737992
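# Cross-check: for a paired t-test, Cohen's d equals |t| / sqrt(n), which
# reproduces the value above (92.196 / sqrt(2814) is about 1.738).
abs(paired_result$statistic) / sqrt(nrow(prepost))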
# Load necessary libraries
library(tidyr)
library(rstatix)
library(dplyr)
prepostsubset <- planetmath %>%
dplyr::select(Pre, Post) %>%
na.omit()
# Perform Wilcoxon Signed-Rank Test
test_result <- wilcox.test(prepostsubset$Pre, prepostsubset$Post, paired = TRUE)
# Calculate Z to nearly match SPSS (R and SPSS round a bit differently)
# Extract the test statistic V (the sum of positive ranks)
V <- test_result$statistic
# Number of pairs (n); strictly, SPSS drops zero differences before computing Z, so this is an approximation when zero differences exist
n <- length(prepostsubset$Pre)
# Calculate mean (mu) and standard deviation (sigma) for the ranks
mu <- n * (n + 1) / 4
sigma <- sqrt(n * (n + 1) * (2 * n + 1) / 24)
# Calculate Z statistic
Z <- (V - mu) / sigma
# Print the test result, then the Z statistic
print(test_result)
##
## Wilcoxon signed rank test with continuity correction
##
## data: prepostsubset$Pre and prepostsubset$Post
## V = 0, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
cat("Z statistic:", Z, "\n")
## Z statistic: -45.94426
# Load the psych package
library(psych)
# Subset the relevant columns (Matheff1, Matheff2, and Matheff3)
math_eff_columns <- planetmath %>%
dplyr::select(Matheff1, Matheff2, Matheff3)%>%
na.omit()
# Run a reliability analysis; psych::omega() reports Cronbach's alpha alongside McDonald's omega
reliability_result <- psych::omega(math_eff_columns)
# Print the result
print(reliability_result)
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.9
## G.6: 0.87
## Omega Hierarchical: 0.03
## Omega H asymptotic: 0.03
## Omega Total 0.91
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* F3* h2 h2 u2 p2 com
## Matheff1 0.93 0.89 0.89 0.11 0.03 1.06
## Matheff2 0.88 0.80 0.80 0.20 0.03 1.07
## Matheff3 0.77 0.62 0.62 0.38 0.04 1.10
##
## With Sums of squares of:
## g F1* F2* F3* h2
## 0.07 2.23 0.01 0.00 1.82
##
## general/max 0.03 max/min = Inf
## mean percent general = 0.03 with sd = 0.01 and cv of 0.17
## Explained Common Variance of the general factor = 0.03
##
## The degrees of freedom are -3 and the fit is 0
## The number of observations was 377 with Chi Square = 0 with prob < NA
## The root mean square of the residuals is 0
## The df corrected root mean square of the residuals is NA
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 0 and the fit is 1.96
## The number of observations was 377 with Chi Square = 731.67 with prob < NA
## The root mean square of the residuals is 0.74
## The df corrected root mean square of the residuals is NA
##
## Measures of factor score adequacy
## g F1* F2* F3*
## Correlation of scores with factors 0.17 0.95 0.20 0
## Multiple R square of scores with factors 0.03 0.90 0.04 0
## Minimum correlation of factor score estimates -0.94 0.81 -0.92 -1
##
## Total, General and Subset omega for each subset
## g F1* F2* F3*
## Omega total for total scores and subscales 0.91 0.91 NA NA
## Omega general for total scores and subscales 0.03 0.03 NA NA
## Omega group for total scores and subscales 0.88 0.88 NA NA
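# If only Cronbach's alpha is needed, without the full factor-model output,
# psych::alpha() reports it directly and should agree with the 0.9 above:
psych::alpha(math_eff_columns)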
# Load necessary libraries
library(tidyr)
library(rstatix)
library(dplyr)
# Filter for Planet and PostPost
planetanova <- planetmath%>%
dplyr::select(Planet, PostPost)%>%
na.omit()
# Get descriptive statistics by planet
descriptives_planet <- psych::describeBy(planetanova$PostPost, group = planetanova$Planet, mat = TRUE)
overall_desc_planet<- psych::describe(planetanova$PostPost)
# Convert the matrix into a data frame
descriptives_planet<- as.data.frame(descriptives_planet)%>%
dplyr::select(-item)
overall_desc_planet<- as.data.frame(overall_desc_planet)%>%
mutate(group1 = "Total")%>%
relocate(group1, .before = everything())
descriptives_planet<- rbind(descriptives_planet, overall_desc_planet)
write.csv(descriptives_planet, "descriptives_planet.csv")
descriptives_planet
## group1 vars n mean sd median trimmed mad min max range
## X11 Earth 1 203 83.22660 4.368868 81 83.08589 2.9652 70 96 26
## X12 Mars 1 256 83.38672 4.766186 82 83.33981 4.4478 68 96 28
## X13 Venus 1 2355 82.97410 4.580602 81 83.03342 4.4478 68 97 29
## X1 Total 1 2814 83.02985 4.583138 81 83.06661 4.4478 68 97 29
## skew kurtosis se
## X11 0.30816625 0.3107537 0.30663445
## X12 0.07640464 0.4721368 0.29788662
## X13 -0.05626483 0.5086031 0.09439024
## X1 -0.01908987 0.5095194 0.08639744
# Create a boxplot combined with a means plot of the groups
library(ggplot2)
ggplot(planetanova, aes(x = Planet, y = PostPost)) +
geom_boxplot(fill = "blue3", alpha = 0.3, color = "black") +
stat_summary(fun = mean, geom = "point", color = "red", size = 3) +
stat_summary(fun = mean, geom = "line", aes(group = 1), color = "red", linewidth = 1) +
scale_y_continuous(limits = c(0, 100)) +
theme_minimal() +
labs(title = "Boxplot with Mean of PostPost by Planet",
x = "Planet",
y = "PostPost")
# Shapiro-Wilk test for normality
aov_model <- aov(PostPost ~ Planet, data = planetanova)
shapiro.test(residuals(aov_model))
##
## Shapiro-Wilk normality test
##
## data: residuals(aov_model)
## W = 0.96078, p-value < 2.2e-16
# QQ Plot for normality
qqnorm(residuals(aov_model))
qqline(residuals(aov_model), col = "red")
# Levene's test for homogeneity of variance
library(car)
leveneTest(PostPost ~ Planet, data = planetanova)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 1.1356 0.3214
## 2811
#Bartlett's test of homogeneity of variance
bartlett.test(PostPost ~ Planet, data = planetanova)
##
## Bartlett test of homogeneity of variances
##
## data: PostPost by Planet
## Bartlett's K-squared = 1.6992, df = 2, p-value = 0.4276
# Be sure to load the library
library(apaTables)
library(dplyr)
library(tidyverse)
library(rstatix)
# Run the ANOVA
anova_results <- aov(PostPost ~ Planet, data = planetanova)
apa.aov.table(anova_results, filename = "anova_results.doc")
##
##
## ANOVA results using PostPost as the dependent variable
##
##
## Predictor SS df MS F p partial_eta2
## (Intercept) 1406113.42 1 1406113.42 66947.90 .000
## Planet 47.78 2 23.89 1.14 .321 .00
## Error 59039.71 2811 21.00
## CI_90_partial_eta2
##
## [.00, .00]
##
##
## Note: Values in square brackets indicate the bounds of the 90% confidence interval for partial eta-squared
summary(anova_results)
## Df Sum Sq Mean Sq F value Pr(>F)
## Planet 2 48 23.89 1.137 0.321
## Residuals 2811 59040 21.00
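# The eta-squared (equal to partial eta-squared in a one-way design) can also
# be pulled directly from the effectsize package as a cross-check on the APA
# table above:
effectsize::eta_squared(anova_results)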
library(rstatix)
# Perform Tukey's HSD test if variances are assumed equal
Tukey<- TukeyHSD(anova_results)
# Perform Games-Howell if variances were not equal
GamesHowell<-planetanova%>%
games_howell_test(PostPost~Planet)
Tukey
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = PostPost ~ Planet, data = planetanova)
##
## $Planet
## diff lwr upr p adj
## Mars-Earth 0.1601178 -0.8498621 1.1700977 0.9266575
## Venus-Earth -0.2525033 -1.0386094 0.5336028 0.7317040
## Venus-Mars -0.4126211 -1.1198539 0.2946117 0.3578727
GamesHowell
## # A tibble: 3 × 8
## .y. group1 group2 estimate conf.low conf.high p.adj p.adj.signif
## * <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 PostPost Earth Mars 0.160 -0.845 1.17 0.926 ns
## 2 PostPost Earth Venus -0.253 -1.01 0.504 0.711 ns
## 3 PostPost Mars Venus -0.413 -1.15 0.323 0.385 ns
# Subset your data for Cohen's d
cohen_data_venus_earth<- planetanova%>%
filter(Planet %in% c("Venus", "Earth"))
cohen_data_venus_mars<- planetanova%>%
filter(Planet %in% c("Venus", "Mars"))
cohen_data_earth_mars<- planetanova%>%
filter(Planet %in% c("Earth", "Mars"))
# Run Cohen's d on the subset data
rstatix::cohens_d(cohen_data_venus_earth, PostPost ~ Planet)
## # A tibble: 1 × 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 PostPost Earth Venus 0.0564 203 2355 negligible
rstatix::cohens_d(cohen_data_venus_mars, PostPost ~ Planet)
## # A tibble: 1 × 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 PostPost Mars Venus 0.0883 256 2355 negligible
rstatix::cohens_d(cohen_data_earth_mars, PostPost ~ Planet)
## # A tibble: 1 × 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 PostPost Earth Mars -0.0350 203 256 negligible
library(dplyr)
# Re-generate dataset
planetanova <- planetmath%>%
dplyr::select(Planet, PostPost)%>%
na.omit()
# Run the omnibus KW test
kwtest<-kruskal.test(PostPost ~ Planet, data = planetanova)
print(kwtest)
##
## Kruskal-Wallis rank sum test
##
## data: PostPost by Planet
## Kruskal-Wallis chi-squared = 0.76484, df = 2, p-value = 0.6822
# Create ranks and summarize by group
planet_ranks <- planetanova %>%
mutate(Rank = rank(PostPost)) %>%
group_by(Planet) %>%
summarise(N = n(), Mean_Rank = mean(Rank), Sum_Rank = sum(Rank))
# Print the ranks
print(planet_ranks)
## # A tibble: 3 × 4
## Planet N Mean_Rank Sum_Rank
## <chr> <int> <dbl> <dbl>
## 1 Earth 203 1416. 287496.
## 2 Mars 256 1448. 370582.
## 3 Venus 2355 1402. 3302626
# Run eta-squared for omnibus effect size
H <- kwtest$statistic
k <- length(unique(planetanova$Planet))
N <- nrow(planetanova)
eta_squared <- (H - k + 1) / (N - k)
# Rename the output explicitly
names(eta_squared) <- "eta_squared"
# Print the result
eta_squared
## eta_squared
## -0.0004394035
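# An alternative omnibus effect size for the Kruskal-Wallis test is
# epsilon-squared from the rcompanion package (loaded above); like the
# eta-squared estimate, it should be essentially zero here:
rcompanion::epsilonSquared(x = planetanova$PostPost, g = planetanova$Planet)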
#Load the library
library(dunn.test)
# Run post-hoc dunn test
dt<- dunn.test(planetanova$PostPost, planetanova$Planet, kw = TRUE, label = TRUE)
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 0.7648, df = 2, p-value = 0.68
##
##
## Comparison of x by group
## (No adjustment)
## Col Mean-|
## Row Mean | Earth Mars
## ---------+----------------------
## Mars | -0.417417
## | 0.3382
## |
## Venus | 0.236934 0.859460
## | 0.4064 0.1950
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
print(dt)
## $chi2
## [1] 0.7648368
##
## $Z
## [1] -0.4174178 0.2369341 0.8594609
##
## $P
## [1] 0.3381864 0.4063540 0.1950431
##
## $P.adjusted
## [1] 0.3381864 0.4063540 0.1950431
##
## $comparisons
## [1] "Earth - Mars" "Earth - Venus" "Mars - Venus"
# Rank-based effect size r (Z / sqrt(N)) for the pairwise tests
# Z-values from Dunn's test results
z_mars_earth <- 0.417417
z_venus_earth <- 0.236934
z_mars_venus <- 0.859460
# Calculate observations
n <- nrow(planetanova) # Total number of observations
# Effect size formula: r = Z / sqrt(N)
r_mars_earth <- z_mars_earth / sqrt(n)
r_venus_earth <- z_venus_earth / sqrt(n)
r_mars_venus <- z_mars_venus / sqrt(n)
# Print effect sizes
r_mars_earth
## [1] 0.007868792
r_venus_earth
## [1] 0.004466479
r_mars_venus
## [1] 0.01620181
library(ggplot2)
library(ggpubr)
library(Hmisc)
library(bannerCommenter)
## Warning: package 'bannerCommenter' was built under R version 4.4.2
library(readxl)
# Re-import the raw Practice Math Planet Data (numeric codes) and name it "planetmath"
planetmath <- readxl::read_xlsx('Practice_Math_Planet_Data.xlsx')
data <- planetmath%>%
dplyr::select(Pre, Post)
# descriptive statistics
desc_post<- psych::describe(data$Post)%>%
as.data.frame()%>%
mutate(Variable = "Post", group1 = "Overall")%>%
dplyr::select(Variable, group1, n, mean, sd, skew, kurtosis)
desc_pre <- psych::describe(data$Pre) %>%
as.data.frame() %>%
mutate(Variable = "Pre", group1 = "Overall")%>%
dplyr::select(Variable, group1, n, mean, sd, skew, kurtosis)
bivariatedesc<- rbind(desc_pre, desc_post)
# Homoscedasticity Check (Residuals vs Fitted Plot)
model <- lm(Pre ~ Post, data = data)
plot(model, which = 1)
title("Residuals vs Fitted Plot")
# Normality Check (Q-Q plots)
# Q-Q plot for Pre
ggqqplot(data$Pre, main = "Q-Q Plot for Pre", ylab = "Quantiles")
# Q-Q plot for Post
ggqqplot(data$Post, main = "Q-Q Plot for Post", ylab = "Quantiles")
# Shapiro-Wilk test for normality
shapiro_test_pre <- shapiro.test(data$Pre)
shapiro_test_post <- shapiro.test(data$Post)
# Shapiro-Wilk results: both Pre and Post violate the normality assumption
banner("Normality test for Pre and Post shows violation of the assumption")
##
## ###############################################################################
## ## Normality test for Pre and Post shows violation of the assumption ##
## ###############################################################################
shapiro_test_pre
##
## Shapiro-Wilk normality test
##
## data: data$Pre
## W = 0.97911, p-value < 2.2e-16
shapiro_test_post
##
## Shapiro-Wilk normality test
##
## data: data$Post
## W = 0.97452, p-value < 2.2e-16
# Compute the Spearman correlation (ties prevent an exact p-value)
cor_test_result <- cor.test(data$Pre, data$Post, method = "spearman")
## Warning in cor.test.default(data$Pre, data$Post, method = "spearman"): Cannot
## compute exact p-value with ties
# Scatterplot with linear fit (linearity check); we use a Spearman correlation due to the violation of normality
ggplot(data, aes(x = Pre, y = Post)) +
geom_point(alpha = 0.6) + # Scatterplot with transparency
geom_smooth(method = "lm", se = FALSE, color = "blue") + # Linear fit line
annotate("text", x = min(data$Pre), y = max(data$Post),
label = paste("Spearman ρ =", formatC(cor_test_result$estimate,
format = "f", digits = 4)),
hjust = 0, vjust = 1) + # Manually add p-value label
labs(title = "Scatterplot of Pre and Post",
x = "Age",
y = "Experience") +
theme_minimal()
banner("spearman Correlation")
##
## ##################################################################
## ## Spearman Correlation ##
## ##################################################################
# Correlation results using Spearman since normality is violated
cor_results <- rcorr(as.matrix(data[, c("Pre", "Post")]), type = "spearman")
print(cor_results)
## Pre Post
## Pre 1 0
## Post 0 1
##
## n= 2814
##
##
## P
## Pre Post
## Pre 0.9747
## Post 0.9747
banner("model results")
##
## #################################################################
## ## model results ##
## #################################################################
summary(model)
##
## Call:
## lm(formula = Pre ~ Post, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.6124 -5.6460 -0.1982 5.8270 13.3876
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 67.33086 2.48412 27.105 <2e-16 ***
## Post 0.06903 0.02799 2.466 0.0137 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.392 on 2812 degrees of freedom
## Multiple R-squared: 0.002158, Adjusted R-squared: 0.001803
## F-statistic: 6.082 on 1 and 2812 DF, p-value: 0.01372
library(ggplot2)
library(lmtest)
library(car)
library(bannerCommenter)
library(gridExtra)
library(dplyr)
library(psych)
# Select relevant data
data <- planetmath %>%
dplyr::select(PostPost, Instruction_Type, Pre)
# Overall descriptive statistics
desc_postpost <- psych::describe(data$PostPost)%>%
as.data.frame()%>%
mutate(Variable = "PostPost", group1 = "Overall")%>%
dplyr::select(Variable, group1, n, mean, sd, skew, kurtosis)
desc_pre <- psych::describe(data$Pre) %>%
as.data.frame() %>%
mutate(Variable = "Pre", group1 = "Overall")%>%
dplyr::select(Variable, group1, n, mean, sd, skew, kurtosis)
# Descriptive statistics by instruction type
desc_postpost_inst <- psych::describeBy(data$PostPost, group = data$Instruction_Type, mat = TRUE)%>%
as.data.frame()%>%
mutate(Variable = "PostPost_Inst")%>%
dplyr::select(Variable, group1, n, mean, sd, skew, kurtosis)
desc_pre_inst <- psych::describeBy(data$Pre, group = data$Instruction_Type, mat = TRUE)%>%
as.data.frame()%>%
mutate(Variable = "Pre_Inst")%>%
dplyr::select(Variable, group1, n, mean, sd, skew, kurtosis)
# Combine all results
final_results <- rbind(desc_postpost, desc_pre, desc_postpost_inst, desc_pre_inst)
# Save to CSV
write.csv(final_results, "hw_mult_reg_desc.csv", row.names = FALSE)
banner("Section 1:", "Model 1 Before Transformation to DV", emph = TRUE)
##
## ###########################################################################
## ###########################################################################
## ### ###
## ### SECTION 1: ###
## ### MODEL 1 BEFORE TRANSFORMATION TO DV ###
## ### ###
## ###########################################################################
## ###########################################################################
#Check normality for first model
model1<- lm(PostPost~Pre + Instruction_Type, data = data)
# Check linearity assumption
p1 <- ggplot(data, aes(x = Pre, y = PostPost)) +
geom_point() +
geom_smooth(method = "lm", col = "blue") +
ggtitle("Pre vs PostPost no Trans")
# Check normality of residuals
p2 <- ggplot(data.frame(resid = residuals(model1)), aes(sample = resid)) +
stat_qq() +
stat_qq_line(color = "red") +
labs(title = "Q-Q Plot of Residuals no Transf") +
theme_minimal()
# Residuals vs. Fitted plot for Homoscedasticity
p3 <- ggplot(data.frame(Fitted = fitted(model1), Residuals = residuals(model1)),
aes(x = Fitted, y = Residuals)) +
geom_point() +
geom_smooth(method = "lm", col = "blue", se = FALSE) +
labs(title = "Residuals vs. Fitted no Tansf") +
theme_minimal()
# Histogram of Residuals
p4 <- ggplot(data.frame(resid = residuals(model1)), aes(x = resid)) +
geom_histogram(binwidth = 1, fill = "blue", color = "black", alpha = 0.7) +
labs(title = "Histogram of Residuals no Transf") +
theme_minimal()
p5<- ggplot(data, aes(x = Instruction_Type, y = PostPost)) +
geom_jitter() + # Adds some noise to prevent overplotting
labs(title = "Inst. Type vs. PostPost no Transf")
p6<- ggplot(data, aes(x = Instruction_Type, y = PostPost)) +
geom_boxplot() +
labs(title = "Inst. Type vs. PostPost no Transf")
# Arrange the plots using grid.arrange
grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 2)
banner("Homoscedasticity Test Model 1 (Untransformed)")
##
## #################################################################
## ## Homoscedasticity Test Model 1 (Untransformed) ##
## #################################################################
# Breusch-Pagan test (p > 0.05 suggests homoscedasticity)
bptest(model1)
##
## studentized Breusch-Pagan test
##
## data: model1
## BP = 201.28, df = 2, p-value < 2.2e-16
banner("Independence of errors (Untransformed")
##
## ##################################################################
## ## Independence of errors (Untransformed ##
## ##################################################################
# Durbin-Watson test of independence of errors
dwtest(model1)
##
## Durbin-Watson test
##
## data: model1
## DW = 2.0491, p-value = 0.9036
## alternative hypothesis: true autocorrelation is greater than 0
banner("VIF for Model 1 (Untransformed)")
##
## #################################################################
## ## VIF for Model 1 (Untransformed) ##
## #################################################################
# Multicollinearity
vif(model1)
## Pre Instruction_Type
## 1.000096 1.000096
banner("Multiple regression results untransformed data")
##
## ##################################################################
## ## Multiple regression results untransformed data ##
## ##################################################################
# Print the model
summary(model1)
##
## Call:
## lm(formula = PostPost ~ Pre + Instruction_Type, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.5459 -2.5816 -0.8238 2.6638 13.7020
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 71.76519 0.86817 82.663 <2e-16 ***
## Pre 0.15806 0.01129 13.997 <2e-16 ***
## Instruction_Type -0.17714 0.12063 -1.468 0.142
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.431 on 2811 degrees of freedom
## Multiple R-squared: 0.06596, Adjusted R-squared: 0.06529
## F-statistic: 99.25 on 2 and 2811 DF, p-value: < 2.2e-16
banner("Section 2:", "Model 2 after Logarithm Transformation to DV", emph = TRUE)
##
## ############################################################################
## ############################################################################
## ### ###
## ### SECTION 2: ###
## ### MODEL 2 AFTER LOGARITHM TRANSFORMATION TO DV ###
## ### ###
## ############################################################################
## ############################################################################
# Transform dependent variable and run model
data$log_PostPost <- log(data$PostPost)
# Build the model
model2 <- lm(log_PostPost~Pre + Instruction_Type, data = data)
# Check linearity assumption (Pre on x, transformed DV on y, paralleling Model 1's plot)
p1 <- ggplot(data, aes(x = Pre, y = log_PostPost)) +
geom_point() +
geom_smooth(method = "lm", col = "red") +
ggtitle("Pre vs log(PostPost) after Transf")
# Check normality of residuals
p2 <- ggplot(data.frame(resid = residuals(model2)), aes(sample = resid)) +
stat_qq() +
stat_qq_line(color = "blue") +
labs(title = "Q-Q Plot of Residuals after Transf") +
theme_minimal()
# Residuals vs. Fitted plot for Homoscedasticity
p3 <- ggplot(data.frame(Fitted = fitted(model2), Residuals = residuals(model2)),
aes(x = Fitted, y = Residuals)) +
geom_point() +
geom_smooth(method = "lm", col = "red", se = FALSE) +
labs(title = "Residuals vs. Fitted after Tansf") +
theme_minimal()
# Histogram of Residuals
p4 <- ggplot(data.frame(resid = residuals(model2)), aes(x = resid)) +
geom_histogram(fill = "red", color = "black", alpha = 0.7, bins = 30) +
labs(title = "Histogram of Residuals after Transf") +
theme_minimal()
p5<- ggplot(data, aes(x = Instruction_Type, y = log_PostPost)) +
geom_jitter() + # Adds some noise to prevent overplotting
labs(title = "Inst. Type vs. PostPost after Transf")
p6<- ggplot(data, aes(x = Instruction_Type, y = log_PostPost)) +
geom_boxplot() +
labs(title = "Inst. Type vs. PostPost after Transf")
# Arrange the plots using grid.arrange
grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 2)
banner("Homoscedasticity Test Model 2 (Transformed)")
##
## #################################################################
## ## Homoscedasticity Test Model 2 (Transformed) ##
## #################################################################
# Breusch-Pagan test (p > 0.05 suggests homoscedasticity)
bptest(model2)
##
## studentized Breusch-Pagan test
##
## data: model2
## BP = 235.04, df = 2, p-value < 2.2e-16
banner("Independence of errors (Transformed)")
##
## #################################################################
## ## Independence of errors (Transformed) ##
## #################################################################
# Durbin-Watson test of independence of errors
dwtest(model2)
##
## Durbin-Watson test
##
## data: model2
## DW = 2.0476, p-value = 0.8966
## alternative hypothesis: true autocorrelation is greater than 0
banner("VIF for Model 1 (Transformed)")
##
## #################################################################
## ## VIF for Model 2 (Transformed) ##
## #################################################################
# Multicollinearity
vif(model2)
## Pre Instruction_Type
## 1.000096 1.000096
banner("Multiple regression results transformed data")
##
## ##################################################################
## ## Multiple regression results transformed data ##
## ##################################################################
# Print the model
summary(model2)
##
## Call:
## lm(formula = log_PostPost ~ Pre + Instruction_Type, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.179727 -0.030037 -0.008455 0.033337 0.155590
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.2783646 0.0104962 407.610 <2e-16 ***
## Pre 0.0019514 0.0001365 14.293 <2e-16 ***
## Instruction_Type -0.0020680 0.0014584 -1.418 0.156
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05357 on 2811 degrees of freedom
## Multiple R-squared: 0.0685, Adjusted R-squared: 0.06784
## F-statistic: 103.4 on 2 and 2811 DF, p-value: < 2.2e-16
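# With a log-transformed DV, slopes are roughly percent changes: each
# additional Pre point is associated with about a 0.195% increase in
# PostPost, since exp(0.0019514) - 1 is approximately 0.00195. Quick check:
(exp(coef(model2)["Pre"]) - 1) * 100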
library(car)
library(tidyverse)
library(dplyr)
library(psych)
library(gridExtra)
library(rstatix)
library(nlme)
library(dplyr)
library(purrr)
testdata <- planetmath%>%
dplyr::select(ID, Pre, Post, PostPost)%>%
mutate(DifferencePostPost_Pre = PostPost - Pre)%>%
mutate(DifferencePost_Pre = Post- Pre)%>%
mutate(DifferencePostPost_Post = PostPost- Post)
# Compute descriptive statistics
desc_Pre <- psych::describe(testdata$Pre)
desc_Post <- psych::describe(testdata$Post)
desc_PostPost <- psych::describe(testdata$PostPost)
desc_differencePostPost_Pre <- psych::describe(testdata$DifferencePostPost_Pre)
desc_differencePost_Pre <- psych::describe(testdata$DifferencePost_Pre)
desc_differencePostPost_Post <- psych::describe(testdata$DifferencePostPost_Post)
as.data.frame(desc_Pre)
## vars n mean sd median trimmed mad min max range skew
## X1 1 2814 73.44741 7.398603 73 73.56172 8.8956 50 87 37 -0.1856475
## kurtosis se
## X1 -0.3916062 0.1394722
as.data.frame(desc_Post)
## vars n mean sd median trimmed mad min max range skew
## X1 1 2814 88.60981 4.979288 89 88.62655 5.9304 72 100 28 -0.2140512
## kurtosis se
## X1 0.1700273 0.09386532
as.data.frame(desc_PostPost)
## vars n mean sd median trimmed mad min max range skew
## X1 1 2814 83.02985 4.583138 81 83.06661 4.4478 68 97 29 -0.01908987
## kurtosis se
## X1 0.5095194 0.08639744
as.data.frame(desc_differencePostPost_Pre)
## vars n mean sd median trimmed mad min max range skew
## X1 1 2814 9.582445 7.643458 8 8.830373 7.413 -4 38 42 0.9501486
## kurtosis se
## X1 0.715229 0.144088
as.data.frame(desc_differencePost_Pre)
## vars n mean sd median trimmed mad min max range skew
## X1 1 2814 15.1624 8.724091 13 14.56927 8.8956 0 41 41 0.5777075
## kurtosis se
## X1 -0.5178854 0.1644592
as.data.frame(desc_differencePostPost_Post)
## vars n mean sd median trimmed mad min max range skew
## X1 1 2814 -5.579957 2.980891 -4 -5.092806 1.4826 -13 -3 10 -1.256129
## kurtosis se
## X1 0.1073983 0.05619324
desc_Pre$Variable <- "Pre"
desc_Post$Variable<- "Post"
desc_PostPost$Variable <- "PostPost"
desc_differencePostPost_Pre$Variable <- "Difference_PostPost_Pre"
desc_differencePost_Pre$Variable<- "Difference_Post_Pre"
desc_differencePostPost_Post$Variable<- "Difference_PostPost_Post"
# Combine all into one dataset
desc_combined <- rbind(desc_Pre, desc_Post, desc_PostPost, desc_differencePostPost_Pre, desc_differencePost_Pre, desc_differencePostPost_Post)%>%
mutate(upper = mean+(se*2))%>%
mutate(lower = mean-(se*2))%>%
dplyr::select(Variable, mean, upper, lower, se, sd, skew, kurtosis)
write.csv(desc_combined, "repeated_desc.csv")
# We now want to check the assumption of normality
# Plot a histogram of the Difference variable
hist_PostPost_Pre <- ggplot(testdata, aes(x = DifferencePostPost_Pre)) +
geom_histogram(fill = "lightblue", color = "black", bins = 10) +
ggtitle("Histogram of Difference PostPost and Pre") +
xlab("Difference (PostPost - Pre)") +
theme_minimal()
hist_PostPost_Post <- ggplot(testdata, aes(x = DifferencePostPost_Post)) +
geom_histogram(fill = "lightblue", color = "black", bins = 10) +
ggtitle("Histogram of Difference PostPost and Post") +
xlab("Difference (PostPost - Post)") +
theme_minimal()
hist_Post_Pre <- ggplot(testdata, aes(x = DifferencePost_Pre)) +
geom_histogram(fill = "lightblue", color = "black", bins = 10) +
ggtitle("Histogram of Difference Post and Pre") +
xlab("Difference (Post - Pre)") +
theme_minimal()
# Create Q-Q plots
qq_PostPost_Pre <- ggplot(testdata, aes(sample = DifferencePostPost_Pre)) +
stat_qq() + stat_qq_line(color = "red") +
ggtitle("Q-Q Plot: Difference PostPost-Pre") +
theme_minimal()
qq_PostPost_Post <- ggplot(testdata, aes(sample = DifferencePostPost_Post)) +
stat_qq() + stat_qq_line(color = "red") +
ggtitle("Q-Q Plot: Difference PostPost-Post") +
theme_minimal()
qq_Post_Pre <- ggplot(testdata, aes(sample = DifferencePost_Pre)) +
stat_qq() + stat_qq_line(color = "red") +
ggtitle("Q-Q Plot: Difference Post-Pre") +
theme_minimal()
# Arrange plots in a grid
grid.arrange(hist_PostPost_Pre, qq_PostPost_Pre,
hist_PostPost_Post, qq_PostPost_Post,
hist_Post_Pre, qq_Post_Pre,
ncol = 2)
# Perform Shapiro-Wilk test for normality on the Difference variable
shapiro_testPostPost_Pre <- shapiro.test(testdata$DifferencePostPost_Pre)
shapiro_testPostPost_Pre
##
## Shapiro-Wilk normality test
##
## data: testdata$DifferencePostPost_Pre
## W = 0.92258, p-value < 2.2e-16
# Perform Shapiro-Wilk test for normality on the Difference variable
shapiro_testPostPost_Post <- shapiro.test(testdata$DifferencePostPost_Post)
shapiro_testPostPost_Post
##
## Shapiro-Wilk normality test
##
## data: testdata$DifferencePostPost_Post
## W = 0.7415, p-value < 2.2e-16
# Perform Shapiro-Wilk test for normality on the Difference variable
shapiro_testPost_Pre <- shapiro.test(testdata$DifferencePost_Pre)
shapiro_testPost_Pre
##
## Shapiro-Wilk normality test
##
## data: testdata$DifferencePost_Pre
## W = 0.94281, p-value < 2.2e-16
# Convert to long format and add an ID
testdata_long <- testdata %>%
dplyr::select(ID, Pre, Post, PostPost) %>%
pivot_longer(cols = c(Pre, Post, PostPost),
names_to = "Time",
values_to = "Score") %>%
mutate(Time = factor(Time, levels = c("Pre", "Post", "PostPost")))
# The Greenhouse-Geisser correction is automatically applied to this ANOVA test; the "ges" column is the generalized eta-squared effect size.
anova_lm <- anova_test(dv = Score, wid = ID, within = Time, data = testdata_long)
anova_lm
## ANOVA Table (type III tests)
##
## $ANOVA
## Effect DFn DFd F p p<.05 ges
## 1 Time 2 5626 6923.411 0 * 0.539
##
## $`Mauchly's Test for Sphericity`
## Effect W p p<.05
## 1 Time 0.292 0 *
##
## $`Sphericity Corrections`
## Effect GGe DF[GG] p[GG] p[GG]<.05 HFe DF[HF] p[HF]
## 1 Time 0.585 1.17, 3293.07 0 * 0.585 1.17, 3293.61 0
## p[HF]<.05
## 1 *
get_anova_table(anova_lm)
## ANOVA Table (type III tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 Time 1.17 3293.07 6923.411 0 * 0.539
# Plot the scores at each time point
ggplot(testdata_long, aes(x = Time, y= Score, fill=Time))+
geom_boxplot()
# Post-hoc with a bonferroni correction
pwc <- testdata_long %>%
pairwise_t_test(Score ~ Time, paired = TRUE, p.adjust.method = "bonferroni")
pwc
## # A tibble: 3 × 10
## .y. group1 group2 n1 n2 statistic df p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Score Pre Post 2814 2814 -92.2 2813 0 0 ****
## 2 Score Pre PostPost 2814 2814 -66.5 2813 0 0 ****
## 3 Score Post PostPost 2814 2814 99.3 2813 0 0 ****
# Cohen's d effect size into a table (use %in%, not ==, so both levels are kept)
testdata_prepost_d<- testdata_long%>%
filter(Time %in% c("Pre", "Post"))%>%
mutate(Time = as.character(Time))%>%
rstatix::cohens_d(Score ~ Time)%>%
mutate(Model = "Pre to Post")
testdata_prepost_Post_d<- testdata_long%>%
filter(Time %in% c("Pre", "PostPost"))%>%
mutate(Time = as.character(Time))%>%
rstatix::cohens_d(Score ~ Time)%>%
mutate(Model = "Pre to PostPost")
testdata_PostPost_Post_d <- testdata_long %>%
filter(Time %in% c("Post", "PostPost")) %>%
mutate(Time = as.character(Time)) %>%
rstatix::cohens_d(Score ~ Time) %>%
mutate(Model = "Post to PostPost")
effectsize<- rbind(testdata_prepost_d, testdata_prepost_Post_d, testdata_PostPost_Post_d)
effectsize
## # A tibble: 3 × 8
## .y. group1 group2 effsize n1 n2 magnitude Model
## <chr> <chr> <chr> <dbl> <int> <int> <ord> <chr>
## 1 Score Post Pre 2.46 1407 1407 large Pre to Post
## 2 Score PostPost Pre 1.55 1407 1407 large Pre to PostPost
## 3 Score Post PostPost 1.17 2814 2814 large Post to PostPost
write.csv(effectsize, "repeated_pair_cohen.csv")
library(dplyr)
library(stats)
library(tidyverse)
library(ggpubr)
library(rstatix)
library(tidyr)
library(purrr)
library(tibble)
#Use the testdata_long dataset from the last chunk
testdata_long$ID <- factor(testdata_long$ID, ordered = FALSE)
testdata_long$Time <- factor(testdata_long$Time, ordered = FALSE)
# Run the omnibus test
friedtest<- friedman_test(testdata_long, Score ~ Time | ID)
friedtest
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <dbl> <dbl> <chr>
## 1 Score 2814 5464. 2 0 Friedman test
# Get the omnibus effect size
friedman_effsize_result <- friedman_effsize(testdata_long, Score ~ Time | ID)
friedman_effsize_result
## # A tibble: 1 × 5
## .y. n effsize method magnitude
## * <chr> <int> <dbl> <chr> <ord>
## 1 Score 2814 0.971 Kendall W large
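# Kendall's W can be recovered by hand from the Friedman chi-square as
# W = chi^2 / (n * (k - 1)) with k = 3 time points, reproducing the 0.971:
friedtest$statistic / (2814 * (3 - 1))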
# Create planet_test_wide from planetmath
planet_test_wide <- planetmath %>%
dplyr::select(Pre, Post, PostPost)
# Define pairwise comparisons
comparisons <- list(
c("Post", "Pre"),
c("PostPost", "Pre"),
c("PostPost", "Post")
)
# Function to perform Wilcoxon test and compute statistics
perform_test <- function(pair) {
test <- wilcox.test(planet_test_wide[[pair[1]]], planet_test_wide[[pair[2]]], paired = TRUE)
# Extract the test statistic (V)
V_stat <- test$statistic
# Sample size (n) for effect size and Z-statistic calculations
n <- sum(!is.na(planet_test_wide[[pair[1]]]) & !is.na(planet_test_wide[[pair[2]]])) # Correct for NAs
# Expected value of V
mu_V <- n * (n + 1) / 4
# Standard deviation of V
sigma_V <- sqrt((n * (n + 1) * (2 * n + 1)) / 24)
# Compute Z-statistic: Adjust sign based on positive ranks
Zstat <- (V_stat - mu_V) / sigma_V
# Report Z as a positive magnitude when V falls below its expectation (i.e., the second measure is larger, as in Post vs. Pre)
if (V_stat < mu_V) {
Zstat <- -Zstat
}
# Compute effect size (r)
r_effect_size <- abs(Zstat) / sqrt(n) # You can choose to use absolute or raw Z value
# Return a tibble with results
tibble(
Group1 = pair[1],
Group2 = pair[2],
n = n,
V = V_stat,
Z = Zstat, # Retain the sign
p = test$p.value,
r = r_effect_size
)
}
# Apply function to all comparisons and store results in a table
wilcoxon_results <- purrr::map_dfr(comparisons, perform_test)
# Print results
print(wilcoxon_results)
## # A tibble: 3 × 7
## Group1 Group2 n V Z p r
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Post Pre 2814 3935415 45.4 0 0.855
## 2 PostPost Pre 2814 3881884. 44.1 0 0.832
## 3 PostPost Post 2814 0 45.9 0 0.866
# Save results to CSV
write.csv(wilcoxon_results, "wilcoxon_repeated.csv")
library(psych)
library(dplyr)
library(ggplot2)
factorial<- read.csv("Factorial_for_Practice_and_HW.csv")
factorial$TeachReading<- as.numeric(factorial$TeachReading)
factorial$ResistsWriting<- as.numeric(factorial$ResistsWriting)
factorial$ID<- as.character(factorial$ID)
# Create a new variable that concatenates school and content
factorial <- factorial %>%
mutate(School_Cont = paste(School, "_", Content_Area))
# Get descriptive statistics by school, content, and school_content combined
resist_school <- as.data.frame(psych::describeBy(factorial$ResistsWriting, group = factorial$School, mat = TRUE))%>%
mutate(Category = "Resist_School")%>%
dplyr::select(Category, group1, n, mean, sd, median, kurtosis, skew)
resist_content<- as.data.frame(psych::describeBy(factorial$ResistsWriting, group = factorial$Content_Area, mat = TRUE))%>%
mutate(Category = "Resist_Content")%>%
dplyr::select(Category, group1, n, mean, sd, median, kurtosis, skew)
resist_school_content<- as.data.frame(psych::describeBy(factorial$ResistsWriting, group = factorial$School_Cont, mat = TRUE))%>%
mutate(Category = "Resist_School_Content")%>%
dplyr::select(Category, group1, n, mean, sd, median, kurtosis, skew)
overall_resist<- as.data.frame(psych::describe(factorial$ResistsWriting))%>%
mutate(Category = "Resist_Overall")%>%
mutate(group1 = "Overall")%>%
dplyr::select(Category, group1, n, mean, sd, median, kurtosis, skew)
Factorial_Desc<- rbind(resist_school, resist_content, resist_school_content, overall_resist)
Factorial_Desc
## Category group1 n mean sd
## X11 Resist_School School 1 61 25.68852 6.682168
## X12 Resist_School School 2 111 28.45045 7.699751
## X111 Resist_Content Language Arts 47 22.72340 6.195027
## X121 Resist_Content Math 41 31.46341 8.772393
## X13 Resist_Content Science 46 28.93478 6.416653
## X14 Resist_Content Social Studies 38 27.26316 5.176267
## X112 Resist_School_Content School 1 _ Language Arts 10 19.00000 6.377042
## X122 Resist_School_Content School 1 _ Math 15 26.80000 7.589466
## X131 Resist_School_Content School 1 _ Science 19 28.00000 5.228129
## X141 Resist_School_Content School 1 _ Social Studies 17 26.05882 5.273407
## X15 Resist_School_Content School 2 _ Language Arts 37 23.72973 5.829278
## X16 Resist_School_Content School 2 _ Math 26 34.15385 8.384234
## X17 Resist_School_Content School 2 _ Science 27 29.59259 7.158960
## X18 Resist_School_Content School 2 _ Social Studies 21 28.23810 5.009039
## X1 Resist_Overall Overall 172 27.47093 7.453895
## median kurtosis skew
## X11 27.0 -0.29753346 -0.20723409
## X12 28.0 0.77700815 0.38944058
## X111 25.0 -1.09614916 -0.19590131
## X121 31.0 -0.11750874 -0.07083268
## X13 29.0 2.17206354 0.70398419
## X14 27.0 0.96021548 -0.15470751
## X112 17.5 -1.62605482 0.47892084
## X122 28.0 -0.62307485 -0.05841430
## X131 29.0 -0.29399793 0.22098294
## X141 27.0 0.06378897 -0.70016856
## X15 25.0 -0.70547202 -0.34894247
## X16 34.0 0.28254322 -0.22613311
## X17 30.0 1.84260784 0.67769262
## X18 28.0 0.65709388 0.40931170
## X1 28.0 0.75885027 0.28860860
write.csv(Factorial_Desc, "Factorial_Desc.csv")
library(rstatix)
library(car)
library(ggplot2)
# Shapiro-Wilk test for normality
shapiro.test(factorial$ResistsWriting) # Assesses overall normality
##
## Shapiro-Wilk normality test
##
## data: factorial$ResistsWriting
## W = 0.96647, p-value = 0.000366
# QQ Plots
qqnorm(factorial$ResistsWriting)
qqline(factorial$ResistsWriting, col = "red")
# Normality within groups
factorial %>%
group_by(School, Content_Area) %>%
shapiro_test(ResistsWriting)
## # A tibble: 8 × 5
## School Content_Area variable statistic p
## <chr> <chr> <chr> <dbl> <dbl>
## 1 School 1 Language Arts ResistsWriting 0.831 0.0344
## 2 School 1 Math ResistsWriting 0.976 0.937
## 3 School 1 Science ResistsWriting 0.970 0.774
## 4 School 1 Social Studies ResistsWriting 0.932 0.232
## 5 School 2 Language Arts ResistsWriting 0.943 0.0566
## 6 School 2 Math ResistsWriting 0.973 0.703
## 7 School 2 Science ResistsWriting 0.934 0.0888
## 8 School 2 Social Studies ResistsWriting 0.943 0.244
# Equality of variance
leveneTest(ResistsWriting ~ School * Content_Area, data = factorial)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 7 0.9423 0.4756
## 164
hist(factorial$ResistsWriting)
library(ggplot2)
ggplot(factorial, aes(x = ResistsWriting)) +
geom_histogram(fill = "blue", color = "black") +
facet_wrap(~ School_Cont) +
theme_minimal()
# Run the factorial ANOVA omnibus test with post-hoc Tukey and pairwise
# Bonferroni; partial eta-squared is also included
library(effectsize)
library(bannerCommenter)
library(car)
banner("Omnibus")
##
## #################################################################
## ## Omnibus ##
## #################################################################
#Omnibus
factorial_anova <- aov(ResistsWriting ~ School * Content_Area, data = factorial)
summary(factorial_anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## School 1 300 300.3 7.112 0.00842 **
## Content_Area 3 2070 690.1 16.345 2.43e-09 ***
## School:Content_Area 3 206 68.6 1.626 0.18542
## Residuals 164 6924 42.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
banner("Copy SPSS")
##
## #################################################################
## ## Copy SPSS ##
## #################################################################
factorial$School <- factor(factorial$School)
factorial$Content_Area <- factor(factorial$Content_Area)
model <- lm(ResistsWriting ~ School * Content_Area, data = factorial)
spsstable<- Anova(model, type = 3)
spsstable<- as.data.frame(spsstable)
spsstable <- spsstable %>%
rownames_to_column("Variable") %>%
mutate(`Mean Sq` = `Sum Sq` / Df) %>%
dplyr::select(Variable, Df, `Mean Sq`, `Pr(>F)`)%>%
rename(Variable = 1)
# Eta-squared effect size
banner("eta-squared")
##
## #################################################################
## ## eta-squared ##
## #################################################################
eta_squared(factorial_anova)
## # Effect Size for ANOVA (Type I)
##
## Parameter | Eta2 (partial) | 95% CI
## ---------------------------------------------------
## School | 0.04 | [0.01, 1.00]
## Content_Area | 0.23 | [0.13, 1.00]
## School:Content_Area | 0.03 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
banner("partial eta-squared")
##
## #################################################################
## ## partial eta-squared ##
## #################################################################
partialeta<-partial_eta_squared(factorial_anova)%>%
as.data.frame()%>%
rownames_to_column("Variable")%>%
rename(Variable = 1)
spsstable<- left_join(spsstable, partialeta, by = 'Variable')%>%
as.data.frame()
names(spsstable)[names(spsstable) == '.'] <- 'Eta'
write.csv(spsstable, "spsstable.csv")
banner("Tukey post-hoc")
##
## ##################################################################
## ## Tukey post-hoc ##
## ##################################################################
# Tukey post-hoc analysis
TukeyHSD(factorial_anova, which = "School")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = ResistsWriting ~ School * Content_Area, data = factorial)
##
## $School
## diff lwr upr p adj
## School 2-School 1 2.761926 0.7170348 4.806817 0.0084227
TukeyHSD(factorial_anova, which = "Content_Area")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = ResistsWriting ~ School * Content_Area, data = factorial)
##
## $Content_Area
## diff lwr upr p adj
## Math-Language Arts 9.162827 5.558698 12.7669566 0.0000000
## Science-Language Arts 6.764530 3.266581 10.2624794 0.0000079
## Social Studies-Language Arts 5.187708 1.508379 8.8670373 0.0019143
## Science-Math -2.398297 -6.020633 1.2240383 0.3174021
## Social Studies-Math -3.975119 -7.772898 -0.1773398 0.0363535
## Social Studies-Science -1.576822 -5.273987 2.1203431 0.6857484
TukeyHSD(factorial_anova, which = "School:Content_Area") # If interaction is significant
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = ResistsWriting ~ School * Content_Area, data = factorial)
##
## $`School:Content_Area`
## diff lwr
## School 2:Language Arts-School 1:Language Arts 4.7297297 -2.3811067
## School 1:Math-School 1:Language Arts 7.8000000 -0.3451108
## School 2:Math-School 1:Language Arts 15.1538462 7.7298592
## School 1:Science-School 1:Language Arts 9.0000000 1.2053823
## School 2:Science-School 1:Language Arts 10.5925926 3.2068936
## School 1:Social Studies-School 1:Language Arts 7.0588235 -0.8923282
## School 2:Social Studies-School 1:Language Arts 9.2380952 1.5725364
## School 1:Math-School 2:Language Arts 3.0702703 -3.0367281
## School 2:Math-School 2:Language Arts 10.4241164 5.3184156
## School 1:Science-School 2:Language Arts 4.2702703 -1.3607744
## School 2:Science-School 2:Language Arts 5.8628629 0.8129967
## School 1:Social Studies-School 2:Language Arts 2.3290938 -3.5167092
## School 2:Social Studies-School 2:Language Arts 4.5083655 -0.9426336
## School 2:Math-School 1:Math 7.3538462 0.8849192
## School 1:Science-School 1:Math 1.2000000 -5.6911174
## School 2:Science-School 1:Math 2.7925926 -3.6323574
## School 1:Social Studies-School 1:Math -0.7411765 -7.8088669
## School 2:Social Studies-School 1:Math 1.4380952 -5.3066973
## School 1:Science-School 2:Math -6.1538462 -12.1754947
## School 2:Science-School 2:Math -4.5612536 -10.0432910
## School 1:Social Studies-School 2:Math -8.0950226 -14.3179641
## School 2:Social Studies-School 2:Math -5.9157509 -11.7693804
## School 2:Science-School 1:Science 1.5925926 -4.3817876
## School 1:Social Studies-School 1:Science -1.9411765 -8.6019185
## School 2:Social Studies-School 1:Science 0.2380952 -6.0789817
## School 1:Social Studies-School 2:Science -3.5337691 -9.7109826
## School 2:Social Studies-School 2:Science -1.3544974 -7.1594905
## School 2:Social Studies-School 1:Social Studies 2.1792717 -4.3299687
## upr p adj
## School 2:Language Arts-School 1:Language Arts 11.84056615 0.4569118
## School 1:Math-School 1:Language Arts 15.94511079 0.0712488
## School 2:Math-School 1:Language Arts 22.57783307 0.0000001
## School 1:Science-School 1:Language Arts 16.79461771 0.0117403
## School 2:Science-School 1:Language Arts 17.97829154 0.0004990
## School 1:Social Studies-School 1:Language Arts 15.00997529 0.1222725
## School 2:Social Studies-School 1:Language Arts 16.90365407 0.0069635
## School 1:Math-School 2:Language Arts 9.17726861 0.7825143
## School 2:Math-School 2:Language Arts 15.52981721 0.0000001
## School 1:Science-School 2:Language Arts 9.90131494 0.2846939
## School 2:Science-School 2:Language Arts 10.91272898 0.0110078
## School 1:Social Studies-School 2:Language Arts 8.17489684 0.9239350
## School 2:Social Studies-School 2:Language Arts 9.95936459 0.1865633
## School 2:Math-School 1:Math 13.82277309 0.0140426
## School 1:Science-School 1:Math 8.09111742 0.9994533
## School 2:Science-School 1:Math 9.21754263 0.8843167
## School 1:Social Studies-School 1:Math 6.32651398 0.9999820
## School 2:Social Studies-School 1:Math 8.18288776 0.9979704
## School 1:Science-School 2:Math -0.13219761 0.0413041
## School 2:Science-School 2:Math 0.92078391 0.1805325
## School 1:Social Studies-School 2:Math -1.87208118 0.0024361
## School 2:Social Studies-School 2:Math -0.06212144 0.0456178
## School 2:Science-School 1:Science 7.56697276 0.9918708
## School 1:Social Studies-School 1:Science 4.71956551 0.9861833
## School 2:Social Studies-School 1:Science 6.55517222 1.0000000
## School 1:Social Studies-School 2:Science 2.64344451 0.6501607
## School 2:Social Studies-School 2:Science 4.45049578 0.9964143
## School 2:Social Studies-School 1:Social Studies 8.68851211 0.9695548
banner("Bonferroni post-hoc pairwise")
##
## ##################################################################
## ## Bonferroni post-hoc pairwise ##
## ##################################################################
# Pairwise t-tests with Bonferroni correction (an alternative to Tukey)
pairwise.t.test(factorial$ResistsWriting, factorial$School, p.adjust.method = "bonferroni")
##
## Pairwise comparisons using t tests with pooled SD
##
## data: factorial$ResistsWriting and factorial$School
##
## School 1
## School 2 0.02
##
## P value adjustment method: bonferroni
pairwise.t.test(factorial$ResistsWriting, factorial$Content_Area, p.adjust.method = "bonferroni")
##
## Pairwise comparisons using t tests with pooled SD
##
## data: factorial$ResistsWriting and factorial$Content_Area
##
## Language Arts Math Science
## Math 5.6e-08 - -
## Science 0.0001 0.5017 -
## Social Studies 0.0147 0.0388 1.0000
##
## P value adjustment method: bonferroni
interaction_variable <- interaction(factorial$School, factorial$Content_Area)
pairwise.t.test(factorial$ResistsWriting, interaction_variable, p.adjust.method = "bonferroni")
##
## Pairwise comparisons using t tests with pooled SD
##
## data: factorial$ResistsWriting and interaction_variable
##
## School 1.Language Arts School 2.Language Arts
## School 2.Language Arts 1.00000 -
## School 1.Math 0.10506 1.00000
## School 2.Math 8.7e-08 8.7e-08
## School 1.Science 0.01432 0.59101
## School 2.Science 0.00054 0.01336
## School 1.Social Studies 0.19908 1.00000
## School 2.Social Studies 0.00822 0.33687
## School 1.Math School 2.Math School 1.Science
## School 2.Language Arts - - -
## School 1.Math - - -
## School 2.Math 0.01734 - -
## School 1.Science 1.00000 0.05650 -
## School 2.Science 1.00000 0.32302 1.00000
## School 1.Social Studies 1.00000 0.00274 1.00000
## School 2.Social Studies 1.00000 0.06317 1.00000
## School 2.Science School 1.Social Studies
## School 2.Language Arts - -
## School 1.Math - -
## School 2.Math - -
## School 1.Science - -
## School 2.Science - -
## School 1.Social Studies 1.00000 -
## School 2.Social Studies 1.00000 1.00000
##
## P value adjustment method: bonferroni
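# Optional sketch: flatten the triangular p-value matrix into a long table so
# the interaction contrasts can be sorted and exported. Assumes broom's tidier
# for pairwise.htest objects.
bonf_interaction <- pairwise.t.test(factorial$ResistsWriting, interaction_variable,
p.adjust.method = "bonferroni")
broom::tidy(bonf_interaction) %>%
dplyr::arrange(p.value) %>%
dplyr::filter(p.value < .05)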
library(ggplot2)
library(gridExtra)
p1 <- ggplot(factorial, aes(x = School, y = ResistsWriting)) +
geom_boxplot(fill = "skyblue", alpha = 0.7) +
stat_summary(fun = mean, geom = "point", shape = 20, size = 3, color = "red") +
labs(title = "Resistance to Writing by School", x = "School", y = "Resistance to Writing") +
theme_minimal()
p2 <- ggplot(factorial, aes(x = Content_Area, y = ResistsWriting)) +
geom_boxplot(fill = "lightgreen", alpha = 0.7) +
stat_summary(fun = mean, geom = "point", shape = 20, size = 3, color = "red") +
labs(title = "Resistance to Writing by Content Area", x = "Content Area", y = "Resistance to Writing") +
theme_minimal()
p3 <- ggplot(factorial, aes(x = School_Cont, y = ResistsWriting)) +
geom_boxplot(fill = "lightcoral", alpha = 0.7) +
stat_summary(fun = mean, geom = "point", shape = 20, size = 3, color = "red") +
labs(title = "Resistance to Writing by School-Content", x = "School & Content", y = "Resistance to Writing") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Rotate x-axis labels
grid.arrange(p3, p2, p1, ncol = 2)
library(rstatix)
# Cohen's d between schools
factorial_school_cohen <- factorial%>%
rstatix::cohens_d(ResistsWriting ~ School)
# Cohen's d between content areas
# Use %in% so all rows from both groups are kept; comparing with == against a
# two-element vector recycles element-wise and silently drops cases.
factorial_math_la <- factorial %>%
dplyr::filter(Content_Area %in% c("Math", "Language Arts")) %>%
rstatix::cohens_d(ResistsWriting ~ Content_Area)
factorial_sci_la <- factorial %>%
dplyr::filter(Content_Area %in% c("Science", "Language Arts")) %>%
rstatix::cohens_d(ResistsWriting ~ Content_Area)
factorial_sss_la <- factorial %>%
dplyr::filter(Content_Area %in% c("Social Studies", "Language Arts")) %>%
rstatix::cohens_d(ResistsWriting ~ Content_Area)
factorial_ss_math <- factorial %>%
filter(Content_Area %in% c("Math", "Social Studies"))%>%
mutate(Content_Area = factor(Content_Area))%>%
rstatix::cohens_d(ResistsWriting ~ Content_Area)
# School content combinations
library(dplyr)
library(rstatix)
# School pairwise comparisons
S2M_S2S <- factorial %>%
filter(School_Cont %in% c("School 2 _ Math", "School 2 _ Social Studies")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2M_S2L <- factorial %>%
filter(School_Cont %in% c("School 2 _ Math", "School 2 _ Language Arts")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2M_S2Sc <- factorial %>%
filter(School_Cont %in% c("School 2 _ Math", "School 2 _ Science")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2M_S1SS <- factorial %>%
filter(School_Cont %in% c("School 2 _ Math", "School 1 _ Social Studies")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2M_S1L <- factorial %>%
filter(School_Cont %in% c("School 2 _ Math", "School 1 _ Language Arts")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2M_S1M <- factorial %>%
filter(School_Cont %in% c("School 2 _ Math", "School 1 _ Math")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2M_S1Sc <- factorial %>%
filter(School_Cont %in% c("School 2 _ Math", "School 1 _ Science")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2S_S2L <- factorial %>%
filter(School_Cont %in% c("School 2 _ Social Studies", "School 2 _ Language Arts")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2S_S2Sc <- factorial %>%
filter(School_Cont %in% c("School 2 _ Social Studies", "School 2 _ Science")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2S_S1SS <- factorial %>%
filter(School_Cont %in% c("School 2 _ Social Studies", "School 1 _ Social Studies")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2S_S1L <- factorial %>%
filter(School_Cont %in% c("School 2 _ Social Studies", "School 1 _ Language Arts")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2S_S1M <- factorial %>%
filter(School_Cont %in% c("School 2 _ Social Studies", "School 1 _ Math")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2S_S1Sc <- factorial %>%
filter(School_Cont %in% c("School 2 _ Social Studies", "School 1 _ Science")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2L_S2Sc <- factorial %>%
filter(School_Cont %in% c("School 2 _ Language Arts", "School 2 _ Science")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2L_S1SS <- factorial %>%
filter(School_Cont %in% c("School 2 _ Language Arts", "School 1 _ Social Studies")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2L_S1L <- factorial %>%
filter(School_Cont %in% c("School 2 _ Language Arts", "School 1 _ Language Arts")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2L_S1M <- factorial %>%
filter(School_Cont %in% c("School 2 _ Language Arts", "School 1 _ Math")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2L_S1Sc <- factorial %>%
filter(School_Cont %in% c("School 2 _ Language Arts", "School 1 _ Science")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2Sc_S1SS <- factorial %>%
filter(School_Cont %in% c("School 2 _ Science", "School 1 _ Social Studies")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2Sc_S1L <- factorial %>%
filter(School_Cont %in% c("School 2 _ Science", "School 1 _ Language Arts")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2Sc_S1M <- factorial %>%
filter(School_Cont %in% c("School 2 _ Science", "School 1 _ Math")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S2Sc_S1Sc <- factorial %>%
filter(School_Cont %in% c("School 2 _ Science", "School 1 _ Science")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S1SS_S1L <- factorial %>%
filter(School_Cont %in% c("School 1 _ Social Studies", "School 1 _ Language Arts")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S1SS_S1M <- factorial %>%
filter(School_Cont %in% c("School 1 _ Social Studies", "School 1 _ Math")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S1SS_S1Sc <- factorial %>%
filter(School_Cont %in% c("School 1 _ Social Studies", "School 1 _ Science")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S1L_S1M <- factorial %>%
filter(School_Cont %in% c("School 1 _ Language Arts", "School 1 _ Math")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S1L_S1Sc <- factorial %>%
filter(School_Cont %in% c("School 1 _ Language Arts", "School 1 _ Science")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
S1M_S1Sc <- factorial %>%
filter(School_Cont %in% c("School 1 _ Math", "School 1 _ Science")) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
factorial_d<-rbind(factorial_school_cohen, factorial_math_la, factorial_sci_la, factorial_sss_la, factorial_ss_math, S2M_S2S, S2M_S2L, S2M_S2Sc, S2M_S1SS, S2M_S1L, S2M_S1M, S2M_S1Sc,
S2S_S2L, S2S_S2Sc, S2S_S1SS, S2S_S1L, S2S_S1M, S2S_S1Sc,
S2L_S2Sc, S2L_S1SS, S2L_S1L, S2L_S1M, S2L_S1Sc, S2Sc_S1SS,
S2Sc_S1L, S2Sc_S1M, S2Sc_S1Sc, S1SS_S1L, S1SS_S1M, S1SS_S1Sc,
S1L_S1M, S1L_S1Sc, S1M_S1Sc)
factorial_d
## # A tibble: 33 × 7
## .y. group1 group2 effsize n1 n2 magnitude
## <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 ResistsWriting School 1 Schoo… -0.383 61 111 small
## 2 ResistsWriting Language Arts Math -1.04 26 25 large
## 3 ResistsWriting Language Arts Math 3.50 26 0 large
## 4 ResistsWriting Language Arts Math 3.50 26 0 large
## 5 ResistsWriting Math Socia… 0.583 41 38 moderate
## 6 ResistsWriting School 2 _ Math Schoo… 0.857 26 21 large
## 7 ResistsWriting School 2 _ Language Arts Schoo… -1.44 37 26 large
## 8 ResistsWriting School 2 _ Math Schoo… 0.585 26 27 moderate
## 9 ResistsWriting School 1 _ Social Studies Schoo… -1.16 17 26 large
## 10 ResistsWriting School 1 _ Language Arts Schoo… -2.03 10 26 large
## # ℹ 23 more rows
write.csv(factorial_d, "factorial_d_effect.csv")
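# The blocks above enumerate every school-content pair by hand. An equivalent,
# more compact sketch loops over all 28 unique pairs of School_Cont levels and
# should reproduce the same effect sizes (row order may differ).
sc_levels <- as.character(unique(factorial$School_Cont))
pairwise_d <- combn(sc_levels, 2, simplify = FALSE) %>%
purrr::map_dfr(function(pair) {
factorial %>%
filter(School_Cont %in% pair) %>%
rstatix::cohens_d(ResistsWriting ~ School_Cont)
})
pairwise_d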
library(tidyverse)
library(ggpubr)
library(ggplot2)
library(rstatix)
library(broom)
library(bannerCommenter)
library(emmeans)
library(tidyr)
library(dplyr)
# Read in data
ancovadata <- read.csv("Factorial_for_Practice_and_HW.csv")
# Descriptive statistics for ResistsWriting by content area and overall
resist_content<- as.data.frame(psych::describeBy(ancovadata$ResistsWriting, group = ancovadata$Content_Area, mat = TRUE))%>%
mutate(Category = "Resist_Content")%>%
dplyr::select(Category, group1, n, mean, sd, median, kurtosis, skew)
overall_resist<- as.data.frame(psych::describe(ancovadata$ResistsWriting))%>%
mutate(Category = "Resist_Overall")%>%
mutate(group1 = "Overall")%>%
dplyr::select(Category, group1, n, mean, sd, median, kurtosis, skew)
# Descriptive statistics for TeachReading by content area and overall
TeachReading_content<- as.data.frame(psych::describeBy(ancovadata$TeachReading, group = ancovadata$Content_Area, mat = TRUE))%>%
mutate(Category = "TeachReading_Content")%>%
dplyr::select(Category, group1, n, mean, sd, median, kurtosis, skew)
overall_TeachReading<- as.data.frame(psych::describe(ancovadata$TeachReading))%>%
mutate(Category = "TeachReading_Overall")%>%
mutate(group1 = "Overall")%>%
dplyr::select(Category, group1, n, mean, sd, median, kurtosis, skew)
descriptive_stats_all <- bind_rows(
resist_content,
overall_resist,
TeachReading_content,
overall_TeachReading
)
write.csv(descriptive_stats_all, "descriptive_ancova.csv")
descriptive_stats_all
## Category group1 n mean sd median
## X11...1 Resist_Content Language Arts 47 22.72340 6.195027 25.0
## X12...2 Resist_Content Math 41 31.46341 8.772393 31.0
## X13...3 Resist_Content Science 46 28.93478 6.416653 29.0
## X14...4 Resist_Content Social Studies 38 27.26316 5.176267 27.0
## X1...5 Resist_Overall Overall 172 27.47093 7.453895 28.0
## X11...6 TeachReading_Content Language Arts 47 17.44681 4.595880 16.0
## X12...7 TeachReading_Content Math 41 23.68293 5.583185 24.0
## X13...8 TeachReading_Content Science 46 20.39130 4.509143 21.5
## X14...9 TeachReading_Content Social Studies 38 20.36842 5.429753 20.0
## X1...10 TeachReading_Overall Overall 172 20.36628 5.448281 21.0
## kurtosis skew
## X11...1 -1.0961492 -0.19590131
## X12...2 -0.1175087 -0.07083268
## X13...3 2.1720635 0.70398419
## X14...4 0.9602155 -0.15470751
## X1...5 0.7588503 0.28860860
## X11...6 -0.9284476 0.53094187
## X12...7 -0.3699153 -0.20792093
## X13...8 -0.7847273 -0.18704527
## X14...9 1.1047640 0.69741463
## X1...10 -0.2401019 0.31243481
# Boxplots of ResistsWriting and TeachReading after pivoting the data to long format
ancova_long <- ancovadata %>%
pivot_longer(cols = c(ResistsWriting, TeachReading),
names_to = "Variable",
values_to = "Score")
ggplot(ancova_long, aes(x = Content_Area, y = Score, fill = Variable)) +
geom_boxplot(position = position_dodge(width = 0.8)) +
theme_minimal() +
labs(title = "Boxplots of ResistsWriting and TeachReading by Content Area",
x = "Content Area", y = "Score", fill = "Variable")
# Check linearity of the covariate-outcome relationship within each group
ggplot(ancovadata, aes(x = TeachReading, y = ResistsWriting, color = Content_Area)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
stat_regline_equation(
aes(label = paste(after_stat(eq.label), after_stat(rr.label), sep = "~~~~")),
show.legend = FALSE
) +
facet_wrap(~ Content_Area) +
theme_minimal()
# Homogeneity of regression slopes
banner("Check interaction: Content_Area * TeachReading (Homogeneity of Regression Slopes)")
##
## #########################################################################################
## ## Check interaction: Content_Area * TeachReading (Homogeneity of Regression Slopes) ##
## #########################################################################################
interaction_test <- ancovadata %>%
anova_test(ResistsWriting ~ Content_Area * TeachReading)
get_anova_table(interaction_test)
## ANOVA Table (type II tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 Content_Area 3 164 4.482 5.00e-03 * 0.076
## 2 TeachReading 1 164 94.719 5.93e-18 * 0.366
## 3 Content_Area:TeachReading 3 164 2.493 6.20e-02 0.044
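# The same slopes check can be run with a plain linear model; a sketch using
# car::Anova (car is loaded at the top of the document). Because both use
# Type II tests, the F values should match the rstatix table above.
slopes_model <- lm(ResistsWriting ~ Content_Area * TeachReading, data = ancovadata)
car::Anova(slopes_model, type = 2)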
# Fit models for residuals
model_resist <- lm(ResistsWriting ~ TeachReading + Content_Area, data = ancovadata)
model_teach <- lm(TeachReading ~ ResistsWriting + Content_Area , data = ancovadata)
model.metrics_resist <- augment(model_resist, data = ancovadata)
model.metrics_teach <- augment(model_teach, data = ancovadata)
# Assess normality of residuals - Shapiro-Wilk
shapiro_teach <- model.metrics_teach %>%
group_by(Content_Area) %>%
shapiro_test(.resid) %>%
mutate(Test = "Shapiro - TeachReading") %>%
dplyr::select(Test, Content_Area, everything())
shapiro_resist <- model.metrics_resist %>%
group_by(Content_Area) %>%
shapiro_test(.resid) %>%
mutate(Test = "Shapiro - ResistsWriting") %>%
dplyr::select(Test, Content_Area, everything())
shapiro_all <- bind_rows(shapiro_teach, shapiro_resist) %>%
dplyr::select(Test, Content_Area, statistic, p)
shapiro_all
## # A tibble: 8 × 4
## Test Content_Area statistic p
## <chr> <chr> <dbl> <dbl>
## 1 Shapiro - TeachReading Language Arts 0.982 0.656
## 2 Shapiro - TeachReading Math 0.891 0.000939
## 3 Shapiro - TeachReading Science 0.893 0.000491
## 4 Shapiro - TeachReading Social Studies 0.983 0.816
## 5 Shapiro - ResistsWriting Language Arts 0.939 0.0161
## 6 Shapiro - ResistsWriting Math 0.925 0.00967
## 7 Shapiro - ResistsWriting Science 0.800 0.00000190
## 8 Shapiro - ResistsWriting Social Studies 0.966 0.291
# Run the Shapiro-Wilk test on the raw variables
shapiro_resist_raw <- ancovadata %>%
group_by(Content_Area) %>%
shapiro_test(ResistsWriting) %>%
mutate(Test = "Raw ResistsWriting")
shapiro_teach_raw <- ancovadata %>%
group_by(Content_Area) %>%
shapiro_test(TeachReading) %>%
mutate(Test = "Raw TeachReading")
shapiro_raw <- bind_rows(shapiro_teach_raw, shapiro_resist_raw) %>%
dplyr::select(Test, Content_Area, statistic, p)
shapiro_raw
## # A tibble: 8 × 4
## Test Content_Area statistic p
## <chr> <chr> <dbl> <dbl>
## 1 Raw TeachReading Language Arts 0.905 0.00102
## 2 Raw TeachReading Math 0.959 0.150
## 3 Raw TeachReading Science 0.952 0.0556
## 4 Raw TeachReading Social Studies 0.948 0.0783
## 5 Raw ResistsWriting Language Arts 0.930 0.00785
## 6 Raw ResistsWriting Math 0.980 0.666
## 7 Raw ResistsWriting Science 0.946 0.0340
## 8 Raw ResistsWriting Social Studies 0.949 0.0815
# QQ plot for visual check of normality
ggqqplot(model.metrics_resist$.resid)
# Homogeneity of variance - residuals
model.metrics_resist %>%
levene_test(.resid ~ Content_Area) %>%
as.data.frame() %>%
mutate(Test = "Levene's on Residuals") %>%
dplyr::select(Test, everything())
## Test df1 df2 statistic p
## 1 Levene's on Residuals 3 168 1.723983 0.1639861
# Optional: Levene's on raw outcome (less common in ANCOVA but sometimes requested)
levene_test(ResistsWriting ~ Content_Area, data = ancovadata) %>%
as.data.frame() %>%
mutate(Test = "Levene's on Raw Outcome") %>%
dplyr::select(Test, everything())
## Test df1 df2 statistic p
## 1 Levene's on Raw Outcome 3 168 2.958167 0.03396039
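# Levene's test on the raw outcome is significant even though the residual-
# based test above is not. If heteroscedasticity were a concern, one
# sensitivity check (a sketch) is a White-adjusted Type II test on the ANCOVA
# model fit earlier.
car::Anova(model_resist, type = 2, white.adjust = TRUE)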
# Outlier check: |standardized residuals| > 3
model.metrics_resist %>%
filter(abs(.std.resid) > 3) %>%
as.data.frame() %>%
mutate(Test = "Residual Outlier Check") %>%
dplyr::select(Test, everything())
## Test ID School Content_Area TeachReading
## 1 Residual Outlier Check 45435431 School 2 Math 19
## 2 Residual Outlier Check 45487317 School 2 Science 12
## ResistsWriting .fitted .resid .hat .sigma .cooksd .std.resid
## 1 52 27.69503 24.30497 0.02958160 5.115280 0.1251505 4.530748
## 2 52 22.18223 29.81777 0.03840795 4.925774 0.2490739 5.583853
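# Sensitivity sketch: refit the ANCOVA without the two flagged cases and
# compare the coefficients. Dropping rows is exploratory only, not a
# recommended default.
ancova_no_outliers <- model.metrics_resist %>%
filter(abs(.std.resid) <= 3)
model_resist_clean <- lm(ResistsWriting ~ TeachReading + Content_Area,
data = ancova_no_outliers)
summary(model_resist_clean)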
# ANCOVA omnibus test (Type II SS by default in rstatix)
banner("ANCOVA Omnibus Results")
##
## ##################################################################
## ## ANCOVA Omnibus Results ##
## ##################################################################
res.aov <- ancovadata %>%
anova_test(ResistsWriting ~ TeachReading + Content_Area)
anova_table <- as.data.frame(get_anova_table(res.aov))
anova_table
## Effect DFn DFd F p p<.05 ges
## 1 TeachReading 1 167 92.244 1.15e-17 * 0.356
## 2 Content_Area 3 167 4.365 5.00e-03 * 0.073
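# rstatix reports generalized eta squared (ges). If partial eta squared is
# wanted for reporting, a sketch with the effectsize package (loaded at the
# top), applied to the same Type II table.
type2_tab <- car::Anova(lm(ResistsWriting ~ TeachReading + Content_Area,
data = ancovadata), type = 2)
effectsize::eta_squared(type2_tab, partial = TRUE)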
# Mean Squares
banner("Mean Squares Between")
##
## ##################################################################
## ## Mean Squares Between ##
## ##################################################################
anova_table_df <- get_anova_table(res.aov)
# Pull the F statistics and numerator degrees of freedom from the table above
F_values <- anova_table_df$F
DFn_values <- anova_table_df$DFn # Degrees of freedom for between-group effects
# Step 2: Compute MSW (Mean Square Within) from the model residuals
# Refit the ANCOVA as a linear model (same specification as model_resist above)
model <- lm(ResistsWriting ~ TeachReading + Content_Area, data = ancovadata)
# Get the residual sum of squares
SSResiduals <- sum(model$residuals^2)
# Get the residual degrees of freedom (total observations - number of predictors - 1)
DFResiduals <- length(model$residuals) - length(coef(model))
# Compute MSW
MSW <- SSResiduals / DFResiduals
# Step 3: Compute MSB for each effect (TeachReading, Content_Area)
MSB_values <- F_values * MSW
# Step 4: Combine the results into a new table
anova_table_df$MSB <- MSB_values
# Step 5: Display the updated table
print(anova_table_df)
## ANOVA Table (type II tests)
##
## Effect DFn DFd F p p<.05 ges MSB
## 1 TeachReading 1 167 92.244 1.15e-17 * 0.356 2735.4546
## 2 Content_Area 3 167 4.365 5.00e-03 * 0.073 129.4421
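# Cross-check (a sketch): mean squares taken directly from the Type II sums of
# squares should match the MSB column derived from F * MSW above; the last
# entry is the residual mean square (MSW).
ss_tab <- car::Anova(model, type = 2)
ss_tab[["Sum Sq"]] / ss_tab[["Df"]]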
# Post-hoc tests adjusted for the covariate (Bonferroni correction)
pwc <- ancovadata %>%
emmeans_test(
ResistsWriting ~ Content_Area, covariate = TeachReading,
p.adjust.method = "bonferroni"
) %>%
mutate(Test = "Post-Hoc") %>%
dplyr::select(Test, everything())
pwc
## # A tibble: 6 × 10
## Test term .y. group1 group2 df statistic p p.adj p.adj.signif
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Post-H… Teac… Resi… Langu… Math 167 -2.92 0.00401 0.0241 *
## 2 Post-H… Teac… Resi… Langu… Scien… 167 -3.32 0.00109 0.00656 **
## 3 Post-H… Teac… Resi… Langu… Socia… 167 -1.80 0.0730 0.438 ns
## 4 Post-H… Teac… Resi… Math Scien… 167 -0.100 0.920 1 ns
## 5 Post-H… Teac… Resi… Math Socia… 167 1.22 0.224 1 ns
## 6 Post-H… Teac… Resi… Scien… Socia… 167 1.38 0.168 1 ns
write.csv(pwc, "ancova_posth.csv")
# Extract estimated marginal means (EMMs)
emm <- get_emmeans(pwc) %>%
as_tibble() %>%
mutate(Adjusted_SD = se * sqrt(df)) %>% # Using 'se' as the standard error column
mutate(Test = "EMM") %>%
dplyr::select(Test, everything())
emm
## # A tibble: 4 × 10
## Test TeachReading Content_Area emmean se df conf.low conf.high method
## <chr> <dbl> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 EMM 20.4 Language Arts 25.1 0.831 167 23.4 26.7 Emmea…
## 2 EMM 20.4 Math 28.8 0.895 167 27.0 30.6 Emmea…
## 3 EMM 20.4 Science 28.9 0.803 167 27.3 30.5 Emmea…
## 4 EMM 20.4 Social Studies 27.3 0.883 167 25.5 29.0 Emmea…
## # ℹ 1 more variable: Adjusted_SD <dbl>
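# The same adjusted means come directly from the emmeans package (loaded
# above); a sketch using the ANCOVA lm fit earlier, which also exposes
# Bonferroni-adjusted pairwise contrasts.
emm_direct <- emmeans::emmeans(model, ~ Content_Area)
emm_direct
pairs(emm_direct, adjust = "bonferroni")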
# Visualize the EMMs with confidence intervals
emm <- emm %>%
mutate(Content_Area = factor(Content_Area, levels = unique(Content_Area)))
ggplot(emm, aes(x = Content_Area, y = emmean, group = 1)) +
geom_line(color = "black", linewidth = 1) +
geom_point(size = 3, color = "black", fill = "lightblue", shape = 21) +
geom_errorbar(
aes(ymin = emmean - 1.96 * se, ymax = emmean + 1.96 * se),
width = 0.2, color = "black", linewidth = 1
) +
labs(
title = "Estimated Marginal Means by Content Area",
y = "Estimated Marginal Mean (ResistsWriting)",
x = "Content Area"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))+
ylim(0, 40)
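# If the figure is needed for the write-up, ggsave() captures the most recent
# plot; the dimensions here are illustrative choices.
ggsave("emm_plot.png", width = 7, height = 5, dpi = 300)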