# Simulate batting scores: 25 draws per situation from normal distributions
# with sd 10, centred at 25 (pressure) and 35 (low pressure).
pressure_scores <- rnorm(n = 25, mean = 25, sd = 10)
low_scores <- rnorm(n = 25, mean = 35, sd = 10)

# Long-format data frame: one row per score, pressure rows first, low rows
# second, with the situation stored as a factor.
data <- data.frame(
  Score = c(pressure_scores, low_scores),
  Situation = factor(rep(c("Pressure", "Low"), each = 25))
)
library(ggplot2)
# Side-by-side boxplots of the scores, one box (and fill colour) per situation.
ggplot(data = data, mapping = aes(x = Situation, y = Score, fill = Situation)) +
  geom_boxplot() +
  labs(title = "Batting Scores by Situation", y = "Score") +
  theme_minimal()
# Observed test statistic: mean score in the low-pressure group minus the mean
# score in the pressure group.
mean_pressure <- with(data, mean(Score[Situation == "Pressure"]))
mean_low <- with(data, mean(Score[Situation == "Low"]))
observed_diff <- mean_low - mean_pressure
observed_diff
## [1] 11.35468
#' Permutation distribution of the difference in group means
#'
#' Repeatedly shuffles `data$Score` across the rows and, for each shuffle,
#' computes mean(second group) - mean(first group). Group membership comes
#' from `data$Situation`: the "first group" is every row carrying the same
#' label as row 1, so the group sizes follow the data instead of being fixed
#' at 25 and 25. If `data` has no `Situation` column, the rows are split into
#' a first and a second half.
#'
#' @param data A data frame with a numeric `Score` column and, optionally, a
#'   `Situation` column holding exactly two distinct labels.
#' @param n Number of permutations to draw (a single non-negative number).
#' @return A numeric vector of length `n` with the permuted mean differences.
perm_test <- function(data, n = 1000) {
  scores <- data$Score
  labels <- data$Situation
  n_obs <- length(scores)

  # sample(x) on a length-one numeric samples from 1:x instead of permuting x,
  # so at least two scores are required.
  stopifnot(
    is.numeric(scores), n_obs >= 2L,
    is.numeric(n), length(n) == 1L, !is.na(n), n >= 0
  )

  if (is.null(labels)) {
    # No grouping column: first half of the rows vs. the rest.
    in_first <- seq_len(n_obs) <= n_obs %/% 2L
  } else {
    if (anyNA(labels) || length(unique(labels)) != 2L) {
      stop(
        "`data$Situation` must contain exactly two groups and no NA.",
        call. = FALSE
      )
    }
    in_first <- labels == labels[1L]
  }

  # seq_len() makes n = 0 return an empty vector rather than looping over 1:0.
  vapply(
    seq_len(n),
    function(i) {
      shuffled <- sample(scores)
      mean(shuffled[!in_first]) - mean(shuffled[in_first])
    },
    numeric(1)
  )
}
# Draw 1000 permuted mean differences to approximate the null distribution.
permutation_diffs <- perm_test(data, n = 1000)

# Histogram of the permuted differences, with the observed difference marked
# by a red vertical line and reported in the legend.
hist(
  x = permutation_diffs,
  breaks = 30,
  col = "lightblue",
  main = "Permutation Test: Mean Differences",
  xlab = "Difference in Means"
)
abline(v = observed_diff, col = "red", lwd = 2)
legend(
  "topright",
  legend = paste("Observed Diff =", round(observed_diff, 2)),
  col = "red",
  lwd = 2
)
# Two-sided Monte Carlo p-value: the share of permuted differences at least as
# extreme (in absolute value) as the observed one. The observed statistic is
# counted as one of the permutations (+1 in numerator and denominator), so the
# estimate can never be exactly 0 when only a random subset of permutations
# has been drawn.
p_value <- (sum(abs(permutation_diffs) >= abs(observed_diff)) + 1) /
  (length(permutation_diffs) + 1)
paste("P-value:", round(p_value, 4))
## [1] "P-value: 0.001"
# If the p-value is less than 0.05, we reject the null hypothesis and conclude
# that the difference in batting performance between pressure and low-pressure
# situations is statistically significant. Otherwise, we fail to reject the
# null hypothesis: the observed difference is consistent with chance variation
# alone.