# Brief analysis of AP Statistics performance over time for a Psychometrics lecture presentation.
# Data were obtained from: https://apstudents.collegeboard.org/about-ap-scores/score-distributions/ap-statistics
# Load and clean the AP Statistics score data ----
library(tidyverse)

# NOTE: the workspace is intentionally NOT cleared with rm(list = ls()) here;
# doing so silently deletes whatever the caller had in their session. Restart
# R if a clean environment is needed.

# The path is relative to the current working directory.
df <- read.csv("Downloads/AP_Scores - Sheet1 (1).csv", header = TRUE)

# Every column whose name starts with "S" holds strings such as "12.3%":
# strip the percent sign and rescale to a proportion, then sort rows by Year.
df <- df |>
  mutate(
    across(
      starts_with("S"),
      \(x) as.numeric(gsub("%", "", x)) * 0.01
    )
  ) |>
  arrange(Year)

# Print the cleaned data for a quick visual check.
df
library(tidyverse)
# Gather the five score columns (5 down to 1) into a matrix with one row per
# year, labelled by Year.
score_matrix <- df |>
  select(Score.5, Score.4, Score.3, Score.2, Score.1) |>
  as.matrix()
rownames(score_matrix) <- df$Year

# Stacked bar chart: the matrix is transposed so that each bar is a year and
# each stacked segment is a score.
barplot(
  height = t(score_matrix),
  xlab = "Year",
  ylab = "Proportion",
  col = rainbow(n = 5)
)

# Draw the legend by itself on a fresh, empty plot, using the same palette as
# the bars.
plot.new()
legend(
  x = "center",
  title = "Score",
  legend = colnames(score_matrix),
  fill = rainbow(n = 5)
)
library(patchwork)
# Line-and-point chart of the mean score by year.
plt1 <- ggplot(data = df, mapping = aes(x = Year, y = Mean.Score)) +
  geom_line() +
  geom_point() +
  labs(title = "Mean Score Over Time", x = "Year", y = "Mean Score") +
  theme_minimal()

# Remove commas from Test.Takers and convert the column to numeric.
df <- mutate(df, Test.Takers = as.numeric(gsub(",", "", Test.Takers)))

# Line-and-point chart of the number of test takers by year, with a y-axis
# break every 50,000 and comma-formatted labels.
plt2 <- ggplot(data = df, mapping = aes(x = Year, y = Test.Takers)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    labels = scales::comma,
    breaks = seq(from = 0, to = max(df$Test.Takers), by = 50000)
  ) +
  labs(title = "Test Takers Over Time", x = "Year", y = "Test Takers") +
  theme_minimal()

# Combine both charts with patchwork: plt1 on top, plt2 underneath.
plt1 / plt2
# Mean of the yearly mean scores; computed once and reused for the reference
# line in the histogram below.
mean_score <- mean(df$Mean.Score)
round(mean_score, 2)
## [1] 2.88

# Histogram of the yearly mean scores with a vertical line at their mean.
# The bin count is set explicitly (Sturges' rule, based on the number of
# observations) instead of relying on ggplot2's default of 30 bins, which
# triggers the "Pick better value" message from stat_bin().
ggplot(df, aes(x = Mean.Score)) +
  geom_histogram(bins = nclass.Sturges(df$Mean.Score)) +
  geom_vline(
    xintercept = mean_score,
    color = "#27AE60",
    linetype = "solid",
    linewidth = 1.2
  )