Brief analysis of AP Statistics exam performance over time, prepared for a Psychometrics lecture presentation

Data were obtained from: https://apstudents.collegeboard.org/about-ap-scores/score-distributions/ap-statistics

# Attach the tidyverse up front so the dplyr verbs used below are available.
library(tidyverse)

# Read the raw score table; the path is resolved against the current working
# directory.
# NOTE: the former `rm(list = ls())` was dropped. Clearing the caller's
# workspace is a destructive side effect, and the objects used in the visible
# part of this script (df, score_matrix, plt1, plt2) are all created afresh.
df <- read.csv("Downloads/AP_Scores - Sheet1 (1).csv", header = TRUE)

# Every column whose name starts with "S" is treated as a percentage string:
# the "%" sign is stripped and the value rescaled to a proportion.
# (starts_with() ignores case by default, so a column starting with a
# lowercase "s" would be converted too.)
# Rows are then ordered by Year.
df <- df |>
  mutate(
    across(
      starts_with("S"),
      \(pct) as.numeric(gsub("%", "", pct)) * 0.01
    )
  ) |>
  arrange(Year)

# Print the cleaned table for a quick visual check.
df
# Stacked bar chart of the score distribution per year, followed by a
# legend-only plot.
# (tidyverse is already attached near the top of the script, so the duplicate
# library() call that used to sit here was removed.)

# Score columns in the order they are stacked in each bar (first = bottom).
score_cols <- c("Score.5", "Score.4", "Score.3", "Score.2", "Score.1")

# One shared palette so the bars and the legend cannot drift apart.
score_palette <- rainbow(length(score_cols))

# Year x score matrix of the selected columns, with rows labelled by year.
score_matrix <- df |>
  select(all_of(score_cols)) |>
  as.matrix()

rownames(score_matrix) <- df$Year

# barplot() draws one bar per matrix column, so the matrix is transposed to
# get one bar per year.
barplot(
  t(score_matrix),
  col = score_palette,
  xlab = "Year",
  ylab = "Proportion"
)

# Start a new, empty plot so the legend is drawn on its own rather than on
# top of the bar chart.
plot.new()

# Legend only: one filled box per score column.
legend(
  "center",
  legend = colnames(score_matrix),
  fill = score_palette,
  title = "Score"
)

library(patchwork)

# Mean score per year as a line with point markers. The plot object is only
# stored in `plt1` here; it is displayed later in the script.
plt1 <- df |>
  ggplot(mapping = aes(x = Year, y = Mean.Score)) +
  geom_line() +
  geom_point() +
  labs(title = "Mean Score Over Time", x = "Year", y = "Mean Score") +
  theme_minimal()


# Test.Takers has any "," characters removed (thousands separators in the
# raw values) and is then converted to numeric.
df <- df |>
  mutate(Test.Takers = as.numeric(gsub(",", "", Test.Takers)))

# Spacing of the y-axis breaks and the largest observed count.
# na.rm = TRUE keeps seq() below from failing when a count could not be
# parsed (as.numeric() returns NA for such entries).
test_taker_step <- 50000
max_test_takers <- max(df$Test.Takers, na.rm = TRUE)

# Number of test takers per year as a line with point markers; the y-axis is
# labelled with comma-formatted counts at every `test_taker_step`.
plt2 <- df |>
  ggplot(aes(x = Year, y = Test.Takers)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(
    breaks = seq(0, max_test_takers, by = test_taker_step),
    labels = scales::comma
  ) +
  labs(
    x = "Year",
    y = "Test Takers",
    title = "Test Takers Over Time"
  ) +
  theme_minimal()

# patchwork's `/` operator stacks the two plots vertically (plt1 on top).
plt1 / plt2

# Unweighted mean of the yearly mean scores, computed once and reused for the
# printed value and the reference line below.
overall_mean <- mean(df$Mean.Score)

round(overall_mean, 2)
## [1] 2.88

# Histogram of the yearly mean scores with a solid green vertical line at the
# overall mean. The bin count is set explicitly with Sturges' rule instead of
# relying on ggplot2's default of 30 bins, which ggplot2 itself flags with a
# "Pick better value" message.
df |>
  ggplot(aes(x = Mean.Score)) +
  geom_histogram(bins = nclass.Sturges(df$Mean.Score)) +
  geom_vline(
    xintercept = overall_mean,
    color = "#27AE60",
    linetype = "solid",
    linewidth = 1.2
  )