analysis

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(tidycensus)

# Preview the raw STAR.csv data; the result is printed, not assigned.
read_csv("STAR.csv")

Rows: 6325 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (6): race, classtype, yearssmall, hsgrad, g4math, g4reading

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# A tibble: 6,325 × 6
    race classtype yearssmall hsgrad g4math g4reading
   <dbl>     <dbl>      <dbl>  <dbl>  <dbl>     <dbl>
 1     1         3          0     NA     NA        NA
 2     2         3          0     NA    706       661
 3     1         3          0      1    711       750
 4     2         1          4     NA    672       659
 5     1         2          0     NA     NA        NA
 6     1         3          0     NA     NA        NA
 7     1         1          4     NA    668       657
 8     1         3          0     NA     NA        NA
 9     1         1          4      1    709       725
10     1         2          0      1    698       692
# ℹ 6,315 more rows

library(tidyverse)
library(brms)

Warning: package 'brms' was built under R version 4.4.1

library(tidybayes)

Warning: package 'tidybayes' was built under R version 4.4.1

# Load the raw STAR data; the column-specification message is silenced.
raw_df <- read_csv("STAR.csv", show_col_types = FALSE)

# Build the modelling data: `g4math` paired with a labelled class type
# (`kinder`), restricted to the "small" and "regular" classes, with rows
# containing missing values removed.
# The earlier `race` recode was removed: `race` is not kept by `select()`
# below, so that step never influenced `x`.
x <- raw_df |>
  mutate(kinder = recode(classtype,
                         `1` = "small",
                         `2` = "regular",
                         `3` = "regular with aid")) |>
  filter(kinder %in% c("small", "regular")) |>
  select(g4math, kinder) |>
  drop_na()

# Fit a Gaussian model of `g4math` on class type (`kinder`) using the
# prepared data `x`. A fixed seed is supplied so the fit can be reproduced;
# `silent` and `refresh` are passed to keep fitting output out of the report.
fit_kinder <- brm(
  formula = g4math ~ kinder,
  data = x,
  family = gaussian(),
  silent = 2,
  refresh = 0,
  seed = 9
)

# Scenarios to predict for: one row per class type used in the model.
ndata <- tibble(kinder = c("small", "regular"))

# Generate `.epred` draws from the fitted model for each class type and
# overlay their distributions, filled by class type.
fit_kinder |>
  add_epred_draws(newdata = ndata) |>
  ggplot(aes(x = .epred, fill = kinder)) +
  # Plot count / sum(count) instead of raw density so the y axis can be
  # labelled as a percentage.
  geom_density(aes(y = after_stat(count / sum(count))), alpha = 0.5) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "Posterior for Exam Score",
       subtitle = "Class size has no effect on exam score",
       x = "Exam Score")

```