library(rethinking)
library(cmdstanr)

library(tidyverse)
library(tidybayes)
library(haven)

for( .file in list.files('nlss2022/data/stata format') ) {
  .name <- strsplit(.file, ".", fixed=TRUE)[[1]][1]
  .path <- paste0('nlss2022/data/stata format/', .file)
  .table <- read_dta(.path) |> as_factor()
  assign( .name , .table )
}
df <- poverty |>
  left_join(
    S05 |> filter(food_code == 701)
  ) |>
  mutate(
    potato_consumption = ((q05_03 + q05_04 + q05_05) / hhsize ) |> replace_na(0),
    potato_value = ((replace_na(q05_03_b, 0) + replace_na(q05_04_b, 0) + replace_na(q05_05_b, 0)) / hhsize),
    pcep_potato = log(potato_value / pcep_food),
    spending = log(pcep),
  ) |>
  filter(is.finite(pcep_potato) & is.finite(pcep)) |>
  mutate(
    P = standardize(pcep_potato),
    S = standardize(spending)
  )
## Joining with `by = join_by(psu_number, hh_number)`
df |>
  ggplot(mapping = aes(x = P)) +
    geom_density() +
    labs(
      x = "Potato consumption as a proportion of overall food consumption (standardized, logscale)"
    )

df |>
  ggplot(mapping = aes(x = S)) +
    geom_density() +
    labs(
      x = "Per Capital total spending (standardized, logscale)"
    )

df |>
  ggplot(mapping = aes(x = S, y = P)) +
    geom_point(shape=1, color="blue", alpha=0.2) +
    labs(x = "Total Spending", y="Potato Consumption")

model <- quap(
  alist(
    P ~ dnorm( mu , sigma ),
    mu <- a + b * S, 
    a ~ dnorm( 0, 2 ),
    b ~ dnorm( 0 , 2 ),
    sigma ~ dunif( 0 , 5 )
  ),
  data=df
)
precis(model, corr=TRUE, digits=4)
##                mean          sd        5.5%       94.5%
## a     -2.447294e-07 0.009821169 -0.01569637  0.01569588
## b     -3.843186e-01 0.009821725 -0.40001560 -0.36862158
## sigma  9.231488e-01 0.006944719  0.91204981  0.93424781
# add some summary statistics
.linked <- link(model)
.simmed <- sim(model)
df |>
  mutate(
    reg.avg = .linked |> apply(2, mean),
    reg.min = .linked |> apply(2, quantile, 0.025),
    reg.max = .linked |> apply(2, quantile, 0.975),
    sim.min = .simmed |> apply(2, quantile, 0.1),
    sim.max = .simmed |> apply(2, quantile, 0.9),
  ) |>
  ggplot(mapping = aes(x = S, y = P)) +
    geom_ribbon(aes(ymin=sim.min, ymax=sim.max), color="lightgray", alpha=0.2) +
    geom_ribbon(aes(ymin=reg.min, ymax=reg.max), color="gray", alpha=0.2) +
    geom_line(aes(y = reg.avg)) +
    geom_point(shape=1, color="blue", alpha=0.2) +
    labs(x = "Total Spending", y="Potato Consumption")