Homework

Loading packages

library(ggdist)
library(ggplot2)
library(distributional)
library(dplyr)
library(tidyverse)

Ilya’s code

# Reference example: four priors shown as half-eye densities, one row each.
# rev(name) in the factor levels puts the first entry at the top of the axis.
ggplot(
  data = data.frame(
    name = c("Normal(0,10)", "Normal(0,1)", "Mixture", "Truncated"),
    dist = c(
      dist_normal(0, 10),
      dist_normal(0, 1),
      # 30% wide component, 70% narrower component
      dist_mixture(
        dist_normal(0, 10),
        dist_normal(0, 3),
        weights = c(0.3, 0.7)
      ),
      # Normal(0,10) restricted to non-negative values
      dist_truncated(dist_normal(0, 10), lower = 0)
    )
  ),
  mapping = aes(y = factor(name, levels = rev(name)))
) +
  stat_dist_halfeye(mapping = aes(dist = dist))

For grades

non-informative prior
weakly informative prior
weakly informative prior for a “class with strong students”
informative prior based on the fact that approximately 70% of students got grades between 75 and 85
a pair of weakly informative priors for difficult and easy classes

# Candidate priors for a course grade on a 0-100 scale, drawn as half-eye
# densities. rev(name) in the factor levels puts the first entry at the top.
data.frame(
  name = c("Uniform(0,100)", 
           "Normal(50,25)", 
           "Normal(70,15)", 
           "Normal(80,5)",
           "pair Normal(40,10), Normal (80,10)"),
  dist = c(
           # non-informative: every grade equally plausible
           dist_uniform(0,100), 
           # weakly informative: centred mid-scale, very wide
           dist_normal(50,25),
           # weakly informative for a class with strong students
           dist_normal(70,15),
           # informative: "about 70% of students got 75-85" means the centre is
           # 80 and one sd is 5 (mean +/- 1 sd holds ~68% of a normal), so the
           # mean must be 80, not 70
           dist_normal(80,5),
           # difficult / easy class pair, encoded as one equal-weight mixture
           dist_mixture(dist_normal(40,10), 
                        dist_normal(80,10), 
                        weights = c(0.5, 0.5)))
) %>%  
ggplot(aes(y = factor(name, levels = rev(name)))) +
  stat_dist_halfeye(aes(dist = dist)) +
  ylab("Grades distribution under different priors")

For comparing experimental and control groups in the intervention

weakly skeptical prior on the difference in scores between groups

# Weakly skeptical prior on the between-group difference: centred on zero
# (no effect) with a wide spread (sd = 50).
ggplot(
  data = data.frame(
    name = "Normal(0,50)",
    dist = dist_normal(0, 50)
  ),
  mapping = aes(y = factor(name, levels = rev(name)))
) +
  stat_dist_halfeye(mapping = aes(dist = dist)) +
  labs(y = "weakly skeptical prior on the difference in scores between groups")

skeptical (pessimistic) prior on the difference, thinking your intervention is unlikely to change a grade by more than 10%

# Skeptical prior on the between-group difference: centred on zero with a
# narrow spread (sd = 3), so differences beyond +/-10 get very little mass.
ggplot(
  data = data.frame(
    name = "Normal(0,3)",
    dist = dist_normal(0, 3)
  ),
  mapping = aes(y = factor(name, levels = rev(name)))
) +
  stat_dist_halfeye(mapping = aes(dist = dist)) +
  labs(y = "skeptical (pessimistic) prior on the difference, thinking your intervention is unlikely to change a grade by more than 10%")

For a regression model predicting grades in a CS course, make additional assumptions and define priors for the coefficients of the following variables:

X1 = “has previously completed CSC108”
X2 = “attends office hours” (1 unit = 1 session)
X3 = “has very high IELTS/TOEFL score”
X4 = “has high self-efficacy”

# I will do something fun here as a back tracing of what Bayesian regression is calculating
# let's say:

# X1 should have a positive effect :D
# X2: not sure, as someone who never attends office hours T^T
# X3: I don't think this affects any of my CS abilities...
# X4: I don't know the meaning of efficacy...

# oki under this assumption the only one that's informative should be X1

# let's generate the data

# Class size: sim() below uses this as the number of students in each
# simulated class.
N <- 300 # Number of students

# Simulate the CS scores of one class of `n_students` students.
#
# Predictors are drawn uniformly at random, one coefficient per predictor is
# drawn from its normal prior, and scores are the linear combination plus
# normal noise, clamped to the 0-100 grade scale. There is no intercept term.
#
# Returns a numeric vector of length `n_students` with values in [0, 100].
simulate_cs_scores <- function(n_students) {
  # The order of the random draws below is deliberate: keeping it fixed means
  # a given set.seed() value keeps producing the same simulated class.
  X1 <- sample(c(0, 1), n_students, replace = TRUE)
  X2 <- sample(0:12, n_students, replace = TRUE) # 12 weeks in a semester without midterms :D
  X3 <- sample(c(0, 1), n_students, replace = TRUE)
  X4 <- sample(c(0, 1), n_students, replace = TRUE)

  # One coefficient per predictor, drawn from its prior.
  # NOTE(review): the notes above say only X1 should be informative, yet the
  # X2 prior is centred on 10 rather than 0 - confirm this is intended.
  beta_X1 <- rnorm(1, 50, 10)
  beta_X2 <- rnorm(1, 10, 10)
  beta_X3 <- rnorm(1, 0, 10)
  beta_X4 <- rnorm(1, 0, 10)

  # NOTE(review): the noise sd (100) is as wide as the whole grade scale, so
  # presumably many scores end up clamped to exactly 0 or 100 - confirm.
  raw_scores <- beta_X1 * X1 + beta_X2 * X2 + beta_X3 * X3 + beta_X4 * X4 +
    rnorm(n_students, 0, 100)

  # Clamp to the valid grade range: negatives become 0, values over 100 become 100.
  pmin(pmax(raw_scores, 0), 100)
}

# Run `n` independent simulations and print a histogram of scores for each.
#
# n          Number of simulated classes (one printed histogram per class).
#            n = 0 prints nothing.
# n_students Number of students per class; defaults to the script-level `N`.
#
# Called for its side effect (printing plots); returns NULL invisibly.
sim <- function(n, n_students = N) {
  # seq_len() rather than 1:n, because 1:0 would run the loop body twice.
  for (i in seq_len(n)) {
    df <- data.frame(cs_scores = simulate_cs_scores(n_students))

    # Plot histogram; explicit print() is needed inside a loop/function.
    print(ggplot(df, aes(x = cs_scores)) +
      geom_histogram(binwidth = 5, fill = "steelblue", color = "white") +
      labs(title = "Distribution of CS Scores", x = "CS Scores", y = "Frequency"))
  }
  invisible(NULL)
}

# Run a single simulation; sim() prints one histogram per iteration.
sim(1)