library(ggdist)
library(ggplot2)
library(distributional)
library(dplyr)
library(tidyverse)
# Homework
# Loading packages
# Ilya’s code
# Ilya's example: four prior shapes, each drawn as a half-eye on its own row.
data.frame(
  name = c(
    "Normal(0,10)",
    "Normal(0,1)",
    "Mixture",
    "Truncated"
  ),
  dist = c(
    dist_normal(0, 10),
    dist_normal(0, 1),
    # 30% / 70% mixture of a wide and a narrower zero-centred normal
    dist_mixture(
      dist_normal(0, 10),
      dist_normal(0, 3),
      weights = c(0.3, 0.7)
    ),
    # Normal(0,10) restricted to non-negative values
    dist_truncated(dist_normal(0, 10), lower = 0)
  )
) %>%
  # Levels are reversed so the rows appear top-to-bottom in the order of `name`.
  ggplot(aes(y = factor(name, levels = rev(name)))) +
  stat_dist_halfeye(aes(dist = dist))
# For grades
# - non-informative prior
# - weakly informative prior
# - weakly informative prior for a “class with strong students”
# - informative prior based on the fact that approximately 70% of students got grades 75-85
# - a pair of weakly informative priors for difficult and easy classes
# Candidate priors for a course grade on a 0-100 scale, one half-eye per row.
data.frame(
  name = c(
    "Uniform(0,100)",
    "Normal(50,25)",
    "Normal(70,15)",
    "Normal(80,5)",
    "pair Normal(40,10), Normal (80,10)"
  ),
  dist = c(
    # non-informative: every grade equally plausible
    dist_uniform(0, 100),
    # weakly informative: centred mid-scale with a wide spread
    dist_normal(50, 25),
    # weakly informative for a class with strong students
    dist_normal(70, 15),
    # informative: about 70% of students scored 75-85, i.e. roughly mean +/- 1 sd,
    # so the centre is 80 with sd 5 (Normal(70,5) would put only ~16% in 75-85)
    dist_normal(80, 5),
    # difficult and easy classes, shown as an equal-weight two-component mixture
    dist_mixture(
      dist_normal(40, 10),
      dist_normal(80, 10),
      weights = c(0.5, 0.5)
    )
  )
) %>%
  # Levels are reversed so the rows appear top-to-bottom in the order of `name`.
  ggplot(aes(y = factor(name, levels = rev(name)))) +
  stat_dist_halfeye(aes(dist = dist)) +
  ylab("Grades distribution under different priors")
# For comparing experimental and control groups in the intervention
# - weakly skeptical prior on the difference in scores between groups
# Weakly skeptical prior on the between-group score difference:
# centred on zero (no effect) with a large spread (sd = 50).
data.frame(
  name = c("Normal(0,50)"),
  dist = c(dist_normal(0, 50))
) %>%
  ggplot(aes(y = factor(name, levels = rev(name)))) +
  stat_dist_halfeye(aes(dist = dist)) +
  ylab("weakly skeptical prior on the difference in scores between groups")
# - skeptical (pessimistic) prior on the difference, thinking your intervention is unlikely to change a grade by more than 10%
# Skeptical (pessimistic) prior on the between-group score difference:
# centred on zero with sd = 3, so a change of 10 points lies more than
# three standard deviations from the centre.
data.frame(
  name = c("Normal(0,3)"),
  dist = c(dist_normal(0, 3))
) %>%
  ggplot(aes(y = factor(name, levels = rev(name)))) +
  stat_dist_halfeye(aes(dist = dist)) +
  ylab("skeptical (pessimistic) prior on the difference, thinking your intervention is unlikely to change a grade by more than 10%")
# For a regression model, predicting grades in a CS course (make additional assumptions and define prior) for the coefficient for the variables
# X1 = “has previously completed CSC108”
# X2 = “attends office hours” (1 unit = 1 session)
# X3 = “has very high IELTS/TOEFL score”
# X4 = “has high self-efficacy”
# I will do something fun here: back-tracing what a Bayesian regression is calculating
# let's say:
# X1 should have a positive effect :D
# not sure about X2, as someone who never attends office hours T^T
# X3: I don't think this affects any of my CS abilities...
# X4: I don't know the meaning of efficacy...
# OK, under these assumptions the only informative one should be X1
# let's generate the data
N <- 300 # Number of students

# Prior-predictive simulation of CS scores.
#
# For each of `n` repetitions: draw the four predictors for every student,
# draw one coefficient per predictor from its prior, combine them with
# Gaussian noise into a score, clamp the score to [0, 100], and print a
# histogram of the result.
#
# Arguments:
#   n          - number of simulated data sets (one histogram is printed each).
#   n_students - students per data set; defaults to the global `N`.
#
# Returns NULL invisibly; the function is called for its plotting side effect.
sim <- function(n, n_students = N) {
  # seq_len() so that n = 0 runs zero repetitions (1:0 would run two).
  for (i in seq_len(n)) {
    # Predictors: X1, X3, X4 are 0/1 indicators, X2 is a session count.
    X1 <- sample(c(0, 1), n_students, replace = TRUE)
    X2 <- sample(0:12, n_students, replace = TRUE) # 12 weeks in a semester without midterms :D
    X3 <- sample(c(0, 1), n_students, replace = TRUE)
    X4 <- sample(c(0, 1), n_students, replace = TRUE)

    # One draw per coefficient from its prior.
    beta_X1 <- rnorm(1, 50, 10)
    beta_X2 <- rnorm(1, 10, 10)
    beta_X3 <- rnorm(1, 0, 10)
    beta_X4 <- rnorm(1, 0, 10)

    # Linear predictor (no intercept) plus per-student noise with sd = 100.
    raw_scores <- beta_X1 * X1 + beta_X2 * X2 + beta_X3 * X3 + beta_X4 * X4 +
      rnorm(n_students, 0, 100)

    # Clamp to the valid grade range: negatives become 0, values over 100 become 100.
    cs_scores <- pmin(pmax(raw_scores, 0), 100)
    df <- data.frame(cs_scores)

    # Plot histogram; print() is required because ggplot objects do not
    # auto-print inside a function or loop.
    print(
      ggplot(df, aes(x = cs_scores)) +
        geom_histogram(binwidth = 5, fill = "steelblue", color = "white") +
        labs(title = "Distribution of CS Scores", x = "CS Scores", y = "Frequency")
    )
  }
  invisible(NULL)
}

sim(1)