List of Visual Inferences

Simple linear model
Data
Case 1. \(H_0: \beta_1 = 0\) Scatter plot
Case 2. \(H_0: \beta_1 = 0\) Residual plot
Case 3. \(H_0: \beta_1 = 0\) Residual plot
Case 4. \(H_0: e_i \sim NID(0, \sigma^2)\) QQ-plot

Setup code

# devtools::install_github("dicook/nullabor")
library(nullabor)
library(tidyverse)
library(broom)
theme_set(theme_bw(base_size = 30) + 
            theme(axis.text = element_blank(),
                  strip.text = element_blank()))
knitr::opts_chunk$set(
  warning = FALSE,
  message = FALSE, 
  fig.width = 3,
  fig.height = 4,
  fig.path = "img/"
)

Links to relevant papers can be found here.
Clicking on the plot under “Test Statistic” will take it to the section which has the code used to produce the plots.

Simple linear model

Consider a simple linear model for \(i = 1, ..., n\) \[y_i = \beta_0 + \beta_1 x_i + e_i, \qquad e_i\sim NID(0, \sigma^2).\] Let’s do \(n \in \{20, 100, 1000, 10000\}\).

The actual data generating model is

\[y_i \sim NID(1 + 2 x_i + 2 \sin(2x_i), ~1).\]

Case	\(H_0\)	\(x\)	Plot
1	\(\beta_1 = 0\)	Continuous	Scatter plot with line of best fit



2	\(\beta_1 = 0\)	Continuous	Residual plot with \(y=0\) line



3	\(\beta_1 = 0\)	Binary	Boxplot of residuals



4	\(e_i \sim NID(0, \sigma^2)\)	Continuous	QQ-plot

Data

set.seed(1)
nobsv <- c(20, 100, 1000, 10000)
n_null <- 3
nobs <- 100
null_fig_width <- 8

dat1 <- map(nobsv, ~{
    df <- tibble(id = 1:.x) %>% 
      mutate(x = runif(n(), 0, 10),
             y = rnorm(n(), 1 + 2 * x + 2 * sin(2 * x), 1)) 
    lm(y ~ 1, data = df) %>% 
      augment() %>% 
      mutate(x = df$x)
  })

null1 <- map(seq_along(nobsv), function(i) {
  map_dfr(1:n_null, ~{
    fit <- lm(y ~ 1, dat1[[i]])
    beta0_hat <- tidy(fit)$estimate[1]
    sig_hat <- glance(fit)$sigma
    tibble(id = 1:nobsv[i]) %>% 
      mutate(y = rnorm(n(), beta0_hat, sig_hat)) %>% 
      lm(y ~ 1, data = .) %>% 
      augment() %>% 
      mutate(sim = paste0("Sim ", .x),
             x = dat1[[i]]$x)
  })})

dat2 <- map(nobsv, ~{
    df <- tibble(id = 1:.x) %>% 
      mutate(x = sample(c(0, 1), size = n(), replace = TRUE),
            y = rnorm(n(), 1 + 2 * x + 2 * sin(2 * x), 1)) 
    lm(y ~ 1, data = df) %>% 
      augment() %>% 
      mutate(x = df$x)
  })

null2 <- map(seq_along(nobsv), function(i) {
  map_dfr(1:n_null, ~{
    fit <- lm(y ~ 1, dat2[[i]])
    beta0_hat <- tidy(fit)$estimate[1]
    sig_hat <- glance(fit)$sigma
    tibble(id = 1:nobsv[i]) %>% 
      mutate(y = rnorm(n(), beta0_hat, sig_hat)) %>% 
      lm(y ~ 1, data = .) %>% 
      augment() %>% 
      mutate(sim = paste0("Sim ", .x),
             x = dat2[[i]]$x)
  })})

dat3 <- map(seq_along(nobsv), ~{
    lm(y ~ x, data = dat1[[.x]]) %>% 
      augment() 
  })

null3 <- map(seq_along(nobsv), function(i) {
  map_dfr(1:n_null, ~{
    fit <- lm(y ~ x, dat1[[i]])
    beta0_hat <- tidy(fit)$estimate[1]
    beta1_hat <- tidy(fit)$estimate[2]
    sig_hat <- glance(fit)$sigma
    tibble(id = 1:nobsv[i]) %>% 
      mutate(x = dat1[[i]]$x,
             y = rnorm(n(), beta0_hat + beta1_hat * x, sig_hat)) %>% 
      lm(y ~ x, data = .) %>% 
      augment() %>% 
      mutate(sim = paste0("Sim ", .x))
  })})

Case 1. \(H_0: \beta_1 = 0\) Scatter plot

walk(seq_along(nobsv), ~{
  p <- dat1[[.x]] %>% 
    ggplot(aes(x, y)) + 
    geom_point() + 
    geom_smooth(method = "lm") + 
    ggtitle(paste0("n = ", nobsv[.x]))
  print(p)
})

walk(seq_along(nobsv), ~{
  p <- ggplot(null1[[.x]], aes(x, y)) + 
    geom_point() + 
    geom_smooth(method = "lm") + 
    ggtitle(paste0("n = ", nobsv[.x])) + 
    facet_grid(. ~ sim) 
  print(p)
})

Case 2. \(H_0: \beta_1 = 0\) Residual plot

walk(seq_along(nobsv), ~{
  p <- ggplot(dat1[[.x]], aes(x, .resid)) + 
    geom_point() + 
    labs(y = "Residual") + 
    ggtitle(paste0("n = ", nobsv[.x])) + 
    geom_hline(yintercept = 0, color = "blue")
  print(p)
})

walk(seq_along(nobsv), ~{
  p <- ggplot(null1[[.x]], aes(x, .resid)) + 
    geom_point() + 
    geom_hline(yintercept = 0, color = "blue") + 
    labs(y = "Residual") + 
    ggtitle(paste0("n = ", nobsv[.x])) + 
    facet_grid(. ~ sim) 
  print(p)
})

Case 3. \(H_0: \beta_1 = 0\) Residual plot

walk(seq_along(nobsv), ~{
  p <- ggplot(dat2[[.x]], aes(as.factor(x), .resid)) + 
    geom_boxplot() + 
    geom_hline(yintercept = 0, color = "blue") + 
    ggtitle(paste0("n = ", nobsv[.x])) + 
    labs(x = "x", y = "Residual")
  print(p)
})

walk(seq_along(nobsv), ~{
  p <- ggplot(null2[[.x]], aes(as.factor(x), .resid)) + 
    geom_boxplot() + 
    geom_hline(yintercept = 0, color = "blue") + 
    ggtitle(paste0("n = ", nobsv[.x])) + 
    labs(x = "x", y = "Residual") + 
    facet_grid(. ~ sim)
  print(p)
})

Case 4. \(H_0: e_i \sim NID(0, \sigma^2)\) QQ-plot

walk(seq_along(nobsv), ~{
  p <- ggplot(dat3[[.x]], aes(sample = .resid)) + 
    geom_qq() +
    geom_qq_line(color = "blue") + 
    ggtitle(paste0("n = ", nobsv[.x])) 
  print(p)
})

walk(seq_along(nobsv), ~{
  p <- ggplot(null3[[.x]], aes(sample = .resid)) + 
    geom_qq() +
    geom_qq_line(color = "blue") + 
    ggtitle(paste0("n = ", nobsv[.x])) + 
    facet_grid(. ~ sim) 
  print(p)
})