Import data

# Read the tornado records from the CSV in ../00_data (the path is resolved
# relative to the working directory at run time).
tornados <- read.csv(file = "../00_data/tornados.csv")

Introduction

Questions

Variation

# Bar chart: number of rows for each value of `mo`.
tornados |>
  ggplot(aes(x = mo)) +
  geom_bar()

Visualizing distributions

# Bar chart of `mo`, with the data and aesthetic supplied directly to
# ggplot() rather than through a pipe.
ggplot(data = tornados, mapping = aes(x = mo)) +
  geom_bar()

# Histogram of `f1` using bins that are 0.5 wide.
ggplot(tornados, aes(x = f1)) +
  geom_histogram(binwidth = 0.5)

# Histogram of `f1` restricted to rows where f1 is below 400, so the lower
# part of the distribution is drawn on its own.
tornados |>
  filter(f1 < 400) |>
  ggplot(aes(x = f1)) +
  geom_histogram(binwidth = 0.5)

# Frequency polygons of `mag`, one coloured line per value of `st`.
# The bin width is set explicitly (0.5, the same width used for the `mag`
# histogram in the "Typical values" section) instead of relying on
# stat_bin()'s default of 30 bins, which ggplot2 asks callers to override.
tornados %>%
    ggplot(aes(x = mag, color = st)) +
    geom_freqpoly(binwidth = 0.5)
## Warning: Removed 756 rows containing non-finite outside the scale range
## (`stat_bin()`).

Typical values

# Keep only the tornados whose `mag` is below 4 (filter() also drops rows
# where the comparison is NA), then draw a histogram of `mag` with bins
# that are 0.5 wide.
tornados |>
  filter(mag < 4) |>
  ggplot(aes(x = mag)) +
  geom_histogram(binwidth = 0.5)

# Histogram of `mo` with quarter-unit bins.
ggplot(tornados, aes(x = mo)) +
  geom_histogram(binwidth = 0.25)

Unusual values

# Histogram of `wid` using ggplot2's default binning (no binwidth given).
ggplot(tornados, aes(x = wid)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of `wid` with the y axis zoomed to counts between 0 and 50 so
# that low-count bins are visible. coord_cartesian() only changes the
# visible window; it does not remove any rows from the data.
ggplot(tornados, aes(x = wid)) +
  geom_histogram() +
  coord_cartesian(ylim = c(0, 50))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Missing values

# Recode widths below 3 or above 3500 as NA (keeping the rows, rather than
# filtering them out), then plot `wid` against `mo`. geom_point() drops the
# NA widths and reports how many rows it removed.
tornados |>
  mutate(wid = ifelse(wid < 3 | wid > 3500, NA, wid)) |>
  ggplot(aes(x = mo, y = wid)) +
  geom_point()
## Warning: Removed 540 rows containing missing values or values outside the scale range
## (`geom_point()`).

Covariation

A categorical and continuous variable

# Box plots of `mag`, one box per value of `tz`.
ggplot(tornados, aes(x = tz, y = mag)) +
  geom_boxplot()
## Warning: Removed 756 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Two categorical variables

library(hexbin)
## Warning: package 'hexbin' was built under R version 4.4.3
# Hexagonal-bin plot of `f1` against `mag`; each hexagon is shaded by the
# number of rows that fall inside it.
ggplot(tornados, aes(x = mag, y = f1)) +
  geom_hex()
## Warning: Removed 756 rows containing non-finite outside the scale range
## (`stat_binhex()`).

Two continuous variables

# Box plots of `mag` for rows with wid < 3500, with one box per bin of `ns`
# (bins are 0.9 wide).
# NOTE(review): the x aesthetic is `f2` but the boxes are grouped by bins of
# `ns` -- confirm that `ns` is the intended grouping variable.
tornados |>
  filter(wid < 3500) |>
  ggplot(aes(x = f2, y = mag)) +
  geom_boxplot(aes(group = cut_width(ns, 0.9)))
## Warning: Removed 756 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Scatter plot of `f1` against `mag` for rows with mag <= 4. The view is
# zoomed to x in [1, 6] and y in [100, 500]; coord_cartesian() changes only
# the visible window, so points outside it are hidden rather than removed.
ggplot(
  data = filter(tornados, mag <= 4),
  mapping = aes(x = mag, y = f1)
) +
  geom_point() +
  coord_cartesian(xlim = c(1, 6), ylim = c(100, 500))

Patterns and models

library(modelr)
## Warning: package 'modelr' was built under R version 4.4.3
# Fit a linear model of log(price) on log(carat).
# NOTE(review): this chunk works on `diamonds`, not `tornados` -- confirm
# that switching datasets here is intentional.
mod <- lm(log(price) ~ log(carat), data = diamonds)

# Attach the model residuals and undo the log transform with exp().
diamonds2 <- diamonds |>
  add_residuals(mod) |>
  mutate(resid = exp(resid))

# Plot the back-transformed residuals against carat.
ggplot(diamonds2, aes(x = carat, y = resid)) +
  geom_point()