Import data
# Load the tornado records from the shared data folder into a data frame.
# NOTE(review): no library() call for the pipe / ggplot helpers used below is
# visible in this file; presumably they are attached in a hidden setup chunk.
# Confirm before running this as a standalone script.
tornados <- read.csv(file = "../00_data/tornados.csv")
Introduction
Questions
Variation
# Bar chart: number of tornado records for each value of `mo`.
tornados |>
  ggplot(aes(x = mo)) +
  geom_bar()

Visualizing distributions
# Count of records per `mo`, with the data and mapping handed directly to
# ggplot() instead of being piped in.
ggplot(data = tornados, mapping = aes(x = mo)) +
  geom_bar()

# Distribution of `f1`, binned in steps of 0.5.
ggplot(data = tornados, mapping = aes(x = f1)) +
  geom_histogram(binwidth = 0.5)

# Histogram of `f1` restricted to rows where `f1` is below 400, using the
# same 0.5-wide bins.
tornados |>
  filter(f1 < 400) |>
  ggplot(mapping = aes(x = f1)) +
  geom_histogram(binwidth = 0.5)

# Frequency polygons of tornado magnitude, one coloured line per `st`.
# Rows with a missing `mag` are dropped up front so the removal is an explicit
# step in the pipeline rather than a warning raised by `stat_bin()`.
tornados %>%
  filter(!is.na(mag)) %>%
  ggplot(aes(x = mag, color = st)) +
  # An explicit bin width replaces the implicit default of 30 bins.
  # NOTE(review): assumes `mag` is recorded on a whole-number scale, so one
  # bin per unit is the natural choice. Confirm against the data.
  geom_freqpoly(binwidth = 1)

Typical values
# Histogram of magnitude for the lower end of the scale.
tornados |>
  # Keep only rows whose magnitude is below 4 (everything else is dropped)
  filter(mag < 4) |>
  # Draw the distribution in 0.5-wide bins
  ggplot(mapping = aes(x = mag)) +
  geom_histogram(binwidth = 0.5)

# Histogram of `mo` with narrow (0.25-wide) bins.
ggplot(data = tornados, mapping = aes(x = mo)) +
  geom_histogram(binwidth = 0.25)

Unusual values
# Histogram of `wid` over its full range. No bin width is given, so ggplot2
# falls back to its default binning.
ggplot(data = tornados, mapping = aes(x = wid)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of `wid` with the y-axis zoomed to counts between 0 and 50 so
# that sparsely populated bins become visible. coord_cartesian() only changes
# the viewport; no rows are removed before binning.
tornados |>
  ggplot(mapping = aes(x = wid)) +
  geom_histogram() +
  coord_cartesian(ylim = c(0, 50))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Missing values
# Scatter plot of `wid` against `mo` with out-of-range widths blanked out.
# Widths below 3 or above 3500 are replaced with NA instead of dropping the
# whole row, so the other columns of those records are left intact.
tornados |>
  mutate(wid = ifelse(wid >= 3 & wid <= 3500, wid, NA)) |>
  # Points whose `wid` is NA are skipped by geom_point() with a warning
  ggplot(mapping = aes(x = mo, y = wid)) +
  geom_point()
## Warning: Removed 540 rows containing missing values or values outside the scale range
## (`geom_point()`).

Covariation
A categorical and continuous variable
# Box plots of magnitude, one box per value of `tz`.
ggplot(data = tornados, mapping = aes(x = tz, y = mag)) +
  geom_boxplot()
## Warning: Removed 756 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Two categorical variables
library(hexbin)
## Warning: package 'hexbin' was built under R version 4.4.3
# Hexagonal 2-D binning of `f1` against `mag`; fill shows the number of
# records that fall into each hexagon.
ggplot(data = tornados, mapping = aes(x = mag, y = f1)) +
  geom_hex()
## Warning: Removed 756 rows containing non-finite outside the scale range
## (`stat_binhex()`).

Two continuous variables
# Box plots of magnitude for records with `wid` below 3500.
# NOTE(review): the boxes are grouped by 0.9-wide bins of `ns`, while the
# x-axis shows `f2`. Grouping usually bins the x variable itself, so confirm
# that mixing the two columns is intended.
tornados |>
  filter(wid < 3500) |>
  ggplot(mapping = aes(x = f2, y = mag)) +
  geom_boxplot(mapping = aes(group = cut_width(ns, width = 0.9)))
## Warning: Removed 756 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Scatter plot of `f1` against `mag` for records with magnitude of at most 4,
# viewed through a fixed window (x from 1 to 6, y from 100 to 500).
# The x window extends beyond the filter's upper bound of 4, so the right-hand
# part of the panel stays empty.
tornados %>%
  filter(mag <= 4) %>%
  ggplot(mapping = aes(x = mag, y = f1)) +
  geom_point() +
  coord_cartesian(xlim = c(1, 6), ylim = c(100, 500))

Patterns and models
library(modelr)
## Warning: package 'modelr' was built under R version 4.4.3
# Fit a log-log linear model of price on carat, then plot what is left over.
# NOTE(review): this section works on the `diamonds` data set rather than
# `tornados`. Confirm that is intended.
mod <- lm(log(price) ~ log(carat), data = diamonds)

# Attach the model residuals as `resid` and undo the log transform so they
# are back on the original price scale.
diamonds2 <- mutate(add_residuals(diamonds, mod), resid = exp(resid))

# Residuals against carat.
ggplot(data = diamonds2, mapping = aes(x = carat, y = resid)) +
  geom_point()
