Import data
# Load the TidyTuesday (2023-10-10) haunted places data straight from GitHub.
haunted_places <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-10-10/haunted_places.csv"
)
## Rows: 10992 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): city, country, description, location, state, state_abbrev
## dbl (4): longitude, latitude, city_longitude, city_latitude
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Introduction
Questions
Variation
Visualizing distributions
# Bar chart of the number of records per state; coord_flip() swaps the axes
# so the state names run along the vertical axis.
ggplot(haunted_places, aes(x = state)) +
  geom_bar() +
  coord_flip()

# Tabulate the number of rows per state (the same counts the bar chart draws).
count(haunted_places, state)
## # A tibble: 51 × 2
## state n
## <chr> <int>
## 1 Alabama 224
## 2 Alaska 32
## 3 Arizona 156
## 4 Arkansas 119
## 5 California 1070
## 6 Colorado 166
## 7 Connecticut 185
## 8 Delaware 37
## 9 Florida 328
## 10 Georgia 289
## # ℹ 41 more rows
# Histogram of latitude using bins of width 0.5.
haunted_places %>%
  ggplot(aes(x = latitude)) +
  geom_histogram(binwidth = 0.5)
## Warning: Removed 1261 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Overlaid frequency polygons of latitude, one coloured line per state,
# using bins of width 0.1.
ggplot(haunted_places) +
  geom_freqpoly(
    mapping = aes(x = latitude, colour = state),
    binwidth = 0.1
  )
## Warning: Removed 1261 rows containing non-finite outside the scale range
## (`stat_bin()`).

Typical values
# Fine-grained histogram of longitude (bin width 0.1) to show typical values.
haunted_places %>%
  ggplot() +
  geom_histogram(mapping = aes(x = longitude), binwidth = 0.1)
## Warning: Removed 1261 rows containing non-finite outside the scale range
## (`stat_bin()`).

Unusual values
# Histogram of longitude with ggplot2's default binning (no binwidth given),
# used as a first look for unusual values.
ggplot(data = haunted_places, mapping = aes(x = longitude)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1261 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Same default-binned longitude histogram, zoomed to the range [-85, 0].
# coord_cartesian() only changes the visible window; bins are still computed
# from all of the data.
ggplot(data = haunted_places, mapping = aes(x = longitude)) +
  geom_histogram() +
  coord_cartesian(xlim = c(-85, 0))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1261 rows containing non-finite outside the scale range
## (`stat_bin()`).

Missing values
# Keep longitude only where it lies within [-85, 0]; every other value is set
# to NA rather than dropping the row, and geom_point() then omits those rows
# with a warning.
# NOTE(review): this mask discards a large share of the rows - confirm that
# the -85 / 0 thresholds are the intended definition of "unusual".
ggplot(
  data = mutate(
    haunted_places,
    longitude = ifelse(longitude >= -85 & longitude <= 0, longitude, NA)
  ),
  mapping = aes(x = latitude, y = longitude)
) +
  geom_point()
## Warning: Removed 6774 rows containing missing values or values outside the scale range
## (`geom_point()`).

Covariation
A categorical and continuous variable
# Boxplots of latitude, one box per state.
ggplot(data = haunted_places, mapping = aes(x = state, y = latitude)) +
  geom_boxplot()
## Warning: Removed 1261 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Two categorical variables
# Count rows for every state/country combination and draw them as tiles,
# with the fill colour encoding the count `n`.
ggplot(
  data = count(haunted_places, state, country),
  mapping = aes(x = country, y = state, fill = n)
) +
  geom_tile()

Two continuous variables
# geom_hex() relies on the hexbin package for its hexagonal binning.
library(hexbin)

# Hexagonal 2-D bin counts of latitude against longitude.
ggplot(data = haunted_places, mapping = aes(x = latitude, y = longitude)) +
  geom_hex()
## Warning: Removed 1261 rows containing non-finite outside the scale range
## (`stat_binhex()`).

Patterns and models
# Jittered scatter plot of latitude against longitude; the random jitter
# reduces overplotting of coincident points.
haunted_places %>%
  ggplot(aes(x = latitude, y = longitude)) +
  geom_jitter()
## Warning: Removed 1261 rows containing missing values or values outside the scale range
## (`geom_point()`).
