Import data

# Read the TidyTuesday 2023-10-10 haunted places CSV from GitHub.
haunted_places <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-10-10/haunted_places.csv"
)
## Rows: 10992 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): city, country, description, location, state, state_abbrev
## dbl (4): longitude, latitude, city_longitude, city_latitude
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Introduction

Questions

Variation

Visualizing distributions

# Bar chart of the number of rows per state; coord_flip() turns the bars
# horizontal so the state names sit on the vertical axis.
haunted_places %>%
  ggplot(aes(x = state)) +
  geom_bar() +
  coord_flip()

# Tabulate the number of rows for each state.
count(haunted_places, state)
## # A tibble: 51 × 2
##    state           n
##    <chr>       <int>
##  1 Alabama       224
##  2 Alaska         32
##  3 Arizona       156
##  4 Arkansas      119
##  5 California   1070
##  6 Colorado      166
##  7 Connecticut   185
##  8 Delaware       37
##  9 Florida       328
## 10 Georgia       289
## # ℹ 41 more rows
# Histogram of latitude with bins 0.5 wide.
haunted_places %>%
  ggplot(aes(x = latitude)) +
  geom_histogram(binwidth = 0.5)
## Warning: Removed 1261 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Overlaid frequency polygons of latitude (bins 0.1 wide), one coloured
# line per state.
haunted_places %>%
  ggplot(aes(x = latitude, colour = state)) +
  geom_freqpoly(binwidth = 0.1)
## Warning: Removed 1261 rows containing non-finite outside the scale range
## (`stat_bin()`).

Typical values

# Histogram of longitude with narrow bins (0.1 wide).
haunted_places %>%
  ggplot(aes(x = longitude)) +
  geom_histogram(binwidth = 0.1)
## Warning: Removed 1261 rows containing non-finite outside the scale range
## (`stat_bin()`).

Unusual values

# Histogram of longitude using the default binning (no binwidth supplied).
ggplot(data = haunted_places, mapping = aes(x = longitude)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1261 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Same default-binned longitude histogram, with the visible x range
# limited to -85..0 via coord_cartesian().
ggplot(data = haunted_places, mapping = aes(x = longitude)) +
  geom_histogram() +
  coord_cartesian(xlim = c(-85, 0))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1261 rows containing non-finite outside the scale range
## (`stat_bin()`).

Missing values

# Keep every row, but replace longitudes outside the range [-85, 0] with NA.
# (Row-filtering alternative, left disabled:
#  filter(longitude < -85 | longitude > 0))
haunted_places %>%
  mutate(
    longitude = ifelse(longitude >= -85 & longitude <= 0, longitude, NA)
  ) %>%
  # Scatter plot of the result
  ggplot(mapping = aes(x = latitude, y = longitude)) +
  geom_point()
## Warning: Removed 6774 rows containing missing values or values outside the scale range
## (`geom_point()`).

Covariation

A categorical and continuous variable

# Box plot of latitude, one box per state.
ggplot(data = haunted_places, mapping = aes(x = state, y = latitude)) +
  geom_boxplot()
## Warning: Removed 1261 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Two categorical variables

# Tile plot of row counts for each state/country combination; the fill
# colour encodes the count column `n` produced by count().
ggplot(
  data = count(haunted_places, state, country),
  mapping = aes(x = country, y = state, fill = n)
) +
  geom_tile()

Two continuous variables

# Attach hexbin, then draw a hexagonal-bin plot of latitude (x) against
# longitude (y).
library(hexbin)
ggplot(data = haunted_places, mapping = aes(x = latitude, y = longitude)) +
  geom_hex()
## Warning: Removed 1261 rows containing non-finite outside the scale range
## (`stat_binhex()`).

Patterns and models

# Jittered scatter plot of latitude (x) against longitude (y).
haunted_places %>%
  ggplot(aes(x = latitude, y = longitude)) +
  geom_jitter()
## Warning: Removed 1261 rows containing missing values or values outside the scale range
## (`geom_point()`).