Import Data
# Read the TidyTuesday UFO sightings CSV directly from GitHub, standardise the
# column names, and keep only the rows that have a state recorded.
data <- "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2019/2019-06-25/ufo_sightings.csv" %>%
  readr::read_csv() %>%
  janitor::clean_names() %>%
  # Drop sightings whose state is missing
  filter(!is.na(state))
## Rows: 80332 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): date_time, city_area, state, country, ufo_shape, described_encounte...
## dbl (3): encounter_length, latitude, longitude
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the filtered tibble to inspect its columns and first rows.
data
## # A tibble: 74,535 × 11
## date_time city_area state country ufo_shape encounter_length
## <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 10/10/1949 20:30 san marcos tx us cylinder 2700
## 2 10/10/1949 21:00 lackland afb tx <NA> light 7200
## 3 10/10/1956 21:00 edna tx us circle 20
## 4 10/10/1960 20:00 kaneohe hi us light 900
## 5 10/10/1961 19:00 bristol tn us sphere 300
## 6 10/10/1965 23:45 norwalk ct us disk 1200
## 7 10/10/1966 20:00 pell city al us disk 180
## 8 10/10/1966 21:00 live oak fl us disk 120
## 9 10/10/1968 13:00 hawthorne ca us circle 300
## 10 10/10/1968 19:00 brevard nc us fireball 180
## # ℹ 74,525 more rows
## # ℹ 5 more variables: described_encounter_length <chr>, description <chr>,
## # date_documented <chr>, latitude <dbl>, longitude <dbl>
A categorical and a continuous variable
# Boxplot of encounter_length for each ufo_shape category.
ggplot(data, aes(x = ufo_shape, y = encounter_length)) +
  geom_boxplot()
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Two categorical variables
# Heatmap of sighting counts for every state / UFO shape combination.
data %>%
  # One row per (state, ufo_shape) pair; the count is stored in column `n`.
  count(state, ufo_shape) %>%
  ggplot(aes(x = state, y = ufo_shape, fill = n)) +
  # The `+` attaches the tile layer to the plot; run as a separate statement,
  # the layer object is only printed and the plot is drawn without any geom.
  geom_tile(na.rm = FALSE)
## Warning in geom_tile(nr.rm = FALSE): Ignoring unknown parameters: `nr.rm`
## geom_tile: na.rm = FALSE, lineend = butt, linejoin = mitre
## stat_identity: na.rm = FALSE
## position_identity