# Document-wide knitr chunk defaults: echo each chunk's source and use the
# "markup" results mode for its output.
knitr::opts_chunk$set(
  results = "markup",
  echo = TRUE
)
# Load package
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(skimr)
# Load the two source tables from the shared data folder.
# NOTE(review): the sightings file is read with base read.csv() while places
# uses readr's read_csv(). Presumably read.csv()'s name repair is what yields
# the dotted column names (Data.Shape, Data.Encounter.duration) used further
# down -- confirm before unifying the two readers.
ufo_sightings <- read.csv(file = "../00_data/ufo_sightings.csv")
places <- read_csv(file = "../00_data/places.csv")
## Rows: 14417 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): city, alternate_city_names, state, country, country_code, timezone
## dbl (4): latitude, longitude, population, elevation_m
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Tally sightings per reported shape, most frequent first.
# The table is printed instead of being piped into View(): View() opens an
# interactive data viewer, so it contributes nothing to a knitted report and
# can fail outright when no viewer is available.
ufo_sightings %>%
  count(Data.Shape, sort = TRUE)
# A handful of example shape values to encode as a factor.
shape1 <- c("triangle", "circle", "fireball", "disk")

# The full set of allowed shape labels, listed in the order the factor
# levels should follow.
shape_options <- c(
  "light", "triangle", "circle", "fireball", "unknown", "other", "sphere",
  "disk", "oval", "formation", "cigar", "changing", "flash", "rectangle",
  "cylinder", "diamond", "chevron", "teardrop", "egg", "cone", "cross",
  "delta", "round", "changed", "crescent", "flare", "hexagon", "pyramid"
)

# Encode the examples against the full level set, then display the factor.
f1 <- factor(x = shape1, levels = shape_options)
f1
## [1] triangle circle fireball disk
## 28 Levels: light triangle circle fireball unknown other sphere disk ... pyramid
# Sorting a factor follows the position of its levels, not alphabetical order;
# shape1 was already in level order, so the result prints the same as f1.
sort(f1, decreasing = FALSE)
## [1] triangle circle fireball disk
## 28 Levels: light triangle circle fireball unknown other sphere disk ... pyramid
Make two bar charts here: one before ordering the factor levels and another after.
# Unordered Factor Levels
# Mean encounter duration for each reported shape; missing durations are
# dropped before averaging. Rows come back in group-key order.
encounters_duration_by_shape <- summarise(
  group_by(ufo_sightings, Data.Shape),
  duration = mean(Data.Encounter.duration, na.rm = TRUE)
)

encounters_duration_by_shape
## # A tibble: 28 × 2
## Data.Shape duration
## <chr> <dbl>
## 1 changed 3600
## 2 changing 2152.
## 3 chevron 484.
## 4 cigar 2241.
## 5 circle 2746.
## 6 cone 1660.
## 7 crescent 37800
## 8 cross 765.
## 9 cylinder 4317.
## 10 delta 2682.
## # ℹ 18 more rows
# Plot
# Mean duration per shape, with shapes in their default (unordered) sequence.
encounters_duration_by_shape %>%
  ggplot(aes(x = duration, y = Data.Shape)) +
  geom_point() +
  # Labeling
  # labs() must be chained with `+`; run as a separate statement it only
  # prints a labels object and never reaches the plot.
  labs(y = NULL, x = "Mean UFO Shape Sightings Durations")
# Ordered Factor Levels
# Same plot, but with the shapes reordered by their mean duration.
encounters_duration_by_shape %>%
  ggplot(aes(x = duration, y = fct_reorder(.f = Data.Shape, .x = duration))) +
  geom_point() +
  # Without explicit labels the y-axis title would be the raw fct_reorder()
  # call; use the same axis labels as the unordered plot.
  labs(y = NULL, x = "Mean UFO Shape Sightings Durations")
Show examples of three functions: fct_recode, fct_collapse, and fct_lump.
# fct_recode: rename the "unknown" level to "unclear", then show only the
# rows that were affected. The recode runs before the filter so the new
# factor keeps every level of the full data set.
ufo_sightings %>%
  mutate(shape_rev = fct_recode(Data.Shape, "unclear" = "unknown")) %>%
  select(Data.Shape, shape_rev) %>%
  filter(Data.Shape == "unknown") %>%
  as_tibble()
## # A tibble: 4,359 × 2
## Data.Shape shape_rev
## <chr> <fct>
## 1 unknown unclear
## 2 unknown unclear
## 3 unknown unclear
## 4 unknown unclear
## 5 unknown unclear
## 6 unknown unclear
## 7 unknown unclear
## 8 unknown unclear
## 9 unknown unclear
## 10 unknown unclear
## # ℹ 4,349 more rows
# fct_collapse: merge the four catch-all shape labels into one
# "Indescribable" level, then display every row except the circles.
ufo_sightings %>%
  mutate(
    shape_col = fct_collapse(
      Data.Shape,
      "Indescribable" = c("unknown", "other", "changing", "changed")
    )
  ) %>%
  select(Data.Shape, shape_col) %>%
  filter(Data.Shape != "circle") %>%
  as_tibble()
## # A tibble: 54,742 × 2
## Data.Shape shape_col
## <chr> <fct>
## 1 disk disk
## 2 changing Indescribable
## 3 changing Indescribable
## 4 cigar cigar
## 5 cylinder cylinder
## 6 cylinder cylinder
## 7 cylinder cylinder
## 8 disk disk
## 9 disk disk
## 10 disk disk
## # ℹ 54,732 more rows
# fct_lump: lump shapes with fct_lump()'s default settings, then list the
# distinct levels that remain (including the new "Other" level).
ufo_sightings %>%
  mutate(shape_lump = fct_lump(Data.Shape)) %>%
  distinct(shape_lump)
## shape_lump
## 1 disk
## 2 changing
## 3 cigar
## 4 circle
## 5 cylinder
## 6 egg
## 7 fireball
## 8 flash
## 9 formation
## 10 light
## 11 other
## 12 oval
## 13 sphere
## 14 teardrop
## 15 triangle
## 16 unknown
## 17 rectangle
## 18 diamond
## 19 Other
## 20 chevron
No need to do anything here.