# Set knitr chunk defaults for the whole document (echo = TRUE, results = "markup")
knitr::opts_chunk$set(echo = TRUE, results = "markup")

# Load packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(skimr)

Import your data

# UFO sightings are read with base read.csv(), which yields a plain data.frame.
# NOTE(review): the two imports use different readers; later code relies on
# dotted column names such as Data.Shape, so confirm before switching to read_csv().
ufo_sightings <- read.csv("../00_data/ufo_sightings.csv")

# Places are read with readr's read_csv(), which reports the guessed column types.
# NOTE(review): `places` is not used in the visible part of this file.
places <- read_csv("../00_data/places.csv")
## Rows: 14417 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): city, alternate_city_names, state, country, country_code, timezone
## dbl (4): latitude, longitude, population, elevation_m
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Chapter 15

Create a factor

# Tabulate sightings per shape, most frequent first.
# View() opens an interactive data viewer, which does not belong in a knitted
# (non-interactive) document, so the counts are printed instead.
ufo_sightings %>%
  count(Data.Shape, sort = TRUE)

# Full set of shape categories; their order here is the factor's level order
shape_options <- c(
  "light", "triangle", "circle", "fireball", "unknown", "other", "sphere",
  "disk", "oval", "formation", "cigar", "changing", "flash", "rectangle",
  "cylinder", "diamond", "chevron", "teardrop", "egg", "cone", "cross",
  "delta", "round", "changed", "crescent", "flare", "hexagon", "pyramid"
)

# A small sample of shapes to convert into a factor
shape1 <- c("triangle", "circle", "fireball", "disk")

f1 <- factor(x = shape1, levels = shape_options)

f1
## [1] triangle circle   fireball disk    
## 28 Levels: light triangle circle fireball unknown other sphere disk ... pyramid
# Sorting a factor follows the level order above, not alphabetical order
sort(f1)
## [1] triangle circle   fireball disk    
## 28 Levels: light triangle circle fireball unknown other sphere disk ... pyramid

Modify factor order

Make two bar charts here: one before ordering and another after.

# Unordered Factor Levels
# One row per reported shape with its mean encounter duration (NAs ignored)
encounters_duration_by_shape <- summarise(
  group_by(ufo_sightings, Data.Shape),
  duration = mean(Data.Encounter.duration, na.rm = TRUE)
)

encounters_duration_by_shape
## # A tibble: 28 × 2
##    Data.Shape duration
##    <chr>         <dbl>
##  1 changed       3600 
##  2 changing      2152.
##  3 chevron        484.
##  4 cigar         2241.
##  5 circle        2746.
##  6 cone          1660.
##  7 crescent     37800 
##  8 cross          765.
##  9 cylinder      4317.
## 10 delta         2682.
## # ℹ 18 more rows
# Plot: mean duration per shape, with shapes in their default order
encounters_duration_by_shape %>%
  ggplot(aes(x = duration, y = Data.Shape)) +
  geom_point() +
  # Labeling: labs() has to be added to the plot with `+`. Called as a
  # standalone statement it only builds and prints a labels object and the
  # plot keeps its default axis titles.
  labs(y = NULL, x = "Mean UFO Shape Sightings Durations")
# Ordered Factor Levels
# fct_reorder() orders the shape levels by their mean duration for plotting
encounters_duration_by_shape %>%
  ggplot(aes(x = duration, y = fct_reorder(.f = Data.Shape, .x = duration))) +
  geom_point() +
  # Without explicit labels the y-axis title would be the raw fct_reorder()
  # expression; use the same labels as the unordered plot for comparison.
  labs(y = NULL, x = "Mean UFO Shape Sightings Durations")

Modify factor levels

Show examples of three functions:

  • fct_recode
ufo_sightings %>%
  # Rename the "unknown" level to "unclear"
  mutate(shape_rev = fct_recode(Data.Shape, "unclear" = "unknown")) %>%
  select(Data.Shape, shape_rev) %>%
  # Show only the rows whose original shape was "unknown"
  filter(Data.Shape == "unknown") %>%
  # Convert the final data frame to a tibble for compact printing
  as_tibble()
## # A tibble: 4,359 × 2
##    Data.Shape shape_rev
##    <chr>      <fct>    
##  1 unknown    unclear  
##  2 unknown    unclear  
##  3 unknown    unclear  
##  4 unknown    unclear  
##  5 unknown    unclear  
##  6 unknown    unclear  
##  7 unknown    unclear  
##  8 unknown    unclear  
##  9 unknown    unclear  
## 10 unknown    unclear  
## # ℹ 4,349 more rows
  • fct_collapse
ufo_sightings %>%
  # Merge four vague shape levels into a single "Indescribable" level
  mutate(
    shape_col = fct_collapse(
      Data.Shape,
      "Indescribable" = c("unknown", "other", "changing", "changed")
    )
  ) %>%
  select(Data.Shape, shape_col) %>%
  # Drop the "circle" rows from the displayed comparison
  filter(Data.Shape != "circle") %>%
  # Convert the final data frame to a tibble for compact printing
  as_tibble()
## # A tibble: 54,742 × 2
##    Data.Shape shape_col    
##    <chr>      <fct>        
##  1 disk       disk         
##  2 changing   Indescribable
##  3 changing   Indescribable
##  4 cigar      cigar        
##  5 cylinder   cylinder     
##  6 cylinder   cylinder     
##  7 cylinder   cylinder     
##  8 disk       disk         
##  9 disk       disk         
## 10 disk       disk         
## # ℹ 54,732 more rows
  • fct_lump
# fct_lump() called without `n` or `prop` dispatches to fct_lump_lowfreq();
# the forcats documentation recommends calling the specific fct_lump_*()
# variant directly in new code. The rarest shapes are pooled into "Other".
ufo_sightings %>%
  mutate(shape_lump = fct_lump_lowfreq(Data.Shape)) %>%
  distinct(shape_lump)
##    shape_lump
## 1        disk
## 2    changing
## 3       cigar
## 4      circle
## 5    cylinder
## 6         egg
## 7    fireball
## 8       flash
## 9   formation
## 10      light
## 11      other
## 12       oval
## 13     sphere
## 14   teardrop
## 15   triangle
## 16    unknown
## 17  rectangle
## 18    diamond
## 19      Other
## 20    chevron

Chapter 16

No need to do anything here.