Import your data

# Read the Excel workbook into `data`, the table used throughout this document
data <- read_excel(path = "../00_data/Apply_1.xlsx")

Chapter 15

Create a factor

# Average age difference of couples for each second-actor birthdate.
# actor_2_birthdate stays a character column here; no date conversion is done.
age_diff <- data %>%
    group_by(actor_2_birthdate) %>%
    summarise(avg_age_diff = mean(age_difference))

age_diff
## # A tibble: 640 × 2
##    actor_2_birthdate avg_age_diff
##    <chr>                    <dbl>
##  1 1906-10-06                9   
##  2 1907-05-12                7.33
##  3 1907-06-04                3   
##  4 1907-07-16               12   
##  5 1910-06-03               21   
##  6 1911-07-16               19   
##  7 1913-11-05               12   
##  8 1915-08-29               16   
##  9 1916-07-01                7   
## 10 1917-10-22               10   
## # ℹ 630 more rows
# Scatter plot of the averages with birthdates in their default order
age_diff %>%
    ggplot(mapping = aes(x = avg_age_diff, y = actor_2_birthdate)) +
    geom_point()

Ordered Factor Levels

# Reorder the birthdate levels by their average age difference so the points
# rise steadily along the y axis.
age_diff %>%
    ggplot(aes(
        x = avg_age_diff,
        y = fct_reorder(.f = actor_2_birthdate, .x = avg_age_diff)
    )) +
    geom_point() +
    # Labeling: labs() must be added to the plot with `+`; as a separate
    # statement it only prints the labels object and leaves the plot unlabeled.
    labs(y = NULL, x = "Average Age Difference of Couples")

Modify factor order

Make two bar charts here: one before ordering and another after.

# Both plots use the summarised averages so that they differ only in the
# order of the y-axis levels.

# Before Ordering: birthdates appear in their default order
age_diff %>%
    ggplot(aes(x = avg_age_diff, y = actor_2_birthdate)) +
    geom_point()

# After Ordering: birthdates are sorted by their average age difference
age_diff %>%
    ggplot(aes(
        x = avg_age_diff,
        y = fct_reorder(.f = actor_2_birthdate, .x = avg_age_diff)
    )) +
    geom_point()

Modify factor levels

Show examples of three functions:

  • fct_recode
# fct_recode() takes new_level = "old_level". The column only contains the
# levels "man" and "woman", so recode an existing level ("woman") instead of a
# level that is absent from the data, which only triggers an
# "Unknown levels" warning and changes nothing.
data %>%
    mutate(
        character_2_gender_rev = fct_recode(
            character_2_gender,
            "female" = "woman"
        )
    ) %>%
    select(character_2_gender, character_2_gender_rev) %>%
    filter(character_2_gender == "woman")
## # A tibble: 940 × 2
##    character_2_gender character_2_gender_rev
##    <chr>              <fct>                 
##  1 woman              female                
##  2 woman              female                
##  3 woman              female                
##  4 woman              female                
##  5 woman              female                
##  6 woman              female                
##  7 woman              female                
##  8 woman              female                
##  9 woman              female                
## 10 woman              female                
## # ℹ 930 more rows
  • fct_collapse
# Demonstrate fct_collapse(). The single group defined here maps "woman" onto
# itself, so no level is merged or renamed; the rows kept are the non-"woman"
# ones.
# NOTE(review): consider collapsing several distinct levels to show an effect.
data %>%
    mutate(
        character_2_gender_col = fct_collapse(
            character_2_gender,
            "woman" = "woman"
        )
    ) %>%
    select(character_2_gender, character_2_gender_col) %>%
    filter(character_2_gender != "woman")
## # A tibble: 215 × 2
##    character_2_gender character_2_gender_col
##    <chr>              <fct>                 
##  1 man                man                   
##  2 man                man                   
##  3 man                man                   
##  4 man                man                   
##  5 man                man                   
##  6 man                man                   
##  7 man                man                   
##  8 man                man                   
##  9 man                man                   
## 10 man                man                   
## # ℹ 205 more rows
  • fct_lump
# Tally the rows per gender level before lumping
count(data, character_2_gender)
## # A tibble: 2 × 2
##   character_2_gender     n
##   <chr>              <int>
## 1 man                  215
## 2 woman                940
# Lump the least frequent level(s) into "Other" and list the distinct levels
# that remain
data %>%
    mutate(character_2_gender_lump = fct_lump(f = character_2_gender)) %>%
    distinct(character_2_gender_lump)
## # A tibble: 2 × 1
##   character_2_gender_lump
##   <fct>                  
## 1 Other                  
## 2 woman

Chapter 16

No need to do anything here.