# Load the couples data set from the Excel workbook
data <- read_excel(path = "../00_data/Apply_1.xlsx")

# Average age difference for each distinct actor_2_birthdate value.
# The birthdate column is not converted here; it is used as-is as the
# grouping key.
age_diff <- data %>%
  group_by(actor_2_birthdate) %>%
  summarise(avg_age_diff = mean(age_difference))

age_diff
## # A tibble: 640 × 2
## actor_2_birthdate avg_age_diff
## <chr> <dbl>
## 1 1906-10-06 9
## 2 1907-05-12 7.33
## 3 1907-06-04 3
## 4 1907-07-16 12
## 5 1910-06-03 21
## 6 1911-07-16 19
## 7 1913-11-05 12
## 8 1915-08-29 16
## 9 1916-07-01 7
## 10 1917-10-22 10
## # ℹ 630 more rows
# Plot: one point per birthdate, with birthdates in their default order
age_diff %>%
  ggplot(mapping = aes(y = actor_2_birthdate, x = avg_age_diff)) +
  geom_point()
Ordered Factor Levels
# Same scatter plot, with birthdates ordered by their average age difference.
# Every component must be joined with `+`: a labs() call on its own line is
# evaluated as a separate statement and never reaches the plot.
age_diff %>%
  ggplot(aes(
    x = avg_age_diff,
    y = fct_reorder(.f = actor_2_birthdate, .x = avg_age_diff)
  )) +
  geom_point() +
  # Labeling
  labs(y = NULL, x = "Average Age Difference of Couples")
Make two bar charts here: one before ordering and another after.
# Before Ordering
# Raw age differences from `data`, birthdates left in their default order
ggplot(
  data = data,
  mapping = aes(x = age_difference, y = actor_2_birthdate)
) +
  geom_point()
# After Ordering
# Reorder the birthdate levels by average age difference; without
# fct_reorder() this plot is identical to the unordered one.
age_diff %>%
  ggplot(aes(
    x = avg_age_diff,
    y = fct_reorder(actor_2_birthdate, avg_age_diff)
  )) +
  geom_point()
Show examples of three functions:
# fct_recode() takes `new_level = "old_level"`. The column only contains
# "man" and "woman", so rename the existing level "woman" to "female"
# (asking for the old level "female" matches nothing and only warns).
data %>%
  mutate(
    character_2_gender_rev = fct_recode(character_2_gender, female = "woman")
  ) %>%
  select(character_2_gender, character_2_gender_rev) %>%
  filter(character_2_gender == "woman")
## # A tibble: 940 × 2
## character_2_gender character_2_gender_rev
## <chr> <fct>
## 1 woman female
## 2 woman female
## 3 woman female
## 4 woman female
## 5 woman female
## 6 woman female
## 7 woman female
## 8 woman female
## 9 woman female
## 10 woman female
## # ℹ 930 more rows
# fct_collapse(): the new level "woman" is built from the single old level
# "woman", so the factor values are unchanged. Only the rows whose original
# value is not "woman" are kept for display.
data %>%
  mutate(
    character_2_gender_col = fct_collapse(character_2_gender, woman = "woman")
  ) %>%
  select(character_2_gender, character_2_gender_col) %>%
  filter(character_2_gender != "woman")
## # A tibble: 215 × 2
## character_2_gender character_2_gender_col
## <chr> <fct>
## 1 man man
## 2 man man
## 3 man man
## 4 man man
## 5 man man
## 6 man man
## 7 man man
## 8 man man
## 9 man man
## 10 man man
## # ℹ 205 more rows
# Number of rows for each character_2_gender value
count(data, character_2_gender)
## # A tibble: 2 × 2
## character_2_gender n
## <chr> <int>
## 1 man 215
## 2 woman 940
# Lump the least frequent levels into "Other". fct_lump() without `n` or
# `prop` dispatches to fct_lump_lowfreq(); calling it directly makes the
# lumping rule explicit and gives the same result.
data %>%
  mutate(character_2_gender_lump = fct_lump_lowfreq(character_2_gender)) %>%
  distinct(character_2_gender_lump)
## # A tibble: 2 × 1
## character_2_gender_lump
## <fct>
## 1 Other
## 2 woman
No need to do anything here.