library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.2.5
## ✔ tibble 2.0.0 ✔ dplyr 0.7.8
## ✔ tidyr 0.8.2 ✔ stringr 1.3.1
## ✔ readr 1.3.1 ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(wordcloud2)
library(babynames)
Load the required packages.
# Every record for the name "Lulu", across all years.
filter(babynames, name == "Lulu")
My name, “Lulu”, appears in many years of the data.
# Every name recorded for the year 1993.
filter(babynames, year == 1993)
This shows the number and frequency of names used in the year of my birth (1993).
# Records for "Lulu" in 1993, with the proportion also shown as a percentage.
babynames %>%
  filter(year == 1993 & name == "Lulu") %>%
  mutate(percent = 100 * prop)
This shows how many newborns were named “Lulu” in the year I was born, along with that count expressed as a percentage.
# Popularity ranking of baby names for 1993.
# The 1993 rows contain both sexes, so a plain row position is not a
# popularity rank. Rank explicitly by count within each sex instead
# (names with the same count share a rank).
babynames %>%
  filter(year == 1993) %>%
  group_by(sex) %>%
  mutate(rank = min_rank(desc(n))) %>%
  ungroup() %>%
  mutate(percent = round(prop * 100, 1))
This shows the popularity ranking of baby names in the year I was born.
# Rank of the name "Lulu" among the names recorded for 1993.
# The rank is computed from the count within each sex rather than from the
# row position, so it does not depend on how the rows happen to be ordered
# (names with the same count share a rank).
babynames %>%
  filter(year == 1993) %>%
  group_by(sex) %>%
  mutate(rank = min_rank(desc(n))) %>%
  ungroup() %>%
  mutate(percent = round(prop * 100, 1)) %>%
  filter(name == "Lulu")
This shows the rank of my name in my year of birth.
# Word cloud of the most frequent female names in 1993.
babynames %>%
  filter(year == 1993, sex == "F") %>% # one year, one sex
  top_n(100, n) %>%                    # keep the top names so the cloud stays readable
  select(name, n) %>%                  # the word and its frequency
  wordcloud2(size = 0.5)
This shows a word cloud of the 100 most common female baby names in 1993.
# Yearly proportion of babies named "Lulu".
# One line is drawn per sex: with a single line, any year that has a row for
# each sex would be joined into one zigzagging path.
babynames %>%
  filter(name == "Lulu") %>%
  ggplot(aes(x = year, y = prop, color = sex)) +
  geom_line()
This shows the popularity of my name among baby names between 1880 and 2017.
# Year in which "Lulu" reached its highest proportion among female babies.
babynames %>%
  filter(name == "Lulu" & sex == "F") %>%
  top_n(n = 1, wt = prop)
# Year in which "Lulu" reached its highest proportion among male babies.
babynames %>%
  filter(name == "Lulu" & sex == "M") %>%
  top_n(n = 1, wt = prop)
# The ten years with the highest proportion of female babies named "Lulu",
# listed from highest to lowest.
babynames %>%
  filter(name == "Lulu" & sex == "F") %>%
  top_n(n = 10, wt = prop) %>%
  arrange(desc(prop))
The most popular year for my name is 1881.
# Year in which "Eva" reached its highest proportion among female babies.
babynames %>%
  filter(name == "Eva" & sex == "F") %>%
  top_n(n = 1, wt = prop)
# The ten years with the highest proportion of female babies named "Eva",
# listed from highest to lowest.
babynames %>%
  filter(name == "Eva" & sex == "F") %>%
  top_n(n = 10, wt = prop) %>%
  arrange(desc(prop))
# Compare the popularity of "Lulu" and "Eva" among female babies over time.
# The percentage is plotted unrounded: rounding it to one decimal place
# would quantize the curves into 0.1-point steps.
babynames %>%
  filter(name %in% c("Lulu", "Eva"), sex == "F") %>%
  mutate(percent = prop * 100) %>%
  ggplot(aes(x = year, y = percent, color = name)) +
  geom_line()
This compares the popularity of my name with that of the name Eva over time.