library(tidyverse)
## ── Attaching packages ──────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.2.5
## ✔ tibble 2.0.0 ✔ dplyr 0.7.8
## ✔ tidyr 0.8.2 ✔ stringr 1.3.1
## ✔ readr 1.3.1 ✔ forcats 0.3.0
## ── Conflicts ─────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(wordcloud2)
library(babynames)
Rank of the name Alexandra in 1999
# Rank and percentage share of the name "Alexandra" among girls born in 1999.
babynames %>%
  filter(year == 1999, sex == "F") %>% # girls born in 1999 only
  # Sort explicitly so the rank does not depend on the order in which the
  # rows happen to be stored; ties in n are broken alphabetically by name.
  arrange(desc(n), name) %>%
  mutate(
    rank = row_number(), # 1 = most common name
    percent = round(prop * 100, 1) # prop expressed as a percentage, 1 decimal
  ) %>%
  filter(name == "Alexandra") # keep only the row for the name of interest
Word cloud of girls’ names in 1999
# Word cloud of the most common girls' names in 1999.
babynames %>%
  filter(year == 1999, sex == "F") %>% # restrict to one year and one sex
  select(name, n) %>% # keep just the name and how often it occurs
  top_n(100, n) %>% # top 100 by count, so the cloud does not get too big
  wordcloud2(size = .5) # draw the word cloud with size = .5
Graph of the name Alexandra over time
# Line chart of the percentage of girls named "Alexandra" in each year.
babynames %>%
  filter(sex == "F", name == "Alexandra") %>% # one name, one sex
  mutate(percent = round(100 * prop, 1)) %>% # prop as a percentage, 1 decimal
  ggplot(mapping = aes(x = year, y = percent)) + # year on x, percent on y
  geom_line(colour = "blue") # blue line connecting the yearly values
Most popular year for the name Alexandra
# The years in which "Alexandra" had its highest share (prop) among girls.
babynames %>%
  filter(sex == "F", name == "Alexandra") %>% # one row per year for this name
  top_n(10, prop) %>% # keep the 10 rows (years) with the largest prop
  arrange(desc(prop)) # largest share first
Compared to other names
# Yearly counts for three girls' names, one coloured line per name.
babynames %>%
  filter(
    name %in% c("Alexandra", "Vicki", "Cassidy"), # the names to compare
    sex == "F"
  ) %>%
  ggplot(mapping = aes(x = year, y = n, colour = name)) + # count by year
  geom_line()