Name analysis

library(tidyverse)

## ── Attaching packages ──────── tidyverse 1.2.1 ──

## ✔ ggplot2 3.1.0     ✔ purrr   0.2.5
## ✔ tibble  2.0.0     ✔ dplyr   0.7.8
## ✔ tidyr   0.8.2     ✔ stringr 1.3.1
## ✔ readr   1.3.1     ✔ forcats 0.3.0

## ── Conflicts ─────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(wordcloud2)
library(babynames)

Rank of the name Alexandra in 1999

babynames %>%
  # keep only girls born in 1999
  filter(year == 1999, sex == "F") %>%
  mutate(
    # rank by count, largest first; ranking on n explicitly means the result
    # no longer depends on the rows already being sorted by popularity
    # (ties are broken by order of appearance)
    rank = row_number(desc(n)),
    # proportion expressed as a percentage, rounded to one decimal place
    percent = round(prop * 100, 1)
  ) %>%
  # show only the row for Alexandra
  filter(name == "Alexandra")

Word cloud of girls’ names in 1999

babynames %>%
  # restrict to a single year and a single sex: girls born in 1999
  filter(year == 1999, sex == "F") %>%
  # keep only the 100 most frequent names so the cloud does not get too big
  top_n(100, n) %>%
  # keep the two relevant columns: the name and how often it occurs
  select(name, n) %>%
  # draw the word cloud at a font size of 0.5
  wordcloud2(size = 0.5)

Graph of the name Alexandra over time

babynames %>%                                    # start with the data
  filter(name == "Alexandra", sex == "F") %>%    # choose the name and sex
  # express the proportion as a percentage; deliberately left unrounded,
  # because rounding to one decimal would quantize the plotted values into
  # 0.1-wide steps and turn the line into a staircase
  mutate(percent = prop * 100) %>%
  ggplot(aes(x = year, y = percent)) +           # year on x, percent on y
  geom_line(color = "blue")                      # line graph with a blue line

Most popular year for the name Alexandra

babynames %>%
  # only the rows for the chosen name and sex
  filter(sex == "F", name == "Alexandra") %>%
  # keep the 10 rows (years) with the highest proportion
  top_n(10, prop) %>%
  # sort so the highest proportion comes first
  arrange(desc(prop))

Compared to other names

babynames %>%
  # girls with any of the three names being compared
  filter(sex == "F", name %in% c("Alexandra", "Vicki", "Cassidy")) %>%
  # yearly count (n) on the y-axis, one colored line per name
  ggplot(aes(x = year, y = n, color = name)) +
  geom_line()