library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0     ✔ purrr   0.2.5
## ✔ tibble  2.0.0     ✔ dplyr   0.7.8
## ✔ tidyr   0.8.2     ✔ stringr 1.3.1
## ✔ readr   1.3.1     ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(wordcloud2)
library(babynames)

Load the required packages.

# Every record for the name "Lulu", across all years and both sexes.
filter(babynames, name == "Lulu")

My name, “Lulu”, appears in many years of the data.

# Every name recorded for births in 1993.
filter(babynames, year == 1993)

This shows the number and frequency of names used in the year of my birth (1993).

# Records for "Lulu" in 1993, with the proportion also shown as a percentage.
babynames %>%
  filter(year == 1993 & name == "Lulu") %>%
  mutate(percent = 100 * prop)

This shows how many newborns were named “Lulu” in the year of my birth (1993), and what percentage of births that represents.

# Rank the names of 1993 by popularity.
# The rank is derived from the counts (n) within each sex. Numbering the rows
# with row_number() put both sexes into a single sequence and relied on the
# stored row order instead of the counts. Names with equal counts share a rank.
babynames %>%
  filter(year == 1993) %>%
  group_by(sex) %>%
  mutate(rank = min_rank(desc(n))) %>%
  ungroup() %>%                      # drop grouping so later steps see a plain table
  mutate(percent = round(prop * 100, 1))

This shows the popularity ranking of baby names in the year I was born.

# Look up the popularity rank of "Lulu" among the names of 1993.
# The rank is derived from the counts (n) within each sex. Numbering the rows
# with row_number() put both sexes into a single sequence and relied on the
# stored row order instead of the counts. Names with equal counts share a rank.
babynames %>%
  filter(year == 1993) %>%
  group_by(sex) %>%
  mutate(rank = min_rank(desc(n))) %>%
  ungroup() %>%                      # drop grouping before the final filter
  mutate(percent = round(prop * 100, 1)) %>%
  filter(name == "Lulu")

This shows the rank of my name in my year of birth.

# Word cloud of the most common female names of 1993.
babynames %>%
  # restrict to a single year and a single sex
  filter(year == 1993, sex == "F") %>%
  # wordcloud2() needs just the word and its frequency
  select(name, n) %>%
  # keep only the top names so the cloud stays readable
  top_n(100, n) %>%
  wordcloud2(size = 0.5)

This shows a word cloud of the 100 most popular female baby names in 1993.

# Plot how the share of babies named "Lulu" changes over the years.
# Sex is mapped to colour so that each sex gets its own line; without it, any
# year that has a row for both sexes is joined into one zig-zagging path.
babynames %>%
  filter(name == "Lulu") %>%
  ggplot(aes(x = year, y = prop, color = sex)) +
  geom_line()

This shows the popularity of my name among baby names between 1880 and 2017.

# Year in which "Lulu" had its highest share among girls.
babynames %>%
  filter(name == "Lulu" & sex == "F") %>%
  top_n(1, prop)

# Year in which "Lulu" had its highest share among boys.
babynames %>%
  filter(name == "Lulu" & sex == "M") %>%
  top_n(1, prop)

# Ten strongest years for "Lulu" among girls, highest share first.
babynames %>%
  filter(name == "Lulu" & sex == "F") %>%
  top_n(10, prop) %>%
  arrange(desc(prop))

The most popular year for my name is 1881.

# Year in which "Eva" had its highest share among girls.
babynames %>%
  filter(name == "Eva" & sex == "F") %>%
  top_n(1, prop)

# Ten strongest years for "Eva" among girls, highest share first.
babynames %>%
  filter(name == "Eva" & sex == "F") %>%
  top_n(10, prop) %>%
  arrange(desc(prop))
# Compare the yearly popularity of "Lulu" and "Eva" among girls.
# The percentage is plotted unrounded: rounding to one decimal place first
# snaps every value onto a 0.1-point grid and turns the curves into steps.
babynames %>%
  filter(name %in% c("Lulu", "Eva"), sex == "F") %>%
  mutate(percent = prop * 100) %>%
  ggplot(aes(x = year, y = percent, color = name)) +
  geom_line()

This compares the popularity of my name with that of the name “Eva” over time.