library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.6
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(dplyr)
# Attach the babynames dataset package from the regular library search path.
# A hard-coded, machine- and R-version-specific lib.loc makes library() fail
# on any setup where that exact directory does not exist.
library(babynames)
# Overview of the full babynames table before filtering by name.
# str() prints the class, the dimensions and the first values of each column;
# the transcribed output below shows 1858689 rows and 5 columns
# (year, sex, name, n, prop).
str(babynames)
## Classes 'tbl_df', 'tbl' and 'data.frame': 1858689 obs. of 5 variables:
## $ year: num 1880 1880 1880 1880 1880 1880 1880 1880 1880 1880 ...
## $ sex : chr "F" "F" "F" "F" ...
## $ name: chr "Mary" "Anna" "Emma" "Elizabeth" ...
## $ n : int 7065 2604 2003 1939 1746 1578 1472 1414 1320 1288 ...
## $ prop: num 0.0724 0.0267 0.0205 0.0199 0.0179 ...
# summary() prints per-column summary statistics. In the transcribed output the
# character columns (sex, name) only report Length/Class/Mode, so on the
# Mean / 3rd Qu. / Max. rows the two values shown belong to year and n.
# The prop column is printed as a separate wrapped section at the end.
summary(babynames)
## year sex name n
## Min. :1880 Length:1858689 Length:1858689 Min. : 5.0
## 1st Qu.:1950 Class :character Class :character 1st Qu.: 7.0
## Median :1983 Mode :character Mode :character Median : 12.0
## Mean :1973 Mean : 183.4
## 3rd Qu.:2002 3rd Qu.: 32.0
## Max. :2015 Max. :99680.0
## prop
## Min. :2.260e-06
## 1st Qu.:3.900e-06
## Median :7.350e-06
## Mean :1.391e-04
## 3rd Qu.:2.324e-05
## Max. :8.155e-02
# Mary: keep every row of babynames whose name is exactly "Mary".
# Leading `<-` keeps the name of the result visible at the start of the
# pipeline instead of hiding it behind a trailing `->`.
m <- babynames %>%
  filter(name == "Mary")
str(m)
## Classes 'tbl_df', 'tbl' and 'data.frame': 266 obs. of 5 variables:
## $ year: num 1880 1880 1881 1881 1882 ...
## $ sex : chr "F" "M" "F" "M" ...
## $ name: chr "Mary" "Mary" "Mary" "Mary" ...
## $ n : int 7065 27 6919 29 8148 30 8012 32 9217 36 ...
## $ prop: num 0.072384 0.000228 0.069991 0.000268 0.070426 ...
# The subset holds an F row and an M row for the same year (see the str()
# output above), so sex is mapped to colour; otherwise the two series are
# drawn as one undifferentiated cloud of points.
m %>%
  ggplot(aes(x = year, y = n, color = sex)) +
  geom_point()

# Ibrahim: keep every row of babynames whose name is exactly "Ibrahim".
# Leading `<-` keeps the name of the result visible at the start of the
# pipeline instead of hiding it behind a trailing `->`.
i <- babynames %>%
  filter(name == "Ibrahim")
str(i)
## Classes 'tbl_df', 'tbl' and 'data.frame': 52 obs. of 5 variables:
## $ year: num 1954 1961 1963 1964 1967 ...
## $ sex : chr "M" "M" "M" "M" ...
## $ name: chr "Ibrahim" "Ibrahim" "Ibrahim" "Ibrahim" ...
## $ n : int 5 5 6 10 9 6 10 13 12 27 ...
## $ prop: num 2.42e-06 2.32e-06 2.90e-06 4.93e-06 5.06e-06 ...
# Scatter plot of n against year for this name.
i %>%
  ggplot(aes(x = year, y = n)) +
  geom_point()

# Jordan: keep every row of babynames whose name is exactly "Jordan".
# Leading `<-` keeps the name of the result visible at the start of the
# pipeline instead of hiding it behind a trailing `->`.
j <- babynames %>%
  filter(name == "Jordan")
str(j)
## Classes 'tbl_df', 'tbl' and 'data.frame': 194 obs. of 5 variables:
## $ year: num 1880 1881 1882 1883 1884 ...
## $ sex : chr "M" "M" "M" "M" ...
## $ name: chr "Jordan" "Jordan" "Jordan" "Jordan" ...
## $ n : int 23 18 12 17 25 17 9 12 18 7 ...
## $ prop: num 1.94e-04 1.66e-04 9.83e-05 1.51e-04 2.04e-04 ...
# First pass: n against year with all rows drawn in a single colour.
j %>%
  ggplot(aes(x = year, y = n)) +
  geom_point()

# Second pass: the same scatter plot with sex mapped to colour, so rows of
# different sex are distinguishable.
j %>%
  ggplot(aes(x = year, y = n, color = sex)) +
  geom_point()

# Harold: keep every row of babynames whose name is exactly "Harold".
# Leading `<-` keeps the name of the result visible at the start of the
# pipeline instead of hiding it behind a trailing `->`.
h <- babynames %>%
  filter(name == "Harold")
str(h)
## Classes 'tbl_df', 'tbl' and 'data.frame': 230 obs. of 5 variables:
## $ year: num 1880 1881 1882 1883 1884 ...
## $ sex : chr "M" "M" "M" "M" ...
## $ name: chr "Harold" "Harold" "Harold" "Harold" ...
## $ n : int 113 120 127 108 191 201 224 279 298 340 ...
## $ prop: num 0.000954 0.001108 0.001041 0.00096 0.001556 ...
# Scatter plot of n against year, with sex mapped to colour.
h %>%
  ggplot(aes(x = year, y = n, color = sex)) +
  geom_point()
