Read Data
# Benchmark 1: base read.csv() on the remote baby-names CSV.
# The expression is handed straight to system.time() so the download and
# parse are what gets timed.
system.time(
  read.csv("https://raw.githubusercontent.com/hadley/data-baby-names/master/baby-names.csv")
)
## user system elapsed
## 16.40 0.31 17.40

# Benchmark 2: fread() on the same URL, for comparison with the run above.
system.time(
  fread("https://raw.githubusercontent.com/hadley/data-baby-names/master/baby-names.csv")
)
## user system elapsed
## 0.17 0.11 0.83
Create topNames and topNames1990
# Load the baby-names data (the pipeline below uses its year, name and
# percent columns).
babynames <- fread("https://raw.githubusercontent.com/hadley/data-baby-names/master/baby-names.csv")

# topNames: for every decade, the ten names with the highest mean `percent`,
# sorted from most to least popular across all decades.
topNames <- babynames %>%
  # Floor each year to the start of its decade (e.g. 1994 -> 1990).
  mutate(decade = (year %/% 10) * 10) %>%
  group_by(decade, name) %>%
  # Keep the result grouped by decade (and say so explicitly, which also
  # silences the summarise() regrouping message) so the top-10 selection
  # below is made per decade rather than over the whole table.
  summarize(Percent = mean(percent), .groups = "drop_last") %>%
  # slice_max() replaces the superseded top_n(); like top_n() it keeps ties.
  slice_max(Percent, n = 10) %>%
  ungroup() %>%
  arrange(desc(Percent))

# topNames1990: the 1990s rows only, with presentation-style column names.
# `decade` is numeric, so compare against the number 1990, not the string.
# NOTE(review): the original assigned four column names
# (Decade, Name, Percent, Position) to a three-column table, which errors.
# Position is derived here as the rank by Percent within the decade --
# confirm that this is what the downstream plot expects.
topNames1990 <- topNames %>%
  filter(decade == 1990) %>%
  mutate(Position = row_number()) %>%
  rename(Decade = decade, Name = name)

# Create topNames1990 table
| Name | Percent |
|---|---|
| Matthew | 0.01708 |
| Joshua | 0.01597 |
| Christopher | 0.01593 |
| Michael | 0.01498 |
| Jacob | 0.01457 |
| Nicholas | 0.01341 |
| Andrew | 0.01325 |
| Daniel | 0.01321 |
| Jessica | 0.01279 |
| Joseph | 0.01266 |
Top Names of the Nineties Plot