Read Data
# csv
# Time the base-R reader. The call is wrapped directly in system.time() rather
# than piped into it: a pipe only times the read when the left-hand side is
# evaluated lazily, which older magrittr versions do not do.
system.time(
  read.csv("https://raw.githubusercontent.com/hadley/data-baby-names/master/baby-names.csv")
)
## user system elapsed
## 16.40 0.31 17.40
# fread
# Time data.table's reader on the same URL. The call is wrapped directly in
# system.time() rather than piped into it: a pipe only times the read when the
# left-hand side is evaluated lazily, which older magrittr versions do not do.
system.time(
  fread("https://raw.githubusercontent.com/hadley/data-baby-names/master/baby-names.csv")
)
## user system elapsed
## 0.17 0.11 0.83
Create topNames and topNames1990
# Load the baby-names data with fread().
babynames <- fread("https://raw.githubusercontent.com/hadley/data-baby-names/master/baby-names.csv")

# For every decade, average each name's `percent` over that decade's rows and
# keep the ten names with the highest average.
topNames <- babynames %>%
  mutate(decade = (year %/% 10) * 10) %>% # e.g. 1994 -> 1990
  group_by(decade, name) %>%
  # No `.groups` argument on purpose: the result stays grouped by `decade`,
  # which is what makes the selection below run once per decade.
  summarize(Percent = mean(percent)) %>%
  # slice_max() replaces the superseded top_n(); ties are still kept by
  # default. Rows now come back sorted by descending Percent within a decade.
  slice_max(Percent, n = 10) %>%
  ungroup()
## `summarise()` has grouped output by 'decade'. You can override using the `.groups` argument.
# Order all decades by descending average share.
topNames <- topNames %>%
  arrange(desc(Percent))

# Keep only the 1990s rows and give the columns presentation names.
# - `decade` is numeric, so it is compared against the number 1990 rather than
#   the string "1990" (which only worked through implicit coercion).
# - The original assigned four column names to a three-column table. The
#   fourth column is created here so the names line up.
#   NOTE(review): Position is assumed to be the rank by descending Percent
#   (1 = most popular) -- confirm against the code that consumes it.
topNames1990 <- topNames %>%
  filter(decade == 1990) %>%
  transmute(
    Decade = decade,
    Name = name,
    Percent,
    Position = row_number()
  )
Create topNames1990 table
Name | Percent |
---|---|
Matthew | 0.01708 |
Joshua | 0.01597 |
Christopher | 0.01593 |
Michael | 0.01498 |
Jacob | 0.01457 |
Nicholas | 0.01341 |
Andrew | 0.01325 |
Daniel | 0.01321 |
Jessica | 0.01279 |
Joseph | 0.01266 |
Top Names of the Nineties Plot