The yearly files from 1880 to 2018 had to be combined into a single dataset, with the year added as a variable. The imported dataset is shown below.

library(readr)

# Load the combined names file. Sex is read as character explicitly
# (presumably so a column of "F" values is not guessed as logical -- confirm).
all_baby_names <- read_csv(
  "~/Downloads/asgn1/all_baby_names",
  col_types = cols(Sex = col_character())
)
## Warning: Missing column names filled in: 'X1' [1]

# The first column has no header in the file (see the warning above); drop it
# so only Name, Sex, Count and year remain.
bbynames <- all_baby_names[-1]

# Preview the first six rows.
head(bbynames, n = 6)
## # A tibble: 6 x 4
##   Name      Sex   Count  year
##   <chr>     <chr> <dbl> <dbl>
## 1 Mary      F      7065  1880
## 2 Anna      F      2604  1880
## 3 Emma      F      2003  1880
## 4 Elizabeth F      1939  1880
## 5 Minnie    F      1746  1880
## 6 Margaret  F      1578  1880

# Preview the last six rows.
tail(bbynames, n = 6)
## # A tibble: 6 x 4
##   Name   Sex   Count  year
##   <chr>  <chr> <dbl> <dbl>
## 1 Zykeem M         5  2018
## 2 Zylas  M         5  2018
## 3 Zyran  M         5  2018
## 4 Zyrie  M         5  2018
## 5 Zyron  M         5  2018
## 6 Zzyzx  M         5  2018

Please note: the working directory is set for a Mac. Rerunning the write.csv function will overwrite existing files in the folder. For correct output, delete the files “all_baby_names” and “mostpop.csv” prior to running the code.

##Question 1 Our team interpreted this question to mean the following: the most popular male/female baby name within a single year (counts for the same name are not combined across years). The most popular male baby name is James, with a count of 94,757 (in 1947). The most popular female baby name is Linda, with a count of 99,689 (in 1947).

# Print all rows ranked from the largest single-year Count downwards; the
# first F row and the first M row in the output answer Question 1.
bbynames[order(bbynames[["Count"]], decreasing = TRUE), ]
## # A tibble: 1,957,046 x 4
##    Name    Sex   Count  year
##    <chr>   <chr> <dbl> <dbl>
##  1 Linda   F     99689  1947
##  2 Linda   F     96211  1948
##  3 James   M     94757  1947
##  4 Michael M     92704  1957
##  5 Robert  M     91640  1947
##  6 Linda   F     91016  1949
##  7 Michael M     90656  1956
##  8 Michael M     90517  1958
##  9 James   M     88584  1948
## 10 Michael M     88528  1954
## # … with 1,957,036 more rows

##Question 2 The top five baby names for males for the year 1950 are shown in the plot below.

# Question 2: bar plot of the five most frequent male names in 1950.
males1950 <- bbynames[bbynames$Sex == "M" & bbynames$year == 1950, ]

# Rank by Count explicitly rather than trusting the row order of the input
# file, so head() is guaranteed to return the five largest counts.
top5males <- head(
  males1950[order(males1950$Count, decreasing = TRUE), ],
  5
)

# names.arg is spelled out in full (no partial argument matching), and the
# y-axis is extended to a "pretty" limit so it covers the tallest bar.
barplot(top5males$Count, names.arg = top5males$Name,
        xlab = "Male Baby Names",
        ylab = "Frequency",
        main = "Top 5 Male Baby Names",
        ylim = range(pretty(c(0, top5males$Count))),
        col = "blue")

##Question 3 The top five baby names for females for the year 1980 are shown in the plot below.

# Question 3: bar plot of the five most frequent female names in 1980.
females1980 <- bbynames[bbynames$Sex == "F" & bbynames$year == 1980, ]

# Rank by Count explicitly rather than trusting the row order of the input
# file, so head() is guaranteed to return the five largest counts.
top5females <- head(
  females1980[order(females1980$Count, decreasing = TRUE), ],
  5
)

# names.arg is spelled out in full (no partial argument matching); ylim
# extends the y-axis to a "pretty" limit that covers the tallest bar.
barplot(top5females$Count, names.arg = top5females$Name,
        xlab = "Female Baby Names",
        ylab = "Frequency",
        main = "Top 5 Female Baby Names",
        ylim = range(pretty(c(0, top5females$Count))),
        col = "pink")

Question 4

The file with the top 10 baby names ever (counts summed over all years, separately for each sex) has been saved as a csv file named mostpop.csv in the current folder.

# Question 4: the ten name/sex combinations with the largest total count.

# Sum Count over all years for each Name + Sex pair. Naming Count in the
# formula leaves year out of the aggregation without dropping it by position.
topbbynames <- aggregate(Count ~ Name + Sex, data = bbynames, FUN = sum)

# Rank the totals from largest to smallest and keep the first ten.
topbbynames <- topbbynames[order(topbbynames$Count, decreasing = TRUE), ]
top10names <- head(topbbynames, 10)

# row.names = FALSE keeps the meaningless row indices out of the file; by
# default write.csv adds them as an extra, unnamed first column.
write.csv(top10names, file = "mostpop.csv", row.names = FALSE)