Read the births data from the CSV file hosted in the FiveThirtyEight GitHub repository.
# Download the FiveThirtyEight US births CSV (1994-2003, CDC/NCHS) and load it
# into BIRTHS.
BIRTHS <- readr::read_csv(
  file = "https://raw.githubusercontent.com/fivethirtyeight/data/master/births/US_births_1994-2003_CDC_NCHS.csv"
)
## Parsed with column specification:
## cols(
## year = col_double(),
## month = col_double(),
## date_of_month = col_double(),
## day_of_week = col_double(),
## births = col_double()
## )
Replace the numeric day_of_week codes with the names of the days.
# Map the numeric weekday codes in day_of_week to weekday names with a single
# vectorised lookup. Code 1 is "Monday" and code 7 is "Sunday".
DAY_NAMES <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)
BIRTHS <- dplyr::mutate(
  BIRTHS,
  day_of_week = dplyr::coalesce(
    # match() returns NA for anything that is not one of the codes 1-7 ...
    DAY_NAMES[match(day_of_week, seq_along(DAY_NAMES))],
    # ... and those values are kept as text, so entries that are already day
    # names (e.g. when this step is run a second time) are left untouched.
    as.character(day_of_week)
  )
)
BIRTHS
## # A tibble: 3,652 x 5
## year month date_of_month day_of_week births
## <dbl> <dbl> <dbl> <chr> <dbl>
## 1 1994 1 1 Saturday 8096
## 2 1994 1 2 Sunday 7772
## 3 1994 1 3 Monday 10142
## 4 1994 1 4 Tuesday 11248
## 5 1994 1 5 Wednesday 11053
## 6 1994 1 6 Thursday 11406
## 7 1994 1 7 Friday 11251
## 8 1994 1 8 Saturday 8653
## 9 1994 1 9 Sunday 7910
## 10 1994 1 10 Monday 10498
## # ... with 3,642 more rows
Add a column in the data to hold the name of the month, based on the number of the month.
# Add a monthName column holding the English month name for each month number.
# All twelve names are filled in by one mutate() call: rebuilding monthName
# from `month` once per month would overwrite the names set by earlier calls
# and leave only the last month ("December") translated.
# month.name is base R's built-in vector of month names, "January" first.
BIRTHS <- dplyr::mutate(
  BIRTHS,
  # match() yields NA for any value outside 1-12, which gives an NA name.
  monthName = month.name[match(month, seq_along(month.name))]
)
BIRTHS
## # A tibble: 3,652 x 6
## year month date_of_month day_of_week births monthName
## <dbl> <dbl> <dbl> <chr> <dbl> <chr>
## 1 1994 1 1 Saturday 8096 1
## 2 1994 1 2 Sunday 7772 1
## 3 1994 1 3 Monday 10142 1
## 4 1994 1 4 Tuesday 11248 1
## 5 1994 1 5 Wednesday 11053 1
## 6 1994 1 6 Thursday 11406 1
## 7 1994 1 7 Friday 11251 1
## 8 1994 1 8 Saturday 8653 1
## 9 1994 1 9 Sunday 7910 1
## 10 1994 1 10 Monday 10498 1
## # ... with 3,642 more rows
Arrange the birth data by the number of births, in descending order, and store the result in ARG_BIRTHS.
# Sort the rows of BIRTHS from the largest births count to the smallest.
ARG_BIRTHS <- dplyr::arrange(
  BIRTHS,
  dplyr::desc(births)
)
ARG_BIRTHS
## # A tibble: 3,652 x 6
## year month date_of_month day_of_week births monthName
## <dbl> <dbl> <dbl> <chr> <dbl> <chr>
## 1 1999 9 9 Thursday 14540 9
## 2 2003 12 30 Tuesday 14438 December
## 3 2003 9 16 Tuesday 14145 9
## 4 2003 9 3 Wednesday 14119 9
## 5 2003 9 23 Tuesday 14036 9
## 6 2002 9 12 Thursday 13982 9
## 7 2001 12 28 Friday 13918 December
## 8 2003 9 10 Wednesday 13908 9
## 9 2002 9 24 Tuesday 13884 9
## 10 2002 9 17 Tuesday 13883 9
## # ... with 3,642 more rows
Group the birth data by month across the overall dataset and compute the total number of births in each month. Then arrange the totals by month and plot them.
# Sum the births recorded in each calendar month across the whole dataset,
# order the totals by month number, then draw them as a red plot with both
# points and connecting lines (type = "o").
MONTHLY_BIRTHS <- BIRTHS %>%
  dplyr::group_by(month) %>%
  dplyr::summarise(monthlyBirths = sum(births)) %>%
  dplyr::arrange(month)
plot(
  MONTHLY_BIRTHS[["monthlyBirths"]],
  type = "o",
  col = "red",
  xlab = "Month",
  ylab = "Number of births",
  main = "NUMBER OF BIRTHS PER MONTH"
)
Group birth data based on each year, and sum the total births for that year.
# Total number of births in each calendar year, ordered by year.
YEARLY_BIRTHS <- BIRTHS %>%
  group_by(year) %>%
  # Sum the births column. Summing `year` instead would only return the year
  # multiplied by the number of rows recorded for it.
  summarise(yearlyBirths = sum(births)) %>%
  arrange(year)
YEARLY_BIRTHS
## # A tibble: 10 x 2
## year yearlyBirths
## <dbl> <dbl>
## 1 1994 727810
## 2 1995 728175
## 3 1996 730536
## 4 1997 728905
## 5 1998 729270
## 6 1999 729635
## 7 2000 732000
## 8 2001 730365
## 9 2002 730730
## 10 2003 731095