Question 1

library(DataComputing)
## Loading required package: ggplot2
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: lubridate
## Loading required package: tidyr
## Loading required package: stringr
## Loading required package: knitr
## Loading required package: mosaic
## Loading required package: lattice
## Loading required package: car
## Loading required package: mosaicData
## 
## Attaching package: 'mosaic'
## The following object is masked from 'package:car':
## 
##     logit
## The following object is masked from 'package:lubridate':
## 
##     interval
## The following objects are masked from 'package:dplyr':
## 
##     count, do, tally
## The following objects are masked from 'package:stats':
## 
##     binom.test, cor, cov, D, fivenum, IQR, median, prop.test,
##     quantile, sd, t.test, var
## The following objects are masked from 'package:base':
## 
##     max, mean, min, prod, range, sample, sum
## Loading required package: manipulate
## Loading required package: base64enc
## Loading required package: curl
# Total births for each calendar date: rows sharing a date are grouped and
# their `births` values are summed into the column `n`.
daily_births <- tally(group_by(Birthdays, date), births)

# Scatter plot of the per-date totals against the date.
ggplot(data = daily_births, mapping = aes(x = date, y = n)) +
  geom_point() +
  labs(y = "total")

Question 2a

# Total births for each week-of-year number, pooled over every year present
# in Birthdays. week() is applied to the date column to form the group key.
daily_births <- tally(group_by(Birthdays, week = week(date)), births)

# One point per week number, showing the pooled total.
ggplot(data = daily_births, mapping = aes(x = week, y = n)) +
  geom_point() +
  labs(y = "total")

Question 2b

# Total births for each month number, pooled over every year present in
# Birthdays. month() is applied to the date column to form the group key.
daily_births <- tally(group_by(Birthdays, month = month(date)), births)

# One point per month, showing the pooled total.
ggplot(data = daily_births, mapping = aes(x = month, y = n)) +
  geom_point() +
  labs(y = "total")

Question 2c

# Total births for each day of the year, pooled over every year present in
# Birthdays. Question 2b already aggregates by month, so this part uses the
# finer day-of-year key from yday() to give a distinct view of seasonality.
# NOTE(review): assumes Question 2c asks for a day-of-year (Julian day)
# aggregation -- confirm against the assignment wording.
daily_births <- Birthdays %>%
  group_by(yday = yday(date)) %>%
  tally(births)

# One point per day of the year, showing the pooled total.
ggplot(daily_births, aes(x = yday, y = n)) +
  geom_point() +
  ylab("total")

Question 3

# Total births for each day of the week, pooled over every date in Birthdays.
# wday() is applied to the date column to form the group key.
daily_births <- tally(group_by(Birthdays, wday = wday(date)), births)

# One point per weekday, showing the pooled total.
ggplot(data = daily_births, mapping = aes(x = wday, y = n)) +
  geom_point() +
  labs(y = "total")

Question 4

# Keep only the 1983 and 1984 rows, then total the births for each date.
# wday is carried as a second grouping key so it survives into the result
# and can be used to colour the points.
two_year_span <- tally(
  group_by(filter(Birthdays, year %in% c(1983, 1984)), date, wday),
  births
)

# Daily totals over the two years, coloured by day of the week.
ggplot(data = two_year_span, mapping = aes(x = date, y = n, colour = wday)) +
  geom_point() +
  labs(y = "total")

The data show that more births tend to happen on weekdays than on weekends.

Question 5

# Download the US holidays table, then parse its day-month-year `date`
# column with lubridate::dmy().
Holidays <- read.csv("http://tiny.cc/dcf/US-Holidays.csv")
Holidays <- mutate(Holidays, date = lubridate::dmy(date))

Question 6

# Holidays that fall inside the two-year window used for two_year_span.
myHolidays <- Holidays %>%
  filter(year %in% c(1983, 1984))

# Daily births coloured by weekday, with a vertical line and a text label at
# each holiday.
# - Both holiday layers read from myHolidays. Drawing the lines from the
#   unfiltered Holidays table would add a line for every row it contains and
#   stretch the x-axis beyond the two plotted years.
# - Every layer converts its date with as.Date() so points, lines and labels
#   share one x scale whether a date column is stored as POSIXct or as Date.
# - Each layer carries its own data and mapping, so no layer tries to inherit
#   a column (such as n) that its data does not have.
# - angle is a fixed setting rather than a data mapping, so it is passed
#   outside aes().
ggplot() +
  geom_point(
    data = two_year_span,
    aes(x = as.Date(date), y = n, col = wday)
  ) +
  geom_vline(
    data = myHolidays,
    aes(
      xintercept = as.numeric(as.Date(date)),
      col = wday(date, label = TRUE)
    )
  ) +
  geom_text(
    data = myHolidays,
    aes(x = as.Date(date), y = 9000, label = holiday),
    angle = 65
  ) +
  xlab("date") +
  ylab("total")

Question 7

# Put the date column of both tables into class Date so the join key has the
# same type on each side.
Holidays[["date"]] <- as.Date(Holidays[["date"]])
two_year_span[["date"]] <- as.Date(two_year_span[["date"]])

# Keep every row of two_year_span and attach holiday information where a
# matching row exists; the join key is whichever column names the two tables
# have in common.
join_holidays <- two_year_span %>%
  left_join(Holidays)
## Joining by: "date"

Question 8

# Flag each row: "yes" when the join found a holiday name, "no" when the
# holiday column is NA (no matching holiday row).
join_holidays_mutated <- mutate(
  join_holidays,
  is_holiday = ifelse(!is.na(holiday), "yes", "no")
)
join_holidays_mutated
## Source: local data frame [731 x 6]
## Groups: date [731]
## 
##          date   wday     n        holiday  year is_holiday
##        (date) (fctr) (int)         (fctr) (int)      (chr)
## 1  1983-01-01    Sat  8174 New Year's Day  1983        yes
## 2  1983-01-02    Sun  8085             NA    NA         no
## 3  1983-01-03    Mon  9523             NA    NA         no
## 4  1983-01-04   Tues 10094             NA    NA         no
## 5  1983-01-05    Wed  9966             NA    NA         no
## 6  1983-01-06  Thurs  9990             NA    NA         no
## 7  1983-01-07    Fri  9947             NA    NA         no
## 8  1983-01-08    Sat  8525             NA    NA         no
## 9  1983-01-09    Sun  8287             NA    NA         no
## 10 1983-01-10    Mon  9930             NA    NA         no
## ..        ...    ...   ...            ...   ...        ...

Question 9

# Daily births coloured by weekday and sized by the holiday flag, with a
# vertical line and a text label at each holiday.
# - The holiday lines read from myHolidays (the 1983-1984 subset). Using the
#   unfiltered Holidays table would add a line for every row it contains and
#   stretch the x-axis beyond the two plotted years.
# - angle is a fixed setting rather than a data mapping, so it is passed
#   outside aes().
ggplot() +
  geom_point(
    data = join_holidays_mutated,
    aes(x = date, y = n, col = wday, size = is_holiday)
  ) +
  geom_vline(
    data = myHolidays,
    aes(
      xintercept = as.numeric(as.Date(date)),
      col = wday(date, label = TRUE)
    )
  ) +
  geom_text(
    data = myHolidays,
    aes(x = as.Date(date), y = 9000, label = holiday),
    angle = 65
  ) +
  ylab("total")

There are many outliers in the data, but the general trend is actually fairly uniform (I would say Thanksgiving is an exception to this pattern).