library(tidyr)
library(ggplot2)
library(ggthemes)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the birthday records from the CSV file (path is relative to the
# working directory) and preview the first rows.
tmp <- read.csv(file = "birthdaysExample.csv")
head(tmp)
## dates
## 1 11/25/14
## 2 6/8/14
## 3 9/12/14
## 4 5/26/14
## 5 2/20/14
## 6 6/19/14
# Parse the month/day/two-digit-year values into Date objects, replacing the
# original column in place.
tmp[["dates"]] <- as.Date(tmp[["dates"]], format = "%m/%d/%y")
head(tmp)
## dates
## 1 2014-11-25
## 2 2014-06-08
## 3 2014-09-12
## 4 2014-05-26
## 5 2014-02-20
## 6 2014-06-19
# Inspect the structure to confirm the column now has class Date.
str(tmp)
## 'data.frame': 1033 obs. of 1 variable:
## $ dates: Date, format: "2014-11-25" "2014-06-08" ...
March has 98 birthdays, which is the highest count of any month:
# Extract the month of each date as a zero-padded two-character string
# (e.g. "06"), stored in a new `month` column.
tmp[["month"]] <- format(tmp[["dates"]], format = "%m")
head(tmp)
## dates month
## 1 2014-11-25 11
## 2 2014-06-08 06
## 3 2014-09-12 09
## 4 2014-05-26 05
## 5 2014-02-20 02
## 6 2014-06-19 06
# Tally the birthdays in each month: group the rows by `month`, then collapse
# every group to a single row whose `n` column holds the group size.
countByMonth <- summarise(group_by(tmp, month), n = n())
head(countByMonth)
## Source: local data frame [6 x 2]
##
## month n
## 1 01 89
## 2 02 79
## 3 03 98
## 4 04 81
## 5 05 72
## 6 06 93
# Largest monthly birthday count. Take the maximum of the numeric counts
# directly: max(names(table(...))) compares the counts as character strings,
# which orders them lexicographically (e.g. "98" would rank above "100") and
# returns a string instead of a number.
max(countByMonth$n)
## [1] 98
# Print the full per-month table for reference.
countByMonth
## Source: local data frame [12 x 2]
##
## month n
## 1 01 89
## 2 02 79
## 3 03 98
## 4 04 81
## 5 05 72
## 6 06 93
## 7 07 86
## 8 08 91
## 9 09 96
## 10 10 89
## 11 11 87
## 12 12 72
# Tabulate the number of birthdays on each distinct date once and reuse it.
t <- table(tmp$dates)
# Highest number of birthdays that fall on any single date.
max(t)
## [1] 8
# Convert the table to a data frame (columns Var1 = date, Freq = count) and
# keep every date tied for the maximum. The threshold is computed from the
# data rather than hard-coded, so it stays correct if the input changes.
d <- as.data.frame(t)
filter(d, Freq == max(Freq))
## Var1 Freq
## 1 2014-02-06 8
## 2 2014-05-22 8
## 3 2014-07-16 8
2014-02-06, 2014-05-22 and 2014-07-16 have the most birthdays, with 8 each.
# Extract the day of the month of each date as a zero-padded two-character
# string (e.g. "08"), stored in a new `day` column.
tmp[["day"]] <- format(tmp[["dates"]], format = "%d")
head(tmp)
## dates month day
## 1 2014-11-25 11 25
## 2 2014-06-08 06 08
## 3 2014-09-12 09 12
## 4 2014-05-26 05 26
## 5 2014-02-20 02 20
## 6 2014-06-19 06 19
Accumulating the birthdays for each day of the month across all months, the 14th has the most birthdays.
Next, I print the number of distinct days within each month on which at least one birthday falls:
# Numeric month (1-12) for each record, derived from the "01".."12" strings.
tmp$month_num <- as.numeric(tmp$month)
# For each calendar month, count how many distinct days of the month have at
# least one birthday. which() drops NA comparisons: plain logical indexing
# would turn every NA month into an NA entry, adding a spurious extra
# "day" to each month's count if any date failed to parse.
# Months with no records yield 0. The result is a numeric vector of length 12.
vector <- vapply(
  seq_len(12),
  function(m) length(unique(tmp$day[which(tmp$month_num == m)])),
  numeric(1)
)
vector
## [1] 31 25 30 28 29 29 31 28 30 31 28 28
We can see that only January, July, September and October have at least one birthday on every day of the month.