# Load the example birthdays file and convert its `dates` column from
# month/day/two-digit-year text into Date values.
bday <- read.csv(file = 'c:/bigdata/dev/datasets/birthdaysExample.csv')
bday[["dates"]] <- as.Date(bday[["dates"]], format = "%m/%d/%y")
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
# Split every birthday into its calendar parts (via the lubridate
# accessors) so counts can be taken per month and per day of the month.
bday_df <- with(bday, data.frame(
  dates = dates,
  year = year(dates),
  month = month(dates),
  day = day(dates)
))
summary(bday_df)
##      dates                 year          month             day      
##  Min.   :2020-01-01   Min.   :2020   Min.   : 1.000   Min.   : 1.0  
##  1st Qu.:2020-03-28   1st Qu.:2020   1st Qu.: 3.000   1st Qu.: 8.0  
##  Median :2020-07-02   Median :2020   Median : 7.000   Median :16.0  
##  Mean   :2020-06-30   Mean   :2020   Mean   : 6.474   Mean   :15.7  
##  3rd Qu.:2020-09-28   3rd Qu.:2020   3rd Qu.: 9.000   3rd Qu.:23.0  
##  Max.   :2020-12-31   Max.   :2020   Max.   :12.000   Max.   :31.0
# Number of birthdays in each calendar month.
table(bday_df[["month"]])
## 
##  1  2  3  4  5  6  7  8  9 10 11 12 
## 89 79 98 81 72 93 86 91 96 89 87 72
# Number of birthdays on each day of the month.
table(bday_df[["day"]])
## 
##  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 
## 36 36 26 33 34 33 34 36 40 35 21 33 30 48 24 39 40 39 40 31 31 35 39 32 29 
## 26 27 28 29 30 31 
## 30 36 37 32 28 16
# Count the rows whose birthday is December 9. Rows with a missing month
# or day are not counted.
bday_mates <- with(bday_df, sum(month == 12 & day == 9, na.rm = TRUE))
bday_mates
## [1] 5
library(ggplot2)
# Birthdays per calendar month: the counts the histogram below draws.
table(bday_df$month)
## 
##  1  2  3  4  5  6  7  8  9 10 11 12 
## 89 79 98 81 72 93 86 91 96 89 87 72
# Histogram of birthdays by month. `month` is numeric and the bar heights
# are counts, so both axes are continuous. Discrete scales on them would
# leave the axes without tick labels, so x gets a continuous scale with one
# break per month and y keeps its default continuous scale.
ggplot(bday_df, aes(x = month)) +
  geom_histogram(color = "black", fill = "green", binwidth = 1) +
  scale_x_continuous(breaks = 1:12) +
  ggtitle("Facebook Friends Birthdays By Month")

# ggsave() writes the most recent plot when no plot is passed explicitly.
ggsave('bdayHistogram.png')
## Saving 7 x 5 in image
# March has the most birthdays of any month (98).
# The 14th is the day of the month with the most birthdays (48).

# Monthly histogram again, this time with explicit axis labels.
# Built with ggplot() rather than the deprecated qplot(). `month` is
# numeric, so the x axis uses a continuous scale with one break per month
# (a discrete scale would not draw the requested breaks).
ggplot(bday_df, aes(x = month)) +
  geom_histogram(color = "black", fill = "red", binwidth = 1) +
  scale_x_continuous(breaks = 1:12) +
  xlab('month of the year') +
  ylab('#birthdays in the month')

# ggsave() writes the most recent plot when no plot is passed explicitly.
ggsave('bdayHistogram1.png')
## Saving 7 x 5 in image
# Largest number of birthdays in any single month.
max(table(bday_df$month))
## [1] 98
# Histogram of birthdays by day of the month (1-31), with a tick on every
# other day. Built with ggplot() rather than the deprecated qplot().
# The x label says "month" because `day` comes from lubridate::day(), which
# is the day within the month, not the day of the year.
ggplot(bday_df, aes(x = day)) +
  geom_histogram(color = "black", fill = "blue", binwidth = 1) +
  scale_x_continuous(breaks = seq(1, 31, 2)) +
  xlab('day of the month') +
  ylab('#birthdays on the day')

# ggsave() writes the most recent plot when no plot is passed explicitly.
ggsave('bdayHistogram2.png')
## Saving 7 x 5 in image
# Largest number of birthdays on any single day of the month.
max(table(bday_df$day))
## [1] 48
# Number of distinct birthday dates: count each date the first time it
# appears.
sum(!duplicated(bday_df$dates))
## [1] 348
# No: only 348 distinct birthday dates appear, so not every day of the year is covered