# Load the birthday export, then convert the `dates` strings
# (month/day/two-digit year) into Date values.
bday <- read.csv("c:/bigdata/dev/datasets/birthdaysExample.csv")
bday[["dates"]] <- as.Date(bday[["dates"]], format = "%m/%d/%y")
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
# Put the year, month and day of every birthday in their own columns next to
# the date itself, then print a per-column summary.
bday_df <- with(
  bday,
  data.frame(
    dates = dates,
    year = year(dates),
    month = month(dates),
    day = day(dates)
  )
)
summary(bday_df)
## dates year month day
## Min. :2020-01-01 Min. :2020 Min. : 1.000 Min. : 1.0
## 1st Qu.:2020-03-28 1st Qu.:2020 1st Qu.: 3.000 1st Qu.: 8.0
## Median :2020-07-02 Median :2020 Median : 7.000 Median :16.0
## Mean :2020-06-30 Mean :2020 Mean : 6.474 Mean :15.7
## 3rd Qu.:2020-09-28 3rd Qu.:2020 3rd Qu.: 9.000 3rd Qu.:23.0
## Max. :2020-12-31 Max. :2020 Max. :12.000 Max. :31.0
# Number of birthdays in each month (1 = January ... 12 = December).
table(bday_df[["month"]])
##
## 1 2 3 4 5 6 7 8 9 10 11 12
## 89 79 98 81 72 93 86 91 96 89 87 72
# Number of birthdays on each day of the month (1-31), all months pooled.
table(bday_df[["day"]])
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## 36 36 26 33 34 33 34 36 40 35 21 33 30 48 24 39 40 39 40 31 31 35 39 32 29
## 26 27 28 29 30 31
## 30 36 37 32 28 16
# Count the rows whose birthday is 9 December. Rows with a missing month or
# day are not counted.
is_dec_9 <- bday_df[["month"]] == 12 & bday_df[["day"]] == 9
bday_mates <- sum(is_dec_9, na.rm = TRUE)
bday_mates
## [1] 5
library(ggplot2)
# Print the per-month counts again, for reference next to the histogram below.
table(bday_df[["month"]])
##
## 1 2 3 4 5 6 7 8 9 10 11 12
## 89 79 98 81 72 93 86 91 96 89 87 72
# Histogram of birthdays per month, one bar per month.
# `month` is numeric and the bar heights are counts, so both axes need
# continuous scales: x gets one tick per month, y keeps the ggplot2 default.
bday_month_hist <- ggplot(bday_df, aes(x = month)) +
  geom_histogram(color = "black", fill = "green", binwidth = 1) +
  scale_x_continuous(breaks = 1:12) +
  ggtitle("Facebook Friends Birthdays By Month")
print(bday_month_hist)

# Save this specific plot rather than whichever plot was displayed last.
ggsave("bdayHistogram.png", plot = bday_month_hist)
## Saving 7 x 5 in image
# March is the month with the most birthdays (98).
# The 14th is the day of the month with the most birthdays (48).
# Same per-month histogram with explicit axis labels.
# Built with ggplot() because qplot() is deprecated; `month` is numeric, so
# the x axis uses a continuous scale with one tick per month.
bday_month_hist_labeled <- ggplot(bday_df, aes(x = month)) +
  geom_histogram(color = "black", fill = "red", binwidth = 1) +
  scale_x_continuous(breaks = 1:12) +
  labs(x = "month of the year", y = "#birthdays in the month")
print(bday_month_hist_labeled)

# Save this specific plot rather than whichever plot was displayed last.
ggsave("bdayHistogram1.png", plot = bday_month_hist_labeled)
## Saving 7 x 5 in image
# Size of the busiest month: the largest per-month birthday count.
max(table(bday_df[["month"]]))
## [1] 98
# Histogram of birthdays by day of the month (1-31), all months pooled.
# Built with ggplot() because qplot() is deprecated. Ticks are drawn on every
# second day to keep the axis readable.
bday_day_hist <- ggplot(bday_df, aes(x = day)) +
  geom_histogram(color = "black", fill = "blue", binwidth = 1) +
  scale_x_continuous(breaks = seq(1, 31, 2)) +
  # `day` is the day within the month, not the day of the year.
  labs(x = "day of the month", y = "#birthdays on the day")
print(bday_day_hist)

# Save this specific plot rather than whichever plot was displayed last.
ggsave("bdayHistogram2.png", plot = bday_day_hist)
## Saving 7 x 5 in image
# Size of the busiest day of the month: the largest per-day birthday count.
max(table(bday_df[["day"]]))
## [1] 48
# Number of distinct birthday dates: count each date the first time it appears.
sum(!duplicated(bday_df$dates))
## [1] 348
# No: only 348 distinct birthday dates occur, fewer than the days in a year
# (348 counts unique dates, not friends)