Load the libraries

library(ggplot2)
library(ggthemes)
library(stringr)
# Make the solarized theme (from ggthemes) the default for all later plots,
# with a base font size of 20
theme_set(theme_solarized(base_size = 20))

Load the birthdays CSV data into a data frame

# Read the birthdays CSV into a data frame.
# header = TRUE: the first line of the file holds the variable names.
# strip.white = TRUE: drop leading/trailing white space around unquoted fields.
birthdays <- read.csv(file = "birthdays.csv", header = TRUE, strip.white = TRUE)

# Per-column overview of what was loaded
summary(object = birthdays)
##                                Title                Start   
##  Abhishek Ramesh Keshav's birthday: 1   9/18/2015 0:00 : 3  
##  Adarsh Harindra nath's birthday  : 1   10/26/2015 0:00: 2  
##  Aishwarya's birthday             : 1   12/26/2015 0:00: 2  
##  Anthony Carfang's birthday       : 1   5/18/2015 0:00 : 2  
##  Arjun GN's birthday              : 1   5/28/2015 0:00 : 2  
##  Baady's birthday                 : 1   6/21/2015 0:00 : 2  
##  (Other)                          :42   (Other)        :35  
##               End         Duration 
##  9/19/2015 0:00 : 3   24:00:00:48  
##  10/27/2015 0:00: 2                
##  12/27/2015 0:00: 2                
##  5/19/2015 0:00 : 2                
##  5/29/2015 0:00 : 2                
##  6/22/2015 0:00 : 2                
##  (Other)        :35

Strip the time and extract the day and month of each date

# Parse the leading "month/day/year" part of each timestamp into a Date.
# The format string stops after the year, so the trailing time of day
# (e.g. " 0:00") is ignored.
# as.Date() is used instead of strptime() because strptime() returns POSIXlt,
# a list-based class that is awkward to keep in a data frame column, and it
# interprets the values in the local time zone. A Date has no time-of-day or
# time-zone component, which is all a birthday needs.
birthdays$Start <- as.Date(birthdays$Start, format = "%m/%d/%Y")
birthdays$End <- as.Date(birthdays$End, format = "%m/%d/%Y")

# Day of the month of every birthday, as zero-padded text
format(birthdays[["Start"]], format = "%d")
##  [1] "20" "29" "02" "15" "26" "05" "06" "29" "17" "22" "29" "06" "16" "18"
## [15] "18" "22" "28" "28" "01" "14" "21" "21" "23" "28" "01" "09" "12" "12"
## [29] "15" "31" "17" "18" "18" "18" "27" "14" "24" "26" "26" "28" "06" "10"
## [43] "01" "05" "26" "26" "05" "06"
# Month number of every birthday, as zero-padded text
format(birthdays[["Start"]], format = "%m")
##  [1] "01" "01" "02" "02" "02" "03" "03" "03" "04" "04" "04" "05" "05" "05"
## [15] "05" "05" "05" "05" "06" "06" "06" "06" "06" "07" "08" "08" "08" "08"
## [29] "08" "08" "09" "09" "09" "09" "09" "10" "10" "10" "10" "10" "11" "11"
## [43] "12" "12" "12" "12" "01" "01"
# Keep both values on the data frame as numeric columns
birthdays[["month"]] <- as.numeric(format(birthdays[["Start"]], format = "%m"))
birthdays[["day"]] <- as.numeric(format(birthdays[["Start"]], format = "%d"))

Create histograms

# Histogram of birthday months, one bar per month.
# - The column is mapped by name inside aes(); writing `birthdays$month`
#   there bypasses the `data` argument.
# - The visible x range is set with coord_cartesian() rather than scale
#   limits. With binwidth = 1 the bars for months 1 and 12 extend to 0.5 and
#   12.5, and scale limits of c(1, 12) make current ggplot2 drop those two
#   bars as out of range. coord_cartesian() only zooms, so no bar is removed.
monthHistogram <- ggplot(birthdays, aes(x = month)) +
  geom_histogram(color = 'black', fill = '#48CCDD', binwidth = 1) +
  scale_x_continuous(breaks = 1:12) +
  coord_cartesian(xlim = c(0.5, 12.5)) +
  xlab('Month') +
  ylab('Counts for the Birthday Month')
monthHistogram

# Save this specific plot instead of relying on "the last plot displayed"
ggsave('birthdaymonth.jpg', plot = monthHistogram)
## Saving 7 x 5 in image
summary(birthdays$month)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   4.000   6.000   6.583   9.000  12.000
# Histogram of the day of the month of each birthday, one bar per day.
# - The column is mapped by name inside aes(); writing `birthdays$day` there
#   bypasses the `data` argument.
# - The visible x range is set with coord_cartesian() rather than scale
#   limits. With binwidth = 1 the bars for days 1 and 31 extend to 0.5 and
#   31.5, and scale limits of c(1, 31) make current ggplot2 drop those two
#   bars as out of range. coord_cartesian() only zooms, so no bar is removed.
dayHistogram <- ggplot(birthdays, aes(x = day)) +
  geom_histogram(color = 'black', fill = '#48CCDD', binwidth = 1) +
  scale_x_continuous(breaks = 1:31) +
  coord_cartesian(xlim = c(0.5, 31.5)) +
  xlab('Day') +
  ylab('Counts for the Day of Birth')
dayHistogram

# Save this specific plot instead of relying on "the last plot displayed"
ggsave('dayofbirthday.jpg', plot = dayHistogram)
## Saving 7 x 5 in image

Which month contains the most number of birthdays?

# Count the birthdays in each calendar month.
# Tabulating a factor that carries all twelve levels keeps months without any
# birthday (count 0), so position i in the table is always month i. A plain
# table(birthdays$month) omits empty months, and the positions returned by
# which() would then no longer be month numbers.
monthCounts <- table(factor(birthdays$month, levels = 1:12))

# Month number(s) with the highest count; a tie yields more than one value
mostCommonMonth <- which(monthCounts == max(monthCounts))

# Month abbreviation of every birthday, as a factor in calendar order.
# It is tabulated later to show the number of birthdays per month.
# month.abb is used instead of format(Start, "%b") because "%b" follows the
# current locale and would not match English level names in other locales.
birthMonthTable <- factor(month.abb[birthdays$month], levels = month.abb)

# Print the most common month
month.abb[mostCommonMonth]
## [1] "May"

How many birthdays are in each month?

# Number of birthdays per month, in calendar order (the factor's level order)
table(birthMonthTable, dnn = "birthMonthTable")
## birthMonthTable
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec 
##   4   3   3   3   7   5   1   6   5   5   2   4

Which day of the year has the most number of birthdays?

# Count the birthdays on each day of the month
birthDayTable <- table(birthdays$day)

# Day(s) of the month with the most birthdays.
# The day values are read from the table's names: which() on its own yields
# positions within the table, and those differ from the day numbers whenever
# some day of the month has no birthday.
mostCommonDay <- as.integer(names(which(birthDayTable == max(birthDayTable))))

# Print the most common day(s) of the month
mostCommonDay
## [1] 18 26

# The question asks for a day of the year, so also count the month-day
# combinations ("%m-%d" is numeric and therefore locale-independent)
birthDateTable <- table(format(birthdays$Start, "%m-%d"))
mostCommonDate <- names(which(birthDateTable == max(birthDateTable)))

# Print the most common day(s) of the year as "month-day"
mostCommonDate
## [1] "09-18"

Do you have at least 365 friends who have birthdays on every day of the year?

# NO, my data set was too small as it had only 48 observations