This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Switch the session to the course directory; the CSV read later in this
# document uses a path relative to the working directory.
# NOTE(review): a hard-coded, user-specific setwd() makes the document
# non-portable -- consider an RStudio project or a knitr root.dir setting.
setwd("~/Courses/Exploratory Data Analysis(Udacity)")
# Echo the working directory now in effect.
getwd()
## [1] "/Users/ahada/Courses/Exploratory Data Analysis(Udacity)"
library(ggplot2)
library(gridExtra)
## Loading required package: grid
library(lubridate)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggthemes)
# Make ggthemes' "few" theme, with a base font size of 20, the default
# ggplot2 theme.
theme_set(theme_few(base_size = 20))

Reading the CSV file into a data frame

# Load the birthday list; the file name is resolved against the current
# working directory. The first row of the file holds the column names.
fb_bday <- read.csv("facebookBirthdays.csv", header = TRUE)
# Parse the Birthday column (month-day-year order) into dates.
fb_bday$Birthday <- mdy(fb_bday$Birthday)

Extracting Month, Day, DOW (day of the week) and DOY (day of the year) using the lubridate package

# Derive calendar components from the parsed birthday dates.
# label = TRUE asks lubridate for named months/weekdays instead of numbers.
fb_bday$Month <- month(fb_bday$Birthday, label = TRUE)
fb_bday$Day <- day(fb_bday$Birthday)   # day of the month
fb_bday$DOW <- wday(fb_bday$Birthday, label = TRUE)
fb_bday$DOY <- yday(fb_bday$Birthday)  # day of the year

Histogram of birthdays per month

# Number of birthdays in each month.
# Month is a discrete (labelled) variable, so the counts are drawn with
# geom_bar(); geom_histogram() bins continuous data and rejects a discrete x
# in current ggplot2 releases.
ggplot(data = fb_bday, aes(x = Month)) +
  geom_bar(color = "black", fill = "#56B4E9") +
  ylab("Number of birthdays per month") +
  ggtitle("My facebook friends birthdays by month")

plot of chunk unnamed-chunk-5

How many birthdays are in each month?

# Frequency of birthdays for each month label.
table(fb_bday[["Month"]])
## 
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec 
##  48  41  37  34  30  40  49  40  45  49  45  44

Histogram of birthdays per day of the month (DOM)

# Histogram of birthdays by day of the month.
# An explicit binwidth of 1 gives one bar per day of the month; relying on the
# default binning makes stat_bin emit a message and, depending on the ggplot2
# version, can merge neighbouring days into one bar.
ggplot(data = fb_bday, aes(x = Day)) +
  geom_histogram(binwidth = 1, color = "black", fill = "#56B4E9") +
  xlab("Day of the month") +
  ylab("Number of birthdays per day of the month") +
  ggtitle("My facebook friends birthdays by day") +
  scale_x_continuous(breaks = seq(1, 31, 5))

plot of chunk unnamed-chunk-7

How many birthdays fall on each day of the month (DOM)?

# Frequency of birthdays for each day-of-month value.
table(fb_bday[["Day"]])
## 
##  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 
## 28 13 16 21 17 12 18 18 12 20 16 14 12 23 16 13 11 18 16 17 17 19 14 22 17 
## 26 27 28 29 30 31 
## 10 25  9 10 16 12

Histogram of birthdays per day of the year (DOY)

# Day-of-year histogram: one-day-wide bins, x-axis ticks every 30 days
# starting at day 1.
ggplot(fb_bday, aes(x = DOY)) +
  geom_histogram(colour = "black", binwidth = 1) +
  scale_x_continuous(breaks = seq(from = 1, to = 365, by = 30)) +
  labs(
    x = "Day of the year",
    y = "Number of birthdays per day of the year",
    title = "My facebook friends birthdays by day"
  )

plot of chunk unnamed-chunk-9

How many birthdays fall on each day of the year (DOY)?

# Frequency of birthdays for each day-of-year value that occurs in the data.
table(fb_bday[["DOY"]])
## 
##   1   3   4   5   6   7   8  10  12  13  14  15  18  20  21  22  24  25 
##  11   1   2   1   1   2   2   5   1   1   1   3   1   3   2   4   1   1 
##  27  29  30  31  32  33  35  37  38  39  40  41  42  43  44  45  46  47 
##   1   1   1   2   1   2   1   1   2   1   1   1   4   1   1   4   2   2 
##  48  49  50  52  53  54  55  56  57  58  62  63  64  65  66  67  68  69 
##   1   3   1   3   2   2   1   1   1   2   4   2   2   2   1   1   1   1 
##  70  72  73  74  75  78  80  81  83  84  85  86  87  89  90  91  92  93 
##   2   2   1   1   3   2   2   1   2   1   1   1   1   1   2   3   3   2 
##  94  96  97  99 100 103 104 105 108 109 113 114 116 117 120 122 123 124 
##   2   2   1   1   1   1   5   2   1   2   2   1   1   3   1   2   1   1 
## 125 127 129 130 131 132 134 135 136 137 138 139 140 145 149 150 151 155 
##   4   2   1   1   1   1   2   1   1   1   3   2   1   1   2   1   1   1 
## 157 159 160 163 165 168 169 170 171 172 173 174 175 176 178 179 180 181 
##   1   2   1   2   3   1   4   2   1   3   2   1   4   2   5   2   1   2 
## 182 184 185 186 187 188 189 191 192 193 194 196 197 198 199 202 203 204 
##   1   1   4   3   3   2   1   3   2   2   1   3   1   2   1   2   1   3 
## 205 206 207 208 209 210 211 212 214 216 217 220 221 222 226 227 228 229 
##   1   1   2   2   1   3   1   2   1   2   3   3   2   1   2   2   2   1 
## 231 232 233 234 236 237 238 239 240 241 242 243 244 245 247 248 249 250 
##   2   2   1   3   2   1   1   4   1   1   1   2   4   2   1   1   1   1 
## 251 253 254 255 256 258 259 261 263 264 265 267 268 269 270 271 273 274 
##   1   3   4   3   1   1   1   3   3   1   4   2   1   1   3   2   1   4 
## 275 276 277 280 281 282 283 285 286 287 288 290 292 293 295 296 297 298 
##   3   1   1   2   3   2   2   2   1   2   1   2   1   1   1   4   4   3 
## 299 300 301 302 303 304 305 307 308 309 310 312 313 314 315 317 318 320 
##   1   1   1   1   3   2   1   4   3   1   1   2   3   2   1   1   3   2 
## 321 322 323 324 325 327 328 329 330 331 334 335 337 338 339 341 342 345 
##   3   2   3   1   3   1   3   1   1   2   1   3   2   1   2   5   2   2 
## 346 347 350 353 354 356 357 358 359 360 361 362 363 364 365 
##   2   3   1   1   5   1   1   1   4   1   1   1   1   3   1

How many people have the same birthday?

# Count how many rows share each birthday date, then sort the counts so the
# most frequent dates come first.
Birthday_count <- as.data.frame(table(fb_bday$Birthday))
Birthday_count2 <- Birthday_count[
  order(Birthday_count$Freq, decreasing = TRUE),
]
# Show the six most common birthday dates.
head(Birthday_count2)
##           Var1 Freq
## 1   2015-01-01   11
## 8   2015-01-10    5
## 79  2015-04-14    5
## 123 2015-06-27    5
## 250 2015-12-07    5
## 257 2015-12-20    5