This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Switch into the course folder so that relative file paths resolve from
# there, then print the resulting working directory.
setwd(dir = "~/Courses/Exploratory Data Analysis(Udacity)")
getwd()
## [1] "/Users/ahada/Courses/Exploratory Data Analysis(Udacity)"
library(ggplot2)
library(gridExtra)
## Loading required package: grid
library(lubridate)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggthemes)
# Make the ggthemes "few" theme, with a base font size of 20, the default
# theme for the plots drawn afterwards.
theme_set(theme_few(base_size = 20))
# Load the birthdays file; it must contain a `Birthday` column, which is
# parsed below as month-day-year dates.
fb_bday <- read.csv("facebookBirthdays.csv", header = TRUE)
fb_bday$Birthday <- mdy(fb_bday$Birthday)

# Derive the calendar fields used by the tables and plots.
fb_bday$Month <- month(fb_bday$Birthday, label = TRUE)  # labelled month
fb_bday$Day <- day(fb_bday$Birthday)                    # day of the month
fb_bday$DOW <- wday(fb_bday$Birthday, label = TRUE)     # labelled weekday
fb_bday$DOY <- yday(fb_bday$Birthday)                   # day of the year
# Bar chart of how many birthdays fall in each month.
# Month is a discrete (labelled) variable, so the counts are drawn with
# geom_bar(); geom_histogram() bins a continuous x, and current ggplot2
# releases reject a discrete one.
ggplot(data = fb_bday, aes(x = Month)) +
  geom_bar(color = "black", fill = "#56B4E9") +
  ylab("Number of birthdays per month") +
  ggtitle("My facebook friends birthdays by month")
# Tabulate the number of birthdays in each month.
table(fb_bday[["Month"]])
##
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 48 41 37 34 30 40 49 40 45 49 45 44
# Histogram of birthdays by day of the month.
# binwidth = 1 gives every day its own bar instead of relying on ggplot2's
# default binning (which also triggers a stat_bin message), and matches the
# day-of-year histogram.
ggplot(data = fb_bday, aes(x = Day)) +
  geom_histogram(binwidth = 1, color = "black", fill = "#56B4E9") +
  xlab("Day of the month") +
  ylab("Number of birthdays per day of the month") +
  ggtitle("My facebook friends birthdays by day") +
  scale_x_continuous(breaks = seq(1, 31, 5))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
# Tabulate the number of birthdays on each day of the month.
table(fb_bday[["Day"]])
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## 28 13 16 21 17 12 18 18 12 20 16 14 12 23 16 13 11 18 16 17 17 19 14 22 17
## 26 27 28 29 30 31
## 10 25 9 10 16 12
# Histogram of birthdays across the year: one bin per day of the year,
# with an x-axis tick every 30 days starting at day 1.
ggplot(fb_bday, aes(DOY)) +
  geom_histogram(colour = "black", binwidth = 1) +
  scale_x_continuous(breaks = seq(from = 1, to = 365, by = 30)) +
  labs(
    x = "Day of the year",
    y = "Number of birthdays per day of the year",
    title = "My facebook friends birthdays by day"
  )
# Tabulate the number of birthdays on each day of the year.
table(fb_bday[["DOY"]])
##
## 1 3 4 5 6 7 8 10 12 13 14 15 18 20 21 22 24 25
## 11 1 2 1 1 2 2 5 1 1 1 3 1 3 2 4 1 1
## 27 29 30 31 32 33 35 37 38 39 40 41 42 43 44 45 46 47
## 1 1 1 2 1 2 1 1 2 1 1 1 4 1 1 4 2 2
## 48 49 50 52 53 54 55 56 57 58 62 63 64 65 66 67 68 69
## 1 3 1 3 2 2 1 1 1 2 4 2 2 2 1 1 1 1
## 70 72 73 74 75 78 80 81 83 84 85 86 87 89 90 91 92 93
## 2 2 1 1 3 2 2 1 2 1 1 1 1 1 2 3 3 2
## 94 96 97 99 100 103 104 105 108 109 113 114 116 117 120 122 123 124
## 2 2 1 1 1 1 5 2 1 2 2 1 1 3 1 2 1 1
## 125 127 129 130 131 132 134 135 136 137 138 139 140 145 149 150 151 155
## 4 2 1 1 1 1 2 1 1 1 3 2 1 1 2 1 1 1
## 157 159 160 163 165 168 169 170 171 172 173 174 175 176 178 179 180 181
## 1 2 1 2 3 1 4 2 1 3 2 1 4 2 5 2 1 2
## 182 184 185 186 187 188 189 191 192 193 194 196 197 198 199 202 203 204
## 1 1 4 3 3 2 1 3 2 2 1 3 1 2 1 2 1 3
## 205 206 207 208 209 210 211 212 214 216 217 220 221 222 226 227 228 229
## 1 1 2 2 1 3 1 2 1 2 3 3 2 1 2 2 2 1
## 231 232 233 234 236 237 238 239 240 241 242 243 244 245 247 248 249 250
## 2 2 1 3 2 1 1 4 1 1 1 2 4 2 1 1 1 1
## 251 253 254 255 256 258 259 261 263 264 265 267 268 269 270 271 273 274
## 1 3 4 3 1 1 1 3 3 1 4 2 1 1 3 2 1 4
## 275 276 277 280 281 282 283 285 286 287 288 290 292 293 295 296 297 298
## 3 1 1 2 3 2 2 2 1 2 1 2 1 1 1 4 4 3
## 299 300 301 302 303 304 305 307 308 309 310 312 313 314 315 317 318 320
## 1 1 1 1 3 2 1 4 3 1 1 2 3 2 1 1 3 2
## 321 322 323 324 325 327 328 329 330 331 334 335 337 338 339 341 342 345
## 3 2 3 1 3 1 3 1 1 2 1 3 2 1 2 5 2 2
## 346 347 350 353 354 356 357 358 359 360 361 362 363 364 365
## 2 3 1 1 5 1 1 1 4 1 1 1 1 3 1
# Count how many birthdays fall on each distinct date.
Birthday_count <- as.data.frame(table(fb_bday$Birthday))
# Sort the dates by frequency, most common first, and show the top rows.
Birthday_count2 <- Birthday_count[
  order(Birthday_count$Freq, decreasing = TRUE),
]
head(Birthday_count2)
## Var1 Freq
## 1 2015-01-01 11
## 8 2015-01-10 5
## 79 2015-04-14 5
## 123 2015-06-27 5
## 250 2015-12-07 5
## 257 2015-12-20 5