# NOTE(review): hard-coded absolute working directory. The relative file names
# used later in this script ('FBbd.csv', 'BD_num.png') resolve against it, so
# the script only runs on a machine where this directory exists.
setwd("D:/R/Udacity/EDA_Course_Materials/lesson3")
# Packages attached for the analysis below.
library(rmarkdown)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Investigation of the distribution of my friends’ birth months and days

My aim in this project is to answer the following questions:

  • Which month contains the most birthdays?

  • How many birthdays are in each month?

  • Which day of the year has the most birthdays?

  • Do I have at least 365 friends whose birthdays fall on every day of the year?

# Load the birthday data from the CSV file in the working directory.
csvdata <- read.csv("FBbd.csv")

# Parse the Start column ("day/month/year hour:minute") into a date-time object
# whose components can be read off by name.
bdates <- strptime(csvdata$Start, format = "%d/%m/%Y %H:%M")
names(unclass(bdates))
##  [1] "sec"    "min"    "hour"   "mday"   "mon"    "year"   "wday"  
##  [8] "yday"   "isdst"  "zone"   "gmtoff"
# Extract day of month, month and day of year. The mon and yday components are
# zero-based, so they are shifted by one to start at 1.
days <- bdates$mday
mon <- bdates$mon + 1
yday <- bdates$yday + 1

# Create the data frame: one row per birthday, with count = 1 so that later
# steps can total rows by summing this column.
df <- data.frame(days = days, mon = mon, yday = yday, count = 1)

Which month contains the most birthdays?

The plot shows that my friends’ birthdays are concentrated between March and July, and that March is the month with the most birthdays.

# Bar chart of the number of birthdays in each month.
# Built with ggplot() because qplot() is deprecated. Month is treated as a
# discrete variable so every bar is drawn whole: with a histogram plus scale
# limits, a bar that crosses a limit is censored and silently dropped.
# levels = 1:12 together with drop = FALSE keeps empty months on the axis.
month_plot <- ggplot(df, aes(x = factor(mon, levels = 1:12))) +
  geom_bar(colour = "black", fill = "#A41470") +
  scale_x_discrete(drop = FALSE) +
  scale_y_continuous(breaks = seq(0, 30, 2)) +
  # Keep the y axis spanning at least 0-30 without discarding taller bars.
  expand_limits(y = c(0, 30)) +
  labs(title = "Birthdays per month",
       x = "Months",
       y = "Number of birthdays in month")
month_plot

# Save this specific plot rather than relying on the implicit last plot.
ggsave("BD_num.png", plot = month_plot)
## Saving 7 x 5 in image

How many birthdays are in each month?

# Total the birthdays in each month: group the rows by month, then sum the
# per-row count column within each group.
by_mon <- df %>% group_by(mon)
by_mon %>% summarise(sum(count))
## Source: local data frame [12 x 2]
## 
##    mon sum(count)
## 1    1         20
## 2    2         15
## 3    3         29
## 4    4         25
## 5    5         27
## 6    6         26
## 7    7         25
## 8    8         20
## 9    9         14
## 10  10         14
## 11  11         21
## 12  12         16

Which day of the year has the most birthdays?

Among my friends, the most common birthday is 1 May.

# Count birthdays for each calendar day and label each day as "day-month".
# by_yday keeps the grouping by day of year; the day-of-year plot at the end of
# the script reads it.
by_yday <- group_by(df, yday)
# Build the labels from the day and month columns instead of converting yday
# back through strptime(..., "%j"). That conversion fills in the current year,
# so labels after February shift by one day whenever the run year and the data
# year differ in leap-year status, and day 366 becomes NA in a non-leap year.
by_date <- group_by(df, mon, days)
df_yday <- ungroup(summarise(by_date, total = sum(count)))
df_yday$yday <- sprintf("%02d-%02d", df_yday$days, df_yday$mon)
df_yday <- df_yday[, c("yday", "total")]
# Most frequent birthdays first.
ordered <- df_yday[order(-df_yday$total), ]

Do I have at least 365 friends whose birthdays fall on every day of the year?

I sorted my friends’ birthdays by month and day. The final table has only 252 observations, fewer than 365, so my friends’ birthdays cannot fall on every day of the year.

# List every birthday sorted by month, then by day of month.
bd <- select(df, days, mon)
bd_ordered <- bd[order(bd$mon, bd$days), ]
# Renumber the rows from the actual row count. A hard-coded 1:252 fails with an
# "invalid 'row.names' length" error as soon as the input has a different
# number of rows.
row.names(bd_ordered) <- seq_len(nrow(bd_ordered))
bd_ordered
##     days mon
## 1      1   1
## 2      1   1
## 3      1   1
## 4      1   1
## 5      6   1
## 6      6   1
## 7     11   1
## 8     11   1
## 9     16   1
## 10    17   1
## 11    19   1
## 12    22   1
## 13    22   1
## 14    23   1
## 15    23   1
## 16    24   1
## 17    27   1
## 18    29   1
## 19    30   1
## 20    31   1
## 21     2   2
## 22     5   2
## 23     8   2
## 24    10   2
## 25    15   2
## 26    19   2
## 27    19   2
## 28    19   2
## 29    19   2
## 30    20   2
## 31    20   2
## 32    21   2
## 33    22   2
## 34    22   2
## 35    24   2
## 36     2   3
## 37     5   3
## 38     5   3
## 39     6   3
## 40     7   3
## 41     8   3
## 42     9   3
## 43    10   3
## 44    11   3
## 45    11   3
## 46    11   3
## 47    12   3
## 48    13   3
## 49    15   3
## 50    15   3
## 51    15   3
## 52    17   3
## 53    17   3
## 54    17   3
## 55    18   3
## 56    20   3
## 57    21   3
## 58    21   3
## 59    23   3
## 60    24   3
## 61    24   3
## 62    27   3
## 63    28   3
## 64    29   3
## 65     1   4
## 66     2   4
## 67     4   4
## 68     4   4
## 69     7   4
## 70     8   4
## 71    12   4
## 72    12   4
## 73    15   4
## 74    16   4
## 75    16   4
## 76    16   4
## 77    16   4
## 78    18   4
## 79    19   4
## 80    21   4
## 81    21   4
## 82    22   4
## 83    22   4
## 84    23   4
## 85    24   4
## 86    26   4
## 87    29   4
## 88    29   4
## 89    29   4
## 90     1   5
## 91     1   5
## 92     1   5
## 93     1   5
## 94     1   5
## 95     2   5
## 96     2   5
## 97     8   5
## 98     8   5
## 99     8   5
## 100    9   5
## 101    9   5
## 102   10   5
## 103   11   5
## 104   11   5
## 105   17   5
## 106   19   5
## 107   19   5
## 108   20   5
## 109   20   5
## 110   22   5
## 111   24   5
## 112   25   5
## 113   25   5
## 114   25   5
## 115   31   5
## 116   31   5
## 117    1   6
## 118    1   6
## 119    2   6
## 120    2   6
## 121    3   6
## 122    3   6
## 123    4   6
## 124    5   6
## 125    7   6
## 126   12   6
## 127   12   6
## 128   13   6
## 129   13   6
## 130   13   6
## 131   14   6
## 132   15   6
## 133   16   6
## 134   21   6
## 135   22   6
## 136   24   6
## 137   25   6
## 138   25   6
## 139   26   6
## 140   27   6
## 141   29   6
## 142   29   6
## 143    3   7
## 144    3   7
## 145    5   7
## 146    6   7
## 147    7   7
## 148    8   7
## 149   10   7
## 150   11   7
## 151   12   7
## 152   15   7
## 153   16   7
## 154   16   7
## 155   17   7
## 156   17   7
## 157   17   7
## 158   19   7
## 159   20   7
## 160   20   7
## 161   22   7
## 162   23   7
## 163   24   7
## 164   26   7
## 165   28   7
## 166   28   7
## 167   30   7
## 168    1   8
## 169    4   8
## 170    5   8
## 171    6   8
## 172    8   8
## 173   10   8
## 174   10   8
## 175   11   8
## 176   17   8
## 177   18   8
## 178   18   8
## 179   18   8
## 180   19   8
## 181   23   8
## 182   24   8
## 183   26   8
## 184   28   8
## 185   28   8
## 186   28   8
## 187   31   8
## 188   10   9
## 189   12   9
## 190   15   9
## 191   17   9
## 192   17   9
## 193   19   9
## 194   20   9
## 195   20   9
## 196   20   9
## 197   22   9
## 198   23   9
## 199   25   9
## 200   26   9
## 201   27   9
## 202    2  10
## 203    4  10
## 204    5  10
## 205    9  10
## 206    9  10
## 207   12  10
## 208   13  10
## 209   15  10
## 210   15  10
## 211   17  10
## 212   17  10
## 213   18  10
## 214   18  10
## 215   21  10
## 216    3  11
## 217    3  11
## 218    4  11
## 219    4  11
## 220    5  11
## 221    5  11
## 222    5  11
## 223    5  11
## 224    6  11
## 225    7  11
## 226   10  11
## 227   11  11
## 228   12  11
## 229   13  11
## 230   13  11
## 231   17  11
## 232   20  11
## 233   22  11
## 234   28  11
## 235   28  11
## 236   30  11
## 237    6  12
## 238    6  12
## 239    8  12
## 240   10  12
## 241   13  12
## 242   15  12
## 243   17  12
## 244   21  12
## 245   21  12
## 246   23  12
## 247   23  12
## 248   25  12
## 249   27  12
## 250   28  12
## 251   29  12
## 252   31  12
# Histogram of birthdays over the year, one bin per day of year.
# Built with ggplot() because qplot() is deprecated. The visible range is set
# with coord_cartesian(), which only zooms; scale limits would censor any bar
# that crosses them, which can silently drop the first or last day's bin.
ggplot(by_yday, aes(x = yday)) +
  geom_histogram(binwidth = 1, fill = "#A41470") +
  scale_x_continuous(breaks = seq(0, 365, 30)) +
  coord_cartesian(xlim = c(0, 367)) +
  labs(title = "Birthdays during one year",
       x = "Days of year",
       y = "Number of birthday in that day")