DataM: In-class Exercise 0518: Dates & Times

In-class exercise 1.
In-class exercise 2.
- Load and check
- Plot
In-class exercise 3.
- The data
- Plot
In-class exercise 4.

library(dplyr)
library(ggplot2)

In-class exercise 1.

Find out the number of days you have spent at NCKU as a registered student or staff person.

Sys.Date() - as.Date('2015-09-01')

Time difference of 1724 days

In-class exercise 2.

Reproduce the plot of calls for police assistances around 24 hours in New York City using the data set here.

Target output

Load and check

dta <- read.csv('../data/police_NYC.csv', header = TRUE)
dta <- dta %>% mutate(Hour = as.integer(Hour))
str(dta)

'data.frame':   24 obs. of  2 variables:
 $ Hour : int  0 1 2 3 4 5 6 7 8 9 ...
 $ Calls: int  1080 910 770 780 380 390 200 300 275 395 ...

dta

Plot

ggplot(dta, aes(Hour, Calls)) +
  geom_bar(width=1, stat="identity", fill="cyan", col="gray", alpha=0.2) +
  geom_abline(intercept = mean(dta$Calls), slope=0, col="pink", lwd=.9) +
  scale_x_continuous(
    breaks = 0:23 - .5,
    labels = c(0,  rep('', 4), 5, rep('', 4), 10, rep('', 4),
               15, rep('', 4), 20, rep('', 3))) + 
  coord_polar(theta = 'x', start = -pi/24) +
  theme_bw() +
  theme(panel.grid.minor.x = element_blank(),
        panel.grid.major.x = element_blank())

In-class exercise 3.

Assume that a friend of yours will live to be 100 years old. Find out how often his or her birthday falls on each day of the week. Plot it.

The data

Approach 1 (True weekdays derived from the specified birthday)

nDays <- as.numeric(as.Date('2090-01-01') - as.Date('1990-01-01'))
Dates <- (as.Date('1990-01-01'):as.Date('2090-01-01'))
Counts <- sapply(
  1:nDays,
  function(d) weekdays(as.Date(d, origin = '1990-01-01'))) %>% table()
dta <- data.frame(
         Weekday = factor(
           names(Counts),
           levels = paste0('週', c('日', '一', '二', '三', '四', '五', '六'))),
         Counts = as.vector(Counts))
dta

Approach 2

nDays <- as.numeric(as.Date('2090-01-01') - as.Date('1990-01-01'))
dta <- data.frame(
  Weekday = factor(c('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat')[1:7],
                   levels = c('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat')),
  Counts = as.vector(table((1:nDays) %% 7)))
dta

Plot

ggplot(dta, aes(Weekday, Counts)) +
  geom_bar(width=1, stat="identity", fill="cyan", col="gray", alpha=0.2) +
  geom_abline(intercept = mean(dta$Counts), slope=0, col="pink", lwd=.9) +
  coord_polar(theta = 'x', start = -pi/7) +
  theme_bw() +
  theme(panel.grid.minor.x = element_blank(),
        panel.grid.major.x = element_blank())

qplot(y = Weekday, x = Counts, data = dta, xlim = c(5200, 5225)) +
  geom_segment(aes(xend = 5200, yend = Weekday)) +
  theme_bw() +
  theme(panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank())

In-class exercise 4.

Reproduce the plot of fertility rate and college acceptance rate in Taiwan from 1981 to 2009 using the data set here.

Target output

Load and check

dta <- read.table('../data/birth_college.txt', header = TRUE)
str(dta)

'data.frame':   29 obs. of  2 variables:
 $ Birth   : num  23 22.1 20.6 19.6 18 15.9 16 17.2 15.7 16.6 ...
 $ Entrance: int  NA NA NA NA NA NA NA NA NA NA ...

summary(dta)

     Birth          Entrance    
 Min.   : 8.30   Min.   :44.00  
 1st Qu.:11.00   1st Qu.:59.50  
 Median :15.30   Median :70.50  
 Mean   :14.41   Mean   :72.25  
 3rd Qu.:16.00   3rd Qu.:89.50  
 Max.   :23.00   Max.   :97.00  
                 NA's   :13

dta

Data tranformation

dta <- dta %>% mutate(Year = 1981:2009)
dta

Data visualization

par(mar = c(5, 4, 4, 4) + 0.3) 
with(dta, plot(x = Year, y = Birth, type = 'n', yaxt = 'n',
               ylim = c(0, 60), ylab = 'Birth rate (0.1%)'))
axis(2, at = seq(0, 60, by = 10), labels = seq(0, 60, by = 10))
abline(v = 1981:2009, lty = 2, lwd = .5, col = 'grey90')
with(dta, points(x = Year, y = Birth, pch = 1))
par(new = TRUE)
with(dta, plot(x = Year, y = Entrance, type = 'p', axes = FALSE,
               ylim = c(40, 100), xlab = '', ylab = '', pch = 19))
axis(1, at = seq(1980, 2010, by = 5), labels = seq(1980, 2010, by = 5))
axis(4, at = seq(40, 100, by = 10), labels = seq(40, 100, by = 10))
mtext('Acceptance rate (%)', side = 4, line = 3)
legend(1979.875, 102.29, c('Birth', 'College'),
       pch = c(1, 19), cex = 1)