library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(ggplot2)  
library(scales)   
library(gridExtra)
library(readr)

## 
## Attaching package: 'readr'

## The following object is masked from 'package:scales':
## 
##     col_factor

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## v tibble  3.0.5     v dplyr   1.0.3
## v tidyr   1.1.2     v stringr 1.4.0
## v purrr   0.3.4     v forcats 0.5.0

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x readr::col_factor()      masks scales::col_factor()
## x dplyr::combine()         masks gridExtra::combine()
## x lubridate::date()        masks base::date()
## x purrr::discard()         masks scales::discard()
## x dplyr::filter()          masks stats::filter()
## x lubridate::intersect()   masks base::intersect()
## x dplyr::lag()             masks stats::lag()
## x lubridate::setdiff()     masks base::setdiff()
## x lubridate::union()       masks base::union()

# Read the daily Harvard Forest meteorological data (2009-2011).
# The flag column f.prec is otherwise guessed as logical, so the "E" flags that
# first appear around row 1087 fail to parse and are turned into NA. Declaring
# it as character (like the other f.* flag columns) keeps those values; all
# remaining columns are still guessed.
harMetDaily.09.11 <- read_csv(
  "Met_HARV_Daily_2009_2011.csv",
  col_types = cols(f.prec = col_character())
)

## Warning: Missing column names filled in: 'X1' [1]

## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   date = col_date(format = ""),
##   f.airt = col_character(),
##   f.airtmax = col_character(),
##   f.airtmin = col_character(),
##   f.rh = col_character(),
##   f.rhmax = col_character(),
##   f.rhmin = col_character(),
##   f.dewp = col_character(),
##   f.dewpmax = col_character(),
##   f.dewpmin = col_character(),
##   f.prec = col_logical(),
##   f.slrt = col_character(),
##   f.part = col_character(),
##   f.netr = col_character(),
##   f.bar = col_character(),
##   f.wspd = col_character(),
##   f.wres = col_character(),
##   f.wdir = col_character(),
##   f.wdev = col_character(),
##   f.gspd = col_character()
##   # ... with 3 more columns
## )
## i Use `spec()` for the full column specifications.

## Warning: 9 parsing failures.
##  row    col           expected actual                           file
## 1087 f.prec 1/0/T/F/TRUE/FALSE      E 'Met_HARV_Daily_2009_2011.csv'
## 1088 f.prec 1/0/T/F/TRUE/FALSE      E 'Met_HARV_Daily_2009_2011.csv'
## 1089 f.prec 1/0/T/F/TRUE/FALSE      E 'Met_HARV_Daily_2009_2011.csv'
## 1090 f.prec 1/0/T/F/TRUE/FALSE      E 'Met_HARV_Daily_2009_2011.csv'
## 1091 f.prec 1/0/T/F/TRUE/FALSE      E 'Met_HARV_Daily_2009_2011.csv'
## .... ...... .................. ...... ..............................
## See problems(...) for more details.

# The data is available on the following website: https://data.neonscience.org/

# Inspect the structure of the date column to confirm how it was parsed.
str(harMetDaily.09.11[["date"]])

##  Date[1:1095], format: "2009-01-01" "2009-01-02" "2009-01-03" "2009-01-04" "2009-01-05" ...

# Preview the first ten rows of the date and air temperature columns.
harMetDaily.09.11 %>%
  select(date, airt) %>%
  slice_head(n = 10)

## # A tibble: 10 x 2
##    date        airt
##    <date>     <dbl>
##  1 2009-01-01 -15.1
##  2 2009-01-02  -9.1
##  3 2009-01-03  -5.5
##  4 2009-01-04  -6.4
##  5 2009-01-05  -2.4
##  6 2009-01-06  -4.9
##  7 2009-01-07  -2.6
##  8 2009-01-08  -3.2
##  9 2009-01-09  -9.9
## 10 2009-01-10 -11.1

# Keep only the two columns needed for the temperature time series.
df <- select(harMetDaily.09.11, date, airt)

# Scatter plot of daily air temperature (purple diamonds) with a green
# smoother on top; x-axis labels show abbreviated month and two-digit year.
ggplot(data = df, mapping = aes(x = date, y = airt)) +
  geom_point(colour = "purple", size = 3, shape = 18) +
  stat_smooth(colour = "green") +
  scale_x_date(labels = date_format("%b %y")) +
  labs(
    title = "Air Temperature (2009-2011)\nNEON Harvard Forest Field Site",
    x = "Date",
    y = expression(paste("Temperature ( ", degree ~ C, " )"))
  ) +
  theme(
    plot.title = element_text(
      color = "darkblue",
      size = 14,
      face = "bold.italic"
    ),
    text = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold")
  )

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# https://www.neonscience.org/resources/learning-hub/tutorials/dc-time-series-plot-ggplot-r

APPLICATION

1. Number of crimes in Toronto

# Load the Toronto crime records; column types are guessed by readr.
Torontocrimes <- read_csv(file = "Torontocrimes.csv")

## Warning: Missing column names filled in: 'X1' [1]

## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   event_unique_id = col_character(),
##   occurrencedate = col_datetime(format = ""),
##   reporteddate = col_datetime(format = ""),
##   premisetype = col_character(),
##   offence = col_character(),
##   reportedmonth = col_character(),
##   reporteddayofweek = col_character(),
##   occurrencemonth = col_character(),
##   occurrencedayofweek = col_character(),
##   MCI = col_character(),
##   Division = col_character(),
##   Neighbourhood = col_character()
## )
## i Use `spec()` for the full column specifications.

# Count records per distinct occurrence timestamp (date and time of day).
nf <- Torontocrimes %>%
  count(occurrencedate)

# Line plot of those per-timestamp counts over time.
ggplot(data = nf, mapping = aes(x = occurrencedate, y = n)) +
  geom_line()

If we are interested in the number of crimes per day, we first convert the occurrence timestamps to calendar dates:

# Inspect the structure of the occurrence timestamp column.
str(Torontocrimes[["occurrencedate"]])

##  POSIXct[1:166500], format: "2014-06-20 10:55:00" "2014-07-02 00:20:00" "2014-07-02 00:20:00" ...

# Number of records per calendar day in 2018; show the first ten days.
Torontocrimes %>%
  filter(occurrenceyear == 2018) %>%
  transmute(date = as.Date(occurrencedate)) %>%
  count(date, name = "frequency") %>%
  head(n = 10)

## # A tibble: 10 x 2
##    date       frequency
##    <date>         <int>
##  1 2018-01-01       177
##  2 2018-01-02        83
##  3 2018-01-03        67
##  4 2018-01-04        72
##  5 2018-01-05       100
##  6 2018-01-06        89
##  7 2018-01-07        90
##  8 2018-01-08       119
##  9 2018-01-09        87
## 10 2018-01-10        90

Checking the count for 1 January 2018 against the year, month and day columns:

# Cross-check: count the records for 1 January 2018 using the separate
# year / month / day columns instead of the timestamp.
Torontocrimes %>%
  filter(
    occurrenceyear == 2018,
    occurrencemonth == "January",
    occurrenceday == 1
  ) %>%
  count()

## # A tibble: 1 x 1
##       n
##   <int>
## 1   177

# Daily record counts for 2018: one row per calendar date.
nf <- Torontocrimes %>%
  filter(occurrenceyear == 2018) %>%
  transmute(date = as.Date(occurrencedate)) %>%
  count(date, name = "frequency")

# Line plot of the daily counts with bold 14-point text.
ggplot(data = nf, mapping = aes(x = date, y = frequency)) +
  geom_line() +
  theme(
    plot.title = element_text(
      color = "darkblue",
      size = 14,
      face = "bold.italic"
    ),
    text = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold")
  )

Time-Series Calendar Heatmaps

Using ggTimeSeries package

library(ggTimeSeries)

# Calendar heatmap of daily record counts across all years in the data,
# one facet row per year, coloured from yellow (low) to red (high).
Torontocrimes %>%
  transmute(date = as.Date(occurrencedate)) %>%
  count(date, name = "frequency") %>%
  ggplot_calendar_heatmap("date", "frequency") +
  labs(x = NULL, y = NULL) +
  facet_wrap(~Year, ncol = 1) +
  scale_fill_gradient(low = "yellow", high = "red") +
  theme(
    plot.title = element_text(
      color = "darkblue",
      size = 14,
      face = "bold.italic"
    ),
    text = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold")
  )

library(gapminder)

# Life expectancy in Canada over time (gapminder data), drawn as a line with
# blue points.
# theme_minimal() is a complete theme: adding it after theme() replaces the
# earlier customisations, which silently dropped the panel border. It is now
# applied first so the border is actually drawn. The duplicated labs() call
# and the text size that a later theme() overrode are merged into single calls.
gapminder %>%
  filter(country == "Canada") %>%
  select(year, lifeExp) %>%
  ggplot(aes(x = year, y = lifeExp)) +
  geom_line() +
  geom_point(size = 4, color = "blue") +
  labs(
    title = "Life expectancy in Canada",
    x = NULL,
    y = "Life expectancy (Years)"
  ) +
  theme_minimal() +
  theme(
    panel.border = element_rect(fill = NA, colour = "grey20"),
    plot.title = element_text(
      color = "darkblue",
      size = 14,
      face = "bold.italic"
    ),
    text = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold")
  )

Visualizing Time Series

APPLICATION

1. Number of crimes in Toronto

Time-Series Calendar Heatmaps

Using ggTimeSeries package