1 Goal


The goal of this tutorial is to learn how to use the time series linear model.


2 Preparing the data


# First we load the libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(ggplot2)
library(forecast)

# This tutorial works with the daily minimum temperatures recorded in Melbourne
# (Australia) between 1981 and 1990.
# https://datamarket.com/data/set/2324/daily-minimum-temperatures-in-melbourne-australia-1981-1990
Temperatures <- read.csv(
  file = "daily-minimum-temperatures-in-me.csv",
  stringsAsFactors = FALSE
)
head(Temperatures)
##         Date Daily.minimum.temperatures.in.Melbourne..Australia..1981.1990
## 1 1981-01-01                                                          20.7
## 2 1981-01-02                                                          17.9
## 3 1981-01-03                                                          18.8
## 4 1981-01-04                                                          14.6
## 5 1981-01-05                                                          15.8
## 6 1981-01-06                                                          15.8
# Replace the long auto-generated header with two short column names
names(Temperatures) <- c("Date", "Temperature")

# Parse the "YYYY-MM-DD" strings and store the result as POSIXct
Temperatures[["Date"]] <- as.POSIXct(
  strptime(Temperatures[["Date"]], "%Y-%m-%d")
)
# Coerce the readings to numeric; entries that are not valid numbers become NA,
# which is what triggers the warning shown below
Temperatures[["Temperature"]] <- as.numeric(Temperatures[["Temperature"]])
## Warning: NAs introduced by coercion
# Inspect the resulting column types
str(Temperatures)
## 'data.frame':    3652 obs. of  2 variables:
##  $ Date       : POSIXct, format: "1981-01-01" "1981-01-02" ...
##  $ Temperature: num  20.7 17.9 18.8 14.6 15.8 15.8 15.8 17.4 21.8 20 ...
# Derive calendar helper columns from Date in a single mutate() call:
#   MonthYear - "YYYY MM" label (month zero-padded to two digits)
#   Yearday   - "YYYY MM DD" label (month and day zero-padded to two digits)
#   Week      - week of the year, as returned by week()
#   Year      - calendar year, stored as a factor
Temperatures <- mutate(
  Temperatures,
  MonthYear = paste(year(Date), formatC(month(Date), width = 2, flag = "0")),
  Yearday = paste(
    year(Date),
    formatC(month(Date), width = 2, flag = "0"),
    formatC(day(Date), width = 2, flag = "0")
  ),
  Week = week(Date),
  Year = as.factor(year(Date))
)
# Inspect the table again to confirm the new columns and their types
str(Temperatures)
## 'data.frame':    3652 obs. of  6 variables:
##  $ Date       : POSIXct, format: "1981-01-01" "1981-01-02" ...
##  $ Temperature: num  20.7 17.9 18.8 14.6 15.8 15.8 15.8 17.4 21.8 20 ...
##  $ MonthYear  : chr  "1981 01" "1981 01" "1981 01" "1981 01" ...
##  $ Yearday    : chr  "1981 01 01" "1981 01 02" "1981 01 03" "1981 01 04" ...
##  $ Week       : num  1 1 1 1 1 1 1 2 2 2 ...
##  $ Year       : Factor w/ 10 levels "1981","1982",..: 1 1 1 1 1 1 1 1 1 1 ...

3 Creating time series of daily temperatures

3.1 For each month


# Average the daily temperatures within each "YYYY MM" group (the MonthYear
# column); missing readings are ignored when computing each monthly mean.
Temps_month <- aggregate(
  Temperatures$Temperature,
  by = list(Temperatures$MonthYear),
  FUN = function(x) mean(x, na.rm = TRUE)
)

# Build a monthly series (12 observations per year) running from January 1981
# to December 1990. ts() takes the values positionally, so this relies on the
# rows of Temps_month being in chronological order. Giving `end` explicitly
# pins the length of the series instead of deriving it from the number of rows,
# which matters when the data does not stop at the end of a year.
myts <- ts(Temps_month$x, frequency = 12, start = c(1981, 1), end = c(1990, 12))
plot(myts)

# With several complete yearly periods available, the series can be split into
# trend, seasonal and random components.
myds_month <- decompose(myts)
plot(myds_month)