The goal of this tutorial is to learn how to fit a time series linear model with the tslm function and use it to forecast.
# First we load the libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(ggplot2)
library(forecast)
# This tutorial uses the daily minimum temperatures recorded in Melbourne,
# Australia, from 1981 to 1990.
# https://datamarket.com/data/set/2324/daily-minimum-temperatures-in-melbourne-australia-1981-1990
# Read the CSV from the working directory, keeping text columns as character
# vectors rather than factors, then preview the first six rows.
Temperatures <- read.csv(
  file = "daily-minimum-temperatures-in-me.csv",
  stringsAsFactors = FALSE
)
head(Temperatures, n = 6L)
## Date Daily.minimum.temperatures.in.Melbourne..Australia..1981.1990
## 1 1981-01-01 20.7
## 2 1981-01-02 17.9
## 3 1981-01-03 18.8
## 4 1981-01-04 14.6
## 5 1981-01-05 15.8
## 6 1981-01-06 15.8
# Give the two columns short, convenient names.
colnames(Temperatures) <- c("Date", "Temperature")
# Parse the date strings straight to POSIXct. Fixing the time zone keeps the
# result independent of the session's locale (a local midnight can fall inside
# a daylight-saving gap) and avoids storing an intermediate POSIXlt object in
# the data frame.
Temperatures$Date <- as.POSIXct(
  Temperatures$Date,
  format = "%Y-%m-%d",
  tz = "UTC"
)
# Convert the readings to numeric. Entries that cannot be parsed as numbers
# become NA, which is what triggers the warning below.
Temperatures$Temperature <- as.numeric(Temperatures$Temperature)
## Warning: NAs introduced by coercion
# Let's check the structure of the table
str(Temperatures)
## 'data.frame': 3652 obs. of 2 variables:
## $ Date : POSIXct, format: "1981-01-01" "1981-01-02" ...
## $ Temperature: num 20.7 17.9 18.8 14.6 15.8 15.8 15.8 17.4 21.8 20 ...
# Derive the calendar-based columns from Date in a single step:
#   MonthYear - "YYYY MM" label (year and zero-padded month)
#   Yearday   - "YYYY MM DD" label (year, zero-padded month and day)
#   Week      - week number within the year, as returned by week()
#   Year      - calendar year, stored as a factor
Temperatures <- Temperatures %>%
  mutate(
    MonthYear = sprintf("%d %02d", year(Date), month(Date)),
    Yearday = sprintf("%d %02d %02d", year(Date), month(Date), day(Date)),
    Week = week(Date),
    Year = as.factor(year(Date))
  )
# Inspect the table again now that the new columns are in place
str(Temperatures)
## 'data.frame': 3652 obs. of 6 variables:
## $ Date : POSIXct, format: "1981-01-01" "1981-01-02" ...
## $ Temperature: num 20.7 17.9 18.8 14.6 15.8 15.8 15.8 17.4 21.8 20 ...
## $ MonthYear : chr "1981 01" "1981 01" "1981 01" "1981 01" ...
## $ Yearday : chr "1981 01 01" "1981 01 02" "1981 01 03" "1981 01 04" ...
## $ Week : num 1 1 1 1 1 1 1 2 2 2 ...
## $ Year : Factor w/ 10 levels "1981","1982",..: 1 1 1 1 1 1 1 1 1 1 ...
# Average the daily readings within each "YYYY MM" group (the MonthYear
# column), ignoring missing values. aggregate() returns the groups sorted by
# label; because the month is zero-padded, that order is also chronological.
Temps_month <- aggregate(
  Temperatures$Temperature,
  by = list(Temperatures$MonthYear),
  FUN = function(x) mean(x, na.rm = TRUE)
)
# Now we create the time series with the right period (12 values per year).
# It's better to define the end of the time series explicitly if it does not
# stop at the end of a year.
myts <- ts(
  Temps_month$x,
  frequency = 12,
  start = c(1981, 1),
  end = c(1990, 12)
)
plot(myts)
# Now that the series spans several yearly periods we can decompose it
myds_month <- decompose(myts)
plot(myds_month)
# tslm() is given a data frame here.
# Its first column must be the time series itself;
# the second holds the numerical value of time for each observation.
my_df_ts <- data.frame(
  temperature = myts,
  time = as.numeric(time(myts))
)
# Fit the model with tslm, using a seasonal component and a trend
mymodel <- tslm(formula = temperature ~ season + trend, data = my_df_ts)
# Forecast from this same model: 120 monthly steps, i.e. the next 10 years
my_fc <- forecast(mymodel, h = 120)
# The plot lets us see the ascending trend
autoplot(my_fc)
In this tutorial we have learnt the steps needed to build a model with the tslm function and to forecast from it.