# 1 Goal

The goal of this tutorial is to learn how to use the time series linear model.

# 2 Preparing the data

``````#First we load the libraries
library(dplyr)``````
``````##
## Attaching package: 'dplyr'``````
``````## The following objects are masked from 'package:stats':
##
##     filter, lag``````
``````## The following objects are masked from 'package:base':
##
##     intersect, setdiff, setequal, union``````
``library(lubridate)``
``````##
## Attaching package: 'lubridate'``````
``````## The following object is masked from 'package:base':
##
##     date``````
``````library(ggplot2)
library(forecast)

# In this tutorial we will use the dataset of daily minimum temperatures in Melbourne
# https://datamarket.com/data/set/2324/daily-minimum-temperatures-in-melbourne-australia-1981-1990
Temperatures <- read.csv("daily-minimum-temperatures-in-me.csv", stringsAsFactors = FALSE)

# Preview the first rows of the raw table
head(Temperatures)``````
``````##         Date Daily.minimum.temperatures.in.Melbourne..Australia..1981.1990
## 1 1981-01-01                                                          20.7
## 2 1981-01-02                                                          17.9
## 3 1981-01-03                                                          18.8
## 4 1981-01-04                                                          14.6
## 5 1981-01-05                                                          15.8
## 6 1981-01-06                                                          15.8``````
``````# Give the two columns short, readable names
names(Temperatures) <- c("Date", "Temperature")

# Parse the date strings (year-month-day) straight into POSIXct
Temperatures\$Date <- as.POSIXct(Temperatures\$Date, format = "%Y-%m-%d")

# Convert the readings to numeric; entries that are not valid numbers become NA
Temperatures\$Temperature <- as.numeric(Temperatures\$Temperature)``````
``## Warning: NAs introduced by coercion``
``````# Let's check the structure of the table
# (Date should now be POSIXct and Temperature numeric)
str(Temperatures)``````
``````## 'data.frame':    3652 obs. of  2 variables:
##  \$ Date       : POSIXct, format: "1981-01-01" "1981-01-02" ...
##  \$ Temperature: num  20.7 17.9 18.8 14.6 15.8 15.8 15.8 17.4 21.8 20 ...``````
``````# Now we create different columns for different time configurations
Temperatures <- mutate(
  Temperatures,
  # Year and month label, e.g. "1981 01"
  MonthYear = format(Date, "%Y %m"),
  # Full calendar-day label (year month day), e.g. "1981 01 01"
  Yearday = format(Date, "%Y %m %d"),
  # Week of the year
  Week = week(Date),
  # Year, stored as a factor
  Year = factor(year(Date))
)

# Let's check the structure of the table
str(Temperatures)``````
``````## 'data.frame':    3652 obs. of  6 variables:
##  \$ Date       : POSIXct, format: "1981-01-01" "1981-01-02" ...
##  \$ Temperature: num  20.7 17.9 18.8 14.6 15.8 15.8 15.8 17.4 21.8 20 ...
##  \$ MonthYear  : chr  "1981 01" "1981 01" "1981 01" "1981 01" ...
##  \$ Yearday    : chr  "1981 01 01" "1981 01 02" "1981 01 03" "1981 01 04" ...
##  \$ Week       : num  1 1 1 1 1 1 1 2 2 2 ...
##  \$ Year       : Factor w/ 10 levels "1981","1982",..: 1 1 1 1 1 1 1 1 1 1 ...``````

# 3 Creating time series of daily temperatures

## 3.1 For each month

``````# First we aggregate by the MonthYear column to get the mean temperature of each month
# (na.rm = TRUE makes mean() skip missing readings)
Temps_month <- aggregate(Temperatures\$Temperature, by = list(Temperatures\$MonthYear),
                         FUN = mean, na.rm = TRUE)

# Now we create the time series adding the right period (12 observations per year)
# It's better to define the end of the time series if it's not at the end of one year
myts <- ts(Temps_month\$x, frequency = 12, start = c(1981, 1), end = c(1990, 12))
plot(myts)``````

``````# Now that we have several periods we can decompose
# decompose() splits the series into trend, seasonal and random components
myds_month <- decompose(myts)
# Plotting the result shows the observed series together with each component
plot(myds_month)``````