Time Series Graphics (Aug 30, 2023)

# Create a time series data
gdp <- c(1.45, 7.33, 3.86, 6.90, 6.7, 6.35, 7.15, 6.93,
         6.34, 6.12, -9.52, 5.71)
year <- 2010:2021

# Plot using the plot() function
plot(year,gdp, type="l")

# Plot using the autoplot() function
library(fpp2)

gdp <- ts(gdp, start=2010, end=2021)
autoplot(gdp)

White Noise and Random Walk (Sep 6, 2023)

# i.i.d noise

# i.i.d normal noise
Z <- rnorm(100,0,20)
Time <- c(1:100)

plot(Time, Z, type="l")

hist(Z)

# i.i.d uniform noise
Z <- runif(100,-10,10)
plot(Time, Z, type="l")

# Check for autocorrelation / serial correlation at lag 1
# Using traditional method
Z1 <- Z[1:99]
Z2 <- Z[2:100]

head(data.frame(Z1,Z2))

##            Z1          Z2
## 1  6.02745855  4.11482474
## 2  4.11482474  6.92811033
## 3  6.92811033  0.00286744
## 4  0.00286744 -6.74911663
## 5 -6.74911663 -2.61290452
## 6 -2.61290452  9.96872708

cor(Z1,Z2)

## [1] -0.08778694

# Durbin Watson test
library(car)
durbinWatsonTest(Z)

## [1] 2.159699

# Sample time series with perfect autocorrelation
Z <- rep(c(1,-1), times=50)
plot(Time, Z, type="l")

# Random Walk
a <- rnorm(100,0,1) 
S <- vector()
S[1] <- 0

# Generate 100 random walks
# Note that the random walk produces a different result every run
for(t in 2:100) {
  S[t] <- S[t-1] + a[t]
}
plot(Time, S, type="l")

# Check the autocorrelation of the random walk
durbinWatsonTest(S)

## [1] 0.03133421

Least Squares Estimation (Sept 13, 2023)

library(fpp2)
data(uschange)
class(uschange)

## [1] "mts"    "ts"     "matrix"

head(uschange)

##         Consumption     Income Production   Savings Unemployment
## 1970 Q1   0.6159862  0.9722610 -2.4527003 4.8103115          0.9
## 1970 Q2   0.4603757  1.1690847 -0.5515251 7.2879923          0.5
## 1970 Q3   0.8767914  1.5532705 -0.3587079 7.2890131          0.5
## 1970 Q4  -0.2742451 -0.2552724 -2.1854549 0.9852296          0.7
## 1971 Q1   1.8973708  1.9871536  1.9097341 3.6577706         -0.1
## 1971 Q2   0.9119929  1.4473342  0.9015358 6.0513418         -0.1

# Simple Linear Regression
autoplot(uschange[,c("Consumption","Income")]) +
  ylab("% change")

library(ggplot2)
uschange %>%
  as.data.frame() %>%
  ggplot(aes(x=Income, y=Consumption)) +
  geom_point() +
  geom_smooth(method="lm", se=FALSE)

# "Consumption and Income seems to be linearly associated
# based on the graphs"

# Fit the linear regression model
# tslm() for time series objects "mts" or "ts"

fit <- tslm(Consumption ~ Income, data=uschange)
summary(fit)

## 
## Call:
## tslm(formula = Consumption ~ Income, data = uschange)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.40845 -0.31816  0.02558  0.29978  1.45157 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.54510    0.05569   9.789  < 2e-16 ***
## Income       0.28060    0.04744   5.915 1.58e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6026 on 185 degrees of freedom
## Multiple R-squared:  0.159,  Adjusted R-squared:  0.1545 
## F-statistic: 34.98 on 1 and 185 DF,  p-value: 1.577e-08

# Exercise 1.1
# Use Income and Unemployment to Predict Consumption
autoplot(uschange[,c("Consumption","Income","Unemployment")]) + 
  ylab("% change")

library(tidyverse)
uschange %>%
  as.data.frame() %>%
  select(Consumption, Income, Unemployment) %>%
  cor()

##              Consumption     Income Unemployment
## Consumption    1.0000000  0.3987795   -0.5405567
## Income         0.3987795  1.0000000   -0.2304698
## Unemployment  -0.5405567 -0.2304698    1.0000000

uschange %>%
  as.data.frame() %>%
  ggplot(aes(x=Unemployment, y=Consumption)) +
  geom_point() +
  geom_smooth(method="lm", se=FALSE)

# Unemployment seems to be a good predictor for Consumption

# Fit multiple linear regression model
fit <- tslm(Consumption ~ Income + Unemployment, data=uschange)
summary(fit)

## 
## Call:
## tslm(formula = Consumption ~ Income + Unemployment, data = uschange)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5065 -0.3491  0.0201  0.2521  1.7427 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.60644    0.04889  12.404  < 2e-16 ***
## Income        0.20376    0.04226   4.822 2.97e-06 ***
## Unemployment -0.82752    0.10489  -7.890 2.62e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5223 on 184 degrees of freedom
## Multiple R-squared:  0.3716, Adjusted R-squared:  0.3648 
## F-statistic:  54.4 on 2 and 184 DF,  p-value: < 2.2e-16

# Check the fitted values visually
autoplot(uschange[,"Consumption"], series="Observed") +
  autolayer(fitted(fit), series="Fitted") + 
  xlab("Year") + ylab("Fitted/Observed")

# Check the prediction performance
# Observed vs Fitted
cbind(Data=uschange[,"Consumption"],
      Fitted=fitted(fit)) %>%
  as.data.frame() %>%
  ggplot(aes(x=Data, y=Fitted)) +
  geom_point() + 
  xlab("Observed") + ylab("Fitted") +
  geom_abline(intercept=0, slope=1)

# Diagnostic Checking
checkresiduals(fit)

## 
##  Breusch-Godfrey test for serial correlation of order up to 8
## 
## data:  Residuals from Linear regression model
## LM test = 19.068, df = 8, p-value = 0.0145

# Residual vs Fitted
cbind(Fitted=fitted(fit),
      Residuals=residuals(fit)) %>%
  as.data.frame() %>%
  ggplot(aes(x=Fitted, y=Residuals)) +
  geom_point()

# Influencial Observations
dist <- cooks.distance(fit)
summary(dist)

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 2.600e-07 2.718e-04 1.244e-03 9.221e-03 5.312e-03 2.727e-01

# Multicoliearity
uschange[,c("Income", "Unemployment")] %>%
  as.data.frame() %>%
  cor()

##                  Income Unemployment
## Income        1.0000000   -0.2304698
## Unemployment -0.2304698    1.0000000

# Independence of errors (residuals)
residuals(fit) %>%
  as.vector() %>%
  durbinWatsonTest()

## [1] 1.864056

# Generate Forecasts
newdata <- data.frame(
  Income = c(1,1.2,0.8,1,1.5,0.5,1),
  Unemployment = c(0,0,0,0,0,0,0)
)

fcast <- forecast(fit,newdata=newdata)
autoplot(uschange[,"Consumption"]) +
  ylab("% change in US Consumption") +
  autolayer(fcast, series="forecast")

# Assignment
# Forecast Iloilo City Dengue Cases Using
# Rainfall and Humidity 2012 - 2015
# Jan 2012 to Mar 2015
# Deadline Sept 20 (printed)

dengue <- read.csv("C:\\Users\\Asus\\Documents\\UP Files\\UPV Subjects\\Stat 145\\ilo-dengue.csv")
head(dengue)

##   Cases Rainfall Humidity
## 1   130      0.7     72.9
## 2   199      3.2     79.8
## 3   486      8.7     78.8
## 4   548     13.0     81.4
## 5   441     13.4     79.3
## 6   223      8.1     80.8

class(dengue)

## [1] "data.frame"

dengue.ts <- ts(dengue, start=c(2012,1), end=c(2015,3),
                frequency=12)
class(dengue.ts)

## [1] "mts"    "ts"     "matrix"

head(dengue.ts)

##          Cases Rainfall Humidity
## Jan 2012   130      0.7     72.9
## Feb 2012   199      3.2     79.8
## Mar 2012   486      8.7     78.8
## Apr 2012   548     13.0     81.4
## May 2012   441     13.4     79.3
## Jun 2012   223      8.1     80.8

# Nonlinear Regression as Time Series Model
data(ausair)
autoplot(ausair)

time <- c(1:47)
ausair2 <- ts(cbind(ausair,time),start=1,end=47)
head(ausair2)

## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
##    ausair time
## 1  7.3187    1
## 2  7.3266    2
## 3  7.7956    3
## 4  9.3846    4
## 5 10.6647    5
## 6 11.0551    6

# Fit a simple linear regression with time as predictor
fit1 <- tslm(ausair ~ time, data=ausair2)
summary(fit)

## 
## Call:
## tslm(formula = Consumption ~ Income + Unemployment, data = uschange)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5065 -0.3491  0.0201  0.2521  1.7427 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.60644    0.04889  12.404  < 2e-16 ***
## Income        0.20376    0.04226   4.822 2.97e-06 ***
## Unemployment -0.82752    0.10489  -7.890 2.62e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5223 on 184 degrees of freedom
## Multiple R-squared:  0.3716, Adjusted R-squared:  0.3648 
## F-statistic:  54.4 on 2 and 184 DF,  p-value: < 2.2e-16

fit2 <- tslm(ausair ~ trend, data=ausair)
summary(fit2)

## 
## Call:
## tslm(formula = ausair ~ trend, data = ausair)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.422 -4.582 -2.176  6.434 10.435 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.33603    1.78943  -1.864   0.0688 .  
## trend        1.39360    0.06491  21.470   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.036 on 45 degrees of freedom
## Multiple R-squared:  0.9111, Adjusted R-squared:  0.9091 
## F-statistic:   461 on 1 and 45 DF,  p-value: < 2.2e-16

# Fit a nonlinear regression
time <- c(1:47)
sq.time <- time^2

ausair3 <- ts(cbind(ausair,time,sq.time),
              start=1,end=47)
fit <- tslm(ausair ~ time + sq.time, data=ausair3)
summary(fit)

## 
## Call:
## tslm(formula = ausair ~ time + sq.time, data = ausair3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8196 -1.2744  0.0429  1.3279  3.7717 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.94079    0.92748  10.718 7.54e-14 ***
## time        -0.23213    0.08913  -2.604   0.0125 *  
## sq.time      0.03387    0.00180  18.812  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.03 on 44 degrees of freedom
## Multiple R-squared:  0.9902, Adjusted R-squared:  0.9897 
## F-statistic:  2215 on 2 and 44 DF,  p-value: < 2.2e-16

# Show the fitted line
autoplot(ausair3[,"ausair"], series="Observed") +
  autolayer(fitted(fit), series="Fitted") +
  xlab("Year") + ylab("Air Passengers")

autoplot(ausair2[,"ausair"], series="Observed") +
  autolayer(fitted(fit1), series="Fitted") +
  xlab("Year") + ylab("Air Passengers")

# Forecast the airpassenger in 2017, 2018, 2019, 2020

newdata <- data.frame(
  time=c(48,49,50,51),
  sq.time=c(48^2, 49^2, 50^2, 51^2)
)

fcast <- forecast(fit, newdata=newdata)
autoplot(ausair3[,"ausair"]) +
  ylab("Air Passengers") +
  autolayer(fcast, series="Forecast")

fcast$mean

## Time Series:
## Start = 48 
## End = 51 
## Frequency = 1 
##        1        2        3        4 
## 76.83354 79.88674 83.00768 86.19636

Exponential Smoothing (Sept 20,2023)

# Simple Exponential Smoothing (SES)
setwd("C:\\Users\\Asus\\Documents\\UP Files\\UPV Subjects\\Stat 145")
temp <- read.csv(".\\ilo-temp.csv")

temp.ts <- ts(temp, start=c(2012,12), end=c(2015,5), 
              frequency=12)

# Generate SES using ses() function
fcast <- ses(temp.ts, alpha=0.74, h=5)
fcast$mean

##           Jun      Jul      Aug      Sep      Oct
## 2015 28.38054 28.38054 28.38054 28.38054 28.38054

# From Excel 28.38054
autoplot(fcast) +
  autolayer(fitted(fcast), series="Smoothed") +
  ylab("Iloilo Temperature Forecast") + xlab("Year")

Stat 145 R Class Demos

Introduction to Time Series Analysis and Forecasting

Time Series Graphics (Aug 30, 2023)

White Noise and Random Walk (Sep 6, 2023)

Least Squares Estimation (Sept 13, 2023)

Exponential Smoothing (Sept 20,2023)