Time Series Graphics (Aug 30, 2023)

# Create a time series data
gdp <- c(1.45, 7.33, 3.86, 6.90, 6.7, 6.35, 7.15, 6.93,
         6.34, 6.12, -9.52, 5.71)
year <- 2010:2021

# Plot using the plot() function
plot(year,gdp, type="l")

# Plot using the autoplot() function
library(fpp2)

gdp <- ts(gdp, start=2010, end=2021)
autoplot(gdp)

White Noise and Random Walk (Sep 6, 2023)

# i.i.d noise

# i.i.d normal noise
Z <- rnorm(100,0,20)
Time <- c(1:100)

plot(Time, Z, type="l")

hist(Z)

# i.i.d uniform noise
Z <- runif(100,-10,10)
plot(Time, Z, type="l")

# Check for autocorrelation / serial correlation at lag 1
# Using traditional method
Z1 <- Z[1:99]
Z2 <- Z[2:100]

head(data.frame(Z1,Z2))
##            Z1          Z2
## 1  6.02745855  4.11482474
## 2  4.11482474  6.92811033
## 3  6.92811033  0.00286744
## 4  0.00286744 -6.74911663
## 5 -6.74911663 -2.61290452
## 6 -2.61290452  9.96872708
cor(Z1,Z2)
## [1] -0.08778694
# Durbin Watson test
library(car)
durbinWatsonTest(Z)
## [1] 2.159699
# Sample time series with perfect autocorrelation
Z <- rep(c(1,-1), times=50)
plot(Time, Z, type="l")

# Random Walk
a <- rnorm(100,0,1) 
S <- vector()
S[1] <- 0

# Generate 100 random walks
# Note that the random walk produces a different result every run
for(t in 2:100) {
  S[t] <- S[t-1] + a[t]
}
plot(Time, S, type="l")

# Check the autocorrelation of the random walk
durbinWatsonTest(S)
## [1] 0.03133421

Least Squares Estimation (Sept 13, 2023)

library(fpp2)
data(uschange)
class(uschange)
## [1] "mts"    "ts"     "matrix"
head(uschange)
##         Consumption     Income Production   Savings Unemployment
## 1970 Q1   0.6159862  0.9722610 -2.4527003 4.8103115          0.9
## 1970 Q2   0.4603757  1.1690847 -0.5515251 7.2879923          0.5
## 1970 Q3   0.8767914  1.5532705 -0.3587079 7.2890131          0.5
## 1970 Q4  -0.2742451 -0.2552724 -2.1854549 0.9852296          0.7
## 1971 Q1   1.8973708  1.9871536  1.9097341 3.6577706         -0.1
## 1971 Q2   0.9119929  1.4473342  0.9015358 6.0513418         -0.1
# Simple Linear Regression
autoplot(uschange[,c("Consumption","Income")]) +
  ylab("% change")

library(ggplot2)
uschange %>%
  as.data.frame() %>%
  ggplot(aes(x=Income, y=Consumption)) +
  geom_point() +
  geom_smooth(method="lm", se=FALSE)

# "Consumption and Income seems to be linearly associated
# based on the graphs"

# Fit the linear regression model
# tslm() for time series objects "mts" or "ts"

fit <- tslm(Consumption ~ Income, data=uschange)
summary(fit)
## 
## Call:
## tslm(formula = Consumption ~ Income, data = uschange)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.40845 -0.31816  0.02558  0.29978  1.45157 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.54510    0.05569   9.789  < 2e-16 ***
## Income       0.28060    0.04744   5.915 1.58e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6026 on 185 degrees of freedom
## Multiple R-squared:  0.159,  Adjusted R-squared:  0.1545 
## F-statistic: 34.98 on 1 and 185 DF,  p-value: 1.577e-08
# Exercise 1.1
# Use Income and Unemployment to Predict Consumption
autoplot(uschange[,c("Consumption","Income","Unemployment")]) + 
  ylab("% change")

library(tidyverse)
uschange %>%
  as.data.frame() %>%
  select(Consumption, Income, Unemployment) %>%
  cor()
##              Consumption     Income Unemployment
## Consumption    1.0000000  0.3987795   -0.5405567
## Income         0.3987795  1.0000000   -0.2304698
## Unemployment  -0.5405567 -0.2304698    1.0000000
uschange %>%
  as.data.frame() %>%
  ggplot(aes(x=Unemployment, y=Consumption)) +
  geom_point() +
  geom_smooth(method="lm", se=FALSE)

# Unemployment seems to be a good predictor for Consumption

# Fit multiple linear regression model
fit <- tslm(Consumption ~ Income + Unemployment, data=uschange)
summary(fit)
## 
## Call:
## tslm(formula = Consumption ~ Income + Unemployment, data = uschange)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5065 -0.3491  0.0201  0.2521  1.7427 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.60644    0.04889  12.404  < 2e-16 ***
## Income        0.20376    0.04226   4.822 2.97e-06 ***
## Unemployment -0.82752    0.10489  -7.890 2.62e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5223 on 184 degrees of freedom
## Multiple R-squared:  0.3716, Adjusted R-squared:  0.3648 
## F-statistic:  54.4 on 2 and 184 DF,  p-value: < 2.2e-16
# Check the fitted values visually
autoplot(uschange[,"Consumption"], series="Observed") +
  autolayer(fitted(fit), series="Fitted") + 
  xlab("Year") + ylab("Fitted/Observed")

# Check the prediction performance
# Observed vs Fitted
cbind(Data=uschange[,"Consumption"],
      Fitted=fitted(fit)) %>%
  as.data.frame() %>%
  ggplot(aes(x=Data, y=Fitted)) +
  geom_point() + 
  xlab("Observed") + ylab("Fitted") +
  geom_abline(intercept=0, slope=1)

# Diagnostic Checking
checkresiduals(fit)

## 
##  Breusch-Godfrey test for serial correlation of order up to 8
## 
## data:  Residuals from Linear regression model
## LM test = 19.068, df = 8, p-value = 0.0145
# Residual vs Fitted
cbind(Fitted=fitted(fit),
      Residuals=residuals(fit)) %>%
  as.data.frame() %>%
  ggplot(aes(x=Fitted, y=Residuals)) +
  geom_point()

# Influencial Observations
dist <- cooks.distance(fit)
summary(dist)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 2.600e-07 2.718e-04 1.244e-03 9.221e-03 5.312e-03 2.727e-01
# Multicoliearity
uschange[,c("Income", "Unemployment")] %>%
  as.data.frame() %>%
  cor()
##                  Income Unemployment
## Income        1.0000000   -0.2304698
## Unemployment -0.2304698    1.0000000
# Independence of errors (residuals)
residuals(fit) %>%
  as.vector() %>%
  durbinWatsonTest()
## [1] 1.864056
# Generate Forecasts
newdata <- data.frame(
  Income = c(1,1.2,0.8,1,1.5,0.5,1),
  Unemployment = c(0,0,0,0,0,0,0)
)

fcast <- forecast(fit,newdata=newdata)
autoplot(uschange[,"Consumption"]) +
  ylab("% change in US Consumption") +
  autolayer(fcast, series="forecast")

# Assignment
# Forecast Iloilo City Dengue Cases Using
# Rainfall and Humidity 2012 - 2015
# Jan 2012 to Mar 2015
# Deadline Sept 20 (printed)

dengue <- read.csv("C:\\Users\\Asus\\Documents\\UP Files\\UPV Subjects\\Stat 145\\ilo-dengue.csv")
head(dengue)
##   Cases Rainfall Humidity
## 1   130      0.7     72.9
## 2   199      3.2     79.8
## 3   486      8.7     78.8
## 4   548     13.0     81.4
## 5   441     13.4     79.3
## 6   223      8.1     80.8
class(dengue)
## [1] "data.frame"
dengue.ts <- ts(dengue, start=c(2012,1), end=c(2015,3),
                frequency=12)
class(dengue.ts)
## [1] "mts"    "ts"     "matrix"
head(dengue.ts)
##          Cases Rainfall Humidity
## Jan 2012   130      0.7     72.9
## Feb 2012   199      3.2     79.8
## Mar 2012   486      8.7     78.8
## Apr 2012   548     13.0     81.4
## May 2012   441     13.4     79.3
## Jun 2012   223      8.1     80.8
# Nonlinear Regression as Time Series Model
data(ausair)
autoplot(ausair)

time <- c(1:47)
ausair2 <- ts(cbind(ausair,time),start=1,end=47)
head(ausair2)
## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
##    ausair time
## 1  7.3187    1
## 2  7.3266    2
## 3  7.7956    3
## 4  9.3846    4
## 5 10.6647    5
## 6 11.0551    6
# Fit a simple linear regression with time as predictor
fit1 <- tslm(ausair ~ time, data=ausair2)
summary(fit)
## 
## Call:
## tslm(formula = Consumption ~ Income + Unemployment, data = uschange)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5065 -0.3491  0.0201  0.2521  1.7427 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.60644    0.04889  12.404  < 2e-16 ***
## Income        0.20376    0.04226   4.822 2.97e-06 ***
## Unemployment -0.82752    0.10489  -7.890 2.62e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5223 on 184 degrees of freedom
## Multiple R-squared:  0.3716, Adjusted R-squared:  0.3648 
## F-statistic:  54.4 on 2 and 184 DF,  p-value: < 2.2e-16
fit2 <- tslm(ausair ~ trend, data=ausair)
summary(fit2)
## 
## Call:
## tslm(formula = ausair ~ trend, data = ausair)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.422 -4.582 -2.176  6.434 10.435 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.33603    1.78943  -1.864   0.0688 .  
## trend        1.39360    0.06491  21.470   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.036 on 45 degrees of freedom
## Multiple R-squared:  0.9111, Adjusted R-squared:  0.9091 
## F-statistic:   461 on 1 and 45 DF,  p-value: < 2.2e-16
# Fit a nonlinear regression
time <- c(1:47)
sq.time <- time^2

ausair3 <- ts(cbind(ausair,time,sq.time),
              start=1,end=47)
fit <- tslm(ausair ~ time + sq.time, data=ausair3)
summary(fit)
## 
## Call:
## tslm(formula = ausair ~ time + sq.time, data = ausair3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8196 -1.2744  0.0429  1.3279  3.7717 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.94079    0.92748  10.718 7.54e-14 ***
## time        -0.23213    0.08913  -2.604   0.0125 *  
## sq.time      0.03387    0.00180  18.812  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.03 on 44 degrees of freedom
## Multiple R-squared:  0.9902, Adjusted R-squared:  0.9897 
## F-statistic:  2215 on 2 and 44 DF,  p-value: < 2.2e-16
# Show the fitted line
autoplot(ausair3[,"ausair"], series="Observed") +
  autolayer(fitted(fit), series="Fitted") +
  xlab("Year") + ylab("Air Passengers")

autoplot(ausair2[,"ausair"], series="Observed") +
  autolayer(fitted(fit1), series="Fitted") +
  xlab("Year") + ylab("Air Passengers")

# Forecast the airpassenger in 2017, 2018, 2019, 2020

newdata <- data.frame(
  time=c(48,49,50,51),
  sq.time=c(48^2, 49^2, 50^2, 51^2)
)

fcast <- forecast(fit, newdata=newdata)
autoplot(ausair3[,"ausair"]) +
  ylab("Air Passengers") +
  autolayer(fcast, series="Forecast")

fcast$mean
## Time Series:
## Start = 48 
## End = 51 
## Frequency = 1 
##        1        2        3        4 
## 76.83354 79.88674 83.00768 86.19636

Exponential Smoothing (Sept 20,2023)

# Simple Exponential Smoothing (SES)
setwd("C:\\Users\\Asus\\Documents\\UP Files\\UPV Subjects\\Stat 145")
temp <- read.csv(".\\ilo-temp.csv")

temp.ts <- ts(temp, start=c(2012,12), end=c(2015,5), 
              frequency=12)

# Generate SES using ses() function
fcast <- ses(temp.ts, alpha=0.74, h=5)
fcast$mean
##           Jun      Jul      Aug      Sep      Oct
## 2015 28.38054 28.38054 28.38054 28.38054 28.38054
# From Excel 28.38054
autoplot(fcast) +
  autolayer(fitted(fcast), series="Smoothed") +
  ylab("Iloilo Temperature Forecast") + xlab("Year")