Least Squares Estimation (Sept 13, 2023)
library(fpp2)
data(uschange)
class(uschange)
## [1] "mts" "ts" "matrix"
head(uschange)
## Consumption Income Production Savings Unemployment
## 1970 Q1 0.6159862 0.9722610 -2.4527003 4.8103115 0.9
## 1970 Q2 0.4603757 1.1690847 -0.5515251 7.2879923 0.5
## 1970 Q3 0.8767914 1.5532705 -0.3587079 7.2890131 0.5
## 1970 Q4 -0.2742451 -0.2552724 -2.1854549 0.9852296 0.7
## 1971 Q1 1.8973708 1.9871536 1.9097341 3.6577706 -0.1
## 1971 Q2 0.9119929 1.4473342 0.9015358 6.0513418 -0.1
# Simple Linear Regression
autoplot(uschange[,c("Consumption","Income")]) +
ylab("% change")

library(ggplot2)
uschange %>%
as.data.frame() %>%
ggplot(aes(x=Income, y=Consumption)) +
geom_point() +
geom_smooth(method="lm", se=FALSE)

# "Consumption and Income seems to be linearly associated
# based on the graphs"
# Fit the linear regression model
# tslm() for time series objects "mts" or "ts"
fit <- tslm(Consumption ~ Income, data=uschange)
summary(fit)
##
## Call:
## tslm(formula = Consumption ~ Income, data = uschange)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.40845 -0.31816 0.02558 0.29978 1.45157
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.54510 0.05569 9.789 < 2e-16 ***
## Income 0.28060 0.04744 5.915 1.58e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6026 on 185 degrees of freedom
## Multiple R-squared: 0.159, Adjusted R-squared: 0.1545
## F-statistic: 34.98 on 1 and 185 DF, p-value: 1.577e-08
# Exercise 1.1
# Use Income and Unemployment to Predict Consumption
autoplot(uschange[,c("Consumption","Income","Unemployment")]) +
ylab("% change")

library(tidyverse)
uschange %>%
as.data.frame() %>%
select(Consumption, Income, Unemployment) %>%
cor()
## Consumption Income Unemployment
## Consumption 1.0000000 0.3987795 -0.5405567
## Income 0.3987795 1.0000000 -0.2304698
## Unemployment -0.5405567 -0.2304698 1.0000000
uschange %>%
as.data.frame() %>%
ggplot(aes(x=Unemployment, y=Consumption)) +
geom_point() +
geom_smooth(method="lm", se=FALSE)

# Unemployment seems to be a good predictor for Consumption
# Fit multiple linear regression model
fit <- tslm(Consumption ~ Income + Unemployment, data=uschange)
summary(fit)
##
## Call:
## tslm(formula = Consumption ~ Income + Unemployment, data = uschange)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5065 -0.3491 0.0201 0.2521 1.7427
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.60644 0.04889 12.404 < 2e-16 ***
## Income 0.20376 0.04226 4.822 2.97e-06 ***
## Unemployment -0.82752 0.10489 -7.890 2.62e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5223 on 184 degrees of freedom
## Multiple R-squared: 0.3716, Adjusted R-squared: 0.3648
## F-statistic: 54.4 on 2 and 184 DF, p-value: < 2.2e-16
# Check the fitted values visually
autoplot(uschange[,"Consumption"], series="Observed") +
autolayer(fitted(fit), series="Fitted") +
xlab("Year") + ylab("Fitted/Observed")

# Check the prediction performance
# Observed vs Fitted
cbind(Data=uschange[,"Consumption"],
Fitted=fitted(fit)) %>%
as.data.frame() %>%
ggplot(aes(x=Data, y=Fitted)) +
geom_point() +
xlab("Observed") + ylab("Fitted") +
geom_abline(intercept=0, slope=1)

# Diagnostic Checking
checkresiduals(fit)

##
## Breusch-Godfrey test for serial correlation of order up to 8
##
## data: Residuals from Linear regression model
## LM test = 19.068, df = 8, p-value = 0.0145
# Residual vs Fitted
cbind(Fitted=fitted(fit),
Residuals=residuals(fit)) %>%
as.data.frame() %>%
ggplot(aes(x=Fitted, y=Residuals)) +
geom_point()

# Influencial Observations
dist <- cooks.distance(fit)
summary(dist)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.600e-07 2.718e-04 1.244e-03 9.221e-03 5.312e-03 2.727e-01
# Multicoliearity
uschange[,c("Income", "Unemployment")] %>%
as.data.frame() %>%
cor()
## Income Unemployment
## Income 1.0000000 -0.2304698
## Unemployment -0.2304698 1.0000000
# Independence of errors (residuals)
residuals(fit) %>%
as.vector() %>%
durbinWatsonTest()
## [1] 1.864056
# Generate Forecasts
newdata <- data.frame(
Income = c(1,1.2,0.8,1,1.5,0.5,1),
Unemployment = c(0,0,0,0,0,0,0)
)
fcast <- forecast(fit,newdata=newdata)
autoplot(uschange[,"Consumption"]) +
ylab("% change in US Consumption") +
autolayer(fcast, series="forecast")

# Assignment
# Forecast Iloilo City Dengue Cases Using
# Rainfall and Humidity 2012 - 2015
# Jan 2012 to Mar 2015
# Deadline Sept 20 (printed)
dengue <- read.csv("C:\\Users\\Asus\\Documents\\UP Files\\UPV Subjects\\Stat 145\\ilo-dengue.csv")
head(dengue)
## Cases Rainfall Humidity
## 1 130 0.7 72.9
## 2 199 3.2 79.8
## 3 486 8.7 78.8
## 4 548 13.0 81.4
## 5 441 13.4 79.3
## 6 223 8.1 80.8
class(dengue)
## [1] "data.frame"
dengue.ts <- ts(dengue, start=c(2012,1), end=c(2015,3),
frequency=12)
class(dengue.ts)
## [1] "mts" "ts" "matrix"
head(dengue.ts)
## Cases Rainfall Humidity
## Jan 2012 130 0.7 72.9
## Feb 2012 199 3.2 79.8
## Mar 2012 486 8.7 78.8
## Apr 2012 548 13.0 81.4
## May 2012 441 13.4 79.3
## Jun 2012 223 8.1 80.8
# Nonlinear Regression as Time Series Model
data(ausair)
autoplot(ausair)

time <- c(1:47)
ausair2 <- ts(cbind(ausair,time),start=1,end=47)
head(ausair2)
## Time Series:
## Start = 1
## End = 6
## Frequency = 1
## ausair time
## 1 7.3187 1
## 2 7.3266 2
## 3 7.7956 3
## 4 9.3846 4
## 5 10.6647 5
## 6 11.0551 6
# Fit a simple linear regression with time as predictor
fit1 <- tslm(ausair ~ time, data=ausair2)
summary(fit)
##
## Call:
## tslm(formula = Consumption ~ Income + Unemployment, data = uschange)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5065 -0.3491 0.0201 0.2521 1.7427
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.60644 0.04889 12.404 < 2e-16 ***
## Income 0.20376 0.04226 4.822 2.97e-06 ***
## Unemployment -0.82752 0.10489 -7.890 2.62e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5223 on 184 degrees of freedom
## Multiple R-squared: 0.3716, Adjusted R-squared: 0.3648
## F-statistic: 54.4 on 2 and 184 DF, p-value: < 2.2e-16
fit2 <- tslm(ausair ~ trend, data=ausair)
summary(fit2)
##
## Call:
## tslm(formula = ausair ~ trend, data = ausair)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.422 -4.582 -2.176 6.434 10.435
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.33603 1.78943 -1.864 0.0688 .
## trend 1.39360 0.06491 21.470 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.036 on 45 degrees of freedom
## Multiple R-squared: 0.9111, Adjusted R-squared: 0.9091
## F-statistic: 461 on 1 and 45 DF, p-value: < 2.2e-16
# Fit a nonlinear regression
time <- c(1:47)
sq.time <- time^2
ausair3 <- ts(cbind(ausair,time,sq.time),
start=1,end=47)
fit <- tslm(ausair ~ time + sq.time, data=ausair3)
summary(fit)
##
## Call:
## tslm(formula = ausair ~ time + sq.time, data = ausair3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8196 -1.2744 0.0429 1.3279 3.7717
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.94079 0.92748 10.718 7.54e-14 ***
## time -0.23213 0.08913 -2.604 0.0125 *
## sq.time 0.03387 0.00180 18.812 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.03 on 44 degrees of freedom
## Multiple R-squared: 0.9902, Adjusted R-squared: 0.9897
## F-statistic: 2215 on 2 and 44 DF, p-value: < 2.2e-16
# Show the fitted line
autoplot(ausair3[,"ausair"], series="Observed") +
autolayer(fitted(fit), series="Fitted") +
xlab("Year") + ylab("Air Passengers")

autoplot(ausair2[,"ausair"], series="Observed") +
autolayer(fitted(fit1), series="Fitted") +
xlab("Year") + ylab("Air Passengers")

# Forecast the airpassenger in 2017, 2018, 2019, 2020
newdata <- data.frame(
time=c(48,49,50,51),
sq.time=c(48^2, 49^2, 50^2, 51^2)
)
fcast <- forecast(fit, newdata=newdata)
autoplot(ausair3[,"ausair"]) +
ylab("Air Passengers") +
autolayer(fcast, series="Forecast")

fcast$mean
## Time Series:
## Start = 48
## End = 51
## Frequency = 1
## 1 2 3 4
## 76.83354 79.88674 83.00768 86.19636