Data Example: Temperature in Atlanta, Georgia
Data: Average monthly temperature records starting in 1879 until 2016.
Available from the iWearherNet.com The Weather Bureau (now the National Weather Service) began keeping weather records for Atlanta 138 years, 8 months and 19 days ago on October 1, 1878. Provided in Fahrenheit degrees
Is there seasonality and trend in temperature in Atlanta? Is the residual process after removing seasonality and trend stationary?
Remove trend and seasonal components to get stationary residuals.
Choose a model to fit the residual process.
Forecasting can be carried out by forecasting residuals and then inverting the transformations carried out in Step 2.
data = read.table("AvTempAtlanta.txt",header=T)
names(data)
[1] "Year" "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct"
[12] "Nov" "Dec" "Annual"
temp = as.vector(t(data[,-c(1,14)]))
temp = ts(temp,start=1879,frequency=12)
ts.plot(temp, ylab="Temperature")
Example for kernel regression In this example a data generated from normal distribution is used. Kernel regression using normal kernel is applied to the data.
x=rnorm(1000,0,1)
y=rnorm(1000,0,.02)+sin(x)
plot(x,y)
ysmooth=ksmooth(x,y,kernel = "normal")
lines(ysmooth, col="red")
Create equally spaced time points for fitting trends
time.pts = c(1:length(temp))
time.pts = c(time.pts - min(time.pts))/max(time.pts)
mav.fit = ksmooth(time.pts, temp, kernel = "box")
temp.fit.mav = ts(mav.fit$y,start=1879,frequency=12)
ts.plot(temp,ylab="Temperature")
lines(temp.fit.mav,lwd=2,col="purple")
abline(temp.fit.mav[1],0,lwd=2,col="blue")
x1 = time.pts
x2 = time.pts^2
lm.fit = lm(temp~x1+x2)
summary(lm.fit)
Call:
lm(formula = temp ~ x1 + x2)
Residuals:
Min 1Q Median 3Q Max
-32.770 -11.886 0.514 13.418 22.942
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 61.4247 0.9841 62.420 <2e-16 ***
x1 -1.5723 4.5481 -0.346 0.730
x2 3.4937 4.4062 0.793 0.428
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 13.36 on 1653 degrees of freedom
Multiple R-squared: 0.002098, Adjusted R-squared: 0.0008903
F-statistic: 1.737 on 2 and 1653 DF, p-value: 0.1763
temp.fit.lm = ts(fitted(lm.fit),start=1879,frequency=12)
ts.plot(temp,ylab="Temperature")
lines(temp.fit.lm,lwd=2,col="green")
abline(temp.fit.lm[1],0,lwd=2,col="blue")
LOESS (locally estimated scatterplot smoothing)
loc.fit = loess(temp~time.pts)
temp.fit.loc = ts(fitted(loc.fit),start=1879,frequency=12)
gam: Generalized additive models with integrated smoothness estimation. Fits a generalized additive model (GAM) to data, the term `GAM’ being taken to include any quadratically penalized GLM and a variety of other models estimated by a quadratically penalised likelihood type approach
library(mgcv)
Loading required package: nlme
Attaching package: ‘nlme’
The following object is masked from ‘package:forecast’:
getResponse
This is mgcv 1.8-17. For overview type 'help("mgcv-package")'.
gam.fit = gam(temp~s(time.pts))
temp.fit.gam = ts(fitted(gam.fit),start=1879,frequency=12)
ts.plot(temp,ylab="Temperature")
lines(temp.fit.loc,lwd=2,col="brown")
lines(temp.fit.gam,lwd=2,col="red")
abline(temp.fit.loc[1],0,lwd=2,col="blue")
all.val = c(temp.fit.mav,temp.fit.lm,temp.fit.gam,temp.fit.loc)
ylim= c(min(all.val),max(all.val))
ts.plot(temp.fit.lm,lwd=2,col="green",ylim=ylim,ylab="Temperature")
lines(temp.fit.mav,lwd=2,col="purple")
lines(temp.fit.gam,lwd=2,col="red")
lines(temp.fit.loc,lwd=2,col="brown")
legend(x=1900,y=64,legend=c("MAV","LM","GAM","LOESS"),lty = 1, col=c("purple","green","red","brown"))
library(TSA)
package ‘TSA’ was built under R version 3.4.4
Attaching package: ‘TSA’
The following objects are masked from ‘package:stats’:
acf, arima
The following object is masked from ‘package:utils’:
tar
month = season(temp)
model1 = lm(temp~month)
summary(model1)
Call:
lm(formula = temp ~ month)
Residuals:
Min 1Q Median 3Q Max
-13.9072 -1.9636 -0.0986 1.9437 12.5275
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 43.2072 0.2725 158.534 < 2e-16 ***
monthFebruary 2.7514 0.3854 7.139 1.41e-12 ***
monthMarch 10.0232 0.3854 26.005 < 2e-16 ***
monthApril 18.4014 0.3854 47.742 < 2e-16 ***
monthMay 26.5623 0.3854 68.916 < 2e-16 ***
monthJune 33.4913 0.3854 86.893 < 2e-16 ***
monthJuly 35.7978 0.3854 92.877 < 2e-16 ***
monthAugust 35.0630 0.3854 90.971 < 2e-16 ***
monthSeptember 30.0913 0.3854 78.071 < 2e-16 ***
monthOctober 19.7543 0.3854 51.252 < 2e-16 ***
monthNovember 9.3420 0.3854 24.238 < 2e-16 ***
monthDecember 1.8652 0.3854 4.839 1.43e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.202 on 1644 degrees of freedom
Multiple R-squared: 0.943, Adjusted R-squared: 0.9427
F-statistic: 2474 on 11 and 1644 DF, p-value: < 2.2e-16
model2 = lm(temp~month-1)
summary(model2)
Call:
lm(formula = temp ~ month - 1)
Residuals:
Min 1Q Median 3Q Max
-13.9072 -1.9636 -0.0986 1.9437 12.5275
Coefficients:
Estimate Std. Error t value Pr(>|t|)
monthJanuary 43.2072 0.2725 158.5 <2e-16 ***
monthFebruary 45.9587 0.2725 168.6 <2e-16 ***
monthMarch 53.2304 0.2725 195.3 <2e-16 ***
monthApril 61.6087 0.2725 226.1 <2e-16 ***
monthMay 69.7696 0.2725 256.0 <2e-16 ***
monthJune 76.6986 0.2725 281.4 <2e-16 ***
monthJuly 79.0051 0.2725 289.9 <2e-16 ***
monthAugust 78.2703 0.2725 287.2 <2e-16 ***
monthSeptember 73.2986 0.2725 268.9 <2e-16 ***
monthOctober 62.9616 0.2725 231.0 <2e-16 ***
monthNovember 52.5493 0.2725 192.8 <2e-16 ***
monthDecember 45.0725 0.2725 165.4 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.202 on 1644 degrees of freedom
Multiple R-squared: 0.9975, Adjusted R-squared: 0.9974
F-statistic: 5.369e+04 on 12 and 1644 DF, p-value: < 2.2e-16
har1=harmonic(temp,1)
model3=lm(temp~har1)
summary(model3)
Call:
lm(formula = temp ~ har1)
Residuals:
Min 1Q Median 3Q Max
-14.2003 -2.0059 -0.2022 2.0005 12.4493
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 61.80254 0.08133 759.870 < 2e-16 ***
har1cos(2*pi*t) -18.30228 0.11502 -159.119 < 2e-16 ***
har1sin(2*pi*t) -0.69366 0.11502 -6.031 2.01e-09 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.31 on 1653 degrees of freedom
Multiple R-squared: 0.9388, Adjusted R-squared: 0.9387
F-statistic: 1.268e+04 on 2 and 1653 DF, p-value: < 2.2e-16
har2=harmonic(temp,2)
model4=lm(temp~har2)
summary(model4)
Call:
lm(formula = temp ~ har2)
Residuals:
Min 1Q Median 3Q Max
-13.5699 -2.0031 -0.1105 1.9524 12.5301
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 61.80254 0.07888 783.490 < 2e-16 ***
har2cos(2*pi*t) -18.30228 0.11155 -164.065 < 2e-16 ***
har2cos(4*pi*t) -0.63031 0.11155 -5.650 1.88e-08 ***
har2sin(2*pi*t) -0.69366 0.11155 -6.218 6.36e-10 ***
har2sin(4*pi*t) 0.96246 0.11155 8.628 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.21 on 1651 degrees of freedom
Multiple R-squared: 0.9425, Adjusted R-squared: 0.9424
F-statistic: 6766 on 4 and 1651 DF, p-value: < 2.2e-16
st1 = coef(model2)
st2 = fitted(model4)[1:12]
plot(1:12,st1,lwd=2,type="l",xlab="Month",ylab="Seasonality")
lines(1:12,st2,lwd=2, col="brown")
x1 = time.pts
x2 = time.pts^2
har2=harmonic(temp,2)
lm.fit = lm(temp~x1+x2+har2)
summary(lm.fit)
Call:
lm(formula = temp ~ x1 + x2 + har2)
Residuals:
Min 1Q Median 3Q Max
-13.8242 -1.9505 -0.1554 1.9744 12.7916
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 61.4620 0.2324 264.456 < 2e-16 ***
x1 -1.6526 1.0741 -1.538 0.124119
x2 3.5021 1.0406 3.365 0.000782 ***
har2cos(2*pi*t) -18.3012 0.1097 -166.844 < 2e-16 ***
har2cos(4*pi*t) -0.6292 0.1097 -5.736 1.15e-08 ***
har2sin(2*pi*t) -0.6895 0.1097 -6.286 4.17e-10 ***
har2sin(4*pi*t) 0.9644 0.1097 8.792 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.156 on 1649 degrees of freedom
Multiple R-squared: 0.9445, Adjusted R-squared: 0.9443
F-statistic: 4675 on 6 and 1649 DF, p-value: < 2.2e-16
dif.fit.lm = ts((temp-fitted(lm.fit)),start=1879,frequency=12)
ts.plot(dif.fit.lm,ylab="Residual Process")
gam.fit = gam(temp~s(time.pts)+har2)
dif.fit.gam = ts((temp-fitted(gam.fit)),start=1879,frequency=12)
ts.plot(dif.fit.lm,ylab ="Residual Process", col="brown")
lines(dif.fit.gam,col="blue")