setwd("~/USM_MBA_678")
Sept11Travel <- read.csv("Sept11Travel.csv", header=TRUE, stringsAsFactors=FALSE)
str(Sept11Travel)
## 'data.frame': 172 obs. of 4 variables:
## $ Month: chr "Jan-90" "Feb-90" "Mar-90" "Apr-90" ...
## $ Air : int 35153577 32965187 39993913 37981886 38419672 42819023 45770315 48763670 38173223 39051877 ...
## $ Rail : int 454115779 435086002 568289732 568101697 539628385 570694457 618571581 609210368 488444939 514253920 ...
## $ VMT : num 163 153 178 179 189 ...
head(Sept11Travel)
## Month Air Rail VMT
## 1 Jan-90 35153577 454115779 163.28
## 2 Feb-90 32965187 435086002 153.25
## 3 Mar-90 39993913 568289732 178.42
## 4 Apr-90 37981886 568101697 178.68
## 5 May-90 38419672 539628385 188.88
## 6 Jun-90 42819023 570694457 189.16
library(forecast)
Sept11Travel1.ts <- ts(Sept11Travel$Air, start=c(1990,1), end=c(2001,8), freq=12)
plot(Sept11Travel1.ts, xlab="Time", ylab="AIR RPM 000s", ylim=c(30000000, 70000000), bty="l")
In the time series graph of AIR RPM, from the data appears an almost linear upward trend with additive seasonality. Next the log() function will be used to change the scale of the graph to better identify the trend of the data.
AirRPMLog <- log(Sept11Travel1.ts)
plot(AirRPMLog, xlab="Time", ylab="log scaling of AIR RPM 000s", bty="l")
As the time series already showed what was an evidently linear upward trend, the log scaling has not in this case added much value in terms of discerning the trend.
Fitting Trend Lines
As the trend was visually linear, a linear fit seemed appropriate.
AirRPMLinear <-tslm(Sept11Travel1.ts ~ trend)
summary(AirRPMLinear)
##
## Call:
## tslm(formula = Sept11Travel1.ts ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9466409 -3410590 -681183 3360750 11823514
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 35728435 834749 42.80 <2e-16 ***
## trend 177097 10272 17.24 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4912000 on 138 degrees of freedom
## Multiple R-squared: 0.6829, Adjusted R-squared: 0.6806
## F-statistic: 297.2 on 1 and 138 DF, p-value: < 2.2e-16
plot(Sept11Travel1.ts, xlab="Time", ylab="Air RPM", bty="l")
lines(AirRPMLinear$fitted, lwd=2)
#AirRPMQuad <- tslm(Sept11Travel1.ts ~ trend + I(trend^2))
#summary(AirRPMQuad)
In order to suppress seasonality, we can aggregate by year in this example.
yearlyAirRPM <- aggregate(Sept11Travel1.ts,nfrequency=1,FUN=sum)
plot(yearlyAirRPM,bty="l")
Apart from a downward slope from 1990 to 1991, the Air RPM data clearly shows an upward linear trend when aggregated on a yearly basis.
Sept11Travel2.ts <- ts(Sept11Travel$Rail, start=c(1990,1), end=c(2001,8), freq=12)
plot(Sept11Travel2.ts, xlab="Time", ylab="Rail PM", bty="l")
In the time series graph of Rail PM, from the data appears a sinusoidal or cyclical trend with seasonality that could be described as between additive and multiplicative.
AirRPMLog <- log(Sept11Travel2.ts)
plot(AirRPMLog, xlab="Time", ylab="log scaling of Rail PM", bty="l")
Log scaling has not changed the trending pattern of the Rail PM data to any great effect.
Fitting Trend Lines
We fit a linear line to the data but since there is some curvature we also add a quadratic line too.
RailLinear <-tslm(Sept11Travel2.ts ~ trend)
summary(RailLinear)
##
## Call:
## tslm(formula = Sept11Travel2.ts ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -142225897 -40750574 -4370229 41272192 133135874
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 547632872 10511773 52.097 < 2e-16 ***
## trend -837744 129357 -6.476 1.53e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 61860000 on 138 degrees of freedom
## Multiple R-squared: 0.2331, Adjusted R-squared: 0.2275
## F-statistic: 41.94 on 1 and 138 DF, p-value: 1.532e-09
plot(Sept11Travel2.ts, xlab="Time", ylab="Rail PM", bty="l")
lines(RailLinear$fitted, lwd=2)
RailQuad <- tslm(Sept11Travel2.ts ~ trend + I(trend^2))
lines(RailQuad$fitted, lty=2, lwd=3)
summary(RailQuad)
##
## Call:
## tslm(formula = Sept11Travel2.ts ~ trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -137634923 -37327572 -1559409 41652351 125112936
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 576828666 15618390 36.93 < 2e-16 ***
## trend -2071369 511394 -4.05 8.52e-05 ***
## I(trend^2) 8749 3513 2.49 0.014 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 60720000 on 137 degrees of freedom
## Multiple R-squared: 0.2663, Adjusted R-squared: 0.2556
## F-statistic: 24.86 on 2 and 137 DF, p-value: 6.14e-10
Sept11Travel3.ts <- ts(Sept11Travel$VMT, start=c(1990,1), end=c(2001,8), freq=12)
plot(Sept11Travel3.ts, xlab="Time", ylab="VMT Billions", bty="l")
In the time series graph of VMT, from the data appears a linear upward trend with additive seasonality
AirRPMLog <- log(Sept11Travel3.ts)
plot(AirRPMLog, xlab="Time", ylab="log scaling of VMT", bty="l")
Log scaling has not changed the trending pattern of the VMT data to any great effect.
Fitting Trend Lines
As the trend was visually linear, a linear fit seemed appropriate.
VMTLinear <-tslm(Sept11Travel3.ts ~ trend)
summary(VMTLinear)
##
## Call:
## tslm(formula = Sept11Travel3.ts ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.554 -9.185 0.559 11.087 23.272
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 173.17137 2.42094 71.53 <2e-16 ***
## trend 0.44249 0.02979 14.85 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.25 on 138 degrees of freedom
## Multiple R-squared: 0.6152, Adjusted R-squared: 0.6124
## F-statistic: 220.6 on 1 and 138 DF, p-value: < 2.2e-16
plot(Sept11Travel3.ts, xlab="Time", ylab="VMT", bty="l")
lines(VMTLinear$fitted, lwd=2)
setwd("~/USM_MBA_678")
AppShip <- read.csv("ApplianceShipments.csv", header=TRUE, stringsAsFactors=FALSE)
str(AppShip)
## 'data.frame': 20 obs. of 2 variables:
## $ Quarter : chr "Q1-1985" "Q2-1985" "Q3-1985" "Q4-1985" ...
## $ Shipments: int 4009 4321 4224 3944 4123 4522 4657 4030 4493 4806 ...
head(AppShip)
## Quarter Shipments
## 1 Q1-1985 4009
## 2 Q2-1985 4321
## 3 Q3-1985 4224
## 4 Q4-1985 3944
## 5 Q1-1986 4123
## 6 Q2-1986 4522
library(forecast)
AppShip.ts <- ts(AppShip$Shipments, start=c(1985,1), end=c(1989,4), freq=4)
plot(AppShip.ts, xlab="Time", ylab="Shipments (mils USD)", bty="l")
The question asks which of the four components level, trend, seasonality and noise seem to be present in the this series. Since by definition all series have levels (Shmueli, page 28), and the above series exhibits both an upward linear trend and possibly a somewhat seasonal or at least cyclical pattern, we can adjust for these using the diff() function at lag = 1 and lag =4 respectively to see what characteristics the series then exhibits:
First we adjust for trend and see that by using a lag = 1, the series does adjust to a level overall trend but with still much noise.
AppShipAdjusted<-diff(AppShip.ts, lag=1)
plot(AppShipAdjusted, xlab="Time", ylab="Shipments - Trend Adjusted", bty="l")
Seasonally adjusting does seem to remove variability from the series, but the trend shifts to a downward one.
AppShipAdjusted<-diff(AppShip.ts, lag=12)
plot(AppShipAdjusted, xlab="Time", ylab="Shipments - Seasonality Adjusted", bty="l")
If we adjust for both trend and seasonality we are left with what could be considered noise.
AppShipAdjusted<-diff(diff(AppShip.ts, lag=1),lag=12)
plot(AppShipAdjusted, xlab="Time", ylab="Shipments - Seasonality Adjusted", bty="l")
setwd("~/USM_MBA_678")
ShampSales <- read.csv("ShampooSales.csv", header=TRUE, stringsAsFactors=FALSE)
str(ShampSales)
## 'data.frame': 36 obs. of 2 variables:
## $ Month : chr "Jan-95" "Feb-95" "Mar-95" "Apr-95" ...
## $ Shampoo.Sales: num 266 146 183 119 180 ...
head(ShampSales)
## Month Shampoo.Sales
## 1 Jan-95 266.0
## 2 Feb-95 145.9
## 3 Mar-95 183.1
## 4 Apr-95 119.3
## 5 May-95 180.3
## 6 Jun-95 168.5
library(forecast)
ShampSales.ts <- ts(ShampSales$Shampoo.Sales, start=c(1995,4), end=c(1997,9), freq=12)
plot(ShampSales.ts, xlab="Time", ylab="Shampoo Sales", bty="l")
Here of the four components, trend seems to be the most apparent. If we remove the long term time trend by differencing, transformation this should be apparent and we can see the amount of trend from one observation to the next:
linear_trend<-diff(ShampSales.ts, lag=1)
plot(linear_trend, xlab="Time", ylab="Shampoo Sales", bty="l")
We may however wish to investigate whether there is any seasonality in the time series. Here however it is not obvious what the lag might be - colder vs. hotter seasons? We use a lag of 12 for illustrative purposes but it is not apparent that there is any seasonality involved here. Possibly there are two main reasons for this: 1) Habits and customs in American culture require that people wash often and 2) Shampoo sales are driven by advertising and sales that tend to keep seasonal shifts in shampoo use to a minimum.
seasonal<-diff(linear_trend, lag=12)
plot(seasonal, xlab="Time", ylab="Shampoo Sales", bty="l")