library(tidyverse)
## -- Attaching packages ----------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.3.4 v dplyr 0.7.4
## v tidyr 0.7.2 v stringr 1.2.0
## v readr 1.1.1 v forcats 0.2.0
## -- Conflicts -------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(forecast)
library(ggfortify)
##
## Attaching package: 'ggfortify'
## The following object is masked from 'package:forecast':
##
## gglagplot
options(scipen=9)
setwd("C:/Users/jake/Desktop/class/Week 1/")
ApplianceShipments <- read_csv("ApplianceShipments.csv")
## Parsed with column specification:
## cols(
## Quarter = col_character(),
## Shipments = col_integer()
## )
Sept11Travel <- read_csv("Sept11Travel.csv")
## Parsed with column specification:
## cols(
## Month = col_character(),
## Air = col_integer(),
## Rail = col_integer(),
## VMT = col_double()
## )
ShampooSales <- read_csv("ShampooSales.csv")
## Parsed with column specification:
## cols(
## Month = col_character(),
## `Shampoo Sales` = col_double()
## )
head(Sept11Travel)
## # A tibble: 6 x 4
## Month Air Rail VMT
## <chr> <int> <int> <dbl>
## 1 Jan-90 35153577 454115779 163.28
## 2 Feb-90 32965187 435086002 153.25
## 3 Mar-90 39993913 568289732 178.42
## 4 Apr-90 37981886 568101697 178.68
## 5 May-90 38419672 539628385 188.88
## 6 Jun-90 42819023 570694457 189.16
Air.ts <-ts(Sept11Travel[,2] , start = c(1990, 1) , freq = 12)
Rail.ts <- ts(Sept11Travel[,3] , start = c(1990, 1) , freq = 12)
VMT.ts <- ts(Sept11Travel[,4] , start = c(1990, 1) , freq = 12)
#combine as ts
travel <- cbind(Air.ts ,Rail.ts , VMT.ts)
#nine_six <- as.ts(travel[61:172 ,])
#autoplot(travel, facets = TRUE , title = "3 Travel Plots")
#autoplot(nine_six , facets=TRUE , title = "3 Travel Plots - 1996-2004")
AirLinear <- tslm(Air.ts ~ trend)
#summary(AirLinear)
RailLinear <- tslm(Rail.ts ~trend)
#summary(RailLinear)
CarLinear <- tslm(VMT.ts ~trend)
#summary(CarLinear)
plot(Air.ts , main = "AIR" , xlab = "Date" , ylab = "Miles Traveled" , bty = "l")
lines(AirLinear$fitted , lty = 1 , lwd = 3)
plot(Rail.ts , main = "RAIL" , xlab = "Date" , ylab = "Miles Traveled" , bty = "l")
lines(RailLinear$fitted , lty = 1 , lwd = 3)
plot(VMT.ts , main = "Car" , xlab = "Date" , ylab = "Miles Traveled (Billions)" , bty = "l")
lines(CarLinear$fitted , lty = 1 , lwd = 3)
The Air travel and VMT series display an upward trends over time.
The three series displace strong seasonality, as which you can see directly in the seasonal plots below
There is clear seasonality for all three travel series
ggseasonplot(Air.ts)
ggseasonplot(Rail.ts)
ggseasonplot(VMT.ts)
Plots of each of the individual months over time drives home the seasonality in all three series. Note that there is not a lot of intersection betwenn the line plots of respective months. Supressing the seasonality also clearly shows that the Car and Air series are increasing over time. The Rail series does not appear to display much of a trend component since a downward level shift in the early to mind 90’s.
There also appears to be some random noise in all three series.
monthly <- function(series) {
# set the outer margin area to the right a bit bigger
par(oma = c(0, 0, 0, 2))
# We have 3.0 years of data
xrange <- c(1,15)
yrange <- range(series)
plot(xrange , yrange , type = "n" , xlab = "Year" , ylab = "Miles" , bty = "l")
colors <- rainbow(12)
linetype <- c(1:12)
plotchar <- c(1:12)
#add lines
for (i in 1:12) {
currentMonth <- subset(series , cycle(series) ==i)
lines(currentMonth , type = "b" , lwd = 1.5 ,
lty = linetype[i], col = colors[i], pch=plotchar[i])
}
#title( "Miles by month")
legend("topleft", legend =
c("Jan","Feb","Mar","Apr","May", "Jun","Jul","Aug", "Sep","Oct","Nov","Dec"), cex=0.5, col=colors, pch=plotchar, lty=linetype, title="Month", xpd=NA)
}
monthly(Air.ts)
title( "Air Miles by Month")
monthly(Rail.ts)
title( "Rail Miles by Month")
monthly(VMT.ts)
title( "Car Miles by month (Billions)")
#log
plot(Air.ts , log = "y" , main = "AIR (LOG - Scale)" , xlab = "Date" , ylab = "Miles Traveled (LOG - Scale)" , bty = "l")
plot(Rail.ts , log = "y" , main = "RAIL (LOG - Scale) " , xlab = "Date" , ylab = "Miles Traveled (LOG - Scale)" , bty = "l")
plot(VMT.ts , log = "y" , main = "Car (LOG - Scale) " , xlab = "Date" , ylab = "Miles Traveled (Billions - LOG - Scale)" , bty = "l")
I don’t think that log transformations have much effect on the nature of these series.
Appliance.ts <- ts(ApplianceShipments[,2] , start = c(1985 ,1) , freq = 4)
plot(Appliance.ts , xlab = "Date" , ylab = "Shipments" , bty = "l")
ApplianceLinear <- tslm(Appliance.ts ~ trend)
summary(ApplianceLinear)
##
## Call:
## tslm(formula = Appliance.ts ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -339.06 -171.14 3.13 137.27 393.40
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4167.663 111.035 37.535 <2e-16 ***
## trend 24.494 9.269 2.643 0.0165 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 239 on 18 degrees of freedom
## Multiple R-squared: 0.2795, Adjusted R-squared: 0.2395
## F-statistic: 6.983 on 1 and 18 DF, p-value: 0.01655
ApplianceQuad <- tslm(Appliance.ts ~ trend +I(trend^2))
summary(ApplianceQuad)
##
## Call:
## tslm(formula = Appliance.ts ~ trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -398.39 -155.53 32.63 181.62 346.62
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3982.931 173.764 22.921 3.19e-14 ***
## trend 74.876 38.109 1.965 0.066 .
## I(trend^2) -2.399 1.763 -1.361 0.191
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 233.6 on 17 degrees of freedom
## Multiple R-squared: 0.3503, Adjusted R-squared: 0.2739
## F-statistic: 4.583 on 2 and 17 DF, p-value: 0.02559
plot(Appliance.ts , main = "Shipments over Time, with Trendline and Quadratic Trendline" ,xlab = "Date" , ylab = "Shipments" , bty = "l")
lines(ApplianceLinear$fitted , lwd = 2)
lines(ApplianceQuad$fitted , lty = 2 , lwd = 3)
-Quadratic term adds some to the R Squared, but is not significant at a 95% level of confidence, due to the p-value of 0.19. Since it improves the Adjusted R Squared from the linear model (0.24) to 0.27, and looks visually appeal
quarterly <- function(series) {
# set the outer margin area to the right a bit bigger
par(oma = c(0, 0, 0, 2))
# We have 3.0 years of data
xrange <- c(1,5)
yrange <- range(series)
plot(xrange , yrange , type = "n" , xlab = "Year" , ylab = "Shipments" , bty = "l")
colors <- rainbow(4)
linetype <- c(1:4)
plotchar <- c(1:4)
#add lines
for (i in 1:4) {
currentMonth <- subset(series , cycle(series) ==i)
lines(currentMonth , type = "b" , lwd = 1.5 ,
lty = linetype[i], col = colors[i], pch=plotchar[i])
}
title( "Quarterly Appliance Shipments")
legend("topleft", legend =
c("Q1" , "Q2" , "Q3" , "Q4"), cex=0.5, col=colors, pch=plotchar, lty=linetype, title="Quarter", xpd=NA)
}
quarterly(Appliance.ts)
-Seasonality appears to be present. The plot by quarter above shows that Q2 is always has the most shipments of the year. It also displays a slight trend upward.
-The season - plot below show increased shipments for Q2
ggseasonplot(Appliance.ts)
-This series also contains some random noise.
plot(Appliance.ts , log = "y" , xlab = "Date" , ylab = "Shipments (log scale)" , main = "Log Shipments Over Time", bty = "l")
head(ShampooSales)
## # A tibble: 6 x 2
## Month `Shampoo Sales`
## <chr> <dbl>
## 1 Jan-95 266.0
## 2 Feb-95 145.9
## 3 Mar-95 183.1
## 4 Apr-95 119.3
## 5 May-95 180.3
## 6 Jun-95 168.5
#monthly series no missing
ShampooSales.ts <- ts(ShampooSales$`Shampoo Sales` , start = c(1985 , 1) , freq= 12)
plot(ShampooSales.ts , xlab= "Date" , ylab = "Sales" , bty = "l")
#fix the axis scale
library(ggfortify)
autoplot(ShampooSales.ts)
plot(ShampooSales.ts , log = "y" , xlab = "Date" , ylab = "Shampoo Sales (log scale)" , bty = "l")
ShampooSalesLinear <- tslm(ShampooSales.ts ~ trend)
summary(ShampooSalesLinear)
##
## Call:
## tslm(formula = ShampooSales.ts ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -108.74 -52.12 -16.13 43.48 194.25
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 89.14 26.72 3.336 0.00207 **
## trend 12.08 1.26 9.590 0.0000000000337 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 78.5 on 34 degrees of freedom
## Multiple R-squared: 0.7301, Adjusted R-squared: 0.7222
## F-statistic: 91.97 on 1 and 34 DF, p-value: 0.00000000003368
*There is a strong linear trend upward in sales over time. COnfirmed by trend t-value of 9.59. R square is 0.73, indicating that the trend line explains over 2/3 of the variance in the series.
ShampooSalesQuad <- tslm(ShampooSales.ts ~ trend + I(trend^2))
summary(ShampooSalesQuad)
##
## Call:
## tslm(formula = ShampooSales.ts ~ trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -104.148 -42.075 -8.438 33.924 144.582
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 202.8789 33.3002 6.092 0.000000735 ***
## trend -5.8801 4.1501 -1.417 0.166
## I(trend^2) 0.4854 0.1088 4.461 0.000089294 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 62.93 on 33 degrees of freedom
## Multiple R-squared: 0.8316, Adjusted R-squared: 0.8214
## F-statistic: 81.51 on 2 and 33 DF, p-value: 0.0000000000001708
ShampooSales.ts <- ts(ShampooSales$`Shampoo Sales` , start = c(1985 , 1) , freq= 12)
plot(ShampooSales.ts , xlab= "Date" , ylab = "Sales" , bty = "l")
lines(ShampooSalesLinear$fitted , lty = 1 , lwd = 3)
lines(ShampooSalesQuad$fitted, lty=2, lwd=3)
quarterly_shampoo <- aggregate(ShampooSales.ts , nfrequency = 4 , FUN=sum)
plot(quarterly_shampoo, bty = "l")
# set the outer margin area to the right a bit bigger
par(oma = c(0, 0, 0, 2))
# We have 3.0 years of data
xrange <- c(1,3)
yrange <- range(ShampooSales.ts)
plot(xrange , yrange , type = "n" , xlab = "Year" , ylab = "Shampoo Sales" , bty = "l")
colors <- rainbow(12)
linetype <- c(1:12)
plotchar <- c(1:12)
#add lines
for (i in 1:12) {
currentMonth <- subset(ShampooSales.ts , cycle(ShampooSales.ts) ==i)
lines(currentMonth , type = "b" , lwd = 1.5 ,
lty = linetype[i], col = colors[i], pch=plotchar[i])
}
title("Shampoo Sales by Month")
# add a legend
legend("topleft", legend =
c(1:12), cex=0.5, col=colors, pch=plotchar, lty=linetype, title="Month", xpd=NA)
ggseasonplot(ShampooSales.ts)
*sales appear higher in thrid year for all months
ggseasonplot(ShampooSales.ts)
-hard to decipher seasonality. This make sense, because I would not really expect seasonality for shampoo sales. I would expect people to wash their hair at a consistent rate year round.