library(tidyverse)
## -- Attaching packages ----------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1     v purrr   0.2.4
## v tibble  1.3.4     v dplyr   0.7.4
## v tidyr   0.7.2     v stringr 1.2.0
## v readr   1.1.1     v forcats 0.2.0
## -- Conflicts -------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(forecast)
library(ggfortify)
## 
## Attaching package: 'ggfortify'
## The following object is masked from 'package:forecast':
## 
##     gglagplot
options(scipen=9)

1. September11 Travel Impact

read in the data into R

setwd("C:/Users/jake/Desktop/class/Week 1/")

ApplianceShipments <- read_csv("ApplianceShipments.csv")
## Parsed with column specification:
## cols(
##   Quarter = col_character(),
##   Shipments = col_integer()
## )
Sept11Travel <- read_csv("Sept11Travel.csv")
## Parsed with column specification:
## cols(
##   Month = col_character(),
##   Air = col_integer(),
##   Rail = col_integer(),
##   VMT = col_double()
## )
ShampooSales <- read_csv("ShampooSales.csv")
## Parsed with column specification:
## cols(
##   Month = col_character(),
##   `Shampoo Sales` = col_double()
## )

Convert Series to Time series objects

head(Sept11Travel)
## # A tibble: 6 x 4
##    Month      Air      Rail    VMT
##    <chr>    <int>     <int>  <dbl>
## 1 Jan-90 35153577 454115779 163.28
## 2 Feb-90 32965187 435086002 153.25
## 3 Mar-90 39993913 568289732 178.42
## 4 Apr-90 37981886 568101697 178.68
## 5 May-90 38419672 539628385 188.88
## 6 Jun-90 42819023 570694457 189.16
Air.ts <-ts(Sept11Travel[,2] , start =  c(1990, 1) , freq = 12)
Rail.ts <- ts(Sept11Travel[,3]  , start =  c(1990, 1) , freq = 12)
VMT.ts <- ts(Sept11Travel[,4]  , start =  c(1990, 1) , freq = 12)

#combine as ts
travel <- cbind(Air.ts ,Rail.ts , VMT.ts)

#nine_six <- as.ts(travel[61:172 ,])

A.

#autoplot(travel, facets = TRUE , title = "3 Travel Plots")
#autoplot(nine_six , facets=TRUE  , title = "3 Travel Plots - 1996-2004")
AirLinear <- tslm(Air.ts ~ trend)
#summary(AirLinear)
RailLinear <- tslm(Rail.ts ~trend)
#summary(RailLinear)
CarLinear <- tslm(VMT.ts ~trend)
#summary(CarLinear)
plot(Air.ts ,  main = "AIR" , xlab = "Date" , ylab = "Miles Traveled" , bty = "l")
lines(AirLinear$fitted , lty = 1 , lwd = 3)

plot(Rail.ts ,  main = "RAIL" , xlab = "Date" , ylab = "Miles Traveled" , bty = "l")
lines(RailLinear$fitted , lty = 1 , lwd = 3)

plot(VMT.ts ,  main = "Car" , xlab = "Date" , ylab = "Miles Traveled (Billions)" , bty = "l")
lines(CarLinear$fitted , lty = 1 , lwd = 3)

There is clear seasonality for all three travel series

ggseasonplot(Air.ts)

ggseasonplot(Rail.ts)

ggseasonplot(VMT.ts)

B.

monthly <- function(series) {
# set the outer margin area to the right a bit bigger
par(oma = c(0, 0, 0, 2))

# We have 3.0 years of data
xrange <- c(1,15)
yrange <- range(series)

plot(xrange , yrange , type = "n" , xlab = "Year" , ylab = "Miles" , bty = "l")


colors <- rainbow(12)
linetype <- c(1:12)
plotchar <- c(1:12)

#add lines
for (i in 1:12) {
  currentMonth <- subset(series , cycle(series) ==i)
  lines(currentMonth , type = "b" , lwd = 1.5 ,
        lty = linetype[i], col = colors[i], pch=plotchar[i])
}

#title(  "Miles by month")

legend("topleft", legend = 
         c("Jan","Feb","Mar","Apr","May", "Jun","Jul","Aug", "Sep","Oct","Nov","Dec"), cex=0.5, col=colors, pch=plotchar, lty=linetype, title="Month", xpd=NA)

}

monthly(Air.ts)
title(  "Air Miles by Month")

monthly(Rail.ts)
title(  "Rail Miles by Month")

monthly(VMT.ts)
title(  "Car Miles by month (Billions)")

#log
plot(Air.ts , log = "y" ,  main = "AIR (LOG - Scale)" , xlab = "Date" , ylab = "Miles Traveled (LOG - Scale)" , bty = "l")

plot(Rail.ts , log = "y" ,  main = "RAIL (LOG - Scale) " , xlab = "Date" , ylab = "Miles Traveled (LOG - Scale)" , bty = "l")

plot(VMT.ts , log = "y" ,  main = "Car (LOG - Scale) " , xlab = "Date" , ylab = "Miles Traveled (Billions - LOG - Scale)" , bty = "l")

I don’t think that log transformations have much effect on the nature of these series.

3. Appliance Shipments series

Appliance.ts <- ts(ApplianceShipments[,2] , start = c(1985 ,1) , freq = 4)

plot(Appliance.ts , xlab = "Date" , ylab = "Shipments" , bty = "l")

ApplianceLinear <- tslm(Appliance.ts ~ trend)
summary(ApplianceLinear)
## 
## Call:
## tslm(formula = Appliance.ts ~ trend)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -339.06 -171.14    3.13  137.27  393.40 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4167.663    111.035  37.535   <2e-16 ***
## trend         24.494      9.269   2.643   0.0165 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 239 on 18 degrees of freedom
## Multiple R-squared:  0.2795, Adjusted R-squared:  0.2395 
## F-statistic: 6.983 on 1 and 18 DF,  p-value: 0.01655
ApplianceQuad <- tslm(Appliance.ts ~ trend +I(trend^2))

summary(ApplianceQuad)                        
## 
## Call:
## tslm(formula = Appliance.ts ~ trend + I(trend^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -398.39 -155.53   32.63  181.62  346.62 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3982.931    173.764  22.921 3.19e-14 ***
## trend         74.876     38.109   1.965    0.066 .  
## I(trend^2)    -2.399      1.763  -1.361    0.191    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 233.6 on 17 degrees of freedom
## Multiple R-squared:  0.3503, Adjusted R-squared:  0.2739 
## F-statistic: 4.583 on 2 and 17 DF,  p-value: 0.02559
plot(Appliance.ts , main = "Shipments over Time, with Trendline and Quadratic Trendline" ,xlab = "Date" , ylab = "Shipments" , bty = "l")
lines(ApplianceLinear$fitted , lwd = 2)
lines(ApplianceQuad$fitted , lty = 2 , lwd = 3)

-Quadratic term adds some to the R Squared, but is not significant at a 95% level of confidence, due to the p-value of 0.19. Since it improves the Adjusted R Squared from the linear model (0.24) to 0.27, and looks visually appeal

quarterly <- function(series) {
# set the outer margin area to the right a bit bigger
par(oma = c(0, 0, 0, 2))

# We have 3.0 years of data
xrange <- c(1,5)
yrange <- range(series)

plot(xrange , yrange , type = "n" , xlab = "Year" , ylab = "Shipments" , bty = "l")


colors <- rainbow(4)
linetype <- c(1:4)
plotchar <- c(1:4)

#add lines
for (i in 1:4) {
  currentMonth <- subset(series , cycle(series) ==i)
  lines(currentMonth , type = "b" , lwd = 1.5 ,
        lty = linetype[i], col = colors[i], pch=plotchar[i])
}

title(  "Quarterly Appliance Shipments")

legend("topleft", legend = 
         c("Q1" , "Q2" , "Q3" , "Q4"), cex=0.5, col=colors, pch=plotchar, lty=linetype, title="Quarter", xpd=NA)

}




quarterly(Appliance.ts)

-Seasonality appears to be present. The plot by quarter above shows that Q2 is always has the most shipments of the year. It also displays a slight trend upward.

-The season - plot below show increased shipments for Q2

ggseasonplot(Appliance.ts)

-This series also contains some random noise.

log

plot(Appliance.ts ,  log = "y" , xlab = "Date" , ylab = "Shipments (log scale)"  , main = "Log Shipments Over Time", bty = "l")

6. Shampoo Sales

Question 6 : Shampoo Sales

head(ShampooSales)
## # A tibble: 6 x 2
##    Month `Shampoo Sales`
##    <chr>           <dbl>
## 1 Jan-95           266.0
## 2 Feb-95           145.9
## 3 Mar-95           183.1
## 4 Apr-95           119.3
## 5 May-95           180.3
## 6 Jun-95           168.5
#monthly series no missing
ShampooSales.ts <- ts(ShampooSales$`Shampoo Sales` , start = c(1985 , 1) , freq= 12)

plot(ShampooSales.ts , xlab= "Date" , ylab = "Sales"  , bty = "l")

#fix the axis scale

alt plot

library(ggfortify)
autoplot(ShampooSales.ts)

plot log

plot(ShampooSales.ts  , log = "y" , xlab = "Date" , ylab = "Shampoo Sales (log scale)" , bty = "l")

fit trend

ShampooSalesLinear <- tslm(ShampooSales.ts ~ trend)
summary(ShampooSalesLinear)
## 
## Call:
## tslm(formula = ShampooSales.ts ~ trend)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -108.74  -52.12  -16.13   43.48  194.25 
## 
## Coefficients:
##             Estimate Std. Error t value        Pr(>|t|)    
## (Intercept)    89.14      26.72   3.336         0.00207 ** 
## trend          12.08       1.26   9.590 0.0000000000337 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 78.5 on 34 degrees of freedom
## Multiple R-squared:  0.7301, Adjusted R-squared:  0.7222 
## F-statistic: 91.97 on 1 and 34 DF,  p-value: 0.00000000003368

*There is a strong linear trend upward in sales over time. COnfirmed by trend t-value of 9.59. R square is 0.73, indicating that the trend line explains over 2/3 of the variance in the series.

quadratic trend as excercise

ShampooSalesQuad <- tslm(ShampooSales.ts ~ trend + I(trend^2))
summary(ShampooSalesQuad)
## 
## Call:
## tslm(formula = ShampooSales.ts ~ trend + I(trend^2))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -104.148  -42.075   -8.438   33.924  144.582 
## 
## Coefficients:
##             Estimate Std. Error t value    Pr(>|t|)    
## (Intercept) 202.8789    33.3002   6.092 0.000000735 ***
## trend        -5.8801     4.1501  -1.417       0.166    
## I(trend^2)    0.4854     0.1088   4.461 0.000089294 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 62.93 on 33 degrees of freedom
## Multiple R-squared:  0.8316, Adjusted R-squared:  0.8214 
## F-statistic: 81.51 on 2 and 33 DF,  p-value: 0.0000000000001708
ShampooSales.ts <- ts(ShampooSales$`Shampoo Sales` , start = c(1985 , 1) , freq= 12)

plot(ShampooSales.ts , xlab= "Date" , ylab = "Sales"  , bty = "l")
lines(ShampooSalesLinear$fitted , lty = 1 , lwd = 3)
lines(ShampooSalesQuad$fitted, lty=2, lwd=3)

The quadratic fit is even better with an r square of 0.83, and the plot visually confirms that the quadratic fit is nice.

aggregate by quarter

quarterly_shampoo <- aggregate(ShampooSales.ts , nfrequency = 4 , FUN=sum)
plot(quarterly_shampoo, bty = "l")

create plots for each season

# set the outer margin area to the right a bit bigger
par(oma = c(0, 0, 0, 2))

# We have 3.0 years of data
xrange <- c(1,3)
yrange <- range(ShampooSales.ts)

plot(xrange , yrange , type = "n" , xlab = "Year" , ylab = "Shampoo Sales" , bty = "l")


colors <- rainbow(12)
linetype <- c(1:12)
plotchar <- c(1:12)

#add lines
for (i in 1:12) {
  currentMonth <- subset(ShampooSales.ts , cycle(ShampooSales.ts) ==i)
  lines(currentMonth , type = "b" , lwd = 1.5 ,
        lty = linetype[i], col = colors[i], pch=plotchar[i])
}

title("Shampoo Sales by Month")

# add a legend 
legend("topleft", legend = 
         c(1:12), cex=0.5, col=colors, pch=plotchar, lty=linetype, title="Month", xpd=NA)

ggseasonplot(ShampooSales.ts)

*sales appear higher in thrid year for all months

ggseasonplot(ShampooSales.ts)

C.

-hard to decipher seasonality. This make sense, because I would not really expect seasonality for shampoo sales. I would expect people to wash their hair at a consistent rate year round.