library(tidyverse)

## -- Attaching packages ----------------------------------------------------- tidyverse 1.2.1 --

## v ggplot2 2.2.1     v purrr   0.2.4
## v tibble  1.3.4     v dplyr   0.7.4
## v tidyr   0.7.2     v stringr 1.2.0
## v readr   1.1.1     v forcats 0.2.0

## -- Conflicts -------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(forecast)
library(ggfortify)

## 
## Attaching package: 'ggfortify'

## The following object is masked from 'package:forecast':
## 
##     gglagplot

options(scipen=9)

1. September11 Travel Impact

read in the data into R

setwd("C:/Users/jake/Desktop/class/Week 1/")

ApplianceShipments <- read_csv("ApplianceShipments.csv")

## Parsed with column specification:
## cols(
##   Quarter = col_character(),
##   Shipments = col_integer()
## )

Sept11Travel <- read_csv("Sept11Travel.csv")

## Parsed with column specification:
## cols(
##   Month = col_character(),
##   Air = col_integer(),
##   Rail = col_integer(),
##   VMT = col_double()
## )

ShampooSales <- read_csv("ShampooSales.csv")

## Parsed with column specification:
## cols(
##   Month = col_character(),
##   `Shampoo Sales` = col_double()
## )

Convert Series to Time series objects

head(Sept11Travel)

## # A tibble: 6 x 4
##    Month      Air      Rail    VMT
##    <chr>    <int>     <int>  <dbl>
## 1 Jan-90 35153577 454115779 163.28
## 2 Feb-90 32965187 435086002 153.25
## 3 Mar-90 39993913 568289732 178.42
## 4 Apr-90 37981886 568101697 178.68
## 5 May-90 38419672 539628385 188.88
## 6 Jun-90 42819023 570694457 189.16

Air.ts <-ts(Sept11Travel[,2] , start =  c(1990, 1) , freq = 12)
Rail.ts <- ts(Sept11Travel[,3]  , start =  c(1990, 1) , freq = 12)
VMT.ts <- ts(Sept11Travel[,4]  , start =  c(1990, 1) , freq = 12)

#combine as ts
travel <- cbind(Air.ts ,Rail.ts , VMT.ts)

#nine_six <- as.ts(travel[61:172 ,])

A.

Plot the 3 series

#autoplot(travel, facets = TRUE , title = "3 Travel Plots")
#autoplot(nine_six , facets=TRUE  , title = "3 Travel Plots - 1996-2004")

AirLinear <- tslm(Air.ts ~ trend)
#summary(AirLinear)
RailLinear <- tslm(Rail.ts ~trend)
#summary(RailLinear)
CarLinear <- tslm(VMT.ts ~trend)
#summary(CarLinear)

plot(Air.ts ,  main = "AIR" , xlab = "Date" , ylab = "Miles Traveled" , bty = "l")
lines(AirLinear$fitted , lty = 1 , lwd = 3)

plot(Rail.ts ,  main = "RAIL" , xlab = "Date" , ylab = "Miles Traveled" , bty = "l")
lines(RailLinear$fitted , lty = 1 , lwd = 3)

plot(VMT.ts ,  main = "Car" , xlab = "Date" , ylab = "Miles Traveled (Billions)" , bty = "l")
lines(CarLinear$fitted , lty = 1 , lwd = 3)

The Air travel and VMT series display an upward trends over time.
While there is not an upward trend in the Rails series, there appears to be a level shif downward around 1996.
Air travel appears to have experienced a level shift as well, which could be attributed to 911.
The three series displace strong seasonality, as which you can see directly in the seasonal plots below

There is clear seasonality for all three travel series

ggseasonplot(Air.ts)

ggseasonplot(Rail.ts)

ggseasonplot(VMT.ts)

Plots of each of the individual months over time drives home the seasonality in all three series. Note that there is not a lot of intersection betwenn the line plots of respective months. Supressing the seasonality also clearly shows that the Car and Air series are increasing over time. The Rail series does not appear to display much of a trend component since a downward level shift in the early to mind 90’s.
There also appears to be some random noise in all three series.

B.

monthly <- function(series) {
# set the outer margin area to the right a bit bigger
par(oma = c(0, 0, 0, 2))

# We have 3.0 years of data
xrange <- c(1,15)
yrange <- range(series)

plot(xrange , yrange , type = "n" , xlab = "Year" , ylab = "Miles" , bty = "l")


colors <- rainbow(12)
linetype <- c(1:12)
plotchar <- c(1:12)

#add lines
for (i in 1:12) {
  currentMonth <- subset(series , cycle(series) ==i)
  lines(currentMonth , type = "b" , lwd = 1.5 ,
        lty = linetype[i], col = colors[i], pch=plotchar[i])
}

#title(  "Miles by month")

legend("topleft", legend = 
         c("Jan","Feb","Mar","Apr","May", "Jun","Jul","Aug", "Sep","Oct","Nov","Dec"), cex=0.5, col=colors, pch=plotchar, lty=linetype, title="Month", xpd=NA)

}

monthly(Air.ts)
title(  "Air Miles by Month")

monthly(Rail.ts)
title(  "Rail Miles by Month")

monthly(VMT.ts)
title(  "Car Miles by month (Billions)")

#log
plot(Air.ts , log = "y" ,  main = "AIR (LOG - Scale)" , xlab = "Date" , ylab = "Miles Traveled (LOG - Scale)" , bty = "l")

plot(Rail.ts , log = "y" ,  main = "RAIL (LOG - Scale) " , xlab = "Date" , ylab = "Miles Traveled (LOG - Scale)" , bty = "l")

plot(VMT.ts , log = "y" ,  main = "Car (LOG - Scale) " , xlab = "Date" , ylab = "Miles Traveled (Billions - LOG - Scale)" , bty = "l")

I don’t think that log transformations have much effect on the nature of these series.

3. Appliance Shipments series

Appliance.ts <- ts(ApplianceShipments[,2] , start = c(1985 ,1) , freq = 4)

plot(Appliance.ts , xlab = "Date" , ylab = "Shipments" , bty = "l")

ApplianceLinear <- tslm(Appliance.ts ~ trend)
summary(ApplianceLinear)

## 
## Call:
## tslm(formula = Appliance.ts ~ trend)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -339.06 -171.14    3.13  137.27  393.40 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4167.663    111.035  37.535   <2e-16 ***
## trend         24.494      9.269   2.643   0.0165 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 239 on 18 degrees of freedom
## Multiple R-squared:  0.2795, Adjusted R-squared:  0.2395 
## F-statistic: 6.983 on 1 and 18 DF,  p-value: 0.01655

ApplianceQuad <- tslm(Appliance.ts ~ trend +I(trend^2))

summary(ApplianceQuad)

## 
## Call:
## tslm(formula = Appliance.ts ~ trend + I(trend^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -398.39 -155.53   32.63  181.62  346.62 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3982.931    173.764  22.921 3.19e-14 ***
## trend         74.876     38.109   1.965    0.066 .  
## I(trend^2)    -2.399      1.763  -1.361    0.191    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 233.6 on 17 degrees of freedom
## Multiple R-squared:  0.3503, Adjusted R-squared:  0.2739 
## F-statistic: 4.583 on 2 and 17 DF,  p-value: 0.02559

plot(Appliance.ts , main = "Shipments over Time, with Trendline and Quadratic Trendline" ,xlab = "Date" , ylab = "Shipments" , bty = "l")
lines(ApplianceLinear$fitted , lwd = 2)
lines(ApplianceQuad$fitted , lty = 2 , lwd = 3)

-Quadratic term adds some to the R Squared, but is not significant at a 95% level of confidence, due to the p-value of 0.19. Since it improves the Adjusted R Squared from the linear model (0.24) to 0.27, and looks visually appeal

quarterly <- function(series) {
# set the outer margin area to the right a bit bigger
par(oma = c(0, 0, 0, 2))

# We have 3.0 years of data
xrange <- c(1,5)
yrange <- range(series)

plot(xrange , yrange , type = "n" , xlab = "Year" , ylab = "Shipments" , bty = "l")


colors <- rainbow(4)
linetype <- c(1:4)
plotchar <- c(1:4)

#add lines
for (i in 1:4) {
  currentMonth <- subset(series , cycle(series) ==i)
  lines(currentMonth , type = "b" , lwd = 1.5 ,
        lty = linetype[i], col = colors[i], pch=plotchar[i])
}

title(  "Quarterly Appliance Shipments")

legend("topleft", legend = 
         c("Q1" , "Q2" , "Q3" , "Q4"), cex=0.5, col=colors, pch=plotchar, lty=linetype, title="Quarter", xpd=NA)

}




quarterly(Appliance.ts)

-Seasonality appears to be present. The plot by quarter above shows that Q2 is always has the most shipments of the year. It also displays a slight trend upward.

-The season - plot below show increased shipments for Q2

ggseasonplot(Appliance.ts)

-This series also contains some random noise.

log

plot(Appliance.ts ,  log = "y" , xlab = "Date" , ylab = "Shipments (log scale)"  , main = "Log Shipments Over Time", bty = "l")

6. Shampoo Sales

Question 6 : Shampoo Sales

head(ShampooSales)

## # A tibble: 6 x 2
##    Month `Shampoo Sales`
##    <chr>           <dbl>
## 1 Jan-95           266.0
## 2 Feb-95           145.9
## 3 Mar-95           183.1
## 4 Apr-95           119.3
## 5 May-95           180.3
## 6 Jun-95           168.5

#monthly series no missing

ShampooSales.ts <- ts(ShampooSales$`Shampoo Sales` , start = c(1985 , 1) , freq= 12)

plot(ShampooSales.ts , xlab= "Date" , ylab = "Sales"  , bty = "l")

#fix the axis scale

alt plot

library(ggfortify)
autoplot(ShampooSales.ts)

plot log

plot(ShampooSales.ts  , log = "y" , xlab = "Date" , ylab = "Shampoo Sales (log scale)" , bty = "l")

fit trend

ShampooSalesLinear <- tslm(ShampooSales.ts ~ trend)
summary(ShampooSalesLinear)

## 
## Call:
## tslm(formula = ShampooSales.ts ~ trend)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -108.74  -52.12  -16.13   43.48  194.25 
## 
## Coefficients:
##             Estimate Std. Error t value        Pr(>|t|)    
## (Intercept)    89.14      26.72   3.336         0.00207 ** 
## trend          12.08       1.26   9.590 0.0000000000337 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 78.5 on 34 degrees of freedom
## Multiple R-squared:  0.7301, Adjusted R-squared:  0.7222 
## F-statistic: 91.97 on 1 and 34 DF,  p-value: 0.00000000003368

*There is a strong linear trend upward in sales over time. COnfirmed by trend t-value of 9.59. R square is 0.73, indicating that the trend line explains over 2/3 of the variance in the series.

quadratic trend as excercise

ShampooSalesQuad <- tslm(ShampooSales.ts ~ trend + I(trend^2))
summary(ShampooSalesQuad)

## 
## Call:
## tslm(formula = ShampooSales.ts ~ trend + I(trend^2))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -104.148  -42.075   -8.438   33.924  144.582 
## 
## Coefficients:
##             Estimate Std. Error t value    Pr(>|t|)    
## (Intercept) 202.8789    33.3002   6.092 0.000000735 ***
## trend        -5.8801     4.1501  -1.417       0.166    
## I(trend^2)    0.4854     0.1088   4.461 0.000089294 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 62.93 on 33 degrees of freedom
## Multiple R-squared:  0.8316, Adjusted R-squared:  0.8214 
## F-statistic: 81.51 on 2 and 33 DF,  p-value: 0.0000000000001708

ShampooSales.ts <- ts(ShampooSales$`Shampoo Sales` , start = c(1985 , 1) , freq= 12)

plot(ShampooSales.ts , xlab= "Date" , ylab = "Sales"  , bty = "l")
lines(ShampooSalesLinear$fitted , lty = 1 , lwd = 3)
lines(ShampooSalesQuad$fitted, lty=2, lwd=3)

The quadratic fit is even better with an r square of 0.83, and the plot visually confirms that the quadratic fit is nice.

aggregate by quarter

quarterly_shampoo <- aggregate(ShampooSales.ts , nfrequency = 4 , FUN=sum)
plot(quarterly_shampoo, bty = "l")

create plots for each season

# set the outer margin area to the right a bit bigger
par(oma = c(0, 0, 0, 2))

# We have 3.0 years of data
xrange <- c(1,3)
yrange <- range(ShampooSales.ts)

plot(xrange , yrange , type = "n" , xlab = "Year" , ylab = "Shampoo Sales" , bty = "l")


colors <- rainbow(12)
linetype <- c(1:12)
plotchar <- c(1:12)

#add lines
for (i in 1:12) {
  currentMonth <- subset(ShampooSales.ts , cycle(ShampooSales.ts) ==i)
  lines(currentMonth , type = "b" , lwd = 1.5 ,
        lty = linetype[i], col = colors[i], pch=plotchar[i])
}

title("Shampoo Sales by Month")

# add a legend 
legend("topleft", legend = 
         c(1:12), cex=0.5, col=colors, pch=plotchar, lty=linetype, title="Month", xpd=NA)

ggseasonplot(ShampooSales.ts)

*sales appear higher in thrid year for all months

ggseasonplot(ShampooSales.ts)

C.

-hard to decipher seasonality. This make sense, because I would not really expect seasonality for shampoo sales. I would expect people to wash their hair at a consistent rate year round.

Week One

jake

January 29, 2018