Homework 1: Questions 1,3 & 6 from Chapter 2.


Problem 1: Impact of September 11 on Air Travel in the United States

library(readr)
library(forecast)
september11Data<- read_csv("Sept11Travel.csv")
dim(september11Data)
## [1] 172   4
str(september11Data)
## Classes 'tbl_df', 'tbl' and 'data.frame':    172 obs. of  4 variables:
##  $ Month: chr  "Jan-90" "Feb-90" "Mar-90" "Apr-90" ...
##  $ Air  : int  35153577 32965187 39993913 37981886 38419672 42819023 45770315 48763670 38173223 39051877 ...
##  $ Rail : int  454115779 435086002 568289732 568101697 539628385 570694457 618571581 609210368 488444939 514253920 ...
##  $ VMT  : num  163 153 178 179 189 ...
##  - attr(*, "spec")=List of 2
##   ..$ cols   :List of 4
##   .. ..$ Month: list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ Air  : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ Rail : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ VMT  : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr  "collector_guess" "collector"
##   ..- attr(*, "class")= chr "col_spec"
head(september11Data)
## # A tibble: 6 x 4
##    Month      Air      Rail    VMT
##    <chr>    <int>     <int>  <dbl>
## 1 Jan-90 35153577 454115779 163.28
## 2 Feb-90 32965187 435086002 153.25
## 3 Mar-90 39993913 568289732 178.42
## 4 Apr-90 37981886 568101697 178.68
## 5 May-90 38419672 539628385 188.88
## 6 Jun-90 42819023 570694457 189.16
min(september11Data$Air)
## [1] 29672427
max(september11Data$Air)
## [1] 69003617
september11Air.ts <-ts(september11Data$Air, start=c(1990,1), end=c(2001,8), freq=12)
plot(september11Air.ts/1000000, xlab="Time", ylab="Air Miles", ylim = c(28, 70), bty="l", main="Air Miles Traveled before 9/11")

min(september11Data$Rail)
## [1] 326874247
max(september11Data$Rail)
## [1] 664013874
september11Rail.ts <- ts(september11Data$Rail, start=c(1990,1), end=c(2001,8), freq=12)
plot(september11Rail.ts/1000000, xlab="Time", ylab="Rail Miles",ylim = c(310, 680), bty="l", main="Rail Miles Traveled Before 9/11")

min(september11Data$VMT)
## [1] 153.25
max(september11Data$VMT)
## [1] 261.3
september11Auto.ts <- ts(september11Data$VMT, start=c(1990,1), end=c(2001,8), freq=12)
plot(september11Auto.ts, xlab="Time", ylab="Auto Miles",ylim = c(150, 280), bty="l", main="Auto Miles Traveled Before 9/11")

A. What time series appear on the plot?

Per the text book, time series components can be divided into systematic and non-systematic:

  • Systematic: level, trend and seasonality. All present on the plots with seasonality being the most observable with the warmer months being the more travelled.Additionally, there is an upward trend for both Air and Auto travel, with the railroads’ popularity decreasing over time to a certain degree.
  • Non-systematic: noise. Unobservable, however, is present on all plots to some degree.

B. Better Visualize the patterns by using:

Trendlines

airLinearModel <- tslm(september11Air.ts ~ trend)
plot(september11Air.ts/1000000, xlab="Time", ylab="Air Miles", ylim = c(28, 70), bty="l", main="Air Miles Traveled before 9/11")
lines(airLinearModel$fitted/1000000)

railLinearModel <- tslm(september11Rail.ts ~ trend + I(trend^2))
plot(september11Rail.ts/1000000, xlab="Time", ylab="Rail Miles",ylim = c(310, 680), bty="l", main="Rail Miles Traveled Before 9/11")
lines(railLinearModel$fitted/1000000)

autoLinearModel <- tslm(september11Auto.ts ~ trend)
plot(september11Auto.ts, xlab="Time", ylab="Auto Miles",ylim = c(150, 280), bty="l", main="Auto Miles Traveled Before 9/11")
lines(autoLinearModel$fitted)

Seasonality

Annual_Travel_Air <- aggregate(september11Air.ts/1000000, nfrequency=1, FUN=sum)
plot(Annual_Travel_Air, bty="l")

Annual_Travel_Rail <- aggregate(september11Rail.ts/1000000, nfrequency=1, FUN=sum)
plot(Annual_Travel_Rail, bty="l")

Annual_Travel_Auto <- aggregate(september11Auto.ts, nfrequency=1, FUN=sum)
plot(Annual_Travel_Auto, bty="l")

Problem 3: Shipment of Household Appliances

A. Plot of the Data

appliancesData<- read_csv("ApplianceShipments.csv")
dim(appliancesData)
## [1] 20  2
str(appliancesData)
## Classes 'tbl_df', 'tbl' and 'data.frame':    20 obs. of  2 variables:
##  $ Quarter  : chr  "Q1-1985" "Q2-1985" "Q3-1985" "Q4-1985" ...
##  $ Shipments: int  4009 4321 4224 3944 4123 4522 4657 4030 4493 4806 ...
##  - attr(*, "spec")=List of 2
##   ..$ cols   :List of 2
##   .. ..$ Quarter  : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ Shipments: list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr  "collector_guess" "collector"
##   ..- attr(*, "class")= chr "col_spec"
head(appliancesData)
## # A tibble: 6 x 2
##   Quarter Shipments
##     <chr>     <int>
## 1 Q1-1985      4009
## 2 Q2-1985      4321
## 3 Q3-1985      4224
## 4 Q4-1985      3944
## 5 Q1-1986      4123
## 6 Q2-1986      4522
Appliances.ts <- ts(appliancesData$Shipments, start = c(1985,1), frequency = 4)
min(appliancesData$Shipments)
## [1] 3944
max(appliancesData$Shipments)
## [1] 4900
plot(Appliances.ts, xlab="Time", ylab="Shipments", ylim = c(3800, 5000), bty="l", main="Shipments of Household Appliances by Quorter")

appliancesLinearModel <- tslm(Appliances.ts ~ trend + I(trend^2))
plot(Appliances.ts, xlab="Time", ylab="Shipments", ylim=c(3800, 5000), bty="l", main="Shipments of Household Appliances by Quorter (Trend Line)")
lines(appliancesLinearModel$fitted)

Annual_Appliances <- aggregate(Appliances.ts, nfrequency=1, FUN=sum)
plot(Annual_Appliances, bty="l")

B. Plot Components

As discussed earlier, noise is always present in time series plots. Out of the systematic components, there is clear positive trend as illustrated by the trend line. Additionally, shipments of appliances seem to be affected by seasonality with the overall level of around 4,400 shipments per quarter.

6. Shampoo sales:

A. Plot the Data

shampooData<- read_csv("ShampooSales.csv")
dim(shampooData)
## [1] 36  2
str(shampooData)
## Classes 'tbl_df', 'tbl' and 'data.frame':    36 obs. of  2 variables:
##  $ Month        : chr  "Jan-95" "Feb-95" "Mar-95" "Apr-95" ...
##  $ Shampoo Sales: num  266 146 183 119 180 ...
##  - attr(*, "spec")=List of 2
##   ..$ cols   :List of 2
##   .. ..$ Month        : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ Shampoo Sales: list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr  "collector_guess" "collector"
##   ..- attr(*, "class")= chr "col_spec"
head(shampooData)
## # A tibble: 6 x 2
##    Month `Shampoo Sales`
##    <chr>           <dbl>
## 1 Jan-95           266.0
## 2 Feb-95           145.9
## 3 Mar-95           183.1
## 4 Apr-95           119.3
## 5 May-95           180.3
## 6 Jun-95           168.5
Shampoo.ts <- ts(shampooData$'Shampoo Sales', start = c(1995,1), frequency = 12)
max(shampooData$'Shampoo Sales')
## [1] 682
min(shampooData$`Shampoo Sales`)
## [1] 119.3
plot(Shampoo.ts, xlab="Time", ylab="Shampoo Sales", ylim = c(100, 700), bty="l", main="Shampoo Sales by Month")

shampooLinearModel <- tslm(Shampoo.ts ~ trend + I(trend^2))
plot(Shampoo.ts, xlab="Time", ylab="Shampoo Sales", ylim = c(100, 700), bty="l", main="Shampoo Sales by Month")
lines(shampooLinearModel$fitted)

Shampoo_Sales <- aggregate(Shampoo.ts, nfrequency=4, FUN=sum)
plot(Shampoo_Sales, bty="l")

B. Plot Components

As discussed earlier, noise is always present in time series plots. Out of the systematic components, there is clear positive trend as illustrated by the trend line. Additionally, shampoo sales seem to be affected by seasonality with the overall level of around 300 shipments per month.

C. Seasonality of Shampoo sales:

Frankly speaking, I was surprised to see seasonality in shampoo sales. Personally, I don’t think that I use more shampoo during summer (or winter for that matter) since I don’t tend take more or fewer showers depending on a season. That said, that is not true for some people who like to take more showers in the hot summer days, which results in more shampoo use.