library(readr)
library(forecast)
september11Data<- read_csv("Sept11Travel.csv")
dim(september11Data)
## [1] 172 4
str(september11Data)
## Classes 'tbl_df', 'tbl' and 'data.frame': 172 obs. of 4 variables:
## $ Month: chr "Jan-90" "Feb-90" "Mar-90" "Apr-90" ...
## $ Air : int 35153577 32965187 39993913 37981886 38419672 42819023 45770315 48763670 38173223 39051877 ...
## $ Rail : int 454115779 435086002 568289732 568101697 539628385 570694457 618571581 609210368 488444939 514253920 ...
## $ VMT : num 163 153 178 179 189 ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 4
## .. ..$ Month: list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ Air : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ Rail : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ VMT : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
head(september11Data)
## # A tibble: 6 x 4
## Month Air Rail VMT
## <chr> <int> <int> <dbl>
## 1 Jan-90 35153577 454115779 163.28
## 2 Feb-90 32965187 435086002 153.25
## 3 Mar-90 39993913 568289732 178.42
## 4 Apr-90 37981886 568101697 178.68
## 5 May-90 38419672 539628385 188.88
## 6 Jun-90 42819023 570694457 189.16
min(september11Data$Air)
## [1] 29672427
max(september11Data$Air)
## [1] 69003617
september11Air.ts <-ts(september11Data$Air, start=c(1990,1), end=c(2001,8), freq=12)
plot(september11Air.ts/1000000, xlab="Time", ylab="Air Miles", ylim = c(28, 70), bty="l", main="Air Miles Traveled before 9/11")
min(september11Data$Rail)
## [1] 326874247
max(september11Data$Rail)
## [1] 664013874
september11Rail.ts <- ts(september11Data$Rail, start=c(1990,1), end=c(2001,8), freq=12)
plot(september11Rail.ts/1000000, xlab="Time", ylab="Rail Miles",ylim = c(310, 680), bty="l", main="Rail Miles Traveled Before 9/11")
min(september11Data$VMT)
## [1] 153.25
max(september11Data$VMT)
## [1] 261.3
september11Auto.ts <- ts(september11Data$VMT, start=c(1990,1), end=c(2001,8), freq=12)
plot(september11Auto.ts, xlab="Time", ylab="Auto Miles",ylim = c(150, 280), bty="l", main="Auto Miles Traveled Before 9/11")
Per the text book, time series components can be divided into systematic and non-systematic:
airLinearModel <- tslm(september11Air.ts ~ trend)
plot(september11Air.ts/1000000, xlab="Time", ylab="Air Miles", ylim = c(28, 70), bty="l", main="Air Miles Traveled before 9/11")
lines(airLinearModel$fitted/1000000)
railLinearModel <- tslm(september11Rail.ts ~ trend + I(trend^2))
plot(september11Rail.ts/1000000, xlab="Time", ylab="Rail Miles",ylim = c(310, 680), bty="l", main="Rail Miles Traveled Before 9/11")
lines(railLinearModel$fitted/1000000)
autoLinearModel <- tslm(september11Auto.ts ~ trend)
plot(september11Auto.ts, xlab="Time", ylab="Auto Miles",ylim = c(150, 280), bty="l", main="Auto Miles Traveled Before 9/11")
lines(autoLinearModel$fitted)
Annual_Travel_Air <- aggregate(september11Air.ts/1000000, nfrequency=1, FUN=sum)
plot(Annual_Travel_Air, bty="l")
Annual_Travel_Rail <- aggregate(september11Rail.ts/1000000, nfrequency=1, FUN=sum)
plot(Annual_Travel_Rail, bty="l")
Annual_Travel_Auto <- aggregate(september11Auto.ts, nfrequency=1, FUN=sum)
plot(Annual_Travel_Auto, bty="l")
appliancesData<- read_csv("ApplianceShipments.csv")
dim(appliancesData)
## [1] 20 2
str(appliancesData)
## Classes 'tbl_df', 'tbl' and 'data.frame': 20 obs. of 2 variables:
## $ Quarter : chr "Q1-1985" "Q2-1985" "Q3-1985" "Q4-1985" ...
## $ Shipments: int 4009 4321 4224 3944 4123 4522 4657 4030 4493 4806 ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 2
## .. ..$ Quarter : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ Shipments: list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
head(appliancesData)
## # A tibble: 6 x 2
## Quarter Shipments
## <chr> <int>
## 1 Q1-1985 4009
## 2 Q2-1985 4321
## 3 Q3-1985 4224
## 4 Q4-1985 3944
## 5 Q1-1986 4123
## 6 Q2-1986 4522
Appliances.ts <- ts(appliancesData$Shipments, start = c(1985,1), frequency = 4)
min(appliancesData$Shipments)
## [1] 3944
max(appliancesData$Shipments)
## [1] 4900
plot(Appliances.ts, xlab="Time", ylab="Shipments", ylim = c(3800, 5000), bty="l", main="Shipments of Household Appliances by Quorter")
appliancesLinearModel <- tslm(Appliances.ts ~ trend + I(trend^2))
plot(Appliances.ts, xlab="Time", ylab="Shipments", ylim=c(3800, 5000), bty="l", main="Shipments of Household Appliances by Quorter (Trend Line)")
lines(appliancesLinearModel$fitted)
Annual_Appliances <- aggregate(Appliances.ts, nfrequency=1, FUN=sum)
plot(Annual_Appliances, bty="l")
As discussed earlier, noise is always present in time series plots. Out of the systematic components, there is clear positive trend as illustrated by the trend line. Additionally, shipments of appliances seem to be affected by seasonality with the overall level of around 4,400 shipments per quarter.
shampooData<- read_csv("ShampooSales.csv")
dim(shampooData)
## [1] 36 2
str(shampooData)
## Classes 'tbl_df', 'tbl' and 'data.frame': 36 obs. of 2 variables:
## $ Month : chr "Jan-95" "Feb-95" "Mar-95" "Apr-95" ...
## $ Shampoo Sales: num 266 146 183 119 180 ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 2
## .. ..$ Month : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ Shampoo Sales: list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
head(shampooData)
## # A tibble: 6 x 2
## Month `Shampoo Sales`
## <chr> <dbl>
## 1 Jan-95 266.0
## 2 Feb-95 145.9
## 3 Mar-95 183.1
## 4 Apr-95 119.3
## 5 May-95 180.3
## 6 Jun-95 168.5
Shampoo.ts <- ts(shampooData$'Shampoo Sales', start = c(1995,1), frequency = 12)
max(shampooData$'Shampoo Sales')
## [1] 682
min(shampooData$`Shampoo Sales`)
## [1] 119.3
plot(Shampoo.ts, xlab="Time", ylab="Shampoo Sales", ylim = c(100, 700), bty="l", main="Shampoo Sales by Month")
shampooLinearModel <- tslm(Shampoo.ts ~ trend + I(trend^2))
plot(Shampoo.ts, xlab="Time", ylab="Shampoo Sales", ylim = c(100, 700), bty="l", main="Shampoo Sales by Month")
lines(shampooLinearModel$fitted)
Shampoo_Sales <- aggregate(Shampoo.ts, nfrequency=4, FUN=sum)
plot(Shampoo_Sales, bty="l")
As discussed earlier, noise is always present in time series plots. Out of the systematic components, there is clear positive trend as illustrated by the trend line. Additionally, shampoo sales seem to be affected by seasonality with the overall level of around 300 shipments per month.
Frankly speaking, I was surprised to see seasonality in shampoo sales. Personally, I don’t think that I use more shampoo during summer (or winter for that matter) since I don’t tend take more or fewer showers depending on a season. That said, that is not true for some people who like to take more showers in the hot summer days, which results in more shampoo use.