#Loading necessary libraries:
library(forecast)
library(tidytext)
library(stringr)
library(ggplot2)
library(dplyr)
library(tidyr)
library(knitr)
library(pander)
library(ggfortify)
library(rgdal)
Setting the working directory
setwd("C:/Users/Marissa.Valente/Documents/RStuff/MBA_678")
For this assignment I will be completing the assigned problems from Chapter 2 in the Practical Time Series Forecasting with R textbook by, Galit Schmueli and Kenneth C. Lichtendahl Jr.
First I will import the September 11 Travel csv file used for this problem and look at the \(head()\) and \(glimpse()\).
Sep_11 <- read.csv("Sept11Travel.csv")
str(Sep_11)
## 'data.frame': 172 obs. of 4 variables:
## $ Month: Factor w/ 172 levels "1-Apr","1-Aug",..: 86 75 119 42 130 108 97 53 163 152 ...
## $ Air : int 35153577 32965187 39993913 37981886 38419672 42819023 45770315 48763670 38173223 39051877 ...
## $ Rail : int 454115779 435086002 568289732 568101697 539628385 570694457 618571581 609210368 488444939 514253920 ...
## $ VMT : num 163 153 178 179 189 ...
head(Sep_11)
## Month Air Rail VMT
## 1 Jan-90 35153577 454115779 163.28
## 2 Feb-90 32965187 435086002 153.25
## 3 Mar-90 39993913 568289732 178.42
## 4 Apr-90 37981886 568101697 178.68
## 5 May-90 38419672 539628385 188.88
## 6 Jun-90 42819023 570694457 189.16
glimpse(Sep_11)
## Observations: 172
## Variables: 4
## $ Month <fctr> Jan-90, Feb-90, Mar-90, Apr-90, May-90, Jun-90, Jul-90,...
## $ Air <int> 35153577, 32965187, 39993913, 37981886, 38419672, 428190...
## $ Rail <int> 454115779, 435086002, 568289732, 568101697, 539628385, 5...
## $ VMT <dbl> 163.28, 153.25, 178.42, 178.68, 188.88, 189.16, 195.09, ...
Variables
Air - Actual airline revenue passenger miles
Rail - Rail Passenger Miles
VMT - Vehicle Miles Traveled (Auto)
After looking at the minimum and maximum values for the Air Passenger Miles, Rail Passenger Miles, and VMT monthly values, I converted the values to millions for Air Passgener Miles and Rail Passenger Miles to make it easier to visualize. (See Appendix for calculations)
Air2 <- Sep_11$Air/1000000
Rail2 <- Sep_11$Rail/1000000
VMT <- Sep_11$VMT
Next I will create time series objects and plot the three pre-event time series.
Air_PreSep_11.ts <- ts(Air2, start=c(1990,1), end=c(2001,8), freq=12)
plot(Air_PreSep_11.ts, xlab="Date", ylab="Air Passenger Miles (in millions)", ylim=c(25,70), bty="l")
Rail_PreSep_11.ts <- ts(Rail2, start=c(1990,1), end=c(2001,8), freq=12)
plot(Rail_PreSep_11.ts, xlab="Date", ylab="Rail Passenger Miles (in millions)", ylim=c(300,700), bty="l")
VMT_PreSep_11.ts <- ts(VMT, start=c(1990,1), end=c(2001,8), freq=12)
plot(VMT_PreSep_11.ts, xlab="Date", ylab="Vehicle Miles Traveled", ylim=c(140,300), bty="l")
Figures 1, 2, and 3, are the plots for each of the pre event time series (Air, Rail, and VMT), the time series components that appear include level, trend, seasonality, and noise.
Figures 1 and 3 appear to have an upward linear trend with additive seasonality. While Figure 2 appears to be a downward, nonlinear, quadratic-like trend.
Through further analysis we will be able to determine if seasonality and trend truly exist for each one of the three time series.
plot(Air_PreSep_11.ts, log="y", xlab="Date", ylab="Air Passenger Miles(in millions, log scale)", ylim=c(25,70), bty="l")
plot(Air_PreSep_11.ts, log="xy", xlab="Date (log scale)", ylab="Air Passenger Miles (in millions, log scale)", ylim=c(25,70), bty="l")
plot(Rail_PreSep_11.ts, log="y", xlab="Date", ylab="Rail Passenger Miles (in millions, log scale)", ylim=c(300,700), bty="l")
plot(Rail_PreSep_11.ts, log="xy", xlab="Date (log scale)", ylab="Rail Passenger Miles (in millions, log scale)", ylim=c(300,700), bty="l")
plot(VMT_PreSep_11.ts, log="y", xlab="Date", ylab="VMT (log scale)", ylim=c(140,300), bty="l")
plot(VMT_PreSep_11.ts, log="xy", xlab="Date (log scale)", ylab="Vehicle Miles Traveled (log scale)", ylim=c(140,300), bty="l")
None of the log scales for any of the three times series objects seems to be more linear. Next we will look at trend lines to determine if/where trends exist in these times series.
Air_Linear <- tslm(Air_PreSep_11.ts ~ trend)
summary(Air_Linear)
##
## Call:
## tslm(formula = Air_PreSep_11.ts ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.4664 -3.4106 -0.6812 3.3608 11.8235
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 35.72843 0.83475 42.80 <2e-16 ***
## trend 0.17710 0.01027 17.24 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.912 on 138 degrees of freedom
## Multiple R-squared: 0.6829, Adjusted R-squared: 0.6806
## F-statistic: 297.2 on 1 and 138 DF, p-value: < 2.2e-16
plot(Air_PreSep_11.ts, xlab="Date", ylab="Air Passenger Miles (in millions)", ylim=c(25,70), bty="l")
lines(Air_Linear$fitted, lwd=2)
Air_Quad <- tslm(Air_PreSep_11.ts ~ trend + I(trend^2))
summary(Air_Quad)
##
## Call:
## tslm(formula = Air_PreSep_11.ts ~ trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.7066 -3.3855 -0.4943 3.3340 11.9016
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.745e+01 1.253e+00 29.897 <2e-16 ***
## trend 1.042e-01 4.102e-02 2.541 0.0122 *
## I(trend^2) 5.169e-04 2.818e-04 1.834 0.0688 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.87 on 137 degrees of freedom
## Multiple R-squared: 0.6905, Adjusted R-squared: 0.686
## F-statistic: 152.8 on 2 and 137 DF, p-value: < 2.2e-16
lines(Air_Quad$fitted, lty=2, lwd=3)
title("Air Passenger Miles (in millions)
Linear and Quadratic Trend Lines")
Looking at the adjusted \(R^2\) value for the Air Passenger Miles prior to 9/11 the Linear trend line can explain 68.1% of the variance, while the Quadratic trend line can explain 68.6% of the variance. Therefore, the quadratic trend line is more accurate, although not by much.
Rail_Linear <- tslm(Rail_PreSep_11.ts ~ trend)
summary(Rail_Linear)
##
## Call:
## tslm(formula = Rail_PreSep_11.ts ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -142.23 -40.75 -4.37 41.27 133.14
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 547.6329 10.5118 52.097 < 2e-16 ***
## trend -0.8377 0.1294 -6.476 1.53e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 61.86 on 138 degrees of freedom
## Multiple R-squared: 0.2331, Adjusted R-squared: 0.2275
## F-statistic: 41.94 on 1 and 138 DF, p-value: 1.532e-09
plot(Rail_PreSep_11.ts, xlab="Date", ylab="Rail Passenger Miles (in millions)", ylim=c(300,700), bty="l")
lines(Rail_Linear$fitted, lwd=2)
Rail_Quad <- tslm(Rail_PreSep_11.ts ~ trend + I(trend^2))
summary(Rail_Quad)
##
## Call:
## tslm(formula = Rail_PreSep_11.ts ~ trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -137.635 -37.328 -1.559 41.652 125.113
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 576.828666 15.618390 36.93 < 2e-16 ***
## trend -2.071369 0.511394 -4.05 8.52e-05 ***
## I(trend^2) 0.008749 0.003513 2.49 0.014 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 60.72 on 137 degrees of freedom
## Multiple R-squared: 0.2663, Adjusted R-squared: 0.2556
## F-statistic: 24.86 on 2 and 137 DF, p-value: 6.14e-10
lines(Rail_Quad$fitted, lty=2, lwd=3)
title("Rail Passenger Miles (in millions)
Linear and Quadratic Trend Lines")
Looking at the adjusted \(R^2\) value for the Rail Passenger Miles prior to 9/11 the Linear trend line can explain 22.8% of the variance, while the Quadratic trend line can explain 25.6% of the variance. Therefore, the quadratic trend line is more accurate than the linear trend line.
VMT_Linear <- tslm(VMT_PreSep_11.ts ~ trend)
summary(VMT_Linear)
##
## Call:
## tslm(formula = VMT_PreSep_11.ts ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.554 -9.185 0.559 11.087 23.272
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 173.17137 2.42094 71.53 <2e-16 ***
## trend 0.44249 0.02979 14.85 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.25 on 138 degrees of freedom
## Multiple R-squared: 0.6152, Adjusted R-squared: 0.6124
## F-statistic: 220.6 on 1 and 138 DF, p-value: < 2.2e-16
plot(VMT_PreSep_11.ts, xlab="Date", ylab="Vehicle Miles Traveled", ylim=c(140,300), bty="l")
lines(VMT_Linear$fitted, lwd=2)
VMT_Quad <- tslm(VMT_PreSep_11.ts ~ trend + I(trend^2))
summary(VMT_Quad)
##
## Call:
## tslm(formula = VMT_PreSep_11.ts ~ trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.503 -8.745 0.798 10.974 23.752
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.745e+02 3.674e+00 47.487 < 2e-16 ***
## trend 3.867e-01 1.203e-01 3.214 0.00163 **
## I(trend^2) 3.956e-04 8.266e-04 0.479 0.63297
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.29 on 137 degrees of freedom
## Multiple R-squared: 0.6158, Adjusted R-squared: 0.6102
## F-statistic: 109.8 on 2 and 137 DF, p-value: < 2.2e-16
lines(VMT_Quad$fitted, lty=2, lwd=3)
title("VMT Linear and Quadratic Trend Lines")
Looking at the adjusted \(R^2\) value for the VMT prior to 9/11 the Linear trend line can explain 61.2% of the variance, while the Quadratic trend line can explain 61.0% of the variance. Therefore, the linear trend line is more accurate, although not by much.
It seems as though all three of the time series objects can be explained by either linear or quadratic trend lines, although the Rail Passenger Miles has a very low adjusted \(R^2\) indicating that the model explains less of the variability of the response data than the other two time series objects.
However, trend still appears in all three of the time series.
As this chapter suggests, aggregating the time series can help us surpress seasonality, this may allow us to see more trends or seasonality in the three time series objects. I will be aggregating the monthly data for all three time series into years and then I will also create separate plots for each series.
#aggregate by year
annually_Air <- aggregate(Air_PreSep_11.ts, nfrequency=1, FUN=sum)
plot(annually_Air, bty="l")
#aggregate by year
annually_Rail <- aggregate(Rail_PreSep_11.ts, nfrequency=1, FUN=sum)
plot(annually_Rail, bty="l")
#aggregate by year
annually_VMT <- aggregate(VMT_PreSep_11.ts, nfrequency=1, FUN=sum)
plot(annually_VMT, bty="l")
After looking at Figure 13, 14, and 15 it seems that Rail Passenger Miles were declining from 1991 until 1996, while Air Passenger Miles and Vehicle Miles Traveled (VMT) were increasing steadily during the same period of time.
Now we will look at Air Passenger Miles, Rail Passenger Miles, and VMT by month for the years prior to the 9/11 attack.
par(oma = c(0, 0, 0, 2))
Air_xrange <- c(1,14)
Air_yrange <- range(Air_PreSep_11.ts)
plot(Air_xrange, Air_yrange, type="n", xlab="Year", ylab="Air Passenger Miles (in millions)", bty="l")
colors <- rainbow(12)
linetype <- c(1:12)
plotchar <- c(1:12)
for (i in 1:12) {
currentMonth <- subset(Air_PreSep_11.ts, cycle(Air_PreSep_11.ts)==i)
lines(currentMonth, type="b", lwd=1.5,
lty=linetype[i], col=colors[i], pch=plotchar[i])
}
title("Air Passenger Miles (in millions) by Month")
legend("right", legend = 1:12, cex = 0.8, col = colors, pch = plotchar, lty = linetype, title = "Month", xpd=NA)
par(oma = c(0, 0, 0, 2))
Rail_xrange <- c(1,14)
Rail_yrange <- range(Rail_PreSep_11.ts)
plot(Rail_xrange, Rail_yrange, type="n", xlab="Year", ylab="Rail Passenger Miles (in millions)", bty="l")
colors <- rainbow(12)
linetype <- c(1:12)
plotchar <- c(1:12)
for (i in 1:12) {
currentMonth <- subset(Rail_PreSep_11.ts, cycle(Rail_PreSep_11.ts)==i)
lines(currentMonth, type="b", lwd=1.5,
lty=linetype[i], col=colors[i], pch=plotchar[i])
}
title("Rail Passenger Miles (in millions) by Month")
legend("right", legend = 1:12, cex = 0.8, col = colors, pch = plotchar, lty = linetype, title = "Month", xpd=NA)
par(oma = c(0, 0, 0, 2))
VMT_xrange <- c(1,14)
VMT_yrange <- range(VMT_PreSep_11.ts)
plot(VMT_xrange, VMT_yrange, type="n", xlab="Year", ylab="VMT", bty="l")
colors <- rainbow(12)
linetype <- c(1:12)
plotchar <- c(1:12)
for (i in 1:12) {
currentMonth <- subset(VMT_PreSep_11.ts, cycle(VMT_PreSep_11.ts)==i)
lines(currentMonth, type="b", lwd=1.5,
lty=linetype[i], col=colors[i], pch=plotchar[i])
}
title("Vehicle Miles Traveled by Month")
legend("right", legend = 1:12, cex = 0.8, col = colors, pch = plotchar, lty = linetype, title = "Month", xpd=NA)
After looking at Figure 16, 17, and 18 it becomes clear there is seasonality in the three time series data. Even though the data itself changes, the trend shows that the highest miles, whether it be Air Passenger Miles, Rail Passenger Miles, or Vehicle Miles Traveled (VMT), occurred in August over the years from 1990 to 2001 and the lowest month for miles was February over that same time period. Furthermore, in general, the summer months seem to be higher for travel across transportation type, while January and February are lower. See Appendix - Figures J, K, and L for more seasonal graphs.
For problem 3, I will import the Appliance Shipments csv file which contains a series of quarterly shipments (in millions of USD) of U.S. household appliances between 1985 and 1989. After importing I will look at the \(head()\) and \(tail()\) of the dataset.
#Importing the Appliance Shipments
App_Ship <- read.csv("ApplianceShipments.csv")
str(App_Ship)
## 'data.frame': 20 obs. of 2 variables:
## $ Quarter : Factor w/ 20 levels "Q1-1985","Q1-1986",..: 1 6 11 16 2 7 12 17 3 8 ...
## $ Shipments: int 4009 4321 4224 3944 4123 4522 4657 4030 4493 4806 ...
head(App_Ship)
## Quarter Shipments
## 1 Q1-1985 4009
## 2 Q2-1985 4321
## 3 Q3-1985 4224
## 4 Q4-1985 3944
## 5 Q1-1986 4123
## 6 Q2-1986 4522
tail(App_Ship)
## Quarter Shipments
## 15 Q3-1988 4417
## 16 Q4-1988 4258
## 17 Q1-1989 4245
## 18 Q2-1989 4900
## 19 Q3-1989 4585
## 20 Q4-1989 4533
Variables
Quarter - Shows the quarter and year of the shipment
Shipments - Shows household appliances shipments (in millions of USD) occurring in the particular time period (Quarterly)
First I will create a time series object from the appliance shipments data. Then I will show the time plot on a quarterly basis, and then an annual basis to see if a trend or seasonality becomes obvious.
App_Ship.ts <- ts(App_Ship$Shipments, start=c(1985,1), end=c(1989,4), freq=4)
plot(App_Ship.ts, xlab="Year", ylab="Appliances Shipped (in millions of USD, $)", ylim=c(3100,5000), bty="l")
App_Ship_annually <- aggregate(App_Ship.ts, nfrequency = 1, FUN=sum)
plot(App_Ship_annually, bty="l")
It is difficult to tell from Figure 19 and Figure 20 if there is a trend or seasonality in the data set. Next I will look at linear and quadratic trend lines against the quarterly data and then look at the data by quarters.
Like all time series data, both noise and level are present in this series. Additionally, the analysis below shows us that some seasonality exists within this dataset and that there is a slight quadratic trend is likely.
App_Ship_Linear <- tslm(App_Ship.ts ~trend)
summary(App_Ship_Linear)
##
## Call:
## tslm(formula = App_Ship.ts ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -339.06 -171.14 3.13 137.27 393.40
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4167.663 111.035 37.535 <2e-16 ***
## trend 24.494 9.269 2.643 0.0165 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 239 on 18 degrees of freedom
## Multiple R-squared: 0.2795, Adjusted R-squared: 0.2395
## F-statistic: 6.983 on 1 and 18 DF, p-value: 0.01655
plot(App_Ship.ts, xlab="Date", ylab="Appliance Shipments (in millions of USD)", ylim=c(3100,5000),bty="l")
lines(App_Ship_Linear$fitted, lwd=2)
App_Ship_Quad <- tslm(App_Ship.ts ~trend + I(trend^2))
summary(App_Ship_Quad)
##
## Call:
## tslm(formula = App_Ship.ts ~ trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -398.39 -155.53 32.63 181.62 346.62
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3982.931 173.764 22.921 3.19e-14 ***
## trend 74.876 38.109 1.965 0.066 .
## I(trend^2) -2.399 1.763 -1.361 0.191
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 233.6 on 17 degrees of freedom
## Multiple R-squared: 0.3503, Adjusted R-squared: 0.2739
## F-statistic: 4.583 on 2 and 17 DF, p-value: 0.02559
lines(App_Ship_Quad$fitted, lty=2, lwd=3)
title("Appliance Shipments (in millions of USD)
Linear and Quadratic Trend Lines")
Looking at the adjusted \(R^2\) value of the Linear trend line for U.S. household appliances shipments (in millions of USD) 24% of the variance can be explained, while the Quadratic trend line can explain 27% of the variance. Therefore, the quadratic trend line is more accurate.
par(oma = c(0, 0, 0, 2))
App_Ship_xrange <- c(1,6)
App_Ship_yrange <- range(App_Ship.ts)
plot(App_Ship_xrange, App_Ship_yrange, type="n", xlab="Year", ylab="Appliance Shipments (in millions of USD)", bty="l")
colors <- c(1:4)
linetype <- c(1:4)
plotchar <-c(1:4)
for (i in 1:4) {
currentQuarter <- subset(App_Ship.ts, cycle(App_Ship.ts)==i)
lines(currentQuarter, type="b", lwd=1.5,
lty=linetype[i], col=colors[i], pch=plotchar[i])
}
title("Appliance Shipments Broken Down by Quarter")
legend("right", legend = 1:4, cex = 0.8, col = colors, pch = plotchar, lty = linetype, title = "Quarter", xpd=NA)
Figure 22 shows the breakout of appliance shipments by quarter, over the 5 years the data spans (Q1 1985 - Q4 1989). Although, there is some overlap, it does seem that Q2 had the highest appliance shipments (in millions of USD) across 1985 to 1989, which means seasonality does exist to some extent.
ggseasonplot(App_Ship.ts)
Figure 23, allows us to further see that although the general seasonality of Q2 exists, it is not the case for 1986. We would likely need more data to see if this seasonal pattern continues.
For the last problem in this assignment, I will import the Shampoo Sales csv file which contains data on the monthly sales of a certain shampoo over a 3-year period. I will then look at the \(head()\) and \(tail()\)
Shampoo_Sales<- read.csv("ShampooSales.csv")
str(Shampoo_Sales)
## 'data.frame': 36 obs. of 2 variables:
## $ Month : Factor w/ 36 levels "Apr-95","Apr-96",..: 13 10 22 1 25 19 16 4 34 31 ...
## $ Shampoo.Sales: num 266 146 183 119 180 ...
head(Shampoo_Sales)
## Month Shampoo.Sales
## 1 Jan-95 266.0
## 2 Feb-95 145.9
## 3 Mar-95 183.1
## 4 Apr-95 119.3
## 5 May-95 180.3
## 6 Jun-95 168.5
tail(Shampoo_Sales)
## Month Shampoo.Sales
## 31 Jul-97 575.5
## 32 Aug-97 407.6
## 33 Sep-97 682.0
## 34 Oct-97 475.3
## 35 Nov-97 581.3
## 36 Dec-97 646.9
Variables
Month - Shows the month and year of the sales data captured Sales - Monthly sales for a certain type of shampoo over a 3-year period
First I will create a time series object from the shampoo sales data. Then I will show the time plot on a monthly basis, and then an aggregate on a quarterly as well as, annual basis to see if a trend or seasonality becomes obvious.
Shampoo.ts <- ts(Shampoo_Sales$Shampoo.Sales, start=c(1995,1), end=c(1997,12), freq=12)
plot(Shampoo.ts, xlab="Year", ylab="Monthly Shampoo Sales", ylim=c(0,685), bty="l")
Quarterly_Shampoo <- aggregate(Shampoo.ts, nfrequency = 4, FUN=sum)
plot(Quarterly_Shampoo, bty="l")
Annual_Shampoo <- aggregate(Shampoo.ts, nfrequency = 1, FUN=sum)
plot(Annual_Shampoo, bty="l")
Figures 24, 25, and 26 allow us to see a general trend, this trend is an upward trend that could be either linear or quadratic, again it is hard to tell from just these figures. However, it does not appear to have seasonality.
Further analysis is conducted below to determine the type of trend, as well as, if seasonality does exist.
Like all time series data, both noise and level are present in this series. There is a trend occurring in the dataset, although further analysis will be conducted to determine what type of trend. At this point it does not look like there is any seasonality in the dataset but further analysis will determine if that is the case.
Shampoo_Linear <- tslm(Shampoo.ts ~trend)
summary(Shampoo_Linear)
##
## Call:
## tslm(formula = Shampoo.ts ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -108.74 -52.12 -16.13 43.48 194.25
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 89.14 26.72 3.336 0.00207 **
## trend 12.08 1.26 9.590 3.37e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 78.5 on 34 degrees of freedom
## Multiple R-squared: 0.7301, Adjusted R-squared: 0.7222
## F-statistic: 91.97 on 1 and 34 DF, p-value: 3.368e-11
plot(Shampoo.ts, xlab="Date", ylab="Shampoo Sales", ylim=c(0,685),bty="l")
lines(Shampoo_Linear$fitted, lwd=2)
Shampoo_Quad <- tslm(Shampoo.ts ~trend + I(trend^2))
summary(Shampoo_Quad)
##
## Call:
## tslm(formula = Shampoo.ts ~ trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -104.148 -42.075 -8.438 33.924 144.582
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 202.8789 33.3002 6.092 7.35e-07 ***
## trend -5.8801 4.1501 -1.417 0.166
## I(trend^2) 0.4854 0.1088 4.461 8.93e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 62.93 on 33 degrees of freedom
## Multiple R-squared: 0.8316, Adjusted R-squared: 0.8214
## F-statistic: 81.51 on 2 and 33 DF, p-value: 1.708e-13
lines(Shampoo_Quad$fitted, lty=2, lwd=3)
title("Monthly Shampoo Sales
Linear and Quadratic Trend Lines")
Looking at the adjusted \(R^2\) value of the Linear trend line for Shampoo sales 72% of the variance can be explained, while the Quadratic trend line can explain 82% of the variance. Therefore, the quadratic trend line is more accurate and more representative of the existing trend.
I wouldn’t expect there to be seasonality in sales of shampoo, mostly because I would think people using shampoo would have their own routine for when they use shampoo, generally daily. Therefore, I wouldn’t think there would have a seasonality unless the data could be broken down into hours of the day and then we may see more people using shampoo at a certain time in the day. However, I do think there may be seasonality in the types of shampoo purchased, unfortunately this data series only includes the sales for one particular type of shampoo, but I would be curious to look at different types and weather patterns with a hypothesis that more shampoos focused on moisturizing are sold in the drier months. It would also be interesting to see the difference between gender and shampoo sales for different types of shampoo.
par(oma = c(0,0,0,2))
plot(c(1,4), c(115,685), type="n", bty="l", xlab="Date", ylab="Shampoo Sales")
Shampoo_Jan <- subset(Shampoo.ts, cycle(Shampoo.ts)==1)
lines(Shampoo_Jan, col=1, pch=1, type="b")
Shampoo_Feb <- subset(Shampoo.ts, cycle(Shampoo.ts)==2)
lines(Shampoo_Feb, col=2, pch=2, type="b")
Shampoo_Mar <- subset(Shampoo.ts, cycle(Shampoo.ts)==3)
lines(Shampoo_Mar, col=3, pch=3, type="b")
Shampoo_Apr <- subset(Shampoo.ts, cycle(Shampoo.ts)==4)
lines(Shampoo_Apr, col=4, pch=4, type="b")
Shampoo_May <- subset(Shampoo.ts, cycle(Shampoo.ts)==5)
lines(Shampoo_May, col=5, pch=5, type="b")
Shampoo_Jun <- subset(Shampoo.ts, cycle(Shampoo.ts)==6)
lines(Shampoo_Jun, col=6, pch=6, type="b")
Shampoo_Jul <- subset(Shampoo.ts, cycle(Shampoo.ts)==7)
lines(Shampoo_Jul, col=7, pch=7, type="b")
Shampoo_Aug <- subset(Shampoo.ts, cycle(Shampoo.ts)==8)
lines(Shampoo_Aug, col=8, pch=8, type="b")
Shampoo_Sep <- subset(Shampoo.ts, cycle(Shampoo.ts)==9)
lines(Shampoo_Sep, col=9, pch=9, type="b")
Shampoo_Oct <- subset(Shampoo.ts, cycle(Shampoo.ts)==10)
lines(Shampoo_Oct, col=10, pch=10, type="b")
Shampoo_Nov <- subset(Shampoo.ts, cycle(Shampoo.ts)==11)
lines(Shampoo_Feb, col=11, pch=11, type="b")
Shampoo_Dec <- subset(Shampoo.ts, cycle(Shampoo.ts)==12)
lines(Shampoo_Dec, col=12, pch=12, type="b")
#add a legend
legend("right", legend = 1:12, cex=0.8, col=1:12, pch=1:12, lty=linetype, title="Month", xpd=NA)
Figure 28 shows the break out of shampoo sales by month, over the 3 years the data spans (January 1995 to December 1997). It appears there is no clear seasonality within this data series.
min(Sep_11$Air)
## [1] 29672427
max(Sep_11$Air)
## [1] 69003617
min(Sep_11$Rail)
## [1] 326874247
max(Sep_11$Rail)
## [1] 664013874
min(Sep_11$VMT)
## [1] 153.25
max(Sep_11$VMT)
## [1] 261.3
autoplot(Air_PreSep_11.ts, ts.color = 'red', ts.linetype = 'dashed', xlab="Date", ylab="Air Passenger Miles (in millions)")
autoplot(Rail_PreSep_11.ts, ts.color = 'red', ts.linetype = 'dashed', xlab="Date", ylab="Rail Passenger Miles (in millions)")
autoplot(VMT_PreSep_11.ts, ts.color = 'red', ts.linetype = 'dashed', xlab="Date", ylab="Vehicle Miles Traveled")
Air_PreSep_12MonthsPrior <- window(Air_PreSep_11.ts, 2000, c(2001,8))
plot(Air_PreSep_12MonthsPrior, ylab="Air Passenger Miles (in millions)", ylim=c(25,70), bty="l")
Year_2001_Sep_11.ts <- ts(Air2, start=c(2001,1), end=c(2004,4), freq=12)
plot(Year_2001_Sep_11.ts, xlab="Date", ylab="Air Passenger Miles (in millions)", ylim=c(25,70), bty="l")
Before_After_Sep_11_Months.ts <- ts(Air2, start=c(2001,7), end=c(2002,7), freq=12)
plot(Before_After_Sep_11_Months.ts, xlab="Date", ylab="Air Passenger Miles (in millions)", ylim=c(30,55), bty="l")
Air.ts <- ts(Air2, start=c(1990,1), end=c(2004,4), freq=12)
plot(Air.ts, xlab="Date", ylab="Air Passenger Miless (in millions)", ylim=c(25,70), bty="l")
Air.ts <- ts(Air2, start=c(1990,1), end=c(2004,4), freq=1)
plot(Air.ts, xlab="Date", ylab="Air Passenger Miless (in millions)", ylim=c(25,70), bty="l")
par(oma = c(0,0,0,2))
plot(c(1,14), c(25,70), type="n", bty="l", xlab="Date", ylab="Air Passenger Miles (in millions)")
Air_janRiders <- subset(Air_PreSep_11.ts, cycle(Air_PreSep_11.ts)==1)
lines(Air_janRiders, col=1, pch=1, type="b")
Air_febRiders <- subset(Air_PreSep_11.ts, cycle(Air_PreSep_11.ts)==2)
lines(Air_febRiders, col=2, pch=2, type="b")
Air_marRiders <- subset(Air_PreSep_11.ts, cycle(Air_PreSep_11.ts)==3)
lines(Air_marRiders, col=3, pch=3, type="b")
Air_aprRiders <- subset(Air_PreSep_11.ts, cycle(Air_PreSep_11.ts)==4)
lines(Air_aprRiders, col=4, pch=4, type="b")
Air_mayRiders <- subset(Air_PreSep_11.ts, cycle(Air_PreSep_11.ts)==5)
lines(Air_mayRiders, col=5, pch=5, type="b")
Air_junRiders <- subset(Air_PreSep_11.ts, cycle(Air_PreSep_11.ts)==6)
lines(Air_junRiders, col=6, pch=6, type="b")
Air_julRiders <- subset(Air_PreSep_11.ts, cycle(Air_PreSep_11.ts)==7)
lines(Air_julRiders, col=7, pch=7, type="b")
Air_augRiders <- subset(Air_PreSep_11.ts, cycle(Air_PreSep_11.ts)==8)
lines(Air_augRiders, col=8, pch=8, type="b")
Air_sepRiders <- subset(Air_PreSep_11.ts, cycle(Air_PreSep_11.ts)==9)
lines(Air_sepRiders, col=9, pch=9, type="b")
Air_octRiders <- subset(Air_PreSep_11.ts, cycle(Air_PreSep_11.ts)==10)
lines(Air_octRiders, col=10, pch=10, type="b")
Air_novRiders <- subset(Air_PreSep_11.ts, cycle(Air_PreSep_11.ts)==11)
lines(Air_novRiders, col=11, pch=11, type="b")
Air_decRiders <- subset(Air_PreSep_11.ts, cycle(Air_PreSep_11.ts)==12)
lines(Air_decRiders, col=12, pch=12, type="b")
#add a legend
legend("right", legend = 1:12, cex=0.8, col=1:12, pch=1:12, lty=linetype, title="Month", xpd=NA)
ggseasonplot(Air_PreSep_11.ts)
ggseasonplot(Rail_PreSep_11.ts)
ggseasonplot(VMT_PreSep_11.ts)
Calculating the minimum and maximum values for Shipments (in millions of USD).
min(App_Ship$Shipments)
## [1] 3944
max(App_Ship$Shipments)
## [1] 4900
plot(App_Ship.ts, log="xy", xlab="Year (log scale)", ylab="Appliances Shipped (log scale)", ylim=c(3100,5000), bty="l")
Calculating the minimum and maximum values for Shampoo Sales.
min(Shampoo_Sales$Shampoo.Sales)
## [1] 119.3
max(Shampoo_Sales$Shampoo.Sales)
## [1] 682