Our project needed some changes before the presentation. Some of them are VIF which we forgot to do, confidence intervals, and talking more about prior research done on our topic.

The main thing we talked about in class was seasonal data. We used the same time series stuff we learned before but now we were looking at data that had seasonal patterns in it and we wanted to adjust our model accordingly.

library(faraway)
## Warning: package 'faraway' was built under R version 3.4.4
data(airpass)
attach(airpass)
plot(pass~year, type="l")

From this plot we can definitely see the seasonal patterns along the trend.

plot(log(pass)~year)

with(airpass, plot(log(pass)~year, type="l"))

Here I took the log of passengers to make it look a little better as the seasonal pattern was increasing along the trend before and now it is the same height along the trend.

justyear<-floor(airpass$year)
m <- airpass$year - justyear
mfactor <-factor(round(m*12))
cbind(airpass$year, mfactor)
##                 mfactor
##   [1,] 49.08333       2
##   [2,] 49.16667       3
##   [3,] 49.25000       4
##   [4,] 49.33333       5
##   [5,] 49.41667       6
##   [6,] 49.50000       7
##   [7,] 49.58333       8
##   [8,] 49.66667       9
##   [9,] 49.75000      10
##  [10,] 49.83333      11
##  [11,] 49.91667      12
##  [12,] 50.00000       1
##  [13,] 50.08333       2
##  [14,] 50.16667       3
##  [15,] 50.25000       4
##  [16,] 50.33333       5
##  [17,] 50.41667       6
##  [18,] 50.50000       7
##  [19,] 50.58333       8
##  [20,] 50.66667       9
##  [21,] 50.75000      10
##  [22,] 50.83333      11
##  [23,] 50.91667      12
##  [24,] 51.00000       1
##  [25,] 51.08333       2
##  [26,] 51.16667       3
##  [27,] 51.25000       4
##  [28,] 51.33333       5
##  [29,] 51.41667       6
##  [30,] 51.50000       7
##  [31,] 51.58333       8
##  [32,] 51.66667       9
##  [33,] 51.75000      10
##  [34,] 51.83333      11
##  [35,] 51.91667      12
##  [36,] 52.00000       1
##  [37,] 52.08333       2
##  [38,] 52.16667       3
##  [39,] 52.25000       4
##  [40,] 52.33333       5
##  [41,] 52.41667       6
##  [42,] 52.50000       7
##  [43,] 52.58333       8
##  [44,] 52.66667       9
##  [45,] 52.75000      10
##  [46,] 52.83333      11
##  [47,] 52.91667      12
##  [48,] 53.00000       1
##  [49,] 53.08333       2
##  [50,] 53.16667       3
##  [51,] 53.25000       4
##  [52,] 53.33333       5
##  [53,] 53.41667       6
##  [54,] 53.50000       7
##  [55,] 53.58333       8
##  [56,] 53.66667       9
##  [57,] 53.75000      10
##  [58,] 53.83333      11
##  [59,] 53.91667      12
##  [60,] 54.00000       1
##  [61,] 54.08333       2
##  [62,] 54.16667       3
##  [63,] 54.25000       4
##  [64,] 54.33333       5
##  [65,] 54.41667       6
##  [66,] 54.50000       7
##  [67,] 54.58333       8
##  [68,] 54.66667       9
##  [69,] 54.75000      10
##  [70,] 54.83333      11
##  [71,] 54.91667      12
##  [72,] 55.00000       1
##  [73,] 55.08333       2
##  [74,] 55.16667       3
##  [75,] 55.25000       4
##  [76,] 55.33333       5
##  [77,] 55.41667       6
##  [78,] 55.50000       7
##  [79,] 55.58333       8
##  [80,] 55.66667       9
##  [81,] 55.75000      10
##  [82,] 55.83333      11
##  [83,] 55.91667      12
##  [84,] 56.00000       1
##  [85,] 56.08333       2
##  [86,] 56.16667       3
##  [87,] 56.25000       4
##  [88,] 56.33333       5
##  [89,] 56.41667       6
##  [90,] 56.50000       7
##  [91,] 56.58333       8
##  [92,] 56.66667       9
##  [93,] 56.75000      10
##  [94,] 56.83333      11
##  [95,] 56.91667      12
##  [96,] 57.00000       1
##  [97,] 57.08333       2
##  [98,] 57.16667       3
##  [99,] 57.25000       4
## [100,] 57.33333       5
## [101,] 57.41667       6
## [102,] 57.50000       7
## [103,] 57.58333       8
## [104,] 57.66667       9
## [105,] 57.75000      10
## [106,] 57.83333      11
## [107,] 57.91667      12
## [108,] 58.00000       1
## [109,] 58.08333       2
## [110,] 58.16667       3
## [111,] 58.25000       4
## [112,] 58.33333       5
## [113,] 58.41667       6
## [114,] 58.50000       7
## [115,] 58.58333       8
## [116,] 58.66667       9
## [117,] 58.75000      10
## [118,] 58.83333      11
## [119,] 58.91667      12
## [120,] 59.00000       1
## [121,] 59.08333       2
## [122,] 59.16667       3
## [123,] 59.25000       4
## [124,] 59.33333       5
## [125,] 59.41667       6
## [126,] 59.50000       7
## [127,] 59.58333       8
## [128,] 59.66667       9
## [129,] 59.75000      10
## [130,] 59.83333      11
## [131,] 59.91667      12
## [132,] 60.00000       1
## [133,] 60.08333       2
## [134,] 60.16667       3
## [135,] 60.25000       4
## [136,] 60.33333       5
## [137,] 60.41667       6
## [138,] 60.50000       7
## [139,] 60.58333       8
## [140,] 60.66667       9
## [141,] 60.75000      10
## [142,] 60.83333      11
## [143,] 60.91667      12
## [144,] 61.00000       1
levels(mfactor) <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct","Nov", "Dec")
summary(mfactor)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec 
##  12  12  12  12  12  12  12  12  12  12  12  12

This made a month factor and shows that there are 12 of each month. Now we can make our model.

mod = lm(log(pass) ~ justyear + mfactor, data=airpass)
summary(mod)
## 
## Call:
## lm(formula = log(pass) ~ justyear + mfactor, data = airpass)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.156370 -0.041016  0.003677  0.044069  0.132324 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.214998   0.081277 -14.949  < 2e-16 ***
## justyear     0.120826   0.001432  84.399  < 2e-16 ***
## mfactorFeb   0.031390   0.024253   1.294    0.198    
## mfactorMar   0.019404   0.024253   0.800    0.425    
## mfactorApr   0.159700   0.024253   6.585 1.00e-09 ***
## mfactorMay   0.138500   0.024253   5.711 7.19e-08 ***
## mfactorJun   0.146196   0.024253   6.028 1.58e-08 ***
## mfactorJul   0.278411   0.024253  11.480  < 2e-16 ***
## mfactorAug   0.392422   0.024253  16.180  < 2e-16 ***
## mfactorSep   0.393196   0.024253  16.212  < 2e-16 ***
## mfactorOct   0.258630   0.024253  10.664  < 2e-16 ***
## mfactorNov   0.130541   0.024253   5.382 3.28e-07 ***
## mfactorDec  -0.003108   0.024253  -0.128    0.898    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0593 on 131 degrees of freedom
## Multiple R-squared:  0.9835, Adjusted R-squared:  0.982 
## F-statistic: 649.4 on 12 and 131 DF,  p-value: < 2.2e-16

This model compares all the months to January with their coefficients. This model looks pretty good while we could try seasons instead of months this might not improve our data. Also we can plot the model and see what it looks like.

plot(log(pass)~year, data = airpass, type = "l")
lines(airpass$year, mod$fitted.values, type = "l", col = "red")