We have the necessary data after the cleaning. However, the values of the number of passengers may not be independent. In particular, the number of passengers at time t may depend on the number of passengers at time t - 1.
To handle such an issue, a common practice is to use a technique called time delay embedding.
Time delay embedding describes the state of the dynamic system that generates the observed time series by the set of the k most recently observed values.
In R, we use the function embed() to create an embedding from a time series. It receives a time series and k, and generates a data matrix.
library(readr)
library(stringr)
library(xts)
library(lubridate)
library(dplyr)
library(zoo)
# Load the raw passenger statistics; the first row of the file holds the
# column headers.
dataAP <- read_csv("Air_Traffic_Passenger_Statistics1.csv", col_names = TRUE)
# Keep only the three columns used downstream and give them short names.
dataAP1 <- setNames(
  data.frame(
    dataAP[["Year"]],
    dataAP[["Month"]],
    dataAP[["Adjusted Passenger Count"]]
  ),
  c("Year", "Month", "Count")
)
class(dataAP)
## [1] "spec_tbl_df" "tbl_df" "tbl" "data.frame"
df<-data.frame(date=character(), time=character(),count=integer())
str(df)
## 'data.frame': 0 obs. of 3 variables:
## $ date : chr
## $ time : chr
## $ count: int
colnames(df)
## [1] "date" "time" "count"
# Total the passenger counts for every (year, month) pair and keep only the
# months that have a non-zero total, in chronological order.
#
# The result is assembled in a single data.frame() call instead of being
# appended to `df` with rbind() inside a loop, which re-copies the whole
# frame on every iteration.
years <- seq(min(dataAP1$Year), max(dataAP1$Year))
# expand.grid() varies its first factor fastest, so the twelve month names
# cycle within each year, giving the same order as a year-outer/month-inner
# loop.
periods <- expand.grid(
  Month = month.name,
  Year = years,
  stringsAsFactors = FALSE
)
totals <- vapply(
  seq_len(nrow(periods)),
  function(i) {
    # which() ignores NA comparisons, as dplyr::filter() does.
    hit <- which(
      dataAP1$Year == periods$Year[i] & dataAP1$Month == periods$Month[i]
    )
    sum(dataAP1$Count[hit])
  },
  numeric(1)
)
# A missing count makes the monthly total NA; fail with a clear message
# instead of the opaque "missing value where TRUE/FALSE needed" error.
if (anyNA(totals)) {
  stop("Missing passenger counts: cannot compute monthly totals")
}
has_traffic <- totals != 0
df <- data.frame(
  time = str_c(periods$Month[has_traffic], " ", periods$Year[has_traffic]),
  count = totals[has_traffic]
)
df
## time count
## 1 July 2005 3254797
## 2 August 2005 3224118
## 3 September 2005 2764519
## 4 October 2005 2798480
## 5 November 2005 2638631
## 6 December 2005 2696084
## 7 January 2006 2471712
## 8 February 2006 2247255
## 9 March 2006 2738730
## 10 April 2006 2792413
## 11 May 2006 2846400
## 12 June 2006 3091837
## 13 July 2006 3248908
## 14 August 2006 3165026
## 15 September 2006 2738319
## 16 October 2006 2851945
## 17 November 2006 2671437
## 18 December 2006 2717430
## 19 January 2007 2527036
## 20 February 2007 2323522
## 21 March 2007 2844144
## 22 April 2007 2891446
## 23 May 2007 3080467
## 24 June 2007 3286732
## 25 July 2007 3404925
## 26 August 2007 3459915
## 27 September 2007 2973709
## 28 October 2007 3146765
## 29 November 2007 2934915
## 30 December 2007 2917258
## 31 January 2008 2685375
## 32 February 2008 2613849
## 33 March 2008 3146970
## 34 April 2008 3048005
## 35 May 2008 3321383
## 36 June 2008 3466528
## 37 July 2008 3621751
## 38 August 2008 3629829
## 39 September 2008 3012836
## 40 October 2008 3133206
## 41 November 2008 2751947
## 42 December 2008 2970862
## 43 January 2009 2654860
## 44 February 2009 2368782
## 45 March 2009 2936292
## 46 April 2009 3035226
## 47 May 2009 3186209
## 48 June 2009 3432110
## 49 July 2009 3659471
## 50 August 2009 3659379
## 51 September 2009 3199046
## 52 October 2009 3259010
## 53 November 2009 2979513
## 54 December 2009 3083736
## 55 January 2010 2796566
## 56 February 2010 2524760
## 57 March 2010 3118364
## 58 April 2010 3151412
## 59 May 2010 3390714
## 60 June 2010 3626272
## 61 July 2010 3778957
## 62 August 2010 3785346
## 63 September 2010 3367415
## 64 October 2010 3502275
## 65 November 2010 3172057
## 66 December 2010 3177096
## 67 January 2011 2893972
## 68 February 2011 2619193
## 69 March 2011 3139089
## 70 April 2011 3209189
## 71 May 2011 3558021
## 72 June 2011 3777419
## 73 July 2011 3947019
## 74 August 2011 3930681
## 75 September 2011 3573533
## 76 October 2011 3611694
## 77 November 2011 3334801
## 78 December 2011 3450820
## 79 January 2012 3218600
## 80 February 2012 3005764
## 81 March 2012 3481078
## 82 April 2012 3569473
## 83 May 2012 3825332
## 84 June 2012 4112311
## 85 July 2012 4290086
## 86 August 2012 4362369
## 87 September 2012 3831358
## 88 October 2012 3849650
## 89 November 2012 3482422
## 90 December 2012 3448766
## 91 January 2013 3209356
## 92 February 2013 2968951
## 93 March 2013 3599968
## 94 April 2013 3609543
## 95 May 2013 3934899
## 96 June 2013 4155153
## 97 July 2013 4181115
## 98 August 2013 4351101
## 99 September 2013 3785479
## 100 October 2013 3926441
## 101 November 2013 3470482
## 102 December 2013 3819276
## 103 January 2014 3438136
## 104 February 2014 3081846
## 105 March 2014 3771574
## 106 April 2014 3887382
## 107 May 2014 4151403
## 108 June 2014 4327219
## 109 July 2014 4506218
## 110 August 2014 4525926
## 111 September 2014 3919485
## 112 October 2014 4059884
## 113 November 2014 3629119
## 114 December 2014 3856908
## 115 January 2015 3551072
## 116 February 2015 3248534
## 117 March 2015 4002732
## 118 April 2015 4022770
## 119 May 2015 4361622
## 120 June 2015 4559640
## 121 July 2015 4802431
## 122 August 2015 4797484
## 123 September 2015 4201882
## 124 October 2015 4375077
## 125 November 2015 4014388
## 126 December 2015 4129462
## 127 January 2016 3749417
## 128 February 2016 3543751
## 129 March 2016 4138030
# Earlier two-step variant, kept for reference:
#time_index <- as.yearmon((df$time),"%b %Y")
#ts_data <-as.xts(df$count, order.by = time_index)
# Index the monthly totals by year-month. No time zone is passed: a yearmon
# index carries none, and supplying one only produces a
# "'tzone' setting ignored for yearmon indexes" warning.
# NOTE(review): parsing the English month names in df$time presumably relies
# on an English LC_TIME locale - confirm before running elsewhere.
ts_data <- xts(df$count, order.by = as.yearmon(df$time, "%b %Y"))
# Raw monthly passenger totals. The axis-label arguments are `xlab`/`ylab`
# (as in the second plot below); `xlabs`/`ylabs` are not plotting arguments.
plot(ts_data, col="lightblue", lwd=2, xlab="Years", ylab="passenger values", main="This is a detailed time series on passengers per month")
# Month-over-month relative change: (x_t - x_{t-1}) / x_{t-1}.
# Arithmetic on xts objects is aligned by timestamp, so the denominator has to
# be shifted explicitly with lag(); merely dropping the last observation would
# divide each difference by the value of the *same* month.
# stats::lag() is called by its qualified name because dplyr, attached later
# in this file, masks lag() with a function that does not dispatch to xts.
# The first observation has no predecessor and is therefore NA.
ts_dataRel <- diff(ts_data) / stats::lag(ts_data, k = 1)
plot(ts_dataRel, col="blue",lwd=2, xlab="Years", ylab="relative values", main="This is a time series on passagers per month")
head(embed(ts_data, 4))
## [,1] [,2] [,3] [,4]
## [1,] 2798480 2764519 3224118 3254797
## [2,] 2638631 2798480 2764519 3224118
## [3,] 2696084 2638631 2798480 2764519
## [4,] 2471712 2696084 2638631 2798480
## [5,] 2247255 2471712 2696084 2638631
## [6,] 2738730 2247255 2471712 2696084
# Build a time-delay embedding data set from a univariate series.
#
# Arguments:
#   s    a univariate series: a plain vector, a one-column matrix, or an xts
#        object.
#   emb  embedding dimension, i.e. the number of columns of the result; must
#        be at least 2 and at most length(s).
#
# Returns a matrix with columns "T", "T_1", ..., "T_<emb-1>", where "T" holds
# the current value and "T_k" the value observed k steps earlier. When `s` is
# an xts object the result is an xts object as well, indexed by the timestamps
# of `s` from position `emb` onwards (the first emb - 1 observations have no
# complete history and yield no row).
#
# Stops with an error for multi-column input or an out-of-range `emb`.
createEmbedDS <- function(s, emb = 4) {
  dims <- dim(s)
  if (!is.null(dims) && dims[2] > 1) {
    stop("Only applicable to uni-variate time series")
  }

  n_obs <- length(s)
  if (emb < 2 || emb > n_obs) {
    stop("Invalid embed size")
  }

  embedded <- embed(s, emb)
  lag_labels <- paste("T", seq_len(emb - 1), sep = "_")
  colnames(embedded) <- c("T", lag_labels)

  # Plain input: hand back the bare embedding matrix.
  if (!inherits(s, "xts")) {
    return(embedded)
  }

  # xts input: re-attach the timestamps of the rows that survived embedding.
  xts(embedded, index(s)[emb:n_obs])
}
dataSet <- createEmbedDS(ts_data,emb=5)
head(dataSet)
## T T_1 T_2 T_3 T_4
## Nov 2005 2638631 2798480 2764519 3224118 3254797
## Dec 2005 2696084 2638631 2798480 2764519 3224118
## Jan 2006 2471712 2696084 2638631 2798480 2764519
## Feb 2006 2247255 2471712 2696084 2638631 2798480
## Mar 2006 2738730 2247255 2471712 2696084 2638631
## Apr 2006 2792413 2738730 2247255 2471712 2696084
# Embed the series shifted by 12 months, so each row describes the same months
# one year earlier. stats::lag() is called by its qualified name because dplyr
# (attached after xts in this file) masks lag() with a function that does not
# dispatch to the xts method.
dataSet2 <- createEmbedDS(stats::lag(ts_data, k = 12), emb=5)
head(dataSet2, 14)
## T T_1 T_2 T_3 T_4
## Nov 2005 NA NA NA NA NA
## Dec 2005 NA NA NA NA NA
## Jan 2006 NA NA NA NA NA
## Feb 2006 NA NA NA NA NA
## Mar 2006 NA NA NA NA NA
## Apr 2006 NA NA NA NA NA
## May 2006 NA NA NA NA NA
## Jun 2006 NA NA NA NA NA
## Jul 2006 3254797 NA NA NA NA
## Aug 2006 3224118 3254797 NA NA NA
## Sep 2006 2764519 3224118 3254797 NA NA
## Oct 2006 2798480 2764519 3224118 3254797 NA
## Nov 2006 2638631 2798480 2764519 3224118 3254797
## Dec 2006 2696084 2638631 2798480 2764519 3224118
head(index(ts_data), 5)
## [1] "Jul 2005" "Aug 2005" "Sep 2005" "Oct 2005" "Nov 2005"
head(coredata(ts_data), 5)
## [,1]
## [1,] 3254797
## [2,] 3224118
## [3,] 2764519
## [4,] 2798480
## [5,] 2638631
start(ts_data)
## [1] "Jul 2005"
end(ts_data)
## [1] "Mar 2016"
summary(ts_data)
## Index ts_data
## Min. :2006 Min. :2247255
## 1st Qu.:2008 1st Qu.:2979513
## Median :2011 Median :3390714
## Mean :2011 Mean :3412280
## 3rd Qu.:2014 3rd Qu.:3819276
## Max. :2016 Max. :4802431