# Remove every object that ls() reports (names starting with a dot are not
# listed, so they are kept) from the current environment before the analysis.
# NOTE(review): this does not detach packages or reset options(), so it is not
# equivalent to starting a fresh R session -- confirm this is really wanted.
rm(list=ls())
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.2     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
##dat <- read.table("/Users/drlakeshialegettejones/Downloads/American_Airline_passengers1.csv", sep=",", header = TRUE)
# Load the passenger table: one row per year, with the year in the first column
# followed by twelve value columns.
# The file has no header row -- its first line is the 2003 record. With the
# default header = TRUE that record was consumed as column names (X2003,
# X55.317, ...) and disappeared from the data, so the file is read with
# header = FALSE. Columns are then named V1..V13 and the 2003 row is kept.
# NOTE: the captured console output below predates this fix; re-run to refresh.
dat <- read.csv(
  "/Users/drlakeshialegettejones/Library/Mobile Documents/com~apple~CloudDocs/Desktop/ALL FILES!/Documents/All Class Info/Data Mining with R/American_Airline_passengers1.csv",
  header = FALSE
)
dat
##    X2003 X55.317 X53.216 X62.656 X60.998 X66.503 X71.537 X72.092 X64.000
## 1   2004  50,075  47,066  57,356  57,307  59,912  59,962  58,393  51,419
## 2   2005  46,815  47,022  58,225  56,697  60,327  69,660  71,360  61,817
## 3   2006  57,655  56,441  69,593  74,414  78,972  79,307  76,982  65,043
## 4   2007  60,206  57,392  71,184  69,967  73,728  76,072  77,005  74,170
## 5   2008  57,092  59,175  69,906  63,995  68,133  68,780  69,416  61,618
## 6   2009  44,906  44,844  57,072  61,197  61,885  67,300  64,983  55,009
## 7   2010  47,152  43,117  56,901  45,132  45,812  42,620  46,368  40,127
## 8   2011  35,995  34,060  39,901  42,252  44,746  49,199  50,551  41,232
## 9   2012  39,279  34,833  45,290  36,496  41,541  40,313  39,204  34,557
## 10  2013  34,681  33,428  40,483  36,335  41,227  42,449  41,420  35,129
## 11  2014  36,528  37,977  45,699  45,052  51,104  50,156  49,595  43,880
## 12  2015  40,162  34,902  50,790  52,245  54,770  59,954 142,974 128,972
## 13  2016 105,835  87,860  98,310 104,517 144,985 127,941 127,795 112,763
## 14  2017 106,483 105,617 131,823 117,326 118,259 109,929 106,172 104,600
## 15  2018  93,438  93,955 107,938 108,555 115,985 107,033 111,286 104,715
## 16  2019 101,152  96,751 115,240 101,428 131,641 122,356 125,278 120,583
## 17  2020  91,762  90,663  53,051   7,639  15,491  22,323  49,336  56,352
## 18  2021  35,739  31,849  52,470  72,796  74,684  83,885  93,492  75,855
## 19  2022  65,738  71,531  97,970  77,766  79,095  72,210  71,413  69,378
## 20  2023  69,554  79,434  92,409  87,497 103,963  93,188  92,640  80,719
##    X55.447 X62.865 X50.931 X56.330
## 1   47,245  56,010  53,956  60,175
## 2   62,012  66,786  67,391  70,801
## 3   58,299  65,844  64,960  68,272
## 4   64,897  71,648  67,568  66,547
## 5   52,321  59,440  47,032  54,271
## 6   48,954  55,022  47,385  51,512
## 7   38,854  46,181  40,992  45,964
## 8   40,082  46,420  43,833  47,597
## 9   29,218  37,039  36,367  41,561
## 10  31,665  42,063  36,082  40,889
## 11  39,408  41,631  39,628  42,697
## 12 121,215 129,563 122,524 124,347
## 13 118,802 131,622 115,485 119,060
## 14  95,397 108,102  99,820  98,632
## 15  94,300 109,809 110,490 112,362
## 16 115,706 128,987 115,988 123,632
## 17  52,696  51,624  42,552  43,657
## 18  73,688  84,803  97,655  96,764
## 19  70,797  78,478  66,241  70,375
## 20  79,846  93,968  94,529  81,037
## Remove the thousands separators (commas) and convert the value columns to
## numeric.
## The right-hand side has to cover the same columns as the left-hand side:
## converting only columns 2:3 made the assignment recycle those two columns
## across all twelve targets, overwriting ten of them with copies of the first
## two.
## NOTE: the captured console output below predates this fix; re-run to refresh.
dat[, 2:13] <- lapply(
  dat[, 2:13],
  function(x) as.numeric(gsub(",", "", x, fixed = TRUE))
)
dat
##    X2003 X55.317 X53.216 X62.656 X60.998 X66.503 X71.537 X72.092 X64.000
## 1   2004   50075   47066   50075   47066   50075   47066   50075   47066
## 2   2005   46815   47022   46815   47022   46815   47022   46815   47022
## 3   2006   57655   56441   57655   56441   57655   56441   57655   56441
## 4   2007   60206   57392   60206   57392   60206   57392   60206   57392
## 5   2008   57092   59175   57092   59175   57092   59175   57092   59175
## 6   2009   44906   44844   44906   44844   44906   44844   44906   44844
## 7   2010   47152   43117   47152   43117   47152   43117   47152   43117
## 8   2011   35995   34060   35995   34060   35995   34060   35995   34060
## 9   2012   39279   34833   39279   34833   39279   34833   39279   34833
## 10  2013   34681   33428   34681   33428   34681   33428   34681   33428
## 11  2014   36528   37977   36528   37977   36528   37977   36528   37977
## 12  2015   40162   34902   40162   34902   40162   34902   40162   34902
## 13  2016  105835   87860  105835   87860  105835   87860  105835   87860
## 14  2017  106483  105617  106483  105617  106483  105617  106483  105617
## 15  2018   93438   93955   93438   93955   93438   93955   93438   93955
## 16  2019  101152   96751  101152   96751  101152   96751  101152   96751
## 17  2020   91762   90663   91762   90663   91762   90663   91762   90663
## 18  2021   35739   31849   35739   31849   35739   31849   35739   31849
## 19  2022   65738   71531   65738   71531   65738   71531   65738   71531
## 20  2023   69554   79434   69554   79434   69554   79434   69554   79434
##    X55.447 X62.865 X50.931 X56.330
## 1    50075   47066   50075   47066
## 2    46815   47022   46815   47022
## 3    57655   56441   57655   56441
## 4    60206   57392   60206   57392
## 5    57092   59175   57092   59175
## 6    44906   44844   44906   44844
## 7    47152   43117   47152   43117
## 8    35995   34060   35995   34060
## 9    39279   34833   39279   34833
## 10   34681   33428   34681   33428
## 11   36528   37977   36528   37977
## 12   40162   34902   40162   34902
## 13  105835   87860  105835   87860
## 14  106483  105617  106483  105617
## 15   93438   93955   93438   93955
## 16  101152   96751  101152   96751
## 17   91762   90663   91762   90663
## 18   35739   31849   35739   31849
## 19   65738   71531   65738   71531
## 20   69554   79434   69554   79434
#
# Open the reference page for ts(), which is used further down to build the
# time series object.
help("ts")
# R example of creating a time series object:
# gnp <- ts(cumsum(1 + round(rnorm(100), 2)),
#           start = c(1954, 7), frequency = 12)

## Transpose the table so that each column holds one year: the data frame is
## coerced to a matrix first, then rows and columns are swapped.
temp_dat <- t(as.matrix(dat))
temp_dat
##          [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9] [,10] [,11] [,12]
## X2003    2004  2005  2006  2007  2008  2009  2010  2011  2012  2013  2014  2015
## X55.317 50075 46815 57655 60206 57092 44906 47152 35995 39279 34681 36528 40162
## X53.216 47066 47022 56441 57392 59175 44844 43117 34060 34833 33428 37977 34902
## X62.656 50075 46815 57655 60206 57092 44906 47152 35995 39279 34681 36528 40162
## X60.998 47066 47022 56441 57392 59175 44844 43117 34060 34833 33428 37977 34902
## X66.503 50075 46815 57655 60206 57092 44906 47152 35995 39279 34681 36528 40162
## X71.537 47066 47022 56441 57392 59175 44844 43117 34060 34833 33428 37977 34902
## X72.092 50075 46815 57655 60206 57092 44906 47152 35995 39279 34681 36528 40162
## X64.000 47066 47022 56441 57392 59175 44844 43117 34060 34833 33428 37977 34902
## X55.447 50075 46815 57655 60206 57092 44906 47152 35995 39279 34681 36528 40162
## X62.865 47066 47022 56441 57392 59175 44844 43117 34060 34833 33428 37977 34902
## X50.931 50075 46815 57655 60206 57092 44906 47152 35995 39279 34681 36528 40162
## X56.330 47066 47022 56441 57392 59175 44844 43117 34060 34833 33428 37977 34902
##          [,13]  [,14] [,15]  [,16] [,17] [,18] [,19] [,20]
## X2003     2016   2017  2018   2019  2020  2021  2022  2023
## X55.317 105835 106483 93438 101152 91762 35739 65738 69554
## X53.216  87860 105617 93955  96751 90663 31849 71531 79434
## X62.656 105835 106483 93438 101152 91762 35739 65738 69554
## X60.998  87860 105617 93955  96751 90663 31849 71531 79434
## X66.503 105835 106483 93438 101152 91762 35739 65738 69554
## X71.537  87860 105617 93955  96751 90663 31849 71531 79434
## X72.092 105835 106483 93438 101152 91762 35739 65738 69554
## X64.000  87860 105617 93955  96751 90663 31849 71531 79434
## X55.447 105835 106483 93438 101152 91762 35739 65738 69554
## X62.865  87860 105617 93955  96751 90663 31849 71531 79434
## X50.931 105835 106483 93438 101152 91762 35739 65738 69554
## X56.330  87860 105617 93955  96751 90663 31849 71531 79434
## Flatten the values into a single chronological vector.
## temp_dat has one column per year, with the year itself in row 1, so the
## year row is dropped and the matrix is read column by column. Every year
## column is used instead of a hard-coded 1:20.
dat_val <- as.vector(temp_dat[-1, , drop = FALSE])

## Create the monthly time series object.
## The start year is taken from the first year in the data rather than being
## hard-coded as 2003; the hard-coded value labelled the first data row with
## the wrong year whenever that row was not 2003.
## NOTE: the captured console output below predates this fix; re-run to refresh.
dat_ts <- ts(
  dat_val,
  start = c(as.numeric(temp_dat[1, 1]), 1),
  frequency = 12
)
dat_ts
##         Jan    Feb    Mar    Apr    May    Jun    Jul    Aug    Sep    Oct
## 2003  50075  47066  50075  47066  50075  47066  50075  47066  50075  47066
## 2004  46815  47022  46815  47022  46815  47022  46815  47022  46815  47022
## 2005  57655  56441  57655  56441  57655  56441  57655  56441  57655  56441
## 2006  60206  57392  60206  57392  60206  57392  60206  57392  60206  57392
## 2007  57092  59175  57092  59175  57092  59175  57092  59175  57092  59175
## 2008  44906  44844  44906  44844  44906  44844  44906  44844  44906  44844
## 2009  47152  43117  47152  43117  47152  43117  47152  43117  47152  43117
## 2010  35995  34060  35995  34060  35995  34060  35995  34060  35995  34060
## 2011  39279  34833  39279  34833  39279  34833  39279  34833  39279  34833
## 2012  34681  33428  34681  33428  34681  33428  34681  33428  34681  33428
## 2013  36528  37977  36528  37977  36528  37977  36528  37977  36528  37977
## 2014  40162  34902  40162  34902  40162  34902  40162  34902  40162  34902
## 2015 105835  87860 105835  87860 105835  87860 105835  87860 105835  87860
## 2016 106483 105617 106483 105617 106483 105617 106483 105617 106483 105617
## 2017  93438  93955  93438  93955  93438  93955  93438  93955  93438  93955
## 2018 101152  96751 101152  96751 101152  96751 101152  96751 101152  96751
## 2019  91762  90663  91762  90663  91762  90663  91762  90663  91762  90663
## 2020  35739  31849  35739  31849  35739  31849  35739  31849  35739  31849
## 2021  65738  71531  65738  71531  65738  71531  65738  71531  65738  71531
## 2022  69554  79434  69554  79434  69554  79434  69554  79434  69554  79434
##         Nov    Dec
## 2003  50075  47066
## 2004  46815  47022
## 2005  57655  56441
## 2006  60206  57392
## 2007  57092  59175
## 2008  44906  44844
## 2009  47152  43117
## 2010  35995  34060
## 2011  39279  34833
## 2012  34681  33428
## 2013  36528  37977
## 2014  40162  34902
## 2015 105835  87860
## 2016 106483 105617
## 2017  93438  93955
## 2018 101152  96751
## 2019  91762  90663
## 2020  35739  31849
## 2021  65738  71531
## 2022  69554  79434
class(dat_ts)
## [1] "ts"
## Convert the ts object to xts. The conversion leaves the single column with
## an auto-generated name (m.c.seq.row..seq.n...seq.col..drop...FALSE.), so a
## readable name is assigned explicitly.
## NOTE: the captured console output below predates this change.
ap <- as.xts(dat_ts)
colnames(ap) <- "passengers"
class(ap)
## [1] "xts" "zoo"
ap
##          m.c.seq.row..seq.n...seq.col..drop...FALSE.
## Jan 2003                                       50075
## Feb 2003                                       47066
## Mar 2003                                       50075
## Apr 2003                                       47066
## May 2003                                       50075
## Jun 2003                                       47066
## Jul 2003                                       50075
## Aug 2003                                       47066
## Sep 2003                                       50075
## Oct 2003                                       47066
##      ...                                            
## Mar 2022                                       69554
## Apr 2022                                       79434
## May 2022                                       69554
## Jun 2022                                       79434
## Jul 2022                                       69554
## Aug 2022                                       79434
## Sep 2022                                       69554
## Oct 2022                                       79434
## Nov 2022                                       69554
## Dec 2022                                       79434
## Relative change from one period to the next: (x_t - x_{t-1}) / x_{t-1}.
## Arithmetic on xts objects aligns both operands on their time index, so
## diff(ap) / ap[-length(ap)] divided each difference by the value of the SAME
## period (x_t) instead of the previous one. Lagging the series by one period
## places x_{t-1} at index t, which gives the intended denominator.
## stats::lag() is called explicitly because dplyr masks lag() in this session
## (see the xts start-up warning above).
## NOTE: the captured console output below predates this fix; re-run to refresh.
apRel <- diff(ap) / stats::lag(ap, k = 1)
head(apRel)
##                   e1
## Jan 2003          NA
## Feb 2003 -0.06393150
## Mar 2003  0.06008987
## Apr 2003 -0.06393150
## May 2003  0.06008987
## Jun 2003 -0.06393150
## Preview the 4-column embedding of the series: each row holds one value
## followed by the three values that precede it.
head(embed(ap, dimension = 4))
##       [,1]  [,2]  [,3]  [,4]
## [1,] 47066 50075 47066 50075
## [2,] 50075 47066 50075 47066
## [3,] 47066 50075 47066 50075
## [4,] 50075 47066 50075 47066
## [5,] 47066 50075 47066 50075
## [6,] 50075 47066 50075 47066
#' Build a lag-embedded data set from a univariate series
#'
#' Wraps `embed()` and labels the result: the first column is named `T` and
#' the remaining columns `T_1`, `T_2`, ..., `T_(emb - 1)`.
#'
#' @param s A univariate series (a vector, or an object with at most one
#'   column).
#' @param emb Embedding dimension, i.e. the number of columns of the result.
#'   Must be at least 2 and no larger than `length(s)`.
#' @return When `s` is an xts object, an xts object indexed by the time stamps
#'   of `s` from position `emb` onwards; otherwise the plain matrix returned by
#'   `embed()` with the column names described above.
createEmbedDS <- function(s, emb=4) {
  dims <- dim(s)
  has_many_cols <- !is.null(dims) && dims[2] > 1
  if (has_many_cols) {
    stop("Only applicable to uni-variate time series")
  }

  n_obs <- length(s)
  if (emb < 2 || emb > n_obs) {
    stop("Invalid embed size")
  }

  lagged <- embed(s, emb)
  colnames(lagged) <- c("T", paste0("T_", 1:(emb - 1)))

  # Anything that is not xts is returned as the bare embedded matrix.
  if (!is.xts(s)) {
    return(lagged)
  }

  # The first emb - 1 time stamps have no complete row, so the index starts at
  # position emb.
  xts(lagged, index(s)[emb:n_obs])
}
## Embed the passenger series with 5 columns: the current value (T) plus the
## four preceding values (T_1 .. T_4), then preview the first rows.
dataSet <- createEmbedDS(s = ap, emb = 5)
head(dataSet)
##              T   T_1   T_2   T_3   T_4
## May 2003 50075 47066 50075 47066 50075
## Jun 2003 47066 50075 47066 50075 47066
## Jul 2003 50075 47066 50075 47066 50075
## Aug 2003 47066 50075 47066 50075 47066
## Sep 2003 50075 47066 50075 47066 50075
## Oct 2003 47066 50075 47066 50075 47066