# Introduction & Applied Time Series in R (R-Tutorial, Udemy)
Source file ⇒ IntroductionTimeSeriesR_R-Tutorial_udemy.rmd
# 1 IT Store Staff Allocation
## Script for the example: data import, 2 alternatives
# Alternative 1: paste the data in via scan()
# myts <- scan()
# Conversion to a time series:
# mycounts <- ts(myts, start = 1, frequency = 12)
library(readr)
ITstore_bidaily <- read_delim("ITstore_bidaily.csv", ";", escape_double = FALSE,
col_names = FALSE, trim_ws = TRUE)
myts <- ITstore_bidaily
# Alternative with the myts vector:
# mycounts_check <- ts(myts, start = 1, frequency = 12)
mycounts <- ts(myts$X2, start = 1, frequency = 12)
# Visualization
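# The 24-month forecast table printed after the plot comes from a seasonal
# model whose fitting code is not part of this extract. A minimal sketch that
# produces a table of this shape (assuming Holt-Winters smoothing):
# library(forecast)
# fc <- forecast(HoltWinters(mycounts), h = 24)
# fc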
plot(mycounts, ylab = "Customer Counts", xlab = "Weeks")
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 13 193 143 243 116 269
## Feb 13 313 262 365 234 392
## Mar 13 160 108 212 81 239
## Apr 13 204 152 256 125 283
## May 13 165 113 216 86 244
## Jun 13 241 189 293 162 320
## Jul 13 158 107 210 79 237
## Aug 13 234 182 285 155 313
## Sep 13 232 181 284 153 312
## Oct 13 385 333 436 305 464
## Nov 13 450 398 502 371 529
## Dec 13 657 605 709 578 736
## Jan 14 209 156 263 127 292
## Feb 14 318 263 372 235 400
## Mar 14 161 107 215 78 244
## Apr 14 204 150 258 122 287
## May 14 165 111 219 82 247
## Jun 14 241 187 295 158 324
## Jul 14 158 104 212 76 241
## Aug 14 234 180 288 151 316
## Sep 14 232 178 287 150 315
## Oct 14 385 330 439 302 467
## Nov 14 450 396 504 367 533
## Dec 14 657 603 711 574 740
# 2 Working With Dates And Time In R
### POSIXt classes in R
x = as.POSIXct("2019-12-25 11:45:34") # stored as the number of seconds since 1970-01-01
y = as.POSIXlt("2019-12-25 11:45:34")
x; y # the printed output looks the same, but what is behind it?
## [1] "2019-12-25 11:45:34 IST"
## [1] "2019-12-25 11:45:34 IST"
## [1] 1577254534
## attr(,"tzone")
## [1] ""
## $sec
## [1] 34
##
## $min
## [1] 45
##
## $hour
## [1] 11
##
## $mday
## [1] 25
##
## $mon
## [1] 11
##
## $year
## [1] 119
##
## $wday
## [1] 3
##
## $yday
## [1] 358
##
## $isdst
## [1] 0
##
## $zone
## [1] "IST"
##
## $gmtoff
## [1] NA
## [1] 1576780200
## [1] "IST"
# x$zone # gives an error: not possible, since a POSIXct is simply a number of seconds
# another class based on days
x = as.Date("2019-12-25")
x; class(x)
## [1] "2019-12-25"
## [1] "Date"
## [1] 18255
## [1] 18245
##===========================================##
library(chron)
x = chron("12/25/2019", "23:34:09")
x
## [1] (12/25/19 23:34:09)
## [1] "chron" "dates" "times"
## [1] 18256
## attr(,"format")
## dates times
## "m/d/y" "h:m:s"
## attr(,"origin")
## month day year
## 1 1 1970
##===========================================##
### strptime
a = as.character(c("1993-12-30 23:45",
"1994-11-05 11:43",
"1992-03-09 21:54"))
class(a)
## [1] "character"
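# parsing the strings into POSIXlt (assumed calls, not in this extract):
# b = strptime(a, format = "%Y-%m-%d %H:%M")
# b; class(b)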
## [1] "1993-12-30 23:45:00 IST" "1994-11-05 11:43:00 IST"
## [3] "1992-03-09 21:54:00 IST"
## [1] "POSIXlt" "POSIXt"
##===========================================##
## Lubridate
### Let's take a look at the package lubridate, which has very useful functions for time/date data
library(lubridate)
# different ways in how to input dates
ymd(19931123)
## [1] "1993-11-23"
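# other accepted input notations (assumed examples): ymd("1993/11/23"); ymd("1993-11-23")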
## [1] "1993-11-23"
## [1] "1993-11-23"
# let's use time and date together
mytimepoint <- ymd_hm("1993-11-23 11:23", tz = "Europe/Prague")
mytimepoint
## [1] "1993-11-23 11:23:00 CET"
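# the outputs below come from (assumed calls): class(mytimepoint);
# day(mytimepoint); minute(mytimepoint); hour(mytimepoint);
# year(mytimepoint); month(mytimepoint)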
## [1] "POSIXct" "POSIXt"
## [1] 23
## [1] 23
## [1] 11
## [1] 1993
## [1] 11
## [1] "1993-11-23 14:23:00 CET"
## [1] "Africa/Abidjan" "Africa/Accra"
## [3] "Africa/Addis_Ababa" "Africa/Algiers"
## [5] "Africa/Asmara" "Africa/Asmera"
## [7] "Africa/Bamako" "Africa/Bangui"
## [9] "Africa/Banjul" "Africa/Bissau"
## [11] "Africa/Blantyre" "Africa/Brazzaville"
## [13] "Africa/Bujumbura" "Africa/Cairo"
## [15] "Africa/Casablanca" "Africa/Ceuta"
## [17] "Africa/Conakry" "Africa/Dakar"
## [19] "Africa/Dar_es_Salaam" "Africa/Djibouti"
## [21] "Africa/Douala" "Africa/El_Aaiun"
## [23] "Africa/Freetown" "Africa/Gaborone"
## [25] "Africa/Harare" "Africa/Johannesburg"
## [27] "Africa/Juba" "Africa/Kampala"
## [29] "Africa/Khartoum" "Africa/Kigali"
## [31] "Africa/Kinshasa" "Africa/Lagos"
## [33] "Africa/Libreville" "Africa/Lome"
## [35] "Africa/Luanda" "Africa/Lubumbashi"
## [37] "Africa/Lusaka" "Africa/Malabo"
## [39] "Africa/Maputo" "Africa/Maseru"
## [41] "Africa/Mbabane" "Africa/Mogadishu"
## [43] "Africa/Monrovia" "Africa/Nairobi"
## [45] "Africa/Ndjamena" "Africa/Niamey"
## [47] "Africa/Nouakchott" "Africa/Ouagadougou"
## [49] "Africa/Porto-Novo" "Africa/Sao_Tome"
## [51] "Africa/Timbuktu" "Africa/Tripoli"
## [53] "Africa/Tunis" "Africa/Windhoek"
## [55] "America/Adak" "America/Anchorage"
## [57] "America/Anguilla" "America/Antigua"
## [59] "America/Araguaina" "America/Argentina/Buenos_Aires"
## [61] "America/Argentina/Catamarca" "America/Argentina/ComodRivadavia"
## [63] "America/Argentina/Cordoba" "America/Argentina/Jujuy"
## [65] "America/Argentina/La_Rioja" "America/Argentina/Mendoza"
## [67] "America/Argentina/Rio_Gallegos" "America/Argentina/Salta"
## [69] "America/Argentina/San_Juan" "America/Argentina/San_Luis"
## [71] "America/Argentina/Tucuman" "America/Argentina/Ushuaia"
## [73] "America/Aruba" "America/Asuncion"
## [75] "America/Atikokan" "America/Atka"
## [77] "America/Bahia" "America/Bahia_Banderas"
## [79] "America/Barbados" "America/Belem"
## [81] "America/Belize" "America/Blanc-Sablon"
## [83] "America/Boa_Vista" "America/Bogota"
## [85] "America/Boise" "America/Buenos_Aires"
## [87] "America/Cambridge_Bay" "America/Campo_Grande"
## [89] "America/Cancun" "America/Caracas"
## [91] "America/Catamarca" "America/Cayenne"
## [93] "America/Cayman" "America/Chicago"
## [95] "America/Chihuahua" "America/Coral_Harbour"
## [97] "America/Cordoba" "America/Costa_Rica"
## [99] "America/Creston" "America/Cuiaba"
## [101] "America/Curacao" "America/Danmarkshavn"
## [103] "America/Dawson" "America/Dawson_Creek"
## [105] "America/Denver" "America/Detroit"
## [107] "America/Dominica" "America/Edmonton"
## [109] "America/Eirunepe" "America/El_Salvador"
## [111] "America/Ensenada" "America/Fort_Nelson"
## [113] "America/Fort_Wayne" "America/Fortaleza"
## [115] "America/Glace_Bay" "America/Godthab"
## [117] "America/Goose_Bay" "America/Grand_Turk"
## [119] "America/Grenada" "America/Guadeloupe"
## [121] "America/Guatemala" "America/Guayaquil"
## [123] "America/Guyana" "America/Halifax"
## [125] "America/Havana" "America/Hermosillo"
## [127] "America/Indiana/Indianapolis" "America/Indiana/Knox"
## [129] "America/Indiana/Marengo" "America/Indiana/Petersburg"
## [131] "America/Indiana/Tell_City" "America/Indiana/Vevay"
## [133] "America/Indiana/Vincennes" "America/Indiana/Winamac"
## [135] "America/Indianapolis" "America/Inuvik"
## [137] "America/Iqaluit" "America/Jamaica"
## [139] "America/Jujuy" "America/Juneau"
## [141] "America/Kentucky/Louisville" "America/Kentucky/Monticello"
## [143] "America/Knox_IN" "America/Kralendijk"
## [145] "America/La_Paz" "America/Lima"
## [147] "America/Los_Angeles" "America/Louisville"
## [149] "America/Lower_Princes" "America/Maceio"
## [151] "America/Managua" "America/Manaus"
## [153] "America/Marigot" "America/Martinique"
## [155] "America/Matamoros" "America/Mazatlan"
## [157] "America/Mendoza" "America/Menominee"
## [159] "America/Merida" "America/Metlakatla"
## [161] "America/Mexico_City" "America/Miquelon"
## [163] "America/Moncton" "America/Monterrey"
## [165] "America/Montevideo" "America/Montreal"
## [167] "America/Montserrat" "America/Nassau"
## [169] "America/New_York" "America/Nipigon"
## [171] "America/Nome" "America/Noronha"
## [173] "America/North_Dakota/Beulah" "America/North_Dakota/Center"
## [175] "America/North_Dakota/New_Salem" "America/Nuuk"
## [177] "America/Ojinaga" "America/Panama"
## [179] "America/Pangnirtung" "America/Paramaribo"
## [181] "America/Phoenix" "America/Port-au-Prince"
## [183] "America/Port_of_Spain" "America/Porto_Acre"
## [185] "America/Porto_Velho" "America/Puerto_Rico"
## [187] "America/Punta_Arenas" "America/Rainy_River"
## [189] "America/Rankin_Inlet" "America/Recife"
## [191] "America/Regina" "America/Resolute"
## [193] "America/Rio_Branco" "America/Rosario"
## [195] "America/Santa_Isabel" "America/Santarem"
## [197] "America/Santiago" "America/Santo_Domingo"
## [199] "America/Sao_Paulo" "America/Scoresbysund"
## [201] "America/Shiprock" "America/Sitka"
## [203] "America/St_Barthelemy" "America/St_Johns"
## [205] "America/St_Kitts" "America/St_Lucia"
## [207] "America/St_Thomas" "America/St_Vincent"
## [209] "America/Swift_Current" "America/Tegucigalpa"
## [211] "America/Thule" "America/Thunder_Bay"
## [213] "America/Tijuana" "America/Toronto"
## [215] "America/Tortola" "America/Vancouver"
## [217] "America/Virgin" "America/Whitehorse"
## [219] "America/Winnipeg" "America/Yakutat"
## [221] "America/Yellowknife" "Antarctica/Casey"
## [223] "Antarctica/Davis" "Antarctica/DumontDUrville"
## [225] "Antarctica/Macquarie" "Antarctica/Mawson"
## [227] "Antarctica/McMurdo" "Antarctica/Palmer"
## [229] "Antarctica/Rothera" "Antarctica/South_Pole"
## [231] "Antarctica/Syowa" "Antarctica/Troll"
## [233] "Antarctica/Vostok" "Arctic/Longyearbyen"
## [235] "Asia/Aden" "Asia/Almaty"
## [237] "Asia/Amman" "Asia/Anadyr"
## [239] "Asia/Aqtau" "Asia/Aqtobe"
## [241] "Asia/Ashgabat" "Asia/Ashkhabad"
## [243] "Asia/Atyrau" "Asia/Baghdad"
## [245] "Asia/Bahrain" "Asia/Baku"
## [247] "Asia/Bangkok" "Asia/Barnaul"
## [249] "Asia/Beirut" "Asia/Bishkek"
## [251] "Asia/Brunei" "Asia/Calcutta"
## [253] "Asia/Chita" "Asia/Choibalsan"
## [255] "Asia/Chongqing" "Asia/Chungking"
## [257] "Asia/Colombo" "Asia/Dacca"
## [259] "Asia/Damascus" "Asia/Dhaka"
## [261] "Asia/Dili" "Asia/Dubai"
## [263] "Asia/Dushanbe" "Asia/Famagusta"
## [265] "Asia/Gaza" "Asia/Harbin"
## [267] "Asia/Hebron" "Asia/Ho_Chi_Minh"
## [269] "Asia/Hong_Kong" "Asia/Hovd"
## [271] "Asia/Irkutsk" "Asia/Istanbul"
## [273] "Asia/Jakarta" "Asia/Jayapura"
## [275] "Asia/Jerusalem" "Asia/Kabul"
## [277] "Asia/Kamchatka" "Asia/Karachi"
## [279] "Asia/Kashgar" "Asia/Kathmandu"
## [281] "Asia/Katmandu" "Asia/Khandyga"
## [283] "Asia/Kolkata" "Asia/Krasnoyarsk"
## [285] "Asia/Kuala_Lumpur" "Asia/Kuching"
## [287] "Asia/Kuwait" "Asia/Macao"
## [289] "Asia/Macau" "Asia/Magadan"
## [291] "Asia/Makassar" "Asia/Manila"
## [293] "Asia/Muscat" "Asia/Nicosia"
## [295] "Asia/Novokuznetsk" "Asia/Novosibirsk"
## [297] "Asia/Omsk" "Asia/Oral"
## [299] "Asia/Phnom_Penh" "Asia/Pontianak"
## [301] "Asia/Pyongyang" "Asia/Qatar"
## [303] "Asia/Qostanay" "Asia/Qyzylorda"
## [305] "Asia/Rangoon" "Asia/Riyadh"
## [307] "Asia/Saigon" "Asia/Sakhalin"
## [309] "Asia/Samarkand" "Asia/Seoul"
## [311] "Asia/Shanghai" "Asia/Singapore"
## [313] "Asia/Srednekolymsk" "Asia/Taipei"
## [315] "Asia/Tashkent" "Asia/Tbilisi"
## [317] "Asia/Tehran" "Asia/Tel_Aviv"
## [319] "Asia/Thimbu" "Asia/Thimphu"
## [321] "Asia/Tokyo" "Asia/Tomsk"
## [323] "Asia/Ujung_Pandang" "Asia/Ulaanbaatar"
## [325] "Asia/Ulan_Bator" "Asia/Urumqi"
## [327] "Asia/Ust-Nera" "Asia/Vientiane"
## [329] "Asia/Vladivostok" "Asia/Yakutsk"
## [331] "Asia/Yangon" "Asia/Yekaterinburg"
## [333] "Asia/Yerevan" "Atlantic/Azores"
## [335] "Atlantic/Bermuda" "Atlantic/Canary"
## [337] "Atlantic/Cape_Verde" "Atlantic/Faeroe"
## [339] "Atlantic/Faroe" "Atlantic/Jan_Mayen"
## [341] "Atlantic/Madeira" "Atlantic/Reykjavik"
## [343] "Atlantic/South_Georgia" "Atlantic/St_Helena"
## [345] "Atlantic/Stanley" "Australia/ACT"
## [347] "Australia/Adelaide" "Australia/Brisbane"
## [349] "Australia/Broken_Hill" "Australia/Canberra"
## [351] "Australia/Currie" "Australia/Darwin"
## [353] "Australia/Eucla" "Australia/Hobart"
## [355] "Australia/LHI" "Australia/Lindeman"
## [357] "Australia/Lord_Howe" "Australia/Melbourne"
## [359] "Australia/North" "Australia/NSW"
## [361] "Australia/Perth" "Australia/Queensland"
## [363] "Australia/South" "Australia/Sydney"
## [365] "Australia/Tasmania" "Australia/Victoria"
## [367] "Australia/West" "Australia/Yancowinna"
## [369] "Brazil/Acre" "Brazil/DeNoronha"
## [371] "Brazil/East" "Brazil/West"
## [373] "Canada/Atlantic" "Canada/Central"
## [375] "Canada/Eastern" "Canada/Mountain"
## [377] "Canada/Newfoundland" "Canada/Pacific"
## [379] "Canada/Saskatchewan" "Canada/Yukon"
## [381] "CET" "Chile/Continental"
## [383] "Chile/EasterIsland" "CST6CDT"
## [385] "Cuba" "EET"
## [387] "Egypt" "Eire"
## [389] "EST" "EST5EDT"
## [391] "Etc/GMT" "Etc/GMT-0"
## [393] "Etc/GMT-1" "Etc/GMT-10"
## [395] "Etc/GMT-11" "Etc/GMT-12"
## [397] "Etc/GMT-13" "Etc/GMT-14"
## [399] "Etc/GMT-2" "Etc/GMT-3"
## [401] "Etc/GMT-4" "Etc/GMT-5"
## [403] "Etc/GMT-6" "Etc/GMT-7"
## [405] "Etc/GMT-8" "Etc/GMT-9"
## [407] "Etc/GMT+0" "Etc/GMT+1"
## [409] "Etc/GMT+10" "Etc/GMT+11"
## [411] "Etc/GMT+12" "Etc/GMT+2"
## [413] "Etc/GMT+3" "Etc/GMT+4"
## [415] "Etc/GMT+5" "Etc/GMT+6"
## [417] "Etc/GMT+7" "Etc/GMT+8"
## [419] "Etc/GMT+9" "Etc/GMT0"
## [421] "Etc/Greenwich" "Etc/UCT"
## [423] "Etc/Universal" "Etc/UTC"
## [425] "Etc/Zulu" "Europe/Amsterdam"
## [427] "Europe/Andorra" "Europe/Astrakhan"
## [429] "Europe/Athens" "Europe/Belfast"
## [431] "Europe/Belgrade" "Europe/Berlin"
## [433] "Europe/Bratislava" "Europe/Brussels"
## [435] "Europe/Bucharest" "Europe/Budapest"
## [437] "Europe/Busingen" "Europe/Chisinau"
## [439] "Europe/Copenhagen" "Europe/Dublin"
## [441] "Europe/Gibraltar" "Europe/Guernsey"
## [443] "Europe/Helsinki" "Europe/Isle_of_Man"
## [445] "Europe/Istanbul" "Europe/Jersey"
## [447] "Europe/Kaliningrad" "Europe/Kiev"
## [449] "Europe/Kirov" "Europe/Lisbon"
## [451] "Europe/Ljubljana" "Europe/London"
## [453] "Europe/Luxembourg" "Europe/Madrid"
## [455] "Europe/Malta" "Europe/Mariehamn"
## [457] "Europe/Minsk" "Europe/Monaco"
## [459] "Europe/Moscow" "Europe/Nicosia"
## [461] "Europe/Oslo" "Europe/Paris"
## [463] "Europe/Podgorica" "Europe/Prague"
## [465] "Europe/Riga" "Europe/Rome"
## [467] "Europe/Samara" "Europe/San_Marino"
## [469] "Europe/Sarajevo" "Europe/Saratov"
## [471] "Europe/Simferopol" "Europe/Skopje"
## [473] "Europe/Sofia" "Europe/Stockholm"
## [475] "Europe/Tallinn" "Europe/Tirane"
## [477] "Europe/Tiraspol" "Europe/Ulyanovsk"
## [479] "Europe/Uzhgorod" "Europe/Vaduz"
## [481] "Europe/Vatican" "Europe/Vienna"
## [483] "Europe/Vilnius" "Europe/Volgograd"
## [485] "Europe/Warsaw" "Europe/Zagreb"
## [487] "Europe/Zaporozhye" "Europe/Zurich"
## [489] "GB" "GB-Eire"
## [491] "GMT" "GMT-0"
## [493] "GMT+0" "GMT0"
## [495] "Greenwich" "Hongkong"
## [497] "HST" "Iceland"
## [499] "Indian/Antananarivo" "Indian/Chagos"
## [501] "Indian/Christmas" "Indian/Cocos"
## [503] "Indian/Comoro" "Indian/Kerguelen"
## [505] "Indian/Mahe" "Indian/Maldives"
## [507] "Indian/Mauritius" "Indian/Mayotte"
## [509] "Indian/Reunion" "Iran"
## [511] "Israel" "Jamaica"
## [513] "Japan" "Kwajalein"
## [515] "Libya" "MET"
## [517] "Mexico/BajaNorte" "Mexico/BajaSur"
## [519] "Mexico/General" "MST"
## [521] "MST7MDT" "Navajo"
## [523] "NZ" "NZ-CHAT"
## [525] "Pacific/Apia" "Pacific/Auckland"
## [527] "Pacific/Bougainville" "Pacific/Chatham"
## [529] "Pacific/Chuuk" "Pacific/Easter"
## [531] "Pacific/Efate" "Pacific/Enderbury"
## [533] "Pacific/Fakaofo" "Pacific/Fiji"
## [535] "Pacific/Funafuti" "Pacific/Galapagos"
## [537] "Pacific/Gambier" "Pacific/Guadalcanal"
## [539] "Pacific/Guam" "Pacific/Honolulu"
## [541] "Pacific/Johnston" "Pacific/Kiritimati"
## [543] "Pacific/Kosrae" "Pacific/Kwajalein"
## [545] "Pacific/Majuro" "Pacific/Marquesas"
## [547] "Pacific/Midway" "Pacific/Nauru"
## [549] "Pacific/Niue" "Pacific/Norfolk"
## [551] "Pacific/Noumea" "Pacific/Pago_Pago"
## [553] "Pacific/Palau" "Pacific/Pitcairn"
## [555] "Pacific/Pohnpei" "Pacific/Ponape"
## [557] "Pacific/Port_Moresby" "Pacific/Rarotonga"
## [559] "Pacific/Saipan" "Pacific/Samoa"
## [561] "Pacific/Tahiti" "Pacific/Tarawa"
## [563] "Pacific/Tongatapu" "Pacific/Truk"
## [565] "Pacific/Wake" "Pacific/Wallis"
## [567] "Pacific/Yap" "Poland"
## [569] "Portugal" "PRC"
## [571] "PST8PDT" "ROC"
## [573] "ROK" "Singapore"
## [575] "Turkey" "UCT"
## [577] "Universal" "US/Alaska"
## [579] "US/Aleutian" "US/Arizona"
## [581] "US/Central" "US/East-Indiana"
## [583] "US/Eastern" "US/Hawaii"
## [585] "US/Indiana-Starke" "US/Michigan"
## [587] "US/Mountain" "US/Pacific"
## [589] "US/Pacific-New" "US/Samoa"
## [591] "UTC" "W-SU"
## [593] "WET" "Zulu"
## attr(,"Version")
## [1] "2020a"
# we can take a look at the most common time zones
# but be aware that time zone recognition also depends on your location and machine
# let's check which weekday our time point is
wday(mytimepoint)
## [1] 3
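# with labels (assumed call): wday(mytimepoint, label = TRUE)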
## [1] Tuesday
## 7 Levels: Sunday < Monday < Tuesday < Wednesday < Thursday < ... < Saturday
# we can calculate which time our timepoint would be in another time zone
with_tz(mytimepoint, tz = "Europe/London")
## [1] "1993-11-23 13:23:00 GMT"
## [1] "1993-11-23 14:23:00 CET"
# time intervals
time1 = ymd_hm("1993-09-23 11:23", tz = "Europe/Prague")
time2 = ymd_hm("1995-11-02 15:23", tz = "Europe/Prague")
# getting the interval
myinterval = interval(time1, time2); myinterval
## [1] 1993-09-23 11:23:00 CEST--1995-11-02 15:23:00 CET
## [1] "Interval"
## attr(,"package")
## [1] "lubridate"
##===========================================##
### Exercise: Creating a Data Frame with lubridate
# let's now build a data frame with lubridate that contains date and time data
# note the different input formats that the ymd function accepts
a = c("1998,11,11", "1983/01/23", "1982:09:04", "1945-05-09", 19821224, "1974.12.03", 19871210)
a = ymd(a, tz = "CET"); a
## [1] "1998-11-11 CET" "1983-01-23 CET" "1982-09-04 CEST" "1945-05-09 CEST"
## [5] "1982-12-24 CET" "1974-12-03 CET" "1987-12-10 CET"
# now I am creating a time vector - using different notations of input
b = c("22 4 5", "04;09;45", "11:9:56", "23,15,12", "14 16 34", "8 8 23", "21 16 14")
b = hms(b); b
## [1] "22H 4M 5S" "4H 9M 45S" "11H 9M 56S" "23H 15M 12S" "14H 16M 34S"
## [6] "8H 8M 23S" "21H 16M 14S"
## [1] 8.2 9.7 10.0 11.7 8.6 10.5 8.2
## date time measurement
## 1 1998-11-11 22H 4M 5S 8.2
## 2 1983-01-23 4H 9M 45S 9.7
## 3 1982-09-04 11H 9M 56S 10.0
## 4 1945-05-09 23H 15M 12S 11.7
## 5 1982-12-24 14H 16M 34S 8.6
## 6 1974-12-03 8H 8M 23S 10.5
## 7 1987-12-10 21H 16M 14S 8.2
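# minutes(7) (assumed call):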
## [1] "7M 0S"
# note that class "Period" needs integers - whole numbers
# minutes(2.5) # gives an error
# getting the duration
dminutes(3)
## [1] "180s (~3 minutes)"
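# duration vs. period arithmetic (assumed calls): dminutes(3.5);
# minutes(2) + seconds(5); minutes(2) + seconds(75); dminutes(2) + dseconds(75)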
## [1] "210s (~3.5 minutes)"
## [1] "2M 5S"
## [1] "2M 75S"
## [1] "195s (~3.25 minutes)"
# lubridate has several time classes: period and duration differ!
# which year was a leap year?
leap_year(2009:2014)
## [1] FALSE FALSE FALSE TRUE FALSE FALSE
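# period vs. duration across year boundaries (assumed calls):
# ymd(20140101) + years(1); ymd(20140101) + dyears(1)
# leap_year(2016); ymd(20160101) + years(1); ymd(20160101) + dyears(1)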
## [1] "2015-01-01"
## [1] "2015-01-01 06:00:00 UTC"
## [1] TRUE
## [1] "2017-01-01"
## [1] "2016-12-31 06:00:00 UTC"
# as you see, the duration adds a fixed length of time (365.25 days' worth of
# seconds, hence the 06:00:00), while the standard period treats the year as a
# whole calendar unit (+1 year)
## Exercise Lubridate
# create x, with time zone CET and a given time point in 2014 of your choosing
# I use "2014-04-12 23:12"
# the time point consists of year, months, day and hour
# change now the minute of x to 7 and check x in the same line of code
# see which time it would be in London
# create another time point y in 2015 and get the difference between those 2 points
x = ymd_hm(tz = "CET", "2014-04-12 23:12")
minute(x) = 7; x
## [1] "2014-04-12 23:07:00 CEST"
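# in London (assumed call): with_tz(x, tz = "Europe/London")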
## [1] "2014-04-12 22:07:00 BST"
## Time difference of 608 days
# 3 Time Series Data Pre-Processing and Visualization
## Standard time series functions
# Let's create a time series object - class ts
# Getting data
mydata = runif(n = 50, min = 10, max = 45)
# ts creates an object of class "ts"; data starts in 1956, 4 observations/year (quarterly)
mytimeseries = ts(data = mydata, start = 1956, frequency = 4)
# Lets see how the data looks
plot(mytimeseries)
## [1] "ts"
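# class(mytimeseries) printed "ts" above; the table below is the quarterly
# time index (assumed calls: class(mytimeseries); time(mytimeseries))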
## Qtr1 Qtr2 Qtr3 Qtr4
## 1956 1956 1956 1956 1957
## 1957 1957 1957 1958 1958
## 1958 1958 1958 1958 1959
## 1959 1959 1959 1960 1960
## 1960 1960 1960 1960 1961
## 1961 1961 1961 1962 1962
## 1962 1962 1962 1962 1963
## 1963 1963 1963 1964 1964
## 1964 1964 1964 1964 1965
## 1965 1965 1965 1966 1966
## 1966 1966 1966 1966 1967
## 1967 1967 1967 1968 1968
## 1968 1968 1968
# Refining the start argument
mytimeseries = ts(data = mydata, start = c(1956, 3), frequency = 4)
## Creating a ts object - Exercise
# Get a random walk of 450 numbers, e.g. with rnorm, runif, etc.
# In the solution I am going to use a cumulative sum on the normal distribution:
# x = cumsum(rnorm(n = 450))
# If you want it to be reproducible, you can set a seed
# Add the time component: it is a monthly dataset, which starts in November 1914
# Get a simple plot for this time series
# Advanced: how would you get the same type of plot with the 'lattice' package?
# (see the sketch below)
x = cumsum(rnorm(n = 450))
y = ts(x, start = c(1914, 11), frequency = 12)
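# Advanced: the same plot with the 'lattice' package (a sketch; lattice ships
# an xyplot method for ts objects):
# library(lattice)
# xyplot(y)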
plot(y)
## ===========================================##
## Plots for time series data
# Standard R Base plots
plot(nottem)
## ===========================================##
# Add on packages for advanced plots
library(forecast)
library(ggplot2)
# The ggplot equivalent to plot
autoplot(nottem) # ggplots work with different layers
autoplot(nottem) + ggtitle("Autoplot of Nottingham temperature data")
## ===========================================##
## Exercise: Seasonplot - library(forecast)
# use the seasonplot function in order to resemble the plot shown here
# we are using the AirPassengers dataset
# for all the needed arguments on the plot, check out the help for par
# make sure that the labels are visible
library(forecast)
seasonplot(AirPassengers, xlab = "", col = c("red", "blue"), year.labels = T,
    labelgap = 0.35, type = "s", bty = "7", cex = 0.75,
    main = "Seasonal plot of dataset AirPassengers")
## ===========================================##
## German Inflation Rates
# https://www.statbureau.org/en/germany/inflation-tables
# data is copied from the web, first and last column deleted
# mydata = scan()
# plot.ts(mydata)
# germaninfl = ts(mydata, start = 2008, frequency = 12)
# plot(germaninfl)
## ===========================================##
### Working with Irregular Time Series (dataset: irregular_sensor)
library(readr)
irregular_sensor <- read_csv("irregular_sensor.csv", col_names = FALSE)
# View(irregular_sensor)
# Irregular_sensor time series csv
class(irregular_sensor$X1)
## [1] "character"
library(zoo) # for general irregular time series
library(tidyr) # for function separate
## ===========================================##
## Method 1 - removing the time component
irreg.split = separate(irregular_sensor, col = X1, into = c("date", "time"), sep = 8,
remove = T)
head(irreg.split)
## # A tibble: 6 x 3
## date time X2
## <chr> <chr> <dbl>
## 1 05/16/17 " 10:34 AM" 334.
## 2 05/17/17 " 03:23 PM" 386.
## 3 05/17/17 " 08:45 PM" 492.
## 4 05/18/17 " 03:23 AM" 326.
## 5 05/18/17 " 12:34 PM" 373.
## 6 05/19/17 " 11:34 AM" 345.
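# converting the date column to date values (assumed call; sensor.date is used below):
# sensor.date = strptime(irreg.split$date, format = "%m/%d/%y")
# head(sensor.date)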
## [1] "2017-05-16 IST" "2017-05-17 IST" "2017-05-17 IST" "2017-05-18 IST"
## [5] "2017-05-18 IST" "2017-05-19 IST"
# Creating a data.frame for orientation
irregts.df = data.frame(date = as.Date(sensor.date), measurement = irregular_sensor$X2)
head(irregts.df)
## date measurement
## 1 2017-05-16 334
## 2 2017-05-17 386
## 3 2017-05-17 492
## 4 2017-05-18 326
## 5 2017-05-18 373
## 6 2017-05-19 345
# Getting a zoo object
irreg.dates = zoo(irregts.df$measurement, order.by = irregts.df$date)
head(irreg.dates)
## 2017-05-16 2017-05-17 2017-05-17 2017-05-18 2017-05-18 2017-05-19
## 334 386 492 326 373 345
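# aggregating to one (mean) value per day (assumed calls matching the output below):
# irreg.daily = aggregate(irreg.dates, as.Date, mean); round(irreg.daily)
# length(irreg.daily)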
## 2017-05-16 2017-05-17 2017-05-18 2017-05-19 2017-05-20 2017-05-21 2017-05-22
## 334 439 349 345 420 353 372
## 2017-05-23 2017-05-24 2017-05-25 2017-05-26 2017-05-27 2017-05-28 2017-05-29
## 402 310 382 433 393 391 405
## 2017-05-30 2017-05-31
## 370 338
## [1] 16
## ===========================================##
## Method 2 - date and time component kept
sensor.date1 = strptime(irregular_sensor$X1, "%m/%d/%y %I:%M %p")
sensor.date1
## [1] "2017-05-16 10:34:00 IST" "2017-05-17 15:23:00 IST"
## [3] "2017-05-17 20:45:00 IST" "2017-05-18 03:23:00 IST"
## [5] "2017-05-18 12:34:00 IST" "2017-05-19 11:34:00 IST"
## [7] "2017-05-20 12:34:00 IST" "2017-05-21 12:34:00 IST"
## [9] "2017-05-22 17:45:00 IST" "2017-05-22 06:02:00 IST"
## [11] "2017-05-23 04:45:00 IST" "2017-05-23 12:34:00 IST"
## [13] "2017-05-24 02:35:00 IST" "2017-05-25 04:27:00 IST"
## [15] "2017-05-26 15:39:00 IST" "2017-05-27 06:29:00 IST"
## [17] "2017-05-28 07:29:00 IST" "2017-05-29 05:49:00 IST"
## [19] "2017-05-30 07:49:00 IST" "2017-05-30 08:34:00 IST"
## [21] "2017-05-30 13:37:00 IST" "2017-05-30 15:45:00 IST"
## [23] "2017-05-31 05:37:00 IST" "2017-05-31 08:38:00 IST"
## [25] "2017-05-31 16:45:00 IST"
# Creating the zoo object
irreg.dates1 = zoo(irregular_sensor$X2, order.by = sensor.date1)
irreg.dates1
## 2017-05-16 10:34:00 2017-05-17 15:23:00 2017-05-17 20:45:00 2017-05-18 03:23:00
## 334 386 492 326
## 2017-05-18 12:34:00 2017-05-19 11:34:00 2017-05-20 12:34:00 2017-05-21 12:34:00
## 373 345 420 353
## 2017-05-22 06:02:00 2017-05-22 17:45:00 2017-05-23 04:45:00 2017-05-23 12:34:00
## 392 352 401 404
## 2017-05-24 02:35:00 2017-05-25 04:27:00 2017-05-26 15:39:00 2017-05-27 06:29:00
## 310 382 433 393
## 2017-05-28 07:29:00 2017-05-29 05:49:00 2017-05-30 07:49:00 2017-05-30 08:34:00
## 391 405 392 372
## 2017-05-30 13:37:00 2017-05-30 15:45:00 2017-05-31 05:37:00 2017-05-31 08:38:00
## 313 402 305 402
## 2017-05-31 16:45:00
## 307
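# the daily aggregate below is obtained the same way (assumed call):
# aggregate(irreg.dates1, as.Date, mean)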
## 2017-05-16 2017-05-17 2017-05-18 2017-05-19 2017-05-20 2017-05-21 2017-05-22
## 334 439 349 345 420 353 372
## 2017-05-23 2017-05-24 2017-05-25 2017-05-26 2017-05-27 2017-05-28 2017-05-29
## 402 310 382 433 393 391 405
## 2017-05-30 2017-05-31
## 370 338
## ===========================================##
### Working with Missing Data and Outliers
## Import ts_NAandOutliers.csv
library(readr)
ts_NAandOutliers <- read_csv("ts_NAandOutliers.csv")
# Convert the 2nd column to a simple ts without frequency
myts = ts(ts_NAandOutliers$mydata)
myts
## Time Series:
## Start = 1
## End = 250
## Frequency = 1
## [1] 32.8 42.5 NA 32.2 55.6 33.1 43.4 37.8 22.8 36.4 28.5 59.0
## [13] 36.5 26.7 41.3 28.9 38.6 31.3 34.5 NA 30.5 49.4 44.0 22.2
## [25] 19.4 41.9 30.3 32.9 17.7 10.3 31.6 40.0 35.4 46.2 26.9 36.3
## [37] 23.4 42.8 31.9 37.6 33.9 17.7 19.9 24.0 999.0 32.9 33.0 47.9
## [49] 34.0 40.8 34.4 27.2 41.8 NA 49.7 37.2 34.5 27.6 38.0 24.5
## [61] 33.9 28.6 17.9 40.3 32.1 15.6 38.6 58.9 42.2 34.1 29.2 20.4
## [73] 23.7 49.0 59.2 25.0 37.3 11.8 49.9 33.3 25.9 34.7 38.2 30.2
## [85] 45.0 36.1 39.0 24.3 8.6 33.9 30.2 32.2 46.7 36.4 27.3 39.7
## [97] 48.6 999.0 31.0 33.6 41.9 45.8 21.7 32.3 55.9 17.3 33.0 36.2
## [109] 47.1 34.0 36.6 18.8 28.1 40.5 41.3 32.9 59.6 21.0 46.9 53.9
## [121] 44.7 43.1 25.8 22.8 51.4 34.8 50.9 36.4 33.0 37.7 48.0 49.9
## [133] 20.6 24.9 4.7 17.0 45.6 28.3 50.4 35.0 38.8 32.3 46.0 21.7
## [145] 16.5 36.2 35.8 23.4 34.2 39.4 26.1 28.6 45.4 NA 30.6 23.4
## [157] 36.9 17.6 23.0 40.4 30.7 49.3 31.2 30.5 41.3 28.0 24.9 23.9
## [169] 41.7 46.2 40.7 999.0 26.5 12.8 38.1 61.7 11.5 41.8 34.6 59.9
## [181] 53.1 51.9 43.7 33.6 25.0 27.0 12.8 21.0 18.1 48.6 64.6 29.4
## [193] 45.5 34.2 50.6 43.6 57.6 20.8 39.8 51.9 33.4 23.2 34.4 28.1
## [205] 34.0 35.4 38.1 40.8 45.8 39.3 999.0 36.7 50.3 34.6 46.0 39.9
## [217] 39.9 51.1 2.7 51.7 41.3 43.1 24.7 52.3 37.4 19.0 43.5 54.2
## [229] 33.6 33.9 NA 36.1 33.0 31.4 36.6 29.6 18.3 38.2 34.4 12.0
## [241] 23.6 37.0 50.7 38.0 28.7 32.4 41.3 30.0 14.8 45.4
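# summary(myts) (assumed call) shows the NAs and the 999 outliers: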
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 3 28 35 51 42 999 5
plot(myts)
## ===========================================##
# Automatic detection of outliers
library(forecast)
myts1 = tsoutliers(myts)
myts1
## $index
## [1] 45 98 172 211
##
## $replacements
## [1] 28 40 34 38
# Missing data handling with zoo
library(zoo)
myts.NAlocf = na.locf(myts)
myts.NAfill = na.fill(myts, 33)
# Tip: na.trim to get rid of NAs at the beginning or end of dataset
# Standard NA method in package forecast
myts.NAinterp = na.interp(myts)
# Cleaning NA and outliers with forecast package
mytsclean = tsclean(myts)
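# summary(mytsclean) (assumed call) confirms below that the outliers and NAs are gone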
plot(mytsclean)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3 28 35 35 42 65
# 4 Statistical Background For Time Series Analysis And Forecasting
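# Descriptive statistics on the lynx dataset; the outputs below correspond to
# these calls (assumed, the code chunk is missing from this extract):
# lynx; time(lynx); length(lynx); window(lynx, start = 1929)
# mean(lynx); median(lynx); sort(lynx); sort(lynx)[c(57, 58)]
# quantile(lynx); quantile(lynx, probs = seq(0, 1, 0.1))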
## Time Series:
## Start = 1821
## End = 1934
## Frequency = 1
## [1] 269 321 585 871 1475 2821 3928 5943 4950 2577 523 98 184 279 409
## [16] 2285 2685 3409 1824 409 151 45 68 213 546 1033 2129 2536 957 361
## [31] 377 225 360 731 1638 2725 2871 2119 684 299 236 245 552 1623 3311
## [46] 6721 4254 687 255 473 358 784 1594 1676 2251 1426 756 299 201 229
## [61] 469 736 2042 2811 4431 2511 389 73 39 49 59 188 377 1292 4031
## [76] 3495 587 105 153 387 758 1307 3465 6991 6313 3794 1836 345 382 808
## [91] 1388 2713 3800 3091 2985 3790 674 81 80 108 229 399 1132 2432 3574
## [106] 2935 1537 529 485 662 1000 1590 2657 3396
## Time Series:
## Start = 1821
## End = 1934
## Frequency = 1
## [1] 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835
## [16] 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850
## [31] 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865
## [46] 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880
## [61] 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895
## [76] 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910
## [91] 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925
## [106] 1926 1927 1928 1929 1930 1931 1932 1933 1934
## [1] 114
## Time Series:
## Start = 1929
## End = 1934
## Frequency = 1
## [1] 485 662 1000 1590 2657 3396
## [1] 1538
## [1] 771
## [1] 39 45 49 59 68 73 80 81 98 105 108 151 153 184 188
## [16] 201 213 225 229 229 236 245 255 269 279 299 299 321 345 358
## [31] 360 361 377 377 382 387 389 399 409 409 469 473 485 523 529
## [46] 546 552 585 587 662 674 684 687 731 736 756 758 784 808 871
## [61] 957 1000 1033 1132 1292 1307 1388 1426 1475 1537 1590 1594 1623 1638 1676
## [76] 1824 1836 2042 2119 2129 2251 2285 2432 2511 2536 2577 2657 2685 2713 2725
## [91] 2811 2821 2871 2935 2985 3091 3311 3396 3409 3465 3495 3574 3790 3794 3800
## [106] 3928 4031 4254 4431 4950 5943 6313 6721 6991
## [1] 758 784
## 0% 25% 50% 75% 100%
## 39 348 771 2567 6991
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 39 147 259 381 547 771 1470 2166 2818 3790 6991
## ===========================================##
### simple forecast methods
set.seed(95)
myts <- ts(rnorm(200), start = (1818))
plot(myts)
library(forecast)
meanm <- meanf(myts, h = 20)
naivem <- naive(myts, h = 20)
driftm <- rwf(myts, h = 20, drift = T)
plot(meanm, plot.conf = F, main = "")
lines(naivem$mean, col = 123, lwd = 2)
lines(driftm$mean, col = 22, lwd = 2)
legend("topleft", lty = 1, col = c(4, 123, 22), legend = c("Mean method", "Naive method",
"Drift Method"))## ===========================================##
###### accuracy and model comparison
set.seed(95)
myts <- ts(rnorm(200), start = (1818))
mytstrain <- window(myts, start = 1818, end = 1988)
plot(mytstrain)
library(forecast)
meanm <- meanf(mytstrain, h = 30)
naivem <- naive(mytstrain, h = 30)
driftm <- rwf(mytstrain, h = 30, drift = T)
mytstest <- window(myts, start = 1988)
accuracy(meanm, mytstest)
## ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
## Training set 0.000000000000000014 1.0 0.82 78 133 0.77 0.13 NA
## Test set -0.245982845808009720 1.1 0.96 101 103 0.91 0.24 0.98
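# accuracy(naivem, mytstest) (assumed call):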
## ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
## Training set -0.00021 1.3 1.1 -153 731 1.0 -0.50 NA
## Test set 0.87319 1.4 1.2 86 308 1.1 0.24 2
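# accuracy(driftm, mytstest) (assumed call):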
## ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
## Training set -0.00000000000000002 1.3 1.1 -153 731 1.0 -0.50 NA
## Test set 0.87631825937667451 1.4 1.2 86 309 1.1 0.24 2
## ===========================================##
###### Residuals
set.seed(95)
myts <- ts(rnorm(200), start = (1818))
plot(myts)
library(forecast)
meanm <- meanf(myts, h = 20)
naivem <- naive(myts, h = 20)
driftm <- rwf(myts, h = 20, drift = T)
var(meanm$residuals)
## [1] 1.1
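# the remaining outputs come from mean() and var() applied to the residuals of
# the three models (assumed calls; naive and drift residuals start with an NA,
# hence the NA result and the use of na.rm = TRUE)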
## [1] -0.000000000000000006
## [1] NA
## [1] 1.8
## [1] 0.0066
## [1] 1.8
## [1] -0.000000000000000045
## ===========================================##
### Stationarity
x <- rnorm(1000) # no unit-root, stationary
library(tseries)
adf.test(x) # augmented Dickey-Fuller test
##
## Augmented Dickey-Fuller Test
##
## data: x
## Dickey-Fuller = -10, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
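# adf.test(nottem) (assumed call):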
##
## Augmented Dickey-Fuller Test
##
## data: nottem
## Dickey-Fuller = -13, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
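# a non-stationary counterexample (assumed code): y = cumsum(rnorm(1000)); adf.test(y)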
##
## Augmented Dickey-Fuller Test
##
## data: y
## Dickey-Fuller = 0.8, Lag order = 9, p-value = 1
## alternative hypothesis: stationary
## ===========================================##
### Autocorrelation
# Durbin Watson test for autocorrelation
length(lynx)
## [1] 114
## Time Series:
## Start = 1821
## End = 1826
## Frequency = 1
## [1] 269 321 585 871 1475 2821
## [1] 321 585 871 1475 2821 3928
## [1] 269 321 585 871 1475 2821
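# regressing the series on its own first lag (assumed calls):
# library(lmtest)
# dwtest(lynx[-114] ~ lynx[-1])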
##
## Durbin-Watson test
##
## data: lynx[-114] ~ lynx[-1]
## DW = 1, p-value = 0.000001
## alternative hypothesis: true autocorrelation is greater than 0
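# white noise shows no autocorrelation (assumed code):
# x = rnorm(700); dwtest(x[-700] ~ x[-1])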
##
## Durbin-Watson test
##
## data: x[-700] ~ x[-1]
## DW = 2, p-value = 0.5
## alternative hypothesis: true autocorrelation is greater than 0
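# the same check on nottem (assumed calls): length(nottem); dwtest(nottem[-240] ~ nottem[-1])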
## [1] 240
##
## Durbin-Watson test
##
## data: nottem[-240] ~ nottem[-1]
## DW = 1, p-value = 0.000000000000005
## alternative hypothesis: true autocorrelation is greater than 0
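# partial autocorrelations (assumed call): pacf(lynx, lag.max = 20, plot = F)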
##
## Partial autocorrelations of series 'lynx', by lag
##
## 1 2 3 4 5 6 7 8 9 10 11
## 0.711 -0.588 -0.039 -0.250 -0.094 -0.052 0.119 0.301 0.055 -0.081 -0.089
## 12 13 14 15 16 17 18 19 20
## -0.040 -0.099 -0.014 -0.113 -0.108 -0.006 0.116 -0.016 -0.018
# lag.max for numbers of lags to be calculated
# plot = F to suppress plotting
acf(rnorm(500), lag.max = 20)
## ===========================================##
## Exercise messy data
set.seed(54)
myts <- ts(c(rnorm(50, 34, 10), rnorm(67, 7, 1), runif(23, 3, 14)))
# myts <- log(myts)
plot(myts)
library(forecast)
meanm <- meanf(myts, h = 10)
naivem <- naive(myts, h = 10)
driftm <- rwf(myts, h = 10, drift = T)
plot(meanm, main = "", bty = "l")
lines(naivem$mean, col = 123, lwd = 2)
lines(driftm$mean, col = 22, lwd = 2)
legend("bottomleft", lty = 1, col = c(4, 123, 22), bty = "n", cex = 0.75, legend = c("Mean method",
"Naive method", "Drift Method"))## [1] 140
mytstrain <- window(myts, start = 1, end = 112)
mytstest <- window(myts, start = 113)
meanma <- meanf(mytstrain, h = 28)
naivema <- naive(mytstrain, h = 28)
driftma <- rwf(mytstrain, h = 28, drift = T)
accuracy(meanma, mytstest)
## ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
## Training set -0.00000000000000064 15 14 -85 120 2.2 0.76 NA
## Test set -11.25187075743709109 12 11 -180 180 1.8 -0.17 2
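# accuracy(naivema, mytstest) (assumed call):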
## ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
## Training set -0.41 10 6.2 -11.9 33 1.0 -0.49 NA
## Test set 0.97 3 2.5 -1.9 34 0.4 -0.17 0.65
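# accuracy(driftma, mytstest) (assumed call):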
## ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
## Training set -0.000000000000000016 10.0 6.3 -7.9 33 1.0 -0.49 NA
## Test set 6.957224205302620312 8.2 7.0 86.3 87 1.1 0.43 1.6
## [1] -0.34
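# testing the naive model's residuals for normality (assumed call):
# shapiro.test(naivem$residuals)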
##
## Shapiro-Wilk normality test
##
## data: naivem$residuals
## W = 0.9, p-value = 0.00000002
# 5 Time Series Analysis And Forecasting
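# Seasonal decomposition of the nottem dataset; the outputs below correspond
# to (assumed calls): frequency(nottem); length(nottem);
# mynottem = decompose(nottem, type = "additive"); mynottem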
## [1] 12
## [1] 240
## $x
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1920 41 41 44 47 54 58 58 56 54 50 43 40
## 1921 44 40 45 47 54 59 66 60 57 54 40 43
## 1922 38 39 40 42 56 58 57 54 54 47 42 42
## 1923 42 40 43 46 49 53 64 60 54 49 36 38
## 1924 39 38 38 46 53 58 61 58 56 50 44 44
## 1925 40 40 41 45 54 59 64 61 53 50 38 36
## 1926 39 43 43 49 51 57 62 62 58 47 42 40
## 1927 39 38 45 47 52 55 60 60 55 50 42 35
## 1928 41 41 43 47 51 56 62 60 55 50 43 37
## 1929 35 31 41 44 53 57 62 60 60 49 43 42
## 1930 42 37 41 47 51 60 60 62 57 51 43 39
## 1931 37 38 38 46 54 58 61 58 54 47 46 41
## 1932 42 38 40 45 51 57 62 64 56 47 44 42
## 1933 36 39 44 49 54 61 66 65 60 50 42 36
## 1934 39 38 40 47 53 60 66 60 59 51 43 46
## 1935 40 43 44 47 50 60 65 64 57 49 44 36
## 1936 37 35 44 44 53 59 60 61 58 50 42 41
## 1937 41 41 38 47 54 59 61 62 56 51 41 37
## 1938 42 41 47 47 52 59 60 60 57 51 48 39
## 1939 39 41 42 48 52 58 61 62 58 47 47 38
##
## $seasonal
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1920 -9.34 -9.90 -6.95 -2.76 3.45 8.99 12.97 11.46 7.40 0.65 -6.62 -9.36
## (identical rows for 1921 through 1939 omitted; with decompose the seasonal
## component repeats exactly every year)
##
## $trend
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1920 NA NA NA NA NA NA 49 49 49 49 49 49
## 1921 50 50 50 51 51 51 50 50 50 49 49 49
## 1922 49 48 48 47 47 47 47 48 48 48 48 48
## 1923 48 48 48 49 48 48 48 48 47 47 47 48
## 1924 48 47 47 48 48 48 49 49 49 49 49 49
## 1925 50 50 50 50 49 49 48 49 49 49 49 49
## 1926 49 49 49 49 49 49 49 49 49 49 49 49
## 1927 49 49 49 49 49 49 48 49 49 48 48 48
## 1928 49 49 49 49 49 49 49 48 48 47 47 47
## 1929 47 47 48 48 48 48 48 49 49 49 49 49
## 1930 49 49 49 49 49 49 49 49 49 49 49 49
## 1931 49 49 48 48 48 48 48 49 49 49 48 48
## 1932 48 49 49 49 49 49 49 49 49 49 49 50
## 1933 50 50 50 51 51 50 50 50 50 50 50 50
## 1934 50 50 49 49 49 50 50 51 51 51 51 51
## 1935 51 51 51 51 51 50 50 49 49 49 49 49
## 1936 49 48 48 48 48 48 49 49 49 49 49 49
## 1937 49 49 49 49 49 49 49 49 50 50 50 50
## 1938 50 50 50 50 50 50 50 50 50 50 50 50
## 1939 50 50 50 50 50 49 NA NA NA NA NA NA
##
## $random
## Jan Feb Mar Apr May Jun Jul Aug Sep
## 1920 NA NA NA NA NA NA -4.3089 -4.2091 -2.2376
## 1921 3.9727 -0.3709 1.7174 -0.8385 0.0299 -0.8948 2.8786 -1.6883 -0.2501
## 1922 -2.0315 0.3582 -1.4492 -2.6302 4.9674 1.4927 -3.6214 -4.8508 -0.9918
## 1923 3.4560 1.7874 1.4091 0.0282 -2.6409 -4.2740 3.5203 0.6409 -0.2001
## 1924 1.0477 0.0082 -2.1701 0.7323 1.8591 0.2385 -0.9214 -2.1674 -0.1376
## 1925 -0.1731 0.6582 -1.9701 -1.7260 1.0174 1.6510 2.1078 1.0284 -3.1418
## 1926 -0.1023 4.6582 1.4758 2.7365 -1.7826 -1.4073 0.1578 1.3617 1.0457
## 1927 -0.0981 -0.2876 3.7383 1.3157 -0.4742 -2.5448 -0.9922 0.4492 -1.2959
## 1928 1.5060 2.2916 1.0091 1.2948 -1.3409 -1.4907 0.4911 0.9576 0.3999
## 1929 -3.3398 -6.2834 0.2883 -1.1427 1.8924 -0.0282 1.1161 -0.1008 3.2082
## 1930 1.4560 -2.4376 -1.2284 0.3282 -1.6576 2.1343 -1.8297 1.3117 0.8332
## 1931 -2.2231 -0.2418 -2.9201 1.3032 2.1674 1.3552 -0.7214 -1.8341 -2.2543
## 1932 3.4352 -0.2876 -1.6659 -1.6885 -1.5492 -0.9532 0.3745 3.5034 0.1499
## 1933 -4.4606 -1.0001 1.0299 0.7615 -0.0076 1.3718 2.2078 3.0284 2.5041
## 1934 -1.0106 -1.5043 -2.0326 0.2740 0.4924 0.7135 3.1911 -1.6091 0.9374
## 1935 -1.3815 1.7082 -0.3951 -0.7760 -4.0367 1.2635 1.8870 3.2242 0.3791
## 1936 -2.0106 -3.4376 2.6758 -1.7093 0.9466 1.2177 -1.7130 0.4992 1.5416
## 1937 0.7477 1.4207 -4.0867 0.7448 1.1883 0.3385 -0.7214 1.1242 -0.6959
## 1938 1.7227 1.5166 4.6924 -0.2177 -0.8867 -0.1740 -3.5297 -1.0966 -0.2209
## 1939 -0.9398 1.0166 -0.5451 0.7823 -0.6117 -0.4365 NA NA NA
## Oct Nov Dec
## 1920 0.6661 0.3260 -0.0398
## 1921 4.1328 -2.9573 2.8560
## 1922 -1.7422 0.3468 3.4727
## 1923 1.5495 -4.2323 -0.5648
## 1924 -0.0797 1.7843 3.6310
## 1925 0.3370 -4.3157 -3.1315
## 1926 -3.0130 -0.8115 0.1602
## 1927 1.1453 0.4427 -3.9398
## 1928 2.1620 2.2843 -0.7856
## 1929 -0.7797 0.1468 1.8227
## 1930 1.6120 0.9052 -0.5648
## 1931 -2.7089 3.6510 1.6602
## 1932 -2.4505 0.8135 1.4602
## 1933 -0.4047 -1.1240 -4.5981
## 1934 -0.4547 -1.4490 4.3977
## 1935 -0.9630 1.9302 -3.1606
## 1936 -0.1255 -1.0573 1.3269
## 1937 0.3120 -1.8115 -3.3148
## 1938 0.3786 4.7010 -1.1148
## 1939 NA NA NA
##
## $figure
## [1] -9.34 -9.90 -6.95 -2.76 3.45 8.99 12.97 11.46 7.40 0.65 -6.62 -9.36
##
## $type
## [1] "additive"
##
## attr(,"class")
## [1] "decomposed.ts"
library(forecast)
library(ggplot2)
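# the printed decomposition below comes from stl (assumed code; the Call line
# confirms it): mystl = stl(nottem, s.window = "periodic"); mystl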
autoplot(decompose(nottem, type = "additive"))
## Call:
## stl(x = nottem, s.window = "periodic")
##
## Components
## seasonal trend remainder
## Jan 1920 -9.35 50 0.2665
## Feb 1920 -9.86 50 1.1097
## Mar 1920 -6.85 49 1.8429
## Apr 1920 -2.76 49 0.1348
## May 1920 3.50 49 1.3518
## Jun 1920 8.98 49 0.3064
## Jul 1920 12.85 49 -4.3189
## Aug 1920 11.48 49 -4.2103
## Sep 1920 7.45 49 -2.2416
## Oct 1920 0.47 49 0.8743
## Nov 1920 -6.43 49 0.1203
## Dec 1920 -9.48 49 -0.2147
## (rows for Jan 1921 through Dec 1939 omitted; with s.window = "periodic" the
## seasonal column repeats the same 12 values every year, while trend and
## remainder vary)
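# class(mynottem) (assumed call):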
## [1] "decomposed.ts"
# we are subtracting the seasonal element
nottemadjusted = nottem - mynottem$seasonal
# getting a plot
plot(nottemadjusted)
## [1] 12
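# AirPassengers is monthly data (the 12 above; assumed call: frequency(AirPassengers))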
mymodel1 = decompose(AirPassengers, type = "additive")
mymodel2 = decompose(AirPassengers, type = "multiplicative")
plot(mymodel1)
## ===========================================##
### SMOOTHING SMA
library("TTR")
# in order to identify trends, we can use smoothers
# like a simple moving average
# n identifies the order of the SMA - you can experiment with this parameter
x = c(1, 2, 3, 4, 5, 6, 7)
SMA(x, n = 3) # SMA from the TTR package, 3rd order
## [1] NA NA 2 3 4 5 6
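# applied to a real dataset (assumed call matching the output below): SMA(lynx, n = 9)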
## Time Series:
## Start = 1821
## End = 1934
## Frequency = 1
## [1] NA NA NA NA NA NA NA NA 2351 2608 2630 2576 2500 2367 2099
## [16] 1916 1554 1383 1300 1287 1293 1277 1254 1232 1039 855 713 792 853 876
## [31] 913 931 947 968 1035 1101 1138 1267 1303 1295 1296 1283 1263 1262 1327
## [46] 1754 1992 1992 1987 2013 2026 2052 2049 1867 1370 1056 1064 1069 1038 1024
## [61] 989 894 934 997 1330 1525 1535 1521 1500 1453 1378 1172 902 553 722
## [76] 1067 1124 1131 1143 1179 1243 1346 1588 1916 2230 2586 2778 2800 2799 2805
## [91] 2814 2730 2375 2017 1928 2145 2181 2148 2067 1925 1649 1271 1053 992 968
## [106] 1219 1381 1431 1472 1521 1587 1638 1663 1643
## ===========================================##
# Exponential Smoothing with ets
## ets
library(forecast)
# Using function ets
etsmodel = ets(nottem)
etsmodel
## ETS(A,N,A)
##
## Call:
## ets(y = nottem)
##
## Smoothing parameters:
## alpha = 0.0392
## gamma = 0.0001
##
## Initial states:
## l = 49.4597
## s = -9.6 -6.6 0.54 7.5 12 13
## 9 3.4 -2.8 -6.8 -9.8 -9.4
##
## sigma: 2.3
##
## AIC AICc BIC
## 1735 1737 1787
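# forcing a multiplicative error and season model (assumed assignment; the
# Call line below confirms the arguments):
# etsmodmult = ets(nottem, model = "MZM"); etsmodmult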
## ETS(M,N,M)
##
## Call:
## ets(y = nottem, model = "MZM")
##
## Smoothing parameters:
## alpha = 0.0214
## gamma = 0.0001
##
## Initial states:
## l = 49.3793
## s = 0.81 0.86 1 1.2 1.2 1.3
## 1.2 1.1 0.94 0.86 0.8 0.81
##
## sigma: 0.051
##
## AIC AICc BIC
## 1762 1764 1814
# 6 ARIMA Models
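# Fitting ARIMA models to the lynx data; the outputs below correspond to
# (assumed calls):
# library(forecast)
# auto.arima(lynx)
# auto.arima(lynx, trace = T)
# auto.arima(lynx, trace = T, stepwise = F, approximation = F)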
## Series: lynx
## ARIMA(2,0,2) with non-zero mean
##
## Coefficients:
## ar1 ar2 ma1 ma2 mean
## 1.342 -0.67 -0.20 -0.26 1544
## s.e. 0.098 0.08 0.13 0.11 132
##
## sigma^2 estimated as 761965: log likelihood=-932
## AIC=1876 AICc=1877 BIC=1893
##
## ARIMA(2,0,2) with non-zero mean : 1877
## ARIMA(0,0,0) with non-zero mean : 2007
## ARIMA(1,0,0) with non-zero mean : 1927
## ARIMA(0,0,1) with non-zero mean : 1918
## ARIMA(0,0,0) with zero mean : 2081
## ARIMA(1,0,2) with non-zero mean : 1889
## ARIMA(2,0,1) with non-zero mean : 1880
## ARIMA(3,0,2) with non-zero mean : 1879
## ARIMA(2,0,3) with non-zero mean : Inf
## ARIMA(1,0,1) with non-zero mean : 1891
## ARIMA(1,0,3) with non-zero mean : 1890
## ARIMA(3,0,1) with non-zero mean : 1882
## ARIMA(3,0,3) with non-zero mean : 1882
## ARIMA(2,0,2) with zero mean : 1906
##
## Best model: ARIMA(2,0,2) with non-zero mean
## Series: lynx
## ARIMA(2,0,2) with non-zero mean
##
## Coefficients:
## ar1 ar2 ma1 ma2 mean
## 1.342 -0.67 -0.20 -0.26 1544
## s.e. 0.098 0.08 0.13 0.11 132
##
## sigma^2 estimated as 761965: log likelihood=-932
## AIC=1876 AICc=1877 BIC=1893
##
## ARIMA(0,0,0) with zero mean : 2081
## ARIMA(0,0,0) with non-zero mean : 2007
## ARIMA(0,0,1) with zero mean : 1973
## ARIMA(0,0,1) with non-zero mean : 1918
## ARIMA(0,0,2) with zero mean : 1925
## ARIMA(0,0,2) with non-zero mean : 1890
## ARIMA(0,0,3) with zero mean : 1913
## ARIMA(0,0,3) with non-zero mean : 1888
## ARIMA(0,0,4) with zero mean : 1907
## ARIMA(0,0,4) with non-zero mean : 1889
## ARIMA(0,0,5) with zero mean : 1909
## ARIMA(0,0,5) with non-zero mean : 1887
## ARIMA(1,0,0) with zero mean : 1935
## ARIMA(1,0,0) with non-zero mean : 1927
## ARIMA(1,0,1) with zero mean : 1903
## ARIMA(1,0,1) with non-zero mean : 1891
## ARIMA(1,0,2) with zero mean : 1904
## ARIMA(1,0,2) with non-zero mean : 1889
## ARIMA(1,0,3) with zero mean : 1906
## ARIMA(1,0,3) with non-zero mean : 1890
## ARIMA(1,0,4) with zero mean : 1908
## ARIMA(1,0,4) with non-zero mean : Inf
## ARIMA(2,0,0) with zero mean : 1907
## ARIMA(2,0,0) with non-zero mean : 1878
## ARIMA(2,0,1) with zero mean : 1903
## ARIMA(2,0,1) with non-zero mean : 1880
## ARIMA(2,0,2) with zero mean : 1906
## ARIMA(2,0,2) with non-zero mean : 1877
## ARIMA(2,0,3) with zero mean : 1908
## ARIMA(2,0,3) with non-zero mean : Inf
## ARIMA(3,0,0) with zero mean : 1904
## ARIMA(3,0,0) with non-zero mean : 1881
## ARIMA(3,0,1) with zero mean : 1906
## ARIMA(3,0,1) with non-zero mean : 1882
## ARIMA(3,0,2) with zero mean : Inf
## ARIMA(3,0,2) with non-zero mean : 1879
## ARIMA(4,0,0) with zero mean : 1906
## ARIMA(4,0,0) with non-zero mean : 1875
## ARIMA(4,0,1) with zero mean : Inf
## ARIMA(4,0,1) with non-zero mean : 1876
## ARIMA(5,0,0) with zero mean : 1905
## ARIMA(5,0,0) with non-zero mean : 1876
##
##
##
## Best model: ARIMA(4,0,0) with non-zero mean
## Series: lynx
## ARIMA(4,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 mean
## 1.12 -0.72 0.26 -0.25 1547
## s.e. 0.09 0.14 0.14 0.09 137
##
## sigma^2 estimated as 748457: log likelihood=-931
## AIC=1874 AICc=1875 BIC=1891
## ===========================================##
## ARIMA calculations
# AR(2) model
myarima = arima(lynx, order = c(2, 0, 0))
myarima
##
## Call:
## arima(x = lynx, order = c(2, 0, 0))
##
## Coefficients:
## ar1 ar2 intercept
## 1.147 -0.600 1545
## s.e. 0.074 0.074 182
##
## sigma^2 estimated as 768159: log likelihood = -935, aic = 1878
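# inspecting the fit (assumed calls): window(lynx, start = 1929); residuals(myarima)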
## Time Series:
## Start = 1929
## End = 1934
## Frequency = 1
## [1] 485 662 1000 1590 2657 3396
## Time Series:
## Start = 1821
## End = 1934
## Frequency = 1
## [1] -712 -247 -321 -307 127 952 877 2429 -212 -238 -164 344
## [13] -314 -572 -500 1284 -391 1000 -1176 -338 77 -582 -592 -537
## [25] -357 -165 572 14 -1375 85 -162 -690 -371 -246 316 585
## [37] 28 -240 -725 86 -396 -545 -287 438 1081 3196 -2171 -862
## [49] 1319 -107 -731 -42 210 -382 585 -851 -229 -412 -388 -521
## [61] -372 -364 780 210 1731 -1586 -534 434 -510 -651 -673 -549
## [73] -502 273 2076 -1054 -1705 829 -314 -425 -293 -30 1721 3100
## [85] -330 44 570 -185 388 -122 -9 906 820 -341 1018 1520
## [97] -2584 882 -308 -634 -546 -498 112 673 763 -406 -386 -173
## [109] 101 -276 -168 141 733 602
## [1] 1850
## [1] 1851
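# MA(2) model (assumed assignment; the Call below confirms the order):
# myarima = arima(lynx, order = c(0, 0, 2)); myarima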
##
## Call:
## arima(x = lynx, order = c(0, 0, 2))
##
## Coefficients:
## ma1 ma2 intercept
## 1.141 0.470 1545
## s.e. 0.078 0.072 225
##
## sigma^2 estimated as 855092: log likelihood = -941, aic = 1890
## Time Series:
## Start = 1821
## End = 1934
## Frequency = 1
## [1] -803.7 -316.8 -339.8 -153.6 256.2 1051.0 1062.7 2690.6 -162.9
## [10] -44.6 -894.9 -405.6 -478.4 -530.1 -306.9 1338.7 -243.4 1512.5
## [19] -1332.4 -326.9 -395.7 -895.5 -270.0 -603.7 -183.8 -19.1 691.8
## [28] 210.5 -1153.4 32.5 -663.7 -578.5 -213.7 -298.9 533.9 710.9
## [37] 263.9 -61.3 -915.4 -173.4 -681.7 -441.3 -169.7 478.6 1299.5
## [46] 3468.5 -1858.4 -367.6 1.8 -901.8 -159.5 -155.8 301.3 -139.9
## [55] 723.7 -879.2 -126.3 -689.3 -498.7 -423.7 -358.8 -201.1 894.5
## [64] 339.7 2078.0 -1564.4 -347.8 -340.8 -954.2 -247.8 -755.5 -379.1
## [73] -381.0 359.3 2254.7 -791.1 -1114.9 203.0 -1100.3 1.4 -272.2
## [82] 71.5 1966.0 3169.4 228.8 499.0 -386.1 -994.3 152.3 -444.0
## [91] 277.6 1059.5 915.6 3.5 1005.6 1095.9 -2593.8 979.8 -1364.7
## [100] -340.7 -286.7 -659.3 473.4 656.3 1057.6 -125.1 -362.4 -544.2
## [109] -269.4 -320.5 -53.3 255.9 844.7 766.8
## [1] 1851
## [1] 1851
## ===========================================##
## ARIMA time series simulations
set.seed(123) # for reproduction
# simulation, at least n of 1000
asim <- arima.sim(model = list(order = c(1, 0, 1), ar = c(0.4), ma = c(0.3)), n = 1000) +
10
plot(asim)
# Stationarity:
# library(tseries)
# adf.test(asim)
# Autocorrelation:
# library(forecast)
# tsdisplay(asim)
auto.arima(asim, trace = T, stepwise = F, approximation = F)
##
## ARIMA(0,0,0) with zero mean : 7465
## ARIMA(0,0,0) with non-zero mean : 3242
## ARIMA(0,0,1) with zero mean : 6219
## ARIMA(0,0,1) with non-zero mean : 2879
## ARIMA(0,0,2) with zero mean : 5342
## ARIMA(0,0,2) with non-zero mean : 2837
## ARIMA(0,0,3) with zero mean : 4810
## ARIMA(0,0,3) with non-zero mean : 2838
## ARIMA(0,0,4) with zero mean : 4450
## ARIMA(0,0,4) with non-zero mean : 2839
## ARIMA(0,0,5) with zero mean : 4219
## ARIMA(0,0,5) with non-zero mean : 2841
## ARIMA(1,0,0) with zero mean : Inf
## ARIMA(1,0,0) with non-zero mean : 2871
## ARIMA(1,0,1) with zero mean : Inf
## ARIMA(1,0,1) with non-zero mean : 2836
## ARIMA(1,0,2) with zero mean : Inf
## ARIMA(1,0,2) with non-zero mean : 2837
## ARIMA(1,0,3) with zero mean : Inf
## ARIMA(1,0,3) with non-zero mean : 2839
## ARIMA(1,0,4) with zero mean : Inf
## ARIMA(1,0,4) with non-zero mean : 2841
## ARIMA(2,0,0) with zero mean : Inf
## ARIMA(2,0,0) with non-zero mean : 2837
## ARIMA(2,0,1) with zero mean : Inf
## ARIMA(2,0,1) with non-zero mean : 2837
## ARIMA(2,0,2) with zero mean : Inf
## ARIMA(2,0,2) with non-zero mean : 2839
## ARIMA(2,0,3) with zero mean : Inf
## ARIMA(2,0,3) with non-zero mean : 2841
## ARIMA(3,0,0) with zero mean : Inf
## ARIMA(3,0,0) with non-zero mean : 2837
## ARIMA(3,0,1) with zero mean : Inf
## ARIMA(3,0,1) with non-zero mean : 2839
## ARIMA(3,0,2) with zero mean : Inf
## ARIMA(3,0,2) with non-zero mean : 2841
## ARIMA(4,0,0) with zero mean : Inf
## ARIMA(4,0,0) with non-zero mean : 2839
## ARIMA(4,0,1) with zero mean : Inf
## ARIMA(4,0,1) with non-zero mean : 2841
## ARIMA(5,0,0) with zero mean : Inf
## ARIMA(5,0,0) with non-zero mean : 2841
##
##
##
## Best model: ARIMA(1,0,1) with non-zero mean
## Series: asim
## ARIMA(1,0,1) with non-zero mean
##
## Coefficients:
## ar1 ma1 mean
## 0.349 0.318 10.029
## s.e. 0.048 0.047 0.064
##
## sigma^2 estimated as 0.993: log likelihood=-1414
## AIC=2836 AICc=2836 BIC=2856
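As a sanity check (an addition, not in the original source), the estimates can be compared with the true simulation values ar = 0.4, ma = 0.3 and mean = 10:
# Estimates vs. simulation truth
fit <- auto.arima(asim, stepwise = F, approximation = F)
fit$coef                  # compare with ar1 = 0.4, ma1 = 0.3, mean = 10
sqrt(diag(fit$var.coef))  # standard errors of the estimates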
## ===========================================##
## ARIMA parameter selection
library(tseries)
adf.test(lynx)
##
## Augmented Dickey-Fuller Test
##
## data: lynx
## Dickey-Fuller = -6, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
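The Ljung-Box residual test printed next has no visible generating chunk; a call of this form (an assumption) produces that kind of output:
# Assumed generating call: residual diagnostics for the auto.arima fit
library(forecast)
checkresiduals(auto.arima(lynx))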
##
## Ljung-Box test
##
## data: Residuals from ARIMA(4,0,0) with non-zero mean
## Q* = 13, df = 5, p-value = 0.02
##
## Model df: 5. Total lags used: 10
# Example MA time series
set.seed(123) # for reproduction
# Simulation
myts <- arima.sim(model = list(order = c(0, 0, 2), ma = c(0.3, 0.7)), n = 1000) +
10
adf.test(myts)  # Stationarity
##
## Augmented Dickey-Fuller Test
##
## data: myts
## Dickey-Fuller = -9, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,3) with non-zero mean
## Q* = 4, df = 6, p-value = 0.7
##
## Model df: 4. Total lags used: 10
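The model-selection trace below likewise lacks its generating chunk; presumably (an assumption) it came from the same kind of call used for asim:
# Assumed generating call for the trace that follows
auto.arima(myts, trace = T, stepwise = F, approximation = F)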
##
## ARIMA(0,0,0) with zero mean : 7466
## ARIMA(0,0,0) with non-zero mean : 3240
## ARIMA(0,0,1) with zero mean : 6415
## ARIMA(0,0,1) with non-zero mean : 3199
## ARIMA(0,0,2) with zero mean : 5572
## ARIMA(0,0,2) with non-zero mean : 2828
## ARIMA(0,0,3) with zero mean : 4982
## ARIMA(0,0,3) with non-zero mean : 2830
## ARIMA(0,0,4) with zero mean : 4557
## ARIMA(0,0,4) with non-zero mean : 2832
## ARIMA(0,0,5) with zero mean : 4301
## ARIMA(0,0,5) with non-zero mean : 2831
## ARIMA(1,0,0) with zero mean : 3611
## ARIMA(1,0,0) with non-zero mean : 3164
## ARIMA(1,0,1) with zero mean : Inf
## ARIMA(1,0,1) with non-zero mean : 3121
## ARIMA(1,0,2) with zero mean : Inf
## ARIMA(1,0,2) with non-zero mean : 2830
## ARIMA(1,0,3) with zero mean : Inf
## ARIMA(1,0,3) with non-zero mean : 2831
## ARIMA(1,0,4) with zero mean : Inf
## ARIMA(1,0,4) with non-zero mean : 2833
## ARIMA(2,0,0) with zero mean : Inf
## ARIMA(2,0,0) with non-zero mean : 3017
## ARIMA(2,0,1) with zero mean : Inf
## ARIMA(2,0,1) with non-zero mean : 2977
## ARIMA(2,0,2) with zero mean : Inf
## ARIMA(2,0,2) with non-zero mean : 2832
## ARIMA(2,0,3) with zero mean : Inf
## ARIMA(2,0,3) with non-zero mean : 2833
## ARIMA(3,0,0) with zero mean : Inf
## ARIMA(3,0,0) with non-zero mean : 2929
## ARIMA(3,0,1) with zero mean : Inf
## ARIMA(3,0,1) with non-zero mean : 2924
## ARIMA(3,0,2) with zero mean : Inf
## ARIMA(3,0,2) with non-zero mean : 2831
## ARIMA(4,0,0) with zero mean : Inf
## ARIMA(4,0,0) with non-zero mean : 2914
## ARIMA(4,0,1) with zero mean : Inf
## ARIMA(4,0,1) with non-zero mean : 2899
## ARIMA(5,0,0) with zero mean : Inf
## ARIMA(5,0,0) with non-zero mean : 2873
##
##
##
## Best model: ARIMA(0,0,2) with non-zero mean
## Series: myts
## ARIMA(0,0,2) with non-zero mean
##
## Coefficients:
## ma1 ma2 mean
## 0.288 0.684 10.030
## s.e. 0.023 0.023 0.062
##
## sigma^2 estimated as 0.984: log likelihood=-1410
## AIC=2828 AICc=2828 BIC=2848
## ===========================================##
## ARIMA Forecasting
# Our model
myarima <- auto.arima(lynx, stepwise = F, approximation = F)
# Forecast of 10 years
arimafore <- forecast(myarima, h = 10)
plot(arimafore)
## Time Series:
## Start = 1935
## End = 1944
## Frequency = 1
## [1] 2981 2115 1362 839 669 874 1281 1680 1933 1988
# Ets for comparison
myets <- ets(lynx)
etsfore <- forecast(myets, h = 10)
# Comparison plot for 2 models
library(ggplot2)
autoplot(lynx) + forecast::autolayer(etsfore$mean, series = "ETS model") + forecast::autolayer(arimafore$mean,
series = "ARIMA model") + xlab("year") + ylab("Lynx Trappings") + guides(colour = guide_legend(title = "Forecast Method")) +
    theme(legend.position = c(0.8, 0.8))
## ===========================================##
## ARIMA with Explanatory Variables - Dynamic Regression
# Importing the cyprinidae dataset
library(readr)
cyprinidae <- read_csv("cyprinidae.csv")
# Display the multivariate dataset
library(ggplot2)
ggplot(cyprinidae, aes(y = concentration, x = X1)) + geom_point() + aes(colour = predator_presence)
# Convert the variables into time series
x = ts(cyprinidae$concentration)
y = as.numeric(cyprinidae$predator_presence)
# Arima model creation
library(forecast)
mymodel = auto.arima(x, xreg = y, stepwise = F, approximation = F)
mymodel
## Series: x
## Regression with ARIMA(0,0,0) errors
##
## Coefficients:
## intercept xreg
## 9.98 254.8
## s.e. 0.34 1.9
##
## sigma^2 estimated as 28.4: log likelihood=-772
## AIC=1550 AICc=1550 BIC=1560
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,0,0) errors
## Q* = 14, df = 8, p-value = 0.08
##
## Model df: 2. Total lags used: 10
# Expected predator presence at future 10 time points
y1 = as.numeric(c(T, T, F, F, F, F, T, F, T, F))
# Getting a forecast based on future predator presence
plot(forecast(mymodel, xreg = y1))
7 Multivariate Time Series Analysis
(Note: the MTS package could not be installed here because of an R version issue.)
### Multivariate Time Series Datasets
# Generating a random dataframe
x = rnorm(100, 1)
y = rnorm(100, 30)
z = rnorm(100, 500)
xyz = data.frame(x, y, z)
class(xyz)
## [1] "data.frame"
##===========================================##
# Converting a data.frame into mts
mymts = ts(xyz,
frequency = 12,
start = c(1940, 4))
##===========================================##
# Standard exploratory tools
plot(mymts)
## x y z
## Apr 1940 0.98 29 501
## May 1940 0.87 29 499
## Jun 1940 -1.55 30 501
## Jul 1940 2.04 30 501
## Aug 1940 1.25 29 499
## Sep 1940 3.42 30 501
## [1] "mts" "ts" "matrix"
##===========================================##
# Our further exercise dataset
class(EuStockMarkets)
## [1] "mts" "ts" "matrix"
## Time Series:
## Start = c(1991, 130)
## End = c(1991, 135)
## Frequency = 260
## DAX SMI CAC FTSE
## 1991 1629 1678 1773 2444
## 1992 1614 1688 1750 2460
## 1992 1607 1679 1718 2448
## 1992 1621 1684 1708 2470
## 1992 1618 1687 1723 2485
## 1992 1611 1672 1714 2467
##===========================================##
# Main packages - caveat: vars and MTS each provide their own VAR function
library(vars)
#install.packages("MTS")
#devtools::install_github('d-/MTS')
library(MTS)
##===========================================##
## Testing for stationarity
library(tseries)
### tseries - standard test adf.test
apply(EuStockMarkets, 2, adf.test)
## $DAX
##
## Augmented Dickey-Fuller Test
##
## data: newX[, i]
## Dickey-Fuller = -0.8, Lag order = 12, p-value = 1
## alternative hypothesis: stationary
##
##
## $SMI
##
## Augmented Dickey-Fuller Test
##
## data: newX[, i]
## Dickey-Fuller = -0.5, Lag order = 12, p-value = 1
## alternative hypothesis: stationary
##
##
## $CAC
##
## Augmented Dickey-Fuller Test
##
## data: newX[, i]
## Dickey-Fuller = -0.2, Lag order = 12, p-value = 1
## alternative hypothesis: stationary
##
##
## $FTSE
##
## Augmented Dickey-Fuller Test
##
## data: newX[, i]
## Dickey-Fuller = -2, Lag order = 12, p-value = 0.6
## alternative hypothesis: stationary
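For a compact overview, the p-values can be pulled out of the list of test results (a small helper, not in the original source):
# One p-value per index; all near 1, i.e. clearly non-stationary
sapply(apply(EuStockMarkets, 2, adf.test), function(res) res$p.value)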
# Alternative: package fUnitRoots, function adfTest
library(fUnitRoots)
apply(EuStockMarkets, 2, adfTest, lags = 0, type = "c")
## $DAX
##
## Title:
## Augmented Dickey-Fuller Test
##
## Test Results:
## PARAMETER:
## Lag Order: 0
## STATISTIC:
## Dickey-Fuller: 1.9429
## P VALUE:
## 0.99
##
## Description:
## Tue Sep 15 14:29:44 2020 by user: HP
##
##
## $SMI
##
## Title:
## Augmented Dickey-Fuller Test
##
## Test Results:
## PARAMETER:
## Lag Order: 0
## STATISTIC:
## Dickey-Fuller: 2.2138
## P VALUE:
## 0.99
##
## Description:
## Tue Sep 15 14:29:44 2020 by user: HP
##
##
## $CAC
##
## Title:
## Augmented Dickey-Fuller Test
##
## Test Results:
## PARAMETER:
## Lag Order: 0
## STATISTIC:
## Dickey-Fuller: 1.2494
## P VALUE:
## 0.99
##
## Description:
## Tue Sep 15 14:29:44 2020 by user: HP
##
##
## $FTSE
##
## Title:
## Augmented Dickey-Fuller Test
##
## Test Results:
## PARAMETER:
## Lag Order: 0
## STATISTIC:
## Dickey-Fuller: 0.2207
## P VALUE:
## 0.9735
##
## Description:
## Tue Sep 15 14:29:44 2020 by user: HP
##===========================================##
# Differencing the whole mts
library(MTS)
stnry = diffM(EuStockMarkets)
# Retest
apply(stnry, 2, adf.test)
## $DAX
##
## Augmented Dickey-Fuller Test
##
## data: newX[, i]
## Dickey-Fuller = -10, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $SMI
##
## Augmented Dickey-Fuller Test
##
## data: newX[, i]
## Dickey-Fuller = -11, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $CAC
##
## Augmented Dickey-Fuller Test
##
## data: newX[, i]
## Dickey-Fuller = -11, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $FTSE
##
## Augmented Dickey-Fuller Test
##
## data: newX[, i]
## Dickey-Fuller = -11, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
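As a cross-check (an addition, not in the source), diffM() agrees with base R's diff() applied to the whole mts:
# Both compute first differences column by column
all.equal(as.numeric(diff(EuStockMarkets)), as.numeric(stnry))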
##===========================================##
# Lag order identification
library(vars)
VARselect(stnry, type = "none", lag.max = 10)
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 9 1 1 9
##
## $criteria
## 1 2 3 4 5 6
## AIC(n) 25 25 25 25 25 25
## HQ(n) 25 25 25 25 25 25
## SC(n) 25 25 25 25 25 26
## FPE(n) 94382059527 94857714232 93915004830 93240097820 93129641966 92834667798
## 7 8 9 10
## AIC(n) 25 25 25 25
## HQ(n) 25 25 25 25
## SC(n) 26 26 26 26
## FPE(n) 93098769912 93103290305 92525331160 92880467531
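The suggested lag can also be extracted programmatically for reuse (a small addition):
lagsel <- VARselect(stnry, type = "none", lag.max = 10)
p_aic <- lagsel$selection[["AIC(n)"]]  # 9, the lag the VAR below ends up using
p_aic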
##===========================================##
# Creating a VAR model with vars
var.a <- vars::VAR(stnry,
lag.max = 10,
ic = "AIC",
type = "none")
summary(var.a)
##
## VAR Estimation Results:
## =========================
## Endogenous variables: DAX, SMI, CAC, FTSE
## Deterministic variables: none
## Sample size: 1850
## Log Likelihood: -33712.408
## Roots of the characteristic polynomial:
## 0.817 0.817 0.812 0.812 0.792 0.792 0.786 0.786 0.778 0.778 0.758 0.758 0.754 0.754 0.754 0.754 0.747 0.742 0.742 0.729 0.729 0.729 0.715 0.715 0.672 0.672 0.67 0.67 0.662 0.662 0.655 0.577 0.577 0.454 0.289 0.121
## Call:
## vars::VAR(y = stnry, type = "none", lag.max = 10, ic = "AIC")
##
##
## Estimation results for equation DAX:
## ====================================
## DAX = DAX.l1 + SMI.l1 + CAC.l1 + FTSE.l1 + DAX.l2 + SMI.l2 + CAC.l2 + FTSE.l2 + DAX.l3 + SMI.l3 + CAC.l3 + FTSE.l3 + DAX.l4 + SMI.l4 + CAC.l4 + FTSE.l4 + DAX.l5 + SMI.l5 + CAC.l5 + FTSE.l5 + DAX.l6 + SMI.l6 + CAC.l6 + FTSE.l6 + DAX.l7 + SMI.l7 + CAC.l7 + FTSE.l7 + DAX.l8 + SMI.l8 + CAC.l8 + FTSE.l8 + DAX.l9 + SMI.l9 + CAC.l9 + FTSE.l9
##
## Estimate Std. Error t value Pr(>|t|)
## DAX.l1 0.009657 0.042449 0.23 0.82006
## SMI.l1 -0.100817 0.029764 -3.39 0.00072 ***
## CAC.l1 0.075269 0.046580 1.62 0.10629
## FTSE.l1 0.073006 0.036617 1.99 0.04633 *
## DAX.l2 0.019045 0.042327 0.45 0.65279
## SMI.l2 -0.017241 0.029894 -0.58 0.56419
## CAC.l2 0.068712 0.046597 1.47 0.14049
## FTSE.l2 -0.080475 0.036939 -2.18 0.02949 *
## DAX.l3 -0.067636 0.042318 -1.60 0.11016
## SMI.l3 0.013541 0.029993 0.45 0.65170
## CAC.l3 0.048469 0.046659 1.04 0.29903
## FTSE.l3 0.040979 0.036967 1.11 0.26778
## DAX.l4 -0.050167 0.042272 -1.19 0.23548
## SMI.l4 0.016254 0.030086 0.54 0.58910
## CAC.l4 0.100151 0.046932 2.13 0.03298 *
## FTSE.l4 -0.045199 0.036932 -1.22 0.22117
## DAX.l5 0.010950 0.042494 0.26 0.79669
## SMI.l5 -0.097862 0.030319 -3.23 0.00127 **
## CAC.l5 0.073162 0.046914 1.56 0.11905
## FTSE.l5 -0.025479 0.036994 -0.69 0.49109
## DAX.l6 -0.012190 0.042406 -0.29 0.77380
## SMI.l6 0.024618 0.030368 0.81 0.41766
## CAC.l6 0.087186 0.046872 1.86 0.06304 .
## FTSE.l6 0.000774 0.036997 0.02 0.98332
## DAX.l7 0.078660 0.042510 1.85 0.06442 .
## SMI.l7 -0.005083 0.030254 -0.17 0.86660
## CAC.l7 -0.069110 0.046688 -1.48 0.13898
## FTSE.l7 -0.041838 0.037086 -1.13 0.25941
## DAX.l8 -0.033635 0.042586 -0.79 0.42974
## SMI.l8 0.096321 0.030433 3.17 0.00158 **
## CAC.l8 -0.118025 0.046665 -2.53 0.01152 *
## FTSE.l8 0.051702 0.037105 1.39 0.16367
## DAX.l9 -0.026205 0.042305 -0.62 0.53571
## SMI.l9 0.005200 0.030360 0.17 0.86402
## CAC.l9 0.135937 0.046741 2.91 0.00368 **
## FTSE.l9 -0.006809 0.036993 -0.18 0.85398
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 32 on 1814 degrees of freedom
## Multiple R-Squared: 0.0513, Adjusted R-squared: 0.0325
## F-statistic: 2.72 on 36 and 1814 DF, p-value: 0.000000204
##
##
## Estimation results for equation SMI:
## ====================================
## SMI = DAX.l1 + SMI.l1 + CAC.l1 + FTSE.l1 + DAX.l2 + SMI.l2 + CAC.l2 + FTSE.l2 + DAX.l3 + SMI.l3 + CAC.l3 + FTSE.l3 + DAX.l4 + SMI.l4 + CAC.l4 + FTSE.l4 + DAX.l5 + SMI.l5 + CAC.l5 + FTSE.l5 + DAX.l6 + SMI.l6 + CAC.l6 + FTSE.l6 + DAX.l7 + SMI.l7 + CAC.l7 + FTSE.l7 + DAX.l8 + SMI.l8 + CAC.l8 + FTSE.l8 + DAX.l9 + SMI.l9 + CAC.l9 + FTSE.l9
##
## Estimate Std. Error t value Pr(>|t|)
## DAX.l1 0.03513 0.05203 0.68 0.49958
## SMI.l1 -0.03830 0.03648 -1.05 0.29389
## CAC.l1 0.04665 0.05709 0.82 0.41397
## FTSE.l1 0.12752 0.04488 2.84 0.00454 **
## DAX.l2 0.00628 0.05187 0.12 0.90369
## SMI.l2 0.01835 0.03664 0.50 0.61653
## CAC.l2 0.10467 0.05711 1.83 0.06698 .
## FTSE.l2 -0.09667 0.04527 -2.14 0.03286 *
## DAX.l3 -0.14862 0.05186 -2.87 0.00421 **
## SMI.l3 0.00423 0.03676 0.12 0.90842
## CAC.l3 0.09477 0.05718 1.66 0.09764 .
## FTSE.l3 0.13168 0.04531 2.91 0.00370 **
## DAX.l4 -0.17524 0.05181 -3.38 0.00073 ***
## SMI.l4 0.02918 0.03687 0.79 0.42890
## CAC.l4 0.12425 0.05752 2.16 0.03089 *
## FTSE.l4 0.01151 0.04526 0.25 0.79924
## DAX.l5 0.00721 0.05208 0.14 0.88995
## SMI.l5 -0.08951 0.03716 -2.41 0.01611 *
## CAC.l5 0.07089 0.05750 1.23 0.21775
## FTSE.l5 -0.03791 0.04534 -0.84 0.40316
## DAX.l6 -0.07211 0.05197 -1.39 0.16549
## SMI.l6 0.01165 0.03722 0.31 0.75431
## CAC.l6 0.10245 0.05745 1.78 0.07468 .
## FTSE.l6 -0.00103 0.04534 -0.02 0.98194
## DAX.l7 0.14799 0.05210 2.84 0.00456 **
## SMI.l7 -0.01300 0.03708 -0.35 0.72594
## CAC.l7 -0.12321 0.05722 -2.15 0.03143 *
## FTSE.l7 -0.04917 0.04545 -1.08 0.27950
## DAX.l8 0.00860 0.05219 0.16 0.86915
## SMI.l8 0.08978 0.03730 2.41 0.01618 *
## CAC.l8 -0.09939 0.05719 -1.74 0.08240 .
## FTSE.l8 -0.01926 0.04547 -0.42 0.67192
## DAX.l9 0.07266 0.05185 1.40 0.16125
## SMI.l9 -0.09185 0.03721 -2.47 0.01366 *
## CAC.l9 0.08142 0.05728 1.42 0.15537
## FTSE.l9 0.06844 0.04534 1.51 0.13132
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 39 on 1814 degrees of freedom
## Multiple R-Squared: 0.0598, Adjusted R-squared: 0.0412
## F-statistic: 3.21 on 36 and 1814 DF, p-value: 0.000000000723
##
##
## Estimation results for equation CAC:
## ====================================
## CAC = DAX.l1 + SMI.l1 + CAC.l1 + FTSE.l1 + DAX.l2 + SMI.l2 + CAC.l2 + FTSE.l2 + DAX.l3 + SMI.l3 + CAC.l3 + FTSE.l3 + DAX.l4 + SMI.l4 + CAC.l4 + FTSE.l4 + DAX.l5 + SMI.l5 + CAC.l5 + FTSE.l5 + DAX.l6 + SMI.l6 + CAC.l6 + FTSE.l6 + DAX.l7 + SMI.l7 + CAC.l7 + FTSE.l7 + DAX.l8 + SMI.l8 + CAC.l8 + FTSE.l8 + DAX.l9 + SMI.l9 + CAC.l9 + FTSE.l9
##
## Estimate Std. Error t value Pr(>|t|)
## DAX.l1 -0.00104 0.03434 -0.03 0.9758
## SMI.l1 -0.07118 0.02408 -2.96 0.0032 **
## CAC.l1 0.04349 0.03768 1.15 0.2486
## FTSE.l1 0.08227 0.02962 2.78 0.0055 **
## DAX.l2 0.01449 0.03424 0.42 0.6722
## SMI.l2 -0.02791 0.02418 -1.15 0.2486
## CAC.l2 0.08384 0.03769 2.22 0.0263 *
## FTSE.l2 -0.06358 0.02988 -2.13 0.0335 *
## DAX.l3 -0.03244 0.03423 -0.95 0.3435
## SMI.l3 0.03145 0.02426 1.30 0.1951
## CAC.l3 -0.05948 0.03775 -1.58 0.1152
## FTSE.l3 0.02377 0.02991 0.79 0.4268
## DAX.l4 -0.11268 0.03420 -3.30 0.0010 **
## SMI.l4 0.04590 0.02434 1.89 0.0595 .
## CAC.l4 0.07106 0.03797 1.87 0.0614 .
## FTSE.l4 -0.02052 0.02988 -0.69 0.4922
## DAX.l5 -0.04005 0.03438 -1.16 0.2442
## SMI.l5 -0.04000 0.02453 -1.63 0.1031
## CAC.l5 0.04413 0.03795 1.16 0.2451
## FTSE.l5 -0.01147 0.02993 -0.38 0.7017
## DAX.l6 -0.01049 0.03431 -0.31 0.7599
## SMI.l6 0.01746 0.02457 0.71 0.4772
## CAC.l6 0.04611 0.03792 1.22 0.2241
## FTSE.l6 -0.00225 0.02993 -0.08 0.9400
## DAX.l7 0.09344 0.03439 2.72 0.0066 **
## SMI.l7 -0.01170 0.02447 -0.48 0.6328
## CAC.l7 -0.05858 0.03777 -1.55 0.1211
## FTSE.l7 -0.05967 0.03000 -1.99 0.0469 *
## DAX.l8 0.01229 0.03445 0.36 0.7213
## SMI.l8 0.02625 0.02462 1.07 0.2865
## CAC.l8 -0.10252 0.03775 -2.72 0.0067 **
## FTSE.l8 0.04884 0.03002 1.63 0.1039
## DAX.l9 0.01994 0.03422 0.58 0.5603
## SMI.l9 -0.02547 0.02456 -1.04 0.2999
## CAC.l9 0.04809 0.03781 1.27 0.2036
## FTSE.l9 -0.00390 0.02993 -0.13 0.8963
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 26 on 1814 degrees of freedom
## Multiple R-Squared: 0.0451, Adjusted R-squared: 0.0262
## F-statistic: 2.38 on 36 and 1814 DF, p-value: 0.00000873
##
##
## Estimation results for equation FTSE:
## =====================================
## FTSE = DAX.l1 + SMI.l1 + CAC.l1 + FTSE.l1 + DAX.l2 + SMI.l2 + CAC.l2 + FTSE.l2 + DAX.l3 + SMI.l3 + CAC.l3 + FTSE.l3 + DAX.l4 + SMI.l4 + CAC.l4 + FTSE.l4 + DAX.l5 + SMI.l5 + CAC.l5 + FTSE.l5 + DAX.l6 + SMI.l6 + CAC.l6 + FTSE.l6 + DAX.l7 + SMI.l7 + CAC.l7 + FTSE.l7 + DAX.l8 + SMI.l8 + CAC.l8 + FTSE.l8 + DAX.l9 + SMI.l9 + CAC.l9 + FTSE.l9
##
## Estimate Std. Error t value Pr(>|t|)
## DAX.l1 0.02560 0.03980 0.64 0.5201
## SMI.l1 -0.08587 0.02790 -3.08 0.0021 **
## CAC.l1 -0.00388 0.04367 -0.09 0.9292
## FTSE.l1 0.16562 0.03433 4.82 0.0000015 ***
## DAX.l2 0.02346 0.03968 0.59 0.5544
## SMI.l2 -0.02324 0.02803 -0.83 0.4071
## CAC.l2 0.02832 0.04368 0.65 0.5168
## FTSE.l2 -0.03130 0.03463 -0.90 0.3662
## DAX.l3 -0.05291 0.03967 -1.33 0.1825
## SMI.l3 0.01231 0.02812 0.44 0.6615
## CAC.l3 0.05773 0.04374 1.32 0.1871
## FTSE.l3 0.00778 0.03466 0.22 0.8224
## DAX.l4 -0.05419 0.03963 -1.37 0.1717
## SMI.l4 0.04308 0.02821 1.53 0.1268
## CAC.l4 0.07816 0.04400 1.78 0.0758 .
## FTSE.l4 -0.08359 0.03462 -2.41 0.0159 *
## DAX.l5 0.00161 0.03984 0.04 0.9677
## SMI.l5 -0.04218 0.02842 -1.48 0.1380
## CAC.l5 0.10293 0.04398 2.34 0.0194 *
## FTSE.l5 -0.06902 0.03468 -1.99 0.0467 *
## DAX.l6 -0.02704 0.03976 -0.68 0.4965
## SMI.l6 0.05831 0.02847 2.05 0.0407 *
## CAC.l6 0.09420 0.04394 2.14 0.0322 *
## FTSE.l6 -0.08832 0.03468 -2.55 0.0110 *
## DAX.l7 0.05406 0.03985 1.36 0.1751
## SMI.l7 0.05208 0.02836 1.84 0.0665 .
## CAC.l7 -0.06552 0.04377 -1.50 0.1346
## FTSE.l7 -0.05559 0.03477 -1.60 0.1100
## DAX.l8 -0.00493 0.03992 -0.12 0.9018
## SMI.l8 0.05727 0.02853 2.01 0.0449 *
## CAC.l8 -0.08091 0.04375 -1.85 0.0646 .
## FTSE.l8 0.01729 0.03479 0.50 0.6192
## DAX.l9 0.00363 0.03966 0.09 0.9271
## SMI.l9 -0.01747 0.02846 -0.61 0.5394
## CAC.l9 0.06908 0.04382 1.58 0.1151
## FTSE.l9 0.01247 0.03468 0.36 0.7191
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 30 on 1814 degrees of freedom
## Multiple R-Squared: 0.0594, Adjusted R-squared: 0.0407
## F-statistic: 3.18 on 36 and 1814 DF, p-value: 0.00000000095
##
##
##
## Covariance matrix of residuals:
## DAX SMI CAC FTSE
## DAX 1026 940 617 648
## SMI 940 1538 651 722
## CAC 617 651 672 522
## FTSE 648 722 522 903
##
## Correlation matrix of residuals:
## DAX SMI CAC FTSE
## DAX 1.000 0.749 0.743 0.673
## SMI 0.749 1.000 0.640 0.613
## CAC 0.743 0.640 1.000 0.670
## FTSE 0.673 0.613 0.670 1.000
##
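The residual diagnostics printed next are not preceded by their generating code; with the 9-lag VAR, a Portmanteau test of this form (an assumption; df = 4^2 * (16 - 9) = 112 matches the output) reproduces it:
# Assumed generating call (not shown in the source)
serial.test(var.a, lags.pt = 16, type = "PT.asymptotic")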
## Portmanteau Test (asymptotic)
##
## data: Residuals of VAR object var.a
## Chi-squared = 183, df = 112, p-value = 0.00002
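The Granger and instantaneous causality results below would come from a call like this (an assumption; the chunk is not shown):
# Assumed generating call (not shown in the source)
causality(var.a, cause = "DAX")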
## $Granger
##
## Granger causality H0: DAX do not Granger-cause SMI CAC FTSE
##
## data: VAR object var.a
## F-Test = 2, df1 = 27, df2 = 7256, p-value = 0.01
##
##
## $Instant
##
## H0: No instantaneous causality between: DAX and SMI CAC FTSE
##
## data: VAR object var.a
## Chi-squared = 759, df = 3, p-value <0.0000000000000002
##===========================================##
## Forecasting VAR models
fcast = predict(var.a, n.ahead = 25)
plot(fcast)
##===========================================##
# Forecasting the DAX index
DAX = fcast$fcst[1]; DAX  # type list
## $DAX
## fcst lower upper CI
## [1,] 5.601 -57 68 63
## [2,] -5.920 -69 57 63
## [3,] -13.003 -76 50 63
## [4,] 13.727 -50 77 63
## [5,] -36.354 -100 27 63
## [6,] -0.649 -64 63 64
## [7,] 5.966 -58 70 64
## [8,] 8.825 -55 73 64
## [9,] 2.155 -62 66 64
## [10,] 2.121 -62 67 65
## [11,] -0.806 -65 64 65
## [12,] -4.248 -69 60 65
## [13,] 0.313 -64 65 65
## [14,] -2.108 -67 62 65
## [15,] 1.305 -63 66 65
## [16,] 0.237 -64 65 65
## [17,] 1.782 -63 66 65
## [18,] -0.360 -65 64 65
## [19,] 0.722 -64 65 65
## [20,] -0.691 -65 64 65
## [21,] -0.434 -65 64 65
## [22,] -0.394 -65 64 65
## [23,] -0.085 -65 65 65
## [24,] 0.194 -64 65 65
## [25,] 0.228 -64 65 65
## [1] 5.601 -5.920 -13.003 13.727 -36.354 -0.649 5.966 8.825 2.155
## [10] 2.121 -0.806 -4.248 0.313 -2.108 1.305 0.237 1.782 -0.360
## [19] 0.722 -0.691 -0.434 -0.394 -0.085 0.194 0.228
## Time Series:
## Start = c(1998, 164)
## End = c(1998, 169)
## Frequency = 260
## DAX SMI CAC FTSE
## 1999 5598 7953 4042 5680
## 1999 5460 7721 3940 5588
## 1999 5286 7448 3846 5433
## 1999 5387 7608 3946 5462
## 1999 5355 7553 3952 5400
## 1999 5474 7676 3995 5455
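The next chunk appends a vector x of point forecasts to the historical DAX series, but the extraction of x is missing from the source; a plausible reconstruction (an assumption):
# Assumed step (not shown in the source): pull the DAX point forecasts
# into a plain numeric vector before appending them to the history
x <- fcast$fcst$DAX[, "fcst"]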
# Adding data and forecast to one time series
DAXinv = ts(c(EuStockMarkets[, 1], x),
            start = c(1991, 130), frequency = 260)
plot(DAXinv)
##===========================================##
## Creating an advanced plot with visual separation
library(lattice)
library(grid)
library(zoo)
# Converting to object zoo
x = zoo(DAXinv[1786:1885])
##===========================================##
# Advanced xyplot from lattice
xyplot(x, grid=TRUE, panel = function(x, y, ...){
panel.xyplot(x, y, col="red", ...)
grid.clip(x = unit(76, "native"), just=c("right"))
    panel.xyplot(x, y, col="green", ...) })
8 Neural Networks in Time Series Analysis
## Import the APTelectricity.csv file as APTelectric
library(readr)
APTelectric <- read_csv("APTelectricity.csv")
myts = ts(APTelectric$watt, frequency = 288)
plot(myts)
## ===========================================##
set.seed(34)
library(forecast)
fit = nnetar(myts)
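To see what nnetar chose automatically, the fitted object can be inspected (a small addition, not in the original source):
# Inspecting the automatically selected network
fit         # prints the NNAR(p, k) specification
fit$p       # number of lagged inputs used
fit$size    # number of nodes in the hidden layer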
nnetforecast <- forecast(fit, h = 400, PI = F)
library(ggplot2)
autoplot(nnetforecast)
## ===========================================##
## Using an external regressor in a neural net
fit2 = nnetar(myts, xreg = APTelectric$appliances)
## ===========================================##
# Defining the vector which we want to forecast
y = rep(2, times = 12 * 10)
nnetforecast <- forecast(fit2, xreg = y, PI = F)
library(ggplot2)
autoplot(nnetforecast)
9 Applied Time Series Analysis and Forecasting with R Projects
This is the second course by R-Tutorial on Udemy.
9.1 Project I Trending Data: Singapore Labour Force Participation
# https://docs.google.com/spreadsheets/d/1frieoKODnD9sX3VCZy5c3QAjBXMY-vN7k_I9gR-gcU8/pub
# http://www.gapminder.org/data/
# Singapore data
"70.19999695 71.09999847 71.69999695 72.30000305 73.09999847 72.90000153 74.40000153 75.40000153 76 76.90000153 77.40000153 78.19999695 78.90000153 78.69999695 79 78 80 79.80000305 80.30000305 80.5 80.69999695 81.09999847 81.5 81.90000153 82.30000305 82.69999695 83.19999695 83.5"
## [1] "70.19999695 71.09999847 71.69999695 72.30000305 73.09999847 72.90000153 74.40000153 75.40000153 76 76.90000153 77.40000153 78.19999695 78.90000153 78.69999695 79 78 80 79.80000305 80.30000305 80.5 80.69999695 81.09999847 81.5 81.90000153 82.30000305 82.69999695 83.19999695 83.5"
# Import with scan:
# singapur = scan()
# singapore <- data.frame(singapur)
# head(singapore)
# write.csv(singapore, 'df.csv', row.names = F)
singapur <- read.csv("df.csv")
head(singapur)
## singapur
## 1 70
## 2 71
## 3 72
## 4 72
## 5 73
## 6 73
## ===========================================##
# Conversion to time series
singapur = ts(singapur, start = 1980)
plot(singapur, ylab = "Labour Force Participation Rate 25-54")
library(forecast)
# Exponential smoothing with holt
holttrend = holt(singapur, h = 5)
summary(holttrend)
##
## Forecast method: Holt's method
##
## Model Information:
## Holt's method
##
## Call:
## holt(y = singapur, h = 5)
##
## Smoothing parameters:
## alpha = 0.6378
## beta = 0.1212
##
## Initial states:
## l = 69.619
## b = 0.6666
##
## sigma: 0.55
##
## AIC AICc BIC
## 66 69 72
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.082 0.51 0.31 -0.11 0.4 0.5 -0.092
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2008 84 83 85 83 85
## 2009 84 83 85 83 86
## 2010 85 84 86 83 86
## 2011 85 84 86 83 87
## 2012 85 84 87 83 88
## ===========================================##
# Phi auto generated
plot(holt(singapur, h = 15, damped = T))
##
## Forecast method: Damped Holt's method
##
## Model Information:
## Damped Holt's method
##
## Call:
## holt(y = singapur, h = 15, damped = T)
##
## Smoothing parameters:
## alpha = 0.5149
## beta = 0.0001
## phi = 0.9666
##
## Initial states:
## l = 69.5404
## b = 0.771
##
## sigma: 0.51
##
## AIC AICc BIC
## 63 67 71
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.014 0.47 0.33 0.017 0.42 0.53 -0.0016
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2008 84 83 84 83 85
## 2009 84 83 85 83 85
## 2010 84 83 85 83 86
## 2011 85 84 85 83 86
## 2012 85 84 86 83 86
## 2013 85 84 86 83 87
## 2014 85 84 86 84 87
## 2015 85 84 87 84 87
## 2016 86 85 87 84 87
## 2017 86 85 87 84 88
## 2018 86 85 87 84 88
## 2019 86 85 88 84 88
## 2020 87 85 88 84 89
## 2021 87 85 88 85 89
## 2022 87 85 88 85 89
## Series: singapur
## ARIMA(1,1,0) with drift
##
## Coefficients:
## ar1 drift
## -0.37 0.490
## s.e. 0.18 0.072
##
## sigma^2 estimated as 0.278: log likelihood=-20
## AIC=46 AICc=47 BIC=50
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.0069 0.5 0.38 0.018 0.49 0.61 0.055
## ===========================================##
# Exact calculation of Arima parameters
auto.arima(singapur, stepwise = F, approximation = F)
## Series: singapur
## ARIMA(1,1,0) with drift
##
## Coefficients:
## ar1 drift
## -0.37 0.490
## s.e. 0.18 0.072
##
## sigma^2 estimated as 0.278: log likelihood=-20
## AIC=46 AICc=47 BIC=50
# Overview plot - models
holttrend = holt(singapur, h = 10)
holtdamped = holt(singapur, h = 10, damped = T)
arimafore = forecast(auto.arima(singapur), h = 10)
## ===========================================##
library(ggplot2)
# 3 Forecast Lines as Comparison
autoplot(singapur) + forecast::autolayer(holttrend$mean, series = "Holt Linear Trend") +
forecast::autolayer(holtdamped$mean, series = "Holt Damped Trend") + forecast::autolayer(arimafore$mean,
series = "ARIMA") + xlab("year") + ylab("Labour Force Participation Rate Age 25-54") +
guides(colour = guide_legend(title = "Forecast Method")) + theme(legend.position = c(0.8,
0.2)) + ggtitle("Singapur") + theme(plot.title = element_text(family = "Times",
    hjust = 0.5, color = "blue", face = "bold", size = 15))
## ===========================================##
# Exercise - Dataset vs Model
# Models
holttrend = holt(singapur, h = 10)
holtdamped = holt(singapur, h = 10, damped = T)
arimafore = forecast(auto.arima(singapur), h = 10)
autoplot(singapur) + geom_line(size = 2) + forecast::autolayer(holttrend$fitted,
series = "Holt Linear Trend", size = 1.1) + forecast::autolayer(holtdamped$fitted,
series = "Holt Damped Trend", size = 1.1) + forecast::autolayer(arimafore$fitted,
series = "ARIMA", size = 1.1) + xlab("year") + ylab("Labour Force Participation Rate Age 25-54") +
guides(colour = guide_legend(title = "Forecast Method")) + theme(legend.position = c(0.8,
0.2)) + ggtitle("Singapur") + theme(plot.title = element_text(family = "Times",
    hjust = 0.5, color = "blue", face = "bold", size = 15))
9.2 Project II Seasonal Data: Monthly Inflation Rates of Germany
########## German Inflation Rates Project
########## https://www.statbureau.org/en/germany/inflation-tables
"-0.31\t0.41\t0.51\t-0.20\t0.61\t0.20\t0.61\t-0.30\t-0.10\t-0.20\t-0.51\t0.41
-0.51\t0.61\t-0.20\t0.10\t-0.10\t0.30\t0.00\t0.20\t-0.30\t0.00\t-0.10\t0.81
-0.60\t0.40\t0.50\t0.10\t-0.10\t0.00\t0.20\t0.10\t-0.10\t0.10\t0.10\t0.60
-0.20\t0.60\t0.59\t0.00\t0.00\t0.10\t0.20\t0.10\t0.20\t0.00\t0.20\t0.19
-0.10\t0.68\t0.58\t-0.19\t0.00\t-0.19\t0.39\t0.38\t0.10\t0.00\t0.10\t0.29
-0.48\t0.57\t0.48\t-0.47\t0.38\t0.09\t0.47\t0.00\t0.00\t-0.19\t0.19\t0.38
-0.56\t0.47\t0.28\t-0.19\t-0.09\t0.28\t0.28\t0.00\t0.00\t-0.28\t0.00\t0.00
-1.03\t0.85\t0.47\t0.00\t0.09\t-0.09\t0.19\t0.00\t-0.19\t0.00\t0.09\t-0.09
-0.84\t0.38\t0.75\t-0.37\t0.28\t0.09\t0.28\t0.00\t0.09\t0.19\t0.09\t0.74
-0.64\t0.65\t0.18\t0.00\t-0.18\t0.18\t0.37\t0.09\t0.09\t0.00\t\t
"## [1] "-0.31\t0.41\t0.51\t-0.20\t0.61\t0.20\t0.61\t-0.30\t-0.10\t-0.20\t-0.51\t0.41\n-0.51\t0.61\t-0.20\t0.10\t-0.10\t0.30\t0.00\t0.20\t-0.30\t0.00\t-0.10\t0.81\n-0.60\t0.40\t0.50\t0.10\t-0.10\t0.00\t0.20\t0.10\t-0.10\t0.10\t0.10\t0.60\n-0.20\t0.60\t0.59\t0.00\t0.00\t0.10\t0.20\t0.10\t0.20\t0.00\t0.20\t0.19\n-0.10\t0.68\t0.58\t-0.19\t0.00\t-0.19\t0.39\t0.38\t0.10\t0.00\t0.10\t0.29\n-0.48\t0.57\t0.48\t-0.47\t0.38\t0.09\t0.47\t0.00\t0.00\t-0.19\t0.19\t0.38\n-0.56\t0.47\t0.28\t-0.19\t-0.09\t0.28\t0.28\t0.00\t0.00\t-0.28\t0.00\t0.00\n-1.03\t0.85\t0.47\t0.00\t0.09\t-0.09\t0.19\t0.00\t-0.19\t0.00\t0.09\t-0.09\n-0.84\t0.38\t0.75\t-0.37\t0.28\t0.09\t0.28\t0.00\t0.09\t0.19\t0.09\t0.74\n-0.64\t0.65\t0.18\t0.00\t-0.18\t0.18\t0.37\t0.09\t0.09\t0.00\t\t\n"
# mydata = scan()
# mydata1 <- data.frame(mydata)
# head(mydata1)
# write.csv(mydata1,'df_mydata1.csv',row.names = F)
mydata <- read.csv("df_mydata1.csv")
head(mydata)
## mydata
## 1 -0.31
## 2 0.41
## 3 0.51
## 4 -0.20
## 5 0.61
## 6 0.20
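The chunk that built the germaninfl series used through the rest of this project is missing from the source; a plausible reconstruction of it and of the decomposition printed below (an assumption based on the output) is:
# Assumed reconstruction - the generating chunk is not shown in the source
germaninfl <- ts(mydata$mydata, start = 2008, frequency = 12)
germaninfl              # prints the monthly table below
decompose(germaninfl)   # prints the additive decomposition below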
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2008 -0.31 0.41 0.51 -0.20 0.61 0.20 0.61 -0.30 -0.10 -0.20 -0.51 0.41
## 2009 -0.51 0.61 -0.20 0.10 -0.10 0.30 0.00 0.20 -0.30 0.00 -0.10 0.81
## 2010 -0.60 0.40 0.50 0.10 -0.10 0.00 0.20 0.10 -0.10 0.10 0.10 0.60
## 2011 -0.20 0.60 0.59 0.00 0.00 0.10 0.20 0.10 0.20 0.00 0.20 0.19
## 2012 -0.10 0.68 0.58 -0.19 0.00 -0.19 0.39 0.38 0.10 0.00 0.10 0.29
## 2013 -0.48 0.57 0.48 -0.47 0.38 0.09 0.47 0.00 0.00 -0.19 0.19 0.38
## 2014 -0.56 0.47 0.28 -0.19 -0.09 0.28 0.28 0.00 0.00 -0.28 0.00 0.00
## 2015 -1.03 0.85 0.47 0.00 0.09 -0.09 0.19 0.00 -0.19 0.00 0.09 -0.09
## 2016 -0.84 0.38 0.75 -0.37 0.28 0.09 0.28 0.00 0.09 0.19 0.09 0.74
## 2017 -0.64 0.65 0.18 0.00 -0.18 0.18 0.37 0.09 0.09 0.00
## Time-Series [1:118, 1] from 2008 to 2018: -0.31 0.41 0.51 -0.2 0.61 0.2 0.61 -0.3 -0.1 -0.2 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr "mydata"
## $x
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2008 -0.31 0.41 0.51 -0.20 0.61 0.20 0.61 -0.30 -0.10 -0.20 -0.51 0.41
## 2009 -0.51 0.61 -0.20 0.10 -0.10 0.30 0.00 0.20 -0.30 0.00 -0.10 0.81
## 2010 -0.60 0.40 0.50 0.10 -0.10 0.00 0.20 0.10 -0.10 0.10 0.10 0.60
## 2011 -0.20 0.60 0.59 0.00 0.00 0.10 0.20 0.10 0.20 0.00 0.20 0.19
## 2012 -0.10 0.68 0.58 -0.19 0.00 -0.19 0.39 0.38 0.10 0.00 0.10 0.29
## 2013 -0.48 0.57 0.48 -0.47 0.38 0.09 0.47 0.00 0.00 -0.19 0.19 0.38
## 2014 -0.56 0.47 0.28 -0.19 -0.09 0.28 0.28 0.00 0.00 -0.28 0.00 0.00
## 2015 -1.03 0.85 0.47 0.00 0.09 -0.09 0.19 0.00 -0.19 0.00 0.09 -0.09
## 2016 -0.84 0.38 0.75 -0.37 0.28 0.09 0.28 0.00 0.09 0.19 0.09 0.74
## 2017 -0.64 0.65 0.18 0.00 -0.18 0.18 0.37 0.09 0.09 0.00
##
## $seasonal
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct
## 2008 -0.638 0.491 0.313 -0.205 -0.034 -0.024 0.195 -0.042 -0.128 -0.137
## 2009 -0.638 0.491 0.313 -0.205 -0.034 -0.024 0.195 -0.042 -0.128 -0.137
## 2010 -0.638 0.491 0.313 -0.205 -0.034 -0.024 0.195 -0.042 -0.128 -0.137
## 2011 -0.638 0.491 0.313 -0.205 -0.034 -0.024 0.195 -0.042 -0.128 -0.137
## 2012 -0.638 0.491 0.313 -0.205 -0.034 -0.024 0.195 -0.042 -0.128 -0.137
## 2013 -0.638 0.491 0.313 -0.205 -0.034 -0.024 0.195 -0.042 -0.128 -0.137
## 2014 -0.638 0.491 0.313 -0.205 -0.034 -0.024 0.195 -0.042 -0.128 -0.137
## 2015 -0.638 0.491 0.313 -0.205 -0.034 -0.024 0.195 -0.042 -0.128 -0.137
## 2016 -0.638 0.491 0.313 -0.205 -0.034 -0.024 0.195 -0.042 -0.128 -0.137
## 2017 -0.638 0.491 0.313 -0.205 -0.034 -0.024 0.195 -0.042 -0.128 -0.137
## Nov Dec
## 2008 -0.074 0.282
## 2009 -0.074 0.282
## 2010 -0.074 0.282
## 2011 -0.074 0.282
## 2012 -0.074 0.282
## 2013 -0.074 0.282
## 2014 -0.074 0.282
## 2015 -0.074 0.282
## 2016 -0.074 0.282
## 2017
##
## $trend
## Jan Feb Mar Apr May Jun Jul Aug Sep
## 2008 NA NA NA NA NA NA 0.0858 0.0858 0.0646
## 2009 -0.0163 -0.0208 -0.0083 -0.0083 0.0171 0.0508 0.0638 0.0513 0.0717
## 2010 0.0842 0.0883 0.0925 0.1050 0.1175 0.1171 0.1250 0.1500 0.1621
## 2011 0.1742 0.1742 0.1867 0.1950 0.1950 0.1821 0.1692 0.1767 0.1796
## 2012 0.1471 0.1667 0.1742 0.1700 0.1658 0.1658 0.1542 0.1337 0.1250
## 2013 0.1558 0.1433 0.1233 0.1112 0.1071 0.1146 0.1150 0.1075 0.0950
## 2014 0.0787 0.0708 0.0708 0.0671 0.0554 0.0317 -0.0038 -0.0075 0.0163
## 2015 0.0204 0.0167 0.0087 0.0125 0.0279 0.0279 0.0321 0.0204 0.0125
## 2016 0.0279 0.0317 0.0433 0.0629 0.0708 0.1054 0.1483 0.1679 0.1554
## 2017 0.1354 0.1429 0.1467 0.1387 NA NA NA NA NA
## Oct Nov Dec
## 2008 0.0475 0.0304 0.0050
## 2009 0.1008 0.1008 0.0883
## 2010 0.1617 0.1617 0.1700
## 2011 0.1712 0.1633 0.1512
## 2012 0.1092 0.1133 0.1408
## 2013 0.0983 0.0904 0.0787
## 2014 0.0321 0.0475 0.0396
## 2015 0.0088 0.0013 0.0167
## 2016 0.1471 0.1433 0.1279
## 2017 NA
##
## $random
## Jan Feb Mar Apr May Jun Jul Aug
## 2008 NA NA NA NA NA NA 0.32890 -0.34374
## 2009 0.14408 0.13936 -0.50490 0.31357 -0.08300 0.27309 -0.25902 0.19084
## 2010 -0.04633 -0.17981 0.09427 0.20024 -0.18342 -0.09316 -0.12027 -0.00791
## 2011 0.26367 -0.06564 0.09010 0.01024 -0.16092 -0.05816 -0.16444 -0.03457
## 2012 0.39075 0.02186 0.09260 -0.15476 -0.13175 -0.33191 0.04056 0.28834
## 2013 0.00200 -0.06481 0.04344 -0.37601 0.30700 -0.00066 0.15973 -0.06541
## 2014 -0.00092 -0.09231 -0.10406 -0.05184 -0.11133 0.27226 0.08848 0.04959
## 2015 -0.41258 0.34186 0.14802 0.19274 0.09617 -0.09399 -0.03735 0.02168
## 2016 -0.23008 -0.14314 0.39344 -0.22768 0.24325 0.00851 -0.06360 -0.12582
## 2017 -0.13758 0.01561 -0.27990 0.06649 NA NA NA NA
## Sep Oct Nov Dec
## 2008 -0.03624 -0.11087 -0.46652 0.12293
## 2009 -0.24332 0.03580 -0.12694 0.43959
## 2010 -0.13374 0.07496 0.01223 0.14793
## 2011 0.14876 -0.03462 0.11056 -0.24332
## 2012 0.10334 0.02746 0.06056 -0.13291
## 2013 0.03334 -0.15170 0.17348 0.01918
## 2014 0.11209 -0.17545 0.02640 -0.32166
## 2015 -0.07416 0.12788 0.16265 -0.38874
## 2016 0.06293 0.17955 0.02056 0.33001
## 2017 NA NA
##
## $figure
## [1] -0.638 0.491 0.313 -0.205 -0.034 -0.024 0.195 -0.042 -0.128 -0.137
## [11] -0.074 0.282
##
## $type
## [1] "additive"
##
## attr(,"class")
## [1] "decomposed.ts"
# Using the stl method
plot(stl(germaninfl, s.window = 7))
# stl forecasting
plot(stlf(germaninfl, method = "ets"))
## ===========================================##
# using autoplot
library(ggplot2)
autoplot(stlf(germaninfl, method = "ets"))
## Seasonal Arima (package forecast)
auto.arima(germaninfl, stepwise = T, approximation = F, trace = T)
##
## ARIMA(2,0,2)(1,1,1)[12] with drift : Inf
## ARIMA(0,0,0)(0,1,0)[12] with drift : 32
## ARIMA(1,0,0)(1,1,0)[12] with drift : 23
## ARIMA(0,0,1)(0,1,1)[12] with drift : 6.8
## ARIMA(0,0,0)(0,1,0)[12] : 30
## ARIMA(0,0,1)(0,1,0)[12] with drift : 29
## ARIMA(0,0,1)(1,1,1)[12] with drift : Inf
## ARIMA(0,0,1)(0,1,2)[12] with drift : Inf
## ARIMA(0,0,1)(1,1,0)[12] with drift : 24
## ARIMA(0,0,1)(1,1,2)[12] with drift : Inf
## ARIMA(0,0,0)(0,1,1)[12] with drift : 5.6
## ARIMA(0,0,0)(1,1,1)[12] with drift : Inf
## ARIMA(0,0,0)(0,1,2)[12] with drift : Inf
## ARIMA(0,0,0)(1,1,0)[12] with drift : 23
## ARIMA(0,0,0)(1,1,2)[12] with drift : Inf
## ARIMA(1,0,0)(0,1,1)[12] with drift : 6.4
## ARIMA(1,0,1)(0,1,1)[12] with drift : 1.7
## ARIMA(1,0,1)(0,1,0)[12] with drift : 22
## ARIMA(1,0,1)(1,1,1)[12] with drift : Inf
## ARIMA(1,0,1)(0,1,2)[12] with drift : Inf
## ARIMA(1,0,1)(1,1,0)[12] with drift : 17
## ARIMA(1,0,1)(1,1,2)[12] with drift : Inf
## ARIMA(2,0,1)(0,1,1)[12] with drift : 0.69
## ARIMA(2,0,1)(0,1,0)[12] with drift : 23
## ARIMA(2,0,1)(1,1,1)[12] with drift : Inf
## ARIMA(2,0,1)(0,1,2)[12] with drift : Inf
## ARIMA(2,0,1)(1,1,0)[12] with drift : 16
## ARIMA(2,0,1)(1,1,2)[12] with drift : Inf
## ARIMA(2,0,0)(0,1,1)[12] with drift : 2.3
## ARIMA(3,0,1)(0,1,1)[12] with drift : 2.6
## ARIMA(2,0,2)(0,1,1)[12] with drift : 0.23
## ARIMA(2,0,2)(0,1,0)[12] with drift : Inf
## ARIMA(2,0,2)(0,1,2)[12] with drift : Inf
## ARIMA(2,0,2)(1,1,0)[12] with drift : 17
## ARIMA(2,0,2)(1,1,2)[12] with drift : Inf
## ARIMA(1,0,2)(0,1,1)[12] with drift : 0.049
## ARIMA(1,0,2)(0,1,0)[12] with drift : 23
## ARIMA(1,0,2)(1,1,1)[12] with drift : Inf
## ARIMA(1,0,2)(0,1,2)[12] with drift : Inf
## ARIMA(1,0,2)(1,1,0)[12] with drift : 15
## ARIMA(1,0,2)(1,1,2)[12] with drift : Inf
## ARIMA(0,0,2)(0,1,1)[12] with drift : 4.4
## ARIMA(1,0,3)(0,1,1)[12] with drift : 1.9
## ARIMA(0,0,3)(0,1,1)[12] with drift : 4.7
## ARIMA(2,0,3)(0,1,1)[12] with drift : 2.3
## ARIMA(1,0,2)(0,1,1)[12] : -2.2
## ARIMA(1,0,2)(0,1,0)[12] : 20
## ARIMA(1,0,2)(1,1,1)[12] : Inf
## ARIMA(1,0,2)(0,1,2)[12] : Inf
## ARIMA(1,0,2)(1,1,0)[12] : 13
## ARIMA(1,0,2)(1,1,2)[12] : Inf
## ARIMA(0,0,2)(0,1,1)[12] : 2.3
## ARIMA(1,0,1)(0,1,1)[12] : -0.45
## ARIMA(2,0,2)(0,1,1)[12] : -0.73
## ARIMA(1,0,3)(0,1,1)[12] : -0.31
## ARIMA(0,0,1)(0,1,1)[12] : 4.7
## ARIMA(0,0,3)(0,1,1)[12] : 2.5
## ARIMA(2,0,1)(0,1,1)[12] : -1.5
## ARIMA(2,0,3)(0,1,1)[12] : Inf
##
## Best model: ARIMA(1,0,2)(0,1,1)[12]
## Series: germaninfl
## ARIMA(1,0,2)(0,1,1)[12]
##
## Coefficients:
## ar1 ma1 ma2 sma1
## -0.80 0.78 0.21 -0.76
## s.e. 0.12 0.15 0.10 0.13
##
## sigma^2 estimated as 0.0488: log likelihood=6.4
## AIC=-2.8 AICc=-2.2 BIC=11
# Getting an object
germaninflarima = auto.arima(germaninfl, stepwise = T, approximation = F, trace = T)
##
## ARIMA(2,0,2)(1,1,1)[12] with drift : Inf
## ARIMA(0,0,0)(0,1,0)[12] with drift : 32
## ARIMA(1,0,0)(1,1,0)[12] with drift : 23
## ARIMA(0,0,1)(0,1,1)[12] with drift : 6.8
## ARIMA(0,0,0)(0,1,0)[12] : 30
## ARIMA(0,0,1)(0,1,0)[12] with drift : 29
## ARIMA(0,0,1)(1,1,1)[12] with drift : Inf
## ARIMA(0,0,1)(0,1,2)[12] with drift : Inf
## ARIMA(0,0,1)(1,1,0)[12] with drift : 24
## ARIMA(0,0,1)(1,1,2)[12] with drift : Inf
## ARIMA(0,0,0)(0,1,1)[12] with drift : 5.6
## ARIMA(0,0,0)(1,1,1)[12] with drift : Inf
## ARIMA(0,0,0)(0,1,2)[12] with drift : Inf
## ARIMA(0,0,0)(1,1,0)[12] with drift : 23
## ARIMA(0,0,0)(1,1,2)[12] with drift : Inf
## ARIMA(1,0,0)(0,1,1)[12] with drift : 6.4
## ARIMA(1,0,1)(0,1,1)[12] with drift : 1.7
## ARIMA(1,0,1)(0,1,0)[12] with drift : 22
## ARIMA(1,0,1)(1,1,1)[12] with drift : Inf
## ARIMA(1,0,1)(0,1,2)[12] with drift : Inf
## ARIMA(1,0,1)(1,1,0)[12] with drift : 17
## ARIMA(1,0,1)(1,1,2)[12] with drift : Inf
## ARIMA(2,0,1)(0,1,1)[12] with drift : 0.69
## ARIMA(2,0,1)(0,1,0)[12] with drift : 23
## ARIMA(2,0,1)(1,1,1)[12] with drift : Inf
## ARIMA(2,0,1)(0,1,2)[12] with drift : Inf
## ARIMA(2,0,1)(1,1,0)[12] with drift : 16
## ARIMA(2,0,1)(1,1,2)[12] with drift : Inf
## ARIMA(2,0,0)(0,1,1)[12] with drift : 2.3
## ARIMA(3,0,1)(0,1,1)[12] with drift : 2.6
## ARIMA(2,0,2)(0,1,1)[12] with drift : 0.23
## ARIMA(2,0,2)(0,1,0)[12] with drift : Inf
## ARIMA(2,0,2)(0,1,2)[12] with drift : Inf
## ARIMA(2,0,2)(1,1,0)[12] with drift : 17
## ARIMA(2,0,2)(1,1,2)[12] with drift : Inf
## ARIMA(1,0,2)(0,1,1)[12] with drift : 0.049
## ARIMA(1,0,2)(0,1,0)[12] with drift : 23
## ARIMA(1,0,2)(1,1,1)[12] with drift : Inf
## ARIMA(1,0,2)(0,1,2)[12] with drift : Inf
## ARIMA(1,0,2)(1,1,0)[12] with drift : 15
## ARIMA(1,0,2)(1,1,2)[12] with drift : Inf
## ARIMA(0,0,2)(0,1,1)[12] with drift : 4.4
## ARIMA(1,0,3)(0,1,1)[12] with drift : 1.9
## ARIMA(0,0,3)(0,1,1)[12] with drift : 4.7
## ARIMA(2,0,3)(0,1,1)[12] with drift : 2.3
## ARIMA(1,0,2)(0,1,1)[12] : -2.2
## ARIMA(1,0,2)(0,1,0)[12] : 20
## ARIMA(1,0,2)(1,1,1)[12] : Inf
## ARIMA(1,0,2)(0,1,2)[12] : Inf
## ARIMA(1,0,2)(1,1,0)[12] : 13
## ARIMA(1,0,2)(1,1,2)[12] : Inf
## ARIMA(0,0,2)(0,1,1)[12] : 2.3
## ARIMA(1,0,1)(0,1,1)[12] : -0.45
## ARIMA(2,0,2)(0,1,1)[12] : -0.73
## ARIMA(1,0,3)(0,1,1)[12] : -0.31
## ARIMA(0,0,1)(0,1,1)[12] : 4.7
## ARIMA(0,0,3)(0,1,1)[12] : 2.5
## ARIMA(2,0,1)(0,1,1)[12] : -1.5
## ARIMA(2,0,3)(0,1,1)[12] : Inf
##
## Best model: ARIMA(1,0,2)(0,1,1)[12]
## ===========================================##
# Forecast
forec = forecast(germaninflarima)
plot(forec)
## ETS(A,N,A)
##
## Call:
## ets(y = germaninfl)
##
## Smoothing parameters:
## alpha = 0.0001
## gamma = 0.0001
##
## Initial states:
## l = 0.1021
## s = 0.27 -0.087 -0.13 -0.1 -0.038 0.2
## -0.0085 -0.011 -0.24 0.32 0.45 -0.63
##
## sigma: 0.21
##
## AIC AICc BIC
## 215 219 256
## ===========================================##
## Cross Validation of 2 models
germaninflets = ets(germaninfl)
germaninflarima = auto.arima(germaninfl, stepwise = T, approximation = F, trace = T)
##
## ARIMA(2,0,2)(1,1,1)[12] with drift : Inf
## ARIMA(0,0,0)(0,1,0)[12] with drift : 32
## ARIMA(1,0,0)(1,1,0)[12] with drift : 23
## ARIMA(0,0,1)(0,1,1)[12] with drift : 6.8
## ARIMA(0,0,0)(0,1,0)[12] : 30
## ARIMA(0,0,1)(0,1,0)[12] with drift : 29
## ARIMA(0,0,1)(1,1,1)[12] with drift : Inf
## ARIMA(0,0,1)(0,1,2)[12] with drift : Inf
## ARIMA(0,0,1)(1,1,0)[12] with drift : 24
## ARIMA(0,0,1)(1,1,2)[12] with drift : Inf
## ARIMA(0,0,0)(0,1,1)[12] with drift : 5.6
## ARIMA(0,0,0)(1,1,1)[12] with drift : Inf
## ARIMA(0,0,0)(0,1,2)[12] with drift : Inf
## ARIMA(0,0,0)(1,1,0)[12] with drift : 23
## ARIMA(0,0,0)(1,1,2)[12] with drift : Inf
## ARIMA(1,0,0)(0,1,1)[12] with drift : 6.4
## ARIMA(1,0,1)(0,1,1)[12] with drift : 1.7
## ARIMA(1,0,1)(0,1,0)[12] with drift : 22
## ARIMA(1,0,1)(1,1,1)[12] with drift : Inf
## ARIMA(1,0,1)(0,1,2)[12] with drift : Inf
## ARIMA(1,0,1)(1,1,0)[12] with drift : 17
## ARIMA(1,0,1)(1,1,2)[12] with drift : Inf
## ARIMA(2,0,1)(0,1,1)[12] with drift : 0.69
## ARIMA(2,0,1)(0,1,0)[12] with drift : 23
## ARIMA(2,0,1)(1,1,1)[12] with drift : Inf
## ARIMA(2,0,1)(0,1,2)[12] with drift : Inf
## ARIMA(2,0,1)(1,1,0)[12] with drift : 16
## ARIMA(2,0,1)(1,1,2)[12] with drift : Inf
## ARIMA(2,0,0)(0,1,1)[12] with drift : 2.3
## ARIMA(3,0,1)(0,1,1)[12] with drift : 2.6
## ARIMA(2,0,2)(0,1,1)[12] with drift : 0.23
## ARIMA(2,0,2)(0,1,0)[12] with drift : Inf
## ARIMA(2,0,2)(0,1,2)[12] with drift : Inf
## ARIMA(2,0,2)(1,1,0)[12] with drift : 17
## ARIMA(2,0,2)(1,1,2)[12] with drift : Inf
## ARIMA(1,0,2)(0,1,1)[12] with drift : 0.049
## ARIMA(1,0,2)(0,1,0)[12] with drift : 23
## ARIMA(1,0,2)(1,1,1)[12] with drift : Inf
## ARIMA(1,0,2)(0,1,2)[12] with drift : Inf
## ARIMA(1,0,2)(1,1,0)[12] with drift : 15
## ARIMA(1,0,2)(1,1,2)[12] with drift : Inf
## ARIMA(0,0,2)(0,1,1)[12] with drift : 4.4
## ARIMA(1,0,3)(0,1,1)[12] with drift : 1.9
## ARIMA(0,0,3)(0,1,1)[12] with drift : 4.7
## ARIMA(2,0,3)(0,1,1)[12] with drift : 2.3
## ARIMA(1,0,2)(0,1,1)[12] : -2.2
## ARIMA(1,0,2)(0,1,0)[12] : 20
## ARIMA(1,0,2)(1,1,1)[12] : Inf
## ARIMA(1,0,2)(0,1,2)[12] : Inf
## ARIMA(1,0,2)(1,1,0)[12] : 13
## ARIMA(1,0,2)(1,1,2)[12] : Inf
## ARIMA(0,0,2)(0,1,1)[12] : 2.3
## ARIMA(1,0,1)(0,1,1)[12] : -0.45
## ARIMA(2,0,2)(0,1,1)[12] : -0.73
## ARIMA(1,0,3)(0,1,1)[12] : -0.31
## ARIMA(0,0,1)(0,1,1)[12] : 4.7
## ARIMA(0,0,3)(0,1,1)[12] : 2.5
## ARIMA(2,0,1)(0,1,1)[12] : -1.5
## ARIMA(2,0,3)(0,1,1)[12] : Inf
##
## Best model: ARIMA(1,0,2)(0,1,1)[12]
forecastets = function(x, h) {
forecast(ets(x), h = h)
}
forecastarima = function(x, h) {
    forecast(auto.arima(x, stepwise = T, approximation = F), h = h)
}
etserror = tsCV(germaninfl, forecastets, h = 1)
arimaerror = tsCV(germaninfl, forecastarima, h = 1)
mean(etserror^2, na.rm = TRUE)
## [1] 0.091
mean(arimaerror^2, na.rm = TRUE)
## [1] 0.078
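Taking square roots puts the two cross-validated errors on the scale of the data (a small addition, not in the source):
# RMSE comparison of the two models; the ARIMA wins slightly here
sqrt(mean(etserror^2, na.rm = TRUE))
sqrt(mean(arimaerror^2, na.rm = TRUE))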
9.3 Project III Irregularly Spaced Data: Analyze A Biotech Stock
# Fetching data from yahoo with quantmod
library(quantmod)
novartis = getSymbols("NVS", auto.assign = F, from = "2015-01-01", to = "2016-01-01")
# use argument return.class to modify output class to ts or mts
## using a column like a standard ts
plot(as.ts(novartis$NVS.Open))
## ===========================================##
# acf and pacf to get an idea about autocorrelation
library(forecast)
ggtsdisplay(novartis$NVS.Open)
# Arima model
novartisarima = auto.arima(novartis$NVS.Open, stepwise = T, approximation = F, trace = T)
##
## ARIMA(2,1,2) with drift : 790
## ARIMA(0,1,0) with drift : 789
## ARIMA(1,1,0) with drift : 787
## ARIMA(0,1,1) with drift : 786
## ARIMA(0,1,0) : 787
## ARIMA(1,1,1) with drift : 786
## ARIMA(0,1,2) with drift : 786
## ARIMA(1,1,2) with drift : 788
## ARIMA(0,1,1) : 785
## ARIMA(1,1,1) : 785
## ARIMA(0,1,2) : 785
## ARIMA(1,1,0) : 785
## ARIMA(1,1,2) : 786
##
## Best model: ARIMA(0,1,1)
## Series: novartis$NVS.Open
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## -0.155
## s.e. 0.068
##
## sigma^2 estimated as 1.32: log likelihood=-390
## AIC=784 AICc=785 BIC=792
## ===========================================##
# Alternative arima with autoregressive part
novartisarima2 = Arima(novartis$NVS.Open, order = c(1, 1, 1))
novartisarima2
## Series: novartis$NVS.Open
## ARIMA(1,1,1)
##
## Coefficients:
## ar1 ma1
## 0.59 -0.72
## s.e. 0.31 0.27
##
## sigma^2 estimated as 1.31: log likelihood=-389
## AIC=785 AICc=785 BIC=795
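A direct AICc comparison of the two candidates can be read off the fitted objects (a small addition, not in the source):
# Lower AICc favours the automatic ARIMA(0,1,1) here
novartisarima$aicc
novartisarima2$aicc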
## ===========================================##
## Getting a regular time series
# Conversion to dataframe
novartis = as.data.frame(novartis)
### Adding the rownames as date
novartis$Date = rownames(novartis)
novartis$Date = as.Date(novartis$Date)
head(novartis)
## NVS.Open NVS.High NVS.Low NVS.Close NVS.Volume NVS.Adjusted
## 2015-01-02 83 83 82 83 807800 67
## 2015-01-05 84 84 83 83 1537800 67
## 2015-01-06 83 83 82 82 1316900 67
## 2015-01-07 82 83 82 82 1598400 67
## 2015-01-08 84 86 84 85 2156700 70
## 2015-01-09 86 87 86 86 2206800 70
## Date
## 2015-01-02 2015-01-02
## 2015-01-05 2015-01-05
## 2015-01-06 2015-01-06
## 2015-01-07 2015-01-07
## 2015-01-08 2015-01-08
## 2015-01-09 2015-01-09
# Creating the date column; 'by' is the step size in days, and 'from'/'to'
# use as.Date to make sure they are in Date format
mydates = seq.Date(from = as.Date("2015-01-01"), to = as.Date("2016-01-01"), by = 1)
# Converting to a df (required for the merge)
mydates = data.frame(Date = mydates)
# Padding with 'mydates'
mydata = merge(novartis, mydates, by = "Date", all.y = T)
# Removing initial days so the series starts on a Monday
mydata = mydata[5:366, ]
# Removing Sundays; 'from' gives the position of the first row to remove
mydata = mydata[-(seq(from = 7, to = nrow(mydata), by = 7)), ]
# Removing saturdays
mydata = mydata[-(seq(from = 6, to = nrow(mydata), by = 6)), ]
## ===========================================##
# Using last observation carried forward imputation
mydata = na.locf(mydata)
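A quick check (not in the original source) confirms the imputation closed all gaps:
# Should be 0 after na.locf()
sum(is.na(mydata))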
## Which days are best to buy or sell?
# Putting the closeprice into a weekly time series
highestprice = ts(as.numeric(mydata$NVS.High), frequency = 5)
## ===========================================##
# Various plots
seasonplot(highestprice, season.labels = c("Mon", "Tue", "Wed", "Thu", "Fri"))
# Comparison with the low prices
par(mfrow = c(1, 2))
lowestprice = ts(as.numeric(mydata$NVS.Low), frequency = 5)
monthplot(lowestprice, base = median, col.base = "red")
monthplot(highestprice, base = median, col.base = "red")
9.4 Project IV Neural Networks: Neural Nets and Interactive Graphs
## Import Data Camping_Revenue as revenue
## Use import data (base) functionality in Rstudio
# write.csv(revenue,'revenue.csv',row.names = F)
revenue <- read.csv("revenue.csv")
revenue
## V1 V2
## 1 "1 ""16857"""
## 2 "2 ""14209"""
## 3 "3 ""15513"""
## 4 "4 ""16415"""
## 5 "5 ""19047"""
## 6 "6 ""20655"""
## 7 "7 ""20442"""
## 8 "8 ""20052"""
## 9 "9 ""19414"""
## 10 "10 ""18176"""
## 11 "11 ""15561"""
## 12 "12 ""17246"""
## 13 "13 ""16072"""
## 14 "14 ""17359"""
## 15 "15 ""16466"""
## 16 "16 ""17181"""
## 17 "17 ""19660"""
## 18 "18 ""21315"""
## 19 "19 ""23944"""
## 20 "20 ""21866"""
## 21 "21 ""20923"""
## 22 "22 ""19960"""
## 23 "23 ""17726"""
## 24 "24 ""16123"""
## 25 "25 ""16891"""
## 26 "26 ""17474"""
## 27 "27 ""17818"""
## 28 "28 ""16024"""
## 29 "29 ""20723"""
## 30 "30 ""21496"""
## 31 "31 ""21205"""
## 32 "32 ""20427"""
## 33 "33 ""20468"""
## 34 "34 ""18063"""
## 35 "35 ""16330"""
## 36 "36 ""16392"""
## 37 "37 ""16514"""
## 38 "38 ""18704"""
## 39 "39 ""16958"""
## 40 "40 ""18062"""
## 41 "41 ""19248"""
## 42 "42 ""20476"""
## 43 "43 ""324"""
## 44 "44 ""22947"""
## 45 "45 ""19306"""
## 46 "46 ""17567"""
## 47 "47 ""15644"""
## 48 "48 ""16240"""
## 49 "49 ""16990"""
## 50 "50 ""16298"""
## 51 "51 ""16674"""
## 52 "52 ""16509"""
## 53 "53 ""19225"""
## 54 "54 ""20753"""
## 55 "55 ""21852"""
## 56 "56 ""20978"""
## 57 "57 ""20385"""
## 58 "58 ""18141"""
## 59 "59 ""16359"""
## 60 "60 ""16057"""
## 61 "61 ""17637"""
## 62 "62 ""17912"""
## 63 "63 ""15255"""
## 64 "64 ""16782"""
## 65 "65 ""19819"""
## 66 "66 ""21817"""
## 67 "67 ""TBD"""
## 68 "68 ""TBD"""
## 69 "69 ""TBD"""
## 70 "70 ""18787"""
## 71 "71 ""17336"""
## 72 "72 ""16636"""
## 73 "73 ""17874"""
## 74 "74 ""16667"""
## 75 "75 ""16688"""
## 76 "76 ""18667"""
## 77 "77 ""19325"""
## 78 "78 ""21504"""
## 79 "79 ""23477"""
## 80 "80 ""23400"""
## 81 "81 ""821"""
## 82 "82 ""18221"""
## 83 "83 ""16572"""
## 84 "84 ""18821"""
## 85 "85 ""18685"""
## 86 "86 ""18356"""
## 87 "87 ""18104"""
## 88 "88 ""18753"""
## 89 "89 ""20407"""
## 90 "90 ""21655"""
## 91 "91 ""TBD"""
## 92 "92 ""23625"""
## 93 "93 ""21673"""
## 94 "94 ""20204"""
## 95 "95 ""19373"""
## 96 "96 ""19378"""
## 97 "97 ""18948"""
## 98 "98 ""19143"""
## 99 "99 ""19776"""
## 100 "100 ""21415"""
## 101 "101 ""21610"""
## 102 "102 ""23491"""
## 103 "103 ""25446"""
## 104 "104 ""24875"""
## 105 "105 ""23260"""
## 106 "106 ""21606"""
## 107 "107 ""19290"""
## 108 "108 ""19907"""
## 109 "109 ""18991"""
## 110 "110 ""17686"""
## 111 "111 ""18861"""
## 112 "112 ""19842"""
## 113 "113 ""22860"""
## 114 "114 ""24083"""
## 115 "115 ""25933"""
## 116 "116 ""25186"""
## 117 "117 ""25110"""
## 118 "118 ""21642"""
## 119 "119 ""19645"""
## 120 "120 ""19435"""
## 121 "121 ""19423"""
## 122 "122 ""20389"""
## 123 "123 ""19334"""
## 124 "124 ""22354"""
## 125 "125 ""23892"""
## 126 "126 ""27099"""
## 127 "127 ""27050"""
## 128 "128 ""27652"""
## 129 "129 ""26085"""
## 130 "130 ""24043"""
## 131 "131 ""21412"""
## 132 "132 ""22724"""
## 133 "133 ""22053"""
## 134 "134 ""22629"""
## 135 "135 ""22723"""
## 136 "136 ""22908"""
## 137 "137 ""25344"""
## 138 "138 ""27035"""
## 139 "139 ""27842"""
## 140 "140 ""27006"""
## 141 "141 ""25577"""
## 142 "142 ""23116"""
## 143 "143 ""24367"""
## 144 "144 ""25590"""
## 145 "145 ""23273"""
## 146 "146 ""24643"""
## 147 "147 ""25591"""
## 148 "148 ""24291"""
## 149 "149 ""26626"""
## 150 "150 ""28965"""
## 151 "151 ""30899"""
## 152 "152 ""31474"""
## 153 "153 ""28837"""
## 154 "154 ""25597"""
## 155 "155 ""24228"""
## 156 "156 ""23616"""
## 157 "157 ""24274"""
## 158 "158 ""25755"""
## 159 "159 ""24828"""
## 160 "160 ""26349"""
## 161 "161 ""28476"""
## 162 "162 ""30954"""
## 163 "163 ""32718"""
## 164 "164 ""32567"""
## 165 "165 ""30874"""
## 166 "166 ""27278"""
## 167 "167 ""24373"""
## 168 "168 ""23177"""
## 169 "169 ""24692"""
## 170 "170 ""24234"""
## 171 "171 ""25227"""
## 172 "172 ""24708"""
## 173 "173 ""27016"""
## 174 "174 ""29229"""
## 175 "175 ""31637"""
## 176 "176 ""29600"""
## 177 "177 ""29259"""
## 178 "178 ""26501"""
## 179 "179 ""23642"""
## 180 "180 ""24734"""
## 181 "181 ""25552"""
## 182 "182 ""23712"""
## 183 "183 ""24067"""
## 184 "184 ""25306"""
## 185 "185 ""26355"""
## 186 "186 ""30078"""
## 187 "187 ""31477"""
## 188 "188 ""31405"""
## 189 "189 ""29027"""
## 190 "190 ""26228"""
## 191 "191 ""24739"""
## 192 "192 ""3334333"""
## 193 "193 ""25022"""
## 194 "194 ""24128"""
## 195 "195 ""24892"""
## 196 "196 ""24940"""
## 197 "197 ""28045"""
## 198 "198 ""30099"""
## 199 "199 ""30624"""
## 200 "200 ""31047"""
## 201 "201 ""30076"""
## 202 "202 ""27232"""
## 203 "203 ""3"""
## 204 "204 ""24669"""
## 205 "205 ""24602"""
## 206 "206 ""24757"""
## 207 "207 ""26619"""
## 208 "208 ""27142"""
## 209 "209 ""29542"""
## 210 "210 ""31175"""
## 211 "211 ""32201"""
## 212 "212 ""32439"""
## 213 "213 ""30640"""
## 214 "214 ""28841"""
## 215 "215 ""25591"""
## 216 "216 ""26752"""
## 217 "217 ""26029"""
## 218 "218 ""25846"""
## 219 "219 ""28042"""
## 220 "220 ""27888"""
## 221 "221 ""29973"""
## 222 "222 ""32325"""
## 223 "223 ""32601"""
## 224 "224 ""32945"""
## 225 "225 ""31860"""
## 226 "226 ""29742"""
## 227 "227 ""28788"""
## 228 "228 ""28598"""
## 229 "229 ""28754"""
## 230 "230 ""26626"""
## 231 "231 ""27278"""
## 232 "232 ""43"""
## 233 "233 ""30845"""
## 234 "234 ""32892"""
## 235 "235 ""33916"""
## 236 "236 ""34366"""
## 237 "237 ""33185"""
## 238 "238 ""29267"""
## 239 "239 ""29369"""
## 240 "240 ""28964"""
## [1] "character"
# chopping off the useless quotes at 2 positions
library(tidyr)
revenue <- separate(revenue, col = V2, sep = c(2, -3), into = c("rest", "data", "rest2"))
# all the relevant data is in column 'data'
head(revenue)
## V1 rest data rest2
## 1 "1 "" 16857 """
## 2 "2 "" 14209 """
## 3 "3 "" 15513 """
## 4 "4 "" 16415 """
## 5 "5 "" 19047 """
## 6 "6 "" 20655 """
## [1] "character"
## ===========================================##
# conversion to time series
myts <- ts(as.numeric(revenue$data), start = 1997, frequency = 12)
# data is still not clean (outliers and NAs)
summary(myts)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 3 18980 23218 36912 26816 3334333 4
## ===========================================##
# all in one cleaning tool
library(forecast)
myts <- tsclean(myts)
# check the data
summary(myts)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 14209 19280 23266 23282 26658 34366
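To see exactly which observations tsclean() replaced, the cleaned series can be compared against a rebuilt raw series ('raw' is a hypothetical name, not in the source):
# Rebuild the uncleaned series and locate the replaced points
raw <- ts(as.numeric(revenue$data), start = 1997, frequency = 12)
which(is.na(raw) | raw != myts)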
## ===========================================##
# set up a Neural Network model
mynnetar <- nnetar(myts)
# forecasting 3 years with the model
nnetforecast <- forecast(mynnetar, h = 36, PI = T)
library(ggplot2)
autoplot(nnetforecast)
## ===========================================##
## interactive dygraph
# data we need for the graph
data <- nnetforecast$x
lower <- nnetforecast$lower[, 2]
upper <- nnetforecast$upper[, 2]
pforecast <- nnetforecast$mean
mydata <- cbind(data, lower, upper, pforecast)
library(dygraphs)
dygraph(mydata, main = "Oregon Campsite Restaurant") %>% dyRangeSelector() %>% dySeries(name = "data",
label = "Revenue Data") %>% dySeries(c("lower", "pforecast", "upper"), label = "Revenue Forecast") %>%
dyLegend(show = "always", hideOnMouseOut = FALSE) %>% dyAxis("y", label = "Monthly Revenue USD") %>%
dyHighlight(highlightCircleSize = 5, highlightSeriesOpts = list(strokeWidth = 2)) %>%
dyOptions(axisLineColor = "navy", gridLineColor = "grey") %>% dyAnnotation("2010-8-1",
    text = "CF", tooltip = "Camp Festival", attachAtBottom = T)
10 Resources
## 1651.88 sec elapsed
## elapsed
## 1652
11 R Environment and OS
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_India.1252 LC_CTYPE=English_India.1252
## [3] LC_MONETARY=English_India.1252 LC_NUMERIC=C
## [5] LC_TIME=English_India.1252
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] dygraphs_1.1.1.6 quantmod_0.4.17 xts_0.12-0
## [4] fUnitRoots_3042.79 fBasics_3042.89.1 timeSeries_3062.100
## [7] timeDate_3043.102 MTS_1.0 vars_1.5-3
## [10] urca_1.3-0 strucchange_1.5-2 sandwich_2.5-1
## [13] MASS_7.3-51.6 TTR_0.24.2 lmtest_0.9-38
## [16] tseries_0.10-47 tidyr_1.1.2 zoo_1.8-8
## [19] lattice_0.20-41 lubridate_1.7.9 chron_2.3-56
## [22] forecast_8.12 readr_1.3.1 tictoc_1.0
## [25] DataComputing_0.6.1.9000 ggplot2_3.3.2 dplyr_1.0.2
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-149 tools_4.0.2 backports_1.1.9 utf8_1.1.4
## [5] R6_2.4.1 lazyeval_0.2.2 colorspace_1.4-1 nnet_7.3-14
## [9] withr_2.2.0 tidyselect_1.1.0 gridExtra_2.3 leaflet_2.0.3
## [13] curl_4.3 compiler_4.0.2 cli_2.0.2 formatR_1.7
## [17] fGarch_3042.83.2 ggdendro_0.1.21 labeling_0.3 bookdown_0.20
## [21] mosaicCore_0.6.0 scales_1.1.1 mvtnorm_1.1-1 fracdiff_1.5-1
## [25] quadprog_1.5-8 spatial_7.3-12 stringr_1.4.0 digest_0.6.25
## [29] ggformula_0.9.4 rmarkdown_2.3 base64enc_0.1-3 pkgconfig_2.0.3
## [33] htmltools_0.5.0 manipulate_1.0.1 htmlwidgets_1.5.1 rlang_0.4.7
## [37] farver_2.0.3 generics_0.0.2 jsonlite_1.7.1 crosstalk_1.1.0.1
## [41] magrittr_1.5 mosaicData_0.18.0 Matrix_1.2-18 Rcpp_1.0.5
## [45] munsell_0.5.0 fansi_0.4.1 lifecycle_0.2.0 stringi_1.5.3
## [49] yaml_2.2.1 ggstance_0.3.4 parallel_4.0.2 ggrepel_0.8.2
## [53] crayon_1.3.4.9000 splines_4.0.2 hms_0.5.3 knitr_1.29
## [57] pillar_1.4.6 igraph_1.2.5 glue_1.4.2 evaluate_0.14
## [61] vctrs_0.3.4 rmdformats_0.3.7 tweenr_1.0.1 gtable_0.3.0
## [65] purrr_0.3.4 polyclip_1.10-0 assertthat_0.2.1 xfun_0.17
## [69] ggforce_0.3.2 mime_0.9 broom_0.7.0 tibble_3.0.3
## [73] mosaic_1.7.0 ellipsis_0.3.1