Import data
Med<-read.csv("/Users/joshuawarren/Desktop/Janas_Files/WGU/D213/medical_time_series .csv",header=T)
Bring in the data using the “Import Dataset” gui in the environment tab. Rename to Med for ease of use during analysis. Check that the data set came in as a time series. Output: “data.frame’
class(Med)
## [1] "data.frame"
Check for missing values
sum(is.na(Med$Revenue))
## [1] 0
Check for duplicates in days
Med[duplicated(Med['Day'])]
Plot the data
plot(Med, type = "l", main = "Hospital Revenue by Days", xlab = "Day", ylab = "Revenue in Millions $")
Transform data to timeseries. Frequency is 7 since data is daily.
MedTS <- ts(Med$Revenue, frequency = 7, start = c(2000,1))
head(MedTS, 20)
## [1] 0.0000000 -0.2923555 -0.3277718 -0.3399871 -0.1248875 -0.4915896
## [7] -1.2552498 -1.8962789 -1.7472594 -2.1295577 -2.1514219 -3.2286370
## [13] -2.8226161 -3.6097128 -3.3988751 -2.8674246 -2.7146069 -2.0210840
## [19] -1.1260142 -0.8455451
Cross check class: output “ts”, or time series
class(MedTS)
## [1] "ts"
View data header
head.matrix(MedTS)
## Time Series:
## Start = c(2000, 1)
## End = c(2104, 3)
## Frequency = 7
## [1] 0.00000000 -0.29235551 -0.32777180 -0.33998712 -0.12488752 -0.49158956
## [7] -1.25524981 -1.89627889 -1.74725938 -2.12955767 -2.15142187 -3.22863700
## [13] -2.82261611 -3.60971276 -3.39887512 -2.86742462 -2.71460692 -2.02108399
## [19] -1.12601417 -0.84554515 -1.18784907 -1.59336888 -2.23363357 -2.98296814
## [25] -3.74967618 -4.06831795 -4.17291175 -4.42329870 -3.77323732 -3.13036593
## [31] -3.06699738 -2.22210903 -1.35465079 -0.41589860 0.08884823 0.98681089
## [37] 1.07208612 0.57528878 0.77847588 0.73196193 1.26740763 1.27765111
## [43] 1.92965323 1.88988029 2.11756586 1.50808259 1.36342093 1.57392010
## [49] 2.23015415 2.30021882 2.19261817 1.68846471 0.97280056 0.11118353
## [55] -0.76091828 -1.13651353 -1.00924333 -0.79661916 -1.05952317 -1.35656447
## [61] -0.82054623 -0.21037409 -0.71997908 -0.97557157 -0.89799749 -1.48208720
## [67] -1.71853182 -1.94498419 -2.08163125 -1.67759883 -0.85702750 0.13807808
## [73] 0.29225176 0.71532475 0.99985401 1.38247323 1.77940671 2.52737328
## [79] 3.03581311 3.16281552 2.56744451 1.69993096 1.61972529 2.15744872
## [85] 3.33619129 3.82394944 4.29542606 4.64672718 4.17809274 3.51394046
## [91] 3.41236614 3.31075785 3.82166659 4.63043983 5.32676923 5.22589066
## [97] 5.04315472 5.14507113 5.22007126 5.85037678 6.09648893 6.12546089
## [103] 6.82309241 6.44069962 6.39062265 5.86570152 5.35423497 5.27378511
## [109] 5.05744971 4.83478546 4.14051638 3.56362351 3.61658457 3.69835767
## [115] 3.62170725 3.93793958 5.17170572 5.20390162 5.36131373 4.98890073
## [121] 4.77044415 4.90036123 5.05323921 6.00657143 5.93032488 5.71543766
## [127] 5.62903599 6.48986248 6.71276799 7.19705927 7.03616688 7.10041120
## [133] 7.97131468 8.96532868 8.80208341 9.19888334 9.64799724 10.64242361
## [139] 10.87305828 10.86136081 11.10909178 11.15183669 10.57215633 10.71685029
## [145] 10.37960890 10.60628747 10.60806877 10.63457878 10.89393966 10.82687488
## [151] 10.95761761 10.54065737 10.45554673 10.59949748 11.04588172 11.52234191
## [157] 11.34734423 11.57505258 10.87052261 10.91859971 11.72891120 12.35524380
## [163] 12.50978326 13.09759974 13.06058006 13.15472692 13.21953178 13.70950155
## [169] 14.03037405 14.09952605 13.81021580 13.53283397 13.34737095 13.64552297
## [175] 13.05270212 12.54338309 12.51023704 11.49645527 11.06400360 10.91348824
## [181] 11.02021545 11.59504505 11.52909833 11.21456868 11.05443450 11.24443203
## [187] 11.12431599 11.67838468 11.53165810 11.62863107 11.28259614 11.80001114
## [193] 10.98934740 10.50358016 9.86148569 10.03416379 10.08627842 9.55311562
## [199] 9.41233969 9.74748564 9.44269761 9.94101458 9.89509820 9.53174970
## [205] 9.24862447 9.23239282 9.37147065 10.70447637 10.80789525 10.23711276
## [211] 9.38616295 9.59653492 10.26465563 10.52707705 11.31493865 12.11628343
## [217] 12.01820955 12.38268926 13.28355365 14.27765592 15.33083155 15.91018951
## [223] 15.92287096 16.04580246 15.71461819 15.52612490 15.53723932 15.53854650
## [229] 16.54513094 16.71738749 17.15379376 16.87350767 16.01998436 15.74730154
## [235] 16.17500206 16.43686264 16.72761626 16.68317242 16.05986155 15.95183003
## [241] 16.26091436 17.51151503 18.12423800 18.39935094 18.60895766 19.25965933
## [247] 19.39897911 19.81679130 20.17202384 20.02925640 20.48237962 21.24176639
## [253] 21.44107894 21.32805991 20.52548866 19.99965124 20.30960671 20.76375889
## [259] 21.18108986 21.73002354 22.38507705 23.27109385 23.10810298 22.80780801
## [265] 22.45721597 23.14323958 24.25454098 24.39029659 24.24192076 23.31085247
## [271] 23.11125888 23.10119411 23.55154135 23.47918839 23.58015145 23.09946413
## [277] 22.50202793 22.25599684 21.59646221 20.80239262 20.50511703 20.29541529
## [283] 20.38180606 20.52471876 20.77457048 21.75312933 21.40841468 21.17654981
## [289] 21.08531565 20.73978259 20.79254110 21.60332299 20.78927347 20.17024472
## [295] 19.86818033 19.77301482 20.01425707 19.52097882 18.43814072 18.64671159
## [301] 18.95051579 18.83960393 18.31354062 17.79698810 17.81044493 17.68630952
## [307] 17.49616085 17.34578868 18.23251653 19.42845811 19.57832686 19.73615187
## [313] 20.13527699 20.08228763 20.50900464 20.58125915 20.77965065 20.50631922
## [319] 20.59477812 20.27971009 19.94044816 19.52679262 19.69555778 19.34297164
## [325] 19.42067267 19.78043554 19.87482205 19.07356375 18.60755878 18.05845959
## [331] 18.04120951 17.79178940 17.23307158 17.26777341 16.77847659 16.19784221
## [337] 15.46408174 15.44200193 15.18524849 15.37956777 15.29980182 16.43135418
## [343] 17.39165351 17.41567218 17.07282932 16.90275916 16.42152911 16.27720408
## [349] 16.63851400 16.66854395 17.59501517 18.40240253 18.93028158 19.07474653
## [355] 18.93172244 18.52257206 18.35132530 18.83294272 19.61689989 19.96263596
## [361] 20.01570800 19.56631950 19.13766496 19.41546546 19.41141311 20.27007344
## [367] 21.05455237 20.80103975 20.49621756 20.33229330 20.76331504 20.64435316
## [373] 20.00703063 19.66406306 18.85381876 19.06842115 19.12768412 19.62877239
## [379] 19.32234915 19.11647870 19.12648854 18.98216135 19.10488759 19.14934300
## [385] 18.48729895 18.04310885 18.17420210 18.18266181 17.93060759 17.50659816
## [391] 16.62133727 16.24655262 16.58335590 16.54067996 16.41540223 15.83245152
## [397] 15.18698149 15.53459058 15.94869702 16.28489266 16.21923010 16.63014600
## [403] 16.07062587 15.83854691 15.74525264 14.86282361 15.11712332 15.23537670
## [409] 15.33383198 15.32809010 15.21556010 15.13816135 14.62520662 14.16896250
## [415] 14.03718789 13.20456728 12.87170405 12.37203938 13.00860339 12.34727803
## [421] 12.85895013 12.78861272 13.44318655 13.85685512 14.22030460 15.01732959
## [427] 15.63499339 15.20822077 15.11372339 14.49483878 13.68868211 13.90632746
## [433] 13.75502130 14.72506461 14.37025704 14.55664976 14.82437591 14.66020382
## [439] 14.32523093 14.35321911 14.11550898 14.23111643 14.07643074 14.47678687
## [445] 14.97897336 15.33220256 15.78830544 16.32930792 17.02116113 17.64796359
## [451] 17.45235266 17.67754922 18.53468087 19.28679632 19.92268506 19.94592829
## [457] 19.88401812 19.37368176 18.96268644 19.30021638 19.90994029 19.98600966
## [463] 20.15257227 20.65639748 20.24764509 20.35840064 20.40599677 20.78899362
## [469] 21.32369036 21.91210454 22.76537315 23.65651901 24.28427461 24.71991579
## [475] 24.70219109 24.34441654 23.90215996 24.28728824 24.36337379 24.59267678
## [481] 24.78648660 24.40179138 24.22407458 24.50485315 24.73754444 24.22381250
## [487] 24.79224925 24.50223685 24.32510579 23.68528079 23.96667888 23.37814612
## [493] 22.97179742 22.77963895 22.38191492 22.45540878 22.30188749 20.93709085
## [499] 20.22934582 20.31748124 19.38642381 19.34715037 19.41952252 19.74993909
## [505] 19.88512288 20.39561487 20.88758605 21.11584642 21.70557599 21.98160523
## [511] 21.63929070 21.45086723 21.21678023 20.84265813 21.39243097 22.07355692
## [517] 22.55416754 22.04989146 21.52657776 20.84716909 20.89004436 20.95616966
## [523] 21.36993978 20.85181211 20.82123696 21.54594731 20.97035542 20.61429967
## [529] 19.18423803 18.23379502 17.01067089 15.52168674 14.68280520 14.26711828
## [535] 14.25716100 14.33526130 13.99599107 13.34591973 12.22730918 12.33249832
## [541] 12.37732752 12.62458427 12.81438984 12.97291726 12.56518370 11.87861291
## [547] 12.05844579 12.18031969 12.85321650 13.43787156 13.87645903 14.30220212
## [553] 15.06566591 15.21174805 14.93010045 14.88542803 14.81785462 15.07030357
## [559] 15.54372397 16.08853531 16.69569590 16.66404928 16.73611598 17.60434655
## [565] 17.78580525 18.11695901 18.31833304 19.15812589 19.56843926 20.45844332
## [571] 20.43089703 20.16155368 20.33122453 20.69256668 20.16570973 20.10815508
## [577] 19.53625416 19.86755397 20.02259050 19.78263543 19.08826542 18.80550091
## [583] 18.91023263 19.18608936 19.31273398 19.57672475 18.98803475 18.43760771
## [589] 18.51908507 18.37249848 18.40125387 18.64812932 18.33325378 17.69431401
## [595] 17.46511988 16.93402572 17.23473672 16.84142672 16.22910638 16.40851998
## [601] 16.63257795 16.67862794 16.25484481 15.45111819 14.83145352 14.85477225
## [607] 14.26352674 13.82316571 13.85292416 14.30303825 13.81404793 12.53492808
## [613] 11.59997052 11.18296694 11.30992176 11.56385028 11.11916768 11.28501839
## [619] 12.34642905 13.08351696 13.53518778 13.21823281 13.40939508 14.06523756
## [625] 14.71663889 14.84117658 14.67312028 14.60045465 15.02646456 16.02914971
## [631] 16.45267281 16.11553884 15.98378420 17.04559138 17.03249392 16.86755462
## [637] 17.38269453 18.53401643 18.42636819 17.74886725 17.65585117 17.65435043
## [643] 17.74334287 17.41515672 17.35849569 17.03394954 17.02397361 17.12924705
## [649] 16.85215536 16.87632056 16.74196846 16.65679565 16.77803414 16.72572860
## [655] 16.41141682 16.68460581 16.05763027 15.83582485 15.62435873 15.58215608
## [661] 15.44953699 15.54358000 15.73518259 15.32726854 15.29212973 15.20409098
## [667] 15.28867524 15.21272483 15.56314234 16.29417008 16.64104597 17.74169248
## [673] 18.05209520 18.07906540 18.19260356 18.45640064 18.94372536 19.43926174
## [679] 19.41508318 18.44808858 17.32676713 16.96580444 16.83548422 16.62847592
## [685] 16.11429133 16.12943604 15.97647760 15.00198842 14.27435845 13.79413971
## [691] 14.32249043 14.08876727 14.90497801 15.61118508 15.25722325 15.44995913
## [697] 15.30104445 15.53990358 16.05930024 16.69294392 17.83743995 17.97049828
## [703] 18.34314806 18.76031022 19.08031732 19.37859058 20.07618154 20.30713669
## [709] 20.03550478 19.75000881 18.92506696 18.80368721 18.97848606 18.69402609
## [715] 18.18418878 18.87576548 18.96851930 19.37214904 19.33828833 18.51265351
## [721] 17.33376752 16.15511086 15.98145875 15.24621933 15.69321833 15.75474857
## [727] 15.72205587 15.86582153 15.70898774 15.82286745 16.06942948
Run augmented dickey fuller test on the time series data
## Error in adf.test(MedTS): could not find function "adf.test"
## Error in eval(expr, envir, enclos): object 'ADF' not found
Data is non-stationary, use difference equation over time approx 7 to transform data
MedTS1 <- diff(MedTS)
head(MedTS1)
## [1] -0.29235551 -0.03541629 -0.01221533 0.21509961 -0.36670205 -0.76366025
Run augmented Dickey Fuller Test, data is now stationary.
adf.test(MedTS1)
## Error in adf.test(MedTS1): could not find function "adf.test"
Split the data 70/30 into test and traing sets, view headers.
library(TSstudio)
split <- ts_split(MedTS1, round(length(MedTS1)*.3))
train <- split$train
test <- split$test
head(train)
## [1] -0.29235551 -0.03541629 -0.01221533 0.21509961 -0.36670205 -0.76366025
head(test)
## [1] -0.2340870 -0.3741221 0.5497728 0.6811260 0.4806106 -0.5042761
Visualize the split data on the line graph.
plot(MedTS1)
lines(test, col='red')
lines(train, col = 'green')
Export MedTS as a csv file to attach to submission.
write.csv(MedTS1, "MedTS_Clean")
Decompose the time series to look for seasonality
MedTS_deco <- decompose(MedTS1)
plot(head(MedTS_deco$seasonal,28),type = "l")
plot(head(MedTS_deco$seasonal,7),type = "l")
Decompose to look for trends
plot(MedTS_deco$trend)
Assumption of ARIMM Check autocorrelation: Shows high level of autocorrelation.
acf(MedTS1)
Check partial autocorrelation: 2 bars cross the blue line
pacf(MedTS1)
look at the Spectrial density of the time series
spectrum(MedTS1)
Run a full decomposition of the time series data
plot(MedTS_deco)
Plot random white noise
plot(MedTS_deco$random)
Start the model Use D = 1 to account for differening transformation
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
MedModel <- auto.arima(train, D=1, stepwise = TRUE, seasonal = TRUE, trace = TRUE)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(2,0,2)(1,1,1)[7] with drift : 691.3325
## ARIMA(0,0,0)(0,1,0)[7] with drift : 1011.542
## ARIMA(1,0,0)(1,1,0)[7] with drift : 810.6725
## ARIMA(0,0,1)(0,1,1)[7] with drift : 689.3507
## ARIMA(0,0,0)(0,1,0)[7] : 1009.582
## ARIMA(0,0,1)(0,1,0)[7] with drift : 969.2125
## ARIMA(0,0,1)(1,1,1)[7] with drift : 711.9873
## ARIMA(0,0,1)(0,1,2)[7] with drift : 690.8248
## ARIMA(0,0,1)(1,1,0)[7] with drift : 823.7466
## ARIMA(0,0,1)(1,1,2)[7] with drift : 683.8349
## ARIMA(0,0,1)(2,1,2)[7] with drift : 672.5853
## ARIMA(0,0,1)(2,1,1)[7] with drift : 695.2391
## ARIMA(0,0,0)(2,1,2)[7] with drift : Inf
## ARIMA(1,0,1)(2,1,2)[7] with drift : 658.3303
## ARIMA(1,0,1)(1,1,2)[7] with drift : 667.0945
## ARIMA(1,0,1)(2,1,1)[7] with drift : 678.9222
## ARIMA(1,0,1)(1,1,1)[7] with drift : 699.6737
## ARIMA(1,0,0)(2,1,2)[7] with drift : 656.6058
## ARIMA(1,0,0)(1,1,2)[7] with drift : 665.5747
## ARIMA(1,0,0)(2,1,1)[7] with drift : 676.8655
## ARIMA(1,0,0)(1,1,1)[7] with drift : 698.6339
## ARIMA(2,0,0)(2,1,2)[7] with drift : 659.5965
## ARIMA(2,0,1)(2,1,2)[7] with drift : 661.24
## ARIMA(1,0,0)(2,1,2)[7] : 654.5548
## ARIMA(1,0,0)(1,1,2)[7] : 663.6238
## ARIMA(1,0,0)(2,1,1)[7] : 674.8169
## ARIMA(1,0,0)(1,1,1)[7] : 696.6116
## ARIMA(0,0,0)(2,1,2)[7] : Inf
## ARIMA(2,0,0)(2,1,2)[7] : 657.5544
## ARIMA(1,0,1)(2,1,2)[7] : 656.2762
## ARIMA(0,0,1)(2,1,2)[7] : 670.6372
## ARIMA(2,0,1)(2,1,2)[7] : 659.1812
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(1,0,0)(2,1,2)[7] : Inf
## ARIMA(1,0,1)(2,1,2)[7] : Inf
## ARIMA(1,0,0)(2,1,2)[7] with drift : Inf
## ARIMA(2,0,0)(2,1,2)[7] : Inf
## ARIMA(1,0,1)(2,1,2)[7] with drift : Inf
## ARIMA(2,0,1)(2,1,2)[7] : Inf
## ARIMA(2,0,0)(2,1,2)[7] with drift : Inf
## ARIMA(2,0,1)(2,1,2)[7] with drift : Inf
## ARIMA(1,0,0)(1,1,2)[7] : Inf
## ARIMA(1,0,0)(1,1,2)[7] with drift : Inf
## ARIMA(1,0,1)(1,1,2)[7] with drift : Inf
## ARIMA(0,0,1)(2,1,2)[7] : Inf
## ARIMA(0,0,1)(2,1,2)[7] with drift : Inf
## ARIMA(1,0,0)(2,1,1)[7] : Inf
## ARIMA(1,0,0)(2,1,1)[7] with drift : Inf
## ARIMA(1,0,1)(2,1,1)[7] with drift : Inf
## ARIMA(0,0,1)(1,1,2)[7] with drift : Inf
## ARIMA(0,0,1)(0,1,1)[7] with drift : Inf
## ARIMA(0,0,1)(0,1,2)[7] with drift : Inf
## ARIMA(2,0,2)(1,1,1)[7] with drift : Inf
## ARIMA(0,0,1)(2,1,1)[7] with drift : Inf
## ARIMA(1,0,0)(1,1,1)[7] : Inf
## ARIMA(1,0,0)(1,1,1)[7] with drift : Inf
## ARIMA(1,0,1)(1,1,1)[7] with drift : Inf
## ARIMA(0,0,1)(1,1,1)[7] with drift : Inf
## ARIMA(1,0,0)(1,1,0)[7] with drift : 811.3147
##
## Best model: ARIMA(1,0,0)(1,1,0)[7] with drift
MedModel
## Series: train
## ARIMA(1,0,0)(1,1,0)[7] with drift
##
## Coefficients:
## ar1 sar1 drift
## 0.3492 -0.5016 0.0008
## s.e. 0.0418 0.0388 0.0035
##
## sigma^2 = 0.2887: log likelihood = -401.62
## AIC=811.23 AICc=811.31 BIC=828.12
Start the forecast
library(astsa)
##
## Attaching package: 'astsa'
## The following object is masked from 'package:forecast':
##
## gas
MedForecast = sarima.for(train, n.ahead = length(test),1,0,0,1,1,0,7)
lines(test, col = "blue")
Find the CI for the forecast
CI = c(MedForecast$pred[1] - (1.95*MedForecast$se[1]),MedForecast$pred[1] + (1.95*MedForecast$se[1]))
CI
## [1] -0.9898563 1.0992933
6 month forecast
MedForecast_sixM = sarima.for(MedTS1, 182,1,0,0,1,1,0,7)
Model evaluation using mse and rmse
library(Metrics)
##
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
##
## accuracy
mse(MedForecast$pred, test)
## [1] 0.3689671
rmse(MedForecast$pred, test)
## [1] 0.6074266
Annotated visual
Revenue <- train
Pass <- sarima.for(Revenue,n.ahead = length(test), 1,0,0,1,1,0,7)
lines(test, col = "blue")
text(x=2068,y=3,pos=4,"Past")
text(x=2068,y=2.5,pos=4,"(train)")
text(x=2075,y=3,pos=4,"Future")
text(x=2075,y=2.5,pos=4,"(train)")
text(x=2059,y=4,pos=4,"Red = Predictions", col = "red")
text(x=2059,y=4.5,pos=4,"Blue = Actuals", col = "blue")
abline(v=2073)