## Warning: package 'fpp2' was built under R version 3.5.3
## Warning: package 'forecast' was built under R version 3.5.3
## Warning: package 'fma' was built under R version 3.5.3
## Warning: package 'expsmooth' was built under R version 3.5.3
## Warning: package 'corrplot' was built under R version 3.5.3
## Warning: package 'zoo' was built under R version 3.5.3
data visualization
train.sj[, 4:ncol(train.sj)] %>%
cor(use = "pairwise.complete.obs") %>%
corrplot(tl.cex = 0.5,
method = "circle")
train.iq[, 4:ncol(train.iq)] %>%
cor(use = "pairwise.complete.obs") %>%
corrplot(tl.cex = 0.5,
method = "circle")
choose three indicators from all of the indicators
autoplot(ts(train.sj$total_cases, start = c(1990,18), frequency = 52))
autoplot(ts(train.iq$total_cases, start = c(2000,26), frequency = 52))
the historical data is not whitenoise, may need transformation
# prepare for modeling and forecast
train.sj.new <- train.sj[,c("total_cases",
"reanalysis_specific_humidity_g_per_kg",
"reanalysis_precip_amt_kg_per_m2",
"reanalysis_dew_point_temp_k")]
train.iq.new <- train.iq[,c("total_cases",
"reanalysis_specific_humidity_g_per_kg",
"reanalysis_precip_amt_kg_per_m2",
"reanalysis_dew_point_temp_k")]
new.names <- c("cases", "humidity","precipitation","temperature")
colnames(train.sj.new) <- new.names
colnames(train.iq.new) <- new.names
test.feature.sj <- subset(test.feature, city == "sj")
test.feature.iq <- subset(test.feature, city == "iq")
test.sj.new <- test.feature.sj[,c("reanalysis_specific_humidity_g_per_kg",
"reanalysis_precip_amt_kg_per_m2",
"reanalysis_dew_point_temp_k")]
test.iq.new <- test.feature.iq[,c("reanalysis_specific_humidity_g_per_kg",
"reanalysis_precip_amt_kg_per_m2",
"reanalysis_dew_point_temp_k")]
test.new.names <- c("humidity","precipitation","temperature")
colnames(test.sj.new) <- test.new.names
colnames(test.iq.new) <- test.new.names
# deal with NA values
colSums(sapply(train.sj.new, is.na))
## cases humidity precipitation temperature
## 0 6 6 6
colSums(sapply(train.iq.new, is.na))
## cases humidity precipitation temperature
## 0 4 4 4
colSums(sapply(test.sj.new, is.na))
## humidity precipitation temperature
## 2 2 2
colSums(sapply(test.iq.new, is.na))
## humidity precipitation temperature
## 0 0 0
train.sj.new <- na.locf(train.sj.new)
train.iq.new <- na.locf(train.iq.new)
test.sj.new <- na.locf(test.sj.new)
# ts form
ts.train.sj.new <- ts(train.sj.new, start=c(1990,18),frequency = 52)
ts.train.iq.new <- ts(train.iq.new, start=c(2000,26),frequency = 52)
model
#model1 - tslm
sj_fit1 <- tslm(cases ~ humidity + precipitation + temperature,
data = ts.train.sj.new)
summary(sj_fit1)
##
## Call:
## tslm(formula = cases ~ humidity + precipitation + temperature,
## data = ts.train.sj.new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -54.61 -23.67 -11.19 3.70 424.94
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.130e+04 5.418e+03 2.086 0.0372 *
## humidity 4.737e+01 1.961e+01 2.415 0.0159 *
## precipitation 5.137e-02 4.934e-02 1.041 0.2981
## temperature -4.085e+01 1.946e+01 -2.099 0.0360 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 50.21 on 932 degrees of freedom
## Multiple R-squared: 0.04811, Adjusted R-squared: 0.04504
## F-statistic: 15.7 on 3 and 932 DF, p-value: 5.742e-10
autoplot(ts.train.sj.new[,"cases"]) +
autolayer(sj_fit1$fitted)
sj.fcast_fit1 <- forecast(sj_fit1, newdata = test.sj.new)
autoplot(sj.fcast_fit1)
accuracy(sj.fcast_fit1)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 5.177944e-15 50.10345 27.07694 -Inf Inf 0.7359268 0.8460648
iq_fit1 <- tslm(cases ~ humidity + precipitation + temperature,
data = ts.train.iq.new)
summary(iq_fit1)
##
## Call:
## tslm(formula = cases ~ humidity + precipitation + temperature,
## data = ts.train.iq.new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.874 -5.226 -2.863 1.333 106.619
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2753.70749 1361.82683 2.022 0.0437 *
## humidity 11.76303 4.81483 2.443 0.0149 *
## precipitation -0.01356 0.01126 -1.204 0.2291
## temperature -9.97141 4.88607 -2.041 0.0418 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.45 on 516 degrees of freedom
## Multiple R-squared: 0.0639, Adjusted R-squared: 0.05846
## F-statistic: 11.74 on 3 and 516 DF, p-value: 1.88e-07
autoplot(ts.train.iq.new[,"cases"]) +
autolayer(iq_fit1$fitted)
iq.fcast_fit1 <- forecast(iq_fit1, newdata = test.iq.new)
autoplot(iq.fcast_fit1)
accuracy(iq.fcast_fit1)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -5.059788e-16 10.4058 6.274696 -Inf Inf 0.6652827 0.3946892
# model2 - Arima errors
sj_fit2 <- auto.arima(ts.train.sj.new[,"cases"],
xreg = ts.train.sj.new[,"humidity"])
summary(sj_fit2)
## Series: ts.train.sj.new[, "cases"]
## Regression with ARIMA(3,1,2)(0,0,1)[52] errors
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 sma1 xreg
## -0.8602 -1.0974 -0.4001 0.4645 0.8210 -0.0687 3.840
## s.e. 0.0586 0.0393 0.0335 0.0586 0.0422 0.0319 0.713
##
## sigma^2 estimated as 612.7: log likelihood=-4323.96
## AIC=8663.92 AICc=8664.08 BIC=8702.64
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.004373336 24.64724 12.46461 -Inf Inf 0.3387769 -0.03176884
checkresiduals(sj_fit2)
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(3,1,2)(0,0,1)[52] errors
## Q* = 202.56, df = 97, p-value = 1.965e-09
##
## Model df: 7. Total lags used: 104
autoplot(ts.train.sj.new[,"cases"]) +
autolayer(sj_fit2$fitted)
sj.fcast_fit2 <- forecast(sj_fit2,
xreg = test.sj.new[,"humidity"])
autoplot(sj.fcast_fit2)
accuracy(sj.fcast_fit2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.004373336 24.64724 12.46461 -Inf Inf 0.3387769 -0.03176884
iq_fit2 <- auto.arima(ts.train.iq.new[,"cases"],
xreg = ts.train.iq.new[,"humidity"])
summary(iq_fit2)
## Series: ts.train.iq.new[, "cases"]
## Regression with ARIMA(2,0,1)(0,0,2)[52] errors
##
## Coefficients:
## ar1 ar2 ma1 sma1 sma2 intercept xreg
## 0.5053 0.2896 -0.3107 -0.0065 0.0943 -5.1111 0.7383
## s.e. 0.1013 0.0634 0.1054 0.0462 0.0507 6.3822 0.3645
##
## sigma^2 estimated as 76.39: log likelihood=-1862.4
## AIC=3740.8 AICc=3741.08 BIC=3774.83
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.05237957 8.681177 4.86068 NaN Inf 0.5153599 0.001998071
checkresiduals(iq_fit2)
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(2,0,1)(0,0,2)[52] errors
## Q* = 77.752, df = 97, p-value = 0.9246
##
## Model df: 7. Total lags used: 104
autoplot(ts.train.iq.new[,"cases"]) +
autolayer(iq_fit2$fitted)
iq.fcast_fit2 <- forecast(iq_fit2,
xreg = test.iq.new[,"humidity"])
autoplot(iq.fcast_fit2)
accuracy(iq.fcast_fit2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.05237957 8.681177 4.86068 NaN Inf 0.5153599 0.001998071
# model 3 - neural network
sj_fit3 <- nnetar(ts.train.sj.new[,"cases"],
xreg = ts.train.sj.new[,"humidity"])
summary(sj_fit3)
## Length Class Mode
## x 936 ts numeric
## m 1 -none- numeric
## p 1 -none- numeric
## P 1 -none- numeric
## scalex 2 -none- list
## scalexreg 2 -none- list
## size 1 -none- numeric
## xreg 936 -none- numeric
## subset 936 -none- numeric
## model 20 nnetarmodels list
## nnetargs 0 -none- list
## fitted 936 ts numeric
## residuals 936 ts numeric
## lags 21 -none- numeric
## series 1 -none- character
## method 1 -none- character
## call 3 -none- call
autoplot(ts.train.sj.new[,"cases"]) +
autolayer(sj_fit3$fitted)
## Warning: Removed 52 rows containing missing values (geom_path).
sj.fcast_fit3 <- forecast(sj_fit3, xreg=test.sj.new[,1])
autoplot(sj.fcast_fit3)
accuracy(sj.fcast_fit3)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.1073929 8.532394 5.586333 -Inf Inf 0.1518315 0.04115448
iq_fit3 <- nnetar(ts.train.iq.new[,"cases"],
xreg = ts.train.iq.new[,"humidity"] )
summary(iq_fit3)
## Length Class Mode
## x 520 ts numeric
## m 1 -none- numeric
## p 1 -none- numeric
## P 1 -none- numeric
## scalex 2 -none- list
## scalexreg 2 -none- list
## size 1 -none- numeric
## xreg 520 -none- numeric
## subset 520 -none- numeric
## model 20 nnetarmodels list
## nnetargs 0 -none- list
## fitted 520 ts numeric
## residuals 520 ts numeric
## lags 14 -none- numeric
## series 1 -none- character
## method 1 -none- character
## call 3 -none- call
autoplot(ts.train.iq.new[,"cases"]) +
autolayer(iq_fit3$fitted)
## Warning: Removed 52 rows containing missing values (geom_path).
iq.fcast_fit3 <- forecast(iq_fit3, xreg = test.iq.new[,"humidity"])
autoplot(iq.fcast_fit3)
accuracy(iq.fcast_fit3)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.05163778 3.197393 2.303775 NaN Inf 0.2442607 0.0007720475
feature <- rbind(train.feature, test.feature)
feature.sj <- subset(feature, city=="sj")
feature.sj <- na.locf(feature.sj)
colSums(sapply(feature.sj, is.na))
## city
## 0
## year
## 0
## weekofyear
## 0
## week_start_date
## 0
## ndvi_ne
## 0
## ndvi_nw
## 0
## ndvi_se
## 0
## ndvi_sw
## 0
## precipitation_amt_mm
## 0
## reanalysis_air_temp_k
## 0
## reanalysis_avg_temp_k
## 0
## reanalysis_dew_point_temp_k
## 0
## reanalysis_max_air_temp_k
## 0
## reanalysis_min_air_temp_k
## 0
## reanalysis_precip_amt_kg_per_m2
## 0
## reanalysis_relative_humidity_percent
## 0
## reanalysis_sat_precip_amt_mm
## 0
## reanalysis_specific_humidity_g_per_kg
## 0
## reanalysis_tdtr_k
## 0
## station_avg_temp_c
## 0
## station_diur_temp_rng_c
## 0
## station_max_temp_c
## 0
## station_min_temp_c
## 0
## station_precip_mm
## 0
feature.iq <- subset(feature, city=="iq")
feature.iq <- na.locf(feature.iq)
autoplot(ts(log(feature.sj$reanalysis_specific_humidity_g_per_kg),
start=c(1990,18),frequency = 52))
autoplot(ts(feature.iq$reanalysis_specific_humidity_g_per_kg,
start=c(2000,26),frequency = 52))
ts.sj.cases <- ts(train.sj$total_cases,
start=c(1990,18),frequency = 52)
ts.sj.humi <- ts(feature.sj$reanalysis_specific_humidity_g_per_kg,
start=c(1990,18),frequency = 52)
ts.iq.cases <- ts(train.iq$total_cases, start=c(2000,26),frequency = 52)
ts.iq.humi <- ts(feature.iq$reanalysis_specific_humidity_g_per_kg,
start=c(2000,26),frequency = 52)
sj.df.humi <- diff(ts.sj.humi)
train_sj.df.humi=sj.df.humi[1:936]
test_sj.df.humi=sj.df.humi[936:1195]
f1 <- auto.arima(ts.sj.cases, xreg = train_sj.df.humi)
summary(f1)
## Series: ts.sj.cases
## Regression with ARIMA(2,1,5)(0,0,2)[52] errors
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 ma4 ma5 sma1
## -0.7131 -0.8425 0.2742 0.5514 -0.3240 0.1549 0.1410 -0.0645
## s.e. 0.0415 0.0449 0.0508 0.0479 0.0379 0.0361 0.0371 0.0351
## sma2 xreg
## 0.0692 -0.1143
## s.e. 0.0330 0.7926
##
## sigma^2 estimated as 618.1: log likelihood=-4326.9
## AIC=8675.8 AICc=8676.09 BIC=8729.05
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.001647022 24.71486 12.09538 -Inf Inf 0.3287415 0.005220595
Box.test(ts.sj.cases)
##
## Box-Pierce test
##
## data: ts.sj.cases
## X-squared = 681.47, df = 1, p-value < 2.2e-16
library(tseries)
## Warning: package 'tseries' was built under R version 3.5.3
lambda_sj.cases <- BoxCox.lambda(ts.sj.cases)
Box.test(BoxCox(ts.sj.cases, lambda = lambda_sj.cases))
##
## Box-Pierce test
##
## data: BoxCox(ts.sj.cases, lambda = lambda_sj.cases)
## X-squared = 0.016433, df = 1, p-value = 0.898
kpss.test(BoxCox(ts.sj.cases, lambda = lambda_sj.cases))
## Warning in kpss.test(BoxCox(ts.sj.cases, lambda = lambda_sj.cases)): p-
## value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: BoxCox(ts.sj.cases, lambda = lambda_sj.cases)
## KPSS Level = 0.1353, Truncation lag parameter = 6, p-value = 0.1
autoplot(diff((ts.train.iq.new[,"humidity"])))
f2 <- auto.arima(ts.train.sj.new[,"cases"],
xreg = ts.train.sj.new[,"humidity"],
lambda = lambda_sj.cases)
summary(f2)
## Series: ts.train.sj.new[, "cases"]
## Regression with ARIMA(3,0,2) errors
## Box Cox transformation: lambda= 4.102259e-05
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 xreg
## -0.4386 0.4445 0.3623 0.3946 -0.2322 -5.7972
## s.e. 0.0802 0.0751 0.0313 0.0854 0.0833 5.2959
##
## sigma^2 estimated as 2184647: log likelihood=-8156.71
## AIC=16327.42 AICc=16327.54 BIC=16361.31
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -Inf Inf Inf -Inf Inf Inf NaN