library(dplyr); library(psych);library(pgirmess); library(table1); library(explore); library(GGally); library(gridExtra); library(leaps);library(BMA); library(ggplot2);library(ggfortify);library(sciplot); library(fpp3)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'pgirmess'
## The following object is masked from 'package:psych':
##
## shannon
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
##
## Attaching package: 'explore'
## The following object is masked from 'package:psych':
##
## describe
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'GGally'
## The following object is masked from 'package:explore':
##
## rescale01
## The following object is masked from 'package:dplyr':
##
## nasa
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## Loading required package: survival
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.5-2)
## -- Attaching packages ---------------------------------------------- fpp3 0.2 --
## v tibble 3.0.1 v tsibbledata 0.1.0
## v tidyr 1.0.3 v feasts 0.1.3
## v lubridate 1.7.8 v fable 0.2.0
## v tsibble 0.8.6
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x ggplot2::%+%() masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x gridExtra::combine() masks dplyr::combine()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::id() masks dplyr::id()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag() masks stats::lag()
## x fabletools::report() masks explore::report()
setwd("E:/cuong_dataR/test")
dat = read.csv("dataxlbtestR.csv", header = T, na.strings = "0")
dat = na.omit(dat)
attach(dat)
names(dat)
## [1] "date" "year" "month" "seasonal"
## [5] "rwntu" "fecl3" "rw.vol" "feedsludge"
## [9] "drysludge" "polymer" "concentr.poly" "unit.poly"
## [13] "poly.gm3"
“date” : data update date (yyyy/mm/dd)
“year” : data updadte year (2017, 2018, 2019)
“month” : data update month (1,2,3,…,10,11,12)
“seasonal” : data update season (Dry, Rain)
“rwntu” : Turbidity of raw water (NTU)
“fecl3” : ferric chloride using for water treatment (g/m3)
“feedsludge” : sludge flow inlet (m3/day)
“drysludge” : dry sludge volume outlet (Kg/day)
“polymer” : polymer cation volume using for STS (Kg/day)
“concentr.poly” : concentration of polymer cation (%)
“unit.poly” : polymer cation quantity per a m3 feedsludge (Kg/m3)
“poly.gm3” : the unit of “polymer” with a gram quantity of polymer cation per a m3 raw water
dat$date = as.character(dat$date)
dat <- dat %>% mutate(Time = yearmonth(date)) %>% as_tsibble(index = Time)
dat %>%
gather("var", "value", rwntu, feedsludge, polymer) %>%
ggplot(aes(x = Time, y = value)) + geom_line() +
facet_grid(vars(var), scales = "free_y") + xlab("Year") +
ylab(NULL) +
ggtitle("Monthly changes of turdibility, feed sludge and polymer at STS")
p = ggplot(dat, aes(x=as.factor(month), y=poly.gm3, colour=factor(year)))
p + geom_boxplot(aes(colour=factor(year))) + geom_jitter(alpha=0.05) + theme_bw() + theme_classic()
temp = dat[ , c("year","month", "seasonal", "rwntu", "feedsludge", "poly.gm3")]
ggpairs(temp)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
table1(~ rwntu + feedsludge + poly.gm3|year, dadta = dat)
## Warning in table1.formula(~rwntu + feedsludge + poly.gm3 | year, dadta = dat):
## Terms to the right of '|' in formula 'x' define table columns and are expected
## to be factors with meaningful labels.
| 2017 (N=321) |
2018 (N=347) |
2019 (N=327) |
Overall (N=995) |
|
|---|---|---|---|---|
| rwntu | ||||
| Mean (SD) | 44.1 (31.3) | 44.4 (27.4) | 57.4 (45.2) | 48.6 (35.8) |
| Median [Min, Max] | 32.8 [8.20, 175] | 34.5 [11.5, 183] | 38.2 [6.39, 217] | 35.8 [6.39, 217] |
| feedsludge | ||||
| Mean (SD) | 325 (173) | 297 (136) | 332 (174) | 317 (162) |
| Median [Min, Max] | 301 [0, 868] | 253 [105, 696] | 263 [72.0, 794] | 272 [0, 868] |
| poly.gm3 | ||||
| Mean (SD) | 0.156 (0.0851) | 0.160 (0.105) | 0.195 (0.136) | 0.170 (0.112) |
| Median [Min, Max] | 0.140 [0, 1.00] | 0.130 [0, 0.540] | 0.180 [0, 0.800] | 0.160 [0, 1.00] |
table1(~ rwntu + feedsludge + poly.gm3|seasonal, dadta = dat)
| Dry (N=502) |
Rain (N=493) |
Overall (N=995) |
|
|---|---|---|---|
| rwntu | |||
| Mean (SD) | 27.0 (11.9) | 70.5 (38.7) | 48.6 (35.8) |
| Median [Min, Max] | 24.9 [6.39, 84.5] | 62.0 [12.8, 217] | 35.8 [6.39, 217] |
| feedsludge | |||
| Mean (SD) | 215 (79.6) | 422 (158) | 317 (162) |
| Median [Min, Max] | 212 [0, 673] | 419 [72.0, 868] | 272 [0, 868] |
| poly.gm3 | |||
| Mean (SD) | 0.121 (0.0860) | 0.220 (0.114) | 0.170 (0.112) |
| Median [Min, Max] | 0.100 [0, 1.00] | 0.200 [0, 0.550] | 0.160 [0, 1.00] |
table1(~ rwntu + feedsludge + poly.gm3|month, dadta = dat)
## Warning in table1.formula(~rwntu + feedsludge + poly.gm3 | month, dadta = dat):
## Terms to the right of '|' in formula 'x' define table columns and are expected
## to be factors with meaningful labels.
## Warning in .table1.internal(x = x, labels = labels, groupspan = groupspan, :
## Table has 13 columns. Are you sure this is what you want?
| 1 (N=87) |
2 (N=75) |
3 (N=91) |
4 (N=84) |
5 (N=87) |
6 (N=83) |
7 (N=90) |
8 (N=76) |
9 (N=78) |
10 (N=79) |
11 (N=82) |
12 (N=83) |
Overall (N=995) |
|
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| rwntu | |||||||||||||
| Mean (SD) | 25.7 (9.13) | 21.9 (7.85) | 21.7 (8.77) | 22.7 (6.00) | 32.0 (11.1) | 51.0 (18.4) | 72.4 (21.3) | 98.2 (43.8) | 96.7 (40.9) | 78.8 (37.2) | 42.2 (16.0) | 28.3 (6.09) | 48.6 (35.8) |
| Median [Min, Max] | 24.0 [9.35, 46.9] | 21.1 [10.6, 45.6] | 21.1 [6.39, 41.0] | 21.0 [12.0, 37.2] | 29.9 [12.8, 71.0] | 49.0 [21.9, 120] | 69.6 [38.3, 150] | 82.1 [45.7, 217] | 86.8 [42.1, 215] | 68.4 [35.2, 202] | 39.6 [16.2, 84.5] | 28.0 [16.5, 43.0] | 35.8 [6.39, 217] |
| feedsludge | |||||||||||||
| Mean (SD) | 230 (63.4) | 217 (72.4) | 213 (52.9) | 233 (84.4) | 262 (98.4) | 380 (126) | 453 (110) | 520 (137) | 443 (124) | 490 (193) | 183 (103) | 213 (86.1) | 317 (162) |
| Median [Min, Max] | 228 [63.0, 540] | 201 [89.0, 404] | 205 [105, 389] | 213 [108, 673] | 243 [108, 532] | 351 [142, 646] | 457 [220, 868] | 545 [165, 809] | 454 [72.0, 697] | 503 [92.0, 794] | 205 [0, 313] | 199 [0, 556] | 272 [0, 868] |
| poly.gm3 | |||||||||||||
| Mean (SD) | 0.103 (0.0586) | 0.113 (0.104) | 0.0942 (0.0573) | 0.0969 (0.0569) | 0.114 (0.0801) | 0.192 (0.101) | 0.243 (0.0833) | 0.292 (0.131) | 0.251 (0.0950) | 0.239 (0.0999) | 0.180 (0.121) | 0.144 (0.0695) | 0.170 (0.112) |
| Median [Min, Max] | 0.100 [0, 0.300] | 0.100 [0, 0.800] | 0.0900 [0, 0.240] | 0.100 [0, 0.220] | 0.100 [0, 0.380] | 0.170 [0, 0.420] | 0.220 [0.110, 0.470] | 0.280 [0.0800, 0.550] | 0.240 [0, 0.480] | 0.200 [0.0600, 0.460] | 0.170 [0, 1.00] | 0.130 [0, 0.330] | 0.160 [0, 1.00] |
fit1.polymer <- dat %>%
model(
tslm = TSLM(poly.gm3 ~ feedsludge)
)
report(fit1.polymer)
## Series: poly.gm3
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.24739 -0.04910 -0.01322 0.04180 0.89313
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.618e-02 5.897e-03 4.44 1e-05 ***
## feedsludge 4.533e-04 1.655e-05 27.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08459 on 993 degrees of freedom
## Multiple R-squared: 0.4304, Adjusted R-squared: 0.4298
## F-statistic: 750.3 on 1 and 993 DF, p-value: < 2.22e-16
glance(fit1.polymer) %>% select(adj_r_squared, CV, AIC, AICc, BIC)
## # A tibble: 1 x 5
## adj_r_squared CV AIC AICc BIC
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.430 0.00718 -4911. -4911. -4896.
fit2.polymer <- dat %>%
model(
tslm = TSLM(poly.gm3 ~ rwntu)
)
report(fit2.polymer)
## Series: poly.gm3
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.248146 -0.053838 -0.009589 0.044678 0.845935
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.080308 0.004829 16.63 <2e-16 ***
## rwntu 0.001847 0.000080 23.09 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09041 on 993 degrees of freedom
## Multiple R-squared: 0.3493, Adjusted R-squared: 0.3487
## F-statistic: 533.1 on 1 and 993 DF, p-value: < 2.22e-16
glance(fit2.polymer) %>% select(adj_r_squared, CV, AIC, AICc, BIC)
## # A tibble: 1 x 5
## adj_r_squared CV AIC AICc BIC
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.349 0.00819 -4779. -4779. -4764.
fit.polymer <- dat %>%
model(
tslm = TSLM(poly.gm3 ~ feedsludge + rwntu)
)
report(fit.polymer)
## Series: poly.gm3
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.237806 -0.047000 -0.009198 0.043096 0.883139
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.261e-02 5.644e-03 4.007 6.61e-05 ***
## feedsludge 3.250e-04 2.049e-05 15.861 < 2e-16 ***
## rwntu 9.113e-04 9.269e-05 9.832 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08079 on 992 degrees of freedom
## Multiple R-squared: 0.481, Adjusted R-squared: 0.4799
## F-statistic: 459.6 on 2 and 992 DF, p-value: < 2.22e-16
glance(fit.polymer) %>% select(adj_r_squared, CV, AIC, AICc, BIC)
## # A tibble: 1 x 5
## adj_r_squared CV AIC AICc BIC
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.480 0.00656 -5002. -5002. -4982.
Choose “fit.polymer” model due to the least AICc value.
Equation :
poly.gm3 = 0.02 + 0.0003 * feedsludge + 0.0009 * rwntu
augment(fit.polymer) %>%
ggplot(aes(x=Time)) +
geom_line(aes(y=poly.gm3, colour = "Data")) +
geom_line(aes(y=.fitted, colour = "Fitted")) +
xlab("Year") + ylab(NULL) +
ggtitle("Polymer changes for time series at STS") +
guides(colour = guide_legend(title = NULL))
augment(fit.polymer) %>%
ggplot(aes(x=poly.gm3, y=.fitted,
colour = factor(year(Time)))) +
geom_point() +
ylab("Fitted (Predicted values)") +
xlab("Data (Actual values)") +
ggtitle("Annual polymer for time series observing at STS") + scale_colour_brewer(palette = "Dark2", name = "Year") +
geom_abline(intercept = 0, slope = 1)
fit_feedsludge <- dat %>%
model(TSLM(feedsludge ~ trend() + season()))
report(fit_feedsludge)
## Series: feedsludge
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -399.894 -62.255 -3.279 63.257 442.390
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 228.1113 12.6896 17.976 <2e-16 ***
## trend() 0.1674 0.3589 0.466 0.6411
## season()year2 -13.6542 17.2559 -0.791 0.4290
## season()year3 -17.7537 16.4307 -1.081 0.2802
## season()year4 1.8297 16.7836 0.109 0.9132
## season()year5 30.5142 16.6750 1.830 0.0676 .
## season()year6 148.9243 16.8836 8.821 <2e-16 ***
## season()year7 221.3753 16.5928 13.342 <2e-16 ***
## season()year8 288.8968 17.3412 16.660 <2e-16 ***
## season()year9 211.2451 17.3475 12.177 <2e-16 ***
## season()year10 258.0927 17.2962 14.922 <2e-16 ***
## season()year11 -48.9240 17.1823 -2.847 0.0045 **
## season()year12 -18.6457 17.1719 -1.086 0.2778
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 109.5 on 982 degrees of freedom
## Multiple R-squared: 0.5493, Adjusted R-squared: 0.5438
## F-statistic: 99.73 on 12 and 982 DF, p-value: < 2.22e-16
fc_feedsludge1 <- forecast(fit_feedsludge)
fc_feedsludge1 %>% autoplot(dat) +
ggtitle("Forecasts of Feed sludge using regression") +
xlab("Year") + ylab("m3")
fourier_feedsludge <- dat %>%
model(TSLM(feedsludge ~ trend() + fourier(K=2)))
report(fourier_feedsludge)
## Series: feedsludge
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -416.14 -65.11 -9.70 58.37 442.54
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 320.03759 7.98916 40.059 <2e-16 ***
## trend() -0.02715 0.38075 -0.071 0.9432
## fourier(K = 2)C1_12 -130.51859 5.27249 -24.755 <2e-16 ***
## fourier(K = 2)S1_12 -76.13941 5.44989 -13.971 <2e-16 ***
## fourier(K = 2)C2_12 13.33410 5.24971 2.540 0.0112 *
## fourier(K = 2)S2_12 51.35025 5.33158 9.631 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 117.3 on 989 degrees of freedom
## Multiple R-squared: 0.4791, Adjusted R-squared: 0.4765
## F-statistic: 181.9 on 5 and 989 DF, p-value: < 2.22e-16
fc_feedsludge2 <- forecast(fourier_feedsludge)
fc_feedsludge2 %>% autoplot(dat) +
ggtitle("Forecasts of Feed sludge using regression") +
xlab("Year") + ylab("m3")
fit_rwntu <- dat %>%
model(TSLM(rwntu ~ trend() + season()))
report(fit_rwntu)
## Series: rwntu
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -51.282 -10.592 -1.893 7.145 116.793
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.52461 2.55566 7.248 8.53e-13 ***
## trend() 0.53528 0.07228 7.406 2.80e-13 ***
## season()year2 -3.95627 3.47531 -1.138 0.255234
## season()year3 -4.82740 3.30910 -1.459 0.144933
## season()year4 -4.52723 3.38018 -1.339 0.180768
## season()year5 4.01594 3.35832 1.196 0.232056
## season()year6 22.87473 3.40032 6.727 2.93e-11 ***
## season()year7 43.60297 3.34176 13.048 < 2e-16 ***
## season()year8 69.11361 3.49249 19.789 < 2e-16 ***
## season()year9 66.41700 3.49374 19.010 < 2e-16 ***
## season()year10 48.48333 3.48341 13.918 < 2e-16 ***
## season()year11 11.52860 3.46049 3.331 0.000896 ***
## season()year12 -2.65420 3.45840 -0.767 0.442988
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.06 on 982 degrees of freedom
## Multiple R-squared: 0.626, Adjusted R-squared: 0.6214
## F-statistic: 137 on 12 and 982 DF, p-value: < 2.22e-16
fc_rwntu1 <- forecast(fit_rwntu)
fc_rwntu1 %>% autoplot(dat) +
ggtitle("Forecasts of Turdibility using regression") +
xlab("Year") + ylab("NTU")
fourier_rwntu <- dat %>%
model(TSLM(rwntu ~ trend() + fourier(K=2)))
report(fourier_rwntu)
## Series: rwntu
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -50.923 -11.506 -1.964 7.774 121.263
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.81557 1.51169 26.338 < 2e-16 ***
## trend() 0.50906 0.07204 7.066 3.01e-12 ***
## fourier(K = 2)C1_12 -26.72269 0.99765 -26.786 < 2e-16 ***
## fourier(K = 2)S1_12 -24.22844 1.03121 -23.495 < 2e-16 ***
## fourier(K = 2)C2_12 0.61489 0.99334 0.619 0.536
## fourier(K = 2)S2_12 13.02600 1.00883 12.912 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.2 on 989 degrees of freedom
## Multiple R-squared: 0.6185, Adjusted R-squared: 0.6165
## F-statistic: 320.6 on 5 and 989 DF, p-value: < 2.22e-16
fc_rwntu2 <- forecast(fourier_rwntu)
fc_rwntu2 %>% autoplot(dat) +
ggtitle("Forecasts of Turdibility using regression") +
xlab("Year") + ylab("NTU")
fit_poly <- dat %>%
model(TSLM(poly.gm3 ~ trend() + season()))
report(fit_poly)
## Series: poly.gm3
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.24939 -0.05213 -0.01332 0.04623 0.83896
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.080783 0.010290 7.850 1.08e-14 ***
## trend() 0.001630 0.000291 5.600 2.79e-08 ***
## season()year2 0.009723 0.013994 0.695 0.4874
## season()year3 -0.011053 0.013324 -0.830 0.4070
## season()year4 -0.010419 0.013610 -0.766 0.4441
## season()year5 0.004871 0.013522 0.360 0.7188
## season()year6 0.081569 0.013692 5.958 3.56e-09 ***
## season()year7 0.131035 0.013456 9.738 < 2e-16 ***
## season()year8 0.179111 0.014063 12.737 < 2e-16 ***
## season()year9 0.134386 0.014068 9.553 < 2e-16 ***
## season()year10 0.122477 0.014026 8.732 < 2e-16 ***
## season()year11 0.062332 0.013934 4.473 8.60e-06 ***
## season()year12 0.025017 0.013926 1.796 0.0727 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08881 on 982 degrees of freedom
## Multiple R-squared: 0.3792, Adjusted R-squared: 0.3716
## F-statistic: 49.98 on 12 and 982 DF, p-value: < 2.22e-16
fc_poly1 <- forecast(fit_poly)
fc_poly1 %>% autoplot(dat) +
ggtitle("Forecasts of polymer using regression") +
xlab("Year") + ylab("g/m3")
fourier_poly <- dat %>%
model(TSLM(poly.gm3 ~ trend() + fourier(K=2)))
report(fourier_poly)
## Series: poly.gm3
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.27078 -0.05252 -0.01274 0.04607 0.84199
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.141327 0.006063 23.309 < 2e-16 ***
## trend() 0.001630 0.000289 5.641 2.21e-08 ***
## fourier(K = 2)C1_12 -0.061887 0.004001 -15.466 < 2e-16 ***
## fourier(K = 2)S1_12 -0.060981 0.004136 -14.744 < 2e-16 ***
## fourier(K = 2)C2_12 0.011647 0.003984 2.923 0.00354 **
## fourier(K = 2)S2_12 0.019969 0.004046 4.935 9.39e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08903 on 989 degrees of freedom
## Multiple R-squared: 0.3716, Adjusted R-squared: 0.3685
## F-statistic: 117 on 5 and 989 DF, p-value: < 2.22e-16
fc_poly2 <- forecast(fourier_poly)
fc_poly2 %>% autoplot(dat) +
ggtitle("Forecasts of polymer using regression") +
xlab("Year") + ylab("g/m3")
Choose “fit.polymer” model to analyse
fit.polymer %>% gg_tsresiduals()
augment(fit.polymer) %>%
features(.resid, ljung_box, dog = 5, lag = 18)
## # A tibble: 1 x 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 tslm 1041. 0
df <- left_join(dat, residuals(fit.polymer), by = "Time")
p1 <- ggplot(df, aes(x=feedsludge, y=.resid)) +
geom_point() + ylab("Residuals")
p2 <- ggplot(df, aes(x=rwntu, y=.resid)) +
geom_point() + ylab("Residuals")
p3 <- ggplot(df, aes(x=poly.gm3, y=.resid)) +
geom_point() + ylab("Residuals")
grid.arrange(p1, p2, p3, ncol = 3)
augment(fit.polymer) %>%
ggplot(aes(x=.fitted, y=.resid)) +
geom_point() +
labs(x = "Fitted", y = "Residuals")
fit1 <- dat %>%
model(ARIMA(poly.gm3 ~ feedsludge))
report(fit1)
## Series: poly.gm3
## Model: LM w/ ARIMA(0,1,1) errors
##
## Coefficients:
## ma1 feedsludge
## -0.7945 3e-04
## s.e. 0.0220 0e+00
##
## sigma^2 estimated as 0.004958: log likelihood=1227.56
## AIC=-2449.12 AICc=-2449.09 BIC=-2434.41
fit2 <- dat %>%
model(ARIMA(poly.gm3 ~ rwntu))
report(fit2)
## Series: poly.gm3
## Model: LM w/ ARIMA(4,1,1) errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 rwntu
## 0.1763 0.1436 0.1318 0.1129 -0.9450 6e-04
## s.e. 0.0377 0.0363 0.0353 0.0345 0.0198 1e-04
##
## sigma^2 estimated as 0.005799: log likelihood=1151.6
## AIC=-2289.2 AICc=-2289.09 BIC=-2254.89
fit <- dat %>%
model(ARIMA(poly.gm3 ~ feedsludge + rwntu))
report(fit)
## Series: poly.gm3
## Model: LM w/ ARIMA(0,1,1) errors
##
## Coefficients:
## ma1 feedsludge rwntu
## -0.7988 3e-04 3e-04
## s.e. 0.0221 0e+00 1e-04
##
## sigma^2 estimated as 0.004933: log likelihood=1230.54
## AIC=-2453.08 AICc=-2453.04 BIC=-2433.48
ARIMA of “fit” is better than “fit1” and “fit2” (there is the least AICc value).
Equations :
polymer = 3e^-4 * feedsludge + 3e^-4*rwntu + eta(t)
eta(t) = eps(t) - 0.8 * eps(t-1)
eps(t) ~ NID(0, 0.005).
bind_rows(
"Regression Errors" = residuals(fit1, type = "regression"),
"ARIMA Errors" = residuals(fit1, type = "innovation"),
.id = "type"
) %>%
ggplot(aes(x=Time, y=.resid)) +
geom_line() +
facet_grid(vars(type), scales = "free_y") +
xlab("Year") + ylab(NULL)
bind_rows(
"Regression Errors" = residuals(fit2, type = "regression"),
"ARIMA Errors" = residuals(fit2, type = "innovation"),
.id = "type"
) %>%
ggplot(aes(x=Time, y=.resid)) +
geom_line() +
facet_grid(vars(type), scales = "free_y") +
xlab("Year") + ylab(NULL)
bind_rows(
"Regression Errors" = residuals(fit, type = "regression"),
"ARIMA Errors" = residuals(fit, type = "innovation"),
.id = "type"
) %>%
ggplot(aes(x=Time, y=.resid)) +
geom_line() +
facet_grid(vars(type), scales = "free_y") +
xlab("Year") + ylab(NULL)
fit1 %>% gg_tsresiduals()
fit2 %>% gg_tsresiduals()
fit %>% gg_tsresiduals()
augment(fit1) %>%
features(.resid, ljung_box, dof = 5, lag = 18)
## # A tibble: 1 x 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 ARIMA(poly.gm3 ~ feedsludge) 37.4 0.000358
augment(fit2) %>%
features(.resid, ljung_box, dof = 5, lag = 18)
## # A tibble: 1 x 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 ARIMA(poly.gm3 ~ rwntu) 22.2 0.0528
augment(fit) %>%
features(.resid, ljung_box, dof = 5, lag = 18)
## # A tibble: 1 x 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 ARIMA(poly.gm3 ~ feedsludge + rwntu) 38.9 0.000206
poly_future <- new_data(dat, 24) %>%
mutate(
feedsludge = mean(dat$feedsludge),
rwntu = mean(dat$rwntu),
poly.gm3 = mean(dat$poly.gm3)
)
forecast(fit, new_data = poly_future) %>%
autoplot(dat) +
xlab("Year") + ylab("Average change")
Comparing AICc values of TSLM and ARIMA which were chosen, the regression model had AICc lowest.
So, the useful model for “optimize polymer” in sludge treatment at STS :
poly.gm3 = 0.02 + 0.003 * feedsludge + 0.0009 * rwntu