Wczytanie danych oraz pakietów.
PoczÄ…tkowa wizualizacja danych
zgony_SWE <- ts(zgony$Sweden)
zgony_POR <- ts(zgony$Portugal)
par(mfrow = c(2, 1))
plot(zgony_SWE)
plot(zgony_POR)
Odpowiednie przekształcenia danych + wyliczenie średniej umieralności tygodniowej.
zgony <- zgony %>%
separate(TIME, into = c("Rok", "Tydzień"), sep = "-W")
zgony$Rok <- as.numeric(zgony$Rok)
zgony$Tydzień <- as.numeric(zgony$Tydzień)
wybrane <- zgony %>%
filter(Rok >= 2014 & Rok <= 2019)
Baseline <- wybrane %>%
group_by(Tydzień) %>%
summarise(Oczekiwane_SWE = mean(Sweden, na.rm = TRUE),
Oczekiwane_POR = mean(Portugal, na.rm=TRUE))
Baseline2 <- Baseline %>%
pivot_longer(cols=-Tydzień, names_to="Kraj", values_to="Umieralnosc")
Wykres pokazujący kształtowanie się oczekiwanej umieralności w Szwecji i Portugalii.
ggplot(Baseline2, aes(x = Tydzień, y = Umieralnosc, color=Kraj)) +
geom_line() +
geom_smooth(method = "loess", se = FALSE, color = "red") + # Add a smoothing line
labs(title = "Oczekiwana umieralność w Szwecji i Portugalii w ujęciu tygodniowym",
x = "Tydzień",
y = "Surowy współczynnik zgonów") +
scale_y_continuous(limits = c(0, max(Baseline$Oczekiwane_POR)), expand = c(0, 0)) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
Dalsze przekształcenia.
zgony <- zgony %>%
left_join(Baseline, by = "Tydzień")
zgony <- zgony %>%
mutate(Nadumieralnosc_SWE = Sweden - Oczekiwane_SWE,
Nadumieralnosc_POR = Portugal - Oczekiwane_POR)
zgony <- as.data.frame(zgony)
pandemia <- zgony %>%
filter(Rok >= 2020)
pandemia_ts_SWE <- ts(pandemia[, c("Oczekiwane_SWE", "Nadumieralnosc_SWE")])
pandemia_df_SWE <- data.frame(Tydzień = time(pandemia_ts_SWE), as.data.frame(pandemia_ts_SWE))
pandemia_ts_POR <- ts(pandemia[, c("Oczekiwane_POR", "Nadumieralnosc_POR")])
pandemia_df_POR <- data.frame(Tydzień = time(pandemia_ts_POR), as.data.frame(pandemia_ts_POR))
pandemia_df <- pandemia_df_SWE %>%
left_join(pandemia_df_POR, by="Tydzień")
Wizualizacje oczekiwanej umieralności i nadumieralności w Szwecji i Portugalii
# Szwecja
ggplot(pandemia_df_SWE, aes(x = Tydzień, y = Oczekiwane_SWE, group = 1)) +
geom_line(aes(color = "Oczekiwane_SWE"), linetype = "dashed") +
geom_line(aes(x = Tydzień, y = Nadumieralnosc_SWE, color = "Nadumieralność"), linetype = "solid") +
labs(title = "Tygodniowa nadumieralność w Szwecji",
y = "Surowy współczynnik zgonów",
color = "Legenda") +
theme_minimal()
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
# Portugalia
ggplot(pandemia_df_POR, aes(x = Tydzień, y = Oczekiwane_POR, group = 1)) +
geom_line(aes(color = "Oczekiwane_POR"), linetype = "dashed") +
geom_line(aes(x = Tydzień, y = Nadumieralnosc_POR, color = "Nadumieralność"), linetype = "solid") +
labs(title = "Tygodniowa nadumieralność w Portugalii",
y = "Surowy współczynnik zgonów",
color = "Legenda") +
theme_minimal()
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
#Razem
ggplot(pandemia_df, aes(x = Tydzień, y = Oczekiwane_SWE, group = 2)) +
geom_line(aes(color = "Oczekiwane_SWE"), linetype = "dashed") +
geom_line(aes(x = Tydzień, y = Nadumieralnosc_SWE, color = "Nadumieralność_SWE"), linetype = "solid") +
geom_line(aes(x=Tydzień, y = Oczekiwane_POR, color = "Oczekiwane_POR"), linetype = "dashed") +
geom_line(aes(x = Tydzień, y = Nadumieralnosc_POR, color = "Nadumieralność_POR"), linetype = "solid") +
labs(title = "Tygodniowa nadumieralność w Szwecji i Portugalii",
y = "Surowy współczynnik zgonów",
color = "Legenda") +
theme_minimal()+
scale_x_continuous(breaks = seq(0, 200, by = 20))+
scale_y_continuous(breaks = seq(0, 3000, by = 500))
1, Metoda średniej ruchomej dla k=5
#Wygenerowanie wygładzonego szeregu dla k=5
MA_nad_SWE <- ma(pandemia_df$Nadumieralnosc_SWE,order=5,centre=FALSE)
MA_nad_POR <- ma(pandemia_df$Nadumieralnosc_POR,order=5,centre=FALSE)
F1_SWE<-forecast(MA_nad_SWE,h=16)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
F1_POR<-forecast(MA_nad_POR,h=16)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
print(F1_SWE$mean)
## Time Series:
## Start = 195
## End = 210
## Frequency = 1
## [1] -21.43053 -27.29058 -32.12829 -36.12203 -39.41902 -42.14083 -44.38779
## [8] -46.24276 -47.77410 -49.03830 -50.08194 -50.94351 -51.65477 -52.24195
## [15] -52.72669 -53.12686
print(F1_POR$mean)
## Time Series:
## Start = 195
## End = 210
## Frequency = 1
## [1] 138.706545 94.724742 59.489524 31.261471 8.647081 -9.470024
## [7] -23.984213 -35.611991 -44.927373 -52.390221 -58.368946 -63.158692
## [13] -66.995910 -70.070028 -72.532800 -74.505806
accuracy(F1_SWE)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.03404831 24.40643 18.30514 -13.53938 65.06884 0.6501766
## ACF1
## Training set 0.1435571
accuracy(F1_POR)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.04338501 53.17477 36.94078 18.24234 55.19877 0.7165804
## ACF1
## Training set 0.3399033
plot(F1_SWE)
plot(F1_POR)
2, Metoda średniej ruchomej dla k=10
#Wygenerowanie wygładzonego szeregu dla k=10
MA_nad_SWE2 <- ma(pandemia_df$Nadumieralnosc_SWE,order=10,centre=FALSE)
MA_nad_POR2 <- ma(pandemia_df$Nadumieralnosc_POR,order=10,centre=FALSE)
F2_SWE<-forecast(MA_nad_SWE2,h=16)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
F2_POR<-forecast(MA_nad_POR2,h=16)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
print(F2_SWE$mean)
## Time Series:
## Start = 192
## End = 207
## Frequency = 1
## [1] -19.280784 -14.795772 -10.310761 -5.825749 -1.340738 3.144274
## [7] 7.629286 12.114297 16.599309 21.084321 25.569332 30.054344
## [13] 34.539355 39.024367 43.509379 47.994390
print(F2_POR$mean)
## Time Series:
## Start = 192
## End = 207
## Frequency = 1
## [1] 213.6449 207.4351 202.1934 197.7689 194.0341 190.8816 188.2205 185.9743
## [9] 184.0782 182.4778 181.1268 179.9865 179.0239 178.2114 177.5256 176.9466
accuracy(F2_SWE)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.1892446 13.26963 10.25695 -14.62167 45.66112 0.5065675 0.1386607
accuracy(F2_POR)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.07932332 25.20058 18.31797 11.2001 23.90729 0.6007941 0.1604187
plot(F2_SWE)
plot(F2_POR)
# Wygenerowanie modelu wygładzenia wykładniczego dla alpha=0.2
SES_nad_SWE<-ses(pandemia_df$Nadumieralnosc_SWE,alpha=0.2,h=16)
SES_nad_POR<-ses(pandemia_df$Nadumieralnosc_POR,alpha=0.2,h=16)
F3_SWE<-forecast(SES_nad_SWE)
F3_POR<-forecast(SES_nad_POR)
print(F3_SWE$mean)
## Time Series:
## Start = 197
## End = 212
## Frequency = 1
## [1] -20.21454 -20.21454 -20.21454 -20.21454 -20.21454 -20.21454 -20.21454
## [8] -20.21454 -20.21454 -20.21454 -20.21454 -20.21454 -20.21454 -20.21454
## [15] -20.21454 -20.21454
print(F3_POR$mean)
## Time Series:
## Start = 197
## End = 212
## Frequency = 1
## [1] 192.8246 192.8246 192.8246 192.8246 192.8246 192.8246 192.8246 192.8246
## [9] 192.8246 192.8246 192.8246 192.8246 192.8246 192.8246 192.8246 192.8246
accuracy(F3_SWE)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.097283 146.5308 99.04124 -22.06838 292.7339 1.418078 0.7846532
accuracy(F3_POR)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 6.931087 273.1849 174.3677 295.5858 391.946 1.320454 0.7484166
plot(F3_SWE)
plot(F3_POR)
# Prognoza na podstawie metody Holta
SWE <-ts(pandemia_df$Nadumieralnosc_SWE,frequency = 52,start = c(1,1))
POR <-ts(pandemia_df$Nadumieralnosc_POR,frequency = 52,start = c(1,1))
HW_SWE<-HoltWinters(SWE)
HW_POR<-HoltWinters(POR)
F4_SWE<-forecast(HW_SWE,h=16)
F4_POR<-forecast(HW_POR,h=16)
print(F4_SWE$mean)
## Time Series:
## Start = c(4, 41)
## End = c(5, 4)
## Frequency = 52
## [1] -95.85103 -39.61053 -26.16035 22.06077 65.16268 194.81654 323.48915
## [8] 366.03404 395.60998 488.48043 608.56561 595.54953 548.90161 470.43071
## [15] 403.81289 366.42674
print(F4_POR$mean)
## Time Series:
## Start = c(4, 41)
## End = c(5, 4)
## Frequency = 52
## [1] 124.14961 110.40255 77.22703 341.47820 400.21722 529.06400
## [7] 459.89898 384.07100 480.03058 583.52943 404.24240 111.24881
## [13] 663.76316 922.18694 1716.67437 2137.30442
accuracy(F4_SWE)
## ME RMSE MAE MPE MAPE MASE
## Training set 3.822234 92.93102 65.78039 -84.05355 284.0704 0.4098171
## ACF1
## Training set -0.01864785
accuracy(F4_POR)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.9283638 263.5126 175.1035 40.01226 175.5913 0.4929568 0.3337243
plot(F4_SWE)
plot(F4_POR)