library(tidyverse)
library(lubridate)
library(scales)
library(knitr)
ggplot2::economics –
makroekonomické ukazovatele (nezamestnanosť, osobné príjmy, sentiment…)
po mesiacoch od r. 1967.data_raw <- ggplot2::economics |> as_tibble()
# rýchly náhľad
head(data_raw)
## # A tibble: 6 × 6
## date pce pop psavert uempmed unemploy
## <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1967-07-01 507. 198712 12.6 4.5 2944
## 2 1967-08-01 510. 198911 12.6 4.7 2945
## 3 1967-09-01 516. 199113 11.9 4.6 2958
## 4 1967-10-01 512. 199311 12.9 4.9 3143
## 5 1967-11-01 517. 199498 12.8 4.7 3066
## 6 1967-12-01 525. 199657 11.8 4.8 3018
glue::glue("Počet riadkov: {nrow(data_raw)}, počet stĺpcov: {ncol(data_raw)}")
## Počet riadkov: 574, počet stĺpcov: 6
# library(readr)
# data_raw <- readr::read_csv("cesta/k_suboru.csv")
# # Príklad konverzie dátumu (uprav názvy stĺpcov):
# data_raw <- data_raw |> mutate(datum = lubridate::ymd(datum))
# Premenujeme kľúčové stĺpce do snake_case a doplníme rok/mesiac
data <- data_raw |>
rename(
date = date,
personal_income = pce,
unemployment = unemploy,
population = pop,
savings_rate = psavert
) |>
mutate(
year = year(date),
month = month(date),
year_month = floor_date(date, unit = "month")
)
# Kontrola chýbajúcich hodnôt
colSums(is.na(data))
## date personal_income population savings_rate uempmed
## 0 0 0 0 0
## unemployment year month year_month
## 0 0 0 0
yearly <- data |>
summarise(
avg_unemployment = mean(unemployment, na.rm = TRUE),
avg_income = mean(personal_income, na.rm = TRUE),
.by = year
)
kable(head(yearly), caption = "Súhrny podľa rokov")
| year | avg_unemployment | avg_income |
|---|---|---|
| 1967 | 3012.333 | 514.4667 |
| 1968 | 2797.417 | 556.8417 |
| 1969 | 2830.167 | 603.6500 |
| 1970 | 4127.333 | 646.7250 |
| 1971 | 5021.667 | 699.9250 |
| 1972 | 4875.833 | 768.1500 |
data |>
ggplot(aes(x = year_month, y = unemployment)) +
geom_line(color = "#2C7FB8", linewidth = 0.9) +
labs(
x = "Rok",
y = "Počet nezamestnaných (tis.)",
title = "Trend nezamestnanosti – mesačné údaje"
) +
scale_x_date(date_breaks = "5 years", date_labels = "%Y") +
theme_minimal(base_size = 12)
season_tbl <- data |>
summarise(avg_unemp = mean(unemployment, na.rm = TRUE), .by = month)
season_tbl |>
ggplot(aes(x = month, y = avg_unemp)) +
geom_col(fill = "#41AB5D") +
scale_x_continuous(breaks = 1:12) +
labs(x = "Mesiac", y = "Priemerný počet nezamestnaných",
title = "Priemerná sezónnosť nezamestnanosti") +
theme_minimal(base_size = 12)
Úloha: Vyfiltruj len roky 2000–2009 a vykresli trend
úspor savings_rate.
data |>
filter(year >= 2000, year <= 2009) |>
ggplot(aes(x = year_month, y = savings_rate)) +
geom_line(color = "#FB6A4A") +
labs(x = "Rok", y = "%", title = "Miera úspor 2000–2009") +
theme_minimal()
Úloha: Vypočítaj pre každý rok maximum
unemployment a zobraz top 5 rokov.
max_yearly <- data |> summarise(max_unemp = max(unemployment), .by = year)
max_yearly |> arrange(desc(max_unemp)) |> head(5)
unemployment vs. personal_income.Vytvorím funkciu na tvorbu pohyblivého priemeru s flexibilným oknom a automatickým popisom legendy. Funkciu použijem na tri rôzne stĺpce.
moving_avg <- function(x, n = 6) {
stopifnot(n >= 1)
stats::filter(x, rep(1 / n, n), sides = 2) |> as.numeric()
}
viz_ma <- function(df, var, n = 6, color = "#2C7FB8") {
nm <- rlang::ensym(var)
df |>
mutate(ma = moving_avg({{ nm }}, n = n)) |>
ggplot(aes(x = year_month)) +
geom_line(aes(y = {{ nm }}), alpha = 0.4) +
geom_line(aes(y = ma), color = color, linewidth = 1) +
labs(x = "Rok", y = NULL,
title = glue::glue("{rlang::as_string(nm)} s {n}-mesačným MA")) +
theme_minimal(base_size = 12)
}
viz_ma(data, unemployment, n = 9)
Úloha: Napíš funkciu pct_change(x, k),
ktorá vráti medziročnú percentuálnu zmenu (vhodné pre mesačné dáta –
zmena oproti hodnote spred 12 mesiacov). Aplikuj ju na
personal_income a vizualizuj výsledok.
Moje riešenie:
pct_change <- function(x, k = 12) {
# percentuálna zmena oproti hodnote posunutého okna
c(rep(NA_real_, k), 100 * (x[(k + 1):length(x)] - x[1:(length(x) - k)]) / x[1:(length(x) - k)])
}
plot_pct <- data |>
mutate(pct_income = pct_change(personal_income, k = 12)) |>
ggplot(aes(x = year_month, y = pct_income)) +
geom_hline(yintercept = 0, color = "grey70") +
geom_line(color = "#756bb1") +
labs(x = "Rok", y = "%", title = "Medziročná zmena osobných príjmov") +
theme_minimal(base_size = 12)
plot_pct
Po publikovaní vlož sem odkaz:
RPubs link: https://rpubs.com/VAŠE_MENO/ID_DOKUMENTU
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3; LAPACK version 3.9.0
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## time zone: UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] knitr_1.50 scales_1.4.0 lubridate_1.9.4 forcats_1.0.1
## [5] stringr_1.5.2 dplyr_1.1.4 purrr_1.1.0 readr_2.1.5
## [9] tidyr_1.3.1 tibble_3.3.0 ggplot2_4.0.0 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 jsonlite_2.0.0 crayon_1.5.3 compiler_4.5.1
## [5] tidyselect_1.2.1 jquerylib_0.1.4 yaml_2.3.10 fastmap_1.2.0
## [9] R6_2.6.1 labeling_0.4.3 generics_0.1.4 bslib_0.9.0
## [13] pillar_1.11.1 RColorBrewer_1.1-3 tzdb_0.5.0 rlang_1.1.6
## [17] stringi_1.8.7 cachem_1.1.0 xfun_0.54 sass_0.4.10
## [21] S7_0.2.0 timechange_0.3.0 cli_3.6.5 withr_3.0.2
## [25] magrittr_2.0.4 digest_0.6.37 grid_4.5.1 rstudioapi_0.17.1
## [29] hms_1.1.4 lifecycle_1.0.4 vctrs_0.6.5 evaluate_1.0.5
## [33] glue_1.8.0 farver_2.1.2 rmarkdown_2.30 tools_4.5.1
## [37] pkgconfig_2.0.3 htmltools_0.5.8.1