Import údajov
z mojej bakalárskej práce
Načítam si CSV súbor s dátami o elektromobilite v krajinách V4.
# Attach the packages used below, then read the CSV dataset into `data`
# (the relative path is resolved against the current working directory).
library(dplyr)
library(ggplot2)
data <- read.csv(file = "data_r_comma_utf8.csv")
dim(data) # number of rows and columns
[1] 50 12
[1] "Krajina" "Rok" "BEV"
[4] "EPI" "HDP" "BEV_PHEV"
[7] "co2" "res_develop" "r_sources"
[10] "unvest_man" "invest_sources" "invest_transport"
Rows: 50
Columns: 12
$ Krajina <chr> "CZ", "CZ", "CZ", "CZ", "CZ", "CZ", "CZ", "CZ",…
$ Rok <int> 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016,…
$ BEV <dbl> 22451, 13928, 9969, 7109, 3897, 3047, 2309, 176…
$ EPI <dbl> 60.700, 59.900, 65.450, 71.000, 69.340, 67.680,…
$ HDP <int> 21660, 21910, 21830, 20980, 22050, 21320, 20760…
$ BEV_PHEV <dbl> 33448, 22646, 15128, 9835, 5223, 3906, 2877, 21…
$ co2 <dbl> 98918217, 116453963, 121829204, 119492626, 1262…
$ res_develop <dbl> 1.83, 1.89, 1.93, 1.95, 1.90, 1.88, 1.75, 1.65,…
$ r_sources <dbl> 510.170, 534.492, 546.272, 529.730, 496.716, 44…
$ unvest_man <dbl> 0.08, 0.09, 0.10, 0.11, 0.12, 0.08, 0.08, 0.07,…
$ invest_sources <dbl> 0.05, 0.06, 0.09, 0.09, 0.08, 0.06, 0.07, 0.08,…
$ invest_transport <dbl> 0.20, 0.21, 0.24, 0.30, 0.25, 0.31, 0.23, 0.27,…
library(knitr)
library(kableExtra)
# Show the first 10 rows of `data` as a styled kable table.
preview_tbl <- kbl(head(data, 10), caption = "Prvých 10 riadkov", booktabs = TRUE)
preview_tbl <- kable_styling(
  preview_tbl,
  full_width = FALSE,
  bootstrap_options = c("striped", "condensed"),
  stripe_color = "#ECFDF5" # very light green
)
# Row 0 is styled as the header: bold white text on an emerald background.
row_spec(preview_tbl, 0, bold = TRUE, color = "white", background = "#10B981")
Prvých 10 riadkov
Krajina |
Rok |
BEV |
EPI |
HDP |
BEV_PHEV |
co2 |
res_develop |
r_sources |
unvest_man |
invest_sources |
invest_transport |
CZ |
2023 |
22451 |
60.700 |
21660 |
33448 |
98918217 |
1.83 |
510.170 |
0.08 |
0.05 |
0.20 |
CZ |
2022 |
13928 |
59.900 |
21910 |
22646 |
116453963 |
1.89 |
534.492 |
0.09 |
0.06 |
0.21 |
CZ |
2021 |
9969 |
65.450 |
21830 |
15128 |
121829204 |
1.93 |
546.272 |
0.10 |
0.09 |
0.24 |
CZ |
2020 |
7109 |
71.000 |
20980 |
9835 |
119492626 |
1.95 |
529.730 |
0.11 |
0.09 |
0.30 |
CZ |
2019 |
3897 |
69.340 |
22050 |
5223 |
126296221 |
1.90 |
496.716 |
0.12 |
0.08 |
0.25 |
CZ |
2018 |
3047 |
67.680 |
21320 |
3906 |
125679525 |
1.88 |
442.219 |
0.08 |
0.06 |
0.31 |
CZ |
2017 |
2309 |
76.175 |
20760 |
2877 |
123141318 |
1.75 |
462.759 |
0.08 |
0.07 |
0.23 |
CZ |
2016 |
1762 |
84.670 |
19760 |
2102 |
120794086 |
1.65 |
492.898 |
0.07 |
0.08 |
0.27 |
CZ |
2015 |
1326 |
81.385 |
19270 |
1530 |
117895260 |
1.91 |
487.991 |
0.08 |
0.10 |
0.36 |
CZ |
2014 |
855 |
78.100 |
18370 |
934 |
117541172 |
1.94 |
460.295 |
0.09 |
0.11 |
0.32 |
#náhľad prvých 10 riadkov
Deskriptívna štatistika dát
library(dplyr)
library(gt)
# 1) Descriptive statistics for all numeric variables
# Names of the columns of `data` for which is.numeric() is TRUE.
num_vars <- names(data)[vapply(data, is.numeric, logical(1))]
# Summarise a single numeric vector as a one-row data frame.
#
# x: numeric vector; missing values are excluded from every statistic.
# Returns a data.frame with the columns n (count of non-missing values),
# mean, sd, min, q25, median, q75 and max.
get_stats <- function(x) {
  # Both quartiles come from one quantile() call; names = FALSE drops the
  # "25%"/"75%" labels so plain numbers are stored in the data frame.
  quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)

  data.frame(
    n = sum(!is.na(x)),
    mean = mean(x, na.rm = TRUE),
    sd = sd(x, na.rm = TRUE),
    min = min(x, na.rm = TRUE),
    q25 = quartiles[1],
    median = median(x, na.rm = TRUE),
    q75 = quartiles[2],
    max = max(x, na.rm = TRUE)
  )
}
# One summary row per numeric variable; the Premenna column carries the
# variable name. The rows are then stacked into a single table.
summary_rows <- lapply(
  num_vars,
  \(var_name) cbind(Premenna = var_name, get_stats(data[[var_name]]))
)
stats_table <- do.call(rbind, summary_rows)
# 2) Styled table: green header, full borders, row striping
# The number formatters select the real column names of `stats_table`
# (mean, sd, ...). cols_label() only changes the displayed labels, and a named
# selection such as c(Priemer = mean) acts as a rename, so the formatter is
# not attached to the existing columns and values print unformatted
# (e.g. 2.018500e+03).
gt(stats_table) |>
  cols_label(
    Premenna = "Premenná",
    n = "n",
    mean = "Priemer",
    sd = "SD",
    min = "Min",
    q25 = "P25",
    median = "Medián",
    q75 = "P75",
    max = "Max"
  ) |>
  fmt_number(
    columns = c("mean", "sd", "min", "q25", "median", "q75", "max"),
    decimals = 2, sep_mark = " ", dec_mark = ","
  ) |>
  # Counts are whole numbers, so they are shown without decimals.
  fmt_number(columns = n, decimals = 0, sep_mark = " ", dec_mark = ",") |>
  tab_header(title = md("**Deskriptívna štatistika (numerické premenné)**")) |>
  tab_style(
    style = list(
      cell_fill(color = "#10B981"),
      cell_text(color = "white", weight = "bold")
    ),
    locations = cells_column_labels(everything())
  ) |>
  opt_row_striping() |>
  tab_options(
    row.striping.background_color = "#ECFDF5",
    table.border.top.style = "solid", table.border.top.width = px(1),
    table.border.bottom.style = "solid", table.border.bottom.width = px(1),
    column_labels.border.top.style = "solid",
    column_labels.border.bottom.style = "solid",
    table_body.hlines.style = "solid", table_body.hlines.width = px(1),
    table_body.vlines.style = "solid", table_body.vlines.width = px(1)
  )
Deskriptívna štatistika (numerické premenné) |
Premenná |
n |
Priemer |
SD |
Min |
P25 |
Medián |
P75 |
Max |
Rok |
50 |
2.018500e+03 |
2.901442e+00 |
2014.000 |
2.016000e+03 |
2.018500e+03 |
2.021000e+03 |
2.023000e+03 |
BEV |
50 |
1.592708e+04 |
3.018585e+04 |
113.000 |
9.110000e+02 |
4.155426e+03 |
1.407975e+04 |
1.632455e+05 |
EPI |
50 |
6.788088e+01 |
9.180603e+00 |
46.000 |
6.350000e+01 |
6.830000e+01 |
7.328125e+01 |
8.542000e+01 |
HDP |
50 |
1.947160e+04 |
6.655541e+03 |
11360.000 |
1.452250e+04 |
1.732500e+04 |
2.178750e+04 |
3.330000e+04 |
BEV_PHEV |
50 |
2.771188e+04 |
5.478741e+04 |
201.000 |
1.575500e+03 |
7.198500e+03 |
2.848325e+04 |
2.902244e+05 |
co2 |
50 |
7.344969e+08 |
1.319764e+09 |
28339113.568 |
5.459271e+07 |
1.177182e+08 |
1.294116e+08 |
3.565184e+09 |
res_develop |
50 |
1.523600e+00 |
4.820101e-01 |
0.790 |
1.067500e+00 |
1.450000e+00 |
1.925000e+00 |
2.310000e+00 |
r_sources |
50 |
6.284690e+02 |
5.298468e+02 |
129.454 |
3.178076e+02 |
4.373183e+02 |
5.248400e+02 |
2.010715e+03 |
unvest_man |
50 |
1.468000e-01 |
2.043550e-01 |
0.030 |
5.000000e-02 |
7.500000e-02 |
1.100000e-01 |
1.000000e+00 |
invest_sources |
50 |
1.470000e-01 |
7.412758e-02 |
0.040 |
9.000000e-02 |
1.400000e-01 |
1.900000e-01 |
3.500000e-01 |
invest_transport |
50 |
2.660000e-01 |
1.256168e-01 |
0.090 |
1.700000e-01 |
2.600000e-01 |
3.200000e-01 |
7.200000e-01 |
Korelačná matica a heatmapa
library(dplyr); library(gt); library(viridisLite)
# Numeric columns and their pairwise correlations, rounded to 2 decimals
num <- Filter(is.numeric, data)
cm <- round(cor(num, use = "pairwise.complete.obs"), 2)

# Reorder the variables by hierarchical clustering so that variables with
# similar correlation profiles end up next to each other
ord <- hclust(dist(cm))$order
cm <- cm[ord, ord]

# Data frame for gt: variable names as the first column, default row names
tbl <- cbind(Premenná = rownames(cm), as.data.frame(cm), row.names = NULL)

# Build the table in one pipeline: title, green header, 2-decimal numbers and
# a colour scale over the fixed [-1, 1] correlation range
g <- gt(tbl) |>
  tab_header(title = md("**Korelačná matica (numerické premenné)**")) |>
  tab_style(
    style = list(
      cell_fill(color = "#10B981"),
      cell_text(color = "white", weight = "bold")
    ),
    locations = cells_column_labels(everything())
  ) |>
  fmt_number(columns = -Premenná, decimals = 2) |>
  data_color(
    columns = -Premenná,
    colors = scales::col_numeric(
      palette = viridis(11, option = "G"),
      domain = c(-1, 1)
    )
  )
g
Korelačná matica (numerické premenné) |
Premenná |
HDP |
res_develop |
Rok |
BEV |
BEV_PHEV |
EPI |
invest_transport |
invest_sources |
unvest_man |
co2 |
r_sources |
HDP |
1.00 |
0.81 |
0.19 |
0.52 |
0.54 |
−0.02 |
−0.03 |
0.20 |
−0.30 |
−0.42 |
−0.31 |
res_develop |
0.81 |
1.00 |
0.15 |
0.45 |
0.46 |
−0.06 |
0.24 |
−0.26 |
−0.04 |
−0.30 |
−0.20 |
Rok |
0.19 |
0.15 |
1.00 |
0.54 |
0.53 |
−0.62 |
−0.49 |
−0.07 |
0.32 |
−0.03 |
0.04 |
BEV |
0.52 |
0.45 |
0.54 |
1.00 |
1.00 |
−0.33 |
−0.05 |
0.12 |
0.06 |
−0.06 |
−0.04 |
BEV_PHEV |
0.54 |
0.46 |
0.53 |
1.00 |
1.00 |
−0.33 |
−0.05 |
0.14 |
0.01 |
−0.05 |
−0.04 |
EPI |
−0.02 |
−0.06 |
−0.62 |
−0.33 |
−0.33 |
1.00 |
0.22 |
−0.02 |
−0.35 |
−0.20 |
−0.21 |
invest_transport |
−0.03 |
0.24 |
−0.49 |
−0.05 |
−0.05 |
0.22 |
1.00 |
−0.46 |
−0.03 |
−0.14 |
−0.25 |
invest_sources |
0.20 |
−0.26 |
−0.07 |
0.12 |
0.14 |
−0.02 |
−0.46 |
1.00 |
−0.22 |
−0.06 |
−0.05 |
unvest_man |
−0.30 |
−0.04 |
0.32 |
0.06 |
0.01 |
−0.35 |
−0.03 |
−0.22 |
1.00 |
−0.03 |
−0.07 |
co2 |
−0.42 |
−0.30 |
−0.03 |
−0.06 |
−0.05 |
−0.20 |
−0.14 |
−0.06 |
−0.03 |
1.00 |
0.97 |
r_sources |
−0.31 |
−0.20 |
0.04 |
−0.04 |
−0.04 |
−0.21 |
−0.25 |
−0.05 |
−0.07 |
0.97 |
1.00 |
library(ggplot2)
# Heatmap of the correlation matrix.
# `cor_mat` is not created anywhere in the visible part of this script, so
# fall back to the rounded pairwise correlations of the numeric columns of
# `data` when it does not exist; an already defined `cor_mat` is used as is.
if (!exists("cor_mat")) {
  cor_mat <- round(
    cor(Filter(is.numeric, data), use = "pairwise.complete.obs"),
    2
  )
}

# Optional: nicer variable order via hierarchical clustering
ord <- hclust(dist(cor_mat))$order
cor_reord <- cor_mat[ord, ord]

# Long format: one row per (Var1, Var2) pair with its correlation
df <- as.data.frame(as.table(cor_reord))
names(df) <- c("Var1", "Var2", "Corr")

ggplot(df, aes(Var1, Var2, fill = Corr)) +
  geom_tile() +
  geom_text(aes(label = Corr), size = 3) +
  # Fix the fill scale to the full correlation range
  scale_fill_gradient2(limits = c(-1, 1)) +
  coord_fixed() +
  labs(title = "Korelačná matica (heatmapa)", x = NULL, y = NULL, fill = "r") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

