Import údajov z mojej bakalárskej práce

Načítam si CSV súbor s dátami o elektromobilite v krajinách V4.

# Attach the packages used below, then read the data set from the CSV file.
library(dplyr)
library(ggplot2)

data <- read.csv(file = "data_r_comma_utf8.csv")
dim(data) # number of rows and columns
[1] 50 12
names(data) # column names
 [1] "Krajina"          "Rok"              "BEV"             
 [4] "EPI"              "HDP"              "BEV_PHEV"        
 [7] "co2"              "res_develop"      "r_sources"       
[10] "unvest_man"       "invest_sources"   "invest_transport"
# Compact overview of the data frame: one line per column with its type and
# the first few values.
dplyr::glimpse(data)
Rows: 50
Columns: 12
$ Krajina          <chr> "CZ", "CZ", "CZ", "CZ", "CZ", "CZ", "CZ", "CZ",…
$ Rok              <int> 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016,…
$ BEV              <dbl> 22451, 13928, 9969, 7109, 3897, 3047, 2309, 176…
$ EPI              <dbl> 60.700, 59.900, 65.450, 71.000, 69.340, 67.680,…
$ HDP              <int> 21660, 21910, 21830, 20980, 22050, 21320, 20760…
$ BEV_PHEV         <dbl> 33448, 22646, 15128, 9835, 5223, 3906, 2877, 21…
$ co2              <dbl> 98918217, 116453963, 121829204, 119492626, 1262…
$ res_develop      <dbl> 1.83, 1.89, 1.93, 1.95, 1.90, 1.88, 1.75, 1.65,…
$ r_sources        <dbl> 510.170, 534.492, 546.272, 529.730, 496.716, 44…
$ unvest_man       <dbl> 0.08, 0.09, 0.10, 0.11, 0.12, 0.08, 0.08, 0.07,…
$ invest_sources   <dbl> 0.05, 0.06, 0.09, 0.09, 0.08, 0.06, 0.07, 0.08,…
$ invest_transport <dbl> 0.20, 0.21, 0.24, 0.30, 0.25, 0.31, 0.23, 0.27,…
library(knitr)
library(kableExtra)

# Preview of the first 10 rows, rendered as a styled table.
preview_tbl <- kbl(
  head(data, n = 10),
  caption = "Prvých 10 riadkov",
  booktabs = TRUE
)
preview_tbl <- kable_styling(
  preview_tbl,
  full_width = FALSE,
  bootstrap_options = c("striped", "condensed"),
  stripe_color = "#ECFDF5" # very light green
)
# Row 0 is the header row: bold white text on an emerald background.
row_spec(preview_tbl, 0, bold = TRUE, color = "white", background = "#10B981")
Prvých 10 riadkov
Krajina Rok BEV EPI HDP BEV_PHEV co2 res_develop r_sources unvest_man invest_sources invest_transport
CZ 2023 22451 60.700 21660 33448 98918217 1.83 510.170 0.08 0.05 0.20
CZ 2022 13928 59.900 21910 22646 116453963 1.89 534.492 0.09 0.06 0.21
CZ 2021 9969 65.450 21830 15128 121829204 1.93 546.272 0.10 0.09 0.24
CZ 2020 7109 71.000 20980 9835 119492626 1.95 529.730 0.11 0.09 0.30
CZ 2019 3897 69.340 22050 5223 126296221 1.90 496.716 0.12 0.08 0.25
CZ 2018 3047 67.680 21320 3906 125679525 1.88 442.219 0.08 0.06 0.31
CZ 2017 2309 76.175 20760 2877 123141318 1.75 462.759 0.08 0.07 0.23
CZ 2016 1762 84.670 19760 2102 120794086 1.65 492.898 0.07 0.08 0.27
CZ 2015 1326 81.385 19270 1530 117895260 1.91 487.991 0.08 0.10 0.36
CZ 2014 855 78.100 18370 934 117541172 1.94 460.295 0.09 0.11 0.32
 #náhľad prvých 10 riadkov

Frekvenčná tabuľka podľa stĺpca Krajina

library(dplyr)
library(knitr)
library(kableExtra)
library(scales)

# Frequency table: number of rows per country (most frequent first) and the
# share of each country in the total, formatted as a percentage.
tab_krajina <- data |>
  count(Krajina, sort = TRUE, name = "Počet") |>
  mutate(Podiel = percent(Počet / sum(Počet), accuracy = 0.1))

# Render the table; row 0 is the header row (white bold text on emerald).
tab_krajina |>
  kbl(
    caption = "Počty a podiely podľa krajiny",
    col.names = c("Krajina", "Počet", "Podiel")
  ) |>
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "condensed")
  ) |>
  row_spec(0, bold = TRUE, color = "white", background = "#10B981")
Počty a podiely podľa krajiny
Krajina Počet Podiel
CZ 10 20.0%
EU-avrg 10 20.0%
MR 10 20.0%
PL 10 20.0%
SK 10 20.0%
NA

Deskriptívna štatistika dát

library(dplyr)
library(gt)

# 1) Descriptive statistics for all numeric variables

# Names of the columns of `data` that are numeric.
num_vars <- names(data)[vapply(data, is.numeric, logical(1))]

# Summarise one vector as a one-row data frame.
#
# x: a numeric vector; missing values are ignored in every statistic.
# Returns a data.frame with the columns n (count of non-missing values),
# mean, sd, min, q25, median, q75 and max.
get_stats <- function(x) {
  # Both quartiles in a single call; `unname()` drops the "25%"/"75%" labels.
  quartiles <- unname(quantile(x, probs = c(0.25, 0.75), na.rm = TRUE))

  data.frame(
    n      = sum(!is.na(x)),
    mean   = mean(x, na.rm = TRUE),
    sd     = sd(x, na.rm = TRUE),
    min    = min(x, na.rm = TRUE),
    q25    = quartiles[1],
    median = median(x, na.rm = TRUE),
    q75    = quartiles[2],
    max    = max(x, na.rm = TRUE)
  )
}

# One summary row per numeric variable, labelled with the variable name in the
# `Premenna` column, stacked into a single data frame.
stats_rows <- lapply(num_vars, function(var_name) {
  cbind(Premenna = var_name, get_stats(data[[var_name]]))
})
stats_table <- do.call(rbind, stats_rows)

# 2) Styled table: green header, borders, row striping.
#
# Renders `stats_table` with Slovak column labels and Slovak number formatting
# (space as thousands separator, comma as decimal mark).
gt(stats_table) |>
  cols_label(
    Premenna = "Premenná",
    n        = "n",
    mean     = "Priemer",
    sd       = "SD",
    min      = "Min",
    q25      = "P25",
    median   = "Medián",
    q75      = "P75",
    max      = "Max"
  ) |>
  # Columns must be selected by their real names in `stats_table`, not by the
  # display labels set in cols_label(). A named selection such as
  # c(Priemer = mean) is treated as a rename, so the formatter would target
  # columns that do not exist and the numbers would stay unformatted.
  fmt_number(
    columns = c(mean, sd, min, q25, median, q75, max),
    decimals = 2, sep_mark = " ", dec_mark = ","
  ) |>
  fmt_number(columns = n, decimals = 0, sep_mark = " ", dec_mark = ",") |>
  tab_header(title = md("**Deskriptívna štatistika (numerické premenné)**")) |>
  # White bold labels on an emerald background for the header row.
  tab_style(
    style = list(
      cell_fill(color = "#10B981"),
      cell_text(color = "white", weight = "bold")
    ),
    locations = cells_column_labels(everything())
  ) |>
  opt_row_striping() |>
  tab_options(
    row.striping.background_color = "#ECFDF5",
    table.border.top.style = "solid", table.border.top.width = px(1),
    table.border.bottom.style = "solid", table.border.bottom.width = px(1),
    column_labels.border.top.style = "solid",
    column_labels.border.bottom.style = "solid",
    table_body.hlines.style = "solid", table_body.hlines.width = px(1),
    table_body.vlines.style = "solid", table_body.vlines.width = px(1)
  )
Deskriptívna štatistika (numerické premenné)
Premenná n Priemer SD Min P25 Medián P75 Max
Rok 50 2.018500e+03 2.901442e+00 2014.000 2.016000e+03 2.018500e+03 2.021000e+03 2.023000e+03
BEV 50 1.592708e+04 3.018585e+04 113.000 9.110000e+02 4.155426e+03 1.407975e+04 1.632455e+05
EPI 50 6.788088e+01 9.180603e+00 46.000 6.350000e+01 6.830000e+01 7.328125e+01 8.542000e+01
HDP 50 1.947160e+04 6.655541e+03 11360.000 1.452250e+04 1.732500e+04 2.178750e+04 3.330000e+04
BEV_PHEV 50 2.771188e+04 5.478741e+04 201.000 1.575500e+03 7.198500e+03 2.848325e+04 2.902244e+05
co2 50 7.344969e+08 1.319764e+09 28339113.568 5.459271e+07 1.177182e+08 1.294116e+08 3.565184e+09
res_develop 50 1.523600e+00 4.820101e-01 0.790 1.067500e+00 1.450000e+00 1.925000e+00 2.310000e+00
r_sources 50 6.284690e+02 5.298468e+02 129.454 3.178076e+02 4.373183e+02 5.248400e+02 2.010715e+03
unvest_man 50 1.468000e-01 2.043550e-01 0.030 5.000000e-02 7.500000e-02 1.100000e-01 1.000000e+00
invest_sources 50 1.470000e-01 7.412758e-02 0.040 9.000000e-02 1.400000e-01 1.900000e-01 3.500000e-01
invest_transport 50 2.660000e-01 1.256168e-01 0.090 1.700000e-01 2.600000e-01 3.200000e-01 7.200000e-01
NA
NA

Histogram

library(ggplot2)

# Histogram of the BEV column (20 bins), emerald bars with white outlines.
bev_hist <- ggplot(data = data, mapping = aes(x = BEV)) +
  geom_histogram(bins = 20, fill = "#10B981", color = "white")

bev_hist <- bev_hist +
  labs(title = "Histogram: BEV", x = "BEV", y = "Počet") +
  theme_minimal() +
  theme(plot.title = element_text(color = "#10B981", face = "bold"))

bev_hist

NA
NA

Log-mierka (ukáže rozdelenie prehľadnejšie)

# Same histogram of BEV, but with a log10-transformed x axis.
bev_hist_log <- ggplot(data = data, mapping = aes(x = BEV)) +
  geom_histogram(bins = 20, fill = "#10B981", color = "white") +
  scale_x_log10()

bev_hist_log +
  labs(title = "Histogram: BEV (log škála)", x = "BEV (log10)", y = "Počet")

BoxPlot

library(dplyr)
library(ggplot2)
library(scales)

# Order the countries by their median BEV (reorder() sorts the factor levels
# by increasing score), so the boxes appear in that order on the x axis.
data_bp <- data
data_bp$Krajina <- reorder(
  data_bp$Krajina, data_bp$BEV,
  FUN = median, na.rm = TRUE
)

# Box plot per country. Outliers are hidden on the boxes because every
# observation is drawn on top as a horizontally jittered point.
bev_boxplot <- ggplot(data_bp, aes(x = Krajina, y = BEV)) +
  geom_boxplot(
    fill = "#10B981", color = "#0B7D63",
    width = 0.6, outlier.shape = NA, alpha = 0.9
  ) +
  geom_jitter(
    width = 0.15, height = 0,
    alpha = 0.35, size = 1.8, color = "#065F46"
  )

bev_boxplot +
  labs(
    title = "BEV podľa krajiny",
    subtitle = "Krajiny zoradené podľa mediánu BEV",
    x = NULL,
    y = "BEV (kusy)"
  ) +
  scale_y_continuous(labels = label_number(big.mark = " ", accuracy = 1)) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", color = "#10B981"),
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    panel.grid.minor = element_blank()
  )

NA
NA

Vzťah HDP a BEV

library(ggplot2)
library(scales)

# Scatter plot of BEV against HDP with a fitted linear trend line and its
# confidence band; axis labels use a space as thousands separator.
space_labels <- label_number(big.mark = " ")

hdp_bev_plot <- ggplot(data, aes(x = HDP, y = BEV)) +
  geom_point(color = "#10B981", alpha = 0.8, size = 2.3) +
  geom_smooth(method = "lm", se = TRUE, color = "#065F46")

hdp_bev_plot +
  labs(title = "HDP vs. BEV", x = "HDP", y = "BEV") +
  scale_x_continuous(labels = space_labels) +
  scale_y_continuous(labels = space_labels) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", color = "#10B981"),
    panel.grid.minor = element_blank()
  )

Vývoj BEV v čase podľa krajín - samostatné grafy

library(ggplot2)

# BEV over time, one panel per country. Each panel has its own y scale
# (scales = "free_y"), and the legend is hidden because the panel titles
# already name the country.
bev_facets <- ggplot(data, aes(x = Rok, y = BEV, color = Krajina)) +
  geom_line(linewidth = 1) +
  geom_point(size = 1.6) +
  facet_wrap(~ Krajina, scales = "free_y") +
  scale_color_viridis_d(option = "G", end = 0.9)

bev_facets +
  labs(
    title = "Vývoj BEV v čase podľa krajiny",
    x = NULL,
    y = "BEV",
    color = "Krajina"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", color = "#10B981"),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )

Vývoj BEV v čase podľa krajín - spoločný graf

# BEV over time for all countries in one panel; countries are distinguished
# by colour and the legend is placed below the plot.
bev_combined <- ggplot(data, aes(x = Rok, y = BEV, color = Krajina)) +
  geom_line(linewidth = 1) +
  geom_point(size = 1.6) +
  scale_color_viridis_d(option = "G", end = 0.9)

bev_combined +
  labs(
    title = "Vývoj BEV v čase (farby podľa krajiny)",
    x = NULL,
    y = "BEV",
    color = "Krajina"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", color = "#10B981"),
    panel.grid.minor = element_blank(),
    legend.position = "bottom"
  )

BEV po rokoch spolu za všetky krajiny

# Total BEV per year, summed over every row of `data` for that year.
# NOTE(review): the frequency table above lists "EU-avrg" as one of the
# Krajina values, so the EU average appears to be included in this sum.
# Confirm that this is intended for a total "over all countries".
bev_year <- data %>%
  group_by(Rok) %>%
  summarise(BEV = sum(BEV, na.rm = TRUE), .groups = "drop") %>%
  arrange(Rok)

# Table of the yearly totals, with the BEV cells coloured on a viridis scale
# that spans the observed range of totals.
gt(bev_year) |>
  fmt_number(columns = BEV, decimals = 0, sep_mark = " ") |>
  tab_header(title = md("**BEV po rokoch (spolu)**")) |>
  data_color(
    columns = BEV,
    colors = scales::col_numeric(
      # Namespace-qualified: viridisLite is not attached at this point of the
      # script, so a bare viridis() call fails when run from top to bottom.
      palette = viridisLite::viridis(10),
      domain = range(bev_year$BEV, na.rm = TRUE)
    )
  ) |>
  tab_options(table.font.size = px(14))
BEV po rokoch (spolu)
Rok BEV
2014 3 930
2015 6 174
2016 9 161
2017 14 909
2018 23 620
2019 64 093
2020 69 313
2021 121 908
2022 191 646
2023 291 601
NA

Jednofaktorová ANOVA


library(gt); library(broom)

# One-way ANOVA of BEV by country.
# NOTE(review): the frequency table above lists "EU-avrg" as one of the
# Krajina values, so the EU average is treated as a group of its own here.
# Confirm that this is intended.
data[["Krajina"]] <- as.factor(data[["Krajina"]])

fit_aov <- aov(BEV ~ Krajina, data = data)
# Tidy ANOVA table; columns: term, df, sumsq, meansq, statistic, p.value
aov_tbl <- tidy(fit_aov)

# Styled gt table: Slovak labels, 3 decimals, emerald header, striped rows.
tab <- gt(aov_tbl) |>
  cols_label(
    term = "Zdroj",
    df = "df",
    sumsq = "SS",
    meansq = "MS",
    statistic = "F",
    p.value = "p-hodnota"
  ) |>
  fmt_number(
    columns = c(sumsq, meansq, statistic, p.value),
    decimals = 3,
    sep_mark = " "
  ) |>
  tab_header(title = md("**Jednofaktorová ANOVA: BEV ~ Krajina**")) |>
  tab_style(
    style = list(
      cell_fill(color = "#10B981"),
      cell_text(color = "white", weight = "bold")
    ),
    locations = cells_column_labels(everything())
  ) |>
  opt_row_striping() |>
  tab_options(
    row.striping.background_color = "#ECFDF5",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.style = "solid",
    column_labels.border.bottom.style = "solid"
  )

tab # display the table
Jednofaktorová ANOVA: BEV ~ Krajina
Zdroj df SS MS F p-hodnota
Krajina 4 11 750 642 287.144 2 937 660 571.786 4.018 0.007
Residuals 45 32 897 463 152.073 731 054 736.713 NA NA
NA
NA

Korelačná matica a heatmap

library(dplyr); library(gt); library(viridisLite)

# Numeric columns and their pairwise correlations, rounded to 2 decimals.
# select(where(...)) replaces the superseded select_if().
num <- dplyr::select(data, where(is.numeric))
cm  <- round(cor(num, use = "pairwise.complete.obs"), 2)

# Re-order the variables by hierarchical clustering of the correlation rows,
# so that variables with similar correlation profiles end up next to each other.
ord <- hclust(dist(cm))$order
cm  <- cm[ord, ord]

# Data frame for gt: the variable names go into a regular first column.
tbl <- cbind(Premenná = rownames(cm), as.data.frame(cm), row.names = NULL)

g <- gt(tbl)
g <- tab_header(g, title = md("**Korelačná matica (numerické premenné)**"))
# White bold labels on an emerald background for the header row.
g <- tab_style(g,
       style = list(cell_fill(color = "#10B981"),
                    cell_text(color = "white", weight = "bold")),
       locations = cells_column_labels(everything()))
g <- fmt_number(g, columns = -Premenná, decimals = 2)
# Colour every correlation cell on a fixed [-1, 1] scale.
g <- data_color(g, columns = -Premenná,
       colors = scales::col_numeric(palette = viridis(11, option = "G"),
                                    domain = c(-1, 1)))
g
Korelačná matica (numerické premenné)
Premenná HDP res_develop Rok BEV BEV_PHEV EPI invest_transport invest_sources unvest_man co2 r_sources
HDP 1.00 0.81 0.19 0.52 0.54 −0.02 −0.03 0.20 −0.30 −0.42 −0.31
res_develop 0.81 1.00 0.15 0.45 0.46 −0.06 0.24 −0.26 −0.04 −0.30 −0.20
Rok 0.19 0.15 1.00 0.54 0.53 −0.62 −0.49 −0.07 0.32 −0.03 0.04
BEV 0.52 0.45 0.54 1.00 1.00 −0.33 −0.05 0.12 0.06 −0.06 −0.04
BEV_PHEV 0.54 0.46 0.53 1.00 1.00 −0.33 −0.05 0.14 0.01 −0.05 −0.04
EPI −0.02 −0.06 −0.62 −0.33 −0.33 1.00 0.22 −0.02 −0.35 −0.20 −0.21
invest_transport −0.03 0.24 −0.49 −0.05 −0.05 0.22 1.00 −0.46 −0.03 −0.14 −0.25
invest_sources 0.20 −0.26 −0.07 0.12 0.14 −0.02 −0.46 1.00 −0.22 −0.06 −0.05
unvest_man −0.30 −0.04 0.32 0.06 0.01 −0.35 −0.03 −0.22 1.00 −0.03 −0.07
co2 −0.42 −0.30 −0.03 −0.06 −0.05 −0.20 −0.14 −0.06 −0.03 1.00 0.97
r_sources −0.31 −0.20 0.04 −0.04 −0.04 −0.21 −0.25 −0.05 −0.07 0.97 1.00
NA
NA
library(ggplot2)

# Correlation matrix of the numeric columns, rounded to 2 decimals.
# It is computed here because `cor_mat` is not defined anywhere earlier in the
# script; without this line the chunk stops with "object 'cor_mat' not found".
cor_mat <- round(
  cor(
    data[vapply(data, is.numeric, logical(1))],
    use = "pairwise.complete.obs"
  ),
  2
)

# Optional: nicer variable order via hierarchical clustering.
ord <- hclust(dist(cor_mat))$order
cor_reord <- cor_mat[ord, ord]

# Long format for ggplot: one row per (Var1, Var2) pair with its correlation.
df <- as.data.frame(as.table(cor_reord))
names(df) <- c("Var1", "Var2", "Corr")

# Heatmap with the correlation printed in each tile; the fill scale is fixed
# to [-1, 1] and coord_fixed() keeps the tiles square.
ggplot(df, aes(Var1, Var2, fill = Corr)) +
  geom_tile() +
  geom_text(aes(label = Corr), size = 3) +
  scale_fill_gradient2(limits = c(-1, 1)) +
  coord_fixed() +
  labs(title = "Korelačná matica (heatmapa)", x = NULL, y = NULL, fill = "r") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

