# Global knitr chunk defaults: echo the code of every chunk, but keep
# messages and warnings out of the rendered output.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)

#jednoduchá tabuľka Meno <- c("Jana", "Jozef", "Mária") Vek <- c(10, 11, 9) Body <- c(85, 92, 78) udaje <- data.frame(Meno, Vek, Body) udaje udaje$Vek mean(udaje$Vek) udaje[Meno == "Jozef",] udaje[3,] udaje[, 2:3] udaje[1, 1] summary(udaje) MaAuto <- c(TRUE, FALSE, TRUE) udaje <- cbind(udaje, MaAuto) udaje novy.riadok <- data.frame(Meno = "Diana", Vek = 22.485, Body = 42, MaAuto = FALSE) udaje <- rbind(udaje, novy.riadok) udaje

#Tabuľky – kableExtra library(knitr) library(kableExtra)

kable( udaje, digits = 2, align = c(“l”,“c”,“l”,“r”), caption = “Toto je tabuľka” ) %>% kable_styling( bootstrap_options = c(“striped”,“hover”,“condensed”,“responsive”), full_width = FALSE, position = “center” )

#Tidyverse – moderná práca s údajmi library(tidyverse)

#Výber a triedenie udaje %>% filter(Body > 50) %>% arrange(desc(Body)) %>% kable() %>% kable_styling(full_width = FALSE)

#Zoskupovanie a sumarizácia udaje %>% group_by(MaAuto) %>% summarise( Priemer = mean(Body), Pocet = n(), .groups = “drop” ) %>% kable( caption = “Priemerné Body podľa premennej MaAuto”, col.names = c(“Má auto”,“Priemer Body”,“Počet”) ) %>% kable_styling(full_width = FALSE)

#Vytvorenie novej premennej udaje %>% mutate( grade = case_when( Body >= 90 ~ “A”, Body >= 80 ~ “B”, Body >= 70 ~ “C”, TRUE ~ “D” ), VekPoPlnoletosti = round(Vek - 18) ) %>% kable() %>% kable_styling(full_width = FALSE)

#Databázy v R library(datasets)

ds_datasets <- as.data.frame(utils::data(package = “datasets”)$results)[, c(“Item”,“Title”)] kable(head(ds_datasets, 20), col.names = c(“Dataset”,“Title”))

head(CO2) # install.packages(“wooldridge”) # odkomentovať pri prvej inštalácii

library(wooldridge)

ds_wool <- as.data.frame(utils::data(package = “wooldridge”)$results)[, c(“Item”,“Title”)] kable(head(ds_wool, 20), col.names = c(“Dataset”,“Title”)) %>% kable_styling( bootstrap_options = c(“striped”,“hover”,“condensed”,“responsive”), full_width = FALSE, position = “center” )

#Moja databáza – WDI (krajiny V4) Import WDI dát # install.packages(“WDI”) # ak treba

library(WDI) library(dplyr)

countries <- c(“SVK”,“CZE”,“POL”,“HUN”)

indicators <- c( GDP.PCAP = “NY.GDP.PCAP.KD”, INFLATION = “FP.CPI.TOTL.ZG”, UNEMP = “SL.UEM.TOTL.ZS”, EXPORTS = “NE.EXP.GNFS.ZS” )

mojedata <- WDI( country = countries, indicator = indicators, start = 2000, end = 2023 )

mojedata <- mojedata %>% rename(COUNTRY = country, YEAR = year) %>% arrange(COUNTRY, YEAR)

head(mojedata)

#Export a opätovný import CSV write.csv2(mojedata, “mojedata_WDI_V4.csv”, row.names = FALSE)

mojedata_csv <- read.csv2( “mojedata_WDI_V4.csv”, header = TRUE, sep = “;”, dec = “.” )

head(mojedata_csv) colnames(mojedata_csv)

#Grafy na dátach WDI Podmnožina pre rok 2015 mojedata_2015 <- mojedata %>% filter(YEAR == 2015) %>% select(COUNTRY, GDP.PCAP, INFLATION, UNEMP, EXPORTS)

mojedata_2015

#Scatter plot: HDP vs. exporty library(ggplot2)

ggplot(mojedata_2015, aes(x = EXPORTS, y = GDP.PCAP, color = COUNTRY)) + geom_point(size = 3) + theme_minimal() + labs( title = “HDP na obyvateľa vs. exporty (% HDP), rok 2015”, x = “Exporty (% HDP)”, y = “HDP na obyvateľa (konštantné USD)”, color = “Krajina” )

#Boxplot: HDP podľa krajiny ggplot(mojedata_2015, aes(x = COUNTRY, y = GDP.PCAP)) + geom_boxplot(fill = “lightblue”, color = “darkblue”) + theme_minimal() + labs( title = “Rozdelenie HDP na obyvateľa podľa krajiny (2015)”, x = “Krajina”, y = “HDP na obyvateľa (konštantné USD)” )

#Základné štatistiky – HDP na obyvateľa library(knitr)

wdi.stats <- mojedata %>% filter(YEAR %in% 2010:2020) %>% group_by(YEAR) %>% summarise( n = n(), mean = mean(GDP.PCAP, na.rm = TRUE), sd = sd(GDP.PCAP, na.rm = TRUE), min = min(GDP.PCAP, na.rm = TRUE), q25 = quantile(GDP.PCAP, 0.25, na.rm = TRUE), median = median(GDP.PCAP, na.rm = TRUE), q75 = quantile(GDP.PCAP, 0.75, na.rm = TRUE), max = max(GDP.PCAP, na.rm = TRUE), .groups = “drop” )

kable(wdi.stats, digits = 2, caption = “Základné štatistiky HDP na obyvateľa (2010–2020, krajiny V4)”)

#kableExtra – krajšia tabuľka library(kableExtra)

wdi.stats %>% kable( digits = 2, caption = “Základné štatistiky HDP na obyvateľa (2010–2020, krajiny V4)” ) %>% kable_styling( full_width = FALSE, bootstrap_options = c(“striped”,“hover”,“condensed”) ) %>% column_spec(1, bold = TRUE) %>% row_spec(0, bold = TRUE, background = “#f2f2f2”) %>% add_header_above(c(” ” = 2, “HDP štatistiky” = 7))

#t-test: Porovnanie HDP v rokoch 2005 a 2015 t.test( mojedata$GDP.PCAP[mojedata$YEAR == 2005], mojedata$GDP.PCAP[mojedata$YEAR == 2015] )

#ANOVA: Porovnanie HDP medzi krajinami (2015) anova.result <- aov(GDP.PCAP ~ COUNTRY, data = mojedata_2015) summary(anova.result)

#Lineárna regresia: HDP ~ inflácia + nezamestnanosť + exporty wdi_reg <- mojedata %>% filter(YEAR == 2015) %>% select(GDP.PCAP, INFLATION, UNEMP, EXPORTS) %>% na.omit()

model <- lm(GDP.PCAP ~ INFLATION + UNEMP + EXPORTS, data = wdi_reg) summary(model)

#Regresné koeficienty – broom + kableExtra # install.packages(c(“broom”,“stringr”)) # ak treba

library(broom) library(stringr)

coef.tbl <- tidy(model, conf.int = TRUE) %>% mutate( term = recode(term, "(Intercept)" = "Intercept", "INFLATION" = "Inflácia", "UNEMP" = "Nezamestnanosť", "EXPORTS" = "Exporty (% HDP)" ), stars = case_when( p.value < 0.001 ~ "***", p.value < 0.01 ~ "**", p.value < 0.05 ~ "*", p.value < 0.1 ~ "·", TRUE ~ "" ) ) %>% transmute( Term = term, Estimate = estimate, `Std. Error` = std.error, `t value` = statistic, `p value` = p.value, `95% CI` = str_c("[", round(conf.low,3), ", ", round(conf.high,3), "]"), Sig = stars )

coef.tbl %>% kable( digits = 3, caption = “OLS koeficienty (HDP na obyvateľa ~ inflácia + nezamestnanosť + exporty)” ) %>% kable_styling( full_width = FALSE, bootstrap_options = c(“striped”,“hover”,“condensed”) ) %>% column_spec(1, bold = TRUE) %>% row_spec(0, bold = TRUE, background = “#f2f2f2”) %>% footnote( general = “Signif. codes: *** p<0.001, ** p<0.01, * p<0.05, · p<0.1.”, threeparttable = TRUE )

#Model Fit Statistics fit.tbl <- glance(model) %>% transmute( `R-squared` = r.squared, `Adj. R-squared` = adj.r.squared, `F-statistic` = statistic, `F p-value` = p.value, `AIC` = AIC, `BIC` = BIC, `Num. obs.` = nobs )

fit.tbl %>% kable(digits = 3, caption = “Model Fit Statistics”) %>% kable_styling( full_width = FALSE, bootstrap_options = c(“condensed”) )

---
title: "Práca s databázou – WDI (V4 krajiny)"
author: "Natália Soligová  <br>
(s využitím verejne dostupných kódov)"
date: "September 2025"
output: 
  html_notebook:
    toc: true
    toc_float: true
    theme: united
    highlight: tango
editor_options: 
  markdown: 
    wrap: 72
---

```{r}
# Notebook-wide chunk options: show the source of each chunk and hide
# any messages or warnings it emits.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
```

# Simple table: build a small data frame column by column and inspect it
# with base R.
Meno <- c("Jana", "Jozef", "Mária")
Vek <- c(10, 11, 9)
Body <- c(85, 92, 78)
udaje <- data.frame(Meno, Vek, Body)
udaje

# Access a single column and aggregate it.
udaje$Vek
mean(udaje$Vek)

# Row and column subsetting. The row filter is written against the data
# frame's own column rather than the free-standing vector `Meno`, so it
# stays correct when `udaje` gains rows that `Meno` does not have.
udaje[udaje$Meno == "Jozef", ]
udaje[3, ]
udaje[, 2:3]
udaje[1, 1]
summary(udaje)

# Add a logical column.
MaAuto <- c(TRUE, FALSE, TRUE)
udaje <- cbind(udaje, MaAuto)
udaje

# Append one row; the one-row data frame must carry every existing column.
novy.riadok <- data.frame(
  Meno = "Diana",
  Vek = 22.485,
  Body = 42,
  MaAuto = FALSE
)
udaje <- rbind(udaje, novy.riadok)
udaje

# Tables with kableExtra: render `udaje` as a captioned, styled table.
library(knitr)
library(kableExtra)

kable_styling(
  kable(
    udaje,
    caption = "Toto je tabuľka",
    align = c("l", "c", "l", "r"),
    digits = 2
  ),
  position = "center",
  full_width = FALSE,
  bootstrap_options = c("striped", "hover", "condensed", "responsive")
)

# Tidyverse: modern data manipulation.
library(tidyverse)

# Selecting and sorting: keep rows with Body above 50, ordered by Body
# from highest to lowest, and show the result as a table.
udaje %>%
  filter(Body > 50) %>%
  arrange(desc(Body)) %>%
  kable() %>%
  kable_styling(full_width = FALSE)

# Grouping and summarising: mean of Body and number of rows for each
# value of MaAuto, shown with custom column headers.
udaje %>%
  group_by(MaAuto) %>%
  summarise(Priemer = mean(Body), Pocet = n(), .groups = "drop") %>%
  kable(
    col.names = c("Má auto", "Priemer Body", "Počet"),
    caption = "Priemerné Body podľa premennej MaAuto"
  ) %>%
  kable_styling(full_width = FALSE)

# Creating new variables: a letter grade derived from Body (first matching
# threshold wins, everything below 70 gets "D") and the rounded difference
# between Vek and 18.
udaje %>%
  mutate(
    grade = case_when(
      Body >= 90 ~ "A",
      Body >= 80 ~ "B",
      Body >= 70 ~ "C",
      TRUE ~ "D"
    ),
    VekPoPlnoletosti = round(Vek - 18)
  ) %>%
  kable() %>%
  kable_styling(full_width = FALSE)

# Datasets in R: list the data sets shipped with the `datasets` package.
library(datasets)

# data(package = ...)$results is a catalogue; only the data set name and
# its title are kept.
ds_datasets <- as.data.frame(utils::data(package = "datasets")$results)
ds_datasets <- ds_datasets[, c("Item", "Title")]
kable(head(ds_datasets, 20), col.names = c("Dataset", "Title"))

head(CO2)
# install.packages("wooldridge")  # uncomment for the first installation

library(wooldridge)

# Same catalogue for the wooldridge package, shown as a styled table.
ds_wool <- as.data.frame(utils::data(package = "wooldridge")$results)
ds_wool <- ds_wool[, c("Item", "Title")]
kable_styling(
  kable(head(ds_wool, 20), col.names = c("Dataset", "Title")),
  position = "center",
  full_width = FALSE,
  bootstrap_options = c("striped", "hover", "condensed", "responsive")
)

# My database – WDI (V4 countries): importing the WDI data.
# install.packages("WDI")  # if needed

library(WDI)
library(dplyr)

# The four Visegrad countries.
countries <- c("SVK", "CZE", "POL", "HUN")

# Requested series; later code refers to the downloaded columns by the
# names given here (GDP.PCAP, INFLATION, UNEMP, EXPORTS).
indicators <- c(
  GDP.PCAP = "NY.GDP.PCAP.KD",
  INFLATION = "FP.CPI.TOTL.ZG",
  UNEMP = "SL.UEM.TOTL.ZS",
  EXPORTS = "NE.EXP.GNFS.ZS"
)

# Download the years 2000 through 2023.
mojedata <- WDI(
  indicator = indicators,
  country = countries,
  start = 2000,
  end = 2023
)

# Upper-case the two key columns and order the rows by country, then year.
mojedata <- arrange(
  rename(mojedata, COUNTRY = country, YEAR = year),
  COUNTRY, YEAR
)

head(mojedata)

# Export to CSV and import it again.
# write.csv2() uses ";" as the field separator and "," as the decimal mark.
write.csv2(mojedata, "mojedata_WDI_V4.csv", row.names = FALSE)

# Read the file back with the same conventions it was written with. With
# dec = "." the values written with a decimal comma (e.g. "12345,67") are
# not recognised as numbers and the indicator columns come back as
# character.
mojedata_csv <- read.csv2(
  "mojedata_WDI_V4.csv",
  header = TRUE,
  sep = ";",
  dec = ","
)

head(mojedata_csv)
colnames(mojedata_csv)

# Charts on the WDI data: subset for the year 2015, keeping the country
# and the four indicator columns.
mojedata_2015 <- select(
  filter(mojedata, YEAR == 2015),
  COUNTRY, GDP.PCAP, INFLATION, UNEMP, EXPORTS
)

mojedata_2015

# Scatter plot: GDP per capita against exports, one coloured point per
# country in the 2015 subset.
library(ggplot2)

ggplot(mojedata_2015, aes(x = EXPORTS, y = GDP.PCAP, color = COUNTRY)) +
  geom_point(size = 3) +
  labs(
    title = "HDP na obyvateľa vs. exporty (% HDP), rok 2015",
    x = "Exporty (% HDP)",
    y = "HDP na obyvateľa (konštantné USD)",
    color = "Krajina"
  ) +
  theme_minimal()

# Boxplot: GDP per capita by country, drawn from the 2015 subset.
ggplot(mojedata_2015, aes(x = COUNTRY, y = GDP.PCAP)) +
  geom_boxplot(color = "darkblue", fill = "lightblue") +
  labs(
    title = "Rozdelenie HDP na obyvateľa podľa krajiny (2015)",
    x = "Krajina",
    y = "HDP na obyvateľa (konštantné USD)"
  ) +
  theme_minimal()

# Basic statistics – GDP per capita.
library(knitr)

# One row per year for 2010–2020: count, mean, standard deviation and the
# five-number summary of GDP.PCAP, ignoring missing values.
wdi.stats <- mojedata %>%
  filter(YEAR %in% 2010:2020) %>%
  group_by(YEAR) %>%
  summarise(
    n = n(),
    mean = mean(GDP.PCAP, na.rm = TRUE),
    sd = sd(GDP.PCAP, na.rm = TRUE),
    min = min(GDP.PCAP, na.rm = TRUE),
    q25 = quantile(GDP.PCAP, 0.25, na.rm = TRUE),
    median = median(GDP.PCAP, na.rm = TRUE),
    q75 = quantile(GDP.PCAP, 0.75, na.rm = TRUE),
    max = max(GDP.PCAP, na.rm = TRUE),
    .groups = "drop"
  )

kable(
  wdi.stats,
  caption = "Základné štatistiky HDP na obyvateľa (2010–2020, krajiny V4)",
  digits = 2
)

# kableExtra – a nicer table: same statistics with a bold first column, a
# shaded bold header row and a grouped header (2 + 7 columns) on top.
library(kableExtra)

wdi.stats %>%
  kable(
    caption = "Základné štatistiky HDP na obyvateľa (2010–2020, krajiny V4)",
    digits = 2
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  ) %>%
  column_spec(1, bold = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#f2f2f2") %>%
  add_header_above(c(" " = 2, "HDP štatistiky" = 7))

# t-test: compare GDP per capita (GDP.PCAP) between the years 2005 and 2015.
# Each argument selects the GDP.PCAP values of the rows of `mojedata` whose
# YEAR equals the given year. No further arguments are passed, so t.test()
# runs with its defaults.
t.test(
mojedata$GDP.PCAP[mojedata$YEAR == 2005],
mojedata$GDP.PCAP[mojedata$YEAR == 2015]
)

# ANOVA: compare GDP per capita between countries.
# A single-year subset (such as 2015) holds exactly one observation per
# country, which leaves no residual degrees of freedom and therefore no
# F test. The model is fitted on all downloaded country-years instead, so
# each country contributes one observation per year.
anova.result <- aov(GDP.PCAP ~ COUNTRY, data = mojedata)
summary(anova.result)

# Linear regression: GDP per capita ~ inflation + unemployment + exports.
# The model has four coefficients (intercept + three slopes). Restricting
# the data to one year leaves at most four rows (one per country), i.e. a
# saturated fit with zero residual degrees of freedom and undefined
# standard errors, p-values and confidence intervals. The regression is
# therefore pooled over all downloaded country-years; rows with a missing
# value in any model variable are dropped.
wdi_reg <- mojedata %>%
  select(GDP.PCAP, INFLATION, UNEMP, EXPORTS) %>%
  na.omit()

model <- lm(GDP.PCAP ~ INFLATION + UNEMP + EXPORTS, data = wdi_reg)
summary(model)

# Regression coefficients – broom + kableExtra.
# install.packages(c("broom", "stringr"))  # if needed

library(broom)
library(stringr)

# Tidy the fitted model into one row per term, relabel the terms for
# display, attach significance markers based on the p-value, and keep only
# the presentation columns (the confidence interval is formatted as text).
coef.tbl <- tidy(model, conf.int = TRUE) %>%
  mutate(
    term = recode(
      term,
      "(Intercept)" = "Intercept",
      "INFLATION" = "Inflácia",
      "UNEMP" = "Nezamestnanosť",
      "EXPORTS" = "Exporty (% HDP)"
    ),
    stars = case_when(
      p.value < 0.001 ~ "***",
      p.value < 0.01 ~ "**",
      p.value < 0.05 ~ "*",
      p.value < 0.1 ~ "·",
      TRUE ~ ""
    )
  ) %>%
  transmute(
    Term = term,
    Estimate = estimate,
    `Std. Error` = std.error,
    `t value` = statistic,
    `p value` = p.value,
    `95% CI` = str_c(
      "[", round(conf.low, 3), ", ", round(conf.high, 3), "]"
    ),
    Sig = stars
  )

# Render the coefficient table: three-decimal rounding, bold first column,
# shaded bold header row and a footnote explaining the markers in `Sig`.
coef.tbl %>%
  kable(
    caption = "OLS koeficienty (HDP na obyvateľa ~ inflácia + nezamestnanosť + exporty)",
    digits = 3
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  ) %>%
  column_spec(1, bold = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#f2f2f2") %>%
  footnote(
    general = "Signif. codes: *** p<0.001, ** p<0.01, * p<0.05, · p<0.1.",
    threeparttable = TRUE
  )

# Model fit statistics: take the one-row model summary from glance() and
# keep the selected measures under display-friendly column names.
fit.tbl <- glance(model) %>%
  transmute(
    `R-squared` = r.squared,
    `Adj. R-squared` = adj.r.squared,
    `F-statistic` = statistic,
    `F p-value` = p.value,
    AIC = AIC,
    BIC = BIC,
    `Num. obs.` = nobs
  )

# Show the fit statistics as a compact table.
kable_styling(
  kable(fit.tbl, digits = 3, caption = "Model Fit Statistics"),
  full_width = FALSE,
  bootstrap_options = "condensed"
)