# Global knitr chunk options: echo the code of every chunk, but keep
# messages and warnings out of the rendered output.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)

# Simple table ----
# Build a small data frame from three parallel vectors.
Meno <- c("Jana", "Jozef", "Mária")
Vek <- c(10, 11, 9)
Body <- c(85, 92, 78)
udaje <- data.frame(Meno, Vek, Body)
udaje

# Column access and a simple aggregate.
udaje$Vek
mean(udaje$Vek)

# Row / column subsetting. The row filter uses the data-frame column rather
# than the free-floating `Meno` vector so it stays valid after rows are added.
udaje[udaje$Meno == "Jozef", ]
udaje[3, ]
udaje[, 2:3]
udaje[1, 1]
summary(udaje)

# Add a logical column.
MaAuto <- c(TRUE, FALSE, TRUE)
udaje <- cbind(udaje, MaAuto)
udaje

# Append one row; the new row must carry all four columns.
novy.riadok <- data.frame(
  Meno = "Diana",
  Vek = 22.485,
  Body = 42,
  MaAuto = FALSE
)
udaje <- rbind(udaje, novy.riadok)
udaje

# Tables with kableExtra ----
library(knitr)
library(kableExtra)

# Render `udaje` as a styled table: numbers rounded to 2 digits, one
# alignment code per column.
kable(
  udaje,
  digits = 2,
  align = c("l", "c", "l", "r"),
  caption = "Toto je tabuľka"
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  )

# Tidyverse: modern data manipulation ----
library(tidyverse)

# Filtering and sorting: keep rows with Body > 50, highest score first.
udaje %>%
  filter(Body > 50) %>%
  arrange(desc(Body)) %>%
  kable() %>%
  kable_styling(full_width = FALSE)

# Grouping and summarising: mean score and row count per MaAuto value.
udaje %>%
  group_by(MaAuto) %>%
  summarise(
    Priemer = mean(Body),
    Pocet = n(),
    .groups = "drop"
  ) %>%
  kable(
    caption = "Priemerné Body podľa premennej MaAuto",
    col.names = c("Má auto", "Priemer Body", "Počet")
  ) %>%
  kable_styling(full_width = FALSE)

# Creating new variables: a letter grade derived from Body (conditions are
# evaluated top to bottom, first match wins) and the rounded age offset
# from 18.
udaje %>%
  mutate(
    grade = case_when(
      Body >= 90 ~ "A",
      Body >= 80 ~ "B",
      Body >= 70 ~ "C",
      TRUE ~ "D"
    ),
    VekPoPlnoletosti = round(Vek - 18)
  ) %>%
  kable() %>%
  kable_styling(full_width = FALSE)

# Datasets available in R ----
library(datasets)

# List the first 20 datasets shipped with the `datasets` package.
ds_datasets <- as.data.frame(
  utils::data(package = "datasets")$results
)[, c("Item", "Title")]
kable(head(ds_datasets, 20), col.names = c("Dataset", "Title"))

head(CO2)

# install.packages("wooldridge")  # uncomment on first installation
library(wooldridge)

# Same listing for the `wooldridge` package, rendered as a styled table.
ds_wool <- as.data.frame(
  utils::data(package = "wooldridge")$results
)[, c("Item", "Title")]
kable(head(ds_wool, 20), col.names = c("Dataset", "Title")) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  )

# My database: WDI (V4 countries) ----
# Import WDI data
# install.packages("WDI")  # if needed
library(WDI)
library(dplyr)

# Country codes passed to WDI().
countries <- c("SVK", "CZE", "POL", "HUN")

# Named vector: the names are the labels used for these indicators in the
# rest of the script, the values are the World Bank indicator codes.
indicators <- c(
  GDP.PCAP = "NY.GDP.PCAP.KD",
  INFLATION = "FP.CPI.TOTL.ZG",
  UNEMP = "SL.UEM.TOTL.ZS",
  EXPORTS = "NE.EXP.GNFS.ZS"
)

# Download the data for 2000-2023 (requires network access).
mojedata <- WDI(
  country = countries,
  indicator = indicators,
  start = 2000,
  end = 2023
)

# Upper-case the key columns and sort by country and year.
mojedata <- mojedata %>%
  rename(COUNTRY = country, YEAR = year) %>%
  arrange(COUNTRY, YEAR)

head(mojedata)

# CSV export and re-import ----
# write.csv2() writes ";"-separated fields with a decimal comma.
write.csv2(mojedata, "mojedata_WDI_V4.csv", row.names = FALSE)

# Read the file back with the same conventions. `dec` must be "," to match
# write.csv2(); with dec = "." the numeric columns would be read back as
# character strings such as "12345,67".
mojedata_csv <- read.csv2(
  "mojedata_WDI_V4.csv",
  header = TRUE,
  sep = ";",
  dec = ","
)

head(mojedata_csv)
colnames(mojedata_csv)

# Plots on the WDI data ----
# Subset for the year 2015
mojedata_2015 <- mojedata %>%
  filter(YEAR == 2015) %>%
  select(COUNTRY, GDP.PCAP, INFLATION, UNEMP, EXPORTS)

mojedata_2015

# Scatter plot: GDP vs. exports
library(ggplot2)

ggplot(mojedata_2015, aes(x = EXPORTS, y = GDP.PCAP, color = COUNTRY)) +
  geom_point(size = 3) +
  theme_minimal() +
  labs(
    title = "HDP na obyvateľa vs. exporty (% HDP), rok 2015",
    x = "Exporty (% HDP)",
    y = "HDP na obyvateľa (konštantné USD)",
    color = "Krajina"
  )

# Boxplot: GDP by country
# NOTE(review): the data is restricted to a single year, so each country
# presumably contributes one observation and every box collapses to a
# line - confirm whether a multi-year subset was intended.
ggplot(mojedata_2015, aes(x = COUNTRY, y = GDP.PCAP)) +
  geom_boxplot(fill = "lightblue", color = "darkblue") +
  theme_minimal() +
  labs(
    title = "Rozdelenie HDP na obyvateľa podľa krajiny (2015)",
    x = "Krajina",
    y = "HDP na obyvateľa (konštantné USD)"
  )

# Basic statistics: GDP per capita ----
library(knitr)

# Per-year descriptive statistics of GDP.PCAP for 2010-2020; missing values
# are dropped inside every aggregate.
wdi.stats <- mojedata %>%
  filter(YEAR %in% 2010:2020) %>%
  group_by(YEAR) %>%
  summarise(
    n = n(),
    mean = mean(GDP.PCAP, na.rm = TRUE),
    sd = sd(GDP.PCAP, na.rm = TRUE),
    min = min(GDP.PCAP, na.rm = TRUE),
    q25 = quantile(GDP.PCAP, 0.25, na.rm = TRUE),
    median = median(GDP.PCAP, na.rm = TRUE),
    q75 = quantile(GDP.PCAP, 0.75, na.rm = TRUE),
    max = max(GDP.PCAP, na.rm = TRUE),
    .groups = "drop"
  )

kable(
  wdi.stats,
  digits = 2,
  caption = "Základné štatistiky HDP na obyvateľa (2010–2020, krajiny V4)"
)

# kableExtra: a nicer-looking table
library(kableExtra)

# The grouped header spans 2 + 7 = 9 columns, matching the nine columns of
# wdi.stats (YEAR, n, and the seven statistics).
wdi.stats %>%
  kable(
    digits = 2,
    caption = "Základné štatistiky HDP na obyvateľa (2010–2020, krajiny V4)"
  ) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  column_spec(1, bold = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#f2f2f2") %>%
  add_header_above(c(" " = 2, "HDP štatistiky" = 7))

# t-test: comparing GDP in 2005 and 2015 ----
# t.test() with two unpaired samples and default arguments.
t.test(
  mojedata$GDP.PCAP[mojedata$YEAR == 2005],
  mojedata$GDP.PCAP[mojedata$YEAR == 2015]
)

# ANOVA: comparing GDP between countries (2015) ----
# NOTE(review): mojedata_2015 holds a single year, so there is presumably
# one observation per country and no residual degrees of freedom for an
# F test - confirm whether a multi-year sample was intended.
anova.result <- aov(GDP.PCAP ~ COUNTRY, data = mojedata_2015)
summary(anova.result)

# Linear regression: GDP ~ inflation + unemployment + exports ----
# Regression sample: 2015 rows only, model columns only, complete cases.
wdi_reg <- mojedata %>%
  filter(YEAR == 2015) %>%
  select(GDP.PCAP, INFLATION, UNEMP, EXPORTS) %>%
  na.omit()

# NOTE(review): with four countries and a single year the sample presumably
# has at most four rows for four coefficients, leaving no residual degrees
# of freedom - confirm the intended sample.
model <- lm(GDP.PCAP ~ INFLATION + UNEMP + EXPORTS, data = wdi_reg)
summary(model)

# Regression coefficients: broom + kableExtra ----
# install.packages(c("broom", "stringr"))  # if needed
library(broom)
library(stringr)

# Tidy coefficient table with readable term labels, significance markers
# and a formatted confidence interval.
coef.tbl <- tidy(model, conf.int = TRUE) %>%
  mutate(
    term = recode(
      term,
      "(Intercept)" = "Intercept",
      "INFLATION" = "Inflácia",
      "UNEMP" = "Nezamestnanosť",
      "EXPORTS" = "Exporty (% HDP)"
    ),
    # Markers follow the legend printed in the table footnote below.
    stars = case_when(
      p.value < 0.001 ~ "***",
      p.value < 0.01 ~ "**",
      p.value < 0.05 ~ "*",
      p.value < 0.1 ~ "·",
      TRUE ~ ""
    )
  ) %>%
  # Non-syntactic column names must be wrapped in backticks.
  transmute(
    Term = term,
    Estimate = estimate,
    `Std. Error` = std.error,
    `t value` = statistic,
    `p value` = p.value,
    `95% CI` = str_c(
      "[", round(conf.low, 3), ", ", round(conf.high, 3), "]"
    ),
    Sig = stars
  )

coef.tbl %>%
  kable(
    digits = 3,
    caption = "OLS koeficienty (HDP na obyvateľa ~ inflácia + nezamestnanosť + exporty)"
  ) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  column_spec(1, bold = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#f2f2f2") %>%
  footnote(
    general = "Signif. codes: *** p<0.001, ** p<0.01, * p<0.05, · p<0.1.",
    threeparttable = TRUE
  )

# Model fit statistics ----
# One-row summary of the fitted model; non-syntactic column names are
# wrapped in backticks.
fit.tbl <- glance(model) %>%
  transmute(
    `R-squared` = r.squared,
    `Adj. R-squared` = adj.r.squared,
    `F-statistic` = statistic,
    `F p-value` = p.value,
    AIC = AIC,
    BIC = BIC,
    `Num. obs.` = nobs
  )

fit.tbl %>%
  kable(digits = 3, caption = "Model Fit Statistics") %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("condensed")
  )

