# Global knitr chunk defaults for the whole document: echo the source code of
# each chunk, but keep package messages and warnings out of the output.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
# Simple table ----
# Build a small data frame from three parallel vectors (name, age, points).
Meno <- c("Jana", "Jozef", "Mária")
Vek <- c(10, 11, 9)
Body <- c(85, 92, 78)
udaje <- data.frame(Meno, Vek, Body)
udaje

# Column access and basic indexing examples.
udaje$Vek
mean(udaje$Vek)
# Filter on the data frame's own column rather than the free-standing vector,
# so the condition stays aligned with the rows even after `udaje` changes.
udaje[udaje$Meno == "Jozef", ]
udaje[3, ]
udaje[, 2:3]
udaje[1, 1]
summary(udaje)

# Add a logical column.
MaAuto <- c(TRUE, FALSE, TRUE)
udaje <- cbind(udaje, MaAuto)
udaje

# Add one more row; the one-row data frame must carry the same column names.
novy.riadok <- data.frame(
  Meno = "Diana",
  Vek = 22.485,
  Body = 42,
  MaAuto = FALSE
)
udaje <- rbind(udaje, novy.riadok)
udaje
# Tables - kableExtra ----
library(knitr)
library(kableExtra)

# Render `udaje` as a styled table. `align` supplies one alignment code per
# column; `digits = 2` rounds the numeric columns.
kable(
  udaje,
  digits = 2,
  align = c("l", "c", "l", "r"),
  caption = "Toto je tabuľka"
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  )
# Tidyverse - modern data manipulation ----
library(tidyverse)

# Selecting and sorting ----
# Keep rows with more than 50 points and sort them by points, highest first.
udaje %>%
  filter(Body > 50) %>%
  arrange(desc(Body)) %>%
  kable() %>%
  kable_styling(full_width = FALSE)
# Grouping and summarising ----
# Mean points and row count per value of `MaAuto`; `.groups = "drop"` returns
# an ungrouped result.
udaje %>%
  group_by(MaAuto) %>%
  summarise(
    Priemer = mean(Body),
    Pocet = n(),
    .groups = "drop"
  ) %>%
  kable(
    caption = "Priemerné Body podľa premennej MaAuto",
    col.names = c("Má auto", "Priemer Body", "Počet")
  ) %>%
  kable_styling(full_width = FALSE)
# Creating new variables ----
# `grade` buckets the points into letter grades; case_when() evaluates the
# conditions top to bottom, so each row gets the first matching grade.
# `VekPoPlnoletosti` is the rounded difference between age and 18.
udaje %>%
  mutate(
    grade = case_when(
      Body >= 90 ~ "A",
      Body >= 80 ~ "B",
      Body >= 70 ~ "C",
      TRUE ~ "D"
    ),
    VekPoPlnoletosti = round(Vek - 18)
  ) %>%
  kable() %>%
  kable_styling(full_width = FALSE)
# Datasets in R ----
library(datasets)

# List the data sets shipped in the `datasets` package (name and title) and
# show the first 20 of them.
ds_datasets <- as.data.frame(
  utils::data(package = "datasets")$results
)[, c("Item", "Title")]
kable(head(ds_datasets, 20), col.names = c("Dataset", "Title"))
# Preview one of the built-in data sets.
head(CO2)

# install.packages("wooldridge")  # uncomment on first installation
library(wooldridge)

# List the data sets shipped in the `wooldridge` package and show the first 20
# as a styled table.
ds_wool <- as.data.frame(
  utils::data(package = "wooldridge")$results
)[, c("Item", "Title")]
kable(head(ds_wool, 20), col.names = c("Dataset", "Title")) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  )
# My data set - WDI (V4 countries) ----
# Import of WDI data
# install.packages("WDI")  # if needed
library(WDI)
library(dplyr)

countries <- c("SVK", "CZE", "POL", "HUN")

# Named vector: the names become the column names of the downloaded
# indicators, the values are the World Bank indicator codes.
indicators <- c(
  GDP.PCAP = "NY.GDP.PCAP.KD",
  INFLATION = "FP.CPI.TOTL.ZG",
  UNEMP = "SL.UEM.TOTL.ZS",
  EXPORTS = "NE.EXP.GNFS.ZS"
)

# Download the indicators for 2000-2023 (requires network access).
mojedata <- WDI(
  country = countries,
  indicator = indicators,
  start = 2000,
  end = 2023
)

# Upper-case the key columns and sort by country, then year. The pipe must end
# the line; a line that starts with %>% does not parse.
mojedata <- mojedata %>%
  rename(COUNTRY = country, YEAR = year) %>%
  arrange(COUNTRY, YEAR)

head(mojedata)
# CSV export and re-import ----
# write.csv2() uses ";" as the field separator and "," as the decimal mark.
write.csv2(mojedata, "mojedata_WDI_V4.csv", row.names = FALSE)

# Read the file back with the matching read.csv2() defaults (sep = ";",
# dec = ","). Forcing dec = "." here would leave the comma-decimal numbers
# unparsed, so the numeric indicator columns would come back as text.
mojedata_csv <- read.csv2("mojedata_WDI_V4.csv", header = TRUE)

head(mojedata_csv)
colnames(mojedata_csv)
# Plots on the WDI data ----
# Subset for the year 2015: keep the country label and the four indicators.
mojedata_2015 <- mojedata %>%
  filter(YEAR == 2015) %>%
  select(COUNTRY, GDP.PCAP, INFLATION, UNEMP, EXPORTS)

mojedata_2015
# Scatter plot: GDP vs. exports ----
library(ggplot2)

# One point per row of the 2015 subset, coloured by country.
ggplot(mojedata_2015, aes(x = EXPORTS, y = GDP.PCAP, color = COUNTRY)) +
  geom_point(size = 3) +
  theme_minimal() +
  labs(
    title = "HDP na obyvateľa vs. exporty (% HDP), rok 2015",
    x = "Exporty (% HDP)",
    y = "HDP na obyvateľa (konštantné USD)",
    color = "Krajina"
  )
# Boxplot: GDP by country ----
# NOTE(review): `mojedata_2015` is filtered to a single year, so each country
# presumably contributes only one value and every box collapses to a line.
# Confirm whether a multi-year subset was intended here.
ggplot(mojedata_2015, aes(x = COUNTRY, y = GDP.PCAP)) +
  geom_boxplot(fill = "lightblue", color = "darkblue") +
  theme_minimal() +
  labs(
    title = "Rozdelenie HDP na obyvateľa podľa krajiny (2015)",
    x = "Krajina",
    y = "HDP na obyvateľa (konštantné USD)"
  )
# Basic statistics - GDP per capita ----
library(knitr)

# Per-year summary of GDP per capita across the countries for 2010-2020.
# `n` counts rows per year (including rows with missing GDP); all other
# statistics skip missing values via na.rm = TRUE.
wdi.stats <- mojedata %>%
  filter(YEAR %in% 2010:2020) %>%
  group_by(YEAR) %>%
  summarise(
    n = n(),
    mean = mean(GDP.PCAP, na.rm = TRUE),
    sd = sd(GDP.PCAP, na.rm = TRUE),
    min = min(GDP.PCAP, na.rm = TRUE),
    q25 = quantile(GDP.PCAP, 0.25, na.rm = TRUE),
    median = median(GDP.PCAP, na.rm = TRUE),
    q75 = quantile(GDP.PCAP, 0.75, na.rm = TRUE),
    max = max(GDP.PCAP, na.rm = TRUE),
    .groups = "drop"
  )

kable(
  wdi.stats,
  digits = 2,
  caption = "Základné štatistiky HDP na obyvateľa (2010–2020, krajiny V4)"
)
# kableExtra - a nicer table ----
library(kableExtra)

# Same statistics table with styling: bold first column, bold shaded header
# row, and a spanning header. The spanning header widths (2 + 7) must add up
# to the nine columns of `wdi.stats`.
wdi.stats %>%
  kable(
    digits = 2,
    caption = "Základné štatistiky HDP na obyvateľa (2010–2020, krajiny V4)"
  ) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  column_spec(1, bold = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#f2f2f2") %>%
  add_header_above(c(" " = 2, "HDP štatistiky" = 7))
# t-test: comparing GDP in 2005 and 2015 ----
# Two-sample t-test on GDP per capita between the two years; t.test() is
# called with its defaults.
t.test(
  mojedata$GDP.PCAP[mojedata$YEAR == 2005],
  mojedata$GDP.PCAP[mojedata$YEAR == 2015]
)
# ANOVA: comparing GDP between countries (2015) ----
# NOTE(review): `mojedata_2015` presumably holds one row per country, which
# leaves zero residual degrees of freedom, so summary() cannot report an F
# test. Confirm whether a multi-year sample was intended.
anova.result <- aov(GDP.PCAP ~ COUNTRY, data = mojedata_2015)
summary(anova.result)
# Linear regression: GDP ~ inflation + unemployment + exports ----
# Regression sample: 2015 rows with all four variables present.
wdi_reg <- mojedata %>%
  filter(YEAR == 2015) %>%
  select(GDP.PCAP, INFLATION, UNEMP, EXPORTS) %>%
  na.omit()

# NOTE(review): with a single year of four countries the sample presumably has
# at most four rows for four estimated coefficients, so the fit is saturated
# (no residual degrees of freedom, undefined standard errors). Confirm whether
# more years should be included.
model <- lm(GDP.PCAP ~ INFLATION + UNEMP + EXPORTS, data = wdi_reg)
summary(model)
# Regression coefficients - broom + kableExtra ----
# install.packages(c("broom", "stringr"))  # if needed
library(broom)
library(stringr)

# Tidy coefficient table: readable term labels, significance markers, and a
# formatted 95% confidence interval. Column names containing spaces or
# symbols must be wrapped in backticks.
coef.tbl <- tidy(model, conf.int = TRUE) %>%
  mutate(
    term = recode(
      term,
      "(Intercept)" = "Intercept",
      "INFLATION" = "Inflácia",
      "UNEMP" = "Nezamestnanosť",
      "EXPORTS" = "Exporty (% HDP)"
    ),
    # Markers follow the legend printed in the table footnote below.
    stars = case_when(
      p.value < 0.001 ~ "***",
      p.value < 0.01 ~ "**",
      p.value < 0.05 ~ "*",
      p.value < 0.1 ~ "·",
      TRUE ~ ""
    )
  ) %>%
  transmute(
    Term = term,
    Estimate = estimate,
    `Std. Error` = std.error,
    `t value` = statistic,
    `p value` = p.value,
    `95% CI` = str_c("[", round(conf.low, 3), ", ", round(conf.high, 3), "]"),
    Sig = stars
  )

# Styled coefficient table with the significance legend as a footnote.
coef.tbl %>%
  kable(
    digits = 3,
    caption = "OLS koeficienty (HDP na obyvateľa ~ inflácia + nezamestnanosť + exporty)"
  ) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  column_spec(1, bold = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#f2f2f2") %>%
  footnote(
    general = "Signif. codes: *** p<0.001, ** p<0.01, * p<0.05, · p<0.1.",
    threeparttable = TRUE
  )
# Model fit statistics ----
# One-row summary of the fitted model from broom::glance(), relabelled for
# display. Names with spaces, dashes, or dots need backticks.
fit.tbl <- glance(model) %>%
  transmute(
    `R-squared` = r.squared,
    `Adj. R-squared` = adj.r.squared,
    `F-statistic` = statistic,
    `F p-value` = p.value,
    `AIC` = AIC,
    `BIC` = BIC,
    `Num. obs.` = nobs
  )

fit.tbl %>%
  kable(digits = 3, caption = "Model Fit Statistics") %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("condensed")
  )
