# Global knitr chunk defaults: echo the code, suppress messages and warnings.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)

Práca s údajmi - športovci

Tradičná práca s databázou

Príklad

Majme údaje o športovcoch, ktoré predstavujú štyri premenné - Meno, Výška, Váha a Šport.

# Working with data frames

# Input vectors, one per variable; each holds one value per athlete.
Meno <- c("Boris", "Pavol", "Daniel")
Výška <- c(183, 179, 194)
Váha <- c(91, 86, 105)
Šport <- c("futbal", "hokej", "basketbal")

# Build the table from the first three vectors only; Šport is kept as a
# free-standing vector and is not part of the table at this point.
udaje <- data.frame(Meno, Výška, Váha)
print(udaje)
# The table has no `Vek` column (udaje$Vek is NULL and its mean is NA),
# so the existing numeric column `Výška` is addressed instead.
print(udaje$Výška)               # `$` addresses a single variable of the data.frame
[1] 183 179 194
print(mean(udaje$Výška))         # mean of that column
[1] 185.3333
# Select a whole row by a condition. The condition uses the table's own column
# (udaje$Meno) rather than the free-standing vector `Meno`, so it stays correct
# even when the table and that vector no longer have the same length.
print(udaje[udaje$Meno == "Pavol", ])
print(udaje[3, ])                # another way to select a whole row: by index
print(udaje[, 2:3])              # print the second and third columns of the table
print(udaje[1, 1])               # print a single cell of the table
[1] "Boris"
summary(udaje)                   # basic descriptive statistics of the whole table
     Meno               Výška            Váha      
 Length:3           Min.   :179.0   Min.   : 86.0  
 Class :character   1st Qu.:181.0   1st Qu.: 88.5  
 Mode  :character   Median :183.0   Median : 91.0  
                    Mean   :185.3   Mean   : 94.0  
                    3rd Qu.:188.5   3rd Qu.: 98.0  
                    Max.   :194.0   Max.   :105.0  

Ak chceme pridať k tabuľke dodatočný stĺpec, potom to robíme nasledovne

# Append a new column: bind the vector to the table under an explicit name.
Šport <- c("futbal", "hokej", "basketbal")
udaje <- cbind(udaje, Šport = Šport)
print(udaje)

Ak chceme pridať riadok, potom

# New record: a one-row data.frame whose column names and types match `udaje`.
novy.riadok <- data.frame(
  Meno = "Filip",
  Výška = 178,
  Váha = 82,
  Šport = "plávanie"
)

# Append the record as the last row of the table.
udaje <- rbind(udaje, novy.riadok)
print(udaje)

Tabuľky v prostredí kableExtra

library(knitr)
library(kableExtra)

# Render `udaje` as a captioned table with per-column alignment, then apply
# bootstrap styling. Arguments of kable() that are not listed keep their defaults.
udaje %>%
  kable(
    digits = 2,
    align = c("l", "c", "l", "r"),
    caption = "Toto je tabuľka"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  )
Toto je tabuľka
Meno Výška Váha Šport
Boris 183 91 futbal
Pavol 179 86 hokej
Daniel 194 105 basketbal
Filip 178 82 plávanie
NA

Tidyverse - moderná práca s údajmi

Tidyverse je súbor knižníc, ktoré majú zjednodušiť prácu s údajmi. Majú jednotný komunikačný štandard, vzájomne sa doplňujú.

# Load tidyverse
library(tidyverse)

dplyr - pre manipuláciu s údajmi

.dplyr. poskytuje základné možnosti manipulácie s údajmi, ako napr.:

  1. filter(): vyberá riadky

  2. select(): vyberá stĺpce

  3. mutate(): vytvára nové stĺpce tabuľky

  4. arrange(): triedi riadky

  5. summarise(): sumarizuje

V nasledovnej ukážke využijeme tzv. .pipes. Operátor %>% (alebo natívny operátor |>) umožňuje posielať výsledok jednej funkcie priamo do volania nasledujúcej funkcie. To zlepšuje čitateľnosť kódu; konvencia sa ujala a má široké použitie.

Výber a triedenie

# Filter and then sort.
# The table has no `Body` column, so sorting uses the existing `Váha` column.
udaje %>%
  filter(Váha > 50) %>%      # keep the records with Váha greater than 50
  arrange(desc(Váha)) %>%    # sort the result in descending order of Váha
  kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  )
Meno Výška Váha Šport
Daniel 194 105 basketbal
Boris 183 91 futbal
Pavol 179 86 hokej
Filip 178 82 plávanie

Zoskupenie a sumarizácia

# Group and summarise.
# The table has no `MaAuto` column, so the records are grouped by the existing
# `Šport` column; the caption and column labels describe what is computed.
udaje %>%
  group_by(Šport) %>%          # one group per value of Šport
  summarise(
    priemer_vaha = mean(Váha), # mean of Váha within each group
    pocet = n()                # number of records in each group
  ) %>%
  kable(
    caption = "Priemerná váha podľa športu",
    col.names = c("Šport", "Priemerná váha", "Počet"),
    align = "c"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  )
Priemerná váha podľa športu
Šport Priemerná váha Počet
basketbal 105 1
futbal 91 1
hokej 86 1
plávanie 82 1

Vytváranie novej premennej

# Create a new variable.
udaje %>%
  mutate(
    # `grade` is set by the first condition on Váha that holds, checked top to
    # bottom; records matching none of them get "D".
    grade = case_when(
      Váha >= 90 ~ "A",
      Váha >= 80 ~ "B",
      Váha >= 70 ~ "C",
      TRUE ~ "D"
    )
  ) %>%
  kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  )
Meno Výška Váha Šport grade
Boris 183 91 futbal A
Pavol 179 86 hokej B
Daniel 194 105 basketbal A
Filip 178 82 plávanie B
LS0tCnRpdGxlOiAiUHLDoWNhIHMgZGF0YWLDoXpvdSIKYXV0aG9yOiAiQmFyYm9yYSBLdWNow6FyaWtvdsOhICA8YnI+ICIKZGF0ZTogIk9rdMOzYmVyIDIwMjUiCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIHRoZW1lOiB1bml0ZWQKICAgIGhpZ2hsaWdodDogdGFuZ28KICBodG1sX2RvY3VtZW50OgogICAgdG9jOiB0cnVlCiAgICBkZl9wcmludDogcGFnZWQKZWRpdG9yX29wdGlvbnM6CiAgbWFya2Rvd246CiAgICB3cmFwOiA3MgotLS0KCmBgYHtyfQprbml0cjo6b3B0c19jaHVuayRzZXQoCiAgICBlY2hvID0gVFJVRSwKICAgIG1lc3NhZ2UgPSBGQUxTRSwKICAgIHdhcm5pbmcgPSBGQUxTRQopCmBgYAoKIyBQcsOhY2EgcyDDumRham1pIC0gxaFwb3J0b3ZjaQoKIyMgVHJhZGnEjW7DoSBwcsOhY2EgcyBkYXRhYsOhem91CgojIyMgUHLDrWtsYWQKCk1ham1lIMO6ZGFqZSBvIMWhcG9ydG92b2NoLCBrdG9yw6kgcHJlZHN0YXZ1asO6IHRyaSBwcmVtZW5uw6kgLSBNZW5vLCBWw73FoWthLCBWw6FoYSBhIMWhcG9ydC4KCmBgYHtyfQojIFdvcmtpbmcgd2l0aCBkYXRhIGZyYW1lcwoKICBNZW5vID0gYygiQm9yaXMiLCAiUGF2b2wiLCAiRGFuaWVsIikKICBWw73FoWthID0gYygxODMsIDE3OSwgMTk0KQogIFbDoWhhID0gYyg5MSwgODYsIDEwNSkKICDFoHBvcnQgPSBjKCJmdXRiYWwiLCAiaG9rZWoiLCAiYmFza2V0YmFsIikKYGBgCgoKYGBge3J9CnVkYWplIDwtIGRhdGEuZnJhbWUoTWVubyxWw73FoWthLFbDoWhhKQpwcmludCh1ZGFqZSkKYGBgCgoKYGBge3J9CnByaW50KHVkYWplJFZlaykgICAgICAgICAgICAgICAgICMgdGFrdG8gYWRyZXN1amVtZSBqZWRub3RsaXbDqSBwcmVtZW5uw6kgdiBkYXRhLmZyYW1lCnByaW50KG1lYW4odWRhamUkVmVrKSkgICAgICAgICAgICMgcHJpZW1lcm55IHZlawpwcmludCh1ZGFqZVtNZW5vPT0iUGF2b2wiLF0pICAgICAjIGFkcmVzb3ZhbmllIGNlbMOpaG8gcmlhZGt1CnByaW50KHVkYWplWzMsXSkgICAgICAgICAgICAgICAgICMgaW5hIG1vem5vc3QgYWRyZXNvdmFuaWEgY2VsZWhvIHJpYWRrdQpwcmludCh1ZGFqZVssMjozXSkgICAgICAgICAgICAgICAjIHZ5cGlzYW5pZSBkcnVoZWhvIGEgdHJldGllaG8gc3RscGNhIHRhYnVsa3kKcHJpbnQodWRhamVbMSwxXSkgICAgICAgICAgICAgICAgIyB2eXBpc2FuaWUgamVkbmVqIGJ1bmt5IHRhYnVsa3kKc3VtbWFyeSh1ZGFqZSkgICAgICAgICAgICAgICAgICAgIyB6YWtsYWRuYSBkZXNrcmlwdGl2bmEgc3RhdGlzdGlrYSBjZWxlaiB0YWJ1bGt5CmBgYAoKQWsgY2hjZW1lIHByaWRhxaUgayB0YWJ1xL5rZSBkb2RhdG/EjW7DvSBzdMS6cGVjLCBwb3RvbSB0byByb2LDrW1lIG5hc2xlZG92bmUKCmBgYHtyfQrFoHBvcnQgPC0gYygiZnV0YmFsIiwgImhva2VqIiwgImJhc2tldGJhbCIpCnVkYWplIDwtIGNiaW5kKHVkYWplLMWgcG9ydCkKcHJpbnQodWRhamUpCmBgYAoKQWsgY2hj
ZW1lIHByaWRhxaUgcmlhZG9rLCBwb3RvbQoKYGBge3J9CiMgTmV3IHJlY29yZCAobXVzdCBtYXRjaCBjb2x1bW4gb3JkZXIvdHlwZXMpCm5vdnkucmlhZG9rIDwtIGRhdGEuZnJhbWUoTWVubyA9ICJGaWxpcCIsIFbDvcWha2EgPSAxNzgsIFbDoWhhID0gODIsxaBwb3J0ID0gInBsw6F2YW5pZSIpCgojIEFwcGVuZAp1ZGFqZSA8LSByYmluZCh1ZGFqZSwgbm92eS5yaWFkb2spCnByaW50KHVkYWplKQpgYGAKCiMjIyBUYWJ1xL5reSB2IHByb3N0cmVkw60ga2FibGVleHRyYQoKCmBgYHtyfQpsaWJyYXJ5KGtuaXRyKQpsaWJyYXJ5KGthYmxlRXh0cmEpCmthYmxlKAogIHVkYWplLAojICBmb3JtYXQsCmRpZ2l0cyA9IDIsCiMgIHJvdy5uYW1lcyA9IE5BLAojICBjb2wubmFtZXMgPSBOQSwKICBhbGlnbj1jKCJsIiwiYyIsImwiLCJyIiksCiAgY2FwdGlvbiA9ICJUb3RvIGplIHRhYnXEvmthIgojICBsYWJlbCA9IE5VTEwsCiMgIGZvcm1hdC5hcmdzID0gbGlzdCgpLAojICBlc2NhcGUgPSBUUlVFLAogIyAuLi4KKSAlPiUKICAgICAga2FibGVfc3R5bGluZyhib290c3RyYXBfb3B0aW9ucyA9IGMoInN0cmlwZWQiLCAiaG92ZXIiLCAiY29uZGVuc2VkIiwgInJlc3BvbnNpdmUiKSwKICAgIGZ1bGxfd2lkdGggPSBGQUxTRSwKICAgIHBvc2l0aW9uID0gImNlbnRlciIpCgpgYGAKCgoKIyMgVGlkeXZlcnNlIC0gbW9kZXJuw6EgcHLDoWNhIHMgw7pkYWptaQoKVGlkeXZlcnNlIGplIHPDumJvciBrbmnFvm7DrWMsIGt0b3LDqSBtYWrDuiB6amVkbm9kdcWhacWlIHByw6FjdSBzIMO6ZGFqbWkuIE1hasO6IGplZG5vdG7DvSBrb211bmlrYcSNbsO9IMWhdGFuZGFyZCwgdnrDoWpvbW5lIHNhIGRvcGzFiHVqw7ouCgpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQojIExvYWQgdGlkeXZlcnNlCmxpYnJhcnkodGlkeXZlcnNlKQpgYGAKCiMjIyAgZHBseXIgLSBwcmUgbWFuaXB1bMOhY2l1IHMgw7pkYWptaQoKLmRwbHlyLiBwb3NreXR1amUgesOha2xhZG7DqSBtb8W+bm9zdGkgbWFuaXB1bMOhY2llIHMgw7pkYWptaSwgYWtvIG5hcHIuOiAKCjEuIGZpbHRlcigpOiB2eWJlcsOhIHJpYWRreSAKCjEuIHNlbGVjdCgpOiB2eWJlcsOhIHN0xLpwY2UgCgoxLiBtdXRhdGUoKTogdnl0dsOhcmEgbm92w6kgc3TEunBjZSB0YWJ1xL5reSAKCjEuIGFycmFuZ2UoKTogdHJpZWRpIHJpYWRreSAKCjEuIHN1bW1hcmlzZSgpOiBzdW1hcml6dWplCgpWIG5hc2xlZG92bmVqIHVrw6HFvmtlIHZ5dcW+aWplbWUgdHp2LiAucGlwZXMuICU+JSBhbGVibyAlPCUgdW1vxb7FiHVqZSBwb3NpZWxhxaUgdsO9c2xlZGt5IHogamVkbmVqIGZ1bmtjaWUgcHJpYW1vIGRvIHZvbGFuaWUgbmFzbGVkb3ZuZWogZnVua2NpZS4gVG8gdW1vxb7FiHVqZSDEvmFoxaFpdSDEjWl0YXRlxL5ub3PFpSBrw7Nkb3YsIGtvbnZlbmNpYSBzYSB1amFsYSBhIG3DoSDFoWlyb2vDqSBwb3XFvml0aWUuCgojIyMjIFbDvWJlciBhIHRyaWVkZW5pZQoKYGBge3J9CiMgdsO9YmVyIGEgbsOh
c2xlZG7DqSB0cmllZGVuaWUKdWRhamUgJT4lCiAgZmlsdGVyKFbDoWhhID4gNTApICU+JSAgICAgIyB2eWJlcmEgemF6bmFteSBzIHBvY3RvbSBib2RvdiB2aWFjLCBha28gNTAKICBhcnJhbmdlKGRlc2MoQm9keSkpICU+JSAgICAgIyB2eXNsZWRueSBzdWJvciB0cmllZGkgem9zdHVwbmUgcG9kbGEgcHJlbWVubmVqIEJvZHkKa2FibGUgJT4lCiAgICBrYWJsZV9zdHlsaW5nKAogICAgYm9vdHN0cmFwX29wdGlvbnMgPSBjKCJzdHJpcGVkIiwgImhvdmVyIiwgImNvbmRlbnNlZCIsICJyZXNwb25zaXZlIiksCiAgICBmdWxsX3dpZHRoID0gRkFMU0UsCiAgICBwb3NpdGlvbiA9ICJjZW50ZXIiCiAgKQpgYGAKCiMjIyMgWm9za3VwZW5pZSBhIHN1bWFyaXrDoWNpYQoKYGBge3J9CiMgWm9za3Vww60gYW5kIHN1bWFyaXp1amUKdWRhamUgJT4lCiAgZ3JvdXBfYnkoTWFBdXRvKSAlPiUgICAgICAjIHpvc2t1cGkgemF6bmFteSBwb2RsYSBwcmVtZW5uZWogTWFBdXRvIGEgdnlwb2NpdGEgemEga2F6ZHUgc2t1cGludSBqZWogcHJpZW1lciBCb2R5CiAgc3VtbWFyaXNlKCAgICAgICAgICAgICAgICAjIGEgdGFrdGlleiBzcG9jaXRhIHBvY2V0bm9zdGkgb2JvY2ggc2t1cGluCiAgICBQcmllbS5Cb2R5ID0gbWVhbihWw6FoYSksCiAgICBjb3VudCA9IG4oKQogICkgJT4lCiBrYWJsZSgKICAgIGNhcHRpb24gPSAiUHJpZW1lcm7DqSBCb2R5IHBvZMS+YSBwcmVtZW5uZWogTWFBdXRvIiwKICAgIGNvbC5uYW1lcyA9IGMoIk3DoSBBdXRvIiwgIlByaWVtZXIgVsOhaGEiLCAiUG/EjWV0IiksCiAgICBhbGlnbiA9ICJjIgogICkgJT4lCiAga2FibGVfc3R5bGluZygKICAgIGJvb3RzdHJhcF9vcHRpb25zID0gYygic3RyaXBlZCIsICJob3ZlciIsICJjb25kZW5zZWQiLCAicmVzcG9uc2l2ZSIpLAogICAgZnVsbF93aWR0aCA9IEZBTFNFLAogICAgcG9zaXRpb24gPSAiY2VudGVyIgogICkKYGBgCgojIyMjIFZ5dHbDoXJhbmllIG5vdmVqIHByZW1lbm5lagoKYGBge3J9CiMgVnl0dsOhcmFuaWUgbm92ZWogcHJlbWVubmVqCnVkYWplICU+JQogIG11dGF0ZSgKICAgIGdyYWRlID0gY2FzZV93aGVuKCAgICAgIyB2eXR2YXJhIG5vdnUgcHJlbWVubnUgZ3JhZGUgcG9kbGEgbmFzbGVkb3ZuZWogcmVsYWNuZWogc2NoZW15CiAgICAgIFbDoWhhID49IDkwIH4gIkEiLAogICAgICBWw6FoYSA+PSA4MCB+ICJCIiwKICAgICAgVsOhaGEgPj0gNzAgfiAiQyIsCiAgICAgIFRSVUUgfiAiRCIKICAgICksCiAgCiAgKSAlPiUgCiAga2FibGUgJT4lCiAgIGthYmxlX3N0eWxpbmcoCiAgICBib290c3RyYXBfb3B0aW9ucyA9IGMoInN0cmlwZWQiLCAiaG92ZXIiLCAiY29uZGVuc2VkIiwgInJlc3BvbnNpdmUiKSwKICAgIGZ1bGxfd2lkdGggPSBGQUxTRSwKICAgIHBvc2l0aW9uID0gImNlbnRlciIKICApIApgYGAKCgoKCg==