# Global knitr defaults applied to every chunk: echo the code, hide messages
# and warnings, and emit chunk results as-is.
# NOTE(review): results = "asis" also applies to the plain print() calls in
# this file, not only to the kable() tables - confirm that is intended.
knitr::opts_chunk$set(
  results = "asis",
  warning = FALSE,
  message = FALSE,
  echo = TRUE
)

Práca s údajmi

Tradičná práca s databázou

Príklad

Majme údaje o krasokorčuliarkach, ktoré sa na posledných majstrovstvách sveta umiestnili na prvých desiatich priečkach.

# Small example table with four rows: name (Meno), team (Tim), points (Body).
# The three vectors are also kept as standalone variables.
Meno <- c(
  "Max Verstappen", "Lewis Hamilton",
  "Charles Leclerc", "Lando Norris"
)
Tim <- c("Red Bull", "Mercedes", "Ferrari", "McLaren")
Body <- c(400, 350, 280, 260)

# Bind the vectors into a data frame; column names equal the vector names.
f1 <- data.frame(Meno = Meno, Tim = Tim, Body = Body)
print(f1)
# One vector per column: names, country codes and scores of the ten skaters.
Meno <- c(
  "Kaori Sakamoto", "Isabeau Levito", "Chaeyeon Kim", "Loena Hendrickx",
  "Kimmy Repond", "Lee Hae-In", "Mone Chiba", "Hana Yoshida",
  "Livia Kaiser", "Amber Glenn"
)
Krajina <- c(
  "JPN", "USA", "KOR", "BEL", "SUI",
  "KOR", "JPN", "JPN", "SUI", "USA"
)
Body <- c(
  222.96, 212.16, 203.59, 200.25, 196.02,
  195.48, 195.46, 194.93, 187.24, 186.53
)

# Combine the three vectors into one data frame.
krasokorculiarky <- data.frame(Meno = Meno, Krajina = Krajina, Body = Body)

# Show the table.
krasokorculiarky

Tieto tri premenné nie sú zatiaľ nijako prepojené, predstavujú izolované stĺpce tabuľky. Do tabuľky ich spojíme nasledovne:

# Join the three column vectors (Meno, Krajina, Body) into a single data frame
# and print it. The result is stored under the ASCII name `krasokorculiarky`,
# the same name the print() call and the rest of the file read, so the table
# that is printed is the one built on the line above.
krasokorculiarky <- data.frame(Meno, Krajina, Body)
print(krasokorculiarky)

Vysvetlenie: Data frame má tri stĺpce: Meno, Krajina a Body. Niektoré operácie s údajmi organizovanými v `data.frame` sú uvedené nasledovne:

# A few basic operations on the data frame.
print(krasokorculiarky$Krajina)       # the column of country codes
print(mean(krasokorculiarky$Body))    # mean of the Body column
# Select a whole row by name. The condition uses the table's own Meno column
# rather than the standalone Meno vector, so the logical mask always has
# exactly one entry per row of the table, even after rows are added.
print(krasokorculiarky[krasokorculiarky$Meno == "Kaori Sakamoto", ])
print(krasokorculiarky[3, ])          # another way to address a whole row
print(krasokorculiarky[, 2:3])        # second and third column of the table
print(krasokorculiarky[1, 1])         # a single cell of the table
summary(krasokorculiarky)             # basic descriptive statistics of the table

Ak chceme pridať k tabuľke dodatočný stĺpec, potom to robíme nasledovne:

# Medal flag: TRUE for the first three rows, FALSE for the remaining seven.
MaMedailu <- rep(c(TRUE, FALSE), times = c(3, 7))

# Attach the flag to the table as an additional column and show the result.
krasokorculiarky <- cbind(krasokorculiarky, MaMedailu)
print(krasokorculiarky)

Pridali sme stĺpec s informáciou, ktorá nám hovorí o tom, či si súťažiaca vybojovala medailu alebo nie. Ak chceme pridať riadok, potom:

# One additional record, built as a one-row data frame with the columns
# Meno, Krajina, Body and MaMedailu.
novy.riadok <- data.frame(
  Meno = "Anna Shcherbakova",
  Krajina = "RUS",
  Body = 184.50,
  MaMedailu = FALSE
)

# Append the record to the table and show the result.
krasokorculiarky <- rbind(krasokorculiarky, novy.riadok)
print(krasokorculiarky)

Tabuľky v prostredí kableExtra

library(knitr)
library(kableExtra)

# Render the table with kable(): numbers rounded to two digits, an explicit
# alignment for each of the four columns, and a caption. kable_styling() then
# applies the listed bootstrap options to a centered, non-full-width table.
kable(
  krasokorculiarky,
  digits = 2,
  align = c("l", "c", "l", "r"),
  caption = "Toto je tabuľka"
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  )

Tidyverse - moderná práca s údajmi

Tidyverse je súbor knižníc, ktoré majú zjednodušiť prácu s údajmi. Majú jednotný komunikačný štandard, vzájomne sa doplňujú.

# Load tidyverse
library(tidyverse)

dplyr - pre manipuláciu s údajmi

`dplyr` poskytuje základné možnosti manipulácie s údajmi, ako napr.:

  1. filter(): vyberá riadky

  2. select(): vyberá stĺpce

  3. mutate(): vytvára nové stĺpce tabuľky

  4. arrange(): triedi riadky

  5. summarise(): sumarizuje

V nasledovnej ukážke využijeme tzv. *pipes*. Operátor `%>%` (alebo natívny operátor `|>`) umožňuje posielať výsledok jednej funkcie priamo do volania nasledovnej funkcie. To zlepšuje čitateľnosť kódu; konvencia sa ujala a má široké použitie.

Výber a triedenie

# Selection followed by sorting: keep the rows with more than 200 points,
# order them by Body from highest to lowest, and render the result as a
# styled table.
krasokorculiarky %>%
  filter(Body > 200) %>%
  arrange(desc(Body)) %>%
  kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  )

Zoskupenie a sumarizácia

# Group and summarise: for each value of Krajina compute the mean of Body and
# the number of rows in the group. The headers shown in the rendered table
# come from col.names in the kable() call.
krasokorculiarky %>%
  group_by(Krajina) %>%
  summarise(
    priemer_body = mean(Body),
    pocet = n()
  ) %>%
  kable(
    caption = "Priemerné Body podľa premennej Krajina",
    col.names = c("Krajina", "Priemer Body", "Počet"),
    align = "c"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  )

Vytváranie novej premennej

library(dplyr)
library(knitr)
library(kableExtra)

# Add two derived columns and render the result as a styled table:
#   grade        - letter band from Body; the first matching rule wins
#                  (A: >= 220, B: >= 200, C: >= 190, D: everything else)
#   CloseToMedal - TRUE when 200 <= Body < 203.59, i.e. scores just outside
#                  the top three, close to a medal
krasokorculiarky %>%
  mutate(
    grade = case_when(
      Body >= 220 ~ "A",
      Body >= 200 ~ "B",
      Body >= 190 ~ "C",
      TRUE ~ "D"
    )
  ) %>%
  mutate(CloseToMedal = Body < 203.59 & Body >= 200) %>%
  kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  )
LS0tCnRpdGxlOiAiUHLDoWNhIHMgZGF0YWLDoXpvdSIKYXV0aG9yOiAiTWlyaWFtYSDFoGt1bGNvdsOhICA8YnI+IChzIHZ5dcW+aXTDrW0gdmVyZWpuZSBkb3N0dXBuw71jaCBrw7Nkb3YpIgpkYXRlOiAiU2VwdGVtYmVyIDIwMjUiCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIHRoZW1lOiB1bml0ZWQKICAgIGhpZ2hsaWdodDogdGFuZ28KICAgIGNzczogc3R5bGVzLmNzcwplZGl0b3Jfb3B0aW9uczoKICBtYXJrZG93bjoKICAgIHdyYXA6IDcyCi0tLQoKYGBge3J9CmtuaXRyOjpvcHRzX2NodW5rJHNldCgKICAgIGVjaG8gPSBUUlVFLAogICAgbWVzc2FnZSA9IEZBTFNFLAogICAgd2FybmluZyA9IEZBTFNFLAogICAgcmVzdWx0cyA9ICdhc2lzJwopCmBgYAoKIyBQcsOhY2EgcyDDumRham1pCgojIyBUcmFkacSNbsOhIHByw6FjYSBzIGRhdGFiw6F6b3UKCiMjIyBQcsOta2xhZAoKTWFqbWUgw7pkYWplIG8ga3Jhc29rb3LEjXVsaWFya2FjaCwga3RvcsOpIHNhIG5hIHBvc2xlZG7DvWNoIG1hanN0cm92c3R2w6FjaApzdmV0YSB1bWllc3RuaWxpIG5hIHBydsO9Y2ggZGVzaWF0aWNoIHByaWXEjWthY2guCmBgYHtyfQpNZW5vIDwtIGMoIk1heCBWZXJzdGFwcGVuIiwgIkxld2lzIEhhbWlsdG9uIiwgIkNoYXJsZXMgTGVjbGVyYyIsICJMYW5kbyBOb3JyaXMiKQpUaW0gPC0gYygiUmVkIEJ1bGwiLCAiTWVyY2VkZXMiLCAiRmVycmFyaSIsICJNY0xhcmVuIikKQm9keSA8LSBjKDQwMCwgMzUwLCAyODAsIDI2MCkKZjEgPC0gZGF0YS5mcmFtZShNZW5vLCBUaW0sIEJvZHkpCnByaW50KGYxKQpgYGAKCmBgYHtyfQpNZW5vIDwtIGMoIkthb3JpIFNha2Ftb3RvIiwgIklzYWJlYXUgTGV2aXRvIiwgIkNoYWV5ZW9uIEtpbSIsCiAgICAgICAgICAiTG9lbmEgSGVuZHJpY2t4IiwgIktpbW15IFJlcG9uZCIsICJMZWUgSGFlLUluIiwKICAgICAgICAgICJNb25lIENoaWJhIiwgIkhhbmEgWW9zaGlkYSIsICJMaXZpYSBLYWlzZXIiLCAiQW1iZXIgR2xlbm4iKQoKS3JhamluYSA8LSBjKCJKUE4iLCAiVVNBIiwgIktPUiIsICJCRUwiLCAiU1VJIiwgIktPUiIsICJKUE4iLCAiSlBOIiwgIlNVSSIsICJVU0EiKQoKQm9keSA8LSBjKDIyMi45NiwgMjEyLjE2LCAyMDMuNTksIDIwMC4yNSwgMTk2LjAyLCAxOTUuNDgsIDE5NS40NiwgMTk0LjkzLCAxODcuMjQsIDE4Ni41MykKCiMgVnl0dm9yZW5pZSBkYXRhIGZyYW1lCmtyYXNva29yY3VsaWFya3kgPC0gZGF0YS5mcmFtZShNZW5vLCBLcmFqaW5hLCBCb2R5KQoKIyBab2JyYXplbmllCmtyYXNva29yY3VsaWFya3kKYGBgCgpUaWV0byB0cmkgcHJlbWVubsOpIG5pZSBzw7ogemF0aWHEviBuaWpha28gcHJlcG9qZW7DqSwgcHJlZHN0YXZ1asO6IGl6b2xvdmFuw6kKc3TEunBjZSB0YWJ1xL5reS4gRG8gdGFidcS+a3kgaWNoIHNwb2rDrW1lIG5hc2xlZG92bmUKCmBgYHtyfQprcmFzb2tvcsSNdWxpYXJreSA8LSBkYXRhLmZyYW1lKE1lbm8sS3Jh
amluYSxCb2R5KQpwcmludChrcmFzb2tvcmN1bGlhcmt5KQpgYGAKClZ5c3ZldGxlbmllOiBEYXRhRnJhbWUgbcOhIHRyaSBzdMS6cGNlOiBNZW5vLCBLcmFqaW5hIGEgQm9keS4gTmlla3RvcsOpCm9wZXLDoWNpZSBzIMO6ZGFqbWkgb3JnYW5pem92YW7DvW1pIHYgLmRhdGEuZnJhbWUuIHPDuiB1dmVkZW7DqSBuYXNsZWRvdm5lCgpgYGB7cn0KcHJpbnQoa3Jhc29rb3JjdWxpYXJreSRLcmFqaW5hKSAgICAgICAgICAgICAgICAgICAjIHZ5cMOtxaFlIG7DoW0gc3TEunBlYyBzIGtyYWppbmFtaQpwcmludChtZWFuKGtyYXNva29yY3VsaWFya3kkQm9keSkpICAgICAgICAgICAgICAgICMgcHJpZW1lcm7DvSBwb8SNZXQgYm9kb3YKcHJpbnQoa3Jhc29rb3JjdWxpYXJreVtNZW5vPT0iS2FvcmkgU2FrYW1vdG8iLF0pICAjIGFkcmVzb3ZhbmllIGNlbMOpaG8gcmlhZGt1IHBvZMS+YSBtZW5hCnByaW50KGtyYXNva29yY3VsaWFya3lbMyxdKSAgICAgICAgICAgICAgICAgICAgICAgIyBpbmEgbW96bm9zdCBhZHJlc292YW5pYSBjZWxlaG8gcmlhZGt1CnByaW50KGtyYXNva29yY3VsaWFya3lbLDI6M10pICAgICAgICAgICAgICAgICAgICAgIyB2eXBpc2FuaWUgZHJ1aGVobyBhIHRyZXRpZWhvIHN0bHBjYSB0YWJ1bGt5CnByaW50KGtyYXNva29yY3VsaWFya3lbMSwxXSkgICAgICAgICAgICAgICAgICAgICAgIyB2eXBpc2FuaWUgamVkbmVqIGJ1bmt5IHRhYnVsa3kKc3VtbWFyeShrcmFzb2tvcmN1bGlhcmt5KSAgICAgICAgICAgICAgICAgICAgICAgICAjIHpha2xhZG5hIGRlc2tyaXB0aXZuYSBzdGF0aXN0aWthIGNlbGVqIHRhYnVsa3kKYGBgCgpBayBjaGNlbWUgcHJpZGHFpSBrIHRhYnXEvmtlIGRvZGF0b8SNbsO9IHN0xLpwZWMsIHBvdG9tIHRvIHJvYsOtbWUgbmFzbGVkb3ZuZToKCmBgYHtyfQpNYU1lZGFpbHUgPC0gYyhUUlVFLFRSVUUsVFJVRSxGQUxTRSxGQUxTRSxGQUxTRSxGQUxTRSxGQUxTRSxGQUxTRSxGQUxTRSkKa3Jhc29rb3JjdWxpYXJreSA8LSBjYmluZChrcmFzb2tvcmN1bGlhcmt5LE1hTWVkYWlsdSkKcHJpbnQoa3Jhc29rb3JjdWxpYXJreSkKYGBgCgpQcmlkYWxpIHNtZSBzdGxwZWMgcyBpbmZvcm3DoWNpb3UsIGt0b3LDoSBuw6FtIGhvdm9yw60gbyB0b20sIMSNaSBzaQpzw7rFpWHFvmlhY2Egdnlib2pvdmFsYSBtZWRhaWx1IGFsZWJvIG5pZS4gQWsgY2hjZW1lIHByaWRhxaUgcmlhZG9rLCBwb3RvbQoKYGBge3J9CiMgTmV3IHJlY29yZCAobXVzdCBtYXRjaCBjb2x1bW4gb3JkZXIvdHlwZXMpCm5vdnkucmlhZG9rIDwtIGRhdGEuZnJhbWUoTWVubyA9ICJBbm5hIFNoY2hlcmJha292YSIsIEtyYWppbmEgPSAiUlVTIiwgQm9keSA9IDE4NC41MCwgTWFNZWRhaWx1ID0gRkFMU0UpCgojIEFwcGVuZAprcmFzb2tvcmN1bGlhcmt5IDwtIHJiaW5kKGtyYXNva29yY3VsaWFya3ksIG5vdnkucmlhZG9rKQpwcmludChrcmFzb2tvcmN1bGlhcmt5KQpgYGAKCiMjIyBUYWJ1xL5reSB2IHByb3N0cmVkw60ga2FibGVleHRyYQoK
YGBge3J9CmxpYnJhcnkoa25pdHIpCmxpYnJhcnkoa2FibGVFeHRyYSkKa2FibGUoCiAga3Jhc29rb3JjdWxpYXJreSwKIyAgZm9ybWF0LApkaWdpdHMgPSAyLAojICByb3cubmFtZXMgPSBOQSwKIyAgY29sLm5hbWVzID0gTkEsCiAgYWxpZ249YygibCIsImMiLCJsIiwiciIpLAogIGNhcHRpb24gPSAiVG90byBqZSB0YWJ1xL5rYSIKIyAgbGFiZWwgPSBOVUxMLAojICBmb3JtYXQuYXJncyA9IGxpc3QoKSwKIyAgZXNjYXBlID0gVFJVRSwKICMgLi4uCikgJT4lCiAgICAgIGthYmxlX3N0eWxpbmcoYm9vdHN0cmFwX29wdGlvbnMgPSBjKCJzdHJpcGVkIiwgImhvdmVyIiwgImNvbmRlbnNlZCIsICJyZXNwb25zaXZlIiksCiAgICBmdWxsX3dpZHRoID0gRkFMU0UsCiAgICBwb3NpdGlvbiA9ICJjZW50ZXIiKQoKCgoKCmBgYAoKIyMgVGlkeXZlcnNlIC0gbW9kZXJuw6EgcHLDoWNhIHMgw7pkYWptaQoKVGlkeXZlcnNlIGplIHPDumJvciBrbmnFvm7DrWMsIGt0b3LDqSBtYWrDuiB6amVkbm9kdcWhacWlIHByw6FjdSBzIMO6ZGFqbWkuIE1hasO6CmplZG5vdG7DvSBrb211bmlrYcSNbsO9IMWhdGFuZGFyZCwgdnrDoWpvbW5lIHNhIGRvcGzFiHVqw7ouCgpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQojIExvYWQgdGlkeXZlcnNlCmxpYnJhcnkodGlkeXZlcnNlKQpgYGAKCiMjIyBkcGx5ciAtIHByZSBtYW5pcHVsw6FjaXUgcyDDumRham1pCgouZHBseXIuIHBvc2t5dHVqZSB6w6FrbGFkbsOpIG1vxb5ub3N0aSBtYW5pcHVsw6FjaWUgcyDDumRham1pLCBha28gbmFwci46CgoxLiAgZmlsdGVyKCk6IHZ5YmVyw6EgcmlhZGt5CgoyLiAgc2VsZWN0KCk6IHZ5YmVyw6Egc3TEunBjZQoKMy4gIG11dGF0ZSgpOiB2eXR2w6FyYSBub3bDqSBzdMS6cGNlIHRhYnXEvmt5Cgo0LiAgYXJyYW5nZSgpOiB0cmllZGkgcmlhZGt5Cgo1LiAgc3VtbWFyaXNlKCk6IHN1bWFyaXp1amUKClYgbmFzbGVkb3ZuZWogdWvDocW+a2Ugdnl1xb5pamVtZSB0enYuIC5waXBlcy4gJVw+JSBhbGVibyAlXDwlIHVtb8W+xYh1amUKcG9zaWVsYcWlIHbDvXNsZWRreSB6IGplZG5laiBmdW5rY2llIHByaWFtbyBkbyB2b2xhbmllIG5hc2xlZG92bmVqCmZ1bmtjaWUuIFRvIHVtb8W+xYh1amUgxL5haMWhaXUgxI1pdGF0ZcS+bm9zxaUga8OzZG92LCBrb252ZW5jaWEgc2EgdWphbGEgYSBtw6EKxaFpcm9rw6kgcG91xb5pdGllLgoKIyMjIyBWw71iZXIgYSB0cmllZGVuaWUKCmBgYHtyfQojIHbDvWJlciBhIG7DoXNsZWRuw6kgdHJpZWRlbmllCmtyYXNva29yY3VsaWFya3kgJT4lCiAgZmlsdGVyKEJvZHkgPiAyMDApICU+JSAgICAgIyB2eWJlcmEgemF6bmFteSBzIHBvY3RvbSBib2RvdiB2aWFjLCBha28gMjAwCiAgYXJyYW5nZShkZXNjKEJvZHkpKSAlPiUgICAgICMgdnlzbGVkbnkgc3Vib3IgdHJpZWRpIHpvc3R1cG5lIHBvZGxhIHByZW1lbm5laiBCb2R5CmthYmxlICU+JQogICAga2FibGVfc3R5bGluZygKICAgIGJvb3RzdHJhcF9vcHRpb25zID0gYygic3Ry
aXBlZCIsICJob3ZlciIsICJjb25kZW5zZWQiLCAicmVzcG9uc2l2ZSIpLAogICAgZnVsbF93aWR0aCA9IEZBTFNFLAogICAgcG9zaXRpb24gPSAiY2VudGVyIgogICkKYGBgCgojIyMjIFpvc2t1cGVuaWUgYSBzdW1hcml6w6FjaWEKCmBgYHtyfQojIFpvc2t1cMOtIGFuZCBzdW1hcml6dWplCmtyYXNva29yY3VsaWFya3kgJT4lCiAgZ3JvdXBfYnkoS3JhamluYSkgJT4lICAgICAgIyB6b3NrdXBpIHphem5hbXkgcG9kbGEgcHJlbWVubmVqIE1hQXV0byBhIHZ5cG9jaXRhIHphIGthemR1IHNrdXBpbnUgamVqIHByaWVtZXIgQm9keQogIHN1bW1hcmlzZSggICAgICAgICAgICAgICAgIyBhIHRha3RpZXogc3BvY2l0YSBwb2NldG5vc3RpIG9ib2NoIHNrdXBpbgogICAgUHJpZW0uQm9keSA9IG1lYW4oQm9keSksCiAgICBjb3VudCA9IG4oKQogICkgJT4lCiBrYWJsZSgKICAgIGNhcHRpb24gPSAiUHJpZW1lcm7DqSBCb2R5IHBvZMS+YSBwcmVtZW5uZWogS3JhamluYSIsCiAgICBjb2wubmFtZXMgPSBjKCJLcmFqaW5hIiwgIlByaWVtZXIgQm9keSIsICJQb8SNZXQiKSwKICAgIGFsaWduID0gImMiCiAgKSAlPiUKICBrYWJsZV9zdHlsaW5nKAogICAgYm9vdHN0cmFwX29wdGlvbnMgPSBjKCJzdHJpcGVkIiwgImhvdmVyIiwgImNvbmRlbnNlZCIsICJyZXNwb25zaXZlIiksCiAgICBmdWxsX3dpZHRoID0gRkFMU0UsCiAgICBwb3NpdGlvbiA9ICJjZW50ZXIiCiAgKQpgYGAKCiMjIyMgVnl0dsOhcmFuaWUgbm92ZWogcHJlbWVubmVqCgpgYGB7cn0KbGlicmFyeShkcGx5cikKbGlicmFyeShrbml0cikKbGlicmFyeShrYWJsZUV4dHJhKQoKIyBQcmlkYW5pZSBncmFkZSBhIENsb3NlVG9NZWRhbAprcmFzb2tvcmN1bGlhcmt5ICU+JQogIG11dGF0ZSgKICAgIGdyYWRlID0gY2FzZV93aGVuKAogICAgICBCb2R5ID49IDIyMCB+ICJBIiwKICAgICAgQm9keSA+PSAyMDAgfiAiQiIsCiAgICAgIEJvZHkgPj0gMTkwIH4gIkMiLAogICAgICBUUlVFIH4gIkQiCiAgICApLAogICAgQ2xvc2VUb01lZGFsID0gQm9keSA+PSAyMDAgJiBCb2R5IDwgMjAzLjU5ICAjIFRSVUUgcHJlIGJvZHkgdGVzbmUgbWltbyB0b3AgMyDEjWnFvmUgYmzDrXprbyBrdSBtZWRhaWxlCiAgKSAlPiUKICBrYWJsZSgpICU+JQogIGthYmxlX3N0eWxpbmcoCiAgICBib290c3RyYXBfb3B0aW9ucyA9IGMoInN0cmlwZWQiLCAiaG92ZXIiLCAiY29uZGVuc2VkIiwgInJlc3BvbnNpdmUiKSwKICAgIGZ1bGxfd2lkdGggPSBGQUxTRSwKICAgIHBvc2l0aW9uID0gImNlbnRlciIKICApIApgYGAK