library(dplyr)
library(tidyr)

Ôn tập các buổi trước

Dplyr

Thao tác với dòng

Thao tác với cột

# Nested-call form of the query: the calls are evaluated from the inside out.
#   1. filter()  keeps the rows of mtcars whose mpg is above the mean mpg
#   2. select()  keeps only the mpg, hp and wt columns of that result
#   3. arrange() sorts the remaining rows by wt in descending order
arrange(
  select(
    filter(mtcars, mpg > mean(mpg)),
    mpg, hp, wt
  ),
  desc(wt)
)
# Pipeline form of the same query: each step's output becomes the first
# argument of the next verb, so the code reads in execution order.
mtcars %>%
  filter(mpg > mean(mpg)) %>%  # rows with above-average mpg
  select(mpg, hp, wt) %>%      # keep three columns
  arrange(desc(wt))            # heaviest cars first

Tidyr

1. gather (đổi wide sang long)

# Load the wide GDP table. check.names = FALSE keeps the column names
# exactly as written in the file, so the year columns can be referenced
# as `1960`, ..., `2021` below.
gdp <- read.csv("gdp.csv", check.names = FALSE)
head(gdp)

# Wide -> long: stack the columns `1960` through `2021` into a key column
# "Year" and a value column "GDP". convert = TRUE asks gather() to convert
# the key column from character to a more suitable type.
gdp_long <- gdp %>%
  gather(key = "Year", value = "GDP", `1960`:`2021`, convert = TRUE)
gdp_long

# Long -> wide: spread() is the inverse operation, turning each distinct
# Year back into its own column filled with the GDP values.
spread(gdp_long, Year, GDP)
# Load the wide GDP and population tables.
# check.names = FALSE keeps the column names exactly as written in the
# files, so the year columns can be referenced as `1960`, ..., `2021`.
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
gdp <- read.csv("gdp.csv", check.names = FALSE)
pop <- read.csv("pop.csv", check.names = FALSE)
head(pop)
# Reshape GDP from wide to long: the columns `1960` through `2021` are
# stacked into a key column "Year" and a value column "gdp"
# (convert = TRUE asks gather() to convert the key column's type).
# The first two columns are then renamed by position.
gdp_long <- gdp %>%
  gather(key = "Year", value = "gdp", `1960`:`2021`, convert = TRUE) %>%
  rename(name = 1, country_code = 2)
gdp_long
# The year columns of pop run from 1960 to 2021 (check with names(pop)).
# Reshape population from wide to long: those columns are stacked into a
# key column "year" and a value column "pop" (convert = TRUE asks gather()
# to convert the key column's type). The first two columns are then
# renamed by position.
pop_long <- pop %>%
  gather(key = "year", value = "pop", `1960`:`2021`, convert = TRUE) %>%
  rename(name = 1, code = 2)
pop_long
# Combine GDP and population, keeping only the country/year pairs present
# in both tables. The key columns are named differently on each side, so
# `by` maps left-hand names to right-hand names. The `name` column is
# dropped from the population table before joining.
pop_keyed <- select(pop_long, -name)
gdp_long %>%
  inner_join(
    pop_keyed,
    by = c(country_code = "code", Year = "year")
  )
LS0tCnRpdGxlOiAiQ2hhcHRlciAyOiBEYXRhIFdyYW5nbGluZyBpbiBSIFVzaW5nIGRwbHlyIGFuZCB0aWR5ciIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkoZHBseXIpCmxpYnJhcnkodGlkeXIpCmBgYAoKIyMjIMOUbiB04bqtcCBjw6FjIGJ14buVaSB0csaw4bubYwoKKipEcGx5cioqCgpUaGFvIHTDoWMgduG7m2kgZMOybmcKCi0gICBM4buNYzogYGZpbHRlcmAKCi0gICBYb8OhIHRyw7luZyBs4bq3cDogYGRpc3RpbmN0YAoKLSAgIFPhuq9wIHjhur9wOiBgYXJyYW5nZWAKClRoYW8gdMOhYyB24bubaSBj4buZdAoKLSAgIENo4buNbiBj4buZdDogYHNlbGVjdGAKCi0gICDEkOG7lWkgdMOqbiBj4buZdDogYHJlbmFtZWAKCi0gICBU4bqhbyBj4buZdCBt4bubaTogYG11dGF0ZWAKCiAgICAtICAgYGNhc2Vfd2hlbmA6IMSRaeG7gXUga2nhu4duIChkw7luZyBjaG8gdGnhu4FuIHjhu60gbMO9LCBwaMOibiB04buVKQoKLSAgIFThuqFvIGLhuqNuZywgdOG7lW5nIGjhu6NwIGThu68gbGnhu4d1OiBgc3VtbWFyaXplKClgLCBgc3VtbWFyaXNlKClgCgogICAgLSAgIGBtZWFuYCwgYG1lZGlhbmAsIGBxdWFudGlsZWAsIGBtaW5gLCBgbWF4YCwgYHN1bWAKCiAgICAtICAgYGdyb3VwX2J5YCB0w61uaCBjaG8gdOG7q25nIG5ow7NtCgogICAgLSAgIGBuKClgOiBz4buRIHF1YW4gc8OhdAoKICAgIC0gICBgZmlyc3QoKWA6IMO0IMSR4bqndSB0acOqbiBj4bunYSBuaMOzbSDEkcOzCgogICAgLSAgIGBsYXN0KClgOiDDtCBjdeG7lWkgY8O5bmcgY+G7p2EgbmjDs20KCiAgICAtICAgYG50aChudW1iZXIpOmAgw7QgYuG6pXQga8OsIGPhu6dhIG5ow7NtCgotICAgYCU+JWA6IHBpcGUgb3BlcmF0b3IKCiAgICAtICAgVuG6vyB0csOhaSAlXD4lIFbhur8gcGjhuqNpCgogICAgLSAgIEzhuqV5IG91cHV0IGPhu6dhIGjDoG0gYsOqbiB0csOhaSwgbMOgbSBpbnB1dCDEkeG6p3UgdGnDqm4gY+G7p2EgaMOgbSBiw6puIHBo4bqjaQoKICAgIC0gICBWw60gZOG7pTogbOG7jWMgeGUgY8OzIG1wZyBcPiB0cnVuZyBiw6xuaCwgY2jhu41uIGPhu5l0IG1wZywgaHAsIHd0LCBz4bqvcCB44bq/cCBnaeG6o20gZOG6p24gdGhlbyB3dAoKYGBge3J9CmFycmFuZ2UoCiAgc2VsZWN0KAogICAgZmlsdGVyKG10Y2FycywgbXBnID4gbWVhbihtcGcpKSwKICAgIG1wZywgaHAsIHd0CiAgKSwKICBkZXNjKHd0KQopCmBgYAoKYGBge3J9CmZpbHRlcihtdGNhcnMsIG1wZyA+IG1lYW4obXBnKSkgJT4lIAogIHNlbGVjdChtcGcsIGhwLCB3dCkgJT4lICAKICBhcnJhbmdlKGRlc2Mod3QpKQpgYGAKCiMjIyBUaWR5cgoKIyMjIyAqKjEuIGdhdGhlciAoxJHhu5VpIHdpZGUgc2FuZyBsb25nKSoqCgpgYGB7cn0KZ2RwIDwtIHJlYWQuY3N2KCdnZHAuY3N2JywgY2hlY2submFtZXMgPSBGQUxTRSkKaGVhZChnZHApCmBgYAoKYGBge3J9CmdkcF9sb25nIDwtIGdhdGhlcigKICBnZHAsCiAgYDE5NjBgOmAyMDIxYCwKICBrZXk9InllYXIiLAogIHZhbHVl
PSJHRFAiLAogIGNvbnZlcnQgPSBUUlVFCikgCmdkcF9sb25nCmBgYAoKYGBge3J9CmdkcF9sb25nICU+JQogIHNwcmVhZChZZWFyLCBHRFApCmBgYAoKYGBge3J9CmdkcCA8LSByZWFkLmNzdignZ2RwLmNzdicsIGNoZWNrLm5hbWVzID0gRikKcG9wIDwtIHJlYWQuY3N2KCdwb3AuY3N2JywgY2hlY2submFtZXMgPSBGKQpoZWFkKHBvcCkKYGBgCgpgYGB7cn0KZ2RwX2xvbmcgPC0gZ2F0aGVyKAogIGdkcCwgCiAgYDE5NjBgOmAyMDIxYCwgCiAga2V5ID0gJ1llYXInLAogIHZhbHVlID0gJ2dkcCcsCiAgY29udmVydCA9IFRSVUUKKSAlPiUgCiAgcmVuYW1lKAogICAgbmFtZSA9IDEsCiAgICBjb3VudHJ5X2NvZGUgPSAyCiAgKQpnZHBfbG9uZwpgYGAKCmBgYHtyfQojIG5hbWVzKHBvcCkgMTk2MCAtIDIwMjEKcG9wX2xvbmcgPC0gZ2F0aGVyKAogIHBvcCwgCiAgYDE5NjBgOmAyMDIxYCwKICBrZXkgPSAieWVhciIsCiAgdmFsdWUgPSAicG9wIiwKICBjb252ZXJ0ID0gVFJVRQopICU+JSAKICByZW5hbWUoCiAgICBuYW1lID0gMSwKICAgIGNvZGUgPSAyCiAgKQpwb3BfbG9uZwpgYGAKCmBgYHtyfQppbm5lcl9qb2luKAogIGdkcF9sb25nLAogIHBvcF9sb25nICU+JSBzZWxlY3QoLW5hbWUpLAogIGJ5ID0gYygnY291bnRyeV9jb2RlJyA9ICdjb2RlJywgJ1llYXInID0gJ3llYXInKQopCmBgYAo=