US hegemony
rm(list = ls())
library(rvest)
library(stringr)
library(dplyr)
library(ggplot2)
# Define URL:
wealth <- "https://en.wikipedia.org/wiki/List_of_countries_by_total_wealth"
# Extract data:
wealth %>%
read_html() %>% # đọc trang web
html_nodes(xpath = '//*[@id="mw-content-text"]/div[1]/table[3]') %>%
html_table() %>%
.[[1]] -> wealth1
wealth %>%
read_html() %>% # đọc trang web
html_nodes(xpath = '//*[@id="mw-content-text"]/div[1]/table[4]') %>%
html_table() %>% # lấy dữ liệu bảng
.[[1]] -> wealth2 # số 1
wealth20 <- rbind(wealth1, wealth2)
wealth20 %>%
select(-c(2,7,10,11)) -> wealth20
colnames(wealth20) <- c('year', 'canada', 'china','france', 'germany', 'italy', 'japan', 'united_kingdom', 'united_states','aggregateshare')
class(wealth20)
wealth20 gồm các loại data khác nhau: [1] “tbl_df” “tbl”
“data.frame”
Làm thế nào để một object có nhiều loại data chuyển hóa thành atomic
vector?
Nếu vẫn dùng lệnh xóa ký hiệu % thì sẽ được kết quả
str(wealth20)
wealth20 <- as.data.frame(lapply(wealth20, as.character))
wealth20 %>%
str_replace_all("%", "") %>%
as.numeric() -> wealth20
thì được thông báo: [1] NA NA NA NA NA NA NA NA NA NA Warning
messages: 1: In stri_replace_all_regex(string, pattern,
fix_replacement(replacement), : argument is not an atomic vector;
coercing 2: In wealth20 %>% str_replace_all(“%”, ““) %>%
as.numeric() : NAs introduced by coercion
```
LS0tDQp0aXRsZTogIlVTIGhlZ2Vtb255Ig0KYXV0aG9yOiAiRGFpIER1b25nIg0KZGF0ZTogIjIwMjMtMDMtMjQiDQpvdXRwdXQ6IA0KICBodG1sX2RvY3VtZW50Og0KICAgIGNvZGVfZG93bmxvYWQ6IHllcyAgI2NobyBwaMOpcCBkb3dubG9hZCB0aMOsIGdoaSB5ZXMNCiAgICBoaWdobGlnaHQ6IHplbmJ1cm4NCiAgICB0aGVtZTogZmxhdGx5DQogICAgdG9jOiB5ZXMNCiAgICB0b2NfZmxvYXQ6IHllcw0KICB3b3JkX2RvY3VtZW50Og0KICAgIHRvYzogeWVzDQogIHBkZl9kb2N1bWVudDoNCiAgICB0b2M6IHllcw0KLS0tDQoNCmBgYHtyIHNldHVwLGluY2x1ZGU9RkFMU0V9DQprbml0cjo6b3B0c19jaHVuayRzZXQoZWNobyA9IFRSVUUsIHdhcm5pbmcgPSBGQUxTRSwgbWVzc2FnZSA9IEZBTFNFLCBjYWNoZSA9IFRSVUUpICNlY2hvID0gVFJVRSA6IHZp4bq/dCB0aOG6vyBuw6BvIHRow6wgY29kZSBoaeG7h24gcmEgbmjGsCB0aOG6vzsgbuG6v3UgZWNobyA9IEZBTFNFIHRow6wgY2jhu4kgaGnhu4duIGvhur90IHF14bqjIHbDoCBraMO0bmcgaGnhu4duIGNvZGU7IHdhcm5pbmcgPSBGQUxTRSB0aMOsIHThuq90IGPDoWMgd2FybmluZw0KDQpgYGANCg0KIyMgVVMgaGVnZW1vbnkNCmBgYA0Kcm0obGlzdCA9IGxzKCkpDQpsaWJyYXJ5KHJ2ZXN0KQ0KbGlicmFyeShzdHJpbmdyKQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkoZ2dwbG90MikNCiMgRGVmaW5lIFVSTDogDQoNCndlYWx0aCA8LSAiaHR0cHM6Ly9lbi53aWtpcGVkaWEub3JnL3dpa2kvTGlzdF9vZl9jb3VudHJpZXNfYnlfdG90YWxfd2VhbHRoIg0KDQojIEV4dHJhY3QgZGF0YTogDQoNCndlYWx0aCAlPiUgDQogIHJlYWRfaHRtbCgpICU+JSAgIyDEkeG7jWMgdHJhbmcgd2ViDQogIGh0bWxfbm9kZXMoeHBhdGggPSAnLy8qW0BpZD0ibXctY29udGVudC10ZXh0Il0vZGl2WzFdL3RhYmxlWzNdJykgJT4lDQogIGh0bWxfdGFibGUoKSAlPiUgDQogIC5bWzFdXSAtPiB3ZWFsdGgxDQoNCndlYWx0aCAlPiUgDQogIHJlYWRfaHRtbCgpICU+JSAgIyDEkeG7jWMgdHJhbmcgd2ViDQogIGh0bWxfbm9kZXMoeHBhdGggPSAnLy8qW0BpZD0ibXctY29udGVudC10ZXh0Il0vZGl2WzFdL3RhYmxlWzRdJykgJT4lIA0KICBodG1sX3RhYmxlKCkgJT4lICAjIGzhuqV5IGThu68gbGnhu4d1IGLhuqNuZw0KICAuW1sxXV0gLT4gd2VhbHRoMiAgIyBz4buRIDEgDQoNCndlYWx0aDIwIDwtIHJiaW5kKHdlYWx0aDEsIHdlYWx0aDIpDQoNCndlYWx0aDIwICU+JSANCiAgc2VsZWN0KC1jKDIsNywxMCwxMSkpICAtPiB3ZWFsdGgyMA0KDQpjb2xuYW1lcyh3ZWFsdGgyMCkgPC0gYygneWVhcicsICdjYW5hZGEnLCAnY2hpbmEnLCdmcmFuY2UnLCAnZ2VybWFueScsICdpdGFseScsICdqYXBhbicsICd1bml0ZWRfa2luZ2RvbScsICd1bml0ZWRfc3RhdGVzJywnYWdncmVnYXRlc2hhcmUnKQ0KDQpjbGFzcyh3ZWFsdGgyMCkNCmBgYA0Kd2VhbHRoMjAgZ+G7k20gY8OhYyBsb+G6oWkgZGF0YSBraMOhYyBuaGF1Og0KWzFdICJ0YmxfZGYiICAgICAidGJsIiAgICAgICAgImRhdGEuZnJhbWUiDQoNCkzDoG0gdGjhur8gbsOgbyDEkeG7gyBt4buZdCBvYmplY3QgY8OzIG5oaeG7gXUgbG/huqFpIGRhdGEgY2h1eeG7g24gaMOzYSB0aMOgbmggYXRvbWljIHZlY3Rvcj8NCg0KTuG6v3UgduG6q24gZMO5bmcgbOG7h25oIHjDs2Ega8O9IGhp4buHdSAlIHRow6wgc+G6vSDEkcaw4bujYyBr4bq/dCBxdeG6oyANCmBgYA0Kc3RyKHdlYWx0aDIwKQ0Kd2VhbHRoMjAgPC0gYXMuZGF0YS5mcmFtZShsYXBwbHkod2VhbHRoMjAsIGFzLmNoYXJhY3RlcikpDQoNCndlYWx0aDIwICU+JSANCiAgc3RyX3JlcGxhY2VfYWxsKCIlIiwgIiIpICU+JSANCiAgYXMubnVtZXJpYygpIC0+IHdlYWx0aDIwDQpgYGANCnRow6wgxJHGsOG7o2MgdGjDtG5nIGLDoW86DQogWzFdIE5BIE5BIE5BIE5BIE5BIE5BIE5BIE5BIE5BIE5BDQpXYXJuaW5nIG1lc3NhZ2VzOg0KMTogSW4gc3RyaV9yZXBsYWNlX2FsbF9yZWdleChzdHJpbmcsIHBhdHRlcm4sIGZpeF9yZXBsYWNlbWVudChyZXBsYWNlbWVudCksICA6DQogIGFyZ3VtZW50IGlzIG5vdCBhbiBhdG9taWMgdmVjdG9yOyBjb2VyY2luZw0KMjogSW4gd2VhbHRoMjAgJT4lIHN0cl9yZXBsYWNlX2FsbCgiJSIsICIiKSAlPiUgYXMubnVtZXJpYygpIDoNCiAgTkFzIGludHJvZHVjZWQgYnkgY29lcmNpb24NCg0KDQoNCg0KYGBgDQo=