Date : Jan-2021

Learning Objective

Parse an HTML table from a web page into an R data frame.

Tools

* R package: rvest

# Install rvest only when it is missing, so that re-running the notebook does
# not re-download the package on every execution (and still works offline once
# the package is installed).
if (!requireNamespace("rvest", quietly = TRUE)) {
  install.packages("rvest")
}
# Attach rvest: provides read_html(), html_nodes(), html_table() and %>%.
library("rvest")
# Dow Jones 30 components ----
# Page listing the index constituents, and the absolute XPath of the <table>
# element to extract. The XPath is tied to the page's current layout.
url <- "https://www.dividendmax.com/market-index-constituents/dow-jones-30"
xpath <- "/html/body/div[2]/div[2]/div/div/div/div[2]/div/table"

# Download and parse the page.
wpage <- read_html(url)

# Select the node(s) matching the XPath and convert each one to a data frame;
# the result is a list with one element per matched node.
wdf <- wpage %>%
  html_nodes(xpath = xpath) %>%
  html_table()

# An XPath that matches nothing yields an empty list, and wdf[[1]] would then
# fail with an opaque "subscript out of bounds"; stop with a clear message.
if (length(wdf) == 0) {
  stop("No table found at xpath '", xpath, "' on ", url, call. = FALSE)
}

# Preview the first five rows of the extracted table.
head(wdf[[1]], n = 5)
# Yahoo Finance Hang Seng Index (^HSI) components ----
# Components page URL (kept exactly as captured, including its query string),
# and the absolute XPath of the <table> element to extract. The XPath is tied
# to the page's current layout.
url <- "https://finance.yahoo.com/quote/%5EHSI/components/?guccounter=1&guce_referrer=aHR0cHM6Ly93d3cuZ29vZ2xlLmNvbS8&guce_referrer_sig=AQAAACp9HiHyrAzCcC9exwJ7K4thAXihsJhraS6VJR4yiJxlAbTjwCVmeBtUk2IS0IAWEqqSPVw0HOr5r87is6PhzeuAvYrBYjdhbU8ThKnvNUfDJTATsbM9-eiA_2GQlidLKqug9whSuIp9vdfgxbsi0kCami6RHLJQsqDpaevZjq62"
xpath <- "/html/body/div[1]/div/div/div[1]/div/div[3]/div[1]/div/div[1]/div/div/section/section/div/table"

# Download and parse the page.
wpage <- read_html(url)

# Select the node(s) matching the XPath and convert each one to a data frame;
# the result is a list with one element per matched node.
wdf <- wpage %>%
  html_nodes(xpath = xpath) %>%
  html_table()

# An XPath that matches nothing yields an empty list, and wdf[[1]] would then
# fail with an opaque "subscript out of bounds"; stop with a clear message.
if (length(wdf) == 0) {
  stop("No table found at xpath '", xpath, "' on ", url, call. = FALSE)
}

# Preview the first five rows of the extracted table.
head(wdf[[1]], n = 5)

Reference

[1] https://rdrr.io/cran/rvest/man/html_table.html

LS0tDQp0aXRsZTogIkhvdyB0byBleHRyYWN0IHdlYiBodG1sIHRhYmxlIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KRGF0ZSA6IEphbi0yMDIxDQoNCiMjIyBMZWFybmluZyBPYmplY3RpdmUNClBhcnNlIGFuIGh0bWwgdGFibGUgaW50byBhIGRhdGEgZnJhbWUNCg0KIyMjIFRvb2xzDQoqIFIgUGFja2FnZTogcnZlc3QNCg0KDQpgYGB7cn0NCmluc3RhbGwucGFja2FnZXMoJ3J2ZXN0JykNCmxpYnJhcnkoJ3J2ZXN0JykNCmBgYA0KDQoNCmBgYHtyfQ0KI2RvdyBqb25lcyBjb21wb25lbnRzDQp1cmw9J2h0dHBzOi8vd3d3LmRpdmlkZW5kbWF4LmNvbS9tYXJrZXQtaW5kZXgtY29uc3RpdHVlbnRzL2Rvdy1qb25lcy0zMCcNCnhwYXRoPScvaHRtbC9ib2R5L2RpdlsyXS9kaXZbMl0vZGl2L2Rpdi9kaXYvZGl2WzJdL2Rpdi90YWJsZScNCmBgYA0KDQoNCmBgYHtyfQ0Kd3BhZ2UgPC0gcmVhZF9odG1sKHVybCkNCndkZj13cGFnZSAlPiUgaHRtbF9ub2Rlcyh4cGF0aD14cGF0aCkgJT4lIGh0bWxfdGFibGUoKQ0KaGVhZCh3ZGZbWzFdXSxuPTUpDQpgYGANCg0KYGBge3J9DQojeWFob28gaHNpIGNvbXBvbmVudHMNCnVybD0naHR0cHM6Ly9maW5hbmNlLnlhaG9vLmNvbS9xdW90ZS8lNUVIU0kvY29tcG9uZW50cy8/Z3VjY291bnRlcj0xJmd1Y2VfcmVmZXJyZXI9YUhSMGNITTZMeTkzZDNjdVoyOXZaMnhsTG1OdmJTOCZndWNlX3JlZmVycmVyX3NpZz1BUUFBQUNwOUhpSHlyQXpDY0M5ZXh3SjdLNHRoQVhpaHNKaHJhUzZWSlI0eWlKeGxBYlRqd0NWbWVCdFVrMklTMElBV0VxcVNQVncwSE9yNXI4N2lzNlBoemV1QXZZckJZamRoYlU4VGhLbnZOVWZESlRBVHNiTTktZWlBXzJHUWxpZExLcXVnOXdoU3VJcDl2ZGZneGJzaTBrQ2FtaTZSSExKUXNxRHBhZXZaanE2MicNCnhwYXRoPScvaHRtbC9ib2R5L2RpdlsxXS9kaXYvZGl2L2RpdlsxXS9kaXYvZGl2WzNdL2RpdlsxXS9kaXYvZGl2WzFdL2Rpdi9kaXYvc2VjdGlvbi9zZWN0aW9uL2Rpdi90YWJsZScNCmBgYA0KDQpgYGB7cn0NCndwYWdlIDwtIHJlYWRfaHRtbCh1cmwpDQp3ZGY9d3BhZ2UgJT4lIGh0bWxfbm9kZXMoeHBhdGg9eHBhdGgpICU+JSBodG1sX3RhYmxlKCkNCmhlYWQod2RmW1sxXV0sbj01KQ0KYGBgDQoNCg0KIyMjIFJlZmVyZW5jZQ0KDQpbMV0gaHR0cHM6Ly9yZHJyLmlvL2NyYW4vcnZlc3QvbWFuL2h0bWxfdGFibGUuaHRtbA0K