Further insights into the state of the labour market can be gained from open-source data collected by scraping websites. Web scraping is the programmatic extraction of data from the HTML code of websites.
library(XML)
library(RCurl)
library(httr)
## Warning: package 'httr' was built under R version 3.6.3
# Download the raw HTML of the RPubs profile page; readLines() returns a
# character vector with one element per line of the page.
con <- url("https://rpubs.com/chenlianghe")
# Close the connection in `finally` so that it is released even when the
# download fails part-way; the error itself still propagates.
htmlCode <- tryCatch(
  readLines(con),
  finally = close(con)
)
# Display the downloaded lines.
htmlCode
## [1] "<!DOCTYPE html>"
## [2] "<html lang='en'>"
## [3] "<head>"
## [4] "<meta content='IE=edge' http-equiv='X-UA-Compatible'>"
## [5] "<title>RPubs</title>"
## [6] "<meta name=\"csrf-param\" content=\"authenticity_token\" />"
## [7] "<meta name=\"csrf-token\" content=\"01P6guApzgGcrVzmuMFNoc5OKcbZqBGnNHIQHxDpI3fasdcw8U23w8/byrZUArV+0Mz154sYOsAgijas/nsSOg==\" />"
## [8] "<link rel=\"stylesheet\" media=\"all\" href=\"/assets/application-3956e416c438f98e8d8b82b039d6ac6cd5417ad8d51825485256a39737302686.css\" />"
## [9] "<script src=\"/assets/application-050918065a747f23455921e989643a0f9050e5da8573c9858fc4266f0ec88af2.js\"></script>"
## [10] "<meta content='width=device-width, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0, user-scalable=0' name='viewport'>"
## [11] "<link rel=\"stylesheet\" href=\"https://use.typekit.net/tzi3tjz.css\">"
## [12] "<script>"
## [13] " (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){"
## [14] " (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),"
## [15] " m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)"
## [16] " })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');"
## [17] " ga('create', 'UA-20375833-3', 'auto', {'allowLinker': true});"
## [18] " ga('require', 'linker');"
## [19] " ga('linker:autoLink', ['rstudio.com', 'rstudio.github.io', 'rviews.rstudio.com', 'community.rstudio.com', 'rpubs.rstudio.com', 'environments.rstudio.com', 'rstudio.org', 'dailies.rstudio.com', 'pages.rstudio.com', 'db.rstudio.com', 'solutions.rstudio.com', 'docs.rstudio.com', 'spark.rstudio.com', 'shiny.rstudio.com', 'education.rstudio.com', 'rstudio.cloud', 'shinyapps.io', 'teamadmin.rstudio.com', 'blog.rstudio.com', 'support.rstudio.com'] );"
## [20] " ga('send', 'pageview');"
## [21] "</script>"
## [22] ""
## [23] "</head>"
## [24] "<body>"
## [25] "<div class='modal' id='login' style='display: none'>"
## [26] "<div class='modal-header'>"
## [27] "<h1>Sign In</h1>"
## [28] "</div>"
## [29] "<div class='modal-body'>"
## [30] "<div class='alert' id='login_message' style='display: none'></div>"
## [31] "<form action=\"/auth/login\" accept-charset=\"UTF-8\" method=\"post\"><input name=\"utf8\" type=\"hidden\" value=\"✓\" /><input type=\"hidden\" name=\"authenticity_token\" value=\"w8cjcQYaUEOtwqcd8cEe9c5gwmY1cC2ohw4BKtVe6rzTP+ud28RTpvT/T5Q8rjhwOzPSDVhv04h3N1GI0XCq3w==\" />"
## [32] "<input name='return_url' type='hidden'>"
## [33] "<div class='fieldset'>"
## [34] "<div class='control-group'>"
## [35] "<label class='control-label' for='login_username'>Username or Email</label>"
## [36] "<div class='controls'>"
## [37] "<input class='input-xlarge' id='login_username' name='username' type='text'>"
## [38] "</div>"
## [39] "</div>"
## [40] "<div class='control-group'>"
## [41] "<label class='control-label' for='login_password'>Password</label>"
## [42] "<div class='controls'>"
## [43] "<input class='input-xlarge' id='login_password' name='password' type='password'>"
## [44] "</div>"
## [45] "</div>"
## [46] "<div class='control-group'>"
## [47] "<a href='/auth/passwordhelp' target='_blank'>Forgot your password?</a>"
## [48] "</div>"
## [49] "</div>"
## [50] "</form>"
## [51] ""
## [52] ""
## [53] "</div>"
## [54] "<div class='modal-footer'>"
## [55] "<button class='btn btn-primary' id='login-modal-submit'>Sign In</button>"
## [56] "<button class='btn' id='login-modal-cancel'>Cancel</button>"
## [57] "</div>"
## [58] "</div>"
## [59] "<div id='main'>"
## [60] "<div id='pageheader'>"
## [61] "<div id='branding'>"
## [62] "<h1 id='logo'>"
## [63] "<a href='/'><span id='R'>R</span>Pubs"
## [64] "</a>"
## [65] "</h1>"
## [66] "<span id='tagline'>by RStudio</span>"
## [67] "</div>"
## [68] "<div id='identity'>"
## [69] "<div class='btn-group'>"
## [70] "<a class='btn btn-small pull-right' href='#' onclick='rpubs_showLogin(); return false'>"
## [71] "Sign in"
## [72] "</a>"
## [73] "<a class='btn btn-small pull-right' href='/users/new'>"
## [74] "Register"
## [75] "</a>"
## [76] "</div>"
## [77] "</div>"
## [78] "</div>"
## [79] "<div id='pagebody'>"
## [80] "<div class='pagebodyhead'>"
## [81] "<div class='userblock'>"
## [82] "<a href=\"https://rpubs.com/chenlianghe\" class=\"gravatar-link\"><img src=\"https://secure.gravatar.com/avatar/dd8355468216b0969ca61f3916f31be0?s=64\" class=\"gravatar\" alt=\"gravatar\" style=\"width: 64px; height: 64px\"/></a>"
## [83] "<h1>chenlianghe</h1>"
## [84] "<h3>Lianghe</h3>"
## [85] "</div>"
## [86] "</div>"
## [87] "<h2>Recently Published</h2>"
## [88] "<div class='pubs'>"
## [89] "<div class='pubrow2'></div>"
## [90] "<div class='pubrow3'></div>"
## [91] "<div class='pubtile'>"
## [92] "<a href=\"https://rpubs.com/chenlianghe/607956\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/59e9570dbe43dac4f8ff9c03b6a4d97c/png/?thumbnail_max_width=200&unique=1588416714&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607956_b530c49de6ac4f05ba587bc9f704e964.html&viewport=960x960\" /></a>"
## [93] "<div class='pubinfo'>"
## [94] "<h5><a href=\"https://rpubs.com/chenlianghe/607956\">2020-04-21 Test B</a></h5>"
## [95] "<div class='desc'>Question 1</div>"
## [96] "<time datetime='2020-05-02T10:51:54+00:00'>4 minutes ago</time>"
## [97] "</div>"
## [98] "</div>"
## [99] "<div class='pubtile'>"
## [100] "<a href=\"https://rpubs.com/chenlianghe/607951\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/68f73ee1954accb5d5f1aa8932bb4564/png/?thumbnail_max_width=200&unique=1588415808&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607951_5f08303483a44fe4afc8300393e14635.html&viewport=960x960\" /></a>"
## [101] "<div class='pubinfo'>"
## [102] "<h5><a href=\"https://rpubs.com/chenlianghe/607951\">2020-04-21 Test A</a></h5>"
## [103] "<div class='desc'>Question 1</div>"
## [104] "<time datetime='2020-05-02T10:36:48+00:00'>19 minutes ago</time>"
## [105] "</div>"
## [106] "</div>"
## [107] "<div class='pubrow2'></div>"
## [108] "<div class='pubtile'>"
## [109] "<a href=\"https://rpubs.com/chenlianghe/607949\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/0cbd86eb98162fa54426a10a63f0776c/png/?thumbnail_max_width=200&unique=1588415575&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607949_7efc18f05db041fca90841aa1b96a697.html&viewport=960x960\" /></a>"
## [110] "<div class='pubinfo'>"
## [111] "<h5><a href=\"https://rpubs.com/chenlianghe/607949\">2020-04-17 Data Scientist Assignment</a></h5>"
## [112] "<div class='desc'>Question 4 [Topic Modelling]</div>"
## [113] "<time datetime='2020-05-02T10:32:55+00:00'>23 minutes ago</time>"
## [114] "</div>"
## [115] "</div>"
## [116] "<div class='pubrow3'></div>"
## [117] "<div class='pubtile'>"
## [118] "<a href=\"https://rpubs.com/chenlianghe/607948\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/e1ccb8e6f587e289413404718f2c4f6d/png/?thumbnail_max_width=200&unique=1588415401&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607948_a5671772c11447d0907be2f0e089bee6.html&viewport=960x960\" /></a>"
## [119] "<div class='pubinfo'>"
## [120] "<h5><a href=\"https://rpubs.com/chenlianghe/607948\">2020-04-17 Data Scientist Assignment</a></h5>"
## [121] "<div class='desc'>Question 3 [Estimating Treatment Effects]</div>"
## [122] "<time datetime='2020-05-02T10:30:01+00:00'>25 minutes ago</time>"
## [123] "</div>"
## [124] "</div>"
## [125] "<div class='pubrow2'></div>"
## [126] "<div class='pubtile'>"
## [127] "<a href=\"https://rpubs.com/chenlianghe/607947\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/503752cebe0979c536b2d0f790b0a15e/png/?thumbnail_max_width=200&unique=1588415175&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607947_0ca57ca39ec140ca8827d6469e60b990.html&viewport=960x960\" /></a>"
## [128] "<div class='pubinfo'>"
## [129] "<h5><a href=\"https://rpubs.com/chenlianghe/607947\">2020-04-17 Data Scientist Assignment</a></h5>"
## [130] "<div class='desc'>Question 2 [Hedonic Regression Modelling]</div>"
## [131] "<time datetime='2020-05-02T10:26:15+00:00'>29 minutes ago</time>"
## [132] "</div>"
## [133] "</div>"
## [134] "<div class='pubtile'>"
## [135] "<a href=\"https://rpubs.com/chenlianghe/607946\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/782c86c0628e8b3f2153bc8405f3385c/png/?thumbnail_max_width=200&unique=1588414896&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607946_52f049f868714d4a8251de1b56aca70d.html&viewport=960x960\" /></a>"
## [136] "<div class='pubinfo'>"
## [137] "<h5><a href=\"https://rpubs.com/chenlianghe/607946\">2020-04-17 Data Scientist Assignment</a></h5>"
## [138] "<div class='desc'>Question 1 [Geospatial Data Visualization]</div>"
## [139] "<time datetime='2020-05-02T10:21:36+00:00'>34 minutes ago</time>"
## [140] "</div>"
## [141] "</div>"
## [142] "<div class='pubrow2'></div>"
## [143] "<div class='pubrow3'></div>"
## [144] "<div class='pubtile'>"
## [145] "<a href=\"https://rpubs.com/chenlianghe/607944\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/71d729e70ee99d41828bb51c2b3ee533/png/?thumbnail_max_width=200&unique=1588414339&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607944_e8371d14ee0b416db0e83c701ec32a95.html&viewport=960x960\" /></a>"
## [146] "<div class='pubinfo'>"
## [147] "<h5><a href=\"https://rpubs.com/chenlianghe/607944\">2020-03-17 Research Assignment</a></h5>"
## [148] "<div class='desc'>Question Part (b)</div>"
## [149] "<time datetime='2020-05-02T10:12:19+00:00'>43 minutes ago</time>"
## [150] "</div>"
## [151] "</div>"
## [152] "<div class='pubtile'>"
## [153] "<a href=\"https://rpubs.com/chenlianghe/607943\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/def9e94b5de4024dfc72789bf104ea1e/png/?thumbnail_max_width=200&unique=1588414283&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607943_43b5e10d887e4aa58ce88fdc387ce97d.html&viewport=960x960\" /></a>"
## [154] "<div class='pubinfo'>"
## [155] "<h5><a href=\"https://rpubs.com/chenlianghe/607943\">2020-03-17 Research Assignment</a></h5>"
## [156] "<div class='desc'>Question Part (a)</div>"
## [157] "<time datetime='2020-05-02T10:11:23+00:00'>44 minutes ago</time>"
## [158] "</div>"
## [159] "</div>"
## [160] "<div class='pubrow2'></div>"
## [161] "<div class='pubtile'>"
## [162] "<a href=\"https://rpubs.com/chenlianghe/607927\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/fdc1bef9e296fe1edd6d9dce6828665b/png/?thumbnail_max_width=200&unique=1588411461&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607927_51d205d54b854e22b11fc6ed8623a9ba.html&viewport=960x960\" /></a>"
## [163] "<div class='pubinfo'>"
## [164] "<h5><a href=\"https://rpubs.com/chenlianghe/607927\">2020-03-13 Case Study</a></h5>"
## [165] "<div class='desc'></div>"
## [166] "<time datetime='2020-05-02T09:24:21+00:00'>about 2 hours ago</time>"
## [167] "</div>"
## [168] "</div>"
## [169] "<div class='pubrow3'></div>"
## [170] "<div class='pubtile'>"
## [171] "<a href=\"https://rpubs.com/chenlianghe/607923\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/27e34e0e5442cd14f29d5c8bace34492/png/?thumbnail_max_width=200&unique=1588411021&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607923_00df51131e4e4c6099d9d64c8f01d977.html&viewport=960x960\" /></a>"
## [172] "<div class='pubinfo'>"
## [173] "<h5><a href=\"https://rpubs.com/chenlianghe/607923\">2020-03-06 Assessment</a></h5>"
## [174] "<div class='desc'>Question 2</div>"
## [175] "<time datetime='2020-05-02T09:17:01+00:00'>about 2 hours ago</time>"
## [176] "</div>"
## [177] "</div>"
## [178] "<div class='pubrow2'></div>"
## [179] "<div class='pubtile'>"
## [180] "<a href=\"https://rpubs.com/chenlianghe/607916\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/d313fde96454558521ea7f3aa24e66df/png/?thumbnail_max_width=200&unique=1588410511&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607916_a63c74e75b41457687ffc8afd54af9c2.html&viewport=960x960\" /></a>"
## [181] "<div class='pubinfo'>"
## [182] "<h5><a href=\"https://rpubs.com/chenlianghe/607916\">2020-03-06 Assessment</a></h5>"
## [183] "<div class='desc'>Question 1</div>"
## [184] "<time datetime='2020-05-02T09:08:31+00:00'>about 2 hours ago</time>"
## [185] "</div>"
## [186] "</div>"
## [187] "<div class='pubtile'>"
## [188] "<a href=\"https://rpubs.com/chenlianghe/607893\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/95e3d0f0528cf7224d6557a827b74b35/png/?thumbnail_max_width=200&unique=1588401433&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607893_041c277ab49b4afbb7ae0558169fe97e.html&viewport=960x960\" /></a>"
## [189] "<div class='pubinfo'>"
## [190] "<h5><a href=\"https://rpubs.com/chenlianghe/607893\">A Fast and Easy Way to Predict Words</a></h5>"
## [191] "<div class='desc'>Coursera Data Science Specialization"
## [192] "Data Science Capstone Final Project Submission</div>"
## [193] "<time datetime='2020-05-02T06:37:13+00:00'>about 4 hours ago</time>"
## [194] "</div>"
## [195] "</div>"
## [196] "<div class='pubrow2'></div>"
## [197] "<div class='pubrow3'></div>"
## [198] "<div class='pubtile'>"
## [199] "<a href=\"https://rpubs.com/chenlianghe/607892\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/a4de68f3ed98d690d5def280d654da14/png/?thumbnail_max_width=200&unique=1588401280&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607892_4d3bc5f14cf54ff79a1175944fdb8833.html&viewport=960x960\" /></a>"
## [200] "<div class='pubinfo'>"
## [201] "<h5><a href=\"https://rpubs.com/chenlianghe/607892\">Data Science Capstone Milestone Report</a></h5>"
## [202] "<div class='desc'>Coursera Data Science Specialization</div>"
## [203] "<time datetime='2020-05-02T06:34:40+00:00'>about 4 hours ago</time>"
## [204] "</div>"
## [205] "</div>"
## [206] "<div class='pubtile'>"
## [207] "<a href=\"https://rpubs.com/chenlianghe/607889\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/2b4fdebf985d6fc4d7fb5d7aa3c80ef7/png/?thumbnail_max_width=200&unique=1588400709&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607889_6af48a22e2c8483e94a7f52ce1e3fdca.html&viewport=960x960\" /></a>"
## [208] "<div class='pubinfo'>"
## [209] "<h5><a href=\"https://rpubs.com/chenlianghe/607889\">Prediction Assignment Writeup</a></h5>"
## [210] "<div class='desc'>Coursera Data Science Specialization</div>"
## [211] "<time datetime='2020-05-02T06:25:09+00:00'>about 5 hours ago</time>"
## [212] "</div>"
## [213] "</div>"
## [214] "<div class='pubrow2'></div>"
## [215] "<div class='pubtile'>"
## [216] "<a href=\"https://rpubs.com/chenlianghe/607885\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/dda035ed6b189ce2d34a84bde01aac53/png/?thumbnail_max_width=200&unique=1588399901&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607885_18f3f831056746409facc4beb8c22852.html&viewport=960x960\" /></a>"
## [217] "<div class='pubinfo'>"
## [218] "<h5><a href=\"https://rpubs.com/chenlianghe/607885\">Prediction of a Car's Miles per Gallon (MPG)</a></h5>"
## [219] "<div class='desc'>Coursera Data Science Specialization"
## [220] "Shiny Application and Reproducible Pitch</div>"
## [221] "<time datetime='2020-05-02T06:11:41+00:00'>about 5 hours ago</time>"
## [222] "</div>"
## [223] "</div>"
## [224] "<div class='pubrow3'></div>"
## [225] "<div class='pubtile'>"
## [226] "<a href=\"https://rpubs.com/chenlianghe/607884\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/3911a538ecfc0e71dc42846d146bf78c/png/?thumbnail_max_width=200&unique=1588399771&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607884_a8dcd52c474a43e99f81fbf7bdbe2493.html&viewport=960x960\" /></a>"
## [227] "<div class='pubinfo'>"
## [228] "<h5><a href=\"https://rpubs.com/chenlianghe/607884\">Regression Models Course Project</a></h5>"
## [229] "<div class='desc'>Coursera Data Science Specialization</div>"
## [230] "<time datetime='2020-05-02T06:09:31+00:00'>about 5 hours ago</time>"
## [231] "</div>"
## [232] "</div>"
## [233] "<div class='pubrow2'></div>"
## [234] "<div class='pubtile'>"
## [235] "<a href=\"https://rpubs.com/chenlianghe/607879\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/df4ce2a9577517d8c5612ae770a0d722/png/?thumbnail_max_width=200&unique=1588398086&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607879_e39cd0ff774b46cbac993be9f35019a0.html&viewport=960x960\" /></a>"
## [236] "<div class='pubinfo'>"
## [237] "<h5><a href=\"https://rpubs.com/chenlianghe/607879\">R Markdown Presentation & Plotly</a></h5>"
## [238] "<div class='desc'>Coursera Data Science Specialization</div>"
## [239] "<time datetime='2020-05-02T05:41:26+00:00'>about 5 hours ago</time>"
## [240] "</div>"
## [241] "</div>"
## [242] "<div class='pubtile'>"
## [243] "<a href=\"https://rpubs.com/chenlianghe/607878\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/f4329b5a4a969f735606bc391a748421/png/?thumbnail_max_width=200&unique=1588397968&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607878_3729d89267a94d6ab83a1298f3ffccc6.html&viewport=960x960\" /></a>"
## [244] "<div class='pubinfo'>"
## [245] "<h5><a href=\"https://rpubs.com/chenlianghe/607878\">R Markdown and Leaflet</a></h5>"
## [246] "<div class='desc'>Coursera Data Science Specialization</div>"
## [247] "<time datetime='2020-05-02T05:39:28+00:00'>about 5 hours ago</time>"
## [248] "</div>"
## [249] "</div>"
## [250] "<div class='pubrow2'></div>"
## [251] "<div class='pubrow3'></div>"
## [252] "<div class='pubtile'>"
## [253] "<a href=\"https://rpubs.com/chenlianghe/607877\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/8b3156369b3689722af011a3fb54c56d/png/?thumbnail_max_width=200&unique=1588397896&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607877_c107ab0c828c4be79be4a91addaecec9.html&viewport=960x960\" /></a>"
## [254] "<div class='pubinfo'>"
## [255] "<h5><a href=\"https://rpubs.com/chenlianghe/607877\">Statistical Inference Course Project Part 2: Basic Inferential Data Analysis</a></h5>"
## [256] "<div class='desc'>Coursera Data Science Specialization</div>"
## [257] "<time datetime='2020-05-02T05:38:16+00:00'>about 5 hours ago</time>"
## [258] "</div>"
## [259] "</div>"
## [260] "<div class='pubtile'>"
## [261] "<a href=\"https://rpubs.com/chenlianghe/607876\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/0fe57a4649e2538ba7f068e8bf5accff/png/?thumbnail_max_width=200&unique=1588397833&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607876_c77b320a24a44cbfb43efb9f750beb15.html&viewport=960x960\" /></a>"
## [262] "<div class='pubinfo'>"
## [263] "<h5><a href=\"https://rpubs.com/chenlianghe/607876\">Statistical Inference Course Project Part 1: Simulation Exercise</a></h5>"
## [264] "<div class='desc'>Coursera Data Science Specialization</div>"
## [265] "<time datetime='2020-05-02T05:37:13+00:00'>about 5 hours ago</time>"
## [266] "</div>"
## [267] "</div>"
## [268] "<div class='pubrow2'></div>"
## [269] "<div class='pubtile'>"
## [270] "<a href=\"https://rpubs.com/chenlianghe/607875\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/8234aecd74d94dbccafd3422302aeea5/png/?thumbnail_max_width=200&unique=1588397610&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607875_4ee0d922c1d34cce8621e4dbe7c47e51.html&viewport=960x960\" /></a>"
## [271] "<div class='pubinfo'>"
## [272] "<h5><a href=\"https://rpubs.com/chenlianghe/607875\">Reproducible Research: Course Project 2</a></h5>"
## [273] "<div class='desc'>Coursera Data Science Specialization</div>"
## [274] "<time datetime='2020-05-02T05:33:30+00:00'>about 5 hours ago</time>"
## [275] "</div>"
## [276] "</div>"
## [277] "<div class='pubrow3'></div>"
## [278] "<div class='pubtile'>"
## [279] "<a href=\"https://rpubs.com/chenlianghe/607866\"><img class=\"pubthumb\" alt=\"\" src=\"https://d38j8069scdbm5.cloudfront.net/v6/P5441C759E0FFE/72552985d5bf38144abb7194962a092a/png/?thumbnail_max_width=200&unique=1588396111&url=https%3A%2F%2Frstudio-pubs-static.s3.amazonaws.com%2F607866_75ca91eab68646d98eb211be08a8c1ac.html&viewport=960x960\" /></a>"
## [280] "<div class='pubinfo'>"
## [281] "<h5><a href=\"https://rpubs.com/chenlianghe/607866\">Reproducible Research: Peer Assessment 1</a></h5>"
## [282] "<div class='desc'>Coursera Data Science Specialization</div>"
## [283] "<time datetime='2020-05-02T05:08:31+00:00'>about 6 hours ago</time>"
## [284] "</div>"
## [285] "</div>"
## [286] ""
## [287] "</div>"
## [288] ""
## [289] "<div class='clearfix'></div>"
## [290] "</div>"
## [291] "</div>"
## [292] "</body>"
## [293] "</html>"
# Fetch a Google Scholar profile page with RCurl and extract its <title>.
# NOTE(review): `url` shadows base::url() from here on; the name is kept
# because the httr example further down reads this variable.
url <- "https://scholar.google.com/citations?hl=en&user=EXk94uwAAAAJ"
# Despite its name, `fileurl` holds the downloaded page text, not a file path.
fileurl <- getURL(url)
# State explicitly that the first argument is markup rather than a file name
# or URL, instead of leaving that decision to the parser.
html <- htmlTreeParse(fileurl, asText = TRUE, useInternalNodes = TRUE)
# Return the text of every <title> node in the parsed document.
xpathSApply(html, "//title", xmlValue)
## [1] "Lianghe Chen - Google Scholar Citations"
# Same <title> lookup done with httr instead of RCurl. `url` is the Google
# Scholar address assigned in the RCurl example above.
html2 <- GET(url)
# Stop on an HTTP error status so that an error page is never parsed as if it
# were the profile page. Returns invisibly on success, so nothing is printed.
stop_for_status(html2)
# Extract the response body as a single character string.
content2 <- content(html2, as = "text")
# `content2` is markup, not a file name, hence asText = TRUE.
parsedHtml <- htmlParse(content2, asText = TRUE)
# Return the text of every <title> node in the parsed document.
xpathSApply(parsedHtml, "//title", xmlValue)
## [1] "Lianghe Chen - Google Scholar Citations"
# Request the Basic-Auth test endpoint without supplying credentials; the
# recorded response that follows reports status 401.
pg1 <- GET(url = "http://httpbin.org/basic-auth/user/passwd")
# Display the response summary.
pg1
## Response [http://httpbin.org/basic-auth/user/passwd]
## Date: 2020-05-02 10:55
## Status: 401
## Content-Type: <unknown>
## <EMPTY BODY>
# List the names of the components stored in the pg1 response object.
names(pg1)
## [1] "url" "status_code" "headers" "all_headers" "cookies"
## [6] "content" "date" "times" "request" "handle"
# Repeat the request to the Basic-Auth test endpoint, this time supplying the
# user name and password through httr's authenticate() config; the recorded
# response that follows reports status 200.
pg2 <- GET(
  url = "http://httpbin.org/basic-auth/user/passwd",
  config = authenticate(user = "user", password = "passwd")
)
# Display the response summary.
pg2
## Response [http://httpbin.org/basic-auth/user/passwd]
## Date: 2020-05-02 10:55
## Status: 200
## Content-Type: application/json
## Size: 47 B
## {
## "authenticated": true,
## "user": "user"
## }
# List the names of the components stored in the pg2 response object.
names(pg2)
## [1] "url" "status_code" "headers" "all_headers" "cookies"
## [6] "content" "date" "times" "request" "handle"
# Create an httr handle for google.com and issue a GET for the "search" path
# through that handle rather than through a full URL.
google <- handle(url = "http://google.com")
pg3 <- GET(path = "search", handle = google)
# Display the response summary; the recorded output that follows shows the
# URL the request ended up at.
pg3
## Response [http://www.google.com/webhp]
## Date: 2020-05-02 10:55
## Status: 200
## Content-Type: text/html; charset=ISO-8859-1
## Size: 13.6 kB
## <!doctype html><html itemscope="" itemtype="http://schema.org/WebPage" lang="...
## document.documentElement.addEventListener("submit",function(b){var a;if(a=b.t...
## var a=window.location,b=a.href.indexOf("#");if(0<=b){var c=a.href.substring(b...
## </style><style>body,td,a,p,.h{font-family:arial,sans-serif}body{margin:0;over...
## if (!iesg){document.f&&document.f.q.focus();document.gbqf&&document.gbqf.q.fo...
## }
## })();</script><div id="mngb"> <div id=gbar><nobr><b class=gb1>Search</b> <a c...
## else top.location='/doodles/';};})();</script><input value="AINFCbYAAAAAXq1ft...
## setTimeout(function(){var b=document;var a="SCRIPT";"application/xhtml+xml"==...
## function _F_installCss(c){}
# List the names of the components stored in the pg3 response object.
names(pg3)
## [1] "url" "status_code" "headers" "all_headers" "cookies"
## [6] "content" "date" "times" "request" "handle"