#Loading in "rvest" and "dplyr" libraries
library(rvest)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Using chosen Biosample ID ----
#Fetch the NCBI BioSample page for a single American Gut Project sample
#(my Biosample ID, 31280774) by appending the ID to the search URL
namePage <- paste0("https://www.ncbi.nlm.nih.gov/biosample/?term=", 31280774)
#myPage holds the parsed HTML of the whole webpage
#(the same markup you see with "Inspect Element")
myPage <- read_html(namePage)
#Pick the first <table> node on the page, convert it to a table object,
#and label its columns Question and Response
tableText <- setNames(
  html_table(html_node(myPage, "table")),
  c('Question','Response')
)
#Preview the first rows of the table
head(tableText)
## # A tibble: 6 × 2
## Question Response
## <chr> <chr>
## 1 dominant hand I am right handed
## 2 environmental medium feces
## 3 environmental package human-gut
## 4 host body habitat UBERON:feces
## 5 host body mass index 20.4
## 6 host body product UBERON:feces
# Using class Biosample IDs ----
#Collecting data from American Gut Project via URL and NCBI Biosample IDs (31280770-31280777)
#The IDs are listed explicitly instead of being derived from a loop offset
class_ids <- 31280770:31280777
#Creating list for multiple entries: one slot per Biosample ID, sized up front
#and named by the ID (as a character string)
class_data <- vector("list", length(class_ids))
names(class_data) <- as.character(class_ids)
for (i in seq_along(class_ids)) {
  m <- class_ids[i]
  namePage <- paste0("https://www.ncbi.nlm.nih.gov/biosample/?term=", m)
  #Storing html webpage data into a new format for class biosample IDs
  classPage <- read_html(namePage)
  #Creating a table from the first <table> node of the collected page
  tableText <- classPage %>%
    html_node("table") %>%
    html_table()
  names(tableText) <- c('Question','Response')
  #Store the table under its Biosample ID
  #(nothing is printed here: values are not auto-printed inside a for loop)
  class_data[[as.character(m)]] <- tableText
}
#Pull the Response column (column 2) out of every table in class_data.
#lapply() with [[2]] always gives a list of plain vectors, regardless of
#whether the tables are tibbles or data frames or have equal row counts
survey_responses <- lapply(class_data, function(x) x[[2]])
#Take the 8th response of each sample as its height; vapply() guarantees a
#character vector with one entry per sample.
#NOTE(review): assumes the height answer is in row 8 of every sample's table --
#TODO confirm; matching on the Question column would be more robust
height <- vapply(
  survey_responses,
  function(x) as.character(x[8]),
  character(1)
)
#table() counts how many samples gave each height response;
#pie() draws one slice per distinct response, sized by that count
pie(table(height), main = "Pie Chart of Height")