library(rvest)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
#' Scrape the first HTML table from an NCBI BioSample search page
#'
#' Builds the BioSample search URL for `biosample_id`, downloads the page,
#' and returns the first `<table>` found on it with its two columns renamed
#' to `Question` and `Answer`.
#'
#' @param biosample_id A single, non-missing identifier (number or string)
#'   appended to the `term` query parameter of the BioSample URL.
#' @return The first table on the page, with columns named `Question` and
#'   `Answer`.
scrape_lydia_survey <- function(biosample_id = 31280773) {
  stopifnot(length(biosample_id) == 1L, !is.na(biosample_id))

  # paste0() converts numbers with as.character(), which switches to
  # scientific notation for round values (100000 -> "1e+05") and would
  # corrupt the query string, so numeric IDs are formatted explicitly.
  id_text <- if (is.numeric(biosample_id)) {
    format(biosample_id, scientific = FALSE, trim = TRUE)
  } else {
    as.character(biosample_id)
  }
  url <- paste0("https://www.ncbi.nlm.nih.gov/biosample/?term=", id_text)

  page <- read_html(url)
  tables <- page %>%
    html_nodes("table") %>%
    html_table(fill = TRUE)

  # Fail with a descriptive message instead of "subscript out of bounds"
  # when the page contains no table at all.
  if (length(tables) == 0L) {
    stop("No HTML table found at ", url, call. = FALSE)
  }

  survey_df <- tables[[1]]

  # The rename below is only meaningful for a two-column table; on a plain
  # data.frame a mismatch would otherwise produce NA column names silently.
  if (ncol(survey_df) != 2L) {
    stop(
      "Expected a 2-column table at ", url, " but found ",
      ncol(survey_df), " column(s)",
      call. = FALSE
    )
  }

  colnames(survey_df) <- c("Question", "Answer")
  survey_df
}
# Fetch the survey table using the scraper's default BioSample ID, then
# preview its first six rows.
lydia_survey <- scrape_lydia_survey()
head(lydia_survey, n = 6L)
## # A tibble: 6 × 2
##   Question              Answer
##   <chr>                 <chr>
## 1 dominant hand         I am left handed
## 2 environmental medium  feces
## 3 environmental package human-gut
## 4 host body habitat     UBERON:feces
## 5 host body mass index  25.4
## 6 host body product     UBERON:feces
# Choose which survey row to visualise (1 = first row of the table) and keep
# its question text for the plot title.
question_index <- 1
question_text <- lydia_survey$Question[question_index]

# Tally the answer stored for that row. Because a single answer is selected,
# the tally holds at most one category.
response_table <- table(lydia_survey$Answer[question_index])

# Convert the tally to a data frame and give it the column names that the
# plot aesthetics refer to.
pie_df <- setNames(as.data.frame(response_table), c("Response", "Count"))

# A pie chart is a single stacked bar drawn in polar coordinates.
ggplot(pie_df, aes(x = "", y = Count, fill = Response)) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  theme_void() +
  labs(title = paste("Response for Lydia's question:", question_text))
