library(rvest)     
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)   

#' Scrape the first HTML table from an NCBI BioSample search page
#'
#' Builds the BioSample search URL for `biosample_id`, downloads the page,
#' parses every `<table>` element on it, and returns the first one with its
#' two columns renamed to `Question` and `Answer`.
#'
#' @param biosample_id A single, non-missing identifier (number or string)
#'   that is appended to the search URL as the `term` query value.
#' @return The first table found on the page, with columns named `Question`
#'   and `Answer`.
#' @details Signals an error when `biosample_id` is not a single non-missing
#'   value, when the page contains no table, or when the first table does not
#'   have exactly two columns.
scrape_lydia_survey <- function(biosample_id = 31280773) {
  # Validate before touching the network so bad input fails fast and clearly.
  if (length(biosample_id) != 1L || is.na(biosample_id)) {
    stop("`biosample_id` must be a single, non-missing value.", call. = FALSE)
  }

  # paste0() converts numbers with as.character(), which can switch to
  # scientific notation (100000 -> "1e+05") and corrupt the query string.
  if (is.numeric(biosample_id)) {
    id_text <- format(biosample_id, scientific = FALSE, trim = TRUE)
  } else {
    id_text <- as.character(biosample_id)
  }

  url <- paste0("https://www.ncbi.nlm.nih.gov/biosample/?term=", id_text)
  page <- read_html(url)

  tables <- html_table(html_elements(page, "table"))
  if (length(tables) == 0L) {
    stop("No HTML table found at ", url, call. = FALSE)
  }

  survey_df <- tables[[1]]
  if (ncol(survey_df) != 2L) {
    stop(
      "Expected a two-column table at ", url, " but found ",
      ncol(survey_df), " column(s).",
      call. = FALSE
    )
  }

  colnames(survey_df) <- c("Question", "Answer")
  survey_df
}


# Download the survey table using the scraper's default BioSample ID.
lydia_survey <- scrape_lydia_survey()
# Preview the first six question/answer rows.
head(lydia_survey, n = 6L)
## # A tibble: 6 × 2
##   Question              Answer          
##   <chr>                 <chr>           
## 1 dominant hand         I am left handed
## 2 environmental medium  feces           
## 3 environmental package human-gut       
## 4 host body habitat     UBERON:feces    
## 5 host body mass index  25.4            
## 6 host body product     UBERON:feces
# Pick the survey row to chart and look up its question text for the title.
question_index <- 1
question_text <- lydia_survey[["Question"]][question_index]

# Tally the answer stored at that row, then turn the tally into a data frame
# whose column names match the aesthetics used by the plot below.
response_table <- table(lydia_survey[["Answer"]][question_index])
pie_df <- setNames(as.data.frame(response_table), c("Response", "Count"))

# A single full-width stacked bar wrapped into polar coordinates gives a pie.
ggplot(pie_df, aes(x = "", y = Count, fill = Response)) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  theme_void() +
  ggtitle(paste("Response for Lydia's question:", question_text))