library(rvest)

# Build the BioSample search URL for a single example record
namePage <- paste0("https://www.ncbi.nlm.nih.gov/biosample/?term=", 31280770)

# Download and parse the page; testPage holds the whole HTML document
testPage <- read_html(namePage)

# Take the first <table> on the page, convert it to a data frame,
# and label its two columns
tableText <- html_table(html_node(testPage, "table"))
colnames(tableText) <- c("Question", "Response")

# Preview the first few rows
head(tableText)
## # A tibble: 6 × 2
##   Question              Response         
##   <chr>                 <chr>            
## 1 dominant hand         I am right handed
## 2 environmental medium  feces            
## 3 environmental package human-gut        
## 4 host body habitat     UBERON:feces     
## 5 host body mass index  21.0             
## 6 host body product     UBERON:feces
# Scrape the attribute table of a single NCBI BioSample search result.
#
# Args:
#   sample_id: A single, non-missing identifier (number or string) that is
#     appended as the search term to the BioSample URL.
#
# Returns:
#   The first <table> of the result page as parsed by html_table(), with its
#   two columns renamed to "Question" and "Response".
#
# Errors:
#   Stops with an explicit message when `sample_id` is not a single
#   non-missing value, when the page contains no <table>, or when the table
#   does not have exactly two columns.
scrape_sample_data <- function(sample_id) {
  if (length(sample_id) != 1L || is.na(sample_id)) {
    stop("`sample_id` must be a single, non-missing value.", call. = FALSE)
  }

  # Doubles such as 30000000 would otherwise be pasted as "3e+07" and
  # produce a URL that searches for the wrong term.
  if (is.numeric(sample_id)) {
    sample_id <- format(sample_id, scientific = FALSE, trim = TRUE, digits = 15)
  }

  url <- paste0("https://www.ncbi.nlm.nih.gov/biosample/?term=", sample_id)
  webpage <- read_html(url)

  # html_node() yields a missing node when nothing matches; check it here so
  # the failure names the sample instead of surfacing as a dispatch error.
  table_node <- html_node(webpage, "table")
  if (inherits(table_node, "xml_missing")) {
    stop("No table found on the BioSample page for sample ", sample_id, ".",
         call. = FALSE)
  }

  table_data <- html_table(table_node)
  if (ncol(table_data) != 2L) {
    stop("Expected a 2-column table for sample ", sample_id, ", got ",
         ncol(table_data), " column(s).", call. = FALSE)
  }

  names(table_data) <- c("Question", "Response")
  table_data
}

# BioSample IDs to scrape
sample_ids <- 31280770:31280775

# Scrape every sample in one pass. lapply() allocates the result list up
# front instead of growing it inside a loop; each element is then named
# after its sample ID so a table can be looked up by ID.
survey_results <- lapply(sample_ids, scrape_sample_data)
names(survey_results) <- as.character(sample_ids)

# Show the first few rows of each scraped table
lapply(survey_results, head)
## $`31280770`
## # A tibble: 6 × 2
##   Question              Response         
##   <chr>                 <chr>            
## 1 dominant hand         I am right handed
## 2 environmental medium  feces            
## 3 environmental package human-gut        
## 4 host body habitat     UBERON:feces     
## 5 host body mass index  21.0             
## 6 host body product     UBERON:feces     
## 
## $`31280771`
## # A tibble: 6 × 2
##   Question              Response         
##   <chr>                 <chr>            
## 1 dominant hand         I am right handed
## 2 environmental medium  feces            
## 3 environmental package human-gut        
## 4 host body habitat     UBERON:feces     
## 5 host body mass index  26.0             
## 6 host body product     UBERON:feces     
## 
## $`31280772`
## # A tibble: 6 × 2
##   Question              Response        
##   <chr>                 <chr>           
## 1 dominant hand         I am left handed
## 2 environmental medium  feces           
## 3 environmental package human-gut       
## 4 host body habitat     UBERON:feces    
## 5 host body mass index  25.7            
## 6 host body product     UBERON:feces    
## 
## $`31280773`
## # A tibble: 6 × 2
##   Question              Response        
##   <chr>                 <chr>           
## 1 dominant hand         I am left handed
## 2 environmental medium  feces           
## 3 environmental package human-gut       
## 4 host body habitat     UBERON:feces    
## 5 host body mass index  25.4            
## 6 host body product     UBERON:feces    
## 
## $`31280774`
## # A tibble: 6 × 2
##   Question              Response         
##   <chr>                 <chr>            
## 1 dominant hand         I am right handed
## 2 environmental medium  feces            
## 3 environmental package human-gut        
## 4 host body habitat     UBERON:feces     
## 5 host body mass index  20.4             
## 6 host body product     UBERON:feces     
## 
## $`31280775`
## # A tibble: 6 × 2
##   Question              Response         
##   <chr>                 <chr>            
## 1 dominant hand         I am right handed
## 2 environmental medium  feces            
## 3 environmental package human-gut        
## 4 host body habitat     UBERON:feces     
## 5 host body mass index  33.3             
## 6 host body product     UBERON:feces
# Data for the pie chart: one row per answer with the number of responses.
# NOTE(review): the counts are entered by hand here rather than computed
# from survey_results - confirm they match the scraped data.
survey_dog <- data.frame(
  answer = c("Yes", "No"),
  count = c(3, 3)
)

# Pie chart of dog ownership: a single stacked bar wrapped into polar
# coordinates, each slice labelled with its rounded percentage share.
library(ggplot2)
ggplot(survey_dog, aes(x = "", y = count, fill = answer)) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  geom_text(
    aes(label = paste0(round(count / sum(count) * 100), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  labs(title = "Dog Owners in survey") +
  theme_void()