library(rvest)
# Build the BioSample search URL for sample ID 31280770
namePage <- paste0("https://www.ncbi.nlm.nih.gov/biosample/?term=", 31280770)
# Download and parse the page; testPage holds the parsed HTML document
testPage <- read_html(namePage)
# Convert the first <table> element on the page into a data frame
tableText <- html_table(html_node(testPage, "table"))
# Label the two columns, then preview the first few rows
names(tableText) <- c("Question", "Response")
head(tableText)
## # A tibble: 6 × 2
## Question Response
## <chr> <chr>
## 1 dominant hand I am right handed
## 2 environmental medium feces
## 3 environmental package human-gut
## 4 host body habitat UBERON:feces
## 5 host body mass index 21.0
## 6 host body product UBERON:feces
# Scrape the attribute table of one NCBI BioSample search result.
#
# Args:
#   sample_id: A single, non-missing sample identifier (number or string).
#     It is appended to the BioSample search URL as the `term` parameter.
#
# Returns:
#   The first <table> on the result page, converted by html_table(), with
#   its two columns renamed to "Question" and "Response".
#
# Errors:
#   Stops with a descriptive message when `sample_id` is not a single
#   non-missing value, when the page has no <table>, or when that table does
#   not have exactly two columns. Download errors from read_html() propagate.
scrape_sample_data <- function(sample_id) {
  if (length(sample_id) != 1L || is.na(sample_id)) {
    stop("`sample_id` must be a single, non-missing value.", call. = FALSE)
  }

  # paste0() would render some numeric IDs in scientific notation
  # (e.g. 30000000 becomes "3e+07"), so numbers are formatted explicitly.
  id_text <- if (is.numeric(sample_id)) {
    format(sample_id, scientific = FALSE, trim = TRUE)
  } else {
    as.character(sample_id)
  }

  url <- paste0("https://www.ncbi.nlm.nih.gov/biosample/?term=", id_text)
  webpage <- read_html(url)

  # html_element() yields a missing node when nothing matches the selector;
  # detect that here so the failure names the sample instead of surfacing
  # later inside html_table().
  table_node <- html_element(webpage, "table")
  if (inherits(table_node, "xml_missing")) {
    stop("No table found for sample ID ", id_text, " (", url, ").",
         call. = FALSE)
  }

  table_data <- html_table(table_node)

  # The rename below assumes a two-column question/answer layout.
  if (ncol(table_data) != 2L) {
    stop("Expected a 2-column table for sample ID ", id_text, ", found ",
         ncol(table_data), " column(s).", call. = FALSE)
  }

  names(table_data) <- c("Question", "Response")
  table_data
}
# Sample IDs to scrape (six consecutive IDs)
sample_ids <- 31280770:31280775
# Scrape every sample in one pass; lapply() creates the result list at its
# final length instead of growing it element by element inside a for loop
survey_results <- lapply(sample_ids, scrape_sample_data)
# Key each table by its sample ID (as a string) so it can be looked up by name
names(survey_results) <- as.character(sample_ids)
# Checking the first few rows of each survey sample
lapply(survey_results, head)
## $`31280770`
## # A tibble: 6 × 2
## Question Response
## <chr> <chr>
## 1 dominant hand I am right handed
## 2 environmental medium feces
## 3 environmental package human-gut
## 4 host body habitat UBERON:feces
## 5 host body mass index 21.0
## 6 host body product UBERON:feces
##
## $`31280771`
## # A tibble: 6 × 2
## Question Response
## <chr> <chr>
## 1 dominant hand I am right handed
## 2 environmental medium feces
## 3 environmental package human-gut
## 4 host body habitat UBERON:feces
## 5 host body mass index 26.0
## 6 host body product UBERON:feces
##
## $`31280772`
## # A tibble: 6 × 2
## Question Response
## <chr> <chr>
## 1 dominant hand I am left handed
## 2 environmental medium feces
## 3 environmental package human-gut
## 4 host body habitat UBERON:feces
## 5 host body mass index 25.7
## 6 host body product UBERON:feces
##
## $`31280773`
## # A tibble: 6 × 2
## Question Response
## <chr> <chr>
## 1 dominant hand I am left handed
## 2 environmental medium feces
## 3 environmental package human-gut
## 4 host body habitat UBERON:feces
## 5 host body mass index 25.4
## 6 host body product UBERON:feces
##
## $`31280774`
## # A tibble: 6 × 2
## Question Response
## <chr> <chr>
## 1 dominant hand I am right handed
## 2 environmental medium feces
## 3 environmental package human-gut
## 4 host body habitat UBERON:feces
## 5 host body mass index 20.4
## 6 host body product UBERON:feces
##
## $`31280775`
## # A tibble: 6 × 2
## Question Response
## <chr> <chr>
## 1 dominant hand I am right handed
## 2 environmental medium feces
## 3 environmental package human-gut
## 4 host body habitat UBERON:feces
## 5 host body mass index 33.3
## 6 host body product UBERON:feces
# Setting up data for visualization in a pie chart: one row per answer with
# its (hard-coded) count
survey_dog <- data.frame(
  answer = c("Yes", "No"),
  count = c(3, 3)
)
# Creating a pie chart from the survey answers on dog ownership: a single
# stacked column is wrapped into polar coordinates, and each slice is
# labelled at its midpoint with its rounded percentage of the total
library(ggplot2)
ggplot(data = survey_dog, mapping = aes(x = "", y = count, fill = answer)) +
  geom_col(width = 1) +
  geom_text(
    mapping = aes(label = paste0(round(count / sum(count) * 100), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  coord_polar(theta = "y") +
  labs(title = "Dog Owners in survey") +
  theme_void()
