R Markdown
I need some help here
- I scrape a website and get back a list of character vectors (one per year) whose values are integers that are currently in character form with special symbols (`$` and `,`)
library(stringi)
library(tidyverse)
library(rvest)
library(stringr)
library(rvest)
library(purrr)
# Download the quarterback page for every season from 2010 to 2022 and pull the
# text of the selected table cells. `qbs` is a list holding one character
# vector per season, in season order.
qbs <- lapply(
  paste0('https://overthecap.com/position/quarterback/', 2010:2022, "/"),
  function(season_url) {
    season_page <- read_html(season_url)
    # The selector picks the cell following `.mobile_drop` in rows 1 to 5.
    matched_cells <- html_nodes(
      season_page,
      'tr:nth-child(2) .mobile_drop+ td , tr:nth-child(3) .mobile_drop+ td , tr:nth-child(4) .mobile_drop+ td , tr:nth-child(5) .mobile_drop+ td , tr:nth-child(1) .mobile_drop+ td'
    )
    html_text(matched_cells)
  }
)
# Work on a copy so the scraped result stays untouched.
trial_qbs <- qbs
# Print the first element (the 2010 page).
trial_qbs[[1]]
## [1] "$19,266,668" "$17,716,668" "$17,420,000" "$16,000,000" "$12,980,000"
So here was my workaround for the code
- This gets me the desired result, but I had to use a double for loop and it unlists (flattens) my results. The unlisting is easily fixed here because most of the lists are the same size, but in general each list represents a year, so I want to avoid it
# Convert every scraped salary string (e.g. "$19,266,668") to an integer and
# collect the values, in scrape order, into one flat integer vector.
# The result is preallocated and filled by index instead of being grown with
# append(), which copies the whole vector on every iteration and leaves
# `salaries` as NULL (not an integer vector) when there is nothing to convert.
salaries <- integer(sum(lengths(trial_qbs)))
next_slot <- 1L
for (x in trial_qbs) {
  for (y in x) {
    # Strip the "$" and "," symbols so the remaining digits parse as an integer.
    salaries[next_slot] <- as.integer(str_replace_all(y, "\\$|\\,", ""))
    next_slot <- next_slot + 1L
  }
}
# NOTE(review): to keep one vector per list element instead of flattening,
# the same conversion can be applied element by element:
#   lapply(trial_qbs, function(x) as.integer(str_replace_all(x, "\\$|\\,", "")))
str(salaries)
## int [1:63] 19266668 17716668 17420000 16000000 12980000 17228125 16000000 14981200 14400000 14100000 ...
This was my first attempt, which got me completely lost.
- What type are these characters and how can I convert them?
- Looking at it now, I’m guessing I needed to somehow iterate over `blank_slate` before running `str_replace_all` or `gsub`
# NOTE(review): trial_qbs is a list, and str_replace_all() is handed the whole
# list rather than each character vector inside it. As the output below shows,
# every list element comes back as ONE string holding the deparsed vector
# ("c(...)"), and the pattern has also stripped the commas that separated the
# values inside it.
# Applying the replacement per element, e.g.
#   lapply(trial_qbs, str_replace_all, "\\$|,", "")
# should keep one character vector per element instead — TODO confirm.
completely_lost <- str_replace_all(trial_qbs,"\\$|,","")
completely_lost
## [1] "c(\"19266668\" \"17716668\" \"17420000\" \"16000000\" \"12980000\")"
## [2] "c(\"17228125\" \"16000000\" \"14981200\" \"14400000\" \"14100000\")"
## [3] "c(\"18000000\" \"15595000\" \"15310000\" \"13900000\" \"13500000\")"
## [4] "c(\"20850000\" \"17820000\" \"17500000\" \"17400000\" \"13800000\")"
## [5] "c(\"20400000\" \"18895000\" \"18500000\" \"18400000\" \"17610000\")"
## [6] "c(\"23800000\" \"21166668\" \"19500000\" \"18250000\" \"17721250\")"
## [7] "c(\"24200000\" \"23950000\" \"23750000\" \"22550000\" \"22500000\")"
## [8] "c(\"24550000\" \"24125000\" \"23943600\" \"23750000\" \"20300000\")"
## [9] "c(\"37000000\" \"26500000\" \"25000000\" \"24750000\" \"24400000\")"
## [10] "c(\"29500000\" \"27525000\" \"26500000\" \"25286766\" \"23200000\")"
## [11] "c(\"31500000\" \"28400000\" \"28250000\" \"26600000\" \"23000000\")"
## [12] "c(\"30000000\" \"26900000\" \"24250000\" \"22125000\" \"21000000\")"
## [13] "c(\"27000000\" \"23000000\" \"19877519\")"
# NOTE(review): base-R version of the same replacement. gsub() is also given
# the whole list, so each element is coerced to a single "c(...)" string before
# the pattern is applied; the output below matches the str_replace_all() result.
gsub("\\$|,","",trial_qbs)
## [1] "c(\"19266668\" \"17716668\" \"17420000\" \"16000000\" \"12980000\")"
## [2] "c(\"17228125\" \"16000000\" \"14981200\" \"14400000\" \"14100000\")"
## [3] "c(\"18000000\" \"15595000\" \"15310000\" \"13900000\" \"13500000\")"
## [4] "c(\"20850000\" \"17820000\" \"17500000\" \"17400000\" \"13800000\")"
## [5] "c(\"20400000\" \"18895000\" \"18500000\" \"18400000\" \"17610000\")"
## [6] "c(\"23800000\" \"21166668\" \"19500000\" \"18250000\" \"17721250\")"
## [7] "c(\"24200000\" \"23950000\" \"23750000\" \"22550000\" \"22500000\")"
## [8] "c(\"24550000\" \"24125000\" \"23943600\" \"23750000\" \"20300000\")"
## [9] "c(\"37000000\" \"26500000\" \"25000000\" \"24750000\" \"24400000\")"
## [10] "c(\"29500000\" \"27525000\" \"26500000\" \"25286766\" \"23200000\")"
## [11] "c(\"31500000\" \"28400000\" \"28250000\" \"26600000\" \"23000000\")"
## [12] "c(\"30000000\" \"26900000\" \"24250000\" \"22125000\" \"21000000\")"
## [13] "c(\"27000000\" \"23000000\" \"19877519\")"