# Download ESPN's Super Bowl history page, collect every <table> node on it,
# and keep the first parsed table as the working data frame.
sburl <- 'http://espn.go.com/nfl/superbowl/history/winners'
sbwebpage <- sburl %>% read_html()
sb_table <- sbwebpage %>% html_nodes("table")
sb <- sb_table %>%
  html_table() %>%
  .[[1]]
# Peek at the raw table: the first two rows hold the title and header text.
head(sb)
## X1 X2
## 1 Super Bowl Winners and Results Super Bowl Winners and Results
## 2 NO. DATE
## 3 I Jan. 15, 1967
## 4 II Jan. 14, 1968
## 5 III Jan. 12, 1969
## 6 IV Jan. 11, 1970
## X3 X4
## 1 Super Bowl Winners and Results Super Bowl Winners and Results
## 2 SITE RESULT
## 3 Los Angeles Memorial Coliseum Green Bay 35, Kansas City 10
## 4 Orange Bowl (Miami) Green Bay 33, Oakland 14
## 5 Orange Bowl (Miami) New York Jets 16, Baltimore 7
## 6 Tulane Stadium (New Orleans) Kansas City 23, Minnesota 7
# Rows 1 and 2 are the page title and the header labels, not data; drop them
# and give the four columns proper names in one step.
sb <- setNames(
  sb[-c(1, 2), ],
  c("number", "date", "site", "result")
)
head(sb)
## number date site
## 3 I Jan. 15, 1967 Los Angeles Memorial Coliseum
## 4 II Jan. 14, 1968 Orange Bowl (Miami)
## 5 III Jan. 12, 1969 Orange Bowl (Miami)
## 6 IV Jan. 11, 1970 Tulane Stadium (New Orleans)
## 7 V Jan. 17, 1971 Orange Bowl (Miami)
## 8 VI Jan. 16, 1972 Tulane Stadium (New Orleans)
## result
## 3 Green Bay 35, Kansas City 10
## 4 Green Bay 33, Oakland 14
## 5 New York Jets 16, Baltimore 7
## 6 Kansas City 23, Minnesota 7
## 7 Baltimore 16, Dallas 13
## 8 Dallas 24, Miami 3
# Replace the Roman-numeral labels with consecutive integers. The sequence is
# sized from the table itself, so the assignment keeps working when the page
# lists a different number of games than the 52 it had when this was written
# (a hard-coded 1:52 errors on any other row count).
sb$number <- seq_len(nrow(sb))
# Dates look like "Jan. 15, 1967". On input %B also matches abbreviated month
# names; matching uses the session locale's month names, so this presumably
# needs an English locale.
sb$date <- as.Date(sb$date, format = "%B. %d, %Y")
head(sb)
## number date site
## 3 1 1967-01-15 Los Angeles Memorial Coliseum
## 4 2 1968-01-14 Orange Bowl (Miami)
## 5 3 1969-01-12 Orange Bowl (Miami)
## 6 4 1970-01-11 Tulane Stadium (New Orleans)
## 7 5 1971-01-17 Orange Bowl (Miami)
## 8 6 1972-01-16 Tulane Stadium (New Orleans)
## result
## 3 Green Bay 35, Kansas City 10
## 4 Green Bay 33, Oakland 14
## 5 New York Jets 16, Baltimore 7
## 6 Kansas City 23, Minnesota 7
## 7 Baltimore 16, Dallas 13
## 8 Dallas 24, Miami 3
# Split "Winner NN, Loser NN" at the comma into two columns; the original
# `result` column is dropped.
sb <- separate(
  sb,
  col = result,
  into = c("winner", "loser"),
  sep = ", ",
  remove = TRUE
)
head(sb)
## number date site winner
## 3 1 1967-01-15 Los Angeles Memorial Coliseum Green Bay 35
## 4 2 1968-01-14 Orange Bowl (Miami) Green Bay 33
## 5 3 1969-01-12 Orange Bowl (Miami) New York Jets 16
## 6 4 1970-01-11 Tulane Stadium (New Orleans) Kansas City 23
## 7 5 1971-01-17 Orange Bowl (Miami) Baltimore 16
## 8 6 1972-01-16 Tulane Stadium (New Orleans) Dallas 24
## loser
## 3 Kansas City 10
## 4 Oakland 14
## 5 Baltimore 7
## 6 Minnesota 7
## 7 Dallas 13
## 8 Miami 3
# A space followed by the trailing run of digits, i.e. the score at the end
# of "Team Name NN".
pattern <- " \\d+$"
# Pull the scores out first, while the team columns still contain them.
sb[["winnerScore"]] <- as.numeric(str_extract(sb[["winner"]], pattern))
sb[["loserScore"]] <- as.numeric(str_extract(sb[["loser"]], pattern))
# The pattern is anchored at the end of the string, so it can match at most
# once per value; stripping that match leaves just the team name.
sb[["winner"]] <- sub(pattern, "", sb[["winner"]])
sb[["loser"]] <- sub(pattern, "", sb[["loser"]])
head(sb)
## number date site winner
## 3 1 1967-01-15 Los Angeles Memorial Coliseum Green Bay
## 4 2 1968-01-14 Orange Bowl (Miami) Green Bay
## 5 3 1969-01-12 Orange Bowl (Miami) New York Jets
## 6 4 1970-01-11 Tulane Stadium (New Orleans) Kansas City
## 7 5 1971-01-17 Orange Bowl (Miami) Baltimore
## 8 6 1972-01-16 Tulane Stadium (New Orleans) Dallas
## loser winnerScore loserScore
## 3 Kansas City 35 10
## 4 Oakland 33 14
## 5 Baltimore 16 7
## 6 Minnesota 23 7
## 7 Dallas 16 13
## 8 Miami 24 3
# Save the cleaned Super Bowl table without a row-name column. FALSE is
# spelled out because `F` is an ordinary variable that can be reassigned.
write.csv(sb, 'superbowl.csv', row.names = FALSE)
# Download the RotoBaller 2018 prospect rankings page, collect every <table>
# node on it, and keep the first parsed table.
rburl <- 'https://www.rotoballer.com/2018-mlb-prospect-rankings-for-fantasy-baseball/444904'
rbwebpage <- rburl %>% read_html()
rb_table <- rbwebpage %>% html_nodes("table")
rb <- rb_table %>%
  html_table() %>%
  .[[1]]
# Peek at the raw table: its first row holds the header labels.
head(rb)
## X1 X2 X3 X4
## 1 Ranking Player Team Pos
## 2 1 Shohei Ohtani LAA SP/OF
## 3 2 Ronald Acuna ATL OF
## 4 3 Lewis Brinson MIL OF
## 5 4 Gleyber Torres NYY 3B/SS
## 6 5 Victor Robles WAS OF
# The first row is the header text rather than data; drop it and name the
# four columns in one step.
rb <- setNames(
  rb[-1, ],
  c("Rank", "Player", "Team", "Pos")
)
head(rb)
## Rank Player Team Pos
## 2 1 Shohei Ohtani LAA SP/OF
## 3 2 Ronald Acuna ATL OF
## 4 3 Lewis Brinson MIL OF
## 5 4 Gleyber Torres NYY 3B/SS
## 6 5 Victor Robles WAS OF
## 7 6 Austin Meadows PIT OF
# Save the cleaned prospect rankings without a row-name column. FALSE is
# spelled out because `F` is an ordinary variable that can be reassigned.
write.csv(rb, 'rotoballer.csv', row.names = FALSE)
# Download the live ATP ranking page.
tennissite1 <- read_html("https://live-tennis.eu/en/atp-live-ranking")
# Take the text of every <td> cell in the rankings table. The result is one
# flat character vector of cells, not yet arranged into rows and columns.
tennisrankings <- tennissite1 %>%
  html_nodes("div#plyrRankings table#u868 tbody td") %>%
  html_text()
# Show the first 100 cells to see how the values repeat per player.
head(tennisrankings, 100)
## [1] " 1 " " CH "
## [3] "" "Rafael Nadal"
## [5] "32.2" "ESP1"
## [7] "8085" "-"
## [9] "-1955" "US Open R64(R128 )"
## [11] "-" "US Open R64(R128 )"
## [13] "8130" "10040"
## [15] "" " 2 "
## [17] " 1 " ""
## [19] "Roger Federer" "37"
## [21] "SUI1" "6765"
## [23] "-" "-315"
## [25] "US Open R64(R128 )" "-"
## [27] "US Open R64(R128 )" "6810"
## [29] "8720" ""
## [31] " 3 " " CH "
## [33] "" "Alexander Zverev"
## [35] "21.3" "GER1"
## [37] "4845" "+1"
## [39] "-" "US Open R64(R128 )"
## [41] "-" "US Open R64(R128 )"
## [43] "4890" "6800"
## [45] "" " 4 "
## [47] " 3 " ""
## [49] "Juan Martín del Potro" "29.9"
## [51] "ARG1" "4825"
## [53] "-1" "-675"
## [55] "US Open R64(R128 )" "-"
## [57] "US Open R64(R128 )" "4870"
## [59] "6780" ""
## [61] " 5 " " 1 "
## [63] "" "Novak Djokovic"
## [65] "31.2" "SRB1"
## [67] "4490" "+1"
## [69] "+45" "US Open R64(R128 )"
## [71] "-" "US Open R64(R128 )"
## [73] "4535" "6445"
## [75] "" " 6 "
## [77] " 3 " ""
## [79] "Marin Cilic" "29.9"
## [81] "CRO1" "4400"
## [83] "+1" "-45"
## [85] "US Open R64(R128 )" "-"
## [87] "US Open R64(R128 )" "4445"
## [89] "6355" ""
## [91] " 7 " " 3 "
## [93] "" "Grigor Dimitrov"
## [95] "27.2" "BUL1"
## [97] "3755" "+1"
## [99] "-35" "-"
# TODO: still working on how to clean this up (tennisrankings is a flat character vector of table cells).