library(rvest)
library(rebus)
library(stringr)
library(tidyr)
library(dplyr)
library(ggplot2)
library(plotly)
library(crosstalk)
library(lubridate)
# Scrape daily DraftKings results from RotoGuru ----

#' Download the RotoGuru DraftKings results table for a single day.
#'
#' @param day A Date, or a "YYYY-MM-DD" string, identifying the page to fetch.
#' @return A data.frame built from the 9th <table> node on the page, with its
#'   first row removed (presumably a header row - confirm against the site)
#'   and a POSIXct `Date` column set to local midnight of `day`.
scrape_dk_day <- function(day) {
  day <- as.Date(day)
  url <- sprintf(
    "http://rotoguru1.com/cgi-bin/hyday.pl?mon=%s&day=%s&year=%s&game=dk",
    format(day, "%m"), format(day, "%d"), format(day, "%Y")
  )
  tables <- read_html(url) %>%
    html_nodes("table")
  # The stats live in the 9th table; fail loudly if the page layout changed
  # instead of silently producing an empty or malformed frame.
  if (length(tables) < 9) {
    stop(
      "Expected at least 9 tables at ", url, " but found ", length(tables),
      call. = FALSE
    )
  }
  stats <- tables %>%
    .[9] %>%
    html_table(fill = TRUE)
  # html_table() returns a list here; data.frame() flattens it to one frame.
  stats <- data.frame(stats)
  # remove top row
  stats <- stats[-1, ]
  # add date column (character -> POSIXct uses the session time zone)
  stats %>%
    mutate(Date = as.POSIXct(format(day, "%Y-%m-%d")))
}

# One frame per day; the per-day objects keep their original names.
dfs1225 <- scrape_dk_day("2020-12-25")
dfs1226 <- scrape_dk_day("2020-12-26")
dfs1227 <- scrape_dk_day("2020-12-27")
# NOTE(review): this single row is excluded on 12/28 only; the reason is not
# recorded in the script - confirm the exclusion is still wanted.
dfs1228 <- scrape_dk_day("2020-12-28") %>%
  filter(X2 != "LeVert, Caris^")
dfs1229 <- scrape_dk_day("2020-12-29")
dfs1230 <- scrape_dk_day("2020-12-30")
dfs1231 <- scrape_dk_day("2020-12-31")
dfs0101 <- scrape_dk_day("2021-01-01")
dfs0102 <- scrape_dk_day("2021-01-02")
dfs0103 <- scrape_dk_day("2021-01-03")
dfs0104 <- scrape_dk_day("2021-01-04")

# bind rows
fullframe <- rbind(
  dfs1225, dfs1226, dfs1227, dfs1228, dfs1229, dfs1230, dfs1231,
  dfs0101, dfs0102, dfs0103, dfs0104
)
# Work on a copy so the scraped `fullframe` can be reused while testing
# without hitting the site again.
todate <- fullframe
# Name the first nine scraped columns in one assignment.
colnames(todate)[1:9] <- c(
  "Pos", "Name", "Pts", "Salary", "Team", "Opp", "Score", "Min", "Statline"
)
# Drop rows whose Pos cell contains any of the non-player labels.
# grepl() treats NA as "no match", so rows with a missing Pos are kept here.
todate <- todate[
  !grepl(
    paste(c("RotoGuru", "Unlisted", "Guards", "Centers", "Forwards"),
          collapse = "|"),
    todate$Pos
  ),
]
# Drop rows whose minutes cell contains "DNP".
todate <- todate[!grepl("DNP", todate$Min), ]
# Drop rows with missing minutes.
todate <- drop_na(todate, Min)
# Label any remaining missing position as "Unlisted"
# (the original note says the site uses NA as an equivalent of Unlisted).
todate$Pos <- ifelse(is.na(todate$Pos), "Unlisted", todate$Pos)
# Strip "$" and "," from the salary strings in one pass so they can be parsed
# as numbers (as.numeric() would otherwise turn them into NA).
todate$Salary <- gsub("[$,]", "", todate$Salary)
# NA count per column of the combined frame (auto-prints when run at top
# level). vapply() pins the result to one integer per column.
vapply(todate, function(x) sum(is.na(x)), integer(1))
# Convert points and salary to numeric. Columns are addressed by name rather
# than by position (3:4) so a change in column layout cannot silently convert
# the wrong columns.
todate[c("Pts", "Salary")] <- lapply(todate[c("Pts", "Salary")], as.numeric)
# Drop rows whose salary is still missing after conversion
# (unlisted players - reason for being unlisted still unknown).
todate <- drop_na(todate, Salary)
# Normalise player names BEFORE any per-player aggregation.
# Some scraped names carry a trailing "^" (e.g. "LeVert, Caris^"). Grouping on
# the raw name treats "X^" and "X" as two different players, which splits
# their averages and game counts. Removing the marker (and any leftover
# whitespace) first makes every game of a player land in the same group.
# fixed = TRUE matches the literal "^" without regex escaping.
todate$Name <- trimws(gsub("^", "", todate$Name, fixed = TRUE))

todate <- todate %>%
  # Points per salary dollar for each individual game (row-wise arithmetic,
  # no grouping required).
  mutate(Ppd = round(Pts / Salary, 5)) %>%
  group_by(Name) %>%
  mutate(
    # Average DK points and number of games captured per player.
    Avg_Pts = round(mean(Pts), 2),
    Games = n(),
    # Average salary per player.
    Avg_Salary = round(mean(Salary), 2),
    # Ratio of the (already rounded) per-player averages. Both inputs are
    # constant within a player, so taking a mean over them adds nothing.
    # NOTE(review): the original flagged this metric as "need to work on
    # this"; mean(Ppd) may be what is actually wanted - confirm.
    Avg_Ppd = round(Avg_Pts / Avg_Salary, 4)
  ) %>%
  ungroup()

# convert the position from a character string to a factor
todate$Pos <- as.factor(todate$Pos)
# Parse the free-text statline into one numeric column per stat ----

#' Extract one numeric stat from each statline string.
#'
#' @param statline Character vector of box-score strings.
#' @param pattern Regular expression with exactly one capture group around the
#'   digits to extract.
#' @return Numeric vector the same length as `statline`; 0 where the stat is
#'   absent or the statline is NA.
extract_stat <- function(statline, pattern) {
  # str_match() returns one row per input (NA when there is no match), so the
  # result always lines up with the rows of the data frame. The previous
  # str_extract_all() %>% unlist() approach dropped non-matching rows.
  value <- as.numeric(str_match(statline, pattern)[, 2])
  value[is.na(value)] <- 0
  value
}

todate$Pt <- extract_stat(todate$Statline, "(\\d+)pt")
todate$Rb <- extract_stat(todate$Statline, "(\\d+)rb")
todate$As <- extract_stat(todate$Statline, "(\\d+)as")
todate$St <- extract_stat(todate$Statline, "(\\d+)st")
todate$Bl <- extract_stat(todate$Statline, "(\\d+)bl")
todate$To <- extract_stat(todate$Statline, "(\\d+)to")
todate$Tr <- extract_stat(todate$Statline, "(\\d+)trey")
# Shooting splits are written "made-attemptedfg" / "made-attemptedft":
# the first number is makes, the number directly before the suffix is attempts.
todate$Fgm <- extract_stat(todate$Statline, "(\\d+)-\\d+fg")
todate$Fga <- extract_stat(todate$Statline, "(\\d+)fg")
todate$Ftm <- extract_stat(todate$Statline, "(\\d+)-\\d+ft")
todate$Fta <- extract_stat(todate$Statline, "(\\d+)ft")
# Normalise minutes played from "MM:SS" to a zero-padded "HH:MM:SS" string.
# format() is applied directly to the parsed time, so the result always has
# one element per row (NA where the input cannot be parsed). The previous
# as.character() -> regex -> unlist() round trip could drop elements.
# tz = "UTC" keeps the parse independent of the session time zone.
# The values are kept as plain character strings; wrapping strings in as.ts()
# gave them a time-series class with no time-series meaning.
# NOTE(review): "%M" only accepts 00-59, so a value of 60+ minutes would
# become NA - confirm that cannot occur. Avg minutes is still to be added.
todate$Min <- format(
  strptime(todate$Min, format = "%M:%S", tz = "UTC"),
  "%H:%M:%S"
)
# Team abbreviations to keep (the teams in today's games).
today <- c(
  "cle", "orl", "cha", "phi", "ny", "atl",
  "okc", "mia", "bos", "tor", "dal", "hou",
  "det", "mil", "ind", "no", "sac", "gs"
)
# Keep only rows whose team is in that list.
todate <- filter(todate, Team %in% today)
# Per-player average columns used by the plots below.
# No group_by() is needed just to pick columns, and leaving one in place would
# hand a silently grouped frame to every later step. Name is converted to a
# factor from this frame's own column rather than from `todate`.
# NOTE(review): this keeps one row per game, so each player appears once per
# game with identical averages; distinct() may be wanted - confirm.
todateavg <- todate %>%
  select(Name, Avg_Pts, Avg_Salary, Avg_Ppd, Team, Pos) %>%
  mutate(Name = as.factor(Name))
# Build one data frame per roster slot, based on the Pos labels it accepts.

# Center slot: C or PF/C
center <- todateavg %>%
  filter(Pos %in% c("C", "PF/C"))

# Forward slot: any listing that includes SF or PF
forward <- todateavg %>%
  filter(Pos %in% c("PF", "PF/C", "SF", "SF/PF", "SG/SF", "PG/SF"))

# Guard slot: any listing that includes PG or SG
guard <- todateavg %>%
  filter(Pos %in% c("PG", "PG/SF", "PG/SG", "SG", "SG/SF"))

# Small forward slot: any listing that includes SF
sforward <- todateavg %>%
  filter(Pos %in% c("SF", "SF/PF", "SG/SF", "PG/SF"))

# Shooting guard slot: any listing that includes SG
sguard <- todateavg %>%
  filter(Pos %in% c("PG/SG", "SG", "SG/SF"))

# Power forward slot: any listing that includes PF
pforward <- todateavg %>%
  filter(Pos %in% c("PF", "PF/C", "SF/PF"))

# Point guard slot: any listing that includes PG
pguard <- todateavg %>%
  filter(Pos %in% c("PG", "PG/SF", "PG/SG"))
# Info Viz ----
# All players: average salary vs average DK points, coloured by team.
# Marker size follows Avg_Ppd (average points per salary dollar).
plot_ly(
  todateavg,
  x = ~Avg_Salary,
  y = ~Avg_Pts,
  color = ~Team,
  hoverinfo = "text",
  text = ~paste(
    "Name:", Name, "<br>",
    "Avg Points:", Avg_Pts, "<br>",
    "Avg Salary:", Avg_Salary, "<br>",
    "Team:", Team
  )
) %>%
  add_markers(size = ~Avg_Ppd) %>%
  layout(
    xaxis = list(title = "Avg DK Salary", zeroline = FALSE, showgrid = FALSE),
    yaxis = list(title = "Avg DK Points", zeroline = FALSE, showgrid = FALSE),
    title = "Avg Salary v Avg Points"
  )
# Center-eligible players only: average salary vs average DK points.
# Markers are semi-transparent, coloured by team and sized by Avg_Ppd.
cplotavg <- plot_ly(
  center,
  x = ~Avg_Salary,
  y = ~Avg_Pts,
  hoverinfo = "text",
  text = ~paste(
    "Name:", Name, "<br>",
    "Average Pts:", Avg_Pts, "<br>",
    "Avg Salary:", Avg_Salary, "<br>",
    "Team:", Team
  )
) %>%
  add_markers(
    size = ~Avg_Ppd,
    color = ~Team,
    marker = list(opacity = 0.3, sizemode = "diameter", sizeref = 2)
  ) %>%
  layout(
    xaxis = list(title = "AVG DK Salary", zeroline = FALSE, showgrid = FALSE),
    yaxis = list(title = "AVG DK Points", zeroline = FALSE, showgrid = FALSE),
    title = "Centers AVG"
  )
# Render the chart.
cplotavg
# Faceted view: one salary-vs-points scatter per Pos value, arranged in a
# 5-row grid with shared axes. do() builds one plot per group and stores it in
# the list column `p`, which subplot() then lays out.
todateavg %>%
  group_by(Pos) %>%
  do(
    p = plot_ly(
      .,
      x = ~Avg_Salary,
      y = ~Avg_Pts,
      color = ~Team,
      hoverinfo = "text",
      text = ~paste(
        "Name:", Name, "<br>",
        "Average Points:", Avg_Pts, "<br>",
        "Average Salary:", Avg_Salary, "<br>",
        "Team:", Team
      )
    )
  ) %>%
  subplot(nrows = 5, shareY = TRUE, shareX = TRUE)