library(rvest)
library(stringr)
library(tidyr)
library(dplyr)
library(ggplot2)
library(plotly)
library(crosstalk)
library(lubridate)
# Scrape DraftKings daily results from RotoGuru ----

# Download one day's DraftKings results table from RotoGuru.
#
# day: a Date, or a "YYYY-MM-DD" string, identifying the day to fetch.
#
# Returns a data.frame built from the 9th <table> node on the page, with its
# first row removed and a POSIXct `Date` column (midnight, local time zone)
# appended. Stops with an error if the page has fewer than 9 tables.
scrape_dk_day <- function(day) {
  day <- as.Date(day)
  url <- sprintf(
    "http://rotoguru1.com/cgi-bin/hyday.pl?mon=%s&day=%s&year=%s&game=dk",
    format(day, "%m"), format(day, "%d"), format(day, "%Y")
  )
  tables <- html_nodes(read_html(url), "table")
  # The script relies on the data living in the 9th table; fail loudly rather
  # than silently binding garbage if the page layout ever changes
  if (length(tables) < 9) {
    stop(
      "Expected at least 9 tables at ", url, " but found ", length(tables),
      call. = FALSE
    )
  }
  day_df <- data.frame(html_table(tables[9], fill = TRUE))
  # remove top row
  day_df <- day_df[-1, ]
  # Parse from the date string so the timestamp is local midnight
  day_df %>%
    mutate(Date = as.POSIXct(format(day, "%Y-%m-%d")))
}

# Days to scrape: 2020-12-25 through 2021-01-02 inclusive
scrape_days <- seq(as.Date("2020-12-25"), as.Date("2021-01-02"), by = "day")

# One data.frame per day, stacked in chronological order
fullframe <- do.call(rbind, lapply(format(scrape_days), scrape_dk_day))
# Work on a copy so `fullframe` stays available and the site does not have to
# be scraped again while testing
todate <- fullframe
# Name the first nine scraped columns
colnames(todate)[1:9] <- c(
  "Pos", "Name", "Pts", "Salary", "Team", "Opp", "Score", "Min", "Statline"
)
# Drop non-player rows: any row whose Pos contains one of these labels
non_player_pos <- grepl(
  "RotoGuru|Unlisted|Guards|Centers|Forwards",
  todate$Pos
)
todate <- todate[!non_player_pos, ]
# Drop rows whose minutes field contains "DNP"
todate <- todate[!grepl("DNP", todate$Min), ]
# Drop rows with missing minutes
todate <- drop_na(todate, Min)
# A missing position is the site's equivalent of "Unlisted"; label it as such
todate$Pos[is.na(todate$Pos)] <- "Unlisted"
# Strip "$" and "," from salaries so they can be converted to numeric
todate$Salary <- gsub("[$,]", "", todate$Salary)
# Diagnostic: number of NA values in each column of the bound data frame
vapply(todate, function(col) sum(is.na(col)), integer(1))
# Convert points and salary (columns 3 and 4) to numeric
todate$Pts <- as.numeric(todate$Pts)
todate$Salary <- as.numeric(todate$Salary)
# Drop rows without a salary (unlisted players) - still need to know why
# they are unlisted
todate <- drop_na(todate, Salary)
# Derived per-game and per-player columns ----

# Strip the "^" marker from names BEFORE grouping: otherwise rows for the same
# player with and without the marker land in different groups, splitting that
# player's averages and game count.
# NOTE(review): "^" is presumably a site flag on the player name - confirm.
todate$Name <- gsub("^", "", todate$Name, fixed = TRUE)
# Position as a factor
todate$Pos <- as.factor(todate$Pos)
# TODO: separate the Statline column (likely regex / string extraction)
# Parse "minutes:seconds" strings into a lubridate Period
todate$Min <- ms(todate$Min)

todate <- todate %>%
  # Points per salary dollar for each individual game
  mutate(Ppd = round(Pts / Salary, 5)) %>%
  group_by(Name) %>%
  mutate(
    # Per-player averages and number of games captured
    Avg_Pts = round(mean(Pts), 2),
    Games = n(),
    Avg_Salary = round(mean(Salary), 2),
    # mean() applied directly to a Period does not give average playing time,
    # so average the total seconds and express the result in minutes
    Avg_Min = mean(period_to_seconds(Min), na.rm = TRUE) / 60
  ) %>%
  # Do not leave the Name grouping attached for later steps
  ungroup()
# Abbreviations of the teams to keep
today <- c(
  "was", "bkn", "lal", "mem", "den", "min", "uta",
  "sa", "dal", "chi", "lac", "pho", "por", "gs"
)
# Keep only rows whose Team is in that list
todate <- filter(todate, Team %in% today)
# Per-position data frames, selected by eligible Pos values ----
# Center eligible
center <- todate %>%
  filter(Pos %in% c("C", "PF/C"))
# Forward eligible (any forward slot)
forward <- todate %>%
  filter(Pos %in% c("PF", "PF/C", "SF", "SF/PF", "SG/SF", "PG/SF"))
# Guard eligible (any guard slot)
guard <- todate %>%
  filter(Pos %in% c("PG", "PG/SF", "PG/SG", "SG", "SG/SF"))
# Small forward eligible
sforward <- todate %>%
  filter(Pos %in% c("SF", "SF/PF", "SG/SF", "PG/SF"))
# Shooting guard eligible
sguard <- todate %>%
  filter(Pos %in% c("PG/SG", "SG", "SG/SF"))
# Power forward eligible
pforward <- todate %>%
  filter(Pos %in% c("PF", "PF/C", "SF/PF"))
# Point guard eligible
pguard <- todate %>%
  filter(Pos %in% c("PG", "PG/SF", "PG/SG"))
# Salary against points for every row of todate, coloured by position;
# grid lines and zero lines are switched off on both axes
plot_ly(
  todate,
  x = ~Salary,
  y = ~Pts,
  color = ~Pos,
  hoverinfo = "text",
  text = ~paste(
    "Name:", Name, "<br>",
    "Points:", Pts, "<br>",
    "Salary:", Salary, "<br>",
    "Team:", Team, "<br>",
    "Opp:", Opp, "<br>",
    "Date:", Date
  )
) %>%
  add_markers() %>%
  layout(
    xaxis = list(title = "DK Salary", zeroline = FALSE, showgrid = FALSE),
    yaxis = list(title = "DK Points", zeroline = FALSE, showgrid = FALSE),
    title = "Salary v Points"
  )
#Plot faceted out by position: salary (x) against points (y)
#group_by(Pos) regroups todate by position, do() builds one plot_ly scatter per
#group and stores it in the list-column `p`, and subplot() then arranges those
#plots on a grid
todate %>%
group_by(Pos) %>%
do(p=plot_ly(., x= ~Salary, y= ~Pts, color = ~Pos, type="scatter", hoverinfo = "text",
text = ~paste("Name:", Name, "<br>",
"Points:", Pts, "<br>",
"Salary:", Salary, "<br>",
"Team:", Team, "<br>",
"Opp:", Opp, "<br>",
"Date:", Date))) %>%
#four rows of panels; the y axis is shared, the x axes are independent
subplot(nrows=4, shareX= FALSE, shareY= TRUE)
#Return on salary by position: salary (x) against points per dollar, Ppd (y)
#Same construction as a per-group facet: one plot_ly object per Pos group is
#built inside do() and stored in the list-column `p`, then laid out by subplot()
todate %>%
group_by(Pos) %>%
do(p=plot_ly(., x= ~Salary, y= ~Ppd, color = ~Pos, hoverinfo = "text",
text = ~paste("Name:", Name, "<br>",
"Points:", Pts, "<br>",
"Salary:", Salary, "<br>",
"Team:", Team, "<br>",
"Opp:", Opp, "<br>",
"Date:", Date))) %>%
#four rows of panels sharing the y axis
subplot(nrows=4, shareY=TRUE)
# Salary against points with marker colour mapped to points per dollar (Ppd),
# plus axis titles; the hover text here omits the salary line
plot_ly(
  todate,
  x = ~Salary,
  y = ~Pts,
  color = ~Ppd,
  hoverinfo = "text",
  text = ~paste(
    "Name:", Name, "<br>",
    "Points:", Pts, "<br>",
    "Team:", Team, "<br>",
    "Opp:", Opp, "<br>",
    "Date:", Date
  )
) %>%
  add_markers() %>%
  layout(
    xaxis = list(title = "DK Salary", zeroline = FALSE, showgrid = FALSE),
    yaxis = list(title = "DK Points", zeroline = FALSE, showgrid = FALSE),
    title = "Salary by Points, Colored by Points per Dollar"
  )
# Smoothers of points on salary ----
# na.exclude keeps fitted() the same length as todate (NA where a row could not
# be used), so the fitted values stay aligned with the plotted rows
m <- loess(Pts ~ Salary, data = todate, span = 1.5, na.action = na.exclude)
m2 <- lm(Pts ~ poly(Salary, 2), data = todate, na.action = na.exclude)
# Scatter coloured by Ppd with both fitted curves overlaid
todate %>%
  # plotly draws one line per dplyr group; drop any grouping still attached to
  # todate so each smoother is a single curve rather than per-group fragments
  ungroup() %>%
  plot_ly(
    x = ~Salary,
    y = ~Pts,
    color = ~Ppd,
    hoverinfo = "text",
    text = ~paste(
      "Name:", Name, "<br>",
      "Points:", Pts, "<br>",
      "Salary:", Salary, "<br>",
      "Team:", Team, "<br>",
      "Opp:", Opp, "<br>",
      "Date:", Date
    )
  ) %>%
  add_markers() %>%
  add_lines(y = ~fitted(m), name = "LOESS") %>%
  add_lines(y = ~fitted(m2), name = "Polynomial") %>%
  layout(
    xaxis = list(title = "DK Salary", zeroline = FALSE, showgrid = FALSE),
    yaxis = list(title = "DK Points", zeroline = FALSE, showgrid = FALSE),
    title = "Salary by Points, Colored by Points per Dollar"
  )
# Same scatter, with marker size as well as colour driven by points per
# dollar (DK points / salary)
plot_ly(
  todate,
  x = ~Salary,
  y = ~Pts,
  color = ~Ppd,
  hoverinfo = "text",
  text = ~paste(
    "Name:", Name, "<br>",
    "Points:", Pts, "<br>",
    "Salary:", Salary, "<br>",
    "Team:", Team, "<br>",
    "Opp:", Opp, "<br>",
    "Date:", Date
  )
) %>%
  add_markers(size = ~Ppd) %>%
  layout(
    xaxis = list(title = "DK Salary", zeroline = FALSE, showgrid = FALSE),
    yaxis = list(title = "DK Points", zeroline = FALSE, showgrid = FALSE),
    title = "Salary by Points, Colored/Sized by Points per Dollar"
  )
# Centers: salary against points per row, marker size from Ppd and colour
# from Pos, drawn semi-transparent
cplot <- plot_ly(
  center,
  x = ~Salary,
  y = ~Pts,
  hoverinfo = "text",
  text = ~paste(
    "Name:", Name, "<br>",
    "Points:", Pts, "<br>",
    "Salary:", Salary, "<br>",
    "Team:", Team, "<br>",
    "Opp:", Opp, "<br>",
    "Date:", Date
  )
) %>%
  add_markers(
    size = ~Ppd,
    color = ~Pos,
    marker = list(opacity = 0.3, sizemode = "diameter", sizeref = 2)
  ) %>%
  layout(
    xaxis = list(title = "DK Salary", zeroline = FALSE, showgrid = FALSE),
    yaxis = list(title = "DK Points", zeroline = FALSE, showgrid = FALSE),
    title = "Centers - Individual Games"
  )
cplot
# Centers: average salary against average points, coloured by Pos
cplotavg <- plot_ly(
  center,
  x = ~Avg_Salary,
  y = ~Avg_Pts,
  hoverinfo = "text",
  text = ~paste(
    "Name:", Name, "<br>",
    "Average Pts:", Avg_Pts, "<br>",
    "Avg Salary:", Avg_Salary, "<br>",
    "Team:", Team
  )
) %>%
  add_markers(
    color = ~Pos,
    marker = list(opacity = 0.3, sizemode = "diameter", sizeref = 2)
  ) %>%
  layout(
    xaxis = list(title = "AVG DK Salary", zeroline = FALSE, showgrid = FALSE),
    yaxis = list(title = "AVG DK Points", zeroline = FALSE, showgrid = FALSE),
    title = "Centers AVG"
  )
cplotavg
# Forwards: salary against points per row, marker size from Ppd and colour
# from Pos, drawn semi-transparent
fplot <- plot_ly(
  forward,
  x = ~Salary,
  y = ~Pts,
  hoverinfo = "text",
  text = ~paste(
    "Name:", Name, "<br>",
    "Points:", Pts, "<br>",
    "Salary:", Salary, "<br>",
    "Team:", Team, "<br>",
    "Opp:", Opp, "<br>",
    "Date:", Date
  )
) %>%
  add_markers(
    size = ~Ppd,
    color = ~Pos,
    marker = list(opacity = 0.3, sizemode = "diameter", sizeref = 2)
  ) %>%
  layout(
    xaxis = list(title = "DK Salary", zeroline = FALSE, showgrid = FALSE),
    yaxis = list(title = "DK Points", zeroline = FALSE, showgrid = FALSE),
    title = "Forwards - Individual Games"
  )
fplot
# Forwards: average salary against average points, coloured by Pos; the data
# is explicitly grouped by Name before plotting
fplotavg <- plot_ly(
  group_by(forward, Name),
  x = ~Avg_Salary,
  y = ~Avg_Pts,
  hoverinfo = "text",
  text = ~paste(
    "Name:", Name, "<br>",
    "Average Pts:", Avg_Pts, "<br>",
    "Avg Salary:", Avg_Salary, "<br>",
    "Team:", Team
  )
) %>%
  add_markers(
    color = ~Pos,
    marker = list(opacity = 0.3, sizemode = "diameter", sizeref = 2)
  ) %>%
  layout(
    xaxis = list(title = "AVG DK Salary", zeroline = FALSE, showgrid = FALSE),
    yaxis = list(title = "AVG DK Points", zeroline = FALSE, showgrid = FALSE),
    title = "Forward AVG"
  )
fplotavg
# Guards: salary against points per row, marker size from Ppd and colour
# from Pos, drawn semi-transparent
gplot <- plot_ly(
  guard,
  x = ~Salary,
  y = ~Pts,
  hoverinfo = "text",
  text = ~paste(
    "Name:", Name, "<br>",
    "Points:", Pts, "<br>",
    "Salary:", Salary, "<br>",
    "Team:", Team, "<br>",
    "Opp:", Opp, "<br>",
    "Date:", Date
  )
) %>%
  add_markers(
    size = ~Ppd,
    color = ~Pos,
    marker = list(opacity = 0.3, sizemode = "diameter", sizeref = 2)
  ) %>%
  layout(
    xaxis = list(title = "DK Salary", zeroline = FALSE, showgrid = FALSE),
    yaxis = list(title = "DK Points", zeroline = FALSE, showgrid = FALSE),
    title = "Guards - Individual Games"
  )
gplot
# Guards: average salary against average points, coloured by Pos
gplotavg <- plot_ly(
  guard,
  x = ~Avg_Salary,
  y = ~Avg_Pts,
  hoverinfo = "text",
  text = ~paste(
    "Name:", Name, "<br>",
    "Average Pts:", Avg_Pts, "<br>",
    "Avg Salary:", Avg_Salary, "<br>",
    "Team:", Team
  )
) %>%
  add_markers(
    color = ~Pos,
    marker = list(opacity = 0.3, sizemode = "diameter", sizeref = 2)
  ) %>%
  layout(
    xaxis = list(title = "AVG DK Salary", zeroline = FALSE, showgrid = FALSE),
    yaxis = list(title = "AVG DK Points", zeroline = FALSE, showgrid = FALSE),
    title = "Guards AVG"
  )
gplotavg
# Combine the center, forward and guard average plots into one figure, keeping
# each panel's axis titles and hiding the legend
# highlight() requests persistent selection on "plotly_selected" events
# NOTE(review): linked brushing presumably needs the plots to be built from
# keyed data (highlight_key() / SharedData) - confirm it actually links here
subplot(cplotavg, fplotavg, gplotavg, titleX = TRUE, titleY = TRUE) %>%
  hide_legend() %>%
  highlight(on = "plotly_selected", persistent = TRUE) %>%
  layout(title = "Avg Across G, F, C")