library(rvest)
library(rebus)
library(stringr)
library(tidyr)
library(dplyr)
library(ggplot2)
library(plotly)
library(crosstalk)
library(lubridate)
# Scrape DraftKings daily results from RotoGuru ----

# Download one day's DraftKings page from RotoGuru and return it as a data
# frame.
#
# day: a single "YYYY-MM-DD" string. Its year, month and day parts are copied
#      into the query string unchanged, so zero padding is kept ("mon=01").
#
# Returns the 9th <table> found on the page, converted to a data frame, with
# its first row removed and a POSIXct `Date` column holding `day` added.
scrape_dk_day <- function(day) {
  parts <- strsplit(day, "-", fixed = TRUE)[[1]]
  url <- sprintf(
    "http://rotoguru1.com/cgi-bin/hyday.pl?mon=%s&day=%s&year=%s&game=dk",
    parts[2], parts[3], parts[1]
  )

  tables <- html_nodes(read_html(url), "table")

  # html_table() on a node set returns a list; data.frame() flattens the
  # one-element list into a plain data frame.
  daily <- data.frame(html_table(tables[9], fill = TRUE))

  # Drop the first row (presumably a header/banner row -- confirm against the
  # live page).
  daily <- daily[-1, ]

  mutate(daily, Date = as.POSIXct(day))
}

# One data frame per game day, 2020-12-25 through 2021-01-04.
dfs1225 <- scrape_dk_day("2020-12-25")
dfs1226 <- scrape_dk_day("2020-12-26")
dfs1227 <- scrape_dk_day("2020-12-27")
# On 12/28 the rows whose name column is exactly "LeVert, Caris^" are removed.
dfs1228 <- scrape_dk_day("2020-12-28") %>%
  filter(X2 != "LeVert, Caris^")
dfs1229 <- scrape_dk_day("2020-12-29")
dfs1230 <- scrape_dk_day("2020-12-30")
dfs1231 <- scrape_dk_day("2020-12-31")
dfs0101 <- scrape_dk_day("2021-01-01")
dfs0102 <- scrape_dk_day("2021-01-02")
dfs0103 <- scrape_dk_day("2021-01-03")
dfs0104 <- scrape_dk_day("2021-01-04")

# Stack all days into a single frame.
fullframe <- rbind(
  dfs1225, dfs1226, dfs1227, dfs1228, dfs1229, dfs1230, dfs1231,
  dfs0101, dfs0102, dfs0103, dfs0104
)
# Work on a copy so the scraped `fullframe` can be reused during testing
# without hitting the site again.
todate <- fullframe

# Give the first nine columns descriptive names in one assignment.
colnames(todate)[1:9] <- c(
  "Pos", "Name", "Pts", "Salary", "Team",
  "Opp", "Score", "Min", "Statline"
)
# Keep only rows that describe a player who actually played: drop rows whose
# Pos contains one of the non-player labels and rows whose Min contains "DNP".
todate <- todate[
  !grepl("RotoGuru|Unlisted|Guards|Centers|Forwards", todate$Pos) &
    !grepl("DNP", todate$Min),
]

# Discard rows with no minutes recorded.
todate <- drop_na(todate, Min)

# A missing position is relabelled 'Unlisted' (per the original note, the site
# leaves the position empty for such players).
todate[["Pos"]] <- ifelse(is.na(todate[["Pos"]]), 'Unlisted', todate[["Pos"]])
# Strip "$" and "," from the salary text; as.numeric() cannot parse them.
todate$Salary <- gsub("[$,]", "", todate$Salary)

# Sanity check: number of missing values in each column of the bound frame.
vapply(todate, function(col) sum(is.na(col)), integer(1))

# Points and salary (columns 3 and 4) become numeric.
todate$Pts <- as.numeric(todate$Pts)
todate$Salary <- as.numeric(todate$Salary)

# Rows whose salary could not be converted are dropped (unlisted players --
# the reason they are unlisted is still unknown).
todate <- drop_na(todate, Salary)
# Feature engineering ----

# Normalise player names BEFORE any per-player aggregation. Some rows carry a
# trailing "^" after the name (presumably the site's marker for a starter --
# confirm). Grouping on the raw text would split one player into two groups
# ("Name" and "Name^") and skew every average below. The marker is removed
# outright and surrounding whitespace trimmed, so both spellings collapse to
# the same key.
todate$Name <- trimws(gsub("^", "", todate$Name, fixed = TRUE))

todate <- todate %>%
  # Points per salary dollar for each individual game (row-wise, no grouping
  # needed).
  mutate(Ppd = round(Pts / Salary, 5)) %>%
  group_by(Name) %>%
  mutate(
    # Average DraftKings points and number of games captured per player.
    Avg_Pts = round(mean(Pts), 2),
    Games = n(),
    # Average salary per player.
    Avg_Salary = round(mean(Salary), 2),
    # Ratio of the two per-player averages above. Both inputs are constant
    # within a player, so mean() simply returns that ratio.
    Avg_Ppd = round(mean(Avg_Pts / Avg_Salary), 4)
  ) %>%
  ungroup()

# Position is categorical; store it as a factor.
todate$Pos <- as.factor(todate$Pos)
# Split the free-text statline into one numeric column per stat ----

# Return, for every statline, the number that directly precedes `suffix`.
#
# statline: character vector of stat lines, one per row.
# suffix:   regular-expression fragment that must follow the digits
#           (e.g. "pt", or "-\\d+fg" for the made half of "made-attempted").
#
# The result always has the same length as `statline`; rows in which the stat
# does not appear get 0. str_extract() is used (first match, NA when absent)
# so the values stay aligned with the rows -- flattening the list returned by
# str_extract_all() silently drops rows without a match.
extract_stat <- function(statline, suffix) {
  digits <- str_extract(statline, paste0("\\d+(?=", suffix, ")"))
  value <- as.numeric(digits)
  value[is.na(value)] <- 0
  value
}

todate$Pt <- extract_stat(todate$Statline, "pt")
todate$Rb <- extract_stat(todate$Statline, "rb")
todate$As <- extract_stat(todate$Statline, "as")
todate$St <- extract_stat(todate$Statline, "st")
todate$Bl <- extract_stat(todate$Statline, "bl")
todate$To <- extract_stat(todate$Statline, "to")
todate$Tr <- extract_stat(todate$Statline, "trey")

# Field goals and free throws are written "made-attempted" followed by the
# suffix: the made count is the number before the dash, the attempted count is
# the number right before the suffix.
todate$Fgm <- extract_stat(todate$Statline, "-\\d+fg")
todate$Fga <- extract_stat(todate$Statline, "fg")
todate$Ftm <- extract_stat(todate$Statline, "-\\d+ft")
todate$Fta <- extract_stat(todate$Statline, "ft")
# Convert minutes played into a time-like value. Flagged by the author as
# unfinished; an average-minutes column is still planned.
#
# Step 1: parse the Min text as minutes:seconds (format "%M:%S").
# NOTE(review): strptime() has to fill in the date and hour that the text does
# not contain -- presumably the current date; confirm this is acceptable.
todate$Min <- strptime(todate$Min, format = "%M:%S")

# Step 2: turn the parsed times back into text so the clock portion can be
# pulled out with a pattern.
todate$Min <- as.character(todate$Min)

# Step 3: keep only the digits:digits:digits portion of each string, flatten
# the per-row matches into one vector and hand it to as.ts().
# NOTE(review): unlist() discards rows with no match, so the result can end up
# shorter than the table -- confirm every row yields exactly one match.
# NOTE(review): the values are still character when they reach as.ts();
# verify the resulting column is usable for arithmetic such as averaging.
todate$Min <- str_extract_all(todate$Min, pattern = one_or_more(DGT) %R% ":" %R% one_or_more(DGT) %R% ":" %R% one_or_more(DGT)) %>%
  unlist() %>%
  as.ts()
# Abbreviations of the teams playing today.
today <- c(
  "cle", "orl", "cha", "phi", "ny", "atl", "okc", "mia", "bos",
  "tor", "dal", "hou", "det", "mil", "ind", "no", "sac", "gs"
)

# Restrict the data to players on those teams.
todate <- filter(todate, Team %in% today)

# Per-player average columns, still grouped by Name.
# NOTE(review): rows are selected, not summarised, so a player appears once
# per captured game -- confirm that is intended for the plots below.
# (Open question from the author: why does Caris LeVert have one game that
# sticks out?)
todateavg <- select(
  group_by(todate, Name),
  Name, Avg_Pts, Avg_Salary, Avg_Ppd, Team, Pos
)
todateavg$Name <- as.factor(todate$Name)
# One data frame per roster slot, keyed on the positions eligible for it ----

# Center eligible
center <- filter(todateavg, Pos %in% c("C", "PF/C"))

# Forward eligible (any forward slot)
forward <- filter(
  todateavg,
  Pos %in% c("PF", "PF/C", "SF", "SF/PF", "SG/SF", "PG/SF")
)

# Guard eligible (any guard slot)
guard <- filter(
  todateavg,
  Pos %in% c("PG", "PG/SF", "PG/SG", "SG", "SG/SF")
)

# Small forward eligible
sforward <- filter(todateavg, Pos %in% c("SF", "SF/PF", "SG/SF", "PG/SF"))

# Shooting guard eligible
sguard <- filter(todateavg, Pos %in% c("PG/SG", "SG", "SG/SF"))

# Power forward eligible
pforward <- filter(todateavg, Pos %in% c("PF", "PF/C", "SF/PF"))

# Point guard eligible
pguard <- filter(todateavg, Pos %in% c("PG", "PG/SF", "PG/SG"))

# Info Viz

# Average salary against average points for every player on the slate.
# Marker size follows the points-per-dollar metric (DK points / salary) and
# colour follows the team.
plot_ly(
  data = todateavg,
  x = ~Avg_Salary,
  y = ~Avg_Pts,
  color = ~Team,
  text = ~paste(
    "Name:", Name, "<br>",
    "Avg Points:", Avg_Pts, "<br>",
    "Avg Salary:", Avg_Salary, "<br>",
    "Team:", Team
  ),
  hoverinfo = "text"
) %>%
  add_markers(size = ~Avg_Ppd) %>%
  layout(
    title = "Avg Salary v Avg Points",
    xaxis = list(title = "Avg DK Salary", zeroline = FALSE, showgrid = FALSE),
    yaxis = list(title = "Avg DK Points", zeroline = FALSE, showgrid = FALSE)
  )
# Same scatter restricted to center-eligible players: semi-transparent
# markers sized by average points per dollar and coloured by team.
cplotavg <- plot_ly(
  data = center,
  x = ~Avg_Salary,
  y = ~Avg_Pts,
  text = ~paste(
    "Name:", Name, "<br>",
    "Average Pts:", Avg_Pts, "<br>",
    "Avg Salary:", Avg_Salary, "<br>",
    "Team:", Team
  ),
  hoverinfo = "text"
) %>%
  add_markers(
    color = ~Team,
    size = ~Avg_Ppd,
    marker = list(opacity = 0.3, sizemode = "diameter", sizeref = 2)
  ) %>%
  layout(
    title = "Centers AVG",
    xaxis = list(title = "AVG DK Salary", zeroline = FALSE, showgrid = FALSE),
    yaxis = list(title = "AVG DK Points", zeroline = FALSE, showgrid = FALSE)
  )

# Display the chart.
cplotavg
# Faceted view: one salary-vs-points panel per position, arranged in a
# five-row grid with shared axes.
todateavg %>%
  group_by(Pos) %>%
  do(
    p = plot_ly(
      .,
      x = ~Avg_Salary,
      y = ~Avg_Pts,
      color = ~Team,
      text = ~paste(
        "Name:", Name, "<br>",
        "Average Points:", Avg_Pts, "<br>",
        "Average Salary:", Avg_Salary, "<br>",
        "Team:", Team
      ),
      hoverinfo = "text"
    )
  ) %>%
  subplot(nrows = 5, shareX = TRUE, shareY = TRUE)