===================================================================================================
We have written a few functions in R that could be useful, and can be applied to any similar data frame. The data frame must contain columns, \(player\), \(played\_est\), and \(minute\).
The function \(get\_starters()\) inputs a data frame, filters the data, and will return a data frame with only the starters for each game. We have defined a player as a starter, if \(played\_est = 1\) when \(minute=1\).
get_starters <- function(data) {
n <- length(unique(data$gameId))
m <- length(unique(data$player))
my_df <- data.frame()
for (i in 1:n) {
for (j in 1:m) {
game <- which(data$gameId == unique(data$gameId)[i]) # vector of unique games
player <- which(data$player == unique(data$player)[j]) # vector of unique players
if (!(!length(intersect(game, player)))) {
if (data[intersect(game, player), ]$played_est[1] == 1) {
starter <- data[intersect(game, player), ]
my_df <- rbind(my_df, starter)
}
}
}
}
return(my_df)
}
The function \(get\_bench()\) inputs a data frame, filters the data, and will return a data frame with only the bench players for each game. We have defined a player as a bench player, if \(played\_est=0\) when \(minute=1\).
get_bench <- function(data) {
n <- length(unique(data$gameId))
m <- length(unique(data$player))
my_df <- data.frame()
# pb <- txtProgressBar(min = 0, max = n, style = 3)
count = 0
for (i in 1:n) {
for (j in 1:m) {
game <- which(data$gameId == unique(data$gameId)[i])
player <- which(data$player == unique(data$player)[j])
if (!(!length(intersect(game, player)))) {
if (data[intersect(game, player), ]$played_est[1] == 0) {
bench <- data[intersect(game, player), ]
my_df <- rbind(my_df, bench)
}
}
}
# setTxtProgressBar(pb, i)
}
return(my_df)
}
The function \(filter\_absent()\) inputs a data frame, and will return a data frame with only the rows when a player has played in that game. If the sum of \(played\_est=0\) for a certain player during an individual game, then the data from that player/game is removed.
filter_absent <- function(data) {
n <- length(unique(data$gameId))
m <- length(unique(data$player))
my_df <- data.frame()
for (i in 1:n) {
for (j in 1:m) {
game <- which(data$gameId == unique(data$gameId)[i]) # vector of unique games
player <- which(data$player == unique(data$player)[j]) # vector of unique players
if (!(!length(intersect(game, player)))) {
if (sum(data[intersect(game, player), ]$played_est) != 0) {
non_absent <- data[intersect(game, player), ]
my_df <- rbind(my_df, non_absent)
}
}
}
}
return(my_df)
}
filter_player <- function(data, player1) {
my_df = data[which(data$player == player1), ]
return(my_df)
}
filter_minutes <- function(data, minutes) {
my_df = data[which(data$minute %in% minutes), ]
return(my_df)
}
add_quarters <- function(data1) {
data1$quarter <- ifelse(data1$minute <= 48, "4", "5")
data1$quarter <- ifelse(data1$minute <= 36, "3", data1$quarter)
data1$quarter <- ifelse(data1$minute <= 24, "2", data1$quarter)
data1$quarter <- ifelse(data1$minute <= 12, "1", data1$quarter)
return(data1)
}
add_cum_mins <- function(data) {
n <- length(unique(data$gameId))
m <- length(unique(data$player))
my_df <- data.frame()
for (i in 1:n) {
for (j in 1:m) {
game <- which(data$gameId == unique(data$gameId)[i]) # vector of unique games
player <- which(data$player == unique(data$player)[j])
player_game <- data[intersect(game, player), ]
player_game$cum_min <- cumsum(data[intersect(game, player), ]$mins_round)
final <- transform(player_game, consec_min = mins_round * ave(mins_round,
c(0L, cumsum(diff(mins_round) != 0)), FUN = seq_along))
my_df <- rbind(my_df, final)
}
}
return(my_df)
}
The function \(make\_heat()\) will aggregate the inputted data frame, taking the mean of the \(heat\) per each group of the \(y\_dim\), then genereate a heat map.
make_heat <- function(data, x_dim, y_dim, heat) {
sub <- data[which(data[x_dim] == data[x_dim][1, 1]), ]
my_df <- aggregate(sub[heat], by = list(sub[[y_dim]]), mean)
n <- length(unique(data[[x_dim]]))
cbind.fill <- function(...) {
transpoted <- lapply(list(...), t)
transpoted_dataframe <- lapply(transpoted, as.data.frame)
return(data.frame(t(rbind.fill(transpoted_dataframe))))
}
for (i in (unique(data[x_dim])[1, ] + 1):(unique(data[x_dim])[n, ])) {
sub <- data[which(data[[x_dim]] == i), ]
my_df <- cbind.fill(my_df, aggregate(sub[heat], by = list(sub[[y_dim]]),
mean)[2])
}
names(my_df)[1] <- c("Player")
names(my_df)[2:(n + 1)] <- as.character((unique(data[x_dim])[1, ]):(unique(data[x_dim])[n,
]))
my_df_melt <- melt(my_df, id.vars = "Player")
my_df_melt[is.na(my_df_melt)] <- 0
my_df_melt[3] <- as.numeric(data.matrix(my_df_melt[3]))
names(my_df_melt) <- c("Player", "Minute", "Probability")
base_size <- 8
p <- ggplot(my_df_melt, aes(Minute, Player))
p <- p + geom_tile(aes(fill = Probability), colour = "white")
p <- p + scale_fill_gradient(low = "red", high = "yellow", limits = c(0,
1))
p <- p + theme_grey(base_size = base_size)
p <- p + labs(x = x_dim, y = "")
p <- p + scale_x_discrete(expand = c(0, 0))
p <- p + scale_y_discrete(expand = c(0, 0))
p <- p + ggtitle("Heat Map: When a player is in the Game")
p <- p + theme(panel.background = element_rect(fill = "white"), axis.ticks = element_blank(),
title = element_text(size = base_size * 1.5, family = "Proxima Nova Bold"),
axis.text.x = element_text(size = base_size * 0.8, angle = 360, hjust = 0,
colour = "grey50"))
p
}
We now explore a heat map.
filtered <- filter_absent(cavs)
make_heat2(data = filtered, x_dim = "minute", y_dim = "player", heat = "played_est")
starters <- get_starters(cavs)
make_heat2(data = starters, x_dim = "minute", y_dim = "player", heat = "played_est")
filtered_bench <- get_bench(filtered)
make_heat2(data = filtered_bench, x_dim = "minute", y_dim = "player", heat = "played_est")
make_heat(data = filtered, x_dim = "fouls", y_dim = "player", heat = "played_est")
make_heat(data = starters, x_dim = "fouls", y_dim = "player", heat = "played_est")
make_heat(data = filtered_bench, x_dim = "fouls", y_dim = "player", heat = "played_est")
LeBron <- filter_player(cavs, "LeBron James")
LeBron <- filter_absent(LeBron)
make_heat3(LeBron, x_dim = "minute", y_dim = "fouls", heat = "played_est")
kyrie <- filter_player(cavs, "Kyrie Irving")
kyrie <- filter_absent(kyrie)
make_heat3(kyrie, x_dim = "minute", y_dim = "fouls", heat = "played_est")
love <- filter_player(cavs, "Kevin Love")
love <- filter_absent(love)
make_heat3(love, x_dim = "minute", y_dim = "fouls", heat = "played_est")
make_consec_min_plot(cavs)