code
# Setup: clear every object in the workspace except `params`, load the
# tidyverse, define the plot accent color, and read the raw scouting data.
rm(list = setdiff(ls(), "params"))
library(tidyverse)

blair_red <- "#a7000a" # gotta use our exact colors

# show_col_types = FALSE silences readr's column-specification message
raw <- read_csv("raw_2023mrcmp.csv", show_col_types = FALSE)
This is a simple example of how you might write a script to generate scouting reports for the 2023 FRC game, Charged Up.
# Setup: clear every object in the workspace except `params`, load the
# tidyverse, define the plot accent color, and read the raw scouting data.
rm(list = setdiff(ls(), "params"))
library(tidyverse)

blair_red <- "#a7000a" # gotta use our exact colors

# show_col_types = FALSE silences readr's column-specification message
raw <- read_csv("raw_2023mrcmp.csv", show_col_types = FALSE)
This code chunk will convert the data from being “by team-match” to being “by team”. That is, we are grouping by the `Team #`
variable and then summarizing the data into statistics that will be easy for us to plot.
# Collapse the per-match rows in `raw` into one row of summary statistics per
# team. Every "*_avg" / "*_pct" column is a per-team total divided by the
# number of rows (matches) that team has in `raw`.
#
# aside for nerds: many of these constructions take the sum after adding the
# variables together - meaning we combine the variables into one big vector and
# then add that up. Because addition is associative it gets the same result,
# but a more expressive way to do this might be to take the sum() of each
# variable, and then add those results up. But that'd be harder to read, so
# screw that.
data <- raw %>%
  group_by(`Team #`) %>%
  summarize(
    matches = n(),
    mobility_pct = sum(Mobility) / n(),
    auto_n_avg =
      sum(`Auto Cone Upper` + `Auto Cone Mid` + `Auto Cone Low` +
        `Auto Cube Upper` + `Auto Cube Mid` + `Auto Cube Low`) / n(),
    auto_engage_pct = sum(`Auto Engaged`) / n(),
    auto_dock_pct = sum(`Auto Docked`) / n(),
    # weights of 12 and 8 are presumably the point values for engaging and
    # docking in auto - verify against the game manual
    auto_bridge_avg =
      ((sum(`Auto Engaged`) * 12) + (sum(`Auto Docked`) * 8)) / n(),
    tele_n_avg =
      sum(`Cone Upper` + `Cone Mid` + `Cone Low` + `Cube Upper` +
        `Cube Mid` + `Cube Low`) / n(),
    n_cones_avg =
      sum(`Auto Cone Upper` + `Auto Cone Mid` + `Auto Cone Low` +
        `Cone Upper` + `Cone Mid` + `Cone Low`) / n(),
    n_cubes_avg =
      sum(`Cube Upper` + `Cube Mid` + `Cube Low` + `Auto Cube Upper` +
        `Auto Cube Mid` + `Auto Cube Low`) / n(),
    high_avg = sum(`Cone Upper` + `Cube Upper` +
      `Auto Cone Upper` + `Auto Cube Upper`) / n(),
    mid_avg = sum(`Cone Mid` + `Cube Mid` +
      `Auto Cone Mid` + `Auto Cube Mid`) / n(),
    low_avg = sum(`Cone Low` + `Cube Low` +
      `Auto Cone Low` + `Auto Cube Low`) / n()
  )

# total game pieces per match = auto pieces + teleop pieces
data$n_pieces_avg <- data$auto_n_avg + data$tele_n_avg

# this line of code rounds all numeric variables to 2 decimal places
data <- data %>%
  mutate(across(where(is.numeric), \(x) round(x, 2)))

# team numbers are labels, not quantities, so store them as a factor
data$`Team #` <- factor(data$`Team #`)
# Collect the six teams of the upcoming match from the report parameters.
red_alliance <- c(params$red1, params$red2, params$red3)
blue_alliance <- c(params$blue1, params$blue2, params$blue3)

# Keep only those six teams and tag each row with its alliance.
viz <- data[data$`Team #` %in% c(red_alliance, blue_alliance), ]
viz$color <- ifelse(viz$`Team #` %in% red_alliance, "red", "blue")
# turning this into a factor makes ordering much easier later
viz$`Team #` <- factor(viz$`Team #`,
  levels = c(red_alliance, blue_alliance),
  labels = c(red_alliance, blue_alliance)
)
# Scatter of average cones vs. average cubes per match, one label per team,
# colored by alliance. The dashed line marks equal cone and cube averages.
ggplot(viz, aes(x = n_cones_avg, y = n_cubes_avg)) +
  geom_label(aes(label = as.character(`Team #`), color = color)) +
  # could replace this slope with a better value
  geom_abline(slope = 1, color = blair_red, linetype = "dashed") +
  # `color` holds the strings "red"/"blue"; without a manual scale ggplot
  # assigns its default palette alphabetically, which draws the blue alliance
  # in a reddish hue and the red alliance in teal
  scale_color_manual(values = c(red = "red", blue = "blue")) +
  labs(
    title = "Cone/Cube Summary",
    x = "Average # of Cones Scored", y = "Average # of Cubes Scored",
    color = "Alliance"
  ) +
  # leaving the upper limit as NA tells ggplot to compute it from the data
  xlim(0, NA) +
  ylim(0, NA) +
  theme_bw()
# Stacked bar chart of average game pieces per team, split by scoring level.
tmp <- viz # store the old format of the data temporarily

# one row per team per level, with the average in `game_piece_avg`
viz <- viz %>%
  pivot_longer(
    cols = c("high_avg", "mid_avg", "low_avg"),
    names_to = "level",
    values_to = "game_piece_avg"
  )

# fix the stacking/legend order and give the levels readable labels
viz$level <- factor(viz$level,
  levels = c("high_avg", "mid_avg", "low_avg"),
  labels = c("High", "Mid", "Low")
)

ggplot(viz, aes(x = `Team #`, y = game_piece_avg, fill = level)) +
  geom_bar(position = "stack", stat = "identity") +
  labs(
    title = "Level Summary",
    x = "Team", y = "Average # of Game Pieces", fill = "Level"
  ) +
  theme_bw()

viz <- tmp # restore the old format
rm(tmp) # tmp is no longer needed
Warning: This is actually quite complicated! There are many subtle ways to be wrong with this particular operation. Please carefully read the code and the comments to make sure you understand what’s happening here.
This heatmap will show the distribution of two categorical variables for a single team. All steps up to the plotting step are computing a dataframe that knows all we need to know for all the teams, but during the plotting step we subset to only the team we care about.
We need data that has 4 columns: `team`, `level`, `type`, and `avg`. `level` refers to low/mid/high, `type` refers to cone/cube, and `avg` refers to the average number that that team scored at that level of that type. (So for example one row might be: “449/high/cone/1.8” implying that Team #449 averaged scoring 1.8 high cones per match.)
tmp <- viz # store the old structure of the viz data

# Per-team average pieces per match for every level/type combination (auto and
# teleop combined). The column names follow the pattern "<level>_<type>".
viz <- raw %>%
  group_by(`Team #`) %>%
  summarize(
    high_cone = sum(`Auto Cone Upper` + `Cone Upper`) / n(),
    mid_cone = sum(`Auto Cone Mid` + `Cone Mid`) / n(),
    low_cone = sum(`Auto Cone Low` + `Cone Low`) / n(),
    high_cube = sum(`Auto Cube Upper` + `Cube Upper`) / n(),
    mid_cube = sum(`Auto Cube Mid` + `Cube Mid`) / n(),
    low_cube = sum(`Auto Cube Low` + `Cube Low`) / n()
  )
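This is where the magic happens. We call `pivot_longer` on the `viz` dataframe to reshape the data the way we want. Instead of one column per level/type combination (`high_cone`, `mid_cube`, …), we get one row per team, level, and type, with the value in `avg`.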
# Reshape from one column per level/type combination to one row per
# team/level/type, with the average stored in `avg`.
viz <- viz %>%
  pivot_longer(
    # select all cols except `Team #`
    cols = -`Team #`,
    # break up the names of the columns into "level" and "type"
    names_to = c("level", "type"),
    # "_" is the separator character for the column names
    names_sep = "_",
    # send the values (the numbers) to "avg"
    values_to = "avg"
  )
Here, we’ll set the data types of the `level` and `type` columns of the `viz` dataframe to be factors, with an ordering that makes sense (low -> mid -> high, cubes -> cones).
# Convert `level` and `type` to factors so the plot axes follow this explicit
# order rather than alphabetical order.
viz <- viz %>%
  mutate(
    level = factor(level, levels = c("low", "mid", "high")),
    type = factor(type, levels = c("cube", "cone"))
  )
Now that the reshaping is done - plotting the actual heatmap is really quite easy! `ggplot` does all the work for us.
# subset the data to only the team we care about
# for this example we'll look at whoever's red1 for the upcoming match, but
# if you want to see all the teams in the upcoming match you'd need 6 plots!
target <- params$red1

# one tile per level/type combination, shaded by that team's average
ggplot(viz[viz$`Team #` == target, ], aes(x = level, y = type)) +
  geom_tile(aes(fill = avg)) +
  labs(
    title = paste("Scoring Heatmap for Team", target),
    x = "Scoring Level", y = "Game Piece Type",
    fill = "Average Game Pieces"
  ) +
  theme_bw()
Let’s get into some more advanced plotting stuff here. For example… the blue color palette kinda sucks. It doesn’t communicate to the viewer well - I would assume the darker colors are more scoring. Also, the borders are ugly, and I want the values plotted inside the heatmap so I can read ’em off.
Also - we missed a big opportunity to make the plot more readable by putting Scoring Level on the x-axis instead of the y-axis. Let’s fix that. Isn’t it nicer to have the low game pieces low on the graph?
Lastly, that legend isn’t really doing anything. Let’s take it out.
Challenge question for you: why is green a particularly bad choice for the text against a red background? How could I improve the plot?
Hint:
# This one's particularly subtle if you aren't a member of the affected population.
# The affected population is about 8% of men and 0.5% of women.
ggplot(viz[viz$`Team #` == target, ], aes(x = type, y = level)) +
  # color = "black" refers to the color of the borders - it defaults to transparent.
  # lwd stands for "line width"
  geom_tile(aes(fill = avg), color = "black", lwd = 0.8) +
  # geom_text will let us put the text inside the boxes
  # why do we have to round the output?
  geom_text(aes(label = round(avg, digits = 3)), color = "green", size = 4) +
  # this section fixes the color palette.
  # note the use of `blair_red` as the "high". This is KEY. :p
  scale_fill_gradient(low = "white", high = blair_red) +
  labs(
    title = paste("Scoring Heatmap for Team", target),
    x = "Game Piece Type", y = "Scoring Level",
    fill = "Average Game Pieces"
  ) +
  # theme_bw() is a complete theme: adding it replaces every theme setting made
  # before it, so it has to come BEFORE the legend tweak or the legend returns
  theme_bw() +
  # this line removes the legend
  theme(legend.position = "none")

viz <- tmp # restore the old structure of the viz data