library(tidyverse)
library(here)
library(dplyr)
library(janitor)
library(StatsBombR)
library(SBpitch)
library(ggplot2)
library(ggforce)
library(ggsoccer)
library(scales)
wcevents <- read.csv(here("wcevents.csv"))
head(wcevents)
## eventId subEventName tags playerId
## 1 8 Simple pass [{'id': 1801}] 122671
## 2 8 High pass [{'id': 1801}] 139393
## 3 1 Air duel [{'id': 703}, {'id': 1801}] 103668
## 4 1 Air duel [{'id': 701}, {'id': 1802}] 122940
## 5 8 Simple pass [{'id': 1801}] 122847
## 6 8 Simple pass [{'id': 1801}] 122832
## positions matchId eventName teamId matchPeriod
## 1 [{'y': 50, 'x': 50}, {'y': 53, 'x': 35}] 2057954 Pass 16521 1H
## 2 [{'y': 53, 'x': 35}, {'y': 19, 'x': 75}] 2057954 Pass 16521 1H
## 3 [{'y': 81, 'x': 25}, {'y': 83, 'x': 37}] 2057954 Duel 14358 1H
## 4 [{'y': 19, 'x': 75}, {'y': 17, 'x': 63}] 2057954 Duel 16521 1H
## 5 [{'y': 17, 'x': 63}, {'y': 15, 'x': 71}] 2057954 Pass 16521 1H
## 6 [{'y': 15, 'x': 71}, {'y': 11, 'x': 92}] 2057954 Pass 16521 1H
## eventSec subEventId id
## 1 1.656214 85 258612104
## 2 4.487814 83 258612106
## 3 5.937411 10 258612077
## 4 6.406961 10 258612112
## 5 8.562167 85 258612110
## 6 10.991292 85 258612113
croatiadata <- read.csv(here("CroatiaTeamData.csv"))
belgiumdata <- read.csv(here("BelgiumTeamData.csv"))
#Filter the data to get passes from Kevin De Bruyne and Luka Modric across all games in the 2018 World Cup
wc_debruyne <- belgiumdata %>%
filter(player_id == 38021 & event_name == "Pass")
wc_modric <- croatiadata %>%
filter(player_id == 8287 & event_name == "Pass")
# Count the number of records to get total passes from the filtered data
debruyne_record_count <- nrow(wc_debruyne)
modric_record_count <- nrow(wc_modric)
modric_kdb_data <- rbind(wc_modric, wc_debruyne)
glimpse(modric_kdb_data)
## Rows: 772
## Columns: 15
## $ event_id <int> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8…
## $ sub_event_name <chr> "Simple pass", "Simple pass", "Simple pass", "Simple pa…
## $ tags <chr> "[{'id': 1801}]", "[{'id': 1801}]", "[{'id': 1801}]", "…
## $ player_id <int> 8287, 8287, 8287, 8287, 8287, 8287, 8287, 8287, 8287, 8…
## $ start_y <int> 51, 38, 88, 67, 88, 94, 77, 73, 56, 8, 45, 21, 71, 59, …
## $ start_x <int> 29, 64, 44, 36, 55, 52, 53, 28, 45, 79, 33, 56, 46, 46,…
## $ end_y <int> 11, 39, 97, 57, 82, 84, 35, 96, 26, 36, 21, 11, 85, 8, …
## $ end_x <int> 29, 51, 42, 35, 54, 58, 61, 53, 92, 95, 28, 76, 58, 76,…
## $ match_id <int> 2057973, 2057973, 2057973, 2057973, 2057973, 2057973, 2…
## $ event_name <chr> "Pass", "Pass", "Pass", "Pass", "Pass", "Pass", "Pass",…
## $ team_id <int> 9598, 9598, 9598, 9598, 9598, 9598, 9598, 9598, 9598, 9…
## $ match_period <chr> "1H", "1H", "1H", "1H", "1H", "1H", "1H", "1H", "1H", "…
## $ event_sec <dbl> 49.49613, 60.22805, 83.27966, 187.61858, 223.42676, 232…
## $ sub_event_id <int> 85, 85, 85, 85, 85, 85, 85, 85, 83, 80, 85, 85, 85, 83,…
## $ id <int> 258792824, 258792841, 258792859, 258792919, 258792929, …
# Define pitch dimensions
pitch_wyscout <- list(
length = 100,
width = 100,
penalty_box_length = 16,
penalty_box_width = 62,
six_yard_box_length = 6,
six_yard_box_width = 26,
penalty_spot_distance = 10,
goal_width = 12,
origin_x = 0,
origin_y = 0
)
create_wyscout_pitch <- function() {
ggplot() +
# Full pitch outline
geom_rect(aes(xmin = pitch_wyscout$origin_x, xmax = pitch_wyscout$length,
ymin = pitch_wyscout$origin_y, ymax = pitch_wyscout$width),
color = "grey", fill = NA) +
# Penalty box (left)
geom_rect(aes(xmin = pitch_wyscout$origin_x,
xmax = pitch_wyscout$penalty_box_length,
ymin = (pitch_wyscout$width - pitch_wyscout$penalty_box_width) / 2,
ymax = (pitch_wyscout$width + pitch_wyscout$penalty_box_width) / 2),
color = "grey", fill = NA) +
# Penalty box (right)
geom_rect(aes(xmin = pitch_wyscout$length - pitch_wyscout$penalty_box_length,
xmax = pitch_wyscout$length,
ymin = (pitch_wyscout$width - pitch_wyscout$penalty_box_width) / 2,
ymax = (pitch_wyscout$width + pitch_wyscout$penalty_box_width) / 2),
color = "grey", fill = NA) +
# Six-yard box (left)
geom_rect(aes(xmin = pitch_wyscout$origin_x,
xmax = pitch_wyscout$six_yard_box_length,
ymin = (pitch_wyscout$width - pitch_wyscout$six_yard_box_width) / 2,
ymax = (pitch_wyscout$width + pitch_wyscout$six_yard_box_width) / 2),
color = "grey", fill = NA) +
# Six-yard box (right)
geom_rect(aes(xmin = pitch_wyscout$length - pitch_wyscout$six_yard_box_length,
xmax = pitch_wyscout$length,
ymin = (pitch_wyscout$width - pitch_wyscout$six_yard_box_width) / 2,
ymax = (pitch_wyscout$width + pitch_wyscout$six_yard_box_width) / 2),
color = "grey", fill = NA) +
# Center circle
geom_point(aes(x = pitch_wyscout$length / 2, y = pitch_wyscout$width / 2),
size = 0.5, color = "grey") +
geom_circle(aes(x0 = pitch_wyscout$length / 2, y0 = pitch_wyscout$width / 2, r = 9.15),
color = "grey") +
# Goals
geom_segment(aes(x = pitch_wyscout$origin_x,
xend = pitch_wyscout$origin_x,
y = (pitch_wyscout$width - pitch_wyscout$goal_width) / 2,
yend = (pitch_wyscout$width + pitch_wyscout$goal_width) / 2),
color = "black", size = 3) +
geom_segment(aes(x = pitch_wyscout$length,
xend = pitch_wyscout$length,
y = (pitch_wyscout$width - pitch_wyscout$goal_width) / 2,
yend = (pitch_wyscout$width + pitch_wyscout$goal_width) / 2),
color = "black", size = 3) +
# Center line
geom_segment(aes(x = pitch_wyscout$length / 2, xend = pitch_wyscout$length / 2,
y = pitch_wyscout$origin_y, yend = pitch_wyscout$width),
color = "grey") +
# Adjust theme
theme(
panel.grid = element_blank(),
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank()
) +
coord_fixed() +
labs(title = "Football Pitch")
}
# Plot Kevin De Bruyne's pass distribution
create_wyscout_pitch() +
# Pass arrows (Red for De Bruyne)
geom_segment(data = wc_debruyne,
aes(x = start_x, y = start_y, xend = end_x, yend = end_y),
lineend = "round", linewidth = 0.2,
arrow = arrow(length = unit(0.08, "inches")),
color = "red", alpha = 1) +
# Adjust transparency
labs(
title = "Kevin De Bruyne Pass Distribution (World Cup 2018)",
) +
theme(
legend.position = "right",
) +
coord_fixed(ratio = 60 / 100)
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
ggsave("Lab3DeBruynePasses.png")
## Saving 7 x 5 in image
#Filter passes to end in final third
wc_debruyne_finalthird <- belgiumdata %>%
filter(player_id == 38021 & event_name == "Pass" & end_x > 75 & end_y >20 & end_y <80)
# Plot Kevin De Bruyne's pass distribution
create_wyscout_pitch() +
# Pass arrows (Red for De Bruyne)
geom_segment(data = wc_debruyne_finalthird,
aes(x = start_x, y = start_y, xend = end_x, yend = end_y),
lineend = "round", linewidth = 0.2,
arrow = arrow(length = unit(0.08, "inches")),
color = "red", alpha = 1) + # Adjust transparency
# Attack direction arrow (adjust coordinates as needed)
annotate("segment", x = 10, xend = 40, y = 105, yend = 105,
arrow = arrow(length = unit(0.3, "inches")), color = "black", size = 1.2) +
annotate("text", x = 25, y = 108, label = "Attacking", size = 5, fontface = "bold") +
labs(
title = "Kevin De Bruyne Pass Distribution (World Cup 2018)",
) +
theme(
legend.position = "right",
) +
coord_fixed(ratio = 60 / 100)
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
ggsave("Lab3DeBruyneFinalThirdPasses.png")
## Saving 7 x 5 in image
#Filter passes to end in final third
wc_modric_finalthird <- croatiadata %>%
filter(player_id == 8287 & event_name == "Pass" & end_x > 75 & end_y >20 & end_y <80)
# Plot Luka Modric's pass distribution
create_wyscout_pitch() +
# Pass arrows (Blue for Modric)
geom_segment(data = wc_modric_finalthird,
aes(x = start_x, y = start_y, xend = end_x, yend = end_y),
lineend = "round", linewidth = 0.2,
arrow = arrow(length = unit(0.08, "inches")),
color = "blue", alpha = 1) + # Adjust transparency
# Attack direction arrow (adjust coordinates as needed)
annotate("segment", x = 10, xend = 40, y = 105, yend = 105,
arrow = arrow(length = unit(0.3, "inches")), color = "black", size = 1.2) +
annotate("text", x = 25, y = 108, label = "Attacking", size = 5, fontface = "bold") +
labs(
title = "Luka Modric Pass Distribution (World Cup 2018)",
) +
theme(
legend.position = "right",
) +
coord_fixed(ratio = 60 / 100)
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
ggsave("Lab3ModricFinalThirdPasses.png")
## Saving 7 x 5 in image
# Generate the subtitle with the record count
subtitle_text <- paste("Kevin DeBruyne had", debruyne_record_count, "total passes across the seven matches in the 2018 World Cup, with",wc_debruyne_finalthird, "being categorized as 'dangerous passes'.")
# Plot Kevin De Bruyne's pass distribution
create_wyscout_pitch() +
# Pass arrows (Red for De Bruyne)
geom_segment(data = wc_debruyne_finalthird,
aes(x = start_x, y = start_y, xend = end_x, yend = end_y),
lineend = "round", linewidth = 0.2,
arrow = arrow(length = unit(0.08, "inches")),
color = "red", alpha = 1) +
# Density heatmap using pass counts
stat_density_2d_filled(
data = wc_debruyne,
aes(x = start_x, y = start_y, fill = as.numeric(after_stat(level))), # Only use `fill = after_stat(level)`
alpha = 0.7,
contour = TRUE
) +
# Adjust legend to show number of passes with white-to-blue scale
scale_fill_gradient(
low = "white", high = "red", # White for low density, blue for high density
name = "Number of Passes",
labels = scales::comma_format() # Format large numbers properly
) +
# Attack direction arrow (adjust coordinates as needed)
annotate("segment", x = 10, xend = 40, y = 105, yend = 105,
arrow = arrow(length = unit(0.3, "inches")), color = "black", size = 1.2) +
annotate("text", x = 25, y = 108, label = "Attacking", size = 5, fontface = "bold") +
# Adjust transparency
labs(
title = "Kevin De Bruyne Pass Distribution (World Cup 2018)",
subtitle = subtitle_text
) +
theme(
legend.position = "right",
) +
coord_fixed(ratio = 60 / 100)
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
ggsave("Lab3DeBruyneHeatMap.png")
## Saving 7 x 5 in image
# Generate the subtitle with the record count
subtitle_text <- paste("Luka Modric had", modric_record_count, "total passes across the seven matches in the 2018 World Cup, with",wc_modric_finalthird, "being categorized as 'dangerous passes'.")
# Plot Luka Modric's pass distribution
create_wyscout_pitch() +
# Pass arrows (Blue for Modric)
geom_segment(data = wc_modric_finalthird,
aes(x = start_x, y = start_y, xend = end_x, yend = end_y),
lineend = "round", linewidth = 0.2,
arrow = arrow(length = unit(0.08, "inches")),
color = "blue", alpha = 1) +
# Density heatmap using pass counts
stat_density_2d_filled(
data = wc_modric,
aes(x = start_x, y = start_y, fill = as.numeric(after_stat(level))), # Only use `fill = after_stat(level)`
alpha = 0.7,
contour = TRUE
) +
# Adjust legend to show number of passes with white-to-blue scale
scale_fill_gradient(
low = "white", high = "blue", # White for low density, blue for high density
name = "Number of Passes",
labels = scales::comma_format() # Format large numbers properly
) +
# Attack direction arrow (adjust coordinates as needed)
annotate("segment", x = 10, xend = 40, y = 105, yend = 105,
arrow = arrow(length = unit(0.3, "inches")), color = "black", size = 1.2) +
annotate("text", x = 25, y = 108, label = "Attacking", size = 5, fontface = "bold") +
# Adjust transparency
labs(
title = "Luka Modric Pass Distribution (World Cup 2018)",
subtitle = subtitle_text
) +
theme(
legend.position = "right",
) +
coord_fixed(ratio = 60 / 100)
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
ggsave("Lab3ModricHeatMap.png")
## Saving 7 x 5 in image
# Add a "player" column based on player_id
wc_debruyne_start <- wc_debruyne %>% mutate(player = ifelse(player_id == 38021, "De Bruyne", NA))
wc_modric_start <- wc_modric %>% mutate(player = ifelse(player_id == 8287, "Modric", NA))
# Combine both datasets into a single dataframe
wc_combined <- bind_rows(wc_debruyne_start, wc_modric_start)
# Generate the subtitle with the record count
subtitle_text <- paste("De Bruyne Total Passes:", debruyne_record_count, " // Modric Total Passes:", modric_record_count)
# Plot both players' passing distributions
create_wyscout_pitch() +
stat_density_2d_filled(
data = wc_combined,
aes(x = start_x, y = start_y, fill = player, alpha = as.numeric(after_stat(level))), # Reference `player` column
contour = TRUE
) +
scale_fill_manual(
values = c("De Bruyne" = "red", "Modric" = "blue", "NA"="white"), # Assign colors manually
name = "Player",
na.value = "white"
) +
scale_alpha(range = c(0, 0.6), guide = "none") +
# Attack direction arrow (adjust coordinates as needed)
annotate("segment", x = 10, xend = 40, y = 105, yend = 105,
arrow = arrow(length = unit(0.3, "inches")), color = "black", size = 1.2) +
annotate("text", x = 25, y = 108, label = "Attacking", size = 5, fontface = "bold") +
# Adjust transparency
labs(
title = "Kevin De Bruyne & Luka Modric Pass Distribution (World Cup 2018)",
subtitle = subtitle_text
) +
theme(
legend.position = "right",
) +
coord_fixed(ratio = 60 / 100)
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
ggsave("Lab3ComparisonHeatMap.png")
## Saving 7 x 5 in image