1. Spatiotemporal metrics from player tracking data

First things first: load the required packages below.

library(ggplot2)
library(MASS)
library(dplyr)
library(mvtnorm)
library(purrr)
library(tidyr)
# Import the data
tracking_data <- read.csv("metrica_game1_clean.csv")

## Data viz to identify the goalkeepers: scatter a random sample of positions
## for every player, one facet per player, coloured by team.
tracking_data %>%
  group_by(player) %>%
  # slice_sample() supersedes sample_n(): it draws up to 100 rows per player
  # and truncates instead of erroring when a player has fewer than 100 rows.
  slice_sample(n = 100) %>%
  ggplot(aes(x = x, y = y, col = team)) +
  geom_point() +
  theme_bw() +
  coord_equal() +
  facet_wrap(~player)

As you can see above, many different plots could have shown that Player 11 and Player 25 were the goalkeepers, but this scatter plot shows it quite clearly. It also shows that the data contains both halves without flipping, so that each team only goes one way.

Next, we are going to remove the goalkeepers.

# Drop the two goalkeepers so only the remaining players are analysed

tracking_data <- tracking_data[
  !(tracking_data$player %in% c("Player25", "Player11")),
]
# Team shape per frame: centroid, length and width
space_metrics <- summarise(
  group_by(tracking_data, period, frame, team),

  # Carry the timestamp and ball location through to the summary
  # (the first value in each group is taken)
  time = first(time),
  ball_x = first(ball_x),
  ball_y = first(ball_y),

  # Number of players contributing to this team/frame
  n_player = n(),

  # Centroid coordinates
  mean_x = mean(x),
  mean_y = mean(y),

  # Length is the spread of the players along x, width the spread along y
  length = diff(range(x)),
  width = diff(range(y))
)
## `summarise()` has grouped output by 'period', 'frame'. You can override using
## the `.groups` argument.
# Inspect the home team's metrics in the first frame
filter(space_metrics, team == "Home_Team", frame == 1)
## # A tibble: 1 × 11
## # Groups:   period, frame [1]
##   period frame team       time ball_x ball_y n_player mean_x mean_y length width
##    <int> <dbl> <chr>     <dbl>  <dbl>  <dbl>    <int>  <dbl>  <dbl>  <dbl> <dbl>
## 1      1     1 Home_Team  0.04   54.6   31.0       10   48.6   34.1   29.2  41.1
# Convex hull area, worked through on a single frame

## Keep only one team's players in one frame
oneframe <- filter(tracking_data, team == "Home_Team", frame == 1)

## chull() takes the x and y coordinates and returns the row numbers of the
## points that lie on the convex hull
help("chull")
hull_rows <- with(oneframe, chull(x, y))
hull_pts <- oneframe[hull_rows, c("x", "y")]
hull_pts
##         x       y
## 9 54.4656 16.9392
## 5 38.5644 17.0096
## 4 37.1124 28.4232
## 1 39.1776 52.2576
## 6 49.3128 58.0712
## 2 66.2916 34.6152
## Polygon area via splancs' areapl()
library(splancs)
help("areapl")
## areapl() wants the hull points from chull() as a matrix, not a data frame
hull_pts %>%
  as.matrix() %>%
  areapl()
## [1] 821.5264
#' Area of the convex hull of a set of 2-D points
#'
#' Finds the hull vertices with `chull()` and measures the resulting polygon
#' with `areapl()` (splancs must be attached).
#'
#' @param x Numeric vector of x coordinates.
#' @param y Numeric vector of y coordinates, the same length as `x`.
#' @param na.rm If `TRUE`, points with a non-finite coordinate are dropped
#'   before the hull is computed. With the default `FALSE` the coordinates
#'   are passed to `chull()` untouched.
#' @return The hull area as a single number. Hulls with fewer than three
#'   vertices (no points, one point, a segment) enclose nothing and give 0.
hull_area <- function(x, y, na.rm = FALSE) {
  stopifnot(length(x) == length(y))

  if (na.rm) {
    keep <- is.finite(x) & is.finite(y)
    x <- x[keep]
    y <- y[keep]
  }

  hull_rows <- chull(x, y)

  # Not a polygon: skip areapl() rather than feed it a degenerate shape.
  if (length(hull_rows) < 3) {
    return(0)
  }

  hull_pts <- cbind(x[hull_rows], y[hull_rows])
  areapl(hull_pts)
}

## Sanity check on the single frame used above
with(oneframe, hull_area(x, y))
## [1] 821.5264
# Convex hull area of every team in every frame
team_areas <- summarise(
  group_by(tracking_data, period, frame, team),
  area = hull_area(x, y)
)
## `summarise()` has grouped output by 'period', 'frame'. You can override using
## the `.groups` argument.
## Time series: team length over time, then hull area by frame
space_metrics %>%
  ggplot(aes(x = time, y = length, colour = team)) +
  geom_line()

team_areas %>%
  ggplot(aes(x = frame, y = area, colour = team)) +
  geom_line()

# Home vs. away hull area, frame by frame
## Reshape from long (one row per team and frame) to wide (one column per team)
wide_areas <- team_areas %>%
  pivot_wider(names_from = team, values_from = area)


# Very transparent points make the dense regions (the area modes) stand out;
# the dashed blue line is y = x, i.e. equal areas for both teams
wide_areas %>%
  ggplot(aes(x = Home_Team, y = Away_Team)) +
  geom_point(size = 1, alpha = 0.01) +
  geom_abline(intercept = 0, slope = 1, colour = "blue", linetype = "dashed") +
  coord_fixed() +
  theme_bw()

2. Simulated vs. theoretical distributions

2.1 1-D normal distributions

  • Simulate 5000 observations of 1-D normal data with $\mu = 0$ and $\sigma = 1$:

    # 5000 standard-normal draws (rnorm() defaults to mean 0, sd 1),
    # stored in a one-column data frame for plotting
    simdata <- rnorm(5000)
    sim1d <- data.frame(simdata = simdata)

Plot it!

# Kernel density estimate of the simulated sample
sim1d %>%
  ggplot(aes(x = simdata)) +
  geom_density()

  • Visualize the theoretical distribution

    # choose a range of values to plot over:
    # 100 equally spaced points between -3 and 3
    xrange <- seq(-3, 3, length.out = 100)

    # get the theoretical value of the density at each of these points
    truedata <- dnorm(x = xrange, mean = 0, sd = 1)

    # make a data frame
    true1d <- data.frame(x = xrange, d = truedata)

    # Map the data frame's own column `x`, not the global vector `xrange`,
    # so the plot depends only on `true1d`.
    ggplot(data = true1d, aes(x = x, y = d)) +
      geom_line()

Compare the two by overlaying them in a single plot:

# Overlay the theoretical curve and the simulated density.
# `x` is the column of `true1d`; mapping the global `xrange` only worked
# because that vector happened to hold the same values.
ggplot() +
  geom_line(data = true1d, aes(x = x, y = d, col = "theoretical")) +
  geom_density(data = sim1d, aes(x = simdata, col = "simulated"))

2.2 2-D normal distributions

Simulated data

# Mean vector: one entry per variable (N = 2 variables here)
mu <- rep(0, 2)

# N x N covariance matrix: the identity (unit variances, zero covariance)
Sigma <- diag(2)
Sigma
##      [,1] [,2]
## [1,]    1    0
## [2,]    0    1
# Draw n points from the bivariate normal defined by mu and Sigma
## for function details: ?rmvnorm
n <- 1000
simdata <- rmvnorm(n, mean = mu, sigma = Sigma)
# The matrix columns become V1 and V2 in the data frame
sim2d <- as.data.frame(simdata)
sim2d %>%
  ggplot(aes(x = V1, y = V2)) +
  geom_point() +
  coord_fixed()

# Same scatter with 2-D kernel density contours drawn on top
sim2d %>%
  ggplot(aes(x = V1, y = V2)) +
  geom_point() +
  geom_density_2d() +
  coord_fixed()

# Filled 2-D density bands. after_stat(level) replaces the deprecated
# `..level..` notation for referring to a variable computed by the stat.
ggplot(data = sim2d, aes(x = V1, y = V2)) +
  stat_density_2d(
    aes(fill = after_stat(level)),
    geom = "polygon",
    colour = "white"
  ) +
  scale_fill_viridis_c() +
  coord_equal()

Theoretical 2D distribution:

# Evaluate the theoretical density on a 100 x 100 grid over [-3, 3] x [-3, 3]
range2d <- expand.grid(
  V1 = seq(from = -3, to = 3, length.out = 100),
  V2 = seq(from = -3, to = 3, length.out = 100)
)

# One grid point per row, passed to dmvnorm() as a two-column matrix
range2d[["truedata"]] <- dmvnorm(
  x = cbind(range2d$V1, range2d$V2),
  mean = mu,
  sigma = Sigma
)
range2d %>%
  ggplot(aes(x = V1, y = V2, z = truedata)) +
  stat_contour_filled()

Now change the covariance matrix:

# Mean vector: one entry per variable (N = 2 variables here)
mu <- rep(0, 2)

# N x N covariance matrix, written row by row:
# variances 4 and 3 on the diagonal, covariance 2 off it
Sigma <- rbind(c(4, 2), c(2, 3))
Sigma
##      [,1] [,2]
## [1,]    4    2
## [2,]    2    3
# Draw n points from the bivariate normal with the new covariance matrix
## for function details: ?rmvnorm
n <- 1000
simdata <- rmvnorm(n, mean = mu, sigma = Sigma)
# The matrix columns become V1 and V2 in the data frame
sim2d <- as.data.frame(simdata)
sim2d %>%
  ggplot(aes(x = V1, y = V2)) +
  geom_point() +
  geom_density_2d() +
  coord_fixed()

# Filled 2-D density bands. after_stat(level) replaces the deprecated
# `..level..` notation for referring to a variable computed by the stat.
ggplot(data = sim2d, aes(x = V1, y = V2)) +
  stat_density_2d(
    aes(fill = after_stat(level)),
    geom = "polygon",
    colour = "white"
  ) +
  scale_fill_viridis_c() +
  coord_equal()

# Theoretical density for the new covariance matrix on the same 100 x 100 grid.
# NOTE(review): with variances 4 and 3 the [-3, 3] grid covers only about
# 1.5 standard deviations along V1 -- confirm whether a wider grid is wanted.
range2d <- expand.grid(
  V1 = seq(from = -3, to = 3, length.out = 100),
  V2 = seq(from = -3, to = 3, length.out = 100)
)

# One grid point per row, passed to dmvnorm() as a two-column matrix
range2d[["truedata"]] <- dmvnorm(
  x = cbind(range2d$V1, range2d$V2),
  mean = mu,
  sigma = Sigma
)
range2d %>%
  ggplot(aes(x = V1, y = V2, z = truedata)) +
  stat_contour_filled()

That’s all thanks!!!