So, today we are going to do some data visualization to figure out which two players were the goalkeepers and filter them out of the data.
We are also going to calculate the length, width and centroid for each team at each time point.
Pull out the data from the first frame for the home team (should only be 10 rows).
Use the functions `chull` and `areapl` (from the splancs package) to calculate the convex hull area.
Convert the code you wrote for the previous task into a function to calculate convex hull area given the x and y player coordinates. Use your function to get the convex hull area for each team in each frame.
Plot the surface areas of the two teams as time series plots.
First things first, load the required packages below.
library(ggplot2)
library(MASS)
library(dplyr)
library(mvtnorm)
library(purrr)
library(tidyr)
# Import the tracking data
tracking_data <- read.csv('metrica_game1_clean.csv')

## Data viz to identify the goalkeepers: one facet per player, showing a
## random sample of that player's positions, coloured by team.
tracking_data %>%
  group_by(player) %>%
  # slice_sample() replaces the superseded sample_n(); unlike sample_n() it
  # does not error when a player has fewer than 100 rows, it keeps them all
  slice_sample(n = 100) %>%
  ungroup() %>%
  ggplot(aes(x = x, y = y, col = team)) +
  geom_point() +
  theme_bw() +
  coord_equal() +
  facet_wrap(~player)
As you can see above, the facetted plots make it clear that Player 11 and Player 25 were the goalkeepers. The scatter plots also show that the data contains both halves without flipping, so that each team only goes one way.
Next, we are going to remove the goalkeepers.
# Drop every row belonging to the two goalkeepers
tracking_data <- tracking_data[
  !(tracking_data$player %in% c('Player25', 'Player11')), ,
  drop = FALSE
]
# Per-team spatial summary for every frame: centroid, length and width
space_metrics <- tracking_data %>%
  group_by(period, frame, team) %>%
  summarise(
    # carry the frame's time stamp and ball position into the summary
    time = first(time),
    ball_x = first(ball_x),
    ball_y = first(ball_y),
    # number of player rows contributing to this team/frame
    n_player = n(),
    # centroid coordinates
    mean_x = mean(x),
    mean_y = mean(y),
    # length = extent along x, width = extent along y
    length = diff(range(x)),
    width = diff(range(y))
  )
## `summarise()` has grouped output by 'period', 'frame'. You can override using
## the `.groups` argument.
# Sanity check: the home team's summary row for frame 1
space_metrics %>%
  filter(team == 'Home_Team', frame == 1)
## # A tibble: 1 × 11
## # Groups: period, frame [1]
## period frame team time ball_x ball_y n_player mean_x mean_y length width
## <int> <dbl> <chr> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 Home_Team 0.04 54.6 31.0 10 48.6 34.1 29.2 41.1
# Convex hull area for a single team in a single frame
## Keep only the home team's rows from frame 1
oneframe <- filter(tracking_data, team == 'Home_Team', frame == 1)
## chull() takes the x and y coordinates and returns the indices of the points that lie on the convex hull
help(chull)
## starting httpd help server ... done
hull_rows <- with(oneframe, chull(x, y))
hull_pts <- oneframe[hull_rows, c('x', 'y')]
hull_pts
## x y
## 9 54.4656 16.9392
## 5 38.5644 17.0096
## 4 37.1124 28.4232
## 1 39.1776 52.2576
## 6 49.3128 58.0712
## 2 66.2916 34.6152
## areapl() is provided by the splancs package
library(splancs)
help(areapl)
## areapl() needs the hull vertices from chull() as a matrix, not a data frame
hull_pts %>%
  as.matrix() %>%
  areapl()
## [1] 821.5264
# Making a function for convex hull area given x and y points
#' Area of the convex hull of a set of 2-D points
#'
#' @param x Numeric vector of x coordinates.
#' @param y Numeric vector of y coordinates, the same length as `x`.
#' @return A single number: the area enclosed by the convex hull of the
#'   points with finite coordinates, or 0 when fewer than three such points
#'   (or fewer than three hull vertices) remain, since no polygon is formed.
hull_area <- function(x, y) {
  stopifnot(length(x) == length(y))
  # chull() stops on non-finite coordinates, so drop incomplete points first
  usable <- is.finite(x) & is.finite(y)
  x <- x[usable]
  y <- y[usable]
  if (length(x) < 3) {
    return(0)
  }
  hull_rows <- chull(x, y)
  if (length(hull_rows) < 3) {
    return(0)
  }
  # areapl() expects the polygon vertices as a two-column matrix
  areapl(cbind(x[hull_rows], y[hull_rows]))
}
## the function should reproduce the single-frame area computed by hand
with(oneframe, hull_area(x, y))
## [1] 821.5264
# Convex hull area for every team in every frame
team_areas <- tracking_data %>%
  group_by(period, frame, team) %>%
  summarise(area = hull_area(x, y))
## `summarise()` has grouped output by 'period', 'frame'. You can override using
## the `.groups` argument.
## Time series: team length against time, then hull area against frame
ggplot(space_metrics) +
  geom_line(aes(x = time, y = length, col = team))
ggplot(team_areas) +
  geom_line(aes(x = frame, y = area, col = team))
# Scatter plot of one team's surface area against the other's
## reshape long -> wide so that each team gets its own area column
wide_areas <- team_areas %>%
  pivot_wider(names_from = team, values_from = area)
# home area vs away area, with a dashed blue reference line
ggplot(wide_areas, aes(x = Home_Team, y = Away_Team)) +
  geom_point(size = 1, alpha = 0.01) +
  coord_equal() +
  theme_bw() +
  geom_abline(linetype = 'dashed', col = 'blue')
Simulate 5000 observations of 1-D normal data with $\mu = 0$ and $\sigma = 1$:
# Draw 5000 values from a normal distribution with mean 0 and sd 1, then
# wrap them in a data frame whose single column is named `simdata`
simdata <- rnorm(n = 5000, mean = 0, sd = 1)
sim1d <- as.data.frame(simdata)
# Plot it!
ggplot(data = sim1d, aes(x = simdata)) +
  geom_density()
Visualize the theoretical distribution
# choose a range of values to plot over:
# 100 equally spaced points between -3 and 3
xrange <- seq(-3, 3, length.out = 100)
# theoretical normal density (mean 0, sd 1) at each of these points
truedata <- dnorm(x = xrange, mean = 0, sd = 1)
# make a data frame
true1d <- data.frame(x = xrange, d = truedata)
# map the data frame's own `x` column rather than the global `xrange` vector,
# so the plot depends only on `true1d`
ggplot(data = true1d, aes(x = x, y = d)) +
  geom_line()
Compare the theoretical and simulated densities in a single plot:
# Overlay the theoretical curve and the simulated density; the constant
# strings mapped to `col` label the two curves in the legend.
# The line layer maps `true1d`'s own `x` column instead of the global `xrange`.
ggplot() +
  geom_line(data = true1d, aes(x = x, y = d, col = 'theoretical')) +
  geom_density(data = sim1d, aes(x = simdata, col = 'simulated'))
Simulated data
# mean vector: one entry per variable (N = 2 variables here)
mu <- c(0, 0)
# N x N covariance matrix: unit variances, zero covariance
Sigma <- diag(2)
Sigma
## [,1] [,2]
## [1,] 1 0
## [2,] 0 1
# Simulate some data (n = number of simulated points)
## for function details: ?rmvnorm
n <- 1000
simdata <- rmvnorm(n = n, mean = mu, sigma = Sigma)
# turn it into a data frame (the columns are referred to as V1 and V2 below)
sim2d <- as.data.frame(simdata)
# raw scatter of the simulated points
ggplot(data = sim2d, aes(x = V1, y = V2)) +
  geom_point() +
  coord_equal()
# scatter with estimated density contours on top
ggplot(data = sim2d, aes(x = V1, y = V2)) +
  geom_point() +
  geom_density_2d() +
  coord_equal()
# filled density contours; after_stat(level) replaces the deprecated
# `..level..` notation for referring to a computed variable
ggplot(data = sim2d, aes(x = V1, y = V2)) +
  stat_density_2d(
    aes(fill = after_stat(level)),
    geom = "polygon",
    colour = "white"
  ) +
  scale_fill_viridis_c() +
  coord_equal()
Theoretical 2D distribution:
# 100 x 100 grid over [-3, 3] x [-3, 3] on which to evaluate the density
range2d <- expand.grid(
  V1 = seq(-3, 3, length.out = 100),
  V2 = seq(-3, 3, length.out = 100)
)
# Select the coordinate columns explicitly: once `truedata` has been added,
# as.matrix(range2d) would have three columns and no longer match `mu`,
# so re-running this line on its own would fail
range2d$truedata <- dmvnorm(
  x = as.matrix(range2d[, c("V1", "V2")]),
  mean = mu,
  sigma = Sigma
)
ggplot(range2d, aes(x = V1, y = V2, z = truedata)) +
  stat_contour_filled()
Now change the covariance matrix:
# mean vector: one entry per variable (N = 2 variables here)
mu <- c(0, 0)
# N x N covariance matrix, written row by row: variances 4 and 3, covariance 2
Sigma <- rbind(c(4, 2), c(2, 3))
Sigma
## [,1] [,2]
## [1,] 4 2
## [2,] 2 3
# Simulate some data (n = number of simulated points)
## for function details: ?rmvnorm
n <- 1000
simdata <- rmvnorm(n = n, mean = mu, sigma = Sigma)
# turn it into a data frame (the columns are referred to as V1 and V2 below)
sim2d <- as.data.frame(simdata)
# scatter with estimated density contours on top
ggplot(data = sim2d, aes(x = V1, y = V2)) +
  geom_point() +
  geom_density_2d() +
  coord_equal()
# filled density contours; after_stat(level) replaces the deprecated
# `..level..` notation for referring to a computed variable
ggplot(data = sim2d, aes(x = V1, y = V2)) +
  stat_density_2d(
    aes(fill = after_stat(level)),
    geom = "polygon",
    colour = "white"
  ) +
  scale_fill_viridis_c() +
  coord_equal()
# 100 x 100 grid over [-3, 3] x [-3, 3] on which to evaluate the density
# NOTE(review): with the covariance set above (variances 4 and 3) this grid
# covers only 1.5 standard deviations of V1, so the outer contours are cut
# off at the plot edge; widen the seq() limits if the full shape is wanted
range2d <- expand.grid(
  V1 = seq(-3, 3, length.out = 100),
  V2 = seq(-3, 3, length.out = 100)
)
# Select the coordinate columns explicitly: once `truedata` has been added,
# as.matrix(range2d) would have three columns and no longer match `mu`,
# so re-running this line on its own would fail
range2d$truedata <- dmvnorm(
  x = as.matrix(range2d[, c("V1", "V2")]),
  mean = mu,
  sigma = Sigma
)
ggplot(range2d, aes(x = V1, y = V2, z = truedata)) +
  stat_contour_filled()
That's all, thanks!