1.0 Mapping court

Compared with the playing areas of other sports, a tennis court is very easy to draw because it consists entirely of horizontal and vertical lines.

Plotting the location and trajectory of each shot on the court makes the data easy for readers to interpret.

To plot a tennis court, the ggplot2 package on R will be used.

The coordinates of the tennis court were taken from this source: https://www.kaggle.com/floriansck/animating-the-australien-open-finale

1.1 Plot Tennis court

The court will be created with the ggplot() function, and the xlim() and ylim() functions will be used to set the limits of the plot.

The lines will be added with the geom_path() function, in the order in which they appear in the code.

#Load in the "ggplot2" package 
library(ggplot2)

# Court geometry, in metres. The long axis of the court runs along x
# (0 to 23.77) and the width along y (0 to 10.97).

# Outline of the doubles court (base lines + doubles side lines), listed as a
# closed loop so that geom_path() draws the full rectangle.
# Saved as the data frame "base_side_line".
base_side_line <- data.frame(
  x = c(0, 0, 23.77, 23.77, 0),
  y = c(0, 10.97, 10.97, 0, 0)
)

# Service lines and centre service line, drawn as one continuous path:
# the service line at x = 5.485, back to its midpoint, along the centre line
# (y = 10.97 / 2 = 5.485), then the service line at x = 18.285.
# Each service line is 6.40 m from the net (x = 11.885), hence
# 11.885 - 6.40 = 5.485 and 11.885 + 6.40 = 18.285.
# Saved as the data frame "serves_center_line".
serves_center_line <- data.frame(
  x = c(5.485, 5.485, 5.485, 18.285, 18.285, 18.285),
  y = c(1.37, 9.6, 5.485, 5.485, 1.37, 9.6)
)

# Plot the tennis court and save the plot under the name "court".
court <- ggplot() +
  geom_path(data = base_side_line, aes(x = x, y = y)) +
  geom_path(data = serves_center_line, aes(x = x, y = y)) +
  geom_path(aes(x = c(23.77, 0), y = c(1.37, 1.37))) + # lower singles side line
  geom_path(aes(x = c(23.77, 0), y = c(9.6, 9.6))) + # upper singles side line
  # dashed net line, halfway along the court: 23.77 / 2 = 11.885
  geom_path(aes(x = c(11.885, 11.885), y = c(0, 10.97)), lty = 2) +
  ylim(c(-1, 11.97)) + xlim(c(-4, 27.77)) + # leave a margin around the court
  theme_void()

We saved the plot under the name “court” above.

Entering the command court displays the tennis court.

court 

2.0 Shots

Now, rally data from a data set can be plotted on the tennis court we just created

We are going to overlay the shots to the court.

First, let's download the data set.

Download the ATP 2019 Australian Open Finals data from the following link https://www.kaggle.com/robseidl/tennis-atp-tour-australian-open-final-2019

Save the four data files (“events”, “points”, “rallies”, “serve”) in the directory that contains your project, inside a folder called ‘data’.

# Load the "dplyr" package 
library(dplyr)

# Read the three match tables used below from the "data" folder
rallies <- read.csv(file = "data/rallies.csv")
points <- read.csv(file = "data/points.csv")
events <- read.csv(file = "data/events.csv")

The axes in this data set are swapped in comparison with the court plot created in 1.0, which means that we need to adjust the column names.

We need to swap the x and y coordinates.

We could rename the columns manually, table by table, but let's use the “dplyr” package we loaded before, together with its %>% pipe operator.

#Change the coordinates in each table 
#Overwrite and save it into the same table 

# Swap the x/y column names of the hitter and receiver positions in the
# events table, overwriting "events" with the result
events <- rename(
  events,
  hitter_x = hitter_y,
  hitter_y = hitter_x,
  receiver_x = receiver_y,
  receiver_y = receiver_x
)

# Swap the x/y column names in the points table, overwriting "points"
points <- rename(
  points,
  x = y,
  y = x
)

Now plot the shots!

We use ggplot again, this time with geom_point(), to plot a dot at the position each shot was hit from.

# Overlay the shot locations from the events table on the court plot,
# colour-coded by the player who hit the shot
court +
  geom_point(
    mapping = aes(x = hitter_x, y = hitter_y, colour = hitter),
    data = events
  ) +
  labs(title = "Hitter Positions") + # give the plot a title
  theme(
    plot.title = element_text(hjust = 0.5), # centre the title horizontally
    legend.position = "bottom" # place the legend below the plot
  )

Let's look at only Djokovic’s shot locations.

# Keep only the shots hit by Djokovic
djokovic <- subset(events, hitter == "Djokovic")

# Plot Djokovic's shot locations on the court using geom_point()
court +
  geom_point(data = djokovic, # use the "djokovic" table
             aes(x = hitter_x, y = hitter_y, colour = hitter)) +
  theme(legend.position = "bottom", # legend at the bottom
        plot.title = element_text(hjust = 0.5)) + # centre the title
  ggtitle("Djokovic Shot Position") # give the plot a title

The hexbin visualisation is going to be used here.

#Load the "hexbin" package
library(hexbin)

# Heatmap of Djokovic's shot positions: hexagonal bins over the court,
# filled from yellow (low count) to red (high count)
court +
  geom_hex(
    mapping = aes(x = hitter_x, y = hitter_y),
    data = djokovic,
    alpha = 0.65,
    bins = 35
  ) +
  scale_fill_gradient(high = "red", low = "yellow") +
  labs(title = "Djokovic shot position") +
  coord_fixed() # same unit length on both axes

How to plot Djokovic’s shot categories on the court map

# Djokovic's shot positions, coloured by the "stroke" column
court +
  geom_point(
    mapping = aes(x = hitter_x, y = hitter_y, colour = stroke),
    data = djokovic
  ) +
  labs(title = "Djokovic shot type")

3.0 Rallies

A tennis match consists of many rallies.

If we are curious about a particular rally, we can plot it as well.

The table needs to be reshaped first:

Using the melt() function, we extract the hitter's position for every stroke of every rallyid from the events table.

#load "reshape2" package for melt function 
library(reshape2)
#load "knitr" package for kable function
library(knitr)


# x coordinate of the hitter for every stroke of every rally
ball_x <- rename(
  melt(
    events,
    id.vars = c("rallyid", "strokeid", "hitter", "receiver"),
    measure.vars = c("hitter_x")
  ),
  x_kind = variable,
  x = value
)

# y coordinate of the hitter for every stroke of every rally
ball_y <- rename(
  melt(
    events,
    id.vars = c("rallyid", "strokeid", "hitter", "receiver"),
    measure.vars = c("hitter_y")
  ),
  y_kind = variable,
  y = value
)

# Attach the y coordinate to ball_x, matching rows on rally id and stroke id
ball_position <- inner_join(
  ball_x,
  ball_y[, c("rallyid", "strokeid", "y")],
  by = c("rallyid", "strokeid")
)

# Show the first rows of ball_position, ordered by rally id, as a table
ball_position %>%
  arrange(rallyid) %>%
  head() %>%
  kable()
rallyid strokeid hitter receiver x_kind x y
1 1 Djokovic Nadal hitter_x -0.24 6.50
1 2 Nadal Djokovic hitter_x 25.59 0.05
1 3 Djokovic Nadal hitter_x 2.33 1.42
2 1 Djokovic Nadal hitter_x -0.01 4.48
3 1 Djokovic Nadal hitter_x -0.37 4.48
3 2 Nadal Djokovic hitter_x 24.79 7.40

Now let's plot a rally on the court, using ggplot2 again together with the ball_position table created above.

Choose any rally id you like.

# Positions recorded for rally id 100
rally100 <- filter(ball_position, rallyid == 100)

# Plot the rally on the court and keep the plot as rally100_line for later
rally100_line <- court +
  # geom_point() marks where each shot was hit from
  geom_point(mapping = aes(x = x, y = y), data = rally100, colour = "red") +
  # geom_path() connects the positions in row order
  geom_path(mapping = aes(x = x, y = y), data = rally100, colour = "red")

The rally plot created above is a good plot, but to make it more appealing, let's animate it using the transition_reveal() function from the gganimate package.

#Load the "gganimate" package
library(gganimate)

# Add a reveal transition along strokeid to the rally plot, then display it
rallie100_animate <- rally100_line + transition_reveal(strokeid)
rallie100_animate

If we don’t want the lines on the plot:

# Same animation as before, but without visible lines between the shots
rally100_ball <- court +
  # plot the ball point on the court
  geom_point(mapping = aes(x = x, y = y), data = rally100, colour = "red") +
  # the path layer is still added, but with colour NA so that it is not drawn
  geom_path(mapping = aes(x = x, y = y), data = rally100, colour = NA) +
  transition_reveal(strokeid)

# Render the animation with 200 frames
animate(rally100_ball, nframes = 200)