library(ggplot2)
library(mvtnorm)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ✔ purrr   0.3.4      
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(colorspace)
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

Consider the following model the location of a tennis player’s serve when serving to the right. Let :

where N2 (v, C) is the bivariate normal distribution with mean vector v and covariance matrix C and X1 and X2 represent the lateral and depth locations of the landing point of the serve on the court. A serve is considered legal if the ball lands in the cross court service box, the area bounded by 18 <=X1<= 31.5 and 0 <=X2<= 21.

Question:2.a

Generate 5,000 independent realizations of X and use ggplot to create a scatterplot of your simulated values of X over the provided tennis court plot.

Approach: We take the tennis court code provided and superimpose with scatter plot generated by below R code.

#Creates a data.frame object, the easy structure to use for ggploting
tennisCourt = data.frame(x1 = c(0,4.5,18,31.5,36,0,4.5,4.5,0,-2),
                         x2 = c(0,4.5,18,31.5,36,36,31.5,31.5,36,38),
                         y1 = c(-39,-39,-21,-39,-39,39,21,-21,-39,0), 
                         y2 = c(39,39,21,39,39,39,21,-21,-39,0),
                         width = c(rep(1,9),3))


#Creates a plot object called ggTennis
ggTennis = ggplot(tennisCourt) + 
  geom_segment(aes(x = x1,y = y1,xend = x2,yend = y2),size = tennisCourt$width) +
  labs(x = "Serve Line",y = 'Lenght of the court',
       title = 'Tennis Court')

#ggTennis will show our tennis court as a plot
# For Q 2.a
#Generating 5000 points
mu = c(29,16)
sigma = matrix(c(4, 4, 4, 16), nrow =2)
n = 5000
set.seed(1304)
data = rmvnorm(n, mean = mu, sigma = sigma)
X = data.frame(data)
#Superimposing these points to TennisCourt
#Here's the points data frame to use in the plotting
pointsToAdd = X
#The geom_point function helps us create points. Note that we give it new data,
ggTennisWithPoints = ggTennis + 
  geom_point(data = pointsToAdd,aes(x = X1, y = X2),color = 'firebrick') +
  labs(x = "Serve Line",y = 'Lenght of the court',
       title = 'Serve Scatter Plot')

#Visulaizing what we made
ggTennisWithPoints

Question:2.b

Using the model, what is the theoretical probability a serve from the player will be legal? Additionally, show how you can approximate this probability from the realizations of X and provide the numeric value of your approximation.

#For Q2b. Theoretical prob and Simulate prob solution code below
lowerbound <- c(18, 0)
upperbound <- c(31.5, 21)
# Theoretical 
Th = pmvnorm(lower = lowerbound, upper = upperbound, mean = mu, sigma = sigma)
print(Th)
## [1] 0.8236971
## attr(,"error")
## [1] 1e-15
## attr(,"msg")
## [1] "Normal Completion"
# Simulation
count = sum(X$X1 >= 18 & X$X1 <= 31.5 & X$X2 >= 0 & X$X2 <= 21)
sim_prob = count/n
print(sim_prob)
## [1] 0.8222

Theoretical probability for serve to be legal is = 0.8237

Simulated probability for serve to be legal is = 0.8222

Question:2.c

Say the player decides to evaluate their serves that land further to the right (positive X1 direction). Given that the player examines their serves landing around X1=30.5 , what is the conditional distribution of X2? What is the probability that these serves are legal (only considering depth, not width)? (Hint: Consider using the {pnorm} function)

Conditional distribution of X2 given X1=30.5

#Conditional distribution of X2 considering Given that the player examines 
#their serves landing around X1= 30.5
#Legal server is considered if serve lands before and at service line X2=21
prob_legalserves_X2 = pnorm(q=21, mean=17.5, sd=sqrt(12), lower.tail=TRUE)
print(prob_legalserves_X2)
## [1] 0.8438393

Conditional probability of Serve to be legal = 0.84384

Question:2.d

Generate 500 realizations of X2 from the conditional distribution found in part c. Create a new version of the scatter plot constructed for part a that includes plots of the 500 realizations of X2 plotted as different color points with X1 fixed at 30.5. Add a small amount of random noise to the X1 component to reduce the effects of overplotting (consider using R’s jitter function). Describe your results. How do your values of X2 generated from the conditional distribution compare to the values generated directly from the original distribution?

# For Q2d
cond_x1 = rep(30.5, 500)
cond_x2 = rnorm(n = 500, mean = 17.5, sd = sqrt(12))
cond_X = data.frame(cond_x1, cond_x2)
sim_legal_serve = ggTennisWithPoints + 
  geom_jitter(data = cond_X, aes(cond_x1, cond_x2), color = "purple") +
  labs(title = 'Legal Serve Scatter Plot')
#Visualizing what we made
sim_legal_serve

Discussion: