library(cubature) # cubature' provides 'adaptIntegrate()' for multidimensional integration
# --- Player Data
Jersey <- c("32","13","07","04","22","03","21","02","15","00","24","05")
Player <- c("Jakucionis","Ivisic","Riley","Boswell","White",
"Humrichous","Johnson Jr.","Gibbs-Lawhorn",
"Davis","Booth","Kutcher","Redd")
FTM <- c(142, 48, 84, 113, 84, 24, 55, 29, 3, 2, 0, 1) # Free Throws Made
FTA <- c(168, 64, 116, 143, 102, 34, 89, 36, 6, 4, 0, 6) # Free Throw Attempts
IlliniFT2024 <- data.frame(Player, FTM, FTA, row.names = Jersey)
IlliniFT2024
Prior Distribution
\[
y \mid \theta \sim \text{Binomial}(n, \theta)
\]
Likelihood Distribution
\[
\theta \sim \text{Beta}\left(\tfrac{1}{2}, \tfrac{1}{2}\right)
\]
Posterior Distribution
\[
\theta_i \mid y_i
\sim \text{Beta}\left(y_i + \tfrac{1}{2},\; n_i - y_i +
\tfrac{1}{2}\right)
\]
Task 1: Estimate the average true free throw percentage across all
players using Bayesian inference.
# Observed data
y <- FTM # number of made free throws
n <- FTA # number of attempts per player
# Likelihood function: f(y|θ), y|θ ~ binomial(n,θ)
like <- function(theta) {
prod(dbinom(y, n, theta))
}
# Prior function: p(θ), θ ~ Beta(1/2, 1/2)
prior <- function(theta) {
prod(dbeta(theta, 1/2, 1/2))
}
Method 0: true posteior_mean calculate from posterior distribution
(Not Possible in Most Case)
player_post_means <- (y + 0.5) / (n + 1)
cat("Posterior Mean (Analytical Calculation):", round(posterior_mean_true, 6), "\n")
Posterior Mean (Analytical Calculation): 0.6457
Method 1: (numerical Integration)
\[
E[\bar{\theta} \mid y]
= \frac{\int \bar{\theta} \, p(y \mid \theta) \, p(\theta) \, d\theta}
{\int p(y \mid \theta) \, p(\theta) \, d\theta}
\]
# Numerator integrand:
# mean(theta) * likelihood * prior
numerator.integrand <- function(theta) {
mean(theta) * like(theta) * prior(theta)
}
# Compute numerator integral
numerator <- adaptIntegrate(
numerator.integrand,
rep(0, length(n)), # lower bounds for theta_i (0)
rep(1, length(n)) # upper bounds for theta_i (1)
)
#-----------------------------------------------------------------------
# Denominator integrand:
# likelihood * prior
denominator.integrand <- function(theta) {
like(theta) * prior(theta)
}
# Compute denominator integral
denominator <- adaptIntegrate(
denominator.integrand,
rep(0, length(n)),
rep(1, length(n))
)
# Posterior mean of team average using Numerical Integration
posterior_mean_NI <- numerator$integral / denominator$integral
cat("Posterior Mean (Numerical Integration):", round(posterior_mean_NI, 6), "\n")
Posterior Mean (Numerical Integration): 0.700805
Method 2: Monte Carlo Simulation), draw ample directly from the
posterior distribution
\[
\theta_i \mid y_i
\sim \text{Beta}\left(y_i + \tfrac{1}{2},\; n_i - y_i +
\tfrac{1}{2}\right)
\]
Nsim <- 100000 # Number of simulations
n_players <- nrow(IlliniFT2024) # Number of players
theta.vecs <- matrix(NA, n_players,Nsim) # empty matrix to store all simulated theta values
# Loop through each player and generate Beta samples
for (i in 1:n_players) {
alpha <- IlliniFT2024$FTM[i] + 0.5 # successes + prior
beta <- IlliniFT2024$FTA[i] - IlliniFT2024$FTM[i] + 0.5 # failures + prior
theta.vecs[i, ] <- rbeta(Nsim, alpha, beta) # draw from Beta(alpha, beta)
}
\[
\texttt{theta.vecs} =
\begin{bmatrix}
\theta_1^{(1)} & \theta_1^{(2)} & \cdots &
\theta_1^{(100000)} \\
\theta_2^{(1)} & \theta_2^{(2)} & \cdots &
\theta_2^{(100000)} \\
\vdots & \vdots & \ddots & \vdots \\
\theta_{12}^{(1)} & \theta_{12}^{(2)} & \cdots &
\theta_{12}^{(100000)}
\end{bmatrix}
\]
dim(theta.vecs) # 100,000 * samples of each player’s true free-throw percentage.
[1] 12 100000
# Each row = a player (12 players total)
# Each column = one full Monte Carlo simulation (100,000 total)
Average (p =12) player θ’s for each simulation (N_sim = 100,000
total) \[
\bar{\theta}^{(s)} = \frac{1}{p} \sum_{i=1}^p \theta_i^{(s)},
\quad s = 1, \dots, N_{\text{sim}}
\]
# Visualize the Posterior Distribution
theta_bars = colMeans(theta.vecs)
hist(thetabars, freq = FALSE, xlab = expression(bar(theta)))
lines(density(theta_bars), col = "blue")

The posterior is roughly bell-shaped. The most likely team average
lies around 0.64–0.66, with some uncertainty.
# 95% credible interval for posterior_mean_MC
quantile(theta_bars, c(0.025, 0.975))
2.5% 97.5%
0.5690695 0.7236910
# Posterior Probability of Exceeding 69%
mean(thetabars > 0.69)
[1] 0.14777
There’s roughly a 15% chance the team’s average true free-throw
ability exceeds 69%.
Compute Posterior Mean (Expected Value) using Monte Carlo
average
\[
E[\bar{\theta} \mid \mathbf{y}] \approx \frac{1}{N_{\text{sim}}}
\sum_{s=1}^{N_{\text{sim}}} \bar{\theta}^{(s)}
\]
# Posterior mean of team average
posterior_mean_MC <- mean(thetabars)
# Monte Carlo standard error (uncertainty due to finite sampling)
Monte_Carlo_error <- sd(thetabars) / sqrt(Nsim)
cat("Posterior Mean (MC estimate):", round(posterior_mean_MC, 6), "\n")
Posterior Mean (MC estimate): 0.645625
cat("Monte Carlo Standard Error :", round(Monte_Carlo_error, 8), "\n")
Monte Carlo Standard Error : 0.00012882
Posterior Mean Comparsion Across 3 different methods
# --- 1. Numerical Integration Posterior Mean ---
posterior_mean_NI <- numerator$integral / denominator$integral
# --- 2. Monte Carlo Posterior Mean ---
posterior_mean_MC <- mean(thetabars)
# --- 3. Analytical Posterior Mean (Exact Formula) ---
posterior_mean_Analytical <- with(IlliniFT2024, mean((FTM + 0.5) / (FTA + 1)))
cat("Posterior Mean Comparison:\n")
Posterior Mean Comparison:
cat("1. Numerical Integration :", round(posterior_mean_NI, 6), "\n")
1. Numerical Integration : 0.700805
cat("2. Monte Carlo Simulation:", round(posterior_mean_MC, 6), "\n")
2. Monte Carlo Simulation: 0.645625
cat("3. Analytical Formula :", round(posterior_mean_Analytical, 6), "\n")
3. Analytical Formula : 0.6457
Task 2: Goal: Use the same Monte Carlo setup to study the maximum
player ability max(θ_i) and to find which player is most likely the
best.
# Same Input from the task 1
## - IlliniFT2024 (data frame with Player, FTM, FTA)
## - theta.vecs (12 x Nsim matrix of posterior draws; rows=players, cols=sims)
## - N_sim (number of simulations; e.g., 100000)
# Consider the maximum of the di values: max(θ_i). This represents the team's best free throw success probability.
## Player's best simulated θ across all 100,000 draws
player_best_theta = apply(theta.vecs, 1, max)
# -> One value per player (length 12)
# -> Each value = that player's highest simulated true FT%
## Team's best free-throw probability in each simulation
team_best_theta = apply(theta.vecs, 2, max)
# -> One value per simulation (length 100000)
# -> Each value = best player's FT% in that simulated season
post_mean_max <- mean(team_best_theta)
cat("Posterior mean of max θ_i:", round(post_mean_max, 3)*100,"%\n")
Posterior mean of max θ_i: 88.5 %
The average (posterior mean) of the team’s best shooter’s true FT
rate across all simulations is 88.5%
mc_se_max <- sd(team_best_theta) / sqrt(Nsim)
cat("MC standard error:", round(mc_se_max, 8), "\n")
MC standard error: 0.00015575
ci_max <- quantile(team_best_theta, c(0.025, 0.975))
cat("95% Credible Interval for max θ_i:", paste(round(ci_max, 6), collapse = " — "), "\n")
95% Credible Interval for max θ_i: 0.81918 — 0.998452
hist(team_best_theta, freq = FALSE,
xlab = expression(max[i]~theta[i]),
main = "Posterior of the Team's Best FT Probability")
lines(density(team_best_theta), lwd = 2)

Predicting the best player
best_player_index <- apply(theta.vecs, 2, which.max)
best_counts <- table(best_player_index)
prob_best_player <- prop.table(best_counts)
names(prob_best_player) <- IlliniFT2024$Player[as.integer(names(prob_best_player))]
# Find the player with the highest posterior probability
best_player <- names(prob_best_player)[which.max(prob_best_player)]
best_prob <- max(prob_best_player)
prob_best_player
Jakucionis Ivisic Riley Boswell White Humrichous Gibbs-Lawhorn Davis
0.34873 0.01337 0.00049 0.02682 0.16360 0.01080 0.15484 0.01159
Booth Kutcher Redd
0.02764 0.24207 0.00005
cat("Most likely best shooter:", best_player, "\n")
Most likely best shooter: Jakucionis
cat("Posterior probability:", round(best_prob, 4), "\n")
Posterior probability: 0.3487
---
title: "Illini Free Throws (2024–25 Men’s Basketball)"
output: html_notebook
author: Joshua Zhang
---
```{r}
library(cubature) # cubature' provides 'adaptIntegrate()' for multidimensional integration
```

```{r}
# --- Player Data 
Jersey <- c("32","13","07","04","22","03","21","02","15","00","24","05")
Player <- c("Jakucionis","Ivisic","Riley","Boswell","White",
            "Humrichous","Johnson Jr.","Gibbs-Lawhorn",
            "Davis","Booth","Kutcher","Redd")
FTM <- c(142, 48, 84, 113, 84, 24, 55, 29, 3, 2, 0, 1)   # Free Throws Made
FTA <- c(168, 64, 116, 143, 102, 34, 89, 36, 6, 4, 0, 6) # Free Throw Attempts
IlliniFT2024 <- data.frame(Player, FTM, FTA, row.names = Jersey)
IlliniFT2024
```

### Prior Distribution
$$
y \mid \theta \sim \text{Binomial}(n, \theta)
$$

### Likelihood Distribution
$$
\theta \sim \text{Beta}\left(\tfrac{1}{2}, \tfrac{1}{2}\right)
$$

### Posterior Distribution
$$
\theta_i \mid y_i 
\sim \text{Beta}\left(y_i + \tfrac{1}{2},\; n_i - y_i + \tfrac{1}{2}\right)
$$


# Task 1: Estimate the average true free throw percentage across all players using Bayesian inference.
```{r}
# Observed data
y <- FTM  # number of made free throws
n <- FTA  # number of attempts per player

# Likelihood function: f(y|θ), y|θ ~ binomial(n,θ)
like <- function(theta) {
  prod(dbinom(y, n, theta))
}

# Prior function: p(θ),  θ ~ Beta(1/2, 1/2)
prior <- function(theta) {
  prod(dbeta(theta, 1/2, 1/2))
}
```

## Method 0: true posteior_mean calculate from posterior distribution (Not Possible in Most Case)
```{r}
player_post_means <- (y + 0.5) / (n + 1)
cat("Posterior Mean (Analytical Calculation):", round(posterior_mean_true, 6), "\n")
```


## Method 1: (numerical Integration)
$$
E[\bar{\theta} \mid y] 
= \frac{\int \bar{\theta} \, p(y \mid \theta) \, p(\theta) \, d\theta}
       {\int p(y \mid \theta) \, p(\theta) \, d\theta}
$$


```{r}
# Numerator integrand:
# mean(theta) * likelihood * prior
numerator.integrand <- function(theta) {
  mean(theta) * like(theta) * prior(theta)
}

# Compute numerator integral
numerator <- adaptIntegrate(
  numerator.integrand,
  rep(0, length(n)),  # lower bounds for theta_i (0)
  rep(1, length(n))   # upper bounds for theta_i (1)
)

#-----------------------------------------------------------------------
# Denominator integrand:
# likelihood * prior
denominator.integrand <- function(theta) {
  like(theta) * prior(theta)
}
# Compute denominator integral
denominator <- adaptIntegrate(
  denominator.integrand,
  rep(0, length(n)),
  rep(1, length(n))
)

# Posterior mean of team average using Numerical Integration
posterior_mean_NI <- numerator$integral / denominator$integral
cat("Posterior Mean (Numerical Integration):", round(posterior_mean_NI, 6), "\n")
```

## Method 2: Monte Carlo Simulation), draw ample directly from the posterior distribution
$$
\theta_i \mid y_i 
\sim \text{Beta}\left(y_i + \tfrac{1}{2},\; n_i - y_i + \tfrac{1}{2}\right)
$$

```{r}
Nsim <- 100000  # Number of simulations
n_players <- nrow(IlliniFT2024) # Number of players
theta.vecs <- matrix(NA, n_players,Nsim) # empty matrix to store all simulated theta values

# Loop through each player and generate Beta samples
for (i in 1:n_players) {
  alpha <- IlliniFT2024$FTM[i] + 0.5           # successes + prior
  beta  <- IlliniFT2024$FTA[i] - IlliniFT2024$FTM[i] + 0.5  # failures + prior
  theta.vecs[i, ] <- rbeta(Nsim, alpha, beta)  # draw from Beta(alpha, beta)
}
```

$$
\texttt{theta.vecs} =
\begin{bmatrix}
\theta_1^{(1)} & \theta_1^{(2)} & \cdots & \theta_1^{(100000)} \\
\theta_2^{(1)} & \theta_2^{(2)} & \cdots & \theta_2^{(100000)} \\
\vdots & \vdots & \ddots & \vdots \\
\theta_{12}^{(1)} & \theta_{12}^{(2)} & \cdots & \theta_{12}^{(100000)}
\end{bmatrix}
$$

```{r}
dim(theta.vecs) # 100,000 * samples of each player’s true free-throw percentage.
# Each row = a player (12 players total)
# Each column = one full Monte Carlo simulation (100,000 total)
```

Average (p =12) player θ’s for each simulation (N_sim = 100,000 total)
$$
\bar{\theta}^{(s)} = \frac{1}{p} \sum_{i=1}^p \theta_i^{(s)},
\quad s = 1, \dots, N_{\text{sim}}
$$

```{r}
# Visualize the Posterior Distribution
theta_bars = colMeans(theta.vecs)
hist(thetabars, freq = FALSE, xlab = expression(bar(theta)))
lines(density(theta_bars), col = "blue")
```
The posterior is roughly bell-shaped.
The most likely team average lies around 0.64–0.66, with some uncertainty.

```{r}
# 95% credible interval for posterior_mean_MC
quantile(theta_bars, c(0.025, 0.975))

# Posterior Probability of Exceeding 69%
mean(thetabars > 0.69)
```
There’s roughly a 15% chance the team’s average true free-throw ability exceeds 69%.

### Compute Posterior Mean (Expected Value) using Monte Carlo average
$$
E[\bar{\theta} \mid \mathbf{y}] \approx \frac{1}{N_{\text{sim}}}
\sum_{s=1}^{N_{\text{sim}}} \bar{\theta}^{(s)}
$$

```{r}
# Posterior mean of team average
posterior_mean_MC <- mean(thetabars)

# Monte Carlo standard error (uncertainty due to finite sampling)
Monte_Carlo_error <- sd(thetabars) / sqrt(Nsim)

cat("Posterior Mean (MC estimate):", round(posterior_mean_MC, 6), "\n")
cat("Monte Carlo Standard Error  :", round(Monte_Carlo_error, 8), "\n")
```

### Posterior Mean Comparsion Across 3 different methods
```{r}
# --- 1. Numerical Integration Posterior Mean ---
posterior_mean_NI <- numerator$integral / denominator$integral

# --- 2. Monte Carlo Posterior Mean ---
posterior_mean_MC <- mean(thetabars)

# --- 3. Analytical Posterior Mean (Exact Formula) ---
posterior_mean_Analytical <- with(IlliniFT2024, mean((FTM + 0.5) / (FTA + 1)))


cat("Posterior Mean Comparison:\n")
cat("1. Numerical Integration :", round(posterior_mean_NI, 6), "\n")
cat("2. Monte Carlo Simulation:", round(posterior_mean_MC, 6), "\n")
cat("3. Analytical Formula    :", round(posterior_mean_Analytical, 6), "\n")
```


# Task 2: Goal: Use the same Monte Carlo setup to study the maximum player ability max(θ_i) and to find which player is most likely the best.
```{r}
# Same Input from the task 1
## - IlliniFT2024  (data frame with Player, FTM, FTA)
## - theta.vecs    (12 x Nsim matrix of posterior draws; rows=players, cols=sims)
## - N_sim          (number of simulations; e.g., 100000)
# Consider the maximum of the di values: max(θ_i). This represents the team's best free throw success probability.
```

```{r}
## Player's best simulated θ across all 100,000 draws
player_best_theta = apply(theta.vecs, 1, max)
# -> One value per player (length 12)
# -> Each value = that player's highest simulated true FT%


## Team's best free-throw probability in each simulation
team_best_theta = apply(theta.vecs, 2, max)
# -> One value per simulation (length 100000)
# -> Each value = best player's FT% in that simulated season
```

```{r}
post_mean_max <- mean(team_best_theta)
cat("Posterior mean of max θ_i:", round(post_mean_max, 3)*100,"%\n")
```
The average (posterior mean) of the team’s best shooter’s true FT rate across all simulations is 88.5%

```{r}
mc_se_max <- sd(team_best_theta) / sqrt(Nsim)
cat("MC standard error:", round(mc_se_max, 8), "\n")
```

```{r}
ci_max <- quantile(team_best_theta, c(0.025, 0.975))
cat("95% Credible Interval for max θ_i:", paste(round(ci_max, 6), collapse = " — "), "\n")
```

```{r}
hist(team_best_theta, freq = FALSE,
     xlab = expression(max[i]~theta[i]),
     main = "Posterior of the Team's Best FT Probability")
lines(density(team_best_theta), lwd = 2)
```

### Predicting the best player
```{r}
best_player_index <- apply(theta.vecs, 2, which.max)
best_counts <- table(best_player_index)
prob_best_player <- prop.table(best_counts)
names(prob_best_player) <- IlliniFT2024$Player[as.integer(names(prob_best_player))]

# Find the player with the highest posterior probability
best_player <- names(prob_best_player)[which.max(prob_best_player)]
best_prob <- max(prob_best_player)

prob_best_player
cat("Most likely best shooter:", best_player, "\n")
cat("Posterior probability:", round(best_prob, 4), "\n")
```
