Probability

Hot Hands

Getting Started

load("more/kobe.RData")
head(kobe)
##    vs game quarter time
## 1 ORL    1       1 9:47
## 2 ORL    1       1 9:07
## 3 ORL    1       1 8:11
## 4 ORL    1       1 7:41
## 5 ORL    1       1 7:03
## 6 ORL    1       1 6:01
##                                               description basket
## 1                 Kobe Bryant makes 4-foot two point shot      H
## 2                               Kobe Bryant misses jumper      M
## 3                        Kobe Bryant misses 7-foot jumper      M
## 4 Kobe Bryant makes 16-foot jumper (Derek Fisher assists)      H
## 5                         Kobe Bryant makes driving layup      H
## 6                               Kobe Bryant misses jumper      M
kobe$basket[1:9]
## [1] "H" "M" "M" "H" "H" "M" "M" "M" "M"

Streak of length 1 means one basket was made, and the next shot was a miss. So there are 1 hit and 1 miss in a streak of 1. A streak of length zero has no hits, the first and only shot in the streak was a miss.

kobe_streak <- calc_streak(kobe$basket)
kobe_streak
##  [1] 1 0 2 0 0 0 3 2 0 3 0 1 3 0 0 0 0 0 1 1 0 4 1 0 1 0 1 0 1 2 0 1 2 1 0
## [36] 0 1 0 0 0 1 1 0 1 0 2 0 0 0 3 0 1 0 1 2 1 0 1 0 0 1 3 3 1 1 0 0 0 0 0
## [71] 1 1 0 0 0 1
barplot(table(kobe_streak))

Typical streak length was 0 with a total of 39. Streaks of 1 were second with 24, then 6 streaks of 2 and 3, and only one streak of 4

table(kobe_streak)
## kobe_streak
##  0  1  2  3  4 
## 39 24  6  6  1

Longest streak of baskets was 4

max(kobe_streak)
## [1] 4

Simulations in R

With fair coins heads and tails were about 50% each, for the unfair coin about 20% heads and 80% tails

outcomes <- c("heads", "tails")
sample(outcomes, size = 1, replace = TRUE)
## [1] "heads"
sim_fair_coin <- sample(outcomes, size = 100, replace = TRUE)
sim_fair_coin
##   [1] "heads" "heads" "heads" "heads" "tails" "heads" "tails" "tails"
##   [9] "heads" "heads" "heads" "heads" "heads" "tails" "heads" "tails"
##  [17] "tails" "tails" "tails" "heads" "tails" "tails" "tails" "tails"
##  [25] "heads" "tails" "heads" "heads" "tails" "tails" "heads" "tails"
##  [33] "tails" "heads" "tails" "tails" "heads" "heads" "tails" "heads"
##  [41] "heads" "heads" "heads" "heads" "heads" "tails" "heads" "tails"
##  [49] "tails" "heads" "tails" "tails" "heads" "tails" "heads" "heads"
##  [57] "heads" "heads" "heads" "heads" "tails" "heads" "heads" "tails"
##  [65] "tails" "heads" "heads" "tails" "tails" "heads" "tails" "tails"
##  [73] "heads" "heads" "tails" "heads" "heads" "heads" "heads" "heads"
##  [81] "heads" "heads" "heads" "tails" "tails" "tails" "heads" "tails"
##  [89] "tails" "heads" "heads" "tails" "heads" "heads" "tails" "tails"
##  [97] "heads" "heads" "heads" "heads"
table(sim_fair_coin)
## sim_fair_coin
## heads tails 
##    58    42
sim_unfair_coin <- sample(outcomes, size = 100, replace = TRUE, prob = c(0.2, 0.8))
table(sim_unfair_coin)
## sim_unfair_coin
## heads tails 
##    10    90

Simulating the Independent Shooter

outcomes <- c("H", "M")
sim_basket <- sample(outcomes, size = 1, replace = TRUE)
sim_basket
## [1] "M"

The prob variable on the sample function needs to be set to 45% (for hits) and 55% (for misses). Then the sample variable is set to 133.

sim_basket <- sample(outcomes, size = 133, replace = TRUE,prob = c(0.45,0.55))
sim_basket
##   [1] "H" "H" "M" "M" "H" "H" "H" "H" "M" "H" "H" "M" "M" "H" "M" "H" "M"
##  [18] "M" "H" "H" "M" "M" "M" "H" "M" "H" "M" "M" "H" "H" "M" "M" "M" "M"
##  [35] "H" "H" "M" "M" "H" "M" "H" "M" "H" "M" "H" "H" "M" "M" "M" "M" "M"
##  [52] "H" "M" "H" "M" "M" "H" "M" "M" "M" "M" "M" "M" "H" "M" "M" "H" "H"
##  [69] "M" "M" "H" "M" "M" "M" "M" "H" "H" "H" "M" "H" "H" "M" "M" "M" "M"
##  [86] "M" "M" "M" "M" "H" "M" "M" "H" "M" "M" "H" "M" "H" "H" "M" "M" "M"
## [103] "M" "H" "H" "H" "M" "M" "M" "H" "M" "M" "M" "H" "H" "M" "H" "M" "H"
## [120] "M" "H" "H" "H" "H" "M" "M" "H" "H" "M" "M" "M" "H" "H"
table(sim_basket)
## sim_basket
##  H  M 
## 56 77
kobe$basket
##   [1] "H" "M" "M" "H" "H" "M" "M" "M" "M" "H" "H" "H" "M" "H" "H" "M" "M"
##  [18] "H" "H" "H" "M" "M" "H" "M" "H" "H" "H" "M" "M" "M" "M" "M" "M" "H"
##  [35] "M" "H" "M" "M" "H" "H" "H" "H" "M" "H" "M" "M" "H" "M" "M" "H" "M"
##  [52] "M" "H" "M" "H" "H" "M" "M" "H" "M" "H" "H" "M" "H" "M" "M" "M" "H"
##  [69] "M" "M" "M" "M" "H" "M" "H" "M" "M" "H" "M" "M" "H" "H" "M" "M" "M"
##  [86] "M" "H" "H" "H" "M" "M" "H" "M" "M" "H" "M" "H" "H" "M" "H" "M" "M"
## [103] "H" "M" "M" "M" "H" "M" "H" "H" "H" "M" "H" "H" "H" "M" "H" "M" "H"
## [120] "M" "M" "M" "M" "M" "M" "H" "M" "H" "M" "M" "M" "M" "H"
table(kobe$basket)
## 
##  H  M 
## 58 75

On your own

Results shown below using table and max functions. Results change each time simulation is run.

independentShooter_streak<-calc_streak(sim_basket)
table(independentShooter_streak)
## independentShooter_streak
##  0  1  2  3  4 
## 44 18 12  2  2
max(independentShooter_streak)
## [1] 4

Running the simulation a second time will show somewhat similar results. They wouldn’t be the same becouse we are running a simulation and “picking shots out of the hat at random”. But they wouldn’t be completely different becouse the amount of hit and miss “shots in the hat” are the same for each simulation, with a defined proportion of 45% to 55% hits and miss respectively.

sim_basket <- sample(outcomes, size = 133, replace = TRUE,prob = c(0.45,0.55))
sim_basket
##   [1] "M" "H" "H" "M" "H" "H" "H" "H" "H" "M" "M" "H" "M" "H" "M" "M" "M"
##  [18] "H" "H" "M" "H" "M" "M" "H" "M" "M" "H" "M" "M" "M" "M" "H" "H" "H"
##  [35] "H" "M" "M" "M" "H" "H" "H" "M" "H" "M" "M" "M" "M" "H" "M" "M" "H"
##  [52] "M" "H" "M" "M" "H" "H" "H" "H" "M" "H" "M" "H" "M" "H" "M" "M" "M"
##  [69] "M" "M" "M" "H" "H" "M" "H" "H" "H" "M" "H" "M" "H" "M" "H" "M" "H"
##  [86] "M" "M" "H" "M" "H" "H" "H" "M" "M" "H" "H" "M" "M" "H" "M" "M" "M"
## [103] "M" "M" "M" "M" "M" "H" "H" "M" "H" "H" "M" "M" "M" "M" "H" "M" "H"
## [120] "M" "H" "H" "M" "M" "H" "H" "H" "M" "M" "M" "H" "H" "H"
independentShooter_streak<-calc_streak(sim_basket)
table(independentShooter_streak)
## independentShooter_streak
##  0  1  2  3  4  5 
## 37 20  7  5  2  1
max(independentShooter_streak)
## [1] 5

As can be seen in the distribution plots below, both are very alike. We find both of them being right skewed. We also calculate the median and IQR for these distributions, with which we find very similar results. Becouse we find both distributions so alike, we do not find evidence that Kobe’s Bryant is a “hot hand”

table(kobe_streak)
## kobe_streak
##  0  1  2  3  4 
## 39 24  6  6  1
table(independentShooter_streak)
## independentShooter_streak
##  0  1  2  3  4  5 
## 37 20  7  5  2  1
kobeVector<-unlist(table(kobe_streak))
independentVector<-unlist(table(independentShooter_streak))
if (length(kobeVector)<length(independentVector)) {
  kobeVector<-c(kobeVector,rep(0,length(independentVector)-length(kobeVector)))
} else {
  independentVector<-c(independentVector,rep(0,length(kobeVector)-length(independentVector)))  
}

kobeVector
##  0  1  2  3  4    
## 39 24  6  6  1  0
independentVector
## independentShooter_streak
##  0  1  2  3  4  5 
## 37 20  7  5  2  1
h<-rbind(independentVector,kobeVector)
h
##                    0  1 2 3 4 5
## independentVector 37 20 7 5 2 1
## kobeVector        39 24 6 6 1 0
barplot(h,beside = T,main="Independent Shooter Distribution vs. Kobe Distribution",col=c("darkblue","red"),xlab="Streak Lengths",ylab = "Number of Streaks",legend=c("Independent Shooter","Kobe"),ylim=c(0,max(max(kobeVector),max(independentVector))+10),xpd = FALSE)

We calculate a descriptive statistics summary and do not find major differences between both distributions.

median(kobeVector)
## [1] 6
IQR(kobeVector)
## [1] 17.25
median(independentVector)
## [1] 6
IQR(independentVector)
## [1] 14