knitr::opts_chunk$set(echo = TRUE)

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
glimpse(kobe_basket)
## Rows: 133
## Columns: 6
## $ vs          <fct> ORL, ORL, ORL, ORL, ORL, ORL, ORL, ORL, ORL, ORL, ORL, ORL…
## $ game        <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ quarter     <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3…
## $ time        <fct> 9:47, 9:07, 8:11, 7:41, 7:03, 6:01, 4:07, 0:52, 0:00, 6:35…
## $ description <fct> Kobe Bryant makes 4-foot two point shot, Kobe Bryant misse…
## $ shot        <chr> "H", "M", "M", "H", "H", "M", "M", "M", "M", "H", "H", "H"…

E1

In a streak of 1, there is one shot made “H”, followed by a miss “M”. A streak of zero is a “M” preceeded by a streak of 1+.

kobe_streak <- calc_streak(kobe_basket$shot)

ggplot(data = kobe_streak, aes(x = length)) +
  geom_bar()

E2

Kobe Bryant’s typical streak was a streak of zero and his longest streak was 4. The distribution of his streaks during the 2009 NBA final shows that as length increases the count of each streak decreases, or right skewed.

coin_outcomes <- c("heads", "tails")
sample(coin_outcomes, size = 1, replace = TRUE)
## [1] "tails"
sim_fair_coin <- sample(coin_outcomes, size = 100, replace = TRUE)

sim_fair_coin
##   [1] "heads" "heads" "heads" "tails" "heads" "tails" "tails" "tails" "heads"
##  [10] "tails" "tails" "heads" "heads" "heads" "tails" "tails" "tails" "heads"
##  [19] "tails" "tails" "heads" "tails" "tails" "tails" "heads" "heads" "tails"
##  [28] "tails" "heads" "heads" "heads" "heads" "heads" "heads" "heads" "heads"
##  [37] "tails" "tails" "tails" "heads" "heads" "heads" "tails" "heads" "tails"
##  [46] "heads" "tails" "tails" "tails" "heads" "heads" "heads" "heads" "tails"
##  [55] "tails" "tails" "heads" "tails" "tails" "heads" "tails" "tails" "tails"
##  [64] "heads" "heads" "heads" "tails" "tails" "tails" "heads" "heads" "tails"
##  [73] "heads" "tails" "tails" "heads" "tails" "heads" "tails" "heads" "heads"
##  [82] "heads" "tails" "tails" "tails" "heads" "tails" "tails" "tails" "tails"
##  [91] "heads" "heads" "heads" "tails" "heads" "tails" "tails" "tails" "heads"
## [100] "heads"
table(sim_fair_coin)
## sim_fair_coin
## heads tails 
##    49    51
sim_unfair_coin <- sample(coin_outcomes, size = 100, replace = TRUE, 
                          prob = c(0.2, 0.8))

sim_unfair_coin
##   [1] "tails" "tails" "tails" "tails" "tails" "tails" "tails" "tails" "heads"
##  [10] "tails" "tails" "tails" "heads" "tails" "tails" "heads" "tails" "tails"
##  [19] "tails" "tails" "tails" "tails" "tails" "tails" "tails" "tails" "tails"
##  [28] "tails" "tails" "tails" "tails" "heads" "heads" "tails" "tails" "tails"
##  [37] "tails" "tails" "tails" "heads" "tails" "heads" "tails" "tails" "tails"
##  [46] "tails" "tails" "tails" "tails" "tails" "heads" "tails" "heads" "heads"
##  [55] "tails" "tails" "tails" "tails" "heads" "heads" "heads" "tails" "heads"
##  [64] "heads" "tails" "tails" "tails" "tails" "tails" "heads" "tails" "tails"
##  [73] "tails" "tails" "tails" "tails" "tails" "tails" "tails" "heads" "tails"
##  [82] "tails" "heads" "tails" "tails" "tails" "heads" "tails" "tails" "tails"
##  [91] "tails" "heads" "tails" "tails" "tails" "tails" "tails" "tails" "tails"
## [100] "tails"
table(sim_unfair_coin)
## sim_unfair_coin
## heads tails 
##    20    80
set.seed(121212)                 # make sure to change the seed

E3

In the unfair coin simulation, heads came up 17 times and tails appeared 83 times.

shot_outcomes <- c("H", "M")
sim_basket <- sample(shot_outcomes, size = 133, replace = TRUE, 
                          prob = c(0.45, 0.55))

sim_basket
##   [1] "M" "M" "H" "M" "M" "M" "M" "M" "H" "M" "H" "M" "H" "M" "M" "M" "H" "H"
##  [19] "M" "M" "H" "M" "M" "H" "H" "M" "M" "M" "H" "H" "H" "M" "H" "H" "M" "M"
##  [37] "M" "M" "M" "M" "H" "M" "M" "M" "H" "M" "M" "M" "H" "H" "M" "H" "M" "H"
##  [55] "H" "M" "M" "M" "H" "M" "H" "H" "H" "H" "H" "H" "H" "M" "M" "M" "H" "M"
##  [73] "H" "M" "M" "H" "H" "M" "H" "M" "H" "M" "M" "M" "M" "M" "M" "M" "M" "M"
##  [91] "H" "M" "M" "H" "H" "M" "H" "H" "M" "H" "H" "M" "M" "H" "H" "H" "M" "M"
## [109] "M" "H" "H" "M" "M" "H" "M" "H" "M" "H" "M" "H" "M" "H" "H" "H" "H" "M"
## [127] "H" "M" "H" "H" "H" "M" "H"
table(sim_basket)
## sim_basket
##  H  M 
## 60 73
set.seed(343434)
sim_streak <- calc_streak(sim_basket)
ggplot(data = sim_streak, aes(x = length)) +
  geom_bar()

E6

The distribution is still skewed right and maintains a typical streak of zero, however, the longest streak is now 5.

shot_outcomes <- c("H", "M")
sim_basket_2 <- sample(shot_outcomes, size = 133, replace = TRUE, 
                          prob = c(0.45, 0.55))

sim_basket_2
##   [1] "M" "M" "H" "H" "M" "H" "H" "H" "M" "M" "M" "M" "H" "H" "M" "H" "M" "M"
##  [19] "H" "M" "M" "M" "M" "M" "M" "M" "M" "M" "M" "M" "H" "H" "H" "H" "M" "M"
##  [37] "M" "M" "H" "M" "H" "H" "M" "H" "H" "H" "H" "M" "H" "M" "M" "H" "M" "H"
##  [55] "M" "M" "H" "M" "M" "H" "M" "H" "M" "M" "M" "M" "H" "M" "H" "M" "H" "M"
##  [73] "M" "H" "H" "H" "H" "M" "M" "M" "H" "M" "M" "M" "M" "H" "M" "M" "H" "M"
##  [91] "H" "H" "M" "M" "H" "M" "H" "H" "H" "M" "H" "M" "M" "H" "H" "M" "H" "M"
## [109] "M" "H" "H" "H" "H" "M" "M" "M" "M" "M" "H" "H" "H" "H" "H" "H" "M" "M"
## [127] "H" "H" "M" "M" "H" "H" "M"
table(sim_basket_2)
## sim_basket_2
##  H  M 
## 60 73
set.seed(98989)

sim_streak_2 <- calc_streak(sim_basket_2)

ggplot(data = sim_streak_2, aes(x = length)) +
  geom_bar()

E7

I would expect the distribution to maintain a right skew and the typical length remain zero. I have created a new df called sim_streak_2 and graphed the distribution to show that it would maintain a right skew and typical length of zero.

E8

The sim shooter and Kobe had a similar distribution. Hot Hand suggests that while a player is “heating up” that they will have a higher chance at making the second shot and so on. The sim shooter was locked at a probablity of a hit at 45%, hot hand model says that a shooter would have a 65% chance at making a second shot after hitting the first. Because the distributions are so similar I would say that the hot hand model does not match the observations seen in the Kobe 2009 final dataset.