probability lab

Author

E. Nguyen

library

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openintro)
Loading required package: airports
Loading required package: cherryblossom
Loading required package: usdata
# Load the kobe_basket data set into the workspace.
data(kobe_basket)
# Print a compact column-by-column overview (name, type, first values).
glimpse(kobe_basket)
Rows: 133
Columns: 6
$ vs          <fct> ORL, ORL, ORL, ORL, ORL, ORL, ORL, ORL, ORL, ORL, ORL, ORL…
$ game        <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ quarter     <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3…
$ time        <fct> 9:47, 9:07, 8:11, 7:41, 7:03, 6:01, 4:07, 0:52, 0:00, 6:35…
$ description <fct> Kobe Bryant makes 4-foot two point shot, Kobe Bryant misse…
$ shot        <chr> "H", "M", "M", "H", "H", "M", "M", "M", "M", "H", "H", "H"…

Exercise 1

#Answer: A streak length of 1 means he made exactly one shot and then missed the next one. A streak length of 0 means he missed a shot without making any baskets since his previous miss.

#calculating streak lengths and storing them in kobe_streak
kobe_streak <- calc_streak(kobe_basket$shot)

#distribution of streak lengths
ggplot(data = kobe_streak, aes(x = length)) + geom_bar()

Exercise 2

#Answer: His typical streak length was 0. His longest streak was 4 baskets in a row.
# Streak lengths computed from Kobe's recorded shot sequence.
kobe_streak <- calc_streak(kobe_basket$shot)

# Bar chart of how often each streak length occurs.
ggplot(kobe_streak, aes(x = length)) +
  geom_bar()

#vector with 50/50 chance
coin_outcomes <- c("heads", "tails")

#creating a sample of coin outcomes
sample(coin_outcomes, size = 1, replace = TRUE)

#saving 100 samples into object
sim_fair_coin <- sample(coin_outcomes, size = 100, replace = TRUE)

#view data
sim_fair_coin

#tabulate data
table(sim_fair_coin)

Exercise 3

# Fix the RNG seed so the simulated flips below are reproducible.
set.seed(12203)

# Simulate 100 flips of an unfair coin: heads 20% of the time, tails 80%.
coin_outcomes <- c("heads", "tails")
sim_unfair_coin <- sample(
  coin_outcomes,
  size = 100,
  replace = TRUE,
  prob = c(0.2, 0.8)
)
sim_unfair_coin
  [1] "tails" "heads" "tails" "tails" "tails" "tails" "heads" "tails" "tails"
 [10] "heads" "tails" "tails" "tails" "tails" "tails" "heads" "tails" "tails"
 [19] "tails" "tails" "heads" "tails" "tails" "tails" "tails" "tails" "tails"
 [28] "tails" "tails" "tails" "tails" "tails" "tails" "tails" "heads" "tails"
 [37] "heads" "tails" "tails" "tails" "tails" "tails" "tails" "tails" "tails"
 [46] "tails" "tails" "tails" "heads" "tails" "heads" "tails" "heads" "heads"
 [55] "tails" "tails" "tails" "tails" "tails" "tails" "tails" "tails" "heads"
 [64] "heads" "tails" "heads" "tails" "tails" "tails" "heads" "tails" "tails"
 [73] "tails" "tails" "tails" "tails" "tails" "tails" "heads" "tails" "tails"
 [82] "tails" "tails" "tails" "tails" "tails" "heads" "tails" "tails" "tails"
 [91] "tails" "heads" "tails" "tails" "tails" "tails" "tails" "tails" "tails"
[100] "tails"
# Count how many of the simulated flips landed on each outcome.
table(sim_unfair_coin)
sim_unfair_coin
heads tails 
   18    82 
#Answer: 18 of the 100 simulated flips came up heads.

#simulate independent shooter with a 45% shooting percentage

Exercise 4

# Simulate an independent shooter: 133 shots, each a hit ("H") with
# probability 0.45 and a miss ("M") with probability 0.55.
shot_outcomes <- c("H", "M")
sim_basket <- sample(
  shot_outcomes,
  size = 133,
  replace = TRUE,
  prob = c(0.45, 0.55)
)

Exercise 5

# Apply calc_streak() to the simulated shot sequence and store the result.
sim_streak <- calc_streak(sim_basket)

Exercise 6

# Bar chart of how often each streak length occurs for the simulated shooter.
ggplot(sim_streak, aes(x = length)) +
  geom_bar()

#Answer: The typical streak length for the simulated independent shooter is 0. The longest streak is 6 baskets in a row.

Exercise 7

#Answer: I would expect the streak distribution to be similar, but not exactly the same, because a new random sample will not match the previous one shot for shot. However, it should be fairly close: with a reasonably large sample of 133 shots, the chance of getting a completely different distribution is small.

Exercise 8

#Answer: No, the distributions look pretty similar, with a streak of 0 being the most common, followed by a steep decline for a streak of 1. There is little evidence that the hot hand model fits Kobe's shooting patterns, since a simulated independent shooter with a 45% shooting percentage per shot produces streaks very similar to Kobe's.