This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.2 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(openintro)
## Warning: package 'openintro' was built under R version 4.1.2
## Loading required package: airports
## Warning: package 'airports' was built under R version 4.1.2
## Loading required package: cherryblossom
## Warning: package 'cherryblossom' was built under R version 4.1.2
## Loading required package: usdata
## Warning: package 'usdata' was built under R version 4.1.2
# View the data: print the kobe_basket tibble (one row per shot).
kobe_basket
## # A tibble: 133 x 6
## vs game quarter time description shot
## <fct> <int> <fct> <fct> <fct> <chr>
## 1 ORL 1 1 9:47 Kobe Bryant makes 4-foot two point shot H
## 2 ORL 1 1 9:07 Kobe Bryant misses jumper M
## 3 ORL 1 1 8:11 Kobe Bryant misses 7-foot jumper M
## 4 ORL 1 1 7:41 Kobe Bryant makes 16-foot jumper (Derek Fish~ H
## 5 ORL 1 1 7:03 Kobe Bryant makes driving layup H
## 6 ORL 1 1 6:01 Kobe Bryant misses jumper M
## 7 ORL 1 1 4:07 Kobe Bryant misses 12-foot jumper M
## 8 ORL 1 1 0:52 Kobe Bryant misses 19-foot jumper M
## 9 ORL 1 1 0:00 Kobe Bryant misses layup M
## 10 ORL 1 2 6:35 Kobe Bryant makes jumper H
## # ... with 123 more rows
# Compact overview: one line per column with its type and first few values.
kobe_basket %>%
  glimpse()
## Rows: 133
## Columns: 6
## $ vs <fct> ORL, ORL, ORL, ORL, ORL, ORL, ORL, ORL, ORL, ORL, ORL, ORL~
## $ game <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ quarter <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3~
## $ time <fct> 9:47, 9:07, 8:11, 7:41, 7:03, 6:01, 4:07, 0:52, 0:00, 6:35~
## $ description <fct> Kobe Bryant makes 4-foot two point shot, Kobe Bryant misse~
## $ shot <chr> "H", "M", "M", "H", "H", "M", "M", "M", "M", "H", "H", "H"~
# Exercise 1: What does a streak length of 1 mean, i.e. how many hits and misses are in a streak of 1? What about a streak length of 0?
## Answer: A streak length of 1 means the streak contains 1 hit followed by 1 miss,
## whereas a streak length of 0 means the streak contains 0 hits and 1 miss (the first attempt is a miss).
# Feed Kobe's hit/miss sequence to the custom calc_streak() function and keep
# the result in kobe_streak, a data frame whose streak lengths are stored in
# the `length` variable.
kobe_streak <- kobe_basket$shot %>%
  calc_streak()
kobe_streak
## length
## 1 1
## 2 0
## 3 2
## 4 0
## 5 0
## 6 0
## 7 3
## 8 2
## 9 0
## 10 3
## 11 0
## 12 1
## 13 3
## 14 0
## 15 0
## 16 0
## 17 0
## 18 0
## 19 1
## 20 1
## 21 0
## 22 4
## 23 1
## 24 0
## 25 1
## 26 0
## 27 1
## 28 0
## 29 1
## 30 2
## 31 0
## 32 1
## 33 2
## 34 1
## 35 0
## 36 0
## 37 1
## 38 0
## 39 0
## 40 0
## 41 1
## 42 1
## 43 0
## 44 1
## 45 0
## 46 2
## 47 0
## 48 0
## 49 0
## 50 3
## 51 0
## 52 1
## 53 0
## 54 1
## 55 2
## 56 1
## 57 0
## 58 1
## 59 0
## 60 0
## 61 1
## 62 3
## 63 3
## 64 1
## 65 1
## 66 0
## 67 0
## 68 0
## 69 0
## 70 0
## 71 1
## 72 1
## 73 0
## 74 0
## 75 0
## 76 1
# Bar chart of how often each streak length occurs in Kobe's shots.
kobe_streak %>%
  ggplot(aes(x = length)) +
  geom_bar()
# Exercise 2:
# Describe the distribution of Kobe’s streak lengths from the 2009 NBA finals. What was his typical streak length?
# How long was his longest streak of baskets?
## Answer: Typical streak length = 0, longest streak = 4
# The two possible results of a coin flip.
coin_outcomes <- c("heads", "tails")
# Flip the coin once.
sample(x = coin_outcomes, size = 1, replace = TRUE)
## [1] "tails"
# Flip a fair coin 100 times (both outcomes equally likely) and show the flips.
sim_fair_coin <- sample(
  x = coin_outcomes,
  size = 100,
  replace = TRUE
)
sim_fair_coin
## [1] "heads" "tails" "tails" "heads" "tails" "heads" "heads" "heads" "heads"
## [10] "heads" "tails" "tails" "tails" "tails" "tails" "tails" "tails" "tails"
## [19] "tails" "heads" "heads" "tails" "tails" "heads" "tails" "heads" "tails"
## [28] "tails" "tails" "tails" "heads" "tails" "heads" "tails" "tails" "tails"
## [37] "tails" "heads" "heads" "tails" "tails" "heads" "heads" "heads" "tails"
## [46] "tails" "tails" "tails" "heads" "heads" "tails" "tails" "tails" "tails"
## [55] "tails" "heads" "heads" "heads" "heads" "tails" "heads" "heads" "tails"
## [64] "tails" "heads" "tails" "tails" "heads" "heads" "tails" "tails" "tails"
## [73] "heads" "heads" "tails" "tails" "tails" "heads" "tails" "tails" "tails"
## [82] "tails" "heads" "tails" "heads" "heads" "heads" "heads" "tails" "heads"
## [91] "heads" "tails" "tails" "tails" "tails" "tails" "tails" "tails" "tails"
## [100] "tails"
# Count how many of the simulated fair-coin flips came up heads vs. tails.
table(sim_fair_coin)
## sim_fair_coin
## heads tails
## 39 61
# Flip an unfair coin 100 times: the first outcome in coin_outcomes is drawn
# with probability 0.2 and the second with probability 0.8.
sim_unfair_coin <- sample(
  x = coin_outcomes,
  size = 100,
  replace = TRUE,
  prob = c(0.2, 0.8)
)
sim_unfair_coin
## [1] "tails" "tails" "tails" "tails" "tails" "tails" "heads" "tails" "tails"
## [10] "tails" "tails" "tails" "tails" "tails" "tails" "tails" "tails" "heads"
## [19] "tails" "tails" "tails" "tails" "tails" "tails" "tails" "tails" "tails"
## [28] "tails" "tails" "tails" "heads" "heads" "tails" "heads" "tails" "tails"
## [37] "tails" "tails" "tails" "tails" "tails" "tails" "tails" "tails" "tails"
## [46] "tails" "tails" "tails" "tails" "heads" "tails" "tails" "tails" "heads"
## [55] "tails" "tails" "tails" "tails" "tails" "tails" "tails" "tails" "tails"
## [64] "tails" "tails" "tails" "tails" "tails" "tails" "heads" "tails" "tails"
## [73] "tails" "heads" "tails" "tails" "tails" "tails" "tails" "tails" "tails"
## [82] "tails" "tails" "tails" "tails" "tails" "tails" "tails" "tails" "tails"
## [91] "tails" "heads" "tails" "tails" "tails" "tails" "tails" "tails" "tails"
## [100] "tails"
# Tabulate the unfair-coin flips (not the fair ones again) to check that the
# observed split is close to the requested 20% heads / 80% tails.
table(sim_unfair_coin)
## sim_unfair_coin
## heads tails
## 10 90
# A shot is either a hit ("H") or a miss ("M").
shot_outcomes <- c("H", "M")
# Simulate a single shot with both outcomes equally likely, then show it.
sim_basket <- sample(
  x = shot_outcomes,
  size = 1,
  replace = TRUE
)
sim_basket
## [1] "M"
# Simulate an independent shooter: 133 shots, each a hit ("H") with
# probability 0.45 or a miss ("M") with probability 0.55.
shot_outcomes <- c("H", "M")
sim_basket <- sample(
  x = shot_outcomes,
  size = 133,
  replace = TRUE,
  prob = c(0.45, 0.55)
)
# Tally the hits and misses in the simulated sequence.
table(sim_basket)
## sim_basket
## H M
## 60 73
sim_basket
## [1] "M" "M" "M" "H" "H" "H" "M" "H" "M" "H" "H" "M" "M" "M" "H" "H" "M" "H"
## [19] "M" "H" "M" "H" "H" "M" "H" "M" "M" "M" "M" "H" "M" "M" "M" "M" "M" "H"
## [37] "M" "H" "M" "M" "H" "M" "M" "M" "M" "M" "H" "H" "M" "H" "H" "H" "M" "M"
## [55] "M" "M" "H" "M" "H" "M" "H" "M" "H" "M" "H" "M" "H" "H" "M" "H" "H" "M"
## [73] "M" "M" "M" "M" "H" "H" "H" "H" "M" "M" "M" "H" "M" "M" "H" "H" "M" "M"
## [91] "H" "M" "M" "H" "M" "H" "H" "H" "M" "M" "M" "H" "M" "M" "H" "H" "H" "H"
## [109] "H" "H" "H" "M" "M" "H" "M" "H" "H" "M" "M" "M" "M" "H" "H" "M" "M" "H"
## [127] "H" "M" "M" "M" "M" "H" "H"
# Compute the streak lengths of the simulated shooter with calc_streak(),
# the same function applied to Kobe's shots, then display them.
sim_streak <- calc_streak(sim_basket)
sim_streak
## length
## 1 0
## 2 0
## 3 0
## 4 3
## 5 1
## 6 2
## 7 0
## 8 0
## 9 2
## 10 1
## 11 1
## 12 2
## 13 1
## 14 0
## 15 0
## 16 0
## 17 1
## 18 0
## 19 0
## 20 0
## 21 0
## 22 1
## 23 1
## 24 0
## 25 1
## 26 0
## 27 0
## 28 0
## 29 0
## 30 2
## 31 3
## 32 0
## 33 0
## 34 0
## 35 1
## 36 1
## 37 1
## 38 1
## 39 1
## 40 2
## 41 2
## 42 0
## 43 0
## 44 0
## 45 0
## 46 4
## 47 0
## 48 0
## 49 1
## 50 0
## 51 2
## 52 0
## 53 1
## 54 0
## 55 1
## 56 3
## 57 0
## 58 0
## 59 1
## 60 0
## 61 7
## 62 0
## 63 1
## 64 2
## 65 0
## 66 0
## 67 0
## 68 2
## 69 0
## 70 2
## 71 0
## 72 0
## 73 0
## 74 2
# Bar chart of how often each streak length occurs for the simulated shooter.
sim_streak %>%
  ggplot(aes(x = length)) +
  geom_bar()
# Exercise 6:
# Describe the distribution of streak lengths. What is the typical streak length for this simulated
# independent shooter with a 45% shooting percentage? How long is the player’s longest streak of baskets in 133 shots?
# Make sure to include a plot in your answer.
## Answer: Typical streak length = 0, longest streak = 7
# The distribution will vary because the sample changes every time we run the simulation.
# Setting a seed (e.g. with set.seed()) ensures that the same sample is drawn each time.
# Exercise 8:
# How does Kobe Bryant’s distribution of streak lengths compare to the distribution
# of streak lengths for the simulated shooter? Using this comparison, do you have
# evidence that the hot hand model fits Kobe’s shooting patterns? Explain.
# The typical streak length is 0 for both the simulated shooter and Kobe Bryant, and the two
# distributions of streak lengths look similar, although the longest streak is greater for the simulated shooter.
# Both are consistent with independent shooting patterns, so there is no evidence that the hot hand model
# fits Kobe’s shooting patterns.
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.