#Getting Started

##Load the lab data from a path relative to the current working directory.
##The file supplies the kobe data frame used throughout this script.
load("more/kobe.RData")
##Preview the first six recorded shots.
head(kobe)
##    vs game quarter time
## 1 ORL    1       1 9:47
## 2 ORL    1       1 9:07
## 3 ORL    1       1 8:11
## 4 ORL    1       1 7:41
## 5 ORL    1       1 7:03
## 6 ORL    1       1 6:01
##                                               description basket
## 1                 Kobe Bryant makes 4-foot two point shot      H
## 2                               Kobe Bryant misses jumper      M
## 3                        Kobe Bryant misses 7-foot jumper      M
## 4 Kobe Bryant makes 16-foot jumper (Derek Fisher assists)      H
## 5                         Kobe Bryant makes driving layup      H
## 6                               Kobe Bryant misses jumper      M
##Open the spreadsheet-style viewer only in an interactive session, so the
##script can also be run in batch mode (e.g. with Rscript) where no viewer
##window is available.
if (interactive()) {
  View(kobe)
}
##Outcomes of the first nine shots: "H" marks a made shot, "M" a missed one
##(compare the description column above).
kobe$basket[1:9]
## [1] "H" "M" "M" "H" "H" "M" "M" "M" "M"
##Exercise1: What does a streak length of 1 mean, i.e. how many hits and misses are in a streak of 1? What about a streak length of 0?
## A streak length of 1 means there was 1 hit followed by the 1 miss that ended the streak.
## A streak length of 0 means there was 1 miss and 0 hits: hits are counted until a miss ends the streak, so a miss that comes right after another miss produces a streak of length 0.

##calc_streak is a custom function that was loaded together with the data. Here it is applied to Kobe's shot outcomes to get the length of every shooting streak, and the frequency of each length is then plotted.
kobe_streak <- calc_streak(kobe[["basket"]])
barplot(table(kobe_streak))

##Exercise2: Describe the distribution of Kobe's streak lengths from the 2009 NBA finals. What was his typical streak length? How long was his longest streak of baskets?
summary(kobe_streak)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.7632  1.0000  4.0000
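##The typical streak length is 0: the median is 0, so more than half of the streaks have length 0, which also makes 0 the most frequent value. The longest streak is 4 baskets.
##The mean streak length is 0.7632 and the IQR is 1 (from 0 to 1).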

#Simulations in R
##The two faces of a coin. Without a prob argument, sample() treats them as equally likely.
outcomes <- c("heads", "tails")
##A single flip, printed directly.
sample(x = outcomes, size = 1L, replace = TRUE)
## [1] "heads"
##One hundred flips of the fair coin, stored and then printed.
sim_fair_coin <- sample(x = outcomes, size = 100L, replace = TRUE)
sim_fair_coin
##   [1] "tails" "heads" "tails" "tails" "heads" "tails" "tails" "heads"
##   [9] "tails" "heads" "tails" "tails" "heads" "tails" "tails" "heads"
##  [17] "tails" "heads" "tails" "heads" "heads" "heads" "heads" "heads"
##  [25] "heads" "tails" "heads" "heads" "tails" "tails" "tails" "tails"
##  [33] "heads" "tails" "heads" "tails" "tails" "heads" "heads" "tails"
##  [41] "tails" "tails" "heads" "heads" "heads" "heads" "heads" "tails"
##  [49] "heads" "tails" "heads" "tails" "heads" "heads" "heads" "heads"
##  [57] "heads" "heads" "heads" "tails" "tails" "tails" "tails" "heads"
##  [65] "tails" "tails" "heads" "heads" "tails" "tails" "tails" "heads"
##  [73] "heads" "tails" "tails" "tails" "heads" "tails" "heads" "tails"
##  [81] "heads" "heads" "tails" "heads" "tails" "heads" "tails" "heads"
##  [89] "tails" "tails" "tails" "tails" "heads" "tails" "tails" "heads"
##  [97] "tails" "tails" "tails" "heads"
##Count how many flips landed on each face.
table(sim_fair_coin)
## sim_fair_coin
## heads tails 
##    48    52
##An unfair coin: prob is matched to outcomes by position, so heads gets probability 0.2 and tails 0.8.
sim_unfair_coin <- sample(
  x = outcomes,
  size = 100L,
  replace = TRUE,
  prob = c(0.2, 0.8)
)
##Exercise3: In your simulation of flipping the unfair coin 100 times, how many flips came up heads?
table(sim_unfair_coin)
## sim_unfair_coin
## heads tails 
##    22    78
##22 of the 100 flips came up heads, which is close to the 20 we would expect from a 20% probability of heads.

#Simulating the Independent Shooter
##Shot outcomes for the simulated shooter: "H" is a hit, "M" is a miss.
outcomes <- c("H", "M")
##A single simulated shot with both outcomes equally likely (sample()'s default when prob is not given).
sim_basket <- sample(outcomes, size = 1, replace = TRUE)

##Exercise4: What change needs to be made to the sample function so that it reflects a shooting percentage of 45%? Make this adjustment, then run a simulation to sample 133 shots. Assign the output of this simulation to a new object called  sim_basket.
##Answer: supply prob so that a hit has probability 0.45 and a miss 0.55 (prob is matched to outcomes by position), and draw 133 shots.
##TRUE is spelled out because T is an ordinary variable that can be reassigned.
sim_basket <- sample(
  outcomes,
  size = 133,
  replace = TRUE,
  prob = c(0.45, 0.55)
)
##Print Kobe's full sequence of recorded shot outcomes for comparison with the simulation.
kobe[["basket"]]
##   [1] "H" "M" "M" "H" "H" "M" "M" "M" "M" "H" "H" "H" "M" "H" "H" "M" "M"
##  [18] "H" "H" "H" "M" "M" "H" "M" "H" "H" "H" "M" "M" "M" "M" "M" "M" "H"
##  [35] "M" "H" "M" "M" "H" "H" "H" "H" "M" "H" "M" "M" "H" "M" "M" "H" "M"
##  [52] "M" "H" "M" "H" "H" "M" "M" "H" "M" "H" "H" "M" "H" "M" "M" "M" "H"
##  [69] "M" "M" "M" "M" "H" "M" "H" "M" "M" "H" "M" "M" "H" "H" "M" "M" "M"
##  [86] "M" "H" "H" "H" "M" "M" "H" "M" "M" "H" "M" "H" "H" "M" "H" "M" "M"
## [103] "H" "M" "M" "M" "H" "M" "H" "H" "H" "M" "H" "H" "H" "M" "H" "M" "H"
## [120] "M" "M" "M" "M" "M" "M" "H" "M" "H" "M" "M" "M" "M" "H"
##Look at our simulation
##Printing the object shows every simulated outcome. No seed is set in the visible part of this script, so a re-run will not reproduce the exact sequence recorded below.
sim_basket
##   [1] "H" "M" "M" "H" "M" "M" "M" "H" "M" "M" "M" "M" "M" "M" "M" "M" "M"
##  [18] "H" "M" "M" "M" "M" "M" "H" "M" "M" "M" "H" "H" "M" "M" "H" "M" "M"
##  [35] "M" "M" "H" "M" "H" "M" "M" "M" "M" "H" "H" "H" "M" "M" "M" "H" "H"
##  [52] "M" "M" "M" "M" "H" "M" "M" "M" "M" "H" "M" "M" "H" "H" "M" "M" "H"
##  [69] "H" "M" "H" "H" "M" "M" "H" "M" "M" "H" "M" "M" "H" "M" "M" "H" "H"
##  [86] "M" "M" "H" "M" "H" "M" "M" "H" "H" "H" "M" "H" "M" "H" "M" "M" "M"
## [103] "M" "M" "M" "M" "H" "M" "M" "H" "M" "H" "M" "H" "M" "H" "M" "M" "M"
## [120] "M" "H" "H" "H" "H" "M" "H" "M" "M" "H" "H" "H" "M" "M"
##Both data sets represent the results of 133 shot attempts, each with the same shooting percentage of 45%. We know that our simulated data is from a shooter that has independent shots. That is, we know the simulated shooter does not have a hot hand.

#On Your Own
##Using calc_streak, compute the streak lengths of sim_basket.
sim_streak <- calc_streak(sim_basket)
##Number of streaks found in the simulated sequence.
length(sim_streak)
## [1] 86
##1. Describe the distribution of streak lengths. What is the typical streak length for this
##simulated independent shooter with a 45% shooting percentage? How long is the player's longest
##streak of baskets in 133 shots?
##Quartiles, mean and extremes of the simulated streak lengths.
summary(sim_streak)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.5581  1.0000  4.0000
##Standard deviation of the simulated streak lengths.
sd(sim_streak)
## [1] 0.862345
##Frequency of each streak length; the recorded counts add up to the 86 streaks reported above.
table(sim_streak)
## sim_streak
##  0  1  2  3  4 
## 53 23  6  3  1
##Bar chart of the same frequencies.
barplot(table(sim_streak))

##The distribution ranges from 0 to 4 with a mean of 0.5581 and a median of 0.
##The third quartile has a value of 1, which means at least 75% of the streak lengths are 1 or below.
##The standard deviation is 0.8623.
##The simulated distribution is strongly right (positively) skewed.

##The typical streak length is 0 as shown by the median.
##Largest value among the simulated streak lengths.
max(sim_streak)
## [1] 4
##The longest streak length is 4 as shown by the max function.

##If you were to run the simulation of the independent shooter a second time, how would you
##expect its streak distribution to compare to the distribution from the question above? 
##Exactly the same? Somewhat similar? Totally different? Explain your reasoning.

##I would expect the second simulation's distribution to be somewhat similar, but not exactly the same, because each run is a new random sample from the same process.
##From the barplot, we can see that the frequency of a streak longer than 1 is very low. That makes sense
##if the events are independent: the probability of 2 consecutive hits is .45*.45 = 0.2025, and the probability of 3 consecutive hits is
##.45*.45*.45, which is about 0.0911.

##3.How does Kobe Bryant's distribution of streak lengths compare to the distribution of streak lengths for the simulated shooter? Using this comparison, do you have evidence that the hot hand model fits Kobe's shooting patterns? Explain.

##Streak-length frequencies for Kobe's recorded shots.
barplot(table(kobe_streak))

##Streak-length frequencies for the simulated independent shooter.
barplot(table(sim_streak))

##Numeric summaries of both sets of streak lengths, Kobe first and then the simulation.
summary(kobe_streak)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.7632  1.0000  4.0000
summary(sim_streak)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.5581  1.0000  4.0000
##Standard deviations of both sets of streak lengths, in the same order.
sd(kobe_streak)
## [1] 0.9915432
sd(sim_streak)
## [1] 0.862345
##Kobe's streak has a median of 0 and a mean of 0.7632
##Sim streak has a median of 0 and a mean of 0.5581
##The maximum streak length is 4 for both
##The IQR is 1 for both
##The standard deviation is .99 for Kobe and .86 for sim.
##The two distributions are very similar in nature
##Based on the above evidence, I would say each shot is independent and Kobe does not have a hot hand.