nba <- nba %>%
distinct(Year, Player, Tm, .keep_all = T)
H0: the mean 3PA for PF/C in the 1980s is equal to the mean 3PA for PF/C in the 2010s
Ha: the mean 3PA for PF/C in the 2010s is greater than the mean 3PA for PF/C in the 1980s
alpha = 0.1
power = 0.9
delta = 10
Create new column that holds decade
nba <- nba %>%
mutate(Decade = Year - (Year %% 10)) %>%
relocate(Decade, .after = Year)
Create data set that only contains players from the 1980s and 2010 that played the PF or C position
test1 <- nba %>%
filter((Decade == 1980 | Decade == 2010) & (Pos == "PF" | Pos == "C")) %>%
select(Decade, Player, Pos, G, MP, `3P`, `3PA`, `3P%`)
test1 %>%
group_by(Decade) %>%
summarise(mean = mean(`3PA`),
sd = sd(`3PA`),
count = n())
## # A tibble: 2 × 4
## Decade mean sd count
## <dbl> <dbl> <dbl> <int>
## 1 1980 4.81 17.1 1515
## 2 2010 47.6 90.9 2282
pwrss.t.2means(mu1 = 10,
sd1 = sd(test1$`3PA`),
kappa = 1515/2282,
power = 0.90, alpha = 0.1,
alternative = "greater")
## Difference between Two means
## (Independent Samples t Test)
## H0: mu1 = mu2
## HA: mu1 > mu2
## ------------------------------
## Statistical power = 0.9
## n1 = 604
## n2 = 910
## ------------------------------
## Alternative = "greater"
## Degrees of freedom = 1512
## Non-centrality parameter = 2.565
## Type I error rate = 0.1
## Type II error rate = 0.1
Here we can see that to meet our specification listed above we can see that we have an adequate sample size to perform the test.
Filter down to get list of 3PA for each decade
decade80 <- test1 %>%
filter(Decade == 1980) %>%
select(`3PA`)
decade10 <- test1 %>%
filter(Decade == 2010) %>%
select(`3PA`)
T-test for difference in mean for PF/C 3PAs in 1980s vs mean for PF/C 3PAs in 2010s
test_result <- t.test(decade10,decade80, paired = FALSE, alternative = "greater", conf.level = 0.90)
test_result
##
## Welch Two Sample t-test
##
## data: decade10 and decade80
## t = 21.938, df = 2520.5, p-value < 2.2e-16
## alternative hypothesis: true difference in means is greater than 0
## 90 percent confidence interval:
## 40.32123 Inf
## sample estimates:
## mean of x mean of y
## 47.638037 4.814521
Based on the results of the hypothesis test there is sufficient evidence to suggest that the mean 3PA for PF/C in the 2010s is greater than the mean 3PAs for PF/C in the 1980s.
test1 <- test1 %>%
mutate(`3PM` = `3PA` - `3P`) %>%
relocate(`3PM`, .after = `3P`)
test1_table <- test1 %>%
group_by(Decade) %>%
summarise(total_makes = sum(`3P`),
total_misses = sum(`3PM`))
fisher.test(select(test1_table, total_makes, total_misses))
##
## Fisher's Exact Test for Count Data
##
## data: select(test1_table, total_makes, total_misses)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.5275089 0.5905700
## sample estimates:
## odds ratio
## 0.5582526
Based on the results of this Fishers Exact test there is sufficient evidence to suggest that the the decade and 3P(made) variables are related. i.e. if you know a PF/C makes a 3 they are more likely to be from the 2010 decade.
n_abbreviations <- length(unique(nba$Decade))
palette1 <- rep('darkgrey',times = n_abbreviations)
palette1_named <- setNames(object = palette1, nm = unique(nba$Decade))
palette1_named['1980'] = 'red'
palette1_named['2010'] = 'red'
nba %>%
filter(Pos == "PF" | Pos == "C") %>%
group_by(Year) %>%
summarise(total_made = sum(`3P`),
total_attempted = sum(`3PA`),
average_percent = total_made/total_attempted) %>%
mutate(Decade = (Year - (Year %% 10))) %>%
ggplot() +
geom_line(mapping = aes(x = Year, y = total_attempted, color = factor(Decade))) +
scale_color_manual(values = palette1_named) +
labs(title = "Total 3 Point Attempts by Year for PF/C positions",
subtitle = "Highlighted 1980s and 2010s (groups in tests seen above)") +
ylab("Total Attempts")
Has the game gotten younger?
H0: the mean age of players in the 1980s is equal to the mean age of players in the 2010s
Ha: the mean age of players in the 1980s is greater than to the mean age of players in the 2010s (i.e. the average player has gotten younger since the 1980s)
alpha = 0.10
power = 0.95
delta = 1
nba <- nba %>%
mutate(Decade = Year - (Year %% 10)) %>%
relocate(Decade, .after = Year)
test2 <- nba %>%
filter(Decade == 1980 | Decade == 2010) %>%
select(Decade, Player, Age)
test2 %>%
group_by(Decade) %>%
summarise(mean = mean(Age),
sd = sd(Age),
count = n())
## # A tibble: 2 × 4
## Decade mean sd count
## <dbl> <dbl> <dbl> <int>
## 1 1980 26.3 3.29 3690
## 2 2010 26.5 4.13 5847
pwrss.t.2means(mu1 = 1,
sd1 = sd(test2$Age),
kappa = 3690/5847,
power = 0.95, alpha = 0.05,
alternative = "greater")
## Difference between Two means
## (Independent Samples t Test)
## H0: mu1 = mu2
## HA: mu1 > mu2
## ------------------------------
## Statistical power = 0.95
## n1 = 259
## n2 = 411
## ------------------------------
## Alternative = "greater"
## Degrees of freedom = 668
## Non-centrality parameter = 3.296
## Type I error rate = 0.05
## Type II error rate = 0.05
age2010 <- test2 %>%
filter(Decade == 2010)
age1980 <- test2 %>%
filter(Decade == 1980)
test_result <- t.test(age2010$Age,age1980$Age, paired = FALSE, alternative = "less", conf.level = 0.95)
test_result
##
## Welch Two Sample t-test
##
## data: age2010$Age and age1980$Age
## t = 2.9345, df = 9035.2, p-value = 0.9983
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.3498855
## sample estimates:
## mean of x mean of y
## 26.50334 26.27913
Based on the results of the t test for difference in means we can conclude that there is not sufficient evidence to reject the null hypothesis which states that the average age of NBA players on a roster in the 1980s was equal to the average age of an NBA player on a roster in the 2010s. This results steers us away from the alternative which states that the players in the 1980s have a higher average age than the players in the 2010s.
test2_table <- test2 %>%
group_by(Decade) %>%
summarise(young = sum(Age<30),
old = n() - sum(Age<30))
test2_table
## # A tibble: 2 × 3
## Decade young old
## <dbl> <int> <int>
## 1 1980 3036 654
## 2 2010 4473 1374
fisher.test(select(test2_table, young, old))
##
## Fisher's Exact Test for Count Data
##
## data: select(test2_table, young, old)
## p-value = 1.343e-11
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.283817 1.584758
## sample estimates:
## odds ratio
## 1.425918
Similar to the results of the Neyman-Pearson the results of this test shows that there is a relationship between Decade and the young and old categories. Based on the results we can see that there is a greater chance of being in the old category for the 2010 decade players.
nba %>%
group_by(Year) %>%
summarise(avg_age = mean(Age)) %>%
mutate(Decade = (Year - (Year %% 10))) %>%
ggplot() +
geom_line(mapping = aes(x = Year, y = avg_age, color = factor(Decade))) +
scale_color_manual(values = palette1_named) +
labs(title = "Average Player Age by Season",
subtitle = "Highlighted 1980s and 2010s (groups in tests seen above)") +
ylab("Average Age")
nba %>%
group_by(Year,Pos) %>%
summarise(avg_age = mean(Age)) %>%
mutate(Decade = (Year - (Year %% 10))) %>%
ggplot() +
geom_line(mapping = aes(x = Year, y = avg_age, color = factor(Decade))) +
geom_hline(yintercept = mean(nba$Age), alpha = 0.5) +
scale_color_manual(values = palette1_named) +
labs(title = "Average Player Age for Each Position by Season",
subtitle = "Highlighted 1980s and 2010s (groups in tests seen above)") +
ylab("Average Age") +
facet_wrap(~Pos)
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.