library(ggplot2)
library(dplyr)
library(kableExtra)
library(knitr)
# 設定存檔位址setwd("/Users/kevinpiger/Desktop/碩一下/統模/Homework02/")
You think you know baseball? Do home run hitters have the highest batting averages? Think about this hypothesis, then analyze the following experience based on a half season with the Braves:
# Question 02 #
x <- c(0.252,0.305,0.299,0.303,0.285,0.191,0.283,0.272,0.310,0.266,0.215,0.211,0.244,0.320)
y <- c(12,6,4,15,2,2,16,6,8,10,0,3,6,7)
Ques <- rbind(x[1:7], y[1:7], x[8:14], y[8:14])
rownames(Ques) <- c("Batting Average","Home Runs","Batting Average","Home Runs")
colnames(Ques) <- c(1:7)
kable(Ques, "html") %>%
kable_styling(bootstrap_options = "striped", full_width = F) %>%
add_header_above(c("Label" = 1, 'Baseball Data' = 7))
| 1 | 2 | 3 | 4 | 5 | 6 | 7 | |
|---|---|---|---|---|---|---|---|
| Batting Average | 0.252 | 0.305 | 0.299 | 0.303 | 0.285 | 0.191 | 0.283 |
| Home Runs | 12.000 | 6.000 | 4.000 | 15.000 | 2.000 | 2.000 | 16.000 |
| Batting Average | 0.272 | 0.310 | 0.266 | 0.215 | 0.211 | 0.244 | 0.320 |
| Home Runs | 6.000 | 8.000 | 10.000 | 0.000 | 3.000 | 6.000 | 7.000 |
num = 100,000 為模擬次數, \(\alpha\) = 0.05
alpha = 0.05
num = 100000
N = NULL
for(i in 1:num){
x1 <- sample(x,14,F)
y1 <- sample(y,14,F)
N <- c(N, sum(x1*y1))}
t0 <- sum(x*y)
N1 <- data.frame("BatXHit" = N) %>% mutate(over = ifelse(BatXHit >=t0, ">= critical value","< critical value"))
N1$over <- N1$over %>% as.factor()
ggplot(N1,aes(x = BatXHit,fill = over))+
geom_histogram(bins = 100,alpha = 0.7)+
scale_fill_manual(values = c("skyblue","blue"),
name = paste0("Label\n","k = ",t0),
breaks = c(">= critical value","< critical value k"),
labels = c(">= critical value","< critical value k"))+
theme(plot.title = element_text(colour = "black", face = "bold", size = 25))+
geom_vline(aes(xintercept = t0),linetype="dashed",color = "red")+
labs(title = "Permutation ",x = "Simulation of BatXHit", y = "Count")+
scale_x_continuous(breaks = c(24:27,t0,28),
labels = c(24:27,"k",28))
pv <- sum(N > t0 )/num
print(ifelse(pv > alpha, paste('p value',pv,'>',alpha,'Do not reject H0'),paste('p value',pv,'<=',alpha,'Reject H0')))
## [1] "p value 0.04804 <= 0.05 Reject H0"