library(ggplot2)
library(dplyr)
library(kableExtra)
library(knitr)
# 設定存檔位址setwd("/Users/kevinpiger/Desktop/碩一下/統模/Homework02/")

Question 02

You think you know baseball? Do home run hitters have the highest batting averages? Think about this hypothesis, then analyze the following experience based on a half season with the Braves:

# Question 02 #
x <- c(0.252,0.305,0.299,0.303,0.285,0.191,0.283,0.272,0.310,0.266,0.215,0.211,0.244,0.320)
y <- c(12,6,4,15,2,2,16,6,8,10,0,3,6,7)
Ques <- rbind(x[1:7], y[1:7], x[8:14], y[8:14]) 
rownames(Ques) <- c("Batting Average","Home Runs","Batting Average","Home Runs")
colnames(Ques) <- c(1:7)
kable(Ques, "html") %>%
  kable_styling(bootstrap_options = "striped", full_width = F) %>% 
  add_header_above(c("Label" = 1, 'Baseball Data' = 7))
Label
Baseball Data
1 2 3 4 5 6 7
Batting Average 0.252 0.305 0.299 0.303 0.285 0.191 0.283
Home Runs 12.000 6.000 4.000 15.000 2.000 2.000 16.000
Batting Average 0.272 0.310 0.266 0.215 0.211 0.244 0.320
Home Runs 6.000 8.000 10.000 0.000 3.000 6.000 7.000
\(H_0:\) 打擊率與全壘打無關
\(H_1:\) 打擊率與全壘打有關

num = 100,000 為模擬次數, \(\alpha\) = 0.05

alpha = 0.05
num = 100000
N = NULL
for(i in 1:num){
x1 <- sample(x,14,F) 
y1 <- sample(y,14,F) 
N <- c(N, sum(x1*y1))}
t0 <- sum(x*y)
N1 <- data.frame("BatXHit" = N) %>% mutate(over = ifelse(BatXHit >=t0,  ">= critical value","< critical value"))
N1$over <- N1$over %>% as.factor()
ggplot(N1,aes(x = BatXHit,fill = over))+
  geom_histogram(bins = 100,alpha = 0.7)+
  scale_fill_manual(values = c("skyblue","blue"),
                    name = paste0("Label\n","k = ",t0),
                    breaks = c(">= critical value","< critical value k"),
                    labels = c(">= critical value","< critical value k"))+
  theme(plot.title = element_text(colour = "black", face = "bold", size = 25))+
  geom_vline(aes(xintercept = t0),linetype="dashed",color = "red")+
  labs(title = "Permutation ",x = "Simulation of BatXHit", y = "Count")+
  scale_x_continuous(breaks = c(24:27,t0,28),
                     labels = c(24:27,"k",28))

pv <- sum(N > t0 )/num
print(ifelse(pv > alpha, paste('p value',pv,'>',alpha,'Do not reject H0'),paste('p value',pv,'<=',alpha,'Reject H0')))
## [1] "p value 0.04804 <= 0.05 Reject H0"