Question 03

library(bootstrap)
library(dplyr)
library(kableExtra)
library(knitr)
library(ggplot2)

Read data

# Work on a copy of the law82 data set and render it as a scrollable
# HTML table.
data <- law82
data %>%
  kable(format = "html") %>%
  kable_styling() %>%
  scroll_box(height = "200px")
School LSAT GPA
1 622 3.23
2 542 2.83
3 579 3.24
4 653 3.12
5 606 3.09
6 576 3.39
7 620 3.10
8 615 3.40
9 553 2.97
10 607 2.91
11 558 3.11
12 596 3.24
13 635 3.30
14 581 3.22
15 661 3.43
16 547 2.91
17 599 3.23
18 646 3.47
19 622 3.15
20 611 3.33
21 546 2.99
22 614 3.19
23 628 3.03
24 575 3.01
25 662 3.39
26 627 3.41
27 608 3.04
28 632 3.29
29 587 3.16
30 581 3.17
31 605 3.13
32 704 3.36
33 477 2.57
34 591 3.02
35 578 3.03
36 572 2.88
37 615 3.37
38 606 3.20
39 603 3.23
40 535 2.98
41 595 3.11
42 575 2.92
43 573 2.85
44 644 3.38
45 545 2.76
46 645 3.27
47 651 3.36
48 562 3.19
49 609 3.17
50 555 3.00
51 586 3.11
52 580 3.07
53 594 2.96
54 594 3.05
55 560 2.93
56 641 3.28
57 512 3.01
58 631 3.21
59 597 3.32
60 621 3.24
61 617 3.03
62 637 3.33
62 572 3.08
64 610 3.13
65 562 3.01
66 635 3.30
67 614 3.15
68 546 2.82
69 598 3.20
70 666 3.44
71 570 3.01
72 570 2.92
73 605 3.45
74 565 3.15
75 686 3.50
76 608 3.16
77 595 3.19
78 590 3.15
79 558 2.81
80 611 3.16
81 564 3.02
82 575 2.74
# Correlation between LSAT and GPA over every row of `data`.
cor(data[["LSAT"]], data[["GPA"]])
## [1] 0.7599979

Bootstrap correlation

# Bootstrap the correlation between columns 2 and 3 of `data` for a fixed
# set of rows.
#
# Arguments:
#   num      Row indices of `data` that make up the observed sample.
#   Sim_num  Number of indices drawn (with replacement) in each resample.
#   rep_num  Number of bootstrap resamples.
#
# Returns a numeric vector of length 2: the mean and the variance of the
# rep_num resampled correlations (NaN and NA when rep_num is 0).
#
# Reads `data` from the enclosing environment.
Q3 <- function(num,Sim_num,rep_num){
  # seq_len() runs exactly rep_num times; 1:rep_num would count down (1, 0)
  # and run twice when rep_num is 0. vapply() also fills a preallocated
  # numeric vector instead of growing one with c() on every iteration.
  Sim_cor <- vapply(seq_len(rep_num), function(i) {
    # Index through sample.int() so a length-one `num` is resampled as that
    # single row rather than being expanded to 1:num by sample(). For longer
    # `num` this draws exactly what sample(num, Sim_num, TRUE) draws.
    picked <- num[sample.int(length(num), Sim_num, replace = TRUE)]
    # Off-diagonal entry of the 2 x 2 correlation matrix.
    cor(data[picked, 2:3])[1, 2]
  }, numeric(1))
  c(mean(Sim_cor), var(Sim_cor))
}

observation = 10

# Fix the seed so the subsample and every bootstrap run are reproducible.
set.seed(106354012)
# Draw Sim_num distinct row indices of `data` to act as the observed sample.
Sim_num <- 10
num <- sample(seq_len(nrow(data)), Sim_num, replace = FALSE)

# Bootstrap replicate counts to try: 50, 100, ..., 10000.
rep_grid <- seq(50, 10000, by = 50)
# Preallocate the result vectors instead of growing them with c().
A <- numeric(length(rep_grid))  # variance of the resampled correlations
B <- numeric(length(rep_grid))  # mean of the resampled correlations
for (k in seq_along(rep_grid)) {
  # Two separate Q3() runs per replicate count, variance first and mean
  # second; the call order is kept so the random-number stream is unchanged.
  A[k] <- Q3(num, Sim_num, rep_grid[k])[2]
  B[k] <- Q3(num, Sim_num, rep_grid[k])[1]
}
# mean variance of correlation #
mean(A)
## [1] 0.04274998
# mean correlation #
mean(B)
## [1] 0.7444454
# Scatter plot of the bootstrap variance estimates against their position
# in A, with a smoothed trend line.
# seq_along() keeps the index in step with A instead of hard-coding 200.
temp <- data.frame(num = seq_along(A), boots_var = A)
# Map columns by bare name: `temp$...` inside aes() bypasses the data
# argument that ggplot() was given.
ggplot(temp, aes(x = num, y = boots_var)) +
  geom_point() +
  geom_smooth() +
  labs(title="Bootstrap of variance with 10 Observarion ",
       x="Index",
       y="Variance of correlation") +
  theme_bw()
## `geom_smooth()` using method = 'loess'

# Scatter plot of the bootstrap mean estimates against their position in B,
# with a smoothed trend line.
# seq_along() keeps the index in step with B instead of hard-coding 200.
temp <- data.frame(num = seq_along(B), boots_mean = B)
# Map columns by bare name: `temp$...` inside aes() bypasses the data
# argument that ggplot() was given.
ggplot(temp, aes(x = num, y = boots_mean)) +
  geom_point() +
  geom_smooth() +
  labs(title="Bootstrap of mean with 10 Observarion  ",
       x="Index",
       y="Mean of correlation") +
  theme_bw()
## `geom_smooth()` using method = 'loess'

observation = 15

# Fix the seed so the subsample and every bootstrap run are reproducible.
set.seed(106354012)
# Draw Sim_num distinct row indices of `data` to act as the observed sample.
Sim_num <- 15
num <- sample(seq_len(nrow(data)), Sim_num, replace = FALSE)

# Bootstrap replicate counts to try: 50, 100, ..., 10000.
rep_grid <- seq(50, 10000, by = 50)
# Preallocate the result vectors instead of growing them with c().
A <- numeric(length(rep_grid))  # variance of the resampled correlations
B <- numeric(length(rep_grid))  # mean of the resampled correlations
for (k in seq_along(rep_grid)) {
  # Two separate Q3() runs per replicate count, variance first and mean
  # second; the call order is kept so the random-number stream is unchanged.
  A[k] <- Q3(num, Sim_num, rep_grid[k])[2]
  B[k] <- Q3(num, Sim_num, rep_grid[k])[1]
}
# mean variance of correlation #
mean(A)
## [1] 0.01197177
# mean correlation #
mean(B)
## [1] 0.8001203
# Scatter plot of the bootstrap variance estimates against their position
# in A, with a smoothed trend line.
# seq_along() keeps the index in step with A instead of hard-coding 200.
temp <- data.frame(num = seq_along(A), boots_var = A)
# Map columns by bare name: `temp$...` inside aes() bypasses the data
# argument that ggplot() was given.
ggplot(temp, aes(x = num, y = boots_var)) +
  geom_point() +
  geom_smooth() +
  labs(title="Bootstrap of variance with 15 Observarion ",
       x="Index",
       y="Variance of correlation") +
  theme_bw()
## `geom_smooth()` using method = 'loess'

# Scatter plot of the bootstrap mean estimates against their position in B,
# with a smoothed trend line.
# seq_along() keeps the index in step with B instead of hard-coding 200.
temp <- data.frame(num = seq_along(B), boots_mean = B)
# Map columns by bare name: `temp$...` inside aes() bypasses the data
# argument that ggplot() was given.
ggplot(temp, aes(x = num, y = boots_mean)) +
  geom_point() +
  geom_smooth() +
  labs(title="Bootstrap of mean with 15 Observarion ",
       x="Index",
       y="Mean of correlation") +
  theme_bw()
## `geom_smooth()` using method = 'loess'

observation = 20

# Fix the seed so the subsample and every bootstrap run are reproducible.
set.seed(106354012)
# Draw Sim_num distinct row indices of `data` to act as the observed sample.
Sim_num <- 20
num <- sample(seq_len(nrow(data)), Sim_num, replace = FALSE)

# Bootstrap replicate counts to try: 50, 100, ..., 10000.
rep_grid <- seq(50, 10000, by = 50)
# Preallocate the result vectors instead of growing them with c().
A <- numeric(length(rep_grid))  # variance of the resampled correlations
B <- numeric(length(rep_grid))  # mean of the resampled correlations
for (k in seq_along(rep_grid)) {
  # Two separate Q3() runs per replicate count, variance first and mean
  # second; the call order is kept so the random-number stream is unchanged.
  A[k] <- Q3(num, Sim_num, rep_grid[k])[2]
  B[k] <- Q3(num, Sim_num, rep_grid[k])[1]
}
# mean variance of correlation #
mean(A)
## [1] 0.008962489
# mean correlation #
mean(B)
## [1] 0.7763331
# Scatter plot of the bootstrap variance estimates against their position
# in A, with a smoothed trend line.
# seq_along() keeps the index in step with A instead of hard-coding 200.
temp <- data.frame(num = seq_along(A), boots_var = A)

# Map columns by bare name: `temp$...` inside aes() bypasses the data
# argument that ggplot() was given.
ggplot(temp, aes(x = num, y = boots_var)) +
  geom_point() +
  geom_smooth() +
  labs(title="Bootstrap of variance with 20 Observarion ",
       x="Index",
       y="Variance of correlation") +
  theme_bw()
## `geom_smooth()` using method = 'loess'

# Scatter plot of the bootstrap mean estimates against their position in B,
# with a smoothed trend line.
# seq_along() keeps the index in step with B instead of hard-coding 200.
temp <- data.frame(num = seq_along(B), boots_mean = B)
# Map columns by bare name: `temp$...` inside aes() bypasses the data
# argument that ggplot() was given.
ggplot(temp, aes(x = num, y = boots_mean)) +
  geom_point() +
  geom_smooth() +
  labs(title="Bootstrap of mean with 20 Observarion ",
       x="Index",
       y="Mean of correlation") +
  theme_bw()
## `geom_smooth()` using method = 'loess'

observation = 25

# Fix the seed so the subsample and every bootstrap run are reproducible.
set.seed(106354012)
# Draw Sim_num distinct row indices of `data` to act as the observed sample.
Sim_num <- 25
num <- sample(seq_len(nrow(data)), Sim_num, replace = FALSE)

# Bootstrap replicate counts to try: 50, 100, ..., 10000.
rep_grid <- seq(50, 10000, by = 50)
# Preallocate the result vectors instead of growing them with c().
A <- numeric(length(rep_grid))  # variance of the resampled correlations
B <- numeric(length(rep_grid))  # mean of the resampled correlations
for (k in seq_along(rep_grid)) {
  # Two separate Q3() runs per replicate count, variance first and mean
  # second; the call order is kept so the random-number stream is unchanged.
  A[k] <- Q3(num, Sim_num, rep_grid[k])[2]
  B[k] <- Q3(num, Sim_num, rep_grid[k])[1]
}
# mean variance of correlation #
mean(A)
## [1] 0.008449368
# mean correlation #
mean(B)
## [1] 0.7656147
# Scatter plot of the bootstrap variance estimates against their position
# in A, with a smoothed trend line.
# seq_along() keeps the index in step with A instead of hard-coding 200.
temp <- data.frame(num = seq_along(A), boots_var = A)

# Map columns by bare name: `temp$...` inside aes() bypasses the data
# argument that ggplot() was given.
ggplot(temp, aes(x = num, y = boots_var)) +
  geom_point() +
  geom_smooth() +
  labs(title="Bootstrap of variance with 25 Observarion ",
       x="Index",
       y="Variance of correlation") +
  theme_bw()
## `geom_smooth()` using method = 'loess'

# Scatter plot of the bootstrap mean estimates against their position in B,
# with a smoothed trend line.
# seq_along() keeps the index in step with B instead of hard-coding 200.
temp <- data.frame(num = seq_along(B), boots_mean = B)
# Map columns by bare name: `temp$...` inside aes() bypasses the data
# argument that ggplot() was given.
ggplot(temp, aes(x = num, y = boots_mean)) +
  geom_point() +
  geom_smooth() +
  labs(title="Bootstrap of mean with 25 Observarion ",
       x="Index",
       y="Mean of correlation") +
  theme_bw()
## `geom_smooth()` using method = 'loess'