Question 03
library(bootstrap)
library(dplyr)
library(kableExtra)
library(knitr)
library(ggplot2)
Read data
# Work on a copy of the law82 data (columns School, LSAT, GPA) and render it
# as a scrollable HTML table.
data <- law82
data %>%
  kable(format = "html") %>%
  kable_styling() %>%
  scroll_box(height = "200px")
|
School
|
LSAT
|
GPA
|
|
1
|
622
|
3.23
|
|
2
|
542
|
2.83
|
|
3
|
579
|
3.24
|
|
4
|
653
|
3.12
|
|
5
|
606
|
3.09
|
|
6
|
576
|
3.39
|
|
7
|
620
|
3.10
|
|
8
|
615
|
3.40
|
|
9
|
553
|
2.97
|
|
10
|
607
|
2.91
|
|
11
|
558
|
3.11
|
|
12
|
596
|
3.24
|
|
13
|
635
|
3.30
|
|
14
|
581
|
3.22
|
|
15
|
661
|
3.43
|
|
16
|
547
|
2.91
|
|
17
|
599
|
3.23
|
|
18
|
646
|
3.47
|
|
19
|
622
|
3.15
|
|
20
|
611
|
3.33
|
|
21
|
546
|
2.99
|
|
22
|
614
|
3.19
|
|
23
|
628
|
3.03
|
|
24
|
575
|
3.01
|
|
25
|
662
|
3.39
|
|
26
|
627
|
3.41
|
|
27
|
608
|
3.04
|
|
28
|
632
|
3.29
|
|
29
|
587
|
3.16
|
|
30
|
581
|
3.17
|
|
31
|
605
|
3.13
|
|
32
|
704
|
3.36
|
|
33
|
477
|
2.57
|
|
34
|
591
|
3.02
|
|
35
|
578
|
3.03
|
|
36
|
572
|
2.88
|
|
37
|
615
|
3.37
|
|
38
|
606
|
3.20
|
|
39
|
603
|
3.23
|
|
40
|
535
|
2.98
|
|
41
|
595
|
3.11
|
|
42
|
575
|
2.92
|
|
43
|
573
|
2.85
|
|
44
|
644
|
3.38
|
|
45
|
545
|
2.76
|
|
46
|
645
|
3.27
|
|
47
|
651
|
3.36
|
|
48
|
562
|
3.19
|
|
49
|
609
|
3.17
|
|
50
|
555
|
3.00
|
|
51
|
586
|
3.11
|
|
52
|
580
|
3.07
|
|
53
|
594
|
2.96
|
|
54
|
594
|
3.05
|
|
55
|
560
|
2.93
|
|
56
|
641
|
3.28
|
|
57
|
512
|
3.01
|
|
58
|
631
|
3.21
|
|
59
|
597
|
3.32
|
|
60
|
621
|
3.24
|
|
61
|
617
|
3.03
|
|
62
|
637
|
3.33
|
|
62
|
572
|
3.08
|
|
64
|
610
|
3.13
|
|
65
|
562
|
3.01
|
|
66
|
635
|
3.30
|
|
67
|
614
|
3.15
|
|
68
|
546
|
2.82
|
|
69
|
598
|
3.20
|
|
70
|
666
|
3.44
|
|
71
|
570
|
3.01
|
|
72
|
570
|
2.92
|
|
73
|
605
|
3.45
|
|
74
|
565
|
3.15
|
|
75
|
686
|
3.50
|
|
76
|
608
|
3.16
|
|
77
|
595
|
3.19
|
|
78
|
590
|
3.15
|
|
79
|
558
|
2.81
|
|
80
|
611
|
3.16
|
|
81
|
564
|
3.02
|
|
82
|
575
|
2.74
|
# Correlation between LSAT and GPA computed on every row of `data`.
with(data, cor(LSAT, GPA))
## [1] 0.7599979
Bootstrap correlation
# Bootstrap the correlation between columns 2 and 3 of the global data frame
# `data` (LSAT and GPA for law82), resampling only within a given subsample.
#
# Args:
#   num:     Vector of row indices of `data` that make up the subsample.
#   Sim_num: Number of rows drawn (with replacement) in each bootstrap resample.
#   rep_num: Number of bootstrap replications.
#
# Returns:
#   Numeric vector of length 2: the mean and the variance of the `rep_num`
#   bootstrap correlations.
Q3 <- function(num, Sim_num, rep_num) {
  # Preallocate instead of growing the vector on every replication.
  Sim_cor <- numeric(rep_num)
  for (i in seq_len(rep_num)) {
    # Index into `num` explicitly: sample(num, ...) would silently draw from
    # 1:num if `num` had length one. For longer `num` this consumes the random
    # number stream exactly as sample(num, Sim_num, TRUE) does.
    temp1 <- num[sample.int(length(num), Sim_num, replace = TRUE)]
    Sim_cor[i] <- cor(data[temp1, 2:3])[1, 2]
  }
  c(mean(Sim_cor), var(Sim_cor))
}
observation = 10
# Draw a fixed subsample of 10 rows of `data` without replacement, then
# bootstrap it with 50, 100, ..., 10000 replications.
set.seed(106354012)
Sim_num <- 10
num <- sample(seq_len(nrow(data)), Sim_num, replace = FALSE)
rep_grid <- seq(50, 10000, by = 50)
A <- numeric(length(rep_grid))  # bootstrap variance of the correlation
B <- numeric(length(rep_grid))  # bootstrap mean of the correlation
for (k in seq_along(rep_grid)) {
  # Two separate Q3 runs per replication count, variance first, so the random
  # number stream matches the run that produced the outputs recorded below.
  A[k] <- Q3(num, Sim_num, rep_grid[k])[2]
  B[k] <- Q3(num, Sim_num, rep_grid[k])[1]
}
# mean variance of correlation #
mean(A)
## [1] 0.04274998
# mean correlation #
mean(B)
## [1] 0.7444454
# Scatter plot with a smoothed trend of the bootstrap variances in `A`
# (one value per replication count), plotted against their position in `A`.
temp <- data.frame(num = seq_along(A), boots_var = A)
# Map columns by name; `temp$...` inside aes() is discouraged by ggplot2.
ggplot(temp, aes(x = num, y = boots_var)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Bootstrap of variance with 10 Observarion ",
    x = "Index",
    y = "Variance of correlation"
  ) +
  theme_bw()
## `geom_smooth()` using method = 'loess'

# Scatter plot with a smoothed trend of the bootstrap means in `B`
# (one value per replication count), plotted against their position in `B`.
temp <- data.frame(num = seq_along(B), boots_mean = B)
# Map columns by name; `temp$...` inside aes() is discouraged by ggplot2.
ggplot(temp, aes(x = num, y = boots_mean)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Bootstrap of mean with 10 Observarion ",
    x = "Index",
    y = "Mean of correlation"
  ) +
  theme_bw()
## `geom_smooth()` using method = 'loess'

observation = 15
# Draw a fixed subsample of 15 rows of `data` without replacement, then
# bootstrap it with 50, 100, ..., 10000 replications.
set.seed(106354012)
Sim_num <- 15
num <- sample(seq_len(nrow(data)), Sim_num, replace = FALSE)
rep_grid <- seq(50, 10000, by = 50)
A <- numeric(length(rep_grid))  # bootstrap variance of the correlation
B <- numeric(length(rep_grid))  # bootstrap mean of the correlation
for (k in seq_along(rep_grid)) {
  # Two separate Q3 runs per replication count, variance first, so the random
  # number stream matches the run that produced the outputs recorded below.
  A[k] <- Q3(num, Sim_num, rep_grid[k])[2]
  B[k] <- Q3(num, Sim_num, rep_grid[k])[1]
}
# mean variance of correlation #
mean(A)
## [1] 0.01197177
# mean correlation #
mean(B)
## [1] 0.8001203
# Scatter plot with a smoothed trend of the bootstrap variances in `A`
# (one value per replication count), plotted against their position in `A`.
temp <- data.frame(num = seq_along(A), boots_var = A)
# Map columns by name; `temp$...` inside aes() is discouraged by ggplot2.
ggplot(temp, aes(x = num, y = boots_var)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Bootstrap of variance with 15 Observarion ",
    x = "Index",
    y = "Variance of correlation"
  ) +
  theme_bw()
## `geom_smooth()` using method = 'loess'

# Scatter plot with a smoothed trend of the bootstrap means in `B`
# (one value per replication count), plotted against their position in `B`.
temp <- data.frame(num = seq_along(B), boots_mean = B)
# Map columns by name; `temp$...` inside aes() is discouraged by ggplot2.
ggplot(temp, aes(x = num, y = boots_mean)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Bootstrap of mean with 15 Observarion ",
    x = "Index",
    y = "Mean of correlation"
  ) +
  theme_bw()
## `geom_smooth()` using method = 'loess'

observation = 20
# Draw a fixed subsample of 20 rows of `data` without replacement, then
# bootstrap it with 50, 100, ..., 10000 replications.
set.seed(106354012)
Sim_num <- 20
num <- sample(seq_len(nrow(data)), Sim_num, replace = FALSE)
rep_grid <- seq(50, 10000, by = 50)
A <- numeric(length(rep_grid))  # bootstrap variance of the correlation
B <- numeric(length(rep_grid))  # bootstrap mean of the correlation
for (k in seq_along(rep_grid)) {
  # Two separate Q3 runs per replication count, variance first, so the random
  # number stream matches the run that produced the outputs recorded below.
  A[k] <- Q3(num, Sim_num, rep_grid[k])[2]
  B[k] <- Q3(num, Sim_num, rep_grid[k])[1]
}
# mean variance of correlation #
mean(A)
## [1] 0.008962489
# mean correlation #
mean(B)
## [1] 0.7763331
# Scatter plot with a smoothed trend of the bootstrap variances in `A`
# (one value per replication count), plotted against their position in `A`.
temp <- data.frame(num = seq_along(A), boots_var = A)
# Map columns by name; `temp$...` inside aes() is discouraged by ggplot2.
ggplot(temp, aes(x = num, y = boots_var)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Bootstrap of variance with 20 Observarion ",
    x = "Index",
    y = "Variance of correlation"
  ) +
  theme_bw()
## `geom_smooth()` using method = 'loess'

# Scatter plot with a smoothed trend of the bootstrap means in `B`
# (one value per replication count), plotted against their position in `B`.
temp <- data.frame(num = seq_along(B), boots_mean = B)
# Map columns by name; `temp$...` inside aes() is discouraged by ggplot2.
ggplot(temp, aes(x = num, y = boots_mean)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Bootstrap of mean with 20 Observarion ",
    x = "Index",
    y = "Mean of correlation"
  ) +
  theme_bw()
## `geom_smooth()` using method = 'loess'

observation = 25
# Draw a fixed subsample of 25 rows of `data` without replacement, then
# bootstrap it with 50, 100, ..., 10000 replications.
set.seed(106354012)
Sim_num <- 25
num <- sample(seq_len(nrow(data)), Sim_num, replace = FALSE)
rep_grid <- seq(50, 10000, by = 50)
A <- numeric(length(rep_grid))  # bootstrap variance of the correlation
B <- numeric(length(rep_grid))  # bootstrap mean of the correlation
for (k in seq_along(rep_grid)) {
  # Two separate Q3 runs per replication count, variance first, so the random
  # number stream matches the run that produced the outputs recorded below.
  A[k] <- Q3(num, Sim_num, rep_grid[k])[2]
  B[k] <- Q3(num, Sim_num, rep_grid[k])[1]
}
# mean variance of correlation #
mean(A)
## [1] 0.008449368
# mean correlation #
mean(B)
## [1] 0.7656147
# Scatter plot with a smoothed trend of the bootstrap variances in `A`
# (one value per replication count), plotted against their position in `A`.
temp <- data.frame(num = seq_along(A), boots_var = A)
# Map columns by name; `temp$...` inside aes() is discouraged by ggplot2.
ggplot(temp, aes(x = num, y = boots_var)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Bootstrap of variance with 25 Observarion ",
    x = "Index",
    y = "Variance of correlation"
  ) +
  theme_bw()
## `geom_smooth()` using method = 'loess'

# Scatter plot with a smoothed trend of the bootstrap means in `B`
# (one value per replication count), plotted against their position in `B`.
temp <- data.frame(num = seq_along(B), boots_mean = B)
# Map columns by name; `temp$...` inside aes() is discouraged by ggplot2.
ggplot(temp, aes(x = num, y = boots_mean)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Bootstrap of mean with 25 Observarion ",
    x = "Index",
    y = "Mean of correlation"
  ) +
  theme_bw()
## `geom_smooth()` using method = 'loess'
