#데이터 만들기
x<-rnorm(1000,0,1)
m<-x+rnorm(1000,0,1)
y<-x-m+rnorm(1000,0,1)
coco<-rnorm(1000,0,1)
coco2<-rnorm(1000,0,1)
da<-data.frame(y,x,m,coco,coco2)
library(stats)
이전에 한 방법과 같이 루프를 사용하여 bootstrapping을 해 보았다. 약 18초정도가 소요되었다. 이렇게 느려서 어따 쓸까…
##total_result
boot2<-function(xxx,mmm,yyy,d,bootnum){
###estimate a*m
boot1<-function(xxx,mmm,yyy,d){
n<-sample(1:nrow(d),nrow(d),replace = T)
nnk<-d[n,]
nnk<-as.data.frame(nnk)
k1<-lm(nnk[,mmm]~ nnk[,xxx], data=nnk)
k2<-lm(nnk[,yyy]~ nnk[,xxx] + nnk[,mmm], data=nnk)
s1<-summary(k1)
s2<-summary(k2)
coea<-c(s1$coefficients[2,1])
coem<-c(s2$coefficients[3,1])
dididi<-c(s2$coefficients[2,1])
tototo<-c(coea*coem + dididi)
eff<-data.frame(coea*coem,dididi, tototo)
colnames(eff)<-c("indirect", "direct", "total")
eff}
k<-1
l<-matrix(rep(NA,3*bootnum),ncol = 3)
l<-as.data.frame(l)
repeat{
l[k,]<-boot1(xxx,mmm,yyy,d)
k<-k+1
if(k>=bootnum+1) break
}
ci1<-quantile(l[,1],probs = c(0.01,0.05,0.10,0.90,0.95,0.99))
ci2<-quantile(l[,2],probs = c(0.01,0.05,0.10,0.90,0.95,0.99))
ci3<-quantile(l[,3],probs = c(0.01,0.05,0.10,0.90,0.95,0.99))
kmkmkmkm<-list(c(mean(l[,1]),sd(l[,1])),ci1,c(mean(l[,2]),sd(l[,2])),ci2, c(mean(l[,3]),sd(l[,3])),ci3)
names(kmkmkmkm)<-c("indirecteffect_mean_BootSE", "indirecteffect_CI","directeffect_mean_BootSE", "directeffect_CI",
"totaleffect_mean_BootSE", "totaleffect_CI")
kmkmkmkm
}
system.time(boot2(3,2,1,da,5000))
## user system elapsed
## 17.25 0.07 17.37
병렬처리를 해 보았다. 일단 할당 코어를 설정하였다.
## Loading required package: iterators
## Loading required package: parallel
그 다음 병렬처리에 맞게 함수를 살짝 수정했다.
boot3<-function(xxx,mmm,yyy,d,bootnum){
index<-1
l<-matrix(rep(NA,3*bootnum),ncol = 3)
l<-as.data.frame(l)
foreach(index = 1:bootnum, .combine = rbind) %dopar%
{
boot1<-function(xxx,mmm,yyy,d){
colnames(d)<-c("yyy","xxx","mmm")
dadadada<-d[,-1]
colnames(dadadada)<-c("xxx","mmm")
n<-sample(1:nrow(d),nrow(d),replace = T)
nnk<-d[n,]
nnk1<-dadadada[n,]
nnk<-as.data.frame(nnk)
nnk1<-as.data.frame(nnk1)
k1<-lm(mmm ~xxx + nnk1[,3]+nnk1[,4] , data=nnk1)
k2<-lm(yyy~ xxx + mmm +nnk[,4]+nnk[,5] , data=nnk)
s1<-summary(k1)
s2<-summary(k2)
coea<-c(s1$coefficients[2,1])
coeb<-c(s2$coefficients[3,1])
dididi<-c(s2$coefficients[2,1])
tototo<-c(coea*coeb + dididi)
eff<-data.frame(coea*coeb,dididi, tototo)
colnames(eff)<-c("indirect", "direct", "total")
eff
}
l[index,]<-boot1(xxx,yyy,mmm,d)
}
}
그 다음 동일하게 5000번 연산을 하였더니 약 8초로 소요시간이 감소 하였다. 그렇지만 여전히 SPSS에서 macro로 연산하는게 더 빠른 것 같아서 아쉽다…굳이…
system.time(boot3(x,m,y,da,5000))
## user system elapsed
## 2.17 0.33 7.08
vv<-boot3(x,m,y,da,5000)
boot_model4_summary<-function(vv){
ci1<-quantile(vv[,1],probs = c(0.01,0.05,0.10,0.90,0.95,0.99))
ci2<-quantile(vv[,2],probs = c(0.01,0.05,0.10,0.90,0.95,0.99))
ci3<-quantile(vv[,3],probs = c(0.01,0.05,0.10,0.90,0.95,0.99))
kmkmkmkm<-list(c(mean(vv[,1]),sd(vv[,1])),ci1,c(mean(vv[,2]),sd(vv[,2])),ci2, c(mean(vv[,3]),sd(vv[,3])),ci3)
names(kmkmkmkm)<-c("indirecteffect_mean_BootSE", "indirecteffect_CI","directeffect_mean_BootSE", "directeffect_CI",
"totaleffect_mean_BootSE", "totaleffect_CI")
kmkmkmkm
}
boot_model4_summary(vv)
## $indirecteffect_mean_BootSE
## [1] -1.00659886 0.04565939
##
## $indirecteffect_CI
## 1% 5% 10% 90% 95% 99%
## -1.1141467 -1.0827747 -1.0650272 -0.9485107 -0.9319799 -0.9048939
##
## $directeffect_mean_BootSE
## [1] 1.01494577 0.04454202
##
## $directeffect_CI
## 1% 5% 10% 90% 95% 99%
## 0.9121387 0.9421014 0.9576429 1.0726808 1.0882354 1.1170318
##
## $totaleffect_mean_BootSE
## [1] 0.008346909 0.045618006
##
## $totaleffect_CI
## 1% 5% 10% 90% 95% 99%
## -0.09678446 -0.06610988 -0.04978444 0.06797253 0.08128519 0.11308740