The five imputed data sets are found below.
require(norm)
## Loading required package: norm
## Warning: package 'norm' was built under R version 3.3.2
hw3data <- read.csv("C:/Users/Kajal/Downloads/hw3data.csv")
impDatList<-list()
prelim.dat<-prelim.norm(as.matrix(hw3data))
theta.hat<-em.norm(prelim.dat)
## Iterations of EM:
## 1...2...3...4...5...6...7...8...9...10...11...12...13...
rngseed(1234)
impDatList[[1]]<-imp.norm(prelim.dat,theta.hat,as.matrix(hw3data))
for (i in 2:5){
theta.hat.DA<-da.norm(prelim.dat,theta.hat, steps=100)
impDatList[[i]]<-imp.norm(prelim.dat,theta.hat.DA,as.matrix(hw3data))
}
impDatList<-lapply(impDatList,as.data.frame)
impDatList[[1]][2,]
## y x1 x2 x3
## 2 -64.55692 0.1341045 12.24235 97.85021
impDatList[[2]][2,]
## y x1 x2 x3
## 2 -64.55692 0.1341045 12.24235 97.85021
impDatList[[3]][2,]
## y x1 x2 x3
## 2 -64.55692 0.1341045 12.24235 97.85021
From the summaries, we see the different estimates for each \(B_0\) and \(B_1\)
lm1<-lm(y~x1+x2+x3,data=as.data.frame(impDatList[[1]]))
summary(lm1)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = as.data.frame(impDatList[[1]]))
##
## Residuals:
## Min 1Q Median 3Q Max
## -142.839 -34.419 -1.536 32.520 153.322
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -102.7383 45.1271 -2.277 0.0232 *
## x1 -4.4084 3.5466 -1.243 0.2145
## x2 -0.4712 1.2474 -0.378 0.7058
## x3 1.1035 0.4437 2.487 0.0132 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 51.19 on 496 degrees of freedom
## Multiple R-squared: 0.01311, Adjusted R-squared: 0.007141
## F-statistic: 2.196 on 3 and 496 DF, p-value: 0.08762
lm2<-lm(y~x1+x2+x3,data=as.data.frame(impDatList[[2]]))
summary(lm2)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = as.data.frame(impDatList[[2]]))
##
## Residuals:
## Min 1Q Median 3Q Max
## -147.633 -33.471 -2.915 32.955 155.008
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -138.7960 43.3102 -3.205 0.001439 **
## x1 -6.5978 3.3218 -1.986 0.047558 *
## x2 -0.5788 1.2349 -0.469 0.639495
## x3 1.4774 0.4288 3.446 0.000618 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 50.91 on 496 degrees of freedom
## Multiple R-squared: 0.02375, Adjusted R-squared: 0.01785
## F-statistic: 4.022 on 3 and 496 DF, p-value: 0.007611
lm3<-lm(y~x1+x2+x3,data=as.data.frame(impDatList[[3]]))
summary(lm3)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = as.data.frame(impDatList[[3]]))
##
## Residuals:
## Min 1Q Median 3Q Max
## -128.631 -33.277 -1.175 32.629 148.650
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -172.3207 43.7894 -3.935 9.50e-05 ***
## x1 -7.5393 3.3498 -2.251 0.0248 *
## x2 -0.9718 1.2214 -0.796 0.4266
## x3 1.8495 0.4362 4.240 2.67e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 50.58 on 496 degrees of freedom
## Multiple R-squared: 0.03661, Adjusted R-squared: 0.03078
## F-statistic: 6.283 on 3 and 496 DF, p-value: 0.000344
lm4<-lm(y~x1+x2+x3,data=as.data.frame(impDatList[[4]]))
summary(lm4)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = as.data.frame(impDatList[[4]]))
##
## Residuals:
## Min 1Q Median 3Q Max
## -144.675 -33.364 -1.879 32.718 151.587
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -125.5204 44.2050 -2.840 0.00470 **
## x1 -4.6620 3.3386 -1.396 0.16322
## x2 -0.5903 1.2417 -0.475 0.63468
## x3 1.3465 0.4325 3.113 0.00196 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 50.99 on 496 degrees of freedom
## Multiple R-squared: 0.02094, Adjusted R-squared: 0.01502
## F-statistic: 3.536 on 3 and 496 DF, p-value: 0.01471
lm5<-lm(y~x1+x2+x3,data=as.data.frame(impDatList[[5]]))
summary(lm5)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = as.data.frame(impDatList[[5]]))
##
## Residuals:
## Min 1Q Median 3Q Max
## -151.316 -33.660 -1.323 33.661 150.781
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -150.4040 44.8171 -3.356 0.000852 ***
## x1 -7.3838 3.6111 -2.045 0.041406 *
## x2 -0.6507 1.2376 -0.526 0.599312
## x3 1.6003 0.4427 3.615 0.000332 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 50.84 on 496 degrees of freedom
## Multiple R-squared: 0.02651, Adjusted R-squared: 0.02062
## F-statistic: 4.502 on 3 and 496 DF, p-value: 0.003961
Below are the combined estimates across the 5 data sets for each of the parameters, along with the variance. This is labeled below.
betaList<-seList<-list()
for (i in 1:5){
betaList[[i]]<-summary(lm(y~x1+x2+x3,data=as.data.frame(impDatList[[i]])))$coefficients[,1]
seList[[i]]<-summary(lm(y~x1+x2+x3,data=as.data.frame(impDatList[[i]])))$coefficients[,2]
}
M<-5
#Combined Estimates
apply(do.call(rbind,betaList),2,mean)
## (Intercept) x1 x2 x3
## -137.9558847 -6.1182503 -0.6525596 1.4754427
#B
(B<-apply(do.call(rbind,betaList),2,var))
## (Intercept) x1 x2 x3
## 682.88121687 2.22387571 0.03603207 0.07761653
#W
(W<-apply(do.call(rbind,seList)^2,2,mean))
## (Intercept) x1 x2 x3
## 1958.4796341 11.8040462 1.5292409 0.1908197
T<-(1+1/M)*B + W
#Standard Error
sqrt(T)
## (Intercept) x1 x2 x3
## 52.7061391 3.8042998 1.2539854 0.5328785
Below is the approximate rate. However from mi.inference, we see that in Part E under finf, the rate is y= 0.32378281 x1= 0.19791282 x2= 0.02786445 x3= 0.36145372. The approximation below is close to the finf.
B/(W+B)
## (Intercept) x1 x2 x3
## 0.25853386 0.15853209 0.02301967 0.28914328
The 95% confidence are found below.
mi.inference(betaList,seList) #95% confidence interval
## $est
## (Intercept) x1 x2 x3
## -137.9558847 -6.1182503 -0.6525596 1.4754427
##
## $std.err
## (Intercept) x1 x2 x3
## 52.7061391 3.8042998 1.2539854 0.5328785
##
## $df
## (Intercept) x1 x2 x3
## 45.96762 117.64553 5290.40516 37.17936
##
## $signif
## (Intercept) x1 x2 x3
## 0.011952507 0.110461721 0.602814599 0.008723737
##
## $lower
## (Intercept) x1 x2 x3
## -244.049857 -13.652035 -3.110888 0.395904
##
## $upper
## (Intercept) x1 x2 x3
## -31.861912 1.415534 1.805769 2.554981
##
## $r
## (Intercept) x1 x2 x3
## 0.41841510 0.22607933 0.02827448 0.48810389
##
## $fminf
## (Intercept) x1 x2 x3
## 0.32378281 0.19791282 0.02786445 0.36145372
The five imputed data sets are found below.
require(mice)
## Loading required package: mice
## Warning: package 'mice' was built under R version 3.3.2
set.seed(1234)
mids<-mice(hw3data,m=5,method="pmm")
##
## iter imp variable
## 1 1 x1 x3
## 1 2 x1 x3
## 1 3 x1 x3
## 1 4 x1 x3
## 1 5 x1 x3
## 2 1 x1 x3
## 2 2 x1 x3
## 2 3 x1 x3
## 2 4 x1 x3
## 2 5 x1 x3
## 3 1 x1 x3
## 3 2 x1 x3
## 3 3 x1 x3
## 3 4 x1 x3
## 3 5 x1 x3
## 4 1 x1 x3
## 4 2 x1 x3
## 4 3 x1 x3
## 4 4 x1 x3
## 4 5 x1 x3
## 5 1 x1 x3
## 5 2 x1 x3
## 5 3 x1 x3
## 5 4 x1 x3
## 5 5 x1 x3
impDatList<-list()
impDatList[[1]]<-complete(mids)
impDatList[[2]]<-complete(mids,2)
impDatList[[3]]<-complete(mids,3)
impDatList[[4]]<-complete(mids,4)
impDatList[[5]]<-complete(mids,5)
impDatList[[1]][2,]
## y x1 x2 x3
## 2 -64.55692 0.1341045 12.24235 97.85021
impDatList[[2]][2,]
## y x1 x2 x3
## 2 -64.55692 0.1341045 12.24235 97.85021
impDatList[[3]][2,]
## y x1 x2 x3
## 2 -64.55692 0.1341045 12.24235 97.85021
impDatList[[4]][2,]
## y x1 x2 x3
## 2 -64.55692 0.1341045 12.24235 97.85021
impDatList[[5]][2,]
## y x1 x2 x3
## 2 -64.55692 0.1341045 12.24235 97.85021
From the summaries, we see the different estimates for each \(B_0\) and \(B_1\)
lm1<-lm(y~x1+x2+x3,data=as.data.frame(impDatList[[1]]))
summary(lm1)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = as.data.frame(impDatList[[1]]))
##
## Residuals:
## Min 1Q Median 3Q Max
## -153.830 -34.663 -1.855 33.052 152.926
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -60.8919 44.4517 -1.370 0.171
## x1 -0.4186 3.5515 -0.118 0.906
## x2 -0.7892 1.2584 -0.627 0.531
## x3 0.7171 0.4399 1.630 0.104
##
## Residual standard error: 51.29 on 496 degrees of freedom
## Multiple R-squared: 0.009366, Adjusted R-squared: 0.003374
## F-statistic: 1.563 on 3 and 496 DF, p-value: 0.1975
lm2<-lm(y~x1+x2+x3,data=as.data.frame(impDatList[[2]]))
summary(lm2)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = as.data.frame(impDatList[[2]]))
##
## Residuals:
## Min 1Q Median 3Q Max
## -143.037 -33.369 -2.148 32.979 151.989
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -135.539 42.582 -3.183 0.001549 **
## x1 -6.346 3.264 -1.944 0.052474 .
## x2 -0.554 1.234 -0.449 0.653772
## x3 1.439 0.420 3.426 0.000663 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 50.92 on 496 degrees of freedom
## Multiple R-squared: 0.02343, Adjusted R-squared: 0.01753
## F-statistic: 3.967 on 3 and 496 DF, p-value: 0.008205
lm3<-lm(y~x1+x2+x3,data=as.data.frame(impDatList[[3]]))
summary(lm3)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = as.data.frame(impDatList[[3]]))
##
## Residuals:
## Min 1Q Median 3Q Max
## -146.654 -34.477 -1.538 32.334 153.232
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -137.5152 44.8898 -3.063 0.00231 **
## x1 -6.9224 3.3312 -2.078 0.03822 *
## x2 -0.2534 1.2323 -0.206 0.83716
## x3 1.4304 0.4386 3.261 0.00119 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 50.98 on 496 degrees of freedom
## Multiple R-squared: 0.02105, Adjusted R-squared: 0.01512
## F-statistic: 3.554 on 3 and 496 DF, p-value: 0.01436
lm4<-lm(y~x1+x2+x3,data=as.data.frame(impDatList[[4]]))
summary(lm4)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = as.data.frame(impDatList[[4]]))
##
## Residuals:
## Min 1Q Median 3Q Max
## -134.830 -33.760 -1.527 33.418 152.108
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -148.4622 42.7552 -3.472 0.000561 ***
## x1 -7.7574 3.3348 -2.326 0.020411 *
## x2 -0.3924 1.2370 -0.317 0.751182
## x3 1.5551 0.4201 3.702 0.000238 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 50.83 on 496 degrees of freedom
## Multiple R-squared: 0.02691, Adjusted R-squared: 0.02103
## F-statistic: 4.573 on 3 and 496 DF, p-value: 0.003592
lm5<-lm(y~x1+x2+x3,data=as.data.frame(impDatList[[5]]))
summary(lm5)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = as.data.frame(impDatList[[5]]))
##
## Residuals:
## Min 1Q Median 3Q Max
## -146.894 -34.093 -2.109 32.572 153.769
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -62.0893 42.6913 -1.454 0.1465
## x1 -1.6562 3.3965 -0.488 0.6260
## x2 -0.6458 1.2514 -0.516 0.6060
## x3 0.7141 0.4313 1.656 0.0984 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 51.35 on 496 degrees of freedom
## Multiple R-squared: 0.006918, Adjusted R-squared: 0.0009118
## F-statistic: 1.152 on 3 and 496 DF, p-value: 0.3277
Below are the combined estimates across the 5 data sets for each of the parameters, along with the variance. This is labeled below.
betaList<-seList<-list()
for (i in 1:5){
betaList[[i]]<-summary(lm(y~x1+x2+x3,data=as.data.frame(impDatList[[i]])))$coefficients[,1]
seList[[i]]<-summary(lm(y~x1+x2+x3,data=as.data.frame(impDatList[[i]])))$coefficients[,2]
}
M<-5
apply(do.call(rbind,betaList),2,mean)
## (Intercept) x1 x2 x3
## -108.8994370 -4.6200156 -0.5269705 1.1711433
(B<-apply(do.call(rbind,betaList),2,var))
## (Intercept) x1 x2 x3
## 1.897407e+03 1.113941e+01 4.413824e-02 1.753721e-01
(W<-apply(do.call(rbind,seList)^2,2,mean))
## (Intercept) x1 x2 x3
## 1890.9719198 11.4046745 1.5443709 0.1849593
T<-(1+1/M)*B + W
sqrt(T)
## (Intercept) x1 x2 x3
## 64.558971 4.977144 1.263858 0.628813
Below is the approximate rate. However from mi.inference, we see that in Part E under finf, the rate is y=0.60161624 x1=0.59462756 x2=0.03368997 x3=0.58687248. The approximation below is close to the finf.
B/(W+B)
## (Intercept) x1 x2 x3
## 0.50084937 0.49411667 0.02778596 0.48669675
This is the 95% confidence interval with the lower and upper bound.
require(norm)
mi.inference(betaList,seList)
## $est
## (Intercept) x1 x2 x3
## -108.8994370 -4.6200156 -0.5269705 1.1711433
##
## $std.err
## (Intercept) x1 x2 x3
## 64.558971 4.977144 1.263858 0.628813
##
## $df
## (Intercept) x1 x2 x3
## 13.40302 13.73707 3637.98053 14.12090
##
## $signif
## (Intercept) x1 x2 x3
## 0.11476097 0.36930350 0.67673671 0.08347384
##
## $lower
## (Intercept) x1 x2 x3
## -247.9454970 -15.3141278 -3.0049109 -0.1764439
##
## $upper
## (Intercept) x1 x2 x3
## 30.146623 6.074097 1.950970 2.518731
##
## $r
## (Intercept) x1 x2 x3
## 1.2040839 1.1720885 0.0342961 1.1377993
##
## $fminf
## (Intercept) x1 x2 x3
## 0.60161624 0.59462756 0.03368997 0.58687248
Each \(\hat{\rho}\) value is found below. Set 1 0.3293771 Set 2 0.2971818 Set 3 0.2714318 Set 4 0.2936432 Set 5 0.3427167
rho1<-cor(impDatList[[1]][,3], impDatList[[1]][,4])
rho1
## [1] 0.3293771
rho2<-cor(impDatList[[2]][,3], impDatList[[2]][,4])
rho2
## [1] 0.2971818
rho3<-cor(impDatList[[3]][,3], impDatList[[3]][,4])
rho3
## [1] 0.2714318
rho4<-cor(impDatList[[4]][,3], impDatList[[4]][,4])
rho4
## [1] 0.2936432
rho5<-cor(impDatList[[5]][,3], impDatList[[5]][,4])
rho5
## [1] 0.3427167
The standard errors of the data sets are as follows: Set 1 0.03747854 Set 2 0.03745454 Set 3 0.03836562 Set 4 0.03659800 Set 5 0.03523512
se<-c()
rho_hat<-c()
for(m in 1:5){
rho_hat[[m]]<-cor(impDatList[[m]][,3:4])[1,2]
for(i in 1:1000){
ind <-sample(1:500,500,replace=TRUE)
boot<-impDatList[[m]][ind,]
rho_hat[i]<-cor(boot[,3], boot[,4])
}
se[m]<-sqrt(var(rho_hat))
}
se
## [1] 0.03747854 0.03745454 0.03836562 0.03659800 0.03523512
mean(rho_hat)
## [1] 0.3436373
var(rho_hat)
## [1] 0.001241514
sqrt(var(rho_hat))
## [1] 0.03523512
The combined estimate is 0.3436373 and the combined standard error is 0.05113532.
rho_hat<-lapply(rho_hat, as.matrix)
se<- lapply(se, as.matrix)
mi.inference(rho_hat, se)
## $est
## [1] 0.3436373
##
## $std.err
## [1] 0.05113532
##
## $df
## [1] 4422.607
##
## $signif
## [1] 2.045808e-11
##
## $lower
## [1] 0.2433865
##
## $upper
## [1] 0.4438881
##
## $r
## [1] 0.9057545
##
## $fminf
## [1] 0.4755106
```