# install.packages("Hmisc")
# install.packages("psych")
library(Hmisc)
library(psych)
# install.packages("Amelia")
library(Amelia)
# install.packages("Zelig")
library(Zelig)
# Print numbers in fixed (non-scientific) notation with 2 significant digits.
options(scipen = 999, digits = 2)

# Create a new dataset with no missing observations on the model variables.
good <- read.csv("good.csv")
goodR <- na.omit(good[, c("CHID", "readss97", "AGE97", "faminc97",
                          "HOME97", "bthwht", "CHRACE", "WICpreg")])
# attach(goodR) was removed: every model in this script passes `data =`
# explicitly, and attaching a data frame invites silent name masking.

# Run the following regression on the complete cases only.
lm1 <- lm(readss97 ~ AGE97 + faminc97 + bthwht + HOME97 + CHRACE + WICpreg,
          data = goodR)
summary(lm1)##
## Call:
## lm(formula = readss97 ~ AGE97 + faminc97 + bthwht + HOME97 +
## CHRACE + WICpreg, data = goodR)
##
## Residuals:
## Min 1Q Median 3Q Max
## -50.27 -8.99 -0.59 9.24 55.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 76.02737026 2.72627440 27.89 < 0.0000000000000002 ***
## AGE97 0.43810617 0.12146312 3.61 0.00032 ***
## faminc97 0.00004610 0.00000692 6.66 0.000000000036 ***
## bthwht -2.16852141 0.73519573 -2.95 0.00322 **
## HOME97 1.18979122 0.12692926 9.37 < 0.0000000000000002 ***
## CHRACE -0.69706124 0.31853801 -2.19 0.02877 *
## WICpreg -3.56180259 0.78733537 -4.52 0.000006459134 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14 on 1843 degrees of freedom
## Multiple R-squared: 0.19, Adjusted R-squared: 0.187
## F-statistic: 71.9 on 6 and 1843 DF, p-value: <0.0000000000000002
#Figure out to what extent missing data presents a problem among model variables
#Identify the % of observations that have missing data for each variable by running basic descriptive statistics.
# The `describe` function in `Hmisc` determines whether the variable is character, factor, category, binary, discrete numeric, or continuous numeric, and prints a concise statistical summary according to each. Use package `Hmisc` with the n and nmiss options to see what % of observations have missing values.
# Descriptive statistics and histograms for race and the two age variables.
# Each statement sits on its own line; the original had hist()/describe() and
# hist()/library() fused on one line, which does not parse.
library(Hmisc)
# NOTE(review): the recorded output below has the same column layout as the
# psych::describe output later in this file, so psych's describe() is
# presumably masking Hmisc's here (psych is attached after Hmisc at the top).
# Call Hmisc::describe() explicitly if the Hmisc summary is wanted - TODO confirm.
describe(good$CHRACE)## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 3563 1.8 1.2 2 1.6 1.5 1 9 8 2.8 9.9 0.02
describe(good$AGE97)## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 2223 7.5 2.9 7 7.4 4.4 3 13 10 0.04 -1.2 0.06
hist(good$AGE97)
describe(good$AGE02)## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 2644 12 3.7 12 12 4.4 5 19 14 0.05 -1.2 0.07
hist(good$AGE02)
library(Amelia)
detach("package:Hmisc", unload = TRUE)
# For **numerical variables**, use package `psych` with n and nmiss options
# to see what % of observations have missing values;
# Descriptive statistics with psych: overall for the bthwht dummy, then split
# by WICpreg group.
library(psych)
describe(good$bthwht) # dummy## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 3563 0.39 0.49 0 0.36 0 0 1 1 0.46 -1.8 0.01
describeBy(good$bthwht, group = good$WICpreg)##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 1882 0.32 0.47 0 0.27 0 0 1 1 0.77 -1.4 0.01
## --------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 1440 0.42 0.49 0 0.4 0 0 1 1 0.32 -1.9 0.01
detach("package:psych", unload = TRUE)
# List-wise (or case-wise) deletion, probably the most widely used approach,
# ignores observations with any missing data on the variables included in the
# analysis.
# Always check your NA setup for R: print every option, then the active
# na.action specifically.
options()
getOption("na.action")
# List-wise deletion done by lm() itself: the full data set is passed in and
# na.omit drops every row with a missing value on a model variable.
lmLD <- lm(readss97 ~ AGE97 + faminc97 + bthwht + HOME97 + CHRACE + WICpreg,
           data = good, na.action = na.omit)
summary(lmLD) # same as lm1##
## Call:
## lm(formula = readss97 ~ AGE97 + faminc97 + bthwht + HOME97 +
## CHRACE + WICpreg, data = good, na.action = na.omit)
##
## Residuals:
## Min 1Q Median 3Q Max
## -50.27 -8.99 -0.59 9.24 55.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 76.02737026 2.72627440 27.89 < 0.0000000000000002 ***
## AGE97 0.43810617 0.12146312 3.61 0.00032 ***
## faminc97 0.00004610 0.00000692 6.66 0.000000000036 ***
## bthwht -2.16852141 0.73519573 -2.95 0.00322 **
## HOME97 1.18979122 0.12692926 9.37 < 0.0000000000000002 ***
## CHRACE -0.69706124 0.31853801 -2.19 0.02877 *
## WICpreg -3.56180259 0.78733537 -4.52 0.000006459134 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14 on 1843 degrees of freedom
## (1713 observations deleted due to missingness)
## Multiple R-squared: 0.19, Adjusted R-squared: 0.187
## F-statistic: 71.9 on 6 and 1843 DF, p-value: <0.0000000000000002
# Mean substitution replaces each missing value with the mean of the observed
# data for the variable.
# as an illustration
Meansub <- ifelse(is.na(good$AGE02), mean(good$AGE02, na.rm = TRUE), good$AGE02)
View(Meansub)
# Observed mean of AGE02. The original queried goodR, which was built without
# an AGE02 column, so it returned NA with a warning and the comparison below
# could not be made.
mean(good$AGE02, na.rm = TRUE)## [1] 12
# Filling the gaps with the observed mean leaves the mean unchanged.
mean(Meansub) ##the same!## [1] 12
# Mean substitution will bias parameter estimates. And by treating the missing data as if they were observed, mean imputation exaggerates the effective size of the data set, further distorting statistical inference. (DO NOT USE!!!!!!!)
# Amelia's algorithm uses the familiar EM (expectation-maximization) algorithm on multiple bootstrapped samples of the original incomplete data to draw values of the complete-data parameters. The algorithm then draws imputed values from each set of bootstrapped parameters, replacing the missing values with these draws. (See more: https://gking.harvard.edu/files/gking/files/amelia_jss.pdf)
# Collapse the 9-category CHRACE into a two-level contrast code:
#   CHRACE == 1 ->  0.5 (white, per the original notes)
#   CHRACE == 2 -> -0.5 (black, per the original notes)
#   any other value -> NA
summary(good$CHRACE)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 1.0 2.0 1.8 2.0 9.0
# match() returns NA for codes other than 1 or 2, so those rows index to NA.
race_codes <- c(0.5, -0.5)
good$race <- race_codes[match(good$CHRACE, c(1, 2))]
# Keep only the columns used by the imputation model; the column order matters
# because the priors refer to columns by position.
keep_cols <- c("CHID", "read97", "race", "faminc97",
               "AGE97", "HOME97", "WICpreg", "bthwht")
goodNew <- good[, keep_cols]
summary(goodNew$read97)## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 2 45 61 56 71 97 2041
#Next, let's create an imputed dataset.
# Priors are a four or five column matrix containing the priors for either individual missing observations or variable-wide missing values.
# Build the matrix passed to amelia()'s `priors` argument: one row per prior,
# with columns (row, column, min, max, confidence).
# The column index refers to a position in goodNew (2 = read97, 4 = faminc97,
# 5 = AGE97). A row index of 0 presumably applies the prior to every missing
# value of that variable - confirm against the Amelia documentation.
library(Amelia)
read97prior <- c(0,2,7,100,.99)
faminc97prior <- c(0,4,-72296.26,784610.59,.99)
AGE97prior <- c(0,5,3,13,.99)
# rbind() of the named vectors yields a 3 x 5 numeric matrix with named rows.
prior <- rbind(read97prior,faminc97prior,AGE97prior)
# By setting the seed we can reproduce our results exactly.
set.seed(1000)
# Run the EM imputation on goodNew:
#   m = 1              one imputed dataset
#   boot.type = "none" plain EM, no bootstrap
#   noms               used to impute nominal variables (not used here)
#   ords               used to impute ordinal (integer) variables
#   idvars             variables you do not want to impute
goodEM <- amelia(goodNew, m = 1, priors = prior,
boot.type = "none", ords="AGE97",
idvars=c("CHID","race","HOME97","WICpreg","bthwht"))## -- Imputation 1 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13
# Summarise the single imputed dataset returned by the EM run.
summary(goodEM$imputations[[1]])## CHID read97 race faminc97
## Min. : 4037 Min. : 2 Min. :0 Min. :-72296
## 1st Qu.:1439535 1st Qu.:43 1st Qu.:0 1st Qu.: 20176
## Median :2726031 Median :54 Median :0 Median : 39118
## Mean :3324784 Mean :53 Mean :0 Mean : 49841
## 3rd Qu.:5615110 3rd Qu.:64 3rd Qu.:0 3rd Qu.: 64495
## Max. :6872174 Max. :97 Max. :0 Max. :784611
## NA's :466
## AGE97 HOME97 WICpreg bthwht
## Min. : 3.0 Min. : 7.0 Min. :0 Min. :0.00
## 1st Qu.: 6.0 1st Qu.:16.0 1st Qu.:0 1st Qu.:0.00
## Median : 8.0 Median :19.2 Median :0 Median :0.00
## Mean : 7.6 Mean :18.9 Mean :0 Mean :0.39
## 3rd Qu.:10.0 3rd Qu.:21.8 3rd Qu.:1 3rd Qu.:1.00
## Max. :13.0 Max. :27.0 Max. :1 Max. :1.00
## NA's :241
# Summarise the data that was fed to amelia(), for comparison with the
# imputed dataset.
summary(goodNew)## CHID read97 race faminc97
## Min. : 4037 Min. : 2 Min. :0 Min. :-72296
## 1st Qu.:1439535 1st Qu.:45 1st Qu.:0 1st Qu.: 20176
## Median :2726031 Median :61 Median :0 Median : 39118
## Mean :3324784 Mean :56 Mean :0 Mean : 49841
## 3rd Qu.:5615110 3rd Qu.:71 3rd Qu.:0 3rd Qu.: 64495
## Max. :6872174 Max. :97 Max. :0 Max. :784611
## NA's :2041 NA's :466
## AGE97 HOME97 WICpreg bthwht
## Min. : 3 Min. : 7.0 Min. :0 Min. :0.00
## 1st Qu.: 5 1st Qu.:16.0 1st Qu.:0 1st Qu.:0.00
## Median : 7 Median :19.2 Median :0 Median :0.00
## Mean : 7 Mean :18.9 Mean :0 Mean :0.39
## 3rd Qu.:10 3rd Qu.:21.8 3rd Qu.:1 3rd Qu.:1.00
## Max. :13 Max. :27.0 Max. :1 Max. :1.00
## NA's :1340 NA's :241
# Extract the imputed dataset. The recorded summary still reports NA's (466
# and 241): variables listed in idvars (e.g. race, WICpreg) are not imputed.
imputed <- goodEM$imputations[[1]] # read97 and AGE97 imputed; idvars keep their NAs
summary(imputed)## CHID read97 race faminc97
## Min. : 4037 Min. : 2 Min. :0 Min. :-72296
## 1st Qu.:1439535 1st Qu.:43 1st Qu.:0 1st Qu.: 20176
## Median :2726031 Median :54 Median :0 Median : 39118
## Mean :3324784 Mean :53 Mean :0 Mean : 49841
## 3rd Qu.:5615110 3rd Qu.:64 3rd Qu.:0 3rd Qu.: 64495
## Max. :6872174 Max. :97 Max. :0 Max. :784611
## NA's :466
## AGE97 HOME97 WICpreg bthwht
## Min. : 3.0 Min. : 7.0 Min. :0 Min. :0.00
## 1st Qu.: 6.0 1st Qu.:16.0 1st Qu.:0 1st Qu.:0.00
## Median : 8.0 Median :19.2 Median :0 Median :0.00
## Mean : 7.6 Mean :18.9 Mean :0 Mean :0.39
## 3rd Qu.:10.0 3rd Qu.:21.8 3rd Qu.:1 3rd Qu.:1.00
## Max. :13.0 Max. :27.0 Max. :1 Max. :1.00
## NA's :241
# Compare regression results between list-wise deletion and the imputed data.
# First model: the un-imputed data, where lm() drops incomplete rows.
lm2 <- lm(
  read97 ~ AGE97 + faminc97 + bthwht + HOME97 + WICpreg,
  data = goodNew
)
summary(lm2)##
## Call:
## lm(formula = read97 ~ AGE97 + faminc97 + bthwht + HOME97 + WICpreg,
## data = goodNew)
##
## Residuals:
## Min 1Q Median 3Q Max
## -42.63 -7.66 0.58 8.36 37.93
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -28.87537727 2.98166090 -9.68 < 0.0000000000000002 ***
## AGE97 7.16641631 0.17304225 41.41 < 0.0000000000000002 ***
## faminc97 0.00003764 0.00000695 5.42 0.0000000712755379 ***
## bthwht -3.21273060 0.72709180 -4.42 0.0000107122220245 ***
## HOME97 0.99786269 0.12488186 7.99 0.0000000000000028 ***
## WICpreg -2.33210096 0.76573251 -3.05 0.0024 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12 on 1378 degrees of freedom
## (2179 observations deleted due to missingness)
## Multiple R-squared: 0.609, Adjusted R-squared: 0.608
## F-statistic: 430 on 5 and 1378 DF, p-value: <0.0000000000000002
# Second model: same specification, fitted to the EM-imputed dataset.
lmEM <- lm(
  read97 ~ AGE97 + faminc97 + bthwht + HOME97 + WICpreg,
  data = imputed
)
summary(lmEM) ##
## Call:
## lm(formula = read97 ~ AGE97 + faminc97 + bthwht + HOME97 + WICpreg,
## data = imputed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.24 -7.18 1.50 8.64 34.58
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.33616750 1.41923710 17.15 <0.0000000000000002 ***
## AGE97 3.20628290 0.08465726 37.87 <0.0000000000000002 ***
## faminc97 0.00004489 0.00000491 9.14 <0.0000000000000002 ***
## bthwht 0.45311520 0.47352460 0.96 0.3387
## HOME97 0.13653337 0.06608072 2.07 0.0389 *
## WICpreg -1.58618703 0.50206848 -3.16 0.0016 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13 on 3316 degrees of freedom
## (241 observations deleted due to missingness)
## Multiple R-squared: 0.34, Adjusted R-squared: 0.339
## F-statistic: 342 on 5 and 3316 DF, p-value: <0.0000000000000002
# See the changes in coefficients, significance and R^2.
# Multiple imputation is a method for analyzing incomplete multivariate data. This function will take an incomplete dataset in either data frame or matrix form and return m imputed datasets with no missing values.
# The algorithm first creates a bootstrapped version of the original data, estimates the sufficient statistics (with priors if specified) by EM on this bootstrapped sample, and then imputes the missing values of the original data using the estimated sufficient statistics. It repeats this process m times to produce the m complete datasets where the observed values are the same and the unobserved values are drawn from their posterior distributions.
# Multiple imputation: draw m = 5 imputed datasets from goodNew.
set.seed(1000) # for replicating the data
library(Amelia)
# Create the matrix of specifications for the priors.
# Priors are a four or five column matrix containing the priors for either
# individual missing observations or variable-wide missing values:
# (row, column, min, max, confidence)
read97prior <- c(0, 2, 7, 100, .99)
faminc97prior <- c(0, 4, -72296.26, 784610.59, .99)
AGE97prior <- c(0, 5, 3, 13, .99)
prior <- rbind(read97prior, faminc97prior, AGE97prior)
# The assignment below starts on its own line; the original fused it onto the
# rbind() line above, which does not parse.
# NOTE(review): parallel = "multicore" presumably relies on forking, which is
# not available on Windows; confirm the seed still reproduces the draws when
# more than one core is actually used.
goodEMultiple <- amelia(goodNew, m = 5, priors = prior,
                        parallel = "multicore", ords = "AGE97",
                        idvars = c("CHID", "race", "HOME97", "WICpreg", "bthwht")) ## -- Imputation 1 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13
##
## -- Imputation 2 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13
##
## -- Imputation 3 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13
##
## -- Imputation 4 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14
##
## -- Imputation 5 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13
# Take note of the changes to the m = and parallel = argument.
# Overall report for the multiple-imputation run: convergence message, chain
# lengths, and the fraction missing per variable.
summary(goodEMultiple)##
## Amelia output with 5 imputed datasets.
## Return code: 1
## Message: Normal EM convergence.
##
## Chain Lengths:
## --------------
## Imputation 1: 13
## Imputation 2: 13
## Imputation 3: 13
## Imputation 4: 14
## Imputation 5: 13
##
## Rows after Listwise Deletion: 1256
## Rows after Imputation: 2936
## Patterns of missingness in the data: 12
##
## Fraction Missing for original variables:
## -----------------------------------------
##
## Fraction Missing
## CHID 0.000
## read97 0.573
## race 0.131
## faminc97 0.000
## AGE97 0.376
## HOME97 0.000
## WICpreg 0.068
## bthwht 0.000
# Summarise imputed dataset 1 of 5.
summary(goodEMultiple$imputations[[1]])## CHID read97 race faminc97
## Min. : 4037 Min. : 2 Min. :0 Min. :-72296
## 1st Qu.:1439535 1st Qu.:42 1st Qu.:0 1st Qu.: 20176
## Median :2726031 Median :53 Median :0 Median : 39118
## Mean :3324784 Mean :53 Mean :0 Mean : 49841
## 3rd Qu.:5615110 3rd Qu.:64 3rd Qu.:0 3rd Qu.: 64495
## Max. :6872174 Max. :97 Max. :0 Max. :784611
## NA's :466
## AGE97 HOME97 WICpreg bthwht
## Min. : 3.0 Min. : 7.0 Min. :0 Min. :0.00
## 1st Qu.: 6.0 1st Qu.:16.0 1st Qu.:0 1st Qu.:0.00
## Median : 8.0 Median :19.2 Median :0 Median :0.00
## Mean : 7.6 Mean :18.9 Mean :0 Mean :0.39
## 3rd Qu.:10.0 3rd Qu.:21.8 3rd Qu.:1 3rd Qu.:1.00
## Max. :13.0 Max. :27.0 Max. :1 Max. :1.00
## NA's :241
# Summarise imputed dataset 2 of 5.
# NOTE(review): the recorded read97 maximum here is 104, above the upper value
# of 100 given in read97prior - presumably the priors act as soft constraints
# rather than hard limits; confirm, and consider amelia()'s `bounds` argument
# if a hard cap is required.
summary(goodEMultiple$imputations[[2]])## CHID read97 race faminc97
## Min. : 4037 Min. : 2 Min. :0 Min. :-72296
## 1st Qu.:1439535 1st Qu.: 43 1st Qu.:0 1st Qu.: 20176
## Median :2726031 Median : 54 Median :0 Median : 39118
## Mean :3324784 Mean : 53 Mean :0 Mean : 49841
## 3rd Qu.:5615110 3rd Qu.: 64 3rd Qu.:0 3rd Qu.: 64495
## Max. :6872174 Max. :104 Max. :0 Max. :784611
## NA's :466
## AGE97 HOME97 WICpreg bthwht
## Min. : 3.0 Min. : 7.0 Min. :0 Min. :0.00
## 1st Qu.: 6.0 1st Qu.:16.0 1st Qu.:0 1st Qu.:0.00
## Median : 8.0 Median :19.2 Median :0 Median :0.00
## Mean : 7.6 Mean :18.9 Mean :0 Mean :0.39
## 3rd Qu.:10.0 3rd Qu.:21.8 3rd Qu.:1 3rd Qu.:1.00
## Max. :13.0 Max. :27.0 Max. :1 Max. :1.00
## NA's :241
# Summarise imputed dataset 3 of 5.
summary(goodEMultiple$imputations[[3]])## CHID read97 race faminc97
## Min. : 4037 Min. : 2 Min. :0 Min. :-72296
## 1st Qu.:1439535 1st Qu.:43 1st Qu.:0 1st Qu.: 20176
## Median :2726031 Median :53 Median :0 Median : 39118
## Mean :3324784 Mean :53 Mean :0 Mean : 49841
## 3rd Qu.:5615110 3rd Qu.:64 3rd Qu.:0 3rd Qu.: 64495
## Max. :6872174 Max. :97 Max. :0 Max. :784611
## NA's :466
## AGE97 HOME97 WICpreg bthwht
## Min. : 3.0 Min. : 7.0 Min. :0 Min. :0.00
## 1st Qu.: 6.0 1st Qu.:16.0 1st Qu.:0 1st Qu.:0.00
## Median : 8.0 Median :19.2 Median :0 Median :0.00
## Mean : 7.6 Mean :18.9 Mean :0 Mean :0.39
## 3rd Qu.:10.0 3rd Qu.:21.8 3rd Qu.:1 3rd Qu.:1.00
## Max. :13.0 Max. :27.0 Max. :1 Max. :1.00
## NA's :241
# Summarise imputed dataset 4 of 5.
summary(goodEMultiple$imputations[[4]])## CHID read97 race faminc97
## Min. : 4037 Min. : 2 Min. :0 Min. :-72296
## 1st Qu.:1439535 1st Qu.:43 1st Qu.:0 1st Qu.: 20176
## Median :2726031 Median :53 Median :0 Median : 39118
## Mean :3324784 Mean :53 Mean :0 Mean : 49841
## 3rd Qu.:5615110 3rd Qu.:64 3rd Qu.:0 3rd Qu.: 64495
## Max. :6872174 Max. :97 Max. :0 Max. :784611
## NA's :466
## AGE97 HOME97 WICpreg bthwht
## Min. : 3.0 Min. : 7.0 Min. :0 Min. :0.00
## 1st Qu.: 6.0 1st Qu.:16.0 1st Qu.:0 1st Qu.:0.00
## Median : 8.0 Median :19.2 Median :0 Median :0.00
## Mean : 7.6 Mean :18.9 Mean :0 Mean :0.39
## 3rd Qu.:10.0 3rd Qu.:21.8 3rd Qu.:1 3rd Qu.:1.00
## Max. :13.0 Max. :27.0 Max. :1 Max. :1.00
## NA's :241
# Summarise imputed dataset 5 of 5.
summary(goodEMultiple$imputations[[5]])## CHID read97 race faminc97
## Min. : 4037 Min. : 2 Min. :0 Min. :-72296
## 1st Qu.:1439535 1st Qu.:43 1st Qu.:0 1st Qu.: 20176
## Median :2726031 Median :53 Median :0 Median : 39118
## Mean :3324784 Mean :53 Mean :0 Mean : 49841
## 3rd Qu.:5615110 3rd Qu.:64 3rd Qu.:0 3rd Qu.: 64495
## Max. :6872174 Max. :97 Max. :0 Max. :784611
## NA's :466
## AGE97 HOME97 WICpreg bthwht
## Min. : 3.0 Min. : 7.0 Min. :0 Min. :0.00
## 1st Qu.: 5.5 1st Qu.:16.0 1st Qu.:0 1st Qu.:0.00
## Median : 8.0 Median :19.2 Median :0 Median :0.00
## Mean : 7.6 Mean :18.9 Mean :0 Mean :0.39
## 3rd Qu.:10.0 3rd Qu.:21.8 3rd Qu.:1 3rd Qu.:1.00
## Max. :13.0 Max. :27.0 Max. :1 Max. :1.00
## NA's :241
# Fit the regression to a single one of the five imputed datasets (the fifth).
# This uses only one draw; it is not a pooled multiple-imputation estimate.
imput5 <- goodEMultiple$imputations[[5]]
lmEMultiple <- lm(
  read97 ~ AGE97 + faminc97 + bthwht + HOME97 + WICpreg,
  data = imput5
)
summary(lmEMultiple)##
## Call:
## lm(formula = read97 ~ AGE97 + faminc97 + bthwht + HOME97 + WICpreg,
## data = imput5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.94 -7.17 1.49 8.79 35.07
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.00843546 1.43243719 15.36 <0.0000000000000002 ***
## AGE97 3.23362566 0.08474402 38.16 <0.0000000000000002 ***
## faminc97 0.00004738 0.00000492 9.62 <0.0000000000000002 ***
## bthwht 0.77868223 0.47454779 1.64 0.1009
## HOME97 0.21470581 0.06618172 3.24 0.0012 **
## WICpreg -1.47394194 0.50368793 -2.93 0.0035 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13 on 3316 degrees of freedom
## (241 observations deleted due to missingness)
## Multiple R-squared: 0.348, Adjusted R-squared: 0.347
## F-statistic: 354 on 5 and 3316 DF, p-value: <0.0000000000000002
# install.packages("Zelig")
library(Zelig)
# Zelig exposes each statistical model, whatever package implements it,
# through one uniformly structured call and set of arguments.
# Goal: fit a model to each of the imputed datasets and then pool the results.
# For reference, first refit the single-imputation (EM) model.
lmEM <- lm(
  read97 ~ AGE97 + faminc97 + bthwht + HOME97 + WICpreg,
  data = imputed
)
summary(lmEM)##
## Call:
## lm(formula = read97 ~ AGE97 + faminc97 + bthwht + HOME97 + WICpreg,
## data = imputed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.24 -7.18 1.50 8.64 34.58
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.33616750 1.41923710 17.15 <0.0000000000000002 ***
## AGE97 3.20628290 0.08465726 37.87 <0.0000000000000002 ***
## faminc97 0.00004489 0.00000491 9.14 <0.0000000000000002 ***
## bthwht 0.45311520 0.47352460 0.96 0.3387
## HOME97 0.13653337 0.06608072 2.07 0.0389 *
## WICpreg -1.58618703 0.50206848 -3.16 0.0016 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13 on 3316 degrees of freedom
## (241 observations deleted due to missingness)
## Multiple R-squared: 0.34, Adjusted R-squared: 0.339
## F-statistic: 342 on 5 and 3316 DF, p-value: <0.0000000000000002
# Least-squares fit on every imputed dataset at once: passing the list of
# imputations lets Zelig combine the per-dataset results.
z <- zelig(
  read97 ~ AGE97 + faminc97 + bthwht + HOME97 + WICpreg,
  data = goodEMultiple$imputations,
  model = "ls"
)## How to cite this model in Zelig:
## R Core Team. 2007.
## ls: Least Squares Regression for Continuous Dependent Variables
## in Christine Choirat, Christopher Gandrud, James Honaker, Kosuke Imai, Gary King, and Olivia Lau,
## "Zelig: Everyone's Statistical Software," http://zeligproject.org/
# Results combined across the imputed datasets.
summary(z)## Model: Combined Imputations
##
## Estimate Std.Error z value Pr(>|z|)
## (Intercept) 22.2882630 1.5817789 14.1 <0.0000000000000002
## AGE97 3.2645886 0.0969809 33.7 <0.0000000000000002
## faminc97 0.0000498 0.0000055 9.0 <0.0000000000000002
## bthwht 0.6806057 0.5281322 1.3 0.20
## HOME97 0.1807990 0.0712462 2.5 0.01
## WICpreg -1.3750186 0.6333730 -2.2 0.03
##
## For results from individual imputed datasets, use summary(x, subset = i:j)
## Next step: Use 'setx' method
# Per-dataset results for imputed datasets 1 through 3.
summary(z, subset = 1:3)## Imputed Dataset 1
## Call:
## z5$zelig(formula = read97 ~ AGE97 + faminc97 + bthwht + HOME97 +
## WICpreg, data = goodEMultiple$imputations)
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.51 -7.01 1.34 8.85 34.81
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.83414139 1.44222367 15.14 < 0.0000000000000002
## AGE97 3.31281067 0.08527634 38.85 < 0.0000000000000002
## faminc97 0.00005357 0.00000495 10.82 < 0.0000000000000002
## bthwht 0.81160811 0.47765007 1.70 0.08938
## HOME97 0.17082234 0.06656437 2.57 0.01032
## WICpreg -1.75311677 0.50663412 -3.46 0.00055
##
## Residual standard error: 13 on 3316 degrees of freedom
## (241 observations deleted due to missingness)
## Multiple R-squared: 0.358, Adjusted R-squared: 0.357
## F-statistic: 370 on 5 and 3316 DF, p-value: <0.0000000000000002
##
## Imputed Dataset 2
## Call:
## z5$zelig(formula = read97 ~ AGE97 + faminc97 + bthwht + HOME97 +
## WICpreg, data = goodEMultiple$imputations)
##
## Residuals:
## Min 1Q Median 3Q Max
## -51.80 -7.06 1.44 8.64 44.29
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.6510589 1.4228193 15.92 <0.0000000000000002
## AGE97 3.2255015 0.0847115 38.08 <0.0000000000000002
## faminc97 0.0000495 0.0000049 10.12 <0.0000000000000002
## bthwht 0.3871338 0.4719871 0.82 0.4121
## HOME97 0.1872592 0.0658454 2.84 0.0045
## WICpreg -0.8008693 0.5009571 -1.60 0.1100
##
## Residual standard error: 13 on 3316 degrees of freedom
## (241 observations deleted due to missingness)
## Multiple R-squared: 0.343, Adjusted R-squared: 0.342
## F-statistic: 346 on 5 and 3316 DF, p-value: <0.0000000000000002
##
## Imputed Dataset 3
## Call:
## z5$zelig(formula = read97 ~ AGE97 + faminc97 + bthwht + HOME97 +
## WICpreg, data = goodEMultiple$imputations)
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.66 -7.26 1.63 8.85 39.44
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.20456328 1.42732053 16.26 <0.0000000000000002
## AGE97 3.24198193 0.08539518 37.96 <0.0000000000000002
## faminc97 0.00004875 0.00000491 9.92 <0.0000000000000002
## bthwht 0.52864137 0.47362740 1.12 0.264
## HOME97 0.14771982 0.06607827 2.24 0.025
## WICpreg -1.49185429 0.50246480 -2.97 0.003
##
## Residual standard error: 13 on 3316 degrees of freedom
## (241 observations deleted due to missingness)
## Multiple R-squared: 0.344, Adjusted R-squared: 0.343
## F-statistic: 348 on 5 and 3316 DF, p-value: <0.0000000000000002
##
## Next step: Use 'setx' method