Race
##
## White AfrAm
## 0 304 419
## 1 878 1199
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: tbl2
## X-squared = 0.0038454373, df = 1, p-value = 0.9505536
Sex
##
## Women Men
## 0 391 332
## 1 1183 894
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: tbl3
## X-squared = 1.6884265, df = 1, p-value = 0.1938085
Income
##
## Above 20,000 Below 20,000
## 0 62 70
## 1 981 1096
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: tbl4
## X-squared = 4.3634385e-29, df = 1, p-value = 1
Employment
##
## Yes No
## 0 333 320
## 1 1204 873
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: tbl5
## X-squared = 9.5364426, df = 1, p-value = 0.002014318
Education
##
## 8th grade or below Bachelor's degree HS graduate/GED
## 0 65 21 204
## 1 118 137 705
##
## Some college/Associate Degree
## 0 138
## 1 490
##
## Some Graduate School/Graduate Degree (MA,PhD,MD,JD) Some HS
## 0 33 189
## 1 104 523
##
## Pearson's Chi-squared test
##
## data: tbl6
## X-squared = 28.49065, df = 5, p-value = 2.918025e-05
Poverty Status
##
## Above Below
## 0 440 283
## 1 1214 863
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: tbl7
## X-squared = 1.1884274, df = 1, p-value = 0.275647
Age
##
## Welch Two Sample t-test
##
## data: y by x
## t = 1.8462909, df = 1255.168, p-value = 0.06508529
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.0461739977 1.5215464328
## sample estimates:
## mean in group 0 mean in group 1
## 48.58506224 47.84737602
# Tabulate how many respondents fall into each education category.
with(MeasurementData1, summary(Edcat))
## 8th grade or below
## 118
## Bachelor's degree
## 137
## HS graduate/GED
## 705
## Some college/Associate Degree
## 490
## Some Graduate School/Graduate Degree (MA,PhD,MD,JD)
## 104
## Some HS
## 523
# Overall distribution of years of education.
histogram(MeasurementData1$Educ)

# Side-by-side histograms of years of education by race.
# Race is a labelled factor ("African American" / "White"), so comparing it
# with 0 / 1 selected no rows and hist() stopped with
# "invalid number of 'breaks'". Compare against the labels instead.
op <- par(mfrow = c(1, 2))
hist(
  MeasurementData1$Educ[MeasurementData1$Race == "African American"],
  main = "African American",
  xlab = "Years of Education"
)
hist(
  MeasurementData1$Educ[MeasurementData1$Race == "White"],
  main = "White",
  xlab = "Years of Education"
)
par(op) # restore the previous panel layout for the plots that follow
# Linear model for MCLcountry (labelled "MCL: SES Standing in country" in the
# model output).
# `a * b` expands to a + b + a:b, so the main effects that are also written out
# explicitly are repeated by the interaction terms; R keeps a single copy of
# each term when it builds the model.
# x = TRUE stores the design matrix on the fitted object (spelled out instead
# of `T`, which is an ordinary variable that can be reassigned).
fit <- lm(
  MCLcountry ~ Race + Sex + Educ + Employment01 + acasiincomx01 +
    Neighborhood02 + CES + sHealthNum + Race * Educ + Race * Employment01 +
    Sex * Employment01 + Race * Sex,
  data = MeasurementData1,
  x = TRUE
)
Assessing Outliers
outlierTest(fit) # Bonferroni p-value for most extreme obs
##
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferonni p
## 601 3.645496042 0.00027348 0.56802
# Graphical outlier diagnostics for the fitted model.
qqPlot(fit, main="QQ Plot") # QQ plot of the studentized residuals
leveragePlots(fit) # leverage plots
Assessing Non-normality
# Normality of residuals
# QQ plot of the studentized residuals
qqPlot(fit, main = "QQ Plot")

# Histogram of the studentized residuals on the density scale, with the
# standard normal density drawn over it for comparison.
library(MASS)
sresid <- studres(fit)
hist(sresid, freq = FALSE, main = "Distribution of Studentized Residuals")
# Use the full argument name; `length =` only worked through partial matching
# of `length.out`.
xfit <- seq(min(sresid), max(sresid), length.out = 40)
yfit <- dnorm(xfit)
lines(xfit, yfit)
Non-constant Error Variance
# Evaluate homoscedasticity
# Score test for non-constant error variance; the variance is modelled as a
# function of the fitted values (see the "Variance formula" line of the output).
ncvTest(fit)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 45.08660365 Df = 1 p = 1.885105553e-11
# Plot studentized residuals vs. fitted values; the call also prints a
# suggested power transformation of the response.
spreadLevelPlot(fit)
##
## Suggested power transformation: 1.868632856
Multi-collinearity
# Evaluate collinearity among the model terms.
vif(fit) # variance inflation factor for each term
## Race Sex Educ Employment01
## 24.259507886 2.547697591 2.748440810 3.368656430
## acasiincomx01 Neighborhood02 CES sHealthNum
## 1.387073133 1.100131360 1.208089679 1.209289174
## Race:Educ Race:Employment01 Sex:Employment01 Race:Sex
## 22.918430035 2.550543515 3.307577387 3.139138713
# Flag every term whose sqrt(VIF) exceeds 2 (TRUE = flagged).
sqrt(vif(fit)) > 2 # problem?
## Race Sex Educ Employment01
## TRUE FALSE FALSE FALSE
## acasiincomx01 Neighborhood02 CES sHealthNum
## FALSE FALSE FALSE FALSE
## Race:Educ Race:Employment01 Sex:Employment01 Race:Sex
## TRUE FALSE FALSE FALSE
Non-independence of Errors
# Test for autocorrelated errors (Durbin-Watson; alternative rho != 0).
durbinWatsonTest(fit)
## lag Autocorrelation D-W Statistic p-value
## 1 0.03843837402 1.922005271 0.072
## Alternative hypothesis: rho != 0
Global test of model assumptions
# Global validation of the linear model assumptions: summary() prints the
# regression table followed by the global statistic and the skewness,
# kurtosis, link function and heteroscedasticity checks.
library(gvlma)
gvmodel <- gvlma(fit)
summary(gvmodel)
##
## Call:
## lm(formula = MCLcountry ~ Race + Sex + Educ + Employment01 +
## acasiincomx01 + Neighborhood02 + CES + sHealthNum + Race *
## Educ + Race * Employment01 + Sex * Employment01 + Race *
## Sex, data = MeasurementData1, x = T)
##
## Residuals:
## MCL: SES Standing in country
## Min 1Q Median 3Q Max
## -5.1129700 -1.1779336 0.2989504 1.9761218 6.6717081
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 4.825348372 0.379893060 12.70186
## RaceWhite -0.840511672 0.403304645 -2.08406
## SexWomen -0.075600086 0.130395285 -0.57978
## Educ 0.026229490 0.022372069 1.17242
## Employment01Unemployed -0.277576321 0.150403199 -1.84555
## acasiincomx01Below 20,000 -0.328536933 0.095424299 -3.44291
## Neighborhood02 -0.125862875 0.039799930 -3.16239
## CES -0.036803866 0.003921716 -9.38463
## sHealthNum 0.212745163 0.058485155 3.63759
## RaceWhite:Educ 0.071920283 0.028647758 2.51050
## RaceWhite:Employment01Unemployed -0.437636335 0.174592700 -2.50661
## SexWomen:Employment01Unemployed 0.616750455 0.167815293 3.67517
## RaceWhite:SexWomen -0.384755428 0.167063807 -2.30304
## Pr(>|t|)
## (Intercept) < 2.22e-16
## RaceWhite 0.03727729
## SexWomen 0.56212881
## Educ 0.24116325
## Employment01Unemployed 0.06510101
## acasiincomx01Below 20,000 0.00058698
## Neighborhood02 0.00158754
## CES < 2.22e-16
## sHealthNum 0.00028195
## RaceWhite:Educ 0.01213185
## RaceWhite:Employment01Unemployed 0.01226575
## SexWomen:Employment01Unemployed 0.00024375
## RaceWhite:SexWomen 0.02137533
##
## Residual standard error: 1.843448 on 2064 degrees of freedom
## Multiple R-squared: 0.1540766, Adjusted R-squared: 0.1491585
## F-statistic: 31.32811 on 12 and 2064 DF, p-value: < 2.2204e-16
##
##
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance = 0.05
##
## Call:
## gvlma(x = fit)
##
## Value p-value Decision
## Global Stat 22.4206500 0.0001652513 Assumptions NOT satisfied!
## Skewness 14.7761517 0.0001210568 Assumptions NOT satisfied!
## Kurtosis 3.0792498 0.0792970942 Assumptions acceptable.
## Link Function 0.5396667 0.4625708771 Assumptions acceptable.
## Heteroscedasticity 4.0255818 0.0448151599 Assumptions NOT satisfied!
# NOTE: 0 = African American; 1 = Whites
# NOTE(review): the printed table shows the factor labels rather than 0/1 -
# confirm the numeric coding against the raw data.
# Cross-tabulate income category (rows) by race (columns).
tblRACE <- table(
  MeasurementData1[["acasiincomx01"]],
  MeasurementData1[["Race"]]
)
tblRACE
##
## African American White
## Above 20,000 531 565
## Below 20,000 668 313
# Each income-by-race cell as a share of the whole table.
tblRACE / sum(tblRACE)
##
## African American White
## Above 20,000 0.2556571979 0.2720269620
## Below 20,000 0.3216177179 0.1506981223
SEX
# Cross-tabulate income category (rows) by sex (columns).
tblSEX <- table(
  MeasurementData1[["acasiincomx01"]],
  MeasurementData1[["Sex"]]
)
# NOTE: 0 = Women; 1 = Men
# NOTE(review): the printed table shows the factor labels rather than 0/1 -
# confirm the numeric coding against the raw data.
tblSEX
##
## Men Women
## Above 20,000 509 587
## Below 20,000 385 596
# Each income-by-sex cell as a share of the whole table.
tblSEX / sum(tblSEX)
##
## Men Women
## Above 20,000 0.2450649976 0.2826191623
## Below 20,000 0.1853635051 0.2869523351
Education
# Bar chart of the education categories. The bottom and left margins are
# enlarged for the perpendicular (las = 2) axis labels and restored afterwards.
op <- par(mar = c(10, 12, 1, 2) + 0.1)
counts <- table(MeasurementData1[["Edcat"]])
barplot(
  counts,
  main = "Frequency of Education",
  las = 2,
  beside = TRUE,
  cex.names = 0.7
)
par(op)
Employment Status by Race
# Grouped bar chart of employment status, with one bar per race inside each
# employment group.
counts <- table(MeasurementData1$Race, MeasurementData1$Employment01)
barplot(
  counts,
  main = "Employment by Race",
  xlab = "Employment",
  col = c("black", "gray"),
  ylim = c(0, 700),
  # Full argument name; `legend =` only worked via partial matching.
  legend.text = rownames(counts),
  beside = TRUE
)
Employment Status by Sex
# Grouped bar chart of employment status, with one bar per sex inside each
# employment group.
counts <- table(MeasurementData1$Sex, MeasurementData1$Employment01)
barplot(
  counts,
  main = "Employment by Sex",
  xlab = "Employment",
  col = c("black", "gray"),
  ylim = c(0, 800),
  # Full argument name; `legend =` only worked via partial matching.
  legend.text = rownames(counts),
  beside = TRUE
)
Education by Race
# Grouped bar chart of education category by race. The bottom and left margins
# are enlarged for the perpendicular (las = 2) category labels and restored
# afterwards.
# The former `ylabels` vector was dropped: it was never passed to barplot()
# and listed seven labels for the six education categories.
op <- par(mar = c(10, 12, 1, 2) + 0.1)
counts <- table(MeasurementData1$Race, MeasurementData1$Edcat)
barplot(
  counts,
  main = "Education by Race",
  col = c("black", "gray"),
  ylim = c(0, 500),
  # Full argument name; `legend =` only worked via partial matching.
  legend.text = rownames(counts),
  las = 2,
  beside = TRUE,
  cex.names = 0.7
)
par(op)
Education by Sex
# Grouped bar chart of education category by sex. The bottom and left margins
# are enlarged for the perpendicular (las = 2) category labels and restored
# afterwards.
op <- par(mar = c(10, 12, 1, 2) + 0.1)
counts <- table(MeasurementData1$Sex, MeasurementData1$Edcat)
barplot(
  counts,
  main = "Education by Sex",
  col = c("black", "gray"),
  ylim = c(0, 400),
  # Full argument name; `legend =` only worked via partial matching.
  legend.text = rownames(counts),
  las = 2,
  beside = TRUE,
  cex.names = 0.7
)
par(op)
Income by Race
# Grouped bar chart of income category, with one bar per race inside each
# income group.
counts <- table(MeasurementData1$Race, MeasurementData1$acasiincomx01)
barplot(
  counts,
  main = "Income by Race",
  xlab = "Income",
  col = c("black", "gray"),
  ylim = c(0, 800),
  # Full argument name; `legend =` only worked via partial matching.
  legend.text = rownames(counts),
  beside = TRUE
)
Income by Sex
# Grouped bar chart of income category, with one bar per sex inside each
# income group.
counts <- table(MeasurementData1$Sex, MeasurementData1$acasiincomx01)
barplot(
  counts,
  main = "Income by Sex",
  ylim = c(0, 700),
  xlab = "Income",
  col = c("black", "gray"),
  # Full argument name; `legend =` only worked via partial matching.
  legend.text = rownames(counts),
  beside = TRUE
)
Education (numeric) by Race
# Side-by-side histograms of years of education for each race, drawn on
# identical axes so the two panels can be compared directly.
# The panel layout is saved and restored so it does not leak into later plots.
op <- par(mfrow = c(1, 2))
hist(
  MeasurementData1$Educ[MeasurementData1$Race == "African American"],
  xlab = "African American",
  main = "Years of Education",
  xlim = c(0, 21),
  ylim = c(0, 600)
)
hist(
  MeasurementData1$Educ[MeasurementData1$Race == "White"],
  xlab = "White",
  main = "Years of Education",
  xlim = c(0, 21),
  ylim = c(0, 600)
)
par(op)
Education (numeric) by Sex
# Side-by-side histograms of years of education for each sex, drawn on
# identical axes so the two panels can be compared directly.
# The panel layout is saved and restored so it does not leak into later plots.
op <- par(mfrow = c(1, 2))
hist(
  MeasurementData1$Educ[MeasurementData1$Sex == "Women"],
  xlab = "Women",
  main = "Years of Education",
  xlim = c(0, 21),
  ylim = c(0, 600)
)
hist(
  MeasurementData1$Educ[MeasurementData1$Sex == "Men"],
  xlab = "Men",
  main = "Years of Education",
  xlim = c(0, 21),
  ylim = c(0, 600)
)
par(op)