myGithub.

Graphical analysis of Duncan data

load car package and the data

library(car) 
data(Duncan,package="carData") 
head(Duncan)
##            type income education prestige
## accountant prof     62        86       82
## pilot      prof     72        76       83
## architect  prof     75        92       90
## author     prof     55        90       76
## chemist    prof     64        86       90
## minister   prof     21        84       87

first few observations

standard pairs plot

plot( ~ prestige + income + education,data=Duncan) 
pairs( ~prestige + income + education,data=Duncan)

enhanced version

scatterplotMatrix( ~prestige + income + education,data=Duncan, id.n=2)

model plots

duncan.mod  <- lm(prestige  ~ income + education,data=Duncan)

op  <- par(mfrow=c(2,2), mar=c(4,4,2,1)+.1) 
plot(duncan.mod, cex=1.2,cex.lab=1.2, pch=16, lwd=3) 

par(op)

better version of an influence plot

influencePlot(duncan.mod,id.n=3)

##                StudRes        Hat      CookD
## minister     3.1345186 0.17305816 0.56637974
## reporter    -2.3970224 0.05439356 0.09898456
## conductor   -1.7040324 0.19454165 0.22364122
## RR.engineer  0.8089221 0.26908963 0.08096807

added variable plots show the partial relations of each preditor

avPlots(duncan.mod, id.n=2, pch=16, ellipse=TRUE,ellipse.args=list(levels=0.68, fill=TRUE, fill.alpha=0.1))

effect plots


library(effects)

duncan.eff  <- allEffects(duncan.mod) 
plot(duncan.eff)

add term for type of job

duncan.mod1  <- update(duncan.mod, .  ~. + type) 
summary(duncan.mod1)
## 
## Call:
## lm(formula = prestige ~ income + education + type, data = Duncan)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.890  -5.740  -1.754   5.442  28.972 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.18503    3.71377  -0.050  0.96051    
## income        0.59755    0.08936   6.687 5.12e-08 ***
## education     0.34532    0.11361   3.040  0.00416 ** 
## typeprof     16.65751    6.99301   2.382  0.02206 *  
## typewc      -14.66113    6.10877  -2.400  0.02114 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.744 on 40 degrees of freedom
## Multiple R-squared:  0.9131, Adjusted R-squared:  0.9044 
## F-statistic:   105 on 4 and 40 DF,  p-value: < 2.2e-16

duncan.eff1  <- allEffects(duncan.mod1) 
plot(duncan.eff1)


plot(duncan.eff1, ci.style="bands", rows=1, cols=3)

coefficient plots

library(coefplot)
duncan.mod2  <- lm(prestige  ~ income  * education, data=Duncan)
coefplot(duncan.mod2, intercept=FALSE, lwdInner=2, lwdOuter=1,point.size=5, title="Coefficient plot for duncan.mod2") + theme_bw()

Ernesto Ibáñez Twitter @eeibanez