setwd('C:/Users/mchsu/Downloads/Mich/')
Data<-read.csv("Imigration.csv")
M<-lm(Inc~En+Lit+US5,data=Data)
SumM<-summary(M)
print(SumM)
##
## Call:
## lm(formula = Inc ~ En + Lit + US5, data = Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5223 -0.2935 0.1564 0.7294 1.3823
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.575812 1.311500 1.964 0.058549 .
## En 0.041500 0.023948 1.733 0.093054 .
## Lit 0.079103 0.020128 3.930 0.000444 ***
## US5 -0.003091 0.020731 -0.149 0.882447
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.059 on 31 degrees of freedom
## Multiple R-squared: 0.7772, Adjusted R-squared: 0.7556
## F-statistic: 36.05 on 3 and 31 DF, p-value: 3.158e-10
names(SumM)
## [1] "call" "terms" "residuals" "coefficients"
## [5] "aliased" "sigma" "df" "r.squared"
## [9] "adj.r.squared" "fstatistic" "cov.unscaled"
Density Plot of Residuals
plot(density(SumM$residuals))
Normal Density Plot: set to default
setEPS(paper='special')
Plot a Normal Density: Encapsulated PostScript
postscript('NormalDensity.eps',width=7,height=5,pointsize=12)
x<-seq(-3.1,3.1,by=0.001)
plot(x,dnorm(x),type='l',axes=FALSE,ylab='',xlab='',ylim=c(0,dnorm(0)+.3))
axis(1,at=c(-3,-2,-1,0,1,2,3),labels=c(expression(mu-3*sigma),expression(mu-2*sigma),expression(mu-1*sigma),expression(mu), expression(mu+1*sigma),expression(mu+2*sigma),expression(mu+3*sigma)))
mtext(expression(paste('Density - f(y|' , mu,",",sigma,') ',)),2)
lines(c(-1,-1),c(0,dnorm(0)+.07));lines(c(1,1),c(0,dnorm(0)+.07))
text(0,dnorm(0)+.03,'68% within',pos=3,cex=.8)
text(0,dnorm(0),'1 standard deviation',pos=3,cex=.8)
arrows(-.8,dnorm(0)+.04,-1,dnorm(0)+.04,length=.1)
arrows(.8,dnorm(0)+.04,1,dnorm(0)+.04,length=.1)#box()
lines(c(-2,-2),c(0,dnorm(0)+.15));lines(c(2,2),c(0,dnorm(0)+.15))
text(0,dnorm(0)+.12,'95% within',pos=3,cex=.8)
text(0,dnorm(0)+.09,'2 standard deviation',pos=3,cex=.8)
arrows(-.8,dnorm(0)+.13,-2,dnorm(0)+.13,length=.1)
arrows(.8,dnorm(0)+.13,2,dnorm(0)+.13,length=.1)#box()
lines(c(-3,-3),c(0,dnorm(0)+.25));lines(c(3,3),c(0,dnorm(0)+.25))
text(0,dnorm(0)+.22,'99.7% within',pos=3,cex=.8)
text(0,dnorm(0)+.19,'3 standard deviation',pos=3,cex=.8)
arrows(-.8,dnorm(0)+.23,-3,dnorm(0)+.23,length=.1)
arrows(.8,dnorm(0)+.23,3,dnorm(0)+.23,length=.1)#box()
### Plot a bernoulli and Binomial probability mass function
postscript('BinomialDensity.eps',width=8,height=5,pointsize=12)
par(mfrow=c(1,2))
x<- 0:1;
db<-dbinom(x,1,0.2)
plot(x,db,type='h',lwd=7,col='blue',xlab='',ylab='Density - Bernoulli(0.2)',axes=F,xlim=c(-0.2,1.2))
axis(1,at=c(0,1),label=c(0,1))
axis(2)
x<- 0:10;
db<-dbinom(x,10,0.2)
plot(x,db,type='h',lwd=7,col='blue',xlab='',ylab='Density - Binomial(10, 0.2)', axes=F,xlim=c(-0.2,10.4))
axis(1,at=0:10,label=0:10)
axis(1,at=10,label=10)
axis(2)
dev.off()
## png
## 2
Concatenate and Print (Note: read the data as vector)
cat("1 396 1 568 1 1212 1 171 1 554 1 1104 1 257 1 435 1 295 1 397
1 288 1 1004 1 431 1 795 1 1621 1 1378 1 902 1 958 1 1283 1 2415
2 375 2 375 2 752 2 208 2 151 2 116 2 736 2 192 2 315 2 1252
2 675 2 700 2 440 2 771 2 688 2 426 2 410 2 979 2 377 2 503", file="data.txt")
Data1 <- scan("Data.txt")
Data1
## [1] 1 396 1 568 1 1212 1 171 1 554 1 1104 1 257
## [15] 1 435 1 295 1 397 1 288 1 1004 1 431 1 795
## [29] 1 1621 1 1378 1 902 1 958 1 1283 1 2415 2 375
## [43] 2 375 2 752 2 208 2 151 2 116 2 736 2 192
## [57] 2 315 2 1252 2 675 2 700 2 440 2 771 2 688
## [71] 2 426 2 410 2 979 2 377 2 503
Transform the data “matrix()”
Data2<-matrix(Data1,ncol=2,byrow=TRUE)
Data<-data.frame(Data2)
names(Data)<-c('disease','cd4')
Data$disease<-factor(Data$disease)
1.Normal distribution for the response (CD4 count)
N1 <- glm(cd4 ~ disease, data=Data) # Object N1 contains all components of the model
names(N1) # get the names of the objcts
## [1] "coefficients" "residuals" "fitted.values"
## [4] "effects" "R" "rank"
## [7] "qr" "family" "linear.predictors"
## [10] "deviance" "aic" "null.deviance"
## [13] "iter" "weights" "prior.weights"
## [16] "df.residual" "df.null" "y"
## [19] "converged" "boundary" "model"
## [22] "call" "formula" "terms"
## [25] "data" "offset" "control"
## [28] "method" "contrasts" "xlevels"
#[1] "coefficients" "residuals" "fitted.values" "effects" "R" "rank" "qr" "family"
# [9] "linear.predictors" "deviance" "aic" "null.deviance" "iter" "weights" "prior.weights" "df.residual"
#[17] "df.null" "y" "converged" "boundary" "model" "call" "formula" "terms"
#[25] "data" "offset" "control" "method" "contrasts" "xlevels"
N1$family # What generalized linear model do we fit
##
## Family: gaussian
## Link function: identity
#Family: gaussian
#Link function: identity
N1$contrasts # Treatment constrats - as in SAS
## $disease
## [1] "contr.treatment"
S1<- summary(N1) # Summary of the output
names(S1) # Get the components of S1
## [1] "call" "terms" "family" "deviance"
## [5] "aic" "contrasts" "df.residual" "null.deviance"
## [9] "df.null" "iter" "deviance.resid" "coefficients"
## [13] "aliased" "dispersion" "df" "cov.unscaled"
## [17] "cov.scaled"
#[1] "call" "terms" "family" "deviance" "aic" "contrasts" "df.residual" "null.deviance" "df.null" "iter"
#[11] "deviance.resid" "coefficients" "aliased" "dispersion" "df" "cov.unscaled" "cov.scaled"
S1$coefficients # Get the coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 823.20 100.8259 8.164567 6.937410e-10
## disease2 -301.15 142.5894 -2.112009 4.131601e-02
# Estimate Std. Error t value Pr(>|t|)
#(Intercept) 823.20 100.8259 8.164567 6.937410e-10
#disease2 -301.15 142.5894 -2.112009 4.131601e-02
Construct the log(CD4 count): Lognormal Model for the CD4 counts
Data$LogCD4 <- log(Data$cd4)
LN1 <- glm(LogCD4~disease, data=Data)
SN1<-summary(LN1)
SN1$coefficients # Get coefficients stats
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.4869319 0.1500490 43.232102 6.346047e-34
## disease2 -0.3982919 0.2122013 -1.876953 6.821821e-02
dev.res <- SN1$deviance.resid # get the devoaiance residuals
qqnorm(dev.res) # Normal q-q plot of the residuals
Normal Disribution with LOG link
N2 <- glm(cd4 ~ disease, family=gaussian(link=log), data = Data)
### Other options
#binomial(link = "logit")
#gaussian(link = "identity")
#Gamma(link = "inverse")
#inverse.gaussian(link = "1/mu^2")
#poisson(link = "log")
#quasi(link = "identity", variance = "constant") # Specify your own
#quasibinomial(link = "logit") # Overdispered and Underdispersed binomial
#quasipoisson(link = "log") # Overdispered and Underdispersed poisson
N2$family # what generalized linear model is fit
##
## Family: gaussian
## Link function: log
#Family: gaussian
#Link function: log
SN2 <- summary(N2)
dev.res2 <- SN2$deviance.resid # get the devoaiance residuals
qqnorm(dev.res2) # Normal q-q plot of the residuals
Poisson Distribution
P1 <- glm(cd4 ~ disease, family=poisson(link = log), data=Data)
P1$family # what generalized linear model is fit
##
## Family: poisson
## Link function: log
#Family: poisson
#Link function: log
SP1 <- summary(P1)
dev.res3 <- SP1$deviance.resid # get the deviance residuals
qqnorm(dev.res3) # Normal q-q plot of the residuals
Negative Binomial Distribution
library(MASS) ### Need to upload a new library to fit the negative binomial model
NB1 <- glm.nb(cd4 ~ disease, data=Data)
SNB1<-summary(NB1)
SNB1$family # what generalized linear model is fit
##
## Family: Negative Binomial(2.6962)
## Link function: log
#Family: Negative Binomial(2.6962)
#Link function: log
dev.res4 <- SNB1$deviance.resid # get the devoaiance residuals
qqnorm(dev.res4) # Normal q-q plot of the residuals