work.d<-"C:/Users/iihsk/Desktop/SeongJin Kim/6. ESC/2. Assignments/4. Week4 Assignment"
setwd(work.d)
ucla<-read.csv("binary.csv", header=T)
attach(ucla)
str(ucla)
## 'data.frame': 400 obs. of 4 variables:
## $ admit: int 0 1 1 1 0 1 1 0 1 0 ...
## $ gre : int 380 660 800 640 520 760 560 400 540 700 ...
## $ gpa : num 3.61 3.67 4 3.19 2.93 3 2.98 3.08 3.39 3.92 ...
## $ rank : int 3 3 1 4 4 2 1 2 3 2 ...
cor(ucla[,-1])
## gre gpa rank
## gre 1.0000000 0.38426588 -0.12344707
## gpa 0.3842659 1.00000000 -0.05746077
## rank -0.1234471 -0.05746077 1.00000000
cor(gpa, gre)
## [1] 0.3842659
lm.gpa.gre<-lm(gre~gpa)
plot(gpa, gre)
abline(lm.gpa.gre, col="red")
llm.fit<-glm(admit~., data=ucla, family="binomial")
summary(llm.fit)
##
## Call:
## glm(formula = admit ~ ., family = "binomial", data = ucla)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5802 -0.8848 -0.6382 1.1575 2.1732
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.449548 1.132846 -3.045 0.00233 **
## gre 0.002294 0.001092 2.101 0.03564 *
## gpa 0.777014 0.327484 2.373 0.01766 *
## rank -0.560031 0.127137 -4.405 1.06e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 499.98 on 399 degrees of freedom
## Residual deviance: 459.44 on 396 degrees of freedom
## AIC: 467.44
##
## Number of Fisher Scoring iterations: 4
par(mfrow=c(2,2))
plot(llm.fit)
A. Probability of Y=1 with given predictors
llm.probs<-predict(llm.fit, type="response")
head(llm.probs, 4)
## 1 2 3 4
## 0.1895527 0.3177807 0.7178136 0.1489492
odds.llm<-llm.probs/(1-llm.probs)
logits.llm<-log(llm.probs/(1-llm.probs))
A. Reconstruct logistic regression model with 280 separate training data
set.seed(8783)
test = sample(1:nrow(ucla),3/10*nrow(ucla), replace=FALSE)
str(ucla[-test,])
## 'data.frame': 280 obs. of 4 variables:
## $ admit: int 0 1 1 1 0 1 1 0 1 0 ...
## $ gre : int 380 660 800 640 520 760 560 400 540 800 ...
## $ gpa : num 3.61 3.67 4 3.19 2.93 3 2.98 3.08 3.39 4 ...
## $ rank : int 3 3 1 4 4 2 1 2 3 4 ...
llm.fit<-glm(admit~., data=ucla[-test,],family="binomial")
summary(llm.fit)
##
## Call:
## glm(formula = admit ~ ., family = "binomial", data = ucla[-test,
## ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4858 -0.8856 -0.6127 1.1420 2.1806
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.156955 1.404417 -2.960 0.003077 **
## gre 0.002249 0.001312 1.714 0.086547 .
## gpa 0.952075 0.404758 2.352 0.018662 *
## rank -0.524506 0.147454 -3.557 0.000375 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 348.59 on 279 degrees of freedom
## Residual deviance: 317.98 on 276 degrees of freedom
## AIC: 325.98
##
## Number of Fisher Scoring iterations: 4
attach(llm.fit)
## The following object is masked from ucla:
##
## rank
B. Test model’s predictory power with test set.
drumroll<-ucla[test,]$admit
llm.probs<-predict(llm.fit, ucla[test,], type="response")
llm.pred=rep(0,3/10*nrow(ucla))
llm.pred[llm.probs>=0.5]=1
table(llm.pred, drumroll)
## drumroll
## llm.pred 0 1
## 0 77 33
## 1 4 6
mean(llm.pred!=drumroll)
## [1] 0.3083333