make gaussian glm function
gaussian_glm =
function(X,y,threshold = 0.01, max_iter = 1000000){
cal_p = function(X,beta){
beta = as.vector(beta)
return(X%*%beta)
}
beta = rep(0,ncol(X))
diff = 10000
iter_count = 0
while(diff > threshold ) #tests for convergence
{
#calculate probabilities using current estimate of beta
p = as.vector(cal_p(X,beta))
#the weights are constant
#W does not exist (constant)
#calculate the change in beta
beta_change = solve(t(X)%*%X) %*% t(X)%*%(y - p)
#update beta
beta = beta + beta_change
#calculate how much we changed beta by in this iteration
diff = sum(beta_change^2)
#see if we've hit the maximum number of iterations
iter_count = iter_count + 1
if(iter_count > max_iter) {
stop("This isn't converging.")
}
}
#result of coef
x1=colnames(X)[2]
x2=colnames(X)[3]
coef = c("(Intercept)" = round(beta[1],6), x1 = round(beta[2],6), x2 = round(beta[3],6))
#sigma.hat=(t(y-X%*%beta)%*%(y-X%*%beta))/(nrow(y)-ncol(X)-1)
sigma.hat=(t(y-X%*%beta)%*%(y-X%*%beta))/(nrow(y)-ncol(X))
cov=as.numeric(sigma.hat)*solve(t(X)%*%X)
std.error=sqrt(diag(cov))
tvalue=coef/std.error
pvalue=2*pt(-abs(tvalue),df=nrow(y)-ncol(X))
result.coef=rbind(coef,std.error,tvalue,pvalue)
#deviance residuals
d<-c()
for (i in 1:length(y)){
d[i]=sign(y[i]-(X%*%beta)[i])*sqrt((y[i]-(X%*%beta)[i])^2)
}
result.d=c("Min"=round(min(d),3),"1Q"=round(quantile(d,0.25),3),"Median"=round(median(d),3),
"3Q"=round(quantile(d,0.75),3),"Max"=round(max(d),3))
form=paste("glm(formula=y~",colnames(X)[2],"+",colnames(X)[3],",family=gaussian)")
return(list("Call"=form,"Deviance residuals:"=result.d, "Coefficients"=t(result.coef),
"----"=paste0("(Dispersion parameter for binomial family taken to be ",round(sigma.hat,2),")")))
}
Check if the function is same as the package
data("airquality")
airquality<-na.omit(airquality)
tempfit<-glm(Ozone~Wind+Temp,data=airquality,family="gaussian")
X=as.matrix(cbind(rep(1,111),airquality[,3:4]))
Y=as.matrix(airquality$Ozone)
gaussian_glm(X,Y)
## $Call
## [1] "glm(formula=y~ Wind + Temp ,family=gaussian)"
##
## $`Deviance residuals:`
## Min 1Q.25% Median 3Q.75% Max
## -42.156 -13.216 -3.123 10.598 98.492
##
## $Coefficients
## coef std.error tvalue pvalue
## (Intercept) -67.321953 23.6210451 -2.850084 5.236066e-03
## x1 -3.294839 0.6711482 -4.909257 3.261734e-06
## x2 1.827554 0.2505520 7.294110 5.294680e-11
##
## $`----`
## [1] "(Dispersion parameter for binomial family taken to be 472.12)"
summary(tempfit)
##
## Call:
## glm(formula = Ozone ~ Wind + Temp, family = "gaussian", data = airquality)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -42.156 -13.216 -3.123 10.598 98.492
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -67.3220 23.6210 -2.850 0.00524 **
## Wind -3.2948 0.6711 -4.909 3.26e-06 ***
## Temp 1.8276 0.2506 7.294 5.29e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 472.12)
##
## Null deviance: 121802 on 110 degrees of freedom
## Residual deviance: 50989 on 108 degrees of freedom
## AIC: 1003.4
##
## Number of Fisher Scoring iterations: 2
make Logistic glm function
Logistic_glm =
function(X,y,threshold = 0.001, max_iter = 100000){
cal_p = function(X,beta){
beta = as.vector(beta)
return(exp(X%*%beta) / (1+ exp(X%*%beta)))
}
beta = rep(0,ncol(X))
diff = 10000
iter_count = 0
while(diff > threshold ) #tests for convergence
{
#calculate probabilities using current estimate of beta
p = as.vector(cal_p(X,beta))
#calculate matrix of weights W
W = diag(p*(1-p))
#calculate the change in beta
beta_change = solve(t(X)%*%W%*%X) %*% t(X)%*%(y - p)
#update beta
beta = beta + beta_change
#calculate how much we changed beta by in this iteration
diff = sum(beta_change^2)
#see if we've hit the maximum number of iterations
iter_count = iter_count + 1
if(iter_count > max_iter) {
stop("This isn't converging.")
}
}
#result of coef
x1=colnames(X)[2]
x2=colnames(X)[3]
coef = c("(Intercept)" = round(beta[1],6), x1 = round(beta[2],6), x2 = round(beta[3],6))
cov=solve(t(X)%*%W%*%X)
std.error=sqrt(diag(cov))
zvalue=coef/std.error
pvalue=2*pnorm(-abs(zvalue))
result.coef=rbind(coef,std.error,zvalue,pvalue)
#result of deviance residuals
d<-c()
for (i in 1:length(y)){
if (y[i]==1){
d[i]=sign(y[i]-(1*p[i]))*sqrt(2*y[i]*log(y[i]/(1*p[i])))
} else if(y[i]==0){
d[i]=sign(y[i]-(1*p[i]))*sqrt(2*(1-y[i])*log((1-y[i])/(1*(1-p[i]))))
}
}
result.d=c("min"=min(d),"1Q"=quantile(d,0.25),"median"=median(d),
"3Q"=quantile(d,0.75),"max"=max(d))
form=paste("glm(formula=y~",colnames(X)[2],"+",colnames(X)[3],",family=binomial)")
return(list("Call"=form,"Deviance Residuals"=result.d,"Coefficients"=t(result.coef),
"----"="(Dispersion parameter for binomial family taken to be 1)"))
}
Check if the function is same as the package
record<-read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")[,c(1:3)]
recordfit <- glm(admit ~ gre+gpa, data = record, family = "binomial")
X=as.matrix(cbind(rep(1,400),record[c(2,3)]))
Y=as.matrix(record$admit)
Logistic_glm(X,Y)
## $Call
## [1] "glm(formula=y~ gre + gpa ,family=binomial)"
##
## $`Deviance Residuals`
## min 1Q.25% median 3Q.75% max
## -1.2729816 -0.8988205 -0.7206340 1.3012952 2.0619874
##
## $Coefficients
## coef std.error zvalue pvalue
## (Intercept) -4.949378 1.075088489 -4.603694 4.150630e-06
## x1 0.002691 0.001057488 2.544709 1.093687e-02
## x2 0.754687 0.319584760 2.361461 1.820308e-02
##
## $`----`
## [1] "(Dispersion parameter for binomial family taken to be 1)"
summary(recordfit)
##
## Call:
## glm(formula = admit ~ gre + gpa, family = "binomial", data = record)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2730 -0.8988 -0.7206 1.3013 2.0620
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.949378 1.075093 -4.604 4.15e-06 ***
## gre 0.002691 0.001057 2.544 0.0109 *
## gpa 0.754687 0.319586 2.361 0.0182 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 499.98 on 399 degrees of freedom
## Residual deviance: 480.34 on 397 degrees of freedom
## AIC: 486.34
##
## Number of Fisher Scoring iterations: 4