make gaussian glm function

gaussian_glm = 
  function(X,y,threshold = 0.01, max_iter = 1000000){
    cal_p = function(X,beta){
      beta = as.vector(beta)
      return(X%*%beta)
    }  
    beta = rep(0,ncol(X))
    diff = 10000 
    iter_count = 0
    while(diff > threshold ) #tests for convergence
    {
      #calculate probabilities using current estimate of beta
      p = as.vector(cal_p(X,beta))
      
      #the weights are constant
      #W does not exist (constant)
      
      #calculate the change in beta
      beta_change = solve(t(X)%*%X) %*% t(X)%*%(y - p)
      
      #update beta
      beta = beta + beta_change
      
      #calculate how much we changed beta by in this iteration 
      diff = sum(beta_change^2)
      
      #see if we've hit the maximum number of iterations
      iter_count = iter_count + 1
      if(iter_count > max_iter) {
        stop("This isn't converging.")
      }
    }
    #result of coef
    x1=colnames(X)[2]
    x2=colnames(X)[3]
    coef = c("(Intercept)" = round(beta[1],6), x1 = round(beta[2],6), x2 = round(beta[3],6))
    #sigma.hat=(t(y-X%*%beta)%*%(y-X%*%beta))/(nrow(y)-ncol(X)-1)
    sigma.hat=(t(y-X%*%beta)%*%(y-X%*%beta))/(nrow(y)-ncol(X))
    cov=as.numeric(sigma.hat)*solve(t(X)%*%X)
    std.error=sqrt(diag(cov))
    tvalue=coef/std.error
    pvalue=2*pt(-abs(tvalue),df=nrow(y)-ncol(X))
    result.coef=rbind(coef,std.error,tvalue,pvalue)
    
    #deviance residuals
    d<-c()
    for (i in 1:length(y)){
      d[i]=sign(y[i]-(X%*%beta)[i])*sqrt((y[i]-(X%*%beta)[i])^2)
    }
    result.d=c("Min"=round(min(d),3),"1Q"=round(quantile(d,0.25),3),"Median"=round(median(d),3),
               "3Q"=round(quantile(d,0.75),3),"Max"=round(max(d),3))
    form=paste("glm(formula=y~",colnames(X)[2],"+",colnames(X)[3],",family=gaussian)")
    return(list("Call"=form,"Deviance residuals:"=result.d, "Coefficients"=t(result.coef),
                "----"=paste0("(Dispersion parameter for binomial family taken to be ",round(sigma.hat,2),")"))) 
  }

Check if the function is same as the package

data("airquality")
airquality<-na.omit(airquality)
tempfit<-glm(Ozone~Wind+Temp,data=airquality,family="gaussian")
X=as.matrix(cbind(rep(1,111),airquality[,3:4]))
Y=as.matrix(airquality$Ozone)
gaussian_glm(X,Y)
## $Call
## [1] "glm(formula=y~ Wind + Temp ,family=gaussian)"
## 
## $`Deviance residuals:`
##     Min  1Q.25%  Median  3Q.75%     Max 
## -42.156 -13.216  -3.123  10.598  98.492 
## 
## $Coefficients
##                   coef  std.error    tvalue       pvalue
## (Intercept) -67.321953 23.6210451 -2.850084 5.236066e-03
## x1           -3.294839  0.6711482 -4.909257 3.261734e-06
## x2            1.827554  0.2505520  7.294110 5.294680e-11
## 
## $`----`
## [1] "(Dispersion parameter for binomial family taken to be 472.12)"
summary(tempfit)
## 
## Call:
## glm(formula = Ozone ~ Wind + Temp, family = "gaussian", data = airquality)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -42.156  -13.216   -3.123   10.598   98.492  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -67.3220    23.6210  -2.850  0.00524 ** 
## Wind         -3.2948     0.6711  -4.909 3.26e-06 ***
## Temp          1.8276     0.2506   7.294 5.29e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 472.12)
## 
##     Null deviance: 121802  on 110  degrees of freedom
## Residual deviance:  50989  on 108  degrees of freedom
## AIC: 1003.4
## 
## Number of Fisher Scoring iterations: 2

make Logistic glm function

Logistic_glm = 
  function(X,y,threshold = 0.001, max_iter = 100000){
   cal_p = function(X,beta){
    beta = as.vector(beta)
    return(exp(X%*%beta) / (1+ exp(X%*%beta)))
  }  
  beta = rep(0,ncol(X))
  diff = 10000 
  iter_count = 0
  while(diff > threshold ) #tests for convergence
  {
    #calculate probabilities using current estimate of beta
    p = as.vector(cal_p(X,beta))
    
    #calculate matrix of weights W
    W =  diag(p*(1-p)) 
    
    #calculate the change in beta
    beta_change = solve(t(X)%*%W%*%X) %*% t(X)%*%(y - p)
    
    #update beta
    beta = beta + beta_change
    
    #calculate how much we changed beta by in this iteration 
    diff = sum(beta_change^2)
    
    #see if we've hit the maximum number of iterations
    iter_count = iter_count + 1
    if(iter_count > max_iter) {
      stop("This isn't converging.")
    }
  }
  #result of coef
  x1=colnames(X)[2]
  x2=colnames(X)[3]
  coef = c("(Intercept)" = round(beta[1],6), x1 = round(beta[2],6), x2 = round(beta[3],6))
  cov=solve(t(X)%*%W%*%X)
  std.error=sqrt(diag(cov))
  zvalue=coef/std.error
  pvalue=2*pnorm(-abs(zvalue))
  result.coef=rbind(coef,std.error,zvalue,pvalue)

  #result of deviance residuals
  d<-c()
  for (i in 1:length(y)){
    if (y[i]==1){
      d[i]=sign(y[i]-(1*p[i]))*sqrt(2*y[i]*log(y[i]/(1*p[i])))
    } else if(y[i]==0){
      d[i]=sign(y[i]-(1*p[i]))*sqrt(2*(1-y[i])*log((1-y[i])/(1*(1-p[i]))))
    }
  }
  result.d=c("min"=min(d),"1Q"=quantile(d,0.25),"median"=median(d),
                      "3Q"=quantile(d,0.75),"max"=max(d))
  form=paste("glm(formula=y~",colnames(X)[2],"+",colnames(X)[3],",family=binomial)")
  return(list("Call"=form,"Deviance Residuals"=result.d,"Coefficients"=t(result.coef),
              "----"="(Dispersion parameter for binomial family taken to be 1)"))
  }

Check if the function is same as the package

record<-read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")[,c(1:3)]
recordfit <- glm(admit ~ gre+gpa, data = record, family = "binomial")
X=as.matrix(cbind(rep(1,400),record[c(2,3)]))
Y=as.matrix(record$admit)
Logistic_glm(X,Y)
## $Call
## [1] "glm(formula=y~ gre + gpa ,family=binomial)"
## 
## $`Deviance Residuals`
##        min     1Q.25%     median     3Q.75%        max 
## -1.2729816 -0.8988205 -0.7206340  1.3012952  2.0619874 
## 
## $Coefficients
##                  coef   std.error    zvalue       pvalue
## (Intercept) -4.949378 1.075088489 -4.603694 4.150630e-06
## x1           0.002691 0.001057488  2.544709 1.093687e-02
## x2           0.754687 0.319584760  2.361461 1.820308e-02
## 
## $`----`
## [1] "(Dispersion parameter for binomial family taken to be 1)"
summary(recordfit)
## 
## Call:
## glm(formula = admit ~ gre + gpa, family = "binomial", data = record)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2730  -0.8988  -0.7206   1.3013   2.0620  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -4.949378   1.075093  -4.604 4.15e-06 ***
## gre          0.002691   0.001057   2.544   0.0109 *  
## gpa          0.754687   0.319586   2.361   0.0182 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 499.98  on 399  degrees of freedom
## Residual deviance: 480.34  on 397  degrees of freedom
## AIC: 486.34
## 
## Number of Fisher Scoring iterations: 4