Road library

require(rmutil); require(aod);library(emdbook);library(VGAM);library(ggplot2);
library(easyGgplot2);library(devtools)

Make fixed variables

beta0 <- 1 ## logit rate at x=0
beta1 <- 2 ## increase in logit-prob(nausea) per unit x
k <- 20
n <- 10 #rep(seq(1:5),4)
theta <- 0.6  ## shape parameter, equivalent to alpha+beta
phi=theta/(1+theta)

Simulation given right variance

betahat<-data.frame("beta0.mle"=rep(1,1000),
                    "beta1.mle"=rep(1,1000),
                    "beta0.qle"=rep(1,1000),
                    "beta1.qle"=rep(1,1000),
                    "truebeta0"=rep(beta0,1000),
                    "truebeta1"=rep(beta1,1000),
                    "se.beta0.mle"=rep(1,1000),
                    "se.beta1.mle"=rep(1,1000),
                    "se.beta0.qle"=rep(1,1000),
                    "se.beta1.qle"=rep(1,1000))

for (i in 1:1000){
  x <- runif(k)  
  eta <- beta0+beta1*x
  prob <- plogis(eta)
  y <- rbetabinom(k, prob=prob, size=n)
  s<-n*y
  df1<-data.frame(cbind("y"=y,"n"=n,"x"=x,"s"=s))
  #MLE
  fit1.m<-vglm(cbind(y,n-y) ~ x, betabinomial(irho=0.5),data=df1,theta=0.6)
  betahat[i,1]<-summaryvglm(fit1.m)@coefficients[1]
  betahat[i,2]<-summaryvglm(fit1.m)@coefficients[3]
  #se in mle
  betahat[i,7]<-sqrt(diag(vcov(fit1.m)))[1]
  betahat[i,8]<-sqrt(diag(vcov(fit1.m)))[3]
  #QLE
  fit1.q<-quasibin(cbind(y,n-y) ~ x, data=df1)
  betahat[i,3]<-fit1.q@fm$coefficients[1]
  betahat[i,4]<-fit1.q@fm$coefficients[2]
  #se in qle
  betahat[i,9]<-sqrt(diag(vcov(fit1.m)))[1]
  betahat[i,10]<-sqrt(diag(vcov(fit1.m)))[2]
}

Simulation given wrong variance

betahat.w<-data.frame("beta0.mle"=rep(1,1000),
                      "beta1.mle"=rep(1,1000),
                      "beta0.qle"=rep(1,1000),
                      "beta1.qle"=rep(1,1000),
                      "truebeta0"=rep(beta0,1000),
                      "truebeta1"=rep(beta1,1000),
                      "se.beta0.mle"=rep(1,1000),
                      "se.beta1.mle"=rep(1,1000),
                      "se.beta0.qle"=rep(1,1000),
                      "se.beta1.qle"=rep(1,1000))
rnorm2 <- function(n,mean,sd) { mean+sd*scale(rnorm(n)) }

for (i in 1:1000){
  x <- runif(k)  
  eta <- beta0+beta1*x
  prob <- plogis(eta)
  mu=mean(prob)
  var.wrong<-phi*mu*(1-mu)/n
  y.wrn <- rnorm2(1000,mu,var.wrong)
  s<-n*y
  df1<-data.frame(cbind("y"=y,"n"=n,"x"=x,"s"=s))
  #MLE
  fit1.m<-vglm(cbind(y,n-y) ~ x, betabinomial(irho=0.5),data=df1,theta=0.6)
  betahat.w[i,1]<-summaryvglm(fit1.m)@coefficients[1]
  betahat.w[i,2]<-summaryvglm(fit1.m)@coefficients[3]
  #se in mle
  betahat.w[i,7]<-sqrt(diag(vcov(fit1.m)))[1]
  betahat.w[i,8]<-sqrt(diag(vcov(fit1.m)))[3]
  #QLE
  fit1.q<-quasibin(cbind(y,n-y) ~ x, data=df1)
  betahat.w[i,3]<-fit1.q@fm$coefficients[1]
  betahat.w[i,4]<-fit1.q@fm$coefficients[2]
  #se in qle
  betahat.w[i,9]<-sqrt(abs(diag(vcov(fit1.m))))[1]
  betahat.w[i,10]<-sqrt(abs(diag(vcov(fit1.m))))[2]
}

Histogram to compare bias

 #beta0 given right variance
library(reshape2)
DFforHisto<-melt(data=betahat,id.vars=c(2,4,5,6,7,8,9,10))
ggplot2.histogram(data=DFforHisto, xName='value',
                  groupName='variable', legendPosition="top",
                  alpha=0.5, addDensity=TRUE,
                  meanLineColor="white", meanLineSize=1.5)+
  geom_vline(xintercept=1, colour="red")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#beta1 given right variance
DFforHisto2<-melt(data=betahat,id.vars=c(1,3,5,6,7,8,9,10))
ggplot2.histogram(data=DFforHisto2, xName='value',
                  groupName='variable', legendPosition="top",
                  alpha=0.5, addDensity=TRUE,
                  meanLineColor="white", meanLineSize=1.5)+
  geom_vline(xintercept=2, colour="red")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#beta0 given WRONG variance
DFforHisto.w1<-melt(data=betahat.w,id.vars=c(2,4,5,6,7,8,9,10))
ggplot2.histogram(data=DFforHisto.w1, xName='value',
                  groupName='variable', legendPosition="top",
                  alpha=0.5, addDensity=TRUE,
                  meanLineColor="white", meanLineSize=1.5)+
  geom_vline(xintercept=1, colour="red")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#beta1 given WRONG variance
DFforHisto.w2<-melt(data=betahat.w,id.vars=c(1,3,5,6,7,8,9,10))
ggplot2.histogram(data=DFforHisto.w2, xName='value',
                  groupName='variable', legendPosition="top",
                  alpha=0.5, addDensity=TRUE,
                  meanLineColor="white", meanLineSize=1.5)+
  geom_vline(xintercept=2, colour="red")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

remove na, just in case

betahat<-na.omit(betahat)
betahat.w<-na.omit(betahat.w)

In the Right Model

###Compare SE(beta0) with SD(beta0)
sd(betahat$beta0.mle)

## [1] 0.4265289

mean(betahat$se.beta0.mle)

## [1] 0.2244866

###Compare SE(beta1) with SD(beta1)
sd(betahat$beta1.mle)

## [1] 0.9213408

mean(betahat$se.beta1.mle)

## [1] 0.4842197

In the Wrong Model

###Compare SE(beta0) with SD(beta0)
sd(betahat.w$beta0.mle)

## [1] 0.4024823

mean(betahat.w$se.beta0.mle)

## [1] 0.2188144

###Compare SE(beta1) with SD(beta1)
sd(betahat.w$beta1.mle)

## [1] 0.7938887

mean(betahat.w$se.beta1.mle)

## [1] 0.3859304

RMSE in the right model

rmse.beta0.mle<-sqrt((sum(betahat$beta0.mle-betahat$truebeta0)^2)/1000)
rmse.beta1.mle<-sqrt((sum(betahat$beta1.mle-betahat$truebeta1)^2)/1000)
rmse.beta0.qle<-sqrt((sum(betahat$beta0.qle-betahat$truebeta0)^2)/1000)
rmse.beta1.qle<-sqrt((sum(betahat$beta1.qle-betahat$truebeta1)^2)/1000)
Compare.RMSE<-data.frame("rmse.beta0"=c(rmse.beta0.mle,rmse.beta0.qle),
           "rmse.beta1"=c(rmse.beta1.mle,rmse.beta1.qle))
row.names(Compare.RMSE)<-c("MLE","QLE")
Compare.RMSE

##     rmse.beta0 rmse.beta1
## MLE  0.2650934   2.702448
## QLE  0.2739240   2.664952

RMSE in the wrong model

#RMSE of QLE is Better(lower) than RMSE of MLE in the Wrong Model
rmse.beta0.mle<-sqrt((sum(betahat.w$beta0.mle-betahat.w$truebeta0)^2)/1000)
rmse.beta1.mle<-sqrt((sum(betahat.w$beta1.mle-betahat.w$truebeta1)^2)/1000)
rmse.beta0.qle<-sqrt((sum(betahat.w$beta0.qle-betahat.w$truebeta0)^2)/1000)
rmse.beta1.qle<-sqrt((sum(betahat.w$beta1.qle-betahat.w$truebeta1)^2)/1000)
Compare.RMSE<-data.frame("rmse.beta0"=c(rmse.beta0.mle,rmse.beta0.qle),
                         "rmse.beta1"=c(rmse.beta1.mle,rmse.beta1.qle))
row.names(Compare.RMSE)<-c("MLE","QLE")
Compare.RMSE

##     rmse.beta0 rmse.beta1
## MLE   30.30557   62.82963
## QLE   30.30794   62.82932

bias chart

attach(betahat)
d1<-data.frame("bias of beta0"=c(mean(beta0.mle)-1,mean(beta0.qle)-1),
               "bias of beta1"=c(mean(beta1.mle)-2,mean(beta1.qle)-2))
row.names(d1)<-c("MLE","QLE")
attach(betahat.w)

## The following objects are masked from betahat:
## 
##     beta0.mle, beta0.qle, beta1.mle, beta1.qle, se.beta0.mle,
##     se.beta0.qle, se.beta1.mle, se.beta1.qle, truebeta0, truebeta1

d2<-data.frame("biasbar of beta0"=c(mean(beta0.mle)-1,mean(beta0.qle)-1),
               "biasbar of beta1"=c(mean(beta1.mle)-2,mean(beta1.qle)-2))
row.names(d2)<-c("MLE","QLE")
#compare bias from right model
d1

##     bias.of.beta0 bias.of.beta1
## MLE   0.008382990    0.08545889
## QLE   0.008662239    0.08427317

#compare bias from wrong model
d2

##     biasbar.of.beta0 biasbar.of.beta1
## MLE        0.9583462        -1.986847
## QLE        0.9584211        -1.986838

GLM_Assignment3

김민지

2018년 10월 24일

Road library

Make fixed variables

Simulation given right variance

Simulation given wrong variance

Histogram to compare bias

remove na, just in case

In the Right Model

In the Wrong Model

RMSE in the right model

RMSE in the wrong model

bias chart