Lab 10-2021

Task 1

getwd()

## [1] "C:/Users/Caleb/Documents/Civil Engineering degree coursework/Applied Statistical Methods/Labs/Lab 10"

Task 2

outer(1:4,5:10,function(x,y) paste(x,y,sep=" "))

##      [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  
## [1,] "1 5" "1 6" "1 7" "1 8" "1 9" "1 10"
## [2,] "2 5" "2 6" "2 7" "2 8" "2 9" "2 10"
## [3,] "3 5" "3 6" "3 7" "3 8" "3 9" "3 10"
## [4,] "4 5" "4 6" "4 7" "4 8" "4 9" "4 10"

Line A forms a matrix with x rows and param columns where each entry is the function 1fun evaluated at the points x and param.

Line B first forms a logical vector of whether y is the max of y then outputs the largest indice in which a true is outputted.

The formula for the likelihood is \[L(p)=p(y_1,y_2,...,y_8)=p(y_1)p(y_2)p(y_3)p(y_4)p(y_5)p(y_6)p(y_7)p(y_8)\]

The R code for this would be \[L(p)=dbinom(y_1,20,p)*dbinom(y_2,20,p)*...*dbinom(y_8,20,p)\]

logbin=function(x,param) log(dbinom(x,prob=param,size=20))

mymaxlik=function(lfun,x,param,...){
# how many param values are there?
np=length(param)
# outer -- notice the order, x then param
# this produces a matrix -- try outer(1:4,5:10,function(x,y) paste(x,y,sep=" "))   to understand
z=outer(x,param,lfun)
# z is a matrix where each x,param is replaced with the function evaluated at those values
y=apply(z,2,sum)

# y is a vector made up of the column sums
# Each y is the log lik for a new parameter value
plot(param,y,col="Blue",type="l",lwd=2,...)
# which gives the index for the value of y == max.
# there could be a max between two values of the parameter, therefore 2 indices
# the first max will take the larger indice
i=max(which(y==max(y)))
abline(v=param[i],lwd=2,col="Red")

# plots a nice point where the max lik is
points(param[i],y[i],pch=19,cex=1.5,col="Black")
axis(3,param[i],round(param[i],2))
#check slopes. If it is a max the slope should change sign from + to 
# We should get three + and two -vs
ifelse(i-3>=1 & i+2<=np, slope<-(y[(i-2):(i+2)]-y[(i-3):(i+1)])/(param[(i-2):(i+2)]-param[(i-3):(i+1)]),slope<-"NA")
return(list(i=i,parami=param[i],yi=y[i],slope=slope))
}

mymaxlik(x=c(3,3,4,3,4,5,5,4),param=seq(0,1,length=1000),lfun=logbin,xlab=expression(pi),main="Binomial",cex.main=2)

## $i
## [1] 195
## 
## $parami
## [1] 0.1941942
## 
## $yi
## [1] -12.78734
## 
## $slope
## [1]  2.12579310  1.08781082  0.05802047 -0.96371372 -1.97752478

Task 3

logpoiss=function(x,param) log(dpois(x,lambda=param))

mymaxlik(x=c(4,6,7,6,5),param=seq(0,20,length=1000),lfun=logpoiss,xlab=expression(lambda),main="Poisson",cex.main=2)

## $i
## [1] 281
## 
## $parami
## [1] 5.605606
## 
## $yi
## [1] -9.411759
## 
## $slope
## [1]  0.040005454  0.021908448  0.003940937 -0.013898462 -0.031611116

The maximum likelihood estimate of \(\lambda\) is \(5.61 \frac{protons}{sec}\).

The log likelihood is \[l(\lambda)=-5\lambda +28*log(\lambda)-log(4!*6!*7!*6!*5!)\]

myNRML=function(x0,delta=0.001,llik,xrange,parameter="param"){
f=function(x) (llik(x+delta)-llik(x))/delta
fdash=function(x) (f(x+delta)-f(x))/delta
d=1000
i=0
x=c()
y=c()
x[1]=x0
y[1]=f(x[1])
while(d > delta & i<100){
i=i+1
x[i+1]=x[i]-f(x[i])/fdash(x[i])
y[i+1]=f(x[i+1])
d=abs(y[i+1])
}
layout(matrix(1:2,nr=1,nc=2,byrow=TRUE),width=c(1,2))
curve(llik(x), xlim=xrange,xlab=parameter,ylab="log Lik",main="Log Lik")
curve(f(x),xlim=xrange,xaxt="n", xlab=parameter,ylab="derivative",main=  "Newton-Raphson Algorithm \n on the derivative")
points(x,y,col="Red",pch=19,cex=1.5)
axis(1,x,round(x,2),las=2)
abline(h=0,col="Red")

segments(x[1:(i-1)],y[1:(i-1)],x[2:i],rep(0,i-1),col="Blue",lwd=2)
segments(x[2:i],rep(0,i-1),x[2:i],y[2:i],lwd=0.5,col="Green")

list(x=x,y=y)
}

myNRML(x0=1,delta=0.000001,llik=function(x) log(dpois(4,x)*dpois(6,x)*dpois(7,x)*dpois(6,x)*dpois(5,x)),xrange=c(0,20),parameter="lambda" )

## $x
## [1] 1.000000 1.821147 3.048672 4.436748 5.359232 5.589568 5.599970 5.600000
## 
## $y
## [1]  2.299999e+01  1.037492e+01  4.184327e+00  1.310927e+00  2.246292e-01
## [6]  9.330797e-03  2.633627e-05 -3.019807e-08

The function myNRML() gives the value 5.6 as the value for \(\hat{\lambda}\).

Task 4

logbin2=function(theta){log(dbinom(2,prob=theta,size=6)) + log(dbinom(4,prob=theta,size=10))}

mymaxlikg=function(lfun="logbin2",theta) { # default log lik is a combination bin
nth=length(theta)  # nu. of valuse used in theta
thmat=matrix(theta,nr=nth,nc=1,byrow=TRUE) # Matrix of theta
z=apply(thmat,1,lfun) # z holds the log lik values
zmax=max(which(z==max(z)))  # finding the INDEX of the max lik
plot(theta,exp(z),type="l") # plot of lik
abline(v=theta[zmax],col="Blue")   #  verical line through max
axis(3,theta[zmax],round(theta[zmax],4))  # one tick on the third axis 
theta[zmax]   # theta corresponding to max lik
}

mymaxlikg(theta=seq(0,1,length=10000))

## [1] 0.3750375

Task 5

\[l(\theta_1, \theta_2)=log_e(n!)-log_e(y_1!)-log_e((n-y_1)!)+y_1log_e(\theta_1)+(n-y)log_e(1-\theta_1)+y_2log(\theta_2)-\theta_2-log(y_2!)\]

logbinpois=function(theta1,theta2) log(dbinom(4,size=20,prob=theta1)) + log(dpois(4,lambda=theta2))

maxlikg2=function(theta1,theta2,lfun="logbinpois",...){
n1=length(theta1)
n2=length(theta2)
z=outer(theta1,theta2,lfun)
contour(theta1,theta2,exp(z),...) # exp(z) gives the lik
maxl=max(exp(z))    # max lik
coord=which(exp(z)==maxl,arr.ind=TRUE)  # find the co-ords of the max
th1est=theta1[coord[1]] # mxlik estimate of theta1
th2est=theta2[coord[2]]
abline(v=th1est,h=th2est)
axis(3,th1est,round(th1est,2))
axis(4,th2est,round(th2est,2),las=1)
list(th1est=th1est,th2est=th2est)
}
maxlikg2(theta1=seq(0,1,length=1000),theta2=seq(0,10,length=1000),nlevels=20)

## $th1est
## [1] 0.2002002
## 
## $th2est
## [1] 4.004004

Task 6

mymlnorm=function(x,mu,sig,...){  #x sample vector
nmu=length(mu) # number of values in mu
nsig=length(sig)
n=length(x) # sample size
zz=c()    ## initialize a new vector
lfun=function(x,m,p) log(dnorm(x,mean=m,sd=p))   # log lik for normal
for(j in 1:nsig){
z=outer(x,mu,lfun,p=sig[j]) # z a matrix 
# col 1 of z contains lfun evaluated at each x with first value of mu, 
# col2 each x with 2nd value of m 
# all with sig=sig[j]
y=apply(z,2,sum)
# y is a vector filled with log lik values, 
# each with a difft mu and all with the same sig[j]
zz=cbind(zz,y)
## zz is the matrix with each column containing log L values, rows difft mu, cols difft sigmas 
}
maxl=max(exp(zz))
coord=which(exp(zz)==maxl,arr.ind=TRUE)
maxlsig=apply(zz,1,max)
contour(mu,sig,exp(zz),las=3,xlab=expression(mu),ylab=expression(sigma),axes=TRUE,
main=expression(paste("L(",mu,",",sigma,")",sep="")),...)
mlx=round(mean(x),2)  # theoretical
mly=round(sqrt((n-1)/n)*sd(x),2)
#axis(1,at=c(0:20,mlx),labels=sort(c(0:20,mlx)))
#axis(2,at=c(0:20,mly),labels=TRUE)
abline(v=mean(x),lwd=2,col="Green")
abline(h=sqrt((n-1)/n)*sd(x),lwd=2,col="Red")

# Now find the estimates from the co-ords
muest=mu[coord[1]]
sigest=sig[coord[2]]

abline(v=muest, h=sigest)
return(list(x=x,coord=coord,maxl=maxl))
}

mymlnorm(x=c(10,12,13,15,12,11,10),mu=seq(5,10,length=1000),sig=seq(0.1,4,length=1000),lwd=2,labcex=1)

## $x
## [1] 10 12 13 15 12 11 10
## 
## $coord
##       row col
## [1,] 1000 610
## 
## $maxl
## [1] 8.453622e-08

Task 7

sam= rbeta(30,shape1=3,shape2=4)

sam1=sample(sam,30,replace=TRUE)
sam2=sample(sam,30,replace=TRUE)
sam3=sample(sam,30,replace=TRUE)
sam4=sample(sam,30,replace=TRUE)
sam5=sample(sam,30,replace=TRUE)
sam6=sample(sam,30,replace=TRUE)
sam7=sample(sam,30,replace=TRUE)
sam8=sample(sam,30,replace=TRUE)
sam9=sample(sam,30,replace=TRUE)
sam10=sample(sam,30,replace=TRUE)
sam11=sample(sam,30,replace=TRUE)
sam12=sample(sam,30,replace=TRUE)

mymlbeta=function(x,alpha,beta,...){  #x sample vector
na=length(alpha) # number of values in alpha
nb=length(beta)
n=length(x) # sample size
zz=c()    ## initialize a new vector
lfun=function(x,a,b) log(dbeta(x,shape1=a,shape2=b))   # log lik for beta
for(j in 1:nb){
z=outer(x,alpha,lfun,b=beta[j]) # z a matrix 
# col 1 of z contains lfun evaluated at each x with first value of alpha, 
# col2 each x with 2nd value of a 
# all with b=beta[j]
y=apply(z,2,sum)
# y is a vector filled with log lik values, 
# each with a difft alpha and all with the same sig[j]
zz=cbind(zz,y)
## zz is the matrix with each column containing log L values, rows difft alpha, cols difft betas 
}
maxl=max(exp(zz))    # max lik
coord=which(exp(zz)==maxl,arr.ind=TRUE)  # find the co-ords of the max
aest=alpha[coord[1]] # mxlik estimate of alpha
best=beta[coord[2]]
contour(alpha,beta,exp(zz),las=3,xlab=expression(alpha),ylab=expression(beta),axes=TRUE,
main=expression(paste("L(",alpha,",",beta,")",sep="")),...)

abline(v=aest, h=best)
points(aest,best,pch=19)
axis(4,best,round(best,2),col="Red")
axis(3,aest,round(aest,2),col="Red")
return(list(x=x,coord=coord,maxl=maxl,maxalpha=aest,maxbeta=best))
}

est1=mymlbeta(x=sam1,alpha=seq(1,4,length=100),beta=seq(2,8,length=100),lwd=2,labcex=1)

alpha1=est1$maxalpha
beta1=est1$maxbeta

est2=mymlbeta(x=sam2,alpha=seq(1,4,length=100),beta=seq(2,8,length=100),lwd=2,labcex=1)

alpha2=est2$maxalpha
beta2=est2$maxbeta

est3=mymlbeta(x=sam3,alpha=seq(1,4,length=100),beta=seq(2,8,length=100),lwd=2,labcex=1)

alpha3=est3$maxalpha
beta3=est3$maxbeta

est4=mymlbeta(x=sam4,alpha=seq(1,4,length=100),beta=seq(2,8,length=100),lwd=2,labcex=1)

alpha4=est4$maxalpha
beta4=est4$maxbeta

est5=mymlbeta(x=sam5,alpha=seq(1,4,length=100),beta=seq(2,8,length=100),lwd=2,labcex=1)

alpha5=est5$maxalpha
beta5=est5$maxbeta

est6=mymlbeta(x=sam6,alpha=seq(1,4,length=100),beta=seq(2,8,length=100),lwd=2,labcex=1)

alpha6=est6$maxalpha
beta6=est6$maxbeta

est7=mymlbeta(x=sam7,alpha=seq(1,4,length=100),beta=seq(2,8,length=100),lwd=2,labcex=1)

alpha7=est7$maxalpha
beta7=est7$maxbeta

est8=mymlbeta(x=sam8,alpha=seq(1,4,length=100),beta=seq(2,8,length=100),lwd=2,labcex=1)

alpha8=est8$maxalpha
beta8=est8$maxbeta

est9=mymlbeta(x=sam9,alpha=seq(1,4,length=100),beta=seq(2,8,length=100),lwd=2,labcex=1)

alpha9=est9$maxalpha
beta9=est9$maxbeta

est10=mymlbeta(x=sam10,alpha=seq(1,4,length=100),beta=seq(2,8,length=100),lwd=2,labcex=1)

alpha10=est10$maxalpha
beta10=est10$maxbeta

est11=mymlbeta(x=sam11,alpha=seq(1,4,length=100),beta=seq(2,8,length=100),lwd=2,labcex=1)

alpha11=est11$maxalpha
beta11=est11$maxbeta

est12=mymlbeta(x=sam12,alpha=seq(1,4,length=100),beta=seq(2,8,length=100),lwd=2,labcex=1)

alpha12=est12$maxalpha
beta12=est12$maxbeta

alpha=3
beta=4

x=c(alpha,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11,alpha12)

y=c(beta,beta1,beta2,beta3,beta4,beta5,beta6,beta7,beta8,beta9,beta10,beta11,beta12)

names=c("known","est1","est2","est3","est4","est5","est6","est7","est8","est9","est10","est11","est12")

mycol = ifelse(x==3 & y==4, "Red",("Black"))

plot(x, y, main = "Beta vs. Alpha",
 xlab = "Alpha", ylab = "Beta",
 pch = 19,col=mycol)
text(x,y,labels=names)

Task 8

logbin=function(x,param) log(dbinom(x,prob=param,size=20))

est=MATH4753GRAY::mymaxlik(x=c(3,2,4,1,4,5,5,3),param=seq(0,1,length=1000),lfun=logbin,xlab=expression(pi),main="Binomial",cex.main=2)