Please ensure that you rename this file as “StudentNumber.Rmd”. i.e. If you student number is CLRALL001 then save the file as “CLRALL001.Rmd”. Also ensure that you create the object named “stdnumber” in the following R chunk.

stdnumber<-"VLJCHR004"
knitr::opts_chunk$set(echo = TRUE,message = F,warning = F)

Question 1

Q1.a

Q1.a

#define the PDF

f.x <- function(x){

b*x*(1-x)^3

}

#initialize an x variable: a monotone vector in the range over which the function is defined, of length 1000
xrange <- seq(0,1,length.out=1000)

#initialize a range of guesses for which b might resukt in the function being a pdf (integrating to 1)
b. <- -10:30

#initialize an empty variable to store b values and the asociated error
b.range.error <- c(NA,NA,NA)

#loop through all the guesses for b, integrate the function with that b value
#subtract the integration value from 1 to get an error term
for(guess in b.){
  b <- guess
  answer <- integrate(f.x,0,1)
  answer <- as.numeric(answer$value)
  error <- 1-answer
  this.b.and.error <- c(b,error,answer)
  b.range.error <- rbind(b.range.error,this.b.and.error)
}

#remove the empty row used to initialize results table
b.range.error <- b.range.error[-1,]

b.range.error <-as.data.frame(b.range.error)

names(b.range.error) <- c("b","error","integral")

require(knitr)

kable(head(b.range.error))
b error integral
this.b.and.error -10 1.50 -0.50
this.b.and.error.1 -9 1.45 -0.45
this.b.and.error.2 -8 1.40 -0.40
this.b.and.error.3 -7 1.35 -0.35
this.b.and.error.4 -6 1.30 -0.30
this.b.and.error.5 -5 1.25 -0.25
#plot 
plot(x=b.range.error$b,y=b.range.error$error,type="l",col="red",main="Range of b values with associated integral and error",
     xlab="",ylab="",sub="Red = error, Blue=Integral, dashed lines indicate true value for b,\nwhere error is = 0 and integral is = 1")
lines(x=b.range.error$b,y=b.range.error$integral,col="blue")
abline(h=0,lty="dashed",col="blue")
abline(h=1,lty="dashed",col="red")
abline(v=b.range.error$b[(which(b.range.error$error==0))],lty="dotted",col="black")

#print the b value that results in this being a PDF

b <- b.range.error$b[which(b.range.error$error==0)]

print(paste0("The b value that results in an area under the curve of 1 (the defining property of a probability density function) is: ", b))
## [1] "The b value that results in an area under the curve of 1 (the defining property of a probability density function) is: 20"
Q1.b

Q1.b

xrange <- seq(0,1,length.out = 1000)
  

#calculatre mean of random variable:

  ex.value <- function(x=xrange){
        pdf <- f.x(x)
        X <- x*pdf
        return(X)
    
  }
    

    
  mu <- integrate(ex.value,0,1)
  
  mu <- mu$value
  
#using mean, calculate variance:
  
variance <- function(xrange){
  
x <- xrange

answer <- ((x-mu)^2)*f.x(x)
 
}

varX <- integrate(variance,0,1)
varX <- varX$value
    

#check if variance is = 8/252, as per question
varX==8/252
## [1] TRUE
Q1.c

Q1.c

modeX <- function(){
  y <- f.x(xrange)
  #y <- y*xrange
  counter <- c()
  for(i in unique(y)){
    count <- length(which(y==i))
    counter <- c(counter,count)
  }
  plot(x=unique(y),y=counter,ylim=c(0,3),type="l")
  plot(x=xrange,y=y)
  unique.y <- unique(y)
  print(paste0("modeX is: ", unique.y[(which(counter==max(counter)))]))
}

modeX()

## [1] "modeX is: 0"
Q1.d

Q1.d

h.x <- function(x){
  return(rep(1,length(x)))
}

g.x <- function(x){
  return(f.x(x)/h.x(x))
}

maxC <- max(g.x(xrange))
maxC
## [1] 2.109374
Q1.e

Q1.e

C <-maxC
#sample size 100000
N <- 100000

count.x <- 0

x <- numeric(N)

while(count.x <= N){
  
  U <- runif(1)
  Y <- h.x(U)
  g.y <- g.x(U)/C
  
  if(g.y>=U){
    
    count.x <- count.x+1
    x[count.x] <- Y
    
  }
  
  
}

plot(x)

lines(xrange,dbeta(xrange,2,4))

Question 2

Q2

Q2

doors <- c("car","goat","goat")

N <- 1000000

sam <- matrix(nrow=N,ncol = 3)

i <- 1

while(i <=N){
  
  a <- sample(doors,size=3,replace = F)
  sam[i,] <- a
  
  i <- i+1
  
}
  
sam <- as.data.frame(sam)
names(sam) <- c("door1","door2","door3")

require(dplyr)

door3goat <- sam %>%
  subset(door3=="goat")

door3goat$door1 <- ifelse(door3goat$door1=="car",1,0)
door3goat$door2 <- ifelse(door3goat$door2=="car",1,0)

d1 <- length(which(door3goat$door1==1))
d2 <- length(which(door3goat$door2==1))

print(paste0("he should choose door ",which.max(c(d1,d2))))
## [1] "he should choose door 2"
print(paste0("He has a probability of ",d2/(d1+d2)," of winning"))
## [1] "He has a probability of 0.500089951785843 of winning"

Question 3

Q3

Q3

f.xy <- function(x,y=seq(-10,10,length.out = 1000)){
  return((x-10)^2+y^2+0.001*(x^4+(y+5)^4))
}

df.dx <- function(x){
  return((2*(x-10))+(0.04*x^3))
  
}

df.dy <- function(y){
  return((2*y)+(0.04*(y+5)^3)) 
}


#curve(f.xy,from=-15,to=10,n=1000)

x <- seq(-15,10,length.out = 10000)
dz.dx <- df.dx(x)

y <- seq(-10,10,length.out = 10000)
dz.dy <- df.dy(y)

par(mfrow=c(2,1))
plot(y=dz.dy,x=y)
plot(y=dz.dx,x=x)

x1 <- -15
y1 <- -10
z1 <- f.xy(x=x1,y=y1)
Q3a

Q3a

N <- 50

x <- seq(-15,10,length.out = N)
y <- seq(-10,10,length.out = N)


my.dat <- matrix(ncol=3,nrow=N)

my.dat[,1] <- x
my.dat[,2] <- y

my.dat[,3] <- f.xy(x=my.dat[,1],y=my.dat[,2])

my.dat <- as.data.frame(my.dat)

names(my.dat) <- c("x","y","z")

data.loess <- loess(z ~ x * y, data = my.dat)

xgrid <- seq(-15,10,by=0.03)
ygrid <- seq(-10,10,by=0.3)

data.fit <-  expand.grid(x=xgrid,y=ygrid)

mtrx3d <-  predict(data.loess, newdata = data.fit)

contour(x = xgrid, y = ygrid, z = mtrx3d, xlab ="x", ylab = "y")

Q3b

Q3b

x <- seq(-15,10,length.out = 30)
y <- seq(-10,10,length.out = 30)

z <- matrix(ncol=30,nrow=30)

for(i in 1:length(x)){
  xi <- x[i]
  for(j in 1:length(y)){
    yi <- y[j]
    
    z[i,j] <- f.xy(xi,yi)
    
  }
}


persp(x=xgrid,y=ygrid,z=mtrx3d,xlab="x",ylab="y",zlab="z",border = "red")

persp(x=xgrid,y=ygrid,z=mtrx3d,xlab="x",ylab="y",zlab="z",border = "red",phi=55)

persp(x=xgrid,y=ygrid,z=mtrx3d,xlab="x",ylab="y",zlab="z",border = "red",theta=45)

persp(x=xgrid,y=ygrid,z=mtrx3d,xlab="x",ylab="y",zlab="z",border="red",theta=90)

alpha <- 0.01
x0 <- 15
y0 <- 10

Z.function <- function(par=c(x0,y0)){
  
  alpha0 <- alpha
  
  #using starting values for X and Y, we calculate z0
  y0 <- par[2]
  x0 <- par[1]
  z0 <- f.xy(x0,y0)
  
  #we first increase both x and y and use the formula given in the question to calculate a new Z
  x1 <- x0+0.001
  y1 <- y0+0.001
  
  #first we keep y the same and increase x, then we keep x the same and increase y
  z1.1 <- z0-(alpha0*f.xy(x1,y0))
  z1.2 <- z0-(alpha0*f.xy(x1,y0))
  
  #now we decrease x and y by the same amount
  x2 <- x0-0.001
  y2 <- y0-0.001
  z2.1 <- z0-(alpha0*f.xy(x2,y0))
  z2.2 <- z0-(alpha0*f.xy(x0,y2))
  
  #we calculate the gradient we should get, at one step up and one step down in x and y, respectively
  dz.dx1 <- df.dx(x1)
  dz.dy1 <- df.dy(y1)
  
  dz.dx2 <- df.dx(x2)
  dz.dy2 <- df.dy(y2)
  
  #we calculate the real difference in z, resulting from an increase and a decrease in x
  formula.dz1.1 <- z0-z1.1
  formula.dz1.2 <- z0-z1.2
  
  formula.dz2.1 <- z0-z2.1
  formula.dz2.2 <- z0-z2.2
  
  #our actual increment is always + or - 0.001
  dx.real <- 0.001
  
  #this is how much x changed upwards and downwards, y changes with the same amount, so we don't declare another variable for that
  diff.x1 <- dx.real
  diff.x2 <- -dx.real
  
  #according to our formula, this is how much z should change with a shift in x up and down
  diff.z1.x <- dz.dx1*diff.x1
  diff.z2.x <- dz.dx2*diff.x2
  
  #and the same for shifts in y
  diff.z1.y <- dz.dy1*diff.x1
  diff.z2.y <- dz.dy2*diff.x2
  
  #error in shift in z due to changes in x
  dx.error1 <- (formula.dz1.1-diff.z1.x)^2
  dx.error2 <- (formula.dz2.1-diff.z2.x)^2
  
  #same for y
  dy.error1 <- (formula.dz1.2-diff.z1.y)^2
  dy.error2 <- (formula.dz2.2-diff.z2.y)^2
  
  #we sum the squared error terms
  Error <- sum(dx.error1,dx.error2,dy.error1,dy.error2)
  
  #return the square root of sum of error squared
  Error <- sqrt(Error)
  Error
}
Q3d

Q3d

par. <- c(x0,y0)#,alpha)

opt <- optim(fn=Z.function,par=par.,method = "BFGS")

optimal.x <- opt$par[1]
optimal.y <- opt$par[2]
#optimal.alpha <- opt$par[3]

print(paste0("Optimal X is : ",optimal.x))
## [1] "Optimal X is : 8.63324570932063"
print(paste0("Optimal Y is: ",optimal.y))
## [1] "Optimal Y is: -0.222907067584461"
#print(paste0("Optimal Alpha is: ",optimal.alpha))

lowest.z <- f.xy(optimal.x,optimal.y)

print(paste0("Function reaches a minimum at: ",lowest.z))
## [1] "Function reaches a minimum at: 7.9936432815087"
Q4a

Q4a

xy <- read.csv("xynonlinear.csv",sep=";")

i <- 1:100

error <- rnorm(100)

x <- xy$x
y <- xy$y

a <- 1
b <- -1

non.lin.mod <- function(par=c(a,b)){
  
  a <- par[1]
  b <- par[2]
  
  Total.error <- 0
  
  for(ii in i){
    
    y.real <- y[ii]
    x.i <- x[ii]
    error.i <- error[ii]
    
    y.i <- a*exp(b*x.i)+error.i
    
    LS.error <- (y.real-y.i)^2
    Total.error <- Total.error+LS.error
    
}
  
  return(Total.error)
}

optim(par=c(a,b),fn=non.lin.mod,method = "L-BFGS-B",lower=c(0,-1),upper=c(Inf,0))
## $par
## [1]  4.82313372 -0.05374854
## 
## $value
## [1] 107.0272
## 
## $counts
## function gradient 
##       38       38 
## 
## $convergence
## [1] 0
## 
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
find.pars <- optim(par=c(a,b),fn=non.lin.mod,method = "L-BFGS-B",lower=c(0,-1),upper=c(Inf,0))

a.new <- find.pars$par[1]
b.new <- find.pars$par[2]

non.lin.mod(c(a.new,b.new))
## [1] 107.0272
y.func <- function(a=rep(a.new,length(i)),b=rep(b.new,length(i)),x.i=x,error.i=error){
  y <- a*exp(b*x.i)+error.i
}

#max(y.func())

plot(x=xy$x,y=error,type="h",col="orange",ylim=c(-6,6),main="BLUE: real data, RED: formula curve, ORANGE: e ~ N(0,1)",xlab="x",ylab="y")
points(x=xy$x,y=xy$y,col="blue",pch=16)
lines(x=xy$x,y=y.func(),col="red")

plot(x=xy$x,y=xy$y-y.func(),type="h",col="blue",main="Function error (BLUE) vs provided error term (ORANGE)",xlab="x",ylab = "error")

lines(x=xy$x,y=error,type="h",col="orange")

#it is clear that providing a larger error term results in a larger error, but often in the opposite direction

if we don’t provide an error term, but use the function error, we see that this is normally distributed, as expected

xy <- read.csv("xynonlinear.csv",sep=";")

i <- 1:100

#error <- rnorm(100)

x <- xy$x
y <- xy$y

a <- 1
b <- -1

non.lin.mod <- function(par=c(a,b)){
  
  a <- par[1]
  b <- par[2]
  
  Total.error <- 0
  
  for(ii in i){
    
    y.real <- y[ii]
    x.i <- x[ii]
    #error.i <- error[ii]
    
    y.i <- a*exp(b*x.i)#+error.i
    
    LS.error <- (y.real-y.i)^2
    Total.error <- Total.error+LS.error
    
}
  
  return(Total.error)
}

optim(par=c(a,b),fn=non.lin.mod,method = "L-BFGS-B",lower=c(0,-1),upper=c(Inf,0))
## $par
## [1]  4.97916150 -0.04992288
## 
## $value
## [1] 24.24663
## 
## $counts
## function gradient 
##       42       42 
## 
## $convergence
## [1] 0
## 
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
find.pars <- optim(par=c(a,b),fn=non.lin.mod,method = "L-BFGS-B",lower=c(0,-1),upper=c(Inf,0))

a.new <- find.pars$par[1]
b.new <- find.pars$par[2]

non.lin.mod(c(a.new,b.new))
## [1] 24.24663
y.func <- function(a=rep(a.new,length(i)),b=rep(b.new,length(i)),x.i=x){
  y <- a*exp(b*x.i)
  error <- xy$y-y
  
  return(list(y=y,error=error))
}

plot(x=xy$x,y=xy$y,col="blue")
lines(x=xy$x,y=y.func()$y,col="red")
lines(x=xy$x,y=(xy$y-y.func()$y),type="h",col="orange")

error <- xy$y-y.func()$y
hist(error,col="lightblue",border="orange",lwd=3)

plot(density(error),main="Error density",col="orange",lwd=3)

Q4b

Q4b

#our regression parameters: a, b and e

#sample from x and y repeatedly with replacement

count <- 0

a <- a.new
b <- b.new

a.list <- a.new
b.list <- b.new

while(count <= 10000){
  
index <- sample(1:100,size=1000,replace=T)
x.boot <- xy$x[index]
y.boot <- xy$y[index]

x <- x.boot
y <- y.boot


sim <- optim(par=c(a,b),fn=non.lin.mod,method = "L-BFGS-B",lower=c(0,-1),upper=c(Inf,0))

a <- sim$par[1]
b <- sim$par[2]

a.list <- c(a.list,a)
b.list <-c(b.list,b)
  
  count <- count+1
}

a.mean <- mean(a.list)
b.mean <- mean(b.list)

a.var <- var(a.list)
b.var <- var(b.list)

a.range <- range(a.list)
b.range <- range(b.list)

b.seq <- seq(b.range[1],b.range[2],length.out = 1000)
b.norm <- dnorm(x=b.seq,mean=b.mean,sd=sqrt(b.var))

a.seq <- seq(a.range[1],a.range[2],length.out = 1000)
a.norm <- dnorm(x=a.seq,mean=a.mean,sd=sqrt(a.var))

hist(a.list,breaks = 1000,main="Histogram of a, based on a 10,000 simulations\neach of sample size n=1,000",border="aquamarine",freq = F)

lines(x=a.seq,y=a.norm,type="l",col="red",lwd=4)

a.hi.lo <- qnorm(p=c(0.025,0.975),mean=a.mean,sd=sqrt(a.var))

segments(x0=a.hi.lo[1],y0=0,y1=dnorm(a.hi.lo[1],mean=a.mean,sd=sqrt(a.var)),col="red",lwd=4)
segments(x0=a.hi.lo[2],y0=0,y1=dnorm(a.hi.lo[2],mean=a.mean,sd=sqrt(a.var)),col="red",lwd=4)

hist(b.list,breaks = 1000,main="Histogram of b, based on a 10,000 simulations\neach of sample size n=10,000",border="aquamarine",freq=F)


lines(x=b.seq,y=b.norm,type="l",col="red",lwd=4)

b.hi.lo <- qnorm(p=c(0.025,0.975),mean=b.mean,sd=sqrt(b.var))

segments(x0=b.hi.lo[1],y0=0,y1=dnorm(b.hi.lo[1],mean=b.mean,sd=sqrt(b.var)),col="red",lwd=4)
segments(x0=b.hi.lo[2],y0=0,y1=dnorm(b.hi.lo[2],mean=b.mean,sd=sqrt(b.var)),col="red",lwd=4)