Instructions

Please ensure that you rename this file as “StudentNumber.Rmd”. i.e. If your student number is CLRALL001 then save the file as “CLRALL001.Rmd”.
Also ensure that you create the object named “stdnumber” as well as one named “name_surname” in the following R chunk.
Please note the underscore between your name and surname!

Please don’t change the seed number!

#derfine f(x) as a function
fx <- function(x){
  return((1/(1+(x-5)^2)))
}

#we define 'a' as the given value, then intregrate the function
a <- 0.3640598
#the function multiplied by a integrates to 1, so it is a true p.d.f., when a is at the level in question
a*integrate(fx,lower=0,upper=10)$value

## [1] 1

fx.scaled <- function(x){
  return(a*(1/(1+(x-5)^2)))
}

integrate(fx.scaled,lower=0,upper=10)$value

## [1] 1

curve(fx.scaled,from=0,to=10)
#we find a candidate distribution
curve(2*dnorm(x,5,2), from=0, to=10, col="red" , add=TRUE)

#we call this hx
hx <- function(x){
  return(2*dnorm(x,5,2))
}

#we then find the max differencfe between fx and hx
x <- seq(0,10,length.out = 10000)
C. <- fx.scaled(x)/hx(x)

C <- max(C.)

#this allows us to define gx

gx <- function(x){
  return(fx.scaled(x)/(C*hx(x)))
}

plot(x,C.)
lines(x,gx(x),col="red")

#we draw samples from our candidate distribution
m <- 2*dnorm(5,5,2)

nsamples<-1000000
x<-runif(nsamples, 0, 10)
y<-runif(nsamples, 0, m)
keep<- (2*dnorm(x,5,2))>=y
plot(x[keep],y[keep], cex=.3)

#we then only keep points that fall under our scaled fx function, they are overplotted in white on top of the points from our candidate distributoion in black
keep2<-fx.scaled(x)>=y

points(x[keep2],y[keep2], cex=.3,col="white")

acc_x <- x[keep2]

#we find the variance and check if it the given value
varX <- var(acc_x)
varX

## [1] 2.634531

#close enough!

#we got our sample in the previous code block
hist(acc_x,freq=F,breaks = 10000)
lines(density(acc_x),col="red")

boxplot(acc_x)
abline(h=as.numeric(summary(acc_x)[2]),col="red")
abline(h=IQR(acc_x)+as.numeric(summary(acc_x)[2]),col="red")

inter_range <- IQR(acc_x)
inter_range

## [1] 1.637684

fxy <- function(x,y){
  return((3*x-5)^2+y^2+((y^3-x^3)/1000))
}

df.dx <- function(x){
  return((18*x)-30-((3*x^2)/1000))
}

df.dy <- function(y){
  return((2*y)+((3*y^2)/1000))
}

gradient.func <- function(x,y){
  dx <- df.dx(x)
  dy <- df.dy(y)
  return(c(dx,dy))
}

alpha <- 0.01
x0 <- 15
y0 <- 20

descent.function <- function(par){
  x <- par[1]
  y <- par[2]
  N <- 1000
  Z <- matrix(nrow=N,ncol=2)
  Z[1,] <- c(x,y)
  
  ES <- 0
  i <- 1
  while(i<N){
    
    Z[i+1,] <- Z[i,]-(alpha*gradient.func(x,y))
    expected.Z <- c(x,y)
    if(i+1==N){break}
    error.sq <- (expected.Z[1]-Z[i+1,1])^2+(expected.Z[2]-Z[i+1,2])^2
    ES <- ES+error.sq
    
    x <- x+0.001
    y <- y+0.001
    i <- i+1
  }
  ES <- sum(ES)
  MES <- mean(ES)
  RMS <- sqrt(MES)
  return(RMS)
  
}

par. <- c(x0,y0)

opt_own <- optim(descent.function,par=par.)

#the values for x and y that minimizes the function are:
opt_own$par

## [1]  1.2875673 -0.4235298

#the amount of calls made to the function is:
opt_own$counts[1]

## function 
##       87

?Distributions

## starting httpd help server ... done

claims<-read.csv("claimsdata.csv", header=T, sep=";")

tx <- function(x,params){
  mu <- params[1]
  sigma <- params[2]
  return(exp(-(x-mu)/sigma))
}

x. <- claims[500,]
params. <- c(25,2)

tx_eval <- tx(x.,params.)
tx_eval

## [1] 1.268522

logl <- function(x,par=params){
  mu. <- par[1]
  sigma. <- par[2]
  
  n <- length(x)
  ll <- 0
  for(i in 1:n){
    xi <- x[i]
    
    loglike <- (-n*log(sigma.))+sum(log10(tx(xi,c(mu.,sigma.))))-sum(tx(xi,c(mu.,sigma.)))
    ll <- ll+loglike
  }
  return(ll)
}

x <- as.vector(claims$daily)
mu. <- mean(x)
sigma. <- sd(x)

params <- c(sigma.,mu.)

mu.hat <- optim(f=logl,par=params,control = list(fnscale=-1))$par[1]
sigma.hat <- optim(f=logl,par=params,control = list(fnscale=-1))$par[2]
mle <- c(mu.hat,sigma.hat)
mle

## [1] 29.36635 29.36398

#nsamp <- 20000
#for testing:
nsamp=20000
bsamples <- matrix(nrow = nsamp,ncol=2)
x.. <- as.vector(claims$daily)

MU <- mle[1]
SIGMA <- mle[2]

tx <- function(x,params=c(MU,SIGMA)){
  mu <- params[1]
  sigma <- params[2]
  return(exp(-(x-mu)/sigma))
}


for(i in 1:nrow(bsamples)){
  x <- sample(x..,size=2000,replace = T)
  
  #boot.sample <- tx(x)
  new.mu <- mean(x)
  new.sigma <- sd(x)
  bsamples[i,1] <- new.mu
  bsamples[i,2] <- new.sigma
}

par(mfrow=c(1,2))

hist(bsamples[,1],breaks=1000,main="Histogram of distribution of mu^",border ="purple",freq = F)
lines(density(bsamples[,1]),col="blue",lwd=3)
abline(v=mean(bsamples[,1]),col="yellow",lwd=3)


hist(bsamples[,2],breaks=1000,main="Histogram of distribution of sigma^",border="purple",freq = F)
lines(density(bsamples[,2]),col="blue",lwd=3)
abline(v=mean(bsamples[,2]),col="yellow",lwd=3)

my.mu <- bsamples[,1]

my.mu <- sort(my.mu)

length(my.mu)

## [1] 20000

lo.index <- round(2.5/100*length(my.mu))
hi.index <- round(97.5/100*length(my.mu))

bootmu95 <- c(my.mu[lo.index],my.mu[hi.index])

boxplot(my.mu,col="purple")
abline(h=bootmu95,col="orange",lwd=5)

bootmu95

## [1] 27.31684 27.88079

R Computing Exam - 2018

Instructions