Please don’t change the seed number!
Q1
#derfine f(x) as a function
fx <- function(x){
return((1/(1+(x-5)^2)))
}
#we define 'a' as the given value, then intregrate the function
a <- 0.3640598
#the function multiplied by a integrates to 1, so it is a true p.d.f., when a is at the level in question
a*integrate(fx,lower=0,upper=10)$value
## [1] 1
fx.scaled <- function(x){
return(a*(1/(1+(x-5)^2)))
}
integrate(fx.scaled,lower=0,upper=10)$value
## [1] 1
curve(fx.scaled,from=0,to=10)
#we find a candidate distribution
curve(2*dnorm(x,5,2), from=0, to=10, col="red" , add=TRUE)
#we call this hx
hx <- function(x){
return(2*dnorm(x,5,2))
}
#we then find the max differencfe between fx and hx
x <- seq(0,10,length.out = 10000)
C. <- fx.scaled(x)/hx(x)
C <- max(C.)
#this allows us to define gx
gx <- function(x){
return(fx.scaled(x)/(C*hx(x)))
}
plot(x,C.)
lines(x,gx(x),col="red")
#we draw samples from our candidate distribution
m <- 2*dnorm(5,5,2)
nsamples<-1000000
x<-runif(nsamples, 0, 10)
y<-runif(nsamples, 0, m)
keep<- (2*dnorm(x,5,2))>=y
plot(x[keep],y[keep], cex=.3)
#we then only keep points that fall under our scaled fx function, they are overplotted in white on top of the points from our candidate distributoion in black
keep2<-fx.scaled(x)>=y
points(x[keep2],y[keep2], cex=.3,col="white")
acc_x <- x[keep2]
#we find the variance and check if it the given value
varX <- var(acc_x)
varX
## [1] 2.634531
#close enough!
#we got our sample in the previous code block
hist(acc_x,freq=F,breaks = 10000)
lines(density(acc_x),col="red")
boxplot(acc_x)
abline(h=as.numeric(summary(acc_x)[2]),col="red")
abline(h=IQR(acc_x)+as.numeric(summary(acc_x)[2]),col="red")
inter_range <- IQR(acc_x)
inter_range
## [1] 1.637684
Q2
fxy <- function(x,y){
return((3*x-5)^2+y^2+((y^3-x^3)/1000))
}
df.dx <- function(x){
return((18*x)-30-((3*x^2)/1000))
}
df.dy <- function(y){
return((2*y)+((3*y^2)/1000))
}
gradient.func <- function(x,y){
dx <- df.dx(x)
dy <- df.dy(y)
return(c(dx,dy))
}
alpha <- 0.01
x0 <- 15
y0 <- 20
descent.function <- function(par){
x <- par[1]
y <- par[2]
N <- 1000
Z <- matrix(nrow=N,ncol=2)
Z[1,] <- c(x,y)
ES <- 0
i <- 1
while(i<N){
Z[i+1,] <- Z[i,]-(alpha*gradient.func(x,y))
expected.Z <- c(x,y)
if(i+1==N){break}
error.sq <- (expected.Z[1]-Z[i+1,1])^2+(expected.Z[2]-Z[i+1,2])^2
ES <- ES+error.sq
x <- x+0.001
y <- y+0.001
i <- i+1
}
ES <- sum(ES)
MES <- mean(ES)
RMS <- sqrt(MES)
return(RMS)
}
par. <- c(x0,y0)
opt_own <- optim(descent.function,par=par.)
#the values for x and y that minimizes the function are:
opt_own$par
## [1] 1.2875673 -0.4235298
#the amount of calls made to the function is:
opt_own$counts[1]
## function
## 87
Q3
Q3
?Distributions
## starting httpd help server ... done
claims<-read.csv("claimsdata.csv", header=T, sep=";")
tx <- function(x,params){
mu <- params[1]
sigma <- params[2]
return(exp(-(x-mu)/sigma))
}
x. <- claims[500,]
params. <- c(25,2)
tx_eval <- tx(x.,params.)
tx_eval
## [1] 1.268522
logl <- function(x,par=params){
mu. <- par[1]
sigma. <- par[2]
n <- length(x)
ll <- 0
for(i in 1:n){
xi <- x[i]
loglike <- (-n*log(sigma.))+sum(log10(tx(xi,c(mu.,sigma.))))-sum(tx(xi,c(mu.,sigma.)))
ll <- ll+loglike
}
return(ll)
}
x <- as.vector(claims$daily)
mu. <- mean(x)
sigma. <- sd(x)
params <- c(sigma.,mu.)
mu.hat <- optim(f=logl,par=params,control = list(fnscale=-1))$par[1]
sigma.hat <- optim(f=logl,par=params,control = list(fnscale=-1))$par[2]
mle <- c(mu.hat,sigma.hat)
mle
## [1] 29.36635 29.36398
#nsamp <- 20000
#for testing:
nsamp=20000
bsamples <- matrix(nrow = nsamp,ncol=2)
x.. <- as.vector(claims$daily)
MU <- mle[1]
SIGMA <- mle[2]
tx <- function(x,params=c(MU,SIGMA)){
mu <- params[1]
sigma <- params[2]
return(exp(-(x-mu)/sigma))
}
for(i in 1:nrow(bsamples)){
x <- sample(x..,size=2000,replace = T)
#boot.sample <- tx(x)
new.mu <- mean(x)
new.sigma <- sd(x)
bsamples[i,1] <- new.mu
bsamples[i,2] <- new.sigma
}
par(mfrow=c(1,2))
hist(bsamples[,1],breaks=1000,main="Histogram of distribution of mu^",border ="purple",freq = F)
lines(density(bsamples[,1]),col="blue",lwd=3)
abline(v=mean(bsamples[,1]),col="yellow",lwd=3)
hist(bsamples[,2],breaks=1000,main="Histogram of distribution of sigma^",border="purple",freq = F)
lines(density(bsamples[,2]),col="blue",lwd=3)
abline(v=mean(bsamples[,2]),col="yellow",lwd=3)
my.mu <- bsamples[,1]
my.mu <- sort(my.mu)
length(my.mu)
## [1] 20000
lo.index <- round(2.5/100*length(my.mu))
hi.index <- round(97.5/100*length(my.mu))
bootmu95 <- c(my.mu[lo.index],my.mu[hi.index])
boxplot(my.mu,col="purple")
abline(h=bootmu95,col="orange",lwd=5)
bootmu95
## [1] 27.31684 27.88079