Problem 1

I used the following tutorial for assistance: https://www.youtube.com/watch?v=HiI_SF4C0cI

g <- function(x) abs((x^2)*cos(x))*8.75166 #original function 0 to 10

n <- 1e6
a <- 0
b <- 10
c <- min(g(a:b))
d <-max(g(a:b))
x <- runif(n)

f <- function(y) (g(a+(b-a)*y)-c) /(d-c)

integrate(g,0,10) #interval 0 to 10 built in function

## 2020.002 with absolute error < 0.098

I <- (b-a) * (d-c) * sum(f(x))/n + c*(b-a) #monte carlo integration
I

## [1] 2021.024

Problem 2

Part A.

The Dagum Distribution’s CDF is as follows: \(F(x;a,b,p)\) = \((1+(\frac{x}{b})^{-a})^{-p}\)

The inverse found by hand is as follows: \(x = \frac{b}{((\frac{1}{y})^\frac{1}{p} - 1)^\frac{1}{a}}\)

The code below simulates values for the Dagum distribution using the inversion method.

u <- runif(5)
Finv <- function(y,a=1, b=1, p=1){b/(((1/y)**(1/p)-1)**1/a)}
Finv(u)

## [1] 3.7896321 1.3335060 0.8653407 2.3328435 0.9715696

Part B.

set.seed(1234)
u <- runif(10000)
Finv <- function(y,a=2, b=1, p=2){b/(((1/y)**(1/p)-1)**1/a)}


#Mean
mean(Finv(u))

## [1] 26.71721

#Median
median(Finv(u))

## [1] 4.849345

#Variance
var(Finv(u))

## [1] 33357.55

Problem 3

a=5 b=3

We are to sample from the beta distribution with parameters alpha = 5 and beta = 3 using the acceptance rejection region. I followed the example here: https://www.r-bloggers.com/rejection-sampling/

sample.x = runif(100000,0,1)
accept = c()
for(i in 1:length(sample.x)){
 U = runif(1, 0, 1)
 if(dunif(sample.x[i], 0, 1)*3*U <= dbeta(sample.x[i], 5, 3)) {

   accept[i] = 'Yes'
 }
 else if(dunif(sample.x[i],0,1)*3*U > dbeta(sample.x[i], 5, 3)) {
  accept[i] = 'No'
 }
}
T = data.frame(sample.x, accept = factor(accept, levels= c('Yes','No')))


hist(T[,1][T$accept=='Yes'], breaks = seq(0,1,0.01), freq = FALSE, main = 'Histogram of X', xlab = 'X')
#lines(x, dbeta(x,5,3))

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.5.2

print(qplot(sample.x, data = T, geom = 'histogram', fill = accept, binwidth=0.01))

Problem 4

First three graphs are of the ranges, the second three graphs are of the simulated means.

## [1] 0.2180057

#FUNCTION
ranges_beta <- function(m){
x <- dbeta(runif(m,0,1), 5, 3)
range <- max(x)-min(x)
range
}

#M = 5
ranges_beta(5)

## [1] 2.128068

hist(replicate(10000,ranges_beta(5)))

#M=10
ranges_beta(10)

## [1] 1.897952

hist(replicate(10000,ranges_beta(10)))

#M=25
ranges_beta(25)

## [1] 2.292442

hist(replicate(10000,ranges_beta(25)))

#Frequencies 
set.seed(1234)
for (i in 1:100){
  if (i <=5){
    M5[i] <- betas()
    M10[i] <- betas()
    M25[i] <- betas()
  }
  if (i <=10){
    M10[i] <- betas()
    M25[i] <- betas()
  }
  else
    M25[i] <- betas()
}

spaces <- seq(0,1,0.05)

hist(M5)

hist(M10)

hist(M25)

Problem 5

The expected value of the ranges when m=5 is 1.873758

The expected value of the ranges when m=10 is 2.165024

The expected value of the ranges when m=25 is 2.28024

The expected value when m=5 is 0.6241501

The expected value when m=10 is 0.6954588

The expected value when m=25 is 0.6462025

set.seed(1234)

#Expected Values of Ranges when M=5,10,15
mean(replicate(10000,ranges_beta(5)))

## [1] 1.873724

mean(replicate(10000,ranges_beta(10)))

## [1] 2.16889

mean(replicate(10000,ranges_beta(25)))

## [1] 2.279953

#Expected Values when M = 5,10,25
mean(M5)

## [1] 0.6241501

mean(M10)

## [1] 0.6954588

mean(M25)

## [1] 0.6462025

Problem 6

Create a plot that has the value of m on the x-axis and your simulated value of the MEAN on the y-axis for values of m from 2 through 100.

The first plot is the range the second is the actual simulated means of the beta distribution.

#Ranges
set.seed(12345)
means <- c()
for (i in 2:100) {
means[i] <- mean(replicate(100,ranges_beta(i))) 
}
plot(means)

#Means
set.seed(12345)
m<- c(1,seq(2,100,1))
means <- c()
means[1] <- betas()
for (i in 2:100) {
means[i] <- mean(replicate(i,betas()))
}
plot(m,means)

Problem 7

Create a plot that has the value of m on the x-axis and your simulated value of the MEDIAN on the y-axis for values of m from 2 through 100.

The first plot is the range the second is the actual means simulated from the beta distribution.

#Ranges
set.seed(12345)
medians <- c()
for (i in 2:100) {
medians[i] <- median(replicate(100,ranges_beta(i))) 
}
plot(medians)

#Means
m<- c(1,seq(2,100,1))
medians <- c()
medians[1] <- betas()

for (i in 2:100) {
medians[i] <- median(replicate(i,betas()))
}
plot(m,medians)

STAT Exam 2

Kajal Chokshi

12/3/2019