StatCompPractice.R

user2 — Oct 23, 2013, 12:15 AM

require(mvtnorm)
Loading required package: mvtnorm
require(boot)
Loading required package: boot

f <- function(x,y) {
  dmvnorm(cbind(x,y))/3 + dmvnorm(cbind(x-4,y-3))/2 + dmvnorm(cbind(x,y-3))/6 
}

x <- seq(-5,5,0.1)
y <- seq(-5,5,0.1)
z <- outer(x,y,f)

#1a
persp(x,y,z, theta=210, phi=25)

plot of chunk unnamed-chunk-1


#1b
generate.sample <- function(n) {
  sel1 <- rbinom(n, 1, 0.5)
  sel2 <- rbinom(n, 1, 1/3)

  x <- sel1*rnorm(n, 4) + (1-sel1)*(sel2*rnorm(n, 0) + (1-sel2)*rnorm(n, 0))
  y <- sel1*rnorm(n, 3) + (1-sel1)*(sel2*rnorm(n, 3) + (1-sel2)*rnorm(n, 0))
  data.frame(x=x, y=y)
}

sample <- generate.sample(10000)
plot(sample$x,sample$y)

plot of chunk unnamed-chunk-1


#1c
cor(sample$x,sample$y)
[1] 0.5266

#1d
k <- 10000
large.sample <- data.frame(x=rnorm(k*10),y=rnorm(k*10))
groups <- split(large.sample, 1:k)
cors <- sapply(groups, function(sample) cor(sample$x, sample$y))
cors <- sort(cors)
plot(1:length(cors), cors)

plot of chunk unnamed-chunk-1

(critical <- cors[length(cors)*0.95])
  8266 
0.5468 


#1e
new.sample <- generate.sample(k * 10)
groups <- split(new.sample, 1:k)
cors <- sapply(groups, function(sample) cor(sample$x, sample$y))
cors <- sort(cors)
plot(1:length(cors), cors)

plot of chunk unnamed-chunk-1

max(which(cors<critical))/length(cors)
[1] 0.4972


#2
data <- read.table("migraine.txt")
data$test <- rexp(50, l)
Error: object 'l' not found
data$test2 <- data$test < data$C
Error: replacement has 0 rows, data has 50
mean(data$test2)
Warning: argument is not numeric or logical: returning NA
[1] NA
mean(data$delta)
[1] 0.74

#l.step <- function(l, n) n / sum(data$C + (1-data$delta) / l)
l.step <- function(l, n) n / sum(data$C * data$delta + l) # mostly agrees with l2.step

l2.step <- function(l, n) l * n / (sum(data$delta * pgamma(data$C,2,l) / pgamma(data$C,1,l)   + (1-data$delta) * (1-pgamma(data$C,2,l))/(1-pgamma(data$C,1,l))  ))

# should be: 1/sum(data$C/sum(data$delta))

l <- 1.0
(l <- l.step(l, length(data$delta)))
[1] 0.04193
(l <- l.step(l, length(data$delta)))
[1] 0.04369
(l <- l.step(l, length(data$delta)))
[1] 0.04368
(l <- l.step(l, length(data$delta)))
[1] 0.04368
(l <- l.step(l, length(data$delta)))
[1] 0.04368
(l <- l.step(l, length(data$delta)))
[1] 0.04368
(l <- l.step(l, length(data$delta)))
[1] 0.04368
(l <- l.step(l, length(data$delta)))
[1] 0.04368
data$test <- rexp(50, l)
data$test2 <- data$test < data$C
mean(data$test2)
[1] 0.68
mean(data$delta)
[1] 0.74

l2 <- 0.5
(l2 <- l2.step(l2, length(data$delta)))
[1] 0.1037
(l2 <- l2.step(l2, length(data$delta)))
[1] 0.06264
(l2 <- l2.step(l2, length(data$delta)))
[1] 0.0516
(l2 <- l2.step(l2, length(data$delta)))
[1] 0.04794
(l2 <- l2.step(l2, length(data$delta)))
[1] 0.04663
(l2 <- l2.step(l2, length(data$delta)))
[1] 0.04614
(l2 <- l2.step(l2, length(data$delta)))
[1] 0.04595
(l2 <- l2.step(l2, length(data$delta)))
[1] 0.04588
(l2 <- l2.step(l2, length(data$delta)))
[1] 0.04585
(l2 <- l2.step(l2, length(data$delta)))
[1] 0.04584
(l2 <- l2.step(l2, length(data$delta)))
[1] 0.04584
(l2 <- l2.step(l2, length(data$delta)))
[1] 0.04584
(l2 <- l2.step(l2, length(data$delta)))
[1] 0.04584

## 3a:
res <- c()
for(i in 1:10000) {
  data <- rt(n=10, df=4)
  res <- c(res, median(data))
}
mean(res)
[1] -0.001201
sd(res)
[1] 0.415
# MSE:
mean(res)^2 + sd(res)^2
[1] 0.1723

mse <- function(res, i) mean(res[i])^2 + sd(res[i])^2

## 3b:
boot(res, mse, R=1000)

ORDINARY NONPARAMETRIC BOOTSTRAP


Call:
boot(data = res, statistic = mse, R = 1000)


Bootstrap Statistics :
    original    bias    std. error
t1*   0.1723 0.0001331    0.002639

## 3c:
t <- c()
for(i in 1:50000){
  y <- rnorm(10,sd=8)
  t <- c(t, (median(y)>2) * prod(dt(y, 4)) / prod(dnorm(y, sd=8)))
}
mean(t)
[1] 3.192e-06