Practica3.R

nma — Oct 28, 2013, 4:23 PM

##########################################################
# Material del profesor Juan Miguel Marin Diazaraque
##########################################################
##################################################################################################
# Supongamos que estamos interesados en estudiar los hÃ¡bitos de sueÃ±o de los estudiantes de
# un cierto centro. Parece ser que los medicos recomiendan un mÃnimo de 8 horas de sueÃ±o para
# una persona adulta, con lo cual el estudio se plantea en tÃ©rminos de averiguar la proporciÃ³n de
# de estudiantes que duermen al menos 8 horas. Llamaremos p a dicha proporcion.
# Se toma una muestra de 27 estudiantes de modo que 11 de ellos duermen al menos 8
# horas y el resto no. Nos planteamos hacer inferencias sobre la proporciÃ³n p teniendo en cuenta
# la informacion previa que se tiene. AdemÃ¡s interesa predecir el nÃºmero de estudiantes que
# duermen al menos 8 horas si se toma una nueva muestra de 20 estudiantes.
###############################################################################################
p=seq(0,1,length=500)
s=11
f=16
a=3.4
b=7.4
prior = dbeta(p, a, b)
like = dbeta(p, s+1, f+1)
post = dbeta(p,a+s,b+f)
plot(p,post,type="l",ylab="Densidad",lty=2,lwd=3, col=4)
lines(p,like,lty=1,lwd=3,col=2)
lines(p,prior,lty=3,lwd=3,col=6)
legend(0.7, 4, c("A priori","Verosimilitud","A posteriori"),
       lty=c(3,1,2), lwd=c(3,3,3), col=c(6,2,4))

plot of chunk unnamed-chunk-1

#########################################################
s=11
f=16
a=3.4
b=7.4
#########################################################
#  Â¿Cual es P(p >= 0,5|datos)?
#########################################################
1-pbeta(0.5, a+s, b+f)

[1] 0.06843

#############################################################
# Â¿Cual es el
# intervalo de conï¬anza al 90 % para el verdadero valor de p?
#############################################################
qbeta(c(0.05, 0.95), a + s, b + f)

[1] 0.2562 0.5129

#########################################################
s=11
f=16
a=3.4
b=7.4
##################################################################
# Otra alternativa para responder a las anteriores dos preguntas:
##################################################################
ps = rbeta(1000, a+s, b+f)
hist(ps,xlab="p",main="")

plot of chunk unnamed-chunk-1

sum(ps >= 0.5)/1000

[1] 0.068

quantile(ps, c(0.05, 0.95))

    5%    95% 
0.2589 0.5117

####################################################
# Un distribuidor anuncia que el 95 % de sus ordenadores no se tiene que reparar durante el
# aÃ±o de garantÃa. Desde el punto de vista bayesiano se puede modelizar esta creencia mediante
# una Be (Î± = 4.75, Î² = 0.25), ya que si p â¼ Be (4.75,0.25),
# entonces E(p) = 0.95, Var(p) = 0.0016.
# Debido a la aparente calidad, compramos 20 ordenadores, de los cuales 12 requieren reparaciÃ³n
# durante el aÃ±o de garantÃa. Nuestra distribuciÃ³n a posteriori sobre la proporciÃ³n es
# entonces ahora Be(4.75+8,0.25+12). Un intervalo simÃ©trico de probabilidad 0.95 en R
# se calcula como:
p <- c(0.025,0.975)
qbeta(p,12.75,12.25)

[1] 0.3188 0.6996

####################################################
# Una forma de tomar una priori:
# Tomando una distribuciÃ³n discreta
####################################################
p = seq(0.05, 0.95, by=0.1)
prior = c(2, 4, 8, 8, 4, 2, 1, 1, 1, 1)
prior = prior/sum(prior)
plot(p, prior, type="h", ylab="Probabilidad a priori", col=4)

plot of chunk unnamed-chunk-1

library(LearnBayes)
data = c(11, 16)
post = pdisc(p, prior, data)
cbind(p, prior, post)

         p   prior      post
 [1,] 0.05 0.06250 2.883e-08
 [2,] 0.15 0.12500 1.723e-03
 [3,] 0.25 0.25000 1.282e-01
 [4,] 0.35 0.25000 5.260e-01
 [5,] 0.45 0.12500 2.882e-01
 [6,] 0.55 0.06250 5.284e-02
 [7,] 0.65 0.03125 2.976e-03
 [8,] 0.75 0.03125 6.595e-05
 [9,] 0.85 0.03125 7.372e-08
[10,] 0.95 0.03125 5.821e-15

plot(p, post, type="h", ylab="Probabilidad a posteriori", col=2)

plot of chunk unnamed-chunk-1

########################################################
library(LearnBayes)
midpt = seq(0.05, 0.95, by = 0.1)
prior = c(2, 4, 8, 8, 4, 2, 1, 1, 1, 1)
prior = prior/sum(prior)
p = seq(0, 1, length = 500)
plot(p,histprior(p,midpt,prior),type="l",
       ylab="Densidad a priori",ylim=c(0,.25))

plot of chunk unnamed-chunk-1

s = 11
f = 16
like = dbeta(p, s+1, f+1)
post = like*histprior(p, midpt, prior)
plot(p, post, type="l", ylab="Densidad a posteriori", col=4)

plot of chunk unnamed-chunk-1

post = post/sum(post)
ps = sample(p, replace=TRUE, prob=post)

Warning: Walker's alias method used: results are different from R < 2.2.0

hist(ps, xlab="p")

plot of chunk unnamed-chunk-1

####################################################
# En el ejemplo de las horas de sueÃ±o se puede considerar
# la distribuciÃ³n predictiva a posteriori
####################################################
library(LearnBayes)
s=11
f=16
a=3.4
b=7.4
s=11
ab=c(a+s, b+f)
m=20
ys=0:20
pred=pbetap(ab, m, ys)
#######################################################
# Para obtener las predicciones se simula p a partir de
# la distribucion a priori, y entonces se simulan las
# predicciones a partir de una binomial
#######################################################
p=rbeta(1000,a+s, b+f)
y = rbinom(1000, 20, p)
table(y)

y
  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17 
  1   5  15  35  72  83 138 128 154 122  97  62  43  26  10   5   3   1

#######################################################
# Se puede dibujar la distribuciÃ³n de las predicciones
########################################################
freq=table(y)
if(names(freq)[1]=="0"){
  # Si aparece alguna observacion 0
  ys=c(0:max(y))
}else{
  # Si NO aparece ninguna observacion 0
  ys=c(1:max(y))}
predprob=freq/sum(freq)
plot(ys,predprob,type="h",xlab="y",ylab="Probabilidad predictiva")

plot of chunk unnamed-chunk-1

###########################################################
# La tabla correspondiente de la distribucion predictiva es
###########################################################
dist=cbind(ys,predprob)
dist

   ys predprob
0   0    0.001
1   1    0.005
2   2    0.015
3   3    0.035
4   4    0.072
5   5    0.083
6   6    0.138
7   7    0.128
8   8    0.154
9   9    0.122
10 10    0.097
11 11    0.062
12 12    0.043
13 13    0.026
14 14    0.010
15 15    0.005
16 16    0.003
17 17    0.001

###########################################################
# Si se quiere resumir la distribucion predictiva mediante un intervalo que
# cubra el 90 % de la funciÃ³n, se puede usar la funciÃ³n discint. Se obtiene
# los valores que incluye el intervalo y la probabilidad exacta de los mismos
covprob=0.9
discint(dist,covprob)

$prob
    3 
0.934 

$set
 3  4  5  6  7  8  9 10 11 12 
 3  4  5  6  7  8  9 10 11 12

##########################################################################
# Obtencion de estimador e intervalo de confianza con 95% de probabilidad
# mediante simulacion
#########################################################################
x <- rbeta(1000,12.75,12.25)
mean(x)

[1] 0.5094

y <- sort(x)
y[975]

[1] 0.7066

y[25]

[1] 0.3016

##########################################################################
# Obtencion de estimador e intervalo de confianza con 95% de probabilidad
# de forma exacta
#########################################################################
 library(Bolstad)
# Ejemplo con 6 exitos en 8 ensayos con una a priori uniforme Beta(1,1)
x11()
binobp(6,8)

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

plot of chunk unnamed-chunk-1

Posterior Mean           :  0.7 
Posterior Variance       :  0.0190909 
Posterior Std. Deviation :  0.1381699 

Prob.   Quantile 
------  ---------
0.005   0.3073936
0.01    0.3436855
0.025   0.3999064
0.05    0.4503584
0.5 0.7137633
0.95    0.9022532
0.975   0.9251454
0.99    0.9466518
0.995   0.9584153

# 6 exitos en 8 ensayos con una a priori Beta(0.5,6)
x11()
binobp(6,8,0.5,6)

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

Warning: Font family not found in Windows font database

plot of chunk unnamed-chunk-1

Posterior Mean           :  0.4482759 
Posterior Variance       :  0.0159564 
Posterior Std. Deviation :  0.1263188 

Prob.   Quantile 
------  ---------
0.005   0.1554803
0.01    0.1769862
0.025   0.2116211
0.05    0.2441832
0.5 0.4458341
0.95    0.6607604
0.975   0.698439
0.99    0.7397328
0.995   0.7661081