##.A
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# (a) Point estimate of p: the sample proportion a / c.
a <- 47 # numerator of the proportion
c <- 80 # denominator; as a variable it shadows base::c, but c(...) calls
        # still resolve to the base function
b <- c - a # remainder, 80 - 47 = 33
Pointestimate <- a / c
Pointestimate
## [1] 0.5875
Interpretation: The point estimate of the proportion of adults who are lactose intolerant is 0.5875; that is, about 58.75% of the adults in the sample (47 of 80) are lactose intolerant.
B
# (b) Bayes box on a discrete grid of candidate values for p.
p <- seq(from = 0.4, to = 0.7, by = 0.1)
B <- as.data.frame(p) %>%
  mutate(
    # Prior weight attached to each grid value (the four weights sum to 1).
    prior = c(0.1, 0.2, 0.44, 0.26),
    # Likelihood kernel p^47 * (1 - p)^(80 - 47); any constant factor would
    # cancel when the posterior is normalised below.
    likelihood = p^47 * (1 - p)^(80 - 47),
    PL = prior * likelihood,
    # Normalise prior * likelihood so the posterior column sums to 1.
    posterior = PL / sum(PL)
  )
B
## p prior likelihood PL posterior
## 1 0.4 0.10 9.458251e-27 9.458251e-28 0.0006491288
## 2 0.5 0.20 8.271806e-25 1.654361e-25 0.1135403807
## 3 0.6 0.44 2.761140e-24 1.214902e-24 0.8337986003
## 4 0.7 0.26 2.914804e-25 7.578489e-26 0.0520118903
# Posterior summaries of p for part (b), taken from the Bayes box B.
#
# The posterior variance of p is sum((p - mean)^2 * posterior), where
# mean = sum(p * posterior). Calling var() on the posterior column would
# instead return the sample variance of the four probabilities themselves,
# which says nothing about the spread of p.
posterior_mean_b <- sum(B$p * B$posterior)
VAR <- sum((B$p - posterior_mean_b)^2 * B$posterior)
VAR
## [1] 0.001642016
# Bayes estimate read off the Bayes box: the grid value of p that carries the
# largest posterior probability (the posterior mode), computed from B rather
# than typed in by hand.
Bayes_Estimate <- B$p[which.max(B$posterior)]
Bayes_Estimate
## [1] 0.6
We observe that the point estimate in (a) and the Bayesian estimate in (b) are close to each other: Bayesian estimate (b) - point estimate (a) = 0.6 - 0.5875 = 0.0125.
C
# (c) Same grid and prior as the Bayes box in (b), but the likelihood kernel
# now uses exponents 470 and 330.
C <- as.data.frame(p) %>%
  mutate(
    prior = c(0.1, 0.2, 0.44, 0.26),
    likelihood = p^470 * (1 - p)^330,
    PL = prior * likelihood,
    # Normalise prior * likelihood so the posterior column sums to 1.
    posterior = PL / sum(PL)
  )
C
## p prior likelihood PL posterior
## 1 0.4 0.10 5.729392e-261 5.729392e-262 5.055567e-26
## 2 0.5 0.20 1.499697e-241 2.999394e-242 2.646640e-06
## 3 0.6 0.44 2.575638e-236 1.133281e-236 9.999974e-01
## 4 0.7 0.26 4.426830e-246 1.150976e-246 1.015611e-10
# Posterior mean and variance of p for part (c), taken from the Bayes box C.
posterior <- C %>% select(posterior) # one-column data frame of posterior weights
# Posterior mean: each grid value of p weighted by its posterior probability.
Mean <- sum(C$p * C$posterior)
Mean
## [1] 0.5999997
# Posterior variance of p: sum((p - mean)^2 * posterior). Calling var() on the
# posterior column would instead give the sample variance of the four
# probabilities, which is not the variance of p. The result is stored under the
# original name `var`; calls to var(...) still resolve to the stats function.
var <- sum((C$p - Mean)^2 * C$posterior)
var
## [1] 2.646734e-08
Mean = 0.5999997 Variance = 2.646734e-08
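We observe that, compared with (b), the posterior probabilities of p = 0.4, 0.5 and 0.7 become much smaller in (c), while the posterior probability of p = 0.6 rises from about 0.83 to almost 1. The Bayesian estimate of p is therefore still 0.6, but the posterior in (c) is far more concentrated on that value.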
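Binomial variance of the count: np(1-p) = 800(0.6)(1-0.6) = 192; per observation this is p(1-p) = 192/800 = 0.24. Because the estimate of p is 0.6 in both cases, this per-observation variance p(1-p) in (b) is still the same in (c).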
D
# (d) Bayes box whose prior is the posterior column of B (B$posterior),
# entered here as the values printed for B.
D <- as.data.frame(p) %>%
  mutate(
    prior = c(0.0006491288, 0.1135403807, 0.8337986003, 0.0520118903),
    likelihood = p^470 * (1 - p)^(800 - 470),
    PL = prior * likelihood,
    # Normalise prior * likelihood so the posterior column sums to 1.
    posterior = PL / sum(PL)
  )
D
## p prior likelihood PL posterior
## 1 0.4 0.0006491288 5.729392e-261 3.719113e-264 1.731782e-28
## 2 0.5 0.1135403807 1.499697e-241 1.702761e-242 7.928800e-07
## 3 0.6 0.8337986003 2.575638e-236 2.147563e-236 9.999992e-01
## 4 0.7 0.0520118903 4.426830e-246 2.302478e-247 1.072134e-11
# Posterior summaries of p for part (d). These must come from the Bayes box D
# built for this part, not from C.
Posterior <- D %>% select(posterior) # one-column data frame of posterior weights
#posterior
# Posterior mean: each grid value of p weighted by its posterior probability.
# (Summing the posterior column alone always gives 1, which is not a mean of p.)
Mean <- sum(D$p * D$posterior)
Mean
## [1] 0.5999999
# Posterior variance of p: sum((p - mean)^2 * posterior), rather than the
# sample variance of the four posterior probabilities.
Var <- sum((D$p - Mean)^2 * D$posterior)
Var
## [1] 7.928901e-09
# Sanity check: a normalised posterior sums to 1.
SUM <- sum(Posterior)
SUM
## [1] 1
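The computations above indicate that both the sample size and the prior probabilities matter, because each of them affects the posterior distribution. The larger the sample size, the more the posterior concentrates on a single value of p (here p = 0.6), and the smaller the posterior probabilities of the other values of p become.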