# 1A: population parameters for the five-element population
pop <- c(0, 1, 3, 5, 7)
# Population size.
N <- length(pop)
# Population mean.
mu <- mean(pop)
mu
## [1] 3.2
# Population variance with divisor N (not N - 1); stored under the name `sigma`.
sigma <- mean((pop - mu)^2)
sigma
## [1] 6.56
suppressMessages(library(combinat))
# 1B: sampling without replacement -- enumerate every possible sample of size n.
# Uses pop, N, mu and sigma as defined earlier in the script.
n <- 2
# combn() yields one sample per column; transpose so that each row is a sample.
dat <- combn(pop, n)
c1 <- t(dat)
# Each sample gets probability 1 / (number of possible samples).
c2 <- 1 / nrow(c1)
# Per-sample mean (Ybar) and the corresponding estimate of the total (N * Ybar).
c4 <- apply(c1, 1, mean)
c3 <- N * c4
# Per-sample variance (S2) and the estimated variance of Ybar, including the
# finite population correction (1 - n/N).
c5 <- apply(c1, 1, var)
c6 <- (1 - (n / N)) * (c5 / n)
paste("The possible samples are")
## [1] "The possible samples are"
print(c1)
## [,1] [,2]
## [1,] 0 1
## [2,] 0 3
## [3,] 0 5
## [4,] 0 7
## [5,] 1 3
## [6,] 1 5
## [7,] 1 7
## [8,] 3 5
## [9,] 3 7
## [10,] 5 7
paste("The replication of the table with N is:")
## [1] "The replication of the table with N is:"
cbind(c1, Prob = c2, Tauhat = c3, Ybar = c4, S2 = c5, VhatYbar = c6)
## Prob Tauhat Ybar S2 VhatYbar
## [1,] 0 1 0.1 2.5 0.5 0.5 0.15
## [2,] 0 3 0.1 7.5 1.5 4.5 1.35
## [3,] 0 5 0.1 12.5 2.5 12.5 3.75
## [4,] 0 7 0.1 17.5 3.5 24.5 7.35
## [5,] 1 3 0.1 10.0 2.0 2.0 0.60
## [6,] 1 5 0.1 15.0 3.0 8.0 2.40
## [7,] 1 7 0.1 20.0 4.0 18.0 5.40
## [8,] 3 5 0.1 20.0 4.0 2.0 0.60
## [9,] 3 7 0.1 25.0 5.0 8.0 2.40
## [10,] 5 7 0.1 30.0 6.0 2.0 0.60
# E(Ybar): the average of the Ybar values over all possible samples.
mean(c4)
## [1] 3.2
# It matches mu, i.e. Ybar is unbiased for the population mean.
all.equal(mean(c4), mu)
## [1] TRUE
# Var(Ybar), computed two ways: from the closed-form expression using the
# population variance, and directly from the enumerated sample means.
(sigma / n) * ((N - n) / (N - 1))
## [1] 2.46
mean((c4 - mu)^2)
## [1] 2.46
# Expected value of the estimated variance of Ybar, computed two ways:
# probability-weighted sum, and plain average over the equally likely samples.
sum(c6 * c2)
## [1] 2.46
mean(c6)
## [1] 2.46
## [,1] [,2]
## 1 2
## 1 3
## 1 4
## 1 5
## 2 3
## 2 4
## 2 5
## 3 4
## 3 5
## 4 5
## r11 0 0
## r12 1 1
## r13 3 3
## r14 5 5
## r15 7 7
## Prob Tauhat Ybar S2 VhatYbar
## 1 2 0.06667 7.5 1.5 0.5 0.15
## 1 3 0.06667 10.0 2.0 2.0 0.60
## 1 4 0.06667 12.5 2.5 4.5 1.35
## 1 5 0.06667 15.0 3.0 8.0 2.40
## 2 3 0.06667 12.5 2.5 0.5 0.15
## 2 4 0.06667 15.0 3.0 2.0 0.60
## 2 5 0.06667 17.5 3.5 4.5 1.35
## 3 4 0.06667 17.5 3.5 0.5 0.15
## 3 5 0.06667 20.0 4.0 2.0 0.60
## 4 5 0.06667 22.5 4.5 0.5 0.15
## r11 0 0 0.06667 0.0 0.0 0.0 0.00
## r12 1 1 0.06667 5.0 1.0 0.0 0.00
## r13 3 3 0.06667 15.0 3.0 0.0 0.00
## r14 5 5 0.06667 25.0 5.0 0.0 0.00
## r15 7 7 0.06667 35.0 7.0 0.0 0.00
## [1] 3.067
## [1] 2.713
## [1] 0.5
Population = a collection of elements about which we wish to make an inference = all DVDs in the store. We wish to infer the proportion rented in the past month.
Frame = a list of sampling units = the racks.
Sampling units = nonoverlapping collections of elements from the population that cover the entire population = the racks, which are nonoverlapping collections of DVDs.
Element = an object on which a measurement is taken = a DVD; each DVD is measured as watched or not watched, which makes the DVDs the elements.
Simple random sampling = selecting a group of n sampling units in such a way that each sample of size n has the same chance of being selected. In this case, we select 15 sampling units.
# part A: estimate the population total from a sample of n = 15 out of N = 98.
dat <- read.csv("H:\\STAT422\\corndata.csv", header = TRUE)
N <- 98
n <- 15
### total is N times ybar.
ybar <- mean(dat$bushels)
total <- ybar * N
total
## [1] 52479653
# Estimated variance of the total: N^2 * (1 - n/N) * s^2 / n.
# The division by n must sit OUTSIDE var(): var(y / n) equals s^2 / n^2, which
# understates the variance by a factor of n.
Var.total <- N^2 * (1 - n/N) * (var(dat$bushels) / n)
Var.total
## approx. 2.14e+14 (n = 15 times the 1.427e+13 printed by the earlier
## var(y / n) form; re-run against the data to refresh this output)
# Bound on the error of estimation: two standard errors.
Bound <- 2 * sqrt(Var.total)
Bound
## approx. 2.926e+07 (sqrt(15) times the 7554215 printed by the earlier
## var(y / n) form; re-run against the data to refresh this output)
# part B: sample size needed to estimate the total to within B.
B <- 1.5e+07
N <- 98
# Sample variance from the part A data, used in place of the population variance.
sig2 <- var(dat$bushels)
# D = B^2 / (4 * N^2) for estimating a total.
D <- B^2 / (4 * N^2)
n <- (N * sig2) / ((N - 1) * D + sig2)
n
## [1] 40.17
# so round up
n <- ceiling(n)
n
## [1] 41
# Poll: simple random sample of n = 500 from a population of N = 20000.
N <- 20000
n <- 500

# Estimated variance of a sample proportion p from a sample of size n drawn
# without replacement from N units: (1 - n/N) * p * (1 - p) / (n - 1).
srs_prop_var <- function(p, n, N) {
  (1 - n/N) * ((p * (1 - p))/(n - 1))
}

# Sample proportions for the four response counts.
phat1 <- 192/n
phat2 <- 160/n
phat3 <- 97/n
phat4 <- 51/n
# Pass the pieces as separate arguments so paste() joins them into ONE string;
# wrapping them in c() would return a two-element character vector instead.
paste("The estimated proportion in support of Johnson is", phat1)
## [1] "The estimated proportion in support of Johnson is 0.384"
# Bound on the error of estimation for each proportion: two standard errors.
Var.phat1 <- srs_prop_var(phat1, n, N)
Bound.phat1 <- 2 * sqrt(Var.phat1)
Bound.phat1
## [1] 0.043
Var.phat2 <- srs_prop_var(phat2, n, N)
Bound.phat2 <- 2 * sqrt(Var.phat2)
Bound.phat2
## [1] 0.04124
Var.phat3 <- srs_prop_var(phat3, n, N)
Bound.phat3 <- 2 * sqrt(Var.phat3)
Bound.phat3
## [1] 0.03496
Var.phat4 <- srs_prop_var(phat4, n, N)
Bound.phat4 <- 2 * sqrt(Var.phat4)
Bound.phat4
## [1] 0.02676
# Row 1: estimated proportions; row 2: their bounds.
rbind(c(phat1, phat2, phat3, phat4), c(Bound.phat1, Bound.phat2,
    Bound.phat3, Bound.phat4))
## [,1] [,2] [,3] [,4]
## [1,] 0.384 0.32000 0.19400 0.10200
## [2,] 0.043 0.04124 0.03496 0.02676
# Part B the point estimator for the difference in proportions:
p.diff <- phat1 - phat2
# The two proportions come from the same sample, so the variance of their
# difference adds a cross term 2 * phat1 * phat2 / n to the two variances.
# NOTE(review): the cross term uses n and no fpc, while Var.phat1/Var.phat2 use
# n - 1 and the fpc -- confirm this mix matches the intended formula.
var.p.diff <- Var.phat1 + Var.phat2 + (2 * phat1 * phat2)/n
var.p.diff
## [1] 0.001379
Bound.p.diff <- 2 * sqrt(var.p.diff)
Bound.p.diff
## [1] 0.07427
# Load the cars93 table (not referenced by the calculations below, which use
# the ten sampled mpg values typed in directly).
cardat <- read.table(file = "H:\\STAT422\\test1\\cars93.txt", sep = "")
# Population size and sample size.
N <- 92
n <- 10
# The ten sampled mpg values.
mpg <- c(31, 26, 20, 30, 36, 25, 26, 30, 23, 30)
# Point estimate of the population mean: the sample mean.
ybar <- mean(mpg)
ybar
## [1] 27.7
# Estimated variance of ybar with the finite population correction (1 - n/N),
# followed by the two-standard-error bound.
Var.ybar <- (1 - n / N) * (var(mpg) / n)
Bound.ybar <- 2 * sqrt(Var.ybar)
Bound.ybar
## [1] 2.744