library(stats4)
wine <- read.csv(file = "winequality-red.csv", sep=";", header = T) # Load in the dataset
knitr::kable(head(wine,100), caption = "Red Wine Dataset")
| fixed.acidity | volatile.acidity | citric.acid | residual.sugar | chlorides | free.sulfur.dioxide | total.sulfur.dioxide | density | pH | sulphates | alcohol | quality |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 7.4 | 0.700 | 0.00 | 1.90 | 0.076 | 11 | 34 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
| 7.8 | 0.880 | 0.00 | 2.60 | 0.098 | 25 | 67 | 0.9968 | 3.20 | 0.68 | 9.8 | 5 |
| 7.8 | 0.760 | 0.04 | 2.30 | 0.092 | 15 | 54 | 0.9970 | 3.26 | 0.65 | 9.8 | 5 |
| 11.2 | 0.280 | 0.56 | 1.90 | 0.075 | 17 | 60 | 0.9980 | 3.16 | 0.58 | 9.8 | 6 |
| 7.4 | 0.700 | 0.00 | 1.90 | 0.076 | 11 | 34 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
| 7.4 | 0.660 | 0.00 | 1.80 | 0.075 | 13 | 40 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
| 7.9 | 0.600 | 0.06 | 1.60 | 0.069 | 15 | 59 | 0.9964 | 3.30 | 0.46 | 9.4 | 5 |
| 7.3 | 0.650 | 0.00 | 1.20 | 0.065 | 15 | 21 | 0.9946 | 3.39 | 0.47 | 10.0 | 7 |
| 7.8 | 0.580 | 0.02 | 2.00 | 0.073 | 9 | 18 | 0.9968 | 3.36 | 0.57 | 9.5 | 7 |
| 7.5 | 0.500 | 0.36 | 6.10 | 0.071 | 17 | 102 | 0.9978 | 3.35 | 0.80 | 10.5 | 5 |
| 6.7 | 0.580 | 0.08 | 1.80 | 0.097 | 15 | 65 | 0.9959 | 3.28 | 0.54 | 9.2 | 5 |
| 7.5 | 0.500 | 0.36 | 6.10 | 0.071 | 17 | 102 | 0.9978 | 3.35 | 0.80 | 10.5 | 5 |
| 5.6 | 0.615 | 0.00 | 1.60 | 0.089 | 16 | 59 | 0.9943 | 3.58 | 0.52 | 9.9 | 5 |
| 7.8 | 0.610 | 0.29 | 1.60 | 0.114 | 9 | 29 | 0.9974 | 3.26 | 1.56 | 9.1 | 5 |
| 8.9 | 0.620 | 0.18 | 3.80 | 0.176 | 52 | 145 | 0.9986 | 3.16 | 0.88 | 9.2 | 5 |
| 8.9 | 0.620 | 0.19 | 3.90 | 0.170 | 51 | 148 | 0.9986 | 3.17 | 0.93 | 9.2 | 5 |
| 8.5 | 0.280 | 0.56 | 1.80 | 0.092 | 35 | 103 | 0.9969 | 3.30 | 0.75 | 10.5 | 7 |
| 8.1 | 0.560 | 0.28 | 1.70 | 0.368 | 16 | 56 | 0.9968 | 3.11 | 1.28 | 9.3 | 5 |
| 7.4 | 0.590 | 0.08 | 4.40 | 0.086 | 6 | 29 | 0.9974 | 3.38 | 0.50 | 9.0 | 4 |
| 7.9 | 0.320 | 0.51 | 1.80 | 0.341 | 17 | 56 | 0.9969 | 3.04 | 1.08 | 9.2 | 6 |
| 8.9 | 0.220 | 0.48 | 1.80 | 0.077 | 29 | 60 | 0.9968 | 3.39 | 0.53 | 9.4 | 6 |
| 7.6 | 0.390 | 0.31 | 2.30 | 0.082 | 23 | 71 | 0.9982 | 3.52 | 0.65 | 9.7 | 5 |
| 7.9 | 0.430 | 0.21 | 1.60 | 0.106 | 10 | 37 | 0.9966 | 3.17 | 0.91 | 9.5 | 5 |
| 8.5 | 0.490 | 0.11 | 2.30 | 0.084 | 9 | 67 | 0.9968 | 3.17 | 0.53 | 9.4 | 5 |
| 6.9 | 0.400 | 0.14 | 2.40 | 0.085 | 21 | 40 | 0.9968 | 3.43 | 0.63 | 9.7 | 6 |
| 6.3 | 0.390 | 0.16 | 1.40 | 0.080 | 11 | 23 | 0.9955 | 3.34 | 0.56 | 9.3 | 5 |
| 7.6 | 0.410 | 0.24 | 1.80 | 0.080 | 4 | 11 | 0.9962 | 3.28 | 0.59 | 9.5 | 5 |
| 7.9 | 0.430 | 0.21 | 1.60 | 0.106 | 10 | 37 | 0.9966 | 3.17 | 0.91 | 9.5 | 5 |
| 7.1 | 0.710 | 0.00 | 1.90 | 0.080 | 14 | 35 | 0.9972 | 3.47 | 0.55 | 9.4 | 5 |
| 7.8 | 0.645 | 0.00 | 2.00 | 0.082 | 8 | 16 | 0.9964 | 3.38 | 0.59 | 9.8 | 6 |
| 6.7 | 0.675 | 0.07 | 2.40 | 0.089 | 17 | 82 | 0.9958 | 3.35 | 0.54 | 10.1 | 5 |
| 6.9 | 0.685 | 0.00 | 2.50 | 0.105 | 22 | 37 | 0.9966 | 3.46 | 0.57 | 10.6 | 6 |
| 8.3 | 0.655 | 0.12 | 2.30 | 0.083 | 15 | 113 | 0.9966 | 3.17 | 0.66 | 9.8 | 5 |
| 6.9 | 0.605 | 0.12 | 10.70 | 0.073 | 40 | 83 | 0.9993 | 3.45 | 0.52 | 9.4 | 6 |
| 5.2 | 0.320 | 0.25 | 1.80 | 0.103 | 13 | 50 | 0.9957 | 3.38 | 0.55 | 9.2 | 5 |
| 7.8 | 0.645 | 0.00 | 5.50 | 0.086 | 5 | 18 | 0.9986 | 3.40 | 0.55 | 9.6 | 6 |
| 7.8 | 0.600 | 0.14 | 2.40 | 0.086 | 3 | 15 | 0.9975 | 3.42 | 0.60 | 10.8 | 6 |
| 8.1 | 0.380 | 0.28 | 2.10 | 0.066 | 13 | 30 | 0.9968 | 3.23 | 0.73 | 9.7 | 7 |
| 5.7 | 1.130 | 0.09 | 1.50 | 0.172 | 7 | 19 | 0.9940 | 3.50 | 0.48 | 9.8 | 4 |
| 7.3 | 0.450 | 0.36 | 5.90 | 0.074 | 12 | 87 | 0.9978 | 3.33 | 0.83 | 10.5 | 5 |
| 7.3 | 0.450 | 0.36 | 5.90 | 0.074 | 12 | 87 | 0.9978 | 3.33 | 0.83 | 10.5 | 5 |
| 8.8 | 0.610 | 0.30 | 2.80 | 0.088 | 17 | 46 | 0.9976 | 3.26 | 0.51 | 9.3 | 4 |
| 7.5 | 0.490 | 0.20 | 2.60 | 0.332 | 8 | 14 | 0.9968 | 3.21 | 0.90 | 10.5 | 6 |
| 8.1 | 0.660 | 0.22 | 2.20 | 0.069 | 9 | 23 | 0.9968 | 3.30 | 1.20 | 10.3 | 5 |
| 6.8 | 0.670 | 0.02 | 1.80 | 0.050 | 5 | 11 | 0.9962 | 3.48 | 0.52 | 9.5 | 5 |
| 4.6 | 0.520 | 0.15 | 2.10 | 0.054 | 8 | 65 | 0.9934 | 3.90 | 0.56 | 13.1 | 4 |
| 7.7 | 0.935 | 0.43 | 2.20 | 0.114 | 22 | 114 | 0.9970 | 3.25 | 0.73 | 9.2 | 5 |
| 8.7 | 0.290 | 0.52 | 1.60 | 0.113 | 12 | 37 | 0.9969 | 3.25 | 0.58 | 9.5 | 5 |
| 6.4 | 0.400 | 0.23 | 1.60 | 0.066 | 5 | 12 | 0.9958 | 3.34 | 0.56 | 9.2 | 5 |
| 5.6 | 0.310 | 0.37 | 1.40 | 0.074 | 12 | 96 | 0.9954 | 3.32 | 0.58 | 9.2 | 5 |
| 8.8 | 0.660 | 0.26 | 1.70 | 0.074 | 4 | 23 | 0.9971 | 3.15 | 0.74 | 9.2 | 5 |
| 6.6 | 0.520 | 0.04 | 2.20 | 0.069 | 8 | 15 | 0.9956 | 3.40 | 0.63 | 9.4 | 6 |
| 6.6 | 0.500 | 0.04 | 2.10 | 0.068 | 6 | 14 | 0.9955 | 3.39 | 0.64 | 9.4 | 6 |
| 8.6 | 0.380 | 0.36 | 3.00 | 0.081 | 30 | 119 | 0.9970 | 3.20 | 0.56 | 9.4 | 5 |
| 7.6 | 0.510 | 0.15 | 2.80 | 0.110 | 33 | 73 | 0.9955 | 3.17 | 0.63 | 10.2 | 6 |
| 7.7 | 0.620 | 0.04 | 3.80 | 0.084 | 25 | 45 | 0.9978 | 3.34 | 0.53 | 9.5 | 5 |
| 10.2 | 0.420 | 0.57 | 3.40 | 0.070 | 4 | 10 | 0.9971 | 3.04 | 0.63 | 9.6 | 5 |
| 7.5 | 0.630 | 0.12 | 5.10 | 0.111 | 50 | 110 | 0.9983 | 3.26 | 0.77 | 9.4 | 5 |
| 7.8 | 0.590 | 0.18 | 2.30 | 0.076 | 17 | 54 | 0.9975 | 3.43 | 0.59 | 10.0 | 5 |
| 7.3 | 0.390 | 0.31 | 2.40 | 0.074 | 9 | 46 | 0.9962 | 3.41 | 0.54 | 9.4 | 6 |
| 8.8 | 0.400 | 0.40 | 2.20 | 0.079 | 19 | 52 | 0.9980 | 3.44 | 0.64 | 9.2 | 5 |
| 7.7 | 0.690 | 0.49 | 1.80 | 0.115 | 20 | 112 | 0.9968 | 3.21 | 0.71 | 9.3 | 5 |
| 7.5 | 0.520 | 0.16 | 1.90 | 0.085 | 12 | 35 | 0.9968 | 3.38 | 0.62 | 9.5 | 7 |
| 7.0 | 0.735 | 0.05 | 2.00 | 0.081 | 13 | 54 | 0.9966 | 3.39 | 0.57 | 9.8 | 5 |
| 7.2 | 0.725 | 0.05 | 4.65 | 0.086 | 4 | 11 | 0.9962 | 3.41 | 0.39 | 10.9 | 5 |
| 7.2 | 0.725 | 0.05 | 4.65 | 0.086 | 4 | 11 | 0.9962 | 3.41 | 0.39 | 10.9 | 5 |
| 7.5 | 0.520 | 0.11 | 1.50 | 0.079 | 11 | 39 | 0.9968 | 3.42 | 0.58 | 9.6 | 5 |
| 6.6 | 0.705 | 0.07 | 1.60 | 0.076 | 6 | 15 | 0.9962 | 3.44 | 0.58 | 10.7 | 5 |
| 9.3 | 0.320 | 0.57 | 2.00 | 0.074 | 27 | 65 | 0.9969 | 3.28 | 0.79 | 10.7 | 5 |
| 8.0 | 0.705 | 0.05 | 1.90 | 0.074 | 8 | 19 | 0.9962 | 3.34 | 0.95 | 10.5 | 6 |
| 7.7 | 0.630 | 0.08 | 1.90 | 0.076 | 15 | 27 | 0.9967 | 3.32 | 0.54 | 9.5 | 6 |
| 7.7 | 0.670 | 0.23 | 2.10 | 0.088 | 17 | 96 | 0.9962 | 3.32 | 0.48 | 9.5 | 5 |
| 7.7 | 0.690 | 0.22 | 1.90 | 0.084 | 18 | 94 | 0.9961 | 3.31 | 0.48 | 9.5 | 5 |
| 8.3 | 0.675 | 0.26 | 2.10 | 0.084 | 11 | 43 | 0.9976 | 3.31 | 0.53 | 9.2 | 4 |
| 9.7 | 0.320 | 0.54 | 2.50 | 0.094 | 28 | 83 | 0.9984 | 3.28 | 0.82 | 9.6 | 5 |
| 8.8 | 0.410 | 0.64 | 2.20 | 0.093 | 9 | 42 | 0.9986 | 3.54 | 0.66 | 10.5 | 5 |
| 8.8 | 0.410 | 0.64 | 2.20 | 0.093 | 9 | 42 | 0.9986 | 3.54 | 0.66 | 10.5 | 5 |
| 6.8 | 0.785 | 0.00 | 2.40 | 0.104 | 14 | 30 | 0.9966 | 3.52 | 0.55 | 10.7 | 6 |
| 6.7 | 0.750 | 0.12 | 2.00 | 0.086 | 12 | 80 | 0.9958 | 3.38 | 0.52 | 10.1 | 5 |
| 8.3 | 0.625 | 0.20 | 1.50 | 0.080 | 27 | 119 | 0.9972 | 3.16 | 1.12 | 9.1 | 4 |
| 6.2 | 0.450 | 0.20 | 1.60 | 0.069 | 3 | 15 | 0.9958 | 3.41 | 0.56 | 9.2 | 5 |
| 7.8 | 0.430 | 0.70 | 1.90 | 0.464 | 22 | 67 | 0.9974 | 3.13 | 1.28 | 9.4 | 5 |
| 7.4 | 0.500 | 0.47 | 2.00 | 0.086 | 21 | 73 | 0.9970 | 3.36 | 0.57 | 9.1 | 5 |
| 7.3 | 0.670 | 0.26 | 1.80 | 0.401 | 16 | 51 | 0.9969 | 3.16 | 1.14 | 9.4 | 5 |
| 6.3 | 0.300 | 0.48 | 1.80 | 0.069 | 18 | 61 | 0.9959 | 3.44 | 0.78 | 10.3 | 6 |
| 6.9 | 0.550 | 0.15 | 2.20 | 0.076 | 19 | 40 | 0.9961 | 3.41 | 0.59 | 10.1 | 5 |
| 8.6 | 0.490 | 0.28 | 1.90 | 0.110 | 20 | 136 | 0.9972 | 2.93 | 1.95 | 9.9 | 6 |
| 7.7 | 0.490 | 0.26 | 1.90 | 0.062 | 9 | 31 | 0.9966 | 3.39 | 0.64 | 9.6 | 5 |
| 9.3 | 0.390 | 0.44 | 2.10 | 0.107 | 34 | 125 | 0.9978 | 3.14 | 1.22 | 9.5 | 5 |
| 7.0 | 0.620 | 0.08 | 1.80 | 0.076 | 8 | 24 | 0.9978 | 3.48 | 0.53 | 9.0 | 5 |
| 7.9 | 0.520 | 0.26 | 1.90 | 0.079 | 42 | 140 | 0.9964 | 3.23 | 0.54 | 9.5 | 5 |
| 8.6 | 0.490 | 0.28 | 1.90 | 0.110 | 20 | 136 | 0.9972 | 2.93 | 1.95 | 9.9 | 6 |
| 8.6 | 0.490 | 0.29 | 2.00 | 0.110 | 19 | 133 | 0.9972 | 2.93 | 1.98 | 9.8 | 5 |
| 7.7 | 0.490 | 0.26 | 1.90 | 0.062 | 9 | 31 | 0.9966 | 3.39 | 0.64 | 9.6 | 5 |
| 5.0 | 1.020 | 0.04 | 1.40 | 0.045 | 41 | 85 | 0.9938 | 3.75 | 0.48 | 10.5 | 4 |
| 4.7 | 0.600 | 0.17 | 2.30 | 0.058 | 17 | 106 | 0.9932 | 3.85 | 0.60 | 12.9 | 6 |
| 6.8 | 0.775 | 0.00 | 3.00 | 0.102 | 8 | 23 | 0.9965 | 3.45 | 0.56 | 10.7 | 5 |
| 7.0 | 0.500 | 0.25 | 2.00 | 0.070 | 3 | 22 | 0.9963 | 3.25 | 0.63 | 9.2 | 5 |
| 7.6 | 0.900 | 0.06 | 2.50 | 0.079 | 5 | 10 | 0.9967 | 3.39 | 0.56 | 9.8 | 5 |
| 8.1 | 0.545 | 0.18 | 1.90 | 0.080 | 13 | 35 | 0.9972 | 3.30 | 0.59 | 9.0 | 6 |
a. Provide an estimate of μ based on the sample
Note: The population includes all the red wines produced in the north of Portugal. The sample is these 1599 bottles of red wines tested. We use “the sample” (1599 rows of data) to make inference about the population mean parameter mu.
mean(wine$density)
## [1] 0.9967467
As can be seen from the code snippet above, the estimate of the mean is 0.996746679174484
b. Use the Central Limit Theorem (CLT) to quantify the variability of your estimate
sd(wine$density)/sqrt(nrow(wine))
## [1] 4.71981e-05
The variability of the estimate – calculated via (standard deviation)/sqrt(n) – is 4.71981e-05
c. Use the CLT to give a 95% confidence interval for μ
sd.mu.hat <- sd(wine$density)/sqrt(nrow(wine))
mu.hat <- mean(wine$density)
c(mu.hat-2*sd.mu.hat, mu.hat + 2*sd.mu.hat)
## [1] 0.9966523 0.9968411
The 95% confidence interval for the mean is [0.9966523, 0.9968411]
d. Use the bootstrap method to do parts b and c, and compare the results with those obtained from the CLT. State your findings.
mu.hat.set <- NULL
sample <- wine$density
for (k in 1:2000) {
sample.boostrap <- sample(sample, size = 800, replace = T)
mu.hat <- mean(sample.boostrap)
mu.hat.set[k] <- mu.hat
}
sd(mu.hat.set)/sqrt(nrow(wine))
## [1] 1.655979e-06
# 95% confidence interval
quantile(mu.hat.set, probs = c(0.025, 0.975))
## 2.5% 97.5%
## 0.9966198 0.9968737
The variability calculated via the bootstrap method is 1.636967e-06 and the 95% confidence interval is [0.9966148, 0.9968716].
Compared to the CLT, the variability is smaller by a factor of 20-30 and the 95% confidence interval is wider. The width/range of the bootstrap confidence interval is .0002568 and that of the CLT is .0001888. In addition to this, the bootstrap confidence interval’s lower bound is lower than that of CLT and the upper bound is greater than that of CLT.
e. Can we use a normal distribution to model “density”? If yes, what are the maximum likelihood estimates of the mean and standard deviation? Please provide their standard errors as well.
Note: To find the MLEs, you may consider multiplying the data by 10. This will solve a numerical problem you may encounter. You must transform back in the end.
hist(wine$density)
Provided the histogram above, it is safe to conclude that we can model “density” using a normal distribution.
Maximum Likelihood Estimates (MLE) are calculated below.
wine$density <- 10*wine$density
minuslog.lik <- function(mu, sigma){
log.lik <- 0
for(i in 1:800){
log.lik <- log.lik+log(dnorm(wine$density[i], mean=mu, sd=sigma))
}
return(-log.lik)
}
est <- mle(minuslogl=minuslog.lik, start=list(mu = mean(wine$density), sigma = sd(wine$density)))
summary(est)
## Maximum likelihood estimation
##
## Call:
## mle(minuslogl = minuslog.lik, start = list(mu = mean(wine$density),
## sigma = sd(wine$density)))
##
## Coefficients:
## Estimate Std. Error
## mu 9.97646408 0.0005953600
## sigma 0.01683932 0.0004150282
##
## -2 log L: -4265.989
To get the MLE of the mean and the standard deviation – as well as the standard errors – divide the values from the above output by 10. As a pre-requisite to getting the above estimates, had to multiply the values in this column of the dataset by 10.
Estimate – Mean = 9.97646408/10 = 0.997646408
Std. Error – Mean = 0.0005953600/10 = 0.000059536
Estimate – Mean = 0.01683932/10 = 0.001683932
Std. Error – Mean = 0.0004150282/10 = 0.00004150282
a. Provide an estimate of μ based on the sample
mean(wine$residual.sugar)
## [1] 2.538806
The estimate of μ based on the sample is 2.538806
b. Noting that the sample distribution of “residual sugar” is highly skewed, can we use the CLT to quantify the variability of your estimate? Can we use the CLT to give a 95% confidence interval for μ? If yes, please give your solution. If no, explain why.
Yes – despite the fact that “residual sugar” is highly skewed, we can use the CLT to quantify the variability of this estimate and give a 95% confidence interval for μ. CLT can be applied to a distribution as long as it has a finite variance and it is IID (Independent and Identically Distributed).
# Variability of the estimate
sd(wine$residual.sugar)/sqrt(nrow(wine))
## [1] 0.03525922
# 95% confidence interval
sd.mu.hat <- sd(wine$residual.sugar)/sqrt(nrow(wine))
mu.hat <- mean(wine$residual.sugar)
c(mu.hat-2*sd.mu.hat, mu.hat + 2*sd.mu.hat)
## [1] 2.468287 2.609324
As can be seen in the above code snippet, the variability of μ estimate is 0.03525922 and the 95% confidence interval is [2.468287, 2.609324]
c. Use the bootstrap method to do part b. Is the bootstrap confidence interval symmetric?
mu.hat.set <- NULL
sample <- wine$residual.sugar
# getting the variability
for (k in 1:2000) {
sample.boostrap <- sample(sample, size = 800, replace = T)
mu.hat <- mean(sample.boostrap)
mu.hat.set[k] <- mu.hat
}
var.bootstrap <- sd(mu.hat.set)
print(var.bootstrap)
## [1] 0.04987445
# 95% confidence interval
CI.bootstrap <- quantile(mu.hat.set, probs = c(0.025, 0.975))
print(CI.bootstrap)
## 2.5% 97.5%
## 2.441059 2.636350
As can be seen from the output above, the bootstrap variability is 0.04888155 and the 95% confidence interval for μ estimate is [2.449434, 2.639439].
hist(mu.hat.set, freq = FALSE)
lines(density(mu.hat.set), lwd = 5, col='blue')
abline(v = c(CI.bootstrap[1], CI.bootstrap[2]), col = c('red','red'))
text(x=CI.bootstrap[1]-.01, y=5, srt=90, '2.5%')
text(x=CI.bootstrap[2]+.01, y=5, srt=90, '97.5%')
Provided the graphic above, it can be reasonably concluded that the bootstrap confidence interval is symmetric.
d. Can we use a normal distribution to model “residual sugar”? If no, what distribution do you think can approximate its empirical distribution? What parameters are needed to characterize such a distribution? what are their maximum likelihood estimates? Please provide their standard errors as well.
hist(wine$residual.sugar)
No, it would not be appropriate to model “residual sugar” using a normal distribution. I think a log-normal distribution – specifically the blue curve in the below snippet (https://en.wikipedia.org/wiki/Log-normal_distribution) – could be used to approximate its empirical distribution. Two parameters – mean and standard deviation – are needed to characterize this distribution.
minuslog.lik<-function(mu, sigma){
log.lik<-0
for(i in 1:800){
log.lik = log.lik + log(dlnorm(x=wine$residual.sugar[i], meanlog = mu, sdlog = sigma))
}
return(-log.lik)
}
est.lognorm<-mle(minuslog=minuslog.lik, start=list(mu=log(mean(wine$residual.sugar)), sigma=log(sd(wine$residual.sugar))))
summary(est.lognorm)
## Maximum likelihood estimation
##
## Call:
## mle(minuslogl = minuslog.lik, start = list(mu = log(mean(wine$residual.sugar)),
## sigma = log(sd(wine$residual.sugar))))
##
## Coefficients:
## Estimate Std. Error
## mu 0.8919587 0.012070414
## sigma 0.3414029 0.008534798
##
## -2 log L: 1977.921
As can be seen in the above output:
Note: before answering any part of question 3, you need to create a new column of data for the new variable “excellent” which has binary values (0 and 1). Its value is 1 if the wine rating >=7, and otherwise its value is 0. Then, you can make inference about the underlying proportion parameter p using this column of data.
wine$excellent <- as.integer(wine$quality >= 7)
Preview the new column to ensure it looks fine.
wine$excellent
## [1] 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [38] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## [75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [149] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## [223] 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [260] 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 1 0 0 0 0 1 0 1 0 0 0 0 0
## [297] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0
## [334] 0 1 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 1 0 0 1
## [371] 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## [408] 1 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 1
## [445] 1 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [482] 1 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 0 1 0 0 1 1 1 1 1 1 0 0 1 0 0 0 1 1 0 0 0
## [519] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [556] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 1 1 0 0
## [593] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [630] 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## [667] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [704] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [741] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [778] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 1 1 0 0 0 0 0 0
## [815] 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0
## [852] 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1
## [889] 0 0 0 0 0 0 0 0 1 0 1 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## [926] 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1 0 0 0
## [963] 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0
## [1000] 0 1 1 1 1 0 1 1 1 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 1 1 0 0 0 1
## [1037] 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 1 0 1 0 0 0 0 1 1 1 0 1 0 0
## [1074] 0 0 1 0 0 0 1 0 1 0 0 0 0 1 0 1 1 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0
## [1111] 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0
## [1148] 1 0 0 1 0 0 0 0 0 1 1 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## [1185] 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0
## [1222] 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [1259] 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [1296] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## [1333] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [1370] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1
## [1407] 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0
## [1444] 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 1 0 0
## [1481] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [1518] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0
## [1555] 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## [1592] 0 0 0 0 0 0 0 0
a. Use the CLT to derive a 95% confidence interval for p
mu.hat <- mean(wine$excellent)
sd.mu.hat <- sd(wine$residual.sugar)/sqrt(nrow(wine))
c(mu.hat-2*sd.mu.hat, mu.hat + 2*sd.mu.hat)
## [1] 0.06519138 0.20622826
As can be seen from the output above, the CLT 95% confidence interval for p is [0.06519138, 0.20622826]
b. Use the bootstrap method to derive a 95% confidence interval for p
mu.hat.set <- NULL
sample <- wine$excellent
for (k in 1:2000) {
sample.boostrap <- sample(sample, size = 800, replace = T)
mu.hat <- mean(sample.boostrap)
mu.hat.set[k] <- mu.hat
}
quantile(mu.hat.set, probs = c(0.025, 0.975))
## 2.5% 97.5%
## 0.1125 0.1600
As can be seen from the output above, the bootstrap 95% confidence interval for p is [0.1125, 0.1600]
c. Compare the two intervals. Is there any difference worth our attention?
Yeah, I think it’s worth noting that the CLT 95% confidence interval for p is considerably wider (approximately 3 times wider) than the bootstrap 95% confidence interval for p. The range of the CLT 95% confidence interval is 0.14103688 and the range of the bootstrap 95% confidence interval is .0475.
d. What is the maximum likelihood estimate of p and its standard error?
minuslog.lik<-function(p){
-log(dbinom(x=sum(wine$excellent), size=nrow(wine), prob=p))
}
est <- mle(minuslog=minuslog.lik, start=list(p=0.5))
summary(est)
## Maximum likelihood estimation
##
## Call:
## mle(minuslogl = minuslog.lik, start = list(p = 0.5))
##
## Coefficients:
## Estimate Std. Error
## p 0.1357098 0.008564279
##
## -2 log L: 7.072712
As can be seen in the output above, the estimate of p is 0.1357098 and it’s standard error is 0.008564279