# Sample data: 20 paired observations. Per the plot labels used in this
# script, x is a diameter and y is an age.
x <- c(
  12, 11.4, 7.9, 9, 10.5, 7.9, 7.3, 10.2, 11.7, 11.3,
  5.7, 8, 10.3, 12, 9.2, 8.5, 7, 10.7, 9.3, 8.2
)
y <- c(
  125, 119, 83, 85, 99, 117, 69, 133, 154, 168,
  61, 80, 114, 147, 122, 106, 82, 88, 97, 97
)

# Constants used by the estimators below.
N <- 1132     # appears as the population size in the n/N correction
n <- 20       # sample size (number of x/y pairs)
mu.x <- 10.3  # known mean of x that the estimators scale to
  1. The plot
# Scatterplot of y against x with explicit title and axis labels.
plot(
  x, y,
  main = "Scatterplot",
  xlab = "Diameter x",
  ylab = "Age y"
)

The ratio r:

# Ratio of the sample mean of y to the sample mean of x.
r <- mean(y) / mean(x)
print(r)
## [1] 11.40883
  1. The estimated mean of y (mu.hat.y)
# Ratio estimate of the mean of y: the known mean of x scaled by r.
mu.hat.y <- mu.x * r
print(mu.hat.y)
## [1] 117.5109

The SE of mu.hat.y

# Estimated variance of the ratio estimate:
#   (1 - n/N) * sum((y - r*x)^2) / (n * (n - 1))
# written with the script's n (= 20) and N (= 1132).
var.mu.hat.y <- (1 - (n / N)) *
  (sum((y - r * x)^2) / (n * (n - 1)))

# The standard error is the square root of that variance.
SE.mu.hat.y <- sqrt(var.mu.hat.y)
print(SE.mu.hat.y)
## [1] 3.971109
  1. Using regression to estimate the population mean
# Least-squares fit of y on x, with the intercept written out explicitly.
reg <- lm(formula = y ~ 1 + x)

# Sample mean of x, kept under its own name.
mean.x <- mean(x)

# Fitted intercept and slope.
print(coef(reg))
## (Intercept)           x 
##   -8.264754   12.287587

The mean (reg.mu.hat.y)

# Regression estimate of the mean of y:
#   mean(y) + slope * (mu.x - mean(x))
# The slope is read from the fitted model rather than typed in by hand, so it
# is used at full precision (12.287587, not a truncated 12.28), and the known
# mean of x comes from mu.x instead of a repeated literal.
reg.mu.hat.y <- mean(y) + coef(reg)[["x"]] * (mu.x - mean(x))
reg.mu.hat.y
## [1] 118.2974

The SE of the mean using regression

# ANOVA table for the fitted regression; the "Residuals" row gives the
# residual mean square.
print(anova(reg))
## Analysis of Variance Table
## 
## Response: y
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## x          1 9595.0  9595.0  28.541 4.452e-05 ***
## Residuals 18 6051.2   336.2                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Standard error of the regression estimate of the mean:
#   sqrt( ((N - n) / (N * n)) * MSE )
# with N = 1132, n = 20 and MSE = 336.2, the residual mean square printed in
# the ANOVA table for reg. Without the square root this quantity is the
# estimated variance (16.513), not the standard error.
SE.reg.mu.hat.y <- sqrt(((1132 - 20) / (1132 * 20)) * 336.2)
SE.reg.mu.hat.y
## [1] 4.06362
# Scatterplot of y against x (same call as the first plot in this script).
plot(
  x, y,
  main = "Scatterplot",
  xlab = "Diameter x",
  ylab = "Age y"
)

4.8 No.8

library(SDaA)
# Load the agsrs data set and preview its first two rows.
data("agsrs")
head(agsrs, n = 2)
##           county state acres92 acres87 acres82 farms92 farms87 farms82
## 1  COFFEE COUNTY    AL  175209  179311  194509     760     842     944
## 2 COLBERT COUNTY    AL  138135  145104  161360     488     563     686
##   largef92 largef87 largef82 smallf92 smallf87 smallf82
## 1       29       28       21       57       47       66
## 2       37       41       42       12       44       47
  1. The plots
# acres92 against each candidate auxiliary variable.
with(
  agsrs,
  plot(farms87, acres92,
       main = "Scatterplot", xlab = "Farms 87", ylab = "Acres 92")
)

with(
  agsrs,
  plot(acres87, acres92,
       main = "Scatterplot", xlab = "Acres 87", ylab = "Acres 92")
)

The ratio r2

# Ratio of the sample mean of acres92 to the sample mean of farms87.
r2 <- with(agsrs, mean(acres92) / mean(farms87))
print(r2)
## [1] 459.8975
  1. The estimated mean number of acres (mu.hat.acres.92)
# Ratio estimate of mean acres92: r2 times 2087759 / 3078.
# NOTE(review): 2087759 / 3078 is presumably the population mean of farms87
# (population total over the 3078 units) -- confirm against the data source.
mu.hat.acres.92 <- (2087759 / 3078) * r2
print(mu.hat.acres.92)
## [1] 311941.2

The standard error

# Estimated variance of the ratio estimate of mean acres92:
#   (1 - n/N) * sum((acres92 - r2 * farms87)^2) / (n * (n - 1))
# with N = 3078 and n the number of sampled rows. n is 300 here: the
# correction term originally used 300/3078 and the regression on this data has
# 298 residual degrees of freedom. The denominator must therefore be
# 300 * 299, not 200 * 299; taking n from the data avoids that kind of slip.
var.mu.acres.92 <- (1 - (nrow(agsrs) / 3078)) *
  (sum((agsrs$acres92 - r2 * agsrs$farms87)^2) /
     (nrow(agsrs) * (nrow(agsrs) - 1)))

# The standard error is the square root of that variance.
SE.mu.acres.92 <- sqrt(var.mu.acres.92)
  1. Regression estimation
# Least-squares fit of acres92 on farms87, intercept written out explicitly.
reg2 <- lm(formula = acres92 ~ 1 + farms87, data = agsrs)

# Fitted intercept and slope.
print(coef(reg2))
##  (Intercept)      farms87 
## 267029.81421     47.65325

# ANOVA table; the "Residuals" row gives the residual mean square.
print(anova(reg2))
## Analysis of Variance Table
## 
## Response: acres92
##            Df     Sum Sq    Mean Sq F value Pr(>F)
## farms87     1 1.2629e+11 1.2629e+11   1.064 0.3031
## Residuals 298 3.5370e+13 1.1869e+11

The estimated mean

# Regression estimate of mean acres92:
#   mean(acres92) + slope * (2087759/3078 - mean(farms87))
# The slope is read from the fitted model rather than typed in by hand, so it
# is used at full precision (47.65325, not a truncated 47.65).
reg.mu.hat.acres.92 <- mean(agsrs$acres92) +
  coef(reg2)[["farms87"]] * ((2087759 / 3078) - mean(agsrs$farms87))
reg.mu.hat.acres.92
## [1] 299352.3

The standard error

# Standard error of the regression estimate of mean acres92:
#   sqrt( ((N - n) / (N * n)) * MSE )
# with N = 3078, n = 300 and MSE = 1.1869e+11, the residual mean square as
# printed (rounded) in the ANOVA table for reg2. Without the square root this
# quantity is the estimated variance (about 3.57e8), which is why the value
# previously looked implausibly large for a standard error.
SE.reg.mu.hat.acres.92 <- sqrt(((3078 - 300) / (3078 * 300)) * 1.1869e+11)
SE.reg.mu.hat.acres.92
## [1] 18896.36

Ratio estimation provides better precision. The linear relationship between acres92 and farms87 in the plot is weak, so regression estimation using acres87 as the auxiliary variable could be better.