Load the lpSolve package.
options(repos = c(CRAN = "https://cran.rstudio.com/"))
install.packages("lpSolve")
##
## The downloaded binary packages are in
## /var/folders/n5/62g0xldn3yv6cdrbstcqwdt40000gn/T//RtmpimWjeZ/downloaded_packages
library(lpSolve)
Refer to the heart data from Section 2 of the self-guided portion.
w <- c(0.13, 0.64, 5.8, 102, 210, 2030, 3900)
l <- c(0.55, 1, 2.2, 4, 6.5, 12, 16)
l_cubed <- l^3
n <- length(w)
obj <- c(1, rep(0, n))
# two constraints: w_i - a*l_i^3 <= r and -w_i + a*l_i^3 <= r
con <- rbind(
cbind(rep(1, n), diag(l_cubed)), # w_i - a*l_i^3 <= r
cbind(rep(1, n), -diag(l_cubed)) # -w_i + a*l_i^3 <= r
)
rhs <- c(w, -w)
rel <- rep("<=", 2*n)
lp_result <- lp("min", obj, con, rel, rhs)
chebyshev_a_estimate <- lp_result$solution[2]
print(chebyshev_a_estimate)
## [1] 0.7813674
w <- c(0.13, 0.64, 5.8, 102, 210, 2030, 3900)
l <- c(0.55, 1, 2.2, 4, 6.5, 12, 16)
l_seq <- seq(min(l), max(l), length.out = 100)
w_cac <- 0.7813674 * l_seq^3
w_lsc <- 0.9851 * l_seq^3
plot(l, w, xlim = range(l_seq), ylim = range(c(w, w_cac, w_lsc)),
xlab = "Length of the cavity (in)", ylab = "Heart weight (g)",
main = "Heart Data and Estimated Curves", pch = 16, cex.lab = 1.5, cex.main = 1.5, cex.axis = 1.2)
lines(l_seq, w_cac, col = "red", lwd = 2)
lines(l_seq, w_lsc, col = "blue", lwd = 2)
legend("topleft", legend = c("Original Data", "CAC Curve", "LSC Curve"),
col = c("black", "red", "blue"), lwd = 2, pch = c(16, NA, NA), lty = c(NA, 1, 1), cex = 0.6)
The results are what I expected.
par(mfrow=c(1,2)) # generate plots in 1 row, 2 columns
l <- c(0.55, 1, 2.2, 4, 6.5, 12, 16) # Lengths
w <- c(0.13, 0.64, 5.8, 102, 210, 2030, 3900) # Weights
residuals_lsc <- w - 0.9851 * l^3
residuals_cac <- w - 0.7813674 * l^3
common_ylim <- range(c(residuals_lsc, residuals_cac))
plot(l, residuals_lsc, ylim = common_ylim, main = "LSC Residuals", xlab = "Length of cavity (in)", ylab = "Residuals")
abline(h = 0, col = "blue")
plot(l, residuals_cac, ylim = common_ylim, main = "CAC Residuals", xlab = "Length of cavity (in)", ylab = "Residuals")
abline(h = 0, col = "blue")
The results are what I expect to see. Since LSC minimizes the sum of
squared residuals, we expect the residuals to be symmetrically
distributed around zero, with no pattern indicating a systematic
deviation. CAC minimizes the maximum absolute deviation. Thus, the
largest absolute residual is minimized, but this does not necessarily
minimize all residuals equally. We might observe one or more residuals
significantly larger than the rest but minimized to the extent possible
within the criterion’s objective.
Apply the linear regression method, using the lm command, to the energy consumption scenario in Section 1 of the self-guided portion. Don’t forget to modify the intercept assumption! What are the LSC estimates for \(a\) and \(b\) (after transformation)?
x <- c(0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100)
Q <- c(1.31, 1.94, 3.32, 6.59, 15.68, 34.78, 64.94, 132.17, 274.07, 512.16, 1086.37)
ln_Q <- log(Q)
lm_model <- lm(ln_Q ~ x)
ln_a_estimate <- coef(lm_model)[1]
b_estimate <- coef(lm_model)[2]
print(paste("ln(a):", ln_a_estimate))
## [1] "ln(a): -0.00569941775334676"
print(paste("b:", b_estimate))
## [1] "b: 0.0696045924855083"
a_estimate <- exp(ln_a_estimate)
print(paste("a:", a_estimate))
## [1] "a: 0.99431679311589"
x <- seq(0, 100, by=10)
Q <- c(1.31, 1.94, 3.32, 6.59, 15.68, 34.78, 64.94, 132.17, 274.07, 512.16, 1086.37)
x_smooth <- seq(0, 100, length.out = 1000)
Q_cac <- 1.06496183198 * exp(0.06720570 * x_smooth)
Q_lsc <- 0.99431679311589 * exp(0.0696045924855083 * x_smooth)
plot(x, Q, xlab = "Year (since 1900)", ylab = "Energy Consumption (Normalized)",
main = "Energy Consumption and Model Estimates", pch = 19, ylim = c(min(Q), max(Q_lsc)))
lines(x_smooth, Q_cac, col = "red", lwd = 2)
lines(x_smooth, Q_lsc, col = "blue", lwd = 2)
legend("topleft", legend = c("Original Data", "CAC Curve", "LSC Curve"),
col = c("black", "red", "blue"), lwd = 2, pch = c(19, NA, NA), lty = c(NA, 1, 1), cex = 0.75)
These are the results I expected.
x <- seq(0, 100, by=10)
Q <- c(1.31, 1.94, 3.32, 6.59, 15.68, 34.78, 64.94, 132.17, 274.07, 512.16, 1086.37)
Q_pred_cac <- 1.06496183198 * exp(0.06720570 * x)
Q_pred_lsc <- 0.99431679311589 * exp(0.0696045924855083 * x)
residuals_cac <- Q - Q_pred_cac
residuals_lsc <- Q - Q_pred_lsc
common_ylim <- range(c(residuals_lsc, residuals_cac))
par(mfrow=c(1,2))
plot(x, residuals_cac, ylim = common_ylim, main = "CAC Residuals", xlab = "Year (since 1900)", ylab = "Residuals", pch = 19)
abline(h = 0, col = "red")
plot(x, residuals_lsc, ylim = common_ylim, main = "LSC Residuals", xlab = "Year (since 1900)", ylab = "Residuals", pch = 19)
abline(h = 0, col = "red")
The results are what I expect to see. As explained previously, since LSC minimizes the sum of squared residuals, we expect the residuals to be symmetrically distributed around zero, with no pattern indicating a systematic deviation. CAC minimizes the maximum absolute deviation. Thus, the largest absolute residual is minimized, but this does not necessarily minimize all residuals equally. We might observe one or more residuals significantly larger than the rest but minimized to the extent possible within the criterion’s objective.