EC3133
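The regression summary below appears without the chunk that produced it. A minimal sketch that would generate a regression of this shape (the seed and the true coefficients are illustrative assumptions, not values recovered from the output):
# NOTE: illustrative reconstruction; the original chunk is not shown in the source
set.seed(1)                        # assumed seed
data <- data.frame(x = rnorm(100))
data$y <- 2 * data$x + rnorm(100)  # true slope of 2 is an assumption
summary(lm(y ~ x, data = data))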
##
## Call:
## lm(formula = y ~ x, data = data)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -1.9073 -0.6835 -0.0875  0.5806  3.2904
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.10280    0.09755  -1.054    0.295
## x            1.94753    0.10688  18.222   <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9707 on 98 degrees of freedom
## Multiple R-squared: 0.7721, Adjusted R-squared: 0.7698
## F-statistic: 332 on 1 and 98 DF, p-value: < 2.2e-16
# Generate sample from normal distribution
set.seed(123)
n <- 1000
true_mu <- 5
true_sigma <- 2
x <- rnorm(n, mean = true_mu, sd = true_sigma)
# Negative log-likelihood (optim minimizes, so we return the negated sum)
normal_loglik <- function(params) {
mu <- params[1]
sigma <- params[2]
-sum(dnorm(x, mean = mu, sd = sigma, log = TRUE))
}
# Find MLE
mle_result <- optim(c(0, 1), normal_loglik)
## TRUE PARAMETERS:
## True Mean: mu = 5
## True SD: sigma = 2
## ML ESTIMATES:
## ML Estimate for Mean: mu_hat = 5.0325
## ML Estimate for SD: sigma_hat = 1.9827
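Because the normal MLEs have closed forms, optim's answer can be verified directly (a quick sanity check added here; not part of the original chunk):
# Closed-form MLEs: sample mean and the SD with divisor n (not n - 1)
mu_hat <- mean(x)
sigma_hat <- sqrt(mean((x - mu_hat)^2))
c(mu_hat, sigma_hat)  # should match mle_result$par up to optimization error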
# Generate binomial data
set.seed(456)
n <- 100
true_p <- 0.3
x_binom <- rbinom(n, size = 1, prob = true_p)
# Negative log-likelihood (optimize minimizes by default)
binom_loglik <- function(p) {
-sum(dbinom(x_binom, size = 1, prob = p, log = TRUE))
}
# Find MLE
mle_binom <- optimize(binom_loglik, interval = c(0, 1))
## TRUE PARAMETERS:
## True p: 0.3
## ML ESTIMATES:
## ML estimate of p: 0.34
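The Bernoulli MLE also has a closed form, \(\hat{p} = \bar{x}\), so optimize can be checked against the sample mean (a sanity check added here; not part of the original chunk):
mean(x_binom)  # should equal the ML estimate of p reported above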
We will first generate a random sample of 200 observations.
In other words:
We will generate 200 random values of x and 200 random values of a normally distributed error term (\(\epsilon\)).
We will then use those x and \(\epsilon\) values to create 200 values of \(y\) using the following formula: \[ y = \beta_0 + \beta_1 x + \epsilon \]
Because we are acting as the data-generating process here, we assume that we know the true values of \(\beta_0\) and \(\beta_1\); specifically, we set \(\beta_0 = 1\) and \(\beta_1 = 2\).
# Generate data
set.seed(789)
n <- 200
x <- rnorm(n)
beta0_true <- 1
beta1_true <- 2
epsilon <- rnorm(n)
y <- beta0_true + beta1_true * x + epsilon
# OLS estimation
ols_model <- lm(y ~ x)
# MLE assuming normal errors
linear_loglik <- function(params) {
beta0 <- params[1]
beta1 <- params[2]
sigma <- params[3]
-sum(dnorm(y, mean = beta0 + beta1 * x, sd = sigma, log = TRUE))
}
# Find MLE
mle_linear <- optim(c(0, 0, 1), linear_loglik)
# Compare results
results <- data.frame(
Parameter = c("beta0", "beta1", "sigma"),
True = c(beta0_true, beta1_true, 1),
OLS = c(coef(ols_model), sigma(ols_model)),
MLE = mle_linear$par
)
print(results)
##             Parameter True      OLS      MLE
## (Intercept)     beta0    1 1.016951 1.016977
## x               beta1    2 2.019124 2.019334
##                 sigma    1 1.018168 1.013195
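The small gap between the OLS and ML estimates of \(\sigma\) is expected: the MLE divides the residual sum of squares by \(n\), while the sigma() accessor for lm objects divides by \(n - 2\). A quick check (added here as a sanity check):
# Rescaling the residual variance by n instead of n - 2 recovers the MLE
sqrt(sum(residuals(ols_model)^2) / n)  # close to mle_linear$par[3]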
This exercise is similar to Example 3. The only difference is how we create the dataset used to calculate our estimates:
Instead of randomly generating the sample as we did in Example 3, we create our own data by assuming that there are 10 observations and manually entering values of \(x\) and \(y\). In this case we are no longer assuming that we know the true data-generating process behind the data (unlike Example 3).
# Manually create data
mydata <- data.frame(
  id = 1:10,
  x  = 1:10,
  y  = c(2.1, 4.3, 5.8, 7.2, 9.1, 11.3, 12.8, 14.2, 16.1, 18.3)
)
head(mydata)
##   id x    y
## 1  1 1  2.1
## 2  2 2  4.3
## 3  3 3  5.8
## 4  4 4  7.2
## 5  5 5  9.1
## 6  6 6 11.3
summary(mydata)
##        id              x               y
##  Min.   : 1.00   Min.   : 1.00   Min.   : 2.10
##  1st Qu.: 3.25   1st Qu.: 3.25   1st Qu.: 6.15
##  Median : 5.50   Median : 5.50   Median :10.20
##  Mean   : 5.50   Mean   : 5.50   Mean   :10.12
##  3rd Qu.: 7.75   3rd Qu.: 7.75   3rd Qu.:13.85
##  Max.   :10.00   Max.   :10.00   Max.   :18.30
# OLS estimation
y <- mydata$y
x <- mydata$x
ols_model <- lm(y ~ x)
# MLE assuming normal errors
linear_loglik <- function(params) {
beta0 <- params[1]
beta1 <- params[2]
sigma <- params[3]
-sum(dnorm(y, mean = beta0 + beta1 * x, sd = sigma, log = TRUE))
}
# Find MLE
mle_linear <- optim(c(0, 0, 1), linear_loglik)
# Compare results
results <- data.frame(
Parameter = c("beta0", "beta1", "sigma"),
OLS = c(coef(ols_model), sigma(ols_model)),
MLE = mle_linear$par
)
print(results)
##             Parameter       OLS       MLE
## (Intercept)     beta0 0.4733333 0.4735081
## x               beta1 1.7539394 1.7539341
##                 sigma 0.2551886 0.2283878
Basic Principle:
Sample moment = population moment: set each sample moment equal to its population counterpart and solve the resulting equations for the unknown parameters.
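For the normal sample below, this principle gives two equations in the two unknowns: \[ \frac{1}{n}\sum_{i=1}^{n} x_i = \mu, \qquad \frac{1}{n}\sum_{i=1}^{n} x_i^2 = \mu^2 + \sigma^2, \] so \(\hat{\mu} = \bar{x}\) and \(\hat{\sigma}^2 = \frac{1}{n}\sum_i x_i^2 - \bar{x}^2\), exactly the quantities computed in the next chunk.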
# Generate sample data from normal distribution
set.seed(123)
n <- 1000
true_mu <- 2
true_sigma <- 1.5
x <- rnorm(n, mean = true_mu, sd = true_sigma)
# Method of Moments Estimation
mom_mu <- mean(x) # First moment
mom_sigma2 <- mean(x^2) - mom_mu^2 # Variance from the first two raw moments
## TRUE PARAMETERS:
## True Mean: mu = 2
## True Variance: sigma^2 = 2.25
## METHOD OF MOMENTS ESTIMATES:
## MOM Estimate for Mean: mu_hat = 2.0242
## MOM Estimate for Variance: sigma^2_hat = 2.2106
Consider the model: \(y_i = \beta_0 + \beta_1 x_i + \epsilon_i\)
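Assuming \(E[\epsilon_i] = 0\) and \(E[\epsilon_i x_i] = 0\), the population moment conditions are \[ E[y_i - \beta_0 - \beta_1 x_i] = 0, \qquad E[(y_i - \beta_0 - \beta_1 x_i)\,x_i] = 0. \] Replacing the expectations with sample averages and solving the two equations yields \[ \hat{\beta}_1 = \frac{\widehat{\mathrm{Cov}}(x, y)}{\widehat{\mathrm{Var}}(x)}, \qquad \hat{\beta}_0 = \bar{y} - \hat{\beta}_1 \bar{x}, \] which is exactly what the chunk below computes.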
# Generate data
set.seed(456)
n <- 200
x <- rnorm(n)
beta0 <- 1
beta1 <- 2
epsilon <- rnorm(n)
y <- beta0 + beta1 * x + epsilon
# OLS Estimation
ols_model <- lm(y ~ x)
# Method of Moments Estimation
# Moment conditions:
# E[y - beta0 - beta1*x] = 0
# E[(y - beta0 - beta1*x)x] = 0
# Solve for beta0 and beta1
mom_beta1 <- cov(y, x) / var(x)
mom_beta0 <- mean(y) - mom_beta1 * mean(x)
# Compare results
results <- data.frame(
Method = c("True", "OLS", "MoM"),
beta0 = c(beta0, coef(ols_model)[1], mom_beta0),
beta1 = c(beta1, coef(ols_model)[2], mom_beta1)
)
print(results)
##   Method    beta0    beta1
## 1   True 1.000000 2.000000
## 2    OLS 1.116799 2.091003
## 3    MoM 1.116799 2.091003
The OLS and MoM estimates are identical: the OLS normal equations are exactly the two moment conditions above, so the two methods solve the same system.
plot(x, y, main="Regression Comparison", pch=20)
abline(ols_model, col="red", lwd=2)
abline(a=mom_beta0, b=mom_beta1, col="blue", lwd=2, lty=2)
legend("topleft", c("OLS", "MoM"), col=c("red", "blue"),
lty=c(1,2), lwd=2)
# Bootstrap to compare efficiency
library(boot)
# Function to compute both estimators
both_estimators <- function(data, indices) {
d <- data[indices, ]
# MoM
mom_b1 <- cov(d$y, d$x) / var(d$x)
mom_b0 <- mean(d$y) - mom_b1 * mean(d$x)
# OLS
ols <- lm(y ~ x, data=d)
return(c(mom_b0, mom_b1, coef(ols)))
}
# Prepare data
data <- data.frame(x=x, y=y)
# Perform bootstrap
set.seed(789)
boot_results <- boot(data, both_estimators, R=1000)
# Compute standard errors
se <- apply(boot_results$t, 2, sd)
names(se) <- c("MoM_b0", "MoM_b1", "OLS_b0", "OLS_b1")
# Print results
print("Bootstrap Standard Errors:")## [1] "Bootstrap Standard Errors:"
## MoM_b0 MoM_b1 OLS_b0 OLS_b1
## 0.06341480 0.06471047 0.06341480 0.06471047
# Load necessary library
library(boot)
# Generate sample data from normal distribution
set.seed(123)
n <- 100
true_mu <- 5
true_sigma <- 2
x <- rnorm(n, mean = true_mu, sd = true_sigma)
# Negative log-likelihood for the normal distribution
log_likelihood_normal <- function(params, data) {
mu <- params[1]
sigma <- params[2]
-sum(dnorm(data, mean = mu, sd = sigma, log = TRUE))
}
# Function to compute MLE
mle_normal <- function(data, indices) {
d <- data[indices]
optim(c(mean(d), sd(d)), log_likelihood_normal, data = d)$par
}
# Bootstrap
set.seed(456)
boot_results <- boot(x, mle_normal, R = 1000)
# Standard errors
se_mu <- sd(boot_results$t[, 1])
se_sigma <- sd(boot_results$t[, 2])
## Bootstrap Standard Errors:
## SE(mu) = 0.183
## SE(sigma) = 0.1235
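For reference, the asymptotic standard error of \(\hat{\mu}\) is \(\sigma/\sqrt{n}\), which is in the same range as the bootstrap value (a check added here; not part of the original chunk):
true_sigma / sqrt(n)  # = 0.2, compared with the bootstrap SE of 0.183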
# Generate sample data from binomial distribution
set.seed(789)
n <- 100
p_true <- 0.3
x_binom <- rbinom(n, size = 1, prob = p_true)
# Negative log-likelihood for the binomial (Bernoulli) distribution
log_likelihood_binom <- function(p, data) {
-sum(dbinom(data, size = 1, prob = p, log = TRUE))
}
# Function to compute MLE
mle_binom <- function(data, indices) {
d <- data[indices]
optimize(log_likelihood_binom, interval = c(0, 1), data = d)$minimum
}
# Bootstrap
set.seed(101)
boot_results_binom <- boot(x_binom, mle_binom, R = 1000)
# Standard error
se_p <- sd(boot_results_binom$t)
## Bootstrap Standard Error for p:
## SE(p) = 0.0451
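For a Bernoulli sample, the asymptotic standard error of \(\hat{p}\) is \(\sqrt{p(1-p)/n}\), close to the bootstrap value (a check added here; not part of the original chunk):
sqrt(p_true * (1 - p_true) / n)  # approx 0.0458, versus the bootstrap SE of 0.0451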
# Load required libraries
library(MASS)    # for the Boston housing data
library(moments) # for calculating higher moments
# Load and prepare median home values from the Boston Housing dataset
data(Boston)
income <- Boston$medv * 1000 # medv is in $1000s; convert to dollars
# Plot the income distribution
hist(income, breaks = 30, probability = TRUE,
main = "Distribution of Median Home Values",
xlab = "Home Value ($)", col = "lightblue")
# Calculate sample moments
sample_mean <- mean(income)
sample_var <- var(income)
# Method of Moments for Gamma distribution
# For Gamma(α, β) with rate β: E(X) = α/β and Var(X) = α/β²
mom_beta <- sample_mean/sample_var
mom_alpha <- sample_mean * mom_beta
# Add fitted gamma distribution to plot
curve(dgamma(x, shape = mom_alpha, rate = mom_beta),
add = TRUE, col = "red", lwd = 2)
## Method of Moments Estimates:
## Shape (α): 6.0024
## Rate (β): 3e-04
##
## Comparison of Moments:
## Sample Mean: 22532.81 Fitted Mean: 22532.81
## Sample Variance: 84586724 Fitted Variance: 84586724
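In equation form, the two moment conditions \(\bar{x} = \alpha/\beta\) and \(s^2 = \alpha/\beta^2\) invert to \[ \hat{\beta} = \frac{\bar{x}}{s^2}, \qquad \hat{\alpha} = \bar{x}\,\hat{\beta} = \frac{\bar{x}^2}{s^2}, \] which is why the fitted mean and variance in the output match the sample moments exactly. (The displayed rate of 3e-04 is rounded; the unrounded estimate satisfies \(\hat{\alpha}/\hat{\beta} = \bar{x}\).)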
# Load and prepare height data
data(survey, package = "MASS")
height <- na.omit(survey$Height)
# Plot the height distribution
hist(height, breaks = 30, probability = TRUE,
main = "Distribution of Heights",
xlab = "Height (cm)", col = "lightblue")
# Calculate sample moments
sample_mean <- mean(height)
sample_var <- var(height)
sample_skew <- skewness(height)
sample_kurt <- kurtosis(height)
# Method of Moments for mixture of two normals
# Assuming equal weights for simplicity
mom_mixture <- function(params) {
mu1 <- params[1]
mu2 <- params[2]
sigma <- params[3]
# Theoretical moments of the mixture.
# Caveat: the third and fourth raw moments below drop the sigma terms, and the
# sample skewness/kurtosis from the moments package are standardized while
# these are raw central moments, so the objective is only a rough matching
# criterion (which helps explain the degenerate fitted SD in the output).
theo_mean <- (mu1 + mu2)/2
theo_var <- (mu1^2 + mu2^2)/2 - theo_mean^2 + sigma^2
theo_skew <- (mu1^3 + mu2^3)/2 - 3*theo_mean*theo_var - theo_mean^3
theo_kurt <- (mu1^4 + mu2^4)/2 - 4*theo_mean*theo_skew -
  6*theo_mean^2*theo_var - theo_mean^4
# Return sum of squared differences
(theo_mean - sample_mean)^2 +
(theo_var - sample_var)^2 +
(theo_skew - sample_skew)^2 +
(theo_kurt - sample_kurt)^2
}
# Find parameters using optimization
init_params <- c(mean(height) - sd(height),
mean(height) + sd(height),
sd(height))
mom_est <- optim(init_params, mom_mixture)
# Add fitted mixture distribution to plot
x <- seq(min(height), max(height), length.out = 100)
y <- 0.5 * dnorm(x, mom_est$par[1], mom_est$par[3]) +
0.5 * dnorm(x, mom_est$par[2], mom_est$par[3])
lines(x, y, col = "red", lwd = 2)
## Method of Moments Estimates:
## Mean 1: 177.18
## Mean 2: 180.07
## Common SD: 0.05
Guidelines for choosing between methods:
Consider computational cost: MoM estimators often have closed forms, while MLE may require numerical optimization.
Balance efficiency against robustness: MLE is asymptotically efficient when the likelihood is correctly specified, but moment-based estimators rely on weaker distributional assumptions.
# Generate well-behaved normal data
set.seed(123)
n <- 200
x <- rnorm(n)
beta0 <- 1
beta1 <- 2
epsilon <- rnorm(n) # Normal errors
y_normal <- beta0 + beta1 * x + epsilon
# OLS Estimation
ols_normal <- lm(y_normal ~ x)
# Method of Moments Estimation
mom_beta1 <- cov(y_normal, x) / var(x)
mom_beta0 <- mean(y_normal) - mom_beta1 * mean(x)
# Compare results
results_normal <- data.frame(
Method = c("True", "OLS", "MoM"),
beta0 = c(beta0, coef(ols_normal)[1], mom_beta0),
beta1 = c(beta1, coef(ols_normal)[2], mom_beta1)
)## [1] "Results for Normal Data:"
## Method beta0 beta1
## 1 True 1.00000 2.000000
## 2 OLS 1.04187 1.970746
## 3 MoM 1.04187 1.970746
# Generate data with t-distributed errors (heavy tails)
set.seed(456)
epsilon_t <- rt(n, df=3) # t-distribution with 3 degrees of freedom
y_heavy <- beta0 + beta1 * x + epsilon_t
# OLS Estimation
ols_heavy <- lm(y_heavy ~ x)
# Method of Moments Estimation
mom_beta1_heavy <- cov(y_heavy, x) / var(x)
mom_beta0_heavy <- mean(y_heavy) - mom_beta1_heavy * mean(x)
# Compare results
results_heavy <- data.frame(
Method = c("True", "OLS", "MoM"),
beta0 = c(beta0, coef(ols_heavy)[1], mom_beta0_heavy),
beta1 = c(beta1, coef(ols_heavy)[2], mom_beta1_heavy)
)## [1] "Results for Heavy-Tailed Data:"
## Method beta0 beta1
## 1 True 1.000000 2.00000
## 2 OLS 1.172027 1.89834
## 3 MoM 1.172027 1.89834
# Generate data with chi-squared errors (skewed)
set.seed(789)
epsilon_chi <- rchisq(n, df=2) - 2 # Centered chi-squared
y_skewed <- beta0 + beta1 * x + epsilon_chi
# OLS Estimation
ols_skewed <- lm(y_skewed ~ x)
# Method of Moments Estimation
mom_beta1_skewed <- cov(y_skewed, x) / var(x)
mom_beta0_skewed <- mean(y_skewed) - mom_beta1_skewed * mean(x)
# Compare results
results_skewed <- data.frame(
Method = c("True", "OLS", "MoM"),
beta0 = c(beta0, coef(ols_skewed)[1], mom_beta0_skewed),
beta1 = c(beta1, coef(ols_skewed)[2], mom_beta1_skewed)
)## [1] "Results for Skewed Data:"
## Method beta0 beta1
## 1 True 1.0000000 2.000000
## 2 OLS 0.9586122 1.995324
## 3 MoM 0.9586122 1.995324
# Function to compute both estimators
both_estimators <- function(data, indices) {
d <- data[indices, ]
# MoM
mom_b1 <- cov(d$y, d$x) / var(d$x)
mom_b0 <- mean(d$y) - mom_b1 * mean(d$x)
# OLS
ols <- lm(y ~ x, data=d)
return(c(mom_b0, mom_b1, coef(ols)))
}
# Bootstrap comparison for all three scenarios
library(boot)
# Normal data
data_normal <- data.frame(x=x, y=y_normal)
boot_normal <- boot(data_normal, both_estimators, R=1000)
# Heavy-tailed data
data_heavy <- data.frame(x=x, y=y_heavy)
boot_heavy <- boot(data_heavy, both_estimators, R=1000)
# Skewed data
data_skewed <- data.frame(x=x, y=y_skewed)
boot_skewed <- boot(data_skewed, both_estimators, R=1000)
# Compute standard errors
se_normal <- apply(boot_normal$t, 2, sd)
se_heavy <- apply(boot_heavy$t, 2, sd)
se_skewed <- apply(boot_skewed$t, 2, sd)
# Create comparison table
se_comparison <- data.frame(
Distribution = c("Normal", "Heavy-tailed", "Skewed"),
MoM_beta0_SE = c(se_normal[1], se_heavy[1], se_skewed[1]),
MoM_beta1_SE = c(se_normal[2], se_heavy[2], se_skewed[2]),
OLS_beta0_SE = c(se_normal[3], se_heavy[3], se_skewed[3]),
OLS_beta1_SE = c(se_normal[4], se_heavy[4], se_skewed[4])
)## [1] "Standard Error Comparison:"
## Distribution MoM_beta0_SE MoM_beta1_SE OLS_beta0_SE OLS_beta1_SE
## 1 Normal 0.0701110 0.07667871 0.0701110 0.07667871
## 2 Heavy-tailed 0.1277265 0.13495854 0.1277265 0.13495854
## 3 Skewed 0.1357854 0.11004954 0.1357854 0.11004954
# QQ plots for different error distributions
par(mfrow=c(1,3))
qqnorm(residuals(ols_normal), main="Normal")
qqline(residuals(ols_normal))
qqnorm(residuals(ols_heavy), main="Heavy-tailed")
qqline(residuals(ols_heavy))
qqnorm(residuals(ols_skewed), main="Skewed")
qqline(residuals(ols_skewed))