---
title: "Regression error analysis simulation"
format:
html:
theme:
light: cosmo
dark: darkly
toc: true
toc-depth: 3
number-sections: true
code-fold: true
code-tools: true
smooth-scroll: true
anchor-sections: true
fontsize: 1.05em
css: styles.css
execute:
echo: true
warning: false
message: false
cache: false
editor: visual
---
```{r}
# Load necessary library
set.seed(123)
# -------------------------
# 1. Generate synthetic data
# -------------------------
n <- 50
X1 <- rnorm(n, 5, 2)
X2 <- rnorm(n, 10, 3)
X3 <- rnorm(n, 0, 1)
# True coefficients
beta0 <- 2
beta1 <- 1.5
beta2 <- -0.8
beta3 <- 0.5
# Generate Y without outlier
Y <- beta0 + beta1*X1 + beta2*X2 + beta3*X3 + rnorm(n, 0, 1)
# Introduce an outlier (last observation)
Y_out <- Y
Y_out[n] <- Y_out[n] + 15 # large outlier
# Create data frames
data_clean <- data.frame(Y[-n], X1[-n], X2[-n], X3[-n])
data_out <- data.frame(Y=Y_out, X1, X2, X3)
# -------------------------
# 2. Cross plots: Y vs each regressor
# -------------------------
par(mfrow=c(1,3)) # 1 row, 3 columns
plot(X1, Y_out, main="Y vs X1", pch=19)
plot(X2, Y_out, main="Y vs X2", pch=19)
plot(X3, Y_out, main="Y vs X3", pch=19)
# -------------------------
# 3. Fit regression models
# -------------------------
# With outlier
fit_out <- lm(Y ~ X1 + X2 + X3, data=data_out)
beta_with_out <- coef(fit_out)
# Without outlier
fit_clean <- lm(data_clean$Y..n. ~ data_clean$X1..n. + data_clean$X2..n. + data_clean$X3..n., data=data_clean)
beta_without_out <- coef(fit_clean)
# -------------------------
# 4. Print beta estimates
# -------------------------
cat("True parameters:\n")
cat(beta0,"," ,beta1,",",beta2,",",beta3,"\n")
cat("Beta estimates WITH outlier:\n")
print(beta_with_out)
cat("\nBeta estimates WITHOUT outlier:\n")
print(beta_without_out)
par(mfrow=c(2,2)) # 1 row, 3 columns
print(summary(fit_out))
plot(fit_out)
print(summary(fit_clean))
plot(fit_clean)
```