Regression error analysis simulation

Code
# Load necessary library

set.seed(123)

# -------------------------
# 1. Generate synthetic data
# -------------------------
n <- 50
X1 <- rnorm(n, 5, 2)
X2 <- rnorm(n, 10, 3)
X3 <- rnorm(n, 0, 1)

# True coefficients
beta0 <- 2
beta1 <- 1.5
beta2 <- -0.8
beta3 <- 0.5

# Generate Y without outlier
Y <- beta0 + beta1*X1 + beta2*X2 + beta3*X3 + rnorm(n, 0, 1)

# Introduce an outlier (last observation)
Y_out <- Y
Y_out[n] <- Y_out[n] + 15  # large outlier

# Create data frames
data_clean <- data.frame(Y[-n], X1[-n], X2[-n], X3[-n])
data_out <- data.frame(Y=Y_out, X1, X2, X3)

# -------------------------
# 2. Cross plots: Y vs each regressor
# -------------------------
par(mfrow=c(1,3))  # 1 row, 3 columns
plot(X1, Y_out, main="Y vs X1", pch=19)
plot(X2, Y_out, main="Y vs X2", pch=19)
plot(X3, Y_out, main="Y vs X3", pch=19)

Code
# -------------------------
# 3. Fit regression models
# -------------------------
# With outlier
fit_out <- lm(Y ~ X1 + X2 + X3, data=data_out)
beta_with_out <- coef(fit_out)

# Without outlier
fit_clean <- lm(data_clean$Y..n. ~ data_clean$X1..n. + data_clean$X2..n. + data_clean$X3..n., data=data_clean)
beta_without_out <- coef(fit_clean)

# -------------------------
# 4. Print beta estimates
# -------------------------
cat("True parameters:\n")
True parameters:
Code
cat(beta0,"," ,beta1,",",beta2,",",beta3,"\n")
2 , 1.5 , -0.8 , 0.5 
Code
cat("Beta estimates WITH outlier:\n")
Beta estimates WITH outlier:
Code
print(beta_with_out)
(Intercept)          X1          X2          X3 
 5.03298667  1.41212904 -1.02568802  0.07767266 
Code
cat("\nBeta estimates WITHOUT outlier:\n")

Beta estimates WITHOUT outlier:
Code
print(beta_without_out)
      (Intercept) data_clean$X1..n. data_clean$X2..n. data_clean$X3..n. 
        3.1720777         1.4333674        -0.8748356         0.4296366 
Code
par(mfrow=c(2,2))  # 1 row, 3 columns

print(summary(fit_out))

Call:
lm(formula = Y ~ X1 + X2 + X3, data = data_out)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.4354 -0.9829 -0.2986  0.6710 12.2246 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  5.03299    1.46994   3.424  0.00131 ** 
X1           1.41213    0.16184   8.726 2.58e-11 ***
X2          -1.02569    0.11165  -9.187 5.62e-12 ***
X3           0.07767    0.30647   0.253  0.80105    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.096 on 46 degrees of freedom
Multiple R-squared:  0.7867,    Adjusted R-squared:  0.7728 
F-statistic: 56.55 on 3 and 46 DF,  p-value: 1.811e-15
Code
plot(fit_out)

Code
print(summary(fit_clean))

Call:
lm(formula = data_clean$Y..n. ~ data_clean$X1..n. + data_clean$X2..n. + 
    data_clean$X3..n., data = data_clean)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.48595 -0.63941 -0.07008  0.62937  2.93264 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)        3.17208    0.66115   4.798  1.8e-05 ***
data_clean$X1..n.  1.43337    0.07129  20.106  < 2e-16 ***
data_clean$X2..n. -0.87484    0.05036 -17.372  < 2e-16 ***
data_clean$X3..n.  0.42964    0.13734   3.128  0.00308 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.923 on 45 degrees of freedom
Multiple R-squared:  0.9456,    Adjusted R-squared:  0.942 
F-statistic: 260.7 on 3 and 45 DF,  p-value: < 2.2e-16
Code
plot(fit_clean)