Simple linear regression models the relationship between a dependent variable \(Y\) and an independent variable \(X\):
\[ Y = \beta_0 + \beta_1 X + \varepsilon \]
- \(\beta_0\): intercept
- \(\beta_1\): slope coefficient
- \(\varepsilon\): error term
2025-04-07
Simple linear regression models the relationship between a dependent variable \(Y\) and an independent variable \(X\):
\[ Y = \beta_0 + \beta_1 X + \varepsilon \]
# Simulate a reproducible data set for the regression example.
set.seed(123)  # fix the RNG so the printed output below is reproducible
n <- 100       # number of observations

# Predictor: normal with mean 50 and sd 10.
x <- rnorm(n, mean = 50, sd = 10)
# Response: true intercept 5, true slope 0.7, Gaussian noise with sd 5.
y <- 5 + 0.7 * x + rnorm(n, mean = 0, sd = 5)

data <- data.frame(x = x, y = y)
head(data)
##          x        y
## 1 44.39524 32.52464
## 2 47.69823 39.67318
## 3 65.58708 49.67750
## 4 50.70508 38.75585
## 5 51.29288 36.14692
## 6 67.15065 51.78032
# Scatterplot of y against x with the fitted least-squares line and its
# confidence band (se = TRUE) drawn on top of the points.
ggplot(data = data, mapping = aes(x = x, y = y)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "darkred") +
  labs(
    title = "Scatterplot with Regression Line",
    x = "X",
    y = "Y"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Fit the simple linear regression of y on x by ordinary least squares,
# then print the coefficient table and fit statistics.
model <- lm(y ~ x, data = data)
summary(model)
##
## Call:
## lm(formula = y ~ x, data = data)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -9.5367 -3.4175 -0.4375  2.9032 16.4520
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  5.79778    2.76324   2.098   0.0385 *
## x            0.67376    0.05344  12.608   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.854 on 98 degrees of freedom
## Multiple R-squared:  0.6186, Adjusted R-squared:  0.6147
## F-statistic:   159 on 1 and 98 DF,  p-value: < 2.2e-16
# Store the model residuals alongside the data so they can be plotted.
data$residuals <- residuals(model)

# Residuals against the predictor, with a dashed reference line at zero.
ggplot(data, aes(x = x, y = residuals)) +
  geom_point(color = "purple") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray") +
  labs(title = "Residual Plot", x = "X", y = "Residuals")
# Simulate a second predictor z (normal, mean 30, sd 5), attach it to the
# data frame, and build a response that is linear in both x and z with
# standard-normal noise.
z <- rnorm(n, mean = 30, sd = 5)
data[["z"]] <- z
data[["y3d"]] <- 5 + 0.6 * x + 0.4 * z + rnorm(n)
# Interactive 3D scatter of the two-predictor data: x and z on the horizontal
# axes, y3d on the vertical axis; markers are also coloured by y3d.
plot_ly(
  data,
  x = ~x,
  y = ~z,
  z = ~y3d,
  type = "scatter3d",
  mode = "markers",
  marker = list(color = ~y3d, colorscale = "Viridis", size = 4)
) %>%
  layout(
    title = "3D Plot: y ~ x + z",
    scene = list(
      xaxis = list(title = "x"),
      yaxis = list(title = "z"),
      zaxis = list(title = "y")
    )
  )
\[ \hat{y} = \hat{\beta}_0 + \hat{\beta}_1 x \]
\[ \text{RSE} = \sqrt{\frac{1}{n-2} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2} \]
# Complete example: simulate data, fit the model, and plot the fit.
library(ggplot2)

# Simulate 100 observations with true intercept 5, slope 0.7, noise sd 5.
set.seed(123)
x <- rnorm(100, mean = 50, sd = 10)
y <- 5 + 0.7 * x + rnorm(100, mean = 0, sd = 5)
data <- data.frame(x, y)

# Ordinary least-squares fit of y on x.
model <- lm(y ~ x, data = data)

# Scatterplot with the fitted regression line overlaid.
ggplot(data, aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "lm")