library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load mtcars dataset
data(mtcars)
# Fit regression model
model <- lm(mpg~disp+hp+wt, data=mtcars)
# Get model summary
model_summ <-summary(model)
model_summ
##
## Call:
## lm(formula = mpg ~ disp + hp + wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.891 -1.640 -0.172 1.061 5.861
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.105505 2.110815 17.579 < 2e-16 ***
## disp -0.000937 0.010350 -0.091 0.92851
## hp -0.031157 0.011436 -2.724 0.01097 *
## wt -3.800891 1.066191 -3.565 0.00133 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.639 on 28 degrees of freedom
## Multiple R-squared: 0.8268, Adjusted R-squared: 0.8083
## F-statistic: 44.57 on 3 and 28 DF, p-value: 8.65e-11
# Create data frame with residuals
data <- data.frame(pred = predict(model), actual = mtcars$mpg)
data <- data %>% mutate(res=pred-actual)
# Residuals
error_mod <- data$actual-data$pred
error_mod
## [1] -2.5700299 -1.6008028 -2.4886829 0.1833269 0.4592780 -2.3721590
## [7] -1.2656477 1.4885011 0.7591033 -0.8411432 -2.2411432 0.6307257
## [13] 0.2384229 -1.6715326 0.0785315 1.0402079 5.4885456 5.7865290
## [19] 1.1240053 5.8609261 -3.1015898 -3.2549189 -3.8911127 -1.2487773
## [25] 2.5361190 -0.3204259 -0.0236311 2.6550420 -0.7024625 -1.2887756
## [31] 2.1831584 -1.6295873
u_hat_sq <- error_mod^2
df <- nrow(data) - length(model$coefficients[2:length(model$coefficients)]) - 1
sigma_sq_hat <- sum(u_hat_sq)/df
RSE <- sqrt(sigma_sq_hat)
RSE
## [1] 2.63893
# Function for Mean Squared Error
MSE <- function(x) { (mean(x^2)) }
mse_mod <- MSE(error_mod)
mse_mod
## [1] 6.093459
RSE^2 * df/nrow(data)
## [1] 6.093459
# Function for Root Mean Squared Error
RMSE <- function(x) { sqrt(mean(x^2)) }
rmse_mod <- RMSE(error_mod)
rmse_mod
## [1] 2.468493
# Function for Mean Absolute Error
mae <- function(x) { mean(abs(x)) }
mae_mod <- mae(error_mod)
mae_mod
## [1] 1.907026
# R2
SQt = sum((mean(data$actual) - data$actual)^2)
SQres = sum((data$pred - data$actual)^2)
R2 = (SQt - SQres) / SQt
R2
## [1] 0.8268361
# R2.adj
obs <- nrow(data)
k <- length(model$coefficients[2:length(model$coefficients)])
R2.adj <- 1-((1-R2)*(obs-1)/(obs-k-1))
R2.adj
## [1] 0.8082829