Regression.R

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

# Load mtcars dataset
data(mtcars)

# Fit regression model
model <- lm(mpg~disp+hp+wt, data=mtcars)

# Get model summary
model_summ <-summary(model)
model_summ

## 
## Call:
## lm(formula = mpg ~ disp + hp + wt, data = mtcars)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.891 -1.640 -0.172  1.061  5.861 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 37.105505   2.110815  17.579  < 2e-16 ***
## disp        -0.000937   0.010350  -0.091  0.92851    
## hp          -0.031157   0.011436  -2.724  0.01097 *  
## wt          -3.800891   1.066191  -3.565  0.00133 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.639 on 28 degrees of freedom
## Multiple R-squared:  0.8268, Adjusted R-squared:  0.8083 
## F-statistic: 44.57 on 3 and 28 DF,  p-value: 8.65e-11

# Create data frame with residuals
data <- data.frame(pred = predict(model), actual = mtcars$mpg)
data <- data %>% mutate(res=pred-actual)





# Residuals
error_mod <-  data$actual-data$pred
error_mod

##  [1] -2.5700299 -1.6008028 -2.4886829  0.1833269  0.4592780 -2.3721590
##  [7] -1.2656477  1.4885011  0.7591033 -0.8411432 -2.2411432  0.6307257
## [13]  0.2384229 -1.6715326  0.0785315  1.0402079  5.4885456  5.7865290
## [19]  1.1240053  5.8609261 -3.1015898 -3.2549189 -3.8911127 -1.2487773
## [25]  2.5361190 -0.3204259 -0.0236311  2.6550420 -0.7024625 -1.2887756
## [31]  2.1831584 -1.6295873

u_hat_sq     <- error_mod^2
df           <- nrow(data) - length(model$coefficients[2:length(model$coefficients)]) - 1
sigma_sq_hat <- sum(u_hat_sq)/df
RSE          <- sqrt(sigma_sq_hat)
RSE

## [1] 2.63893

# Function for Mean Squared Error
MSE <- function(x) { (mean(x^2)) }

mse_mod <- MSE(error_mod)

mse_mod

## [1] 6.093459

RSE^2 * df/nrow(data)

## [1] 6.093459

# Function for Root Mean Squared Error
RMSE <- function(x) { sqrt(mean(x^2)) }

rmse_mod <- RMSE(error_mod)

rmse_mod

## [1] 2.468493

# Function for Mean Absolute Error
mae <- function(x) { mean(abs(x)) }
mae_mod <- mae(error_mod)

mae_mod

## [1] 1.907026

# R2

SQt = sum((mean(data$actual) - data$actual)^2)
SQres = sum((data$pred - data$actual)^2)
R2 = (SQt - SQres) / SQt
R2

## [1] 0.8268361

# R2.adj

obs <- nrow(data)
k <- length(model$coefficients[2:length(model$coefficients)])

R2.adj <- 1-((1-R2)*(obs-1)/(obs-k-1))
R2.adj

## [1] 0.8082829

Regression.R

cided

2022-01-20