data(cheddar)

head(cheddar)
##   taste Acetic   H2S Lactic
## 1  12.3  4.543 3.135   0.86
## 2  20.9  5.159 5.043   1.53
## 3  39.0  5.366 5.438   1.57
## 4  47.9  5.759 7.496   1.81
## 5   5.6  4.663 3.807   0.99
## 6  25.9  5.697 7.601   1.09
Fit_1 <- lm(taste ~ Acetic + Lactic, data = cheddar)
Fit_2 <- lm(taste ~ Acetic + H2S, data = cheddar)
Fit_3 <- lm(taste ~ H2S + Lactic, data = cheddar)

Fit_4 <- lm(taste ~ Acetic + H2S + Lactic, data = cheddar)


olsrr

{olsrr}: Tools for Building OLS Regression Models

(Source: CRAN)


Diagnostics panel

Panel of plots for regression diagnostics.



ols_plot_diagnostics(model)


Diagnostics panel

model <- Fit_4
ols_plot_diagnostics(model)


Residual QQ plot

Description

Graph for detecting violation of normality assumption.


ols_plot_resid_qq(model)

Residual QQ plot

ols_plot_resid_qq(model)

ols_plot_resid_box()

Box plot of residuals to examine if residuals are normally distributed.


ols_plot_resid_box(model)

ols_plot_resid_box()

ols_plot_resid_box(model)


Breusch pagan test

Description

Test for constant variance. It assumes that the error terms are normally distributed.



ols_test_breusch_pagan(model, fitted.values = TRUE, rhs = FALSE,
                       multiple = FALSE, 
                       p.adj = c("none", "bonferroni", "sidak", "holm"),
                       vars = NA)

Breusch pagan test

Breusch pagan test

Value

An object of class "ols_test_breusch_pagan" is a list containing the following components:

  • bp : breusch pagan statistic
  • p : p-value of bp
  • fv : fitted values of the regression model
  • rhs : names of explanatory variables of fitted regression model
  • multiple : logical value indicating if multiple tests should be performed
  • padj : adjusted p values
  • vars : variables to be used for heteroskedasticity test
  • resp : response variable
  • preds : predictors

Breusch pagan test

# model

# use fitted values of the model

ols_test_breusch_pagan(model)
## 
##  Breusch Pagan Test for Heteroskedasticity
##  -----------------------------------------
##  Ho: the variance is constant            
##  Ha: the variance is not constant        
## 
##               Data                
##  ---------------------------------
##  Response : taste 
##  Variables: fitted values of taste 
## 
##         Test Summary         
##  ----------------------------
##  DF            =    1 
##  Chi2          =    1.157465 
##  Prob > Chi2   =    0.2819919

Breusch pagan test

# use independent variables of the model
ols_test_breusch_pagan(model, rhs = TRUE)
## 
##  Breusch Pagan Test for Heteroskedasticity
##  -----------------------------------------
##  Ho: the variance is constant            
##  Ha: the variance is not constant        
## 
##             Data             
##  ----------------------------
##  Response : taste 
##  Variables: Acetic H2S Lactic 
## 
##         Test Summary         
##  ----------------------------
##  DF            =    3 
##  Chi2          =    4.493994 
##  Prob > Chi2   =    0.2128266

Breusch pagan test

# use independent variables of the model and perform multiple tests
ols_test_breusch_pagan(model, rhs = TRUE, multiple = TRUE)
## 
##  Breusch Pagan Test for Heteroskedasticity
##  -----------------------------------------
##  Ho: the variance is constant            
##  Ha: the variance is not constant        
## 
##             Data             
##  ----------------------------
##  Response : taste 
##  Variables: Acetic H2S Lactic 
## 
##         Test Summary (Unadjusted p values)        
##  -----------------------------------------------
##   Variable           chi2       df        p      
##  -----------------------------------------------
##   Acetic           3.8855767     1    0.04870253 
##   H2S              0.5192075     1    0.47117994 
##   Lactic           1.6874864     1    0.19393265 
##  -----------------------------------------------
##   simultaneous     4.4939942     3    0.21282661 
##  -----------------------------------------------

Breusch pagan test

# bonferroni p value adjustment
ols_test_breusch_pagan(model, rhs = TRUE, multiple = TRUE, p.adj = 'bonferroni')
## 
##  Breusch Pagan Test for Heteroskedasticity
##  -----------------------------------------
##  Ho: the variance is constant            
##  Ha: the variance is not constant        
## 
##             Data             
##  ----------------------------
##  Response : taste 
##  Variables: Acetic H2S Lactic 
## 
##         Test Summary (Bonferroni p values)       
##  ----------------------------------------------
##   Variable           chi2       df        p     
##  ----------------------------------------------
##   Acetic           3.8855767     1    0.1461076 
##   H2S              0.5192075     1    1.0000000 
##   Lactic           1.6874864     1    0.5817979 
##  ----------------------------------------------
##   simultaneous     4.4939942     3    0.2128266 
##  ----------------------------------------------

Breusch pagan test

# sidak p value adjustment
ols_test_breusch_pagan(model, rhs = TRUE, multiple = TRUE, p.adj = 'sidak')
## 
##  Breusch Pagan Test for Heteroskedasticity
##  -----------------------------------------
##  Ho: the variance is constant            
##  Ha: the variance is not constant        
## 
##             Data             
##  ----------------------------
##  Response : taste 
##  Variables: Acetic H2S Lactic 
## 
##           Test Summary (Sidak p values)          
##  ----------------------------------------------
##   Variable           chi2       df        p     
##  ----------------------------------------------
##   Acetic           3.8855767     1    0.1391073 
##   H2S              0.5192075     1    0.8521151 
##   Lactic           1.6874864     1    0.4762621 
##  ----------------------------------------------
##   simultaneous     4.4939942     3    0.2128266 
##  ----------------------------------------------
# holm's p value adjustment
ols_test_breusch_pagan(model, rhs = TRUE, multiple = TRUE, p.adj = 'holm')
## 
##  Breusch Pagan Test for Heteroskedasticity
##  -----------------------------------------
##  Ho: the variance is constant            
##  Ha: the variance is not constant        
## 
##             Data             
##  ----------------------------
##  Response : taste 
##  Variables: Acetic H2S Lactic 
## 
##           Test Summary (Holm's p values)         
##  ----------------------------------------------
##   Variable           chi2       df        p     
##  ----------------------------------------------
##   Acetic           3.8855767     1    0.1461076 
##   H2S              0.5192075     1    0.4711799 
##   Lactic           1.6874864     1    0.3878653 
##  ----------------------------------------------
##   simultaneous     4.4939942     3    0.2128266 
##  ----------------------------------------------

ols_aic Akaike information criterion

Description

Akaike information criterion for model selection.


  
  ols_aic(model, method = c("R", "STATA", "SAS"))

ols_aic Akaike information criterion

Arguments

  • model: An object of class lm.
  • method: A character vector; specify the method to compute AIC. Valid options include R, STATA and SAS.

Details

  • AIC provides a means for model selection. Given a collection of models for the data, AIC estimates the quality of each model, relative to each of the other models.
  • R and STATA use loglikelihood to compute AIC. SAS uses residual sum of squares.

ols_sbc Bayesian information criterion

Description

Bayesian information criterion for model selection.


ols_sbc(model, method = c("R", "STATA", "SAS"))

Arguments

  • model: An object of class lm.
  • method: A character vector; specify the method to compute BIC. Valid options include R, STATA and SAS.

ols_sbc Bayesian information criterion

# using R computation method
ols_sbc(model)
## [1] 236.7835
# using STATA computation method

# ols_sbc(model, method = 'STATA')

# using SAS computation method

# ols_sbc(model, method = 'SAS')

Cook's Distance

Cook's Distance

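Steps to compute Cook's distance:

  • Delete observations one at a time.
  • Refit the regression model on the remaining \(n - 1\) observations.
  • Examine how much all of the fitted values change when the ith observation is deleted.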

A data point with a large Cook's D strongly influences the fitted values.

Cook's D bar plot

Description

Bar plot of Cook's distance to detect observations that strongly influence the fitted values of the model.



ols_plot_cooksd_bar(model)

Cook's distance:

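ols_plot_cooksd_bar returns a list containing the following components:

  • outliers : a data.frame with the observation number and Cook's distance of observations that exceed the threshold
  • threshold : threshold for classifying an observation as an outlier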

Cook's distance:

ols_plot_cooksd_bar(model)

Cook's distance:

ols_plot_cooksd_chart(model)

DFBETAs:

Description

Panel of plots to detect influential observations using DFBETAs.



ols_plot_dfbetas(model)

Arguments

  • model: An object of class lm.

Details

  • DFBETA measures the difference in each parameter estimate with and without the influential point.
  • There is a DFBETA for each data point and each parameter estimate, i.e., if there are n observations and k variables, there will be \(n \times k\) DFBETAs.
  • In general, large values of DFBETAS indicate observations that are influential in estimating a given parameter.
  • Belsley, Kuh, and Welsch recommend 2 as a general cutoff value to indicate influential observations and \(2/\sqrt{n}\) as a size-adjusted cutoff.
ols_plot_dfbetas(model)

olsrr: Leverage

The leverage of an observation is based on how much the observation's value on the predictor variable differs from the mean of the predictor variable. The greater an observation's leverage, the more potential it has to be an influential observation.



ols_leverage(model)

Arguments

  • model: An object of class lm.
ols_leverage(model)
##  [1] 0.17525784 0.07593130 0.05994339 0.08829409 0.12879533 0.23036705
##  [7] 0.20709897 0.08333780 0.08291114 0.12013909 0.06531941 0.14929496
## [13] 0.14821335 0.04332811 0.09000337 0.15153827 0.08934443 0.06198950
## [19] 0.08249992 0.26029095 0.14521419 0.03912430 0.20545696 0.23343680
## [25] 0.08406925 0.26606306 0.14973461 0.07036401 0.19818511 0.21445340

Studentized residuals vs leverage plot

Description

Graph for detecting outliers and/or observations with high leverage.



ols_plot_resid_lev(model)

Studentized residuals vs leverage plot

ols_plot_resid_lev(model)

olsrr: PRESS

Description

PRESS (prediction sum of squares) tells you how well the model will predict new data.

  • The prediction sum of squares (PRESS) is the sum of squares of the prediction error.
  • Each prediction error is computed by refitting the model without the ith observation and using that fit to obtain the predicted value for the ith observation.
  • Use PRESS to assess your model's predictive ability.
  • Usually, the smaller the PRESS value, the better the model's predictive ability.

Usage



ols_press(model)

Collinearity diagnostics

Collinearity

Variance inflation factor, tolerance, eigenvalues and condition indices.

Details

  • Collinearity implies two variables are near perfect linear combinations of one another.
  • Multicollinearity involves more than two variables.
  • In the presence of multicollinearity, regression estimates are unstable and have high standard errors.

Collinearity diagnostics

Collinearity diagnostics

# vif and tolerance
ols_vif_tol(model)
##   Variables Tolerance      VIF
## 1    Acetic 0.5459740 1.831589
## 2       H2S 0.5019577 1.992200
## 3    Lactic 0.5160194 1.937912

Collinearity diagnostics

# eigenvalues and condition indices

ols_eigen_cindex(model) %>%
  kable( format = "markdown",digits=4)
Eigenvalue Condition Index intercept Acetic H2S Lactic
3.9154 1.0000 0.0006 0.0004 0.0034 0.0014
0.0646 7.7878 0.0347 0.0072 0.5673 0.0000
0.0165 15.3968 0.0698 0.0154 0.2826 0.9483
0.0036 33.1344 0.8950 0.9770 0.1466 0.0504

Collinearity diagnostics

# collinearity diagnostics
ols_coll_diag(model)
## Tolerance and Variance Inflation Factor
## ---------------------------------------
##   Variables Tolerance      VIF
## 1    Acetic 0.5459740 1.831589
## 2       H2S 0.5019577 1.992200
## 3    Lactic 0.5160194 1.937912
## 
## 
## Eigenvalue and Condition Index
## ------------------------------
##    Eigenvalue Condition Index    intercept       Acetic        H2S       Lactic
## 1 3.915360446         1.00000 0.0005615356 0.0003642985 0.00344925 1.365207e-03
## 2 0.064557155         7.78778 0.0347152460 0.0072159623 0.56732059 2.694277e-05
## 3 0.016516138        15.39684 0.0697512251 0.0154437761 0.28263569 9.482560e-01
## 4 0.003566261        33.13441 0.8949719933 0.9769759631 0.14659447 5.035184e-02