Original Authors: Shulin Cao

Resources and data files

This material has been created using the following resources:
https://www.statmethods.net/stats/index.html

Overview

This section describes basic statistics. It includes code for obtaining descriptive statistics and crosstabulations (including tests of independence), correlations (pearson), t-tests, nonparametric tests of group differences (Mann Whitney U, Wilcoxon Signed Rank), multiple linear regression (including diagnostics, cross-validation and variable selection), analysis of variance (including ANCOVA).

Introduction

First, let’s load dataset that we will use (I will only load the packages when needed).

library(car)
data(mtcars)

Descriptive Statistics

If you want to derive the average values for all different variables.

sapply(mtcars, mean, na.rm=TRUE)
##        mpg        cyl       disp         hp       drat         wt 
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250 
##       qsec         vs         am       gear       carb 
##  17.848750   0.437500   0.406250   3.687500   2.812500
# mean,median,25th and 75th quartiles,min,max
summary(mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
describe(mtcars) 
## mtcars 
## 
##  11  Variables      32  Observations
## ---------------------------------------------------------------------------
## mpg 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       32        0       25    0.999    20.09    6.796    12.00    14.34 
##      .25      .50      .75      .90      .95 
##    15.43    19.20    22.80    30.09    31.30 
## 
## lowest : 10.4 13.3 14.3 14.7 15.0, highest: 26.0 27.3 30.4 32.4 33.9
## ---------------------------------------------------------------------------
## cyl 
##        n  missing distinct     Info     Mean      Gmd 
##       32        0        3    0.866    6.188    1.948 
##                             
## Value          4     6     8
## Frequency     11     7    14
## Proportion 0.344 0.219 0.438
## ---------------------------------------------------------------------------
## disp 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       32        0       27    0.999    230.7    142.5    77.35    80.61 
##      .25      .50      .75      .90      .95 
##   120.83   196.30   326.00   396.00   449.00 
## 
## lowest :  71.1  75.7  78.7  79.0  95.1, highest: 360.0 400.0 440.0 460.0 472.0
## ---------------------------------------------------------------------------
## hp 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       32        0       22    0.997    146.7    77.04    63.65    66.00 
##      .25      .50      .75      .90      .95 
##    96.50   123.00   180.00   243.50   253.55 
## 
## lowest :  52  62  65  66  91, highest: 215 230 245 264 335
## ---------------------------------------------------------------------------
## drat 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       32        0       22    0.997    3.597   0.6099    2.853    3.007 
##      .25      .50      .75      .90      .95 
##    3.080    3.695    3.920    4.209    4.314 
## 
## lowest : 2.76 2.93 3.00 3.07 3.08, highest: 4.08 4.11 4.22 4.43 4.93
## ---------------------------------------------------------------------------
## wt 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       32        0       29    0.999    3.217    1.089    1.736    1.956 
##      .25      .50      .75      .90      .95 
##    2.581    3.325    3.610    4.048    5.293 
## 
## lowest : 1.513 1.615 1.835 1.935 2.140, highest: 3.845 4.070 5.250 5.345 5.424
## ---------------------------------------------------------------------------
## qsec 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       32        0       30        1    17.85    2.009    15.05    15.53 
##      .25      .50      .75      .90      .95 
##    16.89    17.71    18.90    19.99    20.10 
## 
## lowest : 14.50 14.60 15.41 15.50 15.84, highest: 19.90 20.00 20.01 20.22 22.90
## ---------------------------------------------------------------------------
## vs 
##        n  missing distinct     Info      Sum     Mean      Gmd 
##       32        0        2    0.739       14   0.4375   0.5081 
## 
## ---------------------------------------------------------------------------
## am 
##        n  missing distinct     Info      Sum     Mean      Gmd 
##       32        0        2    0.724       13   0.4062    0.498 
## 
## ---------------------------------------------------------------------------
## gear 
##        n  missing distinct     Info     Mean      Gmd 
##       32        0        3    0.841    3.688   0.7863 
##                             
## Value          3     4     5
## Frequency     15    12     5
## Proportion 0.469 0.375 0.156
## ---------------------------------------------------------------------------
## carb 
##        n  missing distinct     Info     Mean      Gmd 
##       32        0        6    0.929    2.812    1.718 
##                                               
## Value          1     2     3     4     6     8
## Frequency      7    10     3    10     1     1
## Proportion 0.219 0.312 0.094 0.312 0.031 0.031
## ---------------------------------------------------------------------------
# n, nmiss, unique, mean, 5,10,25,50,75,90,95th percentiles 
# 5 lowest and 5 highest scores
library(pastecs)
stat.desc(mtcars) 
##                      mpg         cyl         disp           hp
## nbr.val       32.0000000  32.0000000 3.200000e+01   32.0000000
## nbr.null       0.0000000   0.0000000 0.000000e+00    0.0000000
## nbr.na         0.0000000   0.0000000 0.000000e+00    0.0000000
## min           10.4000000   4.0000000 7.110000e+01   52.0000000
## max           33.9000000   8.0000000 4.720000e+02  335.0000000
## range         23.5000000   4.0000000 4.009000e+02  283.0000000
## sum          642.9000000 198.0000000 7.383100e+03 4694.0000000
## median        19.2000000   6.0000000 1.963000e+02  123.0000000
## mean          20.0906250   6.1875000 2.307219e+02  146.6875000
## SE.mean        1.0654240   0.3157093 2.190947e+01   12.1203173
## CI.mean.0.95   2.1729465   0.6438934 4.468466e+01   24.7195501
## var           36.3241028   3.1895161 1.536080e+04 4700.8669355
## std.dev        6.0269481   1.7859216 1.239387e+02   68.5628685
## coef.var       0.2999881   0.2886338 5.371779e-01    0.4674077
##                      drat          wt        qsec          vs          am
## nbr.val       32.00000000  32.0000000  32.0000000 32.00000000 32.00000000
## nbr.null       0.00000000   0.0000000   0.0000000 18.00000000 19.00000000
## nbr.na         0.00000000   0.0000000   0.0000000  0.00000000  0.00000000
## min            2.76000000   1.5130000  14.5000000  0.00000000  0.00000000
## max            4.93000000   5.4240000  22.9000000  1.00000000  1.00000000
## range          2.17000000   3.9110000   8.4000000  1.00000000  1.00000000
## sum          115.09000000 102.9520000 571.1600000 14.00000000 13.00000000
## median         3.69500000   3.3250000  17.7100000  0.00000000  0.00000000
## mean           3.59656250   3.2172500  17.8487500  0.43750000  0.40625000
## SE.mean        0.09451874   0.1729685   0.3158899  0.08909831  0.08820997
## CI.mean.0.95   0.19277224   0.3527715   0.6442617  0.18171719  0.17990541
## var            0.28588135   0.9573790   3.1931661  0.25403226  0.24899194
## std.dev        0.53467874   0.9784574   1.7869432  0.50401613  0.49899092
## coef.var       0.14866382   0.3041285   0.1001159  1.15203687  1.22828533
##                     gear       carb
## nbr.val       32.0000000 32.0000000
## nbr.null       0.0000000  0.0000000
## nbr.na         0.0000000  0.0000000
## min            3.0000000  1.0000000
## max            5.0000000  8.0000000
## range          2.0000000  7.0000000
## sum          118.0000000 90.0000000
## median         4.0000000  2.0000000
## mean           3.6875000  2.8125000
## SE.mean        0.1304266  0.2855297
## CI.mean.0.95   0.2660067  0.5823417
## var            0.5443548  2.6088710
## std.dev        0.7378041  1.6152000
## coef.var       0.2000825  0.5742933
# nbr.val, nbr.null, nbr.na, min max, range, sum, 
# median, mean, SE.mean, CI.mean, var, std.dev, coef.var
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
## 
##     describe
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## The following object is masked from 'package:car':
## 
##     logit
describeBy(mtcars, mtcars$cyl)
## 
##  Descriptive statistics by group 
## group: 4
##      vars  n   mean    sd median trimmed   mad   min    max range  skew
## mpg     1 11  26.66  4.51  26.00   26.44  6.52 21.40  33.90 12.50  0.26
## cyl     2 11   4.00  0.00   4.00    4.00  0.00  4.00   4.00  0.00   NaN
## disp    3 11 105.14 26.87 108.00  104.30 43.00 71.10 146.70 75.60  0.12
## hp      4 11  82.64 20.93  91.00   82.67 32.62 52.00 113.00 61.00  0.01
## drat    5 11   4.07  0.37   4.08    4.02  0.34  3.69   4.93  1.24  1.00
## wt      6 11   2.29  0.57   2.20    2.27  0.54  1.51   3.19  1.68  0.30
## qsec    7 11  19.14  1.68  18.90   18.99  1.48 16.70  22.90  6.20  0.55
## vs      8 11   0.91  0.30   1.00    1.00  0.00  0.00   1.00  1.00 -2.47
## am      9 11   0.73  0.47   1.00    0.78  0.00  0.00   1.00  1.00 -0.88
## gear   10 11   4.09  0.54   4.00    4.11  0.00  3.00   5.00  2.00  0.11
## carb   11 11   1.55  0.52   2.00    1.56  0.00  1.00   2.00  1.00 -0.16
##      kurtosis   se
## mpg     -1.65 1.36
## cyl       NaN 0.00
## disp    -1.64 8.10
## hp      -1.71 6.31
## drat     0.12 0.11
## wt      -1.36 0.17
## qsec    -0.02 0.51
## vs       4.52 0.09
## am      -1.31 0.14
## gear    -0.01 0.16
## carb    -2.15 0.16
## -------------------------------------------------------- 
## group: 6
##      vars n   mean    sd median trimmed   mad    min    max  range  skew
## mpg     1 7  19.74  1.45  19.70   19.74  1.93  17.80  21.40   3.60 -0.16
## cyl     2 7   6.00  0.00   6.00    6.00  0.00   6.00   6.00   0.00   NaN
## disp    3 7 183.31 41.56 167.60  183.31 11.27 145.00 258.00 113.00  0.80
## hp      4 7 122.29 24.26 110.00  122.29  7.41 105.00 175.00  70.00  1.36
## drat    5 7   3.59  0.48   3.90    3.59  0.03   2.76   3.92   1.16 -0.74
## wt      6 7   3.12  0.36   3.21    3.12  0.36   2.62   3.46   0.84 -0.22
## qsec    7 7  17.98  1.71  18.30   17.98  1.90  15.50  20.22   4.72 -0.12
## vs      8 7   0.57  0.53   1.00    0.57  0.00   0.00   1.00   1.00 -0.23
## am      9 7   0.43  0.53   0.00    0.43  0.00   0.00   1.00   1.00  0.23
## gear   10 7   3.86  0.69   4.00    3.86  0.00   3.00   5.00   2.00  0.11
## carb   11 7   3.43  1.81   4.00    3.43  0.00   1.00   6.00   5.00 -0.26
##      kurtosis    se
## mpg     -1.91  0.55
## cyl       NaN  0.00
## disp    -1.23 15.71
## hp       0.25  9.17
## drat    -1.40  0.18
## wt      -1.98  0.13
## qsec    -1.75  0.65
## vs      -2.20  0.20
## am      -2.20  0.20
## gear    -1.24  0.26
## carb    -1.50  0.69
## -------------------------------------------------------- 
## group: 8
##      vars  n   mean    sd median trimmed   mad    min    max  range  skew
## mpg     1 14  15.10  2.56  15.20   15.15  1.56  10.40  19.20   8.80 -0.36
## cyl     2 14   8.00  0.00   8.00    8.00  0.00   8.00   8.00   0.00   NaN
## disp    3 14 353.10 67.77 350.50  349.63 73.39 275.80 472.00 196.20  0.45
## hp      4 14 209.21 50.98 192.50  203.67 44.48 150.00 335.00 185.00  0.91
## drat    5 14   3.23  0.37   3.12    3.19  0.16   2.76   4.22   1.46  1.34
## wt      6 14   4.00  0.76   3.75    3.95  0.41   3.17   5.42   2.25  0.99
## qsec    7 14  16.77  1.20  17.18   16.86  0.79  14.50  18.00   3.50 -0.80
## vs      8 14   0.00  0.00   0.00    0.00  0.00   0.00   0.00   0.00   NaN
## am      9 14   0.14  0.36   0.00    0.08  0.00   0.00   1.00   1.00  1.83
## gear   10 14   3.29  0.73   3.00    3.17  0.00   3.00   5.00   2.00  1.83
## carb   11 14   3.50  1.56   3.50    3.25  0.74   2.00   8.00   6.00  1.48
##      kurtosis    se
## mpg     -0.57  0.68
## cyl       NaN  0.00
## disp    -1.26 18.11
## hp       0.09 13.62
## drat     1.08  0.10
## wt      -0.71  0.20
## qsec    -0.92  0.32
## vs        NaN  0.00
## am       1.45  0.10
## gear     1.45  0.19
## carb     2.24  0.42

Tests of Independence

Now take a look at the correlation matrix

#mtcars is a data frame 
rcorr(as.matrix(mtcars), type = 'pearson')
##        mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
## mpg   1.00 -0.85 -0.85 -0.78  0.68 -0.87  0.42  0.66  0.60  0.48 -0.55
## cyl  -0.85  1.00  0.90  0.83 -0.70  0.78 -0.59 -0.81 -0.52 -0.49  0.53
## disp -0.85  0.90  1.00  0.79 -0.71  0.89 -0.43 -0.71 -0.59 -0.56  0.39
## hp   -0.78  0.83  0.79  1.00 -0.45  0.66 -0.71 -0.72 -0.24 -0.13  0.75
## drat  0.68 -0.70 -0.71 -0.45  1.00 -0.71  0.09  0.44  0.71  0.70 -0.09
## wt   -0.87  0.78  0.89  0.66 -0.71  1.00 -0.17 -0.55 -0.69 -0.58  0.43
## qsec  0.42 -0.59 -0.43 -0.71  0.09 -0.17  1.00  0.74 -0.23 -0.21 -0.66
## vs    0.66 -0.81 -0.71 -0.72  0.44 -0.55  0.74  1.00  0.17  0.21 -0.57
## am    0.60 -0.52 -0.59 -0.24  0.71 -0.69 -0.23  0.17  1.00  0.79  0.06
## gear  0.48 -0.49 -0.56 -0.13  0.70 -0.58 -0.21  0.21  0.79  1.00  0.27
## carb -0.55  0.53  0.39  0.75 -0.09  0.43 -0.66 -0.57  0.06  0.27  1.00
## 
## n= 32 
## 
## 
## P
##      mpg    cyl    disp   hp     drat   wt     qsec   vs     am     gear  
## mpg         0.0000 0.0000 0.0000 0.0000 0.0000 0.0171 0.0000 0.0003 0.0054
## cyl  0.0000        0.0000 0.0000 0.0000 0.0000 0.0004 0.0000 0.0022 0.0042
## disp 0.0000 0.0000        0.0000 0.0000 0.0000 0.0131 0.0000 0.0004 0.0010
## hp   0.0000 0.0000 0.0000        0.0100 0.0000 0.0000 0.0000 0.1798 0.4930
## drat 0.0000 0.0000 0.0000 0.0100        0.0000 0.6196 0.0117 0.0000 0.0000
## wt   0.0000 0.0000 0.0000 0.0000 0.0000        0.3389 0.0010 0.0000 0.0005
## qsec 0.0171 0.0004 0.0131 0.0000 0.6196 0.3389        0.0000 0.2057 0.2425
## vs   0.0000 0.0000 0.0000 0.0000 0.0117 0.0010 0.0000        0.3570 0.2579
## am   0.0003 0.0022 0.0004 0.1798 0.0000 0.0000 0.2057 0.3570        0.0000
## gear 0.0054 0.0042 0.0010 0.4930 0.0000 0.0005 0.2425 0.2579 0.0000       
## carb 0.0011 0.0019 0.0253 0.0000 0.6212 0.0146 0.0000 0.0007 0.7545 0.1290
##      carb  
## mpg  0.0011
## cyl  0.0019
## disp 0.0253
## hp   0.0000
## drat 0.6212
## wt   0.0146
## qsec 0.0000
## vs   0.0007
## am   0.7545
## gear 0.1290
## carb
# Correlation matrix from mtcars
# with mpg, cyl, and disp as rows 
# and hp, drat, and wt as columns 
x <- mtcars[1:3]
y <- mtcars[4:6]
cor(x, y)
##              hp       drat         wt
## mpg  -0.7761684  0.6811719 -0.8676594
## cyl   0.8324475 -0.6999381  0.7824958
## disp  0.7909486 -0.7102139  0.8879799

Visualize it

# First Correlogram Example
par(mfrow = c(1,2))
library(corrgram)
## 
## Attaching package: 'corrgram'
## The following object is masked from 'package:lattice':
## 
##     panel.fill
corrgram(mtcars, order=TRUE, lower.panel=panel.shade,
  upper.panel=panel.pie, text.panel=panel.txt,
  main="Car Milage Data in PC2/PC1 Order")

corrgram(mtcars, order=NULL, lower.panel=panel.shade,
  upper.panel=NULL, text.panel=panel.txt,
  main="Car Milage Data (unsorted)")

t-tests

t.test(mtcars[1,],mtcars[15,])
## 
##  Welch Two Sample t-test
## 
## data:  mtcars[1, ] and mtcars[15, ]
## t = -0.76849, df = 12.595, p-value = 0.4564
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -138.7776   66.1267
## sample estimates:
## mean of x mean of y 
##  29.90727  66.23273
# Boxplot of MPG by Car Cylinders 
boxplot(mpg~cyl,data=mtcars, main="Car Milage Data", 
   xlab="Number of Cylinders", ylab="Miles Per Gallon")

Nonparametric Tests of Group Differences

wilcox.test(as.numeric(mtcars[3,1:7]),as.numeric(mtcars[4,1:7]))
## 
##  Wilcoxon rank sum test
## 
## data:  as.numeric(mtcars[3, 1:7]) and as.numeric(mtcars[4, 1:7])
## W = 22, p-value = 0.8048
## alternative hypothesis: true location shift is not equal to 0

For the wilcox.test you can use the alternative=“less” or alternative=“greater” option to specify a one tailed test.

Multiple (Linear) Regression

fit <- lm(mpg ~ cyl + disp + hp, data=mtcars)
summary(fit) # show results
## 
## Call:
## lm(formula = mpg ~ cyl + disp + hp, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0889 -2.0845 -0.7745  1.3972  6.9183 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 34.18492    2.59078  13.195 1.54e-13 ***
## cyl         -1.22742    0.79728  -1.540   0.1349    
## disp        -0.01884    0.01040  -1.811   0.0809 .  
## hp          -0.01468    0.01465  -1.002   0.3250    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.055 on 28 degrees of freedom
## Multiple R-squared:  0.7679, Adjusted R-squared:  0.743 
## F-statistic: 30.88 on 3 and 28 DF,  p-value: 5.054e-09

Coefficients and Other Stats

# Other useful functions 
coefficients(fit) # model coefficients
## (Intercept)         cyl        disp          hp 
## 34.18491917 -1.22741994 -0.01883809 -0.01467933
confint(fit, level=0.95) # CIs for model parameters 
##                   2.5 %       97.5 %
## (Intercept) 28.87795186 39.491886473
## cyl         -2.86056643  0.405726550
## disp        -0.04014908  0.002472913
## hp          -0.04469028  0.015331608
fitted(fit) # predicted values
##           Mazda RX4       Mazda RX4 Wag          Datsun 710 
##            22.19158            22.19158            25.87555 
##      Hornet 4 Drive   Hornet Sportabout             Valiant 
##            20.34545            15.01497            21.04050 
##          Duster 360           Merc 240D            Merc 230 
##            13.98741            25.60157            25.22830 
##            Merc 280           Merc 280C          Merc 450SE 
##            21.85758            21.85758            16.52774 
##          Merc 450SL         Merc 450SLC  Cadillac Fleetwood 
##            16.52774            16.52774            12.46472 
## Lincoln Continental   Chrysler Imperial            Fiat 128 
##            12.54398            12.70056            26.82385 
##         Honda Civic      Toyota Corolla       Toyota Corona 
##            27.08587            26.98169            25.58889 
##    Dodge Challenger         AMC Javelin          Camaro Z28 
##            16.17315            16.43688            14.17579 
##    Pontiac Firebird           Fiat X1-9       Porsche 914-2 
##            14.26144            26.81819            25.67320 
##        Lotus Europa      Ford Pantera L        Ferrari Dino 
##            25.82497            13.87805            21.51999 
##       Maserati Bora          Volvo 142E 
##            13.77772            25.39578
residuals(fit) # residuals
##           Mazda RX4       Mazda RX4 Wag          Datsun 710 
##          -1.1915791          -1.1915791          -3.0755481 
##      Hornet 4 Drive   Hornet Sportabout             Valiant 
##           1.0545533           3.6850347          -2.9405002 
##          Duster 360           Merc 240D            Merc 230 
##           0.3125881          -1.2015735          -2.4283002 
##            Merc 280           Merc 280C          Merc 450SE 
##          -2.6575782          -4.0575782          -0.1277355 
##          Merc 450SL         Merc 450SLC  Cadillac Fleetwood 
##           0.7722645          -1.3277355          -2.0647197 
## Lincoln Continental   Chrysler Imperial            Fiat 128 
##          -2.1439834           1.9994449           5.5761540 
##         Honda Civic      Toyota Corolla       Toyota Corona 
##           3.3141291           6.9183052          -4.0888899 
##    Dodge Challenger         AMC Javelin          Camaro Z28 
##          -0.6731483          -1.2368815          -0.8757928 
##    Pontiac Firebird           Fiat X1-9       Porsche 914-2 
##           4.9385581           0.4818054           0.3268017 
##        Lotus Europa      Ford Pantera L        Ferrari Dino 
##           4.5750273           1.9219527          -1.8199936 
##       Maserati Bora          Volvo 142E 
##           1.2222812          -3.9957836
anova(fit) # anova table 
## Analysis of Variance Table
## 
## Response: mpg
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## cyl        1 817.71  817.71 87.6000 4.064e-10 ***
## disp       1  37.59   37.59  4.0274   0.05451 .  
## hp         1   9.37    9.37  1.0039   0.32495    
## Residuals 28 261.37    9.33                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vcov(fit) # covariance matrix for model parameters 
##              (Intercept)          cyl          disp            hp
## (Intercept)  6.712128488 -1.786156956  0.0156477359  0.0069613946
## cyl         -1.786156956  0.635649515 -0.0059597991 -0.0052619916
## disp         0.015647736 -0.005959799  0.0001082368 -0.0000255242
## hp           0.006961395 -0.005261992 -0.0000255242  0.0002146479
influence(fit) # regression diagnostics
## $hat
##           Mazda RX4       Mazda RX4 Wag          Datsun 710 
##          0.08371177          0.08371177          0.08677819 
##      Hornet 4 Drive   Hornet Sportabout             Valiant 
##          0.07747090          0.09010123          0.06249839 
##          Duster 360           Merc 240D            Merc 230 
##          0.10139193          0.12140928          0.10822416 
##            Merc 280           Merc 280C          Merc 450SE 
##          0.06444896          0.06444896          0.12342137 
##          Merc 450SL         Merc 450SLC  Cadillac Fleetwood 
##          0.12342137          0.12342137          0.25364222 
## Lincoln Continental   Chrysler Imperial            Fiat 128 
##          0.21591194          0.17244911          0.08406747 
##         Honda Civic      Toyota Corolla       Toyota Corona 
##          0.09510678          0.08734285          0.09416712 
##    Dodge Challenger         AMC Javelin          Camaro Z28 
##          0.13318562          0.13977761          0.10109200 
##    Pontiac Firebird           Fiat X1-9       Porsche 914-2 
##          0.12980375          0.08398124          0.09038548 
##        Lotus Europa      Ford Pantera L        Ferrari Dino 
##          0.10957458          0.14390165          0.13601470 
##       Maserati Bora          Volvo 142E 
##          0.50782507          0.10731118 
## 
## $coefficients
##                      (Intercept)          cyl          disp            hp
## Mazda RX4            0.102454442 -0.069009533  7.802728e-04  7.081534e-04
## Mazda RX4 Wag        0.102454442 -0.069009533  7.802728e-04  7.081534e-04
## Datsun 710          -0.687248826  0.135865644 -4.056553e-04 -1.125318e-03
## Hornet 4 Drive       0.097729396 -0.010863012  6.130778e-04 -9.287958e-04
## Hornet Sportabout   -0.314853040  0.100943673  1.070713e-03 -2.932848e-03
## Valiant             -0.082952123 -0.045118188 -5.249102e-04  2.626077e-03
## Duster 360          -0.008884086 -0.005056034  2.538403e-05  3.080168e-04
## Merc 240D           -0.336186118  0.065065786 -8.943525e-04  6.626352e-04
## Merc 230            -0.709446663  0.169946192 -1.348713e-03 -7.908746e-04
## Merc 280             0.160069525 -0.116142305  1.555058e-03  7.567420e-04
## Merc 280C            0.244393414 -0.177325537  2.374255e-03  1.155390e-03
## Merc 450SE           0.031353120 -0.011055039  1.057363e-04  5.522279e-05
## Merc 450SL          -0.189555071  0.066836690 -6.392618e-04 -3.338666e-04
## Merc 450SLC          0.325897375 -0.114910679  1.099067e-03  5.740087e-04
## Cadillac Fleetwood  -0.366207285  0.175650002 -4.097056e-03  9.421632e-04
## Lincoln Continental -0.327355475  0.168080733 -3.594388e-03  2.127717e-04
## Chrysler Imperial    0.235277687 -0.138094810  2.516619e-03  7.774901e-04
## Fiat 128             0.820736114 -0.039057443 -8.855576e-04 -1.257790e-03
## Honda Civic          0.437090217  0.012422146 -5.199411e-04 -1.905676e-03
## Toyota Corolla       0.919710241 -0.007576702 -1.749934e-03 -1.582915e-03
## Toyota Corona       -1.026162442  0.227154791 -1.127657e-03 -1.774138e-03
## Dodge Challenger     0.129526705 -0.051124265  1.197964e-04  9.196216e-04
## AMC Javelin          0.273567946 -0.107510823  4.552200e-04  1.647672e-03
## Camaro Z28           0.041214716  0.007940562  4.187445e-05 -8.893376e-04
## Pontiac Firebird    -0.060669138 -0.003483099  4.132616e-03 -4.730556e-03
## Fiat X1-9            0.071173221 -0.003475171 -7.467942e-05 -1.091001e-04
## Porsche 914-2        0.080187211 -0.016910434  9.648001e-05  9.144197e-05
## Lotus Europa         1.014017003 -0.222890263 -4.306393e-04  4.261015e-03
## Ford Pantera L      -0.059395765 -0.043775612 -1.870934e-04  3.023983e-03
## Ferrari Dino         0.116815634 -0.054773909  2.004697e-03 -2.087826e-03
## Maserati Bora       -0.142358380 -0.068540510 -2.128887e-03  7.739178e-03
## Volvo 142E          -1.064371890  0.258101556 -1.018045e-03 -2.983384e-03
## 
## $sigma
##           Mazda RX4       Mazda RX4 Wag          Datsun 710 
##            3.102089            3.102089            3.049053 
##      Hornet 4 Drive   Hornet Sportabout             Valiant 
##            3.104142            3.021192            3.055938 
##          Duster 360           Merc 240D            Merc 230 
##            3.110678            3.101529            3.071717 
##            Merc 280           Merc 280C          Merc 450SE 
##            3.066063            3.004757            3.111215 
##          Merc 450SL         Merc 450SLC  Cadillac Fleetwood 
##            3.107273            3.099332            3.077141 
## Lincoln Continental   Chrysler Imperial            Fiat 128 
##            3.076235            3.082438            2.902247 
##         Honda Civic      Toyota Corolla       Toyota Corona 
##            3.038223            2.781725            2.999458 
##    Dodge Challenger         AMC Javelin          Camaro Z28 
##            3.108213            3.100722            3.106243 
##    Pontiac Firebird           Fiat X1-9       Porsche 914-2 
##            2.939778            3.109817            3.110627 
##        Lotus Europa      Ford Pantera L        Ferrari Dino 
##            2.968119            3.085537            3.088422 
##       Maserati Bora          Volvo 142E 
##            3.093206            3.002985 
## 
## $wt.res
##           Mazda RX4       Mazda RX4 Wag          Datsun 710 
##          -1.1915791          -1.1915791          -3.0755481 
##      Hornet 4 Drive   Hornet Sportabout             Valiant 
##           1.0545533           3.6850347          -2.9405002 
##          Duster 360           Merc 240D            Merc 230 
##           0.3125881          -1.2015735          -2.4283002 
##            Merc 280           Merc 280C          Merc 450SE 
##          -2.6575782          -4.0575782          -0.1277355 
##          Merc 450SL         Merc 450SLC  Cadillac Fleetwood 
##           0.7722645          -1.3277355          -2.0647197 
## Lincoln Continental   Chrysler Imperial            Fiat 128 
##          -2.1439834           1.9994449           5.5761540 
##         Honda Civic      Toyota Corolla       Toyota Corona 
##           3.3141291           6.9183052          -4.0888899 
##    Dodge Challenger         AMC Javelin          Camaro Z28 
##          -0.6731483          -1.2368815          -0.8757928 
##    Pontiac Firebird           Fiat X1-9       Porsche 914-2 
##           4.9385581           0.4818054           0.3268017 
##        Lotus Europa      Ford Pantera L        Ferrari Dino 
##           4.5750273           1.9219527          -1.8199936 
##       Maserati Bora          Volvo 142E 
##           1.2222812          -3.9957836

Diagnostic Plots

layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page 
plot(fit)

Comparing Models

# compare models
fit1 <- lm(mpg ~ cyl + disp + hp + drat, data=mtcars)
anova(fit1, fit)
## Analysis of Variance Table
## 
## Model 1: mpg ~ cyl + disp + hp + drat
## Model 2: mpg ~ cyl + disp + hp
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     27 244.90                           
## 2     28 261.37 -1   -16.467 1.8155  0.189

Cross Validation

Variable Selection

# Stepwise Regression
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:DAAG':
## 
##     hills
fit <- lm(mpg ~ cyl + disp + hp, data=mtcars)
step <- stepAIC(fit, direction="both")
## Start:  AIC=75.2
## mpg ~ cyl + disp + hp
## 
##        Df Sum of Sq RSS  AIC
## - hp    1      9.37 271 74.3
## <none>              261 75.2
## - cyl   1     22.12 284 75.8
## - disp  1     30.61 292 76.7
## 
## Step:  AIC=74.3
## mpg ~ cyl + disp
## 
##        Df Sum of Sq RSS  AIC
## <none>              271 74.3
## + hp    1       9.4 261 75.2
## - disp  1      37.6 308 76.5
## - cyl   1      46.4 317 77.4
step$anova # display results
## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## mpg ~ cyl + disp + hp
## 
## Final Model:
## mpg ~ cyl + disp
## 
## 
##   Step Df Deviance Resid. Df Resid. Dev  AIC
## 1                         28        261 75.2
## 2 - hp  1     9.37        29        271 74.3
# All Subsets Regression
library(leaps)
leaps<-regsubsets(mpg ~ cyl + disp + hp, data=mtcars,nbest=10)
# view results 
summary(leaps)
## Subset selection object
## Call: regsubsets.formula(mpg ~ cyl + disp + hp, data = mtcars, nbest = 10)
## 3 Variables  (and intercept)
##      Forced in Forced out
## cyl      FALSE      FALSE
## disp     FALSE      FALSE
## hp       FALSE      FALSE
## 10 subsets of each size up to 3
## Selection Algorithm: exhaustive
##          cyl disp hp 
## 1  ( 1 ) "*" " "  " "
## 1  ( 2 ) " " "*"  " "
## 1  ( 3 ) " " " "  "*"
## 2  ( 1 ) "*" "*"  " "
## 2  ( 2 ) " " "*"  "*"
## 2  ( 3 ) "*" " "  "*"
## 3  ( 1 ) "*" "*"  "*"
# plot a table of models showing variables in each model.
# models are ordered by the selection statistic.
plot(leaps,scale="r2")

# plot statistic by subset size 

Relative Importance

plot(booteval.relimp(boot,sort=TRUE)) # plot result
## Warning in rev(variances[[p]]) - variances[[p + 1]]: Recycling array of length 1 in vector-array arithmetic is deprecated.
##   Use c() or as.vector() instead.

Regression Diagnostics

# Assessing Outliers
outlierTest(fit) # Bonferonni p-value for most extreme obs
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
##                rstudent unadjusted p-value Bonferonni p
## Toyota Corolla      2.6             0.0148        0.474
qqPlot(fit, main="QQ Plot") #qq plot for studentized resid 

##       Fiat 128 Toyota Corolla 
##             18             20
leveragePlots(fit) # leverage plots

Alternatively, you can use avPlots(fit) to achieve the same results. ### Outliers

# Influential Observations
# added variable plots 
# Cook's D plot
# identify D values > 4/(n-k-1) 
cutoff <- 4/((nrow(mtcars)-length(fit$coefficients)-2)) 
plot(fit, which=4, cook.levels=cutoff)

Non-constant Error Variance

# Evaluate homoscedasticity
# non-constant error variance test
ncvTest(fit)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 4.34, Df = 1, p = 0.04
# plot studentized residuals vs. fitted values 
spreadLevelPlot(fit)

## 
## Suggested power transformation:  0.144

Multi-collinearity

# Evaluate Collinearity
vif(fit) # variance inflation factors 
##  cyl disp   hp 
## 6.73 5.52 3.35
sqrt(vif(fit)) > 2 # problem?
##   cyl  disp    hp 
##  TRUE  TRUE FALSE

ANOVA

# One Way Anova (Completely Randomized Design)
fit_an <- aov(mpg ~ disp, data=mtcars)
# Randomized Block Design (B is the blocking factor, cyl is the blocking) 
fit_bl <- aov(mpg ~ disp+cyl, data=mtcars)
layout(matrix(c(1,2,3,4),2,2)) # optional layout 
plot(fit_bl) # diagnostic plots

Outliers

# Detect Outliers in the MTCARS Data
library(mvoutlier)
## Loading required package: sgeostat
## sROC 0.1-2 loaded
outliers <- aq.plot(mtcars[c("mpg","disp","hp","drat","wt","qsec")])
## Projection to the first and second robust principal components.
## Proportion of total variation (explained variance): 0.974

outliers # show list of outliers
## $outliers
##           Mazda RX4       Mazda RX4 Wag          Datsun 710 
##               FALSE               FALSE               FALSE 
##      Hornet 4 Drive   Hornet Sportabout             Valiant 
##               FALSE               FALSE               FALSE 
##          Duster 360           Merc 240D            Merc 230 
##                TRUE                TRUE                TRUE 
##            Merc 280           Merc 280C          Merc 450SE 
##               FALSE               FALSE               FALSE 
##          Merc 450SL         Merc 450SLC  Cadillac Fleetwood 
##               FALSE               FALSE               FALSE 
## Lincoln Continental   Chrysler Imperial            Fiat 128 
##               FALSE               FALSE                TRUE 
##         Honda Civic      Toyota Corolla       Toyota Corona 
##               FALSE               FALSE               FALSE 
##    Dodge Challenger         AMC Javelin          Camaro Z28 
##               FALSE               FALSE                TRUE 
##    Pontiac Firebird           Fiat X1-9       Porsche 914-2 
##               FALSE               FALSE               FALSE 
##        Lotus Europa      Ford Pantera L        Ferrari Dino 
##               FALSE                TRUE               FALSE 
##       Maserati Bora          Volvo 142E 
##                TRUE               FALSE

Notes

There are many other statistical analysis including nomality check and multivariate analysis. Due to the limitation of the time, please refer to other resources listed from Quick-R presented by datacamp.