MISSING DATA ANALYSES

This is an R Markdown document that summarizes methods for identifying the extent of missing data, the mechanism of the missing data, and compares mean substitution and multiple imputation approaches for handling missing data.

We are first going to load our libraries and read in a file that includes 480 observations on 9 variables.

library(mice)

## Warning: package 'mice' was built under R version 3.6.2

## 
## Attaching package: 'mice'

## The following objects are masked from 'package:base':
## 
##     cbind, rbind

library(psych)
library(MissMech)
library(rio)
library(gridExtra)
library(VIM)

## Loading required package: colorspace

## Loading required package: grid

## VIM is ready to use.

## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues

## 
## Attaching package: 'VIM'

## The following object is masked from 'package:datasets':
## 
##     sleep

library(moments)
library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:psych':
## 
##     logit

setwd("/Users/munira/Desktop/Education/Masters/Illinois Institute of Technology/Coursework/554-Multivariate Statistics/Projects/Project1")
project1 <- import("Project 1 Data.csv")

knitr::opts_chunk$set(comment = NA)

What does our missing data look like?

summary(project1)

      case            age            tenure           sex            psychwb           jobsat         jobperf          turnover           gma       
 Min.   :  1.0   Min.   :18.00   Min.   : 1.00   Min.   :0.0000   Min.   : 3.000   Min.   :3.000   Min.   : 3.000   Min.   :0.0000   Min.   : 71.0  
 1st Qu.:120.8   1st Qu.:34.00   1st Qu.: 8.00   1st Qu.:0.0000   1st Qu.: 5.000   1st Qu.:5.000   1st Qu.: 5.000   1st Qu.:0.0000   1st Qu.: 94.0  
 Median :240.5   Median :38.00   Median :10.00   Median :1.0000   Median : 6.000   Median :6.000   Median : 6.000   Median :0.0000   Median :100.0  
 Mean   :240.5   Mean   :37.96   Mean   :10.05   Mean   :0.5417   Mean   : 6.266   Mean   :5.966   Mean   : 6.021   Mean   :0.3208   Mean   :100.1  
 3rd Qu.:360.2   3rd Qu.:42.00   3rd Qu.:12.00   3rd Qu.:1.0000   3rd Qu.: 7.000   3rd Qu.:7.000   3rd Qu.: 7.000   3rd Qu.:1.0000   3rd Qu.:106.0  
 Max.   :480.0   Max.   :53.00   Max.   :21.00   Max.   :1.0000   Max.   :10.000   Max.   :9.000   Max.   :10.000   Max.   :1.0000   Max.   :125.0  
                 NA's   :21      NA's   :26                       NA's   :160      NA's   :160                                       NA's   :39

describe(project1)

         vars   n   mean     sd median trimmed    mad min max range  skew kurtosis   se
case        1 480 240.50 138.71  240.5  240.50 177.91   1 480   479  0.00    -1.21 6.33
age         2 459  37.96   5.32   38.0   37.97   5.93  18  53    35 -0.08    -0.07 0.25
tenure      3 454  10.05   3.08   10.0   10.04   2.97   1  21    20  0.11     0.07 0.14
sex         4 480   0.54   0.50    1.0    0.55   0.00   0   1     1 -0.17    -1.98 0.02
psychwb     5 320   6.27   1.18    6.0    6.27   1.48   3  10     7 -0.06     0.00 0.07
jobsat      6 320   5.97   1.18    6.0    5.96   1.48   3   9     6  0.04    -0.37 0.07
jobperf     7 480   6.02   1.25    6.0    6.03   1.48   3  10     7 -0.05    -0.03 0.06
turnover    8 480   0.32   0.47    0.0    0.28   0.00   0   1     1  0.77    -1.42 0.02
gma         9 441 100.13   8.52  100.0  100.20   8.90  71 125    54 -0.18     0.56 0.41

names(table(project1$age))[table(project1$age)==max(table(project1$age))]

[1] "40"

names(table(project1$tenure))[table(project1$tenure)==max(table(project1$tenure))]

[1] "10"

names(table(project1$sex))[table(project1$sex)==max(table(project1$sex))]

[1] "1"

names(table(project1$psychwb))[table(project1$psychwb)==max(table(project1$psychwb))]

[1] "6" "7"

names(table(project1$jobsat))[table(project1$jobsat)==max(table(project1$jobsat))]

[1] "6"

names(table(project1$jobperf))[table(project1$jobperf)==max(table(project1$jobperf))]

[1] "6"

names(table(project1$turnover))[table(project1$turnover)==max(table(project1$turnover))]

[1] "0"

names(table(project1$gma))[table(project1$gma)==max(table(project1$gma))]

[1] "96"

project1$psychwb[project1$psychwb == 10] <- NA
project1$jobperf[project1$jobperf == 10] <- NA

summary(project1)

      case            age            tenure           sex            psychwb          jobsat         jobperf         turnover           gma       
 Min.   :  1.0   Min.   :18.00   Min.   : 1.00   Min.   :0.0000   Min.   :3.000   Min.   :3.000   Min.   :3.000   Min.   :0.0000   Min.   : 71.0  
 1st Qu.:120.8   1st Qu.:34.00   1st Qu.: 8.00   1st Qu.:0.0000   1st Qu.:5.000   1st Qu.:5.000   1st Qu.:5.000   1st Qu.:0.0000   1st Qu.: 94.0  
 Median :240.5   Median :38.00   Median :10.00   Median :1.0000   Median :6.000   Median :6.000   Median :6.000   Median :0.0000   Median :100.0  
 Mean   :240.5   Mean   :37.96   Mean   :10.05   Mean   :0.5417   Mean   :6.254   Mean   :5.966   Mean   :6.013   Mean   :0.3208   Mean   :100.1  
 3rd Qu.:360.2   3rd Qu.:42.00   3rd Qu.:12.00   3rd Qu.:1.0000   3rd Qu.:7.000   3rd Qu.:7.000   3rd Qu.:7.000   3rd Qu.:1.0000   3rd Qu.:106.0  
 Max.   :480.0   Max.   :53.00   Max.   :21.00   Max.   :1.0000   Max.   :9.000   Max.   :9.000   Max.   :9.000   Max.   :1.0000   Max.   :125.0  
                 NA's   :21      NA's   :26                       NA's   :161     NA's   :160     NA's   :1                        NA's   :39

describe(project1)

         vars   n   mean     sd median trimmed    mad min max range  skew kurtosis   se
case        1 480 240.50 138.71  240.5  240.50 177.91   1 480   479  0.00    -1.21 6.33
age         2 459  37.96   5.32   38.0   37.97   5.93  18  53    35 -0.08    -0.07 0.25
tenure      3 454  10.05   3.08   10.0   10.04   2.97   1  21    20  0.11     0.07 0.14
sex         4 480   0.54   0.50    1.0    0.55   0.00   0   1     1 -0.17    -1.98 0.02
psychwb     5 319   6.25   1.17    6.0    6.26   1.48   3   9     6 -0.13    -0.14 0.07
jobsat      6 320   5.97   1.18    6.0    5.96   1.48   3   9     6  0.04    -0.37 0.07
jobperf     7 479   6.01   1.24    6.0    6.03   1.48   3   9     6 -0.10    -0.13 0.06
turnover    8 480   0.32   0.47    0.0    0.28   0.00   0   1     1  0.77    -1.42 0.02
gma         9 441 100.13   8.52  100.0  100.20   8.90  71 125    54 -0.18     0.56 0.41

hist(project1$psychwb)

hist(project1$jobsat)

hist(project1$jobperf)

hist(project1$gma)

MCAR or MAR?

We now test the hypothesis that the data are missing MCAR. The null hypothesis is that data are MCAR, so a significant test results indicates the data are MAR.

missingdata <- TestMCARNormality(project1,  method = "Auto")
print(missingdata)

Call:
TestMCARNormality(data = project1, method = "Auto")

Number of Patterns:  9 

Total number of cases used in the analysis:  459 

 Pattern(s) used:
          case   age   tenure   sex   psychwb   jobsat   jobperf   turnover   gma   Number of cases
group.1      1     1        1     1         1        1         1          1     1               128
group.2      1     1       NA     1        NA        1         1          1     1                 9
group.3      1     1        1     1         1       NA         1          1     1               136
group.4      1     1        1     1        NA        1         1          1     1               137
group.5      1     1        1     1         1       NA         1          1    NA                10
group.6      1     1        1     1        NA        1         1          1    NA                 8
group.7      1    NA        1     1         1        1         1          1     1                 9
group.8      1     1        1     1         1        1         1          1    NA                15
group.9      1     1       NA     1         1        1         1          1     1                 7

    Test of normality and Homoscedasticity:
  -------------------------------------------

Hawkins Test:

    P-value for the Hawkins test of normality and homoscedasticity:  0.09249588 

    There is not sufficient evidence to reject normality
    or MCAR at 0.05 significance level

What is the pattern of missing data?

md.pattern(project1)

    case sex turnover jobperf age tenure gma jobsat psychwb    
128    1   1        1       1   1      1   1      1       1   0
137    1   1        1       1   1      1   1      1       0   1
136    1   1        1       1   1      1   1      0       1   1
15     1   1        1       1   1      1   0      1       1   1
8      1   1        1       1   1      1   0      1       0   2
10     1   1        1       1   1      1   0      0       1   2
7      1   1        1       1   1      0   1      1       1   1
9      1   1        1       1   1      0   1      1       0   2
4      1   1        1       1   1      0   1      0       1   2
1      1   1        1       1   1      0   0      1       0   3
3      1   1        1       1   1      0   0      0       1   3
9      1   1        1       1   0      1   1      1       1   1
5      1   1        1       1   0      1   1      1       0   2
3      1   1        1       1   0      1   1      0       1   2
1      1   1        1       1   0      1   0      1       0   3
1      1   1        1       1   0      1   0      0       1   3
2      1   1        1       1   0      0   1      0       1   3
1      1   1        1       0   1      1   1      0       1   2
       0   0        0       1  21     26  39    160     161 408

project1_aggr = aggr(project1, col=mdc(1:2), numbers=TRUE, sortVars=TRUE, labels=names(project1), cex.axis=.7, cex.numbers=.7, gap=3, ylab=c("Proportion of missingness","Missingness Pattern"))


 Variables sorted by number of missings: 
 Variable       Count
  psychwb 0.335416667
   jobsat 0.333333333
      gma 0.081250000
   tenure 0.054166667
      age 0.043750000
  jobperf 0.002083333
     case 0.000000000
      sex 0.000000000
 turnover 0.000000000

HANDLING MISSING DATA

What happens when we then look at basic descriptive values in the original file and the imputed file?

Listwise deletion

listdel <- na.omit(project1)
summary(listdel)

      case            age            tenure            sex            psychwb          jobsat         jobperf         turnover           gma     
 Min.   :  1.0   Min.   :18.00   Min.   : 2.000   Min.   :0.0000   Min.   :3.000   Min.   :3.000   Min.   :3.000   Min.   :0.0000   Min.   : 71  
 1st Qu.:146.2   1st Qu.:34.00   1st Qu.: 8.000   1st Qu.:0.0000   1st Qu.:6.000   1st Qu.:5.000   1st Qu.:5.000   1st Qu.:0.0000   1st Qu.: 96  
 Median :262.5   Median :39.00   Median :10.000   Median :0.0000   Median :6.000   Median :6.000   Median :6.000   Median :0.0000   Median :101  
 Mean   :255.5   Mean   :38.20   Mean   : 9.891   Mean   :0.4844   Mean   :6.281   Mean   :6.039   Mean   :6.016   Mean   :0.3438   Mean   :101  
 3rd Qu.:369.8   3rd Qu.:42.25   3rd Qu.:12.000   3rd Qu.:1.0000   3rd Qu.:7.000   3rd Qu.:7.000   3rd Qu.:7.000   3rd Qu.:1.0000   3rd Qu.:106  
 Max.   :479.0   Max.   :53.00   Max.   :19.000   Max.   :1.0000   Max.   :9.000   Max.   :8.000   Max.   :9.000   Max.   :1.0000   Max.   :125

describe(listdel)

         vars   n   mean     sd median trimmed    mad min max range  skew kurtosis    se
case        1 128 255.53 136.66  262.5  257.74 165.31   1 479   478 -0.10    -1.13 12.08
age         2 128  38.20   5.69   39.0   38.33   5.93  18  53    35 -0.35     0.41  0.50
tenure      3 128   9.89   3.13   10.0    9.82   2.97   2  19    17  0.31    -0.03  0.28
sex         4 128   0.48   0.50    0.0    0.48   0.00   0   1     1  0.06    -2.01  0.04
psychwb     5 128   6.28   1.19    6.0    6.32   1.48   3   9     6 -0.30     0.07  0.11
jobsat      6 128   6.04   1.17    6.0    6.05   1.48   3   8     5 -0.07    -0.49  0.10
jobperf     7 128   6.02   1.25    6.0    6.05   1.48   3   9     6 -0.17    -0.32  0.11
turnover    8 128   0.34   0.48    0.0    0.31   0.00   0   1     1  0.65    -1.59  0.04
gma         9 128 101.00   9.04  101.0  100.89   7.41  71 125    54 -0.11     1.06  0.80

listvar <- var(listdel)
round(listvar, 2)

             case    age tenure    sex psychwb jobsat jobperf turnover   gma
case     18676.58 -25.66  12.81 -59.04  -16.72  10.30  -15.85     2.53 35.79
age        -25.66  32.36   9.67   0.11    0.92   1.12   -0.50    -1.11 -2.20
tenure      12.81   9.67   9.81  -0.01    0.28   0.67    0.02    -0.14  0.14
sex        -59.04   0.11  -0.01   0.25    0.04  -0.05    0.03    -0.02 -0.26
psychwb    -16.72   0.92   0.28   0.04    1.42   0.54    0.52    -0.18  3.25
jobsat      10.30   1.12   0.67  -0.05    0.54   1.38    0.39    -0.17  4.94
jobperf    -15.85  -0.50   0.02   0.03    0.52   0.39    1.57    -0.16  4.99
turnover     2.53  -1.11  -0.14  -0.02   -0.18  -0.17   -0.16     0.23 -1.16
gma         35.79  -2.20   0.14  -0.26    3.25   4.94    4.99    -1.16 81.78

listcor <- cor(listdel)
round(listcor, 2)

          case   age tenure   sex psychwb jobsat jobperf turnover   gma
case      1.00 -0.03   0.03 -0.86   -0.10   0.06   -0.09     0.04  0.03
age      -0.03  1.00   0.54  0.04    0.14   0.17   -0.07    -0.41 -0.04
tenure    0.03  0.54   1.00 -0.01    0.07   0.18    0.00    -0.09  0.01
sex      -0.86  0.04  -0.01  1.00    0.06  -0.09    0.05    -0.08 -0.06
psychwb  -0.10  0.14   0.07  0.06    1.00   0.39    0.35    -0.32  0.30
jobsat    0.06  0.17   0.18 -0.09    0.39   1.00    0.26    -0.31  0.47
jobperf  -0.09 -0.07   0.00  0.05    0.35   0.26    1.00    -0.27  0.44
turnover  0.04 -0.41  -0.09 -0.08   -0.32  -0.31   -0.27     1.00 -0.27
gma       0.03 -0.04   0.01 -0.06    0.30   0.47    0.44    -0.27  1.00

Pairwise deletion

summary(project1, na.rm = TRUE)

      case            age            tenure           sex            psychwb          jobsat         jobperf         turnover           gma       
 Min.   :  1.0   Min.   :18.00   Min.   : 1.00   Min.   :0.0000   Min.   :3.000   Min.   :3.000   Min.   :3.000   Min.   :0.0000   Min.   : 71.0  
 1st Qu.:120.8   1st Qu.:34.00   1st Qu.: 8.00   1st Qu.:0.0000   1st Qu.:5.000   1st Qu.:5.000   1st Qu.:5.000   1st Qu.:0.0000   1st Qu.: 94.0  
 Median :240.5   Median :38.00   Median :10.00   Median :1.0000   Median :6.000   Median :6.000   Median :6.000   Median :0.0000   Median :100.0  
 Mean   :240.5   Mean   :37.96   Mean   :10.05   Mean   :0.5417   Mean   :6.254   Mean   :5.966   Mean   :6.013   Mean   :0.3208   Mean   :100.1  
 3rd Qu.:360.2   3rd Qu.:42.00   3rd Qu.:12.00   3rd Qu.:1.0000   3rd Qu.:7.000   3rd Qu.:7.000   3rd Qu.:7.000   3rd Qu.:1.0000   3rd Qu.:106.0  
 Max.   :480.0   Max.   :53.00   Max.   :21.00   Max.   :1.0000   Max.   :9.000   Max.   :9.000   Max.   :9.000   Max.   :1.0000   Max.   :125.0  
                 NA's   :21      NA's   :26                       NA's   :161     NA's   :160     NA's   :1                        NA's   :39

describe(project1, na.rm = TRUE)

         vars   n   mean     sd median trimmed    mad min max range  skew kurtosis   se
case        1 480 240.50 138.71  240.5  240.50 177.91   1 480   479  0.00    -1.21 6.33
age         2 459  37.96   5.32   38.0   37.97   5.93  18  53    35 -0.08    -0.07 0.25
tenure      3 454  10.05   3.08   10.0   10.04   2.97   1  21    20  0.11     0.07 0.14
sex         4 480   0.54   0.50    1.0    0.55   0.00   0   1     1 -0.17    -1.98 0.02
psychwb     5 319   6.25   1.17    6.0    6.26   1.48   3   9     6 -0.13    -0.14 0.07
jobsat      6 320   5.97   1.18    6.0    5.96   1.48   3   9     6  0.04    -0.37 0.07
jobperf     7 479   6.01   1.24    6.0    6.03   1.48   3   9     6 -0.10    -0.13 0.06
turnover    8 480   0.32   0.47    0.0    0.28   0.00   0   1     1  0.77    -1.42 0.02
gma         9 441 100.13   8.52  100.0  100.20   8.90  71 125    54 -0.18     0.56 0.41

var(project1, use = "pairwise.complete.obs")

                  case         age       tenure           sex      psychwb      jobsat    jobperf     turnover         gma
case      1.924000e+04 12.42544548 37.017212708 -59.707724426 -16.07122296 -5.62650862  3.4685974 -0.020876827 -67.3114512
age       1.242545e+01 28.31250773  7.981884634  -0.039629534   1.05373893  0.75823986 -0.1418306 -0.404848208   0.8217120
tenure    3.701721e+01  7.98188463  9.502718052  -0.057288172   0.59483531  0.56474985  0.1195716 -0.008090945   0.7224252
sex      -5.970772e+01 -0.03962953 -0.057288172   0.248782185   0.06152284  0.02686129 -0.0130633  0.001217815   0.3297568
psychwb  -1.607122e+01  1.05373893  0.594835311   0.061522841   1.35985095  0.47141947  0.6513104 -0.145186412   2.9618184
jobsat   -5.626509e+00  0.75823986  0.564749852   0.026861285   0.47141947  1.40006857  0.2496963 -0.142829154   4.1546524
jobperf   3.468597e+00 -0.14183062  0.119571588  -0.013063303   0.65131044  0.24969632  1.5437758 -0.200688324   4.6391178
turnover -2.087683e-02 -0.40484821 -0.008090945   0.001217815  -0.14518641 -0.14282915 -0.2006883  0.218354210  -0.7319779
gma      -6.731145e+01  0.82171201  0.722425232   0.329756751   2.96181840  4.15465237  4.6391178 -0.731977943  72.6053906

pairdel <- cor(project1, use = "pairwise.complete.obs")
round(pairdel, 2)

          case   age tenure   sex psychwb jobsat jobperf turnover   gma
case      1.00  0.02   0.09 -0.86   -0.10  -0.04    0.02     0.00 -0.06
age       0.02  1.00   0.49 -0.01    0.17   0.12   -0.02    -0.16  0.02
tenure    0.09  0.49   1.00 -0.04    0.16   0.15    0.03    -0.01  0.03
sex      -0.86 -0.01  -0.04  1.00    0.11   0.05   -0.02     0.01  0.08
psychwb  -0.10  0.17   0.16  0.11    1.00   0.34    0.45    -0.26  0.29
jobsat   -0.04  0.12   0.15  0.05    0.34   1.00    0.17    -0.26  0.41
jobperf   0.02 -0.02   0.03 -0.02    0.45   0.17    1.00    -0.35  0.44
turnover  0.00 -0.16  -0.01  0.01   -0.26  -0.26   -0.35     1.00 -0.18
gma      -0.06  0.02   0.03  0.08    0.29   0.41    0.44    -0.18  1.00

Mean substitution

project1$age<-ifelse(is.na(project1$age)==T, mean(project1$age, na.rm=T), project1$age)
project1$tenure<-ifelse(is.na(project1$tenure)==T, mean(project1$tenure, na.rm=T), project1$tenure)
project1$psychwb<-ifelse(is.na(project1$psychwb)==T, mean(project1$psychwb, na.rm=T), project1$psychwb)
project1$jobsat<-ifelse(is.na(project1$jobsat)==T, mean(project1$jobsat, na.rm=T), project1$jobsat)
project1$jobperf<-ifelse(is.na(project1$jobperf)==T, mean(project1$jobperf, na.rm=T), project1$jobperf)
project1$gma<-ifelse(is.na(project1$gma)==T, mean(project1$gma, na.rm=T), project1$gma)
#now we remove criterion cases with NA values 
project1 <- na.omit(project1)

summary(project1)

      case            age            tenure           sex            psychwb          jobsat         jobperf         turnover           gma       
 Min.   :  1.0   Min.   :18.00   Min.   : 1.00   Min.   :0.0000   Min.   :3.000   Min.   :3.000   Min.   :3.000   Min.   :0.0000   Min.   : 71.0  
 1st Qu.:120.8   1st Qu.:34.00   1st Qu.: 8.00   1st Qu.:0.0000   1st Qu.:6.000   1st Qu.:5.966   1st Qu.:5.000   1st Qu.:0.0000   1st Qu.: 95.0  
 Median :240.5   Median :38.00   Median :10.00   Median :1.0000   Median :6.254   Median :5.966   Median :6.000   Median :0.0000   Median :100.1  
 Mean   :240.5   Mean   :37.96   Mean   :10.05   Mean   :0.5417   Mean   :6.254   Mean   :5.966   Mean   :6.013   Mean   :0.3208   Mean   :100.1  
 3rd Qu.:360.2   3rd Qu.:41.00   3rd Qu.:12.00   3rd Qu.:1.0000   3rd Qu.:7.000   3rd Qu.:6.000   3rd Qu.:7.000   3rd Qu.:1.0000   3rd Qu.:105.0  
 Max.   :480.0   Max.   :53.00   Max.   :21.00   Max.   :1.0000   Max.   :9.000   Max.   :9.000   Max.   :9.000   Max.   :1.0000   Max.   :125.0

describe(project1)

         vars   n   mean     sd median trimmed    mad min max range  skew kurtosis   se
case        1 480 240.50 138.71 240.50  240.50 177.91   1 480   479  0.00    -1.21 6.33
age         2 480  37.96   5.20  38.00   37.97   5.93  18  53    35 -0.09     0.07 0.24
tenure      3 480  10.05   3.00  10.00   10.04   2.97   1  21    20  0.11     0.25 0.14
sex         4 480   0.54   0.50   1.00    0.55   0.00   0   1     1 -0.17    -1.98 0.02
psychwb     5 480   6.25   0.95   6.25    6.26   0.38   3   9     6 -0.16     1.31 0.04
jobsat      6 480   5.97   0.97   5.97    5.96   0.05   3   9     6  0.05     0.95 0.04
jobperf     7 480   6.01   1.24   6.00    6.03   1.48   3   9     6 -0.10    -0.13 0.06
turnover    8 480   0.32   0.47   0.00    0.28   0.00   0   1     1  0.77    -1.42 0.02
gma         9 480 100.13   8.17 100.13  100.19   7.22  71 125    54 -0.19     0.88 0.37

var(project1)

                  case         age       tenure           sex      psychwb      jobsat     jobperf     turnover         gma
case      1.924000e+04 11.88069735 35.007927676 -59.707724426 -10.66941316 -3.74709029  3.46135608 -0.020876827 -61.8309782
age       1.188070e+01 27.07124956  7.230640937  -0.037892123   0.66610717  0.47942426 -0.13527067 -0.387099122   0.7127114
tenure    3.500793e+01  7.23064094  8.986912897  -0.054178584   0.37448284  0.35625144  0.11283465 -0.007651771   0.6309786
sex      -5.970772e+01 -0.03789212 -0.054178584   0.248782185   0.04084397  0.01788883 -0.01303603  0.001217815   0.3029081
psychwb  -1.066941e+01  0.66610717  0.374482840   0.040843974   0.90278205  0.15556099  0.43133211 -0.096386804   1.7873284
jobsat   -3.747090e+00  0.47942426  0.356251437   0.017888831   0.15556099  0.93240475  0.16629045 -0.095120042   2.5514863
jobperf   3.461356e+00 -0.13527067  0.112834647  -0.013036031   0.43133211  0.16629045  1.54055291 -0.200269350   4.2517955
turnover -2.087683e-02 -0.38709912 -0.007651771   0.001217815  -0.09638680 -0.09512004 -0.20026935  0.218354210  -0.6723806
gma      -6.183098e+01  0.71271143  0.630978611   0.302908080   1.78732837  2.55148632  4.25179554 -0.672380574  66.6938870

meansub <- cor(project1)
round(meansub, 2)

          case   age tenure   sex psychwb jobsat jobperf turnover   gma
case      1.00  0.02   0.08 -0.86   -0.08  -0.03    0.02     0.00 -0.05
age       0.02  1.00   0.46 -0.01    0.13   0.10   -0.02    -0.16  0.02
tenure    0.08  0.46   1.00 -0.04    0.13   0.12    0.03    -0.01  0.03
sex      -0.86 -0.01  -0.04  1.00    0.09   0.04   -0.02     0.01  0.07
psychwb  -0.08  0.13   0.13  0.09    1.00   0.17    0.37    -0.22  0.23
jobsat   -0.03  0.10   0.12  0.04    0.17   1.00    0.14    -0.21  0.32
jobperf   0.02 -0.02   0.03 -0.02    0.37   0.14    1.00    -0.35  0.42
turnover  0.00 -0.16  -0.01  0.01   -0.22  -0.21   -0.35     1.00 -0.18
gma      -0.05  0.02   0.03  0.07    0.23   0.32    0.42    -0.18  1.00

IMPORTANT NOTE: I did NOT impute values for any missing criterion variable data. Standard practice is to delete those cases.

We then run our regression analysis in which we predict JOBPERF from the other 3 variables.

reg <- lm(jobperf ~ psychwb + jobsat + gma, data = project1)
summary(reg)


Call:
lm(formula = jobperf ~ psychwb + jobsat + gma, data = project1)

Residuals:
     Min       1Q   Median       3Q      Max 
-3.12907 -0.65267 -0.04173  0.70658  3.10598 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.634934   0.641131  -2.550   0.0111 *  
psychwb      0.374814   0.053462   7.011 8.15e-12 ***
jobsat      -0.034795   0.054101  -0.643   0.5204    
gma          0.055037   0.006478   8.495 2.54e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.076 on 476 degrees of freedom
Multiple R-squared:  0.2531,    Adjusted R-squared:  0.2484 
F-statistic: 53.76 on 3 and 476 DF,  p-value: < 2.2e-16

Multiple imputation

Let’s do a basic multiple imputation procedure in which we generate 10 datasets. Note that the default for the mice function is to generate 5 datasets.

imp <- mice(data = project1, m=10)
print(imp)

We can check on the imputed values to see if they make sense

head(imp$imp$age)

 [1] 1  2  3  4  5  6  7  8  9  10
<0 rows> (or 0-length row.names)

head(imp$imp$tenure)

 [1] 1  2  3  4  5  6  7  8  9  10
<0 rows> (or 0-length row.names)

head(imp$imp$psychwb)

 [1] 1  2  3  4  5  6  7  8  9  10
<0 rows> (or 0-length row.names)

head(imp$imp$jobsat)

 [1] 1  2  3  4  5  6  7  8  9  10
<0 rows> (or 0-length row.names)

head(imp$imp$jobperf)

 [1] 1  2  3  4  5  6  7  8  9  10
<0 rows> (or 0-length row.names)

head(imp$imp$gma)

 [1] 1  2  3  4  5  6  7  8  9  10
<0 rows> (or 0-length row.names)

Look at the first imputed data set

dat.imp<-complete(imp)
head(dat.imp, n=10)

   case age   tenure sex  psychwb   jobsat jobperf turnover      gma
1     1  40 10.00000   1 8.000000 8.000000       6        0 106.0000
2     2  53 14.00000   1 6.000000 5.000000       5        0  93.0000
3     3  46 10.05286   1 6.253918 7.000000       7        0 107.0000
4     4  37  8.00000   1 7.000000 5.965625       5        0  94.0000
5     5  44  9.00000   1 6.253918 5.000000       5        0 107.0000
6     6  39 10.00000   1 7.000000 5.965625       7        0 100.1315
7     7  33  7.00000   1 6.253918 5.000000       7        0 103.0000
8     8  43  9.00000   1 7.000000 5.965625       7        0 106.0000
9     9  35  9.00000   1 7.000000 7.000000       7        1 108.0000
10   10  37 10.00000   1 5.000000 6.000000       6        0  97.0000

Compare this to the original data frame

head(project1 [,c("age","tenure","psychwb","jobsat","jobperf","gma")], n=10)

   age   tenure  psychwb   jobsat jobperf      gma
1   40 10.00000 8.000000 8.000000       6 106.0000
2   53 14.00000 6.000000 5.000000       5  93.0000
3   46 10.05286 6.253918 7.000000       7 107.0000
4   37  8.00000 7.000000 5.965625       5  94.0000
5   44  9.00000 6.253918 5.000000       5 107.0000
6   39 10.00000 7.000000 5.965625       7 100.1315
7   33  7.00000 6.253918 5.000000       7 103.0000
8   43  9.00000 7.000000 5.965625       7 106.0000
9   35  9.00000 7.000000 7.000000       7 108.0000
10  37 10.00000 5.000000 6.000000       6  97.0000

summary(dat.imp)

      case            age            tenure           sex            psychwb          jobsat         jobperf         turnover           gma       
 Min.   :  1.0   Min.   :18.00   Min.   : 1.00   Min.   :0.0000   Min.   :3.000   Min.   :3.000   Min.   :3.000   Min.   :0.0000   Min.   : 71.0  
 1st Qu.:120.8   1st Qu.:34.00   1st Qu.: 8.00   1st Qu.:0.0000   1st Qu.:6.000   1st Qu.:5.966   1st Qu.:5.000   1st Qu.:0.0000   1st Qu.: 95.0  
 Median :240.5   Median :38.00   Median :10.00   Median :1.0000   Median :6.254   Median :5.966   Median :6.000   Median :0.0000   Median :100.1  
 Mean   :240.5   Mean   :37.96   Mean   :10.05   Mean   :0.5417   Mean   :6.254   Mean   :5.966   Mean   :6.013   Mean   :0.3208   Mean   :100.1  
 3rd Qu.:360.2   3rd Qu.:41.00   3rd Qu.:12.00   3rd Qu.:1.0000   3rd Qu.:7.000   3rd Qu.:6.000   3rd Qu.:7.000   3rd Qu.:1.0000   3rd Qu.:105.0  
 Max.   :480.0   Max.   :53.00   Max.   :21.00   Max.   :1.0000   Max.   :9.000   Max.   :9.000   Max.   :9.000   Max.   :1.0000   Max.   :125.0

describe(dat.imp)

         vars   n   mean     sd median trimmed    mad min max range  skew kurtosis   se
case        1 480 240.50 138.71 240.50  240.50 177.91   1 480   479  0.00    -1.21 6.33
age         2 480  37.96   5.20  38.00   37.97   5.93  18  53    35 -0.09     0.07 0.24
tenure      3 480  10.05   3.00  10.00   10.04   2.97   1  21    20  0.11     0.25 0.14
sex         4 480   0.54   0.50   1.00    0.55   0.00   0   1     1 -0.17    -1.98 0.02
psychwb     5 480   6.25   0.95   6.25    6.26   0.38   3   9     6 -0.16     1.31 0.04
jobsat      6 480   5.97   0.97   5.97    5.96   0.05   3   9     6  0.05     0.95 0.04
jobperf     7 480   6.01   1.24   6.00    6.03   1.48   3   9     6 -0.10    -0.13 0.06
turnover    8 480   0.32   0.47   0.00    0.28   0.00   0   1     1  0.77    -1.42 0.02
gma         9 480 100.13   8.17 100.13  100.19   7.22  71 125    54 -0.19     0.88 0.37

var(dat.imp)

                  case         age       tenure           sex      psychwb      jobsat     jobperf     turnover         gma
case      1.924000e+04 11.88069735 35.007927676 -59.707724426 -10.66941316 -3.74709029  3.46135608 -0.020876827 -61.8309782
age       1.188070e+01 27.07124956  7.230640937  -0.037892123   0.66610717  0.47942426 -0.13527067 -0.387099122   0.7127114
tenure    3.500793e+01  7.23064094  8.986912897  -0.054178584   0.37448284  0.35625144  0.11283465 -0.007651771   0.6309786
sex      -5.970772e+01 -0.03789212 -0.054178584   0.248782185   0.04084397  0.01788883 -0.01303603  0.001217815   0.3029081
psychwb  -1.066941e+01  0.66610717  0.374482840   0.040843974   0.90278205  0.15556099  0.43133211 -0.096386804   1.7873284
jobsat   -3.747090e+00  0.47942426  0.356251437   0.017888831   0.15556099  0.93240475  0.16629045 -0.095120042   2.5514863
jobperf   3.461356e+00 -0.13527067  0.112834647  -0.013036031   0.43133211  0.16629045  1.54055291 -0.200269350   4.2517955
turnover -2.087683e-02 -0.38709912 -0.007651771   0.001217815  -0.09638680 -0.09512004 -0.20026935  0.218354210  -0.6723806
gma      -6.183098e+01  0.71271143  0.630978611   0.302908080   1.78732837  2.55148632  4.25179554 -0.672380574  66.6938870

res <- cor(dat.imp)
round(res, 2)

          case   age tenure   sex psychwb jobsat jobperf turnover   gma
case      1.00  0.02   0.08 -0.86   -0.08  -0.03    0.02     0.00 -0.05
age       0.02  1.00   0.46 -0.01    0.13   0.10   -0.02    -0.16  0.02
tenure    0.08  0.46   1.00 -0.04    0.13   0.12    0.03    -0.01  0.03
sex      -0.86 -0.01  -0.04  1.00    0.09   0.04   -0.02     0.01  0.07
psychwb  -0.08  0.13   0.13  0.09    1.00   0.17    0.37    -0.22  0.23
jobsat   -0.03  0.10   0.12  0.04    0.17   1.00    0.14    -0.21  0.32
jobperf   0.02 -0.02   0.03 -0.02    0.37   0.14    1.00    -0.35  0.42
turnover  0.00 -0.16  -0.01  0.01   -0.22  -0.21   -0.35     1.00 -0.18
gma      -0.05  0.02   0.03  0.07    0.23   0.32    0.42    -0.18  1.00

Let’s look at the sixth imputed data set

dat.imp<-complete(imp, action = 6)
head(dat.imp, n=10)

   case age   tenure sex  psychwb   jobsat jobperf turnover      gma
1     1  40 10.00000   1 8.000000 8.000000       6        0 106.0000
2     2  53 14.00000   1 6.000000 5.000000       5        0  93.0000
3     3  46 10.05286   1 6.253918 7.000000       7        0 107.0000
4     4  37  8.00000   1 7.000000 5.965625       5        0  94.0000
5     5  44  9.00000   1 6.253918 5.000000       5        0 107.0000
6     6  39 10.00000   1 7.000000 5.965625       7        0 100.1315
7     7  33  7.00000   1 6.253918 5.000000       7        0 103.0000
8     8  43  9.00000   1 7.000000 5.965625       7        0 106.0000
9     9  35  9.00000   1 7.000000 7.000000       7        1 108.0000
10   10  37 10.00000   1 5.000000 6.000000       6        0  97.0000

We can also do things like look at observed and imputed values of JOBPERF with respect to the 3 variables

stripplot(imp,jobperf ~ psychwb | .imp, pch=20)

stripplot(imp,jobperf ~ jobsat | .imp, pch=20)

stripplot(imp,jobperf ~ gma | .imp, pch=20)

xyplot(imp, jobperf ~ psychwb | .imp, pch = 20, cex = 1.4)

xyplot(imp, jobperf ~ jobsat | .imp, pch = 20, cex = 1.4)

xyplot(imp, jobperf ~ gma | .imp, pch = 20, cex = 1.4)

Now let’s run our model using the imputed data

fit <- with(data = imp, exp = lm(jobperf ~ psychwb + jobsat + gma))
fit

call :
with.mids(data = imp, expr = lm(jobperf ~ psychwb + jobsat + 
    gma))

call1 :
mice(data = project1, m = 10)

nmis :
    case      age   tenure      sex  psychwb   jobsat  jobperf turnover      gma 
       0        0        0        0        0        0        0        0        0 

analyses :
[[1]]

Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)

Coefficients:
(Intercept)      psychwb       jobsat          gma  
   -1.63493      0.37481     -0.03480      0.05504  


[[2]]

Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)

Coefficients:
(Intercept)      psychwb       jobsat          gma  
   -1.63493      0.37481     -0.03480      0.05504  


[[3]]

Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)

Coefficients:
(Intercept)      psychwb       jobsat          gma  
   -1.63493      0.37481     -0.03480      0.05504  


[[4]]

Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)

Coefficients:
(Intercept)      psychwb       jobsat          gma  
   -1.63493      0.37481     -0.03480      0.05504  


[[5]]

Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)

Coefficients:
(Intercept)      psychwb       jobsat          gma  
   -1.63493      0.37481     -0.03480      0.05504  


[[6]]

Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)

Coefficients:
(Intercept)      psychwb       jobsat          gma  
   -1.63493      0.37481     -0.03480      0.05504  


[[7]]

Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)

Coefficients:
(Intercept)      psychwb       jobsat          gma  
   -1.63493      0.37481     -0.03480      0.05504  


[[8]]

Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)

Coefficients:
(Intercept)      psychwb       jobsat          gma  
   -1.63493      0.37481     -0.03480      0.05504  


[[9]]

Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)

Coefficients:
(Intercept)      psychwb       jobsat          gma  
   -1.63493      0.37481     -0.03480      0.05504  


[[10]]

Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)

Coefficients:
(Intercept)      psychwb       jobsat          gma  
   -1.63493      0.37481     -0.03480      0.05504

What happens if we run the model using the data frame that had missing data?

reg <- lm(jobperf ~ psychwb + jobsat + gma, data = project1)
summary(reg)


Call:
lm(formula = jobperf ~ psychwb + jobsat + gma, data = project1)

Residuals:
     Min       1Q   Median       3Q      Max 
-3.12907 -0.65267 -0.04173  0.70658  3.10598 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.634934   0.641131  -2.550   0.0111 *  
psychwb      0.374814   0.053462   7.011 8.15e-12 ***
jobsat      -0.034795   0.054101  -0.643   0.5204    
gma          0.055037   0.006478   8.495 2.54e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.076 on 476 degrees of freedom
Multiple R-squared:  0.2531,    Adjusted R-squared:  0.2484 
F-statistic: 53.76 on 3 and 476 DF,  p-value: < 2.2e-16

Let’s compare that to what we get when we combine results across the imputed datasets.

summary(pool(fit))

         term    estimate   std.error  statistic       df      p.value
1 (Intercept) -1.63493364 0.641130843 -2.5500780 473.9649 1.108344e-02
2     psychwb  0.37481352 0.053461626  7.0108888 473.9649 8.195222e-12
3      jobsat -0.03479515 0.054101104 -0.6431504 473.9649 5.204376e-01
4         gma  0.05503743 0.006478422  8.4954994 473.9649 2.220446e-16

pool.r.squared(fit, adjusted = FALSE)

         est     lo 95     hi 95 fmi
R^2 0.253085 0.1875524 0.3215408 NaN

SKEW AND KURTOSIS

D’Agostino skewness test

agostino.test(dat.imp$age, alternative = c("two.sided", "less", "greater"))


    D'Agostino skewness test

data:  dat.imp$age
skew = -0.086426, z = -0.783274, p-value = 0.4335
alternative hypothesis: data have a skewness

agostino.test(dat.imp$tenure, alternative = c("two.sided", "less", "greater"))


    D'Agostino skewness test

data:  dat.imp$tenure
skew = 0.10901, z = 0.98689, p-value = 0.3237
alternative hypothesis: data have a skewness

agostino.test(dat.imp$sex, alternative = c("two.sided", "less", "greater"))


    D'Agostino skewness test

data:  dat.imp$sex
skew = -0.16725, z = -1.50850, p-value = 0.1314
alternative hypothesis: data have a skewness

agostino.test(dat.imp$psychwb, alternative = c("two.sided", "less", "greater"))


    D'Agostino skewness test

data:  dat.imp$psychwb
skew = -0.16477, z = -1.48645, p-value = 0.1372
alternative hypothesis: data have a skewness

agostino.test(dat.imp$jobsat, alternative = c("two.sided", "less", "greater"))


    D'Agostino skewness test

data:  dat.imp$jobsat
skew = 0.053552, z = 0.485869, p-value = 0.6271
alternative hypothesis: data have a skewness

agostino.test(dat.imp$jobperf, alternative = c("two.sided", "less", "greater"))


    D'Agostino skewness test

data:  dat.imp$jobperf
skew = -0.095886, z = -0.868655, p-value = 0.385
alternative hypothesis: data have a skewness

agostino.test(dat.imp$turnover, alternative = c("two.sided", "less", "greater"))


    D'Agostino skewness test

data:  dat.imp$turnover
skew = 0.76764, z = 6.24117, p-value = 4.343e-10
alternative hypothesis: data have a skewness

agostino.test(dat.imp$gma, alternative = c("two.sided", "less", "greater"))


    D'Agostino skewness test

data:  dat.imp$gma
skew = -0.1872, z = -1.6857, p-value = 0.09186
alternative hypothesis: data have a skewness

Visualizing tyhe skewed variables

skewness(dat.imp$age)

[1] -0.08642598

skewness(dat.imp$jobsat)

[1] 0.05355189

hist(dat.imp$age)

hist(dat.imp$jobsat)

Mardia tests of multivariate skew and kurtosis

round(skew(dat.imp),2)   #type 3 (default)

[1]  0.00 -0.09  0.11 -0.17 -0.16  0.05 -0.10  0.77 -0.19

round(kurtosi(dat.imp),2)  #type 3 (default)

    case      age   tenure      sex  psychwb   jobsat  jobperf turnover      gma 
   -1.21     0.07     0.25    -1.98     1.31     0.95    -0.13    -1.42     0.88

#for the differences between the three types of skew and kurtosis:
round(skew(dat.imp,type=1),2)  #type 1

[1]  0.00 -0.09  0.11 -0.17 -0.16  0.05 -0.10  0.77 -0.19

round(skew(dat.imp,type=2),2)  #type 2

[1]  0.00 -0.09  0.11 -0.17 -0.17  0.05 -0.10  0.77 -0.19

mardia(dat.imp)

Call: mardia(x = dat.imp)

Mardia tests of multivariate skew and kurtosis
Use describe(x) the to get univariate tests
n.obs = 480   num.vars =  9 
b1p =  2.96   skew =  237.04  with probability =  2e-04
 small sample skew =  238.82  with probability =  0.00015
b2p =  96.51   kurtosis =  -1.94  with probability =  0.052

x <- matrix(rnorm(1000),ncol=10)
describe(x)

    vars   n  mean   sd median trimmed  mad   min  max range  skew kurtosis   se
X1     1 100  0.01 0.97   0.00    0.01 1.05 -2.27 2.80  5.07  0.08    -0.07 0.10
X2     2 100 -0.15 0.97  -0.11   -0.12 0.99 -2.62 1.77  4.39 -0.24    -0.54 0.10
X3     3 100 -0.14 0.98  -0.13   -0.12 0.95 -2.44 2.65  5.09 -0.06     0.03 0.10
X4     4 100  0.05 1.01   0.01    0.02 0.76 -2.82 3.31  6.13  0.32     0.91 0.10
X5     5 100  0.18 0.94   0.23    0.19 0.91 -2.11 2.62  4.73 -0.09    -0.03 0.09
X6     6 100  0.04 1.05   0.10    0.04 1.09 -2.45 2.53  4.99 -0.06    -0.31 0.10
X7     7 100 -0.04 0.97   0.01   -0.06 0.90 -1.98 2.89  4.87  0.20    -0.10 0.10
X8     8 100  0.10 0.95   0.11    0.07 1.08 -1.80 2.25  4.05  0.18    -0.72 0.10
X9     9 100  0.16 1.12   0.10    0.15 1.05 -2.72 3.55  6.27  0.11     0.17 0.11
X10   10 100 -0.08 0.98  -0.08   -0.07 0.98 -2.97 2.12  5.09 -0.19    -0.09 0.10

mardia(x)

Call: mardia(x = x)

Mardia tests of multivariate skew and kurtosis
Use describe(x) the to get univariate tests
n.obs = 100   num.vars =  10 
b1p =  11.98   skew =  199.73  with probability =  0.83
 small sample skew =  206.84  with probability =  0.73
b2p =  114.94   kurtosis =  -1.63  with probability =  0.1

OUTLIERS

Univariate outiers

age_outlier_values <- boxplot.stats(dat.imp$age)$out
boxplot(dat.imp$age, main="age", boxwex=0.1)
mtext(paste("Outliers: ", paste(age_outlier_values, collapse=", ")), cex=0.6)

tenure_outlier_values <- boxplot.stats(dat.imp$tenure)$out
boxplot(dat.imp$tenure, main="tenure", boxwex=0.1)
mtext(paste("Outliers: ", paste(tenure_outlier_values, collapse=", ")), cex=0.6)

psychwb_outlier_values <- boxplot.stats(dat.imp$psychwb)$out
boxplot(dat.imp$psychwb, main="psychwb", boxwex=0.1)
mtext(paste("Outliers: ", paste(psychwb_outlier_values, collapse=", ")), cex=0.6)

jobsat_outlier_values <- boxplot.stats(dat.imp$jobsat)$out
boxplot(dat.imp$jobsat, main="jobsat", boxwex=0.1)
mtext(paste("Outliers: ", paste(jobsat_outlier_values, collapse=", ")), cex=0.6)

jobperf_outlier_values <- boxplot.stats(dat.imp$jobperf)$out
boxplot(dat.imp$jobperf, main="jobperf", boxwex=0.1)
mtext(paste("Outliers: ", paste(jobperf_outlier_values, collapse=", ")), cex=0.6)

gma_outlier_values <- boxplot.stats(dat.imp$gma)$out
boxplot(dat.imp$gma, main="gma", boxwex=0.1)
mtext(paste("Outliers: ", paste(gma_outlier_values, collapse=", ")), cex=0.6)

Multivariate outliers

outlier(dat.imp, method = "mean", addthres = TRUE)

Warning in plot.window(...): "method" is not a graphical parameter

Warning in plot.window(...): "addthres" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "method" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "addthres" is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "method" is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "addthres" is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "method" is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "addthres" is not a graphical parameter

Warning in box(...): "method" is not a graphical parameter

Warning in box(...): "addthres" is not a graphical parameter

Warning in title(...): "method" is not a graphical parameter

Warning in title(...): "addthres" is not a graphical parameter

Warning in text.default(Chi2[n.obs:(n.obs - bad + 1)], D2[worst[1:bad]], : "method" is not a graphical parameter

Warning in text.default(Chi2[n.obs:(n.obs - bad + 1)], D2[worst[1:bad]], : "addthres" is not a graphical parameter

        1         2         3         4         5         6         7         8         9        10        11        12        13        14        15        16        17        18        19        20        21        22        23        24        25        26        27        28        29        30        31        32        33        34        35        36        37        38        39        40 
11.419392 15.783178  9.079817  7.425190 10.989555  5.251453  7.406341  6.452692 10.179570  7.375179 13.153991  9.415589  6.314043  6.687719  8.438447 14.207353  7.504731  6.531711  8.319442  5.527045  5.697569  6.892839  6.600222  6.223072  7.606714 11.956769  8.228375  5.619926 15.122777  8.195105 10.056578 11.134482  9.756263 11.772052  5.458195  8.582842 10.683426  5.666908  5.455868 15.856190 
       41        42        43        44        45        46        47        48        49        50        51        52        53        54        55        56        57        58        59        60        61        62        63        64        65        66        67        68        69        70        71        72        73        74        75        76        77        78        79        80 
 7.722037  9.049969  7.492725 16.428886  9.165068  6.245639 16.206583  6.450225 13.827427 15.241745  4.411360 14.293460  4.118634  7.423141  4.378764  4.423350  9.171220 10.808549  8.131268  7.782075 14.152865 10.347283  9.969207  3.185757 10.943747  8.430742 11.213864 12.863267  6.979996  7.879523  6.837619 10.665560  5.516717  5.250223  5.684632 10.250328  9.023506  3.980877  9.711109 11.755393 
       81        82        83        84        85        86        87        88        89        90        91        92        93        94        95        96        97        98        99       100       101       102       103       104       105       106       107       108       109       110       111       112       113       114       115       116       117       118       119       120 
 4.368757  8.784546  5.980737  6.070896  8.538297  5.531965  4.411070  5.205874 11.183279  7.649746  2.413620 10.069769  5.584571  9.388361  9.207115  7.945344  4.134855 10.361840  6.581706  1.960943  5.901978  8.561407 19.603157 12.771669  7.001832  9.301651  8.130479 16.807962 12.266143  6.625975  4.176604  4.384371  9.677434  5.777954  4.405485  4.569497 10.291551  4.014626  2.659294  6.857347 
      121       122       123       124       125       126       127       128       129       130       131       132       133       134       135       136       137       138       139       140       141       142       143       144       145       146       147       148       149       150       151       152       153       154       155       156       157       158       159       160 
 4.412583 11.702991 13.749110  7.831258 14.740180  8.413287  9.549897  5.200161  5.502696 16.156425  3.974626  3.884414 17.546318 10.606184  6.503505 10.343866 11.596412  9.206365  3.647984  6.868900  6.051162 11.113026  8.818401  7.700342  6.930146  7.042216 14.106905  9.710428  6.866065  4.860060  2.951196  7.128655  7.390816 12.781352  2.768046 10.381283  5.692788  4.842183 23.385578  5.142681 
      161       162       163       164       165       166       167       168       169       170       171       172       173       174       175       176       177       178       179       180       181       182       183       184       185       186       187       188       189       190       191       192       193       194       195       196       197       198       199       200 
12.632568  4.348758  3.103847  5.832293  3.520498  4.647637  9.187160  7.419858  7.941273  5.040482  7.263135  4.899600 10.799364  3.811072  6.720354 16.889233 10.246683  7.768004  6.396522 11.011131  9.826954  6.612300 16.574588  3.655640  6.527012  5.987517 12.023851  6.489063  6.539712 10.698295  3.566156  3.817818  7.632345  7.074156  5.284913  7.058112  6.420292  6.393441 12.525389  9.679066 
      201       202       203       204       205       206       207       208       209       210       211       212       213       214       215       216       217       218       219       220       221       222       223       224       225       226       227       228       229       230       231       232       233       234       235       236       237       238       239       240 
 9.248983  8.984455  4.192451  4.904182  5.702750  7.313462  3.666422 12.850111 10.227882 10.809077  7.393644  9.883381 19.496764  9.842191  7.616699 15.400509  7.068742 11.424972 16.699216  8.969663  7.756248  7.021469  7.817744  6.281500  4.814664 11.976475  4.844129  9.418227  6.141276 10.661408 11.545463  8.815166  4.056598 19.439619 14.604513  7.785155  6.473974 10.183673  9.698299 13.707653 
      241       242       243       244       245       246       247       248       249       250       251       252       253       254       255       256       257       258       259       260       261       262       263       264       265       266       267       268       269       270       271       272       273       274       275       276       277       278       279       280 
12.608247 12.390774  5.233921  6.947600 10.654386 10.961424  5.357127 12.904471  5.520566  8.724346 18.417934  7.279050  6.892146  9.450856  9.890664  7.799539 11.247384 10.196016 18.341696  9.696758 11.644191  7.924799  9.800543  9.371981 16.692268  9.467719  7.242622 12.194222  7.703177 18.137429  9.228506  6.061212  5.355501 11.756925 12.991922 10.013286  6.111834 11.065740 10.686158  9.051085 
      281       282       283       284       285       286       287       288       289       290       291       292       293       294       295       296       297       298       299       300       301       302       303       304       305       306       307       308       309       310       311       312       313       314       315       316       317       318       319       320 
13.293700  5.886501  3.741733  9.783887 12.131554  4.622149 10.691039 16.186138 11.800112 16.163147  4.579438 10.925220 14.884443  8.709794  8.160118  9.057895  3.862542  6.181506  7.329010  6.721078  5.972265  9.705004  7.176135  5.776712  5.876823  4.680523  5.532776 11.548326 10.113937  5.663005 12.585859  9.121456  8.569384  7.255645  5.189728  5.083230  8.134661  7.411546  4.174012  5.511772 
      321       322       323       324       325       326       327       328       329       330       331       332       333       334       335       336       337       338       339       340       341       342       343       344       345       346       347       348       349       350       351       352       353       354       355       356       357       358       359       360 
 4.750599  6.869142 15.884581  8.625419  7.161578  4.685404  6.128155 11.699766  7.789813 13.587317 11.288349 13.126086 11.049459  7.963791 11.928234  4.565839  7.875512  5.706536  5.202335  8.937618 10.739854  7.767758  4.318880  4.796508  6.391392 10.438099 12.538903  5.702756  9.051706 10.772329 15.097514  7.744425  6.291942  5.536867 16.142486 10.351198  8.860030 12.300025 11.972009 11.378871 
      361       362       363       364       365       366       367       368       369       370       371       372       373       374       375       376       377       378       379       380       381       382       383       384       385       386       387       388       389       390       391       392       393       394       395       396       397       398       399       400 
10.625007  8.420646 14.079970 17.319756  4.818950 15.944207  2.623111  3.894721 12.647575  7.304846  5.544923 17.028728  4.384497  7.271147 10.653335  7.970582  7.983099  8.880903  9.112800  9.498981  6.909582  3.831591  7.465430  4.381657  8.956859  9.575041  9.237096  8.640386  6.330190 12.400121 16.508250  7.698648  8.462863  4.852291 18.529875  7.916427  9.939611 14.364129  6.671453  8.005495 
      401       402       403       404       405       406       407       408       409       410       411       412       413       414       415       416       417       418       419       420       421       422       423       424       425       426       427       428       429       430       431       432       433       434       435       436       437       438       439       440 
 5.441089  2.416981  8.751005 11.270866  2.659439  8.360085  3.767699  9.073638  7.118514 13.008347 14.424488 16.558980 16.561686  5.306274  7.491341  5.660012  9.942914 10.936036 11.355292  6.705314  7.293426  4.794905 11.708633  3.972120 15.503983 20.109132 14.683748 11.269535  4.631778  8.520396  6.487938  6.982795  6.568099 16.355423  4.498490 11.193724  5.911631 14.056070 10.007785  6.406753 
      441       442       443       444       445       446       447       448       449       450       451       452       453       454       455       456       457       458       459       460       461       462       463       464       465       466       467       468       469       470       471       472       473       474       475       476       477       478       479       480 
 7.068674  6.967187  9.049721 14.455119 17.367011  7.417337 11.950578  7.711825  4.374636  7.365838  5.808953 10.129323  8.430759  6.737615 17.989029  8.894988 12.270690 20.500825  9.301845 14.622085 13.140468  6.315382  7.291279 34.940408 24.786788  7.544370  7.621707  6.526261 13.670044  8.850176  6.546806 11.323524 11.796708 14.648657  9.919024 12.039941  9.705664 10.114553 13.340510  5.875593

md <- mahalanobis(dat.imp, center = colMeans(dat.imp), cov = cov(dat.imp))
alpha <- .001
cutoff <- (qchisq(p = 1 - alpha, df = ncol(dat.imp)))
names_outliers_MH <- which(md > cutoff)
excluded_mh <- names_outliers_MH
data_clean_mh <- dat.imp[-excluded_mh, ]
dat.imp[excluded_mh, ]

    case age tenure sex psychwb jobsat jobperf turnover gma
464  464  36      5   0       3      3       6        1  71

outlier(data_clean_mh, method = "mean", addthres = TRUE)

Warning in plot.window(...): "method" is not a graphical parameter

Warning in plot.window(...): "addthres" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "method" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "addthres" is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "method" is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "addthres" is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "method" is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "addthres" is not a graphical parameter

Warning in box(...): "method" is not a graphical parameter

Warning in box(...): "addthres" is not a graphical parameter

Warning in title(...): "method" is not a graphical parameter

Warning in title(...): "addthres" is not a graphical parameter

Warning in text.default(Chi2[n.obs:(n.obs - bad + 1)], D2[worst[1:bad]], : "method" is not a graphical parameter

Warning in text.default(Chi2[n.obs:(n.obs - bad + 1)], D2[worst[1:bad]], : "addthres" is not a graphical parameter

        1         2         3         4         5         6         7         8         9        10        11        12        13        14        15        16        17        18        19        20        21        22        23        24        25        26        27        28        29        30        31        32        33        34        35        36        37        38        39        40 
11.840972 15.750961  9.076922  7.454322 11.017873  5.253008  7.389554  6.462010 10.221474  7.370261 13.178030  9.537655  6.381307  6.686151  8.425621 14.197862  7.511533  6.515998  8.303260  5.557328  5.754988  7.020501  6.668573  6.209117  7.793337 12.108343  8.212616  5.621862 15.091615  8.260507 10.047486 11.232724  9.746682 11.746986  5.509064  8.609192 10.682904  5.653017  5.452309 15.933353 
       41        42        43        44        45        46        47        48        49        50        51        52        53        54        55        56        57        58        59        60        61        62        63        64        65        66        67        68        69        70        71        72        73        74        75        76        77        78        79        80 
 7.883277  9.038926  7.553738 16.392564  9.148670  6.261756 16.173287  6.434964 14.135763 15.212679  4.517066 14.309660  4.111266  7.551889  4.443920  4.521636  9.193271 10.795420  8.159793  7.785947 14.212583 10.392305  9.949911  3.181329 10.918916  8.427715 11.188486 12.908185  6.993789  7.947963  6.916872 10.708855  5.542947  5.244049  5.696803 10.417322  9.077934  3.997533  9.693362 12.166364 
       81        82        83        84        85        86        87        88        89        90        91        92        93        94        95        96        97        98        99       100       101       102       103       104       105       106       107       108       109       110       111       112       113       114       115       116       117       118       119       120 
 4.390321  8.977242  6.033407  6.056269  8.525588  5.607449  4.400215  5.193320 11.169874  7.657557  2.423427 10.240986  5.572802  9.373596  9.188818  7.934109  4.177169 10.363040  6.566834  1.959727  5.900349  8.636245 19.561196 12.872086  6.989452  9.309040  8.161872 16.855988 12.244039  6.659087  4.166597  4.402024  9.887671  5.764249  4.412338  4.563423 10.349469  4.024151  2.653535  7.042767 
      121       122       123       124       125       126       127       128       129       130       131       132       133       134       135       136       137       138       139       140       141       142       143       144       145       146       147       148       149       150       151       152       153       154       155       156       157       158       159       160 
 4.413180 11.698672 13.721539  7.835132 14.867391  8.403798  9.541624  5.230250  5.513513 16.249119  3.964625  3.875058 18.004088 10.746575  6.571036 10.464604 11.884660  9.190947  3.659637  6.859308  6.089297 11.135638  8.967948  7.709992  6.918334  7.025438 14.536786  9.906180  6.877154  4.850113  2.945193  7.111704  7.395087 12.822141  2.761729 10.392646  5.703976  4.889157 23.376440  5.158546 
      161       162       163       164       165       166       167       168       169       170       171       172       173       174       175       176       177       178       179       180       181       182       183       184       185       186       187       188       189       190       191       192       193       194       195       196       197       198       199       200 
12.637719  4.358579  3.119930  5.852438  3.517041  4.643997  9.195471  7.512948  7.951503  5.029464  7.311039  4.892642 10.847703  3.830752  6.843674 16.947793 10.361698  7.796900  6.394944 10.998948  9.807179  6.599566 16.734849  3.654064  6.519283  5.975238 12.002263  6.555575  6.671564 10.766722  3.616487  3.822568  7.628574  7.136454  5.272475  7.150561  6.438754  6.477948 12.610521  9.944022 
      201       202       203       204       205       206       207       208       209       210       211       212       213       214       215       216       217       218       219       220       221       222       223       224       225       226       227       228       229       230       231       232       233       234       235       236       237       238       239       240 
 9.249889  8.975881  4.186615  4.902172  5.691076  7.296115  3.660220 13.133620 10.244108 10.804629  7.525882  9.943562 19.487313  9.826179  7.612006 15.465060  7.052539 11.405038 17.004365  9.014184  7.741549  7.046214  7.901938  6.314727  4.803642 12.127243  4.897616  9.408760  6.131894 10.639552 11.524939  8.801434  4.078013 19.979630 14.598933  7.930281  6.466740 10.179885  9.680291 13.725423 
      241       242       243       244       245       246       247       248       249       250       251       252       253       254       255       256       257       258       259       260       261       262       263       264       265       266       267       268       269       270       271       272       273       274       275       276       277       278       279       280 
13.072245 12.372024  5.220921  6.937809 10.676015 11.027499  5.343864 13.186230  5.531902  8.717050 18.546332  7.320534  6.926126  9.565842  9.936523  7.795735 11.262991 10.178859 18.325693  9.681348 11.673672  7.936094  9.869113  9.351125 16.696649  9.486204  7.332088 12.527884  7.748117 18.447122  9.232115  6.086632  5.422503 11.768135 13.287534  9.991010  6.119945 11.156396 10.670918  9.073549 
      281       282       283       284       285       286       287       288       289       290       291       292       293       294       295       296       297       298       299       300       301       302       303       304       305       306       307       308       309       310       311       312       313       314       315       316       317       318       319       320 
13.385524  5.872228  3.741868  9.768103 12.308936  4.633126 10.955994 16.371382 11.795816 16.302308  4.594339 11.052437 15.016992  8.689550  8.143705  9.073393  3.889892  6.168588  7.340001  6.835519  5.958449  9.693332  7.216859  5.793765  5.941618  4.670910  5.575031 11.671395 10.291096  5.673422 12.611899  9.301575  8.559362  7.238510  5.204271  5.077596  8.201165  7.464659  4.232207  5.498790 
      321       322       323       324       325       326       327       328       329       330       331       332       333       334       335       336       337       338       339       340       341       342       343       344       345       346       347       348       349       350       351       352       353       354       355       356       357       358       359       360 
 4.745679  6.884677 15.941328  8.734711  7.185057  4.710150  6.118164 11.697004  7.812899 13.683306 11.434096 13.107812 11.056476  7.976534 11.901553  4.584031  7.928821  5.821433  5.195997  9.065302 10.724749  7.783185  4.311004  4.788831  6.382532 10.726649 12.943872  5.705818  9.260566 10.832520 15.121029  7.790293  6.280842  5.529371 16.112190 10.358675  8.922601 12.289232 11.959651 11.496007 
      361       362       363       364       365       366       367       368       369       370       371       372       373       374       375       376       377       378       379       380       381       382       383       384       385       386       387       388       389       390       391       392       393       394       395       396       397       398       399       400 
10.619128  8.402306 14.506173 17.662368  4.872800 15.908993  2.619727  3.887109 12.633120  7.420379  5.534224 17.159974  4.391097  7.341753 10.813385  7.970670  8.055015  8.860436  9.097541  9.662206  6.906345  3.831989  7.557167  4.382139  9.012683  9.578520  9.449819  8.705900  6.318339 12.662255 16.930357  7.753267  8.560233  4.848766 18.580212  8.258302 10.456110 14.523238  6.802026  7.988672 
      401       402       403       404       405       406       407       408       409       410       411       412       413       414       415       416       417       418       419       420       421       422       423       424       425       426       427       428       429       430       431       432       433       434       435       436       437       438       439       440 
 5.473278  2.410130  8.848533 11.262627  2.707000  8.434428  3.763174  9.184680  7.120902 12.988808 14.869535 16.956989 16.799152  5.320146  7.677055  5.802393 10.035401 10.959404 11.347674  6.822911  7.373301  4.794129 11.872562  3.966207 16.201694 20.680758 14.651035 11.252933  4.621015  8.503399  6.494828  6.971031  6.608999 16.335245  4.521051 11.180752  6.117358 14.081008  9.984850  6.406891 
      441       442       443       444       445       446       447       448       449       450       451       452       453       454       455       456       457       458       459       460       461       462       463       465       466       467       468       469       470       471       472       473       474       475       476       477       478       479       480 
 7.156066  7.087619  9.060148 14.506554 17.591044  7.549956 11.942796  7.744531  4.365107  7.388661  5.819037 10.114919  8.463559  6.742864 18.027876  9.094563 12.315967 20.467105  9.285530 14.590192 13.125183  6.304944  7.315206 25.964425  7.534112  7.610269  6.511034 13.945210  8.879503  6.531683 11.298076 11.770245 14.640253  9.972975 12.026105  9.685162 10.093087 13.311502  5.895739

MULTICOLLINEARITY

data <- cor(dat.imp)
round(data, 2)

          case   age tenure   sex psychwb jobsat jobperf turnover   gma
case      1.00  0.02   0.08 -0.86   -0.08  -0.03    0.02     0.00 -0.05
age       0.02  1.00   0.46 -0.01    0.13   0.10   -0.02    -0.16  0.02
tenure    0.08  0.46   1.00 -0.04    0.13   0.12    0.03    -0.01  0.03
sex      -0.86 -0.01  -0.04  1.00    0.09   0.04   -0.02     0.01  0.07
psychwb  -0.08  0.13   0.13  0.09    1.00   0.17    0.37    -0.22  0.23
jobsat   -0.03  0.10   0.12  0.04    0.17   1.00    0.14    -0.21  0.32
jobperf   0.02 -0.02   0.03 -0.02    0.37   0.14    1.00    -0.35  0.42
turnover  0.00 -0.16  -0.01  0.01   -0.22  -0.21   -0.35     1.00 -0.18
gma      -0.05  0.02   0.03  0.07    0.23   0.32    0.42    -0.18  1.00

model1 <- lm(jobperf ~ age + tenure + sex + psychwb + jobsat + gma, data = dat.imp)
vif(model1)

     age   tenure      sex  psychwb   jobsat      gma 
1.284822 1.292375 1.013178 1.095978 1.144377 1.163126

model2 <- lm(turnover ~ age + tenure + sex + psychwb + jobsat + gma, data = dat.imp)
vif(model2)

     age   tenure      sex  psychwb   jobsat      gma 
1.284822 1.292375 1.013178 1.095978 1.144377 1.163126

PSYC 554 Multivariate Statistics – Project 1

Munira Ajmal

September 27, 2020