This is an R Markdown document that summarizes methods for identifying the extent of missing data, the mechanism of the missing data, and compares mean substitution and multiple imputation approaches for handling missing data.
We are first going to load our libraries and read in a file that includes 480 observations on 9 variables.
library(mice)
## Warning: package 'mice' was built under R version 3.6.2
##
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(psych)
library(MissMech)
library(rio)
library(gridExtra)
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
library(moments)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
setwd("/Users/munira/Desktop/Education/Masters/Illinois Institute of Technology/Coursework/554-Multivariate Statistics/Projects/Project1")
project1 <- import("Project 1 Data.csv")
knitr::opts_chunk$set(comment = NA)
What does our missing data look like?
summary(project1)
case age tenure sex psychwb jobsat jobperf turnover gma
Min. : 1.0 Min. :18.00 Min. : 1.00 Min. :0.0000 Min. : 3.000 Min. :3.000 Min. : 3.000 Min. :0.0000 Min. : 71.0
1st Qu.:120.8 1st Qu.:34.00 1st Qu.: 8.00 1st Qu.:0.0000 1st Qu.: 5.000 1st Qu.:5.000 1st Qu.: 5.000 1st Qu.:0.0000 1st Qu.: 94.0
Median :240.5 Median :38.00 Median :10.00 Median :1.0000 Median : 6.000 Median :6.000 Median : 6.000 Median :0.0000 Median :100.0
Mean :240.5 Mean :37.96 Mean :10.05 Mean :0.5417 Mean : 6.266 Mean :5.966 Mean : 6.021 Mean :0.3208 Mean :100.1
3rd Qu.:360.2 3rd Qu.:42.00 3rd Qu.:12.00 3rd Qu.:1.0000 3rd Qu.: 7.000 3rd Qu.:7.000 3rd Qu.: 7.000 3rd Qu.:1.0000 3rd Qu.:106.0
Max. :480.0 Max. :53.00 Max. :21.00 Max. :1.0000 Max. :10.000 Max. :9.000 Max. :10.000 Max. :1.0000 Max. :125.0
NA's :21 NA's :26 NA's :160 NA's :160 NA's :39
describe(project1)
vars n mean sd median trimmed mad min max range skew kurtosis se
case 1 480 240.50 138.71 240.5 240.50 177.91 1 480 479 0.00 -1.21 6.33
age 2 459 37.96 5.32 38.0 37.97 5.93 18 53 35 -0.08 -0.07 0.25
tenure 3 454 10.05 3.08 10.0 10.04 2.97 1 21 20 0.11 0.07 0.14
sex 4 480 0.54 0.50 1.0 0.55 0.00 0 1 1 -0.17 -1.98 0.02
psychwb 5 320 6.27 1.18 6.0 6.27 1.48 3 10 7 -0.06 0.00 0.07
jobsat 6 320 5.97 1.18 6.0 5.96 1.48 3 9 6 0.04 -0.37 0.07
jobperf 7 480 6.02 1.25 6.0 6.03 1.48 3 10 7 -0.05 -0.03 0.06
turnover 8 480 0.32 0.47 0.0 0.28 0.00 0 1 1 0.77 -1.42 0.02
gma 9 441 100.13 8.52 100.0 100.20 8.90 71 125 54 -0.18 0.56 0.41
names(table(project1$age))[table(project1$age)==max(table(project1$age))]
[1] "40"
names(table(project1$tenure))[table(project1$tenure)==max(table(project1$tenure))]
[1] "10"
names(table(project1$sex))[table(project1$sex)==max(table(project1$sex))]
[1] "1"
names(table(project1$psychwb))[table(project1$psychwb)==max(table(project1$psychwb))]
[1] "6" "7"
names(table(project1$jobsat))[table(project1$jobsat)==max(table(project1$jobsat))]
[1] "6"
names(table(project1$jobperf))[table(project1$jobperf)==max(table(project1$jobperf))]
[1] "6"
names(table(project1$turnover))[table(project1$turnover)==max(table(project1$turnover))]
[1] "0"
names(table(project1$gma))[table(project1$gma)==max(table(project1$gma))]
[1] "96"
project1$psychwb[project1$psychwb == 10] <- NA
project1$jobperf[project1$jobperf == 10] <- NA
summary(project1)
case age tenure sex psychwb jobsat jobperf turnover gma
Min. : 1.0 Min. :18.00 Min. : 1.00 Min. :0.0000 Min. :3.000 Min. :3.000 Min. :3.000 Min. :0.0000 Min. : 71.0
1st Qu.:120.8 1st Qu.:34.00 1st Qu.: 8.00 1st Qu.:0.0000 1st Qu.:5.000 1st Qu.:5.000 1st Qu.:5.000 1st Qu.:0.0000 1st Qu.: 94.0
Median :240.5 Median :38.00 Median :10.00 Median :1.0000 Median :6.000 Median :6.000 Median :6.000 Median :0.0000 Median :100.0
Mean :240.5 Mean :37.96 Mean :10.05 Mean :0.5417 Mean :6.254 Mean :5.966 Mean :6.013 Mean :0.3208 Mean :100.1
3rd Qu.:360.2 3rd Qu.:42.00 3rd Qu.:12.00 3rd Qu.:1.0000 3rd Qu.:7.000 3rd Qu.:7.000 3rd Qu.:7.000 3rd Qu.:1.0000 3rd Qu.:106.0
Max. :480.0 Max. :53.00 Max. :21.00 Max. :1.0000 Max. :9.000 Max. :9.000 Max. :9.000 Max. :1.0000 Max. :125.0
NA's :21 NA's :26 NA's :161 NA's :160 NA's :1 NA's :39
describe(project1)
vars n mean sd median trimmed mad min max range skew kurtosis se
case 1 480 240.50 138.71 240.5 240.50 177.91 1 480 479 0.00 -1.21 6.33
age 2 459 37.96 5.32 38.0 37.97 5.93 18 53 35 -0.08 -0.07 0.25
tenure 3 454 10.05 3.08 10.0 10.04 2.97 1 21 20 0.11 0.07 0.14
sex 4 480 0.54 0.50 1.0 0.55 0.00 0 1 1 -0.17 -1.98 0.02
psychwb 5 319 6.25 1.17 6.0 6.26 1.48 3 9 6 -0.13 -0.14 0.07
jobsat 6 320 5.97 1.18 6.0 5.96 1.48 3 9 6 0.04 -0.37 0.07
jobperf 7 479 6.01 1.24 6.0 6.03 1.48 3 9 6 -0.10 -0.13 0.06
turnover 8 480 0.32 0.47 0.0 0.28 0.00 0 1 1 0.77 -1.42 0.02
gma 9 441 100.13 8.52 100.0 100.20 8.90 71 125 54 -0.18 0.56 0.41
hist(project1$psychwb)
hist(project1$jobsat)
hist(project1$jobperf)
hist(project1$gma)
We now test the hypothesis that the data are missing MCAR. The null hypothesis is that data are MCAR, so a significant test results indicates the data are MAR.
missingdata <- TestMCARNormality(project1, method = "Auto")
print(missingdata)
Call:
TestMCARNormality(data = project1, method = "Auto")
Number of Patterns: 9
Total number of cases used in the analysis: 459
Pattern(s) used:
case age tenure sex psychwb jobsat jobperf turnover gma Number of cases
group.1 1 1 1 1 1 1 1 1 1 128
group.2 1 1 NA 1 NA 1 1 1 1 9
group.3 1 1 1 1 1 NA 1 1 1 136
group.4 1 1 1 1 NA 1 1 1 1 137
group.5 1 1 1 1 1 NA 1 1 NA 10
group.6 1 1 1 1 NA 1 1 1 NA 8
group.7 1 NA 1 1 1 1 1 1 1 9
group.8 1 1 1 1 1 1 1 1 NA 15
group.9 1 1 NA 1 1 1 1 1 1 7
Test of normality and Homoscedasticity:
-------------------------------------------
Hawkins Test:
P-value for the Hawkins test of normality and homoscedasticity: 0.09249588
There is not sufficient evidence to reject normality
or MCAR at 0.05 significance level
What is the pattern of missing data?
md.pattern(project1)
case sex turnover jobperf age tenure gma jobsat psychwb
128 1 1 1 1 1 1 1 1 1 0
137 1 1 1 1 1 1 1 1 0 1
136 1 1 1 1 1 1 1 0 1 1
15 1 1 1 1 1 1 0 1 1 1
8 1 1 1 1 1 1 0 1 0 2
10 1 1 1 1 1 1 0 0 1 2
7 1 1 1 1 1 0 1 1 1 1
9 1 1 1 1 1 0 1 1 0 2
4 1 1 1 1 1 0 1 0 1 2
1 1 1 1 1 1 0 0 1 0 3
3 1 1 1 1 1 0 0 0 1 3
9 1 1 1 1 0 1 1 1 1 1
5 1 1 1 1 0 1 1 1 0 2
3 1 1 1 1 0 1 1 0 1 2
1 1 1 1 1 0 1 0 1 0 3
1 1 1 1 1 0 1 0 0 1 3
2 1 1 1 1 0 0 1 0 1 3
1 1 1 1 0 1 1 1 0 1 2
0 0 0 1 21 26 39 160 161 408
project1_aggr = aggr(project1, col=mdc(1:2), numbers=TRUE, sortVars=TRUE, labels=names(project1), cex.axis=.7, cex.numbers=.7, gap=3, ylab=c("Proportion of missingness","Missingness Pattern"))
Variables sorted by number of missings:
Variable Count
psychwb 0.335416667
jobsat 0.333333333
gma 0.081250000
tenure 0.054166667
age 0.043750000
jobperf 0.002083333
case 0.000000000
sex 0.000000000
turnover 0.000000000
What happens when we then look at basic descriptive values in the original file and the imputed file?
listdel <- na.omit(project1)
summary(listdel)
case age tenure sex psychwb jobsat jobperf turnover gma
Min. : 1.0 Min. :18.00 Min. : 2.000 Min. :0.0000 Min. :3.000 Min. :3.000 Min. :3.000 Min. :0.0000 Min. : 71
1st Qu.:146.2 1st Qu.:34.00 1st Qu.: 8.000 1st Qu.:0.0000 1st Qu.:6.000 1st Qu.:5.000 1st Qu.:5.000 1st Qu.:0.0000 1st Qu.: 96
Median :262.5 Median :39.00 Median :10.000 Median :0.0000 Median :6.000 Median :6.000 Median :6.000 Median :0.0000 Median :101
Mean :255.5 Mean :38.20 Mean : 9.891 Mean :0.4844 Mean :6.281 Mean :6.039 Mean :6.016 Mean :0.3438 Mean :101
3rd Qu.:369.8 3rd Qu.:42.25 3rd Qu.:12.000 3rd Qu.:1.0000 3rd Qu.:7.000 3rd Qu.:7.000 3rd Qu.:7.000 3rd Qu.:1.0000 3rd Qu.:106
Max. :479.0 Max. :53.00 Max. :19.000 Max. :1.0000 Max. :9.000 Max. :8.000 Max. :9.000 Max. :1.0000 Max. :125
describe(listdel)
vars n mean sd median trimmed mad min max range skew kurtosis se
case 1 128 255.53 136.66 262.5 257.74 165.31 1 479 478 -0.10 -1.13 12.08
age 2 128 38.20 5.69 39.0 38.33 5.93 18 53 35 -0.35 0.41 0.50
tenure 3 128 9.89 3.13 10.0 9.82 2.97 2 19 17 0.31 -0.03 0.28
sex 4 128 0.48 0.50 0.0 0.48 0.00 0 1 1 0.06 -2.01 0.04
psychwb 5 128 6.28 1.19 6.0 6.32 1.48 3 9 6 -0.30 0.07 0.11
jobsat 6 128 6.04 1.17 6.0 6.05 1.48 3 8 5 -0.07 -0.49 0.10
jobperf 7 128 6.02 1.25 6.0 6.05 1.48 3 9 6 -0.17 -0.32 0.11
turnover 8 128 0.34 0.48 0.0 0.31 0.00 0 1 1 0.65 -1.59 0.04
gma 9 128 101.00 9.04 101.0 100.89 7.41 71 125 54 -0.11 1.06 0.80
listvar <- var(listdel)
round(listvar, 2)
case age tenure sex psychwb jobsat jobperf turnover gma
case 18676.58 -25.66 12.81 -59.04 -16.72 10.30 -15.85 2.53 35.79
age -25.66 32.36 9.67 0.11 0.92 1.12 -0.50 -1.11 -2.20
tenure 12.81 9.67 9.81 -0.01 0.28 0.67 0.02 -0.14 0.14
sex -59.04 0.11 -0.01 0.25 0.04 -0.05 0.03 -0.02 -0.26
psychwb -16.72 0.92 0.28 0.04 1.42 0.54 0.52 -0.18 3.25
jobsat 10.30 1.12 0.67 -0.05 0.54 1.38 0.39 -0.17 4.94
jobperf -15.85 -0.50 0.02 0.03 0.52 0.39 1.57 -0.16 4.99
turnover 2.53 -1.11 -0.14 -0.02 -0.18 -0.17 -0.16 0.23 -1.16
gma 35.79 -2.20 0.14 -0.26 3.25 4.94 4.99 -1.16 81.78
listcor <- cor(listdel)
round(listcor, 2)
case age tenure sex psychwb jobsat jobperf turnover gma
case 1.00 -0.03 0.03 -0.86 -0.10 0.06 -0.09 0.04 0.03
age -0.03 1.00 0.54 0.04 0.14 0.17 -0.07 -0.41 -0.04
tenure 0.03 0.54 1.00 -0.01 0.07 0.18 0.00 -0.09 0.01
sex -0.86 0.04 -0.01 1.00 0.06 -0.09 0.05 -0.08 -0.06
psychwb -0.10 0.14 0.07 0.06 1.00 0.39 0.35 -0.32 0.30
jobsat 0.06 0.17 0.18 -0.09 0.39 1.00 0.26 -0.31 0.47
jobperf -0.09 -0.07 0.00 0.05 0.35 0.26 1.00 -0.27 0.44
turnover 0.04 -0.41 -0.09 -0.08 -0.32 -0.31 -0.27 1.00 -0.27
gma 0.03 -0.04 0.01 -0.06 0.30 0.47 0.44 -0.27 1.00
summary(project1, na.rm = TRUE)
case age tenure sex psychwb jobsat jobperf turnover gma
Min. : 1.0 Min. :18.00 Min. : 1.00 Min. :0.0000 Min. :3.000 Min. :3.000 Min. :3.000 Min. :0.0000 Min. : 71.0
1st Qu.:120.8 1st Qu.:34.00 1st Qu.: 8.00 1st Qu.:0.0000 1st Qu.:5.000 1st Qu.:5.000 1st Qu.:5.000 1st Qu.:0.0000 1st Qu.: 94.0
Median :240.5 Median :38.00 Median :10.00 Median :1.0000 Median :6.000 Median :6.000 Median :6.000 Median :0.0000 Median :100.0
Mean :240.5 Mean :37.96 Mean :10.05 Mean :0.5417 Mean :6.254 Mean :5.966 Mean :6.013 Mean :0.3208 Mean :100.1
3rd Qu.:360.2 3rd Qu.:42.00 3rd Qu.:12.00 3rd Qu.:1.0000 3rd Qu.:7.000 3rd Qu.:7.000 3rd Qu.:7.000 3rd Qu.:1.0000 3rd Qu.:106.0
Max. :480.0 Max. :53.00 Max. :21.00 Max. :1.0000 Max. :9.000 Max. :9.000 Max. :9.000 Max. :1.0000 Max. :125.0
NA's :21 NA's :26 NA's :161 NA's :160 NA's :1 NA's :39
describe(project1, na.rm = TRUE)
vars n mean sd median trimmed mad min max range skew kurtosis se
case 1 480 240.50 138.71 240.5 240.50 177.91 1 480 479 0.00 -1.21 6.33
age 2 459 37.96 5.32 38.0 37.97 5.93 18 53 35 -0.08 -0.07 0.25
tenure 3 454 10.05 3.08 10.0 10.04 2.97 1 21 20 0.11 0.07 0.14
sex 4 480 0.54 0.50 1.0 0.55 0.00 0 1 1 -0.17 -1.98 0.02
psychwb 5 319 6.25 1.17 6.0 6.26 1.48 3 9 6 -0.13 -0.14 0.07
jobsat 6 320 5.97 1.18 6.0 5.96 1.48 3 9 6 0.04 -0.37 0.07
jobperf 7 479 6.01 1.24 6.0 6.03 1.48 3 9 6 -0.10 -0.13 0.06
turnover 8 480 0.32 0.47 0.0 0.28 0.00 0 1 1 0.77 -1.42 0.02
gma 9 441 100.13 8.52 100.0 100.20 8.90 71 125 54 -0.18 0.56 0.41
var(project1, use = "pairwise.complete.obs")
case age tenure sex psychwb jobsat jobperf turnover gma
case 1.924000e+04 12.42544548 37.017212708 -59.707724426 -16.07122296 -5.62650862 3.4685974 -0.020876827 -67.3114512
age 1.242545e+01 28.31250773 7.981884634 -0.039629534 1.05373893 0.75823986 -0.1418306 -0.404848208 0.8217120
tenure 3.701721e+01 7.98188463 9.502718052 -0.057288172 0.59483531 0.56474985 0.1195716 -0.008090945 0.7224252
sex -5.970772e+01 -0.03962953 -0.057288172 0.248782185 0.06152284 0.02686129 -0.0130633 0.001217815 0.3297568
psychwb -1.607122e+01 1.05373893 0.594835311 0.061522841 1.35985095 0.47141947 0.6513104 -0.145186412 2.9618184
jobsat -5.626509e+00 0.75823986 0.564749852 0.026861285 0.47141947 1.40006857 0.2496963 -0.142829154 4.1546524
jobperf 3.468597e+00 -0.14183062 0.119571588 -0.013063303 0.65131044 0.24969632 1.5437758 -0.200688324 4.6391178
turnover -2.087683e-02 -0.40484821 -0.008090945 0.001217815 -0.14518641 -0.14282915 -0.2006883 0.218354210 -0.7319779
gma -6.731145e+01 0.82171201 0.722425232 0.329756751 2.96181840 4.15465237 4.6391178 -0.731977943 72.6053906
pairdel <- cor(project1, use = "pairwise.complete.obs")
round(pairdel, 2)
case age tenure sex psychwb jobsat jobperf turnover gma
case 1.00 0.02 0.09 -0.86 -0.10 -0.04 0.02 0.00 -0.06
age 0.02 1.00 0.49 -0.01 0.17 0.12 -0.02 -0.16 0.02
tenure 0.09 0.49 1.00 -0.04 0.16 0.15 0.03 -0.01 0.03
sex -0.86 -0.01 -0.04 1.00 0.11 0.05 -0.02 0.01 0.08
psychwb -0.10 0.17 0.16 0.11 1.00 0.34 0.45 -0.26 0.29
jobsat -0.04 0.12 0.15 0.05 0.34 1.00 0.17 -0.26 0.41
jobperf 0.02 -0.02 0.03 -0.02 0.45 0.17 1.00 -0.35 0.44
turnover 0.00 -0.16 -0.01 0.01 -0.26 -0.26 -0.35 1.00 -0.18
gma -0.06 0.02 0.03 0.08 0.29 0.41 0.44 -0.18 1.00
project1$age<-ifelse(is.na(project1$age)==T, mean(project1$age, na.rm=T), project1$age)
project1$tenure<-ifelse(is.na(project1$tenure)==T, mean(project1$tenure, na.rm=T), project1$tenure)
project1$psychwb<-ifelse(is.na(project1$psychwb)==T, mean(project1$psychwb, na.rm=T), project1$psychwb)
project1$jobsat<-ifelse(is.na(project1$jobsat)==T, mean(project1$jobsat, na.rm=T), project1$jobsat)
project1$jobperf<-ifelse(is.na(project1$jobperf)==T, mean(project1$jobperf, na.rm=T), project1$jobperf)
project1$gma<-ifelse(is.na(project1$gma)==T, mean(project1$gma, na.rm=T), project1$gma)
#now we remove criterion cases with NA values
project1 <- na.omit(project1)
summary(project1)
case age tenure sex psychwb jobsat jobperf turnover gma
Min. : 1.0 Min. :18.00 Min. : 1.00 Min. :0.0000 Min. :3.000 Min. :3.000 Min. :3.000 Min. :0.0000 Min. : 71.0
1st Qu.:120.8 1st Qu.:34.00 1st Qu.: 8.00 1st Qu.:0.0000 1st Qu.:6.000 1st Qu.:5.966 1st Qu.:5.000 1st Qu.:0.0000 1st Qu.: 95.0
Median :240.5 Median :38.00 Median :10.00 Median :1.0000 Median :6.254 Median :5.966 Median :6.000 Median :0.0000 Median :100.1
Mean :240.5 Mean :37.96 Mean :10.05 Mean :0.5417 Mean :6.254 Mean :5.966 Mean :6.013 Mean :0.3208 Mean :100.1
3rd Qu.:360.2 3rd Qu.:41.00 3rd Qu.:12.00 3rd Qu.:1.0000 3rd Qu.:7.000 3rd Qu.:6.000 3rd Qu.:7.000 3rd Qu.:1.0000 3rd Qu.:105.0
Max. :480.0 Max. :53.00 Max. :21.00 Max. :1.0000 Max. :9.000 Max. :9.000 Max. :9.000 Max. :1.0000 Max. :125.0
describe(project1)
vars n mean sd median trimmed mad min max range skew kurtosis se
case 1 480 240.50 138.71 240.50 240.50 177.91 1 480 479 0.00 -1.21 6.33
age 2 480 37.96 5.20 38.00 37.97 5.93 18 53 35 -0.09 0.07 0.24
tenure 3 480 10.05 3.00 10.00 10.04 2.97 1 21 20 0.11 0.25 0.14
sex 4 480 0.54 0.50 1.00 0.55 0.00 0 1 1 -0.17 -1.98 0.02
psychwb 5 480 6.25 0.95 6.25 6.26 0.38 3 9 6 -0.16 1.31 0.04
jobsat 6 480 5.97 0.97 5.97 5.96 0.05 3 9 6 0.05 0.95 0.04
jobperf 7 480 6.01 1.24 6.00 6.03 1.48 3 9 6 -0.10 -0.13 0.06
turnover 8 480 0.32 0.47 0.00 0.28 0.00 0 1 1 0.77 -1.42 0.02
gma 9 480 100.13 8.17 100.13 100.19 7.22 71 125 54 -0.19 0.88 0.37
var(project1)
case age tenure sex psychwb jobsat jobperf turnover gma
case 1.924000e+04 11.88069735 35.007927676 -59.707724426 -10.66941316 -3.74709029 3.46135608 -0.020876827 -61.8309782
age 1.188070e+01 27.07124956 7.230640937 -0.037892123 0.66610717 0.47942426 -0.13527067 -0.387099122 0.7127114
tenure 3.500793e+01 7.23064094 8.986912897 -0.054178584 0.37448284 0.35625144 0.11283465 -0.007651771 0.6309786
sex -5.970772e+01 -0.03789212 -0.054178584 0.248782185 0.04084397 0.01788883 -0.01303603 0.001217815 0.3029081
psychwb -1.066941e+01 0.66610717 0.374482840 0.040843974 0.90278205 0.15556099 0.43133211 -0.096386804 1.7873284
jobsat -3.747090e+00 0.47942426 0.356251437 0.017888831 0.15556099 0.93240475 0.16629045 -0.095120042 2.5514863
jobperf 3.461356e+00 -0.13527067 0.112834647 -0.013036031 0.43133211 0.16629045 1.54055291 -0.200269350 4.2517955
turnover -2.087683e-02 -0.38709912 -0.007651771 0.001217815 -0.09638680 -0.09512004 -0.20026935 0.218354210 -0.6723806
gma -6.183098e+01 0.71271143 0.630978611 0.302908080 1.78732837 2.55148632 4.25179554 -0.672380574 66.6938870
meansub <- cor(project1)
round(meansub, 2)
case age tenure sex psychwb jobsat jobperf turnover gma
case 1.00 0.02 0.08 -0.86 -0.08 -0.03 0.02 0.00 -0.05
age 0.02 1.00 0.46 -0.01 0.13 0.10 -0.02 -0.16 0.02
tenure 0.08 0.46 1.00 -0.04 0.13 0.12 0.03 -0.01 0.03
sex -0.86 -0.01 -0.04 1.00 0.09 0.04 -0.02 0.01 0.07
psychwb -0.08 0.13 0.13 0.09 1.00 0.17 0.37 -0.22 0.23
jobsat -0.03 0.10 0.12 0.04 0.17 1.00 0.14 -0.21 0.32
jobperf 0.02 -0.02 0.03 -0.02 0.37 0.14 1.00 -0.35 0.42
turnover 0.00 -0.16 -0.01 0.01 -0.22 -0.21 -0.35 1.00 -0.18
gma -0.05 0.02 0.03 0.07 0.23 0.32 0.42 -0.18 1.00
IMPORTANT NOTE: I did NOT impute values for any missing criterion variable data. Standard practice is to delete those cases.
We then run our regression analysis in which we predict JOBPERF from the other 3 variables.
reg <- lm(jobperf ~ psychwb + jobsat + gma, data = project1)
summary(reg)
Call:
lm(formula = jobperf ~ psychwb + jobsat + gma, data = project1)
Residuals:
Min 1Q Median 3Q Max
-3.12907 -0.65267 -0.04173 0.70658 3.10598
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.634934 0.641131 -2.550 0.0111 *
psychwb 0.374814 0.053462 7.011 8.15e-12 ***
jobsat -0.034795 0.054101 -0.643 0.5204
gma 0.055037 0.006478 8.495 2.54e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.076 on 476 degrees of freedom
Multiple R-squared: 0.2531, Adjusted R-squared: 0.2484
F-statistic: 53.76 on 3 and 476 DF, p-value: < 2.2e-16
Let’s do a basic multiple imputation procedure in which we generate 10 datasets. Note that the default for the mice function is to generate 5 datasets.
imp <- mice(data = project1, m=10)
print(imp)
We can check on the imputed values to see if they make sense
head(imp$imp$age)
[1] 1 2 3 4 5 6 7 8 9 10
<0 rows> (or 0-length row.names)
head(imp$imp$tenure)
[1] 1 2 3 4 5 6 7 8 9 10
<0 rows> (or 0-length row.names)
head(imp$imp$psychwb)
[1] 1 2 3 4 5 6 7 8 9 10
<0 rows> (or 0-length row.names)
head(imp$imp$jobsat)
[1] 1 2 3 4 5 6 7 8 9 10
<0 rows> (or 0-length row.names)
head(imp$imp$jobperf)
[1] 1 2 3 4 5 6 7 8 9 10
<0 rows> (or 0-length row.names)
head(imp$imp$gma)
[1] 1 2 3 4 5 6 7 8 9 10
<0 rows> (or 0-length row.names)
Look at the first imputed data set
dat.imp<-complete(imp)
head(dat.imp, n=10)
case age tenure sex psychwb jobsat jobperf turnover gma
1 1 40 10.00000 1 8.000000 8.000000 6 0 106.0000
2 2 53 14.00000 1 6.000000 5.000000 5 0 93.0000
3 3 46 10.05286 1 6.253918 7.000000 7 0 107.0000
4 4 37 8.00000 1 7.000000 5.965625 5 0 94.0000
5 5 44 9.00000 1 6.253918 5.000000 5 0 107.0000
6 6 39 10.00000 1 7.000000 5.965625 7 0 100.1315
7 7 33 7.00000 1 6.253918 5.000000 7 0 103.0000
8 8 43 9.00000 1 7.000000 5.965625 7 0 106.0000
9 9 35 9.00000 1 7.000000 7.000000 7 1 108.0000
10 10 37 10.00000 1 5.000000 6.000000 6 0 97.0000
Compare this to the original data frame
head(project1 [,c("age","tenure","psychwb","jobsat","jobperf","gma")], n=10)
age tenure psychwb jobsat jobperf gma
1 40 10.00000 8.000000 8.000000 6 106.0000
2 53 14.00000 6.000000 5.000000 5 93.0000
3 46 10.05286 6.253918 7.000000 7 107.0000
4 37 8.00000 7.000000 5.965625 5 94.0000
5 44 9.00000 6.253918 5.000000 5 107.0000
6 39 10.00000 7.000000 5.965625 7 100.1315
7 33 7.00000 6.253918 5.000000 7 103.0000
8 43 9.00000 7.000000 5.965625 7 106.0000
9 35 9.00000 7.000000 7.000000 7 108.0000
10 37 10.00000 5.000000 6.000000 6 97.0000
summary(dat.imp)
case age tenure sex psychwb jobsat jobperf turnover gma
Min. : 1.0 Min. :18.00 Min. : 1.00 Min. :0.0000 Min. :3.000 Min. :3.000 Min. :3.000 Min. :0.0000 Min. : 71.0
1st Qu.:120.8 1st Qu.:34.00 1st Qu.: 8.00 1st Qu.:0.0000 1st Qu.:6.000 1st Qu.:5.966 1st Qu.:5.000 1st Qu.:0.0000 1st Qu.: 95.0
Median :240.5 Median :38.00 Median :10.00 Median :1.0000 Median :6.254 Median :5.966 Median :6.000 Median :0.0000 Median :100.1
Mean :240.5 Mean :37.96 Mean :10.05 Mean :0.5417 Mean :6.254 Mean :5.966 Mean :6.013 Mean :0.3208 Mean :100.1
3rd Qu.:360.2 3rd Qu.:41.00 3rd Qu.:12.00 3rd Qu.:1.0000 3rd Qu.:7.000 3rd Qu.:6.000 3rd Qu.:7.000 3rd Qu.:1.0000 3rd Qu.:105.0
Max. :480.0 Max. :53.00 Max. :21.00 Max. :1.0000 Max. :9.000 Max. :9.000 Max. :9.000 Max. :1.0000 Max. :125.0
describe(dat.imp)
vars n mean sd median trimmed mad min max range skew kurtosis se
case 1 480 240.50 138.71 240.50 240.50 177.91 1 480 479 0.00 -1.21 6.33
age 2 480 37.96 5.20 38.00 37.97 5.93 18 53 35 -0.09 0.07 0.24
tenure 3 480 10.05 3.00 10.00 10.04 2.97 1 21 20 0.11 0.25 0.14
sex 4 480 0.54 0.50 1.00 0.55 0.00 0 1 1 -0.17 -1.98 0.02
psychwb 5 480 6.25 0.95 6.25 6.26 0.38 3 9 6 -0.16 1.31 0.04
jobsat 6 480 5.97 0.97 5.97 5.96 0.05 3 9 6 0.05 0.95 0.04
jobperf 7 480 6.01 1.24 6.00 6.03 1.48 3 9 6 -0.10 -0.13 0.06
turnover 8 480 0.32 0.47 0.00 0.28 0.00 0 1 1 0.77 -1.42 0.02
gma 9 480 100.13 8.17 100.13 100.19 7.22 71 125 54 -0.19 0.88 0.37
var(dat.imp)
case age tenure sex psychwb jobsat jobperf turnover gma
case 1.924000e+04 11.88069735 35.007927676 -59.707724426 -10.66941316 -3.74709029 3.46135608 -0.020876827 -61.8309782
age 1.188070e+01 27.07124956 7.230640937 -0.037892123 0.66610717 0.47942426 -0.13527067 -0.387099122 0.7127114
tenure 3.500793e+01 7.23064094 8.986912897 -0.054178584 0.37448284 0.35625144 0.11283465 -0.007651771 0.6309786
sex -5.970772e+01 -0.03789212 -0.054178584 0.248782185 0.04084397 0.01788883 -0.01303603 0.001217815 0.3029081
psychwb -1.066941e+01 0.66610717 0.374482840 0.040843974 0.90278205 0.15556099 0.43133211 -0.096386804 1.7873284
jobsat -3.747090e+00 0.47942426 0.356251437 0.017888831 0.15556099 0.93240475 0.16629045 -0.095120042 2.5514863
jobperf 3.461356e+00 -0.13527067 0.112834647 -0.013036031 0.43133211 0.16629045 1.54055291 -0.200269350 4.2517955
turnover -2.087683e-02 -0.38709912 -0.007651771 0.001217815 -0.09638680 -0.09512004 -0.20026935 0.218354210 -0.6723806
gma -6.183098e+01 0.71271143 0.630978611 0.302908080 1.78732837 2.55148632 4.25179554 -0.672380574 66.6938870
res <- cor(dat.imp)
round(res, 2)
case age tenure sex psychwb jobsat jobperf turnover gma
case 1.00 0.02 0.08 -0.86 -0.08 -0.03 0.02 0.00 -0.05
age 0.02 1.00 0.46 -0.01 0.13 0.10 -0.02 -0.16 0.02
tenure 0.08 0.46 1.00 -0.04 0.13 0.12 0.03 -0.01 0.03
sex -0.86 -0.01 -0.04 1.00 0.09 0.04 -0.02 0.01 0.07
psychwb -0.08 0.13 0.13 0.09 1.00 0.17 0.37 -0.22 0.23
jobsat -0.03 0.10 0.12 0.04 0.17 1.00 0.14 -0.21 0.32
jobperf 0.02 -0.02 0.03 -0.02 0.37 0.14 1.00 -0.35 0.42
turnover 0.00 -0.16 -0.01 0.01 -0.22 -0.21 -0.35 1.00 -0.18
gma -0.05 0.02 0.03 0.07 0.23 0.32 0.42 -0.18 1.00
Let’s look at the sixth imputed data set
dat.imp<-complete(imp, action = 6)
head(dat.imp, n=10)
case age tenure sex psychwb jobsat jobperf turnover gma
1 1 40 10.00000 1 8.000000 8.000000 6 0 106.0000
2 2 53 14.00000 1 6.000000 5.000000 5 0 93.0000
3 3 46 10.05286 1 6.253918 7.000000 7 0 107.0000
4 4 37 8.00000 1 7.000000 5.965625 5 0 94.0000
5 5 44 9.00000 1 6.253918 5.000000 5 0 107.0000
6 6 39 10.00000 1 7.000000 5.965625 7 0 100.1315
7 7 33 7.00000 1 6.253918 5.000000 7 0 103.0000
8 8 43 9.00000 1 7.000000 5.965625 7 0 106.0000
9 9 35 9.00000 1 7.000000 7.000000 7 1 108.0000
10 10 37 10.00000 1 5.000000 6.000000 6 0 97.0000
We can also do things like look at observed and imputed values of JOBPERF with respect to the 3 variables
stripplot(imp,jobperf ~ psychwb | .imp, pch=20)
stripplot(imp,jobperf ~ jobsat | .imp, pch=20)
stripplot(imp,jobperf ~ gma | .imp, pch=20)
xyplot(imp, jobperf ~ psychwb | .imp, pch = 20, cex = 1.4)
xyplot(imp, jobperf ~ jobsat | .imp, pch = 20, cex = 1.4)
xyplot(imp, jobperf ~ gma | .imp, pch = 20, cex = 1.4)
Now let’s run our model using the imputed data
fit <- with(data = imp, exp = lm(jobperf ~ psychwb + jobsat + gma))
fit
call :
with.mids(data = imp, expr = lm(jobperf ~ psychwb + jobsat +
gma))
call1 :
mice(data = project1, m = 10)
nmis :
case age tenure sex psychwb jobsat jobperf turnover gma
0 0 0 0 0 0 0 0 0
analyses :
[[1]]
Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)
Coefficients:
(Intercept) psychwb jobsat gma
-1.63493 0.37481 -0.03480 0.05504
[[2]]
Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)
Coefficients:
(Intercept) psychwb jobsat gma
-1.63493 0.37481 -0.03480 0.05504
[[3]]
Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)
Coefficients:
(Intercept) psychwb jobsat gma
-1.63493 0.37481 -0.03480 0.05504
[[4]]
Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)
Coefficients:
(Intercept) psychwb jobsat gma
-1.63493 0.37481 -0.03480 0.05504
[[5]]
Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)
Coefficients:
(Intercept) psychwb jobsat gma
-1.63493 0.37481 -0.03480 0.05504
[[6]]
Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)
Coefficients:
(Intercept) psychwb jobsat gma
-1.63493 0.37481 -0.03480 0.05504
[[7]]
Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)
Coefficients:
(Intercept) psychwb jobsat gma
-1.63493 0.37481 -0.03480 0.05504
[[8]]
Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)
Coefficients:
(Intercept) psychwb jobsat gma
-1.63493 0.37481 -0.03480 0.05504
[[9]]
Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)
Coefficients:
(Intercept) psychwb jobsat gma
-1.63493 0.37481 -0.03480 0.05504
[[10]]
Call:
lm(formula = jobperf ~ psychwb + jobsat + gma)
Coefficients:
(Intercept) psychwb jobsat gma
-1.63493 0.37481 -0.03480 0.05504
What happens if we run the model using the data frame that had missing data?
reg <- lm(jobperf ~ psychwb + jobsat + gma, data = project1)
summary(reg)
Call:
lm(formula = jobperf ~ psychwb + jobsat + gma, data = project1)
Residuals:
Min 1Q Median 3Q Max
-3.12907 -0.65267 -0.04173 0.70658 3.10598
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.634934 0.641131 -2.550 0.0111 *
psychwb 0.374814 0.053462 7.011 8.15e-12 ***
jobsat -0.034795 0.054101 -0.643 0.5204
gma 0.055037 0.006478 8.495 2.54e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.076 on 476 degrees of freedom
Multiple R-squared: 0.2531, Adjusted R-squared: 0.2484
F-statistic: 53.76 on 3 and 476 DF, p-value: < 2.2e-16
Let’s compare that to what we get when we combine results across the imputed datasets.
summary(pool(fit))
term estimate std.error statistic df p.value
1 (Intercept) -1.63493364 0.641130843 -2.5500780 473.9649 1.108344e-02
2 psychwb 0.37481352 0.053461626 7.0108888 473.9649 8.195222e-12
3 jobsat -0.03479515 0.054101104 -0.6431504 473.9649 5.204376e-01
4 gma 0.05503743 0.006478422 8.4954994 473.9649 2.220446e-16
pool.r.squared(fit, adjusted = FALSE)
est lo 95 hi 95 fmi
R^2 0.253085 0.1875524 0.3215408 NaN
D’Agostino skewness test
agostino.test(dat.imp$age, alternative = c("two.sided", "less", "greater"))
D'Agostino skewness test
data: dat.imp$age
skew = -0.086426, z = -0.783274, p-value = 0.4335
alternative hypothesis: data have a skewness
agostino.test(dat.imp$tenure, alternative = c("two.sided", "less", "greater"))
D'Agostino skewness test
data: dat.imp$tenure
skew = 0.10901, z = 0.98689, p-value = 0.3237
alternative hypothesis: data have a skewness
agostino.test(dat.imp$sex, alternative = c("two.sided", "less", "greater"))
D'Agostino skewness test
data: dat.imp$sex
skew = -0.16725, z = -1.50850, p-value = 0.1314
alternative hypothesis: data have a skewness
agostino.test(dat.imp$psychwb, alternative = c("two.sided", "less", "greater"))
D'Agostino skewness test
data: dat.imp$psychwb
skew = -0.16477, z = -1.48645, p-value = 0.1372
alternative hypothesis: data have a skewness
agostino.test(dat.imp$jobsat, alternative = c("two.sided", "less", "greater"))
D'Agostino skewness test
data: dat.imp$jobsat
skew = 0.053552, z = 0.485869, p-value = 0.6271
alternative hypothesis: data have a skewness
agostino.test(dat.imp$jobperf, alternative = c("two.sided", "less", "greater"))
D'Agostino skewness test
data: dat.imp$jobperf
skew = -0.095886, z = -0.868655, p-value = 0.385
alternative hypothesis: data have a skewness
agostino.test(dat.imp$turnover, alternative = c("two.sided", "less", "greater"))
D'Agostino skewness test
data: dat.imp$turnover
skew = 0.76764, z = 6.24117, p-value = 4.343e-10
alternative hypothesis: data have a skewness
agostino.test(dat.imp$gma, alternative = c("two.sided", "less", "greater"))
D'Agostino skewness test
data: dat.imp$gma
skew = -0.1872, z = -1.6857, p-value = 0.09186
alternative hypothesis: data have a skewness
Visualizing tyhe skewed variables
skewness(dat.imp$age)
[1] -0.08642598
skewness(dat.imp$jobsat)
[1] 0.05355189
hist(dat.imp$age)
hist(dat.imp$jobsat)
Mardia tests of multivariate skew and kurtosis
round(skew(dat.imp),2) #type 3 (default)
[1] 0.00 -0.09 0.11 -0.17 -0.16 0.05 -0.10 0.77 -0.19
round(kurtosi(dat.imp),2) #type 3 (default)
case age tenure sex psychwb jobsat jobperf turnover gma
-1.21 0.07 0.25 -1.98 1.31 0.95 -0.13 -1.42 0.88
#for the differences between the three types of skew and kurtosis:
round(skew(dat.imp,type=1),2) #type 1
[1] 0.00 -0.09 0.11 -0.17 -0.16 0.05 -0.10 0.77 -0.19
round(skew(dat.imp,type=2),2) #type 2
[1] 0.00 -0.09 0.11 -0.17 -0.17 0.05 -0.10 0.77 -0.19
mardia(dat.imp)
Call: mardia(x = dat.imp)
Mardia tests of multivariate skew and kurtosis
Use describe(x) the to get univariate tests
n.obs = 480 num.vars = 9
b1p = 2.96 skew = 237.04 with probability = 2e-04
small sample skew = 238.82 with probability = 0.00015
b2p = 96.51 kurtosis = -1.94 with probability = 0.052
x <- matrix(rnorm(1000),ncol=10)
describe(x)
vars n mean sd median trimmed mad min max range skew kurtosis se
X1 1 100 0.01 0.97 0.00 0.01 1.05 -2.27 2.80 5.07 0.08 -0.07 0.10
X2 2 100 -0.15 0.97 -0.11 -0.12 0.99 -2.62 1.77 4.39 -0.24 -0.54 0.10
X3 3 100 -0.14 0.98 -0.13 -0.12 0.95 -2.44 2.65 5.09 -0.06 0.03 0.10
X4 4 100 0.05 1.01 0.01 0.02 0.76 -2.82 3.31 6.13 0.32 0.91 0.10
X5 5 100 0.18 0.94 0.23 0.19 0.91 -2.11 2.62 4.73 -0.09 -0.03 0.09
X6 6 100 0.04 1.05 0.10 0.04 1.09 -2.45 2.53 4.99 -0.06 -0.31 0.10
X7 7 100 -0.04 0.97 0.01 -0.06 0.90 -1.98 2.89 4.87 0.20 -0.10 0.10
X8 8 100 0.10 0.95 0.11 0.07 1.08 -1.80 2.25 4.05 0.18 -0.72 0.10
X9 9 100 0.16 1.12 0.10 0.15 1.05 -2.72 3.55 6.27 0.11 0.17 0.11
X10 10 100 -0.08 0.98 -0.08 -0.07 0.98 -2.97 2.12 5.09 -0.19 -0.09 0.10
mardia(x)
Call: mardia(x = x)
Mardia tests of multivariate skew and kurtosis
Use describe(x) the to get univariate tests
n.obs = 100 num.vars = 10
b1p = 11.98 skew = 199.73 with probability = 0.83
small sample skew = 206.84 with probability = 0.73
b2p = 114.94 kurtosis = -1.63 with probability = 0.1
age_outlier_values <- boxplot.stats(dat.imp$age)$out
boxplot(dat.imp$age, main="age", boxwex=0.1)
mtext(paste("Outliers: ", paste(age_outlier_values, collapse=", ")), cex=0.6)
tenure_outlier_values <- boxplot.stats(dat.imp$tenure)$out
boxplot(dat.imp$tenure, main="tenure", boxwex=0.1)
mtext(paste("Outliers: ", paste(tenure_outlier_values, collapse=", ")), cex=0.6)
psychwb_outlier_values <- boxplot.stats(dat.imp$psychwb)$out
boxplot(dat.imp$psychwb, main="psychwb", boxwex=0.1)
mtext(paste("Outliers: ", paste(psychwb_outlier_values, collapse=", ")), cex=0.6)
jobsat_outlier_values <- boxplot.stats(dat.imp$jobsat)$out
boxplot(dat.imp$jobsat, main="jobsat", boxwex=0.1)
mtext(paste("Outliers: ", paste(jobsat_outlier_values, collapse=", ")), cex=0.6)
jobperf_outlier_values <- boxplot.stats(dat.imp$jobperf)$out
boxplot(dat.imp$jobperf, main="jobperf", boxwex=0.1)
mtext(paste("Outliers: ", paste(jobperf_outlier_values, collapse=", ")), cex=0.6)
gma_outlier_values <- boxplot.stats(dat.imp$gma)$out
boxplot(dat.imp$gma, main="gma", boxwex=0.1)
mtext(paste("Outliers: ", paste(gma_outlier_values, collapse=", ")), cex=0.6)
outlier(dat.imp, method = "mean", addthres = TRUE)
Warning in plot.window(...): "method" is not a graphical parameter
Warning in plot.window(...): "addthres" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "method" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "addthres" is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "method" is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "addthres" is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "method" is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "addthres" is not a graphical parameter
Warning in box(...): "method" is not a graphical parameter
Warning in box(...): "addthres" is not a graphical parameter
Warning in title(...): "method" is not a graphical parameter
Warning in title(...): "addthres" is not a graphical parameter
Warning in text.default(Chi2[n.obs:(n.obs - bad + 1)], D2[worst[1:bad]], : "method" is not a graphical parameter
Warning in text.default(Chi2[n.obs:(n.obs - bad + 1)], D2[worst[1:bad]], : "addthres" is not a graphical parameter
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
11.419392 15.783178 9.079817 7.425190 10.989555 5.251453 7.406341 6.452692 10.179570 7.375179 13.153991 9.415589 6.314043 6.687719 8.438447 14.207353 7.504731 6.531711 8.319442 5.527045 5.697569 6.892839 6.600222 6.223072 7.606714 11.956769 8.228375 5.619926 15.122777 8.195105 10.056578 11.134482 9.756263 11.772052 5.458195 8.582842 10.683426 5.666908 5.455868 15.856190
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
7.722037 9.049969 7.492725 16.428886 9.165068 6.245639 16.206583 6.450225 13.827427 15.241745 4.411360 14.293460 4.118634 7.423141 4.378764 4.423350 9.171220 10.808549 8.131268 7.782075 14.152865 10.347283 9.969207 3.185757 10.943747 8.430742 11.213864 12.863267 6.979996 7.879523 6.837619 10.665560 5.516717 5.250223 5.684632 10.250328 9.023506 3.980877 9.711109 11.755393
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
4.368757 8.784546 5.980737 6.070896 8.538297 5.531965 4.411070 5.205874 11.183279 7.649746 2.413620 10.069769 5.584571 9.388361 9.207115 7.945344 4.134855 10.361840 6.581706 1.960943 5.901978 8.561407 19.603157 12.771669 7.001832 9.301651 8.130479 16.807962 12.266143 6.625975 4.176604 4.384371 9.677434 5.777954 4.405485 4.569497 10.291551 4.014626 2.659294 6.857347
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
4.412583 11.702991 13.749110 7.831258 14.740180 8.413287 9.549897 5.200161 5.502696 16.156425 3.974626 3.884414 17.546318 10.606184 6.503505 10.343866 11.596412 9.206365 3.647984 6.868900 6.051162 11.113026 8.818401 7.700342 6.930146 7.042216 14.106905 9.710428 6.866065 4.860060 2.951196 7.128655 7.390816 12.781352 2.768046 10.381283 5.692788 4.842183 23.385578 5.142681
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
12.632568 4.348758 3.103847 5.832293 3.520498 4.647637 9.187160 7.419858 7.941273 5.040482 7.263135 4.899600 10.799364 3.811072 6.720354 16.889233 10.246683 7.768004 6.396522 11.011131 9.826954 6.612300 16.574588 3.655640 6.527012 5.987517 12.023851 6.489063 6.539712 10.698295 3.566156 3.817818 7.632345 7.074156 5.284913 7.058112 6.420292 6.393441 12.525389 9.679066
201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
9.248983 8.984455 4.192451 4.904182 5.702750 7.313462 3.666422 12.850111 10.227882 10.809077 7.393644 9.883381 19.496764 9.842191 7.616699 15.400509 7.068742 11.424972 16.699216 8.969663 7.756248 7.021469 7.817744 6.281500 4.814664 11.976475 4.844129 9.418227 6.141276 10.661408 11.545463 8.815166 4.056598 19.439619 14.604513 7.785155 6.473974 10.183673 9.698299 13.707653
241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
12.608247 12.390774 5.233921 6.947600 10.654386 10.961424 5.357127 12.904471 5.520566 8.724346 18.417934 7.279050 6.892146 9.450856 9.890664 7.799539 11.247384 10.196016 18.341696 9.696758 11.644191 7.924799 9.800543 9.371981 16.692268 9.467719 7.242622 12.194222 7.703177 18.137429 9.228506 6.061212 5.355501 11.756925 12.991922 10.013286 6.111834 11.065740 10.686158 9.051085
281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320
13.293700 5.886501 3.741733 9.783887 12.131554 4.622149 10.691039 16.186138 11.800112 16.163147 4.579438 10.925220 14.884443 8.709794 8.160118 9.057895 3.862542 6.181506 7.329010 6.721078 5.972265 9.705004 7.176135 5.776712 5.876823 4.680523 5.532776 11.548326 10.113937 5.663005 12.585859 9.121456 8.569384 7.255645 5.189728 5.083230 8.134661 7.411546 4.174012 5.511772
321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
4.750599 6.869142 15.884581 8.625419 7.161578 4.685404 6.128155 11.699766 7.789813 13.587317 11.288349 13.126086 11.049459 7.963791 11.928234 4.565839 7.875512 5.706536 5.202335 8.937618 10.739854 7.767758 4.318880 4.796508 6.391392 10.438099 12.538903 5.702756 9.051706 10.772329 15.097514 7.744425 6.291942 5.536867 16.142486 10.351198 8.860030 12.300025 11.972009 11.378871
361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400
10.625007 8.420646 14.079970 17.319756 4.818950 15.944207 2.623111 3.894721 12.647575 7.304846 5.544923 17.028728 4.384497 7.271147 10.653335 7.970582 7.983099 8.880903 9.112800 9.498981 6.909582 3.831591 7.465430 4.381657 8.956859 9.575041 9.237096 8.640386 6.330190 12.400121 16.508250 7.698648 8.462863 4.852291 18.529875 7.916427 9.939611 14.364129 6.671453 8.005495
401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440
5.441089 2.416981 8.751005 11.270866 2.659439 8.360085 3.767699 9.073638 7.118514 13.008347 14.424488 16.558980 16.561686 5.306274 7.491341 5.660012 9.942914 10.936036 11.355292 6.705314 7.293426 4.794905 11.708633 3.972120 15.503983 20.109132 14.683748 11.269535 4.631778 8.520396 6.487938 6.982795 6.568099 16.355423 4.498490 11.193724 5.911631 14.056070 10.007785 6.406753
441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480
7.068674 6.967187 9.049721 14.455119 17.367011 7.417337 11.950578 7.711825 4.374636 7.365838 5.808953 10.129323 8.430759 6.737615 17.989029 8.894988 12.270690 20.500825 9.301845 14.622085 13.140468 6.315382 7.291279 34.940408 24.786788 7.544370 7.621707 6.526261 13.670044 8.850176 6.546806 11.323524 11.796708 14.648657 9.919024 12.039941 9.705664 10.114553 13.340510 5.875593
md <- mahalanobis(dat.imp, center = colMeans(dat.imp), cov = cov(dat.imp))
alpha <- .001
cutoff <- (qchisq(p = 1 - alpha, df = ncol(dat.imp)))
names_outliers_MH <- which(md > cutoff)
excluded_mh <- names_outliers_MH
data_clean_mh <- dat.imp[-excluded_mh, ]
dat.imp[excluded_mh, ]
case age tenure sex psychwb jobsat jobperf turnover gma
464 464 36 5 0 3 3 6 1 71
outlier(data_clean_mh, method = "mean", addthres = TRUE)
Warning in plot.window(...): "method" is not a graphical parameter
Warning in plot.window(...): "addthres" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "method" is not a graphical parameter
Warning in plot.xy(xy, type, ...): "addthres" is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "method" is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "addthres" is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "method" is not a graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "addthres" is not a graphical parameter
Warning in box(...): "method" is not a graphical parameter
Warning in box(...): "addthres" is not a graphical parameter
Warning in title(...): "method" is not a graphical parameter
Warning in title(...): "addthres" is not a graphical parameter
Warning in text.default(Chi2[n.obs:(n.obs - bad + 1)], D2[worst[1:bad]], : "method" is not a graphical parameter
Warning in text.default(Chi2[n.obs:(n.obs - bad + 1)], D2[worst[1:bad]], : "addthres" is not a graphical parameter
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
11.840972 15.750961 9.076922 7.454322 11.017873 5.253008 7.389554 6.462010 10.221474 7.370261 13.178030 9.537655 6.381307 6.686151 8.425621 14.197862 7.511533 6.515998 8.303260 5.557328 5.754988 7.020501 6.668573 6.209117 7.793337 12.108343 8.212616 5.621862 15.091615 8.260507 10.047486 11.232724 9.746682 11.746986 5.509064 8.609192 10.682904 5.653017 5.452309 15.933353
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
7.883277 9.038926 7.553738 16.392564 9.148670 6.261756 16.173287 6.434964 14.135763 15.212679 4.517066 14.309660 4.111266 7.551889 4.443920 4.521636 9.193271 10.795420 8.159793 7.785947 14.212583 10.392305 9.949911 3.181329 10.918916 8.427715 11.188486 12.908185 6.993789 7.947963 6.916872 10.708855 5.542947 5.244049 5.696803 10.417322 9.077934 3.997533 9.693362 12.166364
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
4.390321 8.977242 6.033407 6.056269 8.525588 5.607449 4.400215 5.193320 11.169874 7.657557 2.423427 10.240986 5.572802 9.373596 9.188818 7.934109 4.177169 10.363040 6.566834 1.959727 5.900349 8.636245 19.561196 12.872086 6.989452 9.309040 8.161872 16.855988 12.244039 6.659087 4.166597 4.402024 9.887671 5.764249 4.412338 4.563423 10.349469 4.024151 2.653535 7.042767
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
4.413180 11.698672 13.721539 7.835132 14.867391 8.403798 9.541624 5.230250 5.513513 16.249119 3.964625 3.875058 18.004088 10.746575 6.571036 10.464604 11.884660 9.190947 3.659637 6.859308 6.089297 11.135638 8.967948 7.709992 6.918334 7.025438 14.536786 9.906180 6.877154 4.850113 2.945193 7.111704 7.395087 12.822141 2.761729 10.392646 5.703976 4.889157 23.376440 5.158546
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
12.637719 4.358579 3.119930 5.852438 3.517041 4.643997 9.195471 7.512948 7.951503 5.029464 7.311039 4.892642 10.847703 3.830752 6.843674 16.947793 10.361698 7.796900 6.394944 10.998948 9.807179 6.599566 16.734849 3.654064 6.519283 5.975238 12.002263 6.555575 6.671564 10.766722 3.616487 3.822568 7.628574 7.136454 5.272475 7.150561 6.438754 6.477948 12.610521 9.944022
201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
9.249889 8.975881 4.186615 4.902172 5.691076 7.296115 3.660220 13.133620 10.244108 10.804629 7.525882 9.943562 19.487313 9.826179 7.612006 15.465060 7.052539 11.405038 17.004365 9.014184 7.741549 7.046214 7.901938 6.314727 4.803642 12.127243 4.897616 9.408760 6.131894 10.639552 11.524939 8.801434 4.078013 19.979630 14.598933 7.930281 6.466740 10.179885 9.680291 13.725423
241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
13.072245 12.372024 5.220921 6.937809 10.676015 11.027499 5.343864 13.186230 5.531902 8.717050 18.546332 7.320534 6.926126 9.565842 9.936523 7.795735 11.262991 10.178859 18.325693 9.681348 11.673672 7.936094 9.869113 9.351125 16.696649 9.486204 7.332088 12.527884 7.748117 18.447122 9.232115 6.086632 5.422503 11.768135 13.287534 9.991010 6.119945 11.156396 10.670918 9.073549
281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320
13.385524 5.872228 3.741868 9.768103 12.308936 4.633126 10.955994 16.371382 11.795816 16.302308 4.594339 11.052437 15.016992 8.689550 8.143705 9.073393 3.889892 6.168588 7.340001 6.835519 5.958449 9.693332 7.216859 5.793765 5.941618 4.670910 5.575031 11.671395 10.291096 5.673422 12.611899 9.301575 8.559362 7.238510 5.204271 5.077596 8.201165 7.464659 4.232207 5.498790
321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
4.745679 6.884677 15.941328 8.734711 7.185057 4.710150 6.118164 11.697004 7.812899 13.683306 11.434096 13.107812 11.056476 7.976534 11.901553 4.584031 7.928821 5.821433 5.195997 9.065302 10.724749 7.783185 4.311004 4.788831 6.382532 10.726649 12.943872 5.705818 9.260566 10.832520 15.121029 7.790293 6.280842 5.529371 16.112190 10.358675 8.922601 12.289232 11.959651 11.496007
361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400
10.619128 8.402306 14.506173 17.662368 4.872800 15.908993 2.619727 3.887109 12.633120 7.420379 5.534224 17.159974 4.391097 7.341753 10.813385 7.970670 8.055015 8.860436 9.097541 9.662206 6.906345 3.831989 7.557167 4.382139 9.012683 9.578520 9.449819 8.705900 6.318339 12.662255 16.930357 7.753267 8.560233 4.848766 18.580212 8.258302 10.456110 14.523238 6.802026 7.988672
401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440
5.473278 2.410130 8.848533 11.262627 2.707000 8.434428 3.763174 9.184680 7.120902 12.988808 14.869535 16.956989 16.799152 5.320146 7.677055 5.802393 10.035401 10.959404 11.347674 6.822911 7.373301 4.794129 11.872562 3.966207 16.201694 20.680758 14.651035 11.252933 4.621015 8.503399 6.494828 6.971031 6.608999 16.335245 4.521051 11.180752 6.117358 14.081008 9.984850 6.406891
441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480
7.156066 7.087619 9.060148 14.506554 17.591044 7.549956 11.942796 7.744531 4.365107 7.388661 5.819037 10.114919 8.463559 6.742864 18.027876 9.094563 12.315967 20.467105 9.285530 14.590192 13.125183 6.304944 7.315206 25.964425 7.534112 7.610269 6.511034 13.945210 8.879503 6.531683 11.298076 11.770245 14.640253 9.972975 12.026105 9.685162 10.093087 13.311502 5.895739
data <- cor(dat.imp)
round(data, 2)
case age tenure sex psychwb jobsat jobperf turnover gma
case 1.00 0.02 0.08 -0.86 -0.08 -0.03 0.02 0.00 -0.05
age 0.02 1.00 0.46 -0.01 0.13 0.10 -0.02 -0.16 0.02
tenure 0.08 0.46 1.00 -0.04 0.13 0.12 0.03 -0.01 0.03
sex -0.86 -0.01 -0.04 1.00 0.09 0.04 -0.02 0.01 0.07
psychwb -0.08 0.13 0.13 0.09 1.00 0.17 0.37 -0.22 0.23
jobsat -0.03 0.10 0.12 0.04 0.17 1.00 0.14 -0.21 0.32
jobperf 0.02 -0.02 0.03 -0.02 0.37 0.14 1.00 -0.35 0.42
turnover 0.00 -0.16 -0.01 0.01 -0.22 -0.21 -0.35 1.00 -0.18
gma -0.05 0.02 0.03 0.07 0.23 0.32 0.42 -0.18 1.00
model1 <- lm(jobperf ~ age + tenure + sex + psychwb + jobsat + gma, data = dat.imp)
vif(model1)
age tenure sex psychwb jobsat gma
1.284822 1.292375 1.013178 1.095978 1.144377 1.163126
model2 <- lm(turnover ~ age + tenure + sex + psychwb + jobsat + gma, data = dat.imp)
vif(model2)
age tenure sex psychwb jobsat gma
1.284822 1.292375 1.013178 1.095978 1.144377 1.163126