In this assignment, we investigate the scope of the missingness problem in the incomplete data set we received. The data are derived from the National Health and Nutrition Examination Survey (NHANES). After inspecting and analyzing the data, we formulate a research question involving a variable that contains missing values. We will use different strategies to cope with these NA values and compare their results with the results we retrieve from analyzing the complete data set.
The first step we take is obviously reading the data:
# Load the fully observed reference data and the version that contains
# missing values; both are stored as serialized R objects.
data_complete <- readRDS(file = "g3_complete_data.rds")
data_incomplete <- readRDS(file = "g3_incomplete_data.rds")
Now that we have loaded our data sets, we take a look at a summary of our incomplete data to obtain a general understanding of it.
# Open the spreadsheet-style viewer only in an interactive session: View()
# produces nothing printable and may fail when the document is rendered
# non-interactively.
if (interactive()) {
  View(data_incomplete)
}
# Print the first six rows of the incomplete data.
head(data_incomplete)
## id sex age ethnicity education marital
## 1 43736 female 49 mexican_american some_college married
## 2 43339 female 34 mexican_american high_school_grad married
## 3 47572 female 68 non-hispanic_white some_high_school divorced
## 4 48127 male 24 mexican_american some_college never_married
## 5 46971 female 67 non-hispanic_white college_grad divorced
## 6 43233 male 40 mexican_american some_college separated
## household_size household_income weight height bmi pulse bp_sys1 bp_dia1
## 1 2 75000:99999 NA 151.6 NA 74 180 94
## 2 2 65000:74999 69.3 172.6 23.26 102 100 84
## 3 1 5000:9999 67.8 161.3 26.06 70 110 68
## 4 7 15000:19999 84.1 170.5 28.93 64 126 66
## 5 1 35000:44999 NA 150.9 NA 74 132 70
## 6 2 55000:64999 77.8 165.7 28.34 94 96 72
## bp_sys2 bp_dia2 time_sed drink_regularly days_drinking dep1 dep2 dep3 dep4
## 1 NA NA 480 yes 364 0 0 0 0
## 2 NA NA 10 yes 52 3 NA NA 3
## 3 104 62 120 <NA> 1 0 0 0 0
## 4 124 60 480 yes 120 0 0 0 0
## 5 128 72 240 yes 24 0 0 0 0
## 6 NA NA 480 yes 52 0 NA NA 1
## dep5 dep6 dep7 dep8 dep9
## 1 0 0 0 0 0
## 2 NA NA 3 3 0
## 3 0 0 0 0 NA
## 4 1 0 0 0 0
## 5 0 0 0 0 0
## 6 NA NA 0 0 0
# Print the last six rows of the incomplete data.
tail(x = data_incomplete)
## id sex age ethnicity education marital
## 495 44001 female 49 non-hispanic_black college_grad married
## 496 51309 male 27 non-hispanic_white some_college married
## 497 43378 female 24 mexican_american no_high_school living_with_partner
## 498 45675 male 55 other_hispanic no_high_school married
## 499 46007 male 32 non-hispanic_white some_college married
## 500 45751 female 45 other_hispanic some_high_school married
## household_size household_income weight height bmi pulse bp_sys1 bp_dia1
## 495 5 100000+ NA 163.1 NA 80 108 76
## 496 3 45000:54999 100.9 NA NA 58 108 58
## 497 6 15000:19999 79.3 156.2 32.50 54 94 58
## 498 6 45000:54999 NA 165.1 NA 84 132 64
## 499 3 45000:54999 92.0 184.4 27.06 66 110 58
## 500 5 100000+ 61.5 153.1 26.24 76 108 66
## bp_sys2 bp_dia2 time_sed drink_regularly days_drinking dep1 dep2 dep3 dep4
## 495 108 78 360 yes 156 0 0 0 0
## 496 108 64 120 yes 52 0 0 0 1
## 497 98 70 30 no 0 0 0 0 0
## 498 124 64 420 no 0 0 0 3 3
## 499 108 54 180 yes 12 0 0 2 2
## 500 106 64 120 yes 364 0 0 1 1
## dep5 dep6 dep7 dep8 dep9
## 495 0 0 0 0 0
## 496 0 0 1 0 NA
## 497 0 0 0 0 0
## 498 3 0 0 1 0
## 499 0 0 0 0 0
## 500 1 0 0 0 NA
# Per-variable overview: quartiles, mean, min/max (or level counts for
# factors) and the number of NA's.
summary(object = data_incomplete)
## id sex age ethnicity
## Min. :41487 male :256 Min. :20.00 mexican_american :101
## 1st Qu.:44313 female:244 1st Qu.:32.00 other_hispanic : 66
## Median :46962 Median :45.00 non-hispanic_white:220
## Mean :46743 Mean :44.48 non-hispanic_black: 98
## 3rd Qu.:49245 3rd Qu.:57.00 other : 15
## Max. :51614 Max. :69.00
##
## education marital household_size
## no_high_school : 59 married :264 Min. :1.000
## some_high_school: 93 widowed : 13 1st Qu.:2.000
## high_school_grad:113 divorced : 58 Median :3.000
## some_college :151 separated : 28 Mean :3.304
## college_grad : 84 never_married :100 3rd Qu.:4.000
## living_with_partner: 37 Max. :7.000
##
## household_income weight height bmi
## 100000+ : 78 Min. : 48.00 Min. :143.3 Min. :17.20
## 25000:34999: 74 1st Qu.: 68.33 1st Qu.:161.1 1st Qu.:24.55
## 75000:99999: 57 Median : 81.45 Median :168.1 Median :28.07
## 35000:44999: 43 Mean : 83.58 Mean :168.1 Mean :28.99
## 10000:14999: 42 3rd Qu.: 95.75 3rd Qu.:176.3 3rd Qu.:32.16
## 45000:54999: 41 Max. :195.80 Max. :192.9 Max. :58.59
## (Other) :165 NA's :150 NA's :96 NA's :222
## pulse bp_sys1 bp_dia1 bp_sys2
## Min. : 46.00 Min. : 82.0 Min. : 28.00 Min. : 76.0
## 1st Qu.: 66.00 1st Qu.:110.0 1st Qu.: 64.00 1st Qu.:108.0
## Median : 74.00 Median :120.0 Median : 72.00 Median :118.0
## Mean : 74.86 Mean :122.2 Mean : 72.39 Mean :119.5
## 3rd Qu.: 82.00 3rd Qu.:132.0 3rd Qu.: 80.00 3rd Qu.:130.0
## Max. :128.00 Max. :210.0 Max. :110.00 Max. :180.0
## NA's :100
## bp_dia2 time_sed drink_regularly days_drinking
## Min. : 40.00 Min. : 0.0 yes :335 Min. : 0.00
## 1st Qu.: 62.00 1st Qu.: 180.0 no :115 1st Qu.: 0.75
## Median : 70.00 Median : 240.0 NA's: 50 Median : 7.50
## Mean : 70.57 Mean : 307.4 Mean : 50.69
## 3rd Qu.: 78.00 3rd Qu.: 480.0 3rd Qu.: 52.00
## Max. :108.00 Max. :1080.0 Max. :365.00
## NA's :100
## dep1 dep2 dep3 dep4
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.368 Mean :0.3153 Mean :0.6918 Mean :0.794
## 3rd Qu.:1.000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :3.000 Max. :3.0000 Max. :3.0000 Max. :3.000
## NA's :75 NA's :75
## dep5 dep6 dep7 dep8
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.4094 Mean :0.2682 Mean :0.286 Mean :0.1674
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :3.0000 Max. :3.0000 Max. :3.000 Max. :3.0000
## NA's :75 NA's :75 NA's :40
## dep9
## Min. :0.00000
## 1st Qu.:0.00000
## Median :0.00000
## Mean :0.06205
## 3rd Qu.:0.00000
## Max. :3.00000
## NA's :81
We now know what our incomplete data look like. Next, we compare the distributions and descriptive statistics of the incomplete and the complete data set.
In the table above, we saw the summary statistics of the incomplete data set. We will now look at the same statistics for the complete data set.
# Same per-variable overview for the complete data, for comparison with the
# summary of the incomplete data.
summary(object = data_complete)
## id sex age ethnicity
## Min. :41487 male :256 Min. :20.00 mexican_american :101
## 1st Qu.:44313 female:244 1st Qu.:32.00 other_hispanic : 66
## Median :46962 Median :45.00 non-hispanic_white:220
## Mean :46743 Mean :44.48 non-hispanic_black: 98
## 3rd Qu.:49245 3rd Qu.:57.00 other : 15
## Max. :51614 Max. :69.00
##
## education marital household_size
## no_high_school : 59 married :264 Min. :1.000
## some_high_school: 93 widowed : 13 1st Qu.:2.000
## high_school_grad:113 divorced : 58 Median :3.000
## some_college :151 separated : 28 Mean :3.304
## college_grad : 84 never_married :100 3rd Qu.:4.000
## living_with_partner: 37 Max. :7.000
##
## household_income weight height bmi
## 100000+ : 78 Min. : 40.80 Min. :141.9 Min. :16.88
## 25000:34999: 74 1st Qu.: 67.10 1st Qu.:161.2 1st Qu.:24.60
## 75000:99999: 57 Median : 80.15 Median :168.0 Median :28.24
## 35000:44999: 43 Mean : 83.06 Mean :168.1 Mean :29.31
## 10000:14999: 42 3rd Qu.: 94.62 3rd Qu.:176.3 3rd Qu.:32.38
## 45000:54999: 41 Max. :213.50 Max. :192.9 Max. :71.83
## (Other) :165
## pulse bp_sys1 bp_dia1 bp_sys2
## Min. : 46.00 Min. : 82.0 Min. : 28.00 Min. : 76.0
## 1st Qu.: 66.00 1st Qu.:110.0 1st Qu.: 64.00 1st Qu.:108.0
## Median : 74.00 Median :120.0 Median : 72.00 Median :118.0
## Mean : 74.86 Mean :122.2 Mean : 72.39 Mean :120.9
## 3rd Qu.: 82.00 3rd Qu.:132.0 3rd Qu.: 80.00 3rd Qu.:130.0
## Max. :128.00 Max. :210.0 Max. :110.00 Max. :214.0
##
## bp_dia2 time_sed drink_regularly days_drinking
## Min. : 34.00 Min. : 0.0 yes:370 Min. : 0.00
## 1st Qu.: 64.00 1st Qu.: 180.0 no :130 1st Qu.: 0.75
## Median : 72.00 Median : 240.0 Median : 7.50
## Mean : 71.86 Mean : 307.4 Mean : 50.69
## 3rd Qu.: 80.00 3rd Qu.: 480.0 3rd Qu.: 52.00
## Max. :108.00 Max. :1080.0 Max. :365.00
##
## dep1 dep2 dep3 dep4 dep5
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.00
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.00
## Median :0.000 Median :0.000 Median :0.000 Median :0.000 Median :0.00
## Mean :0.368 Mean :0.382 Mean :0.738 Mean :0.794 Mean :0.45
## 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.:1.00
## Max. :3.000 Max. :3.000 Max. :3.000 Max. :3.000 Max. :3.00
##
## dep6 dep7 dep8 dep9
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.00
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.00
## Median :0.000 Median :0.000 Median :0.000 Median :0.00
## Mean :0.312 Mean :0.286 Mean :0.164 Mean :0.07
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.00
## Max. :3.000 Max. :3.000 Max. :3.000 Max. :3.00
##
Here we use arsenal::comparedf() to detect the differences between the two data sets. It shows us the number of NA values per variable, but it also gives us some additional information. In this case we are specifically interested in the ‘Summary of overall comparison’ table, which tells us the following. Of the 28 variables, 12 differ between the two data sets because they contain NA values in the incomplete data, and the other 16 are identical. Of the 500 observations, 367 have at least one compared variable unequal. The total number of differing values between the two data sets is 1139, spread across those 12 variables.
# Cell-by-cell comparison of the incomplete (x) and complete (y) data frames;
# summary() prints the full comparison report.
summary(comparedf(x = data_incomplete, y = data_complete))
##
##
## Table: Summary of data.frames
##
## version arg ncol nrow
## -------- ---------------- ----- -----
## x data_incomplete 28 500
## y data_complete 28 500
##
##
##
## Table: Summary of overall comparison
##
## statistic value
## ------------------------------------------------------------ ------
## Number of by-variables 0
## Number of non-by variables in common 28
## Number of variables compared 28
## Number of variables in x but not y 0
## Number of variables in y but not x 0
## Number of variables compared with some values unequal 12
## Number of variables compared with all values equal 16
## Number of observations in common 500
## Number of observations in x but not y 0
## Number of observations in y but not x 0
## Number of observations with some compared variables unequal 367
## Number of observations with all compared variables equal 133
## Number of values unequal 1139
##
##
##
## Table: Variables not shared
##
##
## ------------------------
## No variables not shared
## ------------------------
##
##
##
## Table: Other variables not compared
##
##
## --------------------------------
## No other variables not compared
## --------------------------------
##
##
##
## Table: Observations not shared
##
##
## ---------------------------
## No observations not shared
## ---------------------------
##
##
##
## Table: Differences detected by variable
##
## var.x var.y n NAs
## ----------------- ----------------- ---- ----
## id id 0 0
## sex sex 0 0
## age age 0 0
## ethnicity ethnicity 0 0
## education education 0 0
## marital marital 0 0
## household_size household_size 0 0
## household_income household_income 0 0
## weight weight 150 150
## height height 96 96
## bmi bmi 222 222
## pulse pulse 0 0
## bp_sys1 bp_sys1 0 0
## bp_dia1 bp_dia1 0 0
## bp_sys2 bp_sys2 100 100
## bp_dia2 bp_dia2 100 100
## time_sed time_sed 0 0
## drink_regularly drink_regularly 50 50
## days_drinking days_drinking 0 0
## dep1 dep1 0 0
## dep2 dep2 75 75
## dep3 dep3 75 75
## dep4 dep4 0 0
## dep5 dep5 75 75
## dep6 dep6 75 75
## dep7 dep7 0 0
## dep8 dep8 40 40
## dep9 dep9 81 81
##
##
##
## Table: Differences detected (1089 not shown)
##
## var.x var.y ..row.names.. values.x values.y row.x row.y
## -------- -------- -------------- --------- --------- ------ ------
## weight weight 1 NA 67 1 1
## weight weight 5 NA 56.8 5 5
## weight weight 8 NA 108.2 8 8
## weight weight 13 NA 107.9 13 13
## weight weight 21 NA 86.7 21 21
## weight weight 27 NA 63.9 27 27
## weight weight 30 NA 97.9 30 30
## weight weight 33 NA 50.9 33 33
## weight weight 34 NA 52.8 34 34
## weight weight 36 NA 79.4 36 36
## height height 15 NA 157.1 15 15
## height height 16 NA 166.5 16 16
## height height 22 NA 164.1 22 22
## height height 24 NA 149.5 24 24
## height height 25 NA 168.4 25 25
## height height 26 NA 181.9 26 26
## height height 28 NA 178 28 28
## height height 32 NA 164.6 32 32
## height height 34 NA 161.6 34 34
## height height 39 NA 188.3 39 39
## bmi bmi 1 NA 29.15 1 1
## bmi bmi 5 NA 24.94 5 5
## bmi bmi 8 NA 32.24 8 8
## bmi bmi 13 NA 32.79 13 13
## bmi bmi 15 NA 26.42 15 15
## bmi bmi 16 NA 26.33 16 16
## bmi bmi 21 NA 33.36 21 21
## bmi bmi 22 NA 31.16 22 22
## bmi bmi 24 NA 26.89 24 24
## bmi bmi 25 NA 38.58 25 25
## bp_sys2 bp_sys2 1 NA 180 1 1
## bp_sys2 bp_sys2 2 NA 100 2 2
## bp_sys2 bp_sys2 6 NA 98 6 6
## bp_sys2 bp_sys2 10 NA 134 10 10
## bp_sys2 bp_sys2 11 NA 156 11 11
## bp_sys2 bp_sys2 15 NA 104 15 15
## bp_sys2 bp_sys2 16 NA 126 16 16
## bp_sys2 bp_sys2 20 NA 106 20 20
## bp_sys2 bp_sys2 23 NA 138 23 23
## bp_sys2 bp_sys2 28 NA 134 28 28
## bp_dia2 bp_dia2 1 NA 98 1 1
## bp_dia2 bp_dia2 2 NA 78 2 2
## bp_dia2 bp_dia2 6 NA 70 6 6
## bp_dia2 bp_dia2 10 NA 68 10 10
## bp_dia2 bp_dia2 11 NA 76 11 11
## bp_dia2 bp_dia2 15 NA 62 15 15
## bp_dia2 bp_dia2 16 NA 84 16 16
## bp_dia2 bp_dia2 20 NA 68 20 20
## bp_dia2 bp_dia2 23 NA 70 23 23
## bp_dia2 bp_dia2 28 NA 70 28 28
##
##
##
## Table: Non-identical attributes
##
##
## ----------------------------
## No non-identical attributes
## ----------------------------
With this information, we will do some computations to compare the means, variances and correlations between the datasets.
# Convert every column to numeric so that means, variances and correlations
# can be computed on all variables.
# NOTE: factor columns (e.g. sex, ethnicity, education) become their integer
# level codes, so statistics on those columns describe the codes rather than
# the categories, and the factor labels are overwritten in both data frames.
columns_to_convert <- c(
  "id", "sex", "age", "ethnicity", "education", "marital",
  "household_size", "household_income", "weight", "height", "bmi", "pulse",
  "bp_sys1", "bp_dia1", "bp_sys2", "bp_dia2", "time_sed", "drink_regularly",
  "days_drinking", "dep1", "dep2", "dep3", "dep4", "dep5", "dep6", "dep7",
  "dep8", "dep9"
)
data_complete[columns_to_convert] <-
  lapply(data_complete[columns_to_convert], as.numeric)
data_incomplete[columns_to_convert] <-
  lapply(data_incomplete[columns_to_convert], as.numeric)

# Compare the column means of the two data sets. Missing values are dropped
# per column inside mean(); cbind() receives only the two mean vectors, because
# any extra argument such as na.rm would be bound in as an additional column.
mean_complete <- vapply(data_complete, mean, numeric(1), na.rm = TRUE)
mean_incomplete <- vapply(data_incomplete, mean, numeric(1), na.rm = TRUE)
cbind(mean_complete, mean_incomplete)
## mean_complete mean_incomplete
## id 46743.24400 4.674324e+04
## sex 1.48800 1.488000e+00
## age 44.48400 4.448400e+01
## ethnicity 2.72000 2.720000e+00
## education 3.21600 3.216000e+00
## marital 2.59600 2.596000e+00
## household_size 3.30400 3.304000e+00
## household_income 7.49800 7.498000e+00
## weight 83.05860 8.358057e+01
## height 168.11680 1.681314e+02
## bmi 29.30561 2.898734e+01
## pulse 74.86400 7.486400e+01
## bp_sys1 122.15600 1.221560e+02
## bp_dia1 72.38800 7.238800e+01
## bp_sys2 120.86800 1.195500e+02
## bp_dia2 71.86000 7.057000e+01
## time_sed 307.42400 3.074240e+02
## drink_regularly 1.26000 1.255556e+00
## days_drinking 50.69400 5.069400e+01
## dep1 0.36800 3.680000e-01
## dep2 0.38200 3.152941e-01
## dep3 0.73800 6.917647e-01
## dep4 0.79400 7.940000e-01
## dep5 0.45000 4.094118e-01
## dep6 0.31200 2.682353e-01
## dep7 0.28600 2.860000e-01
## dep8 0.16400 1.673913e-01
## dep9 0.07000 6.205251e-02
Here we create a table that compares the variances for our two datasets.
# Column-wise variances. Missing values are removed per column for the
# incomplete data; the complete data contain no NA's, so none need removing.
var1 <- vapply(data_complete, var, numeric(1))
var2 <- vapply(data_incomplete, var, numeric(1), na.rm = TRUE)

# One row per variable: both variances and their difference
# (complete minus incomplete).
var_table <- data.frame(
  Variable = colnames(data_complete),
  Variance1 = var1,
  Variance2 = var2,
  Difference = var1 - var2
)
var_table
## Variable Variance1 Variance2 Difference
## id id 8.365722e+06 8.365722e+06 0.000000000
## sex sex 2.503567e-01 2.503567e-01 0.000000000
## age age 2.066510e+02 2.066510e+02 0.000000000
## ethnicity ethnicity 1.179960e+00 1.179960e+00 0.000000000
## education education 1.588521e+00 1.588521e+00 0.000000000
## marital marital 3.503792e+00 3.503792e+00 0.000000000
## household_size household_size 2.881347e+00 2.881347e+00 0.000000000
## household_income household_income 1.046693e+01 1.046693e+01 0.000000000
## weight weight 4.695250e+02 4.215897e+02 47.935305850
## height height 1.001351e+02 1.002984e+02 -0.163370733
## bmi bmi 4.777918e+01 4.051594e+01 7.263236522
## pulse pulse 1.629394e+02 1.629394e+02 0.000000000
## bp_sys1 bp_sys1 3.274706e+02 3.274706e+02 0.000000000
## bp_dia1 bp_dia1 1.418051e+02 1.418051e+02 0.000000000
## bp_sys2 bp_sys2 3.045837e+02 2.723584e+02 32.225347497
## bp_dia2 bp_dia2 1.389062e+02 1.253084e+02 13.597841498
## time_sed time_sed 4.108440e+04 4.108440e+04 0.000000000
## drink_regularly drink_regularly 1.927856e-01 1.906706e-01 0.002114945
## days_drinking days_drinking 7.769459e+03 7.769459e+03 0.000000000
## dep1 dep1 5.296353e-01 5.296353e-01 0.000000000
## dep2 dep2 5.932625e-01 4.805438e-01 0.112718685
## dep3 dep3 9.712986e-01 9.684462e-01 0.002852426
## dep4 dep4 9.895431e-01 9.895431e-01 0.000000000
## dep5 dep5 6.968938e-01 6.385905e-01 0.058303333
## dep6 dep6 4.876313e-01 4.137292e-01 0.073902073
## dep7 dep7 4.410862e-01 4.410862e-01 0.000000000
## dep8 dep8 2.576192e-01 2.660368e-01 -0.008417514
## dep9 dep9 1.093186e-01 9.183405e-02 0.017484583
Plotted, it looks like this:
# Bar chart of the per-variable variance differences (complete minus
# incomplete). The x-axis labels are rotated by 90 degrees and shrunk so the
# variable names do not overlap.
var_plot <- ggplot(
  data = var_table,
  mapping = aes(x = Variable, y = Difference)
) +
  geom_col(fill = "blue") +
  labs(
    title = "Differences in Variances",
    x = "Variable",
    y = "Difference in Variance"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 8)
  )
var_plot
We now do the same for the correlations. The Correlation1 columns hold the correlation coefficients of each variable in the complete data set; the Correlation2 columns hold those of the incomplete data set.
# Correlation matrices of all variables. For the incomplete data only rows
# without any missing value enter the computation (use = "complete.obs").
cor1 <- cor(x = data_complete)
cor2 <- cor(x = data_incomplete, use = "complete.obs")

# Wide table: for every variable (row) the correlations with all variables in
# the complete data, in the incomplete data, and their difference.
cor_table <- data.frame(
  Variable = colnames(data_complete),
  Correlation1 = cor1,
  Correlation2 = cor2,
  Difference = cor1 - cor2
)
cor_table
## Variable Correlation1.id Correlation1.sex
## id id 1.000000000 -0.009061094
## sex sex -0.009061094 1.000000000
## age age -0.025562771 0.023098114
## ethnicity ethnicity -0.014312468 -0.039378345
## education education 0.021615281 0.035896163
## marital marital -0.038175760 -0.024443810
## household_size household_size -0.004221008 0.011382283
## household_income household_income 0.026733200 -0.061294379
## weight weight -0.025072187 -0.272949497
## height height 0.021715970 -0.689623117
## bmi bmi -0.038143481 0.042259004
## pulse pulse -0.035046305 0.114582565
## bp_sys1 bp_sys1 -0.043128147 -0.152286887
## bp_dia1 bp_dia1 0.027575190 -0.171757617
## bp_sys2 bp_sys2 -0.085713697 -0.162891182
## bp_dia2 bp_dia2 0.016405141 -0.165781630
## time_sed time_sed -0.003417041 -0.010580475
## drink_regularly drink_regularly -0.061661981 0.242276137
## days_drinking days_drinking 0.086961928 -0.242475355
## dep1 dep1 0.043327849 0.078192362
## dep2 dep2 -0.003542063 0.076917166
## dep3 dep3 -0.049868469 0.093177194
## dep4 dep4 -0.074186403 0.117824652
## dep5 dep5 0.042081421 0.149689429
## dep6 dep6 0.018947108 0.125447659
## dep7 dep7 -0.059105937 0.061608268
## dep8 dep8 -0.013813271 0.039328621
## dep9 dep9 0.014889936 -0.061537017
## Correlation1.age Correlation1.ethnicity Correlation1.education
## id -0.025562771 -0.014312468 2.161528e-02
## sex 0.023098114 -0.039378345 3.589616e-02
## age 1.000000000 0.050276740 -3.166380e-02
## ethnicity 0.050276740 1.000000000 2.711466e-01
## education -0.031663800 0.271146569 1.000000e+00
## marital -0.357051255 0.046756433 -1.115895e-01
## household_size -0.256527457 -0.200457902 -2.199695e-01
## household_income 0.084346917 0.085376036 4.040911e-01
## weight 0.007704952 0.121989730 8.578564e-02
## height -0.089705102 0.306217362 2.442509e-01
## bmi 0.061898028 -0.005645942 -2.006648e-02
## pulse -0.133970475 -0.001017479 -9.184198e-02
## bp_sys1 0.381669741 0.075425374 -1.107846e-01
## bp_dia1 0.172956118 0.046217061 1.281824e-05
## bp_sys2 0.362182470 0.061260550 -1.151354e-01
## bp_dia2 0.148466542 0.072380706 1.148349e-02
## time_sed -0.077221674 0.146806236 2.894475e-01
## drink_regularly 0.053365358 0.110925780 -9.444377e-02
## days_drinking 0.073618466 0.033219351 1.379435e-01
## dep1 -0.018016843 -0.016426778 -1.654866e-01
## dep2 -0.027229023 -0.029987944 -1.016807e-01
## dep3 -0.041670731 0.015574452 -9.954969e-02
## dep4 -0.014735481 0.026261028 -9.390923e-02
## dep5 -0.091662265 0.015469653 -1.135183e-01
## dep6 -0.065780258 0.015006119 -4.712416e-02
## dep7 -0.055878734 0.027889313 -7.873697e-02
## dep8 -0.071050606 -0.029223534 -1.557310e-01
## dep9 0.022371793 -0.079233176 -1.373454e-01
## Correlation1.marital Correlation1.household_size
## id -0.038175760 -0.004221008
## sex -0.024443810 0.011382283
## age -0.357051255 -0.256527457
## ethnicity 0.046756433 -0.200457902
## education -0.111589524 -0.219969510
## marital 1.000000000 -0.013618380
## household_size -0.013618380 1.000000000
## household_income -0.266191931 0.051199072
## weight -0.029178776 -0.095064813
## height 0.037691375 -0.150761239
## bmi -0.050470988 -0.031230985
## pulse 0.064625767 0.069983608
## bp_sys1 -0.042862289 -0.109454330
## bp_dia1 -0.081959670 -0.010605769
## bp_sys2 -0.027155085 -0.110530816
## bp_dia2 -0.058525366 -0.040340758
## time_sed 0.118101884 -0.199365701
## drink_regularly -0.049937065 0.060445050
## days_drinking -0.009034375 -0.139235885
## dep1 0.076991432 0.003348289
## dep2 0.075286584 -0.024622488
## dep3 0.078296909 -0.043335704
## dep4 0.076831300 -0.011497899
## dep5 0.149920678 -0.051477777
## dep6 0.095092075 -0.017623426
## dep7 0.068949030 -0.031058651
## dep8 0.120500874 0.049013792
## dep9 0.078166467 0.154825990
## Correlation1.household_income Correlation1.weight
## id 0.02673320 -0.025072187
## sex -0.06129438 -0.272949497
## age 0.08434692 0.007704952
## ethnicity 0.08537604 0.121989730
## education 0.40409105 0.085785639
## marital -0.26619193 -0.029178776
## household_size 0.05119907 -0.095064813
## household_income 1.00000000 -0.039477663
## weight -0.03947766 1.000000000
## height 0.16008286 0.415429394
## bmi -0.12162978 0.886379420
## pulse -0.11942971 0.152457915
## bp_sys1 -0.06657151 0.202607336
## bp_dia1 0.06082786 0.237188551
## bp_sys2 -0.05956108 0.216514575
## bp_dia2 0.06363893 0.209744411
## time_sed 0.12569705 0.118132995
## drink_regularly -0.09979702 -0.069555839
## days_drinking 0.19435804 0.022801412
## dep1 -0.14523184 0.054062624
## dep2 -0.16656517 0.035842786
## dep3 -0.19720311 0.128597838
## dep4 -0.13992236 0.083566675
## dep5 -0.22931647 0.148976260
## dep6 -0.13011837 0.060056746
## dep7 -0.07947655 0.054910755
## dep8 -0.13404338 0.018019985
## dep9 -0.05513571 0.011873820
## Correlation1.height Correlation1.bmi Correlation1.pulse
## id 0.021715970 -0.038143481 -0.035046305
## sex -0.689623117 0.042259004 0.114582565
## age -0.089705102 0.061898028 -0.133970475
## ethnicity 0.306217362 -0.005645942 -0.001017479
## education 0.244250903 -0.020066484 -0.091841980
## marital 0.037691375 -0.050470988 0.064625767
## household_size -0.150761239 -0.031230985 0.069983608
## household_income 0.160082860 -0.121629779 -0.119429710
## weight 0.415429394 0.886379420 0.152457915
## height 1.000000000 -0.038822222 -0.082254825
## bmi -0.038822222 1.000000000 0.211810098
## pulse -0.082254825 0.211810098 1.000000000
## bp_sys1 0.060416606 0.204099015 -0.007525159
## bp_dia1 0.195226206 0.167590329 0.170867088
## bp_sys2 0.084863522 0.206465963 0.011703570
## bp_dia2 0.182779568 0.144101143 0.150902776
## time_sed 0.136526507 0.059674333 0.062559283
## drink_regularly -0.213908045 0.029184378 0.067822017
## days_drinking 0.271969296 -0.108446083 -0.083106304
## dep1 -0.019397815 0.069563577 0.086510403
## dep2 -0.033075015 0.057931789 0.120457212
## dep3 -0.040010495 0.163298313 0.175416295
## dep4 -0.034741878 0.110121565 0.122311217
## dep5 -0.083071197 0.203279834 0.182721912
## dep6 -0.039009098 0.080526726 0.087055045
## dep7 0.016825197 0.044896165 0.089460416
## dep8 -0.050534781 0.039038055 0.127174396
## dep9 0.009213944 0.009429586 0.023627604
## Correlation1.bp_sys1 Correlation1.bp_dia1 Correlation1.bp_sys2
## id -0.043128147 2.757519e-02 -0.085713697
## sex -0.152286887 -1.717576e-01 -0.162891182
## age 0.381669741 1.729561e-01 0.362182470
## ethnicity 0.075425374 4.621706e-02 0.061260550
## education -0.110784638 1.281824e-05 -0.115135358
## marital -0.042862289 -8.195967e-02 -0.027155085
## household_size -0.109454330 -1.060577e-02 -0.110530816
## household_income -0.066571510 6.082786e-02 -0.059561079
## weight 0.202607336 2.371886e-01 0.216514575
## height 0.060416606 1.952262e-01 0.084863522
## bmi 0.204099015 1.675903e-01 0.206465963
## pulse -0.007525159 1.708671e-01 0.011703570
## bp_sys1 1.000000000 4.830037e-01 0.926381496
## bp_dia1 0.483003674 1.000000e+00 0.497291337
## bp_sys2 0.926381496 4.972913e-01 1.000000000
## bp_dia2 0.466530196 8.685409e-01 0.487850540
## time_sed -0.003782454 2.178395e-03 0.022736306
## drink_regularly -0.026805726 -5.766066e-02 -0.013818842
## days_drinking 0.148590830 1.172064e-01 0.124082412
## dep1 -0.029932158 6.072578e-02 -0.021097356
## dep2 -0.061219730 4.323728e-02 -0.045289054
## dep3 0.009937245 5.751556e-02 -0.001432162
## dep4 -0.026042674 2.740026e-02 -0.022462736
## dep5 -0.047371796 3.118607e-02 -0.051347675
## dep6 -0.059681989 -2.615469e-02 -0.050878215
## dep7 -0.102765982 -4.345257e-02 -0.071946080
## dep8 -0.025918588 -4.580857e-03 0.013760427
## dep9 0.055780801 7.656179e-02 0.067937920
## Correlation1.bp_dia2 Correlation1.time_sed
## id 0.016405141 -0.003417041
## sex -0.165781630 -0.010580475
## age 0.148466542 -0.077221674
## ethnicity 0.072380706 0.146806236
## education 0.011483486 0.289447488
## marital -0.058525366 0.118101884
## household_size -0.040340758 -0.199365701
## household_income 0.063638934 0.125697050
## weight 0.209744411 0.118132995
## height 0.182779568 0.136526507
## bmi 0.144101143 0.059674333
## pulse 0.150902776 0.062559283
## bp_sys1 0.466530196 -0.003782454
## bp_dia1 0.868540923 0.002178395
## bp_sys2 0.487850540 0.022736306
## bp_dia2 1.000000000 -0.002446444
## time_sed -0.002446444 1.000000000
## drink_regularly -0.032452292 -0.055598865
## days_drinking 0.112310342 0.116689916
## dep1 0.066765423 0.026518491
## dep2 0.054028153 0.068057904
## dep3 0.065502380 0.030903810
## dep4 0.076847235 0.078405683
## dep5 0.031672746 0.051348399
## dep6 0.019440766 0.084268961
## dep7 0.028167515 0.052525857
## dep8 0.026626086 -0.032895905
## dep9 0.106402543 -0.101067153
## Correlation1.drink_regularly Correlation1.days_drinking
## id -0.06166198 0.086961928
## sex 0.24227614 -0.242475355
## age 0.05336536 0.073618466
## ethnicity 0.11092578 0.033219351
## education -0.09444377 0.137943546
## marital -0.04993706 -0.009034375
## household_size 0.06044505 -0.139235885
## household_income -0.09979702 0.194358042
## weight -0.06955584 0.022801412
## height -0.21390805 0.271969296
## bmi 0.02918438 -0.108446083
## pulse 0.06782202 -0.083106304
## bp_sys1 -0.02680573 0.148590830
## bp_dia1 -0.05766066 0.117206398
## bp_sys2 -0.01381884 0.124082412
## bp_dia2 -0.03245229 0.112310342
## time_sed -0.05559887 0.116689916
## drink_regularly 1.00000000 -0.311575058
## days_drinking -0.31157506 1.000000000
## dep1 0.01354651 -0.091712111
## dep2 0.04349455 -0.103534470
## dep3 0.04195793 -0.091562615
## dep4 0.04487285 -0.106563193
## dep5 0.01366844 -0.060791679
## dep6 0.01594800 -0.071961869
## dep7 0.03999663 -0.102261637
## dep8 0.04208418 -0.042997630
## dep9 0.06764118 -0.034195320
## Correlation1.dep1 Correlation1.dep2 Correlation1.dep3
## id 0.043327849 -0.003542063 -0.049868469
## sex 0.078192362 0.076917166 0.093177194
## age -0.018016843 -0.027229023 -0.041670731
## ethnicity -0.016426778 -0.029987944 0.015574452
## education -0.165486570 -0.101680729 -0.099549689
## marital 0.076991432 0.075286584 0.078296909
## household_size 0.003348289 -0.024622488 -0.043335704
## household_income -0.145231837 -0.166565166 -0.197203111
## weight 0.054062624 0.035842786 0.128597838
## height -0.019397815 -0.033075015 -0.040010495
## bmi 0.069563577 0.057931789 0.163298313
## pulse 0.086510403 0.120457212 0.175416295
## bp_sys1 -0.029932158 -0.061219730 0.009937245
## bp_dia1 0.060725777 0.043237280 0.057515565
## bp_sys2 -0.021097356 -0.045289054 -0.001432162
## bp_dia2 0.066765423 0.054028153 0.065502380
## time_sed 0.026518491 0.068057904 0.030903810
## drink_regularly 0.013546512 0.043494545 0.041957926
## days_drinking -0.091712111 -0.103534470 -0.091562615
## dep1 1.000000000 0.531659479 0.355425881
## dep2 0.531659479 1.000000000 0.430426607
## dep3 0.355425881 0.430426607 1.000000000
## dep4 0.478628650 0.516163119 0.515146065
## dep5 0.389892773 0.380391220 0.516265612
## dep6 0.451876269 0.612566401 0.416029788
## dep7 0.407885666 0.506828795 0.371891354
## dep8 0.302860623 0.387922544 0.310417331
## dep9 0.200882083 0.359070197 0.277796046
## Correlation1.dep4 Correlation1.dep5 Correlation1.dep6
## id -0.07418640 0.04208142 0.01894711
## sex 0.11782465 0.14968943 0.12544766
## age -0.01473548 -0.09166227 -0.06578026
## ethnicity 0.02626103 0.01546965 0.01500612
## education -0.09390923 -0.11351831 -0.04712416
## marital 0.07683130 0.14992068 0.09509207
## household_size -0.01149790 -0.05147778 -0.01762343
## household_income -0.13992236 -0.22931647 -0.13011837
## weight 0.08356667 0.14897626 0.06005675
## height -0.03474188 -0.08307120 -0.03900910
## bmi 0.11012156 0.20327983 0.08052673
## pulse 0.12231122 0.18272191 0.08705505
## bp_sys1 -0.02604267 -0.04737180 -0.05968199
## bp_dia1 0.02740026 0.03118607 -0.02615469
## bp_sys2 -0.02246274 -0.05134768 -0.05087821
## bp_dia2 0.07684724 0.03167275 0.01944077
## time_sed 0.07840568 0.05134840 0.08426896
## drink_regularly 0.04487285 0.01366844 0.01594800
## days_drinking -0.10656319 -0.06079168 -0.07196187
## dep1 0.47862865 0.38989277 0.45187627
## dep2 0.51616312 0.38039122 0.61256640
## dep3 0.51514607 0.51626561 0.41602979
## dep4 1.00000000 0.50762303 0.41582328
## dep5 0.50762303 1.00000000 0.40840103
## dep6 0.41582328 0.40840103 1.00000000
## dep7 0.44728951 0.28789904 0.52018848
## dep8 0.31313090 0.33627657 0.42075653
## dep9 0.22062947 0.16880761 0.33052440
## Correlation1.dep7 Correlation1.dep8 Correlation1.dep9
## id -0.05910594 -0.013813271 0.014889936
## sex 0.06160827 0.039328621 -0.061537017
## age -0.05587873 -0.071050606 0.022371793
## ethnicity 0.02788931 -0.029223534 -0.079233176
## education -0.07873697 -0.155730995 -0.137345384
## marital 0.06894903 0.120500874 0.078166467
## household_size -0.03105865 0.049013792 0.154825990
## household_income -0.07947655 -0.134043377 -0.055135708
## weight 0.05491076 0.018019985 0.011873820
## height 0.01682520 -0.050534781 0.009213944
## bmi 0.04489616 0.039038055 0.009429586
## pulse 0.08946042 0.127174396 0.023627604
## bp_sys1 -0.10276598 -0.025918588 0.055780801
## bp_dia1 -0.04345257 -0.004580857 0.076561787
## bp_sys2 -0.07194608 0.013760427 0.067937920
## bp_dia2 0.02816752 0.026626086 0.106402543
## time_sed 0.05252586 -0.032895905 -0.101067153
## drink_regularly 0.03999663 0.042084183 0.067641179
## days_drinking -0.10226164 -0.042997630 -0.034195320
## dep1 0.40788567 0.302860623 0.200882083
## dep2 0.50682879 0.387922544 0.359070197
## dep3 0.37189135 0.310417331 0.277796046
## dep4 0.44728951 0.313130895 0.220629469
## dep5 0.28789904 0.336276573 0.168807606
## dep6 0.52018848 0.420756525 0.330524395
## dep7 1.00000000 0.461019386 0.237190291
## dep8 0.46101939 1.000000000 0.229995578
## dep9 0.23719029 0.229995578 1.000000000
## Correlation2.id Correlation2.sex Correlation2.age
## id 1.000000000 -0.05317545 -0.002276105
## sex -0.053175454 1.00000000 -0.063952534
## age -0.002276105 -0.06395253 1.000000000
## ethnicity -0.035437642 -0.17985483 0.094081873
## education -0.003002854 -0.04289781 -0.002330056
## marital 0.027369336 0.12969303 -0.347414999
## household_size 0.061231696 0.15521282 -0.346197982
## household_income -0.033751166 -0.10301234 -0.030968901
## weight -0.020770620 -0.30441606 0.053511978
## height -0.003927402 -0.72253226 -0.021830956
## bmi -0.021902239 0.02515022 0.095499834
## pulse -0.054038979 0.16614422 -0.129248337
## bp_sys1 0.021249306 -0.28159678 0.395539507
## bp_dia1 0.035232574 -0.03699273 0.176862727
## bp_sys2 0.013395807 -0.31524051 0.397004628
## bp_dia2 0.038168565 -0.04151953 0.157760752
## time_sed 0.091144103 -0.09773092 -0.171538179
## drink_regularly 0.116209870 0.12679009 -0.011966665
## days_drinking -0.011595180 -0.24739987 0.051273908
## dep1 0.131402973 0.01769036 -0.125731081
## dep2 0.035251327 0.02870827 -0.106066244
## dep3 -0.013550269 0.13594785 -0.018909433
## dep4 -0.047593785 0.15351734 -0.130332994
## dep5 0.106406993 0.16571963 -0.169892392
## dep6 0.086418021 0.03935623 -0.022402285
## dep7 -0.047095282 0.15010310 -0.151210760
## dep8 0.101114771 0.10655202 -0.141801472
## dep9 0.001234142 -0.01798982 -0.015020876
## Correlation2.ethnicity Correlation2.education
## id -0.035437642 -0.003002854
## sex -0.179854832 -0.042897808
## age 0.094081873 -0.002330056
## ethnicity 1.000000000 0.205012197
## education 0.205012197 1.000000000
## marital 0.118635238 -0.059525948
## household_size -0.315564020 -0.307730824
## household_income 0.043563631 0.324451009
## weight 0.241620888 0.077728264
## height 0.404234125 0.287553219
## bmi 0.089883360 -0.050273227
## pulse 0.116095769 -0.087687403
## bp_sys1 0.132275904 0.044164422
## bp_dia1 0.114125051 0.033329734
## bp_sys2 0.121183962 0.033293549
## bp_dia2 0.115024449 0.105443445
## time_sed 0.101396736 0.329169644
## drink_regularly 0.050131549 -0.046961454
## days_drinking 0.153414092 0.130718354
## dep1 0.009757537 -0.007678133
## dep2 0.124161734 0.021260763
## dep3 0.070259535 -0.242021635
## dep4 0.093364780 -0.052406423
## dep5 0.060258975 -0.199214036
## dep6 0.044150074 -0.043504103
## dep7 -0.050950161 -0.001121101
## dep8 0.032973997 -0.146332016
## dep9 0.054876685 -0.031959361
## Correlation2.marital Correlation2.household_size
## id 2.736934e-02 0.061231696
## sex 1.296930e-01 0.155212815
## age -3.474150e-01 -0.346197982
## ethnicity 1.186352e-01 -0.315564020
## education -5.952595e-02 -0.307730824
## marital 1.000000e+00 0.060803128
## household_size 6.080313e-02 1.000000000
## household_income -2.605452e-01 -0.036424322
## weight 5.686866e-02 -0.169090175
## height 2.308751e-02 -0.281971238
## bmi 3.896273e-02 -0.067980008
## pulse 4.389539e-05 0.128496999
## bp_sys1 -6.932595e-02 -0.173709027
## bp_dia1 -8.785076e-04 0.008893365
## bp_sys2 -6.459353e-02 -0.128000070
## bp_dia2 6.124346e-03 -0.042865748
## time_sed 2.085367e-01 -0.161443254
## drink_regularly -1.543426e-02 0.125171867
## days_drinking 1.035354e-01 -0.164332991
## dep1 1.292284e-01 -0.018798947
## dep2 1.977895e-01 -0.001860202
## dep3 9.738092e-02 0.044880660
## dep4 1.561584e-01 0.039257738
## dep5 2.362056e-01 0.029350780
## dep6 1.453389e-01 -0.043106569
## dep7 1.129602e-01 0.088616396
## dep8 1.932295e-01 0.033152707
## dep9 1.451907e-01 0.122896158
## Correlation2.household_income Correlation2.weight
## id -0.033751166 -0.02077062
## sex -0.103012341 -0.30441606
## age -0.030968901 0.05351198
## ethnicity 0.043563631 0.24162089
## education 0.324451009 0.07772826
## marital -0.260545165 0.05686866
## household_size -0.036424322 -0.16909017
## household_income 1.000000000 0.04816387
## weight 0.048163873 1.00000000
## height 0.172106834 0.47330089
## bmi -0.030371914 0.88505114
## pulse 0.045798547 0.20259261
## bp_sys1 0.029065977 0.27989266
## bp_dia1 -0.013716254 0.16079838
## bp_sys2 0.053269188 0.26796311
## bp_dia2 0.033881106 0.14935624
## time_sed 0.247685742 0.14077805
## drink_regularly -0.111498516 -0.02618593
## days_drinking 0.120322164 0.18221234
## dep1 -0.083624100 0.24874122
## dep2 -0.122138483 0.15931382
## dep3 -0.108340910 0.17249174
## dep4 -0.012093979 0.17099272
## dep5 -0.207468754 0.22497615
## dep6 -0.144415291 0.07451680
## dep7 -0.023387590 0.08447968
## dep8 -0.097552281 0.12106640
## dep9 0.009153002 0.15863236
## Correlation2.height Correlation2.bmi Correlation2.pulse
## id -0.003927402 -0.02190224 -5.403898e-02
## sex -0.722532256 0.02515022 1.661442e-01
## age -0.021830956 0.09549983 -1.292483e-01
## ethnicity 0.404234125 0.08988336 1.160958e-01
## education 0.287553219 -0.05027323 -8.768740e-02
## marital 0.023087513 0.03896273 4.389539e-05
## household_size -0.281971238 -0.06798001 1.284970e-01
## household_income 0.172106834 -0.03037191 4.579855e-02
## weight 0.473300889 0.88505114 2.025926e-01
## height 1.000000000 0.02406659 -3.645864e-02
## bmi 0.024066587 1.00000000 2.408402e-01
## pulse -0.036458642 0.24084019 1.000000e+00
## bp_sys1 0.203543405 0.21588066 -2.418255e-02
## bp_dia1 0.044673159 0.16128707 1.623017e-01
## bp_sys2 0.255127402 0.18314912 -2.280450e-02
## bp_dia2 0.068085621 0.13628992 9.133461e-02
## time_sed 0.220536773 0.03993516 2.534172e-02
## drink_regularly -0.111972367 0.03071495 2.103099e-02
## days_drinking 0.340214150 0.02402465 -5.042144e-02
## dep1 0.075345786 0.22507851 9.866183e-02
## dep2 0.018000752 0.17029592 5.637505e-02
## dep3 -0.078834207 0.22649222 2.553235e-01
## dep4 -0.064666372 0.21935462 2.756444e-02
## dep5 -0.133141369 0.31084715 9.709907e-02
## dep6 -0.009180345 0.08400255 -7.086921e-03
## dep7 -0.036462633 0.11564580 1.219219e-01
## dep8 -0.057950727 0.13907595 1.387164e-01
## dep9 0.057628941 0.14729224 2.413567e-02
## Correlation2.bp_sys1 Correlation2.bp_dia1 Correlation2.bp_sys2
## id 0.021249306 0.0352325744 0.013395807
## sex -0.281596777 -0.0369927263 -0.315240512
## age 0.395539507 0.1768627275 0.397004628
## ethnicity 0.132275904 0.1141250515 0.121183962
## education 0.044164422 0.0333297342 0.033293549
## marital -0.069325946 -0.0008785076 -0.064593535
## household_size -0.173709027 0.0088933651 -0.128000070
## household_income 0.029065977 -0.0137162536 0.053269188
## weight 0.279892656 0.1607983834 0.267963107
## height 0.203543405 0.0446731587 0.255127402
## bmi 0.215880663 0.1612870653 0.183149116
## pulse -0.024182553 0.1623017122 -0.022804501
## bp_sys1 1.000000000 0.4949457172 0.919915343
## bp_dia1 0.494945717 1.0000000000 0.482730495
## bp_sys2 0.919915343 0.4827304946 1.000000000
## bp_dia2 0.450601249 0.8845695997 0.459109573
## time_sed 0.031224411 -0.0383178177 0.046892537
## drink_regularly -0.095005305 0.0343748192 -0.119492100
## days_drinking 0.192123023 0.1583876956 0.194125369
## dep1 -0.080327579 0.0008762567 -0.023457433
## dep2 0.007780815 0.1014952948 -0.001674213
## dep3 0.086869817 0.1845356805 0.072666014
## dep4 0.020892273 0.0601421231 0.022045134
## dep5 0.070289844 0.1351529625 0.032105826
## dep6 -0.052080069 -0.0211730064 -0.041019017
## dep7 -0.125935014 -0.0752176411 -0.067651846
## dep8 0.021267349 0.0481398418 -0.008103926
## dep9 0.087144809 0.1012306737 0.065230696
## Correlation2.bp_dia2 Correlation2.time_sed
## id 0.038168565 0.091144103
## sex -0.041519534 -0.097730920
## age 0.157760752 -0.171538179
## ethnicity 0.115024449 0.101396736
## education 0.105443445 0.329169644
## marital 0.006124346 0.208536664
## household_size -0.042865748 -0.161443254
## household_income 0.033881106 0.247685742
## weight 0.149356243 0.140778051
## height 0.068085621 0.220536773
## bmi 0.136289921 0.039935162
## pulse 0.091334609 0.025341721
## bp_sys1 0.450601249 0.031224411
## bp_dia1 0.884569600 -0.038317818
## bp_sys2 0.459109573 0.046892537
## bp_dia2 1.000000000 -0.001216683
## time_sed -0.001216683 1.000000000
## drink_regularly -0.002095607 -0.218078067
## days_drinking 0.131839456 0.288441702
## dep1 0.036848526 0.047947832
## dep2 0.063765883 0.120509227
## dep3 0.143204430 -0.036670858
## dep4 0.105911308 0.015434764
## dep5 0.101878824 0.042338131
## dep6 -0.033636785 0.005146929
## dep7 -0.076340986 0.040743386
## dep8 0.054925393 0.088082824
## dep9 0.088322140 0.054415820
## Correlation2.drink_regularly Correlation2.days_drinking
## id 0.116209870 -0.01159518
## sex 0.126790089 -0.24739987
## age -0.011966665 0.05127391
## ethnicity 0.050131549 0.15341409
## education -0.046961454 0.13071835
## marital -0.015434258 0.10353541
## household_size 0.125171867 -0.16433299
## household_income -0.111498516 0.12032216
## weight -0.026185929 0.18221234
## height -0.111972367 0.34021415
## bmi 0.030714949 0.02402465
## pulse 0.021030988 -0.05042144
## bp_sys1 -0.095005305 0.19212302
## bp_dia1 0.034374819 0.15838770
## bp_sys2 -0.119492100 0.19412537
## bp_dia2 -0.002095607 0.13183946
## time_sed -0.218078067 0.28844170
## drink_regularly 1.000000000 -0.30302913
## days_drinking -0.303029134 1.00000000
## dep1 -0.167616110 0.01606481
## dep2 -0.037649562 -0.05630038
## dep3 -0.018145893 -0.06264327
## dep4 -0.156416401 -0.08323949
## dep5 -0.083941151 -0.02280718
## dep6 -0.003833638 -0.11723723
## dep7 -0.052426078 -0.16428494
## dep8 -0.012342702 0.08353104
## dep9 0.132178372 -0.08778648
## Correlation2.dep1 Correlation2.dep2 Correlation2.dep3
## id 0.1314029735 0.035251327 -0.01355027
## sex 0.0176903573 0.028708275 0.13594785
## age -0.1257310813 -0.106066244 -0.01890943
## ethnicity 0.0097575373 0.124161734 0.07025954
## education -0.0076781334 0.021260763 -0.24202163
## marital 0.1292283906 0.197789477 0.09738092
## household_size -0.0187989473 -0.001860202 0.04488066
## household_income -0.0836241002 -0.122138483 -0.10834091
## weight 0.2487412200 0.159313815 0.17249174
## height 0.0753457859 0.018000752 -0.07883421
## bmi 0.2250785129 0.170295924 0.22649222
## pulse 0.0986618343 0.056375051 0.25532352
## bp_sys1 -0.0803275788 0.007780815 0.08686982
## bp_dia1 0.0008762567 0.101495295 0.18453568
## bp_sys2 -0.0234574335 -0.001674213 0.07266601
## bp_dia2 0.0368485263 0.063765883 0.14320443
## time_sed 0.0479478318 0.120509227 -0.03667086
## drink_regularly -0.1676161096 -0.037649562 -0.01814589
## days_drinking 0.0160648051 -0.056300378 -0.06264327
## dep1 1.0000000000 0.449989580 0.31272982
## dep2 0.4499895804 1.000000000 0.31103697
## dep3 0.3127298241 0.311036973 1.00000000
## dep4 0.4006425699 0.376959408 0.49823240
## dep5 0.4163066105 0.437692284 0.61815208
## dep6 0.4702227247 0.654100215 0.25789713
## dep7 0.3883858961 0.465352056 0.27027105
## dep8 0.3050335689 0.239866474 0.32056362
## dep9 0.0992030215 0.450969361 0.18524844
## Correlation2.dep4 Correlation2.dep5 Correlation2.dep6
## id -0.04759379 0.10640699 0.086418021
## sex 0.15351734 0.16571963 0.039356233
## age -0.13033299 -0.16989239 -0.022402285
## ethnicity 0.09336478 0.06025898 0.044150074
## education -0.05240642 -0.19921404 -0.043504103
## marital 0.15615839 0.23620555 0.145338886
## household_size 0.03925774 0.02935078 -0.043106569
## household_income -0.01209398 -0.20746875 -0.144415291
## weight 0.17099272 0.22497615 0.074516799
## height -0.06466637 -0.13314137 -0.009180345
## bmi 0.21935462 0.31084715 0.084002546
## pulse 0.02756444 0.09709907 -0.007086921
## bp_sys1 0.02089227 0.07028984 -0.052080069
## bp_dia1 0.06014212 0.13515296 -0.021173006
## bp_sys2 0.02204513 0.03210583 -0.041019017
## bp_dia2 0.10591131 0.10187882 -0.033636785
## time_sed 0.01543476 0.04233813 0.005146929
## drink_regularly -0.15641640 -0.08394115 -0.003833638
## days_drinking -0.08323949 -0.02280718 -0.117237230
## dep1 0.40064257 0.41630661 0.470222725
## dep2 0.37695941 0.43769228 0.654100215
## dep3 0.49823240 0.61815208 0.257897135
## dep4 1.00000000 0.57074546 0.261577176
## dep5 0.57074546 1.00000000 0.349493047
## dep6 0.26157718 0.34949305 1.000000000
## dep7 0.42079208 0.45082819 0.463516413
## dep8 0.30972311 0.55906700 0.312811391
## dep9 0.22360680 0.28211304 0.521096600
## Correlation2.dep7 Correlation2.dep8 Correlation2.dep9
## id -0.047095282 0.101114771 0.001234142
## sex 0.150103101 0.106552019 -0.017989824
## age -0.151210760 -0.141801472 -0.015020876
## ethnicity -0.050950161 0.032973997 0.054876685
## education -0.001121101 -0.146332016 -0.031959361
## marital 0.112960173 0.193229491 0.145190702
## household_size 0.088616396 0.033152707 0.122896158
## household_income -0.023387590 -0.097552281 0.009153002
## weight 0.084479677 0.121066400 0.158632364
## height -0.036462633 -0.057950727 0.057628941
## bmi 0.115645801 0.139075945 0.147292236
## pulse 0.121921893 0.138716409 0.024135667
## bp_sys1 -0.125935014 0.021267349 0.087144809
## bp_dia1 -0.075217641 0.048139842 0.101230674
## bp_sys2 -0.067651846 -0.008103926 0.065230696
## bp_dia2 -0.076340986 0.054925393 0.088322140
## time_sed 0.040743386 0.088082824 0.054415820
## drink_regularly -0.052426078 -0.012342702 0.132178372
## days_drinking -0.164284937 0.083531036 -0.087786479
## dep1 0.388385896 0.305033569 0.099203022
## dep2 0.465352056 0.239866474 0.450969361
## dep3 0.270271047 0.320563620 0.185248436
## dep4 0.420792081 0.309723105 0.223606798
## dep5 0.450828190 0.559067004 0.282113038
## dep6 0.463516413 0.312811391 0.521096600
## dep7 1.000000000 0.487490569 0.416927913
## dep8 0.487490569 1.000000000 0.313209182
## dep9 0.416927913 0.313209182 1.000000000
## Difference.id Difference.sex Difference.age
## id 0.000000000 0.044114360 -0.023286666
## sex 0.044114360 0.000000000 0.087050648
## age -0.023286666 0.087050648 0.000000000
## ethnicity 0.021125175 0.140476488 -0.043805133
## education 0.024618135 0.078793972 -0.029333744
## marital -0.065545095 -0.154136845 -0.009636256
## household_size -0.065452704 -0.143830532 0.089670525
## household_income 0.060484367 0.041717963 0.115315819
## weight -0.004301567 0.031466565 -0.045807026
## height 0.025643372 0.032909139 -0.067874147
## bmi -0.016241242 0.017108787 -0.033601806
## pulse 0.018992674 -0.051561660 -0.004722139
## bp_sys1 -0.064377452 0.129309891 -0.013869766
## bp_dia1 -0.007657384 -0.134764891 -0.003906610
## bp_sys2 -0.099109504 0.152349330 -0.034822158
## bp_dia2 -0.021763424 -0.124262097 -0.009294210
## time_sed -0.094561144 0.087150445 0.094316505
## drink_regularly -0.177871851 0.115486049 0.065332023
## days_drinking 0.098557108 0.004924517 0.022344558
## dep1 -0.088075125 0.060502005 0.107714238
## dep2 -0.038793391 0.048208891 0.078837221
## dep3 -0.036318200 -0.042770656 -0.022761297
## dep4 -0.026592618 -0.035692687 0.115597513
## dep5 -0.064325572 -0.016030206 0.078230127
## dep6 -0.067470913 0.086091426 -0.043377973
## dep7 -0.012010655 -0.088494834 0.095332027
## dep8 -0.114928042 -0.067223398 0.070750866
## dep9 0.013655795 -0.043547193 0.037392669
## Difference.ethnicity Difference.education Difference.marital
## id 0.02112517 0.024618135 -0.065545095
## sex 0.14047649 0.078793972 -0.154136845
## age -0.04380513 -0.029333744 -0.009636256
## ethnicity 0.00000000 0.066134371 -0.071878804
## education 0.06613437 0.000000000 -0.052063576
## marital -0.07187880 -0.052063576 0.000000000
## household_size 0.11510612 0.087761315 -0.074421507
## household_income 0.04181241 0.079640044 -0.005646766
## weight -0.11963116 0.008057375 -0.086047440
## height -0.09801676 -0.043302316 0.014603862
## bmi -0.09552930 0.030206742 -0.089433713
## pulse -0.11711325 -0.004154577 0.064581872
## bp_sys1 -0.05685053 -0.154949060 0.026463658
## bp_dia1 -0.06790799 -0.033316916 -0.081081162
## bp_sys2 -0.05992341 -0.148428906 0.037438450
## bp_dia2 -0.04264374 -0.093959959 -0.064649712
## time_sed 0.04540950 -0.039722156 -0.090434780
## drink_regularly 0.06079423 -0.047482311 -0.034502807
## days_drinking -0.12019474 0.007225192 -0.112569782
## dep1 -0.02618432 -0.157808437 -0.052236958
## dep2 -0.15414968 -0.122941492 -0.122502892
## dep3 -0.05468508 0.142471946 -0.019084006
## dep4 -0.06710375 -0.041502804 -0.079327093
## dep5 -0.04478932 0.085695723 -0.086284873
## dep6 -0.02914395 -0.003620056 -0.050246811
## dep7 0.07883947 -0.077615871 -0.044011143
## dep8 -0.06219753 -0.009398979 -0.072728617
## dep9 -0.13410986 -0.105386022 -0.067024235
## Difference.household_size Difference.household_income
## id -0.065452704 0.060484367
## sex -0.143830532 0.041717963
## age 0.089670525 0.115315819
## ethnicity 0.115106118 0.041812405
## education 0.087761315 0.079640044
## marital -0.074421507 -0.005646766
## household_size 0.000000000 0.087623394
## household_income 0.087623394 0.000000000
## weight 0.074025362 -0.087641536
## height 0.131209999 -0.012023974
## bmi 0.036749022 -0.091257864
## pulse -0.058513391 -0.165228256
## bp_sys1 0.064254697 -0.095637487
## bp_dia1 -0.019499134 0.074544116
## bp_sys2 0.017469254 -0.112830267
## bp_dia2 0.002524989 0.029757828
## time_sed -0.037922447 -0.121988693
## drink_regularly -0.064726817 0.011701492
## days_drinking 0.025097106 0.074035878
## dep1 0.022147236 -0.061607736
## dep2 -0.022762286 -0.044426684
## dep3 -0.088216364 -0.088862201
## dep4 -0.050755636 -0.127828378
## dep5 -0.080828557 -0.021847716
## dep6 0.025483144 0.014296921
## dep7 -0.119675048 -0.056088964
## dep8 0.015861086 -0.036491096
## dep9 0.031929832 -0.064288710
## Difference.weight Difference.height Difference.bmi
## id -0.004301567 0.025643372 -0.016241242
## sex 0.031466565 0.032909139 0.017108787
## age -0.045807026 -0.067874147 -0.033601806
## ethnicity -0.119631157 -0.098016763 -0.095529302
## education 0.008057375 -0.043302316 0.030206742
## marital -0.086047440 0.014603862 -0.089433713
## household_size 0.074025362 0.131209999 0.036749022
## household_income -0.087641536 -0.012023974 -0.091257864
## weight 0.000000000 -0.057871496 0.001328277
## height -0.057871496 0.000000000 -0.062888809
## bmi 0.001328277 -0.062888809 0.000000000
## pulse -0.050134697 -0.045796183 -0.029030088
## bp_sys1 -0.077285319 -0.143126799 -0.011781648
## bp_dia1 0.076390168 0.150553047 0.006303263
## bp_sys2 -0.051448532 -0.170263879 0.023316848
## bp_dia2 0.060388168 0.114693947 0.007811222
## time_sed -0.022645056 -0.084010266 0.019739171
## drink_regularly -0.043369910 -0.101935678 -0.001530571
## days_drinking -0.159410929 -0.068244854 -0.132470732
## dep1 -0.194678596 -0.094743601 -0.155514936
## dep2 -0.123471029 -0.051075768 -0.112364135
## dep3 -0.043893899 0.038823712 -0.063193912
## dep4 -0.087426042 0.029924494 -0.109233051
## dep5 -0.075999885 0.050070172 -0.107567319
## dep6 -0.014460053 -0.029828753 -0.003475820
## dep7 -0.029568922 0.053287829 -0.070749636
## dep8 -0.103046415 0.007415945 -0.100037890
## dep9 -0.146758543 -0.048414997 -0.137862650
## Difference.pulse Difference.bp_sys1 Difference.bp_dia1
## id 0.0189926738 -0.064377452 -0.007657384
## sex -0.0515616596 0.129309891 -0.134764891
## age -0.0047221385 -0.013869766 -0.003906610
## ethnicity -0.1171132483 -0.056850530 -0.067907991
## education -0.0041545769 -0.154949060 -0.033316916
## marital 0.0645818721 0.026463658 -0.081081162
## household_size -0.0585133911 0.064254697 -0.019499134
## household_income -0.1652282563 -0.095637487 0.074544116
## weight -0.0501346973 -0.077285319 0.076390168
## height -0.0457961833 -0.143126799 0.150553047
## bmi -0.0290300879 -0.011781648 0.006303263
## pulse 0.0000000000 0.016657394 0.008565375
## bp_sys1 0.0166573938 0.000000000 -0.011942043
## bp_dia1 0.0085653753 -0.011942043 0.000000000
## bp_sys2 0.0345080714 0.006466153 0.014560843
## bp_dia2 0.0595681670 0.015928947 -0.016028676
## time_sed 0.0372175627 -0.035006865 0.040496212
## drink_regularly 0.0467910286 0.068199579 -0.092035484
## days_drinking -0.0326848646 -0.043532193 -0.041181298
## dep1 -0.0121514311 0.050395420 0.059849521
## dep2 0.0640821613 -0.069000545 -0.058258015
## dep3 -0.0799072237 -0.076932572 -0.127020116
## dep4 0.0947467759 -0.046934947 -0.032741861
## dep5 0.0856228410 -0.117661639 -0.103966896
## dep6 0.0941419664 -0.007601920 -0.004981686
## dep7 -0.0324614771 0.023169032 0.031765071
## dep8 -0.0115420136 -0.047185937 -0.052720699
## dep9 -0.0005080625 -0.031364008 -0.024668887
## Difference.bp_sys2 Difference.bp_dia2 Difference.time_sed
## id -0.099109504 -0.021763424 -0.094561144
## sex 0.152349330 -0.124262097 0.087150445
## age -0.034822158 -0.009294210 0.094316505
## ethnicity -0.059923412 -0.042643742 0.045409499
## education -0.148428906 -0.093959959 -0.039722156
## marital 0.037438450 -0.064649712 -0.090434780
## household_size 0.017469254 0.002524989 -0.037922447
## household_income -0.112830267 0.029757828 -0.121988693
## weight -0.051448532 0.060388168 -0.022645056
## height -0.170263879 0.114693947 -0.084010266
## bmi 0.023316848 0.007811222 0.019739171
## pulse 0.034508071 0.059568167 0.037217563
## bp_sys1 0.006466153 0.015928947 -0.035006865
## bp_dia1 0.014560843 -0.016028676 0.040496212
## bp_sys2 0.000000000 0.028740967 -0.024156231
## bp_dia2 0.028740967 0.000000000 -0.001229762
## time_sed -0.024156231 -0.001229762 0.000000000
## drink_regularly 0.105673258 -0.030356685 0.162479202
## days_drinking -0.070042957 -0.019529114 -0.171751786
## dep1 0.002360078 0.029916897 -0.021429341
## dep2 -0.043614841 -0.009737730 -0.052451323
## dep3 -0.074098176 -0.077702049 0.067574669
## dep4 -0.044507870 -0.029064073 0.062970919
## dep5 -0.083453502 -0.070206078 0.009010268
## dep6 -0.009859198 0.053077551 0.079122032
## dep7 -0.004294235 0.104508502 0.011782471
## dep8 0.021864353 -0.028299307 -0.120978729
## dep9 0.002707224 0.018080403 -0.155482973
## Difference.drink_regularly Difference.days_drinking
## id -0.177871851 0.098557108
## sex 0.115486049 0.004924517
## age 0.065332023 0.022344558
## ethnicity 0.060794231 -0.120194742
## education -0.047482311 0.007225192
## marital -0.034502807 -0.112569782
## household_size -0.064726817 0.025097106
## household_income 0.011701492 0.074035878
## weight -0.043369910 -0.159410929
## height -0.101935678 -0.068244854
## bmi -0.001530571 -0.132470732
## pulse 0.046791029 -0.032684865
## bp_sys1 0.068199579 -0.043532193
## bp_dia1 -0.092035484 -0.041181298
## bp_sys2 0.105673258 -0.070042957
## bp_dia2 -0.030356685 -0.019529114
## time_sed 0.162479202 -0.171751786
## drink_regularly 0.000000000 -0.008545924
## days_drinking -0.008545924 0.000000000
## dep1 0.181162622 -0.107776916
## dep2 0.081144107 -0.047234092
## dep3 0.060103819 -0.028919342
## dep4 0.201289255 -0.023323698
## dep5 0.097609590 -0.037984502
## dep6 0.019781639 0.045275361
## dep7 0.092422713 0.062023300
## dep8 0.054426885 -0.126528667
## dep9 -0.064537194 0.053591159
## Difference.dep1 Difference.dep2 Difference.dep3
## id -0.088075125 -0.03879339 -0.03631820
## sex 0.060502005 0.04820889 -0.04277066
## age 0.107714238 0.07883722 -0.02276130
## ethnicity -0.026184315 -0.15414968 -0.05468508
## education -0.157808437 -0.12294149 0.14247195
## marital -0.052236958 -0.12250289 -0.01908401
## household_size 0.022147236 -0.02276229 -0.08821636
## household_income -0.061607736 -0.04442668 -0.08886220
## weight -0.194678596 -0.12347103 -0.04389390
## height -0.094743601 -0.05107577 0.03882371
## bmi -0.155514936 -0.11236414 -0.06319391
## pulse -0.012151431 0.06408216 -0.07990722
## bp_sys1 0.050395420 -0.06900055 -0.07693257
## bp_dia1 0.059849521 -0.05825802 -0.12702012
## bp_sys2 0.002360078 -0.04361484 -0.07409818
## bp_dia2 0.029916897 -0.00973773 -0.07770205
## time_sed -0.021429341 -0.05245132 0.06757467
## drink_regularly 0.181162622 0.08114411 0.06010382
## days_drinking -0.107776916 -0.04723409 -0.02891934
## dep1 0.000000000 0.08166990 0.04269606
## dep2 0.081669899 0.00000000 0.11938963
## dep3 0.042696057 0.11938963 0.00000000
## dep4 0.077986080 0.13920371 0.01691367
## dep5 -0.026413838 -0.05730106 -0.10188646
## dep6 -0.018346456 -0.04153381 0.15813265
## dep7 0.019499770 0.04147674 0.10162031
## dep8 -0.002172946 0.14805607 -0.01014629
## dep9 0.101679062 -0.09189916 0.09254761
## Difference.dep4 Difference.dep5 Difference.dep6
## id -0.026592618 -0.064325572 -0.067470913
## sex -0.035692687 -0.016030206 0.086091426
## age 0.115597513 0.078230127 -0.043377973
## ethnicity -0.067103752 -0.044789322 -0.029143955
## education -0.041502804 0.085695723 -0.003620056
## marital -0.079327093 -0.086284873 -0.050246811
## household_size -0.050755636 -0.080828557 0.025483144
## household_income -0.127828378 -0.021847716 0.014296921
## weight -0.087426042 -0.075999885 -0.014460053
## height 0.029924494 0.050070172 -0.029828753
## bmi -0.109233051 -0.107567319 -0.003475820
## pulse 0.094746776 0.085622841 0.094141966
## bp_sys1 -0.046934947 -0.117661639 -0.007601920
## bp_dia1 -0.032741861 -0.103966896 -0.004981686
## bp_sys2 -0.044507870 -0.083453502 -0.009859198
## bp_dia2 -0.029064073 -0.070206078 0.053077551
## time_sed 0.062970919 0.009010268 0.079122032
## drink_regularly 0.201289255 0.097609590 0.019781639
## days_drinking -0.023323698 -0.037984502 0.045275361
## dep1 0.077986080 -0.026413838 -0.018346456
## dep2 0.139203711 -0.057301064 -0.041533814
## dep3 0.016913670 -0.101886465 0.158132653
## dep4 0.000000000 -0.063122427 0.154246099
## dep5 -0.063122427 0.000000000 0.058907982
## dep6 0.154246099 0.058907982 0.000000000
## dep7 0.026497433 -0.162929150 0.056672069
## dep8 0.003407790 -0.222790431 0.107945134
## dep9 -0.002977329 -0.113305432 -0.190572205
## Difference.dep7 Difference.dep8 Difference.dep9
## id -0.012010655 -0.114928042 0.0136557946
## sex -0.088494834 -0.067223398 -0.0435471935
## age 0.095332027 0.070750866 0.0373926690
## ethnicity 0.078839474 -0.062197530 -0.1341098608
## education -0.077615871 -0.009398979 -0.1053860223
## marital -0.044011143 -0.072728617 -0.0670242347
## household_size -0.119675048 0.015861086 0.0319298315
## household_income -0.056088964 -0.036491096 -0.0642887097
## weight -0.029568922 -0.103046415 -0.1467585435
## height 0.053287829 0.007415945 -0.0484149970
## bmi -0.070749636 -0.100037890 -0.1378626502
## pulse -0.032461477 -0.011542014 -0.0005080625
## bp_sys1 0.023169032 -0.047185937 -0.0313640077
## bp_dia1 0.031765071 -0.052720699 -0.0246688869
## bp_sys2 -0.004294235 0.021864353 0.0027072237
## bp_dia2 0.104508502 -0.028299307 0.0180804029
## time_sed 0.011782471 -0.120978729 -0.1554829729
## drink_regularly 0.092422713 0.054426885 -0.0645371937
## days_drinking 0.062023300 -0.126528667 0.0535911589
## dep1 0.019499770 -0.002172946 0.1016790618
## dep2 0.041476739 0.148056070 -0.0918991639
## dep3 0.101620307 -0.010146289 0.0925476095
## dep4 0.026497433 0.003407790 -0.0029773285
## dep5 -0.162929150 -0.222790431 -0.1133054317
## dep6 0.056672069 0.107945134 -0.1905722051
## dep7 0.000000000 -0.026471182 -0.1797376219
## dep8 -0.026471182 0.000000000 -0.0832136036
## dep9 -0.179737622 -0.083213604 0.0000000000
Let’s take a look at the missingness in our incomplete dataset. We start with a visualization that shows the amount of missingness per variable. From it, we see that 8.1% of the data is missing. This concerns data about weight, height, bmi, blood pressure, drinking habits and depression.
# Theme tweaks for the missingness overview: bold title, vertical x labels,
# uniform text size, and no legend title.
missing_plot_theme <- theme(
  legend.title = element_blank(),
  legend.text = element_text(size = 10),
  axis.text.y = element_text(size = 10),
  axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 10),
  plot.title = element_text(size = 20, face = "bold")
)

# Missingness plot for every variable of the incomplete data set.
vis_miss(data_incomplete) +
  ggtitle("Missing Data Plot for Incomplete Dataset") +
  missing_plot_theme
To be able to make a better comparison, we use the following plot.
# Missingness per variable, shown as a percentage (show_pct = TRUE).
data_incomplete %>%
  gg_miss_var(show_pct = TRUE)
## Research Question
Now that we have an understanding of our data, we formulate our research question, which involves a variable with missing values.
We are interested in whether age has a significant effect on the depression screening score. We think this is an interesting question, since the screening score is built from all depression variables together. Furthermore, it contains a substantial number of NA’s in total, which is in line with the purpose of this assignment.
The depression screening score is the sum of all scores given to the depression screening questions. To create this score, we add a new column to our data frame.
Our full research question and hypothesis are as follows. Rq: Does age have a (pos/neg) impact on the depression screening score? H1: Age has a (pos/neg) impact on depression screening score H0: Age has no significant correlation with depression screening score
# Append `depression_score` to `df`: the sum of the nine screening items
# dep1 ... dep9. The items are added left to right with `+`, so a row with
# any missing item gets an NA score.
add_depression_score <- function(df) {
  dep_items <- paste0("dep", 1:9)
  df %>% mutate(depression_score = Reduce(`+`, df[dep_items]))
}

data_complete1 <- add_depression_score(data_complete)
data_incomplete1 <- add_depression_score(data_incomplete)
The correlation test on the full data shows us that the relation is not significant at the 95% confidence level. However, with a p-value of 0.17 it is not that far away from 0.05. This seems interesting, since we would like to find out whether different imputations of the NA values will give us another result, i.e. whether they will tell us the effect is significant.
# Pearson correlation test between age and the depression score on the
# complete data; this is the reference result for the later comparisons.
cor.test(data_complete1$age, data_complete1$depression_score)
##
## Pearson's product-moment correlation
##
## data: data_complete1$age and data_complete1$depression_score
## t = -1.3695, df = 498, p-value = 0.1714
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.14815038 0.02657777
## sample estimates:
## cor
## -0.06125558
For the sake of our research, we make a subset of the data with only the involved variables included.
# Keep only the two variables involved in the research question.
research_vars <- c("age", "depression_score")
subset_complete <- data_complete1[, research_vars]
subset_incomplete <- data_incomplete1[, research_vars]
Now, we do an MCAR test to see whether the data are missing completely at random. The results show that, across all variables, the values are not missing completely at random. However, if we look at just the variables in our subset, the test does not reject the hypothesis that they are missing completely at random.
# MCAR test on the full incomplete data set (all variables).
# The very small p-value printed below rejects MCAR: the data are not MCAR
# if we look at all our variables.
out <- mcar_test(data_incomplete)
out$statistic # 1448.785
## [1] 1448.785
out$p.value # 1.968048e-10
## [1] 1.968048e-10
# Same MCAR test restricted to the two variables of our research subset
# (age and depression_score); here the test does not reject MCAR.
out_sub <- mcar_test(subset_incomplete)
out_sub$statistic #0.07599126
## [1] 0.07599126
out_sub$p.value # 0.7828053
## [1] 0.7828053
We start inspecting our research data with a quick summary. The minimum age of participants is 20, the maximum age is 69 and the mean age is 44.48. The lowest depression score is 0, the highest score is 22 and the mean score is 3.24. There are no missing values for age and 177 missing values for depression score.
# Quartiles, mean and NA count for age and depression_score.
summary(subset_incomplete)
## age depression_score
## Min. :20.00 Min. : 0.000
## 1st Qu.:32.00 1st Qu.: 0.000
## Median :45.00 Median : 2.000
## Mean :44.48 Mean : 3.238
## 3rd Qu.:57.00 3rd Qu.: 4.000
## Max. :69.00 Max. :22.000
## NA's :177
Visualizing the distribution of missing values between depression score and age:
# Plot depression score against age, showing where the missing values fall
ggplot(subset_incomplete, aes(x = age, y = depression_score)) +
  geom_miss_point()
Now we take a look at the missingness proportion.
# Share of missing entries in each column of the research subset
pm <- vapply(subset_incomplete, function(col) mean(is.na(col)), numeric(1))
pm["depression_score"] # 0.354
## depression_score
## 0.354
#Visualizing missing values
(vis <- vis_miss(subset_incomplete)) #35% of the depression scores are missing
# visualize the response patterns
plot_pattern(subset_incomplete) #177 values are missing for depression score
Here we compare the mean age of individuals with missing depression score values and those with non-missing values to see if the missingness relates to the observed data. The p-value of 0.7831269 indicates that there is no significant difference between the age of individuals with and without missing values in the depression score variable: the missingness in dep_score does not significantly depend on age.
# Logical indicator: TRUE where the depression score is missing
mDep <- is.na(subset_incomplete$depression_score)
# Two-sample t-test comparing the mean age of respondents with and without a
# missing depression score
out <- t.test(age ~ mDep, data = subset_incomplete)
out[["statistic"]] # -0.2754501
## t
## -0.2754501
out[["p.value"]] # 0.7831269
## [1] 0.7831269
In the visualization of the missing values, the missing values in dep_score are spread evenly along the age axis. The logistic regression model likewise shows no evidence that the missingness in dep_score depends on age (p = 0.78); note that in this model the outcome is coded 1 for an observed depression score and 0 for a missing one. Both the visualization and the model are consistent with the depression scores being missing completely at random (MCAR).
# Build the data set used to model whether the depression score was recorded.
# NOTE: despite its name, `missingness` is an "observed" indicator:
#   1 = depression_score observed, 0 = depression_score missing.
# The indicator is derived directly with is.na() instead of comparing the
# numeric score to "" and then zero-filling every NA in the data frame, which
# would also silently overwrite any NA in age.
incomplete_missingness <- subset_incomplete["age"]
incomplete_missingness$missingness <-
  as.numeric(!is.na(subset_incomplete$depression_score))
# View() opens a data viewer, so only call it in an interactive session
if (interactive()) {
  View(incomplete_missingness)
}
# Logit model: does the probability of an observed depression score depend on
# age? A non-significant age coefficient means no evidence of such dependence.
logit_model <- glm(missingness ~ age, data = incomplete_missingness, family = "binomial")
summary(logit_model)
##
## Call:
## glm(formula = missingness ~ age, family = "binomial", data = incomplete_missingness)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4609 -1.4331 0.9257 0.9392 0.9516
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.681464 0.305121 2.233 0.0255 *
## age -0.001795 0.006513 -0.276 0.7828
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 649.89 on 499 degrees of freedom
## Residual deviance: 649.81 on 498 degrees of freedom
## AIC: 653.81
##
## Number of Fisher Scoring iterations: 4
# Plot the 0/1 indicator against age together with the fitted logistic curve
ggplot(incomplete_missingness, aes(x = age, y = missingness)) +
  geom_point() +
  geom_smooth(
    method = "glm",
    method.args = list(family = "binomial"),
    se = FALSE
  )
## `geom_smooth()` using formula = 'y ~ x'
The correlation estimate for the dataset with missing values is -0.0978, which indicates a weak negative correlation between age and depression score. The p-value is 0.0792, which suggests that there is no strong evidence of a significant correlation between age and depression score. However, it is close to the conventional threshold of 0.05.
The correlation estimate for the complete subset is -0.0613, which indicates a weak negative correlation between age and depression_score. The p-value is 0.1714, which also suggests that there is no strong evidence of a significant correlation between age and depression_score.
We see that the p-value goes up when we look at the complete data, meaning that the missing values make the correlation appear closer to statistical significance than it actually is.
# Pearson correlation between age and depression score in the incomplete subset
cor_test1 <- cor.test(
  subset_incomplete$age,
  subset_incomplete$depression_score,
  method = "pearson"
)
cor_test1[["estimate"]] # -0.09782773
## cor
## -0.09782773
cor_test1[["p.value"]] # 0.07916073
## [1] 0.07916073
# The same test on the subset without missing values
cor_test2 <- cor.test(
  subset_complete$age,
  subset_complete$depression_score,
  method = "pearson"
)
cor_test2[["estimate"]] # -0.06125558
## cor
## -0.06125558
cor_test2[["p.value"]] # 0.1714459
## [1] 0.1714459
When looking at the complete subset, we see that the mean for age is 44.484 and for depression score 3.564. The variance for age is 206.65105 and 20.60311 for the depression score. They have a correlation of -0.06125558. According to the regression model we made, if age goes up by one year, the depression score goes down by 0.01934. The p-value of this effect is 0.171, which is 0.121 above the 0.05 threshold for statistical significance. This is interesting, as we would like to find out whether different imputation methods for our missing data can cause us to think the relation is statistically significant, while in reality it is not.
As the estimates of the linear model show, the linear association between age and depression score isn’t significant and the model only accounts for 0.38% of the variability. Therefore, this linear model is not an ideal representation of their relation. The visualization also agrees with the lack of a clear linear relation, with lots of points far away from the line.
# Means, variances and correlations of the research variables (complete data)
colMeans(subset_complete)
## age depression_score
## 44.484 3.564
# vapply() rather than sapply(): the result is guaranteed to be exactly one
# number per column, whatever the input looks like
vapply(subset_complete, var, numeric(1))
## age depression_score
## 206.65105 20.60311
cor(subset_complete)
## age depression_score
## age 1.00000000 -0.06125558
## depression_score -0.06125558 1.00000000
plot_corr(subset_complete, label = TRUE)
# Linear regression of depression score on age, fitted on the complete data;
# this is the reference model for the missing-data treatments that follow
model1 <- lm(depression_score ~ age, data = subset_complete)
summary(model1)
##
## Call:
## lm(formula = depression_score ~ age, data = subset_complete)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.038 -3.225 -1.718 1.291 20.233
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.42439 0.66016 6.702 5.58e-11 ***
## age -0.01934 0.01412 -1.370 0.171
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.535 on 498 degrees of freedom
## Multiple R-squared: 0.003752, Adjusted R-squared: 0.001752
## F-statistic: 1.876 on 1 and 498 DF, p-value: 0.1714
# Scatter plot of the complete data with the fitted regression line
ggmice(subset_complete, aes(x = age, y = depression_score)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
##Incomplete Data Analysis with Imputation Methods
Now we will analyze our incomplete data to see if we get different results. First we apply listwise deletion to the NA values, then mean imputation, and finally regression imputation.
Neither the mean nor the variance of the variable age changes, since it has no missing data. However, the variance of dep_score decreases because 35.4% of its values are missing and therefore dropped from the observed dataset, leading to sampling variability. The mean of depression score also decreases in the observed data. Nonetheless, the negative correlation between age and depression score remains weak but gets slightly stronger, from -0.06 in the complete data to -0.10 in the observed data. From the comparison of the linear models estimated from the complete data and the observed data, the R² slightly increases and the regression estimates are slightly biased in the model with deleted data.
# Proportion of missing values per variable. Age is fully observed (0), so
# deletion-based treatments drop rows only because of missing depression scores.
pm <- colMeans(is.na(subset_incomplete))
pm["age"]
## age
## 0
# Means, variances and correlations of the research variables, observed data only
colMeans(subset_incomplete, na.rm = TRUE)
## age depression_score
## 44.48400 3.23839
# vapply() rather than sapply(): the result is guaranteed to be exactly one
# number per column; na.rm = TRUE is forwarded to var()
vapply(subset_incomplete, var, numeric(1), na.rm = TRUE)
## age depression_score
## 206.65105 17.30635
cor(subset_incomplete, use = "pairwise.complete.obs")
## age depression_score
## age 1.00000000 -0.09782773
## depression_score -0.09782773 1.00000000
plot_corr(subset_incomplete, label = TRUE)
# Linear regression of depression score on age. lm() drops incomplete rows by
# default, so this fit is the listwise-deletion analysis.
model2 <- lm(depression_score ~ age, data = subset_incomplete)
summary(model2)
##
## Call:
## lm(formula = depression_score ~ age, data = subset_incomplete)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.926 -2.813 -1.571 1.232 18.497
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.49114 0.74780 6.006 5.15e-09 ***
## age -0.02825 0.01604 -1.761 0.0792 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.147 on 321 degrees of freedom
## (177 observations deleted due to missingness)
## Multiple R-squared: 0.00957, Adjusted R-squared: 0.006485
## F-statistic: 3.102 on 1 and 321 DF, p-value: 0.07916
# Scatter plot of the incomplete data with the regression line fitted on the
# observed cases
ggmice(subset_incomplete, aes(x = age, y = depression_score)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 177 rows containing non-finite values (`stat_smooth()`).
After mean imputation, Pearson’s product-moment correlation coefficient is -0.07876674, which indicates a weak negative correlation. With a p-value of 0.07847, there is not enough evidence to reject the null hypothesis of no correlation at the 5% significance level. Compared to the complete data, the correlation has become slightly more negative, going from -0.06125558 to -0.07876674. The p-value has gone down as well, from 0.1714 to 0.07847. Since the p-value is still greater than the typical significance level of 0.05, we do not have sufficient evidence to reject the null hypothesis. Therefore, we conclude that there is no significant linear association between age and depression score in the mean-imputed data at the 5% significance level. So, although mean imputation is normally not the smartest thing to do unless you know what you are doing, in this particular case it does not change the conclusion of the test.
# Mean imputation with mice: one imputed data set (m = 1), one iteration
miceOut <- mice(
  subset_incomplete,
  method = "mean",
  m = 1,
  maxit = 1
)
##
## iter imp variable
## 1 1 depression_score
# Extract the completed (imputed) data set
mean_imputed_data <- complete(miceOut)
# Pearson correlation test on the mean-imputed data
cor.test(mean_imputed_data$age, mean_imputed_data$depression_score)
##
## Pearson's product-moment correlation
##
## data: mean_imputed_data$age and mean_imputed_data$depression_score
## t = -1.7632, df = 498, p-value = 0.07847
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.165315474 0.008985917
## sample estimates:
## cor
## -0.07876674
Now we will show two histograms that plot the differences in depression score frequency for the mean imputed data set and the complete data set. This shows us that the lower depression scores seem more prevalent in the mean imputed data.
# Distribution of the depression score after mean imputation
hist(
  mean_imputed_data$depression_score,
  main = "Mean Imputed Depression Score",
  xlab = "Depression Score"
)
# Distribution of the depression score in the complete data
hist(
  data_complete1$depression_score,
  main = "Complete Data Set Depression Score",
  xlab = "Depression Score"
)
Here we show by means of a t-test that the difference in mean depression score between the full data set and the mean imputed data set is not significant, as indicated by the p-value of 0.1968.
# Welch two-sample t-test comparing the mean depression score in the complete
# data with the mean after mean imputation.
# NOTE(review): this test treats the two vectors as independent samples, but
# both appear to contain the same 500 respondents (observed scores are shared) -
# confirm that an independent-samples test is the intended comparison.
t.test(data_complete1$depression_score, mean_imputed_data$depression_score)
##
## Welch Two Sample t-test
##
## data: data_complete1$depression_score and mean_imputed_data$depression_score
## t = 1.2917, df = 917.11, p-value = 0.1968
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1690996 0.8203194
## sample estimates:
## mean of x mean of y
## 3.56400 3.23839
Here we show our regression model with the mean imputed data. Comparing it to the regression model with the complete subset data, the coefficient goes up from -0.01934 to -0.01831, indicating that mean imputation yields a slightly less negative relationship. The p-value goes down from 0.171 to 0.0785, indicating that mean imputation brings us closer to a statistically significant relation than there is in reality.
# Regress depression score on age in the mean-imputed data. glm() is used with
# its default gaussian family, i.e. an ordinary linear regression.
fit <- glm(depression_score ~ age, data = mean_imputed_data)
summary(fit)
##
## Call:
## glm(formula = depression_score ~ age, data = mean_imputed_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.6867 -2.3617 -0.2194 0.3390 18.5880
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.05292 0.48543 8.349 6.85e-16 ***
## age -0.01831 0.01038 -1.763 0.0785 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 11.12062)
##
## Null deviance: 5572.6 on 499 degrees of freedom
## Residual deviance: 5538.1 on 498 degrees of freedom
## AIC: 2627.3
##
## Number of Fisher Scoring iterations: 2
# Regression model on the incomplete data; rows with a missing depression score
# are dropped by lm().
# NOTE(review): the formula here is age ~ depression_score, whereas model1 and
# model2 regress depression_score on age - confirm the intended direction, as
# the coefficients are not comparable between the two forms.
fit <- with(subset_incomplete, lm(age ~ depression_score))
summary(fit)
##
## Call:
## lm(formula = age ~ depression_score)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.4502 -13.2643 -0.0949 12.9381 24.5828
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45.4502 1.0133 44.854 <2e-16 ***
## depression_score -0.3388 0.1924 -1.761 0.0792 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.36 on 321 degrees of freedom
## (177 observations deleted due to missingness)
## Multiple R-squared: 0.00957, Adjusted R-squared: 0.006485
## F-statistic: 3.102 on 1 and 321 DF, p-value: 0.07916
# Mean imputation: every missing depression score is replaced by the observed
# mean. One imputed data set (m = 1) and one iteration (maxit = 1) are requested.
imp <- mice(subset_incomplete, method = "mean", m = 1, maxit = 1)
##
## iter imp variable
## 1 1 depression_score
# Inspect only the first rows of the completed data instead of printing all
# 500 rows; imputed cells show the observed mean (3.23839)
head(complete(imp))
## age depression_score
## 1 49 0.00000
## 2 34 3.23839
## 3 68 3.23839
## 4 24 1.00000
## 5 67 0.00000
## 6 40 3.23839
# Observed means of the incomplete data; the depression_score mean is the value
# that mean imputation fills in
colMeans(subset_incomplete, na.rm = TRUE)
## age depression_score
## 44.48400 3.23839
# Regression model fitted on the mean-imputed data held in the mids object `imp`.
# NOTE(review): the formula here is age ~ depression_score, whereas model1 and
# model2 regress depression_score on age - confirm the intended direction.
fit <- with(imp, lm(age ~ depression_score))
summary(fit)
## # A tibble: 2 x 6
## term estimate std.error statistic p.value nobs
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 (Intercept) 45.6 0.894 51.0 7.78e-200 500
## 2 depression_score -0.339 0.192 -1.76 7.85e- 2 500
# Density plot of the depression score in the incomplete data
densityplot(subset_incomplete$depression_score)
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run:
##
## ggmice(my_mids, ggplot2::aes(x = my_vrb, group = .imp)) +
## ggplot2::geom_density()
##
## See amices.org/ggmice for more info.
#regression imputation
imp <- mice(subset_incomplete, method = "norm.predict", m = 1, maxit = 1)
##
## iter imp variable
## 1 1 depression_score
complete(imp)
## age depression_score
## 1 49 0.000000
## 2 34 3.530810
## 3 68 2.570477
## 4 24 1.000000
## 5 67 0.000000
## 6 40 3.361339
## 7 60 0.000000
## 8 60 2.796438
## 9 38 3.417829
## 10 21 3.897996
## 11 57 2.000000
## 12 47 3.163624
## 13 56 2.000000
## 14 61 1.000000
## 15 25 3.785015
## 16 34 0.000000
## 17 62 2.739948
## 18 32 0.000000
## 19 64 0.000000
## 20 35 3.502565
## 21 66 10.000000
## 22 48 3.135379
## 23 69 2.542232
## 24 22 5.000000
## 25 27 1.000000
## 26 69 3.000000
## 27 56 2.000000
## 28 55 2.937663
## 29 23 4.000000
## 30 46 3.191869
## 31 26 0.000000
## 32 29 8.000000
## 33 28 4.000000
## 34 28 3.700280
## 35 23 3.000000
## 36 60 0.000000
## 37 25 3.785015
## 38 37 4.000000
## 39 50 0.000000
## 40 40 3.361339
## 41 60 2.796438
## 42 21 3.897996
## 43 66 0.000000
## 44 59 0.000000
## 45 44 1.000000
## 46 34 3.530810
## 47 31 2.000000
## 48 66 3.000000
## 49 53 2.994153
## 50 28 3.700280
## 51 60 8.000000
## 52 35 1.000000
## 53 26 3.756770
## 54 66 2.626967
## 55 64 2.683457
## 56 63 2.711703
## 57 68 4.000000
## 58 59 0.000000
## 59 49 3.000000
## 60 50 3.078889
## 61 34 0.000000
## 62 42 3.304849
## 63 65 2.655212
## 64 33 0.000000
## 65 37 2.000000
## 66 21 3.897996
## 67 27 3.728525
## 68 23 11.000000
## 69 30 6.000000
## 70 38 1.000000
## 71 52 3.022398
## 72 40 3.361339
## 73 56 0.000000
## 74 26 3.756770
## 75 63 7.000000
## 76 55 1.000000
## 77 63 0.000000
## 78 22 10.000000
## 79 39 3.389584
## 80 54 0.000000
## 81 51 0.000000
## 82 33 3.559055
## 83 32 0.000000
## 84 36 3.474320
## 85 35 3.502565
## 86 62 2.739948
## 87 30 3.643790
## 88 62 4.000000
## 89 26 9.000000
## 90 20 10.000000
## 91 34 0.000000
## 92 59 4.000000
## 93 66 2.626967
## 94 61 5.000000
## 95 51 3.050643
## 96 58 0.000000
## 97 69 2.000000
## 98 44 0.000000
## 99 52 2.000000
## 100 30 0.000000
## 101 24 2.000000
## 102 36 3.474320
## 103 34 1.000000
## 104 38 3.417829
## 105 49 0.000000
## 106 30 1.000000
## 107 52 2.000000
## 108 51 11.000000
## 109 63 1.000000
## 110 63 2.711703
## 111 65 0.000000
## 112 41 3.000000
## 113 59 3.000000
## 114 27 2.000000
## 115 63 0.000000
## 116 31 5.000000
## 117 60 0.000000
## 118 25 1.000000
## 119 53 2.994153
## 120 30 3.643790
## 121 40 0.000000
## 122 66 0.000000
## 123 27 3.728525
## 124 43 5.000000
## 125 32 0.000000
## 126 53 5.000000
## 127 29 2.000000
## 128 50 3.078889
## 129 34 3.530810
## 130 37 19.000000
## 131 68 2.570477
## 132 49 3.107134
## 133 25 3.785015
## 134 45 3.220114
## 135 29 3.672035
## 136 34 4.000000
## 137 60 2.796438
## 138 50 4.000000
## 139 41 4.000000
## 140 41 3.333094
## 141 23 0.000000
## 142 60 2.796438
## 143 67 0.000000
## 144 33 9.000000
## 145 47 0.000000
## 146 28 5.000000
## 147 31 0.000000
## 148 32 10.000000
## 149 34 14.000000
## 150 25 0.000000
## 151 64 8.000000
## 152 50 3.078889
## 153 61 2.768193
## 154 50 0.000000
## 155 46 2.000000
## 156 27 3.000000
## 157 38 3.417829
## 158 68 6.000000
## 159 42 3.304849
## 160 27 0.000000
## 161 30 2.000000
## 162 68 0.000000
## 163 57 1.000000
## 164 32 3.587300
## 165 43 0.000000
## 166 63 0.000000
## 167 21 3.897996
## 168 34 0.000000
## 169 21 3.000000
## 170 39 3.389584
## 171 50 14.000000
## 172 61 2.768193
## 173 27 1.000000
## 174 32 1.000000
## 175 46 0.000000
## 176 24 1.000000
## 177 33 3.559055
## 178 43 3.276604
## 179 64 1.000000
## 180 34 0.000000
## 181 61 2.000000
## 182 45 3.220114
## 183 63 6.000000
## 184 27 0.000000
## 185 51 10.000000
## 186 68 2.570477
## 187 29 7.000000
## 188 47 3.163624
## 189 31 3.615545
## 190 44 3.000000
## 191 68 4.000000
## 192 51 3.050643
## 193 68 1.000000
## 194 68 3.000000
## 195 37 0.000000
## 196 50 1.000000
## 197 34 1.000000
## 198 57 9.000000
## 199 46 12.000000
## 200 45 2.000000
## 201 55 0.000000
## 202 27 3.728525
## 203 32 1.000000
## 204 42 2.000000
## 205 42 3.304849
## 206 50 3.078889
## 207 61 2.000000
## 208 32 4.000000
## 209 65 4.000000
## 210 24 10.000000
## 211 41 2.000000
## 212 64 10.000000
## 213 29 3.672035
## 214 52 3.022398
## 215 23 0.000000
## 216 67 2.598722
## 217 39 4.000000
## 218 35 0.000000
## 219 53 0.000000
## 220 62 2.000000
## 221 61 0.000000
## 222 47 2.000000
## 223 40 3.361339
## 224 52 1.000000
## 225 24 3.813260
## 226 61 2.768193
## 227 60 2.796438
## 228 20 3.926241
## 229 57 2.881173
## 230 28 3.700280
## 231 46 1.000000
## 232 57 3.000000
## 233 57 2.881173
## 234 60 0.000000
## 235 24 2.000000
## 236 33 8.000000
## 237 59 2.824683
## 238 21 3.000000
## 239 28 1.000000
## 240 26 1.000000
## 241 35 3.000000
## 242 29 1.000000
## 243 45 10.000000
## 244 20 12.000000
## 245 57 1.000000
## 246 39 18.000000
## 247 35 3.502565
## 248 69 2.542232
## 249 52 20.000000
## 250 66 2.626967
## 251 53 0.000000
## 252 54 0.000000
## 253 42 3.304849
## 254 57 16.000000
## 255 37 3.446074
## 256 21 3.897996
## 257 28 3.700280
## 258 22 1.000000
## 259 42 2.000000
## 260 68 2.570477
## 261 61 2.768193
## 262 41 3.000000
## 263 27 0.000000
## 264 66 2.000000
## 265 67 2.598722
## 266 58 0.000000
## 267 55 2.937663
## 268 52 3.022398
## 269 51 1.000000
## 270 45 5.000000
## 271 58 0.000000
## 272 33 3.559055
## 273 25 3.785015
## 274 66 3.000000
## 275 27 8.000000
## 276 50 7.000000
## 277 66 2.626967
## 278 64 5.000000
## 279 33 3.559055
## 280 61 3.000000
## 281 45 1.000000
## 282 20 7.000000
## 283 21 10.000000
## 284 36 10.000000
## 285 55 2.937663
## 286 60 1.000000
## 287 22 3.869751
## 288 48 3.135379
## 289 27 6.000000
## 290 62 2.000000
## 291 34 1.000000
## 292 34 0.000000
## 293 24 3.813260
## 294 37 2.000000
## 295 64 2.000000
## 296 22 1.000000
## 297 41 0.000000
## 298 49 0.000000
## 299 40 13.000000
## 300 32 0.000000
## 301 37 0.000000
## 302 43 0.000000
## 303 58 2.852928
## 304 62 3.000000
## 305 25 4.000000
## 306 35 22.000000
## 307 40 3.361339
## 308 21 3.897996
## 309 54 11.000000
## 310 50 2.000000
## 311 32 3.587300
## 312 67 1.000000
## 313 64 0.000000
## 314 56 2.909418
## 315 61 0.000000
## 316 44 6.000000
## 317 48 13.000000
## 318 38 0.000000
## 319 21 3.897996
## 320 44 1.000000
## 321 52 3.022398
## 322 62 2.739948
## 323 69 2.542232
## 324 66 1.000000
## 325 24 3.813260
## 326 52 3.022398
## 327 50 3.078889
## 328 32 0.000000
## 329 55 15.000000
## 330 67 0.000000
## 331 42 1.000000
## 332 28 17.000000
## 333 55 2.937663
## 334 63 8.000000
## 335 65 2.655212
## 336 60 2.000000
## 337 49 3.107134
## 338 33 3.559055
## 339 32 3.587300
## 340 37 4.000000
## 341 50 3.078889
## 342 21 4.000000
## 343 32 0.000000
## 344 45 3.000000
## 345 30 0.000000
## 346 53 2.994153
## 347 42 2.000000
## 348 55 10.000000
## 349 42 3.304849
## 350 40 6.000000
## 351 64 2.683457
## 352 62 1.000000
## 353 43 0.000000
## 354 22 2.000000
## 355 43 3.000000
## 356 46 3.191869
## 357 53 0.000000
## 358 61 0.000000
## 359 67 2.598722
## 360 58 0.000000
## 361 49 0.000000
## 362 45 2.000000
## 363 46 6.000000
## 364 47 5.000000
## 365 57 2.881173
## 366 24 3.813260
## 367 23 7.000000
## 368 25 2.000000
## 369 42 3.304849
## 370 62 2.739948
## 371 24 4.000000
## 372 65 1.000000
## 373 50 3.078889
## 374 38 3.417829
## 375 63 2.711703
## 376 35 4.000000
## 377 59 2.000000
## 378 23 3.841505
## 379 64 0.000000
## 380 36 2.000000
## 381 31 3.615545
## 382 30 3.643790
## 383 56 2.909418
## 384 36 0.000000
## 385 62 1.000000
## 386 53 4.000000
## 387 65 2.655212
## 388 43 4.000000
## 389 52 7.000000
## 390 65 1.000000
## 391 55 1.000000
## 392 43 3.000000
## 393 51 0.000000
## 394 24 3.000000
## 395 39 1.000000
## 396 42 0.000000
## 397 44 7.000000
## 398 55 6.000000
## 399 60 2.796438
## 400 32 0.000000
## 401 61 1.000000
## 402 64 0.000000
## 403 61 2.768193
## 404 52 3.022398
## 405 34 3.530810
## 406 50 3.078889
## 407 23 3.841505
## 408 23 0.000000
## 409 59 4.000000
## 410 32 13.000000
## 411 52 3.000000
## 412 24 0.000000
## 413 51 3.050643
## 414 57 0.000000
## 415 44 3.248359
## 416 22 6.000000
## 417 54 0.000000
## 418 40 3.361339
## 419 51 0.000000
## 420 23 8.000000
## 421 47 8.000000
## 422 49 3.107134
## 423 60 4.000000
## 424 29 3.672035
## 425 51 2.000000
## 426 31 3.000000
## 427 37 3.000000
## 428 54 0.000000
## 429 64 2.683457
## 430 60 0.000000
## 431 56 3.000000
## 432 28 1.000000
## 433 56 2.909418
## 434 31 1.000000
## 435 51 3.000000
## 436 67 2.598722
## 437 41 0.000000
## 438 34 3.530810
## 439 33 3.000000
## 440 23 0.000000
## 441 49 11.000000
## 442 30 2.000000
## 443 45 3.220114
## 444 46 3.191869
## 445 45 3.220114
## 446 60 5.000000
## 447 59 7.000000
## 448 28 3.000000
## 449 26 0.000000
## 450 53 2.994153
## 451 20 0.000000
## 452 33 1.000000
## 453 34 0.000000
## 454 47 1.000000
## 455 22 0.000000
## 456 41 3.333094
## 457 36 0.000000
## 458 39 3.389584
## 459 52 2.000000
## 460 24 3.813260
## 461 62 0.000000
## 462 37 9.000000
## 463 25 4.000000
## 464 47 12.000000
## 465 44 3.248359
## 466 24 6.000000
## 467 54 0.000000
## 468 26 6.000000
## 469 59 4.000000
## 470 43 0.000000
## 471 67 0.000000
## 472 60 2.796438
## 473 31 3.615545
## 474 44 4.000000
## 475 37 1.000000
## 476 57 0.000000
## 477 51 8.000000
## 478 66 0.000000
## 479 62 4.000000
## 480 37 18.000000
## 481 30 1.000000
## 482 48 3.135379
## 483 44 3.248359
## 484 22 3.869751
## 485 52 3.022398
## 486 38 1.000000
## 487 53 2.994153
## 488 48 13.000000
## 489 34 3.530810
## 490 32 15.000000
## 491 40 8.000000
## 492 28 0.000000
## 493 34 3.530810
## 494 57 2.000000
## 495 49 0.000000
## 496 27 3.728525
## 497 24 0.000000
## 498 55 10.000000
## 499 32 4.000000
## 500 45 3.220114
# Fit the regression of age on depression_score inside the imputed data
# currently stored in `imp`, then print the coefficient table.
fit <- with(data = imp, expr = lm(formula = age ~ depression_score))
summary(object = fit)
## # A tibble: 2 x 6
## term estimate std.error statistic p.value nobs
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 (Intercept) 46.2 0.888 52.0 2.88e-203 500
## 2 depression_score -0.520 0.191 -2.72 6.67e- 3 500
# Single imputation (m = 1, one iteration) with mice's "norm.nob" method.
# NOTE(review): no `seed` is passed, so the imputed values will differ between
# runs unless the RNG was seeded earlier in the file -- confirm.
imp <- mice(
  data = subset_incomplete,
  m = 1,
  maxit = 1,
  method = "norm.nob"
)
##
## iter imp variable
## 1 1 depression_score
# Print the completed data set (observed values plus the imputed ones).
complete(imp)
## age depression_score
## 1 49 0.00000000
## 2 34 1.81329563
## 3 68 3.38643558
## 4 24 1.00000000
## 5 67 0.00000000
## 6 40 9.64360648
## 7 60 0.00000000
## 8 60 4.32234129
## 9 38 1.57777629
## 10 21 13.20507349
## 11 57 2.00000000
## 12 47 11.54446729
## 13 56 2.00000000
## 14 61 1.00000000
## 15 25 1.82110627
## 16 34 0.00000000
## 17 62 1.89286619
## 18 32 0.00000000
## 19 64 0.00000000
## 20 35 1.34858925
## 21 66 10.00000000
## 22 48 4.44472779
## 23 69 -3.90104434
## 24 22 5.00000000
## 25 27 1.00000000
## 26 69 3.00000000
## 27 56 2.00000000
## 28 55 0.64866872
## 29 23 4.00000000
## 30 46 8.48852974
## 31 26 0.00000000
## 32 29 8.00000000
## 33 28 4.00000000
## 34 28 1.76512809
## 35 23 3.00000000
## 36 60 0.00000000
## 37 25 -1.90927616
## 38 37 4.00000000
## 39 50 0.00000000
## 40 40 4.33988312
## 41 60 -2.90416833
## 42 21 2.26037526
## 43 66 0.00000000
## 44 59 0.00000000
## 45 44 1.00000000
## 46 34 4.85875648
## 47 31 2.00000000
## 48 66 3.00000000
## 49 53 0.19965770
## 50 28 8.23887235
## 51 60 8.00000000
## 52 35 1.00000000
## 53 26 8.89803394
## 54 66 2.22254953
## 55 64 0.01056736
## 56 63 0.61649241
## 57 68 4.00000000
## 58 59 0.00000000
## 59 49 3.00000000
## 60 50 6.90527612
## 61 34 0.00000000
## 62 42 4.54406104
## 63 65 9.83433727
## 64 33 0.00000000
## 65 37 2.00000000
## 66 21 4.08993610
## 67 27 8.47928896
## 68 23 11.00000000
## 69 30 6.00000000
## 70 38 1.00000000
## 71 52 13.35669024
## 72 40 -1.06283625
## 73 56 0.00000000
## 74 26 2.77580603
## 75 63 7.00000000
## 76 55 1.00000000
## 77 63 0.00000000
## 78 22 10.00000000
## 79 39 3.44806813
## 80 54 0.00000000
## 81 51 0.00000000
## 82 33 4.72458723
## 83 32 0.00000000
## 84 36 1.74880972
## 85 35 2.75456281
## 86 62 8.79547195
## 87 30 2.19156589
## 88 62 4.00000000
## 89 26 9.00000000
## 90 20 10.00000000
## 91 34 0.00000000
## 92 59 4.00000000
## 93 66 9.19050153
## 94 61 5.00000000
## 95 51 7.80877721
## 96 58 0.00000000
## 97 69 2.00000000
## 98 44 0.00000000
## 99 52 2.00000000
## 100 30 0.00000000
## 101 24 2.00000000
## 102 36 -3.84081361
## 103 34 1.00000000
## 104 38 -1.00994544
## 105 49 0.00000000
## 106 30 1.00000000
## 107 52 2.00000000
## 108 51 11.00000000
## 109 63 1.00000000
## 110 63 -3.11795201
## 111 65 0.00000000
## 112 41 3.00000000
## 113 59 3.00000000
## 114 27 2.00000000
## 115 63 0.00000000
## 116 31 5.00000000
## 117 60 0.00000000
## 118 25 1.00000000
## 119 53 6.80605083
## 120 30 8.59744132
## 121 40 0.00000000
## 122 66 0.00000000
## 123 27 12.26800924
## 124 43 5.00000000
## 125 32 0.00000000
## 126 53 5.00000000
## 127 29 2.00000000
## 128 50 6.80964833
## 129 34 9.36473881
## 130 37 19.00000000
## 131 68 4.99722765
## 132 49 -0.82389309
## 133 25 1.93278156
## 134 45 4.29071073
## 135 29 0.75567050
## 136 34 4.00000000
## 137 60 1.36630035
## 138 50 4.00000000
## 139 41 4.00000000
## 140 41 5.20325087
## 141 23 0.00000000
## 142 60 2.93065444
## 143 67 0.00000000
## 144 33 9.00000000
## 145 47 0.00000000
## 146 28 5.00000000
## 147 31 0.00000000
## 148 32 10.00000000
## 149 34 14.00000000
## 150 25 0.00000000
## 151 64 8.00000000
## 152 50 1.19664084
## 153 61 3.22362973
## 154 50 0.00000000
## 155 46 2.00000000
## 156 27 3.00000000
## 157 38 -8.45485525
## 158 68 6.00000000
## 159 42 -1.46920366
## 160 27 0.00000000
## 161 30 2.00000000
## 162 68 0.00000000
## 163 57 1.00000000
## 164 32 5.59286997
## 165 43 0.00000000
## 166 63 0.00000000
## 167 21 10.40011293
## 168 34 0.00000000
## 169 21 3.00000000
## 170 39 0.45432134
## 171 50 14.00000000
## 172 61 2.69549685
## 173 27 1.00000000
## 174 32 1.00000000
## 175 46 0.00000000
## 176 24 1.00000000
## 177 33 6.06955804
## 178 43 -3.04253680
## 179 64 1.00000000
## 180 34 0.00000000
## 181 61 2.00000000
## 182 45 5.12061080
## 183 63 6.00000000
## 184 27 0.00000000
## 185 51 10.00000000
## 186 68 2.75808359
## 187 29 7.00000000
## 188 47 8.84118735
## 189 31 5.31654672
## 190 44 3.00000000
## 191 68 4.00000000
## 192 51 -2.92273925
## 193 68 1.00000000
## 194 68 3.00000000
## 195 37 0.00000000
## 196 50 1.00000000
## 197 34 1.00000000
## 198 57 9.00000000
## 199 46 12.00000000
## 200 45 2.00000000
## 201 55 0.00000000
## 202 27 0.52809977
## 203 32 1.00000000
## 204 42 2.00000000
## 205 42 -5.34824087
## 206 50 2.02014226
## 207 61 2.00000000
## 208 32 4.00000000
## 209 65 4.00000000
## 210 24 10.00000000
## 211 41 2.00000000
## 212 64 10.00000000
## 213 29 3.80231303
## 214 52 0.94959985
## 215 23 0.00000000
## 216 67 3.05266622
## 217 39 4.00000000
## 218 35 0.00000000
## 219 53 0.00000000
## 220 62 2.00000000
## 221 61 0.00000000
## 222 47 2.00000000
## 223 40 3.42184105
## 224 52 1.00000000
## 225 24 -2.21075063
## 226 61 -0.40934722
## 227 60 3.95598508
## 228 20 3.81975258
## 229 57 6.14145815
## 230 28 5.97966409
## 231 46 1.00000000
## 232 57 3.00000000
## 233 57 -1.36657519
## 234 60 0.00000000
## 235 24 2.00000000
## 236 33 8.00000000
## 237 59 4.85961536
## 238 21 3.00000000
## 239 28 1.00000000
## 240 26 1.00000000
## 241 35 3.00000000
## 242 29 1.00000000
## 243 45 10.00000000
## 244 20 12.00000000
## 245 57 1.00000000
## 246 39 18.00000000
## 247 35 9.93724945
## 248 69 1.75844570
## 249 52 20.00000000
## 250 66 9.61736168
## 251 53 0.00000000
## 252 54 0.00000000
## 253 42 8.63838718
## 254 57 16.00000000
## 255 37 -1.65953419
## 256 21 0.89104940
## 257 28 6.35552151
## 258 22 1.00000000
## 259 42 2.00000000
## 260 68 7.25261568
## 261 61 6.03684156
## 262 41 3.00000000
## 263 27 0.00000000
## 264 66 2.00000000
## 265 67 7.14027736
## 266 58 0.00000000
## 267 55 0.27370875
## 268 52 0.20625290
## 269 51 1.00000000
## 270 45 5.00000000
## 271 58 0.00000000
## 272 33 7.94892487
## 273 25 0.15886971
## 274 66 3.00000000
## 275 27 8.00000000
## 276 50 7.00000000
## 277 66 -5.14952354
## 278 64 5.00000000
## 279 33 -1.68042802
## 280 61 3.00000000
## 281 45 1.00000000
## 282 20 7.00000000
## 283 21 10.00000000
## 284 36 10.00000000
## 285 55 6.54845335
## 286 60 1.00000000
## 287 22 3.69402287
## 288 48 -0.04633854
## 289 27 6.00000000
## 290 62 2.00000000
## 291 34 1.00000000
## 292 34 0.00000000
## 293 24 1.39608419
## 294 37 2.00000000
## 295 64 2.00000000
## 296 22 1.00000000
## 297 41 0.00000000
## 298 49 0.00000000
## 299 40 13.00000000
## 300 32 0.00000000
## 301 37 0.00000000
## 302 43 0.00000000
## 303 58 0.94796896
## 304 62 3.00000000
## 305 25 4.00000000
## 306 35 22.00000000
## 307 40 7.23211889
## 308 21 -5.57306156
## 309 54 11.00000000
## 310 50 2.00000000
## 311 32 4.09467214
## 312 67 1.00000000
## 313 64 0.00000000
## 314 56 7.66172023
## 315 61 0.00000000
## 316 44 6.00000000
## 317 48 13.00000000
## 318 38 0.00000000
## 319 21 8.10460342
## 320 44 1.00000000
## 321 52 1.10202671
## 322 62 4.34644643
## 323 69 6.15214385
## 324 66 1.00000000
## 325 24 2.37191410
## 326 52 -1.29265768
## 327 50 2.66684995
## 328 32 0.00000000
## 329 55 15.00000000
## 330 67 0.00000000
## 331 42 1.00000000
## 332 28 17.00000000
## 333 55 0.08004416
## 334 63 8.00000000
## 335 65 5.60220250
## 336 60 2.00000000
## 337 49 -4.62408650
## 338 33 6.07802785
## 339 32 -1.46651304
## 340 37 4.00000000
## 341 50 10.93770548
## 342 21 4.00000000
## 343 32 0.00000000
## 344 45 3.00000000
## 345 30 0.00000000
## 346 53 5.13133854
## 347 42 2.00000000
## 348 55 10.00000000
## 349 42 6.44078598
## 350 40 6.00000000
## 351 64 -4.23551483
## 352 62 1.00000000
## 353 43 0.00000000
## 354 22 2.00000000
## 355 43 3.00000000
## 356 46 0.77033516
## 357 53 0.00000000
## 358 61 0.00000000
## 359 67 3.52959495
## 360 58 0.00000000
## 361 49 0.00000000
## 362 45 2.00000000
## 363 46 6.00000000
## 364 47 5.00000000
## 365 57 -0.60767178
## 366 24 5.03059499
## 367 23 7.00000000
## 368 25 2.00000000
## 369 42 -0.85837560
## 370 62 0.55174767
## 371 24 4.00000000
## 372 65 1.00000000
## 373 50 -2.69458757
## 374 38 -4.87163349
## 375 63 -0.80090139
## 376 35 4.00000000
## 377 59 2.00000000
## 378 23 0.03923363
## 379 64 0.00000000
## 380 36 2.00000000
## 381 31 3.31640345
## 382 30 4.74593007
## 383 56 8.87267692
## 384 36 0.00000000
## 385 62 1.00000000
## 386 53 4.00000000
## 387 65 0.97632410
## 388 43 4.00000000
## 389 52 7.00000000
## 390 65 1.00000000
## 391 55 1.00000000
## 392 43 3.00000000
## 393 51 0.00000000
## 394 24 3.00000000
## 395 39 1.00000000
## 396 42 0.00000000
## 397 44 7.00000000
## 398 55 6.00000000
## 399 60 0.41271940
## 400 32 0.00000000
## 401 61 1.00000000
## 402 64 0.00000000
## 403 61 5.54154037
## 404 52 4.14549736
## 405 34 3.45356087
## 406 50 12.08502556
## 407 23 13.95373304
## 408 23 0.00000000
## 409 59 4.00000000
## 410 32 13.00000000
## 411 52 3.00000000
## 412 24 0.00000000
## 413 51 6.98846749
## 414 57 0.00000000
## 415 44 11.00552092
## 416 22 6.00000000
## 417 54 0.00000000
## 418 40 8.08181165
## 419 51 0.00000000
## 420 23 8.00000000
## 421 47 8.00000000
## 422 49 5.71745442
## 423 60 4.00000000
## 424 29 6.27853276
## 425 51 2.00000000
## 426 31 3.00000000
## 427 37 3.00000000
## 428 54 0.00000000
## 429 64 0.91655037
## 430 60 0.00000000
## 431 56 3.00000000
## 432 28 1.00000000
## 433 56 3.80906765
## 434 31 1.00000000
## 435 51 3.00000000
## 436 67 6.91210428
## 437 41 0.00000000
## 438 34 -2.16804035
## 439 33 3.00000000
## 440 23 0.00000000
## 441 49 11.00000000
## 442 30 2.00000000
## 443 45 -9.87735843
## 444 46 7.65382294
## 445 45 1.47825066
## 446 60 5.00000000
## 447 59 7.00000000
## 448 28 3.00000000
## 449 26 0.00000000
## 450 53 0.36520677
## 451 20 0.00000000
## 452 33 1.00000000
## 453 34 0.00000000
## 454 47 1.00000000
## 455 22 0.00000000
## 456 41 -1.27908213
## 457 36 0.00000000
## 458 39 7.66185321
## 459 52 2.00000000
## 460 24 5.44842468
## 461 62 0.00000000
## 462 37 9.00000000
## 463 25 4.00000000
## 464 47 12.00000000
## 465 44 2.70073011
## 466 24 6.00000000
## 467 54 0.00000000
## 468 26 6.00000000
## 469 59 4.00000000
## 470 43 0.00000000
## 471 67 0.00000000
## 472 60 2.64564516
## 473 31 5.57936881
## 474 44 4.00000000
## 475 37 1.00000000
## 476 57 0.00000000
## 477 51 8.00000000
## 478 66 0.00000000
## 479 62 4.00000000
## 480 37 18.00000000
## 481 30 1.00000000
## 482 48 4.38198805
## 483 44 -0.73358603
## 484 22 6.81720022
## 485 52 6.87528040
## 486 38 1.00000000
## 487 53 3.35186486
## 488 48 13.00000000
## 489 34 1.29911641
## 490 32 15.00000000
## 491 40 8.00000000
## 492 28 0.00000000
## 493 34 16.40557761
## 494 57 2.00000000
## 495 49 0.00000000
## 496 27 -1.69719433
## 497 24 0.00000000
## 498 55 10.00000000
## 499 32 4.00000000
## 500 45 5.97172195
# Re-run the regression of age on depression_score, this time inside the
# "norm.nob" imputation held in `imp`, and print the coefficient table.
fit <- with(data = imp, expr = lm(formula = age ~ depression_score))
summary(object = fit)
## # A tibble: 2 x 6
## term estimate std.error statistic p.value nobs
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 (Intercept) 45.5 0.811 56.1 1.87e-217 500
## 2 depression_score -0.305 0.151 -2.02 4.37e- 2 500
# Fit the same regression directly on the raw incomplete data frame (no
# imputation); with lm()'s default NA handling, rows with a missing
# depression_score are dropped.
fit <- with(subset_incomplete, lm(age ~ depression_score))
# One-sample t-test on the imputed data. The expression has to name a column
# so that with() evaluates it inside the imputed data set held in `imp`.
# Passing `subset_incomplete` here looked up the raw data frame instead and
# pooled all observed age and depression_score values into one sample,
# ignoring the imputations entirely.
# NOTE(review): assumes depression_score is the variable of interest -- confirm.
t_stats <- with(imp, t.test(depression_score))
t_stats
## call :
## with.mids(data = imp, expr = t.test(subset_incomplete))
##
## call1 :
## mice(data = subset_incomplete, m = 1, method = "norm.nob", maxit = 1)
##
## nmis :
## age depression_score
## 0 177
##
## analyses :
## [[1]]
##
## One Sample t-test
##
## data: subset_incomplete
## t = 34.986, df = 822, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 26.70895 29.88400
## sample estimates:
## mean of x
## 28.29648
# Alternative approach: regression imputation.
# mice's "norm.predict" method is used, again with a single imputed data set
# (m = 1) and a single iteration (maxit = 1).
regOut <- mice(
  data = subset_incomplete,
  m = 1,
  maxit = 1,
  method = "norm.predict"
)
##
## iter imp variable
## 1 1 depression_score
# Completed data set from the regression-imputation run.
inc1 <- complete(regOut)
# Compare imputed vs. true data, starting with the column means.
colMeans(x = inc1)
## age depression_score
## 44.484000 3.234688
# Column means of the complete data, for comparison with the imputed data.
colMeans(x = subset_complete)
## age depression_score
## 44.484 3.564
# Per-column variances of the imputed data. vapply() requires exactly one
# numeric value per column, so an unexpected result shape raises an error
# instead of silently changing the return type as sapply() can.
vapply(inc1, var, FUN.VALUE = numeric(1))
## age depression_score
## 206.65105 11.22561
# Per-column variances of the complete data. vapply() requires exactly one
# numeric value per column, so an unexpected result shape raises an error
# instead of silently changing the return type as sapply() can.
vapply(subset_complete, var, FUN.VALUE = numeric(1))
## age depression_score
## 206.65105 20.60311
# Correlation matrix of the imputed data.
cor(x = inc1)
## age depression_score
## age 1.0000000 -0.1211871
## depression_score -0.1211871 1.0000000
# Correlation matrix of the complete data, for comparison.
cor(x = subset_complete)
## age depression_score
## age 1.00000000 -0.06125558
## depression_score -0.06125558 1.00000000