Introduction

In this assignment, we investigate the scope of the missingness problem in the incomplete data set we received. The data are derived from the National Health and Nutrition Examination Survey (NHANES). After inspecting and analyzing the data, we formulate a research question involving a variable that contains missing values. We then use different strategies to cope with these NA values and compare the outcomes to the results we retrieve from analyzing the complete data set.

Importing the Data

The first step we take is obviously reading the data:

# Load both versions of the data set: the fully observed one and the one
# in which some values have been replaced by NA.
data_complete <- readRDS(file = "g3_complete_data.rds")
data_incomplete <- readRDS(file = "g3_incomplete_data.rds")

Inspect Data (describe the observed data)

Now that we have loaded our data sets, we take a look at a summary of our incomplete data to obtain a general understanding of it.

# View() opens a GUI data viewer, which is only available in an interactive
# session; skip it when the document is rendered non-interactively.
if (interactive()) {
  View(data_incomplete) # looking through the dataframe
}
head(data_incomplete) # viewing the head of the data
##      id    sex age          ethnicity        education       marital
## 1 43736 female  49   mexican_american     some_college       married
## 2 43339 female  34   mexican_american high_school_grad       married
## 3 47572 female  68 non-hispanic_white some_high_school      divorced
## 4 48127   male  24   mexican_american     some_college never_married
## 5 46971 female  67 non-hispanic_white     college_grad      divorced
## 6 43233   male  40   mexican_american     some_college     separated
##   household_size household_income weight height   bmi pulse bp_sys1 bp_dia1
## 1              2      75000:99999     NA  151.6    NA    74     180      94
## 2              2      65000:74999   69.3  172.6 23.26   102     100      84
## 3              1        5000:9999   67.8  161.3 26.06    70     110      68
## 4              7      15000:19999   84.1  170.5 28.93    64     126      66
## 5              1      35000:44999     NA  150.9    NA    74     132      70
## 6              2      55000:64999   77.8  165.7 28.34    94      96      72
##   bp_sys2 bp_dia2 time_sed drink_regularly days_drinking dep1 dep2 dep3 dep4
## 1      NA      NA      480             yes           364    0    0    0    0
## 2      NA      NA       10             yes            52    3   NA   NA    3
## 3     104      62      120            <NA>             1    0    0    0    0
## 4     124      60      480             yes           120    0    0    0    0
## 5     128      72      240             yes            24    0    0    0    0
## 6      NA      NA      480             yes            52    0   NA   NA    1
##   dep5 dep6 dep7 dep8 dep9
## 1    0    0    0    0    0
## 2   NA   NA    3    3    0
## 3    0    0    0    0   NA
## 4    1    0    0    0    0
## 5    0    0    0    0    0
## 6   NA   NA    0    0    0
tail(data_incomplete, n = 6L) # viewing the last six rows of the data
##        id    sex age          ethnicity        education             marital
## 495 44001 female  49 non-hispanic_black     college_grad             married
## 496 51309   male  27 non-hispanic_white     some_college             married
## 497 43378 female  24   mexican_american   no_high_school living_with_partner
## 498 45675   male  55     other_hispanic   no_high_school             married
## 499 46007   male  32 non-hispanic_white     some_college             married
## 500 45751 female  45     other_hispanic some_high_school             married
##     household_size household_income weight height   bmi pulse bp_sys1 bp_dia1
## 495              5          100000+     NA  163.1    NA    80     108      76
## 496              3      45000:54999  100.9     NA    NA    58     108      58
## 497              6      15000:19999   79.3  156.2 32.50    54      94      58
## 498              6      45000:54999     NA  165.1    NA    84     132      64
## 499              3      45000:54999   92.0  184.4 27.06    66     110      58
## 500              5          100000+   61.5  153.1 26.24    76     108      66
##     bp_sys2 bp_dia2 time_sed drink_regularly days_drinking dep1 dep2 dep3 dep4
## 495     108      78      360             yes           156    0    0    0    0
## 496     108      64      120             yes            52    0    0    0    1
## 497      98      70       30              no             0    0    0    0    0
## 498     124      64      420              no             0    0    0    3    3
## 499     108      54      180             yes            12    0    0    2    2
## 500     106      64      120             yes           364    0    0    1    1
##     dep5 dep6 dep7 dep8 dep9
## 495    0    0    0    0    0
## 496    0    0    1    0   NA
## 497    0    0    0    0    0
## 498    3    0    0    1    0
## 499    0    0    0    0    0
## 500    1    0    0    0   NA
summary(object = data_incomplete) # per-variable summary statistics, including NA counts
##        id            sex           age                     ethnicity  
##  Min.   :41487   male  :256   Min.   :20.00   mexican_american  :101  
##  1st Qu.:44313   female:244   1st Qu.:32.00   other_hispanic    : 66  
##  Median :46962                Median :45.00   non-hispanic_white:220  
##  Mean   :46743                Mean   :44.48   non-hispanic_black: 98  
##  3rd Qu.:49245                3rd Qu.:57.00   other             : 15  
##  Max.   :51614                Max.   :69.00                           
##                                                                       
##             education                  marital    household_size 
##  no_high_school  : 59   married            :264   Min.   :1.000  
##  some_high_school: 93   widowed            : 13   1st Qu.:2.000  
##  high_school_grad:113   divorced           : 58   Median :3.000  
##  some_college    :151   separated          : 28   Mean   :3.304  
##  college_grad    : 84   never_married      :100   3rd Qu.:4.000  
##                         living_with_partner: 37   Max.   :7.000  
##                                                                  
##     household_income     weight           height           bmi       
##  100000+    : 78     Min.   : 48.00   Min.   :143.3   Min.   :17.20  
##  25000:34999: 74     1st Qu.: 68.33   1st Qu.:161.1   1st Qu.:24.55  
##  75000:99999: 57     Median : 81.45   Median :168.1   Median :28.07  
##  35000:44999: 43     Mean   : 83.58   Mean   :168.1   Mean   :28.99  
##  10000:14999: 42     3rd Qu.: 95.75   3rd Qu.:176.3   3rd Qu.:32.16  
##  45000:54999: 41     Max.   :195.80   Max.   :192.9   Max.   :58.59  
##  (Other)    :165     NA's   :150      NA's   :96      NA's   :222    
##      pulse           bp_sys1         bp_dia1          bp_sys2     
##  Min.   : 46.00   Min.   : 82.0   Min.   : 28.00   Min.   : 76.0  
##  1st Qu.: 66.00   1st Qu.:110.0   1st Qu.: 64.00   1st Qu.:108.0  
##  Median : 74.00   Median :120.0   Median : 72.00   Median :118.0  
##  Mean   : 74.86   Mean   :122.2   Mean   : 72.39   Mean   :119.5  
##  3rd Qu.: 82.00   3rd Qu.:132.0   3rd Qu.: 80.00   3rd Qu.:130.0  
##  Max.   :128.00   Max.   :210.0   Max.   :110.00   Max.   :180.0  
##                                                    NA's   :100    
##     bp_dia2          time_sed      drink_regularly days_drinking   
##  Min.   : 40.00   Min.   :   0.0   yes :335        Min.   :  0.00  
##  1st Qu.: 62.00   1st Qu.: 180.0   no  :115        1st Qu.:  0.75  
##  Median : 70.00   Median : 240.0   NA's: 50        Median :  7.50  
##  Mean   : 70.57   Mean   : 307.4                   Mean   : 50.69  
##  3rd Qu.: 78.00   3rd Qu.: 480.0                   3rd Qu.: 52.00  
##  Max.   :108.00   Max.   :1080.0                   Max.   :365.00  
##  NA's   :100                                                       
##       dep1            dep2             dep3             dep4      
##  Min.   :0.000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :0.000   Median :0.0000   Median :0.0000   Median :0.000  
##  Mean   :0.368   Mean   :0.3153   Mean   :0.6918   Mean   :0.794  
##  3rd Qu.:1.000   3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.000  
##  Max.   :3.000   Max.   :3.0000   Max.   :3.0000   Max.   :3.000  
##                  NA's   :75       NA's   :75                      
##       dep5             dep6             dep7            dep8       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.000   Median :0.0000  
##  Mean   :0.4094   Mean   :0.2682   Mean   :0.286   Mean   :0.1674  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.000   3rd Qu.:0.0000  
##  Max.   :3.0000   Max.   :3.0000   Max.   :3.000   Max.   :3.0000  
##  NA's   :75       NA's   :75                       NA's   :40      
##       dep9        
##  Min.   :0.00000  
##  1st Qu.:0.00000  
##  Median :0.00000  
##  Mean   :0.06205  
##  3rd Qu.:0.00000  
##  Max.   :3.00000  
##  NA's   :81

Comparing the Incomplete Data and the Complete Data

We now know what our data looks like. Next, we compare the distributions and descriptive statistics of the incomplete and the complete data set.

In the table above, we saw the summary statistics of the incomplete data set. We will now take a look at that for the complete data set as well.

summary(object = data_complete) # same per-variable summary, now for the complete data
##        id            sex           age                     ethnicity  
##  Min.   :41487   male  :256   Min.   :20.00   mexican_american  :101  
##  1st Qu.:44313   female:244   1st Qu.:32.00   other_hispanic    : 66  
##  Median :46962                Median :45.00   non-hispanic_white:220  
##  Mean   :46743                Mean   :44.48   non-hispanic_black: 98  
##  3rd Qu.:49245                3rd Qu.:57.00   other             : 15  
##  Max.   :51614                Max.   :69.00                           
##                                                                       
##             education                  marital    household_size 
##  no_high_school  : 59   married            :264   Min.   :1.000  
##  some_high_school: 93   widowed            : 13   1st Qu.:2.000  
##  high_school_grad:113   divorced           : 58   Median :3.000  
##  some_college    :151   separated          : 28   Mean   :3.304  
##  college_grad    : 84   never_married      :100   3rd Qu.:4.000  
##                         living_with_partner: 37   Max.   :7.000  
##                                                                  
##     household_income     weight           height           bmi       
##  100000+    : 78     Min.   : 40.80   Min.   :141.9   Min.   :16.88  
##  25000:34999: 74     1st Qu.: 67.10   1st Qu.:161.2   1st Qu.:24.60  
##  75000:99999: 57     Median : 80.15   Median :168.0   Median :28.24  
##  35000:44999: 43     Mean   : 83.06   Mean   :168.1   Mean   :29.31  
##  10000:14999: 42     3rd Qu.: 94.62   3rd Qu.:176.3   3rd Qu.:32.38  
##  45000:54999: 41     Max.   :213.50   Max.   :192.9   Max.   :71.83  
##  (Other)    :165                                                     
##      pulse           bp_sys1         bp_dia1          bp_sys2     
##  Min.   : 46.00   Min.   : 82.0   Min.   : 28.00   Min.   : 76.0  
##  1st Qu.: 66.00   1st Qu.:110.0   1st Qu.: 64.00   1st Qu.:108.0  
##  Median : 74.00   Median :120.0   Median : 72.00   Median :118.0  
##  Mean   : 74.86   Mean   :122.2   Mean   : 72.39   Mean   :120.9  
##  3rd Qu.: 82.00   3rd Qu.:132.0   3rd Qu.: 80.00   3rd Qu.:130.0  
##  Max.   :128.00   Max.   :210.0   Max.   :110.00   Max.   :214.0  
##                                                                   
##     bp_dia2          time_sed      drink_regularly days_drinking   
##  Min.   : 34.00   Min.   :   0.0   yes:370         Min.   :  0.00  
##  1st Qu.: 64.00   1st Qu.: 180.0   no :130         1st Qu.:  0.75  
##  Median : 72.00   Median : 240.0                   Median :  7.50  
##  Mean   : 71.86   Mean   : 307.4                   Mean   : 50.69  
##  3rd Qu.: 80.00   3rd Qu.: 480.0                   3rd Qu.: 52.00  
##  Max.   :108.00   Max.   :1080.0                   Max.   :365.00  
##                                                                    
##       dep1            dep2            dep3            dep4            dep5     
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.00  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.00  
##  Median :0.000   Median :0.000   Median :0.000   Median :0.000   Median :0.00  
##  Mean   :0.368   Mean   :0.382   Mean   :0.738   Mean   :0.794   Mean   :0.45  
##  3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:1.00  
##  Max.   :3.000   Max.   :3.000   Max.   :3.000   Max.   :3.000   Max.   :3.00  
##                                                                                
##       dep6            dep7            dep8            dep9     
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.00  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.00  
##  Median :0.000   Median :0.000   Median :0.000   Median :0.00  
##  Mean   :0.312   Mean   :0.286   Mean   :0.164   Mean   :0.07  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.00  
##  Max.   :3.000   Max.   :3.000   Max.   :3.000   Max.   :3.00  
## 

Here we use arsenal::comparedf() to detect the differences between the two data sets. It shows us the number of NA values per variable, and it also gives us some additional information. In this case we are specifically interested in the ‘Summary of overall comparison’ table, which tells us the following: 12 variables differ between the two data sets (these are the variables that contain NA values in the incomplete data), and the remaining 16 variables are identical. Of the 500 observations, 367 have at least one compared variable unequal. The total number of differences between the two data sets is 1139, spread across the 12 variables with some values unequal.

# Cell-by-cell comparison of the incomplete (x) and complete (y) data sets
summary(
  comparedf(x = data_incomplete, y = data_complete)
)
## 
## 
## Table: Summary of data.frames
## 
## version   arg                ncol   nrow
## --------  ----------------  -----  -----
## x         data_incomplete      28    500
## y         data_complete        28    500
## 
## 
## 
## Table: Summary of overall comparison
## 
## statistic                                                      value
## ------------------------------------------------------------  ------
## Number of by-variables                                             0
## Number of non-by variables in common                              28
## Number of variables compared                                      28
## Number of variables in x but not y                                 0
## Number of variables in y but not x                                 0
## Number of variables compared with some values unequal             12
## Number of variables compared with all values equal                16
## Number of observations in common                                 500
## Number of observations in x but not y                              0
## Number of observations in y but not x                              0
## Number of observations with some compared variables unequal      367
## Number of observations with all compared variables equal         133
## Number of values unequal                                        1139
## 
## 
## 
## Table: Variables not shared
## 
##                          
##  ------------------------
##  No variables not shared 
##  ------------------------
## 
## 
## 
## Table: Other variables not compared
## 
##                                  
##  --------------------------------
##  No other variables not compared 
##  --------------------------------
## 
## 
## 
## Table: Observations not shared
## 
##                             
##  ---------------------------
##  No observations not shared 
##  ---------------------------
## 
## 
## 
## Table: Differences detected by variable
## 
## var.x              var.y                 n   NAs
## -----------------  -----------------  ----  ----
## id                 id                    0     0
## sex                sex                   0     0
## age                age                   0     0
## ethnicity          ethnicity             0     0
## education          education             0     0
## marital            marital               0     0
## household_size     household_size        0     0
## household_income   household_income      0     0
## weight             weight              150   150
## height             height               96    96
## bmi                bmi                 222   222
## pulse              pulse                 0     0
## bp_sys1            bp_sys1               0     0
## bp_dia1            bp_dia1               0     0
## bp_sys2            bp_sys2             100   100
## bp_dia2            bp_dia2             100   100
## time_sed           time_sed              0     0
## drink_regularly    drink_regularly      50    50
## days_drinking      days_drinking         0     0
## dep1               dep1                  0     0
## dep2               dep2                 75    75
## dep3               dep3                 75    75
## dep4               dep4                  0     0
## dep5               dep5                 75    75
## dep6               dep6                 75    75
## dep7               dep7                  0     0
## dep8               dep8                 40    40
## dep9               dep9                 81    81
## 
## 
## 
## Table: Differences detected (1089 not shown)
## 
## var.x     var.y      ..row.names..  values.x   values.y    row.x   row.y
## --------  --------  --------------  ---------  ---------  ------  ------
## weight    weight                 1  NA         67              1       1
## weight    weight                 5  NA         56.8            5       5
## weight    weight                 8  NA         108.2           8       8
## weight    weight                13  NA         107.9          13      13
## weight    weight                21  NA         86.7           21      21
## weight    weight                27  NA         63.9           27      27
## weight    weight                30  NA         97.9           30      30
## weight    weight                33  NA         50.9           33      33
## weight    weight                34  NA         52.8           34      34
## weight    weight                36  NA         79.4           36      36
## height    height                15  NA         157.1          15      15
## height    height                16  NA         166.5          16      16
## height    height                22  NA         164.1          22      22
## height    height                24  NA         149.5          24      24
## height    height                25  NA         168.4          25      25
## height    height                26  NA         181.9          26      26
## height    height                28  NA         178            28      28
## height    height                32  NA         164.6          32      32
## height    height                34  NA         161.6          34      34
## height    height                39  NA         188.3          39      39
## bmi       bmi                    1  NA         29.15           1       1
## bmi       bmi                    5  NA         24.94           5       5
## bmi       bmi                    8  NA         32.24           8       8
## bmi       bmi                   13  NA         32.79          13      13
## bmi       bmi                   15  NA         26.42          15      15
## bmi       bmi                   16  NA         26.33          16      16
## bmi       bmi                   21  NA         33.36          21      21
## bmi       bmi                   22  NA         31.16          22      22
## bmi       bmi                   24  NA         26.89          24      24
## bmi       bmi                   25  NA         38.58          25      25
## bp_sys2   bp_sys2                1  NA         180             1       1
## bp_sys2   bp_sys2                2  NA         100             2       2
## bp_sys2   bp_sys2                6  NA         98              6       6
## bp_sys2   bp_sys2               10  NA         134            10      10
## bp_sys2   bp_sys2               11  NA         156            11      11
## bp_sys2   bp_sys2               15  NA         104            15      15
## bp_sys2   bp_sys2               16  NA         126            16      16
## bp_sys2   bp_sys2               20  NA         106            20      20
## bp_sys2   bp_sys2               23  NA         138            23      23
## bp_sys2   bp_sys2               28  NA         134            28      28
## bp_dia2   bp_dia2                1  NA         98              1       1
## bp_dia2   bp_dia2                2  NA         78              2       2
## bp_dia2   bp_dia2                6  NA         70              6       6
## bp_dia2   bp_dia2               10  NA         68             10      10
## bp_dia2   bp_dia2               11  NA         76             11      11
## bp_dia2   bp_dia2               15  NA         62             15      15
## bp_dia2   bp_dia2               16  NA         84             16      16
## bp_dia2   bp_dia2               20  NA         68             20      20
## bp_dia2   bp_dia2               23  NA         70             23      23
## bp_dia2   bp_dia2               28  NA         70             28      28
## 
## 
## 
## Table: Non-identical attributes
## 
##                              
##  ----------------------------
##  No non-identical attributes 
##  ----------------------------

With this information, we will do some computations to compare the means, variances and correlations between the datasets.

# Coerce every column of both data sets to numeric so that means, variances
# and correlations can be computed across all variables.
columns_to_convert <- c(
  "id", "sex", "age", "ethnicity", "education", "marital",
  "household_size", "household_income", "weight", "height", "bmi",
  "pulse", "bp_sys1", "bp_dia1", "bp_sys2", "bp_dia2", "time_sed",
  "drink_regularly", "days_drinking",
  "dep1", "dep2", "dep3", "dep4", "dep5", "dep6", "dep7", "dep8", "dep9"
)

data_complete[columns_to_convert] <-
  lapply(data_complete[columns_to_convert], as.numeric)

data_incomplete[columns_to_convert] <-
  lapply(data_incomplete[columns_to_convert], as.numeric)


# Comparing the column means of the two data sets.
# colMeans() works directly on the all-numeric data frames; na.rm = TRUE
# drops the missing values of the incomplete data before averaging.
mean_complete <- colMeans(data_complete, na.rm = TRUE)
mean_incomplete <- colMeans(data_incomplete, na.rm = TRUE)
# cbind() has no na.rm argument: passing one would bind an extra constant
# column named "na.rm" to the table, so only the two mean vectors are bound.
cbind(mean_complete, mean_incomplete)
##                  mean_complete mean_incomplete
## id                 46743.24400    4.674324e+04
## sex                    1.48800    1.488000e+00
## age                   44.48400    4.448400e+01
## ethnicity              2.72000    2.720000e+00
## education              3.21600    3.216000e+00
## marital                2.59600    2.596000e+00
## household_size         3.30400    3.304000e+00
## household_income       7.49800    7.498000e+00
## weight                83.05860    8.358057e+01
## height               168.11680    1.681314e+02
## bmi                   29.30561    2.898734e+01
## pulse                 74.86400    7.486400e+01
## bp_sys1              122.15600    1.221560e+02
## bp_dia1               72.38800    7.238800e+01
## bp_sys2              120.86800    1.195500e+02
## bp_dia2               71.86000    7.057000e+01
## time_sed             307.42400    3.074240e+02
## drink_regularly        1.26000    1.255556e+00
## days_drinking         50.69400    5.069400e+01
## dep1                   0.36800    3.680000e-01
## dep2                   0.38200    3.152941e-01
## dep3                   0.73800    6.917647e-01
## dep4                   0.79400    7.940000e-01
## dep5                   0.45000    4.094118e-01
## dep6                   0.31200    2.682353e-01
## dep7                   0.28600    2.860000e-01
## dep8                   0.16400    1.673913e-01
## dep9                   0.07000    6.205251e-02

Here we create a table that compares the variances for our two datasets.

# Per-column variances: var1 for the complete data, var2 for the incomplete
# data (missing values removed before computing the variance).
var1 <- vapply(data_complete, var, numeric(1))
var2 <- vapply(data_incomplete, var, numeric(1), na.rm = TRUE)

# One row per variable, with the difference complete minus incomplete.
var_table <- data.frame(
  Variable = colnames(data_complete),
  Variance1 = var1,
  Variance2 = var2,
  Difference = var1 - var2
)
var_table
##                          Variable    Variance1    Variance2   Difference
## id                             id 8.365722e+06 8.365722e+06  0.000000000
## sex                           sex 2.503567e-01 2.503567e-01  0.000000000
## age                           age 2.066510e+02 2.066510e+02  0.000000000
## ethnicity               ethnicity 1.179960e+00 1.179960e+00  0.000000000
## education               education 1.588521e+00 1.588521e+00  0.000000000
## marital                   marital 3.503792e+00 3.503792e+00  0.000000000
## household_size     household_size 2.881347e+00 2.881347e+00  0.000000000
## household_income household_income 1.046693e+01 1.046693e+01  0.000000000
## weight                     weight 4.695250e+02 4.215897e+02 47.935305850
## height                     height 1.001351e+02 1.002984e+02 -0.163370733
## bmi                           bmi 4.777918e+01 4.051594e+01  7.263236522
## pulse                       pulse 1.629394e+02 1.629394e+02  0.000000000
## bp_sys1                   bp_sys1 3.274706e+02 3.274706e+02  0.000000000
## bp_dia1                   bp_dia1 1.418051e+02 1.418051e+02  0.000000000
## bp_sys2                   bp_sys2 3.045837e+02 2.723584e+02 32.225347497
## bp_dia2                   bp_dia2 1.389062e+02 1.253084e+02 13.597841498
## time_sed                 time_sed 4.108440e+04 4.108440e+04  0.000000000
## drink_regularly   drink_regularly 1.927856e-01 1.906706e-01  0.002114945
## days_drinking       days_drinking 7.769459e+03 7.769459e+03  0.000000000
## dep1                         dep1 5.296353e-01 5.296353e-01  0.000000000
## dep2                         dep2 5.932625e-01 4.805438e-01  0.112718685
## dep3                         dep3 9.712986e-01 9.684462e-01  0.002852426
## dep4                         dep4 9.895431e-01 9.895431e-01  0.000000000
## dep5                         dep5 6.968938e-01 6.385905e-01  0.058303333
## dep6                         dep6 4.876313e-01 4.137292e-01  0.073902073
## dep7                         dep7 4.410862e-01 4.410862e-01  0.000000000
## dep8                         dep8 2.576192e-01 2.660368e-01 -0.008417514
## dep9                         dep9 1.093186e-01 9.183405e-02  0.017484583

Plotted, it looks like this:

# Bar chart of the variance difference (complete minus incomplete) per variable
var_plot <- ggplot(var_table, aes(x = Variable, y = Difference)) +
  geom_col(fill = "blue") +
  labs(
    title = "Differences in Variances",
    x = "Variable",
    y = "Difference in Variance"
  ) +
  # rotate the variable names so they stay readable on the x axis
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 8)
  )
var_plot

We now do the same for the correlations. The Correlation1 columns contain the correlation coefficients for the variables in the complete data set. The Correlation2 columns refer to the incomplete data set.

# Correlation matrices: cor1 for the complete data, cor2 for the incomplete
# data using only the rows without any missing value.
cor1 <- cor(x = data_complete)
cor2 <- cor(x = data_incomplete, use = "complete.obs")

# Wide table holding both matrices and their element-wise difference.
cor_table <- data.frame(
  Variable = colnames(data_complete),
  Correlation1 = cor1,
  Correlation2 = cor2,
  Difference = cor1 - cor2
)
cor_table
##                          Variable Correlation1.id Correlation1.sex
## id                             id     1.000000000     -0.009061094
## sex                           sex    -0.009061094      1.000000000
## age                           age    -0.025562771      0.023098114
## ethnicity               ethnicity    -0.014312468     -0.039378345
## education               education     0.021615281      0.035896163
## marital                   marital    -0.038175760     -0.024443810
## household_size     household_size    -0.004221008      0.011382283
## household_income household_income     0.026733200     -0.061294379
## weight                     weight    -0.025072187     -0.272949497
## height                     height     0.021715970     -0.689623117
## bmi                           bmi    -0.038143481      0.042259004
## pulse                       pulse    -0.035046305      0.114582565
## bp_sys1                   bp_sys1    -0.043128147     -0.152286887
## bp_dia1                   bp_dia1     0.027575190     -0.171757617
## bp_sys2                   bp_sys2    -0.085713697     -0.162891182
## bp_dia2                   bp_dia2     0.016405141     -0.165781630
## time_sed                 time_sed    -0.003417041     -0.010580475
## drink_regularly   drink_regularly    -0.061661981      0.242276137
## days_drinking       days_drinking     0.086961928     -0.242475355
## dep1                         dep1     0.043327849      0.078192362
## dep2                         dep2    -0.003542063      0.076917166
## dep3                         dep3    -0.049868469      0.093177194
## dep4                         dep4    -0.074186403      0.117824652
## dep5                         dep5     0.042081421      0.149689429
## dep6                         dep6     0.018947108      0.125447659
## dep7                         dep7    -0.059105937      0.061608268
## dep8                         dep8    -0.013813271      0.039328621
## dep9                         dep9     0.014889936     -0.061537017
##                  Correlation1.age Correlation1.ethnicity Correlation1.education
## id                   -0.025562771           -0.014312468           2.161528e-02
## sex                   0.023098114           -0.039378345           3.589616e-02
## age                   1.000000000            0.050276740          -3.166380e-02
## ethnicity             0.050276740            1.000000000           2.711466e-01
## education            -0.031663800            0.271146569           1.000000e+00
## marital              -0.357051255            0.046756433          -1.115895e-01
## household_size       -0.256527457           -0.200457902          -2.199695e-01
## household_income      0.084346917            0.085376036           4.040911e-01
## weight                0.007704952            0.121989730           8.578564e-02
## height               -0.089705102            0.306217362           2.442509e-01
## bmi                   0.061898028           -0.005645942          -2.006648e-02
## pulse                -0.133970475           -0.001017479          -9.184198e-02
## bp_sys1               0.381669741            0.075425374          -1.107846e-01
## bp_dia1               0.172956118            0.046217061           1.281824e-05
## bp_sys2               0.362182470            0.061260550          -1.151354e-01
## bp_dia2               0.148466542            0.072380706           1.148349e-02
## time_sed             -0.077221674            0.146806236           2.894475e-01
## drink_regularly       0.053365358            0.110925780          -9.444377e-02
## days_drinking         0.073618466            0.033219351           1.379435e-01
## dep1                 -0.018016843           -0.016426778          -1.654866e-01
## dep2                 -0.027229023           -0.029987944          -1.016807e-01
## dep3                 -0.041670731            0.015574452          -9.954969e-02
## dep4                 -0.014735481            0.026261028          -9.390923e-02
## dep5                 -0.091662265            0.015469653          -1.135183e-01
## dep6                 -0.065780258            0.015006119          -4.712416e-02
## dep7                 -0.055878734            0.027889313          -7.873697e-02
## dep8                 -0.071050606           -0.029223534          -1.557310e-01
## dep9                  0.022371793           -0.079233176          -1.373454e-01
##                  Correlation1.marital Correlation1.household_size
## id                       -0.038175760                -0.004221008
## sex                      -0.024443810                 0.011382283
## age                      -0.357051255                -0.256527457
## ethnicity                 0.046756433                -0.200457902
## education                -0.111589524                -0.219969510
## marital                   1.000000000                -0.013618380
## household_size           -0.013618380                 1.000000000
## household_income         -0.266191931                 0.051199072
## weight                   -0.029178776                -0.095064813
## height                    0.037691375                -0.150761239
## bmi                      -0.050470988                -0.031230985
## pulse                     0.064625767                 0.069983608
## bp_sys1                  -0.042862289                -0.109454330
## bp_dia1                  -0.081959670                -0.010605769
## bp_sys2                  -0.027155085                -0.110530816
## bp_dia2                  -0.058525366                -0.040340758
## time_sed                  0.118101884                -0.199365701
## drink_regularly          -0.049937065                 0.060445050
## days_drinking            -0.009034375                -0.139235885
## dep1                      0.076991432                 0.003348289
## dep2                      0.075286584                -0.024622488
## dep3                      0.078296909                -0.043335704
## dep4                      0.076831300                -0.011497899
## dep5                      0.149920678                -0.051477777
## dep6                      0.095092075                -0.017623426
## dep7                      0.068949030                -0.031058651
## dep8                      0.120500874                 0.049013792
## dep9                      0.078166467                 0.154825990
##                  Correlation1.household_income Correlation1.weight
## id                                  0.02673320        -0.025072187
## sex                                -0.06129438        -0.272949497
## age                                 0.08434692         0.007704952
## ethnicity                           0.08537604         0.121989730
## education                           0.40409105         0.085785639
## marital                            -0.26619193        -0.029178776
## household_size                      0.05119907        -0.095064813
## household_income                    1.00000000        -0.039477663
## weight                             -0.03947766         1.000000000
## height                              0.16008286         0.415429394
## bmi                                -0.12162978         0.886379420
## pulse                              -0.11942971         0.152457915
## bp_sys1                            -0.06657151         0.202607336
## bp_dia1                             0.06082786         0.237188551
## bp_sys2                            -0.05956108         0.216514575
## bp_dia2                             0.06363893         0.209744411
## time_sed                            0.12569705         0.118132995
## drink_regularly                    -0.09979702        -0.069555839
## days_drinking                       0.19435804         0.022801412
## dep1                               -0.14523184         0.054062624
## dep2                               -0.16656517         0.035842786
## dep3                               -0.19720311         0.128597838
## dep4                               -0.13992236         0.083566675
## dep5                               -0.22931647         0.148976260
## dep6                               -0.13011837         0.060056746
## dep7                               -0.07947655         0.054910755
## dep8                               -0.13404338         0.018019985
## dep9                               -0.05513571         0.011873820
##                  Correlation1.height Correlation1.bmi Correlation1.pulse
## id                       0.021715970     -0.038143481       -0.035046305
## sex                     -0.689623117      0.042259004        0.114582565
## age                     -0.089705102      0.061898028       -0.133970475
## ethnicity                0.306217362     -0.005645942       -0.001017479
## education                0.244250903     -0.020066484       -0.091841980
## marital                  0.037691375     -0.050470988        0.064625767
## household_size          -0.150761239     -0.031230985        0.069983608
## household_income         0.160082860     -0.121629779       -0.119429710
## weight                   0.415429394      0.886379420        0.152457915
## height                   1.000000000     -0.038822222       -0.082254825
## bmi                     -0.038822222      1.000000000        0.211810098
## pulse                   -0.082254825      0.211810098        1.000000000
## bp_sys1                  0.060416606      0.204099015       -0.007525159
## bp_dia1                  0.195226206      0.167590329        0.170867088
## bp_sys2                  0.084863522      0.206465963        0.011703570
## bp_dia2                  0.182779568      0.144101143        0.150902776
## time_sed                 0.136526507      0.059674333        0.062559283
## drink_regularly         -0.213908045      0.029184378        0.067822017
## days_drinking            0.271969296     -0.108446083       -0.083106304
## dep1                    -0.019397815      0.069563577        0.086510403
## dep2                    -0.033075015      0.057931789        0.120457212
## dep3                    -0.040010495      0.163298313        0.175416295
## dep4                    -0.034741878      0.110121565        0.122311217
## dep5                    -0.083071197      0.203279834        0.182721912
## dep6                    -0.039009098      0.080526726        0.087055045
## dep7                     0.016825197      0.044896165        0.089460416
## dep8                    -0.050534781      0.039038055        0.127174396
## dep9                     0.009213944      0.009429586        0.023627604
##                  Correlation1.bp_sys1 Correlation1.bp_dia1 Correlation1.bp_sys2
## id                       -0.043128147         2.757519e-02         -0.085713697
## sex                      -0.152286887        -1.717576e-01         -0.162891182
## age                       0.381669741         1.729561e-01          0.362182470
## ethnicity                 0.075425374         4.621706e-02          0.061260550
## education                -0.110784638         1.281824e-05         -0.115135358
## marital                  -0.042862289        -8.195967e-02         -0.027155085
## household_size           -0.109454330        -1.060577e-02         -0.110530816
## household_income         -0.066571510         6.082786e-02         -0.059561079
## weight                    0.202607336         2.371886e-01          0.216514575
## height                    0.060416606         1.952262e-01          0.084863522
## bmi                       0.204099015         1.675903e-01          0.206465963
## pulse                    -0.007525159         1.708671e-01          0.011703570
## bp_sys1                   1.000000000         4.830037e-01          0.926381496
## bp_dia1                   0.483003674         1.000000e+00          0.497291337
## bp_sys2                   0.926381496         4.972913e-01          1.000000000
## bp_dia2                   0.466530196         8.685409e-01          0.487850540
## time_sed                 -0.003782454         2.178395e-03          0.022736306
## drink_regularly          -0.026805726        -5.766066e-02         -0.013818842
## days_drinking             0.148590830         1.172064e-01          0.124082412
## dep1                     -0.029932158         6.072578e-02         -0.021097356
## dep2                     -0.061219730         4.323728e-02         -0.045289054
## dep3                      0.009937245         5.751556e-02         -0.001432162
## dep4                     -0.026042674         2.740026e-02         -0.022462736
## dep5                     -0.047371796         3.118607e-02         -0.051347675
## dep6                     -0.059681989        -2.615469e-02         -0.050878215
## dep7                     -0.102765982        -4.345257e-02         -0.071946080
## dep8                     -0.025918588        -4.580857e-03          0.013760427
## dep9                      0.055780801         7.656179e-02          0.067937920
##                  Correlation1.bp_dia2 Correlation1.time_sed
## id                        0.016405141          -0.003417041
## sex                      -0.165781630          -0.010580475
## age                       0.148466542          -0.077221674
## ethnicity                 0.072380706           0.146806236
## education                 0.011483486           0.289447488
## marital                  -0.058525366           0.118101884
## household_size           -0.040340758          -0.199365701
## household_income          0.063638934           0.125697050
## weight                    0.209744411           0.118132995
## height                    0.182779568           0.136526507
## bmi                       0.144101143           0.059674333
## pulse                     0.150902776           0.062559283
## bp_sys1                   0.466530196          -0.003782454
## bp_dia1                   0.868540923           0.002178395
## bp_sys2                   0.487850540           0.022736306
## bp_dia2                   1.000000000          -0.002446444
## time_sed                 -0.002446444           1.000000000
## drink_regularly          -0.032452292          -0.055598865
## days_drinking             0.112310342           0.116689916
## dep1                      0.066765423           0.026518491
## dep2                      0.054028153           0.068057904
## dep3                      0.065502380           0.030903810
## dep4                      0.076847235           0.078405683
## dep5                      0.031672746           0.051348399
## dep6                      0.019440766           0.084268961
## dep7                      0.028167515           0.052525857
## dep8                      0.026626086          -0.032895905
## dep9                      0.106402543          -0.101067153
##                  Correlation1.drink_regularly Correlation1.days_drinking
## id                                -0.06166198                0.086961928
## sex                                0.24227614               -0.242475355
## age                                0.05336536                0.073618466
## ethnicity                          0.11092578                0.033219351
## education                         -0.09444377                0.137943546
## marital                           -0.04993706               -0.009034375
## household_size                     0.06044505               -0.139235885
## household_income                  -0.09979702                0.194358042
## weight                            -0.06955584                0.022801412
## height                            -0.21390805                0.271969296
## bmi                                0.02918438               -0.108446083
## pulse                              0.06782202               -0.083106304
## bp_sys1                           -0.02680573                0.148590830
## bp_dia1                           -0.05766066                0.117206398
## bp_sys2                           -0.01381884                0.124082412
## bp_dia2                           -0.03245229                0.112310342
## time_sed                          -0.05559887                0.116689916
## drink_regularly                    1.00000000               -0.311575058
## days_drinking                     -0.31157506                1.000000000
## dep1                               0.01354651               -0.091712111
## dep2                               0.04349455               -0.103534470
## dep3                               0.04195793               -0.091562615
## dep4                               0.04487285               -0.106563193
## dep5                               0.01366844               -0.060791679
## dep6                               0.01594800               -0.071961869
## dep7                               0.03999663               -0.102261637
## dep8                               0.04208418               -0.042997630
## dep9                               0.06764118               -0.034195320
##                  Correlation1.dep1 Correlation1.dep2 Correlation1.dep3
## id                     0.043327849      -0.003542063      -0.049868469
## sex                    0.078192362       0.076917166       0.093177194
## age                   -0.018016843      -0.027229023      -0.041670731
## ethnicity             -0.016426778      -0.029987944       0.015574452
## education             -0.165486570      -0.101680729      -0.099549689
## marital                0.076991432       0.075286584       0.078296909
## household_size         0.003348289      -0.024622488      -0.043335704
## household_income      -0.145231837      -0.166565166      -0.197203111
## weight                 0.054062624       0.035842786       0.128597838
## height                -0.019397815      -0.033075015      -0.040010495
## bmi                    0.069563577       0.057931789       0.163298313
## pulse                  0.086510403       0.120457212       0.175416295
## bp_sys1               -0.029932158      -0.061219730       0.009937245
## bp_dia1                0.060725777       0.043237280       0.057515565
## bp_sys2               -0.021097356      -0.045289054      -0.001432162
## bp_dia2                0.066765423       0.054028153       0.065502380
## time_sed               0.026518491       0.068057904       0.030903810
## drink_regularly        0.013546512       0.043494545       0.041957926
## days_drinking         -0.091712111      -0.103534470      -0.091562615
## dep1                   1.000000000       0.531659479       0.355425881
## dep2                   0.531659479       1.000000000       0.430426607
## dep3                   0.355425881       0.430426607       1.000000000
## dep4                   0.478628650       0.516163119       0.515146065
## dep5                   0.389892773       0.380391220       0.516265612
## dep6                   0.451876269       0.612566401       0.416029788
## dep7                   0.407885666       0.506828795       0.371891354
## dep8                   0.302860623       0.387922544       0.310417331
## dep9                   0.200882083       0.359070197       0.277796046
##                  Correlation1.dep4 Correlation1.dep5 Correlation1.dep6
## id                     -0.07418640        0.04208142        0.01894711
## sex                     0.11782465        0.14968943        0.12544766
## age                    -0.01473548       -0.09166227       -0.06578026
## ethnicity               0.02626103        0.01546965        0.01500612
## education              -0.09390923       -0.11351831       -0.04712416
## marital                 0.07683130        0.14992068        0.09509207
## household_size         -0.01149790       -0.05147778       -0.01762343
## household_income       -0.13992236       -0.22931647       -0.13011837
## weight                  0.08356667        0.14897626        0.06005675
## height                 -0.03474188       -0.08307120       -0.03900910
## bmi                     0.11012156        0.20327983        0.08052673
## pulse                   0.12231122        0.18272191        0.08705505
## bp_sys1                -0.02604267       -0.04737180       -0.05968199
## bp_dia1                 0.02740026        0.03118607       -0.02615469
## bp_sys2                -0.02246274       -0.05134768       -0.05087821
## bp_dia2                 0.07684724        0.03167275        0.01944077
## time_sed                0.07840568        0.05134840        0.08426896
## drink_regularly         0.04487285        0.01366844        0.01594800
## days_drinking          -0.10656319       -0.06079168       -0.07196187
## dep1                    0.47862865        0.38989277        0.45187627
## dep2                    0.51616312        0.38039122        0.61256640
## dep3                    0.51514607        0.51626561        0.41602979
## dep4                    1.00000000        0.50762303        0.41582328
## dep5                    0.50762303        1.00000000        0.40840103
## dep6                    0.41582328        0.40840103        1.00000000
## dep7                    0.44728951        0.28789904        0.52018848
## dep8                    0.31313090        0.33627657        0.42075653
## dep9                    0.22062947        0.16880761        0.33052440
##                  Correlation1.dep7 Correlation1.dep8 Correlation1.dep9
## id                     -0.05910594      -0.013813271       0.014889936
## sex                     0.06160827       0.039328621      -0.061537017
## age                    -0.05587873      -0.071050606       0.022371793
## ethnicity               0.02788931      -0.029223534      -0.079233176
## education              -0.07873697      -0.155730995      -0.137345384
## marital                 0.06894903       0.120500874       0.078166467
## household_size         -0.03105865       0.049013792       0.154825990
## household_income       -0.07947655      -0.134043377      -0.055135708
## weight                  0.05491076       0.018019985       0.011873820
## height                  0.01682520      -0.050534781       0.009213944
## bmi                     0.04489616       0.039038055       0.009429586
## pulse                   0.08946042       0.127174396       0.023627604
## bp_sys1                -0.10276598      -0.025918588       0.055780801
## bp_dia1                -0.04345257      -0.004580857       0.076561787
## bp_sys2                -0.07194608       0.013760427       0.067937920
## bp_dia2                 0.02816752       0.026626086       0.106402543
## time_sed                0.05252586      -0.032895905      -0.101067153
## drink_regularly         0.03999663       0.042084183       0.067641179
## days_drinking          -0.10226164      -0.042997630      -0.034195320
## dep1                    0.40788567       0.302860623       0.200882083
## dep2                    0.50682879       0.387922544       0.359070197
## dep3                    0.37189135       0.310417331       0.277796046
## dep4                    0.44728951       0.313130895       0.220629469
## dep5                    0.28789904       0.336276573       0.168807606
## dep6                    0.52018848       0.420756525       0.330524395
## dep7                    1.00000000       0.461019386       0.237190291
## dep8                    0.46101939       1.000000000       0.229995578
## dep9                    0.23719029       0.229995578       1.000000000
##                  Correlation2.id Correlation2.sex Correlation2.age
## id                   1.000000000      -0.05317545     -0.002276105
## sex                 -0.053175454       1.00000000     -0.063952534
## age                 -0.002276105      -0.06395253      1.000000000
## ethnicity           -0.035437642      -0.17985483      0.094081873
## education           -0.003002854      -0.04289781     -0.002330056
## marital              0.027369336       0.12969303     -0.347414999
## household_size       0.061231696       0.15521282     -0.346197982
## household_income    -0.033751166      -0.10301234     -0.030968901
## weight              -0.020770620      -0.30441606      0.053511978
## height              -0.003927402      -0.72253226     -0.021830956
## bmi                 -0.021902239       0.02515022      0.095499834
## pulse               -0.054038979       0.16614422     -0.129248337
## bp_sys1              0.021249306      -0.28159678      0.395539507
## bp_dia1              0.035232574      -0.03699273      0.176862727
## bp_sys2              0.013395807      -0.31524051      0.397004628
## bp_dia2              0.038168565      -0.04151953      0.157760752
## time_sed             0.091144103      -0.09773092     -0.171538179
## drink_regularly      0.116209870       0.12679009     -0.011966665
## days_drinking       -0.011595180      -0.24739987      0.051273908
## dep1                 0.131402973       0.01769036     -0.125731081
## dep2                 0.035251327       0.02870827     -0.106066244
## dep3                -0.013550269       0.13594785     -0.018909433
## dep4                -0.047593785       0.15351734     -0.130332994
## dep5                 0.106406993       0.16571963     -0.169892392
## dep6                 0.086418021       0.03935623     -0.022402285
## dep7                -0.047095282       0.15010310     -0.151210760
## dep8                 0.101114771       0.10655202     -0.141801472
## dep9                 0.001234142      -0.01798982     -0.015020876
##                  Correlation2.ethnicity Correlation2.education
## id                         -0.035437642           -0.003002854
## sex                        -0.179854832           -0.042897808
## age                         0.094081873           -0.002330056
## ethnicity                   1.000000000            0.205012197
## education                   0.205012197            1.000000000
## marital                     0.118635238           -0.059525948
## household_size             -0.315564020           -0.307730824
## household_income            0.043563631            0.324451009
## weight                      0.241620888            0.077728264
## height                      0.404234125            0.287553219
## bmi                         0.089883360           -0.050273227
## pulse                       0.116095769           -0.087687403
## bp_sys1                     0.132275904            0.044164422
## bp_dia1                     0.114125051            0.033329734
## bp_sys2                     0.121183962            0.033293549
## bp_dia2                     0.115024449            0.105443445
## time_sed                    0.101396736            0.329169644
## drink_regularly             0.050131549           -0.046961454
## days_drinking               0.153414092            0.130718354
## dep1                        0.009757537           -0.007678133
## dep2                        0.124161734            0.021260763
## dep3                        0.070259535           -0.242021635
## dep4                        0.093364780           -0.052406423
## dep5                        0.060258975           -0.199214036
## dep6                        0.044150074           -0.043504103
## dep7                       -0.050950161           -0.001121101
## dep8                        0.032973997           -0.146332016
## dep9                        0.054876685           -0.031959361
##                  Correlation2.marital Correlation2.household_size
## id                       2.736934e-02                 0.061231696
## sex                      1.296930e-01                 0.155212815
## age                     -3.474150e-01                -0.346197982
## ethnicity                1.186352e-01                -0.315564020
## education               -5.952595e-02                -0.307730824
## marital                  1.000000e+00                 0.060803128
## household_size           6.080313e-02                 1.000000000
## household_income        -2.605452e-01                -0.036424322
## weight                   5.686866e-02                -0.169090175
## height                   2.308751e-02                -0.281971238
## bmi                      3.896273e-02                -0.067980008
## pulse                    4.389539e-05                 0.128496999
## bp_sys1                 -6.932595e-02                -0.173709027
## bp_dia1                 -8.785076e-04                 0.008893365
## bp_sys2                 -6.459353e-02                -0.128000070
## bp_dia2                  6.124346e-03                -0.042865748
## time_sed                 2.085367e-01                -0.161443254
## drink_regularly         -1.543426e-02                 0.125171867
## days_drinking            1.035354e-01                -0.164332991
## dep1                     1.292284e-01                -0.018798947
## dep2                     1.977895e-01                -0.001860202
## dep3                     9.738092e-02                 0.044880660
## dep4                     1.561584e-01                 0.039257738
## dep5                     2.362056e-01                 0.029350780
## dep6                     1.453389e-01                -0.043106569
## dep7                     1.129602e-01                 0.088616396
## dep8                     1.932295e-01                 0.033152707
## dep9                     1.451907e-01                 0.122896158
##                  Correlation2.household_income Correlation2.weight
## id                                -0.033751166         -0.02077062
## sex                               -0.103012341         -0.30441606
## age                               -0.030968901          0.05351198
## ethnicity                          0.043563631          0.24162089
## education                          0.324451009          0.07772826
## marital                           -0.260545165          0.05686866
## household_size                    -0.036424322         -0.16909017
## household_income                   1.000000000          0.04816387
## weight                             0.048163873          1.00000000
## height                             0.172106834          0.47330089
## bmi                               -0.030371914          0.88505114
## pulse                              0.045798547          0.20259261
## bp_sys1                            0.029065977          0.27989266
## bp_dia1                           -0.013716254          0.16079838
## bp_sys2                            0.053269188          0.26796311
## bp_dia2                            0.033881106          0.14935624
## time_sed                           0.247685742          0.14077805
## drink_regularly                   -0.111498516         -0.02618593
## days_drinking                      0.120322164          0.18221234
## dep1                              -0.083624100          0.24874122
## dep2                              -0.122138483          0.15931382
## dep3                              -0.108340910          0.17249174
## dep4                              -0.012093979          0.17099272
## dep5                              -0.207468754          0.22497615
## dep6                              -0.144415291          0.07451680
## dep7                              -0.023387590          0.08447968
## dep8                              -0.097552281          0.12106640
## dep9                               0.009153002          0.15863236
##                  Correlation2.height Correlation2.bmi Correlation2.pulse
## id                      -0.003927402      -0.02190224      -5.403898e-02
## sex                     -0.722532256       0.02515022       1.661442e-01
## age                     -0.021830956       0.09549983      -1.292483e-01
## ethnicity                0.404234125       0.08988336       1.160958e-01
## education                0.287553219      -0.05027323      -8.768740e-02
## marital                  0.023087513       0.03896273       4.389539e-05
## household_size          -0.281971238      -0.06798001       1.284970e-01
## household_income         0.172106834      -0.03037191       4.579855e-02
## weight                   0.473300889       0.88505114       2.025926e-01
## height                   1.000000000       0.02406659      -3.645864e-02
## bmi                      0.024066587       1.00000000       2.408402e-01
## pulse                   -0.036458642       0.24084019       1.000000e+00
## bp_sys1                  0.203543405       0.21588066      -2.418255e-02
## bp_dia1                  0.044673159       0.16128707       1.623017e-01
## bp_sys2                  0.255127402       0.18314912      -2.280450e-02
## bp_dia2                  0.068085621       0.13628992       9.133461e-02
## time_sed                 0.220536773       0.03993516       2.534172e-02
## drink_regularly         -0.111972367       0.03071495       2.103099e-02
## days_drinking            0.340214150       0.02402465      -5.042144e-02
## dep1                     0.075345786       0.22507851       9.866183e-02
## dep2                     0.018000752       0.17029592       5.637505e-02
## dep3                    -0.078834207       0.22649222       2.553235e-01
## dep4                    -0.064666372       0.21935462       2.756444e-02
## dep5                    -0.133141369       0.31084715       9.709907e-02
## dep6                    -0.009180345       0.08400255      -7.086921e-03
## dep7                    -0.036462633       0.11564580       1.219219e-01
## dep8                    -0.057950727       0.13907595       1.387164e-01
## dep9                     0.057628941       0.14729224       2.413567e-02
##                  Correlation2.bp_sys1 Correlation2.bp_dia1 Correlation2.bp_sys2
## id                        0.021249306         0.0352325744          0.013395807
## sex                      -0.281596777        -0.0369927263         -0.315240512
## age                       0.395539507         0.1768627275          0.397004628
## ethnicity                 0.132275904         0.1141250515          0.121183962
## education                 0.044164422         0.0333297342          0.033293549
## marital                  -0.069325946        -0.0008785076         -0.064593535
## household_size           -0.173709027         0.0088933651         -0.128000070
## household_income          0.029065977        -0.0137162536          0.053269188
## weight                    0.279892656         0.1607983834          0.267963107
## height                    0.203543405         0.0446731587          0.255127402
## bmi                       0.215880663         0.1612870653          0.183149116
## pulse                    -0.024182553         0.1623017122         -0.022804501
## bp_sys1                   1.000000000         0.4949457172          0.919915343
## bp_dia1                   0.494945717         1.0000000000          0.482730495
## bp_sys2                   0.919915343         0.4827304946          1.000000000
## bp_dia2                   0.450601249         0.8845695997          0.459109573
## time_sed                  0.031224411        -0.0383178177          0.046892537
## drink_regularly          -0.095005305         0.0343748192         -0.119492100
## days_drinking             0.192123023         0.1583876956          0.194125369
## dep1                     -0.080327579         0.0008762567         -0.023457433
## dep2                      0.007780815         0.1014952948         -0.001674213
## dep3                      0.086869817         0.1845356805          0.072666014
## dep4                      0.020892273         0.0601421231          0.022045134
## dep5                      0.070289844         0.1351529625          0.032105826
## dep6                     -0.052080069        -0.0211730064         -0.041019017
## dep7                     -0.125935014        -0.0752176411         -0.067651846
## dep8                      0.021267349         0.0481398418         -0.008103926
## dep9                      0.087144809         0.1012306737          0.065230696
##                  Correlation2.bp_dia2 Correlation2.time_sed
## id                        0.038168565           0.091144103
## sex                      -0.041519534          -0.097730920
## age                       0.157760752          -0.171538179
## ethnicity                 0.115024449           0.101396736
## education                 0.105443445           0.329169644
## marital                   0.006124346           0.208536664
## household_size           -0.042865748          -0.161443254
## household_income          0.033881106           0.247685742
## weight                    0.149356243           0.140778051
## height                    0.068085621           0.220536773
## bmi                       0.136289921           0.039935162
## pulse                     0.091334609           0.025341721
## bp_sys1                   0.450601249           0.031224411
## bp_dia1                   0.884569600          -0.038317818
## bp_sys2                   0.459109573           0.046892537
## bp_dia2                   1.000000000          -0.001216683
## time_sed                 -0.001216683           1.000000000
## drink_regularly          -0.002095607          -0.218078067
## days_drinking             0.131839456           0.288441702
## dep1                      0.036848526           0.047947832
## dep2                      0.063765883           0.120509227
## dep3                      0.143204430          -0.036670858
## dep4                      0.105911308           0.015434764
## dep5                      0.101878824           0.042338131
## dep6                     -0.033636785           0.005146929
## dep7                     -0.076340986           0.040743386
## dep8                      0.054925393           0.088082824
## dep9                      0.088322140           0.054415820
##                  Correlation2.drink_regularly Correlation2.days_drinking
## id                                0.116209870                -0.01159518
## sex                               0.126790089                -0.24739987
## age                              -0.011966665                 0.05127391
## ethnicity                         0.050131549                 0.15341409
## education                        -0.046961454                 0.13071835
## marital                          -0.015434258                 0.10353541
## household_size                    0.125171867                -0.16433299
## household_income                 -0.111498516                 0.12032216
## weight                           -0.026185929                 0.18221234
## height                           -0.111972367                 0.34021415
## bmi                               0.030714949                 0.02402465
## pulse                             0.021030988                -0.05042144
## bp_sys1                          -0.095005305                 0.19212302
## bp_dia1                           0.034374819                 0.15838770
## bp_sys2                          -0.119492100                 0.19412537
## bp_dia2                          -0.002095607                 0.13183946
## time_sed                         -0.218078067                 0.28844170
## drink_regularly                   1.000000000                -0.30302913
## days_drinking                    -0.303029134                 1.00000000
## dep1                             -0.167616110                 0.01606481
## dep2                             -0.037649562                -0.05630038
## dep3                             -0.018145893                -0.06264327
## dep4                             -0.156416401                -0.08323949
## dep5                             -0.083941151                -0.02280718
## dep6                             -0.003833638                -0.11723723
## dep7                             -0.052426078                -0.16428494
## dep8                             -0.012342702                 0.08353104
## dep9                              0.132178372                -0.08778648
##                  Correlation2.dep1 Correlation2.dep2 Correlation2.dep3
## id                    0.1314029735       0.035251327       -0.01355027
## sex                   0.0176903573       0.028708275        0.13594785
## age                  -0.1257310813      -0.106066244       -0.01890943
## ethnicity             0.0097575373       0.124161734        0.07025954
## education            -0.0076781334       0.021260763       -0.24202163
## marital               0.1292283906       0.197789477        0.09738092
## household_size       -0.0187989473      -0.001860202        0.04488066
## household_income     -0.0836241002      -0.122138483       -0.10834091
## weight                0.2487412200       0.159313815        0.17249174
## height                0.0753457859       0.018000752       -0.07883421
## bmi                   0.2250785129       0.170295924        0.22649222
## pulse                 0.0986618343       0.056375051        0.25532352
## bp_sys1              -0.0803275788       0.007780815        0.08686982
## bp_dia1               0.0008762567       0.101495295        0.18453568
## bp_sys2              -0.0234574335      -0.001674213        0.07266601
## bp_dia2               0.0368485263       0.063765883        0.14320443
## time_sed              0.0479478318       0.120509227       -0.03667086
## drink_regularly      -0.1676161096      -0.037649562       -0.01814589
## days_drinking         0.0160648051      -0.056300378       -0.06264327
## dep1                  1.0000000000       0.449989580        0.31272982
## dep2                  0.4499895804       1.000000000        0.31103697
## dep3                  0.3127298241       0.311036973        1.00000000
## dep4                  0.4006425699       0.376959408        0.49823240
## dep5                  0.4163066105       0.437692284        0.61815208
## dep6                  0.4702227247       0.654100215        0.25789713
## dep7                  0.3883858961       0.465352056        0.27027105
## dep8                  0.3050335689       0.239866474        0.32056362
## dep9                  0.0992030215       0.450969361        0.18524844
##                  Correlation2.dep4 Correlation2.dep5 Correlation2.dep6
## id                     -0.04759379        0.10640699       0.086418021
## sex                     0.15351734        0.16571963       0.039356233
## age                    -0.13033299       -0.16989239      -0.022402285
## ethnicity               0.09336478        0.06025898       0.044150074
## education              -0.05240642       -0.19921404      -0.043504103
## marital                 0.15615839        0.23620555       0.145338886
## household_size          0.03925774        0.02935078      -0.043106569
## household_income       -0.01209398       -0.20746875      -0.144415291
## weight                  0.17099272        0.22497615       0.074516799
## height                 -0.06466637       -0.13314137      -0.009180345
## bmi                     0.21935462        0.31084715       0.084002546
## pulse                   0.02756444        0.09709907      -0.007086921
## bp_sys1                 0.02089227        0.07028984      -0.052080069
## bp_dia1                 0.06014212        0.13515296      -0.021173006
## bp_sys2                 0.02204513        0.03210583      -0.041019017
## bp_dia2                 0.10591131        0.10187882      -0.033636785
## time_sed                0.01543476        0.04233813       0.005146929
## drink_regularly        -0.15641640       -0.08394115      -0.003833638
## days_drinking          -0.08323949       -0.02280718      -0.117237230
## dep1                    0.40064257        0.41630661       0.470222725
## dep2                    0.37695941        0.43769228       0.654100215
## dep3                    0.49823240        0.61815208       0.257897135
## dep4                    1.00000000        0.57074546       0.261577176
## dep5                    0.57074546        1.00000000       0.349493047
## dep6                    0.26157718        0.34949305       1.000000000
## dep7                    0.42079208        0.45082819       0.463516413
## dep8                    0.30972311        0.55906700       0.312811391
## dep9                    0.22360680        0.28211304       0.521096600
##                  Correlation2.dep7 Correlation2.dep8 Correlation2.dep9
## id                    -0.047095282       0.101114771       0.001234142
## sex                    0.150103101       0.106552019      -0.017989824
## age                   -0.151210760      -0.141801472      -0.015020876
## ethnicity             -0.050950161       0.032973997       0.054876685
## education             -0.001121101      -0.146332016      -0.031959361
## marital                0.112960173       0.193229491       0.145190702
## household_size         0.088616396       0.033152707       0.122896158
## household_income      -0.023387590      -0.097552281       0.009153002
## weight                 0.084479677       0.121066400       0.158632364
## height                -0.036462633      -0.057950727       0.057628941
## bmi                    0.115645801       0.139075945       0.147292236
## pulse                  0.121921893       0.138716409       0.024135667
## bp_sys1               -0.125935014       0.021267349       0.087144809
## bp_dia1               -0.075217641       0.048139842       0.101230674
## bp_sys2               -0.067651846      -0.008103926       0.065230696
## bp_dia2               -0.076340986       0.054925393       0.088322140
## time_sed               0.040743386       0.088082824       0.054415820
## drink_regularly       -0.052426078      -0.012342702       0.132178372
## days_drinking         -0.164284937       0.083531036      -0.087786479
## dep1                   0.388385896       0.305033569       0.099203022
## dep2                   0.465352056       0.239866474       0.450969361
## dep3                   0.270271047       0.320563620       0.185248436
## dep4                   0.420792081       0.309723105       0.223606798
## dep5                   0.450828190       0.559067004       0.282113038
## dep6                   0.463516413       0.312811391       0.521096600
## dep7                   1.000000000       0.487490569       0.416927913
## dep8                   0.487490569       1.000000000       0.313209182
## dep9                   0.416927913       0.313209182       1.000000000
##                  Difference.id Difference.sex Difference.age
## id                 0.000000000    0.044114360   -0.023286666
## sex                0.044114360    0.000000000    0.087050648
## age               -0.023286666    0.087050648    0.000000000
## ethnicity          0.021125175    0.140476488   -0.043805133
## education          0.024618135    0.078793972   -0.029333744
## marital           -0.065545095   -0.154136845   -0.009636256
## household_size    -0.065452704   -0.143830532    0.089670525
## household_income   0.060484367    0.041717963    0.115315819
## weight            -0.004301567    0.031466565   -0.045807026
## height             0.025643372    0.032909139   -0.067874147
## bmi               -0.016241242    0.017108787   -0.033601806
## pulse              0.018992674   -0.051561660   -0.004722139
## bp_sys1           -0.064377452    0.129309891   -0.013869766
## bp_dia1           -0.007657384   -0.134764891   -0.003906610
## bp_sys2           -0.099109504    0.152349330   -0.034822158
## bp_dia2           -0.021763424   -0.124262097   -0.009294210
## time_sed          -0.094561144    0.087150445    0.094316505
## drink_regularly   -0.177871851    0.115486049    0.065332023
## days_drinking      0.098557108    0.004924517    0.022344558
## dep1              -0.088075125    0.060502005    0.107714238
## dep2              -0.038793391    0.048208891    0.078837221
## dep3              -0.036318200   -0.042770656   -0.022761297
## dep4              -0.026592618   -0.035692687    0.115597513
## dep5              -0.064325572   -0.016030206    0.078230127
## dep6              -0.067470913    0.086091426   -0.043377973
## dep7              -0.012010655   -0.088494834    0.095332027
## dep8              -0.114928042   -0.067223398    0.070750866
## dep9               0.013655795   -0.043547193    0.037392669
##                  Difference.ethnicity Difference.education Difference.marital
## id                         0.02112517          0.024618135       -0.065545095
## sex                        0.14047649          0.078793972       -0.154136845
## age                       -0.04380513         -0.029333744       -0.009636256
## ethnicity                  0.00000000          0.066134371       -0.071878804
## education                  0.06613437          0.000000000       -0.052063576
## marital                   -0.07187880         -0.052063576        0.000000000
## household_size             0.11510612          0.087761315       -0.074421507
## household_income           0.04181241          0.079640044       -0.005646766
## weight                    -0.11963116          0.008057375       -0.086047440
## height                    -0.09801676         -0.043302316        0.014603862
## bmi                       -0.09552930          0.030206742       -0.089433713
## pulse                     -0.11711325         -0.004154577        0.064581872
## bp_sys1                   -0.05685053         -0.154949060        0.026463658
## bp_dia1                   -0.06790799         -0.033316916       -0.081081162
## bp_sys2                   -0.05992341         -0.148428906        0.037438450
## bp_dia2                   -0.04264374         -0.093959959       -0.064649712
## time_sed                   0.04540950         -0.039722156       -0.090434780
## drink_regularly            0.06079423         -0.047482311       -0.034502807
## days_drinking             -0.12019474          0.007225192       -0.112569782
## dep1                      -0.02618432         -0.157808437       -0.052236958
## dep2                      -0.15414968         -0.122941492       -0.122502892
## dep3                      -0.05468508          0.142471946       -0.019084006
## dep4                      -0.06710375         -0.041502804       -0.079327093
## dep5                      -0.04478932          0.085695723       -0.086284873
## dep6                      -0.02914395         -0.003620056       -0.050246811
## dep7                       0.07883947         -0.077615871       -0.044011143
## dep8                      -0.06219753         -0.009398979       -0.072728617
## dep9                      -0.13410986         -0.105386022       -0.067024235
##                  Difference.household_size Difference.household_income
## id                            -0.065452704                 0.060484367
## sex                           -0.143830532                 0.041717963
## age                            0.089670525                 0.115315819
## ethnicity                      0.115106118                 0.041812405
## education                      0.087761315                 0.079640044
## marital                       -0.074421507                -0.005646766
## household_size                 0.000000000                 0.087623394
## household_income               0.087623394                 0.000000000
## weight                         0.074025362                -0.087641536
## height                         0.131209999                -0.012023974
## bmi                            0.036749022                -0.091257864
## pulse                         -0.058513391                -0.165228256
## bp_sys1                        0.064254697                -0.095637487
## bp_dia1                       -0.019499134                 0.074544116
## bp_sys2                        0.017469254                -0.112830267
## bp_dia2                        0.002524989                 0.029757828
## time_sed                      -0.037922447                -0.121988693
## drink_regularly               -0.064726817                 0.011701492
## days_drinking                  0.025097106                 0.074035878
## dep1                           0.022147236                -0.061607736
## dep2                          -0.022762286                -0.044426684
## dep3                          -0.088216364                -0.088862201
## dep4                          -0.050755636                -0.127828378
## dep5                          -0.080828557                -0.021847716
## dep6                           0.025483144                 0.014296921
## dep7                          -0.119675048                -0.056088964
## dep8                           0.015861086                -0.036491096
## dep9                           0.031929832                -0.064288710
##                  Difference.weight Difference.height Difference.bmi
## id                    -0.004301567       0.025643372   -0.016241242
## sex                    0.031466565       0.032909139    0.017108787
## age                   -0.045807026      -0.067874147   -0.033601806
## ethnicity             -0.119631157      -0.098016763   -0.095529302
## education              0.008057375      -0.043302316    0.030206742
## marital               -0.086047440       0.014603862   -0.089433713
## household_size         0.074025362       0.131209999    0.036749022
## household_income      -0.087641536      -0.012023974   -0.091257864
## weight                 0.000000000      -0.057871496    0.001328277
## height                -0.057871496       0.000000000   -0.062888809
## bmi                    0.001328277      -0.062888809    0.000000000
## pulse                 -0.050134697      -0.045796183   -0.029030088
## bp_sys1               -0.077285319      -0.143126799   -0.011781648
## bp_dia1                0.076390168       0.150553047    0.006303263
## bp_sys2               -0.051448532      -0.170263879    0.023316848
## bp_dia2                0.060388168       0.114693947    0.007811222
## time_sed              -0.022645056      -0.084010266    0.019739171
## drink_regularly       -0.043369910      -0.101935678   -0.001530571
## days_drinking         -0.159410929      -0.068244854   -0.132470732
## dep1                  -0.194678596      -0.094743601   -0.155514936
## dep2                  -0.123471029      -0.051075768   -0.112364135
## dep3                  -0.043893899       0.038823712   -0.063193912
## dep4                  -0.087426042       0.029924494   -0.109233051
## dep5                  -0.075999885       0.050070172   -0.107567319
## dep6                  -0.014460053      -0.029828753   -0.003475820
## dep7                  -0.029568922       0.053287829   -0.070749636
## dep8                  -0.103046415       0.007415945   -0.100037890
## dep9                  -0.146758543      -0.048414997   -0.137862650
##                  Difference.pulse Difference.bp_sys1 Difference.bp_dia1
## id                   0.0189926738       -0.064377452       -0.007657384
## sex                 -0.0515616596        0.129309891       -0.134764891
## age                 -0.0047221385       -0.013869766       -0.003906610
## ethnicity           -0.1171132483       -0.056850530       -0.067907991
## education           -0.0041545769       -0.154949060       -0.033316916
## marital              0.0645818721        0.026463658       -0.081081162
## household_size      -0.0585133911        0.064254697       -0.019499134
## household_income    -0.1652282563       -0.095637487        0.074544116
## weight              -0.0501346973       -0.077285319        0.076390168
## height              -0.0457961833       -0.143126799        0.150553047
## bmi                 -0.0290300879       -0.011781648        0.006303263
## pulse                0.0000000000        0.016657394        0.008565375
## bp_sys1              0.0166573938        0.000000000       -0.011942043
## bp_dia1              0.0085653753       -0.011942043        0.000000000
## bp_sys2              0.0345080714        0.006466153        0.014560843
## bp_dia2              0.0595681670        0.015928947       -0.016028676
## time_sed             0.0372175627       -0.035006865        0.040496212
## drink_regularly      0.0467910286        0.068199579       -0.092035484
## days_drinking       -0.0326848646       -0.043532193       -0.041181298
## dep1                -0.0121514311        0.050395420        0.059849521
## dep2                 0.0640821613       -0.069000545       -0.058258015
## dep3                -0.0799072237       -0.076932572       -0.127020116
## dep4                 0.0947467759       -0.046934947       -0.032741861
## dep5                 0.0856228410       -0.117661639       -0.103966896
## dep6                 0.0941419664       -0.007601920       -0.004981686
## dep7                -0.0324614771        0.023169032        0.031765071
## dep8                -0.0115420136       -0.047185937       -0.052720699
## dep9                -0.0005080625       -0.031364008       -0.024668887
##                  Difference.bp_sys2 Difference.bp_dia2 Difference.time_sed
## id                     -0.099109504       -0.021763424        -0.094561144
## sex                     0.152349330       -0.124262097         0.087150445
## age                    -0.034822158       -0.009294210         0.094316505
## ethnicity              -0.059923412       -0.042643742         0.045409499
## education              -0.148428906       -0.093959959        -0.039722156
## marital                 0.037438450       -0.064649712        -0.090434780
## household_size          0.017469254        0.002524989        -0.037922447
## household_income       -0.112830267        0.029757828        -0.121988693
## weight                 -0.051448532        0.060388168        -0.022645056
## height                 -0.170263879        0.114693947        -0.084010266
## bmi                     0.023316848        0.007811222         0.019739171
## pulse                   0.034508071        0.059568167         0.037217563
## bp_sys1                 0.006466153        0.015928947        -0.035006865
## bp_dia1                 0.014560843       -0.016028676         0.040496212
## bp_sys2                 0.000000000        0.028740967        -0.024156231
## bp_dia2                 0.028740967        0.000000000        -0.001229762
## time_sed               -0.024156231       -0.001229762         0.000000000
## drink_regularly         0.105673258       -0.030356685         0.162479202
## days_drinking          -0.070042957       -0.019529114        -0.171751786
## dep1                    0.002360078        0.029916897        -0.021429341
## dep2                   -0.043614841       -0.009737730        -0.052451323
## dep3                   -0.074098176       -0.077702049         0.067574669
## dep4                   -0.044507870       -0.029064073         0.062970919
## dep5                   -0.083453502       -0.070206078         0.009010268
## dep6                   -0.009859198        0.053077551         0.079122032
## dep7                   -0.004294235        0.104508502         0.011782471
## dep8                    0.021864353       -0.028299307        -0.120978729
## dep9                    0.002707224        0.018080403        -0.155482973
##                  Difference.drink_regularly Difference.days_drinking
## id                             -0.177871851              0.098557108
## sex                             0.115486049              0.004924517
## age                             0.065332023              0.022344558
## ethnicity                       0.060794231             -0.120194742
## education                      -0.047482311              0.007225192
## marital                        -0.034502807             -0.112569782
## household_size                 -0.064726817              0.025097106
## household_income                0.011701492              0.074035878
## weight                         -0.043369910             -0.159410929
## height                         -0.101935678             -0.068244854
## bmi                            -0.001530571             -0.132470732
## pulse                           0.046791029             -0.032684865
## bp_sys1                         0.068199579             -0.043532193
## bp_dia1                        -0.092035484             -0.041181298
## bp_sys2                         0.105673258             -0.070042957
## bp_dia2                        -0.030356685             -0.019529114
## time_sed                        0.162479202             -0.171751786
## drink_regularly                 0.000000000             -0.008545924
## days_drinking                  -0.008545924              0.000000000
## dep1                            0.181162622             -0.107776916
## dep2                            0.081144107             -0.047234092
## dep3                            0.060103819             -0.028919342
## dep4                            0.201289255             -0.023323698
## dep5                            0.097609590             -0.037984502
## dep6                            0.019781639              0.045275361
## dep7                            0.092422713              0.062023300
## dep8                            0.054426885             -0.126528667
## dep9                           -0.064537194              0.053591159
##                  Difference.dep1 Difference.dep2 Difference.dep3
## id                  -0.088075125     -0.03879339     -0.03631820
## sex                  0.060502005      0.04820889     -0.04277066
## age                  0.107714238      0.07883722     -0.02276130
## ethnicity           -0.026184315     -0.15414968     -0.05468508
## education           -0.157808437     -0.12294149      0.14247195
## marital             -0.052236958     -0.12250289     -0.01908401
## household_size       0.022147236     -0.02276229     -0.08821636
## household_income    -0.061607736     -0.04442668     -0.08886220
## weight              -0.194678596     -0.12347103     -0.04389390
## height              -0.094743601     -0.05107577      0.03882371
## bmi                 -0.155514936     -0.11236414     -0.06319391
## pulse               -0.012151431      0.06408216     -0.07990722
## bp_sys1              0.050395420     -0.06900055     -0.07693257
## bp_dia1              0.059849521     -0.05825802     -0.12702012
## bp_sys2              0.002360078     -0.04361484     -0.07409818
## bp_dia2              0.029916897     -0.00973773     -0.07770205
## time_sed            -0.021429341     -0.05245132      0.06757467
## drink_regularly      0.181162622      0.08114411      0.06010382
## days_drinking       -0.107776916     -0.04723409     -0.02891934
## dep1                 0.000000000      0.08166990      0.04269606
## dep2                 0.081669899      0.00000000      0.11938963
## dep3                 0.042696057      0.11938963      0.00000000
## dep4                 0.077986080      0.13920371      0.01691367
## dep5                -0.026413838     -0.05730106     -0.10188646
## dep6                -0.018346456     -0.04153381      0.15813265
## dep7                 0.019499770      0.04147674      0.10162031
## dep8                -0.002172946      0.14805607     -0.01014629
## dep9                 0.101679062     -0.09189916      0.09254761
##                  Difference.dep4 Difference.dep5 Difference.dep6
## id                  -0.026592618    -0.064325572    -0.067470913
## sex                 -0.035692687    -0.016030206     0.086091426
## age                  0.115597513     0.078230127    -0.043377973
## ethnicity           -0.067103752    -0.044789322    -0.029143955
## education           -0.041502804     0.085695723    -0.003620056
## marital             -0.079327093    -0.086284873    -0.050246811
## household_size      -0.050755636    -0.080828557     0.025483144
## household_income    -0.127828378    -0.021847716     0.014296921
## weight              -0.087426042    -0.075999885    -0.014460053
## height               0.029924494     0.050070172    -0.029828753
## bmi                 -0.109233051    -0.107567319    -0.003475820
## pulse                0.094746776     0.085622841     0.094141966
## bp_sys1             -0.046934947    -0.117661639    -0.007601920
## bp_dia1             -0.032741861    -0.103966896    -0.004981686
## bp_sys2             -0.044507870    -0.083453502    -0.009859198
## bp_dia2             -0.029064073    -0.070206078     0.053077551
## time_sed             0.062970919     0.009010268     0.079122032
## drink_regularly      0.201289255     0.097609590     0.019781639
## days_drinking       -0.023323698    -0.037984502     0.045275361
## dep1                 0.077986080    -0.026413838    -0.018346456
## dep2                 0.139203711    -0.057301064    -0.041533814
## dep3                 0.016913670    -0.101886465     0.158132653
## dep4                 0.000000000    -0.063122427     0.154246099
## dep5                -0.063122427     0.000000000     0.058907982
## dep6                 0.154246099     0.058907982     0.000000000
## dep7                 0.026497433    -0.162929150     0.056672069
## dep8                 0.003407790    -0.222790431     0.107945134
## dep9                -0.002977329    -0.113305432    -0.190572205
##                  Difference.dep7 Difference.dep8 Difference.dep9
## id                  -0.012010655    -0.114928042    0.0136557946
## sex                 -0.088494834    -0.067223398   -0.0435471935
## age                  0.095332027     0.070750866    0.0373926690
## ethnicity            0.078839474    -0.062197530   -0.1341098608
## education           -0.077615871    -0.009398979   -0.1053860223
## marital             -0.044011143    -0.072728617   -0.0670242347
## household_size      -0.119675048     0.015861086    0.0319298315
## household_income    -0.056088964    -0.036491096   -0.0642887097
## weight              -0.029568922    -0.103046415   -0.1467585435
## height               0.053287829     0.007415945   -0.0484149970
## bmi                 -0.070749636    -0.100037890   -0.1378626502
## pulse               -0.032461477    -0.011542014   -0.0005080625
## bp_sys1              0.023169032    -0.047185937   -0.0313640077
## bp_dia1              0.031765071    -0.052720699   -0.0246688869
## bp_sys2             -0.004294235     0.021864353    0.0027072237
## bp_dia2              0.104508502    -0.028299307    0.0180804029
## time_sed             0.011782471    -0.120978729   -0.1554829729
## drink_regularly      0.092422713     0.054426885   -0.0645371937
## days_drinking        0.062023300    -0.126528667    0.0535911589
## dep1                 0.019499770    -0.002172946    0.1016790618
## dep2                 0.041476739     0.148056070   -0.0918991639
## dep3                 0.101620307    -0.010146289    0.0925476095
## dep4                 0.026497433     0.003407790   -0.0029773285
## dep5                -0.162929150    -0.222790431   -0.1133054317
## dep6                 0.056672069     0.107945134   -0.1905722051
## dep7                 0.000000000    -0.026471182   -0.1797376219
## dep8                -0.026471182     0.000000000   -0.0832136036
## dep9                -0.179737622    -0.083213604    0.0000000000

Missingness Inspection

Let’s take a look at the missingness in our incomplete dataset. We start with a visualization that shows the amount of missingness per variable. From it, we see that 8.1% of the data is missing. The missing values concern weight, height, BMI, blood pressure, drinking habits and depression.

# Cell-level missingness map of the incomplete data, with a bold title,
# rotated x-axis labels and no legend title
vis_miss(data_incomplete) +
  labs(title = "Missing Data Plot for Incomplete Dataset") +
  theme(
    legend.title = element_blank(),
    legend.text = element_text(size = 10),
    axis.text.y = element_text(size = 10),
    axis.text.x = element_text(size = 10, angle = 90, vjust = 0.5, hjust = 1),
    plot.title = element_text(face = "bold", size = 20)
  )

To be able to make a better comparison, we use the following plot.

# Per-variable missingness plot; show_pct = TRUE asks for percentages
# (presumably naniar::gg_miss_var — confirm against the loaded packages)
gg_miss_var(data_incomplete, show_pct = TRUE)

Research Question

Now that we have an understanding of our data, we formulate our research question, which involves a variable with missing values.

We are interested in whether age has a significant effect on the depression screening score. We think this is an interesting question, since the screening score is built from all depression variables together. Furthermore, it contains a substantial number of NAs in total, which is in line with the purpose of this assignment.

The depression screening score is the sum of all scores given to the depression screening questions. To create this score, we add a new column to our data frame.

Our full research question and hypotheses are as follows.

RQ: Does age have a (positive or negative) impact on the depression screening score?

H1: Age has a (positive or negative) impact on the depression screening score.

H0: Age has no significant correlation with the depression screening score.

# Adds the depression screening score: the sum of the nine item scores
# dep1..dep9. Because `+` propagates NA, a row with any missing item gets an
# NA score.
add_depression_score <- function(df) {
  df %>%
    mutate(
      depression_score =
        dep1 + dep2 + dep3 + dep4 + dep5 + dep6 + dep7 + dep8 + dep9
    )
}

data_complete1 <- add_depression_score(data_complete)

data_incomplete1 <- add_depression_score(data_incomplete)

The correlation test on the complete data shows that the relation is not significant at the 95% confidence level. However, with a p-value of 0.17 it is not that far away from 0.05. This is interesting, since we would like to find out whether different imputations of the NA values give us another result, i.e. whether they will tell us the effect is significant.

# Pearson correlation test between age and the summed depression score,
# computed on the complete data as the reference result
cor.test(data_complete1$age, data_complete1$depression_score)
## 
##  Pearson's product-moment correlation
## 
## data:  data_complete1$age and data_complete1$depression_score
## t = -1.3695, df = 498, p-value = 0.1714
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.14815038  0.02657777
## sample estimates:
##         cor 
## -0.06125558

For the sake of our research, we make a subset of the data with only the involved variables included.

# Keep only the two variables the research question needs
subset_complete <- data_complete1[c("age", "depression_score")]

subset_incomplete <- data_incomplete1[c("age", "depression_score")]

Testing MCAR

Now, we do an MCAR test to see if the data are missing completely at random. The results show that, across all variables, the values are not missing completely at random. However, if we look at just the variables in our subset, the test gives no evidence against the values being missing completely at random.

# MCAR test on the full incomplete data set (mcar_test is presumably
# naniar's Little's MCAR test — confirm against the loaded packages).
# The very small p-value rejects MCAR when all variables are considered.
out <- mcar_test(data_incomplete)
out$statistic  # 1448.785
## [1] 1448.785
out$p.value # 1.968048e-10
## [1] 1.968048e-10
# Same test restricted to the research subset (age, depression_score):
# the large p-value gives no evidence against MCAR for these two variables.
out_sub <- mcar_test(subset_incomplete)
out_sub$statistic #0.07599126
## [1] 0.07599126
out_sub$p.value # 0.7828053
## [1] 0.7828053

Missingness in Research Data

We start inspecting our research data with a quick summary. The minimum age of participants is 20, the maximum age is 69 and the mean age is 44.48. The lowest depression score is 0, the highest is 22 and the mean is 3.24. There are no missing values for age and 177 missing values for depression score.

# Per-column quartiles, mean and NA count for the two research variables
summary(subset_incomplete)
##       age        depression_score
##  Min.   :20.00   Min.   : 0.000  
##  1st Qu.:32.00   1st Qu.: 0.000  
##  Median :45.00   Median : 2.000  
##  Mean   :44.48   Mean   : 3.238  
##  3rd Qu.:57.00   3rd Qu.: 4.000  
##  Max.   :69.00   Max.   :22.000  
##                  NA's   :177

Visualizing the distribution of missing values between depression score and age:

# Scatter plot of depression score against age; geom_miss_point() is used so
# that rows with a missing value are shown in the plot as well
ggplot(subset_incomplete, aes(age, depression_score)) +
  geom_miss_point()

Now we take a look at the missingness proportion.

# Share of missing entries in each column of the research subset
pm <- vapply(
  subset_incomplete,
  function(column) mean(is.na(column)),
  numeric(1)
)
pm["depression_score"] # 0.354
## depression_score 
##            0.354
# Missingness map of the two research variables
vis <- vis_miss(subset_incomplete) # 35% of the depression scores are missing
vis

# Response (missing-data) patterns
plot_pattern(subset_incomplete) # 177 values are missing for depression score

Here we compare the mean age of individuals with missing depression score values and those with non-missing values to see if the missingness relates to the observed data. The p-value of 0.7831269 indicates that there is no significant difference between the age of individuals with and without missing values in the depression score variable: the missingness in dep_score does not significantly depend on age.

# Create a missingness vector for the dependent variable:
# TRUE where depression_score is NA, FALSE where it is observed
mDep <- is.na(subset_incomplete$depression_score)

# Two-sample t-test comparing age between the two missingness groups
# (mDep is not a column of subset_incomplete; it is the vector created above)
out <- t.test(age ~ mDep, data = subset_incomplete)
out$statistic # -0.2754501
##          t 
## -0.2754501
out$p.value # 0.7831269
## [1] 0.7831269

In the visualization of the missing values, the missing values in dep_score are spread evenly along the age axis. The logistic regression model further shows that the missingness in dep_score is not significantly related to age (p = 0.78). Both the visualization and the model are consistent with the depression scores being missing completely at random (MCAR).

# Build the data used to model missingness: age plus a binary indicator
# for depression_score (1 = observed, 0 = missing).
# The indicator comes straight from is.na(). Comparing the numeric score
# with "" and then overwriting every NA in the data frame with 0 would
# also turn a missing age into 0.
incomplete_missingness <- subset_incomplete["age"]
incomplete_missingness$missingness <-
  as.numeric(!is.na(subset_incomplete$depression_score))

# View() opens a data viewer, so only call it in an interactive session
if (interactive()) {
  View(incomplete_missingness)
}

# create a logit model to examine whether the missingness of depression score is dependent on age
# (response: missingness, coded 1 = observed and 0 = missing; predictor: age)
logit_model <- glm(missingness ~ age, data = incomplete_missingness, family = "binomial")
summary(logit_model)
## 
## Call:
## glm(formula = missingness ~ age, family = "binomial", data = incomplete_missingness)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4609  -1.4331   0.9257   0.9392   0.9516  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  0.681464   0.305121   2.233   0.0255 *
## age         -0.001795   0.006513  -0.276   0.7828  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 649.89  on 499  degrees of freedom
## Residual deviance: 649.81  on 498  degrees of freedom
## AIC: 653.81
## 
## Number of Fisher Scoring iterations: 4
# Plot the missingness indicator against age together with the fitted
# logistic curve
ggplot(incomplete_missingness, aes(x = age, y = missingness)) +
  geom_point() +
  geom_smooth(
    method = "glm",
    method.args = list(family = "binomial"),
    se = FALSE
  )
## `geom_smooth()` using formula = 'y ~ x'

Age and Depression Score Pearson Correlation

The correlation estimate for the dataset with missing values is -0.0978, which indicates a weak negative correlation between age and depression score. The p-value is 0.0792, which suggests that there is no strong evidence of a significant correlation between age and depression score. However, it is close to the conventional threshold of 0.05.

The correlation estimate for the complete subset is -0.0613, which indicates a weak negative correlation between age and depression_score. The p-value is 0.1714, which also suggests that there is no strong evidence of a significant correlation between age and depression_score.

We see that the p-value is higher for the complete data, meaning that the missing values make the correlation appear closer to statistical significance than it actually is.

# Pearson correlation on the incomplete subset
cor_test1 <- cor.test(
  subset_incomplete$age,
  subset_incomplete$depression_score,
  method = "pearson"
)
cor_test1[["estimate"]]
##         cor 
## -0.09782773
cor_test1[["p.value"]]
## [1] 0.07916073
# Pearson correlation on the subset without missing values
cor_test2 <- cor.test(
  subset_complete$age,
  subset_complete$depression_score,
  method = "pearson"
)
cor_test2[["estimate"]]
##         cor 
## -0.06125558
cor_test2[["p.value"]]
## [1] 0.1714459

Complete Data Analysis

When looking at the complete subset, we see that the mean for age is 44.484 and for depression score 3.564. The variance for age is 206.65105 and 20.60311 for the depression score. They have a correlation of -0.06125558. According to the regression model we made, if age goes up by one year, the depression score goes down by 0.01934. The p-value of this effect is 0.171, which is 0.121 above the 0.05 threshold for statistical significance. This is interesting, as we would like to find out whether different imputation methods for our missing data can lead us to conclude that the relation is statistically significant when in reality it is not.

As the estimates of the linear model show, the linear association between age and depression score isn’t significant and the model only accounts for 0.38% of the variability. Therefore, this linear model is not an ideal representation of their relation. The visualization agrees: many points lie far away from the regression line.

# Means, variances and correlation matrix of the complete subset
colMeans(subset_complete)
##              age depression_score 
##           44.484            3.564
vapply(subset_complete, var, numeric(1))
##              age depression_score 
##        206.65105         20.60311
cor(subset_complete)
##                          age depression_score
## age               1.00000000      -0.06125558
## depression_score -0.06125558       1.00000000
plot_corr(subset_complete, label = TRUE)

# build a linear regression model:
# depression_score is regressed on age using the complete subset
model1 <- lm(depression_score ~ age, data = subset_complete)
summary(model1)
## 
## Call:
## lm(formula = depression_score ~ age, data = subset_complete)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.038 -3.225 -1.718  1.291 20.233 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.42439    0.66016   6.702 5.58e-11 ***
## age         -0.01934    0.01412  -1.370    0.171    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.535 on 498 degrees of freedom
## Multiple R-squared:  0.003752,   Adjusted R-squared:  0.001752 
## F-statistic: 1.876 on 1 and 498 DF,  p-value: 0.1714
# Scatter plot of the complete subset with the fitted regression line
ggmice(subset_complete, aes(age, depression_score)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

Incomplete Data Analysis with Imputation Methods

Now we will analyze our incomplete data to see if we get different results. First we apply listwise deletion of the NA values, then mean imputation, and finally regression imputation.

Deletion-Based Treatments

Neither the mean nor the variance of the variable age changes, since it has no missing data. However, the variance of dep_score decreases because 35.4% of the data was deleted in the observed dataset, leading to sampling variability. The mean of depression score also decreases in the observed data. Nonetheless, the negative correlation between age and depression score remains low but gets slightly stronger, from -0.06 in the complete data to -0.1 in the observed data. Comparing the linear models estimated from the complete data and the observed data, the R² slightly increases and the regression estimates are slightly biased in the model with deleted data.

# Proportion of missing values per column; age has none, so only
# depression_score causes rows to be dropped under listwise deletion
pm <- colMeans(is.na(subset_incomplete))
pm["age"]
## age 
##   0
# Means, variances and correlations computed from the observed values only
colMeans(subset_incomplete, na.rm = TRUE)
##              age depression_score 
##         44.48400          3.23839
vapply(subset_incomplete, var, numeric(1), na.rm = TRUE)
##              age depression_score 
##        206.65105         17.30635
cor(subset_incomplete, use = "pairwise.complete.obs")
##                          age depression_score
## age               1.00000000      -0.09782773
## depression_score -0.09782773       1.00000000
plot_corr(subset_incomplete, label = TRUE)

# build a linear regression model on the incomplete subset;
# rows with a missing depression_score are dropped by lm() (listwise deletion)
model2 <- lm(depression_score ~ age, data = subset_incomplete)
summary(model2)
## 
## Call:
## lm(formula = depression_score ~ age, data = subset_incomplete)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.926 -2.813 -1.571  1.232 18.497 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.49114    0.74780   6.006 5.15e-09 ***
## age         -0.02825    0.01604  -1.761   0.0792 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.147 on 321 degrees of freedom
##   (177 observations deleted due to missingness)
## Multiple R-squared:  0.00957,    Adjusted R-squared:  0.006485 
## F-statistic: 3.102 on 1 and 321 DF,  p-value: 0.07916
# Scatter plot of the incomplete subset with the fitted regression line
ggmice(subset_incomplete, aes(age, depression_score)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 177 rows containing non-finite values (`stat_smooth()`).

Mean Substitution

The Pearson’s product-moment correlation shows that the correlation coefficient is -0.07876674, which indicates a weak negative correlation. With a p-value of 0.07847, there is not enough evidence to reject the null hypothesis of no correlation at the 5% significance level. Compared to the complete data, the correlation has become slightly more negative, from -0.06125558 to -0.07876674. The p-value has gone down as well, from 0.1714 to 0.07847. Since the p-value is still greater than the typical significance level of 0.05, we conclude that there is no significant correlation between age and depression score in the mean-imputed data. So, although mean imputation is normally not the smartest thing to do unless you know what you are doing, it does not change the conclusion of the correlation test in this particular case.

# Mean imputation with mice: one imputed data set (m = 1), one iteration (maxit = 1)
miceOut <- mice(subset_incomplete, method = "mean", m = 1, maxit = 1)
## 
##  iter imp variable
##   1   1  depression_score
# Extract the completed data set from the imputation object
mean_imputed_data <- complete(miceOut)

# correlation test: age against depression score in the mean-imputed data

cor.test(mean_imputed_data$age, mean_imputed_data$depression_score)
## 
##  Pearson's product-moment correlation
## 
## data:  mean_imputed_data$age and mean_imputed_data$depression_score
## t = -1.7632, df = 498, p-value = 0.07847
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.165315474  0.008985917
## sample estimates:
##         cor 
## -0.07876674

Now we will show two histograms that plot the differences in depression score frequency for the mean imputed data set and the complete data set. This shows us that the lower depression scores seem more prevalent in the mean imputed data.

# Histogram of the depression scores after mean imputation
hist(
  mean_imputed_data$depression_score,
  main = "Mean Imputed Depression Score",
  xlab = "Depression Score"
)

# Histogram of the depression scores in the complete data
hist(
  data_complete1$depression_score,
  main = "Complete Data Set Depression Score",
  xlab = "Depression Score"
)

Here we show by means of a t-test that the differences in the depression score between the full data set and the mean imputed data set are not significant, as indicated by the p-value of 0.1968.

# Compare the mean depression score of the complete data with the mean-imputed data.
# NOTE(review): both vectors appear to describe the same respondents, yet they
# are tested here as two independent samples - confirm this is intended.
t.test(data_complete1$depression_score, mean_imputed_data$depression_score)
## 
##  Welch Two Sample t-test
## 
## data:  data_complete1$depression_score and mean_imputed_data$depression_score
## t = 1.2917, df = 917.11, p-value = 0.1968
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1690996  0.8203194
## sample estimates:
## mean of x mean of y 
##   3.56400   3.23839

Here we show our regression model with the mean imputed data. Comparing it to the regression model with the complete subset data, the coefficient goes up from -0.01934 to -0.01831, indicating that mean imputation yields a slightly less negative relationship. The p-value goes down from 0.171 to 0.0785, indicating that mean imputation brings us closer to a statistically significant relationship than is actually the case.

# Regress depression score on age in the mean-imputed data
# (glm() with its default family)
fit <- glm(depression_score ~ age, data = mean_imputed_data)
summary(fit)
## 
## Call:
## glm(formula = depression_score ~ age, data = mean_imputed_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.6867  -2.3617  -0.2194   0.3390  18.5880  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.05292    0.48543   8.349 6.85e-16 ***
## age         -0.01831    0.01038  -1.763   0.0785 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 11.12062)
## 
##     Null deviance: 5572.6  on 499  degrees of freedom
## Residual deviance: 5538.1  on 498  degrees of freedom
## AIC: 2627.3
## 
## Number of Fisher Scoring iterations: 2

Regression Imputation

#regression model fitted on the incomplete subset
# NOTE(review): age is the response here, whereas the models in the sections
# above use depression_score ~ age - confirm the direction is intended
fit <- with(subset_incomplete, lm(age ~ depression_score))
summary(fit)
## 
## Call:
## lm(formula = age ~ depression_score)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.4502 -13.2643  -0.0949  12.9381  24.5828 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       45.4502     1.0133  44.854   <2e-16 ***
## depression_score  -0.3388     0.1924  -1.761   0.0792 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.36 on 321 degrees of freedom
##   (177 observations deleted due to missingness)
## Multiple R-squared:  0.00957,    Adjusted R-squared:  0.006485 
## F-statistic: 3.102 on 1 and 321 DF,  p-value: 0.07916
#mean imputation
# create imp (single mean imputation of the missing depression scores)
imp <- mice(subset_incomplete, method = "mean", m = 1, maxit = 1)
## 
##  iter imp variable
##   1   1  depression_score
# use imp to complete the data set; only the first rows are shown,
# because printing all 500 rows buries the analysis in raw data
head(complete(imp))
##   age depression_score
## 1  49          0.00000
## 2  34          3.23839
## 3  68          3.23839
## 4  24          1.00000
## 5  67          0.00000
## 6  40          3.23839
# Observed-data column means (NA values removed)
colMeans(subset_incomplete, na.rm = TRUE)
##              age depression_score 
##         44.48400          3.23839
#regression model with imputed data
# (with() evaluates the lm() call on the imputed data held in imp)
fit <- with(imp, lm(age ~ depression_score))
summary(fit)
## # A tibble: 2 x 6
##   term             estimate std.error statistic   p.value  nobs
##   <chr>               <dbl>     <dbl>     <dbl>     <dbl> <int>
## 1 (Intercept)        45.6       0.894     51.0  7.78e-200   500
## 2 depression_score   -0.339     0.192     -1.76 7.85e-  2   500
# NOTE(review): this plots the incomplete depression_score column rather than
# the imputed data in imp - confirm which was intended
densityplot(subset_incomplete$depression_score)
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run: 
## 
##     ggmice(my_mids, ggplot2::aes(x = my_vrb, group = .imp)) +
##     ggplot2::geom_density() 
## 
## See amices.org/ggmice for more info.

# Regression imputation: fill each missing depression_score with the value
# predicted by a linear model ("norm.predict"); one imputed data set, one
# iteration.
imp <- mice(
  data = subset_incomplete,
  m = 1,
  method = "norm.predict",
  maxit = 1
)
## 
##  iter imp variable
##   1   1  depression_score
# Print the completed data set (observed values plus imputed values).
# NOTE(review): this prints every row; head(complete(imp)) would keep the
# report readable — confirm whether the full listing is needed.
complete(imp)
##     age depression_score
## 1    49         0.000000
## 2    34         3.530810
## 3    68         2.570477
## 4    24         1.000000
## 5    67         0.000000
## 6    40         3.361339
## 7    60         0.000000
## 8    60         2.796438
## 9    38         3.417829
## 10   21         3.897996
## 11   57         2.000000
## 12   47         3.163624
## 13   56         2.000000
## 14   61         1.000000
## 15   25         3.785015
## 16   34         0.000000
## 17   62         2.739948
## 18   32         0.000000
## 19   64         0.000000
## 20   35         3.502565
## 21   66        10.000000
## 22   48         3.135379
## 23   69         2.542232
## 24   22         5.000000
## 25   27         1.000000
## 26   69         3.000000
## 27   56         2.000000
## 28   55         2.937663
## 29   23         4.000000
## 30   46         3.191869
## 31   26         0.000000
## 32   29         8.000000
## 33   28         4.000000
## 34   28         3.700280
## 35   23         3.000000
## 36   60         0.000000
## 37   25         3.785015
## 38   37         4.000000
## 39   50         0.000000
## 40   40         3.361339
## 41   60         2.796438
## 42   21         3.897996
## 43   66         0.000000
## 44   59         0.000000
## 45   44         1.000000
## 46   34         3.530810
## 47   31         2.000000
## 48   66         3.000000
## 49   53         2.994153
## 50   28         3.700280
## 51   60         8.000000
## 52   35         1.000000
## 53   26         3.756770
## 54   66         2.626967
## 55   64         2.683457
## 56   63         2.711703
## 57   68         4.000000
## 58   59         0.000000
## 59   49         3.000000
## 60   50         3.078889
## 61   34         0.000000
## 62   42         3.304849
## 63   65         2.655212
## 64   33         0.000000
## 65   37         2.000000
## 66   21         3.897996
## 67   27         3.728525
## 68   23        11.000000
## 69   30         6.000000
## 70   38         1.000000
## 71   52         3.022398
## 72   40         3.361339
## 73   56         0.000000
## 74   26         3.756770
## 75   63         7.000000
## 76   55         1.000000
## 77   63         0.000000
## 78   22        10.000000
## 79   39         3.389584
## 80   54         0.000000
## 81   51         0.000000
## 82   33         3.559055
## 83   32         0.000000
## 84   36         3.474320
## 85   35         3.502565
## 86   62         2.739948
## 87   30         3.643790
## 88   62         4.000000
## 89   26         9.000000
## 90   20        10.000000
## 91   34         0.000000
## 92   59         4.000000
## 93   66         2.626967
## 94   61         5.000000
## 95   51         3.050643
## 96   58         0.000000
## 97   69         2.000000
## 98   44         0.000000
## 99   52         2.000000
## 100  30         0.000000
## 101  24         2.000000
## 102  36         3.474320
## 103  34         1.000000
## 104  38         3.417829
## 105  49         0.000000
## 106  30         1.000000
## 107  52         2.000000
## 108  51        11.000000
## 109  63         1.000000
## 110  63         2.711703
## 111  65         0.000000
## 112  41         3.000000
## 113  59         3.000000
## 114  27         2.000000
## 115  63         0.000000
## 116  31         5.000000
## 117  60         0.000000
## 118  25         1.000000
## 119  53         2.994153
## 120  30         3.643790
## 121  40         0.000000
## 122  66         0.000000
## 123  27         3.728525
## 124  43         5.000000
## 125  32         0.000000
## 126  53         5.000000
## 127  29         2.000000
## 128  50         3.078889
## 129  34         3.530810
## 130  37        19.000000
## 131  68         2.570477
## 132  49         3.107134
## 133  25         3.785015
## 134  45         3.220114
## 135  29         3.672035
## 136  34         4.000000
## 137  60         2.796438
## 138  50         4.000000
## 139  41         4.000000
## 140  41         3.333094
## 141  23         0.000000
## 142  60         2.796438
## 143  67         0.000000
## 144  33         9.000000
## 145  47         0.000000
## 146  28         5.000000
## 147  31         0.000000
## 148  32        10.000000
## 149  34        14.000000
## 150  25         0.000000
## 151  64         8.000000
## 152  50         3.078889
## 153  61         2.768193
## 154  50         0.000000
## 155  46         2.000000
## 156  27         3.000000
## 157  38         3.417829
## 158  68         6.000000
## 159  42         3.304849
## 160  27         0.000000
## 161  30         2.000000
## 162  68         0.000000
## 163  57         1.000000
## 164  32         3.587300
## 165  43         0.000000
## 166  63         0.000000
## 167  21         3.897996
## 168  34         0.000000
## 169  21         3.000000
## 170  39         3.389584
## 171  50        14.000000
## 172  61         2.768193
## 173  27         1.000000
## 174  32         1.000000
## 175  46         0.000000
## 176  24         1.000000
## 177  33         3.559055
## 178  43         3.276604
## 179  64         1.000000
## 180  34         0.000000
## 181  61         2.000000
## 182  45         3.220114
## 183  63         6.000000
## 184  27         0.000000
## 185  51        10.000000
## 186  68         2.570477
## 187  29         7.000000
## 188  47         3.163624
## 189  31         3.615545
## 190  44         3.000000
## 191  68         4.000000
## 192  51         3.050643
## 193  68         1.000000
## 194  68         3.000000
## 195  37         0.000000
## 196  50         1.000000
## 197  34         1.000000
## 198  57         9.000000
## 199  46        12.000000
## 200  45         2.000000
## 201  55         0.000000
## 202  27         3.728525
## 203  32         1.000000
## 204  42         2.000000
## 205  42         3.304849
## 206  50         3.078889
## 207  61         2.000000
## 208  32         4.000000
## 209  65         4.000000
## 210  24        10.000000
## 211  41         2.000000
## 212  64        10.000000
## 213  29         3.672035
## 214  52         3.022398
## 215  23         0.000000
## 216  67         2.598722
## 217  39         4.000000
## 218  35         0.000000
## 219  53         0.000000
## 220  62         2.000000
## 221  61         0.000000
## 222  47         2.000000
## 223  40         3.361339
## 224  52         1.000000
## 225  24         3.813260
## 226  61         2.768193
## 227  60         2.796438
## 228  20         3.926241
## 229  57         2.881173
## 230  28         3.700280
## 231  46         1.000000
## 232  57         3.000000
## 233  57         2.881173
## 234  60         0.000000
## 235  24         2.000000
## 236  33         8.000000
## 237  59         2.824683
## 238  21         3.000000
## 239  28         1.000000
## 240  26         1.000000
## 241  35         3.000000
## 242  29         1.000000
## 243  45        10.000000
## 244  20        12.000000
## 245  57         1.000000
## 246  39        18.000000
## 247  35         3.502565
## 248  69         2.542232
## 249  52        20.000000
## 250  66         2.626967
## 251  53         0.000000
## 252  54         0.000000
## 253  42         3.304849
## 254  57        16.000000
## 255  37         3.446074
## 256  21         3.897996
## 257  28         3.700280
## 258  22         1.000000
## 259  42         2.000000
## 260  68         2.570477
## 261  61         2.768193
## 262  41         3.000000
## 263  27         0.000000
## 264  66         2.000000
## 265  67         2.598722
## 266  58         0.000000
## 267  55         2.937663
## 268  52         3.022398
## 269  51         1.000000
## 270  45         5.000000
## 271  58         0.000000
## 272  33         3.559055
## 273  25         3.785015
## 274  66         3.000000
## 275  27         8.000000
## 276  50         7.000000
## 277  66         2.626967
## 278  64         5.000000
## 279  33         3.559055
## 280  61         3.000000
## 281  45         1.000000
## 282  20         7.000000
## 283  21        10.000000
## 284  36        10.000000
## 285  55         2.937663
## 286  60         1.000000
## 287  22         3.869751
## 288  48         3.135379
## 289  27         6.000000
## 290  62         2.000000
## 291  34         1.000000
## 292  34         0.000000
## 293  24         3.813260
## 294  37         2.000000
## 295  64         2.000000
## 296  22         1.000000
## 297  41         0.000000
## 298  49         0.000000
## 299  40        13.000000
## 300  32         0.000000
## 301  37         0.000000
## 302  43         0.000000
## 303  58         2.852928
## 304  62         3.000000
## 305  25         4.000000
## 306  35        22.000000
## 307  40         3.361339
## 308  21         3.897996
## 309  54        11.000000
## 310  50         2.000000
## 311  32         3.587300
## 312  67         1.000000
## 313  64         0.000000
## 314  56         2.909418
## 315  61         0.000000
## 316  44         6.000000
## 317  48        13.000000
## 318  38         0.000000
## 319  21         3.897996
## 320  44         1.000000
## 321  52         3.022398
## 322  62         2.739948
## 323  69         2.542232
## 324  66         1.000000
## 325  24         3.813260
## 326  52         3.022398
## 327  50         3.078889
## 328  32         0.000000
## 329  55        15.000000
## 330  67         0.000000
## 331  42         1.000000
## 332  28        17.000000
## 333  55         2.937663
## 334  63         8.000000
## 335  65         2.655212
## 336  60         2.000000
## 337  49         3.107134
## 338  33         3.559055
## 339  32         3.587300
## 340  37         4.000000
## 341  50         3.078889
## 342  21         4.000000
## 343  32         0.000000
## 344  45         3.000000
## 345  30         0.000000
## 346  53         2.994153
## 347  42         2.000000
## 348  55        10.000000
## 349  42         3.304849
## 350  40         6.000000
## 351  64         2.683457
## 352  62         1.000000
## 353  43         0.000000
## 354  22         2.000000
## 355  43         3.000000
## 356  46         3.191869
## 357  53         0.000000
## 358  61         0.000000
## 359  67         2.598722
## 360  58         0.000000
## 361  49         0.000000
## 362  45         2.000000
## 363  46         6.000000
## 364  47         5.000000
## 365  57         2.881173
## 366  24         3.813260
## 367  23         7.000000
## 368  25         2.000000
## 369  42         3.304849
## 370  62         2.739948
## 371  24         4.000000
## 372  65         1.000000
## 373  50         3.078889
## 374  38         3.417829
## 375  63         2.711703
## 376  35         4.000000
## 377  59         2.000000
## 378  23         3.841505
## 379  64         0.000000
## 380  36         2.000000
## 381  31         3.615545
## 382  30         3.643790
## 383  56         2.909418
## 384  36         0.000000
## 385  62         1.000000
## 386  53         4.000000
## 387  65         2.655212
## 388  43         4.000000
## 389  52         7.000000
## 390  65         1.000000
## 391  55         1.000000
## 392  43         3.000000
## 393  51         0.000000
## 394  24         3.000000
## 395  39         1.000000
## 396  42         0.000000
## 397  44         7.000000
## 398  55         6.000000
## 399  60         2.796438
## 400  32         0.000000
## 401  61         1.000000
## 402  64         0.000000
## 403  61         2.768193
## 404  52         3.022398
## 405  34         3.530810
## 406  50         3.078889
## 407  23         3.841505
## 408  23         0.000000
## 409  59         4.000000
## 410  32        13.000000
## 411  52         3.000000
## 412  24         0.000000
## 413  51         3.050643
## 414  57         0.000000
## 415  44         3.248359
## 416  22         6.000000
## 417  54         0.000000
## 418  40         3.361339
## 419  51         0.000000
## 420  23         8.000000
## 421  47         8.000000
## 422  49         3.107134
## 423  60         4.000000
## 424  29         3.672035
## 425  51         2.000000
## 426  31         3.000000
## 427  37         3.000000
## 428  54         0.000000
## 429  64         2.683457
## 430  60         0.000000
## 431  56         3.000000
## 432  28         1.000000
## 433  56         2.909418
## 434  31         1.000000
## 435  51         3.000000
## 436  67         2.598722
## 437  41         0.000000
## 438  34         3.530810
## 439  33         3.000000
## 440  23         0.000000
## 441  49        11.000000
## 442  30         2.000000
## 443  45         3.220114
## 444  46         3.191869
## 445  45         3.220114
## 446  60         5.000000
## 447  59         7.000000
## 448  28         3.000000
## 449  26         0.000000
## 450  53         2.994153
## 451  20         0.000000
## 452  33         1.000000
## 453  34         0.000000
## 454  47         1.000000
## 455  22         0.000000
## 456  41         3.333094
## 457  36         0.000000
## 458  39         3.389584
## 459  52         2.000000
## 460  24         3.813260
## 461  62         0.000000
## 462  37         9.000000
## 463  25         4.000000
## 464  47        12.000000
## 465  44         3.248359
## 466  24         6.000000
## 467  54         0.000000
## 468  26         6.000000
## 469  59         4.000000
## 470  43         0.000000
## 471  67         0.000000
## 472  60         2.796438
## 473  31         3.615545
## 474  44         4.000000
## 475  37         1.000000
## 476  57         0.000000
## 477  51         8.000000
## 478  66         0.000000
## 479  62         4.000000
## 480  37        18.000000
## 481  30         1.000000
## 482  48         3.135379
## 483  44         3.248359
## 484  22         3.869751
## 485  52         3.022398
## 486  38         1.000000
## 487  53         2.994153
## 488  48        13.000000
## 489  34         3.530810
## 490  32        15.000000
## 491  40         8.000000
## 492  28         0.000000
## 493  34         3.530810
## 494  57         2.000000
## 495  49         0.000000
## 496  27         3.728525
## 497  24         0.000000
## 498  55        10.000000
## 499  32         4.000000
## 500  45         3.220114
# Refit the analysis model (age on depression_score) on the data completed by
# regression imputation and print the coefficient table.
fit <- with(data = imp, expr = lm(formula = age ~ depression_score))
summary(fit)
## # A tibble: 2 x 6
##   term             estimate std.error statistic   p.value  nobs
##   <chr>               <dbl>     <dbl>     <dbl>     <dbl> <int>
## 1 (Intercept)        46.2       0.888     52.0  2.88e-203   500
## 2 depression_score   -0.520     0.191     -2.72 6.67e-  3   500
# Stochastic regression imputation ("norm.nob"): the regression prediction
# plus random residual noise. The draws are random, so a fixed seed is set to
# make the imputed values (and every result derived from them) reproducible.
imp <- mice(
  subset_incomplete,
  method = "norm.nob",
  m = 1,
  maxit = 1,
  seed = 123
)
## 
##  iter imp variable
##   1   1  depression_score
# Print the completed data set (observed values plus imputed values).
# NOTE(review): this prints every row; head(complete(imp)) would keep the
# report readable — confirm whether the full listing is needed.
complete(imp)
##     age depression_score
## 1    49       0.00000000
## 2    34       1.81329563
## 3    68       3.38643558
## 4    24       1.00000000
## 5    67       0.00000000
## 6    40       9.64360648
## 7    60       0.00000000
## 8    60       4.32234129
## 9    38       1.57777629
## 10   21      13.20507349
## 11   57       2.00000000
## 12   47      11.54446729
## 13   56       2.00000000
## 14   61       1.00000000
## 15   25       1.82110627
## 16   34       0.00000000
## 17   62       1.89286619
## 18   32       0.00000000
## 19   64       0.00000000
## 20   35       1.34858925
## 21   66      10.00000000
## 22   48       4.44472779
## 23   69      -3.90104434
## 24   22       5.00000000
## 25   27       1.00000000
## 26   69       3.00000000
## 27   56       2.00000000
## 28   55       0.64866872
## 29   23       4.00000000
## 30   46       8.48852974
## 31   26       0.00000000
## 32   29       8.00000000
## 33   28       4.00000000
## 34   28       1.76512809
## 35   23       3.00000000
## 36   60       0.00000000
## 37   25      -1.90927616
## 38   37       4.00000000
## 39   50       0.00000000
## 40   40       4.33988312
## 41   60      -2.90416833
## 42   21       2.26037526
## 43   66       0.00000000
## 44   59       0.00000000
## 45   44       1.00000000
## 46   34       4.85875648
## 47   31       2.00000000
## 48   66       3.00000000
## 49   53       0.19965770
## 50   28       8.23887235
## 51   60       8.00000000
## 52   35       1.00000000
## 53   26       8.89803394
## 54   66       2.22254953
## 55   64       0.01056736
## 56   63       0.61649241
## 57   68       4.00000000
## 58   59       0.00000000
## 59   49       3.00000000
## 60   50       6.90527612
## 61   34       0.00000000
## 62   42       4.54406104
## 63   65       9.83433727
## 64   33       0.00000000
## 65   37       2.00000000
## 66   21       4.08993610
## 67   27       8.47928896
## 68   23      11.00000000
## 69   30       6.00000000
## 70   38       1.00000000
## 71   52      13.35669024
## 72   40      -1.06283625
## 73   56       0.00000000
## 74   26       2.77580603
## 75   63       7.00000000
## 76   55       1.00000000
## 77   63       0.00000000
## 78   22      10.00000000
## 79   39       3.44806813
## 80   54       0.00000000
## 81   51       0.00000000
## 82   33       4.72458723
## 83   32       0.00000000
## 84   36       1.74880972
## 85   35       2.75456281
## 86   62       8.79547195
## 87   30       2.19156589
## 88   62       4.00000000
## 89   26       9.00000000
## 90   20      10.00000000
## 91   34       0.00000000
## 92   59       4.00000000
## 93   66       9.19050153
## 94   61       5.00000000
## 95   51       7.80877721
## 96   58       0.00000000
## 97   69       2.00000000
## 98   44       0.00000000
## 99   52       2.00000000
## 100  30       0.00000000
## 101  24       2.00000000
## 102  36      -3.84081361
## 103  34       1.00000000
## 104  38      -1.00994544
## 105  49       0.00000000
## 106  30       1.00000000
## 107  52       2.00000000
## 108  51      11.00000000
## 109  63       1.00000000
## 110  63      -3.11795201
## 111  65       0.00000000
## 112  41       3.00000000
## 113  59       3.00000000
## 114  27       2.00000000
## 115  63       0.00000000
## 116  31       5.00000000
## 117  60       0.00000000
## 118  25       1.00000000
## 119  53       6.80605083
## 120  30       8.59744132
## 121  40       0.00000000
## 122  66       0.00000000
## 123  27      12.26800924
## 124  43       5.00000000
## 125  32       0.00000000
## 126  53       5.00000000
## 127  29       2.00000000
## 128  50       6.80964833
## 129  34       9.36473881
## 130  37      19.00000000
## 131  68       4.99722765
## 132  49      -0.82389309
## 133  25       1.93278156
## 134  45       4.29071073
## 135  29       0.75567050
## 136  34       4.00000000
## 137  60       1.36630035
## 138  50       4.00000000
## 139  41       4.00000000
## 140  41       5.20325087
## 141  23       0.00000000
## 142  60       2.93065444
## 143  67       0.00000000
## 144  33       9.00000000
## 145  47       0.00000000
## 146  28       5.00000000
## 147  31       0.00000000
## 148  32      10.00000000
## 149  34      14.00000000
## 150  25       0.00000000
## 151  64       8.00000000
## 152  50       1.19664084
## 153  61       3.22362973
## 154  50       0.00000000
## 155  46       2.00000000
## 156  27       3.00000000
## 157  38      -8.45485525
## 158  68       6.00000000
## 159  42      -1.46920366
## 160  27       0.00000000
## 161  30       2.00000000
## 162  68       0.00000000
## 163  57       1.00000000
## 164  32       5.59286997
## 165  43       0.00000000
## 166  63       0.00000000
## 167  21      10.40011293
## 168  34       0.00000000
## 169  21       3.00000000
## 170  39       0.45432134
## 171  50      14.00000000
## 172  61       2.69549685
## 173  27       1.00000000
## 174  32       1.00000000
## 175  46       0.00000000
## 176  24       1.00000000
## 177  33       6.06955804
## 178  43      -3.04253680
## 179  64       1.00000000
## 180  34       0.00000000
## 181  61       2.00000000
## 182  45       5.12061080
## 183  63       6.00000000
## 184  27       0.00000000
## 185  51      10.00000000
## 186  68       2.75808359
## 187  29       7.00000000
## 188  47       8.84118735
## 189  31       5.31654672
## 190  44       3.00000000
## 191  68       4.00000000
## 192  51      -2.92273925
## 193  68       1.00000000
## 194  68       3.00000000
## 195  37       0.00000000
## 196  50       1.00000000
## 197  34       1.00000000
## 198  57       9.00000000
## 199  46      12.00000000
## 200  45       2.00000000
## 201  55       0.00000000
## 202  27       0.52809977
## 203  32       1.00000000
## 204  42       2.00000000
## 205  42      -5.34824087
## 206  50       2.02014226
## 207  61       2.00000000
## 208  32       4.00000000
## 209  65       4.00000000
## 210  24      10.00000000
## 211  41       2.00000000
## 212  64      10.00000000
## 213  29       3.80231303
## 214  52       0.94959985
## 215  23       0.00000000
## 216  67       3.05266622
## 217  39       4.00000000
## 218  35       0.00000000
## 219  53       0.00000000
## 220  62       2.00000000
## 221  61       0.00000000
## 222  47       2.00000000
## 223  40       3.42184105
## 224  52       1.00000000
## 225  24      -2.21075063
## 226  61      -0.40934722
## 227  60       3.95598508
## 228  20       3.81975258
## 229  57       6.14145815
## 230  28       5.97966409
## 231  46       1.00000000
## 232  57       3.00000000
## 233  57      -1.36657519
## 234  60       0.00000000
## 235  24       2.00000000
## 236  33       8.00000000
## 237  59       4.85961536
## 238  21       3.00000000
## 239  28       1.00000000
## 240  26       1.00000000
## 241  35       3.00000000
## 242  29       1.00000000
## 243  45      10.00000000
## 244  20      12.00000000
## 245  57       1.00000000
## 246  39      18.00000000
## 247  35       9.93724945
## 248  69       1.75844570
## 249  52      20.00000000
## 250  66       9.61736168
## 251  53       0.00000000
## 252  54       0.00000000
## 253  42       8.63838718
## 254  57      16.00000000
## 255  37      -1.65953419
## 256  21       0.89104940
## 257  28       6.35552151
## 258  22       1.00000000
## 259  42       2.00000000
## 260  68       7.25261568
## 261  61       6.03684156
## 262  41       3.00000000
## 263  27       0.00000000
## 264  66       2.00000000
## 265  67       7.14027736
## 266  58       0.00000000
## 267  55       0.27370875
## 268  52       0.20625290
## 269  51       1.00000000
## 270  45       5.00000000
## 271  58       0.00000000
## 272  33       7.94892487
## 273  25       0.15886971
## 274  66       3.00000000
## 275  27       8.00000000
## 276  50       7.00000000
## 277  66      -5.14952354
## 278  64       5.00000000
## 279  33      -1.68042802
## 280  61       3.00000000
## 281  45       1.00000000
## 282  20       7.00000000
## 283  21      10.00000000
## 284  36      10.00000000
## 285  55       6.54845335
## 286  60       1.00000000
## 287  22       3.69402287
## 288  48      -0.04633854
## 289  27       6.00000000
## 290  62       2.00000000
## 291  34       1.00000000
## 292  34       0.00000000
## 293  24       1.39608419
## 294  37       2.00000000
## 295  64       2.00000000
## 296  22       1.00000000
## 297  41       0.00000000
## 298  49       0.00000000
## 299  40      13.00000000
## 300  32       0.00000000
## 301  37       0.00000000
## 302  43       0.00000000
## 303  58       0.94796896
## 304  62       3.00000000
## 305  25       4.00000000
## 306  35      22.00000000
## 307  40       7.23211889
## 308  21      -5.57306156
## 309  54      11.00000000
## 310  50       2.00000000
## 311  32       4.09467214
## 312  67       1.00000000
## 313  64       0.00000000
## 314  56       7.66172023
## 315  61       0.00000000
## 316  44       6.00000000
## 317  48      13.00000000
## 318  38       0.00000000
## 319  21       8.10460342
## 320  44       1.00000000
## 321  52       1.10202671
## 322  62       4.34644643
## 323  69       6.15214385
## 324  66       1.00000000
## 325  24       2.37191410
## 326  52      -1.29265768
## 327  50       2.66684995
## 328  32       0.00000000
## 329  55      15.00000000
## 330  67       0.00000000
## 331  42       1.00000000
## 332  28      17.00000000
## 333  55       0.08004416
## 334  63       8.00000000
## 335  65       5.60220250
## 336  60       2.00000000
## 337  49      -4.62408650
## 338  33       6.07802785
## 339  32      -1.46651304
## 340  37       4.00000000
## 341  50      10.93770548
## 342  21       4.00000000
## 343  32       0.00000000
## 344  45       3.00000000
## 345  30       0.00000000
## 346  53       5.13133854
## 347  42       2.00000000
## 348  55      10.00000000
## 349  42       6.44078598
## 350  40       6.00000000
## 351  64      -4.23551483
## 352  62       1.00000000
## 353  43       0.00000000
## 354  22       2.00000000
## 355  43       3.00000000
## 356  46       0.77033516
## 357  53       0.00000000
## 358  61       0.00000000
## 359  67       3.52959495
## 360  58       0.00000000
## 361  49       0.00000000
## 362  45       2.00000000
## 363  46       6.00000000
## 364  47       5.00000000
## 365  57      -0.60767178
## 366  24       5.03059499
## 367  23       7.00000000
## 368  25       2.00000000
## 369  42      -0.85837560
## 370  62       0.55174767
## 371  24       4.00000000
## 372  65       1.00000000
## 373  50      -2.69458757
## 374  38      -4.87163349
## 375  63      -0.80090139
## 376  35       4.00000000
## 377  59       2.00000000
## 378  23       0.03923363
## 379  64       0.00000000
## 380  36       2.00000000
## 381  31       3.31640345
## 382  30       4.74593007
## 383  56       8.87267692
## 384  36       0.00000000
## 385  62       1.00000000
## 386  53       4.00000000
## 387  65       0.97632410
## 388  43       4.00000000
## 389  52       7.00000000
## 390  65       1.00000000
## 391  55       1.00000000
## 392  43       3.00000000
## 393  51       0.00000000
## 394  24       3.00000000
## 395  39       1.00000000
## 396  42       0.00000000
## 397  44       7.00000000
## 398  55       6.00000000
## 399  60       0.41271940
## 400  32       0.00000000
## 401  61       1.00000000
## 402  64       0.00000000
## 403  61       5.54154037
## 404  52       4.14549736
## 405  34       3.45356087
## 406  50      12.08502556
## 407  23      13.95373304
## 408  23       0.00000000
## 409  59       4.00000000
## 410  32      13.00000000
## 411  52       3.00000000
## 412  24       0.00000000
## 413  51       6.98846749
## 414  57       0.00000000
## 415  44      11.00552092
## 416  22       6.00000000
## 417  54       0.00000000
## 418  40       8.08181165
## 419  51       0.00000000
## 420  23       8.00000000
## 421  47       8.00000000
## 422  49       5.71745442
## 423  60       4.00000000
## 424  29       6.27853276
## 425  51       2.00000000
## 426  31       3.00000000
## 427  37       3.00000000
## 428  54       0.00000000
## 429  64       0.91655037
## 430  60       0.00000000
## 431  56       3.00000000
## 432  28       1.00000000
## 433  56       3.80906765
## 434  31       1.00000000
## 435  51       3.00000000
## 436  67       6.91210428
## 437  41       0.00000000
## 438  34      -2.16804035
## 439  33       3.00000000
## 440  23       0.00000000
## 441  49      11.00000000
## 442  30       2.00000000
## 443  45      -9.87735843
## 444  46       7.65382294
## 445  45       1.47825066
## 446  60       5.00000000
## 447  59       7.00000000
## 448  28       3.00000000
## 449  26       0.00000000
## 450  53       0.36520677
## 451  20       0.00000000
## 452  33       1.00000000
## 453  34       0.00000000
## 454  47       1.00000000
## 455  22       0.00000000
## 456  41      -1.27908213
## 457  36       0.00000000
## 458  39       7.66185321
## 459  52       2.00000000
## 460  24       5.44842468
## 461  62       0.00000000
## 462  37       9.00000000
## 463  25       4.00000000
## 464  47      12.00000000
## 465  44       2.70073011
## 466  24       6.00000000
## 467  54       0.00000000
## 468  26       6.00000000
## 469  59       4.00000000
## 470  43       0.00000000
## 471  67       0.00000000
## 472  60       2.64564516
## 473  31       5.57936881
## 474  44       4.00000000
## 475  37       1.00000000
## 476  57       0.00000000
## 477  51       8.00000000
## 478  66       0.00000000
## 479  62       4.00000000
## 480  37      18.00000000
## 481  30       1.00000000
## 482  48       4.38198805
## 483  44      -0.73358603
## 484  22       6.81720022
## 485  52       6.87528040
## 486  38       1.00000000
## 487  53       3.35186486
## 488  48      13.00000000
## 489  34       1.29911641
## 490  32      15.00000000
## 491  40       8.00000000
## 492  28       0.00000000
## 493  34      16.40557761
## 494  57       2.00000000
## 495  49       0.00000000
## 496  27      -1.69719433
## 497  24       0.00000000
## 498  55      10.00000000
## 499  32       4.00000000
## 500  45       5.97172195
# Refit the analysis model (age on depression_score) on the data completed by
# the "norm.nob" imputation and print the coefficient table.
fit <- with(data = imp, expr = lm(formula = age ~ depression_score))
summary(fit)
## # A tibble: 2 x 6
##   term             estimate std.error statistic   p.value  nobs
##   <chr>               <dbl>     <dbl>     <dbl>     <dbl> <int>
## 1 (Intercept)        45.5       0.811     56.1  1.87e-217   500
## 2 depression_score   -0.305     0.151     -2.02 4.37e-  2   500
# Complete-case fit on the raw incomplete data (lm() drops rows containing NA
# under the default na.action).
# NOTE(review): this overwrites `fit` from the imputed-data model above and
# the result is never printed here — confirm whether it should be summarised.
fit <- with(subset_incomplete, lm(age ~ depression_score))

# One-sample t-test of depression_score in the completed data.
# The expression has to name a column: with() on a mids object evaluates it
# inside each completed data set. Passing `subset_incomplete` instead resolved
# to the original data frame and pooled the age and depression_score values
# into a single sample, ignoring the imputations entirely.
# Re-knit the document so the printed result reflects this test.
t_stats <- with(imp, t.test(depression_score))
t_stats
## call :
## with.mids(data = imp, expr = t.test(subset_incomplete))
## 
## call1 :
## mice(data = subset_incomplete, m = 1, method = "norm.nob", maxit = 1)
## 
## nmis :
##              age depression_score 
##                0              177 
## 
## analyses :
## [[1]]
## 
##  One Sample t-test
## 
## data:  subset_incomplete
## t = 34.986, df = 822, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  26.70895 29.88400
## sample estimates:
## mean of x 
##  28.29648
# Alternative run of regression imputation ("norm.predict"), stored under its
# own name so it can be compared with the complete data below.
regOut <- mice(
  data = subset_incomplete,
  m = 1,
  method = "norm.predict",
  maxit = 1
)
## 
##  iter imp variable
##   1   1  depression_score
# Extract the first (and only) completed data set from the imputation.
inc1 <- complete(regOut, action = 1L)

# Compare the imputed data with the true complete data, starting with the
# column means of the imputed data.
colMeans(x = inc1)
##              age depression_score 
##        44.484000         3.234688
# Column means of the complete data, as the benchmark for the imputed means.
colMeans(subset_complete)
##              age depression_score 
##           44.484            3.564
# Per-column variances of the regression-imputed data. vapply() pins the
# result to one numeric value per column, so the return type cannot silently
# change the way it can with sapply().
vapply(inc1, var, numeric(1))
##              age depression_score 
##        206.65105         11.22561
# Per-column variances of the complete data (benchmark). vapply() pins the
# result to one numeric value per column, so the return type cannot silently
# change the way it can with sapply().
vapply(subset_complete, var, numeric(1))
##              age depression_score 
##        206.65105         20.60311
# Correlation matrix of age and depression_score in the imputed data.
cor(inc1)
##                         age depression_score
## age               1.0000000       -0.1211871
## depression_score -0.1211871        1.0000000
# Correlation matrix of the complete data, as the benchmark for the imputed
# data's correlation.
cor(subset_complete)
##                          age depression_score
## age               1.00000000      -0.06125558
## depression_score -0.06125558       1.00000000