library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ✓ purrr   0.3.4
## ── Conflicts ─────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter()    masks stats::filter()
## x dplyr::lag()       masks stats::lag()
## x dplyr::src()       masks Hmisc::src()
## x dplyr::summarize() masks Hmisc::summarize()
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
deletingkidney <-haven::read_sav ("deletingkidney.sav")
glimpse(deletingkidney)
## Rows: 980,143
## Columns: 20
## $ WL_ORG             <chr> "", "", "", "", "", "", "", "", "", "", "", "", ""…
## $ ON_DIALYSIS        <chr> "", "", "", "", "", "", "", "", "", "", "", "", ""…
## $ GENDER             <chr> "M", "M", "F", "F", "F", "F", "F", "M", "M", "M", …
## $ ABO                <chr> "O", "O", "B", "B", "B", "O", "O", "AB", "A", "B",…
## $ PERM_STATE         <chr> "PA", "IL", "FL", "HI", "WA", "VA", "CA", "IL", "O…
## $ EDUCATION          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 99…
## $ FUNC_STAT_TCR      <dbl> 998, 2, 2, 998, 1, 1, 998, 998, 1, 998, 1, 998, 2,…
## $ DAYSWAIT_CHRON     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ INIT_AGE           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ ETHNICITY          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ ETHCAT             <dbl> 1, 1, 2, 2, 1, 5, 2, 998, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ PT_CODE            <dbl> 481014, 260021, 425419, 175705, 400671, 406485, 14…
## $ REGION             <dbl> 2, 7, 3, 5, 6, 2, 5, 7, 10, 5, 3, 5, 3, 3, 7, 1, 2…
## $ WORK_INCOME_TCR    <chr> "", "", "", "", "", "", "", "", "", "", "", "", ""…
## $ PRI_PAYMENT_TCR_KI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 13…
## $ DON_TY             <chr> "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", …
## $ DISTANCE           <dbl> 153, 779, 620, 9, 4, 0, 12, 205, 0, 2174, 55, 56, …
## $ DIAG_KI            <dbl> 3006, 999, 3007, 3048, 999, 3041, 3041, 999, 3017,…
## $ SHARE_TY           <dbl> 4, 5, 5, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3,…
## $ AGE_GROUP          <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", …
str(deletingkidney)
## tibble [980,143 × 20] (S3: tbl_df/tbl/data.frame)
##  $ WL_ORG            : chr [1:980143] "" "" "" "" ...
##   ..- attr(*, "label")= chr "ORGAN LISTED FOR"
##   ..- attr(*, "format.spss")= chr "A12"
##   ..- attr(*, "display_width")= int 14
##  $ ON_DIALYSIS       : chr [1:980143] "" "" "" "" ...
##   ..- attr(*, "label")= chr "WL MOST RECENT CANDIDATE ON DIALYSIS?"
##   ..- attr(*, "format.spss")= chr "A3"
##   ..- attr(*, "display_width")= int 13
##  $ GENDER            : chr [1:980143] "M" "M" "F" "F" ...
##   ..- attr(*, "label")= chr "TCR RECIPIENT GENDER"
##   ..- attr(*, "format.spss")= chr "A3"
##  $ ABO               : chr [1:980143] "O" "O" "B" "B" ...
##   ..- attr(*, "label")= chr "TCR ABO BLOOD GROUP"
##   ..- attr(*, "format.spss")= chr "A9"
##   ..- attr(*, "display_width")= int 11
##  $ PERM_STATE        : chr [1:980143] "PA" "IL" "FL" "HI" ...
##   ..- attr(*, "label")= chr "TCR State of Permanent Residence at Listing"
##   ..- attr(*, "format.spss")= chr "A6"
##   ..- attr(*, "display_width")= int 12
##  $ EDUCATION         : num [1:980143] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "TCR HIGHEST EDUCATION LEVEL:"
##   ..- attr(*, "format.spss")= chr "F3.0"
##   ..- attr(*, "display_width")= int 11
##  $ FUNC_STAT_TCR     : num [1:980143] 998 2 2 998 1 1 998 998 1 998 ...
##   ..- attr(*, "label")= chr "TCR FUNCTIONAL STATUS @ LISTING"
##   ..- attr(*, "format.spss")= chr "F4.0"
##   ..- attr(*, "display_width")= int 15
##  $ DAYSWAIT_CHRON    : num [1:980143] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "TOTAL DAYS ON WAITING LIST/INCLUDING INACTIVE TIME"
##   ..- attr(*, "format.spss")= chr "F5.0"
##   ..- attr(*, "display_width")= int 16
##  $ INIT_AGE          : num [1:980143] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "CALCULATED AGE AT LISTING"
##   ..- attr(*, "format.spss")= chr "F2.0"
##   ..- attr(*, "display_width")= int 10
##  $ ETHNICITY         : num [1:980143] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "label")= chr "TCR ETHNICITY"
##   ..- attr(*, "format.spss")= chr "F1.0"
##   ..- attr(*, "display_width")= int 11
##  $ ETHCAT            : num [1:980143] 1 1 2 2 1 5 2 998 1 1 ...
##   ..- attr(*, "label")= chr "ETHNICITY CATEGORY"
##   ..- attr(*, "format.spss")= chr "F3.0"
##  $ PT_CODE           : num [1:980143] 481014 260021 425419 175705 400671 ...
##   ..- attr(*, "label")= chr "ENCRYPTED PATIENT IDENTIFIER"
##   ..- attr(*, "format.spss")= chr "F7.0"
##   ..- attr(*, "display_width")= int 9
##  $ REGION            : num [1:980143] 2 7 3 5 6 2 5 7 10 5 ...
##   ..- attr(*, "label")= chr "region"
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ WORK_INCOME_TCR   : chr [1:980143] "" "" "" "" ...
##   ..- attr(*, "label")= chr "TCR WORKING FOR INCOME:"
##   ..- attr(*, "format.spss")= chr "A3"
##   ..- attr(*, "display_width")= int 17
##  $ PRI_PAYMENT_TCR_KI: num [1:980143] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "TCR KIDNEY PRIMARY PROJECTED SOURCE PAY"
##   ..- attr(*, "format.spss")= chr "F2.0"
##   ..- attr(*, "display_width")= int 20
##  $ DON_TY            : chr [1:980143] "C" "C" "C" "C" ...
##   ..- attr(*, "label")= chr "DONOR TYPE - DECEASED, LIVING OR FOREIGN"
##   ..- attr(*, "format.spss")= chr "A9"
##   ..- attr(*, "display_width")= int 11
##  $ DISTANCE          : num [1:980143] 153 779 620 9 4 ...
##   ..- attr(*, "label")= chr "DISTANCE DONOR HOSP TO TX CENTER (Nautical Miles)"
##   ..- attr(*, "format.spss")= chr "F4.0"
##   ..- attr(*, "display_width")= int 10
##  $ DIAG_KI           : num [1:980143] 3006 999 3007 3048 999 ...
##   ..- attr(*, "label")= chr "Kidney Diagnosis from TRR/TCR"
##   ..- attr(*, "format.spss")= chr "F4.0"
##   ..- attr(*, "display_width")= int 9
##  $ SHARE_TY          : num [1:980143] 4 5 5 3 3 3 3 3 3 5 ...
##   ..- attr(*, "label")= chr "Share Type"
##   ..- attr(*, "format.spss")= chr "F1.0"
##   ..- attr(*, "display_width")= int 10
##  $ AGE_GROUP         : chr [1:980143] "A" "A" "A" "A" ...
##   ..- attr(*, "format.spss")= chr "A3"
##   ..- attr(*, "display_width")= int 11
describe(deletingkidney)
## deletingkidney 
## 
##  20  Variables      980143  Observations
## --------------------------------------------------------------------------------
## WL_ORG : ORGAN LISTED FOR  Format:A12 
##        n  missing distinct 
##   933957    46186        4 
##                                       
## Value          KI     KP     PA     PI
## Frequency  870845  40914  20598   1600
## Proportion  0.932  0.044  0.022  0.002
## --------------------------------------------------------------------------------
## ON_DIALYSIS : WL MOST RECENT CANDIDATE ON DIALYSIS?  Format:A3 
##        n  missing distinct 
##   866551   113592        3 
##                                
## Value           1      N      Y
## Frequency     320 291373 574858
## Proportion  0.000  0.336  0.663
## --------------------------------------------------------------------------------
## GENDER : TCR RECIPIENT GENDER  Format:A3 
##        n  missing distinct 
##   980143        0        2 
##                         
## Value           F      M
## Frequency  392502 587641
## Proportion    0.4    0.6
## --------------------------------------------------------------------------------
## ABO : TCR ABO BLOOD GROUP  Format:A9 
##        n  missing distinct 
##   980143        0        9 
## 
## lowest : A   A1  A1B A2  A2B, highest: A2B AB  B   O   UNK
##                                                                          
## Value           A     A1    A1B     A2    A2B     AB      B      O    UNK
## Frequency  322360   6614    570   1136    196  36603 139828 472810     26
## Proportion  0.329  0.007  0.001  0.001  0.000  0.037  0.143  0.482  0.000
## --------------------------------------------------------------------------------
## PERM_STATE : TCR State of Permanent Residence at Listing  Format:A6 
##        n  missing distinct 
##   961641    18502       58 
## 
## lowest : AK AL AR AS AZ, highest: WA WI WV WY ZZ
## --------------------------------------------------------------------------------
## EDUCATION : TCR HIGHEST EDUCATION LEVEL:  Format:F3.0 
##        n  missing distinct     Info     Mean      Gmd 
##   862361   117782        8    0.932    126.1    215.5 
## 
## lowest :   1   2   3   4   5, highest:   4   5   6 996 998
##                                                                   
## Value           1      2      3      4      5      6    996    998
## Frequency    4864  53470 321621 190618 130688  54938   5729 100433
## Proportion  0.006  0.062  0.373  0.221  0.152  0.064  0.007  0.116
## --------------------------------------------------------------------------------
## FUNC_STAT_TCR : TCR FUNCTIONAL STATUS @ LISTING  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   960908    19235       25    0.967     1304     1046        1        1 
##      .25      .50      .75      .90      .95 
##        1     2070     2080     2090     2100 
## 
## lowest :    1    2    3  996  998, highest: 4060 4070 4080 4090 4100
## --------------------------------------------------------------------------------
## DAYSWAIT_CHRON : TOTAL DAYS ON WAITING LIST/INCLUDING INACTIVE TIME  Format:F5.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   933768    46375     6556        1    806.9    833.1       29       69 
##      .25      .50      .75      .90      .95 
##      200      546     1154     1891     2418 
## 
## lowest :     0     1     2     3     4, highest: 11592 11826 12076 12788 13741
## --------------------------------------------------------------------------------
## INIT_AGE : CALCULATED AGE AT LISTING  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   933941    46202       92        1    47.68    16.45       22       28 
##      .25      .50      .75      .90      .95 
##       38       49       59       66       69 
## 
## lowest :  0  1  2  3  4, highest: 87 88 89 90 91
## --------------------------------------------------------------------------------
## ETHNICITY : TCR ETHNICITY  Format:F1.0 
##        n  missing distinct     Info      Sum     Mean      Gmd 
##   980143        0        2    0.374   143081    0.146   0.2493 
## 
## --------------------------------------------------------------------------------
## ETHCAT : ETHNICITY CATEGORY  Format:F3.0 
##        n  missing distinct     Info     Mean      Gmd 
##   980143        0        8    0.838    2.106    1.554 
## 
## lowest :   1   2   4   5   6, highest:   5   6   7   9 998
##                                                                   
## Value           1      2      4      5      6      7      9    998
## Frequency  510028 258237 140020  52532   8790   4231   6228     77
## Proportion  0.520  0.263  0.143  0.054  0.009  0.004  0.006  0.000
## --------------------------------------------------------------------------------
## PT_CODE : ENCRYPTED PATIENT IDENTIFIER  Format:F7.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   980143        0   717271        1   619253   431816    58935   117888 
##      .25      .50      .75      .90      .95 
##   295648   602183   936606  1153064  1232521 
## 
## lowest :       1       3       4       9      10
## highest: 1325545 1325880 1326707 1326814 1327218
## --------------------------------------------------------------------------------
## REGION : region  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   980143        0       11    0.986    5.691    3.482        2        2 
##      .25      .50      .75      .90      .95 
##        3        5        8       10       11 
## 
## lowest :  1  2  3  4  5, highest:  7  8  9 10 11
##                                                                          
## Value           1      2      3      4      5      6      7      8      9
## Frequency   39409 139067 124332  89378 171415  28231  98173  50238  71524
## Proportion  0.040  0.142  0.127  0.091  0.175  0.029  0.100  0.051  0.073
##                         
## Value          10     11
## Frequency   81472  86904
## Proportion  0.083  0.089
## --------------------------------------------------------------------------------
## WORK_INCOME_TCR : TCR WORKING FOR INCOME:  Format:A3 
##        n  missing distinct 
##   582461   397682        3 
##                                
## Value           N      U      Y
## Frequency  364318  27725 190418
## Proportion  0.625  0.048  0.327
## --------------------------------------------------------------------------------
## PRI_PAYMENT_TCR_KI : TCR KIDNEY PRIMARY PROJECTED SOURCE PAY  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   842576   137567       14    0.898    3.649    3.682        1        1 
##      .25      .50      .75      .90      .95 
##        1        2        4       13       13 
## 
## lowest :  1  2  3  4  5, highest: 10 11 12 13 14
##                                                                          
## Value           1      2      3      4      5      6      7      8      9
## Frequency  372757  67387 188741  74272    725   7286   5392   2819    135
## Proportion  0.442  0.080  0.224  0.088  0.001  0.009  0.006  0.003  0.000
##                                              
## Value          10     11     12     13     14
## Frequency     531   3009    835 114779   3908
## Proportion  0.001  0.004  0.001  0.136  0.005
## --------------------------------------------------------------------------------
## DON_TY : DONOR TYPE - DECEASED, LIVING OR FOREIGN  Format:A9 
##        n  missing distinct 
##   499185   480958        3 
##                                
## Value           C      F      L
## Frequency  341971     85 157129
## Proportion  0.685  0.000  0.315
## --------------------------------------------------------------------------------
## DISTANCE : DISTANCE DONOR HOSP TO TX CENTER (Nautical Miles)  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   494294   485849     2614    0.939    163.8    270.3        0        0 
##      .25      .50      .75      .90      .95 
##        0        8      117      517      967 
## 
## lowest :    0    1    2    3    4, highest: 4365 4383 4407 4408 4409
## --------------------------------------------------------------------------------
## DIAG_KI : Kidney Diagnosis from TRR/TCR  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   482794   497349       75    0.992     2905    261.8      999     3004 
##      .25      .50      .75      .90      .95 
##     3008     3037     3041     3070     3070 
## 
## lowest :  999 3000 3001 3002 3003, highest: 3070 3071 3072 3073 3074
##                                                                          
## Value        1000   3000   3005   3010   3015   3020   3025   3030   3035
## Frequency   30349   9552  54805  88198   2401  15439   7936   9647  42385
## Proportion  0.063  0.020  0.114  0.183  0.005  0.032  0.016  0.020  0.088
##                                                                   
## Value        3040   3045   3050   3055   3060   3065   3070   3075
## Frequency  118859   4096  10730   3681   1266   1236  81004   1210
## Proportion  0.246  0.008  0.022  0.008  0.003  0.003  0.168  0.003
## 
## For the frequency table, variable is rounded to the nearest 5
## --------------------------------------------------------------------------------
## SHARE_TY : Share Type  Format:F1.0 
##        n  missing distinct     Info     Mean      Gmd 
##   499184   480959        4    0.472    3.311   0.5203 
##                                       
## Value           3      4      5      6
## Frequency  402971  37097  59031     85
## Proportion  0.807  0.074  0.118  0.000
## --------------------------------------------------------------------------------
## AGE_GROUP  Format:A3 
##        n  missing distinct 
##   499185   480958        3 
##                                
## Value           A      P      U
## Frequency  475087  24097      1
## Proportion  0.952  0.048  0.000
## --------------------------------------------------------------------------------
deletingkidney.clean <- deletingkidney %>%
  rename(.,
         age_group = AGE_GROUP,
         donor_type = DON_TY,
         region = REGION,
         working_income = WORK_INCOME_TCR,
         payment_method = PRI_PAYMENT_TCR_KI,
         dialysis = ON_DIALYSIS,
         gender = GENDER,
         abo_type = ABO,
         education = EDUCATION,
         race_binary = ETHCAT,
         ethnicity = ETHNICITY,
         functional_status = FUNC_STAT_TCR,
         organ_type = WL_ORG,
         locality_type = SHARE_TY,
         waitlist_days = DAYSWAIT_CHRON,
         age_init = INIT_AGE,
         state_residence = PERM_STATE)
  
  glimpse(deletingkidney.clean)
## Rows: 980,143
## Columns: 20
## $ organ_type        <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
## $ dialysis          <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
## $ gender            <chr> "M", "M", "F", "F", "F", "F", "F", "M", "M", "M", "…
## $ abo_type          <chr> "O", "O", "B", "B", "B", "O", "O", "AB", "A", "B", …
## $ state_residence   <chr> "PA", "IL", "FL", "HI", "WA", "VA", "CA", "IL", "OH…
## $ education         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 998…
## $ functional_status <dbl> 998, 2, 2, 998, 1, 1, 998, 998, 1, 998, 1, 998, 2, …
## $ waitlist_days     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ age_init          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ ethnicity         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ race_binary       <dbl> 1, 1, 2, 2, 1, 5, 2, 998, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ PT_CODE           <dbl> 481014, 260021, 425419, 175705, 400671, 406485, 142…
## $ region            <dbl> 2, 7, 3, 5, 6, 2, 5, 7, 10, 5, 3, 5, 3, 3, 7, 1, 2,…
## $ working_income    <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
## $ payment_method    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 13,…
## $ donor_type        <chr> "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "…
## $ DISTANCE          <dbl> 153, 779, 620, 9, 4, 0, 12, 205, 0, 2174, 55, 56, 1…
## $ DIAG_KI           <dbl> 3006, 999, 3007, 3048, 999, 3041, 3041, 999, 3017, …
## $ locality_type     <dbl> 4, 5, 5, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, …
## $ age_group         <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "…
describe(deletingkidney.clean)
## deletingkidney.clean 
## 
##  20  Variables      980143  Observations
## --------------------------------------------------------------------------------
## organ_type : ORGAN LISTED FOR  Format:A12 
##        n  missing distinct 
##   933957    46186        4 
##                                       
## Value          KI     KP     PA     PI
## Frequency  870845  40914  20598   1600
## Proportion  0.932  0.044  0.022  0.002
## --------------------------------------------------------------------------------
## dialysis : WL MOST RECENT CANDIDATE ON DIALYSIS?  Format:A3 
##        n  missing distinct 
##   866551   113592        3 
##                                
## Value           1      N      Y
## Frequency     320 291373 574858
## Proportion  0.000  0.336  0.663
## --------------------------------------------------------------------------------
## gender : TCR RECIPIENT GENDER  Format:A3 
##        n  missing distinct 
##   980143        0        2 
##                         
## Value           F      M
## Frequency  392502 587641
## Proportion    0.4    0.6
## --------------------------------------------------------------------------------
## abo_type : TCR ABO BLOOD GROUP  Format:A9 
##        n  missing distinct 
##   980143        0        9 
## 
## lowest : A   A1  A1B A2  A2B, highest: A2B AB  B   O   UNK
##                                                                          
## Value           A     A1    A1B     A2    A2B     AB      B      O    UNK
## Frequency  322360   6614    570   1136    196  36603 139828 472810     26
## Proportion  0.329  0.007  0.001  0.001  0.000  0.037  0.143  0.482  0.000
## --------------------------------------------------------------------------------
## state_residence : TCR State of Permanent Residence at Listing  Format:A6 
##        n  missing distinct 
##   961641    18502       58 
## 
## lowest : AK AL AR AS AZ, highest: WA WI WV WY ZZ
## --------------------------------------------------------------------------------
## education : TCR HIGHEST EDUCATION LEVEL:  Format:F3.0 
##        n  missing distinct     Info     Mean      Gmd 
##   862361   117782        8    0.932    126.1    215.5 
## 
## lowest :   1   2   3   4   5, highest:   4   5   6 996 998
##                                                                   
## Value           1      2      3      4      5      6    996    998
## Frequency    4864  53470 321621 190618 130688  54938   5729 100433
## Proportion  0.006  0.062  0.373  0.221  0.152  0.064  0.007  0.116
## --------------------------------------------------------------------------------
## functional_status : TCR FUNCTIONAL STATUS @ LISTING  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   960908    19235       25    0.967     1304     1046        1        1 
##      .25      .50      .75      .90      .95 
##        1     2070     2080     2090     2100 
## 
## lowest :    1    2    3  996  998, highest: 4060 4070 4080 4090 4100
## --------------------------------------------------------------------------------
## waitlist_days : TOTAL DAYS ON WAITING LIST/INCLUDING INACTIVE TIME  Format:F5.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   933768    46375     6556        1    806.9    833.1       29       69 
##      .25      .50      .75      .90      .95 
##      200      546     1154     1891     2418 
## 
## lowest :     0     1     2     3     4, highest: 11592 11826 12076 12788 13741
## --------------------------------------------------------------------------------
## age_init : CALCULATED AGE AT LISTING  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   933941    46202       92        1    47.68    16.45       22       28 
##      .25      .50      .75      .90      .95 
##       38       49       59       66       69 
## 
## lowest :  0  1  2  3  4, highest: 87 88 89 90 91
## --------------------------------------------------------------------------------
## ethnicity : TCR ETHNICITY  Format:F1.0 
##        n  missing distinct     Info      Sum     Mean      Gmd 
##   980143        0        2    0.374   143081    0.146   0.2493 
## 
## --------------------------------------------------------------------------------
## race_binary : ETHNICITY CATEGORY  Format:F3.0 
##        n  missing distinct     Info     Mean      Gmd 
##   980143        0        8    0.838    2.106    1.554 
## 
## lowest :   1   2   4   5   6, highest:   5   6   7   9 998
##                                                                   
## Value           1      2      4      5      6      7      9    998
## Frequency  510028 258237 140020  52532   8790   4231   6228     77
## Proportion  0.520  0.263  0.143  0.054  0.009  0.004  0.006  0.000
## --------------------------------------------------------------------------------
## PT_CODE : ENCRYPTED PATIENT IDENTIFIER  Format:F7.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   980143        0   717271        1   619253   431816    58935   117888 
##      .25      .50      .75      .90      .95 
##   295648   602183   936606  1153064  1232521 
## 
## lowest :       1       3       4       9      10
## highest: 1325545 1325880 1326707 1326814 1327218
## --------------------------------------------------------------------------------
## region  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   980143        0       11    0.986    5.691    3.482        2        2 
##      .25      .50      .75      .90      .95 
##        3        5        8       10       11 
## 
## lowest :  1  2  3  4  5, highest:  7  8  9 10 11
##                                                                          
## Value           1      2      3      4      5      6      7      8      9
## Frequency   39409 139067 124332  89378 171415  28231  98173  50238  71524
## Proportion  0.040  0.142  0.127  0.091  0.175  0.029  0.100  0.051  0.073
##                         
## Value          10     11
## Frequency   81472  86904
## Proportion  0.083  0.089
## --------------------------------------------------------------------------------
## working_income : TCR WORKING FOR INCOME:  Format:A3 
##        n  missing distinct 
##   582461   397682        3 
##                                
## Value           N      U      Y
## Frequency  364318  27725 190418
## Proportion  0.625  0.048  0.327
## --------------------------------------------------------------------------------
## payment_method : TCR KIDNEY PRIMARY PROJECTED SOURCE PAY  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   842576   137567       14    0.898    3.649    3.682        1        1 
##      .25      .50      .75      .90      .95 
##        1        2        4       13       13 
## 
## lowest :  1  2  3  4  5, highest: 10 11 12 13 14
##                                                                          
## Value           1      2      3      4      5      6      7      8      9
## Frequency  372757  67387 188741  74272    725   7286   5392   2819    135
## Proportion  0.442  0.080  0.224  0.088  0.001  0.009  0.006  0.003  0.000
##                                              
## Value          10     11     12     13     14
## Frequency     531   3009    835 114779   3908
## Proportion  0.001  0.004  0.001  0.136  0.005
## --------------------------------------------------------------------------------
## donor_type : DONOR TYPE - DECEASED, LIVING OR FOREIGN  Format:A9 
##        n  missing distinct 
##   499185   480958        3 
##                                
## Value           C      F      L
## Frequency  341971     85 157129
## Proportion  0.685  0.000  0.315
## --------------------------------------------------------------------------------
## DISTANCE : DISTANCE DONOR HOSP TO TX CENTER (Nautical Miles)  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   494294   485849     2614    0.939    163.8    270.3        0        0 
##      .25      .50      .75      .90      .95 
##        0        8      117      517      967 
## 
## lowest :    0    1    2    3    4, highest: 4365 4383 4407 4408 4409
## --------------------------------------------------------------------------------
## DIAG_KI : Kidney Diagnosis from TRR/TCR  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   482794   497349       75    0.992     2905    261.8      999     3004 
##      .25      .50      .75      .90      .95 
##     3008     3037     3041     3070     3070 
## 
## lowest :  999 3000 3001 3002 3003, highest: 3070 3071 3072 3073 3074
##                                                                          
## Value        1000   3000   3005   3010   3015   3020   3025   3030   3035
## Frequency   30349   9552  54805  88198   2401  15439   7936   9647  42385
## Proportion  0.063  0.020  0.114  0.183  0.005  0.032  0.016  0.020  0.088
##                                                                   
## Value        3040   3045   3050   3055   3060   3065   3070   3075
## Frequency  118859   4096  10730   3681   1266   1236  81004   1210
## Proportion  0.246  0.008  0.022  0.008  0.003  0.003  0.168  0.003
## 
## For the frequency table, variable is rounded to the nearest 5
## --------------------------------------------------------------------------------
## locality_type : Share Type  Format:F1.0 
##        n  missing distinct     Info     Mean      Gmd 
##   499184   480959        4    0.472    3.311   0.5203 
##                                       
## Value           3      4      5      6
## Frequency  402971  37097  59031     85
## Proportion  0.807  0.074  0.118  0.000
## --------------------------------------------------------------------------------
## age_group  Format:A3 
##        n  missing distinct 
##   499185   480958        3 
##                                
## Value           A      P      U
## Frequency  475087  24097      1
## Proportion  0.952  0.048  0.000
## --------------------------------------------------------------------------------
kidney.cleanfactors <- deletingkidney.clean %>%
  
  mutate(.,
         age_group_fac = as_factor(age_group),
         donor_type_fac = as_factor(donor_type),
         region_fac = as_factor(region),
         working_income_fac = as_factor(working_income),
         payment_method_fac = as_factor(payment_method),
         dialysis_fac =as_factor(dialysis),
         gender_fac =as_factor(gender),
         abo_type_fac =as_factor(abo_type),
         education_fac =as_factor(education),
         race_fac = as_factor(race_binary),
         ethnicity_fac =as_factor(ethnicity),
         functional_status_fac =as_factor(functional_status),
         organ_type_fac =as_factor(organ_type),
         locality_type_fac =as_factor(locality_type),
         state_residence_fac =as_factor(state_residence))
glimpse(kidney.cleanfactors)
## Rows: 980,143
## Columns: 35
## $ organ_type            <chr> "", "", "", "", "", "", "", "", "", "", "", "",…
## $ dialysis              <chr> "", "", "", "", "", "", "", "", "", "", "", "",…
## $ gender                <chr> "M", "M", "F", "F", "F", "F", "F", "M", "M", "M…
## $ abo_type              <chr> "O", "O", "B", "B", "B", "O", "O", "AB", "A", "…
## $ state_residence       <chr> "PA", "IL", "FL", "HI", "WA", "VA", "CA", "IL",…
## $ education             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ functional_status     <dbl> 998, 2, 2, 998, 1, 1, 998, 998, 1, 998, 1, 998,…
## $ waitlist_days         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ age_init              <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ ethnicity             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ race_binary           <dbl> 1, 1, 2, 2, 1, 5, 2, 998, 1, 1, 1, 1, 1, 1, 1, …
## $ PT_CODE               <dbl> 481014, 260021, 425419, 175705, 400671, 406485,…
## $ region                <dbl> 2, 7, 3, 5, 6, 2, 5, 7, 10, 5, 3, 5, 3, 3, 7, 1…
## $ working_income        <chr> "", "", "", "", "", "", "", "", "", "", "", "",…
## $ payment_method        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ donor_type            <chr> "C", "C", "C", "C", "C", "C", "C", "C", "C", "C…
## $ DISTANCE              <dbl> 153, 779, 620, 9, 4, 0, 12, 205, 0, 2174, 55, 5…
## $ DIAG_KI               <dbl> 3006, 999, 3007, 3048, 999, 3041, 3041, 999, 30…
## $ locality_type         <dbl> 4, 5, 5, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3,…
## $ age_group             <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A…
## $ age_group_fac         <fct> A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,…
## $ donor_type_fac        <fct> C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C,…
## $ region_fac            <fct> 2, 7, 3, 5, 6, 2, 5, 7, 10, 5, 3, 5, 3, 3, 7, 1…
## $ working_income_fac    <fct> , , , , , , , , , , , , , , , , , , , , , , , U…
## $ payment_method_fac    <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ dialysis_fac          <fct> , , , , , , , , , , , , , , , , , , , , , , , ,…
## $ gender_fac            <fct> M, M, F, F, F, F, F, M, M, M, M, M, M, F, F, M,…
## $ abo_type_fac          <fct> O, O, B, B, B, O, O, AB, A, B, O, A, A, A, O, O…
## $ education_fac         <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ race_fac              <fct> 1, 1, 2, 2, 1, 5, 2, 998, 1, 1, 1, 1, 1, 1, 1, …
## $ ethnicity_fac         <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ functional_status_fac <fct> 998, 2, 2, 998, 1, 1, 998, 998, 1, 998, 1, 998,…
## $ organ_type_fac        <fct> , , , , , , , , , , , , , , , , , , , , , , , ,…
## $ locality_type_fac     <fct> 4, 5, 5, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3,…
## $ state_residence_fac   <fct> PA, IL, FL, HI, WA, VA, CA, IL, OH, CA, FL, CA,…
describe(kidney.cleanfactors)
## kidney.cleanfactors 
## 
##  35  Variables      980143  Observations
## --------------------------------------------------------------------------------
## organ_type : ORGAN LISTED FOR  Format:A12 
##        n  missing distinct 
##   933957    46186        4 
##                                       
## Value          KI     KP     PA     PI
## Frequency  870845  40914  20598   1600
## Proportion  0.932  0.044  0.022  0.002
## --------------------------------------------------------------------------------
## dialysis : WL MOST RECENT CANDIDATE ON DIALYSIS?  Format:A3 
##        n  missing distinct 
##   866551   113592        3 
##                                
## Value           1      N      Y
## Frequency     320 291373 574858
## Proportion  0.000  0.336  0.663
## --------------------------------------------------------------------------------
## gender : TCR RECIPIENT GENDER  Format:A3 
##        n  missing distinct 
##   980143        0        2 
##                         
## Value           F      M
## Frequency  392502 587641
## Proportion    0.4    0.6
## --------------------------------------------------------------------------------
## abo_type : TCR ABO BLOOD GROUP  Format:A9 
##        n  missing distinct 
##   980143        0        9 
## 
## lowest : A   A1  A1B A2  A2B, highest: A2B AB  B   O   UNK
##                                                                          
## Value           A     A1    A1B     A2    A2B     AB      B      O    UNK
## Frequency  322360   6614    570   1136    196  36603 139828 472810     26
## Proportion  0.329  0.007  0.001  0.001  0.000  0.037  0.143  0.482  0.000
## --------------------------------------------------------------------------------
## state_residence : TCR State of Permanent Residence at Listing  Format:A6 
##        n  missing distinct 
##   961641    18502       58 
## 
## lowest : AK AL AR AS AZ, highest: WA WI WV WY ZZ
## --------------------------------------------------------------------------------
## education : TCR HIGHEST EDUCATION LEVEL:  Format:F3.0 
##        n  missing distinct     Info     Mean      Gmd 
##   862361   117782        8    0.932    126.1    215.5 
## 
## lowest :   1   2   3   4   5, highest:   4   5   6 996 998
##                                                                   
## Value           1      2      3      4      5      6    996    998
## Frequency    4864  53470 321621 190618 130688  54938   5729 100433
## Proportion  0.006  0.062  0.373  0.221  0.152  0.064  0.007  0.116
## --------------------------------------------------------------------------------
## functional_status : TCR FUNCTIONAL STATUS @ LISTING  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   960908    19235       25    0.967     1304     1046        1        1 
##      .25      .50      .75      .90      .95 
##        1     2070     2080     2090     2100 
## 
## lowest :    1    2    3  996  998, highest: 4060 4070 4080 4090 4100
## --------------------------------------------------------------------------------
## waitlist_days : TOTAL DAYS ON WAITING LIST/INCLUDING INACTIVE TIME  Format:F5.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   933768    46375     6556        1    806.9    833.1       29       69 
##      .25      .50      .75      .90      .95 
##      200      546     1154     1891     2418 
## 
## lowest :     0     1     2     3     4, highest: 11592 11826 12076 12788 13741
## --------------------------------------------------------------------------------
## age_init : CALCULATED AGE AT LISTING  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   933941    46202       92        1    47.68    16.45       22       28 
##      .25      .50      .75      .90      .95 
##       38       49       59       66       69 
## 
## lowest :  0  1  2  3  4, highest: 87 88 89 90 91
## --------------------------------------------------------------------------------
## ethnicity : TCR ETHNICITY  Format:F1.0 
##        n  missing distinct     Info      Sum     Mean      Gmd 
##   980143        0        2    0.374   143081    0.146   0.2493 
## 
## --------------------------------------------------------------------------------
## race_binary : ETHNICITY CATEGORY  Format:F3.0 
##        n  missing distinct     Info     Mean      Gmd 
##   980143        0        8    0.838    2.106    1.554 
## 
## lowest :   1   2   4   5   6, highest:   5   6   7   9 998
##                                                                   
## Value           1      2      4      5      6      7      9    998
## Frequency  510028 258237 140020  52532   8790   4231   6228     77
## Proportion  0.520  0.263  0.143  0.054  0.009  0.004  0.006  0.000
## --------------------------------------------------------------------------------
## PT_CODE : ENCRYPTED PATIENT IDENTIFIER  Format:F7.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   980143        0   717271        1   619253   431816    58935   117888 
##      .25      .50      .75      .90      .95 
##   295648   602183   936606  1153064  1232521 
## 
## lowest :       1       3       4       9      10
## highest: 1325545 1325880 1326707 1326814 1327218
## --------------------------------------------------------------------------------
## region  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   980143        0       11    0.986    5.691    3.482        2        2 
##      .25      .50      .75      .90      .95 
##        3        5        8       10       11 
## 
## lowest :  1  2  3  4  5, highest:  7  8  9 10 11
##                                                                          
## Value           1      2      3      4      5      6      7      8      9
## Frequency   39409 139067 124332  89378 171415  28231  98173  50238  71524
## Proportion  0.040  0.142  0.127  0.091  0.175  0.029  0.100  0.051  0.073
##                         
## Value          10     11
## Frequency   81472  86904
## Proportion  0.083  0.089
## --------------------------------------------------------------------------------
## working_income : TCR WORKING FOR INCOME:  Format:A3 
##        n  missing distinct 
##   582461   397682        3 
##                                
## Value           N      U      Y
## Frequency  364318  27725 190418
## Proportion  0.625  0.048  0.327
## --------------------------------------------------------------------------------
## payment_method : TCR KIDNEY PRIMARY PROJECTED SOURCE PAY  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   842576   137567       14    0.898    3.649    3.682        1        1 
##      .25      .50      .75      .90      .95 
##        1        2        4       13       13 
## 
## lowest :  1  2  3  4  5, highest: 10 11 12 13 14
##                                                                          
## Value           1      2      3      4      5      6      7      8      9
## Frequency  372757  67387 188741  74272    725   7286   5392   2819    135
## Proportion  0.442  0.080  0.224  0.088  0.001  0.009  0.006  0.003  0.000
##                                              
## Value          10     11     12     13     14
## Frequency     531   3009    835 114779   3908
## Proportion  0.001  0.004  0.001  0.136  0.005
## --------------------------------------------------------------------------------
## donor_type : DONOR TYPE - DECEASED, LIVING OR FOREIGN  Format:A9 
##        n  missing distinct 
##   499185   480958        3 
##                                
## Value           C      F      L
## Frequency  341971     85 157129
## Proportion  0.685  0.000  0.315
## --------------------------------------------------------------------------------
## DISTANCE : DISTANCE DONOR HOSP TO TX CENTER (Nautical Miles)  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   494294   485849     2614    0.939    163.8    270.3        0        0 
##      .25      .50      .75      .90      .95 
##        0        8      117      517      967 
## 
## lowest :    0    1    2    3    4, highest: 4365 4383 4407 4408 4409
## --------------------------------------------------------------------------------
## DIAG_KI : Kidney Diagnosis from TRR/TCR  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   482794   497349       75    0.992     2905    261.8      999     3004 
##      .25      .50      .75      .90      .95 
##     3008     3037     3041     3070     3070 
## 
## lowest :  999 3000 3001 3002 3003, highest: 3070 3071 3072 3073 3074
##                                                                          
## Value        1000   3000   3005   3010   3015   3020   3025   3030   3035
## Frequency   30349   9552  54805  88198   2401  15439   7936   9647  42385
## Proportion  0.063  0.020  0.114  0.183  0.005  0.032  0.016  0.020  0.088
##                                                                   
## Value        3040   3045   3050   3055   3060   3065   3070   3075
## Frequency  118859   4096  10730   3681   1266   1236  81004   1210
## Proportion  0.246  0.008  0.022  0.008  0.003  0.003  0.168  0.003
## 
## For the frequency table, variable is rounded to the nearest 5
## --------------------------------------------------------------------------------
## locality_type : Share Type  Format:F1.0 
##        n  missing distinct     Info     Mean      Gmd 
##   499184   480959        4    0.472    3.311   0.5203 
##                                       
## Value           3      4      5      6
## Frequency  402971  37097  59031     85
## Proportion  0.807  0.074  0.118  0.000
## --------------------------------------------------------------------------------
## age_group  Format:A3 
##        n  missing distinct 
##   499185   480958        3 
##                                
## Value           A      P      U
## Frequency  475087  24097      1
## Proportion  0.952  0.048  0.000
## --------------------------------------------------------------------------------
## age_group_fac 
##        n  missing distinct 
##   980143        0        4 
##                                       
## Value           A      P      U       
## Frequency  475087  24097      1 480958
## Proportion  0.485  0.025  0.000  0.491
## --------------------------------------------------------------------------------
## donor_type_fac : DONOR TYPE - DECEASED, LIVING OR FOREIGN 
##        n  missing distinct 
##   980143        0        4 
##                                       
## Value           C      L      F       
## Frequency  341971 157129     85 480958
## Proportion  0.349  0.160  0.000  0.491
## --------------------------------------------------------------------------------
## region_fac 
##        n  missing distinct 
##   980143        0       11 
## 
## lowest : 1  2  3  4  5 , highest: 7  8  9  10 11
##                                                                          
## Value           1      2      3      4      5      6      7      8      9
## Frequency   39409 139067 124332  89378 171415  28231  98173  50238  71524
## Proportion  0.040  0.142  0.127  0.091  0.175  0.029  0.100  0.051  0.073
##                         
## Value          10     11
## Frequency   81472  86904
## Proportion  0.083  0.089
## --------------------------------------------------------------------------------
## working_income_fac : TCR WORKING FOR INCOME: 
##        n  missing distinct 
##   980143        0        4 
##                                       
## Value                  U      N      Y
## Frequency  397682  27725 364318 190418
## Proportion  0.406  0.028  0.372  0.194
## --------------------------------------------------------------------------------
## payment_method_fac 
##        n  missing distinct 
##   842576   137567       14 
## 
## lowest : 1  2  3  4  5 , highest: 10 11 12 13 14
##                                                                          
## Value           1      2      3      4      5      6      7      8      9
## Frequency  372757  67387 188741  74272    725   7286   5392   2819    135
## Proportion  0.442  0.080  0.224  0.088  0.001  0.009  0.006  0.003  0.000
##                                              
## Value          10     11     12     13     14
## Frequency     531   3009    835 114779   3908
## Proportion  0.001  0.004  0.001  0.136  0.005
## --------------------------------------------------------------------------------
## dialysis_fac : WL MOST RECENT CANDIDATE ON DIALYSIS? 
##        n  missing distinct 
##   980143        0        4 
##                                       
## Value                  N      Y      1
## Frequency  113592 291373 574858    320
## Proportion  0.116  0.297  0.587  0.000
## --------------------------------------------------------------------------------
## gender_fac : TCR RECIPIENT GENDER 
##        n  missing distinct 
##   980143        0        2 
##                         
## Value           M      F
## Frequency  587641 392502
## Proportion    0.6    0.4
## --------------------------------------------------------------------------------
## abo_type_fac : TCR ABO BLOOD GROUP 
##        n  missing distinct 
##   980143        0        9 
## 
## lowest : O   B   AB  A   A1 , highest: A1  A2  UNK A2B A1B
##                                                                          
## Value           O      B     AB      A     A1     A2    UNK    A2B    A1B
## Frequency  472810 139828  36603 322360   6614   1136     26    196    570
## Proportion  0.482  0.143  0.037  0.329  0.007  0.001  0.000  0.000  0.001
## --------------------------------------------------------------------------------
## education_fac 
##        n  missing distinct 
##   862361   117782        8 
## 
## lowest : 1   2   3   4   5  , highest: 4   5   6   996 998
##                                                                   
## Value           1      2      3      4      5      6    996    998
## Frequency    4864  53470 321621 190618 130688  54938   5729 100433
## Proportion  0.006  0.062  0.373  0.221  0.152  0.064  0.007  0.116
## --------------------------------------------------------------------------------
## race_fac 
##        n  missing distinct 
##   980143        0        8 
## 
## lowest : 1   2   4   5   6  , highest: 5   6   7   9   998
##                                                                   
## Value           1      2      4      5      6      7      9    998
## Frequency  510028 258237 140020  52532   8790   4231   6228     77
## Proportion  0.520  0.263  0.143  0.054  0.009  0.004  0.006  0.000
## --------------------------------------------------------------------------------
## ethnicity_fac 
##        n  missing distinct 
##   980143        0        2 
##                         
## Value           0      1
## Frequency  837062 143081
## Proportion  0.854  0.146
## --------------------------------------------------------------------------------
## functional_status_fac 
##        n  missing distinct 
##   960908    19235       25 
## 
## lowest : 1    2    3    996  998 , highest: 4060 4070 4080 4090 4100
## --------------------------------------------------------------------------------
## organ_type_fac : ORGAN LISTED FOR 
##        n  missing distinct 
##   980143        0        5 
## 
## lowest :    KI KP PA PI, highest:    KI KP PA PI
##                                              
## Value                 KI     KP     PA     PI
## Frequency   46186 870845  40914  20598   1600
## Proportion  0.047  0.888  0.042  0.021  0.002
## --------------------------------------------------------------------------------
## locality_type_fac 
##        n  missing distinct 
##   499184   480959        4 
##                                       
## Value           3      4      5      6
## Frequency  402971  37097  59031     85
## Proportion  0.807  0.074  0.118  0.000
## --------------------------------------------------------------------------------
## state_residence_fac : TCR State of Permanent Residence at Listing 
##        n  missing distinct 
##   980143        0       59 
## 
## lowest : PA IL FL HI WA, highest: MT NV GU AS MP
## --------------------------------------------------------------------------------

#now time to add levels to my categories…

levels(kidney.cleanfactors$ethnicity_fac) = c("Non-Hispanic","Hispanic")
levels(kidney.cleanfactors$education_fac) = c("None","Grade School", "Highschool or GED", "Attended Some College", "Associate/Bachelor Degree", "Post-Graduate", "N/A<5years old", "unknown", "unknown")
levels(kidney.cleanfactors$race_fac) = c("White","Black", "Hispanic", "Asian", "Amer Ind/Alaska Native", "Native Hawaiin/other", "Multi-racial", "Unknown", "Unknown")
levels(kidney.cleanfactors$abo_type_fac) = c("O","B", "AB", "A", "A1", "A2", "Unknown", "A2B", "A1B")
levels(kidney.cleanfactors$gender_fac) = c("Male","Female")
levels(kidney.cleanfactors$locality_type_fac) = c("Local", "Regional", "National", "Foreign")
levels(kidney.cleanfactors$organ_type_fac) = c("unknown", "Kidney", "KP", "P", "PI")
levels(kidney.cleanfactors$dialysis_fac) =c("missing", "no", "yes", "unknown")
levels(kidney.cleanfactors$working_income_fac) =c("missing", "unknown", "no", "yes")
levels(kidney.cleanfactors$payment_method_fac) =c("Private Insurance", "Public InsuranceMedicaid", "PublicMedicare FFS", "PublicMedicareCh", "PublicCHIP", "PublicVA", "PublicOthrGovt", "Self", "Donation", "Free Care", "Pending", "Foreign Govt", "PublicMedicareunknowntype", "StateGovtAgency", "Unknown", "other", "missing")
levels(kidney.cleanfactors$donor_type_fac) =c("deceased donor", "living donor", "foreign", "missing")


glimpse(kidney.cleanfactors)
## Rows: 980,143
## Columns: 35
## $ organ_type            <chr> "", "", "", "", "", "", "", "", "", "", "", "",…
## $ dialysis              <chr> "", "", "", "", "", "", "", "", "", "", "", "",…
## $ gender                <chr> "M", "M", "F", "F", "F", "F", "F", "M", "M", "M…
## $ abo_type              <chr> "O", "O", "B", "B", "B", "O", "O", "AB", "A", "…
## $ state_residence       <chr> "PA", "IL", "FL", "HI", "WA", "VA", "CA", "IL",…
## $ education             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ functional_status     <dbl> 998, 2, 2, 998, 1, 1, 998, 998, 1, 998, 1, 998,…
## $ waitlist_days         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ age_init              <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ ethnicity             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ race_binary           <dbl> 1, 1, 2, 2, 1, 5, 2, 998, 1, 1, 1, 1, 1, 1, 1, …
## $ PT_CODE               <dbl> 481014, 260021, 425419, 175705, 400671, 406485,…
## $ region                <dbl> 2, 7, 3, 5, 6, 2, 5, 7, 10, 5, 3, 5, 3, 3, 7, 1…
## $ working_income        <chr> "", "", "", "", "", "", "", "", "", "", "", "",…
## $ payment_method        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ donor_type            <chr> "C", "C", "C", "C", "C", "C", "C", "C", "C", "C…
## $ DISTANCE              <dbl> 153, 779, 620, 9, 4, 0, 12, 205, 0, 2174, 55, 5…
## $ DIAG_KI               <dbl> 3006, 999, 3007, 3048, 999, 3041, 3041, 999, 30…
## $ locality_type         <dbl> 4, 5, 5, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3,…
## $ age_group             <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A…
## $ age_group_fac         <fct> A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,…
## $ donor_type_fac        <fct> deceased donor, deceased donor, deceased donor,…
## $ region_fac            <fct> 2, 7, 3, 5, 6, 2, 5, 7, 10, 5, 3, 5, 3, 3, 7, 1…
## $ working_income_fac    <fct> missing, missing, missing, missing, missing, mi…
## $ payment_method_fac    <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ dialysis_fac          <fct> missing, missing, missing, missing, missing, mi…
## $ gender_fac            <fct> Male, Male, Female, Female, Female, Female, Fem…
## $ abo_type_fac          <fct> O, O, B, B, B, O, O, AB, A, B, O, A, A, A, O, O…
## $ education_fac         <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ race_fac              <fct> White, White, Black, Black, White, Asian, Black…
## $ ethnicity_fac         <fct> Non-Hispanic, Non-Hispanic, Non-Hispanic, Non-H…
## $ functional_status_fac <fct> 998, 2, 2, 998, 1, 1, 998, 998, 1, 998, 1, 998,…
## $ organ_type_fac        <fct> unknown, unknown, unknown, unknown, unknown, un…
## $ locality_type_fac     <fct> Regional, National, National, Local, Local, Loc…
## $ state_residence_fac   <fct> PA, IL, FL, HI, WA, VA, CA, IL, OH, CA, FL, CA,…
describe(kidney.cleanfactors)
## kidney.cleanfactors 
## 
##  35  Variables      980143  Observations
## --------------------------------------------------------------------------------
## organ_type : ORGAN LISTED FOR  Format:A12 
##        n  missing distinct 
##   933957    46186        4 
##                                       
## Value          KI     KP     PA     PI
## Frequency  870845  40914  20598   1600
## Proportion  0.932  0.044  0.022  0.002
## --------------------------------------------------------------------------------
## dialysis : WL MOST RECENT CANDIDATE ON DIALYSIS?  Format:A3 
##        n  missing distinct 
##   866551   113592        3 
##                                
## Value           1      N      Y
## Frequency     320 291373 574858
## Proportion  0.000  0.336  0.663
## --------------------------------------------------------------------------------
## gender : TCR RECIPIENT GENDER  Format:A3 
##        n  missing distinct 
##   980143        0        2 
##                         
## Value           F      M
## Frequency  392502 587641
## Proportion    0.4    0.6
## --------------------------------------------------------------------------------
## abo_type : TCR ABO BLOOD GROUP  Format:A9 
##        n  missing distinct 
##   980143        0        9 
## 
## lowest : A   A1  A1B A2  A2B, highest: A2B AB  B   O   UNK
##                                                                          
## Value           A     A1    A1B     A2    A2B     AB      B      O    UNK
## Frequency  322360   6614    570   1136    196  36603 139828 472810     26
## Proportion  0.329  0.007  0.001  0.001  0.000  0.037  0.143  0.482  0.000
## --------------------------------------------------------------------------------
## state_residence : TCR State of Permanent Residence at Listing  Format:A6 
##        n  missing distinct 
##   961641    18502       58 
## 
## lowest : AK AL AR AS AZ, highest: WA WI WV WY ZZ
## --------------------------------------------------------------------------------
## education : TCR HIGHEST EDUCATION LEVEL:  Format:F3.0 
##        n  missing distinct     Info     Mean      Gmd 
##   862361   117782        8    0.932    126.1    215.5 
## 
## lowest :   1   2   3   4   5, highest:   4   5   6 996 998
##                                                                   
## Value           1      2      3      4      5      6    996    998
## Frequency    4864  53470 321621 190618 130688  54938   5729 100433
## Proportion  0.006  0.062  0.373  0.221  0.152  0.064  0.007  0.116
## --------------------------------------------------------------------------------
## functional_status : TCR FUNCTIONAL STATUS @ LISTING  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   960908    19235       25    0.967     1304     1046        1        1 
##      .25      .50      .75      .90      .95 
##        1     2070     2080     2090     2100 
## 
## lowest :    1    2    3  996  998, highest: 4060 4070 4080 4090 4100
## --------------------------------------------------------------------------------
## waitlist_days : TOTAL DAYS ON WAITING LIST/INCLUDING INACTIVE TIME  Format:F5.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   933768    46375     6556        1    806.9    833.1       29       69 
##      .25      .50      .75      .90      .95 
##      200      546     1154     1891     2418 
## 
## lowest :     0     1     2     3     4, highest: 11592 11826 12076 12788 13741
## --------------------------------------------------------------------------------
## age_init : CALCULATED AGE AT LISTING  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   933941    46202       92        1    47.68    16.45       22       28 
##      .25      .50      .75      .90      .95 
##       38       49       59       66       69 
## 
## lowest :  0  1  2  3  4, highest: 87 88 89 90 91
## --------------------------------------------------------------------------------
## ethnicity : TCR ETHNICITY  Format:F1.0 
##        n  missing distinct     Info      Sum     Mean      Gmd 
##   980143        0        2    0.374   143081    0.146   0.2493 
## 
## --------------------------------------------------------------------------------
## race_binary : ETHNICITY CATEGORY  Format:F3.0 
##        n  missing distinct     Info     Mean      Gmd 
##   980143        0        8    0.838    2.106    1.554 
## 
## lowest :   1   2   4   5   6, highest:   5   6   7   9 998
##                                                                   
## Value           1      2      4      5      6      7      9    998
## Frequency  510028 258237 140020  52532   8790   4231   6228     77
## Proportion  0.520  0.263  0.143  0.054  0.009  0.004  0.006  0.000
## --------------------------------------------------------------------------------
## PT_CODE : ENCRYPTED PATIENT IDENTIFIER  Format:F7.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   980143        0   717271        1   619253   431816    58935   117888 
##      .25      .50      .75      .90      .95 
##   295648   602183   936606  1153064  1232521 
## 
## lowest :       1       3       4       9      10
## highest: 1325545 1325880 1326707 1326814 1327218
## --------------------------------------------------------------------------------
## region  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   980143        0       11    0.986    5.691    3.482        2        2 
##      .25      .50      .75      .90      .95 
##        3        5        8       10       11 
## 
## lowest :  1  2  3  4  5, highest:  7  8  9 10 11
##                                                                          
## Value           1      2      3      4      5      6      7      8      9
## Frequency   39409 139067 124332  89378 171415  28231  98173  50238  71524
## Proportion  0.040  0.142  0.127  0.091  0.175  0.029  0.100  0.051  0.073
##                         
## Value          10     11
## Frequency   81472  86904
## Proportion  0.083  0.089
## --------------------------------------------------------------------------------
## working_income : TCR WORKING FOR INCOME:  Format:A3 
##        n  missing distinct 
##   582461   397682        3 
##                                
## Value           N      U      Y
## Frequency  364318  27725 190418
## Proportion  0.625  0.048  0.327
## --------------------------------------------------------------------------------
## payment_method : TCR KIDNEY PRIMARY PROJECTED SOURCE PAY  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   842576   137567       14    0.898    3.649    3.682        1        1 
##      .25      .50      .75      .90      .95 
##        1        2        4       13       13 
## 
## lowest :  1  2  3  4  5, highest: 10 11 12 13 14
##                                                                          
## Value           1      2      3      4      5      6      7      8      9
## Frequency  372757  67387 188741  74272    725   7286   5392   2819    135
## Proportion  0.442  0.080  0.224  0.088  0.001  0.009  0.006  0.003  0.000
##                                              
## Value          10     11     12     13     14
## Frequency     531   3009    835 114779   3908
## Proportion  0.001  0.004  0.001  0.136  0.005
## --------------------------------------------------------------------------------
## donor_type : DONOR TYPE - DECEASED, LIVING OR FOREIGN  Format:A9 
##        n  missing distinct 
##   499185   480958        3 
##                                
## Value           C      F      L
## Frequency  341971     85 157129
## Proportion  0.685  0.000  0.315
## --------------------------------------------------------------------------------
## DISTANCE : DISTANCE DONOR HOSP TO TX CENTER (Nautical Miles)  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   494294   485849     2614    0.939    163.8    270.3        0        0 
##      .25      .50      .75      .90      .95 
##        0        8      117      517      967 
## 
## lowest :    0    1    2    3    4, highest: 4365 4383 4407 4408 4409
## --------------------------------------------------------------------------------
## DIAG_KI : Kidney Diagnosis from TRR/TCR  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   482794   497349       75    0.992     2905    261.8      999     3004 
##      .25      .50      .75      .90      .95 
##     3008     3037     3041     3070     3070 
## 
## lowest :  999 3000 3001 3002 3003, highest: 3070 3071 3072 3073 3074
##                                                                          
## Value        1000   3000   3005   3010   3015   3020   3025   3030   3035
## Frequency   30349   9552  54805  88198   2401  15439   7936   9647  42385
## Proportion  0.063  0.020  0.114  0.183  0.005  0.032  0.016  0.020  0.088
##                                                                   
## Value        3040   3045   3050   3055   3060   3065   3070   3075
## Frequency  118859   4096  10730   3681   1266   1236  81004   1210
## Proportion  0.246  0.008  0.022  0.008  0.003  0.003  0.168  0.003
## 
## For the frequency table, variable is rounded to the nearest 5
## --------------------------------------------------------------------------------
## locality_type : Share Type  Format:F1.0 
##        n  missing distinct     Info     Mean      Gmd 
##   499184   480959        4    0.472    3.311   0.5203 
##                                       
## Value           3      4      5      6
## Frequency  402971  37097  59031     85
## Proportion  0.807  0.074  0.118  0.000
## --------------------------------------------------------------------------------
## age_group  Format:A3 
##        n  missing distinct 
##   499185   480958        3 
##                                
## Value           A      P      U
## Frequency  475087  24097      1
## Proportion  0.952  0.048  0.000
## --------------------------------------------------------------------------------
## age_group_fac 
##        n  missing distinct 
##   980143        0        4 
##                                       
## Value           A      P      U       
## Frequency  475087  24097      1 480958
## Proportion  0.485  0.025  0.000  0.491
## --------------------------------------------------------------------------------
## donor_type_fac : DONOR TYPE - DECEASED, LIVING OR FOREIGN 
##        n  missing distinct 
##   980143        0        4 
##                                                                       
## Value      deceased donor   living donor        foreign        missing
## Frequency          341971         157129             85         480958
## Proportion          0.349          0.160          0.000          0.491
## --------------------------------------------------------------------------------
## region_fac 
##        n  missing distinct 
##   980143        0       11 
## 
## lowest : 1  2  3  4  5 , highest: 7  8  9  10 11
##                                                                          
## Value           1      2      3      4      5      6      7      8      9
## Frequency   39409 139067 124332  89378 171415  28231  98173  50238  71524
## Proportion  0.040  0.142  0.127  0.091  0.175  0.029  0.100  0.051  0.073
##                         
## Value          10     11
## Frequency   81472  86904
## Proportion  0.083  0.089
## --------------------------------------------------------------------------------
## working_income_fac : TCR WORKING FOR INCOME: 
##        n  missing distinct 
##   980143        0        4 
##                                           
## Value      missing unknown      no     yes
## Frequency   397682   27725  364318  190418
## Proportion   0.406   0.028   0.372   0.194
## --------------------------------------------------------------------------------
## payment_method_fac 
##        n  missing distinct 
##   842576   137567       14 
## 
## lowest : Private Insurance         Public InsuranceMedicaid  PublicMedicare FFS        PublicMedicareCh          PublicCHIP               
## highest: Free Care                 Pending                   Foreign Govt              PublicMedicareunknowntype StateGovtAgency          
## 
## Private Insurance (372757, 0.442), Public InsuranceMedicaid (67387, 0.080),
## PublicMedicare FFS (188741, 0.224), PublicMedicareCh (74272, 0.088), PublicCHIP
## (725, 0.001), PublicVA (7286, 0.009), PublicOthrGovt (5392, 0.006), Self (2819,
## 0.003), Donation (135, 0.000), Free Care (531, 0.001), Pending (3009, 0.004),
## Foreign Govt (835, 0.001), PublicMedicareunknowntype (114779, 0.136),
## StateGovtAgency (3908, 0.005)
## --------------------------------------------------------------------------------
## dialysis_fac : WL MOST RECENT CANDIDATE ON DIALYSIS? 
##        n  missing distinct 
##   980143        0        4 
##                                           
## Value      missing      no     yes unknown
## Frequency   113592  291373  574858     320
## Proportion   0.116   0.297   0.587   0.000
## --------------------------------------------------------------------------------
## gender_fac : TCR RECIPIENT GENDER 
##        n  missing distinct 
##   980143        0        2 
##                         
## Value        Male Female
## Frequency  587641 392502
## Proportion    0.6    0.4
## --------------------------------------------------------------------------------
## abo_type_fac : TCR ABO BLOOD GROUP 
##        n  missing distinct 
##   980143        0        9 
## 
## lowest : O       B       AB      A       A1     
## highest: A1      A2      Unknown A2B     A1B    
##                                                                           
## Value            O       B      AB       A      A1      A2 Unknown     A2B
## Frequency   472810  139828   36603  322360    6614    1136      26     196
## Proportion   0.482   0.143   0.037   0.329   0.007   0.001   0.000   0.000
##                   
## Value          A1B
## Frequency      570
## Proportion   0.001
## --------------------------------------------------------------------------------
## education_fac 
##        n  missing distinct 
##   862361   117782        8 
## 
## lowest : None                      Grade School              Highschool or GED         Attended Some College     Associate/Bachelor Degree
## highest: Attended Some College     Associate/Bachelor Degree Post-Graduate             N/A<5years old            unknown                  
## 
## None (4864, 0.006), Grade School (53470, 0.062), Highschool or GED (321621,
## 0.373), Attended Some College (190618, 0.221), Associate/Bachelor Degree
## (130688, 0.152), Post-Graduate (54938, 0.064), N/A<5years old (5729, 0.007),
## unknown (100433, 0.116)
## --------------------------------------------------------------------------------
## race_fac 
##        n  missing distinct 
##   980143        0        8 
## 
## lowest : White                  Black                  Hispanic               Asian                  Amer Ind/Alaska Native
## highest: Asian                  Amer Ind/Alaska Native Native Hawaiin/other   Multi-racial           Unknown               
## 
## White (510028, 0.520), Black (258237, 0.263), Hispanic (140020, 0.143), Asian
## (52532, 0.054), Amer Ind/Alaska Native (8790, 0.009), Native Hawaiin/other
## (4231, 0.004), Multi-racial (6228, 0.006), Unknown (77, 0.000)
## --------------------------------------------------------------------------------
## ethnicity_fac 
##        n  missing distinct 
##   980143        0        2 
##                                     
## Value      Non-Hispanic     Hispanic
## Frequency        837062       143081
## Proportion        0.854        0.146
## --------------------------------------------------------------------------------
## functional_status_fac 
##        n  missing distinct 
##   960908    19235       25 
## 
## lowest : 1    2    3    996  998 , highest: 4060 4070 4080 4090 4100
## --------------------------------------------------------------------------------
## organ_type_fac : ORGAN LISTED FOR 
##        n  missing distinct 
##   980143        0        5 
## 
## lowest : unknown Kidney  KP      P       PI     
## highest: unknown Kidney  KP      P       PI     
##                                                   
## Value      unknown  Kidney      KP       P      PI
## Frequency    46186  870845   40914   20598    1600
## Proportion   0.047   0.888   0.042   0.021   0.002
## --------------------------------------------------------------------------------
## locality_type_fac 
##        n  missing distinct 
##   499184   480959        4 
##                                               
## Value         Local Regional National  Foreign
## Frequency    402971    37097    59031       85
## Proportion    0.807    0.074    0.118    0.000
## --------------------------------------------------------------------------------
## state_residence_fac : TCR State of Permanent Residence at Listing 
##        n  missing distinct 
##   980143        0       59 
## 
## lowest : PA IL FL HI WA, highest: MT NV GU AS MP
## --------------------------------------------------------------------------------

###this is where I stopped to check out what variables got “wonky” when labeled. All variables have been adjusted/fixed ##Next sstep, delete all organs except kidney before reducing other variables

kidney.clean1 <- kidney.cleanfactors %>%
   filter(!(organ_type_fac %in%  
           c("unknown", "KP", "P", "PI")))

describe(kidney.clean1)
## kidney.clean1 
## 
##  35  Variables      870845  Observations
## --------------------------------------------------------------------------------
## organ_type : ORGAN LISTED FOR  Format:A12 
##        n  missing distinct    value 
##   870845        0        1       KI 
##                  
## Value          KI
## Frequency  870845
## Proportion      1
## --------------------------------------------------------------------------------
## dialysis : WL MOST RECENT CANDIDATE ON DIALYSIS?  Format:A3 
##        n  missing distinct 
##   828121    42724        2 
##                         
## Value           N      Y
## Frequency  276310 551811
## Proportion  0.334  0.666
## --------------------------------------------------------------------------------
## gender : TCR RECIPIENT GENDER  Format:A3 
##        n  missing distinct 
##   870845        0        2 
##                         
## Value           F      M
## Frequency  345591 525254
## Proportion  0.397  0.603
## --------------------------------------------------------------------------------
## abo_type : TCR ABO BLOOD GROUP  Format:A9 
##        n  missing distinct 
##   870845        0        9 
## 
## lowest : A   A1  A1B A2  A2B, highest: A2B AB  B   O   UNK
##                                                                          
## Value           A     A1    A1B     A2    A2B     AB      B      O    UNK
## Frequency  281281   5792    494    910    170  32466 126724 423000      8
## Proportion  0.323  0.007  0.001  0.001  0.000  0.037  0.146  0.486  0.000
## --------------------------------------------------------------------------------
## state_residence : TCR State of Permanent Residence at Listing  Format:A6 
##        n  missing distinct 
##   853946    16899       58 
## 
## lowest : AK AL AR AS AZ, highest: WA WI WV WY ZZ
## --------------------------------------------------------------------------------
## education : TCR HIGHEST EDUCATION LEVEL:  Format:F3.0 
##        n  missing distinct     Info     Mean      Gmd 
##   770184   100661        8     0.93    118.3    203.7 
## 
## lowest :   1   2   3   4   5, highest:   4   5   6 996 998
##                                                                   
## Value           1      2      3      4      5      6    996    998
## Frequency    4518  49816 290142 170305 116596  50015   3670  85122
## Proportion  0.006  0.065  0.377  0.221  0.151  0.065  0.005  0.111
## --------------------------------------------------------------------------------
## functional_status : TCR FUNCTIONAL STATUS @ LISTING  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   853114    17731       25     0.97     1353     1022        1        1 
##      .25      .50      .75      .90      .95 
##        1     2070     2080     2090     2100 
## 
## lowest :    1    2    3  996  998, highest: 4060 4070 4080 4090 4100
## --------------------------------------------------------------------------------
## waitlist_days : TOTAL DAYS ON WAITING LIST/INCLUDING INACTIVE TIME  Format:F5.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   870676      169     6531        1    828.7    844.2       32       74 
##      .25      .50      .75      .90      .95 
##      212      573     1185     1920     2445 
## 
## lowest :     0     1     2     3     4, highest: 11592 11826 12076 12788 13741
## --------------------------------------------------------------------------------
## age_init : CALCULATED AGE AT LISTING  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   870829       16       92        1    48.26    16.53       22       28 
##      .25      .50      .75      .90      .95 
##       38       50       59       66       69 
## 
## lowest :  0  1  2  3  4, highest: 87 88 89 90 91
## --------------------------------------------------------------------------------
## ethnicity : TCR ETHNICITY  Format:F1.0 
##        n  missing distinct     Info      Sum     Mean      Gmd 
##   870845        0        2    0.388   132751   0.1524   0.2584 
## 
## --------------------------------------------------------------------------------
## race_binary : ETHNICITY CATEGORY  Format:F3.0 
##        n  missing distinct     Info     Mean      Gmd 
##   870845        0        8    0.855      2.1    1.463 
## 
## lowest :   1   2   4   5   6, highest:   5   6   7   9 998
##                                                                   
## Value           1      2      4      5      6      7      9    998
## Frequency  428439 243839 129948  50758   8108   3971   5771     11
## Proportion  0.492  0.280  0.149  0.058  0.009  0.005  0.007  0.000
## --------------------------------------------------------------------------------
## PT_CODE : ENCRYPTED PATIENT IDENTIFIER  Format:F7.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   870845        0   673711        1   637073   434456    62038   123802 
##      .25      .50      .75      .90      .95 
##   310222   632125   958919  1163741  1238466 
## 
## lowest :       1       3       4       9      10
## highest: 1325545 1325880 1326707 1326814 1327218
## --------------------------------------------------------------------------------
## region  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   870845        0       11    0.985     5.65    3.463        2        2 
##      .25      .50      .75      .90      .95 
##        3        5        8       10       11 
## 
## lowest :  1  2  3  4  5, highest:  7  8  9 10 11
##                                                                          
## Value           1      2      3      4      5      6      7      8      9
## Frequency   34021 124644 111934  82781 158773  24058  79663  44112  64780
## Proportion  0.039  0.143  0.129  0.095  0.182  0.028  0.091  0.051  0.074
##                         
## Value          10     11
## Frequency   68805  77274
## Proportion  0.079  0.089
## --------------------------------------------------------------------------------
## working_income : TCR WORKING FOR INCOME:  Format:A3 
##        n  missing distinct 
##   539093   331752        3 
##                                
## Value           N      U      Y
## Frequency  339953  24967 174173
## Proportion  0.631  0.046  0.323
## --------------------------------------------------------------------------------
## payment_method : TCR KIDNEY PRIMARY PROJECTED SOURCE PAY  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   769863   100982       14    0.903    3.624    3.619        1        1 
##      .25      .50      .75      .90      .95 
##        1        2        4       13       13 
## 
## lowest :  1  2  3  4  5, highest: 10 11 12 13 14
##                                                                          
## Value           1      2      3      4      5      6      7      8      9
## Frequency  333109  62423 179422  70903    669   7084   5139   2319     96
## Proportion  0.433  0.081  0.233  0.092  0.001  0.009  0.007  0.003  0.000
##                                              
## Value          10     11     12     13     14
## Frequency     450   2723    731 101461   3334
## Proportion  0.001  0.004  0.001  0.132  0.004
## --------------------------------------------------------------------------------
## donor_type : DONOR TYPE - DECEASED, LIVING OR FOREIGN  Format:A9 
##        n  missing distinct 
##   420735   450110        3 
##                                
## Value           C      F      L
## Frequency  308791     80 111864
## Proportion  0.734  0.000  0.266
## --------------------------------------------------------------------------------
## DISTANCE : DISTANCE DONOR HOSP TO TX CENTER (Nautical Miles)  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   416460   454385     2601    0.957      180    292.6        0        0 
##      .25      .50      .75      .90      .95 
##        0       12      139      577     1064 
## 
## lowest :    0    1    2    3    4, highest: 4365 4383 4407 4408 4409
## --------------------------------------------------------------------------------
## DIAG_KI : Kidney Diagnosis from TRR/TCR  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   414453   456392       74     0.99     2904    264.1      999     3004 
##      .25      .50      .75      .90      .95 
##     3008     3037     3041     3070     3070 
## 
## lowest :  999 3000 3001 3002 3003, highest: 3070 3071 3072 3073 3074
##                                                                          
## Value        1000   3000   3005   3010   3015   3020   3025   3030   3035
## Frequency   26353   8286  48392  68810   2065  13801   6427   8198  38187
## Proportion  0.064  0.020  0.117  0.166  0.005  0.033  0.016  0.020  0.092
##                                                                   
## Value        3040   3045   3050   3055   3060   3065   3070   3075
## Frequency  107936   3585   9207   3254   1113   1156  66482   1201
## Proportion  0.260  0.009  0.022  0.008  0.003  0.003  0.160  0.003
## 
## For the frequency table, variable is rounded to the nearest 5
## --------------------------------------------------------------------------------
## locality_type : Share Type  Format:F1.0 
##        n  missing distinct     Info     Mean      Gmd 
##   420735   450110        4      0.5    3.338   0.5554 
##                                       
## Value           3      4      5      6
## Frequency  333384  32712  54559     80
## Proportion  0.792  0.078  0.130  0.000
## --------------------------------------------------------------------------------
## age_group  Format:A3 
##        n  missing distinct 
##   420735   450110        2 
##                         
## Value           A      P
## Frequency  403549  17186
## Proportion  0.959  0.041
## --------------------------------------------------------------------------------
## age_group_fac 
##        n  missing distinct 
##   870845        0        3 
##                                
## Value           A      P       
## Frequency  403549  17186 450110
## Proportion  0.463  0.020  0.517
## --------------------------------------------------------------------------------
## donor_type_fac : DONOR TYPE - DECEASED, LIVING OR FOREIGN 
##        n  missing distinct 
##   870845        0        4 
##                                                                       
## Value      deceased donor   living donor        foreign        missing
## Frequency          308791         111864             80         450110
## Proportion          0.355          0.128          0.000          0.517
## --------------------------------------------------------------------------------
## region_fac 
##        n  missing distinct 
##   870845        0       11 
## 
## lowest : 1  2  3  4  5 , highest: 7  8  9  10 11
##                                                                          
## Value           1      2      3      4      5      6      7      8      9
## Frequency   34021 124644 111934  82781 158773  24058  79663  44112  64780
## Proportion  0.039  0.143  0.129  0.095  0.182  0.028  0.091  0.051  0.074
##                         
## Value          10     11
## Frequency   68805  77274
## Proportion  0.079  0.089
## --------------------------------------------------------------------------------
## working_income_fac : TCR WORKING FOR INCOME: 
##        n  missing distinct 
##   870845        0        4 
##                                           
## Value      missing unknown      no     yes
## Frequency   331752   24967  339953  174173
## Proportion   0.381   0.029   0.390   0.200
## --------------------------------------------------------------------------------
## payment_method_fac 
##        n  missing distinct 
##   769863   100982       14 
## 
## lowest : Private Insurance         Public InsuranceMedicaid  PublicMedicare FFS        PublicMedicareCh          PublicCHIP               
## highest: Free Care                 Pending                   Foreign Govt              PublicMedicareunknowntype StateGovtAgency          
## 
## Private Insurance (333109, 0.433), Public InsuranceMedicaid (62423, 0.081),
## PublicMedicare FFS (179422, 0.233), PublicMedicareCh (70903, 0.092), PublicCHIP
## (669, 0.001), PublicVA (7084, 0.009), PublicOthrGovt (5139, 0.007), Self (2319,
## 0.003), Donation (96, 0.000), Free Care (450, 0.001), Pending (2723, 0.004),
## Foreign Govt (731, 0.001), PublicMedicareunknowntype (101461, 0.132),
## StateGovtAgency (3334, 0.004)
## --------------------------------------------------------------------------------
## dialysis_fac : WL MOST RECENT CANDIDATE ON DIALYSIS? 
##        n  missing distinct 
##   870845        0        3 
##                                   
## Value      missing      no     yes
## Frequency    42724  276310  551811
## Proportion   0.049   0.317   0.634
## --------------------------------------------------------------------------------
## gender_fac : TCR RECIPIENT GENDER 
##        n  missing distinct 
##   870845        0        2 
##                         
## Value        Male Female
## Frequency  525254 345591
## Proportion  0.603  0.397
## --------------------------------------------------------------------------------
## abo_type_fac : TCR ABO BLOOD GROUP 
##        n  missing distinct 
##   870845        0        9 
## 
## lowest : O       B       AB      A       A1     
## highest: A1      A2      Unknown A2B     A1B    
##                                                                           
## Value            O       B      AB       A      A1      A2 Unknown     A2B
## Frequency   423000  126724   32466  281281    5792     910       8     170
## Proportion   0.486   0.146   0.037   0.323   0.007   0.001   0.000   0.000
##                   
## Value          A1B
## Frequency      494
## Proportion   0.001
## --------------------------------------------------------------------------------
## education_fac 
##        n  missing distinct 
##   770184   100661        8 
## 
## lowest : None                      Grade School              Highschool or GED         Attended Some College     Associate/Bachelor Degree
## highest: Attended Some College     Associate/Bachelor Degree Post-Graduate             N/A<5years old            unknown                  
## 
## None (4518, 0.006), Grade School (49816, 0.065), Highschool or GED (290142,
## 0.377), Attended Some College (170305, 0.221), Associate/Bachelor Degree
## (116596, 0.151), Post-Graduate (50015, 0.065), N/A<5years old (3670, 0.005),
## unknown (85122, 0.111)
## --------------------------------------------------------------------------------
## race_fac 
##        n  missing distinct 
##   870845        0        8 
## 
## lowest : White                  Black                  Hispanic               Asian                  Amer Ind/Alaska Native
## highest: Asian                  Amer Ind/Alaska Native Native Hawaiin/other   Multi-racial           Unknown               
## 
## White (428439, 0.492), Black (243839, 0.280), Hispanic (129948, 0.149), Asian
## (50758, 0.058), Amer Ind/Alaska Native (8108, 0.009), Native Hawaiin/other
## (3971, 0.005), Multi-racial (5771, 0.007), Unknown (11, 0.000)
## --------------------------------------------------------------------------------
## ethnicity_fac 
##        n  missing distinct 
##   870845        0        2 
##                                     
## Value      Non-Hispanic     Hispanic
## Frequency        738094       132751
## Proportion        0.848        0.152
## --------------------------------------------------------------------------------
## functional_status_fac 
##        n  missing distinct 
##   853114    17731       25 
## 
## lowest : 1    2    3    996  998 , highest: 4060 4070 4080 4090 4100
## --------------------------------------------------------------------------------
## organ_type_fac : ORGAN LISTED FOR 
##        n  missing distinct    value 
##   870845        0        1   Kidney 
##                  
## Value      Kidney
## Frequency  870845
## Proportion      1
## --------------------------------------------------------------------------------
## locality_type_fac 
##        n  missing distinct 
##   420735   450110        4 
##                                               
## Value         Local Regional National  Foreign
## Frequency    333384    32712    54559       80
## Proportion    0.792    0.078    0.130    0.000
## --------------------------------------------------------------------------------
## state_residence_fac : TCR State of Permanent Residence at Listing 
##        n  missing distinct 
##   870845        0       59 
## 
## lowest : PA IL FL HI WA, highest: MT NV GU AS MP
## --------------------------------------------------------------------------------
kidney.clean1 <- kidney.cleanfactors %>%
   filter(!(organ_type_fac %in%  
           c("unknown", "KP", "P", "PI")))%>%
  filter(!(locality_type_fac %in%
           c("Foreign"))) %>%
  filter(!(race_fac %in%
           c("Hispanic", "Asian", "Amer Ind/Alaska Native", "Native Hawaiin/other", "Multi-racial", "Unknown"))) %>%
  filter(!(education_fac %in%
           c("None", "N/A<5years old", "unknown"))) %>%
  filter(!(abo_type_fac %in%
           c("A1", "A1B", "A2", "A2B", "Unknown"))) %>%
  filter(!(dialysis_fac %in%
           c("missing", "unknown"))) %>%
  filter(!(working_income_fac %in%
           c("missing", "unknown"))) %>%
  filter(!(donor_type_fac %in%
           c("foreign", "missing"))) %>%
  filter(!(payment_method_fac %in%
           c("Self", "Donation", "Free Care", "Pending", "Foreign Govt")))

describe(kidney.clean1)
## kidney.clean1 
## 
##  35  Variables      156144  Observations
## --------------------------------------------------------------------------------
## organ_type : ORGAN LISTED FOR  Format:A12 
##        n  missing distinct    value 
##   156144        0        1       KI 
##                  
## Value          KI
## Frequency  156144
## Proportion      1
## --------------------------------------------------------------------------------
## dialysis : WL MOST RECENT CANDIDATE ON DIALYSIS?  Format:A3 
##        n  missing distinct 
##   156144        0        2 
##                         
## Value           N      Y
## Frequency   48517 107627
## Proportion  0.311  0.689
## --------------------------------------------------------------------------------
## gender : TCR RECIPIENT GENDER  Format:A3 
##        n  missing distinct 
##   156144        0        2 
##                       
## Value          F     M
## Frequency  60236 95908
## Proportion 0.386 0.614
## --------------------------------------------------------------------------------
## abo_type : TCR ABO BLOOD GROUP  Format:A9 
##        n  missing distinct 
##   156144        0        4 
##                                   
## Value          A    AB     B     O
## Frequency  59726  8037 20602 67779
## Proportion 0.383 0.051 0.132 0.434
## --------------------------------------------------------------------------------
## state_residence : TCR State of Permanent Residence at Listing  Format:A6 
##        n  missing distinct 
##   156142        2       56 
## 
## lowest : AK AL AR AS AZ, highest: WA WI WV WY ZZ
## --------------------------------------------------------------------------------
## education : TCR HIGHEST EDUCATION LEVEL:  Format:F3.0 
##        n  missing distinct     Info     Mean      Gmd 
##   156134       10        5    0.904    3.952    1.115 
## 
## lowest : 2 3 4 5 6, highest: 2 3 4 5 6
##                                         
## Value          2     3     4     5     6
## Frequency   3097 62652 43567 32345 14473
## Proportion 0.020 0.401 0.279 0.207 0.093
## --------------------------------------------------------------------------------
## functional_status : TCR FUNCTIONAL STATUS @ LISTING  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   156142        2       16    0.953     1970    219.8      998     2050 
##      .25      .50      .75      .90      .95 
##     2070     2080     2090     2100     2100 
## 
## lowest :    1    2    3  996  998, highest: 2070 2080 2090 2100 4100
##                                                                             
## Value          1     2     3   996   998  2010  2020  2030  2040  2050  2060
## Frequency   4862   386     8   115  5663   122   920   526  1884  3718  8726
## Proportion 0.031 0.002 0.000 0.001 0.036 0.001 0.006 0.003 0.012 0.024 0.056
##                                         
## Value       2070  2080  2090  2100  4100
## Frequency  27795 44086 39921 17409     1
## Proportion 0.178 0.282 0.256 0.111 0.000
## --------------------------------------------------------------------------------
## waitlist_days : TOTAL DAYS ON WAITING LIST/INCLUDING INACTIVE TIME  Format:F5.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   156144        0     3694        1    620.5    670.4       17       42 
##      .25      .50      .75      .90      .95 
##      130      382      921     1543     1954 
## 
## lowest :    0    1    2    3    4, highest: 7091 7120 7657 8783 9381
## --------------------------------------------------------------------------------
## age_init : CALCULATED AGE AT LISTING  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   156144        0       73    0.999    50.75    15.06       26       31 
##      .25      .50      .75      .90      .95 
##       42       52       61       67       70 
## 
## lowest :  8 18 19 20 21, highest: 85 86 87 88 89
## --------------------------------------------------------------------------------
## ethnicity : TCR ETHNICITY  Format:F1.0 
##        n  missing distinct     Info     Mean      Gmd 
##   156144        0        1        0        0        0 
##                  
## Value           0
## Frequency  156144
## Proportion      1
## --------------------------------------------------------------------------------
## race_binary : ETHNICITY CATEGORY  Format:F3.0 
##        n  missing distinct     Info     Mean      Gmd 
##   156144        0        2    0.671    1.338   0.4476 
##                         
## Value           1      2
## Frequency  103346  52798
## Proportion  0.662  0.338
## --------------------------------------------------------------------------------
## PT_CODE : ENCRYPTED PATIENT IDENTIFIER  Format:F7.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   156144        0   152422        1   801310   324099   188835   380083 
##      .25      .50      .75      .90      .95 
##   641304   824442  1019134  1159733  1217668 
## 
## lowest :      17      20      22      51      55
## highest: 1314759 1315216 1315395 1315814 1323019
## --------------------------------------------------------------------------------
## region  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   156144        0       11    0.988    5.964    3.768        2        2 
##      .25      .50      .75      .90      .95 
##        3        5        9       11       11 
## 
## lowest :  1  2  3  4  5, highest:  7  8  9 10 11
##                                                                             
## Value          1     2     3     4     5     6     7     8     9    10    11
## Frequency   7258 23185 22527 10894 15640  4893 14991 10451 11191 16607 18507
## Proportion 0.046 0.148 0.144 0.070 0.100 0.031 0.096 0.067 0.072 0.106 0.119
## --------------------------------------------------------------------------------
## working_income : TCR WORKING FOR INCOME:  Format:A3 
##        n  missing distinct 
##   156144        0        2 
##                       
## Value          N     Y
## Frequency  92973 63171
## Proportion 0.595 0.405
## --------------------------------------------------------------------------------
## payment_method : TCR KIDNEY PRIMARY PROJECTED SOURCE PAY  Format:F2.0 
##        n  missing distinct     Info     Mean      Gmd 
##   156138        6        9     0.84    2.121    1.318 
## 
## lowest :  1  2  3  4  5, highest:  5  6  7 13 14
##                                                                 
## Value          1     2     3     4     5     6     7    13    14
## Frequency  78668  8002 48752 17796     7  1670  1182    59     2
## Proportion 0.504 0.051 0.312 0.114 0.000 0.011 0.008 0.000 0.000
## --------------------------------------------------------------------------------
## donor_type : DONOR TYPE - DECEASED, LIVING OR FOREIGN  Format:A9 
##        n  missing distinct 
##   156144        0        2 
##                         
## Value           C      L
## Frequency  102368  53776
## Proportion  0.656  0.344
## --------------------------------------------------------------------------------
## DISTANCE : DISTANCE DONOR HOSP TO TX CENTER (Nautical Miles)  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   156134       10     2353    0.933    161.4    264.4        0        0 
##      .25      .50      .75      .90      .95 
##        0        8      121      500      921 
## 
## lowest :    0    1    2    3    4, highest: 4340 4346 4347 4407 4409
## --------------------------------------------------------------------------------
## DIAG_KI : Kidney Diagnosis from TRR/TCR  Format:F4.0 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##   155512      632       72    0.981     2928    233.1      999     3004 
##      .25      .50      .75      .90      .95 
##     3008     3040     3069     3070     3070 
## 
## lowest :  999 3000 3001 3002 3003, highest: 3070 3071 3072 3073 3074
##                                                                             
## Value       1000  3000  3005  3010  3015  3020  3025  3030  3035  3040  3045
## Frequency   8428  2442 19356 17533   840  4615  1467  2456 13633 37441  1258
## Proportion 0.054 0.016 0.124 0.113 0.005 0.030 0.009 0.016 0.088 0.241 0.008
##                                               
## Value       3050  3055  3060  3065  3070  3075
## Frequency   3230  1511   504   756 38973  1069
## Proportion 0.021 0.010 0.003 0.005 0.251 0.007
## 
## For the frequency table, variable is rounded to the nearest 5
## --------------------------------------------------------------------------------
## locality_type : Share Type  Format:F1.0 
##        n  missing distinct     Info     Mean      Gmd 
##   156144        0        3    0.457    3.294   0.4955 
##                                
## Value           3      4      5
## Frequency  127245  11917  16982
## Proportion  0.815  0.076  0.109
## --------------------------------------------------------------------------------
## age_group  Format:A3 
##        n  missing distinct 
##   156144        0        2 
##                         
## Value           A      P
## Frequency  156143      1
## Proportion      1      0
## --------------------------------------------------------------------------------
## age_group_fac 
##        n  missing distinct 
##   156144        0        2 
##                         
## Value           A      P
## Frequency  156143      1
## Proportion      1      0
## --------------------------------------------------------------------------------
## donor_type_fac : DONOR TYPE - DECEASED, LIVING OR FOREIGN 
##        n  missing distinct 
##   156144        0        2 
##                                         
## Value      deceased donor   living donor
## Frequency          102368          53776
## Proportion          0.656          0.344
## --------------------------------------------------------------------------------
## region_fac 
##        n  missing distinct 
##   156144        0       11 
## 
## lowest : 1  2  3  4  5 , highest: 7  8  9  10 11
##                                                                             
## Value          1     2     3     4     5     6     7     8     9    10    11
## Frequency   7258 23185 22527 10894 15640  4893 14991 10451 11191 16607 18507
## Proportion 0.046 0.148 0.144 0.070 0.100 0.031 0.096 0.067 0.072 0.106 0.119
## --------------------------------------------------------------------------------
## working_income_fac : TCR WORKING FOR INCOME: 
##        n  missing distinct 
##   156144        0        2 
##                       
## Value         no   yes
## Frequency  92973 63171
## Proportion 0.595 0.405
## --------------------------------------------------------------------------------
## payment_method_fac 
##        n  missing distinct 
##   156138        6        9 
## 
## lowest : Private Insurance         Public InsuranceMedicaid  PublicMedicare FFS        PublicMedicareCh          PublicCHIP               
## highest: PublicCHIP                PublicVA                  PublicOthrGovt            PublicMedicareunknowntype StateGovtAgency          
## 
## Private Insurance (78668, 0.504), Public InsuranceMedicaid (8002, 0.051),
## PublicMedicare FFS (48752, 0.312), PublicMedicareCh (17796, 0.114), PublicCHIP
## (7, 0.000), PublicVA (1670, 0.011), PublicOthrGovt (1182, 0.008),
## PublicMedicareunknowntype (59, 0.000), StateGovtAgency (2, 0.000)
## --------------------------------------------------------------------------------
## dialysis_fac : WL MOST RECENT CANDIDATE ON DIALYSIS? 
##        n  missing distinct 
##   156144        0        2 
##                         
## Value          no    yes
## Frequency   48517 107627
## Proportion  0.311  0.689
## --------------------------------------------------------------------------------
## gender_fac : TCR RECIPIENT GENDER 
##        n  missing distinct 
##   156144        0        2 
##                         
## Value        Male Female
## Frequency   95908  60236
## Proportion  0.614  0.386
## --------------------------------------------------------------------------------
## abo_type_fac : TCR ABO BLOOD GROUP 
##        n  missing distinct 
##   156144        0        4 
##                                   
## Value          O     B    AB     A
## Frequency  67779 20602  8037 59726
## Proportion 0.434 0.132 0.051 0.383
## --------------------------------------------------------------------------------
## education_fac 
##        n  missing distinct 
##   156134       10        5 
## 
## lowest : Grade School              Highschool or GED         Attended Some College     Associate/Bachelor Degree Post-Graduate            
## highest: Grade School              Highschool or GED         Attended Some College     Associate/Bachelor Degree Post-Graduate            
## 
## Grade School (3097, 0.020), Highschool or GED (62652, 0.401), Attended Some
## College (43567, 0.279), Associate/Bachelor Degree (32345, 0.207), Post-Graduate
## (14473, 0.093)
## --------------------------------------------------------------------------------
## race_fac 
##        n  missing distinct 
##   156144        0        2 
##                         
## Value       White  Black
## Frequency  103346  52798
## Proportion  0.662  0.338
## --------------------------------------------------------------------------------
## ethnicity_fac 
##            n      missing     distinct        value 
##       156144            0            1 Non-Hispanic 
##                        
## Value      Non-Hispanic
## Frequency        156144
## Proportion            1
## --------------------------------------------------------------------------------
## functional_status_fac 
##        n  missing distinct 
##   156142        2       16 
## 
## lowest : 1    2    3    996  998 , highest: 2070 2080 2090 2100 4100
##                                                                             
## Value          1     2     3   996   998  2010  2020  2030  2040  2050  2060
## Frequency   4862   386     8   115  5663   122   920   526  1884  3718  8726
## Proportion 0.031 0.002 0.000 0.001 0.036 0.001 0.006 0.003 0.012 0.024 0.056
##                                         
## Value       2070  2080  2090  2100  4100
## Frequency  27795 44086 39921 17409     1
## Proportion 0.178 0.282 0.256 0.111 0.000
## --------------------------------------------------------------------------------
## organ_type_fac : ORGAN LISTED FOR 
##        n  missing distinct    value 
##   156144        0        1   Kidney 
##                  
## Value      Kidney
## Frequency  156144
## Proportion      1
## --------------------------------------------------------------------------------
## locality_type_fac 
##        n  missing distinct 
##   156144        0        3 
##                                      
## Value         Local Regional National
## Frequency    127245    11917    16982
## Proportion    0.815    0.076    0.109
## --------------------------------------------------------------------------------
## state_residence_fac : TCR State of Permanent Residence at Listing 
##        n  missing distinct 
##   156144        0       57 
## 
## lowest : PA IL FL HI WA, highest: UT NH MT NV AS
## --------------------------------------------------------------------------------
glimpse(kidney.clean1)
## Rows: 156,144
## Columns: 35
## $ organ_type            <chr> "KI", "KI", "KI", "KI", "KI", "KI", "KI", "KI",…
## $ dialysis              <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N…
## $ gender                <chr> "F", "F", "M", "M", "M", "F", "M", "F", "F", "F…
## $ abo_type              <chr> "B", "O", "A", "A", "O", "O", "A", "A", "O", "O…
## $ state_residence       <chr> "FL", "NC", "CA", "NC", "CA", "OH", "NC", "IL",…
## $ education             <dbl> 5, 3, 6, 3, 4, 3, 3, 2, NA, 3, NA, NA, 3, 4, 4,…
## $ functional_status     <dbl> 2090, 2100, 2090, 2, 2100, 1, 2, 2100, 2100, 1,…
## $ waitlist_days         <dbl> 158, 982, 352, 68, 65, 85, 14, 0, 479, 1175, 0,…
## $ age_init              <dbl> 32, 47, 66, 32, 21, 22, 31, 45, 35, 25, 28, 46,…
## $ ethnicity             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ race_binary           <dbl> 1, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1,…
## $ PT_CODE               <dbl> 429440, 428022, 113034, 443431, 398160, 122076,…
## $ region                <dbl> 3, 11, 5, 11, 5, 10, 11, 7, 10, 3, 1, 6, 10, 3,…
## $ working_income        <chr> "Y", "N", "N", "Y", "N", "Y", "N", "N", "Y", "N…
## $ payment_method        <dbl> 1, 1, 1, 3, 3, 3, 3, 3, 1, 4, 2, NA, 3, 13, 2, …
## $ donor_type            <chr> "C", "C", "C", "C", "C", "C", "C", "C", "C", "C…
## $ DISTANCE              <dbl> 148, 8, 20, 732, 59, 205, 412, 3, 1, 114, 42, 8…
## $ DIAG_KI               <dbl> 3041, 3008, 3009, 3031, 3040, 3030, 3031, 3034,…
## $ locality_type         <dbl> 4, 3, 3, 5, 3, 4, 5, 3, 3, 4, 3, 3, 3, 5, 3, 3,…
## $ age_group             <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A…
## $ age_group_fac         <fct> A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,…
## $ donor_type_fac        <fct> deceased donor, deceased donor, deceased donor,…
## $ region_fac            <fct> 3, 11, 5, 11, 5, 10, 11, 7, 10, 3, 1, 6, 10, 3,…
## $ working_income_fac    <fct> yes, no, no, yes, no, yes, no, no, yes, no, no,…
## $ payment_method_fac    <fct> Private Insurance, Private Insurance, Private I…
## $ dialysis_fac          <fct> no, no, no, no, no, no, no, no, no, no, no, no,…
## $ gender_fac            <fct> Female, Female, Male, Male, Male, Female, Male,…
## $ abo_type_fac          <fct> B, O, A, A, O, O, A, A, O, O, O, O, A, O, A, O,…
## $ education_fac         <fct> Associate/Bachelor Degree, Highschool or GED, P…
## $ race_fac              <fct> White, Black, White, White, Black, White, White…
## $ ethnicity_fac         <fct> Non-Hispanic, Non-Hispanic, Non-Hispanic, Non-H…
## $ functional_status_fac <fct> 2090, 2100, 2090, 2, 2100, 1, 2, 2100, 2100, 1,…
## $ organ_type_fac        <fct> Kidney, Kidney, Kidney, Kidney, Kidney, Kidney,…
## $ locality_type_fac     <fct> Regional, Local, Local, National, Local, Region…
## $ state_residence_fac   <fct> FL, NC, CA, NC, CA, OH, NC, IL, ZZ, ZZ, MA, OR,…

Load Some Packages

library(tidyverse)
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
## 
##     describe
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(lme4)

Visualizing the cont. variables

hist(kidney.clean1$age_init, 
     main = "Distribution of age at registration for kidney transplant",
     sub = "N = 156,144 Ages of patients. Data from UNOS, STAR data.",
     xlab = "Ages in years")

hist(kidney.clean1$waitlist_days, 
     main = "Distribution of days spent on kidney waitlist",
     sub = "N = 156,144  Days on waitlist. Data from UNOS, STAR data.",
     xlab = "Total # of day waiting")

table(kidney.clean1$state_residence_fac)
## 
##    PA    IL    FL    HI    WA    VA    CA    OH    LA    MA    ZZ    DE    OR 
##  9029  6953  9089    97  2630  5439 10207  7040  2538  3559    14   600  1416 
##    WY    IN    NM    NY    MD    WV    AR    IA    OK    RI    KY    DC    NE 
##   210  3465   429 11221  4914   982  1394  1979  1699   514  1805   508  1223 
##    WI    AZ    CO    MO    AL    NC          SC    TX    CT    KS    TN    MN 
##  3623  3017  2179  3186  2782  5544     2  3125  9130  1910  1062  3595  3031 
##    NJ    NA    ND    MI    AK    ID    ME    VT    VI    GA    MS    SD    PR 
##  5489   103   363  6610   205   574   633   200    54  5358  1955   503     7 
##    UT    NH    MT    NV    GU    AS    MP 
##  1085   650   390   824     0     1     0

##Null Model with states

models <- list()
model.0 <- lmer(waitlist_days ~ (1|state_residence_fac), REML = FALSE, data = kidney.clean1)
summary(model.0)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: waitlist_days ~ (1 | state_residence_fac)
##    Data: kidney.clean1
## 
##      AIC      BIC   logLik deviance df.resid 
##  2467791  2467821 -1233892  2467785   156141 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.2433 -0.7306 -0.3484  0.4701 13.5579 
## 
## Random effects:
##  Groups              Name        Variance Std.Dev.
##  state_residence_fac (Intercept)   9862    99.31  
##  Residual                        427379   653.74  
## Number of obs: 156144, groups:  state_residence_fac, 57
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   590.32      13.98   42.22

Calculate ICC

null.ICC <- 9862/(9862 + 427379)
null.ICC
## [1] 0.02255507

#null model w/region ICC =.002) sticking with states!

model.null <- lmer(waitlist_days ~ (1|region_fac), REML = FALSE, data = kidney.clean1)
summary(model.null)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: waitlist_days ~ (1 | region_fac)
##    Data: kidney.clean1
## 
##      AIC      BIC   logLik deviance df.resid 
##  2469732  2469762 -1234863  2469726   156141 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.0088 -0.7409 -0.3638  0.4585 13.3721 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  region_fac (Intercept)   1047    32.36  
##  Residual               433158   658.15  
## Number of obs: 156144, groups:  region_fac, 11
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  618.376      9.928   62.29

Using lmerTest to Evaluate Random Effects:

lmerTest::rand(model.0)
## ANOVA-like table for random-effects: Single term deletions
## 
## Model:
## waitlist_days ~ (1 | state_residence_fac)
##                           npar   logLik     AIC    LRT Df Pr(>Chisq)    
## <none>                       3 -1233892 2467791                         
## (1 | state_residence_fac)    2 -1235001 2470006 2217.5  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Step Two: Adding Level One Demographics

model.1 <- lmer(waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac + (1|state_residence_fac), REML = FALSE, data = kidney.clean1)
summary(model.1)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac +  
##     (1 | state_residence_fac)
##    Data: kidney.clean1
## 
##      AIC      BIC   logLik deviance df.resid 
##  2459434  2459523 -1229708  2459416   156135 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6742 -0.6939 -0.3021  0.4810 13.7981 
## 
## Random effects:
##  Groups              Name        Variance Std.Dev.
##  state_residence_fac (Intercept)   8117    90.1   
##  Residual                        405093   636.5   
## Number of obs: 156144, groups:  state_residence_fac, 57
## 
## Fixed effects:
##                   Estimate Std. Error t value
## (Intercept)       748.1171    14.4358  51.824
## age_init           -3.1594     0.1228 -25.732
## race_facBlack     244.5338     3.6324  67.321
## abo_type_facB     -24.8685     5.0831  -4.892
## abo_type_facAB   -266.8640     7.5157 -35.508
## abo_type_facA    -131.3870     3.5990 -36.507
## gender_facFemale    6.5721     3.3134   1.983
## 
## Correlation of Fixed Effects:
##             (Intr) age_nt rc_fcB ab_t_B ab__AB ab_t_A
## age_init    -0.438                                   
## race_fcBlck -0.115  0.095                            
## abo_typ_fcB -0.070 -0.013 -0.079                     
## ab_typ_fcAB -0.046 -0.020 -0.006  0.158              
## abo_typ_fcA -0.117 -0.018  0.100  0.319  0.222       
## gendr_fcFml -0.102  0.030 -0.018  0.014  0.007  0.011

Adding additional level one predictors

model.2 <- lmer(waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac + payment_method_fac + donor_type_fac + region_fac + working_income_fac + dialysis_fac + education_fac + locality_type_fac + payment_method_fac + (1|state_residence_fac), REML = FALSE, data = kidney.clean1)
summary(model.2)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac +  
##     payment_method_fac + donor_type_fac + region_fac + working_income_fac +  
##     dialysis_fac + education_fac + locality_type_fac + payment_method_fac +  
##     (1 | state_residence_fac)
##    Data: kidney.clean1
## 
##      AIC      BIC   logLik deviance df.resid 
##  2440537  2440896 -1220233  2440465   156098 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1938 -0.6598 -0.1860  0.4559 14.6642 
## 
## Random effects:
##  Groups              Name        Variance Std.Dev.
##  state_residence_fac (Intercept)  11542   107.4   
##  Residual                        359100   599.2   
## Number of obs: 156134, groups:  state_residence_fac, 57
## 
## Fixed effects:
##                                              Estimate Std. Error  t value
## (Intercept)                                 1168.0688    29.7558   39.255
## age_init                                      -4.1607     0.1219  -34.133
## race_facBlack                                130.8743     3.6286   36.068
## abo_type_facB                                -16.6387     4.7882   -3.475
## abo_type_facAB                              -299.5063     7.0832  -42.284
## abo_type_facA                               -140.6105     3.3909  -41.467
## gender_facFemale                               9.6067     3.1442    3.055
## payment_method_facPublic InsuranceMedicaid   -41.6684     7.3625   -5.660
## payment_method_facPublicMedicare FFS        -119.7092     3.9150  -30.577
## payment_method_facPublicMedicareCh           -78.2294     5.3200  -14.705
## payment_method_facPublicCHIP                -347.3413   226.6246   -1.533
## payment_method_facPublicVA                    -1.3045    15.5871   -0.084
## payment_method_facPublicOthrGovt             -86.2251    17.6622   -4.882
## payment_method_facPublicMedicareunknowntype   85.2222    78.1322    1.091
## payment_method_facStateGovtAgency           -454.7059   426.5394   -1.066
## donor_type_facliving donor                  -506.3443     3.7553 -134.834
## region_fac2                                 -101.9603    24.5765   -4.149
## region_fac3                                 -116.0996    26.1561   -4.439
## region_fac4                                 -143.5089    32.5809   -4.405
## region_fac5                                   -7.6698    30.5983   -0.251
## region_fac6                                  -57.0078    34.8710   -1.635
## region_fac7                                 -119.4984    27.6505   -4.322
## region_fac8                                 -190.5175    28.3204   -6.727
## region_fac9                                  -67.0145    24.5415   -2.731
## region_fac10                                 -82.9880    27.5949   -3.007
## region_fac11                                -179.9094    25.8055   -6.972
## working_income_facyes                         54.3605     3.5720   15.219
## dialysis_facyes                               18.0189     3.6393    4.951
## education_facHighschool or GED                 1.7638    11.0791    0.159
## education_facAttended Some College           -19.9793    11.2387   -1.778
## education_facAssociate/Bachelor Degree       -35.5921    11.4061   -3.120
## education_facPost-Graduate                   -35.1780    12.0206   -2.926
## locality_type_facRegional                   -154.5246     5.9961  -25.771
## locality_type_facNational                   -176.8417     5.2113  -33.935
## 
## Correlation matrix not shown by default, as p = 34 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

Testing the Interaction of race_fac and region_fac

model.3 <- lmer(waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac + payment_method_fac + donor_type_fac + region_fac + working_income_fac + dialysis_fac + education_fac + locality_type_fac + payment_method_fac + race_fac:region_fac + (1|state_residence_fac), REML = FALSE, data = kidney.clean1)
summary(model.3)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac +  
##     payment_method_fac + donor_type_fac + region_fac + working_income_fac +  
##     dialysis_fac + education_fac + locality_type_fac + payment_method_fac +  
##     race_fac:region_fac + (1 | state_residence_fac)
##    Data: kidney.clean1
## 
##      AIC      BIC   logLik deviance df.resid 
##  2440187  2440645 -1220048  2440095   156088 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2173 -0.6589 -0.1844  0.4561 14.7022 
## 
## Random effects:
##  Groups              Name        Variance Std.Dev.
##  state_residence_fac (Intercept)  11090   105.3   
##  Residual                        358254   598.5   
## Number of obs: 156134, groups:  state_residence_fac, 57
## 
## Fixed effects:
##                                              Estimate Std. Error  t value
## (Intercept)                                 1143.4271    29.7452   38.441
## age_init                                      -4.1375     0.1218  -33.972
## race_facBlack                                220.0565    18.2536   12.056
## abo_type_facB                                -16.0795     4.7831   -3.362
## abo_type_facAB                              -299.1407     7.0758  -42.277
## abo_type_facA                               -140.7723     3.3870  -41.562
## gender_facFemale                               9.1638     3.1416    2.917
## payment_method_facPublic InsuranceMedicaid   -45.2032     7.3611   -6.141
## payment_method_facPublicMedicare FFS        -119.3317     3.9118  -30.505
## payment_method_facPublicMedicareCh           -78.3391     5.3153  -14.738
## payment_method_facPublicCHIP                -331.9611   226.3765   -1.466
## payment_method_facPublicVA                     7.3187    15.5949    0.469
## payment_method_facPublicOthrGovt             -89.2746    17.6496   -5.058
## payment_method_facPublicMedicareunknowntype   86.8837    78.0454    1.113
## payment_method_facStateGovtAgency           -462.7279   425.9665   -1.086
## donor_type_facliving donor                  -506.9768     3.7521 -135.119
## region_fac2                                  -66.3274    24.9245   -2.661
## region_fac3                                 -106.1609    26.4974   -4.006
## region_fac4                                 -114.0882    32.8669   -3.471
## region_fac5                                    5.6867    30.6982    0.185
## region_fac6                                  -18.5270    35.0750   -0.528
## region_fac7                                 -114.2948    27.8338   -4.106
## region_fac8                                 -140.3065    28.5536   -4.914
## region_fac9                                  -76.5451    24.8950   -3.075
## region_fac10                                 -44.1774    27.8053   -1.589
## region_fac11                                -150.4766    26.2479   -5.733
## working_income_facyes                         54.7766     3.5680   15.352
## dialysis_facyes                               17.6835     3.6352    4.864
## education_facHighschool or GED                 4.6797    11.0691    0.423
## education_facAttended Some College           -16.4393    11.2293   -1.464
## education_facAssociate/Bachelor Degree       -31.6635    11.3965   -2.778
## education_facPost-Graduate                   -30.3841    12.0125   -2.529
## locality_type_facRegional                   -154.5841     5.9908  -25.803
## locality_type_facNational                   -179.9523     5.2148  -34.508
## race_facBlack:region_fac2                   -130.7419    20.0402   -6.524
## race_facBlack:region_fac3                    -65.5048    19.9137   -3.289
## race_facBlack:region_fac4                   -120.7868    21.7627   -5.550
## race_facBlack:region_fac5                    -33.1259    21.7110   -1.526
## race_facBlack:region_fac6                   -206.3109    32.5475   -6.339
## race_facBlack:region_fac7                      4.3984    21.8904    0.201
## race_facBlack:region_fac8                   -224.5291    23.3844   -9.602
## race_facBlack:region_fac9                     16.3925    21.7015    0.755
## race_facBlack:region_fac10                  -152.2610    20.8692   -7.296
## race_facBlack:region_fac11                  -105.7348    20.2818   -5.213
## 
## Correlation matrix not shown by default, as p = 44 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
table(kidney.clean1$race_fac)
## 
##                  White                  Black               Hispanic 
##                 103346                  52798                      0 
##                  Asian Amer Ind/Alaska Native   Native Hawaiin/other 
##                      0                      0                      0 
##           Multi-racial                Unknown 
##                      0                      0

Should We Include That Interaction? Comparing model.2 with model.3:

anova(model.2, model.3)
## Data: kidney.clean1
## Models:
## model.2: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac + 
## model.2:     payment_method_fac + donor_type_fac + region_fac + working_income_fac + 
## model.2:     dialysis_fac + education_fac + locality_type_fac + payment_method_fac + 
## model.2:     (1 | state_residence_fac)
## model.3: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac + 
## model.3:     payment_method_fac + donor_type_fac + region_fac + working_income_fac + 
## model.3:     dialysis_fac + education_fac + locality_type_fac + payment_method_fac + 
## model.3:     race_fac:region_fac + (1 | state_residence_fac)
##         npar     AIC     BIC   logLik deviance  Chisq Df Pr(>Chisq)    
## model.2   36 2440537 2440896 -1220233  2440465                         
## model.3   46 2440187 2440645 -1220048  2440095 370.21 10  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Interaction w/Blood type & race

model.4 <- lmer(waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac + donor_type_fac + region_fac + working_income_fac + dialysis_fac + education_fac + locality_type_fac + payment_method_fac + abo_type_fac:race_fac +  (1|state_residence_fac), REML = FALSE, data = kidney.clean1)
summary(model.4)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac +  
##     donor_type_fac + region_fac + working_income_fac + dialysis_fac +  
##     education_fac + locality_type_fac + payment_method_fac +  
##     abo_type_fac:race_fac + (1 | state_residence_fac)
##    Data: kidney.clean1
## 
##      AIC      BIC   logLik deviance df.resid 
##  2440400  2440789 -1220161  2440322   156095 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2217 -0.6572 -0.1856  0.4539 14.6872 
## 
## Random effects:
##  Groups              Name        Variance Std.Dev.
##  state_residence_fac (Intercept)  11552   107.5   
##  Residual                        358771   599.0   
## Number of obs: 156134, groups:  state_residence_fac, 57
## 
## Fixed effects:
##                                              Estimate Std. Error  t value
## (Intercept)                                 1159.7098    29.7765   38.947
## age_init                                      -4.1653     0.1218  -34.187
## race_facBlack                                154.6677     5.0432   30.669
## abo_type_facB                                -28.3695     6.4872   -4.373
## abo_type_facAB                              -253.3974     8.9572  -28.290
## abo_type_facA                               -122.7956     4.0546  -30.285
## gender_facFemale                               9.6617     3.1429    3.074
## donor_type_facliving donor                  -506.0810     3.7543 -134.800
## region_fac2                                 -101.7058    24.5689   -4.140
## region_fac3                                 -115.8045    26.1483   -4.429
## region_fac4                                 -144.7423    32.5707   -4.444
## region_fac5                                   -8.0079    30.5903   -0.262
## region_fac6                                  -57.3398    34.8618   -1.645
## region_fac7                                 -118.8662    27.6427   -4.300
## region_fac8                                 -190.3373    28.3125   -6.723
## region_fac9                                  -66.4921    24.5330   -2.710
## region_fac10                                 -82.2540    27.5864   -2.982
## region_fac11                                -180.4044    25.7979   -6.993
## working_income_facyes                         54.5259     3.5704   15.272
## dialysis_facyes                               18.0247     3.6376    4.955
## education_facHighschool or GED                 1.4479    11.0740    0.131
## education_facAttended Some College           -20.3060    11.2336   -1.808
## education_facAssociate/Bachelor Degree       -35.8349    11.4009   -3.143
## education_facPost-Graduate                   -35.5149    12.0153   -2.956
## locality_type_facRegional                   -154.6426     5.9934  -25.802
## locality_type_facNational                   -176.9271     5.2091  -33.965
## payment_method_facPublic InsuranceMedicaid   -41.6610     7.3592   -5.661
## payment_method_facPublicMedicare FFS        -119.7221     3.9132  -30.594
## payment_method_facPublicMedicareCh           -78.0238     5.3176  -14.673
## payment_method_facPublicCHIP                -343.7990   226.5221   -1.518
## payment_method_facPublicVA                    -0.5974    15.5802   -0.038
## payment_method_facPublicOthrGovt             -85.6412    17.6542   -4.851
## payment_method_facPublicMedicareunknowntype   84.3044    78.0966    1.079
## payment_method_facStateGovtAgency           -444.3041   426.3497   -1.042
## race_facBlack:abo_type_facB                   18.5547     9.6262    1.928
## race_facBlack:abo_type_facAB                -123.0991    14.6055   -8.428
## race_facBlack:abo_type_facA                  -59.8320     7.3981   -8.087
## 
## Correlation matrix not shown by default, as p = 37 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

Should We Include Blood type & race Interaction? Comparing model.2&model.4:

anova(model.2, model.4)
## Data: kidney.clean1
## Models:
## model.2: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac + 
## model.2:     payment_method_fac + donor_type_fac + region_fac + working_income_fac + 
## model.2:     dialysis_fac + education_fac + locality_type_fac + payment_method_fac + 
## model.2:     (1 | state_residence_fac)
## model.4: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac + 
## model.4:     donor_type_fac + region_fac + working_income_fac + dialysis_fac + 
## model.4:     education_fac + locality_type_fac + payment_method_fac + 
## model.4:     abo_type_fac:race_fac + (1 | state_residence_fac)
##         npar     AIC     BIC   logLik deviance  Chisq Df Pr(>Chisq)    
## model.2   36 2440537 2440896 -1220233  2440465                         
## model.4   39 2440400 2440789 -1220161  2440322 142.93  3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Should We Include either Interaction? Comparing model.2,‘model.3’ model.4:

anova(model.2, model.3, model.4)
## Data: kidney.clean1
## Models:
## model.2: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac + 
## model.2:     payment_method_fac + donor_type_fac + region_fac + working_income_fac + 
## model.2:     dialysis_fac + education_fac + locality_type_fac + payment_method_fac + 
## model.2:     (1 | state_residence_fac)
## model.4: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac + 
## model.4:     donor_type_fac + region_fac + working_income_fac + dialysis_fac + 
## model.4:     education_fac + locality_type_fac + payment_method_fac + 
## model.4:     abo_type_fac:race_fac + (1 | state_residence_fac)
## model.3: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac + 
## model.3:     payment_method_fac + donor_type_fac + region_fac + working_income_fac + 
## model.3:     dialysis_fac + education_fac + locality_type_fac + payment_method_fac + 
## model.3:     race_fac:region_fac + (1 | state_residence_fac)
##         npar     AIC     BIC   logLik deviance  Chisq Df Pr(>Chisq)    
## model.2   36 2440537 2440896 -1220233  2440465                         
## model.4   39 2440400 2440789 -1220161  2440322 142.93  3  < 2.2e-16 ***
## model.3   46 2440187 2440645 -1220048  2440095 227.28  7  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Random slope of race

model.5 <- lmer(waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac + donor_type_fac + region_fac + working_income_fac + dialysis_fac + education_fac + locality_type_fac + payment_method_fac + (race_fac|state_residence_fac), REML = FALSE, data = kidney.clean1)
summary(model.5)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac +  
##     donor_type_fac + region_fac + working_income_fac + dialysis_fac +  
##     education_fac + locality_type_fac + payment_method_fac +  
##     (race_fac | state_residence_fac)
##    Data: kidney.clean1
## 
##      AIC      BIC   logLik deviance df.resid 
##  2440086  2440464 -1220005  2440010   156096 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.3481 -0.6561 -0.1855  0.4560 14.6839 
## 
## Random effects:
##  Groups              Name          Variance Std.Dev. Corr
##  state_residence_fac (Intercept)     7693    87.71       
##                      race_facBlack   9446    97.19   0.74
##  Residual                          357896   598.24       
## Number of obs: 156134, groups:  state_residence_fac, 57
## 
## Fixed effects:
##                                              Estimate Std. Error  t value
## (Intercept)                                 1150.4866    26.8196   42.897
## age_init                                      -4.1510     0.1217  -34.095
## race_facBlack                                 98.2602    14.9640    6.566
## abo_type_facB                                -16.6877     4.7813   -3.490
## abo_type_facAB                              -298.8500     7.0730  -42.252
## abo_type_facA                               -141.0749     3.3857  -41.667
## gender_facFemale                               9.2530     3.1402    2.947
## donor_type_facliving donor                  -506.0428     3.7509 -134.911
## region_fac2                                  -75.3480    22.4469   -3.357
## region_fac3                                  -97.6833    23.6948   -4.123
## region_fac4                                 -119.2632    29.6358   -4.024
## region_fac5                                    4.2757    27.2000    0.157
## region_fac6                                  -34.1545    31.1483   -1.097
## region_fac7                                  -96.3049    24.8256   -3.879
## region_fac8                                 -146.8840    25.4030   -5.782
## region_fac9                                  -52.2786    22.9779   -2.275
## region_fac10                                 -52.0251    25.0782   -2.075
## region_fac11                                -152.5990    23.4115   -6.518
## working_income_facyes                         54.5967     3.5665   15.308
## dialysis_facyes                               17.7286     3.6340    4.879
## education_facHighschool or GED                 2.3797    11.0669    0.215
## education_facAttended Some College           -19.0359    11.2276   -1.695
## education_facAssociate/Bachelor Degree       -34.3231    11.3946   -3.012
## education_facPost-Graduate                   -33.1481    12.0103   -2.760
## locality_type_facRegional                   -154.5888     5.9906  -25.805
## locality_type_facNational                   -178.8519     5.2124  -34.313
## payment_method_facPublic InsuranceMedicaid   -45.5518     7.3601   -6.189
## payment_method_facPublicMedicare FFS        -119.5218     3.9109  -30.561
## payment_method_facPublicMedicareCh           -78.4868     5.3138  -14.770
## payment_method_facPublicCHIP                -335.2976   226.2627   -1.482
## payment_method_facPublicVA                    -5.0642    15.5176   -0.326
## payment_method_facPublicOthrGovt             -90.3661    17.6385   -5.123
## payment_method_facPublicMedicareunknowntype   99.1562    78.0198    1.271
## payment_method_facStateGovtAgency           -466.5132   424.8898   -1.098
## 
## Correlation matrix not shown by default, as p = 34 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

Using the modelsummary and broom.mixed Packages to Organize Your Results:

library(modelsummary)
## 
## Attaching package: 'modelsummary'
## The following object is masked from 'package:psych':
## 
##     SD
## The following object is masked from 'package:Hmisc':
## 
##     Mean
library(broom.mixed)
## Registered S3 method overwritten by 'broom.mixed':
##   method      from 
##   tidy.gamlss broom
models <- list(model.0, model.1, model.2, model.3, model.4, model.5)
modelsummary(models)
Model 1 Model 2 Model 3 Model 4 Model 5 Model 6
(Intercept) 590.320 748.117 1168.069 1143.427 1159.710 1150.487
(13.982) (14.436) (29.756) (29.745) (29.777) (26.820)
sd__(Intercept) 99.310 90.096 107.433 105.308 107.478 87.710
sd__Observation 653.742 636.469 599.249 598.543 598.975 598.244
age_init -3.159 -4.161 -4.138 -4.165 -4.151
(0.123) (0.122) (0.122) (0.122) (0.122)
race_facBlack 244.534 130.874 220.056 154.668 98.260
(3.632) (3.629) (18.254) (5.043) (14.964)
abo_type_facB -24.869 -16.639 -16.080 -28.369 -16.688
(5.083) (4.788) (4.783) (6.487) (4.781)
abo_type_facAB -266.864 -299.506 -299.141 -253.397 -298.850
(7.516) (7.083) (7.076) (8.957) (7.073)
abo_type_facA -131.387 -140.611 -140.772 -122.796 -141.075
(3.599) (3.391) (3.387) (4.055) (3.386)
gender_facFemale 6.572 9.607 9.164 9.662 9.253
(3.313) (3.144) (3.142) (3.143) (3.140)
payment_method_facPublic InsuranceMedicaid -41.668 -45.203 -41.661 -45.552
(7.362) (7.361) (7.359) (7.360)
payment_method_facPublicMedicare FFS -119.709 -119.332 -119.722 -119.522
(3.915) (3.912) (3.913) (3.911)
payment_method_facPublicMedicareCh -78.229 -78.339 -78.024 -78.487
(5.320) (5.315) (5.318) (5.314)
payment_method_facPublicCHIP -347.341 -331.961 -343.799 -335.298
(226.625) (226.377) (226.522) (226.263)
payment_method_facPublicVA -1.304 7.319 -0.597 -5.064
(15.587) (15.595) (15.580) (15.518)
payment_method_facPublicOthrGovt -86.225 -89.275 -85.641 -90.366
(17.662) (17.650) (17.654) (17.639)
payment_method_facPublicMedicareunknowntype 85.222 86.884 84.304 99.156
(78.132) (78.045) (78.097) (78.020)
payment_method_facStateGovtAgency -454.706 -462.728 -444.304 -466.513
(426.539) (425.966) (426.350) (424.890)
donor_type_facliving donor -506.344 -506.977 -506.081 -506.043
(3.755) (3.752) (3.754) (3.751)
region_fac2 -101.960 -66.327 -101.706 -75.348
(24.576) (24.925) (24.569) (22.447)
region_fac3 -116.100 -106.161 -115.805 -97.683
(26.156) (26.497) (26.148) (23.695)
region_fac4 -143.509 -114.088 -144.742 -119.263
(32.581) (32.867) (32.571) (29.636)
region_fac5 -7.670 5.687 -8.008 4.276
(30.598) (30.698) (30.590) (27.200)
region_fac6 -57.008 -18.527 -57.340 -34.155
(34.871) (35.075) (34.862) (31.148)
region_fac7 -119.498 -114.295 -118.866 -96.305
(27.650) (27.834) (27.643) (24.826)
region_fac8 -190.518 -140.306 -190.337 -146.884
(28.320) (28.554) (28.312) (25.403)
region_fac9 -67.015 -76.545 -66.492 -52.279
(24.542) (24.895) (24.533) (22.978)
region_fac10 -82.988 -44.177 -82.254 -52.025
(27.595) (27.805) (27.586) (25.078)
region_fac11 -179.909 -150.477 -180.404 -152.599
(25.805) (26.248) (25.798) (23.412)
working_income_facyes 54.360 54.777 54.526 54.597
(3.572) (3.568) (3.570) (3.566)
dialysis_facyes 18.019 17.683 18.025 17.729
(3.639) (3.635) (3.638) (3.634)
education_facHighschool or GED 1.764 4.680 1.448 2.380
(11.079) (11.069) (11.074) (11.067)
education_facAttended Some College -19.979 -16.439 -20.306 -19.036
(11.239) (11.229) (11.234) (11.228)
education_facAssociate/Bachelor Degree -35.592 -31.664 -35.835 -34.323
(11.406) (11.396) (11.401) (11.395)
education_facPost-Graduate -35.178 -30.384 -35.515 -33.148
(12.021) (12.012) (12.015) (12.010)
locality_type_facRegional -154.525 -154.584 -154.643 -154.589
(5.996) (5.991) (5.993) (5.991)
locality_type_facNational -176.842 -179.952 -176.927 -178.852
(5.211) (5.215) (5.209) (5.212)
race_facBlack × region_fac2 -130.742
(20.040)
race_facBlack × region_fac3 -65.505
(19.914)
race_facBlack × region_fac4 -120.787
(21.763)
race_facBlack × region_fac5 -33.126
(21.711)
race_facBlack × region_fac6 -206.311
(32.547)
race_facBlack × region_fac7 4.398
(21.890)
race_facBlack × region_fac8 -224.529
(23.384)
race_facBlack × region_fac9 16.393
(21.701)
race_facBlack × region_fac10 -152.261
(20.869)
race_facBlack × region_fac11 -105.735
(20.282)
race_facBlack × abo_type_facB 18.555
(9.626)
race_facBlack × abo_type_facAB -123.099
(14.606)
race_facBlack × abo_type_facA -59.832
(7.398)
cor__(Intercept).race_facBlack 0.736
sd__race_facBlack 97.191
AIC 2467790.8 2459433.5 2440537.3 2440187.1 2440400.4 2440085.5
BIC 2467820.7 2459523.2 2440895.8 2440645.2 2440788.7 2440463.9
Log.Lik. -1233892.417 -1229707.769 -1220232.645 -1220047.540 -1220161.180 -1220004.737

HTML Version That You Can Open in Word:

modelsummary(models, output = 'msum.html', title = 'MLM Estimates')