library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ✓ purrr 0.3.4
## ── Conflicts ─────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x dplyr::src() masks Hmisc::src()
## x dplyr::summarize() masks Hmisc::summarize()
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
deletingkidney <-haven::read_sav ("deletingkidney.sav")
glimpse(deletingkidney)
## Rows: 980,143
## Columns: 20
## $ WL_ORG <chr> "", "", "", "", "", "", "", "", "", "", "", "", ""…
## $ ON_DIALYSIS <chr> "", "", "", "", "", "", "", "", "", "", "", "", ""…
## $ GENDER <chr> "M", "M", "F", "F", "F", "F", "F", "M", "M", "M", …
## $ ABO <chr> "O", "O", "B", "B", "B", "O", "O", "AB", "A", "B",…
## $ PERM_STATE <chr> "PA", "IL", "FL", "HI", "WA", "VA", "CA", "IL", "O…
## $ EDUCATION <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 99…
## $ FUNC_STAT_TCR <dbl> 998, 2, 2, 998, 1, 1, 998, 998, 1, 998, 1, 998, 2,…
## $ DAYSWAIT_CHRON <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ INIT_AGE <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ ETHNICITY <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ ETHCAT <dbl> 1, 1, 2, 2, 1, 5, 2, 998, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ PT_CODE <dbl> 481014, 260021, 425419, 175705, 400671, 406485, 14…
## $ REGION <dbl> 2, 7, 3, 5, 6, 2, 5, 7, 10, 5, 3, 5, 3, 3, 7, 1, 2…
## $ WORK_INCOME_TCR <chr> "", "", "", "", "", "", "", "", "", "", "", "", ""…
## $ PRI_PAYMENT_TCR_KI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 13…
## $ DON_TY <chr> "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", …
## $ DISTANCE <dbl> 153, 779, 620, 9, 4, 0, 12, 205, 0, 2174, 55, 56, …
## $ DIAG_KI <dbl> 3006, 999, 3007, 3048, 999, 3041, 3041, 999, 3017,…
## $ SHARE_TY <dbl> 4, 5, 5, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3,…
## $ AGE_GROUP <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", …
str(deletingkidney)
## tibble [980,143 × 20] (S3: tbl_df/tbl/data.frame)
## $ WL_ORG : chr [1:980143] "" "" "" "" ...
## ..- attr(*, "label")= chr "ORGAN LISTED FOR"
## ..- attr(*, "format.spss")= chr "A12"
## ..- attr(*, "display_width")= int 14
## $ ON_DIALYSIS : chr [1:980143] "" "" "" "" ...
## ..- attr(*, "label")= chr "WL MOST RECENT CANDIDATE ON DIALYSIS?"
## ..- attr(*, "format.spss")= chr "A3"
## ..- attr(*, "display_width")= int 13
## $ GENDER : chr [1:980143] "M" "M" "F" "F" ...
## ..- attr(*, "label")= chr "TCR RECIPIENT GENDER"
## ..- attr(*, "format.spss")= chr "A3"
## $ ABO : chr [1:980143] "O" "O" "B" "B" ...
## ..- attr(*, "label")= chr "TCR ABO BLOOD GROUP"
## ..- attr(*, "format.spss")= chr "A9"
## ..- attr(*, "display_width")= int 11
## $ PERM_STATE : chr [1:980143] "PA" "IL" "FL" "HI" ...
## ..- attr(*, "label")= chr "TCR State of Permanent Residence at Listing"
## ..- attr(*, "format.spss")= chr "A6"
## ..- attr(*, "display_width")= int 12
## $ EDUCATION : num [1:980143] NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "label")= chr "TCR HIGHEST EDUCATION LEVEL:"
## ..- attr(*, "format.spss")= chr "F3.0"
## ..- attr(*, "display_width")= int 11
## $ FUNC_STAT_TCR : num [1:980143] 998 2 2 998 1 1 998 998 1 998 ...
## ..- attr(*, "label")= chr "TCR FUNCTIONAL STATUS @ LISTING"
## ..- attr(*, "format.spss")= chr "F4.0"
## ..- attr(*, "display_width")= int 15
## $ DAYSWAIT_CHRON : num [1:980143] NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "label")= chr "TOTAL DAYS ON WAITING LIST/INCLUDING INACTIVE TIME"
## ..- attr(*, "format.spss")= chr "F5.0"
## ..- attr(*, "display_width")= int 16
## $ INIT_AGE : num [1:980143] NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "label")= chr "CALCULATED AGE AT LISTING"
## ..- attr(*, "format.spss")= chr "F2.0"
## ..- attr(*, "display_width")= int 10
## $ ETHNICITY : num [1:980143] 0 0 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "TCR ETHNICITY"
## ..- attr(*, "format.spss")= chr "F1.0"
## ..- attr(*, "display_width")= int 11
## $ ETHCAT : num [1:980143] 1 1 2 2 1 5 2 998 1 1 ...
## ..- attr(*, "label")= chr "ETHNICITY CATEGORY"
## ..- attr(*, "format.spss")= chr "F3.0"
## $ PT_CODE : num [1:980143] 481014 260021 425419 175705 400671 ...
## ..- attr(*, "label")= chr "ENCRYPTED PATIENT IDENTIFIER"
## ..- attr(*, "format.spss")= chr "F7.0"
## ..- attr(*, "display_width")= int 9
## $ REGION : num [1:980143] 2 7 3 5 6 2 5 7 10 5 ...
## ..- attr(*, "label")= chr "region"
## ..- attr(*, "format.spss")= chr "F2.0"
## $ WORK_INCOME_TCR : chr [1:980143] "" "" "" "" ...
## ..- attr(*, "label")= chr "TCR WORKING FOR INCOME:"
## ..- attr(*, "format.spss")= chr "A3"
## ..- attr(*, "display_width")= int 17
## $ PRI_PAYMENT_TCR_KI: num [1:980143] NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "label")= chr "TCR KIDNEY PRIMARY PROJECTED SOURCE PAY"
## ..- attr(*, "format.spss")= chr "F2.0"
## ..- attr(*, "display_width")= int 20
## $ DON_TY : chr [1:980143] "C" "C" "C" "C" ...
## ..- attr(*, "label")= chr "DONOR TYPE - DECEASED, LIVING OR FOREIGN"
## ..- attr(*, "format.spss")= chr "A9"
## ..- attr(*, "display_width")= int 11
## $ DISTANCE : num [1:980143] 153 779 620 9 4 ...
## ..- attr(*, "label")= chr "DISTANCE DONOR HOSP TO TX CENTER (Nautical Miles)"
## ..- attr(*, "format.spss")= chr "F4.0"
## ..- attr(*, "display_width")= int 10
## $ DIAG_KI : num [1:980143] 3006 999 3007 3048 999 ...
## ..- attr(*, "label")= chr "Kidney Diagnosis from TRR/TCR"
## ..- attr(*, "format.spss")= chr "F4.0"
## ..- attr(*, "display_width")= int 9
## $ SHARE_TY : num [1:980143] 4 5 5 3 3 3 3 3 3 5 ...
## ..- attr(*, "label")= chr "Share Type"
## ..- attr(*, "format.spss")= chr "F1.0"
## ..- attr(*, "display_width")= int 10
## $ AGE_GROUP : chr [1:980143] "A" "A" "A" "A" ...
## ..- attr(*, "format.spss")= chr "A3"
## ..- attr(*, "display_width")= int 11
describe(deletingkidney)
## deletingkidney
##
## 20 Variables 980143 Observations
## --------------------------------------------------------------------------------
## WL_ORG : ORGAN LISTED FOR Format:A12
## n missing distinct
## 933957 46186 4
##
## Value KI KP PA PI
## Frequency 870845 40914 20598 1600
## Proportion 0.932 0.044 0.022 0.002
## --------------------------------------------------------------------------------
## ON_DIALYSIS : WL MOST RECENT CANDIDATE ON DIALYSIS? Format:A3
## n missing distinct
## 866551 113592 3
##
## Value 1 N Y
## Frequency 320 291373 574858
## Proportion 0.000 0.336 0.663
## --------------------------------------------------------------------------------
## GENDER : TCR RECIPIENT GENDER Format:A3
## n missing distinct
## 980143 0 2
##
## Value F M
## Frequency 392502 587641
## Proportion 0.4 0.6
## --------------------------------------------------------------------------------
## ABO : TCR ABO BLOOD GROUP Format:A9
## n missing distinct
## 980143 0 9
##
## lowest : A A1 A1B A2 A2B, highest: A2B AB B O UNK
##
## Value A A1 A1B A2 A2B AB B O UNK
## Frequency 322360 6614 570 1136 196 36603 139828 472810 26
## Proportion 0.329 0.007 0.001 0.001 0.000 0.037 0.143 0.482 0.000
## --------------------------------------------------------------------------------
## PERM_STATE : TCR State of Permanent Residence at Listing Format:A6
## n missing distinct
## 961641 18502 58
##
## lowest : AK AL AR AS AZ, highest: WA WI WV WY ZZ
## --------------------------------------------------------------------------------
## EDUCATION : TCR HIGHEST EDUCATION LEVEL: Format:F3.0
## n missing distinct Info Mean Gmd
## 862361 117782 8 0.932 126.1 215.5
##
## lowest : 1 2 3 4 5, highest: 4 5 6 996 998
##
## Value 1 2 3 4 5 6 996 998
## Frequency 4864 53470 321621 190618 130688 54938 5729 100433
## Proportion 0.006 0.062 0.373 0.221 0.152 0.064 0.007 0.116
## --------------------------------------------------------------------------------
## FUNC_STAT_TCR : TCR FUNCTIONAL STATUS @ LISTING Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 960908 19235 25 0.967 1304 1046 1 1
## .25 .50 .75 .90 .95
## 1 2070 2080 2090 2100
##
## lowest : 1 2 3 996 998, highest: 4060 4070 4080 4090 4100
## --------------------------------------------------------------------------------
## DAYSWAIT_CHRON : TOTAL DAYS ON WAITING LIST/INCLUDING INACTIVE TIME Format:F5.0
## n missing distinct Info Mean Gmd .05 .10
## 933768 46375 6556 1 806.9 833.1 29 69
## .25 .50 .75 .90 .95
## 200 546 1154 1891 2418
##
## lowest : 0 1 2 3 4, highest: 11592 11826 12076 12788 13741
## --------------------------------------------------------------------------------
## INIT_AGE : CALCULATED AGE AT LISTING Format:F2.0
## n missing distinct Info Mean Gmd .05 .10
## 933941 46202 92 1 47.68 16.45 22 28
## .25 .50 .75 .90 .95
## 38 49 59 66 69
##
## lowest : 0 1 2 3 4, highest: 87 88 89 90 91
## --------------------------------------------------------------------------------
## ETHNICITY : TCR ETHNICITY Format:F1.0
## n missing distinct Info Sum Mean Gmd
## 980143 0 2 0.374 143081 0.146 0.2493
##
## --------------------------------------------------------------------------------
## ETHCAT : ETHNICITY CATEGORY Format:F3.0
## n missing distinct Info Mean Gmd
## 980143 0 8 0.838 2.106 1.554
##
## lowest : 1 2 4 5 6, highest: 5 6 7 9 998
##
## Value 1 2 4 5 6 7 9 998
## Frequency 510028 258237 140020 52532 8790 4231 6228 77
## Proportion 0.520 0.263 0.143 0.054 0.009 0.004 0.006 0.000
## --------------------------------------------------------------------------------
## PT_CODE : ENCRYPTED PATIENT IDENTIFIER Format:F7.0
## n missing distinct Info Mean Gmd .05 .10
## 980143 0 717271 1 619253 431816 58935 117888
## .25 .50 .75 .90 .95
## 295648 602183 936606 1153064 1232521
##
## lowest : 1 3 4 9 10
## highest: 1325545 1325880 1326707 1326814 1327218
## --------------------------------------------------------------------------------
## REGION : region Format:F2.0
## n missing distinct Info Mean Gmd .05 .10
## 980143 0 11 0.986 5.691 3.482 2 2
## .25 .50 .75 .90 .95
## 3 5 8 10 11
##
## lowest : 1 2 3 4 5, highest: 7 8 9 10 11
##
## Value 1 2 3 4 5 6 7 8 9
## Frequency 39409 139067 124332 89378 171415 28231 98173 50238 71524
## Proportion 0.040 0.142 0.127 0.091 0.175 0.029 0.100 0.051 0.073
##
## Value 10 11
## Frequency 81472 86904
## Proportion 0.083 0.089
## --------------------------------------------------------------------------------
## WORK_INCOME_TCR : TCR WORKING FOR INCOME: Format:A3
## n missing distinct
## 582461 397682 3
##
## Value N U Y
## Frequency 364318 27725 190418
## Proportion 0.625 0.048 0.327
## --------------------------------------------------------------------------------
## PRI_PAYMENT_TCR_KI : TCR KIDNEY PRIMARY PROJECTED SOURCE PAY Format:F2.0
## n missing distinct Info Mean Gmd .05 .10
## 842576 137567 14 0.898 3.649 3.682 1 1
## .25 .50 .75 .90 .95
## 1 2 4 13 13
##
## lowest : 1 2 3 4 5, highest: 10 11 12 13 14
##
## Value 1 2 3 4 5 6 7 8 9
## Frequency 372757 67387 188741 74272 725 7286 5392 2819 135
## Proportion 0.442 0.080 0.224 0.088 0.001 0.009 0.006 0.003 0.000
##
## Value 10 11 12 13 14
## Frequency 531 3009 835 114779 3908
## Proportion 0.001 0.004 0.001 0.136 0.005
## --------------------------------------------------------------------------------
## DON_TY : DONOR TYPE - DECEASED, LIVING OR FOREIGN Format:A9
## n missing distinct
## 499185 480958 3
##
## Value C F L
## Frequency 341971 85 157129
## Proportion 0.685 0.000 0.315
## --------------------------------------------------------------------------------
## DISTANCE : DISTANCE DONOR HOSP TO TX CENTER (Nautical Miles) Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 494294 485849 2614 0.939 163.8 270.3 0 0
## .25 .50 .75 .90 .95
## 0 8 117 517 967
##
## lowest : 0 1 2 3 4, highest: 4365 4383 4407 4408 4409
## --------------------------------------------------------------------------------
## DIAG_KI : Kidney Diagnosis from TRR/TCR Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 482794 497349 75 0.992 2905 261.8 999 3004
## .25 .50 .75 .90 .95
## 3008 3037 3041 3070 3070
##
## lowest : 999 3000 3001 3002 3003, highest: 3070 3071 3072 3073 3074
##
## Value 1000 3000 3005 3010 3015 3020 3025 3030 3035
## Frequency 30349 9552 54805 88198 2401 15439 7936 9647 42385
## Proportion 0.063 0.020 0.114 0.183 0.005 0.032 0.016 0.020 0.088
##
## Value 3040 3045 3050 3055 3060 3065 3070 3075
## Frequency 118859 4096 10730 3681 1266 1236 81004 1210
## Proportion 0.246 0.008 0.022 0.008 0.003 0.003 0.168 0.003
##
## For the frequency table, variable is rounded to the nearest 5
## --------------------------------------------------------------------------------
## SHARE_TY : Share Type Format:F1.0
## n missing distinct Info Mean Gmd
## 499184 480959 4 0.472 3.311 0.5203
##
## Value 3 4 5 6
## Frequency 402971 37097 59031 85
## Proportion 0.807 0.074 0.118 0.000
## --------------------------------------------------------------------------------
## AGE_GROUP Format:A3
## n missing distinct
## 499185 480958 3
##
## Value A P U
## Frequency 475087 24097 1
## Proportion 0.952 0.048 0.000
## --------------------------------------------------------------------------------
deletingkidney.clean <- deletingkidney %>%
rename(.,
age_group = AGE_GROUP,
donor_type = DON_TY,
region = REGION,
working_income = WORK_INCOME_TCR,
payment_method = PRI_PAYMENT_TCR_KI,
dialysis = ON_DIALYSIS,
gender = GENDER,
abo_type = ABO,
education = EDUCATION,
race_binary = ETHCAT,
ethnicity = ETHNICITY,
functional_status = FUNC_STAT_TCR,
organ_type = WL_ORG,
locality_type = SHARE_TY,
waitlist_days = DAYSWAIT_CHRON,
age_init = INIT_AGE,
state_residence = PERM_STATE)
glimpse(deletingkidney.clean)
## Rows: 980,143
## Columns: 20
## $ organ_type <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
## $ dialysis <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
## $ gender <chr> "M", "M", "F", "F", "F", "F", "F", "M", "M", "M", "…
## $ abo_type <chr> "O", "O", "B", "B", "B", "O", "O", "AB", "A", "B", …
## $ state_residence <chr> "PA", "IL", "FL", "HI", "WA", "VA", "CA", "IL", "OH…
## $ education <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 998…
## $ functional_status <dbl> 998, 2, 2, 998, 1, 1, 998, 998, 1, 998, 1, 998, 2, …
## $ waitlist_days <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ age_init <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ ethnicity <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ race_binary <dbl> 1, 1, 2, 2, 1, 5, 2, 998, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ PT_CODE <dbl> 481014, 260021, 425419, 175705, 400671, 406485, 142…
## $ region <dbl> 2, 7, 3, 5, 6, 2, 5, 7, 10, 5, 3, 5, 3, 3, 7, 1, 2,…
## $ working_income <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
## $ payment_method <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 13,…
## $ donor_type <chr> "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "…
## $ DISTANCE <dbl> 153, 779, 620, 9, 4, 0, 12, 205, 0, 2174, 55, 56, 1…
## $ DIAG_KI <dbl> 3006, 999, 3007, 3048, 999, 3041, 3041, 999, 3017, …
## $ locality_type <dbl> 4, 5, 5, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, …
## $ age_group <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "…
describe(deletingkidney.clean)
## deletingkidney.clean
##
## 20 Variables 980143 Observations
## --------------------------------------------------------------------------------
## organ_type : ORGAN LISTED FOR Format:A12
## n missing distinct
## 933957 46186 4
##
## Value KI KP PA PI
## Frequency 870845 40914 20598 1600
## Proportion 0.932 0.044 0.022 0.002
## --------------------------------------------------------------------------------
## dialysis : WL MOST RECENT CANDIDATE ON DIALYSIS? Format:A3
## n missing distinct
## 866551 113592 3
##
## Value 1 N Y
## Frequency 320 291373 574858
## Proportion 0.000 0.336 0.663
## --------------------------------------------------------------------------------
## gender : TCR RECIPIENT GENDER Format:A3
## n missing distinct
## 980143 0 2
##
## Value F M
## Frequency 392502 587641
## Proportion 0.4 0.6
## --------------------------------------------------------------------------------
## abo_type : TCR ABO BLOOD GROUP Format:A9
## n missing distinct
## 980143 0 9
##
## lowest : A A1 A1B A2 A2B, highest: A2B AB B O UNK
##
## Value A A1 A1B A2 A2B AB B O UNK
## Frequency 322360 6614 570 1136 196 36603 139828 472810 26
## Proportion 0.329 0.007 0.001 0.001 0.000 0.037 0.143 0.482 0.000
## --------------------------------------------------------------------------------
## state_residence : TCR State of Permanent Residence at Listing Format:A6
## n missing distinct
## 961641 18502 58
##
## lowest : AK AL AR AS AZ, highest: WA WI WV WY ZZ
## --------------------------------------------------------------------------------
## education : TCR HIGHEST EDUCATION LEVEL: Format:F3.0
## n missing distinct Info Mean Gmd
## 862361 117782 8 0.932 126.1 215.5
##
## lowest : 1 2 3 4 5, highest: 4 5 6 996 998
##
## Value 1 2 3 4 5 6 996 998
## Frequency 4864 53470 321621 190618 130688 54938 5729 100433
## Proportion 0.006 0.062 0.373 0.221 0.152 0.064 0.007 0.116
## --------------------------------------------------------------------------------
## functional_status : TCR FUNCTIONAL STATUS @ LISTING Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 960908 19235 25 0.967 1304 1046 1 1
## .25 .50 .75 .90 .95
## 1 2070 2080 2090 2100
##
## lowest : 1 2 3 996 998, highest: 4060 4070 4080 4090 4100
## --------------------------------------------------------------------------------
## waitlist_days : TOTAL DAYS ON WAITING LIST/INCLUDING INACTIVE TIME Format:F5.0
## n missing distinct Info Mean Gmd .05 .10
## 933768 46375 6556 1 806.9 833.1 29 69
## .25 .50 .75 .90 .95
## 200 546 1154 1891 2418
##
## lowest : 0 1 2 3 4, highest: 11592 11826 12076 12788 13741
## --------------------------------------------------------------------------------
## age_init : CALCULATED AGE AT LISTING Format:F2.0
## n missing distinct Info Mean Gmd .05 .10
## 933941 46202 92 1 47.68 16.45 22 28
## .25 .50 .75 .90 .95
## 38 49 59 66 69
##
## lowest : 0 1 2 3 4, highest: 87 88 89 90 91
## --------------------------------------------------------------------------------
## ethnicity : TCR ETHNICITY Format:F1.0
## n missing distinct Info Sum Mean Gmd
## 980143 0 2 0.374 143081 0.146 0.2493
##
## --------------------------------------------------------------------------------
## race_binary : ETHNICITY CATEGORY Format:F3.0
## n missing distinct Info Mean Gmd
## 980143 0 8 0.838 2.106 1.554
##
## lowest : 1 2 4 5 6, highest: 5 6 7 9 998
##
## Value 1 2 4 5 6 7 9 998
## Frequency 510028 258237 140020 52532 8790 4231 6228 77
## Proportion 0.520 0.263 0.143 0.054 0.009 0.004 0.006 0.000
## --------------------------------------------------------------------------------
## PT_CODE : ENCRYPTED PATIENT IDENTIFIER Format:F7.0
## n missing distinct Info Mean Gmd .05 .10
## 980143 0 717271 1 619253 431816 58935 117888
## .25 .50 .75 .90 .95
## 295648 602183 936606 1153064 1232521
##
## lowest : 1 3 4 9 10
## highest: 1325545 1325880 1326707 1326814 1327218
## --------------------------------------------------------------------------------
## region Format:F2.0
## n missing distinct Info Mean Gmd .05 .10
## 980143 0 11 0.986 5.691 3.482 2 2
## .25 .50 .75 .90 .95
## 3 5 8 10 11
##
## lowest : 1 2 3 4 5, highest: 7 8 9 10 11
##
## Value 1 2 3 4 5 6 7 8 9
## Frequency 39409 139067 124332 89378 171415 28231 98173 50238 71524
## Proportion 0.040 0.142 0.127 0.091 0.175 0.029 0.100 0.051 0.073
##
## Value 10 11
## Frequency 81472 86904
## Proportion 0.083 0.089
## --------------------------------------------------------------------------------
## working_income : TCR WORKING FOR INCOME: Format:A3
## n missing distinct
## 582461 397682 3
##
## Value N U Y
## Frequency 364318 27725 190418
## Proportion 0.625 0.048 0.327
## --------------------------------------------------------------------------------
## payment_method : TCR KIDNEY PRIMARY PROJECTED SOURCE PAY Format:F2.0
## n missing distinct Info Mean Gmd .05 .10
## 842576 137567 14 0.898 3.649 3.682 1 1
## .25 .50 .75 .90 .95
## 1 2 4 13 13
##
## lowest : 1 2 3 4 5, highest: 10 11 12 13 14
##
## Value 1 2 3 4 5 6 7 8 9
## Frequency 372757 67387 188741 74272 725 7286 5392 2819 135
## Proportion 0.442 0.080 0.224 0.088 0.001 0.009 0.006 0.003 0.000
##
## Value 10 11 12 13 14
## Frequency 531 3009 835 114779 3908
## Proportion 0.001 0.004 0.001 0.136 0.005
## --------------------------------------------------------------------------------
## donor_type : DONOR TYPE - DECEASED, LIVING OR FOREIGN Format:A9
## n missing distinct
## 499185 480958 3
##
## Value C F L
## Frequency 341971 85 157129
## Proportion 0.685 0.000 0.315
## --------------------------------------------------------------------------------
## DISTANCE : DISTANCE DONOR HOSP TO TX CENTER (Nautical Miles) Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 494294 485849 2614 0.939 163.8 270.3 0 0
## .25 .50 .75 .90 .95
## 0 8 117 517 967
##
## lowest : 0 1 2 3 4, highest: 4365 4383 4407 4408 4409
## --------------------------------------------------------------------------------
## DIAG_KI : Kidney Diagnosis from TRR/TCR Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 482794 497349 75 0.992 2905 261.8 999 3004
## .25 .50 .75 .90 .95
## 3008 3037 3041 3070 3070
##
## lowest : 999 3000 3001 3002 3003, highest: 3070 3071 3072 3073 3074
##
## Value 1000 3000 3005 3010 3015 3020 3025 3030 3035
## Frequency 30349 9552 54805 88198 2401 15439 7936 9647 42385
## Proportion 0.063 0.020 0.114 0.183 0.005 0.032 0.016 0.020 0.088
##
## Value 3040 3045 3050 3055 3060 3065 3070 3075
## Frequency 118859 4096 10730 3681 1266 1236 81004 1210
## Proportion 0.246 0.008 0.022 0.008 0.003 0.003 0.168 0.003
##
## For the frequency table, variable is rounded to the nearest 5
## --------------------------------------------------------------------------------
## locality_type : Share Type Format:F1.0
## n missing distinct Info Mean Gmd
## 499184 480959 4 0.472 3.311 0.5203
##
## Value 3 4 5 6
## Frequency 402971 37097 59031 85
## Proportion 0.807 0.074 0.118 0.000
## --------------------------------------------------------------------------------
## age_group Format:A3
## n missing distinct
## 499185 480958 3
##
## Value A P U
## Frequency 475087 24097 1
## Proportion 0.952 0.048 0.000
## --------------------------------------------------------------------------------
kidney.cleanfactors <- deletingkidney.clean %>%
mutate(.,
age_group_fac = as_factor(age_group),
donor_type_fac = as_factor(donor_type),
region_fac = as_factor(region),
working_income_fac = as_factor(working_income),
payment_method_fac = as_factor(payment_method),
dialysis_fac =as_factor(dialysis),
gender_fac =as_factor(gender),
abo_type_fac =as_factor(abo_type),
education_fac =as_factor(education),
race_fac = as_factor(race_binary),
ethnicity_fac =as_factor(ethnicity),
functional_status_fac =as_factor(functional_status),
organ_type_fac =as_factor(organ_type),
locality_type_fac =as_factor(locality_type),
state_residence_fac =as_factor(state_residence))
glimpse(kidney.cleanfactors)
## Rows: 980,143
## Columns: 35
## $ organ_type <chr> "", "", "", "", "", "", "", "", "", "", "", "",…
## $ dialysis <chr> "", "", "", "", "", "", "", "", "", "", "", "",…
## $ gender <chr> "M", "M", "F", "F", "F", "F", "F", "M", "M", "M…
## $ abo_type <chr> "O", "O", "B", "B", "B", "O", "O", "AB", "A", "…
## $ state_residence <chr> "PA", "IL", "FL", "HI", "WA", "VA", "CA", "IL",…
## $ education <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ functional_status <dbl> 998, 2, 2, 998, 1, 1, 998, 998, 1, 998, 1, 998,…
## $ waitlist_days <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ age_init <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ ethnicity <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ race_binary <dbl> 1, 1, 2, 2, 1, 5, 2, 998, 1, 1, 1, 1, 1, 1, 1, …
## $ PT_CODE <dbl> 481014, 260021, 425419, 175705, 400671, 406485,…
## $ region <dbl> 2, 7, 3, 5, 6, 2, 5, 7, 10, 5, 3, 5, 3, 3, 7, 1…
## $ working_income <chr> "", "", "", "", "", "", "", "", "", "", "", "",…
## $ payment_method <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ donor_type <chr> "C", "C", "C", "C", "C", "C", "C", "C", "C", "C…
## $ DISTANCE <dbl> 153, 779, 620, 9, 4, 0, 12, 205, 0, 2174, 55, 5…
## $ DIAG_KI <dbl> 3006, 999, 3007, 3048, 999, 3041, 3041, 999, 30…
## $ locality_type <dbl> 4, 5, 5, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3,…
## $ age_group <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A…
## $ age_group_fac <fct> A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,…
## $ donor_type_fac <fct> C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C,…
## $ region_fac <fct> 2, 7, 3, 5, 6, 2, 5, 7, 10, 5, 3, 5, 3, 3, 7, 1…
## $ working_income_fac <fct> , , , , , , , , , , , , , , , , , , , , , , , U…
## $ payment_method_fac <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ dialysis_fac <fct> , , , , , , , , , , , , , , , , , , , , , , , ,…
## $ gender_fac <fct> M, M, F, F, F, F, F, M, M, M, M, M, M, F, F, M,…
## $ abo_type_fac <fct> O, O, B, B, B, O, O, AB, A, B, O, A, A, A, O, O…
## $ education_fac <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ race_fac <fct> 1, 1, 2, 2, 1, 5, 2, 998, 1, 1, 1, 1, 1, 1, 1, …
## $ ethnicity_fac <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ functional_status_fac <fct> 998, 2, 2, 998, 1, 1, 998, 998, 1, 998, 1, 998,…
## $ organ_type_fac <fct> , , , , , , , , , , , , , , , , , , , , , , , ,…
## $ locality_type_fac <fct> 4, 5, 5, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3,…
## $ state_residence_fac <fct> PA, IL, FL, HI, WA, VA, CA, IL, OH, CA, FL, CA,…
describe(kidney.cleanfactors)
## kidney.cleanfactors
##
## 35 Variables 980143 Observations
## --------------------------------------------------------------------------------
## organ_type : ORGAN LISTED FOR Format:A12
## n missing distinct
## 933957 46186 4
##
## Value KI KP PA PI
## Frequency 870845 40914 20598 1600
## Proportion 0.932 0.044 0.022 0.002
## --------------------------------------------------------------------------------
## dialysis : WL MOST RECENT CANDIDATE ON DIALYSIS? Format:A3
## n missing distinct
## 866551 113592 3
##
## Value 1 N Y
## Frequency 320 291373 574858
## Proportion 0.000 0.336 0.663
## --------------------------------------------------------------------------------
## gender : TCR RECIPIENT GENDER Format:A3
## n missing distinct
## 980143 0 2
##
## Value F M
## Frequency 392502 587641
## Proportion 0.4 0.6
## --------------------------------------------------------------------------------
## abo_type : TCR ABO BLOOD GROUP Format:A9
## n missing distinct
## 980143 0 9
##
## lowest : A A1 A1B A2 A2B, highest: A2B AB B O UNK
##
## Value A A1 A1B A2 A2B AB B O UNK
## Frequency 322360 6614 570 1136 196 36603 139828 472810 26
## Proportion 0.329 0.007 0.001 0.001 0.000 0.037 0.143 0.482 0.000
## --------------------------------------------------------------------------------
## state_residence : TCR State of Permanent Residence at Listing Format:A6
## n missing distinct
## 961641 18502 58
##
## lowest : AK AL AR AS AZ, highest: WA WI WV WY ZZ
## --------------------------------------------------------------------------------
## education : TCR HIGHEST EDUCATION LEVEL: Format:F3.0
## n missing distinct Info Mean Gmd
## 862361 117782 8 0.932 126.1 215.5
##
## lowest : 1 2 3 4 5, highest: 4 5 6 996 998
##
## Value 1 2 3 4 5 6 996 998
## Frequency 4864 53470 321621 190618 130688 54938 5729 100433
## Proportion 0.006 0.062 0.373 0.221 0.152 0.064 0.007 0.116
## --------------------------------------------------------------------------------
## functional_status : TCR FUNCTIONAL STATUS @ LISTING Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 960908 19235 25 0.967 1304 1046 1 1
## .25 .50 .75 .90 .95
## 1 2070 2080 2090 2100
##
## lowest : 1 2 3 996 998, highest: 4060 4070 4080 4090 4100
## --------------------------------------------------------------------------------
## waitlist_days : TOTAL DAYS ON WAITING LIST/INCLUDING INACTIVE TIME Format:F5.0
## n missing distinct Info Mean Gmd .05 .10
## 933768 46375 6556 1 806.9 833.1 29 69
## .25 .50 .75 .90 .95
## 200 546 1154 1891 2418
##
## lowest : 0 1 2 3 4, highest: 11592 11826 12076 12788 13741
## --------------------------------------------------------------------------------
## age_init : CALCULATED AGE AT LISTING Format:F2.0
## n missing distinct Info Mean Gmd .05 .10
## 933941 46202 92 1 47.68 16.45 22 28
## .25 .50 .75 .90 .95
## 38 49 59 66 69
##
## lowest : 0 1 2 3 4, highest: 87 88 89 90 91
## --------------------------------------------------------------------------------
## ethnicity : TCR ETHNICITY Format:F1.0
## n missing distinct Info Sum Mean Gmd
## 980143 0 2 0.374 143081 0.146 0.2493
##
## --------------------------------------------------------------------------------
## race_binary : ETHNICITY CATEGORY Format:F3.0
## n missing distinct Info Mean Gmd
## 980143 0 8 0.838 2.106 1.554
##
## lowest : 1 2 4 5 6, highest: 5 6 7 9 998
##
## Value 1 2 4 5 6 7 9 998
## Frequency 510028 258237 140020 52532 8790 4231 6228 77
## Proportion 0.520 0.263 0.143 0.054 0.009 0.004 0.006 0.000
## --------------------------------------------------------------------------------
## PT_CODE : ENCRYPTED PATIENT IDENTIFIER Format:F7.0
## n missing distinct Info Mean Gmd .05 .10
## 980143 0 717271 1 619253 431816 58935 117888
## .25 .50 .75 .90 .95
## 295648 602183 936606 1153064 1232521
##
## lowest : 1 3 4 9 10
## highest: 1325545 1325880 1326707 1326814 1327218
## --------------------------------------------------------------------------------
## region Format:F2.0
## n missing distinct Info Mean Gmd .05 .10
## 980143 0 11 0.986 5.691 3.482 2 2
## .25 .50 .75 .90 .95
## 3 5 8 10 11
##
## lowest : 1 2 3 4 5, highest: 7 8 9 10 11
##
## Value 1 2 3 4 5 6 7 8 9
## Frequency 39409 139067 124332 89378 171415 28231 98173 50238 71524
## Proportion 0.040 0.142 0.127 0.091 0.175 0.029 0.100 0.051 0.073
##
## Value 10 11
## Frequency 81472 86904
## Proportion 0.083 0.089
## --------------------------------------------------------------------------------
## working_income : TCR WORKING FOR INCOME: Format:A3
## n missing distinct
## 582461 397682 3
##
## Value N U Y
## Frequency 364318 27725 190418
## Proportion 0.625 0.048 0.327
## --------------------------------------------------------------------------------
## payment_method : TCR KIDNEY PRIMARY PROJECTED SOURCE PAY Format:F2.0
## n missing distinct Info Mean Gmd .05 .10
## 842576 137567 14 0.898 3.649 3.682 1 1
## .25 .50 .75 .90 .95
## 1 2 4 13 13
##
## lowest : 1 2 3 4 5, highest: 10 11 12 13 14
##
## Value 1 2 3 4 5 6 7 8 9
## Frequency 372757 67387 188741 74272 725 7286 5392 2819 135
## Proportion 0.442 0.080 0.224 0.088 0.001 0.009 0.006 0.003 0.000
##
## Value 10 11 12 13 14
## Frequency 531 3009 835 114779 3908
## Proportion 0.001 0.004 0.001 0.136 0.005
## --------------------------------------------------------------------------------
## donor_type : DONOR TYPE - DECEASED, LIVING OR FOREIGN Format:A9
## n missing distinct
## 499185 480958 3
##
## Value C F L
## Frequency 341971 85 157129
## Proportion 0.685 0.000 0.315
## --------------------------------------------------------------------------------
## DISTANCE : DISTANCE DONOR HOSP TO TX CENTER (Nautical Miles) Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 494294 485849 2614 0.939 163.8 270.3 0 0
## .25 .50 .75 .90 .95
## 0 8 117 517 967
##
## lowest : 0 1 2 3 4, highest: 4365 4383 4407 4408 4409
## --------------------------------------------------------------------------------
## DIAG_KI : Kidney Diagnosis from TRR/TCR Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 482794 497349 75 0.992 2905 261.8 999 3004
## .25 .50 .75 .90 .95
## 3008 3037 3041 3070 3070
##
## lowest : 999 3000 3001 3002 3003, highest: 3070 3071 3072 3073 3074
##
## Value 1000 3000 3005 3010 3015 3020 3025 3030 3035
## Frequency 30349 9552 54805 88198 2401 15439 7936 9647 42385
## Proportion 0.063 0.020 0.114 0.183 0.005 0.032 0.016 0.020 0.088
##
## Value 3040 3045 3050 3055 3060 3065 3070 3075
## Frequency 118859 4096 10730 3681 1266 1236 81004 1210
## Proportion 0.246 0.008 0.022 0.008 0.003 0.003 0.168 0.003
##
## For the frequency table, variable is rounded to the nearest 5
## --------------------------------------------------------------------------------
## locality_type : Share Type Format:F1.0
## n missing distinct Info Mean Gmd
## 499184 480959 4 0.472 3.311 0.5203
##
## Value 3 4 5 6
## Frequency 402971 37097 59031 85
## Proportion 0.807 0.074 0.118 0.000
## --------------------------------------------------------------------------------
## age_group Format:A3
## n missing distinct
## 499185 480958 3
##
## Value A P U
## Frequency 475087 24097 1
## Proportion 0.952 0.048 0.000
## --------------------------------------------------------------------------------
## age_group_fac
## n missing distinct
## 980143 0 4
##
## Value A P U
## Frequency 475087 24097 1 480958
## Proportion 0.485 0.025 0.000 0.491
## --------------------------------------------------------------------------------
## donor_type_fac : DONOR TYPE - DECEASED, LIVING OR FOREIGN
## n missing distinct
## 980143 0 4
##
## Value C L F
## Frequency 341971 157129 85 480958
## Proportion 0.349 0.160 0.000 0.491
## --------------------------------------------------------------------------------
## region_fac
## n missing distinct
## 980143 0 11
##
## lowest : 1 2 3 4 5 , highest: 7 8 9 10 11
##
## Value 1 2 3 4 5 6 7 8 9
## Frequency 39409 139067 124332 89378 171415 28231 98173 50238 71524
## Proportion 0.040 0.142 0.127 0.091 0.175 0.029 0.100 0.051 0.073
##
## Value 10 11
## Frequency 81472 86904
## Proportion 0.083 0.089
## --------------------------------------------------------------------------------
## working_income_fac : TCR WORKING FOR INCOME:
## n missing distinct
## 980143 0 4
##
## Value U N Y
## Frequency 397682 27725 364318 190418
## Proportion 0.406 0.028 0.372 0.194
## --------------------------------------------------------------------------------
## payment_method_fac
## n missing distinct
## 842576 137567 14
##
## lowest : 1 2 3 4 5 , highest: 10 11 12 13 14
##
## Value 1 2 3 4 5 6 7 8 9
## Frequency 372757 67387 188741 74272 725 7286 5392 2819 135
## Proportion 0.442 0.080 0.224 0.088 0.001 0.009 0.006 0.003 0.000
##
## Value 10 11 12 13 14
## Frequency 531 3009 835 114779 3908
## Proportion 0.001 0.004 0.001 0.136 0.005
## --------------------------------------------------------------------------------
## dialysis_fac : WL MOST RECENT CANDIDATE ON DIALYSIS?
## n missing distinct
## 980143 0 4
##
## Value N Y 1
## Frequency 113592 291373 574858 320
## Proportion 0.116 0.297 0.587 0.000
## --------------------------------------------------------------------------------
## gender_fac : TCR RECIPIENT GENDER
## n missing distinct
## 980143 0 2
##
## Value M F
## Frequency 587641 392502
## Proportion 0.6 0.4
## --------------------------------------------------------------------------------
## abo_type_fac : TCR ABO BLOOD GROUP
## n missing distinct
## 980143 0 9
##
## lowest : O B AB A A1 , highest: A1 A2 UNK A2B A1B
##
## Value O B AB A A1 A2 UNK A2B A1B
## Frequency 472810 139828 36603 322360 6614 1136 26 196 570
## Proportion 0.482 0.143 0.037 0.329 0.007 0.001 0.000 0.000 0.001
## --------------------------------------------------------------------------------
## education_fac
## n missing distinct
## 862361 117782 8
##
## lowest : 1 2 3 4 5 , highest: 4 5 6 996 998
##
## Value 1 2 3 4 5 6 996 998
## Frequency 4864 53470 321621 190618 130688 54938 5729 100433
## Proportion 0.006 0.062 0.373 0.221 0.152 0.064 0.007 0.116
## --------------------------------------------------------------------------------
## race_fac
## n missing distinct
## 980143 0 8
##
## lowest : 1 2 4 5 6 , highest: 5 6 7 9 998
##
## Value 1 2 4 5 6 7 9 998
## Frequency 510028 258237 140020 52532 8790 4231 6228 77
## Proportion 0.520 0.263 0.143 0.054 0.009 0.004 0.006 0.000
## --------------------------------------------------------------------------------
## ethnicity_fac
## n missing distinct
## 980143 0 2
##
## Value 0 1
## Frequency 837062 143081
## Proportion 0.854 0.146
## --------------------------------------------------------------------------------
## functional_status_fac
## n missing distinct
## 960908 19235 25
##
## lowest : 1 2 3 996 998 , highest: 4060 4070 4080 4090 4100
## --------------------------------------------------------------------------------
## organ_type_fac : ORGAN LISTED FOR
## n missing distinct
## 980143 0 5
##
## lowest : KI KP PA PI, highest: KI KP PA PI
##
## Value KI KP PA PI
## Frequency 46186 870845 40914 20598 1600
## Proportion 0.047 0.888 0.042 0.021 0.002
## --------------------------------------------------------------------------------
## locality_type_fac
## n missing distinct
## 499184 480959 4
##
## Value 3 4 5 6
## Frequency 402971 37097 59031 85
## Proportion 0.807 0.074 0.118 0.000
## --------------------------------------------------------------------------------
## state_residence_fac : TCR State of Permanent Residence at Listing
## n missing distinct
## 980143 0 59
##
## lowest : PA IL FL HI WA, highest: MT NV GU AS MP
## --------------------------------------------------------------------------------
#now time to add levels to my categories…
levels(kidney.cleanfactors$ethnicity_fac) = c("Non-Hispanic","Hispanic")
levels(kidney.cleanfactors$education_fac) = c("None","Grade School", "Highschool or GED", "Attended Some College", "Associate/Bachelor Degree", "Post-Graduate", "N/A<5years old", "unknown", "unknown")
levels(kidney.cleanfactors$race_fac) = c("White","Black", "Hispanic", "Asian", "Amer Ind/Alaska Native", "Native Hawaiin/other", "Multi-racial", "Unknown", "Unknown")
levels(kidney.cleanfactors$abo_type_fac) = c("O","B", "AB", "A", "A1", "A2", "Unknown", "A2B", "A1B")
levels(kidney.cleanfactors$gender_fac) = c("Male","Female")
levels(kidney.cleanfactors$locality_type_fac) = c("Local", "Regional", "National", "Foreign")
levels(kidney.cleanfactors$organ_type_fac) = c("unknown", "Kidney", "KP", "P", "PI")
levels(kidney.cleanfactors$dialysis_fac) =c("missing", "no", "yes", "unknown")
levels(kidney.cleanfactors$working_income_fac) =c("missing", "unknown", "no", "yes")
levels(kidney.cleanfactors$payment_method_fac) =c("Private Insurance", "Public InsuranceMedicaid", "PublicMedicare FFS", "PublicMedicareCh", "PublicCHIP", "PublicVA", "PublicOthrGovt", "Self", "Donation", "Free Care", "Pending", "Foreign Govt", "PublicMedicareunknowntype", "StateGovtAgency", "Unknown", "other", "missing")
levels(kidney.cleanfactors$donor_type_fac) =c("deceased donor", "living donor", "foreign", "missing")
glimpse(kidney.cleanfactors)
## Rows: 980,143
## Columns: 35
## $ organ_type <chr> "", "", "", "", "", "", "", "", "", "", "", "",…
## $ dialysis <chr> "", "", "", "", "", "", "", "", "", "", "", "",…
## $ gender <chr> "M", "M", "F", "F", "F", "F", "F", "M", "M", "M…
## $ abo_type <chr> "O", "O", "B", "B", "B", "O", "O", "AB", "A", "…
## $ state_residence <chr> "PA", "IL", "FL", "HI", "WA", "VA", "CA", "IL",…
## $ education <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ functional_status <dbl> 998, 2, 2, 998, 1, 1, 998, 998, 1, 998, 1, 998,…
## $ waitlist_days <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ age_init <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ ethnicity <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ race_binary <dbl> 1, 1, 2, 2, 1, 5, 2, 998, 1, 1, 1, 1, 1, 1, 1, …
## $ PT_CODE <dbl> 481014, 260021, 425419, 175705, 400671, 406485,…
## $ region <dbl> 2, 7, 3, 5, 6, 2, 5, 7, 10, 5, 3, 5, 3, 3, 7, 1…
## $ working_income <chr> "", "", "", "", "", "", "", "", "", "", "", "",…
## $ payment_method <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ donor_type <chr> "C", "C", "C", "C", "C", "C", "C", "C", "C", "C…
## $ DISTANCE <dbl> 153, 779, 620, 9, 4, 0, 12, 205, 0, 2174, 55, 5…
## $ DIAG_KI <dbl> 3006, 999, 3007, 3048, 999, 3041, 3041, 999, 30…
## $ locality_type <dbl> 4, 5, 5, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3,…
## $ age_group <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A…
## $ age_group_fac <fct> A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,…
## $ donor_type_fac <fct> deceased donor, deceased donor, deceased donor,…
## $ region_fac <fct> 2, 7, 3, 5, 6, 2, 5, 7, 10, 5, 3, 5, 3, 3, 7, 1…
## $ working_income_fac <fct> missing, missing, missing, missing, missing, mi…
## $ payment_method_fac <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ dialysis_fac <fct> missing, missing, missing, missing, missing, mi…
## $ gender_fac <fct> Male, Male, Female, Female, Female, Female, Fem…
## $ abo_type_fac <fct> O, O, B, B, B, O, O, AB, A, B, O, A, A, A, O, O…
## $ education_fac <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ race_fac <fct> White, White, Black, Black, White, Asian, Black…
## $ ethnicity_fac <fct> Non-Hispanic, Non-Hispanic, Non-Hispanic, Non-H…
## $ functional_status_fac <fct> 998, 2, 2, 998, 1, 1, 998, 998, 1, 998, 1, 998,…
## $ organ_type_fac <fct> unknown, unknown, unknown, unknown, unknown, un…
## $ locality_type_fac <fct> Regional, National, National, Local, Local, Loc…
## $ state_residence_fac <fct> PA, IL, FL, HI, WA, VA, CA, IL, OH, CA, FL, CA,…
describe(kidney.cleanfactors)
## kidney.cleanfactors
##
## 35 Variables 980143 Observations
## --------------------------------------------------------------------------------
## organ_type : ORGAN LISTED FOR Format:A12
## n missing distinct
## 933957 46186 4
##
## Value KI KP PA PI
## Frequency 870845 40914 20598 1600
## Proportion 0.932 0.044 0.022 0.002
## --------------------------------------------------------------------------------
## dialysis : WL MOST RECENT CANDIDATE ON DIALYSIS? Format:A3
## n missing distinct
## 866551 113592 3
##
## Value 1 N Y
## Frequency 320 291373 574858
## Proportion 0.000 0.336 0.663
## --------------------------------------------------------------------------------
## gender : TCR RECIPIENT GENDER Format:A3
## n missing distinct
## 980143 0 2
##
## Value F M
## Frequency 392502 587641
## Proportion 0.4 0.6
## --------------------------------------------------------------------------------
## abo_type : TCR ABO BLOOD GROUP Format:A9
## n missing distinct
## 980143 0 9
##
## lowest : A A1 A1B A2 A2B, highest: A2B AB B O UNK
##
## Value A A1 A1B A2 A2B AB B O UNK
## Frequency 322360 6614 570 1136 196 36603 139828 472810 26
## Proportion 0.329 0.007 0.001 0.001 0.000 0.037 0.143 0.482 0.000
## --------------------------------------------------------------------------------
## state_residence : TCR State of Permanent Residence at Listing Format:A6
## n missing distinct
## 961641 18502 58
##
## lowest : AK AL AR AS AZ, highest: WA WI WV WY ZZ
## --------------------------------------------------------------------------------
## education : TCR HIGHEST EDUCATION LEVEL: Format:F3.0
## n missing distinct Info Mean Gmd
## 862361 117782 8 0.932 126.1 215.5
##
## lowest : 1 2 3 4 5, highest: 4 5 6 996 998
##
## Value 1 2 3 4 5 6 996 998
## Frequency 4864 53470 321621 190618 130688 54938 5729 100433
## Proportion 0.006 0.062 0.373 0.221 0.152 0.064 0.007 0.116
## --------------------------------------------------------------------------------
## functional_status : TCR FUNCTIONAL STATUS @ LISTING Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 960908 19235 25 0.967 1304 1046 1 1
## .25 .50 .75 .90 .95
## 1 2070 2080 2090 2100
##
## lowest : 1 2 3 996 998, highest: 4060 4070 4080 4090 4100
## --------------------------------------------------------------------------------
## waitlist_days : TOTAL DAYS ON WAITING LIST/INCLUDING INACTIVE TIME Format:F5.0
## n missing distinct Info Mean Gmd .05 .10
## 933768 46375 6556 1 806.9 833.1 29 69
## .25 .50 .75 .90 .95
## 200 546 1154 1891 2418
##
## lowest : 0 1 2 3 4, highest: 11592 11826 12076 12788 13741
## --------------------------------------------------------------------------------
## age_init : CALCULATED AGE AT LISTING Format:F2.0
## n missing distinct Info Mean Gmd .05 .10
## 933941 46202 92 1 47.68 16.45 22 28
## .25 .50 .75 .90 .95
## 38 49 59 66 69
##
## lowest : 0 1 2 3 4, highest: 87 88 89 90 91
## --------------------------------------------------------------------------------
## ethnicity : TCR ETHNICITY Format:F1.0
## n missing distinct Info Sum Mean Gmd
## 980143 0 2 0.374 143081 0.146 0.2493
##
## --------------------------------------------------------------------------------
## race_binary : ETHNICITY CATEGORY Format:F3.0
## n missing distinct Info Mean Gmd
## 980143 0 8 0.838 2.106 1.554
##
## lowest : 1 2 4 5 6, highest: 5 6 7 9 998
##
## Value 1 2 4 5 6 7 9 998
## Frequency 510028 258237 140020 52532 8790 4231 6228 77
## Proportion 0.520 0.263 0.143 0.054 0.009 0.004 0.006 0.000
## --------------------------------------------------------------------------------
## PT_CODE : ENCRYPTED PATIENT IDENTIFIER Format:F7.0
## n missing distinct Info Mean Gmd .05 .10
## 980143 0 717271 1 619253 431816 58935 117888
## .25 .50 .75 .90 .95
## 295648 602183 936606 1153064 1232521
##
## lowest : 1 3 4 9 10
## highest: 1325545 1325880 1326707 1326814 1327218
## --------------------------------------------------------------------------------
## region Format:F2.0
## n missing distinct Info Mean Gmd .05 .10
## 980143 0 11 0.986 5.691 3.482 2 2
## .25 .50 .75 .90 .95
## 3 5 8 10 11
##
## lowest : 1 2 3 4 5, highest: 7 8 9 10 11
##
## Value 1 2 3 4 5 6 7 8 9
## Frequency 39409 139067 124332 89378 171415 28231 98173 50238 71524
## Proportion 0.040 0.142 0.127 0.091 0.175 0.029 0.100 0.051 0.073
##
## Value 10 11
## Frequency 81472 86904
## Proportion 0.083 0.089
## --------------------------------------------------------------------------------
## working_income : TCR WORKING FOR INCOME: Format:A3
## n missing distinct
## 582461 397682 3
##
## Value N U Y
## Frequency 364318 27725 190418
## Proportion 0.625 0.048 0.327
## --------------------------------------------------------------------------------
## payment_method : TCR KIDNEY PRIMARY PROJECTED SOURCE PAY Format:F2.0
## n missing distinct Info Mean Gmd .05 .10
## 842576 137567 14 0.898 3.649 3.682 1 1
## .25 .50 .75 .90 .95
## 1 2 4 13 13
##
## lowest : 1 2 3 4 5, highest: 10 11 12 13 14
##
## Value 1 2 3 4 5 6 7 8 9
## Frequency 372757 67387 188741 74272 725 7286 5392 2819 135
## Proportion 0.442 0.080 0.224 0.088 0.001 0.009 0.006 0.003 0.000
##
## Value 10 11 12 13 14
## Frequency 531 3009 835 114779 3908
## Proportion 0.001 0.004 0.001 0.136 0.005
## --------------------------------------------------------------------------------
## donor_type : DONOR TYPE - DECEASED, LIVING OR FOREIGN Format:A9
## n missing distinct
## 499185 480958 3
##
## Value C F L
## Frequency 341971 85 157129
## Proportion 0.685 0.000 0.315
## --------------------------------------------------------------------------------
## DISTANCE : DISTANCE DONOR HOSP TO TX CENTER (Nautical Miles) Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 494294 485849 2614 0.939 163.8 270.3 0 0
## .25 .50 .75 .90 .95
## 0 8 117 517 967
##
## lowest : 0 1 2 3 4, highest: 4365 4383 4407 4408 4409
## --------------------------------------------------------------------------------
## DIAG_KI : Kidney Diagnosis from TRR/TCR Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 482794 497349 75 0.992 2905 261.8 999 3004
## .25 .50 .75 .90 .95
## 3008 3037 3041 3070 3070
##
## lowest : 999 3000 3001 3002 3003, highest: 3070 3071 3072 3073 3074
##
## Value 1000 3000 3005 3010 3015 3020 3025 3030 3035
## Frequency 30349 9552 54805 88198 2401 15439 7936 9647 42385
## Proportion 0.063 0.020 0.114 0.183 0.005 0.032 0.016 0.020 0.088
##
## Value 3040 3045 3050 3055 3060 3065 3070 3075
## Frequency 118859 4096 10730 3681 1266 1236 81004 1210
## Proportion 0.246 0.008 0.022 0.008 0.003 0.003 0.168 0.003
##
## For the frequency table, variable is rounded to the nearest 5
## --------------------------------------------------------------------------------
## locality_type : Share Type Format:F1.0
## n missing distinct Info Mean Gmd
## 499184 480959 4 0.472 3.311 0.5203
##
## Value 3 4 5 6
## Frequency 402971 37097 59031 85
## Proportion 0.807 0.074 0.118 0.000
## --------------------------------------------------------------------------------
## age_group Format:A3
## n missing distinct
## 499185 480958 3
##
## Value A P U
## Frequency 475087 24097 1
## Proportion 0.952 0.048 0.000
## --------------------------------------------------------------------------------
## age_group_fac
## n missing distinct
## 980143 0 4
##
## Value A P U
## Frequency 475087 24097 1 480958
## Proportion 0.485 0.025 0.000 0.491
## --------------------------------------------------------------------------------
## donor_type_fac : DONOR TYPE - DECEASED, LIVING OR FOREIGN
## n missing distinct
## 980143 0 4
##
## Value deceased donor living donor foreign missing
## Frequency 341971 157129 85 480958
## Proportion 0.349 0.160 0.000 0.491
## --------------------------------------------------------------------------------
## region_fac
## n missing distinct
## 980143 0 11
##
## lowest : 1 2 3 4 5 , highest: 7 8 9 10 11
##
## Value 1 2 3 4 5 6 7 8 9
## Frequency 39409 139067 124332 89378 171415 28231 98173 50238 71524
## Proportion 0.040 0.142 0.127 0.091 0.175 0.029 0.100 0.051 0.073
##
## Value 10 11
## Frequency 81472 86904
## Proportion 0.083 0.089
## --------------------------------------------------------------------------------
## working_income_fac : TCR WORKING FOR INCOME:
## n missing distinct
## 980143 0 4
##
## Value missing unknown no yes
## Frequency 397682 27725 364318 190418
## Proportion 0.406 0.028 0.372 0.194
## --------------------------------------------------------------------------------
## payment_method_fac
## n missing distinct
## 842576 137567 14
##
## lowest : Private Insurance Public InsuranceMedicaid PublicMedicare FFS PublicMedicareCh PublicCHIP
## highest: Free Care Pending Foreign Govt PublicMedicareunknowntype StateGovtAgency
##
## Private Insurance (372757, 0.442), Public InsuranceMedicaid (67387, 0.080),
## PublicMedicare FFS (188741, 0.224), PublicMedicareCh (74272, 0.088), PublicCHIP
## (725, 0.001), PublicVA (7286, 0.009), PublicOthrGovt (5392, 0.006), Self (2819,
## 0.003), Donation (135, 0.000), Free Care (531, 0.001), Pending (3009, 0.004),
## Foreign Govt (835, 0.001), PublicMedicareunknowntype (114779, 0.136),
## StateGovtAgency (3908, 0.005)
## --------------------------------------------------------------------------------
## dialysis_fac : WL MOST RECENT CANDIDATE ON DIALYSIS?
## n missing distinct
## 980143 0 4
##
## Value missing no yes unknown
## Frequency 113592 291373 574858 320
## Proportion 0.116 0.297 0.587 0.000
## --------------------------------------------------------------------------------
## gender_fac : TCR RECIPIENT GENDER
## n missing distinct
## 980143 0 2
##
## Value Male Female
## Frequency 587641 392502
## Proportion 0.6 0.4
## --------------------------------------------------------------------------------
## abo_type_fac : TCR ABO BLOOD GROUP
## n missing distinct
## 980143 0 9
##
## lowest : O B AB A A1
## highest: A1 A2 Unknown A2B A1B
##
## Value O B AB A A1 A2 Unknown A2B
## Frequency 472810 139828 36603 322360 6614 1136 26 196
## Proportion 0.482 0.143 0.037 0.329 0.007 0.001 0.000 0.000
##
## Value A1B
## Frequency 570
## Proportion 0.001
## --------------------------------------------------------------------------------
## education_fac
## n missing distinct
## 862361 117782 8
##
## lowest : None Grade School Highschool or GED Attended Some College Associate/Bachelor Degree
## highest: Attended Some College Associate/Bachelor Degree Post-Graduate N/A<5years old unknown
##
## None (4864, 0.006), Grade School (53470, 0.062), Highschool or GED (321621,
## 0.373), Attended Some College (190618, 0.221), Associate/Bachelor Degree
## (130688, 0.152), Post-Graduate (54938, 0.064), N/A<5years old (5729, 0.007),
## unknown (100433, 0.116)
## --------------------------------------------------------------------------------
## race_fac
## n missing distinct
## 980143 0 8
##
## lowest : White Black Hispanic Asian Amer Ind/Alaska Native
## highest: Asian Amer Ind/Alaska Native Native Hawaiin/other Multi-racial Unknown
##
## White (510028, 0.520), Black (258237, 0.263), Hispanic (140020, 0.143), Asian
## (52532, 0.054), Amer Ind/Alaska Native (8790, 0.009), Native Hawaiin/other
## (4231, 0.004), Multi-racial (6228, 0.006), Unknown (77, 0.000)
## --------------------------------------------------------------------------------
## ethnicity_fac
## n missing distinct
## 980143 0 2
##
## Value Non-Hispanic Hispanic
## Frequency 837062 143081
## Proportion 0.854 0.146
## --------------------------------------------------------------------------------
## functional_status_fac
## n missing distinct
## 960908 19235 25
##
## lowest : 1 2 3 996 998 , highest: 4060 4070 4080 4090 4100
## --------------------------------------------------------------------------------
## organ_type_fac : ORGAN LISTED FOR
## n missing distinct
## 980143 0 5
##
## lowest : unknown Kidney KP P PI
## highest: unknown Kidney KP P PI
##
## Value unknown Kidney KP P PI
## Frequency 46186 870845 40914 20598 1600
## Proportion 0.047 0.888 0.042 0.021 0.002
## --------------------------------------------------------------------------------
## locality_type_fac
## n missing distinct
## 499184 480959 4
##
## Value Local Regional National Foreign
## Frequency 402971 37097 59031 85
## Proportion 0.807 0.074 0.118 0.000
## --------------------------------------------------------------------------------
## state_residence_fac : TCR State of Permanent Residence at Listing
## n missing distinct
## 980143 0 59
##
## lowest : PA IL FL HI WA, highest: MT NV GU AS MP
## --------------------------------------------------------------------------------
###this is where I stopped to check out what variables got “wonky” when labeled. All variables have been adjusted/fixed ##Next sstep, delete all organs except kidney before reducing other variables
kidney.clean1 <- kidney.cleanfactors %>%
filter(!(organ_type_fac %in%
c("unknown", "KP", "P", "PI")))
describe(kidney.clean1)
## kidney.clean1
##
## 35 Variables 870845 Observations
## --------------------------------------------------------------------------------
## organ_type : ORGAN LISTED FOR Format:A12
## n missing distinct value
## 870845 0 1 KI
##
## Value KI
## Frequency 870845
## Proportion 1
## --------------------------------------------------------------------------------
## dialysis : WL MOST RECENT CANDIDATE ON DIALYSIS? Format:A3
## n missing distinct
## 828121 42724 2
##
## Value N Y
## Frequency 276310 551811
## Proportion 0.334 0.666
## --------------------------------------------------------------------------------
## gender : TCR RECIPIENT GENDER Format:A3
## n missing distinct
## 870845 0 2
##
## Value F M
## Frequency 345591 525254
## Proportion 0.397 0.603
## --------------------------------------------------------------------------------
## abo_type : TCR ABO BLOOD GROUP Format:A9
## n missing distinct
## 870845 0 9
##
## lowest : A A1 A1B A2 A2B, highest: A2B AB B O UNK
##
## Value A A1 A1B A2 A2B AB B O UNK
## Frequency 281281 5792 494 910 170 32466 126724 423000 8
## Proportion 0.323 0.007 0.001 0.001 0.000 0.037 0.146 0.486 0.000
## --------------------------------------------------------------------------------
## state_residence : TCR State of Permanent Residence at Listing Format:A6
## n missing distinct
## 853946 16899 58
##
## lowest : AK AL AR AS AZ, highest: WA WI WV WY ZZ
## --------------------------------------------------------------------------------
## education : TCR HIGHEST EDUCATION LEVEL: Format:F3.0
## n missing distinct Info Mean Gmd
## 770184 100661 8 0.93 118.3 203.7
##
## lowest : 1 2 3 4 5, highest: 4 5 6 996 998
##
## Value 1 2 3 4 5 6 996 998
## Frequency 4518 49816 290142 170305 116596 50015 3670 85122
## Proportion 0.006 0.065 0.377 0.221 0.151 0.065 0.005 0.111
## --------------------------------------------------------------------------------
## functional_status : TCR FUNCTIONAL STATUS @ LISTING Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 853114 17731 25 0.97 1353 1022 1 1
## .25 .50 .75 .90 .95
## 1 2070 2080 2090 2100
##
## lowest : 1 2 3 996 998, highest: 4060 4070 4080 4090 4100
## --------------------------------------------------------------------------------
## waitlist_days : TOTAL DAYS ON WAITING LIST/INCLUDING INACTIVE TIME Format:F5.0
## n missing distinct Info Mean Gmd .05 .10
## 870676 169 6531 1 828.7 844.2 32 74
## .25 .50 .75 .90 .95
## 212 573 1185 1920 2445
##
## lowest : 0 1 2 3 4, highest: 11592 11826 12076 12788 13741
## --------------------------------------------------------------------------------
## age_init : CALCULATED AGE AT LISTING Format:F2.0
## n missing distinct Info Mean Gmd .05 .10
## 870829 16 92 1 48.26 16.53 22 28
## .25 .50 .75 .90 .95
## 38 50 59 66 69
##
## lowest : 0 1 2 3 4, highest: 87 88 89 90 91
## --------------------------------------------------------------------------------
## ethnicity : TCR ETHNICITY Format:F1.0
## n missing distinct Info Sum Mean Gmd
## 870845 0 2 0.388 132751 0.1524 0.2584
##
## --------------------------------------------------------------------------------
## race_binary : ETHNICITY CATEGORY Format:F3.0
## n missing distinct Info Mean Gmd
## 870845 0 8 0.855 2.1 1.463
##
## lowest : 1 2 4 5 6, highest: 5 6 7 9 998
##
## Value 1 2 4 5 6 7 9 998
## Frequency 428439 243839 129948 50758 8108 3971 5771 11
## Proportion 0.492 0.280 0.149 0.058 0.009 0.005 0.007 0.000
## --------------------------------------------------------------------------------
## PT_CODE : ENCRYPTED PATIENT IDENTIFIER Format:F7.0
## n missing distinct Info Mean Gmd .05 .10
## 870845 0 673711 1 637073 434456 62038 123802
## .25 .50 .75 .90 .95
## 310222 632125 958919 1163741 1238466
##
## lowest : 1 3 4 9 10
## highest: 1325545 1325880 1326707 1326814 1327218
## --------------------------------------------------------------------------------
## region Format:F2.0
## n missing distinct Info Mean Gmd .05 .10
## 870845 0 11 0.985 5.65 3.463 2 2
## .25 .50 .75 .90 .95
## 3 5 8 10 11
##
## lowest : 1 2 3 4 5, highest: 7 8 9 10 11
##
## Value 1 2 3 4 5 6 7 8 9
## Frequency 34021 124644 111934 82781 158773 24058 79663 44112 64780
## Proportion 0.039 0.143 0.129 0.095 0.182 0.028 0.091 0.051 0.074
##
## Value 10 11
## Frequency 68805 77274
## Proportion 0.079 0.089
## --------------------------------------------------------------------------------
## working_income : TCR WORKING FOR INCOME: Format:A3
## n missing distinct
## 539093 331752 3
##
## Value N U Y
## Frequency 339953 24967 174173
## Proportion 0.631 0.046 0.323
## --------------------------------------------------------------------------------
## payment_method : TCR KIDNEY PRIMARY PROJECTED SOURCE PAY Format:F2.0
## n missing distinct Info Mean Gmd .05 .10
## 769863 100982 14 0.903 3.624 3.619 1 1
## .25 .50 .75 .90 .95
## 1 2 4 13 13
##
## lowest : 1 2 3 4 5, highest: 10 11 12 13 14
##
## Value 1 2 3 4 5 6 7 8 9
## Frequency 333109 62423 179422 70903 669 7084 5139 2319 96
## Proportion 0.433 0.081 0.233 0.092 0.001 0.009 0.007 0.003 0.000
##
## Value 10 11 12 13 14
## Frequency 450 2723 731 101461 3334
## Proportion 0.001 0.004 0.001 0.132 0.004
## --------------------------------------------------------------------------------
## donor_type : DONOR TYPE - DECEASED, LIVING OR FOREIGN Format:A9
## n missing distinct
## 420735 450110 3
##
## Value C F L
## Frequency 308791 80 111864
## Proportion 0.734 0.000 0.266
## --------------------------------------------------------------------------------
## DISTANCE : DISTANCE DONOR HOSP TO TX CENTER (Nautical Miles) Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 416460 454385 2601 0.957 180 292.6 0 0
## .25 .50 .75 .90 .95
## 0 12 139 577 1064
##
## lowest : 0 1 2 3 4, highest: 4365 4383 4407 4408 4409
## --------------------------------------------------------------------------------
## DIAG_KI : Kidney Diagnosis from TRR/TCR Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 414453 456392 74 0.99 2904 264.1 999 3004
## .25 .50 .75 .90 .95
## 3008 3037 3041 3070 3070
##
## lowest : 999 3000 3001 3002 3003, highest: 3070 3071 3072 3073 3074
##
## Value 1000 3000 3005 3010 3015 3020 3025 3030 3035
## Frequency 26353 8286 48392 68810 2065 13801 6427 8198 38187
## Proportion 0.064 0.020 0.117 0.166 0.005 0.033 0.016 0.020 0.092
##
## Value 3040 3045 3050 3055 3060 3065 3070 3075
## Frequency 107936 3585 9207 3254 1113 1156 66482 1201
## Proportion 0.260 0.009 0.022 0.008 0.003 0.003 0.160 0.003
##
## For the frequency table, variable is rounded to the nearest 5
## --------------------------------------------------------------------------------
## locality_type : Share Type Format:F1.0
## n missing distinct Info Mean Gmd
## 420735 450110 4 0.5 3.338 0.5554
##
## Value 3 4 5 6
## Frequency 333384 32712 54559 80
## Proportion 0.792 0.078 0.130 0.000
## --------------------------------------------------------------------------------
## age_group Format:A3
## n missing distinct
## 420735 450110 2
##
## Value A P
## Frequency 403549 17186
## Proportion 0.959 0.041
## --------------------------------------------------------------------------------
## age_group_fac
## n missing distinct
## 870845 0 3
##
## Value A P
## Frequency 403549 17186 450110
## Proportion 0.463 0.020 0.517
## --------------------------------------------------------------------------------
## donor_type_fac : DONOR TYPE - DECEASED, LIVING OR FOREIGN
## n missing distinct
## 870845 0 4
##
## Value deceased donor living donor foreign missing
## Frequency 308791 111864 80 450110
## Proportion 0.355 0.128 0.000 0.517
## --------------------------------------------------------------------------------
## region_fac
## n missing distinct
## 870845 0 11
##
## lowest : 1 2 3 4 5 , highest: 7 8 9 10 11
##
## Value 1 2 3 4 5 6 7 8 9
## Frequency 34021 124644 111934 82781 158773 24058 79663 44112 64780
## Proportion 0.039 0.143 0.129 0.095 0.182 0.028 0.091 0.051 0.074
##
## Value 10 11
## Frequency 68805 77274
## Proportion 0.079 0.089
## --------------------------------------------------------------------------------
## working_income_fac : TCR WORKING FOR INCOME:
## n missing distinct
## 870845 0 4
##
## Value missing unknown no yes
## Frequency 331752 24967 339953 174173
## Proportion 0.381 0.029 0.390 0.200
## --------------------------------------------------------------------------------
## payment_method_fac
## n missing distinct
## 769863 100982 14
##
## lowest : Private Insurance Public InsuranceMedicaid PublicMedicare FFS PublicMedicareCh PublicCHIP
## highest: Free Care Pending Foreign Govt PublicMedicareunknowntype StateGovtAgency
##
## Private Insurance (333109, 0.433), Public InsuranceMedicaid (62423, 0.081),
## PublicMedicare FFS (179422, 0.233), PublicMedicareCh (70903, 0.092), PublicCHIP
## (669, 0.001), PublicVA (7084, 0.009), PublicOthrGovt (5139, 0.007), Self (2319,
## 0.003), Donation (96, 0.000), Free Care (450, 0.001), Pending (2723, 0.004),
## Foreign Govt (731, 0.001), PublicMedicareunknowntype (101461, 0.132),
## StateGovtAgency (3334, 0.004)
## --------------------------------------------------------------------------------
## dialysis_fac : WL MOST RECENT CANDIDATE ON DIALYSIS?
## n missing distinct
## 870845 0 3
##
## Value missing no yes
## Frequency 42724 276310 551811
## Proportion 0.049 0.317 0.634
## --------------------------------------------------------------------------------
## gender_fac : TCR RECIPIENT GENDER
## n missing distinct
## 870845 0 2
##
## Value Male Female
## Frequency 525254 345591
## Proportion 0.603 0.397
## --------------------------------------------------------------------------------
## abo_type_fac : TCR ABO BLOOD GROUP
## n missing distinct
## 870845 0 9
##
## lowest : O B AB A A1
## highest: A1 A2 Unknown A2B A1B
##
## Value O B AB A A1 A2 Unknown A2B
## Frequency 423000 126724 32466 281281 5792 910 8 170
## Proportion 0.486 0.146 0.037 0.323 0.007 0.001 0.000 0.000
##
## Value A1B
## Frequency 494
## Proportion 0.001
## --------------------------------------------------------------------------------
## education_fac
## n missing distinct
## 770184 100661 8
##
## lowest : None Grade School Highschool or GED Attended Some College Associate/Bachelor Degree
## highest: Attended Some College Associate/Bachelor Degree Post-Graduate N/A<5years old unknown
##
## None (4518, 0.006), Grade School (49816, 0.065), Highschool or GED (290142,
## 0.377), Attended Some College (170305, 0.221), Associate/Bachelor Degree
## (116596, 0.151), Post-Graduate (50015, 0.065), N/A<5years old (3670, 0.005),
## unknown (85122, 0.111)
## --------------------------------------------------------------------------------
## race_fac
## n missing distinct
## 870845 0 8
##
## lowest : White Black Hispanic Asian Amer Ind/Alaska Native
## highest: Asian Amer Ind/Alaska Native Native Hawaiin/other Multi-racial Unknown
##
## White (428439, 0.492), Black (243839, 0.280), Hispanic (129948, 0.149), Asian
## (50758, 0.058), Amer Ind/Alaska Native (8108, 0.009), Native Hawaiin/other
## (3971, 0.005), Multi-racial (5771, 0.007), Unknown (11, 0.000)
## --------------------------------------------------------------------------------
## ethnicity_fac
## n missing distinct
## 870845 0 2
##
## Value Non-Hispanic Hispanic
## Frequency 738094 132751
## Proportion 0.848 0.152
## --------------------------------------------------------------------------------
## functional_status_fac
## n missing distinct
## 853114 17731 25
##
## lowest : 1 2 3 996 998 , highest: 4060 4070 4080 4090 4100
## --------------------------------------------------------------------------------
## organ_type_fac : ORGAN LISTED FOR
## n missing distinct value
## 870845 0 1 Kidney
##
## Value Kidney
## Frequency 870845
## Proportion 1
## --------------------------------------------------------------------------------
## locality_type_fac
## n missing distinct
## 420735 450110 4
##
## Value Local Regional National Foreign
## Frequency 333384 32712 54559 80
## Proportion 0.792 0.078 0.130 0.000
## --------------------------------------------------------------------------------
## state_residence_fac : TCR State of Permanent Residence at Listing
## n missing distinct
## 870845 0 59
##
## lowest : PA IL FL HI WA, highest: MT NV GU AS MP
## --------------------------------------------------------------------------------
kidney.clean1 <- kidney.cleanfactors %>%
filter(!(organ_type_fac %in%
c("unknown", "KP", "P", "PI")))%>%
filter(!(locality_type_fac %in%
c("Foreign"))) %>%
filter(!(race_fac %in%
c("Hispanic", "Asian", "Amer Ind/Alaska Native", "Native Hawaiin/other", "Multi-racial", "Unknown"))) %>%
filter(!(education_fac %in%
c("None", "N/A<5years old", "unknown"))) %>%
filter(!(abo_type_fac %in%
c("A1", "A1B", "A2", "A2B", "Unknown"))) %>%
filter(!(dialysis_fac %in%
c("missing", "unknown"))) %>%
filter(!(working_income_fac %in%
c("missing", "unknown"))) %>%
filter(!(donor_type_fac %in%
c("foreign", "missing"))) %>%
filter(!(payment_method_fac %in%
c("Self", "Donation", "Free Care", "Pending", "Foreign Govt")))
describe(kidney.clean1)
## kidney.clean1
##
## 35 Variables 156144 Observations
## --------------------------------------------------------------------------------
## organ_type : ORGAN LISTED FOR Format:A12
## n missing distinct value
## 156144 0 1 KI
##
## Value KI
## Frequency 156144
## Proportion 1
## --------------------------------------------------------------------------------
## dialysis : WL MOST RECENT CANDIDATE ON DIALYSIS? Format:A3
## n missing distinct
## 156144 0 2
##
## Value N Y
## Frequency 48517 107627
## Proportion 0.311 0.689
## --------------------------------------------------------------------------------
## gender : TCR RECIPIENT GENDER Format:A3
## n missing distinct
## 156144 0 2
##
## Value F M
## Frequency 60236 95908
## Proportion 0.386 0.614
## --------------------------------------------------------------------------------
## abo_type : TCR ABO BLOOD GROUP Format:A9
## n missing distinct
## 156144 0 4
##
## Value A AB B O
## Frequency 59726 8037 20602 67779
## Proportion 0.383 0.051 0.132 0.434
## --------------------------------------------------------------------------------
## state_residence : TCR State of Permanent Residence at Listing Format:A6
## n missing distinct
## 156142 2 56
##
## lowest : AK AL AR AS AZ, highest: WA WI WV WY ZZ
## --------------------------------------------------------------------------------
## education : TCR HIGHEST EDUCATION LEVEL: Format:F3.0
## n missing distinct Info Mean Gmd
## 156134 10 5 0.904 3.952 1.115
##
## lowest : 2 3 4 5 6, highest: 2 3 4 5 6
##
## Value 2 3 4 5 6
## Frequency 3097 62652 43567 32345 14473
## Proportion 0.020 0.401 0.279 0.207 0.093
## --------------------------------------------------------------------------------
## functional_status : TCR FUNCTIONAL STATUS @ LISTING Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 156142 2 16 0.953 1970 219.8 998 2050
## .25 .50 .75 .90 .95
## 2070 2080 2090 2100 2100
##
## lowest : 1 2 3 996 998, highest: 2070 2080 2090 2100 4100
##
## Value 1 2 3 996 998 2010 2020 2030 2040 2050 2060
## Frequency 4862 386 8 115 5663 122 920 526 1884 3718 8726
## Proportion 0.031 0.002 0.000 0.001 0.036 0.001 0.006 0.003 0.012 0.024 0.056
##
## Value 2070 2080 2090 2100 4100
## Frequency 27795 44086 39921 17409 1
## Proportion 0.178 0.282 0.256 0.111 0.000
## --------------------------------------------------------------------------------
## waitlist_days : TOTAL DAYS ON WAITING LIST/INCLUDING INACTIVE TIME Format:F5.0
## n missing distinct Info Mean Gmd .05 .10
## 156144 0 3694 1 620.5 670.4 17 42
## .25 .50 .75 .90 .95
## 130 382 921 1543 1954
##
## lowest : 0 1 2 3 4, highest: 7091 7120 7657 8783 9381
## --------------------------------------------------------------------------------
## age_init : CALCULATED AGE AT LISTING Format:F2.0
## n missing distinct Info Mean Gmd .05 .10
## 156144 0 73 0.999 50.75 15.06 26 31
## .25 .50 .75 .90 .95
## 42 52 61 67 70
##
## lowest : 8 18 19 20 21, highest: 85 86 87 88 89
## --------------------------------------------------------------------------------
## ethnicity : TCR ETHNICITY Format:F1.0
## n missing distinct Info Mean Gmd
## 156144 0 1 0 0 0
##
## Value 0
## Frequency 156144
## Proportion 1
## --------------------------------------------------------------------------------
## race_binary : ETHNICITY CATEGORY Format:F3.0
## n missing distinct Info Mean Gmd
## 156144 0 2 0.671 1.338 0.4476
##
## Value 1 2
## Frequency 103346 52798
## Proportion 0.662 0.338
## --------------------------------------------------------------------------------
## PT_CODE : ENCRYPTED PATIENT IDENTIFIER Format:F7.0
## n missing distinct Info Mean Gmd .05 .10
## 156144 0 152422 1 801310 324099 188835 380083
## .25 .50 .75 .90 .95
## 641304 824442 1019134 1159733 1217668
##
## lowest : 17 20 22 51 55
## highest: 1314759 1315216 1315395 1315814 1323019
## --------------------------------------------------------------------------------
## region Format:F2.0
## n missing distinct Info Mean Gmd .05 .10
## 156144 0 11 0.988 5.964 3.768 2 2
## .25 .50 .75 .90 .95
## 3 5 9 11 11
##
## lowest : 1 2 3 4 5, highest: 7 8 9 10 11
##
## Value 1 2 3 4 5 6 7 8 9 10 11
## Frequency 7258 23185 22527 10894 15640 4893 14991 10451 11191 16607 18507
## Proportion 0.046 0.148 0.144 0.070 0.100 0.031 0.096 0.067 0.072 0.106 0.119
## --------------------------------------------------------------------------------
## working_income : TCR WORKING FOR INCOME: Format:A3
## n missing distinct
## 156144 0 2
##
## Value N Y
## Frequency 92973 63171
## Proportion 0.595 0.405
## --------------------------------------------------------------------------------
## payment_method : TCR KIDNEY PRIMARY PROJECTED SOURCE PAY Format:F2.0
## n missing distinct Info Mean Gmd
## 156138 6 9 0.84 2.121 1.318
##
## lowest : 1 2 3 4 5, highest: 5 6 7 13 14
##
## Value 1 2 3 4 5 6 7 13 14
## Frequency 78668 8002 48752 17796 7 1670 1182 59 2
## Proportion 0.504 0.051 0.312 0.114 0.000 0.011 0.008 0.000 0.000
## --------------------------------------------------------------------------------
## donor_type : DONOR TYPE - DECEASED, LIVING OR FOREIGN Format:A9
## n missing distinct
## 156144 0 2
##
## Value C L
## Frequency 102368 53776
## Proportion 0.656 0.344
## --------------------------------------------------------------------------------
## DISTANCE : DISTANCE DONOR HOSP TO TX CENTER (Nautical Miles) Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 156134 10 2353 0.933 161.4 264.4 0 0
## .25 .50 .75 .90 .95
## 0 8 121 500 921
##
## lowest : 0 1 2 3 4, highest: 4340 4346 4347 4407 4409
## --------------------------------------------------------------------------------
## DIAG_KI : Kidney Diagnosis from TRR/TCR Format:F4.0
## n missing distinct Info Mean Gmd .05 .10
## 155512 632 72 0.981 2928 233.1 999 3004
## .25 .50 .75 .90 .95
## 3008 3040 3069 3070 3070
##
## lowest : 999 3000 3001 3002 3003, highest: 3070 3071 3072 3073 3074
##
## Value 1000 3000 3005 3010 3015 3020 3025 3030 3035 3040 3045
## Frequency 8428 2442 19356 17533 840 4615 1467 2456 13633 37441 1258
## Proportion 0.054 0.016 0.124 0.113 0.005 0.030 0.009 0.016 0.088 0.241 0.008
##
## Value 3050 3055 3060 3065 3070 3075
## Frequency 3230 1511 504 756 38973 1069
## Proportion 0.021 0.010 0.003 0.005 0.251 0.007
##
## For the frequency table, variable is rounded to the nearest 5
## --------------------------------------------------------------------------------
## locality_type : Share Type Format:F1.0
## n missing distinct Info Mean Gmd
## 156144 0 3 0.457 3.294 0.4955
##
## Value 3 4 5
## Frequency 127245 11917 16982
## Proportion 0.815 0.076 0.109
## --------------------------------------------------------------------------------
## age_group Format:A3
## n missing distinct
## 156144 0 2
##
## Value A P
## Frequency 156143 1
## Proportion 1 0
## --------------------------------------------------------------------------------
## age_group_fac
## n missing distinct
## 156144 0 2
##
## Value A P
## Frequency 156143 1
## Proportion 1 0
## --------------------------------------------------------------------------------
## donor_type_fac : DONOR TYPE - DECEASED, LIVING OR FOREIGN
## n missing distinct
## 156144 0 2
##
## Value deceased donor living donor
## Frequency 102368 53776
## Proportion 0.656 0.344
## --------------------------------------------------------------------------------
## region_fac
## n missing distinct
## 156144 0 11
##
## lowest : 1 2 3 4 5 , highest: 7 8 9 10 11
##
## Value 1 2 3 4 5 6 7 8 9 10 11
## Frequency 7258 23185 22527 10894 15640 4893 14991 10451 11191 16607 18507
## Proportion 0.046 0.148 0.144 0.070 0.100 0.031 0.096 0.067 0.072 0.106 0.119
## --------------------------------------------------------------------------------
## working_income_fac : TCR WORKING FOR INCOME:
## n missing distinct
## 156144 0 2
##
## Value no yes
## Frequency 92973 63171
## Proportion 0.595 0.405
## --------------------------------------------------------------------------------
## payment_method_fac
## n missing distinct
## 156138 6 9
##
## lowest : Private Insurance Public InsuranceMedicaid PublicMedicare FFS PublicMedicareCh PublicCHIP
## highest: PublicCHIP PublicVA PublicOthrGovt PublicMedicareunknowntype StateGovtAgency
##
## Private Insurance (78668, 0.504), Public InsuranceMedicaid (8002, 0.051),
## PublicMedicare FFS (48752, 0.312), PublicMedicareCh (17796, 0.114), PublicCHIP
## (7, 0.000), PublicVA (1670, 0.011), PublicOthrGovt (1182, 0.008),
## PublicMedicareunknowntype (59, 0.000), StateGovtAgency (2, 0.000)
## --------------------------------------------------------------------------------
## dialysis_fac : WL MOST RECENT CANDIDATE ON DIALYSIS?
## n missing distinct
## 156144 0 2
##
## Value no yes
## Frequency 48517 107627
## Proportion 0.311 0.689
## --------------------------------------------------------------------------------
## gender_fac : TCR RECIPIENT GENDER
## n missing distinct
## 156144 0 2
##
## Value Male Female
## Frequency 95908 60236
## Proportion 0.614 0.386
## --------------------------------------------------------------------------------
## abo_type_fac : TCR ABO BLOOD GROUP
## n missing distinct
## 156144 0 4
##
## Value O B AB A
## Frequency 67779 20602 8037 59726
## Proportion 0.434 0.132 0.051 0.383
## --------------------------------------------------------------------------------
## education_fac
## n missing distinct
## 156134 10 5
##
## lowest : Grade School Highschool or GED Attended Some College Associate/Bachelor Degree Post-Graduate
## highest: Grade School Highschool or GED Attended Some College Associate/Bachelor Degree Post-Graduate
##
## Grade School (3097, 0.020), Highschool or GED (62652, 0.401), Attended Some
## College (43567, 0.279), Associate/Bachelor Degree (32345, 0.207), Post-Graduate
## (14473, 0.093)
## --------------------------------------------------------------------------------
## race_fac
## n missing distinct
## 156144 0 2
##
## Value White Black
## Frequency 103346 52798
## Proportion 0.662 0.338
## --------------------------------------------------------------------------------
## ethnicity_fac
## n missing distinct value
## 156144 0 1 Non-Hispanic
##
## Value Non-Hispanic
## Frequency 156144
## Proportion 1
## --------------------------------------------------------------------------------
## functional_status_fac
## n missing distinct
## 156142 2 16
##
## lowest : 1 2 3 996 998 , highest: 2070 2080 2090 2100 4100
##
## Value 1 2 3 996 998 2010 2020 2030 2040 2050 2060
## Frequency 4862 386 8 115 5663 122 920 526 1884 3718 8726
## Proportion 0.031 0.002 0.000 0.001 0.036 0.001 0.006 0.003 0.012 0.024 0.056
##
## Value 2070 2080 2090 2100 4100
## Frequency 27795 44086 39921 17409 1
## Proportion 0.178 0.282 0.256 0.111 0.000
## --------------------------------------------------------------------------------
## organ_type_fac : ORGAN LISTED FOR
## n missing distinct value
## 156144 0 1 Kidney
##
## Value Kidney
## Frequency 156144
## Proportion 1
## --------------------------------------------------------------------------------
## locality_type_fac
## n missing distinct
## 156144 0 3
##
## Value Local Regional National
## Frequency 127245 11917 16982
## Proportion 0.815 0.076 0.109
## --------------------------------------------------------------------------------
## state_residence_fac : TCR State of Permanent Residence at Listing
## n missing distinct
## 156144 0 57
##
## lowest : PA IL FL HI WA, highest: UT NH MT NV AS
## --------------------------------------------------------------------------------
glimpse(kidney.clean1)
## Rows: 156,144
## Columns: 35
## $ organ_type <chr> "KI", "KI", "KI", "KI", "KI", "KI", "KI", "KI",…
## $ dialysis <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N…
## $ gender <chr> "F", "F", "M", "M", "M", "F", "M", "F", "F", "F…
## $ abo_type <chr> "B", "O", "A", "A", "O", "O", "A", "A", "O", "O…
## $ state_residence <chr> "FL", "NC", "CA", "NC", "CA", "OH", "NC", "IL",…
## $ education <dbl> 5, 3, 6, 3, 4, 3, 3, 2, NA, 3, NA, NA, 3, 4, 4,…
## $ functional_status <dbl> 2090, 2100, 2090, 2, 2100, 1, 2, 2100, 2100, 1,…
## $ waitlist_days <dbl> 158, 982, 352, 68, 65, 85, 14, 0, 479, 1175, 0,…
## $ age_init <dbl> 32, 47, 66, 32, 21, 22, 31, 45, 35, 25, 28, 46,…
## $ ethnicity <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ race_binary <dbl> 1, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1,…
## $ PT_CODE <dbl> 429440, 428022, 113034, 443431, 398160, 122076,…
## $ region <dbl> 3, 11, 5, 11, 5, 10, 11, 7, 10, 3, 1, 6, 10, 3,…
## $ working_income <chr> "Y", "N", "N", "Y", "N", "Y", "N", "N", "Y", "N…
## $ payment_method <dbl> 1, 1, 1, 3, 3, 3, 3, 3, 1, 4, 2, NA, 3, 13, 2, …
## $ donor_type <chr> "C", "C", "C", "C", "C", "C", "C", "C", "C", "C…
## $ DISTANCE <dbl> 148, 8, 20, 732, 59, 205, 412, 3, 1, 114, 42, 8…
## $ DIAG_KI <dbl> 3041, 3008, 3009, 3031, 3040, 3030, 3031, 3034,…
## $ locality_type <dbl> 4, 3, 3, 5, 3, 4, 5, 3, 3, 4, 3, 3, 3, 5, 3, 3,…
## $ age_group <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A…
## $ age_group_fac <fct> A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,…
## $ donor_type_fac <fct> deceased donor, deceased donor, deceased donor,…
## $ region_fac <fct> 3, 11, 5, 11, 5, 10, 11, 7, 10, 3, 1, 6, 10, 3,…
## $ working_income_fac <fct> yes, no, no, yes, no, yes, no, no, yes, no, no,…
## $ payment_method_fac <fct> Private Insurance, Private Insurance, Private I…
## $ dialysis_fac <fct> no, no, no, no, no, no, no, no, no, no, no, no,…
## $ gender_fac <fct> Female, Female, Male, Male, Male, Female, Male,…
## $ abo_type_fac <fct> B, O, A, A, O, O, A, A, O, O, O, O, A, O, A, O,…
## $ education_fac <fct> Associate/Bachelor Degree, Highschool or GED, P…
## $ race_fac <fct> White, Black, White, White, Black, White, White…
## $ ethnicity_fac <fct> Non-Hispanic, Non-Hispanic, Non-Hispanic, Non-H…
## $ functional_status_fac <fct> 2090, 2100, 2090, 2, 2100, 1, 2, 2100, 2100, 1,…
## $ organ_type_fac <fct> Kidney, Kidney, Kidney, Kidney, Kidney, Kidney,…
## $ locality_type_fac <fct> Regional, Local, Local, National, Local, Region…
## $ state_residence_fac <fct> FL, NC, CA, NC, CA, OH, NC, IL, ZZ, ZZ, MA, OR,…
library(tidyverse)
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
##
## describe
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(lme4)
hist(kidney.clean1$age_init,
main = "Distribution of age at registration for kidney transplant",
sub = "N = 156,144 Ages of patients. Data from UNOS, STAR data.",
xlab = "Ages in years")
hist(kidney.clean1$waitlist_days,
main = "Distribution of days spent on kidney waitlist",
sub = "N = 156,144 Days on waitlist. Data from UNOS, STAR data.",
xlab = "Total # of day waiting")
table(kidney.clean1$state_residence_fac)
##
## PA IL FL HI WA VA CA OH LA MA ZZ DE OR
## 9029 6953 9089 97 2630 5439 10207 7040 2538 3559 14 600 1416
## WY IN NM NY MD WV AR IA OK RI KY DC NE
## 210 3465 429 11221 4914 982 1394 1979 1699 514 1805 508 1223
## WI AZ CO MO AL NC SC TX CT KS TN MN
## 3623 3017 2179 3186 2782 5544 2 3125 9130 1910 1062 3595 3031
## NJ NA ND MI AK ID ME VT VI GA MS SD PR
## 5489 103 363 6610 205 574 633 200 54 5358 1955 503 7
## UT NH MT NV GU AS MP
## 1085 650 390 824 0 1 0
##Null Model with states
models <- list()
model.0 <- lmer(waitlist_days ~ (1|state_residence_fac), REML = FALSE, data = kidney.clean1)
summary(model.0)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: waitlist_days ~ (1 | state_residence_fac)
## Data: kidney.clean1
##
## AIC BIC logLik deviance df.resid
## 2467791 2467821 -1233892 2467785 156141
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.2433 -0.7306 -0.3484 0.4701 13.5579
##
## Random effects:
## Groups Name Variance Std.Dev.
## state_residence_fac (Intercept) 9862 99.31
## Residual 427379 653.74
## Number of obs: 156144, groups: state_residence_fac, 57
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 590.32 13.98 42.22
null.ICC <- 9862/(9862 + 427379)
null.ICC
## [1] 0.02255507
#null model w/region ICC =.002) sticking with states!
model.null <- lmer(waitlist_days ~ (1|region_fac), REML = FALSE, data = kidney.clean1)
summary(model.null)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: waitlist_days ~ (1 | region_fac)
## Data: kidney.clean1
##
## AIC BIC logLik deviance df.resid
## 2469732 2469762 -1234863 2469726 156141
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.0088 -0.7409 -0.3638 0.4585 13.3721
##
## Random effects:
## Groups Name Variance Std.Dev.
## region_fac (Intercept) 1047 32.36
## Residual 433158 658.15
## Number of obs: 156144, groups: region_fac, 11
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 618.376 9.928 62.29
lmerTest to Evaluate Random Effects:lmerTest::rand(model.0)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## waitlist_days ~ (1 | state_residence_fac)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 3 -1233892 2467791
## (1 | state_residence_fac) 2 -1235001 2470006 2217.5 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.1 <- lmer(waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac + (1|state_residence_fac), REML = FALSE, data = kidney.clean1)
summary(model.1)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac +
## (1 | state_residence_fac)
## Data: kidney.clean1
##
## AIC BIC logLik deviance df.resid
## 2459434 2459523 -1229708 2459416 156135
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6742 -0.6939 -0.3021 0.4810 13.7981
##
## Random effects:
## Groups Name Variance Std.Dev.
## state_residence_fac (Intercept) 8117 90.1
## Residual 405093 636.5
## Number of obs: 156144, groups: state_residence_fac, 57
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 748.1171 14.4358 51.824
## age_init -3.1594 0.1228 -25.732
## race_facBlack 244.5338 3.6324 67.321
## abo_type_facB -24.8685 5.0831 -4.892
## abo_type_facAB -266.8640 7.5157 -35.508
## abo_type_facA -131.3870 3.5990 -36.507
## gender_facFemale 6.5721 3.3134 1.983
##
## Correlation of Fixed Effects:
## (Intr) age_nt rc_fcB ab_t_B ab__AB ab_t_A
## age_init -0.438
## race_fcBlck -0.115 0.095
## abo_typ_fcB -0.070 -0.013 -0.079
## ab_typ_fcAB -0.046 -0.020 -0.006 0.158
## abo_typ_fcA -0.117 -0.018 0.100 0.319 0.222
## gendr_fcFml -0.102 0.030 -0.018 0.014 0.007 0.011
model.2 <- lmer(waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac + payment_method_fac + donor_type_fac + region_fac + working_income_fac + dialysis_fac + education_fac + locality_type_fac + payment_method_fac + (1|state_residence_fac), REML = FALSE, data = kidney.clean1)
summary(model.2)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac +
## payment_method_fac + donor_type_fac + region_fac + working_income_fac +
## dialysis_fac + education_fac + locality_type_fac + payment_method_fac +
## (1 | state_residence_fac)
## Data: kidney.clean1
##
## AIC BIC logLik deviance df.resid
## 2440537 2440896 -1220233 2440465 156098
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.1938 -0.6598 -0.1860 0.4559 14.6642
##
## Random effects:
## Groups Name Variance Std.Dev.
## state_residence_fac (Intercept) 11542 107.4
## Residual 359100 599.2
## Number of obs: 156134, groups: state_residence_fac, 57
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1168.0688 29.7558 39.255
## age_init -4.1607 0.1219 -34.133
## race_facBlack 130.8743 3.6286 36.068
## abo_type_facB -16.6387 4.7882 -3.475
## abo_type_facAB -299.5063 7.0832 -42.284
## abo_type_facA -140.6105 3.3909 -41.467
## gender_facFemale 9.6067 3.1442 3.055
## payment_method_facPublic InsuranceMedicaid -41.6684 7.3625 -5.660
## payment_method_facPublicMedicare FFS -119.7092 3.9150 -30.577
## payment_method_facPublicMedicareCh -78.2294 5.3200 -14.705
## payment_method_facPublicCHIP -347.3413 226.6246 -1.533
## payment_method_facPublicVA -1.3045 15.5871 -0.084
## payment_method_facPublicOthrGovt -86.2251 17.6622 -4.882
## payment_method_facPublicMedicareunknowntype 85.2222 78.1322 1.091
## payment_method_facStateGovtAgency -454.7059 426.5394 -1.066
## donor_type_facliving donor -506.3443 3.7553 -134.834
## region_fac2 -101.9603 24.5765 -4.149
## region_fac3 -116.0996 26.1561 -4.439
## region_fac4 -143.5089 32.5809 -4.405
## region_fac5 -7.6698 30.5983 -0.251
## region_fac6 -57.0078 34.8710 -1.635
## region_fac7 -119.4984 27.6505 -4.322
## region_fac8 -190.5175 28.3204 -6.727
## region_fac9 -67.0145 24.5415 -2.731
## region_fac10 -82.9880 27.5949 -3.007
## region_fac11 -179.9094 25.8055 -6.972
## working_income_facyes 54.3605 3.5720 15.219
## dialysis_facyes 18.0189 3.6393 4.951
## education_facHighschool or GED 1.7638 11.0791 0.159
## education_facAttended Some College -19.9793 11.2387 -1.778
## education_facAssociate/Bachelor Degree -35.5921 11.4061 -3.120
## education_facPost-Graduate -35.1780 12.0206 -2.926
## locality_type_facRegional -154.5246 5.9961 -25.771
## locality_type_facNational -176.8417 5.2113 -33.935
##
## Correlation matrix not shown by default, as p = 34 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
race_fac and region_facmodel.3 <- lmer(waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac + payment_method_fac + donor_type_fac + region_fac + working_income_fac + dialysis_fac + education_fac + locality_type_fac + payment_method_fac + race_fac:region_fac + (1|state_residence_fac), REML = FALSE, data = kidney.clean1)
summary(model.3)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac +
## payment_method_fac + donor_type_fac + region_fac + working_income_fac +
## dialysis_fac + education_fac + locality_type_fac + payment_method_fac +
## race_fac:region_fac + (1 | state_residence_fac)
## Data: kidney.clean1
##
## AIC BIC logLik deviance df.resid
## 2440187 2440645 -1220048 2440095 156088
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.2173 -0.6589 -0.1844 0.4561 14.7022
##
## Random effects:
## Groups Name Variance Std.Dev.
## state_residence_fac (Intercept) 11090 105.3
## Residual 358254 598.5
## Number of obs: 156134, groups: state_residence_fac, 57
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1143.4271 29.7452 38.441
## age_init -4.1375 0.1218 -33.972
## race_facBlack 220.0565 18.2536 12.056
## abo_type_facB -16.0795 4.7831 -3.362
## abo_type_facAB -299.1407 7.0758 -42.277
## abo_type_facA -140.7723 3.3870 -41.562
## gender_facFemale 9.1638 3.1416 2.917
## payment_method_facPublic InsuranceMedicaid -45.2032 7.3611 -6.141
## payment_method_facPublicMedicare FFS -119.3317 3.9118 -30.505
## payment_method_facPublicMedicareCh -78.3391 5.3153 -14.738
## payment_method_facPublicCHIP -331.9611 226.3765 -1.466
## payment_method_facPublicVA 7.3187 15.5949 0.469
## payment_method_facPublicOthrGovt -89.2746 17.6496 -5.058
## payment_method_facPublicMedicareunknowntype 86.8837 78.0454 1.113
## payment_method_facStateGovtAgency -462.7279 425.9665 -1.086
## donor_type_facliving donor -506.9768 3.7521 -135.119
## region_fac2 -66.3274 24.9245 -2.661
## region_fac3 -106.1609 26.4974 -4.006
## region_fac4 -114.0882 32.8669 -3.471
## region_fac5 5.6867 30.6982 0.185
## region_fac6 -18.5270 35.0750 -0.528
## region_fac7 -114.2948 27.8338 -4.106
## region_fac8 -140.3065 28.5536 -4.914
## region_fac9 -76.5451 24.8950 -3.075
## region_fac10 -44.1774 27.8053 -1.589
## region_fac11 -150.4766 26.2479 -5.733
## working_income_facyes 54.7766 3.5680 15.352
## dialysis_facyes 17.6835 3.6352 4.864
## education_facHighschool or GED 4.6797 11.0691 0.423
## education_facAttended Some College -16.4393 11.2293 -1.464
## education_facAssociate/Bachelor Degree -31.6635 11.3965 -2.778
## education_facPost-Graduate -30.3841 12.0125 -2.529
## locality_type_facRegional -154.5841 5.9908 -25.803
## locality_type_facNational -179.9523 5.2148 -34.508
## race_facBlack:region_fac2 -130.7419 20.0402 -6.524
## race_facBlack:region_fac3 -65.5048 19.9137 -3.289
## race_facBlack:region_fac4 -120.7868 21.7627 -5.550
## race_facBlack:region_fac5 -33.1259 21.7110 -1.526
## race_facBlack:region_fac6 -206.3109 32.5475 -6.339
## race_facBlack:region_fac7 4.3984 21.8904 0.201
## race_facBlack:region_fac8 -224.5291 23.3844 -9.602
## race_facBlack:region_fac9 16.3925 21.7015 0.755
## race_facBlack:region_fac10 -152.2610 20.8692 -7.296
## race_facBlack:region_fac11 -105.7348 20.2818 -5.213
##
## Correlation matrix not shown by default, as p = 44 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
table(kidney.clean1$race_fac)
##
## White Black Hispanic
## 103346 52798 0
## Asian Amer Ind/Alaska Native Native Hawaiin/other
## 0 0 0
## Multi-racial Unknown
## 0 0
model.2 with model.3:anova(model.2, model.3)
## Data: kidney.clean1
## Models:
## model.2: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac +
## model.2: payment_method_fac + donor_type_fac + region_fac + working_income_fac +
## model.2: dialysis_fac + education_fac + locality_type_fac + payment_method_fac +
## model.2: (1 | state_residence_fac)
## model.3: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac +
## model.3: payment_method_fac + donor_type_fac + region_fac + working_income_fac +
## model.3: dialysis_fac + education_fac + locality_type_fac + payment_method_fac +
## model.3: race_fac:region_fac + (1 | state_residence_fac)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model.2 36 2440537 2440896 -1220233 2440465
## model.3 46 2440187 2440645 -1220048 2440095 370.21 10 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.4 <- lmer(waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac + donor_type_fac + region_fac + working_income_fac + dialysis_fac + education_fac + locality_type_fac + payment_method_fac + abo_type_fac:race_fac + (1|state_residence_fac), REML = FALSE, data = kidney.clean1)
summary(model.4)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac +
## donor_type_fac + region_fac + working_income_fac + dialysis_fac +
## education_fac + locality_type_fac + payment_method_fac +
## abo_type_fac:race_fac + (1 | state_residence_fac)
## Data: kidney.clean1
##
## AIC BIC logLik deviance df.resid
## 2440400 2440789 -1220161 2440322 156095
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.2217 -0.6572 -0.1856 0.4539 14.6872
##
## Random effects:
## Groups Name Variance Std.Dev.
## state_residence_fac (Intercept) 11552 107.5
## Residual 358771 599.0
## Number of obs: 156134, groups: state_residence_fac, 57
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1159.7098 29.7765 38.947
## age_init -4.1653 0.1218 -34.187
## race_facBlack 154.6677 5.0432 30.669
## abo_type_facB -28.3695 6.4872 -4.373
## abo_type_facAB -253.3974 8.9572 -28.290
## abo_type_facA -122.7956 4.0546 -30.285
## gender_facFemale 9.6617 3.1429 3.074
## donor_type_facliving donor -506.0810 3.7543 -134.800
## region_fac2 -101.7058 24.5689 -4.140
## region_fac3 -115.8045 26.1483 -4.429
## region_fac4 -144.7423 32.5707 -4.444
## region_fac5 -8.0079 30.5903 -0.262
## region_fac6 -57.3398 34.8618 -1.645
## region_fac7 -118.8662 27.6427 -4.300
## region_fac8 -190.3373 28.3125 -6.723
## region_fac9 -66.4921 24.5330 -2.710
## region_fac10 -82.2540 27.5864 -2.982
## region_fac11 -180.4044 25.7979 -6.993
## working_income_facyes 54.5259 3.5704 15.272
## dialysis_facyes 18.0247 3.6376 4.955
## education_facHighschool or GED 1.4479 11.0740 0.131
## education_facAttended Some College -20.3060 11.2336 -1.808
## education_facAssociate/Bachelor Degree -35.8349 11.4009 -3.143
## education_facPost-Graduate -35.5149 12.0153 -2.956
## locality_type_facRegional -154.6426 5.9934 -25.802
## locality_type_facNational -176.9271 5.2091 -33.965
## payment_method_facPublic InsuranceMedicaid -41.6610 7.3592 -5.661
## payment_method_facPublicMedicare FFS -119.7221 3.9132 -30.594
## payment_method_facPublicMedicareCh -78.0238 5.3176 -14.673
## payment_method_facPublicCHIP -343.7990 226.5221 -1.518
## payment_method_facPublicVA -0.5974 15.5802 -0.038
## payment_method_facPublicOthrGovt -85.6412 17.6542 -4.851
## payment_method_facPublicMedicareunknowntype 84.3044 78.0966 1.079
## payment_method_facStateGovtAgency -444.3041 426.3497 -1.042
## race_facBlack:abo_type_facB 18.5547 9.6262 1.928
## race_facBlack:abo_type_facAB -123.0991 14.6055 -8.428
## race_facBlack:abo_type_facA -59.8320 7.3981 -8.087
##
## Correlation matrix not shown by default, as p = 37 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
model.2&model.4:anova(model.2, model.4)
## Data: kidney.clean1
## Models:
## model.2: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac +
## model.2: payment_method_fac + donor_type_fac + region_fac + working_income_fac +
## model.2: dialysis_fac + education_fac + locality_type_fac + payment_method_fac +
## model.2: (1 | state_residence_fac)
## model.4: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac +
## model.4: donor_type_fac + region_fac + working_income_fac + dialysis_fac +
## model.4: education_fac + locality_type_fac + payment_method_fac +
## model.4: abo_type_fac:race_fac + (1 | state_residence_fac)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model.2 36 2440537 2440896 -1220233 2440465
## model.4 39 2440400 2440789 -1220161 2440322 142.93 3 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.2,‘model.3’ model.4:anova(model.2, model.3, model.4)
## Data: kidney.clean1
## Models:
## model.2: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac +
## model.2: payment_method_fac + donor_type_fac + region_fac + working_income_fac +
## model.2: dialysis_fac + education_fac + locality_type_fac + payment_method_fac +
## model.2: (1 | state_residence_fac)
## model.4: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac +
## model.4: donor_type_fac + region_fac + working_income_fac + dialysis_fac +
## model.4: education_fac + locality_type_fac + payment_method_fac +
## model.4: abo_type_fac:race_fac + (1 | state_residence_fac)
## model.3: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac +
## model.3: payment_method_fac + donor_type_fac + region_fac + working_income_fac +
## model.3: dialysis_fac + education_fac + locality_type_fac + payment_method_fac +
## model.3: race_fac:region_fac + (1 | state_residence_fac)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model.2 36 2440537 2440896 -1220233 2440465
## model.4 39 2440400 2440789 -1220161 2440322 142.93 3 < 2.2e-16 ***
## model.3 46 2440187 2440645 -1220048 2440095 227.28 7 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.5 <- lmer(waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac + donor_type_fac + region_fac + working_income_fac + dialysis_fac + education_fac + locality_type_fac + payment_method_fac + (race_fac|state_residence_fac), REML = FALSE, data = kidney.clean1)
summary(model.5)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: waitlist_days ~ age_init + race_fac + abo_type_fac + gender_fac +
## donor_type_fac + region_fac + working_income_fac + dialysis_fac +
## education_fac + locality_type_fac + payment_method_fac +
## (race_fac | state_residence_fac)
## Data: kidney.clean1
##
## AIC BIC logLik deviance df.resid
## 2440086 2440464 -1220005 2440010 156096
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3481 -0.6561 -0.1855 0.4560 14.6839
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## state_residence_fac (Intercept) 7693 87.71
## race_facBlack 9446 97.19 0.74
## Residual 357896 598.24
## Number of obs: 156134, groups: state_residence_fac, 57
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1150.4866 26.8196 42.897
## age_init -4.1510 0.1217 -34.095
## race_facBlack 98.2602 14.9640 6.566
## abo_type_facB -16.6877 4.7813 -3.490
## abo_type_facAB -298.8500 7.0730 -42.252
## abo_type_facA -141.0749 3.3857 -41.667
## gender_facFemale 9.2530 3.1402 2.947
## donor_type_facliving donor -506.0428 3.7509 -134.911
## region_fac2 -75.3480 22.4469 -3.357
## region_fac3 -97.6833 23.6948 -4.123
## region_fac4 -119.2632 29.6358 -4.024
## region_fac5 4.2757 27.2000 0.157
## region_fac6 -34.1545 31.1483 -1.097
## region_fac7 -96.3049 24.8256 -3.879
## region_fac8 -146.8840 25.4030 -5.782
## region_fac9 -52.2786 22.9779 -2.275
## region_fac10 -52.0251 25.0782 -2.075
## region_fac11 -152.5990 23.4115 -6.518
## working_income_facyes 54.5967 3.5665 15.308
## dialysis_facyes 17.7286 3.6340 4.879
## education_facHighschool or GED 2.3797 11.0669 0.215
## education_facAttended Some College -19.0359 11.2276 -1.695
## education_facAssociate/Bachelor Degree -34.3231 11.3946 -3.012
## education_facPost-Graduate -33.1481 12.0103 -2.760
## locality_type_facRegional -154.5888 5.9906 -25.805
## locality_type_facNational -178.8519 5.2124 -34.313
## payment_method_facPublic InsuranceMedicaid -45.5518 7.3601 -6.189
## payment_method_facPublicMedicare FFS -119.5218 3.9109 -30.561
## payment_method_facPublicMedicareCh -78.4868 5.3138 -14.770
## payment_method_facPublicCHIP -335.2976 226.2627 -1.482
## payment_method_facPublicVA -5.0642 15.5176 -0.326
## payment_method_facPublicOthrGovt -90.3661 17.6385 -5.123
## payment_method_facPublicMedicareunknowntype 99.1562 78.0198 1.271
## payment_method_facStateGovtAgency -466.5132 424.8898 -1.098
##
## Correlation matrix not shown by default, as p = 34 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
modelsummary and broom.mixed Packages to Organize Your Results:library(modelsummary)
##
## Attaching package: 'modelsummary'
## The following object is masked from 'package:psych':
##
## SD
## The following object is masked from 'package:Hmisc':
##
## Mean
library(broom.mixed)
## Registered S3 method overwritten by 'broom.mixed':
## method from
## tidy.gamlss broom
models <- list(model.0, model.1, model.2, model.3, model.4, model.5)
modelsummary(models)
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | Model 6 | |
|---|---|---|---|---|---|---|
| (Intercept) | 590.320 | 748.117 | 1168.069 | 1143.427 | 1159.710 | 1150.487 |
| (13.982) | (14.436) | (29.756) | (29.745) | (29.777) | (26.820) | |
| sd__(Intercept) | 99.310 | 90.096 | 107.433 | 105.308 | 107.478 | 87.710 |
| sd__Observation | 653.742 | 636.469 | 599.249 | 598.543 | 598.975 | 598.244 |
| age_init | -3.159 | -4.161 | -4.138 | -4.165 | -4.151 | |
| (0.123) | (0.122) | (0.122) | (0.122) | (0.122) | ||
| race_facBlack | 244.534 | 130.874 | 220.056 | 154.668 | 98.260 | |
| (3.632) | (3.629) | (18.254) | (5.043) | (14.964) | ||
| abo_type_facB | -24.869 | -16.639 | -16.080 | -28.369 | -16.688 | |
| (5.083) | (4.788) | (4.783) | (6.487) | (4.781) | ||
| abo_type_facAB | -266.864 | -299.506 | -299.141 | -253.397 | -298.850 | |
| (7.516) | (7.083) | (7.076) | (8.957) | (7.073) | ||
| abo_type_facA | -131.387 | -140.611 | -140.772 | -122.796 | -141.075 | |
| (3.599) | (3.391) | (3.387) | (4.055) | (3.386) | ||
| gender_facFemale | 6.572 | 9.607 | 9.164 | 9.662 | 9.253 | |
| (3.313) | (3.144) | (3.142) | (3.143) | (3.140) | ||
| payment_method_facPublic InsuranceMedicaid | -41.668 | -45.203 | -41.661 | -45.552 | ||
| (7.362) | (7.361) | (7.359) | (7.360) | |||
| payment_method_facPublicMedicare FFS | -119.709 | -119.332 | -119.722 | -119.522 | ||
| (3.915) | (3.912) | (3.913) | (3.911) | |||
| payment_method_facPublicMedicareCh | -78.229 | -78.339 | -78.024 | -78.487 | ||
| (5.320) | (5.315) | (5.318) | (5.314) | |||
| payment_method_facPublicCHIP | -347.341 | -331.961 | -343.799 | -335.298 | ||
| (226.625) | (226.377) | (226.522) | (226.263) | |||
| payment_method_facPublicVA | -1.304 | 7.319 | -0.597 | -5.064 | ||
| (15.587) | (15.595) | (15.580) | (15.518) | |||
| payment_method_facPublicOthrGovt | -86.225 | -89.275 | -85.641 | -90.366 | ||
| (17.662) | (17.650) | (17.654) | (17.639) | |||
| payment_method_facPublicMedicareunknowntype | 85.222 | 86.884 | 84.304 | 99.156 | ||
| (78.132) | (78.045) | (78.097) | (78.020) | |||
| payment_method_facStateGovtAgency | -454.706 | -462.728 | -444.304 | -466.513 | ||
| (426.539) | (425.966) | (426.350) | (424.890) | |||
| donor_type_facliving donor | -506.344 | -506.977 | -506.081 | -506.043 | ||
| (3.755) | (3.752) | (3.754) | (3.751) | |||
| region_fac2 | -101.960 | -66.327 | -101.706 | -75.348 | ||
| (24.576) | (24.925) | (24.569) | (22.447) | |||
| region_fac3 | -116.100 | -106.161 | -115.805 | -97.683 | ||
| (26.156) | (26.497) | (26.148) | (23.695) | |||
| region_fac4 | -143.509 | -114.088 | -144.742 | -119.263 | ||
| (32.581) | (32.867) | (32.571) | (29.636) | |||
| region_fac5 | -7.670 | 5.687 | -8.008 | 4.276 | ||
| (30.598) | (30.698) | (30.590) | (27.200) | |||
| region_fac6 | -57.008 | -18.527 | -57.340 | -34.155 | ||
| (34.871) | (35.075) | (34.862) | (31.148) | |||
| region_fac7 | -119.498 | -114.295 | -118.866 | -96.305 | ||
| (27.650) | (27.834) | (27.643) | (24.826) | |||
| region_fac8 | -190.518 | -140.306 | -190.337 | -146.884 | ||
| (28.320) | (28.554) | (28.312) | (25.403) | |||
| region_fac9 | -67.015 | -76.545 | -66.492 | -52.279 | ||
| (24.542) | (24.895) | (24.533) | (22.978) | |||
| region_fac10 | -82.988 | -44.177 | -82.254 | -52.025 | ||
| (27.595) | (27.805) | (27.586) | (25.078) | |||
| region_fac11 | -179.909 | -150.477 | -180.404 | -152.599 | ||
| (25.805) | (26.248) | (25.798) | (23.412) | |||
| working_income_facyes | 54.360 | 54.777 | 54.526 | 54.597 | ||
| (3.572) | (3.568) | (3.570) | (3.566) | |||
| dialysis_facyes | 18.019 | 17.683 | 18.025 | 17.729 | ||
| (3.639) | (3.635) | (3.638) | (3.634) | |||
| education_facHighschool or GED | 1.764 | 4.680 | 1.448 | 2.380 | ||
| (11.079) | (11.069) | (11.074) | (11.067) | |||
| education_facAttended Some College | -19.979 | -16.439 | -20.306 | -19.036 | ||
| (11.239) | (11.229) | (11.234) | (11.228) | |||
| education_facAssociate/Bachelor Degree | -35.592 | -31.664 | -35.835 | -34.323 | ||
| (11.406) | (11.396) | (11.401) | (11.395) | |||
| education_facPost-Graduate | -35.178 | -30.384 | -35.515 | -33.148 | ||
| (12.021) | (12.012) | (12.015) | (12.010) | |||
| locality_type_facRegional | -154.525 | -154.584 | -154.643 | -154.589 | ||
| (5.996) | (5.991) | (5.993) | (5.991) | |||
| locality_type_facNational | -176.842 | -179.952 | -176.927 | -178.852 | ||
| (5.211) | (5.215) | (5.209) | (5.212) | |||
| race_facBlack × region_fac2 | -130.742 | |||||
| (20.040) | ||||||
| race_facBlack × region_fac3 | -65.505 | |||||
| (19.914) | ||||||
| race_facBlack × region_fac4 | -120.787 | |||||
| (21.763) | ||||||
| race_facBlack × region_fac5 | -33.126 | |||||
| (21.711) | ||||||
| race_facBlack × region_fac6 | -206.311 | |||||
| (32.547) | ||||||
| race_facBlack × region_fac7 | 4.398 | |||||
| (21.890) | ||||||
| race_facBlack × region_fac8 | -224.529 | |||||
| (23.384) | ||||||
| race_facBlack × region_fac9 | 16.393 | |||||
| (21.701) | ||||||
| race_facBlack × region_fac10 | -152.261 | |||||
| (20.869) | ||||||
| race_facBlack × region_fac11 | -105.735 | |||||
| (20.282) | ||||||
| race_facBlack × abo_type_facB | 18.555 | |||||
| (9.626) | ||||||
| race_facBlack × abo_type_facAB | -123.099 | |||||
| (14.606) | ||||||
| race_facBlack × abo_type_facA | -59.832 | |||||
| (7.398) | ||||||
| cor__(Intercept).race_facBlack | 0.736 | |||||
| sd__race_facBlack | 97.191 | |||||
| AIC | 2467790.8 | 2459433.5 | 2440537.3 | 2440187.1 | 2440400.4 | 2440085.5 |
| BIC | 2467820.7 | 2459523.2 | 2440895.8 | 2440645.2 | 2440788.7 | 2440463.9 |
| Log.Lik. | -1233892.417 | -1229707.769 | -1220232.645 | -1220047.540 | -1220161.180 | -1220004.737 |
modelsummary(models, output = 'msum.html', title = 'MLM Estimates')