Data Ingestion

Necessary libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(stringi)
library(tinytex)
library(ggplot2)
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(flextable)
## 
## Attaching package: 'flextable'
## 
## The following object is masked from 'package:purrr':
## 
##     compose
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo 
## 
## Attaching package: 'highcharter'
## 
## The following object is masked from 'package:flextable':
## 
##     colorize

Read in the flaps datasets for 2016–2019 (one CSV file per year)

#setwd("C:/Users/kathyochoa/Documents/DATA_205/Project")
# Load one CSV per year; file paths are relative to the working directory.
# Empty strings and the literal text "NA" are both read as missing values.
# The argument is spelled out as `na.strings` (the read.table/read.csv name)
# rather than `na`, which only worked through partial argument matching.
flaps2016 <- read.csv("2016_Flap.csv", na.strings = c("", "NA"))
flaps2017 <- read.csv("2017_Flap.csv", na.strings = c("", "NA"))
flaps2018 <- read.csv("2018_Flap.csv", na.strings = c("", "NA"))
flaps2019 <- read.csv("2019_Flap.csv", na.strings = c("", "NA"))

Data Wrangling

# Column names shared between consecutive yearly files:
#   comCols1 - columns present in both flaps2016 and flaps2017
#   comCols2 - columns present in both flaps2018 and flaps2019
# They are no longer needed to build allYears (see below) but are still
# defined here in case later sections of the analysis refer to them.
comCols1 <- intersect(names(flaps2016), names(flaps2017))
comCols2 <- intersect(names(flaps2018), names(flaps2019))

# Stack the four yearly data frames into one.
# The intent is to append rows, not to match records across years, so
# bind_rows() is used instead of a chain of full joins keyed on every shared
# column. bind_rows() aligns columns by name and fills columns that a given
# year lacks with NA. It does not depend on a hand-built key list, so it
# cannot fail on a key column missing from one side or create duplicated
# ".x"/".y" columns when the yearly column sets drift apart.
allYears <- bind_rows(flaps2016, flaps2017, flaps2018, flaps2019)

# Preview the first rows of the combined data
head(allYears)
##    X.1    X age age_neonate amonth aweekend died   discwt dispuniform dqtr drg
## 1   12   12  58          -9      2        0    0 4.999956           6    1 623
## 2  272  272  53          -9      2        0    0 4.999956           5    1 264
## 3  321  321  69          -9      1        0    0 4.999956           1    1 572
## 4  440  440  53          -9      8        0    0 4.999956           1    3 624
## 5  528  528  63          -9      7        0    0 4.999956           6    3 854
## 6 1046 1046  72          -9      6        0    0 4.999956           5    2 570
##   drgver drg_nopoa dxver elective female hcup_ed hcup_division hosp_nis i10_dx1
## 1     33       623    10        0      0       2             1    10001  E11621
## 2     33       264    10        0      1       2             1    10001   E1152
## 3     33       572    10        0      0       2             1    10001  L02416
## 4     33       624    10        0      0       2             1    10001  E11621
## 5     33       854    10        0      0       2             1    10001    A419
## 6     33       570    10        0      1       2             1    10001  L89154
##   i10_dx2 i10_dx3 i10_dx4 i10_dx5 i10_dx6 i10_dx7 i10_dx8 i10_dx9 i10_dx10
## 1    K921  L97519   E1142   E1165     I10    E785    I480   Z7901     E668
## 2  E11621  L03116  L97529   I2510     I10    Z720    E785  J45909    Z8619
## 3   B9561  L03116     I10    I252   I2510    E785  F17210 T783XXS    R7309
## 4  L97519     I10    Z794    K449    K219  F17210    E669   Z6835   Z89429
## 5  L03115   E1140  E11621  L97419    B952    Z794     I10    E669     E785
## 6   F0390    E119     I10    D649   I5181    R791    E039   M1990   Z96643
##   i10_dx11 i10_dx12 i10_dx13 i10_dx14 i10_dx15 i10_dx16 i10_dx17 i10_dx18
## 1    Z6832     I739   F17210     N401    R3911   E11649     Z794     <NA>
## 2     F319     L570    Z6839     I739     B951    Z9114    E1165     Z794
## 3  T63304A     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
## 4     F418    Z8614    G8929     M545     B954     <NA>     <NA>     <NA>
## 5     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
## 6   Z87891   G40909     I351   Z86718    Z7901   Z95828    I2510    L0889
##   i10_dx19 i10_dx20 i10_dx21 i10_dx22 i10_dx23 i10_dx24 i10_dx25 i10_dx26
## 1     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
## 2     E669     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
## 3     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
## 4     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
## 5     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
## 6     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
##   i10_dx27 i10_dx28 i10_dx29 i10_dx30 i10_ecause1 i10_ecause2 i10_ecause3
## 1     <NA>     <NA>     <NA>     <NA>        <NA>        <NA>        <NA>
## 2     <NA>     <NA>     <NA>     <NA>        <NA>        <NA>        <NA>
## 3     <NA>     <NA>     <NA>     <NA>        <NA>        <NA>        <NA>
## 4     <NA>     <NA>     <NA>     <NA>        <NA>        <NA>        <NA>
## 5     <NA>     <NA>     <NA>     <NA>        <NA>        <NA>        <NA>
## 6     <NA>     <NA>     <NA>     <NA>        <NA>        <NA>        <NA>
##   i10_ecause4 i10_ndx i10_necause i10_npr i10_pr1 i10_pr2 i10_pr3 i10_pr4
## 1        <NA>      17           0       3 0JBQ0ZZ 02HV33Z B546ZZA    <NA>
## 2        <NA>      19           0       2 0JBR0ZZ 0Y9N3ZX    <NA>    <NA>
## 3        <NA>      11           0       1 0JBM0ZZ    <NA>    <NA>    <NA>
## 4        <NA>      15           0       1 0JBQ0ZZ    <NA>    <NA>    <NA>
## 5        <NA>      10           0       2 0JBQ0ZZ 02HV33Z    <NA>    <NA>
## 6        <NA>      18           0       4 0HD8XZZ 30233K1 30233N1 0JB90ZZ
##   i10_pr5 i10_pr6 i10_pr7 i10_pr8 i10_pr9 i10_pr10 i10_pr11 i10_pr12 i10_pr13
## 1    <NA>    <NA>    <NA>    <NA>    <NA>     <NA>     <NA>     <NA>     <NA>
## 2    <NA>    <NA>    <NA>    <NA>    <NA>     <NA>     <NA>     <NA>     <NA>
## 3    <NA>    <NA>    <NA>    <NA>    <NA>     <NA>     <NA>     <NA>     <NA>
## 4    <NA>    <NA>    <NA>    <NA>    <NA>     <NA>     <NA>     <NA>     <NA>
## 5    <NA>    <NA>    <NA>    <NA>    <NA>     <NA>     <NA>     <NA>     <NA>
## 6    <NA>    <NA>    <NA>    <NA>    <NA>     <NA>     <NA>     <NA>     <NA>
##   i10_pr14 i10_pr15  key_nis los mdc mdc_nopoa nis_stratum pay1 pl_nchs prday1
## 1     <NA>     <NA> 10004372   4  10        10        1221    2       3      1
## 2     <NA>     <NA> 10063532   2   5         5        1221    2       1      0
## 3     <NA>     <NA> 10074390   4   9         9        1221    1       3      1
## 4     <NA>     <NA> 10100884   3  10        10        1221    2       1      0
## 5     <NA>     <NA> 10122030   5  18        18        1221    3       1      0
## 6     <NA>     <NA> 10233573   5   9         9        1221    1       1      1
##   prday2 prday3 prday4 prday5 prday6 prday7 prday8 prday9 prday10 prday11
## 1      3      3    -99    -99    -99    -99    -99    -99     -99     -99
## 2      0    -99    -99    -99    -99    -99    -99    -99     -99     -99
## 3    -99    -99    -99    -99    -99    -99    -99    -99     -99     -99
## 4    -99    -99    -99    -99    -99    -99    -99    -99     -99     -99
## 5      0    -99    -99    -99    -99    -99    -99    -99     -99     -99
## 6      1      1      2    -99    -99    -99    -99    -99     -99     -99
##   prday12 prday13 prday14 prday15 prver race totchg tran_in tran_out year
## 1     -99     -99     -99     -99    10    2  25219       0        0 2016
## 2     -99     -99     -99     -99    10    3  21814       0        2 2016
## 3     -99     -99     -99     -99    10    1  25152       0        0 2016
## 4     -99     -99     -99     -99    10    1  19816       0        0 2016
## 5     -99     -99     -99     -99    10    1  31522       0        0 2016
## 6     -99     -99     -99     -99    10    1  41261       0        2 2016
##   zipinc_qrtl flap i10_dx31 i10_dx32 i10_dx33 i10_dx34 i10_dx35 i10_dx36
## 1           1 TRUE     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
## 2           3 TRUE     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
## 3           4 TRUE     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
## 4           3 TRUE     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
## 5           3 TRUE     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
## 6           3 TRUE     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
##   i10_dx37 i10_dx38 i10_dx39 i10_dx40 i10_pr16 i10_pr17 i10_pr18 i10_pr19
## 1     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
## 2     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
## 3     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
## 4     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
## 5     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
## 6     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>
##   i10_pr20 i10_pr21 i10_pr22 i10_pr23 i10_pr24 i10_pr25 prday16 prday17 prday18
## 1     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>      NA      NA      NA
## 2     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>      NA      NA      NA
## 3     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>      NA      NA      NA
## 4     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>      NA      NA      NA
## 5     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>      NA      NA      NA
## 6     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>      NA      NA      NA
##   prday19 prday20 prday21 prday22 prday23 prday24 prday25 i10_birth
## 1      NA      NA      NA      NA      NA      NA      NA        NA
## 2      NA      NA      NA      NA      NA      NA      NA        NA
## 3      NA      NA      NA      NA      NA      NA      NA        NA
## 4      NA      NA      NA      NA      NA      NA      NA        NA
## 5      NA      NA      NA      NA      NA      NA      NA        NA
## 6      NA      NA      NA      NA      NA      NA      NA        NA
##   i10_delivery i10_injury i10_multinjury i10_serviceline pclass_orproc
## 1           NA         NA             NA              NA            NA
## 2           NA         NA             NA              NA            NA
## 3           NA         NA             NA              NA            NA
## 4           NA         NA             NA              NA            NA
## 5           NA         NA             NA              NA            NA
## 6           NA         NA             NA              NA            NA

Check the summary of allYears

# Per-column overview of the combined data: min/quartiles/mean/max for numeric
# columns, length and class for character columns, and NA counts where present
summary(allYears)
##       X.1                X                age          age_neonate    
##  Min.   :     12   Min.   :     12   Min.   :-99.00   Min.   :-9.000  
##  1st Qu.:1789937   1st Qu.:1789937   1st Qu.: 44.00   1st Qu.:-9.000  
##  Median :3564736   Median :3564736   Median : 57.00   Median :-9.000  
##  Mean   :3564778   Mean   :3564778   Mean   : 55.45   Mean   :-8.886  
##  3rd Qu.:5330628   3rd Qu.:5330628   3rd Qu.: 69.00   3rd Qu.:-9.000  
##  Max.   :7159031   Max.   :7159031   Max.   : 90.00   Max.   : 1.000  
##                                                                       
##      amonth         aweekend            died              discwt     
##  Min.   :-9.00   Min.   :-9.0000   Min.   :-9.00000   Min.   :4.992  
##  1st Qu.: 4.00   1st Qu.: 0.0000   1st Qu.: 0.00000   1st Qu.:5.000  
##  Median : 7.00   Median : 0.0000   Median : 0.00000   Median :5.000  
##  Mean   : 6.52   Mean   : 0.1708   Mean   : 0.01212   Mean   :5.000  
##  3rd Qu.: 9.00   3rd Qu.: 0.0000   3rd Qu.: 0.00000   3rd Qu.:5.000  
##  Max.   :12.00   Max.   : 1.0000   Max.   : 1.00000   Max.   :5.004  
##                                                                      
##   dispuniform          dqtr             drg            drgver     
##  Min.   :-9.000   Min.   :-9.000   Min.   :  1.0   Min.   :33.00  
##  1st Qu.: 1.000   1st Qu.: 2.000   1st Qu.:464.0   1st Qu.:34.00  
##  Median : 5.000   Median : 3.000   Median :580.0   Median :35.00  
##  Mean   : 3.822   Mean   : 2.513   Mean   :601.3   Mean   :34.78  
##  3rd Qu.: 6.000   3rd Qu.: 3.000   3rd Qu.:853.0   3rd Qu.:36.00  
##  Max.   :99.000   Max.   : 4.000   Max.   :999.0   Max.   :37.00  
##                                                                   
##    drg_nopoa         dxver           elective           female       
##  Min.   :  1.0   Min.   :10       Min.   :-9.0000   Min.   :-9.0000  
##  1st Qu.:464.0   1st Qu.:10       1st Qu.: 0.0000   1st Qu.: 0.0000  
##  Median :580.0   Median :10       Median : 0.0000   Median : 0.0000  
##  Mean   :600.5   Mean   :10       Mean   : 0.2514   Mean   : 0.4469  
##  3rd Qu.:853.0   3rd Qu.:10       3rd Qu.: 1.0000   3rd Qu.: 1.0000  
##  Max.   :999.0   Max.   :10       Max.   : 1.0000   Max.   : 1.0000  
##                  NA's   :124164                                      
##     hcup_ed       hcup_division      hosp_nis       i10_dx1         
##  Min.   :0.0000   Min.   :1.000   Min.   :10001   Length:243422     
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:30338   Class :character  
##  Median :1.0000   Median :5.000   Median :50328   Mode  :character  
##  Mean   :0.7636   Mean   :5.078   Mean   :51083                     
##  3rd Qu.:1.0000   3rd Qu.:7.000   3rd Qu.:70460                     
##  Max.   :4.0000   Max.   :9.000   Max.   :90536                     
##                                                                     
##    i10_dx2            i10_dx3            i10_dx4            i10_dx5         
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_dx6            i10_dx7            i10_dx8            i10_dx9         
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_dx10           i10_dx11           i10_dx12           i10_dx13        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_dx14           i10_dx15           i10_dx16           i10_dx17        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_dx18           i10_dx19           i10_dx20           i10_dx21        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_dx22           i10_dx23           i10_dx24           i10_dx25        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_dx26           i10_dx27           i10_dx28           i10_dx29        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_dx30         i10_ecause1        i10_ecause2        i10_ecause3       
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  i10_ecause4           i10_ndx       i10_necause        i10_npr      
##  Length:243422      Min.   : 0.00   Min.   :0.00     Min.   : 1.000  
##  Class :character   1st Qu.:10.00   1st Qu.:0.00     1st Qu.: 2.000  
##  Mode  :character   Median :15.00   Median :0.00     Median : 3.000  
##                     Mean   :15.64   Mean   :0.55     Mean   : 4.802  
##                     3rd Qu.:21.00   3rd Qu.:1.00     3rd Qu.: 6.000  
##                     Max.   :40.00   Max.   :4.00     Max.   :25.000  
##                                     NA's   :184739                   
##    i10_pr1            i10_pr2            i10_pr3            i10_pr4         
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_pr5            i10_pr6            i10_pr7            i10_pr8         
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_pr9            i10_pr10           i10_pr11           i10_pr12        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_pr13           i10_pr14           i10_pr15            key_nis        
##  Length:243422      Length:243422      Length:243422      Min.   :10000016  
##  Class :character   Class :character   Class :character   1st Qu.:30477956  
##  Mode  :character   Mode  :character   Mode  :character   Median :50695430  
##                                                           Mean   :51258557  
##                                                           3rd Qu.:70486873  
##                                                           Max.   :90983758  
##                                                                             
##       los                 mdc         mdc_nopoa      nis_stratum  
##  Min.   :-6666.000   Min.   : 0.0   Min.   : 0.00   Min.   :1111  
##  1st Qu.:    4.000   1st Qu.: 8.0   1st Qu.: 8.00   1st Qu.:3232  
##  Median :    6.000   Median : 9.0   Median : 9.00   Median :5232  
##  Mean   :    9.131   Mean   :11.6   Mean   :11.64   Mean   :5307  
##  3rd Qu.:   12.000   3rd Qu.:18.0   3rd Qu.:18.00   3rd Qu.:7233  
##  Max.   :  359.000   Max.   :25.0   Max.   :25.00   Max.   :9333  
##                                                                   
##       pay1           pl_nchs            prday1             prday2      
##  Min.   :-9.000   Min.   :-99.000   Min.   :-99.0000   Min.   :-99.00  
##  1st Qu.: 1.000   1st Qu.:  1.000   1st Qu.:  0.0000   1st Qu.:  0.00  
##  Median : 2.000   Median :  2.000   Median :  1.0000   Median :  1.00  
##  Mean   : 2.082   Mean   :  1.927   Mean   :  0.1057   Mean   :-16.77  
##  3rd Qu.: 3.000   3rd Qu.:  4.000   3rd Qu.:  3.0000   3rd Qu.:  3.00  
##  Max.   : 6.000   Max.   :  6.000   Max.   :343.0000   Max.   :319.00  
##                                                                        
##      prday3           prday4           prday5           prday6      
##  Min.   :-99.00   Min.   :-99.00   Min.   :-99.00   Min.   :-99.00  
##  1st Qu.:-99.00   1st Qu.:-99.00   1st Qu.:-99.00   1st Qu.:-99.00  
##  Median :  0.00   Median :-99.00   Median :-99.00   Median :-99.00  
##  Mean   :-35.66   Mean   :-50.31   Mean   :-61.18   Mean   :-69.17  
##  3rd Qu.:  2.00   3rd Qu.:  1.00   3rd Qu.:  0.00   3rd Qu.:  0.00  
##  Max.   :337.00   Max.   :342.00   Max.   :366.00   Max.   :312.00  
##                                                                     
##      prday7           prday8           prday9         prday10      
##  Min.   :-99.00   Min.   :-99.00   Min.   :-99.0   Min.   :-99.00  
##  1st Qu.:-99.00   1st Qu.:-99.00   1st Qu.:-99.0   1st Qu.:-99.00  
##  Median :-99.00   Median :-99.00   Median :-99.0   Median :-99.00  
##  Mean   :-77.54   Mean   :-81.52   Mean   :-84.5   Mean   :-86.98  
##  3rd Qu.:-99.00   3rd Qu.:-99.00   3rd Qu.:-99.0   3rd Qu.:-99.00  
##  Max.   :320.00   Max.   :320.00   Max.   :320.0   Max.   :308.00  
##                                                                    
##     prday11          prday12          prday13          prday14      
##  Min.   :-99.00   Min.   :-99.00   Min.   :-99.00   Min.   :-99.00  
##  1st Qu.:-99.00   1st Qu.:-99.00   1st Qu.:-99.00   1st Qu.:-99.00  
##  Median :-99.00   Median :-99.00   Median :-99.00   Median :-99.00  
##  Mean   :-88.99   Mean   :-90.56   Mean   :-92.02   Mean   :-93.13  
##  3rd Qu.:-99.00   3rd Qu.:-99.00   3rd Qu.:-99.00   3rd Qu.:-99.00  
##  Max.   :355.00   Max.   :361.00   Max.   :335.00   Max.   :302.00  
##                                                                     
##     prday15          prver             race            totchg          
##  Min.   :-99.0   Min.   :10       Min.   :-9.000   Min.   :-999999999  
##  1st Qu.:-99.0   1st Qu.:10       1st Qu.: 1.000   1st Qu.:     34591  
##  Median :-99.0   Median :10       Median : 1.000   Median :     65211  
##  Mean   :-94.1   Mean   :10       Mean   : 1.317   Mean   :  -6491144  
##  3rd Qu.:-99.0   3rd Qu.:10       3rd Qu.: 2.000   3rd Qu.:    131770  
##  Max.   :343.0   Max.   :10       Max.   : 6.000   Max.   :   9999999  
##                  NA's   :124164                                        
##     tran_in           tran_out            year       zipinc_qrtl    
##  Min.   :-9.0000   Min.   :-9.0000   Min.   :2016   Min.   :-9.000  
##  1st Qu.: 0.0000   1st Qu.: 0.0000   1st Qu.:2017   1st Qu.: 1.000  
##  Median : 0.0000   Median : 0.0000   Median :2018   Median : 2.000  
##  Mean   : 0.1062   Mean   : 0.5584   Mean   :2018   Mean   : 2.015  
##  3rd Qu.: 0.0000   3rd Qu.: 2.0000   3rd Qu.:2019   3rd Qu.: 3.000  
##  Max.   : 2.0000   Max.   : 2.0000   Max.   :2019   Max.   : 4.000  
##                                                                     
##    flap           i10_dx31           i10_dx32           i10_dx33        
##  Mode:logical   Length:243422      Length:243422      Length:243422     
##  TRUE:243422    Class :character   Class :character   Class :character  
##                 Mode  :character   Mode  :character   Mode  :character  
##                                                                         
##                                                                         
##                                                                         
##                                                                         
##    i10_dx34           i10_dx35           i10_dx36           i10_dx37        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_dx38           i10_dx39           i10_dx40           i10_pr16        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_pr17           i10_pr18           i10_pr19           i10_pr20        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_pr21           i10_pr22           i10_pr23           i10_pr24        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_pr25            prday16          prday17          prday18      
##  Length:243422      Min.   :-99.00   Min.   :-99.00   Min.   :-99.00  
##  Class :character   1st Qu.:-99.00   1st Qu.:-99.00   1st Qu.:-99.00  
##  Mode  :character   Median :-99.00   Median :-99.00   Median :-99.00  
##                     Mean   :-94.67   Mean   :-95.24   Mean   :-95.68  
##                     3rd Qu.:-99.00   3rd Qu.:-99.00   3rd Qu.:-99.00  
##                     Max.   :321.00   Max.   :302.00   Max.   :313.00  
##                     NA's   :58683    NA's   :58683    NA's   :58683   
##     prday19          prday20          prday21          prday22      
##  Min.   :-99.00   Min.   :-99.00   Min.   :-99.00   Min.   :-99.00  
##  1st Qu.:-99.00   1st Qu.:-99.00   1st Qu.:-99.00   1st Qu.:-99.00  
##  Median :-99.00   Median :-99.00   Median :-99.00   Median :-99.00  
##  Mean   :-96.07   Mean   :-96.43   Mean   :-96.83   Mean   :-97.28  
##  3rd Qu.:-99.00   3rd Qu.:-99.00   3rd Qu.:-99.00   3rd Qu.:-99.00  
##  Max.   :313.00   Max.   :302.00   Max.   :302.00   Max.   :302.00  
##  NA's   :58683    NA's   :58683    NA's   :58683    NA's   :58683   
##     prday23          prday24          prday25         i10_birth     
##  Min.   :-99.00   Min.   :-99.00   Min.   :-99.00   Min.   :0       
##  1st Qu.:-99.00   1st Qu.:-99.00   1st Qu.:-99.00   1st Qu.:0       
##  Median :-99.00   Median :-99.00   Median :-99.00   Median :0       
##  Mean   :-97.48   Mean   :-97.63   Mean   :-97.79   Mean   :0       
##  3rd Qu.:-99.00   3rd Qu.:-99.00   3rd Qu.:-99.00   3rd Qu.:0       
##  Max.   :302.00   Max.   :305.00   Max.   :305.00   Max.   :2       
##  NA's   :58683    NA's   :58683    NA's   :58683    NA's   :180732  
##   i10_delivery      i10_injury     i10_multinjury   i10_serviceline 
##  Min.   :0.00     Min.   :0.00     Min.   :0.0      Min.   :1.00    
##  1st Qu.:0.00     1st Qu.:0.00     1st Qu.:0.0      1st Qu.:4.00    
##  Median :0.00     Median :0.00     Median :0.0      Median :4.00    
##  Mean   :0.02     Mean   :0.26     Mean   :0.1      Mean   :3.85    
##  3rd Qu.:0.00     3rd Qu.:0.00     3rd Qu.:0.0      3rd Qu.:4.00    
##  Max.   :1.00     Max.   :2.00     Max.   :1.0      Max.   :5.00    
##  NA's   :180732   NA's   :180732   NA's   :180732   NA's   :180732  
##  pclass_orproc   
##  Min.   :0.00    
##  1st Qu.:1.00    
##  Median :1.00    
##  Mean   :0.93    
##  3rd Qu.:1.00    
##  Max.   :1.00    
##  NA's   :180732

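We can see that some variables contain negative values. Is it possible to be a negative number of years old? Why would a categorical variable have a value of -9? These negative values (for example -9, -99, -6666 and -999999999 in the summary above) are not real measurements; they are placeholder codes for missing data, so they should be treated as NAs.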

To avoid skewing our results, let’s replace these negative values with NA

# Recode negative placeholder values as NA, in numeric columns only.
# The previous whole-data-frame comparison (allYears < 0) also compared every
# character column (diagnosis/procedure codes) against "0" using
# locale-dependent string ordering, which is not what is intended here.
# which() drops NA comparison results, so values that are already NA are left
# untouched and each column keeps its original type.
# NOTE(review): this assumes no numeric column can legitimately be negative -
# confirm against the data dictionary.
allYears <- allYears %>%
  mutate(across(where(is.numeric), ~ replace(.x, which(.x < 0), NA)))

Check the summary once again

# Re-run the per-column summary to confirm that the negative values are gone
# and now show up as NA counts instead
summary(allYears)
##       X.1                X                age         age_neonate    
##  Min.   :     12   Min.   :     12   Min.   : 0.00   Min.   :0.00    
##  1st Qu.:1789937   1st Qu.:1789937   1st Qu.:44.00   1st Qu.:0.00    
##  Median :3564736   Median :3564736   Median :57.00   Median :0.00    
##  Mean   :3564778   Mean   :3564778   Mean   :55.46   Mean   :0.22    
##  3rd Qu.:5330628   3rd Qu.:5330628   3rd Qu.:69.00   3rd Qu.:0.00    
##  Max.   :7159031   Max.   :7159031   Max.   :90.00   Max.   :1.00    
##                                      NA's   :10      NA's   :240401  
##      amonth          aweekend           died             discwt     
##  Min.   : 1.000   Min.   :0.0000   Min.   :0.00000   Min.   :4.992  
##  1st Qu.: 4.000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:5.000  
##  Median : 7.000   Median :0.0000   Median :0.00000   Median :5.000  
##  Mean   : 6.535   Mean   :0.1709   Mean   :0.01695   Mean   :5.000  
##  3rd Qu.: 9.000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:5.000  
##  Max.   :12.000   Max.   :1.0000   Max.   :1.00000   Max.   :5.004  
##  NA's   :234      NA's   :1        NA's   :138                      
##   dispuniform          dqtr            drg            drgver     
##  Min.   : 1.000   Min.   :1.000   Min.   :  1.0   Min.   :33.00  
##  1st Qu.: 1.000   1st Qu.:2.000   1st Qu.:464.0   1st Qu.:34.00  
##  Median : 5.000   Median :3.000   Median :580.0   Median :35.00  
##  Mean   : 3.829   Mean   :2.524   Mean   :601.3   Mean   :34.78  
##  3rd Qu.: 6.000   3rd Qu.:3.000   3rd Qu.:853.0   3rd Qu.:36.00  
##  Max.   :99.000   Max.   :4.000   Max.   :999.0   Max.   :37.00  
##  NA's   :138      NA's   :234                                    
##    drg_nopoa         dxver           elective         female     
##  Min.   :  1.0   Min.   :10       Min.   :0.000   Min.   :0.000  
##  1st Qu.:464.0   1st Qu.:10       1st Qu.:0.000   1st Qu.:0.000  
##  Median :580.0   Median :10       Median :0.000   Median :0.000  
##  Mean   :600.5   Mean   :10       Mean   :0.275   Mean   :0.453  
##  3rd Qu.:853.0   3rd Qu.:10       3rd Qu.:1.000   3rd Qu.:1.000  
##  Max.   :999.0   Max.   :10       Max.   :1.000   Max.   :1.000  
##                  NA's   :124164   NA's   :618     NA's   :209    
##     hcup_ed       hcup_division      hosp_nis       i10_dx1         
##  Min.   :0.0000   Min.   :1.000   Min.   :10001   Length:243422     
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:30338   Class :character  
##  Median :1.0000   Median :5.000   Median :50328   Mode  :character  
##  Mean   :0.7636   Mean   :5.078   Mean   :51083                     
##  3rd Qu.:1.0000   3rd Qu.:7.000   3rd Qu.:70460                     
##  Max.   :4.0000   Max.   :9.000   Max.   :90536                     
##                                                                     
##    i10_dx2            i10_dx3            i10_dx4            i10_dx5         
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_dx6            i10_dx7            i10_dx8            i10_dx9         
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_dx10           i10_dx11           i10_dx12           i10_dx13        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_dx14           i10_dx15           i10_dx16           i10_dx17        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_dx18           i10_dx19           i10_dx20           i10_dx21        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_dx22           i10_dx23           i10_dx24           i10_dx25        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_dx26           i10_dx27           i10_dx28           i10_dx29        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_dx30         i10_ecause1        i10_ecause2        i10_ecause3       
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  i10_ecause4           i10_ndx       i10_necause        i10_npr      
##  Length:243422      Min.   : 0.00   Min.   :0.00     Min.   : 1.000  
##  Class :character   1st Qu.:10.00   1st Qu.:0.00     1st Qu.: 2.000  
##  Mode  :character   Median :15.00   Median :0.00     Median : 3.000  
##                     Mean   :15.64   Mean   :0.55     Mean   : 4.802  
##                     3rd Qu.:21.00   3rd Qu.:1.00     3rd Qu.: 6.000  
##                     Max.   :40.00   Max.   :4.00     Max.   :25.000  
##                                     NA's   :184739                   
##    i10_pr1            i10_pr2            i10_pr3            i10_pr4         
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_pr5            i10_pr6            i10_pr7            i10_pr8         
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_pr9            i10_pr10           i10_pr11           i10_pr12        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_pr13           i10_pr14           i10_pr15            key_nis        
##  Length:243422      Length:243422      Length:243422      Min.   :10000016  
##  Class :character   Class :character   Class :character   1st Qu.:30477956  
##  Mode  :character   Mode  :character   Mode  :character   Median :50695430  
##                                                           Mean   :51258557  
##                                                           3rd Qu.:70486873  
##                                                           Max.   :90983758  
##                                                                             
##       los             mdc         mdc_nopoa      nis_stratum        pay1      
##  Min.   :  0.0   Min.   : 0.0   Min.   : 0.00   Min.   :1111   Min.   :1.000  
##  1st Qu.:  4.0   1st Qu.: 8.0   1st Qu.: 8.00   1st Qu.:3232   1st Qu.:1.000  
##  Median :  6.0   Median : 9.0   Median : 9.00   Median :5232   Median :2.000  
##  Mean   : 10.2   Mean   :11.6   Mean   :11.64   Mean   :5307   Mean   :2.099  
##  3rd Qu.: 12.0   3rd Qu.:18.0   3rd Qu.:18.00   3rd Qu.:7233   3rd Qu.:3.000  
##  Max.   :359.0   Max.   :25.0   Max.   :25.00   Max.   :9333   Max.   :6.000  
##  NA's   :39                                                    NA's   :371    
##     pl_nchs          prday1            prday2           prday3     
##  Min.   :1.000   Min.   :  0.000   Min.   :  0.00   Min.   :  0.0  
##  1st Qu.:1.000   1st Qu.:  0.000   1st Qu.:  0.00   1st Qu.:  0.0  
##  Median :2.000   Median :  1.000   Median :  1.00   Median :  2.0  
##  Mean   :2.658   Mean   :  2.488   Mean   :  3.27   Mean   :  3.7  
##  3rd Qu.:4.000   3rd Qu.:  3.000   3rd Qu.:  4.00   3rd Qu.:  5.0  
##  Max.   :6.000   Max.   :343.000   Max.   :319.00   Max.   :337.0  
##  NA's   :1750    NA's   :10854     NA's   :51495    NA's   :95937  
##      prday4           prday5           prday6           prday7      
##  Min.   :  0.00   Min.   :  0.0    Min.   :  0.00   Min.   :  0.00  
##  1st Qu.:  0.00   1st Qu.:  0.0    1st Qu.:  0.00   1st Qu.:  0.00  
##  Median :  2.00   Median :  2.0    Median :  3.00   Median :  3.00  
##  Mean   :  4.26   Mean   :  4.9    Mean   :  5.57   Mean   :  6.28  
##  3rd Qu.:  6.00   3rd Qu.:  6.0    3rd Qu.:  7.00   3rd Qu.:  8.00  
##  Max.   :342.00   Max.   :366.0    Max.   :312.00   Max.   :320.00  
##  NA's   :130436   NA's   :156008   NA's   :174829   NA's   :194344  
##      prday8           prday9          prday10          prday11      
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.:  0.00   1st Qu.:  0.00   1st Qu.:  1.00   1st Qu.:  1.00  
##  Median :  3.00   Median :  4.00   Median :  4.00   Median :  5.00  
##  Mean   :  6.94   Mean   :  7.64   Mean   :  8.32   Mean   :  9.09  
##  3rd Qu.:  9.00   3rd Qu.: 10.00   3rd Qu.: 11.00   3rd Qu.: 12.00  
##  Max.   :320.00   Max.   :320.00   Max.   :308.00   Max.   :355.00  
##  NA's   :203667   NA's   :210658   NA's   :216382   NA's   :221034  
##     prday12          prday13          prday14          prday15      
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.:  1.00   1st Qu.:  1.00   1st Qu.:  1.00   1st Qu.:  1.00  
##  Median :  5.00   Median :  6.00   Median :  6.00   Median :  7.00  
##  Mean   :  9.79   Mean   : 10.69   Mean   : 11.31   Mean   : 12.22  
##  3rd Qu.: 13.00   3rd Qu.: 14.00   3rd Qu.: 15.00   3rd Qu.: 16.00  
##  Max.   :361.00   Max.   :335.00   Max.   :302.00   Max.   :343.00  
##  NA's   :224670   NA's   :228051   NA's   :230544   NA's   :232772  
##      prver             race           totchg           tran_in      
##  Min.   :10       Min.   :1.000   Min.   :    109   Min.   :0.0000  
##  1st Qu.:10       1st Qu.:1.000   1st Qu.:  35061   1st Qu.:0.0000  
##  Median :10       Median :1.000   Median :  65754   Median :0.0000  
##  Mean   :10       Mean   :1.654   Mean   : 126465   Mean   :0.1409  
##  3rd Qu.:10       3rd Qu.:2.000   3rd Qu.: 132548   3rd Qu.:0.0000  
##  Max.   :10       Max.   :6.000   Max.   :9999999   Max.   :2.0000  
##  NA's   :124164   NA's   :7704    NA's   :1619      NA's   :925     
##     tran_out           year       zipinc_qrtl      flap        
##  Min.   :0.0000   Min.   :2016   Min.   :1.000   Mode:logical  
##  1st Qu.:0.0000   1st Qu.:2017   1st Qu.:1.000   TRUE:243422   
##  Median :0.0000   Median :2018   Median :2.000                 
##  Mean   :0.5638   Mean   :2018   Mean   :2.251                 
##  3rd Qu.:2.0000   3rd Qu.:2019   3rd Qu.:3.000                 
##  Max.   :2.0000   Max.   :2019   Max.   :4.000                 
##  NA's   :138                     NA's   :5121                  
##    i10_dx31           i10_dx32           i10_dx33           i10_dx34        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_dx35           i10_dx36           i10_dx37           i10_dx38        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_dx39           i10_dx40           i10_pr16           i10_pr17        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_pr18           i10_pr19           i10_pr20           i10_pr21        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    i10_pr22           i10_pr23           i10_pr24           i10_pr25        
##  Length:243422      Length:243422      Length:243422      Length:243422     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##     prday16          prday17          prday18          prday19      
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.:  1.00   1st Qu.:  2.00   1st Qu.:  2.00   1st Qu.:  2.00  
##  Median :  7.00   Median :  8.00   Median :  8.00   Median :  9.00  
##  Mean   : 12.89   Mean   : 13.95   Mean   : 14.58   Mean   : 15.12  
##  3rd Qu.: 17.00   3rd Qu.: 18.00   3rd Qu.: 19.00   3rd Qu.: 20.00  
##  Max.   :321.00   Max.   :302.00   Max.   :313.00   Max.   :313.00  
##  NA's   :236308   NA's   :237301   NA's   :238055   NA's   :238700  
##     prday20          prday21          prday22          prday23      
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.:  2.00   1st Qu.:  3.00   1st Qu.:  3.00   1st Qu.:  3.00  
##  Median :  9.00   Median : 10.00   Median : 10.00   Median : 11.00  
##  Mean   : 15.63   Mean   : 16.23   Mean   : 17.24   Mean   : 17.33  
##  3rd Qu.: 21.00   3rd Qu.: 22.00   3rd Qu.: 22.00   3rd Qu.: 22.50  
##  Max.   :302.00   Max.   :302.00   Max.   :302.00   Max.   :302.00  
##  NA's   :239293   NA's   :239967   NA's   :240707   NA's   :241023  
##     prday24          prday25         i10_birth       i10_delivery   
##  Min.   :  0.00   Min.   :  0.00   Min.   :0        Min.   :0.00    
##  1st Qu.:  3.00   1st Qu.:  3.00   1st Qu.:0        1st Qu.:0.00    
##  Median : 11.00   Median : 12.00   Median :0        Median :0.00    
##  Mean   : 18.51   Mean   : 19.24   Mean   :0        Mean   :0.02    
##  3rd Qu.: 24.00   3rd Qu.: 24.00   3rd Qu.:0        3rd Qu.:0.00    
##  Max.   :305.00   Max.   :305.00   Max.   :2        Max.   :1.00    
##  NA's   :241279   NA's   :241549   NA's   :180732   NA's   :180732  
##    i10_injury     i10_multinjury   i10_serviceline  pclass_orproc   
##  Min.   :0.00     Min.   :0.0      Min.   :1.00     Min.   :0.00    
##  1st Qu.:0.00     1st Qu.:0.0      1st Qu.:4.00     1st Qu.:1.00    
##  Median :0.00     Median :0.0      Median :4.00     Median :1.00    
##  Mean   :0.26     Mean   :0.1      Mean   :3.85     Mean   :0.93    
##  3rd Qu.:0.00     3rd Qu.:0.0      3rd Qu.:4.00     3rd Qu.:1.00    
##  Max.   :2.00     Max.   :1.0      Max.   :5.00     Max.   :1.00    
##  NA's   :180732   NA's   :180732   NA's   :180732   NA's   :180732

Summary looks good.

Procedure Codes

Take the combined dataset and select the procedure columns

# Keep the year plus every procedure-code column (names containing "i10_pr")
procedures <- select(allYears, year, contains("i10_pr"))

Lengthen the procedures column

# Reshape to one row per (year, procedure column) and strip the white space
# around each code in the same pipeline
procLong <- procedures %>%
  pivot_longer(cols = -year) %>%
  mutate(value = stri_trim_both(value))

head(procLong)
## # A tibble: 6 × 3
##    year name    value  
##   <int> <chr>   <chr>  
## 1  2016 i10_pr1 0JBQ0ZZ
## 2  2016 i10_pr2 02HV33Z
## 3  2016 i10_pr3 B546ZZA
## 4  2016 i10_pr4 <NA>   
## 5  2016 i10_pr5 <NA>   
## 6  2016 i10_pr6 <NA>

Categorical Variables

Create new columns for categorical variables

Admission Month

# Label admission month codes 1-12 with the month name. factor() maps the
# codes to labels in one vectorised step, keeps the levels in calendar
# order, and leaves missing or unmatched codes as NA.
allYears$month <- factor(
  allYears$amonth,
  levels = seq_along(month.name),
  labels = month.name
)

Admission Day

# aweekend code 0 -> "Mon-Fri", 1 -> "Sat-Sun"; any other value stays NA
allYears$day <- c("Mon-Fri", "Sat-Sun")[match(allYears$aweekend, c(0, 1))]

Survival Status

# died code 0 -> "Did not die", 1 -> "Died"; any other value stays NA
allYears$status <- case_when(
  allYears$died == 0 ~ "Did not die",
  allYears$died == 1 ~ "Died"
)

Disposition

dispList <- c(
  "Routine",
  "Transfer to short-term hospital",
  "Transfer Other",
  "Home Health Care",
  "Against medical advice",
  "Died in hospital",
  "Discharged/transferred to court/law enforcement",
  "Discharged alive, destination unknown"
)

# dispuniform codes, listed in the same order as their labels in dispList
dispCodes <- c(1, 2, 5, 6, 7, 20, 21, 99)

# Map each code to its label; the factor levels follow the dispList order
# and codes outside dispCodes become NA
allYears$disposition <- factor(
  allYears$dispuniform,
  levels = dispCodes,
  labels = dispList
)

Discharge Quarter

qtrList <- c(
  "1st: Jan - Mar",
  "2nd: Apr - Jun",
  "3rd: Jul - Sep",
  "4th: Oct - Dec"
)

# Map discharge quarter codes 1-4 to their labels in one vectorised step;
# the levels keep the qtrList order and unmatched codes become NA
allYears$disQtr <- factor(
  allYears$dqtr,
  levels = seq_along(qtrList),
  labels = qtrList
)

Elective Status

# elective code 0 -> "Non-elective", 1 -> "Elective"; other values stay NA
electiveLabels <- c("Non-elective", "Elective")
allYears$electiveStr <- electiveLabels[match(allYears$elective, c(0, 1))]

Patient Sex

# female code 0 -> "Male", 1 -> "Female"; any other value stays NA
allYears$sex <- case_when(
  allYears$female == 0 ~ "Male",
  allYears$female == 1 ~ "Female"
)

Hospital Division

divList <- c(
  "New England",
  "Middle Atlantic",
  "East North Central",
  "West North Central",
  "South Atlantic",
  "East South Central",
  "West South Central",
  "Mountain",
  "Pacific"
)

# Map hcup_division codes 1-9 to the division names in one vectorised step;
# the levels keep the divList order and unmatched codes become NA
allYears$division <- factor(
  allYears$hcup_division,
  levels = seq_along(divList),
  labels = divList
)

Primary Payer

payList <- c(
  "Medicare",
  "Medicaid",
  "Private insurance",
  "Self-pay",
  "No charge",
  "Other"
)

# Map pay1 codes 1-6 to the payer names in one vectorised step;
# the levels keep the payList order and unmatched codes become NA
allYears$payer <- factor(
  allYears$pay1,
  levels = seq_along(payList),
  labels = payList
)

Patient Race

raceList <- c(
  "White",
  "Black",
  "Hispanic",
  "Asian or Pacific Islander",
  "Native American",
  "Other"
)

# Map race codes 1-6 to their labels in one vectorised step;
# the levels keep the raceList order and unmatched codes become NA
allYears$raceStr <- factor(
  allYears$race,
  levels = seq_along(raceList),
  labels = raceList
)

Patient Household Income

incomeList <- c(
  "0-25th percentile",
  "26th to 50th percentile (median)",
  "51st to 75th percentile",
  "76th to 100th percentile"
)

# Look up the label for each zipinc_qrtl code (1-4) in one vectorised step.
# match() yields NA for missing or unmatched codes, so those rows stay NA.
# The column is kept as plain character, as before.
allYears$income <- incomeList[
  match(allYears$zipinc_qrtl, seq_along(incomeList))
]

Exploratory Data Analysis

General: Annual Incidence

Take the combined dataset, group by the year, then count the number of surgeries

# One row per year holding the number of records in that year
annualIncidence <- allYears %>%
  group_by(year) %>%
  summarise(count = n(), .groups = "drop")

Plot the annual incidence as a line chart

# Line chart of the yearly record counts; the y-axis is fixed to 0-70000
annualIncidencePlot <- ggplot(annualIncidence, aes(x = year, y = count)) +
  geom_line() +
  ylim(0, 70000) +
  labs(
    title = "Annual Flaps Incidence",
    x = "Year",
    y = "Annual Incidence"
  ) +
  theme_minimal(base_size = 12)

annualIncidencePlot

There was a steady increase in flap surgeries from 2016 to 2019. This is most likely related to the population increase in the U.S.

General: Mean Annual Charge ($)

Take the combined dataset, group by the year, then calculate the average annual charge

# Mean total charge per year, rounded to whole dollars. The totchg > 0
# filter also drops rows whose charge is NA.
annualCharge <- allYears %>%
  filter(totchg > 0) %>%
  group_by(year) %>%
  summarise(avgCharge = round(mean(totchg)), .groups = "drop")

# Line chart of the yearly mean charge; y-axis labels are shown in
# thousands of dollars and the axis is stretched to include 0
annualChargePlot <- ggplot(annualCharge, aes(x = year, y = avgCharge)) +
  geom_line() +
  expand_limits(y = 0) +
  scale_y_continuous(
    labels = scales::dollar_format(scale = 0.001, suffix = "K")
  ) +
  labs(
    title = "Mean Annual Total Charges of Procedures",
    x = "Year",
    y = "Mean Annual Charge"
  ) +
  theme_minimal(base_size = 12)

annualChargePlot

There was a steady increase in the average cost of flap surgeries from 2016 to 2019. From 2018 to 2019, the cost rose more sharply than in the previous years. This is most likely a result of the rising inflation rate, which increased more sharply from 2018 to 2019.

General: Highchart Combining Annual Incidence & Average Charge

# Combined chart: yearly counts as columns on the first y-axis and the mean
# charge as a line on the second (opposite) y-axis, with years on the x-axis
myChart <- highchart() %>%
  hc_title(
    text = "Annual Incidence Vs Mean Annual Charge of Flaps Procedures"
  ) %>%
  hc_plotOptions(series = list(animation = FALSE)) %>%
  hc_xAxis(categories = annualIncidence$year) %>%
  hc_yAxis_multiples(
    list(title = list(text = "Annual Incidence")),
    list(title = list(text = "Average Cost ($)"), opposite = TRUE)
  ) %>%
  hc_add_series(
    data = annualIncidence$count,
    name = "Annual Incidence",
    type = "column",
    yAxis = 0
  ) %>%
  hc_add_series(
    data = annualCharge$avgCharge,
    name = "Average Cost ($)",
    type = "line",
    yAxis = 1
  ) %>%
  hc_colors(c("black", "blue"))

myChart

Patient Demographics

Box plot: Race and age

# Box plot of patient age for each race group. Rows with a missing race are
# removed with is.na(): missing values are NA, not the string "NA", so a
# string comparison is not a reliable way to test for them.
plot1 <- allYears %>%
  filter(!is.na(raceStr)) %>%
  ggplot(aes(x = raceStr, y = age, group = raceStr, fill = raceStr)) +
  geom_boxplot() +
  ggtitle("Median Age Among all Races \nis between 50-60 Years") +
  xlab("Race") +
  ylab("Age") +
  scale_y_continuous(breaks = seq(0, 100, by = 10)) +
  scale_fill_discrete(name = "Race") +
  theme(axis.text.x = element_text(angle = 25))

plot1
## Warning: Removed 10 rows containing non-finite values (`stat_boxplot()`).

The median age of patients throughout the racial groups lies between 50 and 60 years old with White patients on the higher end and Native Americans on the lower end.

Frequency Table: Race and sex

# Cross-tabulate race by sex, then convert the counts to row-wise shares
# (margin = 1: each race sums to 1)
raceSexCounts <- table(allYears$raceStr, allYears$sex)
sexVrace <- arrange(
  as.data.frame(prop.table(raceSexCounts, margin = 1)),
  Var1
)

# NOTE: Freq holds proportions between 0 and 1, although the column header
# labels it as a percentage
tab1 <- sexVrace %>%
  flextable() %>%
  set_header_labels(Var1 = "Race", Var2 = "Sex", Freq = "Obs.(%)")
tab1

Race

Sex

Obs.(%)

White

Female

0.4520737

White

Male

0.5479263

Black

Female

0.4756832

Black

Male

0.5243168

Hispanic

Female

0.4217613

Hispanic

Male

0.5782387

Asian or Pacific Islander

Female

0.5109738

Asian or Pacific Islander

Male

0.4890262

Native American

Female

0.4504754

Native American

Male

0.5495246

Other

Female

0.4391323

Other

Male

0.5608677

With the exception of Asian/Pacific Islander, over 50% of patients in each racial group were male.

Procedures

Bar Chart: ICD-10 Codes

# Bar chart of the ten most frequent procedure codes. Rows with any missing
# field are dropped before counting.
byValue <- procLong %>%
  drop_na() %>%
  count(value, name = "count", sort = TRUE) %>%
  slice_head(n = 10) %>%
  ggplot(aes(x = value, y = count, fill = value)) +
  geom_col() +
  labs(
    title = "Top 10 Flap Procedure ICD-10 Codes",
    x = "ICD-10 Code",
    y = "Number of procedures"
  ) +
  scale_fill_discrete(name = "ICD-10 Code") +
  theme_classic() +
  theme(axis.text.x = element_text(face = "bold", size = 7, angle = 10))
byValue

Most of the associated procedures have codes beginning with the prefix “0JB”, which is classified as excision of subcutaneous tissue and fascia.

Statistical Analysis & Final Product

Statistical Analysis

GLM: Annual Incidence

# Regress the yearly count on year. No family is given, so glm() uses its
# default gaussian family (a straight-line fit through the four years).
lmFit1 <- glm(count ~ year, data = annualIncidence)
summary(lmFit1)
## 
## Call:
## glm(formula = count ~ year, data = annualIncidence)
## 
## Deviance Residuals: 
##      1       2       3       4  
## -234.5   365.5   -27.5  -103.5  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -2545754.5   285352.6  -8.921   0.0123 *
## year            1292.0      141.4   9.135   0.0118 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 100024.5)
## 
##     Null deviance: 8546369  on 3  degrees of freedom
## Residual deviance:  200049  on 2  degrees of freedom
## AIC: 60.632
## 
## Number of Fisher Scoring iterations: 2

Count is the response variable and year is the predictor variable. We are 95% confident that the actual slope lies within (1016, 1568). Annual incidence is increasing at a rate of 1,292 surgeries per year. Formula: y = 1292x - 2545754

# Five-number summary of the observed-minus-fitted counts
summary(residuals(lmFit1, type = "response"))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -234.50 -136.25  -65.50    0.00   70.75  365.50

Distribution is slightly right-skewed

GLM: Mean annual charge

# Regress the yearly mean charge on year. No family is given, so glm() uses
# its default gaussian family (a straight-line fit through the four years).
lmFit2 <- glm(avgCharge ~ year, data = annualCharge)
summary(lmFit2)
## 
## Call:
## glm(formula = avgCharge ~ year, data = annualCharge)
## 
## Deviance Residuals: 
##       1        2        3        4  
##   319.3     93.1  -1144.1    731.7  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -1.352e+07  8.920e+05  -15.16  0.00432 **
## year         6.765e+03  4.422e+02   15.30  0.00424 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 977484.9)
## 
##     Null deviance: 230794625  on 3  degrees of freedom
## Residual deviance:   1954970  on 2  degrees of freedom
## AIC: 69.75
## 
## Number of Fisher Scoring iterations: 2

Average charge is the response variable and year is the predictor variable. We are 95% confident that the actual slope lies within ($5702, $7764). Mean annual charge is increasing at a rate of $6733 per year. Formula: y = 6733x - 13457644

# Five-number summary of the observed-minus-fitted mean charges
summary(residuals(lmFit2, type = "response"))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1144.1  -216.2   206.2     0.0   422.4   731.7

Distribution is slightly right-skewed

GLM: Patient Survival

What predictor variables are statistically significant when evaluating a patient’s chance of survival?

# Logistic regression (binomial family) of in-hospital death on year, sex,
# race, age, number of diagnoses, number of procedures, hospital division
# and income quartile. Rows with a missing value in any model variable are
# dropped by glm().
# NOTE(review): race and hcup_division are passed as their numeric codes, so
# each gets a single slope across the code values rather than one effect per
# category -- confirm this is intended, or wrap them in factor().
dieGLM <- glm(died ~ year + female + race + age + i10_ndx + i10_npr + hcup_division + zipinc_qrtl, data = allYears, family = binomial)

summary(dieGLM)
## 
## Call:
## glm(formula = died ~ year + female + race + age + i10_ndx + i10_npr + 
##     hcup_division + zipinc_qrtl, family = binomial, data = allYears)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5662  -0.1792  -0.1146  -0.0716   3.9819  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   318.581210  31.456657  10.128  < 2e-16 ***
## year           -0.162416   0.015597 -10.413  < 2e-16 ***
## female          0.079968   0.033817   2.365 0.018042 *  
## race            0.054079   0.014871   3.637 0.000276 ***
## age             0.035191   0.001187  29.638  < 2e-16 ***
## i10_ndx         0.100403   0.002547  39.416  < 2e-16 ***
## i10_npr         0.118859   0.002646  44.916  < 2e-16 ***
## hcup_division   0.010918   0.007062   1.546 0.122101    
## zipinc_qrtl    -0.062091   0.015344  -4.047  5.2e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 39437  on 230459  degrees of freedom
## Residual deviance: 32261  on 230451  degrees of freedom
##   (12962 observations deleted due to missingness)
## AIC: 32279
## 
## Number of Fisher Scoring iterations: 8

Died is the response variable and year, sex, race, age, number of diagnoses, number of procedures, hospital division and household income are the predictor variables. At an alpha level of 0.05, hospital division is not statistically significant (p = 0.122) and sex is only marginally significant (p = 0.018). All other variables are statistically significant.

What is the best model for patient survival? Sex?

# Single-predictor logistic model: `died` explained by sex (`female`) alone.
die1 <- glm(
  formula = died ~ female,
  family = binomial,
  data = allYears
)

summary(die1)
## 
## Call:
## glm(formula = died ~ female, family = binomial, data = allYears)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.1862  -0.1862  -0.1840  -0.1840   2.8592  
## 
## Coefficients:
##             Estimate Std. Error  z value Pr(>|z|)    
## (Intercept) -4.07062    0.02135 -190.649   <2e-16 ***
## female       0.02452    0.03152    0.778    0.437    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 41801  on 243074  degrees of freedom
## Residual deviance: 41800  on 243073  degrees of freedom
##   (347 observations deleted due to missingness)
## AIC: 41804
## 
## Number of Fisher Scoring iterations: 7

AIC: 41804

Race?

# Single-predictor logistic model: `died` explained by `race` alone.
die2 <- glm(
  formula = died ~ race,
  family = binomial,
  data = allYears
)

summary(die2)
## 
## Call:
## glm(formula = died ~ race, family = binomial, data = allYears)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.1853  -0.1843  -0.1841  -0.1841   2.8588  
## 
## Coefficients:
##              Estimate Std. Error  z value Pr(>|z|)    
## (Intercept) -4.071921   0.028164 -144.580   <2e-16 ***
## race         0.002614   0.013979    0.187    0.852    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 40253  on 235584  degrees of freedom
## Residual deviance: 40253  on 235583  degrees of freedom
##   (7837 observations deleted due to missingness)
## AIC: 40257
## 
## Number of Fisher Scoring iterations: 7

AIC: 40257

Age?

# Single-predictor logistic model: `died` explained by `age` alone.
die3 <- glm(
  formula = died ~ age,
  family = binomial,
  data = allYears
)

summary(die3)
## 
## Call:
## glm(formula = died ~ age, family = binomial, data = allYears)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.3090  -0.2100  -0.1729  -0.1325   3.5306  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -6.230444   0.067723  -92.00   <2e-16 ***
## age          0.035692   0.001005   35.53   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 41808  on 243273  degrees of freedom
## Residual deviance: 40381  on 243272  degrees of freedom
##   (148 observations deleted due to missingness)
## AIC: 40385
## 
## Number of Fisher Scoring iterations: 7

AIC: 40385

Hospital Division?

# Single-predictor logistic model: `died` explained by hospital division
# (`hcup_division`) alone.
die4 <- glm(
  formula = died ~ hcup_division,
  family = binomial,
  data = allYears
)

summary(die4)
## 
## Call:
## glm(formula = died ~ hcup_division, family = binomial, data = allYears)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.1896  -0.1873  -0.1850  -0.1827   2.8727  
## 
## Coefficients:
##                Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)   -3.997194   0.036134 -110.621   <2e-16 ***
## hcup_division -0.012515   0.006498   -1.926   0.0541 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 41808  on 243283  degrees of freedom
## Residual deviance: 41804  on 243282  degrees of freedom
##   (138 observations deleted due to missingness)
## AIC: 41808
## 
## Number of Fisher Scoring iterations: 7

AIC: 41808

Household Income?

# Single-predictor logistic model: `died` explained by household income
# (`zipinc_qrtl`) alone.
die5 <- glm(
  formula = died ~ zipinc_qrtl,
  family = binomial,
  data = allYears
)

summary(die5)
## 
## Call:
## glm(formula = died ~ zipinc_qrtl, family = binomial, data = allYears)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.1881  -0.1881  -0.1855  -0.1829   2.8729  
## 
## Coefficients:
##             Estimate Std. Error  z value Pr(>|z|)    
## (Intercept) -3.99761    0.03589 -111.394   <2e-16 ***
## zipinc_qrtl -0.02820    0.01451   -1.944   0.0519 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 40919  on 238169  degrees of freedom
## Residual deviance: 40915  on 238168  degrees of freedom
##   (5252 observations deleted due to missingness)
## AIC: 40919
## 
## Number of Fisher Scoring iterations: 7

AIC: 40919

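Of the 5 models, race has the lowest AIC (40257) and hospital division has the highest AIC (41808). By this criterion, race is the best single predictor variable to model a patient’s likelihood of survival and hospital division is the least effective predictor variable. Note, however, that each model was fitted to a different number of observations because of missing values (for example, 7,837 rows were dropped for race but only 138 for hospital division), so these AIC values are not strictly comparable; in its own model, race is not statistically significant (p = 0.852), whereas age is (p < 2e-16).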

Final Product

# Among records with exactly one procedure (i10_npr == 1) and a total charge
# above 900000, tabulate per primary procedure code (i10_pr1): the number of
# records, the total and mean charge, and the total and rounded mean number
# of diagnoses. Records with a missing value in any selected column are
# excluded.
mostExpensive <- allYears %>%
  filter(totchg > 900000, i10_npr == 1) %>%
  select(X, year, i10_ndx, i10_npr, i10_pr1, totchg) %>%
  drop_na() %>%
  group_by(i10_pr1) %>%
  summarise(
    count = n(),
    chgSum = sum(totchg),
    avgChg = chgSum / count,
    diagSum = sum(i10_ndx),
    avgDiag = round(diagSum / count)
  )

mostExpensive
## # A tibble: 6 × 6
##   i10_pr1 count  chgSum   avgChg diagSum avgDiag
##   <chr>   <int>   <int>    <dbl>   <int>   <dbl>
## 1 0HB6XZZ     1  920753  920753       18      18
## 2 0HBBXZZ     1  946020  946020       33      33
## 3 0HBNXZZ     1  929004  929004        8       8
## 4 0JB70ZZ     3 5387347 1795782.      68      23
## 5 0JB80ZZ     2 2538870 1269435       61      30
## 6 0JBL0ZZ     1 1227665 1227665       32      32
# Dual-axis chart of the procedure codes in `mostExpensive`: columns plot the
# average number of diagnoses against the first y-axis, and a line plots the
# average charge against the second y-axis (drawn on the opposite side).
# The x-axis categories are the primary procedure codes.
myExpensiveChart <- highchart() %>%
  hc_title(
    text = "Most Expensive Procedures & Their Associated Average Number of Diagnoses"
  ) %>%
  hc_chart(
    style = list(
      fontFamily = "Georgia",
      fontWeight = "bold"
    )
  ) %>%
  hc_colors(c("red", "green")) %>%
  hc_xAxis(categories = mostExpensive[["i10_pr1"]]) %>%
  hc_yAxis_multiples(
    list(title = list(text = "Average Number of Diagnoses")),
    list(
      title = list(text = "Average Cost of Procedure ($)"),
      opposite = TRUE
    )
  ) %>%
  # Series order matters: it fixes which colour each series receives.
  hc_add_series(
    data = mostExpensive[["avgDiag"]],
    name = "Average Number of Diagnoses",
    type = "column",
    yAxis = 0
  ) %>%
  hc_add_series(
    data = mostExpensive[["avgChg"]],
    name = "Average Cost of Procedure ($)",
    type = "line",
    yAxis = 1
  )

myExpensiveChart

Codes with the prefix “0JB” (tissue and fascia excision) tend to be among the most expensive procedure codes. Codes with the prefix “0HB” (skin excision) come in second.

Conclusion: Data Story

With medicine growing faster than the average person can understand, it’s important for data scientists to gather medical data and present it in a way that the average person, especially policy makers, can understand. Over the years, the popularity of flap surgeries has continued to grow and, along with it, their cost. At a time when inflation is skyrocketing and people can hardly afford basic groceries, our quest to put patient care above profits should never end. To improve patient care throughout the nation, we must first understand who our patients are. Through analysis, we have found that a majority of flap surgery patients are within the 40 to 70 age bracket, with a majority of male patients across most racial groups. We’ve also discovered that skin and fascia excisions are among the most common primary procedures and are also among the most expensive. In examining patient survival, we discovered that race plays a big role and hospital division does not. While it is good news that a hospital’s division does not affect a patient’s survival, it is quite concerning that a patient’s household income is statistically significant in predicting a patient’s survival. Ideally, economic status should never impact a patient’s care, especially their ability to live.