Read in cleaned data (39179 rows)
df1.input.data <- read_csv(here("results",
"code output",
"2018-12-18_mh-readmissions-dataset-for-classification.csv"))
library(lubridate)
# some more cleaning:
df2.cleaned.data <-
df1.input.data %>%
select(denom_PHN,
Gender,
AdmissionAge,
LHAName,
PostalCode,
GPCode,
AdjustedAdmissionDate,
denom_Institution,
denom_AdmissionDate,
denom_DischargeDate,
AdmissionFacilityLongName,
denom_nursingUnitCode,
AdmissionNursingUnit,
AdmittingDrService,
AdmissionAttendingDoctorService,
AdmissionCategoryCode,
AdmissionCategoryGroup,
AdmissionPatientServiceDADDescription,
ED_visits_all_time:ED_admits_last_365days,
elective_admits_all_time:elective_admits_last_365_days,
num_schizophrenia_dx1_all_time,
Readmit_Flag) %>%
mutate(LOS = difftime(denom_DischargeDate,
denom_AdmissionDate,
units = "days") %>%
as.numeric(),
AdmissionAge = as.numeric(AdmissionAge),
AdjustedAdmissionDate = ymd(AdjustedAdmissionDate)
) %>%
# convert all chars to factors:
mutate_if(is.character, factor) %>%
rename(Institution = denom_Institution,
AdmitAge = AdmissionAge,
ReadmitFlag = Readmit_Flag) %>%
clean_names()
# view results:
str(df2.cleaned.data) # 39179 obs. of 33 variables
head(df2.cleaned.data)
summary(df2.cleaned.data)
# skim(df2.cleaned.data)
1. Missing values in field adjusted_admission_date - 786 cases
# df2.cleaned.data %>%
# filter(is.na(adjusted_admission_date)) %>% View("missing vals")
df2.cleaned.data %>%
filter(is.na(adjusted_admission_date)) %>%
select(admit_age) %>%
summary # 786 cases
df2.cleaned.data %>%
filter(admission_facility_long_name == "NULL") %>%
select(admission_facility_long_name) %>%
summary # 800 cases
df2.cleaned.data %>%
filter(gender == "NULL") %>%
select(gender) %>%
summary # 786 cases
todo: delete these cases
- These 786 cases should be removed. Don’t really know what’s going on, but there’s important info missing. e.g. all have Gender = NULL. It’s possible that these are weird PatientIDs that aren’t joining properly across data sources.
- Then again: perhaps this is useful information. There may be patterns specific to these patients who have weird non-matching PatientIDs??
Recall that in SQL script 02_mh-readmissions_adding-predictor-fields.sql, all joins across data sources (ED, ADTC, ADR, Community) are done using PatientID, not PHN.
3. LOS = 0 cases (1886 rows)
# summary(df2.cleaned.data$los)
# ecdf(df2.cleaned.data %>% pull(los)) %>% plot
df3.los.zero <-
df2.cleaned.data %>%
filter(los == 0) %>%
select(institution,
denom_admission_date,
denom_discharge_date,
admit_age,
denom_nursing_unit_code,
admission_nursing_unit,
admitting_dr_service) # %>% nrow() # 1886 rows
df3.los.zero$denom_nursing_unit_code %>% summary
df3.los.zero %>%
select(institution,
denom_nursing_unit_code,
denom_admission_date,
denom_discharge_date) %>%
sample_n(20)
df2.cleaned.data %>%
select(los,
institution) %>%
filter(los <= 30) %>%
ggplot(aes(x = los)) +
geom_density() +
facet_wrap(~institution)

df3.los.zero %>%
group_by(denom_nursing_unit_code) %>%
summarize(count = n()) %>%
arrange(desc(count)) %>%
kable() %>%
kable_styling() %>%
scroll_box(width = "100%", height = "400px")
|
denom_nursing_unit_code
|
count
|
|
EMRG
|
480
|
|
REMR
|
365
|
|
EIP
|
227
|
|
EMERB
|
196
|
|
EMERA
|
130
|
|
EMERU
|
116
|
|
EA-SM
|
85
|
|
EMERC
|
63
|
|
EMERR
|
48
|
|
NULL
|
39
|
|
PER/INPT
|
16
|
|
UD1E
|
15
|
|
PAU
|
11
|
|
PPSY
|
9
|
|
MH-SM
|
8
|
|
MERS
|
7
|
|
EMERD
|
6
|
|
UD1W
|
6
|
|
WP3
|
6
|
|
4NW
|
5
|
|
EMERT
|
4
|
|
HLD
|
4
|
|
RPEU
|
4
|
|
SAU
|
3
|
|
2A-SM
|
2
|
|
2E
|
2
|
|
EMERM
|
2
|
|
ICU
|
2
|
|
M1S
|
2
|
|
MIU
|
2
|
|
SG1
|
2
|
|
UD2S
|
2
|
|
WP2
|
2
|
|
1AA-SM
|
1
|
|
3E
|
1
|
|
A2
|
1
|
|
HBU
|
1
|
|
HCE1
|
1
|
|
HCE2
|
1
|
|
HCW1
|
1
|
|
M3C
|
1
|
|
N/A
|
1
|
|
PAR
|
1
|
|
PED
|
1
|
|
R2W
|
1
|
|
RAMB
|
1
|
|
S5MH
|
1
|
|
USPO
|
1
|
Proportion by site:
df2.cleaned.data %>%
group_by(institution) %>%
summarise(num.zero.los = sum(ifelse(los == 0, 1,0)),
other.los = sum(ifelse(los > 0, 1, 0))) %>%
mutate(prop = num.zero.los/(num.zero.los + other.los),
total = num.zero.los + other.los) %>%
arrange(desc(total)) %>%
kable() %>%
kable_styling()
|
institution
|
num.zero.los
|
other.los
|
prop
|
total
|
|
VGH
|
525
|
11876
|
0.0423353
|
12401
|
|
SPH
|
590
|
11420
|
0.0491257
|
12010
|
|
RHS
|
376
|
4611
|
0.0753960
|
4987
|
|
LGH
|
239
|
4732
|
0.0480789
|
4971
|
|
UBCH
|
8
|
1864
|
0.0042735
|
1872
|
|
SMH
|
102
|
1450
|
0.0657216
|
1552
|
|
PRGH
|
25
|
757
|
0.0319693
|
782
|
|
BCGH
|
5
|
242
|
0.0202429
|
247
|
|
SGH
|
11
|
232
|
0.0452675
|
243
|
|
RWLMH
|
5
|
109
|
0.0438596
|
114
|
 Â
Tentative conclusion: the LOS = 0 cases are mostly patients who were ED inpatients. That is, a decision to admit was made, but they were discharged before they ever moved up into the nursing units.
todo: remove LOS = 0 cases?
We may want to exclude these patients in most of the rest of the analysis.
3.1 Create nested dataframe by institution
# nest by site:
df4.site.nested <-
df2.cleaned.data %>%
group_by(institution) %>%
nest()
# add column to find overall readmit rate:
# load function:
source(here::here("code",
"readmit-rates-overall_function.R"))
df4.site.nested %<>%
mutate(readmission_overall = map(data,
readmit_rates_overall_function,
exclude_los_zero = 1,
last_2_years = 1))
# result:
# df4.site.nested$readmission_overall
# df4.site.nested %>% unnest(readmission_overall)
# extract overall readmit rate:
df4.site.nested %<>%
mutate(overall_readmit = map_dbl(readmission_overall,
function(x){x %>% pull(prop)})) %>%
select(institution,
overall_readmit,
everything())
4. Distributions with and without LOS = 0 cases
# function for plotting densities:
source(here::here("code",
"plot-density_function.R"))
# function for plotting histograms:
source(here::here("code",
"plot-histogram_function.R"))
# map function across nested df:
df4.site.nested %<>%
mutate(densities = map2(data,
institution,
~plot_density_function(..1,
..2,
exclude_los_zero = 1)),
# now add histograms:
histograms = map2(data,
institution,
~plot_histogram_function(..1,
..2,
exclude_los_zero = 0)))
Densities
# view densities:
df4.site.nested$densities
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

##
## [[9]]

##
## [[10]]

Histograms
df4.site.nested$histograms
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

##
## [[9]]

##
## [[10]]

# densities:
# pdf(here::here("results",
# "code output",
# "2019-02-12_los-densities_since-2016.pdf"),
# width = 10)
# df4.site.nested$densities
# dev.off()
# histograms:
# pdf(here::here("results",
# "code output",
# "2019-02-12_los-histograms_since-2016.pdf"),
# width = 10)
# df4.site.nested$histograms
# dev.off()
5. Age categories
Are there natural splits in age suggested by the data?
# function for plotting age cdfs:
source(here::here("code",
"plot-age-cdf_function.R"))
# add plots in a column:
df4.site.nested %<>%
mutate(age_ecdf = map2(data,
institution,
plot_age_cdf_function))
# results:
df4.site.nested$age_ecdf
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

##
## [[9]]

##
## [[10]]

# save outputs:
# pdf(here::here("results",
# "code output",
# "2019-02-12_age-ecdf.pdf"))
# df4.site.nested$age_ecdf
# dev.off()
Segmenting LOS by age quantiles
# function for boxplot by quartile:
source(here::here("code",
"los-boxplot_function.R"))
# map function across nesed df:
df4.site.nested %<>%
mutate(los_boxplot = map2(data,
institution,
los_boxplot_function,
exclude_los_zero = 1))
# print boxplots:
# df4.site.nested$los_boxplot[[1]]
df4.site.nested$los_boxplot
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

##
## [[9]]

##
## [[10]]

# save outputs:
# pdf(here::here("results",
# "code output",
# "2019-02-13_los-distribution-by-age-quartile.pdf"))
# df4.site.nested$los_boxplot
# dev.off()
6. Problems with BCGH and RWLMH
- Note that age is missing in all cases, as well as gender
- Explanation: PatientID in ADR isn’t matching with the PatientIDs in ADTC for these sites?
df2.cleaned.data %>%
filter(institution == "BCGH") %>%
summary()
## denom_phn gender admit_age lha_name
## 9026468137:52 F : 0 Min. : NA NULL :247
## 9023956948:38 M : 0 1st Qu.: NA 100 Mile House : 0
## 9020625092:23 NULL:247 Median : NA Abbotsford : 0
## 9014980462:17 Mean :NaN Agassiz-Harrison : 0
## 9020458731:11 3rd Qu.: NA Alberni : 0
## 9064682565:10 Max. : NA Armstrong-Spallumcheen: 0
## (Other) :96 NA's :247 (Other) : 0
## postal_code gp_code adjusted_admission_date institution
## NULL :247 NULL :170 Min. :NA BCGH :247
## A0A0A0 : 0 23440 : 7 1st Qu.:NA LGH : 0
## OutofC : 0 63606 : 6 Median :NA PRGH : 0
## OutofP : 0 63195 : 3 Mean :NA RHS : 0
## UnkBC : 0 4559 : 1 3rd Qu.:NA RWLMH : 0
## V0A1H0 : 0 (Other): 3 Max. :NA SGH : 0
## (Other): 0 NA's : 57 NA's :247 (Other): 0
## denom_admission_date denom_discharge_date
## Min. :2012-01-10 Min. :2012-04-08
## 1st Qu.:2013-12-19 1st Qu.:2013-12-21
## Median :2015-03-27 Median :2015-03-31
## Mean :2015-04-10 Mean :2015-04-14
## 3rd Qu.:2016-09-18 3rd Qu.:2016-09-20
## Max. :2018-02-16 Max. :2018-02-17
##
## admission_facility_long_name denom_nursing_unit_code
## NULL :247 NULL :247
## Holy Family Hospital : 0 10A : 0
## Lions Gate Hospital : 0 10C : 0
## Mount Saint Joseph Hospital : 0 1AA-SM : 0
## Powell River General Hospital: 0 2A-SM : 0
## Richmond Hospital : 0 2E : 0
## (Other) : 0 (Other): 0
## admission_nursing_unit
## NULL :247
## 1AA-SM : 0
## 2 East - Med/ Post Coronary Care: 0
## 2A-SM : 0
## 3 East - Paediatrics : 0
## 3 West - Maternity : 0
## (Other) : 0
## admitting_dr_service admission_attending_doctor_service
## NULL :247 NULL :247
## Anaesthesiology : 0 Anaesthesiology : 0
## Anatomical Pathology : 0 Cardiac Surgery : 0
## Cardiac Surgery : 0 Cardiology : 0
## Cardiology : 0 Critical Care Medicine: 0
## Critical Care Medicine: 0 Diagnostic Radiology : 0
## (Other) : 0 (Other) : 0
## admission_category_code admission_category_group
## NULL :247 Elective : 0
## 1 : 0 Emergent : 0
## 18984780: 0 Newborn : 0
## 2 : 0 NULL :247
## 3 : 0 Urgent : 0
## 309203 : 0 Urgent\\Emergent: 0
## (Other) : 0
## admission_patient_service_dad_description ed_visits_all_time
## NULL :247 Min. : 0.0000
## Alternate Level of Care: 0 1st Qu.: 0.0000
## Cardiology : 0 Median : 0.0000
## Cardiovascular Surgery : 0 Mean : 0.8057
## Family Practice : 0 3rd Qu.: 1.0000
## Gastro-Enterology : 0 Max. :20.0000
## (Other) : 0
## ed_visits_last_30days ed_visits_last_180days ed_visits_last_365days
## Min. :0.00000 Min. : 0.0000 Min. : 0.0000
## 1st Qu.:0.00000 1st Qu.: 0.0000 1st Qu.: 0.0000
## Median :0.00000 Median : 0.0000 Median : 0.0000
## Mean :0.01619 Mean : 0.1457 Mean : 0.2186
## 3rd Qu.:0.00000 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :2.00000 Max. :13.0000 Max. :14.0000
##
## ed_admits_all_time ed_admits_last_30days ed_admits_last_180days
## Min. :0.0000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.0000 Median :0.000000 Median :0.00000
## Mean :0.2024 Mean :0.004049 Mean :0.04453
## 3rd Qu.:0.0000 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :3.0000 Max. :1.000000 Max. :1.00000
##
## ed_admits_last_365days elective_admits_all_time
## Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000
## Mean :0.06883 Mean :0.3846
## 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :6.0000
##
## elective_admits_last_30_days elective_admits_last_180_days
## Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000
## Mean :0.01215 Mean :0.06073
## 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000
##
## elective_admits_last_365_days num_schizophrenia_dx1_all_time
## Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000
## Mean :0.1174 Mean :0.01215
## 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :2.0000 Max. :1.00000
##
## readmit_flag los
## Min. :0.000 Min. : 0.000
## 1st Qu.:0.000 1st Qu.: 1.000
## Median :0.000 Median : 2.000
## Mean :0.332 Mean : 3.255
## 3rd Qu.:1.000 3rd Qu.: 3.000
## Max. :1.000 Max. :98.000
##
7. Readmission rates by age quartile
# load functions:
source(here::here("code",
"readmit-rates-by-age_function.R"))
# map function across nested df:
df4.site.nested %<>%
mutate(readmit_rates = map(data,
readmit_rates_function,
exclude_los_zero = 1),
# now add the column plots:
readmit_plot = pmap(list(readmit_rates,
as.character(institution),
overall_readmit),
readmit_rates_plot))
# results:
df4.site.nested$readmit_plot[[1]]

df4.site.nested$readmit_plot
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

##
## [[9]]

##
## [[10]]

Notes
- Recall the effects of Age and LOS are correlated: it looks like people in age quantile 4 have lowest readmit %, but this is not necessarily the effect of age alone: see
df4.site.nested$los_boxplot - the oldest patients also have longest LOS * So maybe it’s the long LOS that’s causing them to have low readmits, not the fact that they are older.
- … Actually, it’s both, which is why both variables should be in the model.
# save results:
# pdf(here::here("results",
# "code output",
# "2019-02-13_readmit-rates-by-age-quantile.pdf"),
# width = 10)
# df4.site.nested$readmit_plot
# dev.off()
df4.site.nested %>%
unnest(readmit_rates) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped")) %>%
scroll_box(width = "100%", height = "400px")
|
institution
|
overall_readmit
|
age_quantile
|
cases
|
readmit_cases
|
prop
|
exclude_los_zero
|
last_2_years
|
|
VGH
|
0.1794504
|
0
|
11
|
2
|
0.1818182
|
1
|
1
|
|
VGH
|
0.1794504
|
1
|
964
|
162
|
0.1680498
|
1
|
1
|
|
VGH
|
0.1794504
|
2
|
1095
|
200
|
0.1826484
|
1
|
1
|
|
VGH
|
0.1794504
|
3
|
1063
|
235
|
0.2210724
|
1
|
1
|
|
VGH
|
0.1794504
|
4
|
1052
|
152
|
0.1444867
|
1
|
1
|
|
UBCH
|
0.0828804
|
0
|
6
|
0
|
0.0000000
|
1
|
1
|
|
UBCH
|
0.0828804
|
1
|
178
|
10
|
0.0561798
|
1
|
1
|
|
UBCH
|
0.0828804
|
2
|
175
|
18
|
0.1028571
|
1
|
1
|
|
UBCH
|
0.0828804
|
3
|
191
|
23
|
0.1204188
|
1
|
1
|
|
UBCH
|
0.0828804
|
4
|
186
|
10
|
0.0537634
|
1
|
1
|
|
SPH
|
0.1881597
|
0
|
12
|
3
|
0.2500000
|
1
|
1
|
|
SPH
|
0.1881597
|
1
|
1066
|
235
|
0.2204503
|
1
|
1
|
|
SPH
|
0.1881597
|
2
|
1079
|
239
|
0.2215014
|
1
|
1
|
|
SPH
|
0.1881597
|
3
|
1103
|
199
|
0.1804170
|
1
|
1
|
|
SPH
|
0.1881597
|
4
|
1098
|
144
|
0.1311475
|
1
|
1
|
|
LGH
|
0.1189904
|
0
|
8
|
0
|
0.0000000
|
1
|
1
|
|
LGH
|
0.1189904
|
1
|
402
|
40
|
0.0995025
|
1
|
1
|
|
LGH
|
0.1189904
|
2
|
425
|
54
|
0.1270588
|
1
|
1
|
|
LGH
|
0.1189904
|
3
|
399
|
61
|
0.1528822
|
1
|
1
|
|
LGH
|
0.1189904
|
4
|
430
|
43
|
0.1000000
|
1
|
1
|
|
RHS
|
0.1450428
|
0
|
3
|
0
|
0.0000000
|
1
|
1
|
|
RHS
|
0.1450428
|
1
|
373
|
44
|
0.1179625
|
1
|
1
|
|
RHS
|
0.1450428
|
2
|
405
|
73
|
0.1802469
|
1
|
1
|
|
RHS
|
0.1450428
|
3
|
422
|
53
|
0.1255924
|
1
|
1
|
|
RHS
|
0.1450428
|
4
|
431
|
67
|
0.1554524
|
1
|
1
|
|
PRGH
|
0.1423841
|
0
|
2
|
0
|
0.0000000
|
1
|
1
|
|
PRGH
|
0.1423841
|
1
|
71
|
18
|
0.2535211
|
1
|
1
|
|
PRGH
|
0.1423841
|
2
|
73
|
8
|
0.1095890
|
1
|
1
|
|
PRGH
|
0.1423841
|
3
|
77
|
9
|
0.1168831
|
1
|
1
|
|
PRGH
|
0.1423841
|
4
|
79
|
8
|
0.1012658
|
1
|
1
|
|
SMH
|
0.1646859
|
0
|
2
|
0
|
0.0000000
|
1
|
1
|
|
SMH
|
0.1646859
|
1
|
146
|
20
|
0.1369863
|
1
|
1
|
|
SMH
|
0.1646859
|
2
|
143
|
39
|
0.2727273
|
1
|
1
|
|
SMH
|
0.1646859
|
3
|
144
|
22
|
0.1527778
|
1
|
1
|
|
SMH
|
0.1646859
|
4
|
154
|
16
|
0.1038961
|
1
|
1
|
|
SGH
|
0.1038961
|
1
|
18
|
3
|
0.1666667
|
1
|
1
|
|
SGH
|
0.1038961
|
2
|
19
|
3
|
0.1578947
|
1
|
1
|
|
SGH
|
0.1038961
|
3
|
19
|
0
|
0.0000000
|
1
|
1
|
|
SGH
|
0.1038961
|
4
|
21
|
2
|
0.0952381
|
1
|
1
|
|
RWLMH
|
0.0303030
|
0
|
33
|
1
|
0.0303030
|
1
|
1
|
|
BCGH
|
0.3670886
|
0
|
79
|
29
|
0.3670886
|
1
|
1
|
8. Readmission rates by LOS decile
# load functions:
source(here::here("code",
"readmit-rates-by-los_function.R"))
# map function across nested df:
df4.site.nested %<>%
mutate(readmit_rates_by_los = map(data,
readmit_rates_los_function,
exclude_los_zero = 1),
readmit_by_los_plot = pmap(list(readmit_rates_by_los,
as.character(institution),
overall_readmit),
readmit_rates_by_los_plot))
# results:
# df4.site.nested$readmit_rates_by_los
# df4.site.nested$readmit_by_los_plot[[2]]
df4.site.nested$readmit_by_los_plot
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

##
## [[9]]

##
## [[10]]

# write output:
# pdf(here::here("results",
# "code output",
# "2019-02-13_readmit-rates-by-LOS-decile.pdf"),
# width = 10)
# df4.site.nested$readmit_by_los_plot
# dev.off()
9. Readmission rates by Gender
# load functions:
source(here::here("code",
"readmit-rates-by-gender_function.R"))
# map function across nested df:
df4.site.nested %<>%
mutate(readmit_rates_by_gender = map(data,
readmit_rates_gender_function,
exclude_los_zero = 1),
readmit_plot_by_gender = pmap(list(readmit_rates_by_gender,
as.character(institution),
overall_readmit),
readmit_rates_by_gender_plot)
)
# result:
df4.site.nested %>%
unnest(readmit_rates_by_gender) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped")) %>%
scroll_box(width = "100%", height = "400px")
|
institution
|
overall_readmit
|
gender
|
cases
|
readmit_cases
|
prop
|
exclude_los_zero
|
last_2_years
|
|
VGH
|
0.1794504
|
F
|
1790
|
325
|
0.1815642
|
1
|
1
|
|
VGH
|
0.1794504
|
M
|
2384
|
424
|
0.1778523
|
1
|
1
|
|
VGH
|
0.1794504
|
NULL
|
11
|
2
|
0.1818182
|
1
|
1
|
|
UBCH
|
0.0828804
|
F
|
420
|
28
|
0.0666667
|
1
|
1
|
|
UBCH
|
0.0828804
|
M
|
310
|
33
|
0.1064516
|
1
|
1
|
|
UBCH
|
0.0828804
|
NULL
|
6
|
0
|
0.0000000
|
1
|
1
|
|
SPH
|
0.1881597
|
F
|
1616
|
262
|
0.1621287
|
1
|
1
|
|
SPH
|
0.1881597
|
M
|
2730
|
555
|
0.2032967
|
1
|
1
|
|
SPH
|
0.1881597
|
NULL
|
12
|
3
|
0.2500000
|
1
|
1
|
|
LGH
|
0.1189904
|
F
|
835
|
109
|
0.1305389
|
1
|
1
|
|
LGH
|
0.1189904
|
M
|
821
|
89
|
0.1084044
|
1
|
1
|
|
LGH
|
0.1189904
|
NULL
|
8
|
0
|
0.0000000
|
1
|
1
|
|
RHS
|
0.1450428
|
F
|
672
|
90
|
0.1339286
|
1
|
1
|
|
RHS
|
0.1450428
|
M
|
959
|
147
|
0.1532847
|
1
|
1
|
|
RHS
|
0.1450428
|
NULL
|
3
|
0
|
0.0000000
|
1
|
1
|
|
PRGH
|
0.1423841
|
F
|
159
|
25
|
0.1572327
|
1
|
1
|
|
PRGH
|
0.1423841
|
M
|
141
|
18
|
0.1276596
|
1
|
1
|
|
PRGH
|
0.1423841
|
NULL
|
2
|
0
|
0.0000000
|
1
|
1
|
|
SMH
|
0.1646859
|
F
|
314
|
53
|
0.1687898
|
1
|
1
|
|
SMH
|
0.1646859
|
M
|
273
|
44
|
0.1611722
|
1
|
1
|
|
SMH
|
0.1646859
|
NULL
|
2
|
0
|
0.0000000
|
1
|
1
|
|
SGH
|
0.1038961
|
F
|
43
|
5
|
0.1162791
|
1
|
1
|
|
SGH
|
0.1038961
|
M
|
34
|
3
|
0.0882353
|
1
|
1
|
|
RWLMH
|
0.0303030
|
NULL
|
33
|
1
|
0.0303030
|
1
|
1
|
|
BCGH
|
0.3670886
|
NULL
|
79
|
29
|
0.3670886
|
1
|
1
|
# graphs:
df4.site.nested$readmit_plot_by_gender
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

##
## [[9]]

##
## [[10]]

# write output:
# pdf(here::here("results",
# "code output",
# "2019-02-13_readmit-rates-by-gender.pdf"),
# width = 10)
# df4.site.nested$readmit_plot_by_gender
# dev.off()
10. Pull nested data together to print
df5.0.overall.readmits <-
df4.site.nested %>%
unnest(readmission_overall) %>%
select(institution,
cases,
readmit_cases,
prop)
df5.1.age.readmits <-
df4.site.nested %>%
unnest(readmit_rates) %>%
select(institution,
age_quantile,
prop) %>%
spread(key = age_quantile,
value = prop)
df5.2.los.readmits <-
df4.site.nested %>%
unnest(readmit_rates_by_los) %>%
select(institution,
los_decile,
prop) %>%
spread(key = los_decile,
value = prop) %>%
select(-`10`,
`10`)
df5.3.gender.readmits <-
df4.site.nested %>%
unnest(readmit_rates_by_gender) %>%
select(institution,
gender,
prop) %>%
spread(key = gender,
value = prop)
# join them all together:
df5.summary <-
df5.0.overall.readmits %>%
inner_join(df5.1.age.readmits,
by = c("institution" = "institution")) %>%
inner_join(df5.2.los.readmits,
by = c("institution" = "institution")) %>%
inner_join(df5.3.gender.readmits,
by = c("institution" = "institution")) %>%
purrr::set_names(c("institution",
"overall_cases",
"overall_readmits",
"overall_prop",
"age_quantile_0",
"age_quantile_1",
"age_quantile_2",
"age_quantile_3",
"age_quantile_4",
"los_decile_1",
"los_decile_2",
"los_decile_3",
"los_decile_4",
"los_decile_5",
"los_decile_6",
"los_decile_7",
"los_decile_8",
"los_decile_9",
"los_decile_10",
"gender_female",
"gender_male",
"gender_NULL"))
# result
df5.summary %>%
kable() %>%
kable_styling(bootstrap_options = c("striped")) %>%
scroll_box(width = "100%", height = "400px")
|
institution
|
overall_cases
|
overall_readmits
|
overall_prop
|
age_quantile_0
|
age_quantile_1
|
age_quantile_2
|
age_quantile_3
|
age_quantile_4
|
los_decile_1
|
los_decile_2
|
los_decile_3
|
los_decile_4
|
los_decile_5
|
los_decile_6
|
los_decile_7
|
los_decile_8
|
los_decile_9
|
los_decile_10
|
gender_female
|
gender_male
|
gender_NULL
|
|
VGH
|
4185
|
751
|
0.1794504
|
0.1818182
|
0.1680498
|
0.1826484
|
0.2210724
|
0.1444867
|
NA
|
0.1807229
|
0.2176871
|
0.2061657
|
0.2202073
|
0.1953782
|
0.2019002
|
0.1789976
|
0.1294964
|
0.0898345
|
0.1815642
|
0.1778523
|
0.1818182
|
|
UBCH
|
736
|
61
|
0.0828804
|
0.0000000
|
0.0561798
|
0.1028571
|
0.1204188
|
0.0537634
|
0.1944444
|
0.0769231
|
0.1071429
|
0.0428571
|
0.0649351
|
0.0821918
|
0.0625000
|
0.0641026
|
0.0253165
|
0.1081081
|
0.0666667
|
0.1064516
|
0.0000000
|
|
SPH
|
4358
|
820
|
0.1881597
|
0.2500000
|
0.2204503
|
0.2215014
|
0.1804170
|
0.1311475
|
NA
|
0.1869031
|
0.1917211
|
0.2033582
|
0.2556818
|
0.2490637
|
0.1818182
|
0.1698925
|
0.1341176
|
0.1185682
|
0.1621287
|
0.2032967
|
0.2500000
|
|
LGH
|
1664
|
198
|
0.1189904
|
0.0000000
|
0.0995025
|
0.1270588
|
0.1528822
|
0.1000000
|
NA
|
0.1446281
|
0.1398964
|
0.0970149
|
0.1336406
|
0.1386139
|
0.1034483
|
0.0943396
|
0.1028571
|
0.0892857
|
0.1305389
|
0.1084044
|
0.0000000
|
|
RHS
|
1634
|
237
|
0.1450428
|
0.0000000
|
0.1179625
|
0.1802469
|
0.1255924
|
0.1554524
|
NA
|
NA
|
0.1343284
|
NA
|
0.1554054
|
0.1621622
|
0.1681416
|
0.1272727
|
0.1428571
|
0.1301775
|
0.1339286
|
0.1532847
|
0.0000000
|
|
PRGH
|
302
|
43
|
0.1423841
|
0.0000000
|
0.2535211
|
0.1095890
|
0.1168831
|
0.1012658
|
NA
|
0.1754386
|
0.1200000
|
0.1666667
|
0.0952381
|
0.1200000
|
0.1250000
|
0.1333333
|
0.1142857
|
0.2187500
|
0.1572327
|
0.1276596
|
0.0000000
|
|
SMH
|
589
|
97
|
0.1646859
|
0.0000000
|
0.1369863
|
0.2727273
|
0.1527778
|
0.1038961
|
NA
|
0.2500000
|
NA
|
0.1904762
|
0.1333333
|
0.1346154
|
0.1578947
|
0.1269841
|
0.1355932
|
0.1093750
|
0.1687898
|
0.1611722
|
0.0000000
|
|
SGH
|
77
|
8
|
0.1038961
|
NA
|
0.1666667
|
0.1578947
|
0.0000000
|
0.0952381
|
NA
|
0.2000000
|
NA
|
NA
|
0.1363636
|
NA
|
0.0833333
|
0.0909091
|
0.0000000
|
0.0000000
|
0.1162791
|
0.0882353
|
NA
|
|
RWLMH
|
33
|
1
|
0.0303030
|
0.0303030
|
NA
|
NA
|
NA
|
NA
|
NA
|
NA
|
NA
|
NA
|
0.0000000
|
NA
|
NA
|
0.0000000
|
0.2500000
|
0.0000000
|
NA
|
NA
|
0.0303030
|
|
BCGH
|
79
|
29
|
0.3670886
|
0.3670886
|
NA
|
NA
|
NA
|
NA
|
NA
|
NA
|
0.3888889
|
NA
|
NA
|
0.3928571
|
NA
|
0.3529412
|
0.4285714
|
0.2222222
|
NA
|
NA
|
0.3670886
|
# WRITE OUTPUT:
write_csv(df5.summary,
here::here("results",
"code output",
"2019-02-13_summary-readmit-rates.csv"))