#
# Statistics Project 3 - Evaluate readmissions rates report
#
# use some libraries etc
library(stats)
library(lsr)
library(data.table)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:data.table':
##
## between, last
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# https://rstudio-pubs-static.s3.amazonaws.com/23230_b2e9b87251a2488da0fba51325e26040.html
load(url("http://assets.datacamp.com/course/dasi/inference.Rdata"))
setwd("~/apps/sliderule/statistics_project3/statistics project 3")
hospital <- read.csv("clean_hospital_read_df.csv")
# add scatterplot, graphs to explore data (outliers, missing values, big relationships)
plot(hospital$Number.of.Discharges, hospital$Number.of.Readmissions)

# try subsetting by hospital size (based on suggested feature, #discharges)
small <- subset(hospital, hospital$Number.of.Discharges < 100)
large <- subset(hospital, hospital$Number.of.Discharges > 1000)
med <- subset(hospital, hospital$Number.of.Discharges < 1000 & hospital$Number.of.Discharges > 100 )
xlarge <- subset(hospital, hospital$Number.of.Discharges > 2000)
# BASIC THEME
theme.chart <-
theme(legend.position = "none") +
theme(plot.title = element_text(size=26, family="Trebuchet MS", face="bold", hjust=0, color="#666666")) +
theme(axis.title = element_text(size=18, family="Trebuchet MS", face="bold", color="#666666")) +
theme(axis.title.y = element_text(angle=0))
# SCATTERPLOT THEME
theme.chart_SCATTER <- theme.chart +
theme(axis.title.x = element_text(hjust=0, vjust=-.5))
# HISTOGRAM THEME
theme.chart_HIST <- theme.chart +
theme(axis.title.x = element_text(hjust=0, vjust=-.5))
# SMALL MULTIPLE THEME
theme.chart_SMALLM <- theme.chart +
theme(panel.grid.minor = element_blank()) +
theme(strip.text.x = element_text(size=16, family="Trebuchet MS", face="bold", color="#666666"))
# plot small hospitals discharges to readmissions
ggplot(data=small, aes(x=Number.of.Discharges, y=Number.of.Readmissions)) +
geom_point(alpha=.4, size=4, color="#9ecae1") +
ggtitle("Discharges vs Readmissions (small hospitals)") +
labs(x="Discharges", y="Readmissions") +
theme.chart_SCATTER
## Warning: Removed 81 rows containing missing values (geom_point).

# plot med hospitals discharges to readmissions
ggplot(data=med, aes(x=Number.of.Discharges, y=Number.of.Readmissions)) +
geom_point(alpha=.4, size=4, color="#9ecae1") +
ggtitle("Discharges vs Readmissions (med hospitals)") +
labs(x="Discharges", y="Readmissions") +
theme.chart_SCATTER

# plot large hospitals discharges to readmissions
ggplot(data=large, aes(x=Number.of.Discharges, y=Number.of.Readmissions)) +
geom_point(alpha=.4, size=4, color="#9ecae1") +
ggtitle("Discharges vs Readmissions (large hospitals)") +
labs(x="Discharges", y="Readmissions") +
theme.chart_SCATTER

# plot xlarge hospitals discharges to readmissions
ggplot(data=xlarge, aes(x=Number.of.Discharges, y=Number.of.Readmissions)) +
geom_point(alpha=.4, size=4, color="#9ecae1") +
ggtitle("Discharges vs Readmissions (xlarge hospitals)") +
labs(x="Discharges", y="Readmissions") +
theme.chart_SCATTER

# look at expected vs predicated readmission across all hospitals
ggplot(data=hospital, aes(x=Expected.Readmission.Rate, y=Predicted.Readmission.Rate)) +
geom_point(alpha=.4, size=4, color="#9ecae1") +
ggtitle("Expected Readmissions vs Predicted Readmissions") +
labs(x="Expected Readmissions", y="Predicted Readmissions") +
theme.chart_SCATTER
## Warning: Removed 81 rows containing missing values (geom_point).

# distribution of readmissions
ggplot(data=hospital, aes(x=Number.of.Discharges)) +
geom_histogram(fill="#9ecae1") +
ggtitle("Histogram of Readmissions") +
labs(x="Readmissions", y="Count\nof Records") +
theme.chart_HIST
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# distribution of discharges
ggplot(data=hospital, aes(x=Number.of.Discharges)) +
geom_histogram(fill="#9ecae1") +
ggtitle("Histogram of Discharges") +
labs(x="Discharges", y="Count\nof Records") +
theme.chart_HIST
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# distribution of smaller discharges (0..75)
hospital %>%
filter(Number.of.Discharges >0 & Number.of.Discharges <75) %>%
ggplot(aes(x= as.factor(Number.of.Discharges))) +
geom_bar(fill="#9ecae1") +
labs(x="Discharges") +
theme.chart

# readmissions by state
ggplot(data=hospital, aes(x=Number.of.Readmissions)) +
geom_histogram(fill="#9ecae1") +
ggtitle("Histogram of Readmissions\nby State") +
labs(x="Readmissions", y="Count\nof Records") +
facet_wrap(~State) +
theme.chart_SMALLM
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# readmissions by procedures
ggplot(data=hospital, aes(x=Number.of.Readmissions)) +
geom_histogram(fill="#9ecae1") +
ggtitle("Histogram of Readmissions\nby Procedure") +
labs(x="Readmissions", y="Count\nof Records") +
facet_wrap(~Measure.Name) +
theme.chart_SMALLM
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# TO TEST ASSERTION: Rate of readmissions in trending down with increasing number of discharges#
#
# TRY: Looking for inverse (negative correlation) between #discharges and #readmissions
#
# RESULTS:
# 1) with #readmissions as a prectior of #discharges, significance with so-so R-squared with positive correlation
# 2) with #discharges as a predictor, significance with so-so R-squared with smaller positive correction
#
summary(lm(hospital$Number.of.Discharges~hospital$Number.of.Readmissions))
##
## Call:
## lm(formula = hospital$Number.of.Discharges ~ hospital$Number.of.Readmissions)
##
## Residuals:
## Min 1Q Median 3Q Max
## -499.4 -86.6 -48.2 17.3 5630.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 104.36082 2.57723 40.49 <2e-16 ***
## hospital$Number.of.Readmissions 4.10325 0.02957 138.74 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 188.8 on 11495 degrees of freedom
## (81 observations deleted due to missingness)
## Multiple R-squared: 0.6261, Adjusted R-squared: 0.6261
## F-statistic: 1.925e+04 on 1 and 11495 DF, p-value: < 2.2e-16
summary(lm(hospital$Number.of.Readmissions~hospital$Number.of.Discharges))
##
## Call:
## lm(formula = hospital$Number.of.Readmissions ~ hospital$Number.of.Discharges)
##
## Residuals:
## Min 1Q Median 3Q Max
## -786.41 -8.27 -0.27 12.33 326.39
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.8676 0.5262 14.95 <2e-16 ***
## hospital$Number.of.Discharges 0.1526 0.0011 138.74 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 36.41 on 11495 degrees of freedom
## (81 observations deleted due to missingness)
## Multiple R-squared: 0.6261, Adjusted R-squared: 0.6261
## F-statistic: 1.925e+04 on 1 and 11495 DF, p-value: < 2.2e-16
# In hospitals/facilities with number of discharges < 100, mean excess readmission rate is 1.023 and 63% have excess readmission rate greater than 1
# In hospitals/facilities with number of discharges > 1000, mean excess readmission rate is 0.978 and 44% have excess readmission rate greater than 1
# fit1 = lm(hospital$Number.of.Discharges~hospital$Number.of.Readmissions)
# summary(fit1)
# res.fit1 = resid(fit1)
# plot(hospital$Number.of.Readmissions, res.fit1,
# ylab="Residuals", xlab="Number of Readmissions", main="Number of Discharges")
# abline(0, 0)
# TO TEST ASSERTION: In hospitals/facilities with number of discharges < 100, mean excess readmission rate is 1.023 and 63% have excess readmission rate greater than 1
# TO TEST ASSERTION: In hospitals/facilities with number of discharges > 1000, mean excess readmission rate is 0.978 and 44% have excess readmission rate greater than 1
small <- subset(hospital, hospital$Number.of.Discharges < 100)
plot(small$Number.of.Discharges, small$Number.of.Readmissions)

mean(small$Excess.Readmission.Ratio, na.rm = TRUE)
## [1] 1.022618
# 1.022618
nrow(subset(small,small$Excess.Readmission.Ratio > 1))/nrow(small)
## [1] 0.5918046
# 0.5918046 (less than 63% asserted)
large <- subset(hospital, hospital$Number.of.Discharges > 1000)
plot(large$Number.of.Discharges, large$Number.of.Readmissions)

# add small/large flag variable, plot both with color
mean(large$Excess.Readmission.Ratio, na.rm = TRUE)
## [1] 0.9783354
# 0.9783354
nrow(subset(large,large$Excess.Readmission.Ratio > 1))/nrow(large)
## [1] 0.4449244
# 0.4449244
# ASSERTION: There is a significant correlation between hospital capacity (number of discharges) and readmission rates.
# VALIDATION: Found ~.62 adjusted R^2
summary(lm(hospital$Number.of.Discharges~hospital$Number.of.Readmissions))
##
## Call:
## lm(formula = hospital$Number.of.Discharges ~ hospital$Number.of.Readmissions)
##
## Residuals:
## Min 1Q Median 3Q Max
## -499.4 -86.6 -48.2 17.3 5630.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 104.36082 2.57723 40.49 <2e-16 ***
## hospital$Number.of.Readmissions 4.10325 0.02957 138.74 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 188.8 on 11495 degrees of freedom
## (81 observations deleted due to missingness)
## Multiple R-squared: 0.6261, Adjusted R-squared: 0.6261
## F-statistic: 1.925e+04 on 1 and 11495 DF, p-value: < 2.2e-16
aov.1 = aov(Number.of.Discharges~Number.of.Readmissions, data=data.table(hospital))
summary(aov.1)
## Df Sum Sq Mean Sq F value Pr(>F)
## Number.of.Readmissions 1 686160392 686160392 19250 <2e-16 ***
## Residuals 11495 409746355 35646
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 81 observations deleted due to missingness
print(model.tables(aov.1,"means"), digits=3)
## Warning in replications(paste("~", xx), data = mf): non-factors ignored:
## Number.of.Readmissions
## Tables of means
## Grand mean
##
## 365.4662
##
## Number.of.Readmissions
## Number.of.Readmissions
## 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## 149 154 158 162 166 170 174 178 182 186 191 195 199 203 207
## 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 211 215 219 223 227 232 236 240 244 248 252 256 260 264 268
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
## 273 277 281 285 289 293 297 301 305 310 314 318 322 326 330
## 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
## 334 338 342 346 351 355 359 363 367 371 375 379 383 387 392
## 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
## 396 400 404 408 412 416 420 424 429 433 437 441 445 449 453
## 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 457 461 465 470 474 478 482 486 490 494 498 502 506 511 515
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
## 519 523 527 531 535 539 543 548 552 556 560 564 568 572 576
## 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
## 580 584 589 593 597 601 605 609 613 617 621 625 630 634 638
## 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
## 642 646 650 654 658 662 667 671 675 679 683 687 691 695 699
## 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 703 708 712 716 720 724 728 732 736 740 744 749 753 757 761
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
## 765 769 773 777 781 786 790 794 798 802 806 810 814 818 822
## 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
## 827 831 835 839 843 847 851 855 859 863 868 872 876 880 884
## 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
## 888 892 896 900 904 909 913 917 921 925 929 933 937 941 946
## 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
## 950 954 958 962 966 970 974 978 982 987 991 995 999 1003 1007
## 221 223 224 225 226 227 228 229 230 231 232 233 234 235 236
## 1011 1019 1023 1028 1032 1036 1040 1044 1048 1052 1056 1060 1065 1069 1073
## 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
## 1077 1081 1085 1089 1093 1097 1101 1106 1110 1114 1118 1122 1126 1130 1134
## 252 253 255 256 257 258 259 260 261 262 263 264 265 267 268
## 1138 1142 1151 1155 1159 1163 1167 1171 1175 1179 1184 1188 1192 1200 1204
## 269 270 271 272 273 275 276 277 278 279 280 281 283 284 285
## 1208 1212 1216 1220 1225 1233 1237 1241 1245 1249 1253 1257 1266 1270 1274
## 288 291 292 295 296 298 299 300 301 302 304 305 307 308 309
## 1286 1298 1303 1315 1319 1327 1331 1335 1339 1344 1352 1356 1364 1368 1372
## 310 311 314 315 316 319 323 325 326 327 329 331 333 334 336
## 1376 1380 1393 1397 1401 1413 1430 1438 1442 1446 1454 1463 1471 1475 1483
## 337 341 342 344 345 346 349 351 352 353 354 357 360 361 368
## 1487 1504 1508 1516 1520 1524 1536 1545 1549 1553 1557 1569 1582 1586 1614
## 371 376 377 378 379 381 384 385 387 388 389 390 393 394 396
## 1627 1647 1651 1655 1659 1668 1680 1684 1692 1696 1701 1705 1717 1721 1729
## 399 414 417 426 431 434 436 442 445 446 456 459 486 516 533
## 1742 1803 1815 1852 1873 1885 1893 1918 1930 1934 1975 1988 2099 2222 2291
## 546 564 574 588 596 603 619 623 879
## 2345 2419 2460 2517 2550 2579 2644 2661 3711
boxplot(Number.of.Discharges~Number.of.Readmissions, data=hospital)

# TODO why does this look so strange?
# Smaller hospitals/facilities may be lacking necessary resources to ensure quality care and prevent complications that lead to readmissions
# TODO find out what is separating lines
# TODO model dummy vars and interaction terms
# Setup an appropriate hypothesis test.
# H0 : there is no difference in excess readmission ratio between small and large hospitals
# HA : there is a significant difference in excess readmission ratio between small and large hospitals
# Compute and report the observed significance value (or p-value).
# Report statistical significance for α = .01.
# inference(small$Excess.Readmission.Ratio, large$Excess.Readmission.Ratio, type = "ht",
# est = "mean", method = "theoretical", alternative = "greater", boot_method = "perc",
# conflevel = 0.99, null = 0)
t.test(small$Excess.Readmission.Ratio, large$Excess.Readmission.Ratio, alternative = "two.sided", mu = 0)
##
## Welch Two Sample t-test
##
## data: small$Excess.Readmission.Ratio and large$Excess.Readmission.Ratio
## t = 7.6017, df = 548.11, p-value = 1.275e-13
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.03284015 0.05572570
## sample estimates:
## mean of x mean of y
## 1.0226184 0.9783354
# Discuss statistical significance and practical significance
# At large enough sample sizes, significance will be found with even small differences
# Is there practical meaning to the significant difference?
# Look at differences by $State
# TODO Look at F-stat and p-value to evaluate model fit
summary(lm(hospital$Number.of.Discharges~hospital$State)) #low R
##
## Call:
## lm(formula = hospital$Number.of.Discharges ~ hospital$State)
##
## Residuals:
## Min 1Q Median 3Q Max
## -634.4 -189.5 -73.9 105.9 6418.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 191.909 64.242 2.987 0.002821 **
## hospital$StateAL 134.054 66.781 2.007 0.044732 *
## hospital$StateAR 144.114 68.273 2.111 0.034805 *
## hospital$StateAZ 124.550 67.539 1.844 0.065190 .
## hospital$StateCA 98.258 64.986 1.512 0.130566
## hospital$StateCO 78.868 69.466 1.135 0.256253
## hospital$StateCT 266.305 69.625 3.825 0.000132 ***
## hospital$StateDC 154.815 85.194 1.817 0.069210 .
## hospital$StateDE 519.461 86.544 6.002 2.00e-09 ***
## hospital$StateFL 263.123 65.235 4.033 5.53e-05 ***
## hospital$StateGA 128.256 66.187 1.938 0.052674 .
## hospital$StateHI -1.558 81.123 -0.019 0.984680
## hospital$StateIA 176.682 69.208 2.553 0.010696 *
## hospital$StateID 109.291 79.981 1.366 0.171820
## hospital$StateIL 215.353 65.533 3.286 0.001019 **
## hospital$StateIN 195.190 66.455 2.937 0.003319 **
## hospital$StateKS 88.078 68.792 1.280 0.200450
## hospital$StateKY 216.099 67.042 3.223 0.001271 **
## hospital$StateLA 77.230 66.998 1.153 0.249050
## hospital$StateMA 272.648 67.076 4.065 4.84e-05 ***
## hospital$StateMD 253.892 67.752 3.747 0.000180 ***
## hospital$StateME 125.277 73.649 1.701 0.088970 .
## hospital$StateMI 286.505 66.150 4.331 1.50e-05 ***
## hospital$StateMN 53.042 69.007 0.769 0.442120
## hospital$StateMO 194.697 66.745 2.917 0.003540 **
## hospital$StateMS 104.851 67.823 1.546 0.122143
## hospital$StateMT 90.246 78.388 1.151 0.249643
## hospital$StateNC 277.913 66.343 4.189 2.82e-05 ***
## hospital$StateND 225.884 85.194 2.651 0.008026 **
## hospital$StateNE 166.597 72.443 2.300 0.021483 *
## hospital$StateNH 187.315 75.449 2.483 0.013054 *
## hospital$StateNJ 344.956 66.701 5.172 2.36e-07 ***
## hospital$StateNM 13.155 72.740 0.181 0.856489
## hospital$StateNV 88.573 72.078 1.229 0.219152
## hospital$StateNY 183.061 65.425 2.798 0.005150 **
## hospital$StateOH 171.467 65.677 2.611 0.009046 **
## hospital$StateOK 91.741 67.337 1.362 0.173090
## hospital$StateOR 61.628 70.482 0.874 0.381933
## hospital$StatePA 154.856 65.519 2.364 0.018119 *
## hospital$StateRI 154.741 79.981 1.935 0.053049 .
## hospital$StateSC 173.809 67.650 2.569 0.010205 *
## hospital$StateSD 147.463 78.985 1.867 0.061929 .
## hospital$StateTN 184.007 66.414 2.771 0.005604 **
## hospital$StateTX 131.407 65.030 2.021 0.043331 *
## hospital$StateUT 79.525 76.421 1.041 0.298076
## hospital$StateVA 212.991 66.549 3.201 0.001375 **
## hospital$StateVT 188.662 91.927 2.052 0.040163 *
## hospital$StateWA 161.829 67.916 2.383 0.017199 *
## hospital$StateWI 82.112 67.122 1.223 0.221234
## hospital$StateWV 177.155 70.428 2.515 0.011902 *
## hospital$StateWY 9.424 88.939 0.106 0.915614
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 301.3 on 11527 degrees of freedom
## Multiple R-squared: 0.05426, Adjusted R-squared: 0.05016
## F-statistic: 13.23 on 50 and 11527 DF, p-value: < 2.2e-16
summary(lm(hospital$Number.of.Readmissions~hospital$State)) #low R
##
## Call:
## lm(formula = hospital$Number.of.Readmissions ~ hospital$State)
##
## Residuals:
## Min 1Q Median 3Q Max
## -98.93 -35.15 -14.57 18.32 797.37
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 26.409 12.217 2.162 0.030664 *
## hospital$StateAL 28.846 12.703 2.271 0.023181 *
## hospital$StateAR 29.950 12.983 2.307 0.021085 *
## hospital$StateAZ 20.848 12.853 1.622 0.104814
## hospital$StateCA 24.274 12.360 1.964 0.049570 *
## hospital$StateCO 6.168 13.210 0.467 0.640583
## hospital$StateCT 56.115 13.241 4.238 2.27e-05 ***
## hospital$StateDC 40.246 16.201 2.484 0.013001 *
## hospital$StateDE 87.517 16.458 5.318 1.07e-07 ***
## hospital$StateFL 55.223 12.406 4.451 8.62e-06 ***
## hospital$StateGA 28.409 12.588 2.257 0.024035 *
## hospital$StateHI 2.915 15.427 0.189 0.850122
## hospital$StateIA 24.297 13.168 1.845 0.065044 .
## hospital$StateID 6.091 15.210 0.400 0.688829
## hospital$StateIL 48.163 12.465 3.864 0.000112 ***
## hospital$StateIN 39.771 12.642 3.146 0.001659 **
## hospital$StateKS 12.356 13.088 0.944 0.345147
## hospital$StateKY 55.195 12.754 4.328 1.52e-05 ***
## hospital$StateLA 25.225 12.758 1.977 0.048047 *
## hospital$StateMA 57.681 12.756 4.522 6.19e-06 ***
## hospital$StateMD 58.709 12.888 4.555 5.28e-06 ***
## hospital$StateME 22.905 14.006 1.635 0.101991
## hospital$StateMI 63.731 12.581 5.066 4.13e-07 ***
## hospital$StateMN 9.851 13.129 0.750 0.453063
## hospital$StateMO 40.382 12.693 3.181 0.001469 **
## hospital$StateMS 28.779 12.901 2.231 0.025719 *
## hospital$StateMT 4.136 14.963 0.276 0.782210
## hospital$StateNC 51.606 12.618 4.090 4.34e-05 ***
## hospital$StateND 22.970 16.201 1.418 0.156275
## hospital$StateNE 19.085 13.776 1.385 0.165984
## hospital$StateNH 29.367 14.348 2.047 0.040706 *
## hospital$StateNJ 81.694 12.685 6.440 1.24e-10 ***
## hospital$StateNM 4.354 13.873 0.314 0.753638
## hospital$StateNV 26.249 13.759 1.908 0.056434 .
## hospital$StateNY 49.621 12.443 3.988 6.70e-05 ***
## hospital$StateOH 41.735 12.490 3.341 0.000836 ***
## hospital$StateOK 21.962 12.819 1.713 0.086679 .
## hospital$StateOR 6.100 13.404 0.455 0.649036
## hospital$StatePA 36.066 12.461 2.894 0.003807 **
## hospital$StateRI 37.841 15.210 2.488 0.012864 *
## hospital$StateSC 33.740 12.868 2.622 0.008754 **
## hospital$StateSD 12.518 15.144 0.827 0.408492
## hospital$StateTN 41.673 12.632 3.299 0.000974 ***
## hospital$StateTX 28.809 12.369 2.329 0.019867 *
## hospital$StateUT 1.534 14.533 0.106 0.915922
## hospital$StateVA 43.919 12.658 3.470 0.000523 ***
## hospital$StateVT 31.686 17.482 1.813 0.069932 .
## hospital$StateWA 24.409 12.916 1.890 0.058798 .
## hospital$StateWI 13.349 12.765 1.046 0.295675
## hospital$StateWV 49.435 13.393 3.691 0.000224 ***
## hospital$StateWY 6.156 17.089 0.360 0.718668
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 57.3 on 11446 degrees of freedom
## (81 observations deleted due to missingness)
## Multiple R-squared: 0.07778, Adjusted R-squared: 0.07376
## F-statistic: 19.31 on 50 and 11446 DF, p-value: < 2.2e-16
# Look at differences procedure called $Measure.Name
summary(lm(hospital$Number.of.Discharges~hospital$Measure.Name)) #low R
##
## Call:
## lm(formula = hospital$Number.of.Discharges ~ hospital$Measure.Name)
##
## Residuals:
## Min 1Q Median 3Q Max
## -533.5 -192.5 -76.6 106.2 6259.5
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 262.824 7.049 37.283
## hospital$Measure.NameREADM-30-COPD-HRRP 65.761 9.082 7.240
## hospital$Measure.NameREADM-30-HF-HRRP 137.192 9.017 15.215
## hospital$Measure.NameREADM-30-HIP-KNEE-HRRP 270.631 10.685 25.329
## hospital$Measure.NameREADM-30-PN-HRRP 75.735 9.043 8.375
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## hospital$Measure.NameREADM-30-COPD-HRRP 4.76e-13 ***
## hospital$Measure.NameREADM-30-HF-HRRP < 2e-16 ***
## hospital$Measure.NameREADM-30-HIP-KNEE-HRRP < 2e-16 ***
## hospital$Measure.NameREADM-30-PN-HRRP < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 299.7 on 11573 degrees of freedom
## Multiple R-squared: 0.06092, Adjusted R-squared: 0.06059
## F-statistic: 187.7 on 4 and 11573 DF, p-value: < 2.2e-16
summary(lm(hospital$Number.of.Readmissions~hospital$Measure.Name)) #low R
##
## Call:
## lm(formula = hospital$Number.of.Readmissions ~ hospital$Measure.Name)
##
## Residuals:
## Min 1Q Median 3Q Max
## -79.75 -33.69 -11.83 17.25 788.25
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 46.688 1.333 35.026
## hospital$Measure.NameREADM-30-COPD-HRRP 22.074 1.713 12.886
## hospital$Measure.NameREADM-30-HF-HRRP 44.063 1.700 25.914
## hospital$Measure.NameREADM-30-HIP-KNEE-HRRP -18.855 2.020 -9.335
## hospital$Measure.NameREADM-30-PN-HRRP 12.783 1.706 7.495
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## hospital$Measure.NameREADM-30-COPD-HRRP < 2e-16 ***
## hospital$Measure.NameREADM-30-HF-HRRP < 2e-16 ***
## hospital$Measure.NameREADM-30-HIP-KNEE-HRRP < 2e-16 ***
## hospital$Measure.NameREADM-30-PN-HRRP 7.11e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 56.19 on 11492 degrees of freedom
## (81 observations deleted due to missingness)
## Multiple R-squared: 0.1097, Adjusted R-squared: 0.1094
## F-statistic: 354 on 4 and 11492 DF, p-value: < 2.2e-16
# Look at AOV
aov <- aov(Excess.Readmission.Ratio~Number.of.Discharges*Number.of.Readmissions*State*Measure.Name, data=hospital)
aov
## Call:
## aov(formula = Excess.Readmission.Ratio ~ Number.of.Discharges *
## Number.of.Readmissions * State * Measure.Name, data = hospital)
##
## Terms:
## Number.of.Discharges Number.of.Readmissions State
## Sum of Squares 0.922326 9.854469 6.867373
## Deg. of Freedom 1 1 50
## Measure.Name Number.of.Discharges:Number.of.Readmissions
## Sum of Squares 16.424253 0.317690
## Deg. of Freedom 4 1
## Number.of.Discharges:State Number.of.Readmissions:State
## Sum of Squares 2.526706 2.864135
## Deg. of Freedom 50 50
## Number.of.Discharges:Measure.Name
## Sum of Squares 25.277323
## Deg. of Freedom 4
## Number.of.Readmissions:Measure.Name State:Measure.Name
## Sum of Squares 9.002350 1.768707
## Deg. of Freedom 4 200
## Number.of.Discharges:Number.of.Readmissions:State
## Sum of Squares 0.364245
## Deg. of Freedom 50
## Number.of.Discharges:Number.of.Readmissions:Measure.Name
## Sum of Squares 0.280962
## Deg. of Freedom 4
## Number.of.Discharges:State:Measure.Name
## Sum of Squares 2.054828
## Deg. of Freedom 200
## Number.of.Readmissions:State:Measure.Name
## Sum of Squares 1.188367
## Deg. of Freedom 195
## Number.of.Discharges:Number.of.Readmissions:State:Measure.Name
## Sum of Squares 1.179756
## Deg. of Freedom 191
## Residuals
## Sum of Squares 16.333082
## Deg. of Freedom 10491
##
## Residual standard error: 0.03945714
## 14 out of 1020 effects not estimable
## Estimated effects may be unbalanced
## 81 observations deleted due to missingness
# TODO: Can I use Tukey HSD to tell which groups differ?
# tuk <- TukeyHSD(aov)
# tuk
# Warning messages:
# 1: In replications(paste("~", xx), data = mf) :
# non-factors ignored: Number.of.Discharges
# 2: In replications(paste("~", xx), data = mf) :
# non-factors ignored: Number.of.Readmissions
# 3: In replications(paste("~", xx), data = mf) :
# non-factors ignored: Number.of.Discharges, Number.of.Readmissions
# 4: In replications(paste("~", xx), data = mf) :
# non-factors ignored: Number.of.Discharges, State
# 5: In replications(paste("~", xx), data = mf) :
# non-factors ignored: Number.of.Readmissions, State
# 6: In replications(paste("~", xx), data = mf) :
# non-factors ignored: Number.of.Discharges, Measure.Name
# 7: In replications(paste("~", xx), data = mf) :
# non-factors ignored: Number.of.Readmissions, Measure.Name
# 8: In replications(paste("~", xx), data = mf) :
# non-factors ignored: Number.of.Discharges, Number.of.Readmissions, State
# 9: In replications(paste("~", xx), data = mf) :
# non-factors ignored: Number.of.Discharges, Number.of.Readmissions, Measure.Name
# 10: In replications(paste("~", xx), data = mf) :
# non-factors ignored: Number.of.Discharges, State, Measure.Name
# 11: In replications(paste("~", xx), data = mf) :
# non-factors ignored: Number.of.Readmissions, State, Measure.Name
# 12: In replications(paste("~", xx), data = mf) :
# non-factors ignored: Number.of.Discharges, Number.of.Readmissions, State, Measure.Name