Rashbir Singh Kohli (s3810585)
10/05/2020
The dataset is stored in average-length-of-stay-multilevel-data.xlsx, which is an Excel-format file. It is imported with the read_excel() function from the readxl library.
AvgLenStDF <- read_excel("average-length-of-stay-multilevel-data.xlsx", sheet = "Average length of stay", col_types = c("text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text"), skip = 12)
# Preview the first three rows of AvgLenStDF; the `##` lines that follow are the printed output.
head(AvgLenStDF, 3)## # A tibble: 3 x 19
## `Reporting unit` `Reporting unit… State `Local Hospital… `Peer group`
## <chr> <chr> <chr> <chr> <chr>
## 1 Albury Wodonga … Hospital NSW Albury Wodonga … Large hospi…
## 2 Albury Wodonga … Hospital NSW Albury Wodonga … Large hospi…
## 3 Albury Wodonga … Hospital NSW Albury Wodonga … Large hospi…
## # … with 14 more variables: `Time period` <chr>, Category <chr>, `Total
## # number of stays` <chr>, ...9 <chr>, `Number of overnight stays` <chr>,
## # ...11 <chr>, `Percentage of overnight stays` <chr>, ...13 <chr>,
## # `Average length of stay (days)` <chr>, ...15 <chr>, `Peer group
## # average (days)` <chr>, ...17 <chr>, `Total overnight patient bed
## # days` <chr>, ...19 <chr>
Column 15 is renamed to contracted; its cells carry a (‡) marker to denote people who have a contract with the hospital.
names(AvgLenStDF)[15] <- 'contracted' #Renaming column 15 of data
# Recode the contract marker: each "‡" is replaced by 1 and a missing marker
# becomes 0 (the column stays character because it was read as text).
contract_flag <- gsub("‡", 1, AvgLenStDF$contracted)
contract_flag[is.na(contract_flag)] <- 0
AvgLenStDF$contracted <- contract_flag

# Drop the empty, auto-named columns (`...9`, `...11`, ...) by position.
filler_cols <- c(9, 11, 13, 17, 19)
AvgLenStDF <- AvgLenStDF[-filler_cols]

# Tidy the column names step by step:
#   1. remove white space,
#   2. remove the "LHN" abbreviation that sits inside the brackets,
#   3. substitute "days" with "InDays".
tidy_names <- names(AvgLenStDF)
tidy_names <- gsub(" ", "", tidy_names)
tidy_names <- gsub("LHN", "", tidy_names)
tidy_names <- gsub("days", "InDays", tidy_names)
names(AvgLenStDF) <- tidy_names
# Keep only ASCII letters in the column names. The ranges are spelled out
# because `[A-z]` also spans the punctuation that sits between "Z" and "a"
# ([ \ ] ^ _ `), so those characters would survive the clean-up.
names(AvgLenStDF) <- gsub("[^A-Za-z]", "", names(AvgLenStDF))

# The analysis uses only AveragelengthofstayInDays (Average length of stay
# (days)) and Peergroup (Peer group), so a new data frame (df) is created with
# only ALOS and the peer groups Large hospitals and Medium hospitals.
# AveragelengthofstayInDays is converted to numeric further below.
#
# %in% is used for the row filter because, unlike `==`, it never returns NA,
# so a missing Peergroup cannot produce an all-NA row in df.
df <- AvgLenStDF[
  AvgLenStDF$Peergroup %in% c("Medium hospitals", "Large hospitals"),
  c("AveragelengthofstayInDays", "Peergroup")
]
## Removing NP and -
# "-" and "NP" are non-numeric placeholder strings, so they are dropped before
# the numeric conversion. A single %in% filter removes both and, unlike `!=`,
# does not turn a missing (NA) entry into an all-NA row during subsetting.
df <- df[!df$AveragelengthofstayInDays %in% c("-", "NP"), ]
## Converting Average length of stay (In Days) to numeric
# Convert ALOS from text to numeric so it can be summarised and tested.
df$AveragelengthofstayInDays <- as.numeric(df$AveragelengthofstayInDays)

# Table 1: descriptive statistics of ALOS for each peer group.
# Grouping uses the bare column name; `df$Peergroup` inside group_by() would
# create a new grouping column literally named "df$Peergroup".
alos_summary <- df %>%
  group_by(Peergroup) %>%
  summarise(
    Min = min(AveragelengthofstayInDays, na.rm = TRUE),
    Max = max(AveragelengthofstayInDays, na.rm = TRUE),
    n = n(),
    Missing = sum(is.na(AveragelengthofstayInDays)),
    Q1 = quantile(AveragelengthofstayInDays, probs = .25, na.rm = TRUE),
    Median = median(AveragelengthofstayInDays, na.rm = TRUE),
    Q3 = quantile(AveragelengthofstayInDays, probs = .75, na.rm = TRUE),
    Mean = mean(AveragelengthofstayInDays, na.rm = TRUE),
    SD = sd(AveragelengthofstayInDays, na.rm = TRUE),
    IQR = IQR(AveragelengthofstayInDays, na.rm = TRUE)
  )

# The table has 11 columns, so `align` carries one "l" per column.
knitr::kable(
  alos_summary,
  "html",
  caption = "Table 1: Descriptive Statistics",
  align = "lllllllllll",
  col.names = c(
    "Peer Groups", "Minimum", "Maximum", "Sample Size", "Missing Count",
    "First Quartile", "Median", "Third Quartile", "Mean",
    "Standard Deviation", "IQR"
  ),
  digits = 2
) %>%
  kable_styling(latex_options = "HOLD_position") %>%
  column_spec(1, bold = TRUE) %>%
  column_spec(c(2, 4, 6, 8, 10), color = 'white', background = 'black')
| Peer Groups | Minimum | Maximum | Sample Size | Missing Count | First Quartile | Median | Third Quartile | Mean | Standard Deviation | IQR |
|---|---|---|---|---|---|---|---|---|---|---|
| Large hospitals | 1.2 | 12.6 | 4411 | 0 | 2.5 | 3.5 | 5.0 | 3.99 | 1.98 | 2.5 |
| Medium hospitals | 1.0 | 13.2 | 2182 | 0 | 2.4 | 3.4 | 4.5 | 3.71 | 1.85 | 2.1 |
The first quartile (Q1) for both groups is comparatively the same. Large Hospitals and Medium Hospitals are both right-skewed. The Large Hospitals mean is 3.99 and the Medium Hospitals mean is 3.71.
LH <- filter(df, Peergroup=="Large hospitals") ; MH <- filter(df, Peergroup=="Medium hospitals")
# Frequency histogram of ALOS, one facet per peer group.
# Dashed vertical lines mark each group's mean and median. The layers use LH /
# MH as their data, so each line is drawn only in that group's facet. Columns
# are referenced by bare name inside aes(): ggplot2 discourages `LH$...` there
# and warns about it.
# The text labels are hard-coded to the values reported in Table 1; they are
# attached to a one-row slice of LH / MH so each is drawn once, in the correct
# facet.
ggplot(df, aes(AveragelengthofstayInDays)) +
  geom_histogram(
    fill = "#4271AE", color = "#1F3552", binwidth = 0.3, position = "identity"
  ) +
  facet_wrap(~ Peergroup) +
  geom_vline(
    data = LH, aes(xintercept = mean(AveragelengthofstayInDays)),
    colour = "red", linetype = "dashed", size = 0.8
  ) +
  geom_vline(
    data = LH, aes(xintercept = median(AveragelengthofstayInDays)),
    colour = "orange", linetype = "dashed", size = 0.4
  ) +
  geom_vline(
    data = MH, aes(xintercept = mean(AveragelengthofstayInDays)),
    colour = "green", linetype = "dashed", size = 0.8
  ) +
  geom_vline(
    data = MH, aes(xintercept = median(AveragelengthofstayInDays)),
    colour = "purple", linetype = "dashed", size = 0.4
  ) +
  ggtitle("Frequency histogram of Medium and Large Hospitals\n") +
  theme_economist() +
  theme(
    plot.title = element_text(family = "Tahoma", hjust = 0.5),
    text = element_text(family = "Tahoma"),
    axis.title = element_text(size = 12)
  ) +
  scale_x_continuous(name = "\nAverage Length of Stay (In Days)") +
  geom_text(
    aes(x = 4.8, y = 400, label = 'μ = 3.99', group = NULL),
    data = LH[1, ], size = 4
  ) +
  geom_text(
    aes(x = 2.6, y = 450, label = 'Median = 3.5', group = NULL),
    data = LH[1, ], size = 3
  ) +
  geom_text(
    aes(x = 4.6, y = 400, label = 'μ = 3.71', group = NULL),
    data = MH[1, ], size = 4
  ) +
  geom_text(
    aes(x = 2.4, y = 360, label = 'Median = 3.4', group = NULL),
    data = MH[1, ], size = 3
  ) +
  scale_y_continuous(name = 'Frequency\n')

# Normal Q-Q plots of ALOS for each peer group, arranged side by side later.
p1 <- ggqqplot(LH$AveragelengthofstayInDays, size = 0.5) +
  ggtitle('QQ Plot for Large Hospitals ALOS') +
  theme(plot.title = element_text(hjust = 0.5))
p2 <- ggqqplot(MH$AveragelengthofstayInDays, size = 0.5) +
  ggtitle('QQ Plot for Medium Hospitals ALOS') +
  theme(plot.title = element_text(hjust = 0.5))
grid.arrange(p1, p2, nrow = 1)
Compare the p-value (\(p\)) to \(\alpha = 0.05\) (significance level), i.e. 95% confidence.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 16.585 0.00004707 ***
## 6591
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the Box Plot and Q-Q Plot it can be seen that our data do have some outliers that cannot be removed and that it does not follow a normal distribution. The Fligner-Killeen Test can be done to verify the homogeneity of variance.
##
## Fligner-Killeen test of homogeneity of variances
##
## data: AveragelengthofstayInDays by Peergroup
## Fligner-Killeen:med chi-squared = 16.354, df = 1, p-value =
## 0.00005255
# Welch two-sample t-test of ALOS between the two peer groups
# (two-sided, 95% confidence level, unequal variances).
# Argument names are written in full instead of relying on partial matching,
# and FALSE replaces the reassignable shorthand F.
# `paired` is omitted: the groups are independent (the default), and the
# formula method stops with an error from R 4.4.0 onwards when `paired` is
# supplied at all.
t.test(AveragelengthofstayInDays ~ Peergroup,
       alternative = "two.sided",
       conf.level = 0.95,
       var.equal = FALSE,
       data = df)
##
## Welch Two Sample t-test
##
## data: AveragelengthofstayInDays by Peergroup
## t = 5.6615, df = 4611, p-value = 0.00000001592
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1835797 0.3780687
## sample estimates:
## mean in group Large hospitals mean in group Medium hospitals
## 3.986874 3.706049
# Welch two-sample t-test on log-transformed ALOS (two-sided, 95% confidence
# level, unequal variances).
# Argument names are written in full instead of relying on partial matching,
# and FALSE replaces the reassignable shorthand F.
# `paired` is omitted: the groups are independent (the default), and the
# formula method stops with an error from R 4.4.0 onwards when `paired` is
# supplied at all.
t.test(log(AveragelengthofstayInDays) ~ Peergroup,
       alternative = "two.sided",
       conf.level = 0.95,
       var.equal = FALSE,
       data = df)
##
## Welch Two Sample t-test
##
## data: log(AveragelengthofstayInDays) by Peergroup
## t = 6.0326, df = 4289, p-value = 0.000000001749
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.05015421 0.09844745
## sample estimates:
## mean in group Large hospitals mean in group Medium hospitals
## 1.271971 1.197670
# Wilcoxon rank-sum test of ALOS between the two peer groups (two-sided).
# `alternative` is written in full instead of relying on partial matching.
# `var.eq` is dropped because wilcox.test() has no such argument, so it had no
# effect. `paired` is dropped because the groups are independent (the default)
# and the formula method stops with an error from R 4.4.0 onwards when it is
# supplied.
wilcox.test(AveragelengthofstayInDays ~ Peergroup,
            alternative = "two.sided",
            data = df)
##
## Wilcoxon rank sum test with continuity correction
##
## data: AveragelengthofstayInDays by Peergroup
## W = 5172686, p-value = 0.0000007227
## alternative hypothesis: true location shift is not equal to 0
[1] “Admitted patients”, Australian Institute of Health and Welfare 2020. [Online]. Available: https://www.aihw.gov.au/reports-data/myhospitals/sectors/admitted-patients. [Accessed: 10-May-2020].
[2] “Homogeneity of Variance Test in R”, Data Novia, [Online]. Available: https://www.datanovia.com/en/lessons/homogeneity-of-variance-test-in-r/ [Accessed: 10-May-2020].
[3] “t-test: Comparing Group Means”, UC Business Analytics R Programming Guide , [Online]. Available: https://uc-r.github.io/t_test [Accessed: 10-May-2020].