MATH1324 Assignment 2

Test for Average Length of Stay (ALOS) Between Large and Medium Hospitals

Rashbir Singh Kohli (s3810585)

10/05/2020

Introduction

Introduction

Problem Statement

Data

Data (Cont.)

# Import the "Average length of stay" worksheet, skipping the first 12 rows.
# All 19 columns are read as text so that non-numeric entries (e.g. '-' and
# 'NP', which are filtered out later) survive the import unchanged.
AvgLenStDF <- read_excel(
  "average-length-of-stay-multilevel-data.xlsx",
  sheet = "Average length of stay",
  col_types = rep("text", 19),
  skip = 12
)

# Preview the first three rows of the raw import.
head(AvgLenStDF, 3)
## # A tibble: 3 x 19
##   `Reporting unit` `Reporting unit… State `Local Hospital… `Peer group`
##   <chr>            <chr>            <chr> <chr>            <chr>       
## 1 Albury Wodonga … Hospital         NSW   Albury Wodonga … Large hospi…
## 2 Albury Wodonga … Hospital         NSW   Albury Wodonga … Large hospi…
## 3 Albury Wodonga … Hospital         NSW   Albury Wodonga … Large hospi…
## # … with 14 more variables: `Time period` <chr>, Category <chr>, `Total
## #   number of stays` <chr>, ...9 <chr>, `Number of overnight stays` <chr>,
## #   ...11 <chr>, `Percentage of overnight stays` <chr>, ...13 <chr>,
## #   `Average length of stay (days)` <chr>, ...15 <chr>, `Peer group
## #   average (days)` <chr>, ...17 <chr>, `Total overnight patient bed
## #   days` <chr>, ...19 <chr>

Data (Cont.)

# Column 15 is an unnamed marker column: turn it into a "1"/"0" text flag.
# This must run before any columns are dropped, because it is addressed by
# its position in the original 19-column layout.
names(AvgLenStDF)[15] <- "contracted"
# Substitute the special symbol with "1" ...
AvgLenStDF$contracted <- gsub("‡", "1", AvgLenStDF$contracted)
# ... and empty cells (read as NA) with "0". The column is text, so the
# replacement is written as a string rather than relying on coercion.
AvgLenStDF$contracted[is.na(AvgLenStDF$contracted)] <- "0"
# Drop the remaining unnamed columns by position.
AvgLenStDF <- AvgLenStDF[-c(9, 11, 13, 17, 19)]

# Normalise the column names so they can be used with `$` without backticks.
clean_names <- names(AvgLenStDF)
clean_names <- gsub(" ", "", clean_names)            # remove white space
clean_names <- gsub("LHN", "", clean_names)          # remove the abbreviation inside the brackets
clean_names <- gsub("days", "InDays", clean_names)   # substitute 'days' with 'InDays'
# Remove everything except letters. The class is spelled A-Za-z on purpose:
# the range A-z also covers the characters [ \ ] ^ _ and `, which sit
# between 'Z' and 'a' in ASCII and would otherwise be kept.
clean_names <- gsub("[^A-Za-z]", "", clean_names)
names(AvgLenStDF) <- clean_names

Data (Cont.)

# Keep only the two peer groups under comparison and the two columns needed.
# `%in%` never returns NA, so a row with a missing peer group is simply
# excluded; with `==` it would yield an NA index and produce an all-NA row.
peer_groups <- c("Medium hospitals", "Large hospitals")
df <- AvgLenStDF[AvgLenStDF$Peergroup %in% peer_groups,
                 c("AveragelengthofstayInDays", "Peergroup")]

# Remove the non-numeric placeholders 'NP' and '-'. Rows whose value is
# genuinely missing are kept intact (with their peer group).
placeholders <- c("-", "NP")
df <- df[!(df$AveragelengthofstayInDays %in% placeholders), ]

# Convert average length of stay (in days) from text to numeric.
df$AveragelengthofstayInDays <- as.numeric(df$AveragelengthofstayInDays)

Descriptive Statistics and Visualisation

# Table 1: descriptive statistics of average length of stay per peer group.
# Group by the bare column name: `df$Peergroup` inside group_by() reaches
# outside the piped data and creates a column literally named `df$Peergroup`.
alos_summary <- df %>%
  group_by(Peergroup) %>%
  summarise(
    Min = min(AveragelengthofstayInDays, na.rm = TRUE),
    Max = max(AveragelengthofstayInDays, na.rm = TRUE),
    n = n(),
    Missing = sum(is.na(AveragelengthofstayInDays)),
    Q1 = quantile(AveragelengthofstayInDays, probs = .25, na.rm = TRUE),
    Median = median(AveragelengthofstayInDays, na.rm = TRUE),
    Q3 = quantile(AveragelengthofstayInDays, probs = .75, na.rm = TRUE),
    Mean = mean(AveragelengthofstayInDays, na.rm = TRUE),
    SD = sd(AveragelengthofstayInDays, na.rm = TRUE),
    IQR = IQR(AveragelengthofstayInDays, na.rm = TRUE)
  )

# Display headers, in the same order as the summarise() columns above.
table_headers <- c(
  "Peer Groups", "Minimum", "Maximum", "Sample Size", "Missing Count",
  "First Quartile", "Median", "Third Quartile", "Mean",
  "Standard Deviation", "IQR"
)

knitr::kable(
  alos_summary,
  "html",
  caption = "Table 1: Descriptive Statistics",
  # One alignment letter per column (the table has 11 columns).
  align = rep("l", length(table_headers)),
  col.names = table_headers,
  digits = 2
) %>%
  kable_styling(latex_options = "HOLD_position") %>%
  column_spec(1, bold = TRUE) %>%
  # White-on-black on every other column.
  column_spec(c(2, 4, 6, 8, 10), color = "white", background = "black")
Table 1: Descriptive Statistics
Peer Groups Minimum Maximum Sample Size Missing Count First Quartile Median Third Quartile Mean Standard Deviation IQR
Large hospitals 1.2 12.6 4411 0 2.5 3.5 5.0 3.99 1.98 2.5
Medium hospitals 1.0 13.2 2182 0 2.4 3.4 4.5 3.71 1.85 2.1

Descriptive Statistics and Visualisation (Cont.)

# Boxplot of average length of stay, one box per peer group.
# Font and title tweaks are layered on top of the Economist theme.
boxplot_text_theme <- theme(
  plot.title = element_text(family = "Tahoma", hjust = 0.5),
  text = element_text(family = "Tahoma"),
  axis.title = element_text(size = 12)
)

ggplot(df, aes(x = Peergroup, y = AveragelengthofstayInDays)) +
  geom_boxplot(
    outlier.colour = "black",
    outlier.shape = 1,
    outlier.size = 1.5,
    fill = "#4271AE",
    color = "#1F3552"
  ) +
  ggtitle("Boxplot for Medium and Large Hospitals\n") +
  scale_x_discrete(name = "\nPeer Group") +
  scale_y_continuous(name = "Average Length of Stay (In Days)\n") +
  theme_economist() +
  boxplot_text_theme

Descriptive Statistics and Visualisation (Cont.)

# Split the data by peer group; LH and MH are also used by later chunks.
LH <- filter(df, Peergroup == "Large hospitals")
MH <- filter(df, Peergroup == "Medium hospitals")

# Centre statistics for the reference lines and their labels. They are
# computed once here instead of using `LH$...` / `MH$...` inside aes()
# (discouraged by ggplot2), and the labels are derived from the data rather
# than typed in by hand, so they cannot drift from the plotted values.
lh_mean <- mean(LH$AveragelengthofstayInDays, na.rm = TRUE)
lh_median <- median(LH$AveragelengthofstayInDays, na.rm = TRUE)
mh_mean <- mean(MH$AveragelengthofstayInDays, na.rm = TRUE)
mh_median <- median(MH$AveragelengthofstayInDays, na.rm = TRUE)

lh_mean_label <- paste0("μ = ", sprintf("%.2f", lh_mean))
lh_median_label <- paste0("Median = ", sprintf("%.1f", lh_median))
mh_mean_label <- paste0("μ = ", sprintf("%.2f", mh_mean))
mh_median_label <- paste0("Median = ", sprintf("%.1f", mh_median))

# Frequency histogram of average length of stay, one facet per peer group.
# Every annotation layer uses a one-row slice (LH[1, ] / MH[1, ]) as its
# data: the row carries the Peergroup value, which places the layer in the
# matching facet only, and a single row means each line/label is drawn once.
ggplot(df, aes(AveragelengthofstayInDays)) +
  geom_histogram(
    fill = "#4271AE", color = "#1F3552",
    binwidth = 0.3, position = "identity"
  ) +
  facet_wrap(~ Peergroup) +
  # Large hospitals: mean (red) and median (orange).
  geom_vline(
    data = LH[1, ], aes(xintercept = lh_mean),
    colour = "red", linetype = "dashed", size = 0.8
  ) +
  geom_vline(
    data = LH[1, ], aes(xintercept = lh_median),
    colour = "orange", linetype = "dashed", size = 0.4
  ) +
  # Medium hospitals: mean (green) and median (purple).
  geom_vline(
    data = MH[1, ], aes(xintercept = mh_mean),
    colour = "green", linetype = "dashed", size = 0.8
  ) +
  geom_vline(
    data = MH[1, ], aes(xintercept = mh_median),
    colour = "purple", linetype = "dashed", size = 0.4
  ) +
  ggtitle("Frequency histogram of Medium and Large Hospitals\n") +
  theme_economist() +
  theme(
    plot.title = element_text(family = "Tahoma", hjust = 0.5),
    text = element_text(family = "Tahoma"),
    axis.title = element_text(size = 12)
  ) +
  scale_x_continuous(name = "\nAverage Length of Stay (In Days)") +
  # Text labels for the reference lines, positioned by hand.
  geom_text(
    aes(x = 4.8, y = 400, label = lh_mean_label, group = NULL),
    data = LH[1, ], size = 4
  ) +
  geom_text(
    aes(x = 2.6, y = 450, label = lh_median_label, group = NULL),
    data = LH[1, ], size = 3
  ) +
  geom_text(
    aes(x = 4.6, y = 400, label = mh_mean_label, group = NULL),
    data = MH[1, ], size = 4
  ) +
  geom_text(
    aes(x = 2.4, y = 360, label = mh_median_label, group = NULL),
    data = MH[1, ], size = 3
  ) +
  scale_y_continuous(name = "Frequency\n")

Descriptive Statistics and Visualisation (Cont.)

# QQ plots of average length of stay for each peer group, shown side by side.
centred_title <- theme(plot.title = element_text(hjust = 0.5))

p1 <- ggqqplot(LH$AveragelengthofstayInDays, size = 0.5) +
  ggtitle("QQ Plot for Large Hospitals ALOS") +
  centred_title

p2 <- ggqqplot(MH$AveragelengthofstayInDays, size = 0.5) +
  ggtitle("QQ Plot for Medium Hospitals ALOS") +
  centred_title

# Arrange both plots in a single row.
grid.arrange(p1, p2, nrow = 1)

Descriptive Statistics and Visualisation (Cont.)

# Levene's test for homogeneity of variance of ALOS between the peer groups.
leveneTest(
  AveragelengthofstayInDays ~ Peergroup,
  data = df
)
## Levene's Test for Homogeneity of Variance (center = median)
##         Df F value     Pr(>F)    
## group    1  16.585 0.00004707 ***
##       6591                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Descriptive Statistics and Visualisation (Cont.)

# Fligner-Killeen test of homogeneity of variances of ALOS between the
# peer groups.
fligner.test(
  AveragelengthofstayInDays ~ Peergroup,
  data = df
)
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  AveragelengthofstayInDays by Peergroup
## Fligner-Killeen:med chi-squared = 16.354, df = 1, p-value =
## 0.00005255

Hypothesis Testing

Hypothesis Testing (Cont.)

# Two-sided Welch two-sample t-test (unequal variances) comparing mean ALOS
# between the peer groups at the 95% confidence level.
# Argument names are spelled out in full rather than relying on partial
# matching, and FALSE is used instead of the reassignable shorthand F.
# `paired` is omitted: the formula interface with a grouping factor is
# unpaired by default, and newer R releases (4.4.0 onwards, as far as I
# recall) reject `paired` in the formula method altogether.
t.test(
  AveragelengthofstayInDays ~ Peergroup,
  data = df,
  alternative = "two.sided",
  conf.level = 0.95,
  var.equal = FALSE
)
## 
##  Welch Two Sample t-test
## 
## data:  AveragelengthofstayInDays by Peergroup
## t = 5.6615, df = 4611, p-value = 0.00000001592
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1835797 0.3780687
## sample estimates:
##  mean in group Large hospitals mean in group Medium hospitals 
##                       3.986874                       3.706049

Hypothesis Testing (Cont.)

# Same two-sided Welch t-test, applied to the natural log of ALOS.
# Argument names are written out in full (no partial matching), FALSE
# replaces F, and `paired` is omitted: the formula interface is unpaired by
# default and newer R releases (4.4.0 onwards, as far as I recall) reject
# `paired` in the formula method.
t.test(
  log(AveragelengthofstayInDays) ~ Peergroup,
  data = df,
  alternative = "two.sided",
  conf.level = 0.95,
  var.equal = FALSE
)
## 
##  Welch Two Sample t-test
## 
## data:  log(AveragelengthofstayInDays) by Peergroup
## t = 6.0326, df = 4289, p-value = 0.000000001749
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.05015421 0.09844745
## sample estimates:
##  mean in group Large hospitals mean in group Medium hospitals 
##                       1.271971                       1.197670
# Two-sided Wilcoxon rank sum test of ALOS between the peer groups.
# `var.eq` has been removed: wilcox.test() has no such argument, so it was
# silently ignored. `paired` is omitted as well: the formula interface is
# unpaired by default and newer R releases (4.4.0 onwards, as far as I
# recall) reject it in the formula method.
wilcox.test(
  AveragelengthofstayInDays ~ Peergroup,
  data = df,
  alternative = "two.sided"
)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  AveragelengthofstayInDays by Peergroup
## W = 5172686, p-value = 0.0000007227
## alternative hypothesis: true location shift is not equal to 0

Discussion

Discussion

References

[1] “Admitted patients”, Australian Institute of Health and Welfare 2020. [Online]. Available: https://www.aihw.gov.au/reports-data/myhospitals/sectors/admitted-patients. [Accessed: 10-May-2020].

[2] “Homogeneity of Variance Test in R”, Datanovia, [Online]. Available: https://www.datanovia.com/en/lessons/homogeneity-of-variance-test-in-r/. [Accessed: 10-May-2020].

[3] “t-test: Comparing Group Means”, UC Business Analytics R Programming Guide, [Online]. Available: https://uc-r.github.io/t_test. [Accessed: 10-May-2020].