This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
library(readr)
library(dplyr)
library(ggplot2)
library(rmarkdown)
library(readxl)
# Load the average-length-of-stay workbook. The first 12 rows are skipped and
# the first row read after that supplies the column names.
alos <- read_excel(
  path = "C:/Users/thyagu/rmit/applied analytics/Assign2/average-length-of-stay-multilevel-data (2).xlsx",
  skip = 12,
  col_names = TRUE
)
Expecting logical in O30023 / R30023C15: got '‡'Expecting logical in O30024 / R30024C15: got '‡'Expecting logical in O30025 / R30025C15: got '‡'Expecting logical in O30026 / R30026C15: got '‡'Expecting logical in O30027 / R30027C15: got '‡'Expecting logical in O30028 / R30028C15: got '‡'New names:
* `` -> ...9
* `` -> ...11
* `` -> ...13
* `` -> ...15
* `` -> ...17
* ... and 1 more problem
# Give the two columns of interest syntactic names, then coerce the
# length-of-stay column to numeric (entries that are not numbers become NA).
alos <- rename(
  alos,
  Peer_group = `Peer group`,
  Avg_len_stay = `Average length of stay (days)`
)
alos[["Avg_len_stay"]] <- as.numeric(alos[["Avg_len_stay"]])
NAs introduced by coercion
# Keep only the two columns needed for the comparison, restrict the data to
# the two peer groups under study, and drop rows with a missing value.
alos_df <- alos %>%
  select(Peer_group, Avg_len_stay) %>%
  filter(Peer_group %in% c("Large hospitals", "Medium hospitals")) %>%
  na.omit()

# Treat the peer group as an unordered factor for grouping and testing.
alos_df$Peer_group <- factor(alos_df$Peer_group, ordered = FALSE)

# Preview six rows that include both peer groups. The original wrapped a
# seven-row slice (52:58) in head(), which silently kept only the first six;
# selecting rows 52:57 directly shows the same rows without the hidden drop.
knitr::kable(alos_df[52:57, ])
| Peer_group | Avg_len_stay |
|---|---|
| Large hospitals | 5.0 |
| Large hospitals | 4.8 |
| Large hospitals | 4.9 |
| Medium hospitals | 3.5 |
| Medium hospitals | 3.2 |
| Medium hospitals | 2.7 |
# Outliers: start with the five-number summary (plus mean) of length of stay.
summary(alos_df[["Avg_len_stay"]])
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.000 2.400 3.500 3.894 4.800 13.200
# Visual check for outliers in the combined sample.
boxplot(alos_df$Avg_len_stay)

# Upper outlier fence: Q3 + 1.5 * IQR. Q3 is computed from the data rather
# than copied by hand from the summary() printout, so the threshold stays
# correct if the input data changes. names = FALSE keeps the result an
# unnamed number, as before.
benchmark <- quantile(alos_df$Avg_len_stay, probs = 0.75, names = FALSE) +
  1.5 * IQR(alos_df$Avg_len_stay)
benchmark
[1] 8.4
# Row and column counts before outlier removal.
c(nrow(alos_df), ncol(alos_df))
[1] 6593 2
# Peek at the first five rows and confirm the column type.
head(alos_df, n = 5)
class(alos_df[["Avg_len_stay"]])
[1] "numeric"
# Drop observations at or above the upper outlier fence.
alos_clean <- filter(alos_df, Avg_len_stay < benchmark)
alos_clean
dim(alos_clean)
[1] 6335 2
# Descriptive statistics and visualization ----
# Subset of the cleaned data for large hospitals.
alos_Largehosp <- filter(alos_clean, Peer_group == "Large hospitals")
dim(alos_Largehosp)
[1] 4225 2
# Subset of the cleaned data for medium hospitals.
alos_Mediumhosp <- filter(alos_clean, Peer_group == "Medium hospitals")
dim(alos_Mediumhosp)
[1] 2110 2
# Histograms of average length of stay, one per peer group, drawn on the
# same x-axis range so the two shapes can be compared directly.
hist(
  alos_Largehosp[["Avg_len_stay"]],
  breaks = 20,
  xlim = c(0, 10),
  col = "green",
  main = "Avg length of stay in days for Large hospitals",
  xlab = "Avg length of stay"
)
hist(
  alos_Mediumhosp[["Avg_len_stay"]],
  breaks = 20,
  xlim = c(0, 10),
  col = "blue",
  main = "Avg length of stay in days for Medium hospitals",
  xlab = "Avg length of stay"
)
# Per-peer-group descriptive statistics for average length of stay:
# centre, spread, range, quartiles, sample size and count of missing values.
summary_table <- alos_clean %>%
  group_by(Peer_group) %>%
  summarise(
    Mean = mean(Avg_len_stay, na.rm = TRUE),
    Median = median(Avg_len_stay, na.rm = TRUE),
    IQR = IQR(Avg_len_stay, na.rm = TRUE),
    SD = sd(Avg_len_stay, na.rm = TRUE),
    Var = var(Avg_len_stay, na.rm = TRUE),
    Min = min(Avg_len_stay, na.rm = TRUE),
    Max = max(Avg_len_stay, na.rm = TRUE),
    Q1 = quantile(Avg_len_stay, probs = .25, na.rm = TRUE),
    Q3 = quantile(Avg_len_stay, probs = .75, na.rm = TRUE),
    n = n(),
    Missing = sum(is.na(Avg_len_stay))
  )

# Render with one decimal place. `digits` takes the number of digits
# directly; the original `round(1)` simply evaluated to 1.
knitr::kable(summary_table, digits = 1)
| Peer_group | Mean | Median | IQR | SD | Var | Min | Max | Q1 | Q3 | n | Missing |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Large hospitals | 3.7 | 3.4 | 2.3 | 1.6 | 2.6 | 1.2 | 8.3 | 2.4 | 4.7 | 4225 | 0 |
| Medium hospitals | 3.5 | 3.3 | 1.9 | 1.5 | 2.2 | 1.0 | 8.3 | 2.4 | 4.3 | 2110 | 0 |
# Hypothesis testing: normality check ----
# Normal Q-Q plot with reference line for each peer group.
qqnorm(alos_Largehosp[["Avg_len_stay"]])
qqline(alos_Largehosp[["Avg_len_stay"]])

qqnorm(alos_Mediumhosp[["Avg_len_stay"]])
qqline(alos_Mediumhosp[["Avg_len_stay"]])
# Install 'car' only when it is not already available. Calling
# install.packages() unconditionally re-installs on every run and fails with
# "Updating loaded packages" when the package is already loaded.
if (!requireNamespace("car", quietly = TRUE)) {
  install.packages("car")
}
library(car)
# Levene's test for equality of variances between the two peer groups.
car::leveneTest(Avg_len_stay ~ Peer_group, data = alos_clean)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 1 18.416 1.802e-05 ***
6333
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Levene's test p-value in plain decimals: 0.00001802.
# Two-sided two-sample t-test that does not assume equal variances (Welch).
res <- t.test(
  Avg_len_stay ~ Peer_group,
  data = alos_clean,
  alternative = "two.sided",
  var.equal = FALSE
)
res
Welch Two Sample t-test
data: Avg_len_stay by Peer_group
t = 5.9202, df = 4545.1, p-value = 3.453e-09
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.1623117 0.3230370
sample estimates:
mean in group Large hospitals mean in group Medium hospitals
3.742864 3.500190
# Test statistic (t), rounded to two decimals.
round(res[["statistic"]], digits = 2)
t
5.92
# Degrees of freedom (df), rounded to a whole number.
round(res[["parameter"]])
df
4545
# p-value of the test, unrounded.
res[["p.value"]]
[1] 3.453018e-09
# Confidence interval for the difference in means, rounded to two decimals.
round(res[["conf.int"]], digits = 2)
[1] 0.16 0.32
attr(,"conf.level")
[1] 0.95
# Sample mean of each peer group, rounded to two decimals.
round(res[["estimate"]], digits = 2)
mean in group Large hospitals mean in group Medium hospitals
3.74 3.50
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.