The Hospital Admissions dataset records the general reasons people were admitted to hospital, by financial year (FY), from July 1993 to June 1998. It can be found here:
http://www.mm-c.me/mdsi/hospitals93to98.csv
Separations refer to episodes of admitted patient care (number of patients admitted) and PatientDays refer to the days a patient stayed in the hospital.
# Keep message() output (e.g. package start-up notes) out of the knitted chunks.
knitr::opts_chunk$set(message = FALSE)

# Read the admissions table directly from its published URL and preview
# the first ten rows.
df <- read.csv(file = "http://www.mm-c.me/mdsi/hospitals93to98.csv")
head(df, 10)
## IcdChapter
## 1 0. Not Reported
## 2 0. Not Reported
## 3 1. Infectious and Parasitic Diseases
## 4 1. Infectious and Parasitic Diseases
## 5 2. Neoplasms
## 6 2. Neoplasms
## 7 3. Endocrine Nutritional, and Metabolic Diseases and Immunity Disorders
## 8 3. Endocrine Nutritional, and Metabolic Diseases and Immunity Disorders
## 9 4. Diseases of the Blood and Blood?Forming Organs
## 10 4. Diseases of the Blood and Blood?Forming Organs
## Field FY1993 FY1994 FY1995 FY1996 FY1997 FY1998
## 1 PatientDays 257,965 55,582 128,507 182,226 61,599 685,879
## 2 Separations 37,178 6,146 3,832 4,861 1,558 53,575
## 3 PatientDays 311,221 313,386 324,693 311,560 306,688 1,567,548
## 4 Separations 75,857 78,323 84,631 80,864 79,148 398,823
## 5 PatientDays 1,686,919 1,707,437 1,795,751 1,770,559 1,777,452 8,738,118
## 6 Separations 301,928 336,447 348,905 360,578 378,070 1,725,928
## 7 PatientDays 328,354 326,877 349,671 351,119 354,723 1,710,744
## 8 Separations 50,365 54,292 60,655 65,483 68,605 299,400
## 9 PatientDays 142,332 147,120 156,280 163,412 166,802 775,946
## 10 Separations 46,969 50,769 56,758 62,771 67,672 284,939
library(dplyr)
library(ggplot2)
library(tidyr)
# Reshape to one row per (IcdChapter, Year), with the two measures held in
# `Field` (PatientDays, Separations) spread out into their own columns.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread(), and
# the year columns are selected once each (FY1997 was previously listed twice).
# NOTE(review): in the preview printed above, every FY1998 value equals the
# sum of FY1993-FY1997 on the same row, so that column may be a total rather
# than a financial year -- confirm against the source before reading "1998"
# as a year in the plots.
df <- df %>%
  pivot_longer(
    cols = FY1993:FY1998,
    names_to = "Year",
    values_to = "Count"
  ) %>%
  pivot_wider(names_from = Field, values_from = Count) %>%
  as.data.frame() # keep a plain data.frame, as gather()/spread() returned
is.factor(df$IcdChapter)
## [1] FALSE
library(stringr)

# Drop the placeholder chapter that carries no diagnosis information.
df <- filter(df, IcdChapter != "0. Not Reported")

# Strip the leading "<number>. " from each chapter label. An anchored pattern
# is used instead of a fixed character offset so that two-digit chapter
# numbers ("10. ...") are cleaned the same way as single-digit ones rather
# than being left with a leading space.
df$IcdChapter <- str_remove(as.character(df$IcdChapter), "^[0-9]+\\.\\s*")

# "FY1993" -> "1993" (kept as text; it is used as a discrete axis later).
df$Year <- substr(df$Year, 3, 6)

# The counts arrive as text with thousands separators ("1,686,919");
# remove the commas and convert to numbers.
df$PatientDays <- as.numeric(gsub(",", "", df$PatientDays, fixed = TRUE))
df$Separations <- as.numeric(gsub(",", "", df$Separations, fixed = TRUE))
head(df, n = 5)
## IcdChapter Year PatientDays Separations
## 1 Infectious and Parasitic Diseases 1993 311221 75857
## 2 Infectious and Parasitic Diseases 1994 313386 78323
## 3 Infectious and Parasitic Diseases 1995 324693 84631
## 4 Infectious and Parasitic Diseases 1996 311560 80864
## 5 Infectious and Parasitic Diseases 1997 306688 79148
For ease of analysis, we will examine whether patients admitted for mental disorders tend to stay in hospital longer than patients admitted for other, non-mental-health conditions, such as circulatory, respiratory and digestive diseases.
# Keep only the four illness types being compared, then list the distinct
# chapter labels that survived as a sanity check.
df_final <- subset(
  df,
  IcdChapter %in% c(
    "Diseases of the Digestive System",
    "Diseases of the Circulatory System",
    "Mental Disorders",
    "Diseases of the Respiratory System"
  )
)
unique(df_final$IcdChapter)
## [1] "Mental Disorders" "Diseases of the Circulatory System"
## [3] "Diseases of the Respiratory System" "Diseases of the Digestive System"
# Print large axis values in full rather than in scientific notation.
options(scipen = 999)

# Convert the illness type to a factor before grouping, so the grouping
# column is not modified on an already-grouped data frame.
df_final$IcdChapter <- as.factor(df_final$IcdChapter)
# NOTE(review): the plots below do not rely on this grouping; it is retained
# so df_final keeps the grouped structure it had before -- confirm whether
# later code needs it, otherwise drop it or ungroup().
df_final <- df_final %>% group_by(IcdChapter)
library(RColorBrewer)

# Stacked area chart of total patient days per year, one band per illness
# type. Columns are referenced by bare name inside aes() (not df_final$...)
# so the mapping always follows the data actually passed to ggplot().
plot1 <- df_final %>%
  ggplot(aes(x = Year, y = PatientDays, group = IcdChapter)) +
  geom_area(aes(fill = IcdChapter)) +
  ylab("Total number of patient days") +
  scale_fill_discrete("Illness Type") +
  ggtitle("Days patients stayed in the hospital by Illness Type") +
  theme(text = element_text(size = 20))

# Same layout for the number of admissions (separations) per year.
plot2 <- df_final %>%
  ggplot(aes(x = Year, y = Separations, group = IcdChapter)) +
  geom_area(aes(fill = IcdChapter)) +
  ylab("Total number of patient admissions") +
  scale_fill_discrete("Illness Type") +
  ggtitle("Hospital admissions by Illness Type") +
  theme(text = element_text(size = 20))
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.0.2
# Draw the two area charts together on one page.
grid.arrange(grobs = list(plot1, plot2))
From the graph, we observe the following key insights: