Introduction

Dataset Description

This data set is composed of a curated collection of over 200 publicly available COVID-19 related data sets from sources like Johns Hopkins, the WHO, the World Bank, the New York Times, and many others. It includes data on a wide variety of potentially powerful statistics and indicators, like local and national infection rates, global social distancing policies, geospatial data on movement of people, and more.

This dataset was originally published as a Kaggle challenge/competition. The challenge's key questions and tasks were no longer available at the time of this analysis, so new questions were formulated to explore the data.

The dataset was downloaded from Kaggle (link: Data set).

Loading packages

library(tidyverse)
library(readxl)
library(skimr)
library(ggplot2)
library(tidyr)
library(stringr)
library(gridExtra)
library(scales)

Reading Data

# All four sheets are read from the same workbook, so the path is named once.
inpatient_workbook <- "~/Asha/Projects_R/Canada_Hosp1_COVID_InpatientData.xlsx"

# One data frame per worksheet; each object is named after its sheet.
Data_at_admission <- read_excel(path = inpatient_workbook, sheet = "Data-at-admission")

Days_breakdown <- read_excel(path = inpatient_workbook, sheet = "Days-breakdown")

Hospital_length_of_stay <- read_excel(path = inpatient_workbook, sheet = "Hospital-length-of-stay")

Medication_Static_List <- read_excel(path = inpatient_workbook, sheet = "Medication-Static-List")

Data Cleaning

The comorbidities column was cleaned to remove punctuation (quotation marks and square brackets) and then reshaped into long format, with one row per patient and comorbidity, for the analysis.

# Names of the per-comorbidity columns produced by the split below.
como_cols <- c('como1', 'como2', 'como3', 'como4', 'como5', 'como6')

# Split the comma-separated `comorbidities` string into one column per entry.
Data_at_admission1 <- separate(
  Data_at_admission,
  col = comorbidities,
  into = como_cols,
  sep = ","
)

# Replace every punctuation character with a space, then trim leading and
# trailing whitespace. The same cleaning is applied to all six columns in one
# pass instead of repeating the gsub()/trimws() pair for each column.
Data_at_admission1 <- Data_at_admission1 %>%
  mutate(across(all_of(como_cols), ~ trimws(gsub('[[:punct:]]', ' ', .x))))

# New data frame holding only the patient id and the cleaned comorbidity columns.
dataForLong <- Data_at_admission1 %>%
  select(id, all_of(como_cols))

# Long format: one row per (patient, comorbidity).
# Rows whose comorbidity is NA (unused slots) or empty are dropped.
longcomodata <- dataForLong %>%
  pivot_longer(
    cols = all_of(como_cols),
    names_to = "type",
    values_to = "comorbidities"
  ) %>%
  na.omit() %>%
  filter(comorbidities != "")

head(longcomodata)
## # A tibble: 6 × 3
##      id type  comorbidities
##   <dbl> <chr> <chr>        
## 1     1 como1 Hypertension 
## 2     1 como2 Diabetes     
## 3     1 como3 Other        
## 4     2 como1 Hypertension 
## 5     2 como2 Other        
## 6     3 como1 Hypertension
# Add a column holding all of a patient's cleaned comorbidities in one
# comma-separated string. Missing (NA) and empty slots are skipped before
# joining, so the result has no dangling separators and no text-level removal
# of "NA" is needed (which would also have stripped the letters "NA" from
# inside a real comorbidity name).
como_values <- as.matrix(
  dataForLong[c('como1', 'como2', 'como3', 'como4', 'como5', 'como6')]
)
dataForLong$Comorbidities <- vapply(
  seq_len(nrow(como_values)),
  function(i) {
    vals <- como_values[i, ]
    paste(vals[!is.na(vals) & nzchar(vals)], collapse = ", ")
  },
  character(1)
)

head(dataForLong)
## # A tibble: 6 × 8
##      id como1                              como2 como3 como4 como5 como6 Comor…¹
##   <dbl> <chr>                              <chr> <chr> <chr> <chr> <chr> <chr>  
## 1     1 Hypertension                       Diab… Other <NA>  <NA>  <NA>  "Hyper…
## 2     2 Hypertension                       Other <NA>  <NA>  <NA>  <NA>  "Hyper…
## 3     3 Hypertension                       <NA>  <NA>  <NA>  <NA>  <NA>  "Hyper…
## 4     4 Hypertension                       Other <NA>  <NA>  <NA>  <NA>  "Hyper…
## 5     5 Chronic cardiac disease  not hype… Hype… Diab… Other <NA>  <NA>  "Chron…
## 6     6 Hypertension                       <NA>  <NA>  <NA>  <NA>  <NA>  "Hyper…
## # … with abbreviated variable name ¹​Comorbidities
#save(longcomodata,file = "longcomodata.RData")       # saving this df for further analysis on tableau
#write.csv(longcomodata,file = "longcomodata.csv",row.names = FALSE)      # saving as a CSV file

VIZES OF THE DATA

1. Age distribution graph.

# Bar chart of the number of patients at each age, with bars filled by sex.
ggplot(data = Data_at_admission) +
  geom_bar(mapping = aes(x = age, fill = sex)) +
  labs(
    title = "Age distribution of Patients",
    x = "Age",
    y = "No.of patients"
  ) +
  theme_minimal()

Observation:- Most patients are in their 60s and 70s.

2. How many male and female patients are admitted for each reason for admission?

# Number of patients for every (reason for admission, sex) pair,
# largest counts first. The result stays grouped by both columns.
Data_at_admission %>%
  group_by(reason_for_admission, sex) %>%
  count(name = "count", sort = TRUE)
## # A tibble: 38 × 3
## # Groups:   reason_for_admission, sex [38]
##    reason_for_admission                           sex    count
##    <chr>                                          <chr>  <int>
##  1 COVID-19 [U07.1]                               Male     124
##  2 COVID-19 [U07.1]                               Female    93
##  3 Pneumonia [J18.9]                              Male      83
##  4 Pneumonia [J18.9]                              Female    51
##  5 Pneumonia due to COVID-19 virus [U07.1, J12.8] Male      24
##  6 Pneumonia due to COVID-19 virus [U07.1, J12.8] Female    16
##  7 Shortness of breath [R06.0]                    Female    14
##  8 Fever [R50.9]                                  Male      13
##  9 Hypoxia [R09.0]                                Male      12
## 10 Shortness of breath [R06.0]                    Male      11
## # … with 28 more rows
# Same (reason for admission, sex) counts as above, drawn as one point per
# pair: count on the x axis, reason on the y axis, colour by sex.
Data_at_admission %>%
  group_by(reason_for_admission, sex) %>%
  count(name = "count", sort = TRUE) %>%
  ggplot(mapping = aes(x = count, y = reason_for_admission, color = sex)) +
  geom_point() +
  labs(
    title = "Patients in each reason of admission",
    subtitle = "Distinguished sex with color of points"
  )

Observation:-

For most reasons for admission, there are more male patients than female patients.

COVID-19-related admissions make up the majority of this dataset of 508 patients.

3. How many patients died in the hospital

# Number of stay records in which the patient expired in hospital.
tally(
  filter(Hospital_length_of_stay, did_the_patient_expire_in_hospital == "Yes"),
  name = "No_of_patients_died"
)
## # A tibble: 1 × 1
##   No_of_patients_died
##                 <int>
## 1                  90
# Number of female and male patients among all admissions.
count(
  group_by(Data_at_admission, sex),
  name = "No_of_patients"
)
## # A tibble: 2 × 2
## # Groups:   sex [2]
##   sex    No_of_patients
##   <chr>           <int>
## 1 Female            212
## 2 Male              296
# Join the admission data to the length-of-stay data
# (admission `id` matches stay `parent_id`); only matching rows are kept.
admission_hospital_data <- merge(
  Data_at_admission,
  Hospital_length_of_stay,
  by.x = 'id',
  by.y = 'parent_id'
)

# Number of female and male patients who expired in hospital.
admission_hospital_data %>%
  filter(did_the_patient_expire_in_hospital == "Yes") %>%
  group_by(sex) %>%
  count(name = "No_of_patients_died")
## # A tibble: 2 × 2
## # Groups:   sex [2]
##   sex    No_of_patients_died
##   <chr>                <int>
## 1 Female                  31
## 2 Male                    59
# Patients who expired in hospital, split by whether they were intubated.
count(
  group_by(
    filter(admission_hospital_data, did_the_patient_expire_in_hospital == "Yes"),
    intubated
  ),
  name = "No_of_patients_died"
)
## # A tibble: 2 × 2
## # Groups:   intubated [2]
##   intubated No_of_patients_died
##   <chr>                   <int>
## 1 No                         83
## 2 Yes                         7
# Patients who expired in hospital, split by admission disposition (ICU/WARD)
# and intubation status.
count(
  group_by(
    filter(admission_hospital_data, did_the_patient_expire_in_hospital == "Yes"),
    admission_disposition,
    intubated
  ),
  name = "No_of_patients_died"
)
## # A tibble: 3 × 3
## # Groups:   admission_disposition, intubated [3]
##   admission_disposition intubated No_of_patients_died
##   <chr>                 <chr>                   <int>
## 1 ICU                   No                          7
## 2 ICU                   Yes                         7
## 3 WARD                  No                         76
# Left panel: absolute number of patients per sex, stacked by whether the
# patient expired in hospital. Its legend is hidden because the right panel
# carries the legend for both.
Died_patients_count <-
  ggplot(admission_hospital_data) +
  geom_bar(aes(x = sex, fill = did_the_patient_expire_in_hospital)) +
  labs(
    title = "No. of female and male Patients",
    subtitle = "Stacked no. of patients died and not died"
  ) +
  xlab("Sex") +
  ylab("No.of patients") +
  theme(legend.position = "none")

# Right panel: the same bars normalised to a share of each sex
# (position = "fill"). The axis is formatted as a percentage so the tick
# labels agree with the "%" axis title; geom_bar alone would show 0-1 fractions.
Died_patients_perct <-
  ggplot(admission_hospital_data) +
  geom_bar(
    aes(x = sex, fill = did_the_patient_expire_in_hospital),
    position = "fill"
  ) +
  scale_y_continuous(labels = percent) +
  labs(title = "% of died patients") +
  xlab("Sex") +
  ylab("Patients died in %") +
  guides(fill = guide_legend(title = "Patient expired in hospital")) +
  theme(legend.position = "top")

# Draw both panels side by side.
grid.arrange(Died_patients_count, Died_patients_perct, nrow = 1, ncol = 2)

Observation:-

1. There were more male patients than female in the dataset.

2. 90 patients expired in hospital out of 508 patients in the dataset.

3. Male death rate was higher than female.

ANALYSIS :

PART 1

1. Average, min, and max age, height and weight of the patients who are admitted

# Names of the columns to summarise.
vec <- c("age", "height", "weight")

# Min / quartiles / mean / max (and NA counts) for the selected columns.
Data_at_admission %>%
  select(all_of(vec)) %>%
  summary()
##       age             height          weight      
##  Min.   : 19.00   Min.   :125.0   Min.   : 27.70  
##  1st Qu.: 55.75   1st Qu.:159.0   1st Qu.: 65.80  
##  Median : 66.00   Median :167.6   Median : 76.70  
##  Mean   : 66.03   Mean   :166.9   Mean   : 80.27  
##  3rd Qu.: 78.00   3rd Qu.:175.0   3rd Qu.: 89.80  
##  Max.   :100.00   Max.   :198.0   Max.   :199.60  
##                   NA's   :236     NA's   :150

2. Most common comorbidities found in patients in the dataset.

# Six most frequent comorbidities, most common first.
head(
  count(longcomodata, comorbidities, name = "count", sort = TRUE)
)
## # A tibble: 6 × 2
##   comorbidities                             count
##   <chr>                                     <int>
## 1 Other                                       400
## 2 Hypertension                                310
## 3 Diabetes                                    175
## 4 Chronic cardiac disease  not hypertension    88
## 5 Asthma                                       54
## 6 Chronic renal                                40
# Bar chart of how often each comorbidity occurs, with bars split by whether
# the patient expired in hospital.
# all.y = TRUE keeps every stay record, so patients without any listed
# comorbidity still appear (under an NA comorbidity).
merge(
  x = longcomodata,
  y = Hospital_length_of_stay,
  by.x = 'id',
  by.y = 'parent_id',
  all.y = TRUE
) %>%
  ggplot() +
  geom_bar(aes(x = comorbidities, fill = did_the_patient_expire_in_hospital)) +
  labs(
    title = "Common comorbidities among Patients",
    subtitle = "Distinguished died and not died patients within common comorbidities",
    fill = "Patient expired?"
  ) +
  ylab("No.of patients") +
  theme_minimal() +
  # Rotate the comorbidity names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Observation:- Hypertension and Diabetes are the common comorbidities found in most of the patients and most patients had other medical conditions.

3. Patients in different age groups below:

  1. 14-30

  2. 31-50

  3. 51-75

  4. 76-90

  5. 90+

# Add a new column `age_group` with the bins 14-30, 31-50, 51-75, 76-90, 90+.
# case_when() evaluates its conditions in order, so each bin only needs an
# upper bound. This gives the same labels as before for whole-number ages and
# also closes the gaps between bins (e.g. an age of 30.5 now falls in '31-50'
# instead of becoming NA). Ages below 14 and missing ages stay NA.
Data_at_admission_AgeGp <-
  Data_at_admission %>%
  mutate(age_group = case_when(
    age < 14 ~ NA_character_,
    age <= 30 ~ '14-30',
    age <= 50 ~ '31-50',
    age <= 75 ~ '51-75',
    age <= 90 ~ '76-90',
    age > 90 ~ '90+'
  ))

4. No.of patients in each age group

# Number of patients in each age group.
count(Data_at_admission_AgeGp, age_group, name = "Patient_Count")
## # A tibble: 5 × 2
##   age_group Patient_Count
##   <chr>             <int>
## 1 14-30                 8
## 2 31-50                81
## 3 51-75               263
## 4 76-90               121
## 5 90+                  35

Observation:- Most patients are in the age group 51-75.

5.Average length of stay of patients in each age group.

# Attach the length-of-stay data to the age-grouped admission data.
# all.x = TRUE keeps every admission row even if no stay record matches it.
admission_hospital_agegp_data <-
  merge(
    x = Data_at_admission_AgeGp,
    y = Hospital_length_of_stay,
    by.x = 'id',
    by.y = 'parent_id',
    all.x = TRUE
  )

# Mean length of stay per age group, rounded to a whole number.
# na.rm = TRUE: an admission kept by all.x without a matching stay has an NA
# length of stay, which would otherwise turn its whole group's mean into NA.
avg_stay <-
  admission_hospital_agegp_data %>%
  group_by(age_group) %>%
  summarise(
    Avg_length_ofStay = round(mean(hospital_length_of_stay, na.rm = TRUE), 0)
  )
avg_stay
## # A tibble: 5 × 2
##   age_group Avg_length_ofStay
##   <chr>                 <dbl>
## 1 14-30                     5
## 2 31-50                     8
## 3 51-75                    13
## 4 76-90                    15
## 5 90+                      14

Observation:- Average length of stay is more than 10 days in age group above 50.

6.No.of patients expired in the hospital for each age group

# Number of patients who expired in hospital, per age group.
# Age groups with no deaths do not appear in the result.
admission_hospital_agegp_data %>%
  filter(did_the_patient_expire_in_hospital == "Yes") %>%
  group_by(age_group, did_the_patient_expire_in_hospital) %>%
  tally(name = "No_of_patients_died") %>%
  select(age_group, No_of_patients_died)
## # A tibble: 3 × 2
## # Groups:   age_group [3]
##   age_group No_of_patients_died
##   <chr>                   <int>
## 1 51-75                      32
## 2 76-90                      43
## 3 90+                        15
# Death rate in each age group: bars are normalised to a share of the group
# (position = "fill"). The y axis is formatted as a percentage so the tick
# labels agree with the "Percentage" axis title instead of showing 0-1 fractions.
admission_hospital_agegp_data %>%
  ggplot() +
  geom_bar(
    aes(x = age_group, fill = did_the_patient_expire_in_hospital),
    position = "fill"
  ) +
  scale_y_continuous(labels = percent) +
  labs(
    title = "Death rate in each age-group",
    fill = "Patient expired?",
    y = "Percentage"
  )

Observation:- Age 50+, Death rate increases as age increases.

PART 2

1.Average Vitals and their Standard deviation in the dataset

Normal range values:

Temperature:- between 97 F (36.1 C) and 99 F (37.2 C).

Systolic bp: 90-120

Diastolic bp: 60-80

WBC: 4.5 to 11.0 × 10^9/L

Hematocrit:- men:41% to 50%, women:36% to 48%

Platelet: 150,000 to 450,000 platelets per microliter of blood

Heart rate: 60-100

# Mean and standard deviation of each admission vital / lab value across all
# patients, ignoring missing readings, plus the total number of patients.
Data_at_admission %>%
  summarise(
    No_of_Patiens     = n(),
    Avg_Sbp           = mean(systolic_blood_pressure, na.rm = TRUE),
    Std_Sbp           = sd(systolic_blood_pressure, na.rm = TRUE),
    Avg_Heartrate     = mean(heart_rate, na.rm = TRUE),
    Std_Heartrate     = sd(heart_rate, na.rm = TRUE),
    Avg_Resprate      = mean(respiratory_rate, na.rm = TRUE),
    Std_Resprate      = sd(respiratory_rate, na.rm = TRUE),
    Avg_OxySat        = mean(oxygen_saturation, na.rm = TRUE),
    Std_OxySat        = sd(oxygen_saturation, na.rm = TRUE),
    Avg_Temp          = mean(temperature, na.rm = TRUE),
    Std_Temp          = sd(temperature, na.rm = TRUE),
    Avg_dbp           = mean(diastolic_blood_pressure, na.rm = TRUE),
    Std_dbp           = sd(diastolic_blood_pressure, na.rm = TRUE),
    Avg_wbc           = mean(wbc, na.rm = TRUE),
    Std_wbc           = sd(wbc, na.rm = TRUE),
    Avg_rbc           = mean(rbc, na.rm = TRUE),
    Std_rbc           = sd(rbc, na.rm = TRUE),
    Avg_Hematocrit    = mean(hematocrit, na.rm = TRUE),
    Std_Hematocrit    = sd(hematocrit, na.rm = TRUE),
    Avg_PlateletCount = mean(platelet_count, na.rm = TRUE),
    Std_PlateletCount = sd(platelet_count, na.rm = TRUE)
  )
## # A tibble: 1 × 21
##   No_of_Patiens Avg_Sbp Std_Sbp Avg_He…¹ Std_H…² Avg_R…³ Std_R…⁴ Avg_O…⁵ Std_O…⁶
##           <int>   <dbl>   <dbl>    <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1           508    129.    22.1     97.4    17.5    24.9    7.17    93.1    6.10
## # … with 12 more variables: Avg_Temp <dbl>, Std_Temp <dbl>, Avg_dbp <dbl>,
## #   Std_dbp <dbl>, Avg_wbc <dbl>, Std_wbc <dbl>, Avg_rbc <dbl>, Std_rbc <dbl>,
## #   Avg_Hematocrit <dbl>, Std_Hematocrit <dbl>, Avg_PlateletCount <dbl>,
## #   Std_PlateletCount <dbl>, and abbreviated variable names ¹​Avg_Heartrate,
## #   ²​Std_Heartrate, ³​Avg_Resprate, ⁴​Std_Resprate, ⁵​Avg_OxySat, ⁶​Std_OxySat

2. Average Vitals in different groups below:

# Grouping columns for which the same vitals summary is produced.
# `age_limit` is not a column of the raw data; Summary() derives it.
colmns <- c("sex", "admission_disposition", "age_limit")

# Average admission vitals per level of one grouping column.
#   z: name (string) of the column to group by; may be "age_limit".
# Returns a tibble with the grouping values in column `C` followed by the mean
# respiratory rate, oxygen saturation, systolic / diastolic blood pressure,
# heart rate and temperature (missing readings ignored).
Summary <- function(z) {
  # Derive the two-level age split so it can be used like any other column.
  with_age_limit <- mutate(
    Data_at_admission,
    age_limit = case_when(
      age < 50 ~ 'Below 50',
      age >= 50 ~ '50 and above'
    )
  )

  with_age_limit %>%
    group_by(C = .data[[z]]) %>%
    summarise(
      RespRate_avg = mean(respiratory_rate, na.rm = TRUE),
      Oxy_Sat_avg = mean(oxygen_saturation, na.rm = TRUE),
      SysBP_avg = mean(systolic_blood_pressure, na.rm = TRUE),
      DiaBP_avg = mean(diastolic_blood_pressure, na.rm = TRUE),
      HeartRate_avg = mean(heart_rate, na.rm = TRUE),
      Temp_avg = mean(temperature, na.rm = TRUE)
    )
}

# Print one summary table per grouping column.
for (a in colmns) {
  G <- Summary(a)
  print(G)
}
## # A tibble: 2 × 7
##   C      RespRate_avg Oxy_Sat_avg SysBP_avg DiaBP_avg HeartRate_avg Temp_avg
##   <chr>         <dbl>       <dbl>     <dbl>     <dbl>         <dbl>    <dbl>
## 1 Female         24.5        93.4      127.      74.6          97.1     37.8
## 2 Male           25.2        92.9      131.      76.2          97.6     37.8
## # A tibble: 2 × 7
##   C     RespRate_avg Oxy_Sat_avg SysBP_avg DiaBP_avg HeartRate_avg Temp_avg
##   <chr>        <dbl>       <dbl>     <dbl>     <dbl>         <dbl>    <dbl>
## 1 ICU           32.0        85.9      128.      75.6         105.      37.8
## 2 WARD          24.2        93.8      129.      75.6          96.7     37.8
## # A tibble: 2 × 7
##   C            RespRate_avg Oxy_Sat_avg SysBP_avg DiaBP_avg HeartRate_…¹ Temp_…²
##   <chr>               <dbl>       <dbl>     <dbl>     <dbl>        <dbl>   <dbl>
## 1 50 and above         24.4        92.9      130.      75.0         95.4    37.8
## 2 Below 50             27.7        94.0      124.      78.4        108.     37.8
## # … with abbreviated variable names ¹​HeartRate_avg, ²​Temp_avg

Observation:-

a) Respiratory rate and systolic bp were high for both male and female patients; male readings were slightly higher.

b) Oxygen saturation was significantly low and respiratory rate was high in ICU patients(43 ICU/465 WARD patients),Oxygen saturation below 90% is very concerning and indicates an emergency.

c) Average respiratory rate was higher in the age group below 50; systolic bp was high in both groups.

3. Visually analyze the data to see if there were vital changes during their stay. Vitals include systolic bp, diastolic bp, temperature, and heart rate, wbc, rbc, hematocrit, platelet count.

# Daily measurement columns to plot, one chart per column. The ".y" suffix is
# what merge() appends to a column name that exists in both joined tables;
# here it refers to the copy coming from Days_breakdown.
vitals <- c(
  "systolic_blood_pressure.y", "respiratory_rate.y", "oxygen_saturation.y",
  "temperature.y", "diastolic_blood_pressure.y", "wbc.y", "hematocrit.y"
)

# Plot the per-day average of one measurement, one facet per age group.
#   z: name (string) of the measurement column in the merged data.
# Returns the ggplot object; the caller is responsible for printing it.
viz <- function(z) {
  # Admission data (with age groups) joined to the per-day breakdown.
  stays_by_day <- merge(
    Data_at_admission_AgeGp,
    Days_breakdown,
    by.x = 'id',
    by.y = 'parent_id',
    all.x = TRUE
  )

  # Mean of the chosen measurement per (age group, day), days up to 10 only.
  daily_means <- stays_by_day %>%
    filter(day <= 10) %>%
    group_by(G = age_group, x = day) %>%
    summarise(y = mean(.data[[z]], na.rm = TRUE))

  ggplot(daily_means, aes(x = x, y = y, color = G)) +
    geom_point() +
    geom_line() +
    facet_wrap(~G) +
    theme(legend.position = "none") +
    ylab(z) +
    labs(
      title = "Variation in Avg-Vitals during the hospital stay in different age group",
      subtitle = "Considered first 10 days of data"
    )
}

# Draw one chart per measurement.
for (a in vitals) {
  G <- viz(a)
  print(G)
}

Observation:-

Systolic bp normal reading 90-120:- Sbp was significantly high throughout these days in patients above age 50.

Normal WBC readings range from 4.5 to 11.0 × 10^9/L:- For the first 4 days all age groups show normal readings; age groups 51-75 and 76-90 show a steady increase during the remaining days. After the 5th day the average goes above 11, which is a concern.

4. Compare patients in the WARD vs ICU

# Number of patients per admission disposition (WARD / ICU) and intubation
# status. The result stays grouped by admission_disposition.
Data_at_admission %>%
  group_by(admission_disposition, intubated) %>%
  summarise(`No.of patients` = n(), .groups = "drop_last")
## # A tibble: 3 × 3
## # Groups:   admission_disposition [2]
##   admission_disposition intubated `No.of patients`
##   <chr>                 <chr>                <int>
## 1 ICU                   No                      28
## 2 ICU                   Yes                     15
## 3 WARD                  No                     465
# Admission variables to compare between WARD and ICU patients.
vitals <- c("age", "systolic_blood_pressure", "respiratory_rate", "oxygen_saturation")

# Overlaid, semi-transparent histograms of one admission variable,
# one colour per admission disposition.
#   z: name (string) of the column of Data_at_admission to plot.
# Returns the ggplot object; the caller is responsible for printing it.
viz <- function(z) {
  ggplot(
    Data_at_admission,
    aes(x = .data[[z]], fill = admission_disposition)
  ) +
    geom_histogram(alpha = 0.5, binwidth = 10, position = "identity") +
    labs(
      title = "WARD vs ICU patients",
      subtitle = z,
      x = z,
      y = "No.of patients"
    )
}

# Draw one histogram per variable.
for (a in vitals) {
  G <- viz(a)
  print(G)
}

Observation:-

Compared with WARD patients, ICU patients more often show: 1. oxygen saturation below 90, 2. high systolic bp and respiratory rate.

5. Compare patients who expired and not-expired in hospital.

# Patient count and mean admission vitals / lab values for patients who
# expired in hospital versus those who did not (missing readings ignored).
admission_hospital_data %>%
  group_by(Patient_expired_in_hospital = did_the_patient_expire_in_hospital) %>%
  summarise(
    Count             = n(),
    Avg_sys_bp        = mean(systolic_blood_pressure, na.rm = TRUE),
    Avg_dbp           = mean(diastolic_blood_pressure, na.rm = TRUE),
    Avg_resp          = mean(respiratory_rate, na.rm = TRUE),
    Avg_oxy           = mean(oxygen_saturation, na.rm = TRUE),
    Avg_wbc           = mean(wbc, na.rm = TRUE),
    Avg_rbc           = mean(rbc, na.rm = TRUE),
    Avg_Hematocrit    = mean(hematocrit, na.rm = TRUE),
    Avg_PlateletCount = mean(platelet_count, na.rm = TRUE)
  )
## # A tibble: 2 × 10
##   Patien…¹ Count Avg_s…² Avg_dbp Avg_r…³ Avg_oxy Avg_wbc Avg_rbc Avg_H…⁴ Avg_P…⁵
##   <chr>    <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 No         418    128.    75.8    24.6    93.5    7.90    4.68   0.398    237.
## 2 Yes         90    134.    74.3    26.2    91.3    8.64    4.35   0.382    203.
## # … with abbreviated variable names ¹​Patient_expired_in_hospital, ²​Avg_sys_bp,
## #   ³​Avg_resp, ⁴​Avg_Hematocrit, ⁵​Avg_PlateletCount
# Admission variables to compare between patients who expired in hospital and
# those who did not.
vitals <- c("age", "systolic_blood_pressure", "respiratory_rate", "oxygen_saturation")

# Overlaid, semi-transparent density curves of one admission variable,
# one colour per outcome (expired in hospital or not).
#   z: name (string) of the column of admission_hospital_data to plot.
# Returns the ggplot object; the caller is responsible for printing it.
viz <- function(z) {
  admission_hospital_data %>%
    select(A = all_of(z), f = did_the_patient_expire_in_hospital) %>%
    ggplot(aes(x = A, fill = f)) +
    geom_density(alpha = 0.5) +
    xlab(z) +
    labs(
      title = "Died/Not died Patients in Hospital",
      subtitle = z,
      fill = "Did patient die in Hospital"
    )
}

# Draw one density chart per variable.
for (a in vitals) {
  G <- viz(a)
  print(G)
}

Observation:- Can see a shift in the density graphs for age and Sbp.

ANALYSIS RESULTS:-

1.Total Patients in the data set: 508 (296M,212F)

2.Most no. of patients in the age of 60s and 70s.

3.Total patients died: 90 (59M, 31F), (7 Intubated, 83 Not), (14 ICU [7 Intu, 7 Not], 76 WARD); more people died in the age-group 76-90 than in any other age group

4.Total ICU patients: 43(15 Intu, 28 Not)

5.Oxygen saturation was significantly low and respiratory rate was high in ICU patients(43 ICU/465 WARD patients),Oxygen saturation below 90% is very concerning and indicates an emergency.

6.Most common Comorbidities in the patients:- Hypertension (310/508), Diabetes (175/508), Chronic cardiac disease (88/508). And 400/508 patients have other medical conditions

7.Systolic-blood-pressure,Respiratory-rate and oxygen-saturation were risky vital levels.

8.70+ Age group was at highest risk