Dorcas Olanike Agboola
2024-12-10
#setwd("C:/Users/USER/Desktop/ICAMMDA/Data Analytics/")
# Load the hospital line list from CSV, open it in the data viewer and
# print a per-column summary.
library(readr)
Maternity <- read_csv(
  file = "C:/Users/USER/Desktop/ICAMMDA/Data Analytics/St. Mark's Maternity Hospital (SMMH).csv"
)
View(x = Maternity)
summary(object = Maternity)
## case_id generation date_infection date_onset
## Length:422 Min. : 2.00 Min. :2014-04-26 Min. :2014-05-01
## Class :character 1st Qu.:13.00 1st Qu.:2014-09-06 1st Qu.:2014-09-15
## Mode :character Median :16.00 Median :2014-10-16 Median :2014-10-24
## Mean :16.67 Mean :2014-10-26 Mean :2014-11-03
## 3rd Qu.:20.00 3rd Qu.:2014-12-16 3rd Qu.:2014-12-20
## Max. :37.00 Max. :2015-04-17 Max. :2015-04-27
## NA's :152
## date_hospitalisation date_outcome outcome
## Min. :2014-05-05 Min. :2014-05-23 Length:422
## 1st Qu.:2014-09-17 1st Qu.:2014-09-25 Class :character
## Median :2014-10-24 Median :2014-11-06 Mode :character
## Mean :2014-11-05 Mean :2014-11-13
## 3rd Qu.:2014-12-20 3rd Qu.:2014-12-28
## Max. :2015-04-29 Max. :2015-05-10
## NA's :79
## gender age age_unit age_years
## Length:422 Min. : 0.00 Length:422 Min. : 0.00
## Class :character 1st Qu.: 6.00 Class :character 1st Qu.: 6.00
## Mode :character Median :12.00 Mode :character Median :12.00
## Mean :15.12 Mean :15.03
## 3rd Qu.:22.00 3rd Qu.:22.00
## Max. :59.00 Max. :59.00
## NA's :5 NA's :5
## age_cat age_cat5 hospital lon
## Length:422 Length:422 Length:422 Min. :-13.27
## Class :character Class :character Class :character 1st Qu.:-13.25
## Mode :character Mode :character Mode :character Median :-13.23
## Mean :-13.23
## 3rd Qu.:-13.22
## Max. :-13.21
##
## lat infector source wt_kg
## Min. :8.448 Length:422 Length:422 Min. :-4.00
## 1st Qu.:8.461 Class :character Class :character 1st Qu.:40.00
## Median :8.469 Mode :character Mode :character Median :54.50
## Mean :8.470 Mean :52.12
## 3rd Qu.:8.479 3rd Qu.:65.00
## Max. :8.490 Max. :95.00
##
## ht_cm ct_blood fever chills
## Min. : 12.0 Min. :16.00 Length:422 Length:422
## 1st Qu.: 83.5 1st Qu.:20.00 Class :character Class :character
## Median :128.0 Median :22.00 Mode :character Mode :character
## Mean :121.6 Mean :21.29
## 3rd Qu.:156.0 3rd Qu.:23.00
## Max. :266.0 Max. :25.00
##
## cough aches vomit temp
## Length:422 Length:422 Length:422 Min. :35.60
## Class :character Class :character Class :character 1st Qu.:38.30
## Mode :character Mode :character Mode :character Median :38.90
## Mean :38.63
## 3rd Qu.:39.30
## Max. :40.40
## NA's :11
## time_admission bmi days_onset_hosp
## Length:422 Min. :-100.00 Min. : 0.000
## Class1:hms 1st Qu.: 24.87 1st Qu.: 1.000
## Class2:difftime Median : 33.59 Median : 1.000
## Mode :numeric Mean : 51.04 Mean : 2.036
## 3rd Qu.: 53.27 3rd Qu.: 3.000
## Max. : 428.06 Max. :18.000
##
It was observed that the following columns are not needed for this analysis, so they are removed:
# Drop the date, hospital and transmission-chain columns in a single
# assignment (assigning NULL removes the named columns).
Maternity[c(
  "date_infection",
  "date_outcome",
  "date_onset",
  "date_hospitalisation",
  "hospital",
  "infector",
  "source"
)] <- NULL
View(Maternity)
### age_years is treated as a duplicate of age, so it is removed.
# NOTE(review): the summary output reports Mean 15.12 for age and 15.03 for
# age_years, so the two columns are not strictly identical - confirm.
Maternity[["age_years"]] <- NULL
### age_cat5 is treated as a duplicate of age_cat, so it is removed.
Maternity[["age_cat5"]] <- NULL
#### I need to create a table to view the months among the years in age_unit
# Count how many ages are recorded in each unit.
table(Maternity$age_unit)
##
## months years
## 2 420
## column 48 and 214 ages were in months
### To change the age in months to years and months unit to years.
library(dplyr)
# mutate() evaluates its arguments in order: `age` is rescaled while
# `age_unit` still holds the original unit, and only then is the unit
# relabelled as "years".
Maternity <- mutate(
  Maternity,
  age = ifelse(test = age_unit == "months", yes = age / 12, no = age),
  age_unit = ifelse(test = age_unit == "months", yes = "years", no = age_unit)
)
## [1] 55 173 244 288 296
## [1] 79 80 81 82 416 417 418 419 420 421 422
## [1] 3 4 7 13 16 17 31 62 82 83 85 107 115 118 126 130 138 156 184
## [20] 195 201 215 233 240 244 248 249 257 260 262 264 274 278 280 281 298 300 309
## [39] 317 331 335 344 345 351 381 396 411
# Find the rows whose bmi is negative. which() returns integer positions
# and silently drops NA comparisons, so the index is safe to assign through.
negative_rowsbmi <- which(Maternity$bmi < 0)
negative_rowsbmi
## [1] 73
# There is only one negative value in the `bmi` column, which is probably a
# typing error, so it is made positive by taking the absolute value.
# The which() index is reused here: assigning through a logical mask would
# fail if `bmi` contained an NA.
Maternity$bmi[negative_rowsbmi] <- abs(Maternity$bmi[negative_rowsbmi])
View(Maternity)
# Attach ggplot2 for the plots that follow.
library(ggplot2)
# Bar chart: number of records per gender.
ggplot(Maternity, aes(gender, fill = gender)) +
  geom_bar() +
  scale_fill_manual(values = c("m" = "green", "f" = "red")) +
  labs(
    title = " Frequency representation of gender",
    x = "Gender",
    y = "Frequency"
  )

# Calling library() on an already attached package is a no-op.
library(ggplot2)

# Bar chart: number of records per outcome.
ggplot(Maternity, aes(outcome, fill = outcome)) +
  geom_bar() +
  scale_fill_manual(values = c("Death" = "black", "Recover" = "blue")) +
  labs(
    title = " Frequency representation of outcome",
    x = "outcome",
    y = "Frequency"
  )

# Dodged bars of age by gender, filled by outcome. With stat = "identity"
# one bar is drawn per row, so bars within a group overlap.
# NOTE(review): the title says "Scatter Plot" but this is a bar chart.
ggplot(Maternity, aes(gender, age, fill = outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("Death" = "black", "Recover" = "blue")) +
  labs(
    title = " Scatter Plot representation of Sex",
    x = "gender",
    y = "age"
  )

# Jittered points of age by gender, coloured by outcome.
# The x aesthetic stays `gender` so the "m"/"f" labels below actually match
# the axis breaks; the outcome groups are separated within each gender by
# dodging instead of by an interaction() on the x axis.
ggplot(Maternity, aes(x = gender, y = age, color = outcome)) +
  geom_point(position = position_jitterdodge(jitter.width = 0.2)) +
  scale_x_discrete(labels = c("m" = "Male", "f" = "Female")) +
  scale_color_manual(values = c("Death" = "red", "Recover" = "blue")) +
  labs(
    title = "Gender Group",
    x = "Gender",
    y = "Age",
    color = "Outcome"
  ) +
  theme_minimal()

# Bar chart: count of records at each age, split by gender.
ggplot(Maternity, aes(age, fill = gender)) +
  geom_bar(position = "dodge", colour = "black") +
  scale_fill_manual(values = c("f" = "pink", "m" = "green"))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 6.0 12.0 15.5 23.0 59.0
# Line graph of generation against age.
ggplot(data = Maternity, mapping = aes(x = age, y = generation)) +
  geom_line(colour = "purple", size = 0.7) +
  #scale_fill_manual(values = c("f" = "pink", "m" = "green"))+
  labs(
    title = " Line Graph",
    x = "age",
    y = "generation"
  )
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 6.0 12.0 15.5 23.0 59.0
# Horizontal boxplots of each numeric column, used to look for outliers.
ol1 <- ggplot(Maternity, aes(x = generation)) +
  geom_boxplot(fill = "green")
ol2 <- ggplot(Maternity, aes(x = age)) +
  geom_boxplot(fill = "green")
ol3 <- ggplot(Maternity, aes(x = wt_kg)) +
  geom_boxplot(fill = "green")
ol4 <- ggplot(Maternity, aes(x = ht_cm)) +
  geom_boxplot(fill = "green")
ol5 <- ggplot(Maternity, aes(x = ct_blood)) +
  geom_boxplot(fill = "green")
ol6 <- ggplot(Maternity, aes(x = temp)) +
  geom_boxplot(fill = "green")
ol7 <- ggplot(Maternity, aes(x = days_onset_hosp)) +
  geom_boxplot(fill = "green")
ol8 <- ggplot(Maternity, aes(x = bmi)) +
  geom_boxplot(fill = "green")

# Lay the eight boxplots out on a three-column grid.
library(gridExtra)
grid.arrange(ol1, ol2, ol3, ol4, ol5, ol6, ol7, ol8, ncol = 3)

# Bar chart: number of records with and without fever.
ggplot(Maternity, aes(fever, fill = fever)) +
  geom_bar() +
  scale_fill_manual(values = c("no" = "orange", "yes" = "chocolate"))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 6.0 12.0 15.5 23.0 59.0
# Bar chart: number of records with and without chills.
ggplot(data = Maternity, mapping = aes(x = chills, fill = chills)) +
  scale_fill_manual(values = c(no = "skyblue", yes = "gold")) +
  geom_bar()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 6.0 12.0 15.5 23.0 59.0
# Bar chart: number of records with and without aches.
ggplot(data = Maternity, mapping = aes(x = aches, fill = aches)) +
  scale_fill_manual(values = c(no = "gold", yes = "green")) +
  geom_bar()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 6.0 12.0 15.5 23.0 59.0
# Build a dodged bar chart of one symptom column split by gender.
#   symptom: name of the symptom column in `Maternity`, as a string
#   title:   plot title
#   legend:  title of the fill legend
# Returns the ggplot object; nothing is drawn until it is printed/arranged.
plot_symptom_by_gender <- function(symptom, title, legend) {
  ggplot(Maternity, aes(x = gender, fill = .data[[symptom]])) +
    geom_bar(position = "dodge") +
    scale_fill_manual(values = c("no" = "gold", "yes" = "chocolate")) +
    labs(title = title, x = "Gender", y = "Frequency", fill = legend)
}

G1 <- plot_symptom_by_gender(
  "fever", "Distribution of Fever by Gender", "Fever"
)
G2 <- plot_symptom_by_gender(
  "chills", "Distribution of Chills by Gender", "Chills"
)
G3 <- plot_symptom_by_gender(
  "vomit", "Distribution of vomit by Gender", " vomit"
)
G4 <- plot_symptom_by_gender(
  "aches", "Distribution of aches by Gender", " aches"
)
G5 <- plot_symptom_by_gender(
  "cough", "Distribution of cough by Gender", "cough"
)
library(gridExtra)
# Show the five symptom-by-gender plots on a two-column grid.
grid.arrange(G1, G2, G3, G4, G5, ncol = 2)

# Build a dodged bar chart of one symptom column split by outcome.
#   symptom: name of the symptom column in `Maternity`, as a string
#   title:   plot title
#   legend:  title of the fill legend
# Returns the ggplot object; nothing is drawn until it is printed/arranged.
plot_symptom_by_outcome <- function(symptom, title, legend) {
  ggplot(Maternity, aes(x = outcome, fill = .data[[symptom]])) +
    geom_bar(position = "dodge") +
    scale_fill_manual(values = c("no" = "gold", "yes" = "chocolate")) +
    labs(title = title, x = "outcome", y = "Frequency", fill = legend)
}

OT1 <- plot_symptom_by_outcome(
  "aches", "Distribution of aches by outcome", " aches"
)
OT2 <- plot_symptom_by_outcome(
  "chills", "Distribution of Chills by outcome", "Chills"
)
OT3 <- plot_symptom_by_outcome(
  "cough", "Distribution of cough by outcome", " cough"
)
OT4 <- plot_symptom_by_outcome(
  "fever", "Distribution of fever by outcome", "fever"
)
OT5 <- plot_symptom_by_outcome(
  "vomit", "Distribution of vomit by outcome", " vomit"
)
library(gridExtra)
# Show the five symptom-by-outcome plots on a two-column grid.
grid.arrange(OT1, OT2, OT3, OT4, OT5, ncol = 2)
library(gridExtra)

# Build a bar chart of age against one symptom column.
#   symptom: name of the symptom column in `Maternity`, as a string; it is
#            also used for the x-axis label, the title suffix and the legend
#            title (with a leading space).
# With stat = "identity" one bar is drawn per row, so bars within a group
# overlap.
# Returns the ggplot object; nothing is drawn until it is printed/arranged.
plot_age_by_symptom <- function(symptom) {
  ggplot(
    Maternity,
    aes(x = .data[[symptom]], y = age, fill = .data[[symptom]])
  ) +
    geom_bar(stat = "identity", position = "dodge") +
    scale_fill_manual(values = c("no" = "gold", "yes" = "chocolate")) +
    labs(
      title = paste0("Distribution of age by ", symptom),
      x = symptom,
      y = "Age",
      fill = paste0(" ", symptom)
    )
}

A1 <- plot_age_by_symptom("aches")
A2 <- plot_age_by_symptom("chills")
A3 <- plot_age_by_symptom("cough")
A4 <- plot_age_by_symptom("fever")
A5 <- plot_age_by_symptom("vomit")

# The column count must be a whole number (the original passed 3.5).
grid.arrange(A1, A2, A3, A4, A5, ncol = 3)

# The age_cat values are repeated across rows, so convert the column to an
# ordered factor with the categories in ascending age order.
Maternity$age_cat <- factor(
  Maternity$age_cat,
  levels = c("0-4", "5-9", "10-14", "15-19", "20-29", "30-49", "50-69"),
  ordered = TRUE
)
# Inspect the structure of age_cat.
str(Maternity$age_cat)
## Ord.factor w/ 7 levels "0-4"<"5-9"<"10-14"<..: 4 3 4 1 3 6 1 3 6 1 ...
## [1] "0-4" "5-9" "10-14" "15-19" "20-29" "30-49" "50-69"
# Bar chart: number of records in each age category.
ggplot(Maternity, aes(age_cat)) +
  geom_bar(fill = "chocolate") +
  labs(
    title = "Age Category Distribution",
    x = "Age Category",
    y = "Frequency"
  )
# Calling library() on an already attached package is a no-op.
library(ggplot2)
# Dodged bar chart: number of records per gender, split by outcome.
ggplot(Maternity, aes(x = gender, fill = outcome)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("Death" = "gold", "Recover" = "chocolate")) +
  labs(
    title = "Gender Distribution with Outcome",
    x = "gender",
    y = "Frequency"
  )
# Calling library() on an already attached package is a no-op.
library(ggplot2)
# (Re)apply the ordered age-category levels so the x axis is in age order.
Maternity$age_cat <- factor(
  Maternity$age_cat,
  levels = c("0-4", "5-9", "10-14", "15-19", "20-29", "30-49", "50-69"),
  ordered = TRUE
)
str(Maternity$age_cat)
## Ord.factor w/ 7 levels "0-4"<"5-9"<"10-14"<..: 4 3 4 1 3 6 1 3 6 1 ...
# Dodged bar chart: records per age category, split by outcome.
ggplot(Maternity, aes(x = age_cat, fill = outcome)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("Death" = "gold", "Recover" = "chocolate")) +
  labs(
    title = "Age_cat Distribution with Outcome",
    x = "Age_cat",
    y = "Frequency"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Calling library() on an already attached package is a no-op.
library(ggplot2)
Maternity$age_cat <- factor(
  Maternity$age_cat,
  levels = c("0-4", "5-9", "10-14", "15-19", "20-29", "30-49", "50-69"),
  ordered = TRUE
)
str(Maternity$age_cat)
## Ord.factor w/ 7 levels "0-4"<"5-9"<"10-14"<..: 4 3 4 1 3 6 1 3 6 1 ...
# Same chart with geom_bar()'s default (stacked) position.
ggplot(Maternity, aes(x = age_cat, fill = outcome)) +
  geom_bar() +
  scale_fill_manual(values = c("Death" = "gold", "Recover" = "chocolate")) +
  labs(
    title = "Age_cat Distribution with Outcome",
    x = "Age_cat",
    y = "Frequency"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
library(ggplot2)
# Dodged bar chart: records per days_onset_hosp value, split by outcome.
ggplot(Maternity, aes(x = days_onset_hosp, fill = outcome)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("Death" = "gold", "Recover" = "chocolate")) +
  labs(
    title = "Days_onset_hosp Distribution with Outcome",
    x = "Days_onset_hosp",
    y = "Frequency"
  )

# Calling library() on an already attached package is a no-op.
library(ggplot2)
# Same chart with geom_bar()'s default (stacked) position.
ggplot(Maternity, aes(x = days_onset_hosp, fill = outcome)) +
  geom_bar() +
  scale_fill_manual(values = c("Death" = "gold", "Recover" = "chocolate")) +
  labs(
    title = "Days_onset_hosp Distribution with Outcome",
    x = "Days_onset_hosp",
    y = "Frequency"
  )

# Attach corrplot for the correlation plot that follows.
library(corrplot)
# Keep only the numeric columns whose pairwise correlations are of interest.
sel_columns <- Maternity[
  ,
  c("generation", "age", "wt_kg", "ht_cm", "ct_blood", "temp")
]
# Correlation matrix; "complete.obs" uses only rows with no missing value
# in any of the selected columns.
cor_matrix <- cor(x = sel_columns, use = "complete.obs")
# Draw the matrix as coloured tiles on a red-white-blue ramp with the
# coefficients printed on top.
corrplot(
  cor_matrix,
  method = "color",
  col = colorRampPalette(c("red", "white", "blue"))(200),
  addCoef.col = "black", # colour of the printed coefficients
  number.cex = 0.9,      # size of the printed coefficients
  tl.col = "black",      # colour of the variable labels
  tl.cex = 0.8           # size of the variable labels
)
## case_id generation outcome gender
## Length:305 Min. : 3.00 Length:305 Length:305
## Class :character 1st Qu.:12.00 Class :character Class :character
## Mode :character Median :16.00 Mode :character Mode :character
## Mean :16.62
## 3rd Qu.:21.00
## Max. :37.00
##
## age age_unit age_cat lon lat
## Min. : 0.0 Length:305 0-4 :59 Min. :-13.27 Min. :8.448
## 1st Qu.: 6.0 Class :character 5-9 :62 1st Qu.:-13.25 1st Qu.:8.461
## Median :12.0 Mode :character 10-14:54 Median :-13.23 Median :8.468
## Mean :15.5 15-19:35 Mean :-13.23 Mean :8.469
## 3rd Qu.:23.0 20-29:56 3rd Qu.:-13.22 3rd Qu.:8.479
## Max. :59.0 30-49:35 Max. :-13.21 Max. :8.490
## 50-69: 4
## wt_kg ht_cm ct_blood fever
## Min. : 4.00 Min. : 12.0 Min. :16.00 Length:305
## 1st Qu.:40.00 1st Qu.: 85.0 1st Qu.:20.00 Class :character
## Median :54.00 Median :129.0 Median :22.00 Mode :character
## Mean :52.24 Mean :122.1 Mean :21.23
## 3rd Qu.:66.00 3rd Qu.:157.0 3rd Qu.:23.00
## Max. :95.00 Max. :266.0 Max. :25.00
##
## chills cough aches vomit
## Length:305 Length:305 Length:305 Length:305
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## temp time_admission bmi days_onset_hosp
## Min. :35.60 Length:305 Min. : 12.31 Min. : 0.000
## 1st Qu.:38.40 Class1:hms 1st Qu.: 24.65 1st Qu.: 1.000
## Median :38.90 Class2:difftime Median : 33.57 Median : 1.000
## Mean :38.71 Mode :numeric Mean : 52.33 Mean : 2.151
## 3rd Qu.:39.30 3rd Qu.: 53.02 3rd Qu.: 3.000
## Max. :40.40 Max. :428.06 Max. :18.000
##
It was observed that both height and weight are directly proportional to
age: an increase in age is accompanied by an increase in height and weight.
# Dodged bar chart of days_onset_hosp split by outcome, with white bar
# outlines and a purple/brown palette.
ggplot(data = Maternity, mapping = aes(x = days_onset_hosp, fill = outcome)) +
  geom_bar(colour = "white", position = "dodge") +
  scale_fill_manual(values = c(Death = "purple", Recover = "brown")) +
  labs(
    title = "Distribution of days_onset_hosp by Outcome",
    x = "days_onset_hosp",
    y = "Frequency",
    fill = " Outcome"
  )