Dorcas Olanike Agboola
2024-12-10
Load the dataset using the library and find the summary of the dataset.
# Load the Central Hospital line list and inspect its size and contents.
library(readr)

# NOTE(review): absolute, machine-specific path -- the script only runs where
# this exact file location exists.
CHospital <- read_csv("C:/Users/USER/Desktop/ICAMMDA/Data Analytics/Central Hospital.csv")

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(CHospital)
}

# Number of rows and columns.
dim(CHospital)
## [1] 454 30

# Column-by-column summary; its printed output is reproduced below.
summary(CHospital)
## case_id generation date_infection date_onset
## Length:454 Min. : 2.00 Min. :2014-04-23 Min. :2014-05-01
## Class :character 1st Qu.:13.00 1st Qu.:2014-09-07 1st Qu.:2014-09-20
## Mode :character Median :16.00 Median :2014-10-12 Median :2014-10-23
## Mean :16.88 Mean :2014-10-22 Mean :2014-11-05
## 3rd Qu.:20.00 3rd Qu.:2014-11-22 3rd Qu.:2014-12-15
## Max. :36.00 Max. :2015-04-22 Max. :2015-04-28
## NA's :154
## date_hospitalisation date_outcome outcome
## Min. :2014-05-06 Min. :2014-05-11 Length:454
## 1st Qu.:2014-09-23 1st Qu.:2014-10-03 Class :character
## Median :2014-10-25 Median :2014-11-06 Mode :character
## Mean :2014-11-07 Mean :2014-11-18
## 3rd Qu.:2014-12-17 3rd Qu.:2014-12-31
## Max. :2015-04-28 Max. :2015-05-17
## NA's :74
## gender age age_unit age_years
## Length:454 Min. : 0.0 Length:454 Min. : 0.00
## Class :character 1st Qu.: 7.0 Class :character 1st Qu.: 7.00
## Mode :character Median :15.0 Mode :character Median :15.00
## Mean :17.4 Mean :17.38
## 3rd Qu.:24.0 3rd Qu.:24.00
## Max. :87.0 Max. :87.00
## NA's :9 NA's :9
## age_cat age_cat5 hospital lon
## Length:454 Length:454 Length:454 Min. :-13.27
## Class :character Class :character Class :character 1st Qu.:-13.25
## Mode :character Mode :character Mode :character Median :-13.23
## Mean :-13.23
## 3rd Qu.:-13.22
## Max. :-13.21
##
## lat infector source wt_kg
## Min. :8.448 Length:454 Length:454 Min. : -2.00
## 1st Qu.:8.460 Class :character Class :character 1st Qu.: 43.00
## Median :8.468 Mode :character Mode :character Median : 57.50
## Mean :8.469 Mean : 55.13
## 3rd Qu.:8.479 3rd Qu.: 67.00
## Max. :8.490 Max. :103.00
##
## ht_cm ct_blood fever chills
## Min. : 15.0 Min. :17.00 Length:454 Length:454
## 1st Qu.: 97.0 1st Qu.:20.00 Class :character Class :character
## Median :135.0 Median :22.00 Mode :character Mode :character
## Mean :129.7 Mean :21.19
## 3rd Qu.:161.0 3rd Qu.:22.00
## Max. :335.0 Max. :25.00
##
## cough aches vomit temp
## Length:454 Length:454 Length:454 Min. :35.7
## Class :character Class :character Class :character 1st Qu.:37.8
## Mode :character Mode :character Mode :character Median :38.8
## Mean :38.5
## 3rd Qu.:39.2
## Max. :40.4
## NA's :3
## time_admission bmi days_onset_hosp
## Length:454 Min. :-41.32 Min. : 0.000
## Class1:hms 1st Qu.: 24.10 1st Qu.: 1.000
## Class2:difftime Median : 32.23 Median : 1.000
## Mode :numeric Mean : 44.57 Mean : 1.852
## 3rd Qu.: 47.71 3rd Qu.: 2.000
## Max. :370.37 Max. :12.000
##
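It was observed that some variables contain missing values (for example, the summary above reports 9 for `age` and 3 for `temp`); the row indices that were found are listed below.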
## [1] 69 158 250 279 312 322 325 360 363
## [1] 452 453 454
## [1] 13 18 20 21 27 32 45 54 64 65 70 72 74 91 92 103 107 110 125
## [20] 128 169 171 172 187 192 195 199 206 209 220 228 229 251 253 261 274 286 287
## [39] 291 300 304 305 306 307 314 335 341 346 354 362 363 369 378 379 389 394 409
## [58] 411 420 435 437 438 443 446 449 454
# Find the rows where bmi is negative. which() returns integer positions and
# silently drops comparisons that evaluate to NA.
negative_rowsbmi <- which(CHospital$bmi < 0)
negative_rowsbmi
## integer(0)
# NOTE(review): the recorded output above is integer(0), yet the dataset summary
# reports a minimum bmi of -41.32 -- confirm which state of the data this
# output came from.

# A negative bmi is presumably a data-entry error, so replace every negative
# value with its absolute value. Indexing by the positions found above (rather
# than by a logical vector) keeps the assignment safe if bmi ever contains NA,
# and it is a no-op when no negative value exists.
# NOTE(review): wt_kg also has a negative minimum (-2) in the summary; if bmi
# was derived from it, correcting wt_kg and recomputing bmi may be more
# appropriate -- confirm.
CHospital$bmi[negative_rowsbmi] <- abs(CHospital$bmi[negative_rowsbmi])

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(CHospital)
}

library(ggplot2)
# Bar chart of the number of records per gender, with one fixed colour per
# gender code.
ggplot(CHospital, aes(gender, fill = gender)) +
  geom_bar() +
  scale_fill_manual(values = c("m" = "green", "f" = "red")) +
  labs(title = " Frequency representation of gender", x = "Gender", y = "Frequency")

# Kept as its own statement; it was previously fused onto the labs() line,
# which does not parse.
library(ggplot2)
# Four exploratory charts. Each plot is now a separate statement; they were
# previously fused end-to-start on shared lines, which does not parse.

# Bar chart of the number of records per outcome.
ggplot(CHospital, aes(outcome, fill = outcome)) +
  geom_bar() +
  scale_fill_manual(values = c("Death" = "black", "Recover" = "blue")) +
  labs(title = " Frequency representation of outcome", x = "outcome", y = "Frequency")

# Age by gender with bars dodged by outcome; stat = "identity" plots the age
# values as given instead of counting rows.
ggplot(CHospital, aes(gender, age, fill = outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("Death" = "black", "Recover" = "blue")) +
  labs(title = " Barchart representation of gender", x = "gender", y = "age")

# Jittered ages, coloured by outcome, positioned by gender/outcome combination.
# NOTE(review): the x positions come from interaction(gender, outcome)
# (e.g. "m.Death"), so the "m"/"f" names in scale_x_discrete() likely never
# match an axis break -- confirm the intended axis labels.
ggplot(CHospital, aes(x = gender, y = age, color = outcome)) +
  geom_jitter(aes(x = interaction(gender, outcome)), width = 0.2) +
  scale_x_discrete(labels = c("m" = "Male", "f" = "Female")) + # Set proper labels
  scale_color_manual(values = c("Death" = "red", "Recover" = "blue")) +
  labs(
    title = "Gender Group",
    x = "Gender",
    y = "Age",
    color = "Outcome"
  ) +
  theme_minimal()

# Number of records at each age, with side-by-side bars per gender.
ggplot(CHospital, aes(age, fill = gender)) +
  geom_bar(position = "dodge", col = "black") +
  scale_fill_manual(values = c("f" = "pink", "m" = "green"))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 7.0 15.0 17.9 24.0 73.0
# Line graph of generation against age.
# `linewidth` replaces `size` for line thickness: ggplot2 3.4.0 deprecated
# `size` for lines and warns when it is used.
# A commented-out scale_fill_manual() call was dropped from the chain because
# this plot maps no fill aesthetic.
ggplot(CHospital, aes(age, generation)) +
  geom_line(linewidth = 0.7, col = "purple") +
  labs(title = " Line Graph", x = "age", y = "generation")
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 7.0 15.0 17.9 24.0 73.0
# One horizontal box plot per numeric variable (ol1 ... ol8), used to look for
# outliers. Each plot object is stored so the plots can be arranged together
# afterwards.
ol1 <- ggplot(data = CHospital, mapping = aes(x = generation)) + geom_boxplot(fill = "green")
ol2 <- ggplot(data = CHospital, mapping = aes(x = age)) + geom_boxplot(fill = "green")
ol3 <- ggplot(data = CHospital, mapping = aes(x = wt_kg)) + geom_boxplot(fill = "green")
ol4 <- ggplot(data = CHospital, mapping = aes(x = ht_cm)) + geom_boxplot(fill = "green")
ol5 <- ggplot(data = CHospital, mapping = aes(x = ct_blood)) + geom_boxplot(fill = "green")
ol6 <- ggplot(data = CHospital, mapping = aes(x = temp)) + geom_boxplot(fill = "green")
ol7 <- ggplot(data = CHospital, mapping = aes(x = days_onset_hosp)) + geom_boxplot(fill = "green")
ol8 <- ggplot(data = CHospital, mapping = aes(x = bmi)) + geom_boxplot(fill = "green")
library(gridExtra)

# Show the eight box plots together in a three-column grid.
grid.arrange(ol1, ol2, ol3, ol4, ol5, ol6, ol7, ol8, ncol = 3)

# Bar chart of the number of records per fever status. This is now a separate
# statement; it was previously fused onto the grid.arrange() line, which does
# not parse.
ggplot(CHospital, aes(fever, fill = fever)) +
  geom_bar() +
  scale_fill_manual(values = c("no" = "orange", "yes" = "chocolate"))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 7.0 15.0 17.9 24.0 73.0
# Bar chart of the number of records per chills status ("no" / "yes").
ggplot(data = CHospital, mapping = aes(x = chills, fill = chills)) +
  scale_fill_manual(values = c("no" = "skyblue", "yes" = "gold")) +
  geom_bar()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 7.0 15.0 17.9 24.0 73.0
# Bar chart of the number of records per aches status ("no" / "yes").
ggplot(data = CHospital, mapping = aes(x = aches, fill = aches)) +
  scale_fill_manual(values = c("no" = "gold", "yes" = "green")) +
  geom_bar()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 7.0 15.0 17.9 24.0 73.0
# Symptom counts by gender (G1 ... G5): one dodged bar chart per symptom, stored
# so they can be arranged on a grid afterwards. Each assignment is now its own
# statement; they were previously fused onto the closing labs() line of the
# preceding plot, which does not parse.

# Fever by gender.
G1 <- ggplot(CHospital, aes(x = gender, fill = fever)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("no" = "gold", "yes" = "chocolate")) +
  labs(
    title = "Distribution of Fever by Gender",
    x = "Gender",
    y = "Frequency",
    fill = "Fever"
  )

# Chills by gender.
G2 <- ggplot(CHospital, aes(x = gender, fill = chills)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("no" = "gold", "yes" = "chocolate")) +
  labs(
    title = "Distribution of Chills by Gender",
    x = "Gender",
    y = "Frequency",
    fill = "Chills"
  )

# Vomiting by gender.
G3 <- ggplot(CHospital, aes(x = gender, fill = vomit)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("no" = "gold", "yes" = "chocolate")) +
  labs(
    title = "Distribution of vomit by Gender",
    x = "Gender",
    y = "Frequency",
    fill = " vomit"
  )

# Aches by gender.
G4 <- ggplot(CHospital, aes(x = gender, fill = aches)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("no" = "gold", "yes" = "chocolate")) +
  labs(
    title = "Distribution of aches by Gender",
    x = "Gender",
    y = "Frequency",
    fill = " aches"
  )

# Cough by gender.
G5 <- ggplot(CHospital, aes(x = gender, fill = cough)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("no" = "gold", "yes" = "chocolate")) +
  labs(
    title = "Distribution of cough by Gender",
    x = "Gender",
    y = "Frequency",
    fill = "cough"
  )
library(gridExtra)

# Show the five symptom-by-gender charts together in a two-column grid.
grid.arrange(G1, G2, G3, G4, G5, ncol = 2)

# Symptom counts by outcome (OT1 ... OT5): one dodged bar chart per symptom,
# stored for a later grid. Each assignment is now its own statement; they were
# previously fused onto the end of the preceding statement, which does not
# parse.

# Aches by outcome.
OT1 <- ggplot(CHospital, aes(x = outcome, fill = aches)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("no" = "red", "yes" = "green")) +
  labs(
    title = "Distribution of aches",
    x = "outcome",
    y = "Frequency",
    fill = " aches"
  )

# Chills by outcome.
OT2 <- ggplot(CHospital, aes(x = outcome, fill = chills)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("no" = "red", "yes" = "green")) +
  labs(
    title = "Distribution of Chills by outcome",
    x = "outcome",
    y = "Frequency",
    fill = "Chills"
  )

# Cough by outcome.
OT3 <- ggplot(CHospital, aes(x = outcome, fill = cough)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("no" = "red", "yes" = "green")) +
  labs(
    title = "Distribution of cough by outcome",
    x = "outcome",
    y = "Frequency",
    fill = " cough"
  )

# Fever by outcome.
OT4 <- ggplot(CHospital, aes(x = outcome, fill = fever)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("no" = "red", "yes" = "green")) +
  labs(
    title = "Distribution of fever by outcome",
    x = "outcome",
    y = "Frequency",
    fill = "fever"
  )

# Vomiting by outcome.
OT5 <- ggplot(CHospital, aes(x = outcome, fill = vomit)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("no" = "red", "yes" = "green")) +
  labs(
    title = "Distribution of vomit by outcome",
    x = "outcome",
    y = "Frequency",
    fill = " vomit"
  )
library(gridExtra)

# Show the five symptom-by-outcome charts together in a two-column grid.
# A second library(gridExtra) call that was fused onto the end of this line
# (a parse error) is dropped: the package is already attached just above.
grid.arrange(OT1, OT2, OT3, OT4, OT5, ncol = 2)
# Age against each symptom status (A1 ... A5). stat = "identity" draws the age
# values as given rather than counting rows; the plot objects are stored so
# they can be arranged on a grid afterwards.

# Age by aches.
A1 <- ggplot(data = CHospital, mapping = aes(x = aches, y = age, fill = aches)) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(
    title = "Distribution of age by aches",
    x = "aches",
    y = "Age",
    fill = " aches"
  ) +
  scale_fill_manual(values = c("no" = "gold", "yes" = "chocolate"))

# Age by chills.
A2 <- ggplot(data = CHospital, mapping = aes(x = chills, y = age, fill = chills)) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(
    title = "Distribution of age by chills",
    x = "chills",
    y = "Age",
    fill = " chills"
  ) +
  scale_fill_manual(values = c("no" = "gold", "yes" = "chocolate"))

# Age by cough.
A3 <- ggplot(data = CHospital, mapping = aes(x = cough, y = age, fill = cough)) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(
    title = "Distribution of age by cough",
    x = "cough",
    y = "Age",
    fill = " cough"
  ) +
  scale_fill_manual(values = c("no" = "gold", "yes" = "chocolate"))

# Age by fever.
A4 <- ggplot(data = CHospital, mapping = aes(x = fever, y = age, fill = fever)) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(
    title = "Distribution of age by fever",
    x = "fever",
    y = "Age",
    fill = " fever"
  ) +
  scale_fill_manual(values = c("no" = "gold", "yes" = "chocolate"))

# Age by vomiting.
A5 <- ggplot(data = CHospital, mapping = aes(x = vomit, y = age, fill = vomit)) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(
    title = "Distribution of age by vomit",
    x = "vomit",
    y = "Age",
    fill = " vomit"
  ) +
  scale_fill_manual(values = c("no" = "gold", "yes" = "chocolate"))
# Show the five age-by-symptom charts together. A grid needs a whole number of
# columns, so the original ncol = 3.5 is replaced by 3.
grid.arrange(A1, A2, A3, A4, A5, ncol = 3)

# The age_cat values are a small set of repeated categories, so convert the
# column to an ordered factor with the categories in ascending age order.
# NOTE(review): factor() turns any value not listed in `levels` into NA. The
# dataset summary reports ages up to 87 while the highest level here is
# "50-69" -- confirm that no age category is missing from this list.
CHospital$age_cat <- factor(
  CHospital$age_cat,
  levels = c("0-4", "5-9", "10-14", "15-19", "20-29", "30-49", "50-69"),
  ordered = TRUE
)
str(CHospital$age_cat)
## Ord.factor w/ 7 levels "0-4"<"5-9"<"10-14"<..: 1 6 5 3 1 3 5 6 5 6 ...
## [1] "0-4" "5-9" "10-14" "15-19" "20-29" "30-49" "50-69"
# Bar chart of the number of records in each age category.
ggplot(CHospital, aes(age_cat)) +
  geom_bar(fill = "chocolate") +
  labs(title = "Age Category Distribution", x = "Age Category", y = "Frequency")

# Kept as its own statement; it was previously fused onto the labs() line,
# which does not parse.
library(ggplot2)
# Number of records per gender, with side-by-side bars for each outcome.
ggplot(CHospital, aes(x = gender, fill = outcome)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("Death" = "gold", "Recover" = "chocolate")) +
  labs(title = "Gender Distribution with Outcome", x = "gender", y = "Frequency")

# Kept as its own statement; it was previously fused onto the labs() line,
# which does not parse.
library(ggplot2)
# Make sure age_cat is an ordered factor in ascending age order. Re-running
# this on an already converted column leaves it unchanged.
CHospital$age_cat <- factor(
  CHospital$age_cat,
  levels = c("0-4", "5-9", "10-14", "15-19", "20-29", "30-49", "50-69"),
  ordered = TRUE
)
str(CHospital$age_cat)
## Ord.factor w/ 7 levels "0-4"<"5-9"<"10-14"<..: 1 6 5 3 1 3 5 6 5 6 ...

# Number of records per age category, with side-by-side bars for each outcome.
# The x-axis labels are rotated 45 degrees and right-aligned to the ticks.
ggplot(CHospital, aes(x = age_cat, fill = outcome)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("Death" = "gold", "Recover" = "chocolate")) +
  labs(title = "Age_cat Distribution with Outcome", x = "Age_cat", y = "Frequency") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Kept as its own statement; it was previously fused onto the theme() line,
# which does not parse.
library(ggplot2)
# Make sure age_cat is an ordered factor in ascending age order. Re-running
# this on an already converted column leaves it unchanged.
CHospital$age_cat <- factor(
  CHospital$age_cat,
  levels = c("0-4", "5-9", "10-14", "15-19", "20-29", "30-49", "50-69"),
  ordered = TRUE
)
str(CHospital$age_cat)
## Ord.factor w/ 7 levels "0-4"<"5-9"<"10-14"<..: 1 6 5 3 1 3 5 6 5 6 ...

# Number of records per age category, with the outcomes stacked in one bar
# (geom_bar()'s default position). The x-axis labels are rotated 45 degrees.
ggplot(CHospital, aes(x = age_cat, fill = outcome)) +
  geom_bar() +
  scale_fill_manual(values = c("Death" = "gold", "Recover" = "chocolate")) +
  labs(title = "Age_cat Distribution with Outcome", x = "Age_cat", y = "Frequency") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Kept as its own statement; it was previously fused onto the theme() line,
# which does not parse.
library(ggplot2)
# Number of records for each onset-to-hospitalisation delay (days_onset_hosp),
# with side-by-side bars for each outcome.
ggplot(CHospital, aes(x = days_onset_hosp, fill = outcome)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("Death" = "gold", "Recover" = "chocolate")) +
  labs(
    title = "Days_onset_hosp Distribution with Outcome",
    x = "Days_onset_hosp",
    y = "Frequency"
  )

# Kept as its own statement; it was previously fused onto the labs() line,
# which does not parse.
library(ggplot2)
# Number of records for each onset-to-hospitalisation delay (days_onset_hosp),
# with the outcomes stacked in one bar (geom_bar()'s default position).
ggplot(CHospital, aes(x = days_onset_hosp, fill = outcome)) +
  geom_bar() +
  scale_fill_manual(values = c("Death" = "gold", "Recover" = "chocolate")) +
  labs(
    title = "Days_onset_hosp Distribution with Outcome",
    x = "Days_onset_hosp",
    y = "Frequency"
  )

# Needed for the correlation plot that follows. Kept as its own statement; it
# was previously fused onto the labs() line, which does not parse.
library(corrplot)
# Keep only the numeric variables whose pairwise correlations are of interest.
sel_columns <- CHospital[, c(
  "generation", "age", "wt_kg",
  "ht_cm", "ct_blood", "temp"
)]

# Correlation matrix computed on the rows that are complete across all of the
# selected columns.
cor_matrix <- cor(x = sel_columns, use = "complete.obs")

# Draw the matrix as coloured tiles on a red-white-blue palette of 200 shades,
# with the coefficients printed in black.
corrplot(
  cor_matrix,
  method = "color",
  col = colorRampPalette(c("red", "white", "blue"))(200),
  addCoef.col = "black", # colour of the printed coefficients
  number.cex = 0.9,      # size of the printed coefficients
  tl.col = "black",      # colour of the variable labels
  tl.cex = 0.8           # size of the variable labels
)
## case_id generation date_outcome outcome
## Length:268 Min. : 2.00 Min. :2014-05-11 Length:268
## Class :character 1st Qu.:13.00 1st Qu.:2014-10-01 Class :character
## Mode :character Median :17.00 Median :2014-11-07 Mode :character
## Mean :17.02 Mean :2014-11-20
## 3rd Qu.:21.00 3rd Qu.:2015-01-02
## Max. :35.00 Max. :2015-05-14
##
## gender age age_unit age_cat
## Length:268 Min. : 0.0 Length:268 20-29 :61
## Class :character 1st Qu.: 7.0 Class :character 5-9 :49
## Mode :character Median :15.0 Mode :character 10-14 :41
## Mean :17.9 0-4 :39
## 3rd Qu.:24.0 30-49 :39
## Max. :73.0 (Other):38
## NA's : 1
## lon lat wt_kg ht_cm
## Min. :-13.27 Min. :8.450 Min. : 3.00 Min. : 30.0
## 1st Qu.:-13.25 1st Qu.:8.459 1st Qu.: 44.75 1st Qu.:100.8
## Median :-13.23 Median :8.468 Median : 57.50 Median :135.5
## Mean :-13.23 Mean :8.469 Mean : 56.12 Mean :131.7
## 3rd Qu.:-13.22 3rd Qu.:8.479 3rd Qu.: 67.00 3rd Qu.:160.0
## Max. :-13.21 Max. :8.490 Max. :100.00 Max. :281.0
##
## ct_blood fever chills cough
## Min. :17.0 Length:268 Length:268 Length:268
## 1st Qu.:20.0 Class :character Class :character Class :character
## Median :22.0 Mode :character Mode :character Mode :character
## Mean :21.2
## 3rd Qu.:22.0
## Max. :25.0
##
## aches vomit temp time_admission
## Length:268 Length:268 Min. :35.9 Length:268
## Class :character Class :character 1st Qu.:38.3 Class1:hms
## Mode :character Mode :character Median :38.9 Class2:difftime
## Mean :38.6 Mode :numeric
## 3rd Qu.:39.2
## Max. :40.4
##
## bmi days_onset_hosp
## Min. : 10.89 Min. : 0.000
## 1st Qu.: 24.43 1st Qu.: 1.000
## Median : 32.38 Median : 1.000
## Mean : 41.77 Mean : 1.843
## 3rd Qu.: 46.94 3rd Qu.: 2.250
## Max. :231.57 Max. :11.000
##
It was observed that both height and weight are positively related to
age: as age increases, height and weight tend to increase as well.
# Number of records for each onset-to-hospitalisation delay (days_onset_hosp),
# with side-by-side bars per outcome and a white outline around every bar.
ggplot(data = CHospital, mapping = aes(x = days_onset_hosp, fill = outcome)) +
  geom_bar(colour = "white", position = "dodge") +
  labs(
    title = "Distribution of days_onset_hosp by Outcome",
    x = "days_onset_hosp",
    y = "Frequency",
    fill = " Outcome"
  ) +
  scale_fill_manual(values = c("Death" = "purple", "Recover" = "brown"))