library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
# Read the Kenya infectious-disease indicators CSV (absolute path on the
# author's machine) and let readr infer the column types.
data <- "C:/Users/user/Desktop/UN-Report/un-report/data/practice-datasets/kenya-infectious-disease-indicators.csv" %>%
  read_csv()
## Rows: 384 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Metric
## dbl (2): Year, Value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# List the column headers of the imported table
names(data)
## [1] "Metric" "Year" "Value"
# Pull out one subset per cholera indicator, selected by its exact Metric label.
cholera_cases <- data %>%
  filter(Metric == "Number of reported cases of cholera")
cholera_deaths <- data %>%
  filter(Metric == "Number of reported deaths from cholera")
# Compare the distinct "Year" values present in each subset
print(unique(cholera_cases[["Year"]]))
## [1] 1994 1995 1971 1972 1974 1975 1976 1977 1978 1979 1980 1983 1984 1987 1989
## [16] 1992 1996 1981 1982 1985 1986 2009 2008 2007 2006 2005 2004 1997 1998 1999
## [31] 2000 2001 2002 2010 2011 2014 2015 2016
print(unique(cholera_deaths[["Year"]]))
## [1] 1994 1995 1971 1972 1974 1975 1976 1977 1978 1979 1980 1983 1984 1987 1989
## [16] 1992 1996 1981 1982 1985 1986 2009 2008 2007 2006 2005 2004 1997 1998 1999
## [31] 2000 2001 2002 2010 2011 2014 2015 2016
# Normalise the join key (Year) in both subsets.
# trimws() strips leading/trailing whitespace and always returns a character
# vector, so Year is stored as character from here on (note the quoted values
# in the printed output). Because of that, a separate as.character() pass
# afterwards would be a no-op and is not needed.
cholera_cases$Year <- trimws(cholera_cases$Year)
cholera_cases$Year
## [1] "1994" "1995" "1971" "1972" "1974" "1975" "1976" "1977" "1978" "1979"
## [11] "1980" "1983" "1984" "1987" "1989" "1992" "1996" "1981" "1982" "1985"
## [21] "1986" "2009" "2008" "2007" "2006" "2005" "2004" "1997" "1998" "1999"
## [31] "2000" "2001" "2002" "2010" "2011" "2014" "2015" "2016"
cholera_deaths$Year <- trimws(cholera_deaths$Year)
cholera_deaths$Year
## [1] "1994" "1995" "1971" "1972" "1974" "1975" "1976" "1977" "1978" "1979"
## [11] "1980" "1983" "1984" "1987" "1989" "1992" "1996" "1981" "1982" "1985"
## [21] "1986" "2009" "2008" "2007" "2006" "2005" "2004" "1997" "1998" "1999"
## [31] "2000" "2001" "2002" "2010" "2011" "2014" "2015" "2016"
# Join the two subsets on Year so each row holds the reported cases and deaths
# for one year. merge() keeps only keys present in both inputs by default, and
# the columns the inputs share (Metric, Value) receive the ".x" / ".y" suffixes.
# The merge and the printout are done once: repeating them back to back on
# unchanged inputs produces exactly the same table.
cholera_data <- merge(cholera_cases, cholera_deaths, by = "Year")
cholera_data
## Year Metric.x Value.x
## 1 1971 Number of reported cases of cholera 239
## 2 1972 Number of reported cases of cholera 51
## 3 1974 Number of reported cases of cholera 402
## 4 1975 Number of reported cases of cholera 1093
## 5 1976 Number of reported cases of cholera 1359
## 6 1977 Number of reported cases of cholera 21
## 7 1978 Number of reported cases of cholera 673
## 8 1979 Number of reported cases of cholera 1070
## 9 1980 Number of reported cases of cholera 2808
## 10 1981 Number of reported cases of cholera 2424
## 11 1982 Number of reported cases of cholera 3498
## 12 1983 Number of reported cases of cholera 1049
## 13 1984 Number of reported cases of cholera 14
## 14 1985 Number of reported cases of cholera 1352
## 15 1986 Number of reported cases of cholera 839
## 16 1987 Number of reported cases of cholera 255
## 17 1989 Number of reported cases of cholera 918
## 18 1992 Number of reported cases of cholera 3388
## 19 1994 Number of reported cases of cholera 880
## 20 1995 Number of reported cases of cholera 1547
## 21 1996 Number of reported cases of cholera 482
## 22 1997 Number of reported cases of cholera 17200
## 23 1998 Number of reported cases of cholera 22432
## 24 1999 Number of reported cases of cholera 11039
## 25 2000 Number of reported cases of cholera 1157
## 26 2001 Number of reported cases of cholera 1001
## 27 2002 Number of reported cases of cholera 291
## 28 2004 Number of reported cases of cholera 870
## 29 2005 Number of reported cases of cholera 816
## 30 2006 Number of reported cases of cholera 870
## 31 2007 Number of reported cases of cholera 1206
## 32 2008 Number of reported cases of cholera 3091
## 33 2009 Number of reported cases of cholera 11425
## 34 2010 Number of reported cases of cholera 3188
## 35 2011 Number of reported cases of cholera 74
## 36 2014 Number of reported cases of cholera 35
## 37 2015 Number of reported cases of cholera 13291
## 38 2016 Number of reported cases of cholera 5866
## Metric.y Value.y
## 1 Number of reported deaths from cholera 38
## 2 Number of reported deaths from cholera 0
## 3 Number of reported deaths from cholera 20
## 4 Number of reported deaths from cholera 74
## 5 Number of reported deaths from cholera 59
## 6 Number of reported deaths from cholera 0
## 7 Number of reported deaths from cholera 17
## 8 Number of reported deaths from cholera 2
## 9 Number of reported deaths from cholera 42
## 10 Number of reported deaths from cholera 108
## 11 Number of reported deaths from cholera 91
## 12 Number of reported deaths from cholera 53
## 13 Number of reported deaths from cholera 0
## 14 Number of reported deaths from cholera 102
## 15 Number of reported deaths from cholera 45
## 16 Number of reported deaths from cholera 12
## 17 Number of reported deaths from cholera 32
## 18 Number of reported deaths from cholera 100
## 19 Number of reported deaths from cholera 28
## 20 Number of reported deaths from cholera 39
## 21 Number of reported deaths from cholera 14
## 22 Number of reported deaths from cholera 555
## 23 Number of reported deaths from cholera 1237
## 24 Number of reported deaths from cholera 350
## 25 Number of reported deaths from cholera 78
## 26 Number of reported deaths from cholera 55
## 27 Number of reported deaths from cholera 10
## 28 Number of reported deaths from cholera 15
## 29 Number of reported deaths from cholera 21
## 30 Number of reported deaths from cholera 11
## 31 Number of reported deaths from cholera 67
## 32 Number of reported deaths from cholera 113
## 33 Number of reported deaths from cholera 264
## 34 Number of reported deaths from cholera 63
## 35 Number of reported deaths from cholera 2
## 36 Number of reported deaths from cholera 9
## 37 Number of reported deaths from cholera 67
## 38 Number of reported deaths from cholera 80
# Give the merge()-suffixed columns descriptive names.
# Renaming by name (instead of overwriting names() positionally) raises an
# error if the merged table ever has a different column set, rather than
# silently attaching the wrong label to a column.
# No follow-up patching of NA/empty names is needed: every column is named
# explicitly here, so such a check could never match anything.
cholera_data <- cholera_data %>%
  rename(
    Metric_cases = Metric.x,
    Value_cases = Value.x,
    Metric_deaths = Metric.y,
    Value_deaths = Value.y
  )
# Case fatality rate (CFR): reported deaths as a percentage of reported cases.
# The ratio is undefined for a year with zero reported cases (plain division
# would yield NaN or Inf), so such rows get NA instead.
cholera_data <- cholera_data %>%
  mutate(
    CFR = if_else(Value_cases > 0, Value_deaths / Value_cases * 100, NA_real_)
  )
# View() opens a GUI data viewer, which only makes sense in an interactive
# session; skip it when the script is run non-interactively (Rscript, knitr).
if (interactive()) {
  View(cholera_data)
}
# Store Year as a number again (going through character first) before plotting
cholera_data <- cholera_data %>%
  mutate(Year = as.numeric(as.character(Year)))
# Yearly reported cholera cases: blue line (linewidth 1; lines take
# `linewidth`, points take `size`) with blue markers of size 2.
p <- ggplot(data = cholera_data, mapping = aes(x = Year, y = Value_cases)) +
  geom_line(colour = "blue", linewidth = 1) +
  geom_point(colour = "blue", size = 2) +
  labs(
    title = "Reported Cholera Cases in Kenya (1971–2016)",
    x = "Year",
    y = "Number of Cases"
  ) +
  theme_minimal() +
  # Rotate the x-axis tick labels to vertical
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(p)
# Same cases-per-year series, drawn with the default line width and default
# point styling (only the line colour is set).
p <- ggplot(data = cholera_data, mapping = aes(x = Year, y = Value_cases)) +
  geom_line(colour = "blue") +
  geom_point() +
  labs(
    title = "Reported Cholera Cases in Kenya (1971–2016)",
    x = "Year",
    y = "Number of Cases"
  ) +
  theme_minimal() +
  # Rotate the x-axis tick labels to vertical
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(p)
# Yearly reported cholera deaths: red line with default point markers.
p <- ggplot(data = cholera_data, mapping = aes(x = Year, y = Value_deaths)) +
  geom_line(colour = "red") +
  geom_point() +
  labs(
    title = "Reported Cholera Deaths in Kenya (1971–2016)",
    x = "Year",
    y = "Number of Deaths"
  ) +
  theme_minimal() +
  # Rotate the x-axis tick labels to vertical
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(p)
# Yearly case fatality rate (CFR column, in percent): dark-green line with
# default point markers.
p <- ggplot(data = cholera_data, mapping = aes(x = Year, y = CFR)) +
  geom_line(colour = "darkgreen") +
  geom_point() +
  labs(
    title = "Cholera Case Fatality Rate in Kenya (1971–2016)",
    x = "Year",
    y = "CFR (%)"
  ) +
  theme_minimal() +
  # Rotate the x-axis tick labels to vertical
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(p)