Load necessary libraries

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)

Load the dataset

# Read the Kenya infectious-disease indicators CSV.
# NOTE(review): the path is absolute and machine-specific, so this only runs
# where the file exists at exactly this location.
data <- readr::read_csv(
  file = "C:/Users/user/Desktop/UN-Report/un-report/data/practice-datasets/kenya-infectious-disease-indicators.csv"
)
## Rows: 384 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Metric
## dbl (2): Year, Value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the column names of the loaded data
names(data)
## [1] "Metric" "Year"   "Value"

Filter cholera cases and deaths

# Split the indicator table into one subset per cholera metric
cholera_cases <- data %>%
  filter(Metric == "Number of reported cases of cholera")
cholera_deaths <- data %>%
  filter(Metric == "Number of reported deaths from cholera")

# Inspect the distinct Year values present in each subset

unique(cholera_cases[["Year"]])
##  [1] 1994 1995 1971 1972 1974 1975 1976 1977 1978 1979 1980 1983 1984 1987 1989
## [16] 1992 1996 1981 1982 1985 1986 2009 2008 2007 2006 2005 2004 1997 1998 1999
## [31] 2000 2001 2002 2010 2011 2014 2015 2016
unique(cholera_deaths[["Year"]])
##  [1] 1994 1995 1971 1972 1974 1975 1976 1977 1978 1979 1980 1983 1984 1987 1989
## [16] 1992 1996 1981 1982 1985 1986 2009 2008 2007 2006 2005 2004 1997 1998 1999
## [31] 2000 2001 2002 2010 2011 2014 2015 2016

# Strip any stray whitespace from Year in both subsets.
# Year was parsed as a double (see the column specification printed by
# read_csv), so trimws() mainly coerces it to character here; that character
# column is what the subsequent merge uses as its key.

cholera_cases$Year <- trimws(cholera_cases$Year)
cholera_cases$Year
##  [1] "1994" "1995" "1971" "1972" "1974" "1975" "1976" "1977" "1978" "1979"
## [11] "1980" "1983" "1984" "1987" "1989" "1992" "1996" "1981" "1982" "1985"
## [21] "1986" "2009" "2008" "2007" "2006" "2005" "2004" "1997" "1998" "1999"
## [31] "2000" "2001" "2002" "2010" "2011" "2014" "2015" "2016"
cholera_deaths$Year <- trimws(cholera_deaths$Year)
cholera_deaths$Year
##  [1] "1994" "1995" "1971" "1972" "1974" "1975" "1976" "1977" "1978" "1979"
## [11] "1980" "1983" "1984" "1987" "1989" "1992" "1996" "1981" "1982" "1985"
## [21] "1986" "2009" "2008" "2007" "2006" "2005" "2004" "1997" "1998" "1999"
## [31] "2000" "2001" "2002" "2010" "2011" "2014" "2015" "2016"
# trimws() already returns a character vector (see the quoted values above),
# so no additional as.character() coercion is needed.


# Join the cases and deaths tables on Year; the clashing Metric/Value columns
# receive merge()'s default .x (cases) and .y (deaths) suffixes.
cholera_data <- merge(
  x = cholera_cases,
  y = cholera_deaths,
  by = "Year"
)
cholera_data
##    Year                            Metric.x Value.x
## 1  1971 Number of reported cases of cholera     239
## 2  1972 Number of reported cases of cholera      51
## 3  1974 Number of reported cases of cholera     402
## 4  1975 Number of reported cases of cholera    1093
## 5  1976 Number of reported cases of cholera    1359
## 6  1977 Number of reported cases of cholera      21
## 7  1978 Number of reported cases of cholera     673
## 8  1979 Number of reported cases of cholera    1070
## 9  1980 Number of reported cases of cholera    2808
## 10 1981 Number of reported cases of cholera    2424
## 11 1982 Number of reported cases of cholera    3498
## 12 1983 Number of reported cases of cholera    1049
## 13 1984 Number of reported cases of cholera      14
## 14 1985 Number of reported cases of cholera    1352
## 15 1986 Number of reported cases of cholera     839
## 16 1987 Number of reported cases of cholera     255
## 17 1989 Number of reported cases of cholera     918
## 18 1992 Number of reported cases of cholera    3388
## 19 1994 Number of reported cases of cholera     880
## 20 1995 Number of reported cases of cholera    1547
## 21 1996 Number of reported cases of cholera     482
## 22 1997 Number of reported cases of cholera   17200
## 23 1998 Number of reported cases of cholera   22432
## 24 1999 Number of reported cases of cholera   11039
## 25 2000 Number of reported cases of cholera    1157
## 26 2001 Number of reported cases of cholera    1001
## 27 2002 Number of reported cases of cholera     291
## 28 2004 Number of reported cases of cholera     870
## 29 2005 Number of reported cases of cholera     816
## 30 2006 Number of reported cases of cholera     870
## 31 2007 Number of reported cases of cholera    1206
## 32 2008 Number of reported cases of cholera    3091
## 33 2009 Number of reported cases of cholera   11425
## 34 2010 Number of reported cases of cholera    3188
## 35 2011 Number of reported cases of cholera      74
## 36 2014 Number of reported cases of cholera      35
## 37 2015 Number of reported cases of cholera   13291
## 38 2016 Number of reported cases of cholera    5866
##                                  Metric.y Value.y
## 1  Number of reported deaths from cholera      38
## 2  Number of reported deaths from cholera       0
## 3  Number of reported deaths from cholera      20
## 4  Number of reported deaths from cholera      74
## 5  Number of reported deaths from cholera      59
## 6  Number of reported deaths from cholera       0
## 7  Number of reported deaths from cholera      17
## 8  Number of reported deaths from cholera       2
## 9  Number of reported deaths from cholera      42
## 10 Number of reported deaths from cholera     108
## 11 Number of reported deaths from cholera      91
## 12 Number of reported deaths from cholera      53
## 13 Number of reported deaths from cholera       0
## 14 Number of reported deaths from cholera     102
## 15 Number of reported deaths from cholera      45
## 16 Number of reported deaths from cholera      12
## 17 Number of reported deaths from cholera      32
## 18 Number of reported deaths from cholera     100
## 19 Number of reported deaths from cholera      28
## 20 Number of reported deaths from cholera      39
## 21 Number of reported deaths from cholera      14
## 22 Number of reported deaths from cholera     555
## 23 Number of reported deaths from cholera    1237
## 24 Number of reported deaths from cholera     350
## 25 Number of reported deaths from cholera      78
## 26 Number of reported deaths from cholera      55
## 27 Number of reported deaths from cholera      10
## 28 Number of reported deaths from cholera      15
## 29 Number of reported deaths from cholera      21
## 30 Number of reported deaths from cholera      11
## 31 Number of reported deaths from cholera      67
## 32 Number of reported deaths from cholera     113
## 33 Number of reported deaths from cholera     264
## 34 Number of reported deaths from cholera      63
## 35 Number of reported deaths from cholera       2
## 36 Number of reported deaths from cholera       9
## 37 Number of reported deaths from cholera      67
## 38 Number of reported deaths from cholera      80

Merge cases and deaths by Year

# Join cases (x) and deaths (y) on Year.
# NOTE(review): this repeats the identical merge already performed earlier in
# the file; one of the two could be dropped.
cholera_data <- merge(
  x = cholera_cases,
  y = cholera_deaths,
  by = "Year"
)

cholera_data
##    Year                            Metric.x Value.x
## 1  1971 Number of reported cases of cholera     239
## 2  1972 Number of reported cases of cholera      51
## 3  1974 Number of reported cases of cholera     402
## 4  1975 Number of reported cases of cholera    1093
## 5  1976 Number of reported cases of cholera    1359
## 6  1977 Number of reported cases of cholera      21
## 7  1978 Number of reported cases of cholera     673
## 8  1979 Number of reported cases of cholera    1070
## 9  1980 Number of reported cases of cholera    2808
## 10 1981 Number of reported cases of cholera    2424
## 11 1982 Number of reported cases of cholera    3498
## 12 1983 Number of reported cases of cholera    1049
## 13 1984 Number of reported cases of cholera      14
## 14 1985 Number of reported cases of cholera    1352
## 15 1986 Number of reported cases of cholera     839
## 16 1987 Number of reported cases of cholera     255
## 17 1989 Number of reported cases of cholera     918
## 18 1992 Number of reported cases of cholera    3388
## 19 1994 Number of reported cases of cholera     880
## 20 1995 Number of reported cases of cholera    1547
## 21 1996 Number of reported cases of cholera     482
## 22 1997 Number of reported cases of cholera   17200
## 23 1998 Number of reported cases of cholera   22432
## 24 1999 Number of reported cases of cholera   11039
## 25 2000 Number of reported cases of cholera    1157
## 26 2001 Number of reported cases of cholera    1001
## 27 2002 Number of reported cases of cholera     291
## 28 2004 Number of reported cases of cholera     870
## 29 2005 Number of reported cases of cholera     816
## 30 2006 Number of reported cases of cholera     870
## 31 2007 Number of reported cases of cholera    1206
## 32 2008 Number of reported cases of cholera    3091
## 33 2009 Number of reported cases of cholera   11425
## 34 2010 Number of reported cases of cholera    3188
## 35 2011 Number of reported cases of cholera      74
## 36 2014 Number of reported cases of cholera      35
## 37 2015 Number of reported cases of cholera   13291
## 38 2016 Number of reported cases of cholera    5866
##                                  Metric.y Value.y
## 1  Number of reported deaths from cholera      38
## 2  Number of reported deaths from cholera       0
## 3  Number of reported deaths from cholera      20
## 4  Number of reported deaths from cholera      74
## 5  Number of reported deaths from cholera      59
## 6  Number of reported deaths from cholera       0
## 7  Number of reported deaths from cholera      17
## 8  Number of reported deaths from cholera       2
## 9  Number of reported deaths from cholera      42
## 10 Number of reported deaths from cholera     108
## 11 Number of reported deaths from cholera      91
## 12 Number of reported deaths from cholera      53
## 13 Number of reported deaths from cholera       0
## 14 Number of reported deaths from cholera     102
## 15 Number of reported deaths from cholera      45
## 16 Number of reported deaths from cholera      12
## 17 Number of reported deaths from cholera      32
## 18 Number of reported deaths from cholera     100
## 19 Number of reported deaths from cholera      28
## 20 Number of reported deaths from cholera      39
## 21 Number of reported deaths from cholera      14
## 22 Number of reported deaths from cholera     555
## 23 Number of reported deaths from cholera    1237
## 24 Number of reported deaths from cholera     350
## 25 Number of reported deaths from cholera      78
## 26 Number of reported deaths from cholera      55
## 27 Number of reported deaths from cholera      10
## 28 Number of reported deaths from cholera      15
## 29 Number of reported deaths from cholera      21
## 30 Number of reported deaths from cholera      11
## 31 Number of reported deaths from cholera      67
## 32 Number of reported deaths from cholera     113
## 33 Number of reported deaths from cholera     264
## 34 Number of reported deaths from cholera      63
## 35 Number of reported deaths from cholera       2
## 36 Number of reported deaths from cholera       9
## 37 Number of reported deaths from cholera      67
## 38 Number of reported deaths from cholera      80
# Give the merged columns descriptive names. Columns are renamed by their
# current name (the .x/.y suffixes added by merge()) rather than by position,
# so the mapping stays correct even if the column order ever changes.
cholera_data <- cholera_data %>%
  rename(
    Metric_cases = Metric.x,
    Value_cases = Value.x,
    Metric_deaths = Metric.y,
    Value_deaths = Value.y
  )

# Every column now has an explicit, non-empty name, so no fallback renaming
# of missing or empty column names is required.

Calculate Case Fatality Rate (CFR)

# Case fatality rate: reported deaths as a percentage of reported cases.
# A year with zero reported cases gets NA instead of the NaN/Inf that a
# division by zero would produce.
cholera_data <- cholera_data %>%
  mutate(
    CFR = if_else(
      Value_cases == 0,
      NA_real_,
      (Value_deaths / Value_cases) * 100
    )
  )

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(cholera_data)
}

# Convert Year to numeric for use as the x variable in the plots

cholera_data <- cholera_data %>%
  mutate(Year = as.numeric(as.character(Year)))

Plot 1: Reported cholera cases (blue line and points)

# Line-and-point chart of reported cholera cases per year, all in blue
p <- ggplot(data = cholera_data, mapping = aes(x = Year, y = Value_cases)) +
  # linewidth (rather than size) controls line thickness
  geom_line(color = "blue", linewidth = 1) +
  # size still controls the point radius
  geom_point(color = "blue", size = 2) +
  labs(
    title = "Reported Cholera Cases in Kenya (1971–2016)",
    x = "Year",
    y = "Number of Cases"
  ) +
  theme_minimal() +
  # rotate the x-axis tick labels to vertical
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

print(p)

Plot Cholera Cases

# Reported cholera cases per year: blue line with default-styled points
p <- ggplot(data = cholera_data, mapping = aes(x = Year, y = Value_cases)) +
  geom_line(color = "blue") +
  geom_point() +
  labs(
    title = "Reported Cholera Cases in Kenya (1971–2016)",
    x = "Year",
    y = "Number of Cases"
  ) +
  theme_minimal() +
  # rotate the x-axis tick labels to vertical
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(p)

Plot Cholera Deaths

# Reported cholera deaths per year: red line with default-styled points
p <- ggplot(data = cholera_data, mapping = aes(x = Year, y = Value_deaths)) +
  geom_line(color = "red") +
  geom_point() +
  labs(
    title = "Reported Cholera Deaths in Kenya (1971–2016)",
    x = "Year",
    y = "Number of Deaths"
  ) +
  theme_minimal() +
  # rotate the x-axis tick labels to vertical
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(p)

Plot Case Fatality Rate

# Case fatality rate (percent) per year: dark green line with points
p <- ggplot(data = cholera_data, mapping = aes(x = Year, y = CFR)) +
  geom_line(color = "darkgreen") +
  geom_point() +
  labs(
    title = "Cholera Case Fatality Rate in Kenya (1971–2016)",
    x = "Year",
    y = "CFR (%)"
  ) +
  theme_minimal() +
  # rotate the x-axis tick labels to vertical
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(p)