Importing Libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)

Loading the Data

# Read the tab-separated Lancet malaria dataset directly from the HackBio
# public_datasets repository; the first line of the file holds the headers.
data <- read.table(
  file = "https://raw.githubusercontent.com/HackBio-Internship/public_datasets/main/R/lancet_malaria.txt",
  header = TRUE,
  sep = "\t"
)

# Keep the raw import in `data` and do all further work on `malaria_data`.
malaria_data <- data

# Preview the first rows to check the import.
head(malaria_data)
##       Review.Found          Author
## 1 Whittaker et al.        Proietti
## 2 Whittaker et al.          Souza 
## 3 Whittaker et al.           Souza
## 4 Whittaker et al.           Souza
## 5 Whittaker et al.        Atkinson
## 6 Whittaker et al. Pegha Moukandja
##                                                                                                                                                                          Title
## 1                                                                                                                  Continuing intense malaria transmission in northern Uganda.
## 2 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 3 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 4 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 5                                                           Operational research to inform a sub-national surveillance intervention for malaria elimination in Solomon Islands
## 6                                                                                 Marked Rise in the Prevalence of Asymptomatic Plasmodium falciparum Infection in Rural Gabon
##   Year Global.Region         Country             Location PCR.N.Tested
## 1 2011   East Africa          Uganda                 Apac          241
## 2 2012 South America          Brazil       Cuiba Santarem           74
## 3 2012 South America          Brazil                                74
## 4 2012 South America          Brazil                               134
## 5 2012  Asia&Oceania Solomon Islands      Isabel Province         1843
## 6 2016   West Africa          Gabon  Ogooue-Lolo Province          277
##   PCR.N.Positive X..PCR..Positive Microscopy.N.Tested Microscopy.N.Positive
## 1            133            55.19                 243                    93
## 2              1             1.35                  74                     0
## 3              2             0.01                  74                     0
## 4              1             0.01                 134                     0
## 5              1             0.05                8554                     1
## 6            161            58.12                 370                    93
##   X..Microscopy..Positive Historical.Transmission Current.Transmission
## 1                   38.27                  0.7456               0.6248
## 2                    0.00                      NA                   NA
## 3                    0.00                      NA                   NA
## 4                    0.00                      NA                   NA
## 5                    0.01                      NA                   NA
## 6                   25.14                  0.3408               0.1889
##   Setting_20 Setting_15 Setting_10 Setting_5  PCR_Method Microscopy_Fields
## 1  High_High  High_High  High_High High_High      Nested               100
## 2       <NA>       <NA>       <NA>      <NA>      Nested                NA
## 3       <NA>       <NA>       <NA>      <NA>      Nested                NA
## 4       <NA>       <NA>       <NA>      <NA>      Nested                NA
## 5       <NA>       <NA>       <NA>      <NA> Semi-Nested               100
## 6   High_Low  High_High  High_High High_High      Nested               100
##   Sampling_Season Notes
## 1             Wet      
## 2            <NA>      
## 3            <NA>      
## 4            <NA>      
## 5             Dry      
## 6             Wet

Changing the Column Names

# Replace the imported headers with underscore-separated names. The vector is
# positional: entry i becomes the name of column i, so the order must match
# the column order of the file.
names(malaria_data) <- c(
  "Review_Found", "Author", "Title", "Year",
  "Global_Region", "Country", "Location",
  "PCR_N_Tested", "PCR_N_Positive", "Percent_PCR",
  "Microscopy_N_Tested", "Microscopy_N_Positive", "Percent_Microscopy",
  "Historical_Transmission", "Current_Transmission",
  "Setting_20", "Setting_15", "Setting_10", "Setting_5",
  "PCR_Method", "Microscopy_Fields", "Sampling_Season", "Notes"
)
head(malaria_data)
##       Review_Found          Author
## 1 Whittaker et al.        Proietti
## 2 Whittaker et al.          Souza 
## 3 Whittaker et al.           Souza
## 4 Whittaker et al.           Souza
## 5 Whittaker et al.        Atkinson
## 6 Whittaker et al. Pegha Moukandja
##                                                                                                                                                                          Title
## 1                                                                                                                  Continuing intense malaria transmission in northern Uganda.
## 2 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 3 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 4 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 5                                                           Operational research to inform a sub-national surveillance intervention for malaria elimination in Solomon Islands
## 6                                                                                 Marked Rise in the Prevalence of Asymptomatic Plasmodium falciparum Infection in Rural Gabon
##   Year Global_Region         Country             Location PCR_N_Tested
## 1 2011   East Africa          Uganda                 Apac          241
## 2 2012 South America          Brazil       Cuiba Santarem           74
## 3 2012 South America          Brazil                                74
## 4 2012 South America          Brazil                               134
## 5 2012  Asia&Oceania Solomon Islands      Isabel Province         1843
## 6 2016   West Africa          Gabon  Ogooue-Lolo Province          277
##   PCR_N_Positive Percent_PCR Microscopy_N_Tested Microscopy_N_Positive
## 1            133       55.19                 243                    93
## 2              1        1.35                  74                     0
## 3              2        0.01                  74                     0
## 4              1        0.01                 134                     0
## 5              1        0.05                8554                     1
## 6            161       58.12                 370                    93
##   Percent_Microscopy Historical_Transmission Current_Transmission Setting_20
## 1              38.27                  0.7456               0.6248  High_High
## 2               0.00                      NA                   NA       <NA>
## 3               0.00                      NA                   NA       <NA>
## 4               0.00                      NA                   NA       <NA>
## 5               0.01                      NA                   NA       <NA>
## 6              25.14                  0.3408               0.1889   High_Low
##   Setting_15 Setting_10 Setting_5  PCR_Method Microscopy_Fields Sampling_Season
## 1  High_High  High_High High_High      Nested               100             Wet
## 2       <NA>       <NA>      <NA>      Nested                NA            <NA>
## 3       <NA>       <NA>      <NA>      Nested                NA            <NA>
## 4       <NA>       <NA>      <NA>      Nested                NA            <NA>
## 5       <NA>       <NA>      <NA> Semi-Nested               100             Dry
## 6  High_High  High_High High_High      Nested               100             Wet
##   Notes
## 1      
## 2      
## 3      
## 4      
## 5      
## 6

Descriptive Statistics

Total Number of cases tested by PCR

# Total number of PCR tests over all rows; missing counts are skipped.
with(malaria_data, sum(as.numeric(PCR_N_Tested), na.rm = TRUE))
## [1] 270130

Total Number of Positive cases by PCR

# Total number of PCR-positive results over all rows; missing counts are skipped.
with(malaria_data, sum(as.numeric(PCR_N_Positive), na.rm = TRUE))
## [1] 47379.98

Percentage Positive by PCR

# Overall percentage of PCR-tested samples that were PCR-positive.
# Adding up the per-row Percent_PCR values does not yield a percentage, so the
# pooled figure is computed from the counts instead:
#   100 * (total PCR positive) / (total PCR tested).
# Only rows where both counts are present contribute, so the numerator and
# the denominator always describe the same set of rows.
with(malaria_data, {
  tested <- as.numeric(PCR_N_Tested)
  positive <- as.numeric(PCR_N_Positive)
  counted <- !is.na(tested) & !is.na(positive)
  100 * sum(positive[counted]) / sum(tested[counted])
})
## [1] 10487.67

Visualization

Visualizing the PCR % against the microscopy %.

# Scatter plot of PCR prevalence (y) against microscopy prevalence (x).
# The data vectors are passed in the same order as the axis labels: microscopy
# on the x axis and PCR on the y axis (previously the two were swapped, so
# each axis was labelled with the other method's name).
plot(
  x = malaria_data$Percent_Microscopy,
  y = malaria_data$Percent_PCR,
  main = "PCR and Microscopy Prevalence",
  xlab = "%Microscopy", ylab = "%PCR",
  pch = 18, col = "brown"
)
# Dashed identity line (intercept 0, slope 1): points on it have equal
# prevalence by both methods.
abline(a = 0, b = 1, lty = 2, col = "blue")

Trial: PCR % against microscopy % plotted separately for each global region

# Arrange the next four base-graphics plots on a 2 x 2 grid.
par(mfrow = c(2, 2))

# One subset per global region.
# - The file spells the Asian region "Asia&Oceania" (no spaces), so a filter
#   on "Asia & Oceania" alone matches nothing; both spellings are accepted.
# - Labels are trimmed so stray leading/trailing blanks do not defeat the match.
# - %in% never returns NA, so rows with a missing region are simply excluded
#   instead of being turned into all-NA rows as `==` would do.
region_label <- trimws(malaria_data$Global_Region)

asia_data <- malaria_data[region_label %in% c("Asia&Oceania", "Asia & Oceania"), ]
east_africa_data <- malaria_data[region_label %in% "East Africa", ]
south_america_data <- malaria_data[region_label %in% "South America", ]
West_africa_data <- malaria_data[region_label %in% "West Africa", ]


# Draw one regional panel: PCR prevalence against microscopy prevalence.
#
# malaria_data:  data frame with columns `Percent_Microscopy` and `Percent_PCR`.
# Global_Region: text used as the panel title.
#
# Both axes are fixed to 0-100 so that panels are directly comparable, and a
# dotted identity line (intercept 0, slope 1) is added on top of the points.
region_plot <- function(malaria_data, Global_Region) {
  microscopy_pct <- malaria_data$Percent_Microscopy
  pcr_pct <- malaria_data$Percent_PCR
  percent_axis <- c(0, 100)

  plot(
    x = microscopy_pct,
    y = pcr_pct,
    main = Global_Region,
    xlab = "%Microscopy",
    ylab = "%PCR",
    xlim = percent_axis,
    ylim = percent_axis,
    pch = 16,
    col = "blue"
  )
  abline(a = 0, b = 1, lty = "dotted")
}


# Draw the four regional panels in grid order (each subset paired with its
# title), then switch back to a single-panel layout for later plots.
invisible(Map(
  region_plot,
  list(asia_data, east_africa_data, south_america_data, West_africa_data),
  c("Asia & Oceania", "East Africa", "South America", "West Africa")
))

par(mfrow = c(1, 1))
# Prevalence ratio = microscopy prevalence / PCR prevalence, where each
# prevalence is (number positive) / (number tested) for that method.
# Dividing the raw positive counts only equals this when both methods tested
# the same number of samples, which does not hold for every row.
# Rows where the ratio is undefined (a zero denominator gives Inf or NaN) are
# stored as NA so they cannot distort the plots and summaries that follow.
malaria_data$Prevalence_Ratio <- with(malaria_data, {
  microscopy_prevalence <-
    as.numeric(Microscopy_N_Positive) / as.numeric(Microscopy_N_Tested)
  pcr_prevalence <- as.numeric(PCR_N_Positive) / as.numeric(PCR_N_Tested)
  ratio <- microscopy_prevalence / pcr_prevalence
  ratio[!is.finite(ratio)] <- NA
  ratio
})
head(malaria_data)
##       Review_Found          Author
## 1 Whittaker et al.        Proietti
## 2 Whittaker et al.          Souza 
## 3 Whittaker et al.           Souza
## 4 Whittaker et al.           Souza
## 5 Whittaker et al.        Atkinson
## 6 Whittaker et al. Pegha Moukandja
##                                                                                                                                                                          Title
## 1                                                                                                                  Continuing intense malaria transmission in northern Uganda.
## 2 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 3 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 4 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 5                                                           Operational research to inform a sub-national surveillance intervention for malaria elimination in Solomon Islands
## 6                                                                                 Marked Rise in the Prevalence of Asymptomatic Plasmodium falciparum Infection in Rural Gabon
##   Year Global_Region         Country             Location PCR_N_Tested
## 1 2011   East Africa          Uganda                 Apac          241
## 2 2012 South America          Brazil       Cuiba Santarem           74
## 3 2012 South America          Brazil                                74
## 4 2012 South America          Brazil                               134
## 5 2012  Asia&Oceania Solomon Islands      Isabel Province         1843
## 6 2016   West Africa          Gabon  Ogooue-Lolo Province          277
##   PCR_N_Positive Percent_PCR Microscopy_N_Tested Microscopy_N_Positive
## 1            133       55.19                 243                    93
## 2              1        1.35                  74                     0
## 3              2        0.01                  74                     0
## 4              1        0.01                 134                     0
## 5              1        0.05                8554                     1
## 6            161       58.12                 370                    93
##   Percent_Microscopy Historical_Transmission Current_Transmission Setting_20
## 1              38.27                  0.7456               0.6248  High_High
## 2               0.00                      NA                   NA       <NA>
## 3               0.00                      NA                   NA       <NA>
## 4               0.00                      NA                   NA       <NA>
## 5               0.01                      NA                   NA       <NA>
## 6              25.14                  0.3408               0.1889   High_Low
##   Setting_15 Setting_10 Setting_5  PCR_Method Microscopy_Fields Sampling_Season
## 1  High_High  High_High High_High      Nested               100             Wet
## 2       <NA>       <NA>      <NA>      Nested                NA            <NA>
## 3       <NA>       <NA>      <NA>      Nested                NA            <NA>
## 4       <NA>       <NA>      <NA>      Nested                NA            <NA>
## 5       <NA>       <NA>      <NA> Semi-Nested               100             Dry
## 6  High_High  High_High High_High      Nested               100             Wet
##   Notes Prevalence_Ratio
## 1              0.6992481
## 2              0.0000000
## 3              0.0000000
## 4              0.0000000
## 5              1.0000000
## 6              0.5776398
# Faceted scatter plot: PCR % (y) against microscopy % (x), one panel per
# global region, with a dotted identity line in every panel.
# The y axis carries Percent_PCR, so it is labelled "PCR %" to match the
# "Microscopy %" label on the x axis (it previously read just "percent").
ggplot(
  malaria_data,
  aes(x = Percent_Microscopy, y = Percent_PCR, color = Global_Region)
) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1, linetype = "dotted") +
  facet_wrap(~ Global_Region) +
  labs(
    title = "PCR% Vs Microscopy% by region",
    x = "Microscopy %",
    y = "PCR %"
  )

# Notched box plot of the prevalence ratio for each global region, with the
# region labels drawn perpendicular to the axis (las = 2).
boxplot(
  formula = Prevalence_Ratio ~ Global_Region,
  data = malaria_data,
  notch = TRUE,
  las = 2,
  col = c("lightblue", "lightgreen", "lightpink", "lightyellow"),
  main = "prevalence ratio by Global_Region",
  xlab = "Global_Region",
  ylab = "Prevalence Ratio"
)
# Dashed red reference line at a ratio of 1.
abline(h = 1, lty = 2, col = "red")

# ggplot2 version of the regional box plot: one semi-transparent box per
# global region, filled by region.
ggplot(
  data = malaria_data,
  mapping = aes(x = Global_Region, y = Prevalence_Ratio, fill = Global_Region)
) +
  geom_boxplot(alpha = 0.7) +
  labs(
    title = "Prevalence Ratio by Region",
    x = "Region",
    y = "Prevalence Ratio"
  )