Importing Libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
Loading the Data
# Download the tab-separated lancet_malaria.txt file; the first row supplies
# the column names.
data <- read.table(
  file = "https://raw.githubusercontent.com/HackBio-Internship/public_datasets/main/R/lancet_malaria.txt",
  sep = "\t",
  header = TRUE
)
# Keep the raw import in `data` and work on a separately named copy.
malaria_data <- data
# Preview the first six rows.
head(malaria_data, n = 6L)
## Review.Found Author
## 1 Whittaker et al. Proietti
## 2 Whittaker et al. Souza
## 3 Whittaker et al. Souza
## 4 Whittaker et al. Souza
## 5 Whittaker et al. Atkinson
## 6 Whittaker et al. Pegha Moukandja
## Title
## 1 Continuing intense malaria transmission in northern Uganda.
## 2 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 3 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 4 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 5 Operational research to inform a sub-national surveillance intervention for malaria elimination in Solomon Islands
## 6 Marked Rise in the Prevalence of Asymptomatic Plasmodium falciparum Infection in Rural Gabon
## Year Global.Region Country Location PCR.N.Tested
## 1 2011 East Africa Uganda Apac 241
## 2 2012 South America Brazil Cuiba Santarem 74
## 3 2012 South America Brazil 74
## 4 2012 South America Brazil 134
## 5 2012 Asia&Oceania Solomon Islands Isabel Province 1843
## 6 2016 West Africa Gabon Ogooue-Lolo Province 277
## PCR.N.Positive X..PCR..Positive Microscopy.N.Tested Microscopy.N.Positive
## 1 133 55.19 243 93
## 2 1 1.35 74 0
## 3 2 0.01 74 0
## 4 1 0.01 134 0
## 5 1 0.05 8554 1
## 6 161 58.12 370 93
## X..Microscopy..Positive Historical.Transmission Current.Transmission
## 1 38.27 0.7456 0.6248
## 2 0.00 NA NA
## 3 0.00 NA NA
## 4 0.00 NA NA
## 5 0.01 NA NA
## 6 25.14 0.3408 0.1889
## Setting_20 Setting_15 Setting_10 Setting_5 PCR_Method Microscopy_Fields
## 1 High_High High_High High_High High_High Nested 100
## 2 <NA> <NA> <NA> <NA> Nested NA
## 3 <NA> <NA> <NA> <NA> Nested NA
## 4 <NA> <NA> <NA> <NA> Nested NA
## 5 <NA> <NA> <NA> <NA> Semi-Nested 100
## 6 High_Low High_High High_High High_High Nested 100
## Sampling_Season Notes
## 1 Wet
## 2 <NA>
## 3 <NA>
## 4 <NA>
## 5 Dry
## 6 Wet
Changing the Column Names
# Replace the imported headers with consistent underscore-separated names,
# listed in the same order as the columns of the data frame.
names(malaria_data) <- c(
  "Review_Found", "Author", "Title", "Year", "Global_Region", "Country",
  "Location", "PCR_N_Tested", "PCR_N_Positive", "Percent_PCR",
  "Microscopy_N_Tested", "Microscopy_N_Positive", "Percent_Microscopy",
  "Historical_Transmission", "Current_Transmission",
  "Setting_20", "Setting_15", "Setting_10", "Setting_5",
  "PCR_Method", "Microscopy_Fields", "Sampling_Season", "Notes"
)
# Preview the first six rows under the new names.
head(malaria_data, n = 6L)
## Review_Found Author
## 1 Whittaker et al. Proietti
## 2 Whittaker et al. Souza
## 3 Whittaker et al. Souza
## 4 Whittaker et al. Souza
## 5 Whittaker et al. Atkinson
## 6 Whittaker et al. Pegha Moukandja
## Title
## 1 Continuing intense malaria transmission in northern Uganda.
## 2 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 3 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 4 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 5 Operational research to inform a sub-national surveillance intervention for malaria elimination in Solomon Islands
## 6 Marked Rise in the Prevalence of Asymptomatic Plasmodium falciparum Infection in Rural Gabon
## Year Global_Region Country Location PCR_N_Tested
## 1 2011 East Africa Uganda Apac 241
## 2 2012 South America Brazil Cuiba Santarem 74
## 3 2012 South America Brazil 74
## 4 2012 South America Brazil 134
## 5 2012 Asia&Oceania Solomon Islands Isabel Province 1843
## 6 2016 West Africa Gabon Ogooue-Lolo Province 277
## PCR_N_Positive Percent_PCR Microscopy_N_Tested Microscopy_N_Positive
## 1 133 55.19 243 93
## 2 1 1.35 74 0
## 3 2 0.01 74 0
## 4 1 0.01 134 0
## 5 1 0.05 8554 1
## 6 161 58.12 370 93
## Percent_Microscopy Historical_Transmission Current_Transmission Setting_20
## 1 38.27 0.7456 0.6248 High_High
## 2 0.00 NA NA <NA>
## 3 0.00 NA NA <NA>
## 4 0.00 NA NA <NA>
## 5 0.01 NA NA <NA>
## 6 25.14 0.3408 0.1889 High_Low
## Setting_15 Setting_10 Setting_5 PCR_Method Microscopy_Fields Sampling_Season
## 1 High_High High_High High_High Nested 100 Wet
## 2 <NA> <NA> <NA> Nested NA <NA>
## 3 <NA> <NA> <NA> Nested NA <NA>
## 4 <NA> <NA> <NA> Nested NA <NA>
## 5 <NA> <NA> <NA> Semi-Nested 100 Dry
## 6 High_High High_High High_High Nested 100 Wet
## Notes
## 1
## 2
## 3
## 4
## 5
## 6
Descriptive Statistics
Total Number of cases tested by PCR
# Total PCR-tested count across all rows; missing values are skipped.
sum(as.numeric(malaria_data[["PCR_N_Tested"]]), na.rm = TRUE)
## [1] 270130
Total Number of Positive cases by PCR
# Total PCR-positive count across all rows; missing values are skipped.
sum(as.numeric(malaria_data[["PCR_N_Positive"]]), na.rm = TRUE)
## [1] 47379.98
Percentage Positive by PCR
# Overall PCR positivity (%) = pooled positives / pooled tested * 100.
# Summing the per-row Percent_PCR column adds percentages together and does
# not produce a percentage, so the pooled counts are used instead.
# NOTE: any previously printed result for this statement must be regenerated.
100 * sum(as.numeric(malaria_data$PCR_N_Positive), na.rm = TRUE) /
  sum(as.numeric(malaria_data$PCR_N_Tested), na.rm = TRUE)
## [1] 10487.67
Visualization
Scatter plot of PCR prevalence (%) against microscopy prevalence (%).
# Scatter of microscopy prevalence (x) against PCR prevalence (y).
# The data vectors are now passed in the order the axis labels describe;
# previously Percent_PCR was drawn on the axis labelled "%Microscopy".
plot(
  x = malaria_data$Percent_Microscopy,
  y = malaria_data$Percent_PCR,
  main = "PCR and Microscopy Prevalence",
  xlab = "%Microscopy", ylab = "%PCR",
  pch = 18, col = "brown"
)
# Dashed y = x reference line: points above it have PCR % > microscopy %.
abline(0, 1, lty = 2, col = "blue")
# Regional comparison: one base-graphics panel per global region
# Lay the next four base plots out on a 2 x 2 grid.
par(mfrow = c(2, 2))
# One subset per global region. `%in%` is used instead of `==` so rows with a
# missing Global_Region are dropped rather than returned as all-NA rows.
# The dataset spells the region "Asia&Oceania" (no spaces); the spaced form is
# also accepted so either spelling selects the rows.
asia_data <- malaria_data[
  malaria_data$Global_Region %in% c("Asia&Oceania", "Asia & Oceania"),
]
east_africa_data <- malaria_data[malaria_data$Global_Region %in% "East Africa", ]
south_america_data <- malaria_data[malaria_data$Global_Region %in% "South America", ]
West_africa_data <- malaria_data[malaria_data$Global_Region %in% "West Africa", ]
# Draw one scatter panel of microscopy % (x) against PCR % (y).
#
# malaria_data:  data frame with Percent_Microscopy and Percent_PCR columns.
# Global_Region: text used as the panel title.
#
# Both axes are fixed to 0-100 and a dotted y = x line is added for reference.
region_plot <- function(malaria_data, Global_Region) {
  microscopy_pct <- malaria_data$Percent_Microscopy
  pcr_pct <- malaria_data$Percent_PCR
  percent_range <- c(0, 100)

  plot(
    x = microscopy_pct,
    y = pcr_pct,
    main = Global_Region,
    xlab = "%Microscopy",
    ylab = "%PCR",
    xlim = percent_range,
    ylim = percent_range,
    pch = 16,
    col = "blue"
  )
  abline(a = 0, b = 1, lty = "dotted")
}
# Fill the 2 x 2 grid, one panel per regional subset.
region_plot(malaria_data = asia_data, Global_Region = "Asia & Oceania")
region_plot(malaria_data = east_africa_data, Global_Region = "East Africa")
region_plot(malaria_data = south_america_data, Global_Region = "South America")
region_plot(malaria_data = West_africa_data, Global_Region = "West Africa")
# Return to a single-panel layout for subsequent plots.
par(mfrow = c(1, 1))
# Prevalence ratio = microscopy prevalence / PCR prevalence, where each
# prevalence is positives / tested for that method. Dividing the raw positive
# counts is only equivalent when both methods tested the same number of
# samples, which is not the case for every row.
# Rows where the ratio is undefined (zero or missing denominators) are set to
# NA instead of Inf/NaN so they do not distort the plots that follow.
# NOTE: any previously printed preview of this column must be regenerated.
malaria_data$Prevalence_Ratio <- with(malaria_data, {
  microscopy_prevalence <- Microscopy_N_Positive / Microscopy_N_Tested
  pcr_prevalence <- PCR_N_Positive / PCR_N_Tested
  ratio <- microscopy_prevalence / pcr_prevalence
  ratio[!is.finite(ratio)] <- NA_real_
  ratio
})
head(malaria_data)
## Review_Found Author
## 1 Whittaker et al. Proietti
## 2 Whittaker et al. Souza
## 3 Whittaker et al. Souza
## 4 Whittaker et al. Souza
## 5 Whittaker et al. Atkinson
## 6 Whittaker et al. Pegha Moukandja
## Title
## 1 Continuing intense malaria transmission in northern Uganda.
## 2 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 3 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 4 Prevalence of Plasmodium falciparum and P. vivax in an area of transmission located in Pará State, Brazil, determined by amplification of mtDNA using a real-time PCR assay.
## 5 Operational research to inform a sub-national surveillance intervention for malaria elimination in Solomon Islands
## 6 Marked Rise in the Prevalence of Asymptomatic Plasmodium falciparum Infection in Rural Gabon
## Year Global_Region Country Location PCR_N_Tested
## 1 2011 East Africa Uganda Apac 241
## 2 2012 South America Brazil Cuiba Santarem 74
## 3 2012 South America Brazil 74
## 4 2012 South America Brazil 134
## 5 2012 Asia&Oceania Solomon Islands Isabel Province 1843
## 6 2016 West Africa Gabon Ogooue-Lolo Province 277
## PCR_N_Positive Percent_PCR Microscopy_N_Tested Microscopy_N_Positive
## 1 133 55.19 243 93
## 2 1 1.35 74 0
## 3 2 0.01 74 0
## 4 1 0.01 134 0
## 5 1 0.05 8554 1
## 6 161 58.12 370 93
## Percent_Microscopy Historical_Transmission Current_Transmission Setting_20
## 1 38.27 0.7456 0.6248 High_High
## 2 0.00 NA NA <NA>
## 3 0.00 NA NA <NA>
## 4 0.00 NA NA <NA>
## 5 0.01 NA NA <NA>
## 6 25.14 0.3408 0.1889 High_Low
## Setting_15 Setting_10 Setting_5 PCR_Method Microscopy_Fields Sampling_Season
## 1 High_High High_High High_High Nested 100 Wet
## 2 <NA> <NA> <NA> Nested NA <NA>
## 3 <NA> <NA> <NA> Nested NA <NA>
## 4 <NA> <NA> <NA> Nested NA <NA>
## 5 <NA> <NA> <NA> Semi-Nested 100 Dry
## 6 High_High High_High High_High Nested 100 Wet
## Notes Prevalence_Ratio
## 1 0.6992481
## 2 0.0000000
## 3 0.0000000
## 4 0.0000000
## 5 1.0000000
## 6 0.5776398
# Faceted scatter: one panel per Global_Region, microscopy % on x and PCR % on
# y, with a dotted y = x reference line. The y-axis label now names the
# quantity plotted ("PCR %") to match the x-axis label style.
ggplot(
  malaria_data,
  aes(x = Percent_Microscopy, y = Percent_PCR, color = Global_Region)
) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1, linetype = "dotted") +
  facet_wrap(~ Global_Region) +
  labs(
    title = "PCR% Vs Microscopy% by region",
    x = "Microscopy %",
    y = "PCR %"
  )
# Notched base-graphics boxplot of Prevalence_Ratio for each Global_Region.
boxplot(
  formula = Prevalence_Ratio ~ Global_Region,
  data = malaria_data,
  notch = TRUE,
  las = 2, # axis labels drawn perpendicular to the axis
  col = c("lightblue", "lightgreen", "lightpink", "lightyellow"),
  main = "prevalence ratio by Global_Region",
  xlab = "Global_Region",
  ylab = "Prevalence Ratio"
)
# Dashed red reference line at a ratio of 1.
abline(h = 1, lty = 2, col = "red")
# ggplot2 version of the boxplot: Prevalence_Ratio by Global_Region, with
# semi-transparent boxes filled by region.
ggplot(
  data = malaria_data,
  mapping = aes(x = Global_Region, y = Prevalence_Ratio, fill = Global_Region)
) +
  geom_boxplot(alpha = 0.7) +
  labs(
    title = "Prevalence Ratio by Region",
    x = "Region",
    y = "Prevalence Ratio"
  )