This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

# Loading libraries

library(tidyverse)
library(readr)

Loading Dataset

# Download the tab-separated malaria table and keep it under both names used
# in this notebook: `link_to_dataset` (the raw import) and `malaria_data`
# (the working copy that later steps modify).
# NOTE(review): read.table() treats ' and # as quote/comment characters by
# default; confirm that free-text columns are parsed cleanly.
malaria_data <- link_to_dataset <- read.table(
  file = "https://raw.githubusercontent.com/HackBio-Internship/public_datasets/main/R/lancet_malaria.txt",
  sep = "\t",
  header = TRUE
)

# Preview the first rows of the imported table.
head(malaria_data)

Renaming of the column names

# Overwrite the imported header with the names used by the rest of the
# notebook. Names are assigned by position, so the order below must match the
# column order of the downloaded table.
names(malaria_data) <- c(
  "Review Found", "Author", "Title", "Year",
  "Region", "Country", "Location",
  "PCR_N_Tested", "PCR_N_Positive", "PCR_Percent",
  "Microscopy_N_Tested", "Microscopy_N_Positive", "Microscopy_Percent",
  "Historical_Transmission", "Current_Transmission",
  "Setting_20", "Setting_15", "Setting_10", "Setting_5",
  "PCR_Method", "Microscopy_Fields", "Sampling_Season", "Notes"
)

# Preview the table again to check the new header.
head(malaria_data)

Visualization of PCR % against microscopy %

# Scatter plot of PCR prevalence (y) against microscopy prevalence (x).
# plot() takes x first and y second, so the microscopy column must come first
# to agree with the axis labels below.
plot(malaria_data$Microscopy_Percent, malaria_data$PCR_Percent,
     xlab = "Microscopy %", ylab = "PCR %",
     main = "PCR vs Microscopy Prevalence",
     col = "blue", pch = 19)
# Dashed red identity line (intercept 0, slope 1): points above it have a
# higher PCR % than microscopy %.
abline(0, 1, lty = 2, col = "red")

Prevalence Ratio

# Prevalence ratio = microscopy prevalence / PCR prevalence.
# Each positive count is first divided by its own number tested, because a
# ratio of raw positive counts only equals the prevalence ratio when both
# methods tested the same number of samples.
malaria_data$Prevalence_Ratio <- with(
  malaria_data,
  (Microscopy_N_Positive / Microscopy_N_Tested) /
    (PCR_N_Positive / PCR_N_Tested)
)

# A zero denominator produces Inf or NaN; store NA instead so that summaries
# and plots skip those rows rather than being distorted by them.
non_finite <- !is.finite(malaria_data$Prevalence_Ratio)
malaria_data$Prevalence_Ratio[non_finite] <- NA_real_

# Preview the table with the new column appended.
head(malaria_data)

PCR% vs Microscopy% by Region

# One scatter panel per region: microscopy % on x, PCR % on y, points coloured
# by region, with a dotted identity line (slope 1, intercept 0) in each panel.
ggplot(
  data = malaria_data,
  mapping = aes(x = Microscopy_Percent, y = PCR_Percent, colour = Region)
) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
  facet_wrap(facets = ~Region) +
  labs(
    x = "Microscopy %",
    y = "PCR %",
    title = "PCR% vs Microscopy% by Region"
  )

Prevalence Ratio by Region

# Base-graphics notched boxplot of the prevalence ratio, one box per region.
# las = 2 draws the axis labels perpendicular to the axes; the fill colours
# are recycled if there are more boxes than colours.
boxplot(
  Prevalence_Ratio ~ Region,
  data = malaria_data,
  notch = TRUE,
  las = 2,
  col = c("lightblue", "lightgreen", "lightpink", "lightyellow"),
  xlab = "Global Region",
  ylab = "Prevalence Ratio",
  main = "Prevalence Ratio by Region"
)
# Dashed red reference line at a ratio of 1.
abline(h = 1, lty = 2, col = "red")

According to the boxplot above, West Africa has the highest median prevalence ratio. This suggests that microscopy is relatively better at detecting infections in West Africa than in the other regions.

Prevalence Ratio by Region Using ggplot

# ggplot2 version of the regional boxplot: one semi-transparent box per
# region, filled by region.
ggplot(
  data = malaria_data,
  mapping = aes(x = Region, y = Prevalence_Ratio, fill = Region)
) +
  geom_boxplot(alpha = 0.7) +
  labs(
    x = "Region",
    y = "Prevalence Ratio",
    title = "Prevalence Ratio by Region"
  )

LS0tDQp0aXRsZTogIk1hbGFyaWEgRGV0ZWN0b3IgUHJvamVjdCINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNClRoaXMgaXMgYW4gW1IgTWFya2Rvd25dKGh0dHA6Ly9ybWFya2Rvd24ucnN0dWRpby5jb20pIE5vdGVib29rLiBXaGVuIHlvdSBleGVjdXRlIGNvZGUgd2l0aGluIHRoZSBub3RlYm9vaywgdGhlIHJlc3VsdHMgYXBwZWFyIGJlbmVhdGggdGhlIGNvZGUuIA0KDQpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ3RybCtTaGlmdCtFbnRlciouIA0KDQojTG9hZGluZyBsaWJyYXJpZXMNCmBgYHtyfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KHJlYWRyKQ0KYGBgDQoNCiMjIyBMb2FkaW5nIERhdGFzZXQNCg0KYGBge3J9DQpsaW5rX3RvX2RhdGFzZXQgPC0gcmVhZC50YWJsZShmaWxlID0gImh0dHBzOi8vcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbS9IYWNrQmlvLUludGVybnNoaXAvcHVibGljX2RhdGFzZXRzL21haW4vUi9sYW5jZXRfbWFsYXJpYS50eHQiLCBoZWFkZXIgPSBUUlVFLCBzZXAgPSAiXHQiKQ0KbWFsYXJpYV9kYXRhIDwtIGxpbmtfdG9fZGF0YXNldA0KaGVhZChtYWxhcmlhX2RhdGEpDQpgYGANCg0KIyMjIFJlbmFtaW5nIG9mIHRoZSBjb2x1bW4gbmFtZXMNCmBgYHtyfQ0KY29sbmFtZXMobWFsYXJpYV9kYXRhKSA8LSBjKCJSZXZpZXcgRm91bmQiLCAiQXV0aG9yIiwgIlRpdGxlIiwgIlllYXIiLCAiUmVnaW9uIiwiQ291bnRyeSIsIkxvY2F0aW9uIiwgIlBDUl9OX1Rlc3RlZCIsICJQQ1JfTl9Qb3NpdGl2ZSIsICJQQ1JfUGVyY2VudCIsDQoiTWljcm9zY29weV9OX1Rlc3RlZCIsICJNaWNyb3Njb3B5X05fUG9zaXRpdmUiLCAiTWljcm9zY29weV9QZXJjZW50IiwgIkhpc3RvcmljYWxfVHJhbnNtaXNzaW9uIiwgIkN1cnJlbnRfVHJhbnNtaXNzaW9uIiwgIlNldHRpbmdfMjAiLCAiU2V0dGluZ18xNSIsICJTZXR0aW5nXzEwIiwgIlNldHRpbmdfNSIsICJQQ1JfTWV0aG9kIiwgIk1pY3Jvc2NvcHlfRmllbGRzIiwgIlNhbXBsaW5nX1NlYXNvbiIsICJOb3RlcyIpDQpoZWFkKG1hbGFyaWFfZGF0YSkNCmBgYA0KDQojIyMgVmlzdWFsaXphdGlvbiBvZiBQQ1IgJSBhZ2FpbnN0IG1pY3Jvc2NvcHkgJQ0KYGBge3J9DQpwbG90KG1hbGFyaWFfZGF0YSRQQ1JfUGVyY2VudCwgbWFsYXJpYV9kYXRhJE1pY3Jvc2NvcHlfUGVyY2VudCwNCiAgICAgeGxhYiA9ICJNaWNyb3Njb3B5ICUiLCB5bGFiID0gIlBDUiAlIiwNCiAgICAgbWFpbiA9ICJQQ1IgdnMgTWljcm9zY29weSBQcmV2YWxlbmNlIiwNCiAgICAgY29sID0gImJsdWUiLCBwY2ggPSAxOSkNCmFibGluZSgwLCAxLCBsdHkgPSAyLCBjb2wgPSAicmVkIikNCmBgYA0KDQojIyMgIFByZXZhbGVuY2UgUmF0aW8NCmBgYHtyfQ0KbWFsYXJpYV9kYXRhJFByZXZhbGVuY2VfUmF0aW8gPC0gbWFs
YXJpYV9kYXRhJE1pY3Jvc2NvcHlfTl9Qb3NpdGl2ZSAvIG1hbGFyaWFfZGF0YSRQQ1JfTl9Qb3NpdGl2ZQ0KaGVhZChtYWxhcmlhX2RhdGEpDQpgYGANCg0KIyMjIFBDUiUgdnMgTWljcm9zY29weSUgYnkgUmVnaW9uDQpgYGB7cn0NCmdncGxvdChtYWxhcmlhX2RhdGEsIGFlcyh4ID0gTWljcm9zY29weV9QZXJjZW50LCB5ID0gUENSX1BlcmNlbnQsIGNvbG9yID0gUmVnaW9uKSkgKw0KZ2VvbV9wb2ludCgpICsNCmdlb21fYWJsaW5lKGludGVyY2VwdCA9IDAsIHNsb3BlID0gMSwgbGluZXR5cGUgPSAiZG90dGVkIikgKw0KZmFjZXRfd3JhcCh+UmVnaW9uKSArDQpsYWJzKHRpdGxlID0gIlBDUiUgdnMgTWljcm9zY29weSUgYnkgUmVnaW9uIiwNCnggPSAiTWljcm9zY29weSAlIiwgeSA9ICJQQ1IgJSIpDQoNCmBgYA0KDQoNCiMjIyBQcmV2YWxlbmNlIFJhdGlvIGJ5IFJlZ2lvbg0KYGBge3J9DQpib3hwbG90KFByZXZhbGVuY2VfUmF0aW8gfiBSZWdpb24sIGRhdGEgPSBtYWxhcmlhX2RhdGEsDQogICAgICAgIG1haW4gPSAiUHJldmFsZW5jZSBSYXRpbyBieSBSZWdpb24iLA0KICAgICAgICB4bGFiID0gIkdsb2JhbCBSZWdpb24iLCB5bGFiID0gIlByZXZhbGVuY2UgUmF0aW8iLA0KICAgICAgICBjb2wgPSBjKCJsaWdodGJsdWUiLCJsaWdodGdyZWVuIiwibGlnaHRwaW5rIiwibGlnaHR5ZWxsb3ciKSwNCiAgICAgICAgbGFzID0gMiwgbm90Y2ggPSBUUlVFKQ0KYWJsaW5lKGggPSAxLCBjb2wgPSAicmVkIiwgbHR5ID0gMikgDQoNCmBgYA0KQWNjb3JkaW5nIHRvIHRoZSBib3hwbG90IGFib3ZlLCBXZXN0IEFmcmljYSBoYXMgdGhlIGhpZ2hlc3QgbWVkaWFuIHByZXZhbGVuY2UgcmF0aW8uVGhpcyBzdWdnZXN0cyBtaWNyb3Njb3B5IGlzIHJlbGF0aXZlbHkgYmV0dGVyIGF0IGRldGVjdGluZyBpbmZlY3Rpb25zIGNvbXBhcmVkIHRvIG90aGVyIHJlZ2lvbnMuDQoNCiMjIyBQcmV2YWxlbmNlIFJhdGlvIGJ5IFJlZ2lvbiBVc2luZyBnZ3Bsb3QNCmBgYHtyfQ0KZ2dwbG90KG1hbGFyaWFfZGF0YSwgYWVzKHggPSBSZWdpb24sIHkgPSBQcmV2YWxlbmNlX1JhdGlvLCBmaWxsID0gUmVnaW9uKSkgKw0KZ2VvbV9ib3hwbG90KGFscGhhID0gMC43KSArDQpsYWJzKHRpdGxlID0gIlByZXZhbGVuY2UgUmF0aW8gYnkgUmVnaW9uIiwNCnggPSAiUmVnaW9uIiwgeSA9ICJQcmV2YWxlbmNlIFJhdGlvIikgDQpgYGANCg0K