This is an R Markdown
Notebook. When you execute code within the notebook, the results appear
beneath the code.
Try executing this chunk by clicking the Run button within
the chunk or by placing your cursor inside it and pressing
Ctrl+Shift+Enter.
# Loading libraries
library(tidyverse)
library(readr)
Loading Dataset
# Read the tab-separated Lancet malaria table straight from the HackBio
# public_datasets repository; the first row of the file holds the column names.
link_to_dataset <- read.table(
  "https://raw.githubusercontent.com/HackBio-Internship/public_datasets/main/R/lancet_malaria.txt",
  sep = "\t",
  header = TRUE
)

# Keep the freshly read table under its own name and do all further work on
# `malaria_data`.
malaria_data <- link_to_dataset

# Preview the first rows to confirm the import worked.
head(malaria_data)
Renaming of the column names
# Replace the imported headers with descriptive names. The names are assigned
# by position, so their order must follow the column order of the file.
names(malaria_data) <- c(
  # Study identification
  "Review Found", "Author", "Title", "Year",
  # Study site
  "Region", "Country", "Location",
  # PCR results
  "PCR_N_Tested", "PCR_N_Positive", "PCR_Percent",
  # Microscopy results
  "Microscopy_N_Tested", "Microscopy_N_Positive", "Microscopy_Percent",
  # Transmission and setting descriptors
  "Historical_Transmission", "Current_Transmission",
  "Setting_20", "Setting_15", "Setting_10", "Setting_5",
  # Methods and free-text fields
  "PCR_Method", "Microscopy_Fields", "Sampling_Season", "Notes"
)

# Preview the table again to check the new headers.
head(malaria_data)
Visualization of PCR % against microscopy %
# Scatter plot of PCR prevalence (y axis) against microscopy prevalence
# (x axis), one point per row of `malaria_data`.
# The data arguments are passed in the same order as the axis labels: the
# x values are Microscopy_Percent and the y values are PCR_Percent, so each
# axis shows the quantity its label names.
plot(
  x = malaria_data$Microscopy_Percent,
  y = malaria_data$PCR_Percent,
  xlab = "Microscopy %", ylab = "PCR %",
  main = "PCR vs Microscopy Prevalence",
  col = "blue", pch = 19
)

# Dashed red identity line (intercept 0, slope 1): points above it have a
# higher PCR % than microscopy %.
abline(0, 1, lty = 2, col = "red")

Prevalence Ratio
# Prevalence ratio = microscopy prevalence / PCR prevalence.
# The ratio is taken between the two percent columns rather than between the
# raw positive counts: dividing counts only equals a prevalence ratio when
# both methods tested the same number of samples. These are also the columns
# shown in the PCR % vs microscopy % scatter plots, so a ratio of 1
# corresponds to a point on their identity line.
malaria_data$Prevalence_Ratio <-
  malaria_data$Microscopy_Percent / malaria_data$PCR_Percent

# A PCR prevalence of zero yields Inf (or NaN for 0 / 0); record those rows as
# missing so they cannot distort the summaries and box plots built on this
# column.
malaria_data$Prevalence_Ratio[!is.finite(malaria_data$Prevalence_Ratio)] <- NA

# Preview the table with the new column appended.
head(malaria_data)
PCR% vs Microscopy% by Region
# One scatter panel per Region: microscopy % on the x axis, PCR % on the y
# axis, points coloured by Region.
ggplot(
  data = malaria_data,
  mapping = aes(Microscopy_Percent, PCR_Percent, colour = Region)
) +
  geom_point() +
  # Dotted identity line (y = x) as a visual reference in every panel.
  geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
  facet_wrap(vars(Region)) +
  labs(
    x = "Microscopy %",
    y = "PCR %",
    title = "PCR% vs Microscopy% by Region"
  )

Prevalence Ratio by Region
# Base-graphics box plot of the prevalence ratio, one box per Region.
boxplot(
  Prevalence_Ratio ~ Region,
  data = malaria_data,
  notch = TRUE, # draw a notch in each side of the boxes
  las = 2,      # axis labels perpendicular to the axis
  col = c("lightblue", "lightgreen", "lightpink", "lightyellow"),
  xlab = "Global Region",
  ylab = "Prevalence Ratio",
  main = "Prevalence Ratio by Region"
)

# Dashed red reference line at a ratio of 1.
abline(h = 1, lty = 2, col = "red")

According to the boxplot above, West Africa has the highest median
prevalence ratio. This suggests that microscopy detects a larger share of
PCR-detectable infections in West Africa than it does in other regions.
Prevalence Ratio by Region Using ggplot
# ggplot2 version of the box plot: one semi-transparent box per Region, filled
# by Region.
ggplot(
  data = malaria_data,
  mapping = aes(Region, Prevalence_Ratio, fill = Region)
) +
  geom_boxplot(alpha = 0.7) +
  labs(
    x = "Region",
    y = "Prevalence Ratio",
    title = "Prevalence Ratio by Region"
  )

LS0tDQp0aXRsZTogIk1hbGFyaWEgRGV0ZWN0b3IgUHJvamVjdCINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNClRoaXMgaXMgYW4gW1IgTWFya2Rvd25dKGh0dHA6Ly9ybWFya2Rvd24ucnN0dWRpby5jb20pIE5vdGVib29rLiBXaGVuIHlvdSBleGVjdXRlIGNvZGUgd2l0aGluIHRoZSBub3RlYm9vaywgdGhlIHJlc3VsdHMgYXBwZWFyIGJlbmVhdGggdGhlIGNvZGUuIA0KDQpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ3RybCtTaGlmdCtFbnRlciouIA0KDQojTG9hZGluZyBsaWJyYXJpZXMNCmBgYHtyfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KHJlYWRyKQ0KYGBgDQoNCiMjIyBMb2FkaW5nIERhdGFzZXQNCg0KYGBge3J9DQpsaW5rX3RvX2RhdGFzZXQgPC0gcmVhZC50YWJsZShmaWxlID0gImh0dHBzOi8vcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbS9IYWNrQmlvLUludGVybnNoaXAvcHVibGljX2RhdGFzZXRzL21haW4vUi9sYW5jZXRfbWFsYXJpYS50eHQiLCBoZWFkZXIgPSBUUlVFLCBzZXAgPSAiXHQiKQ0KbWFsYXJpYV9kYXRhIDwtIGxpbmtfdG9fZGF0YXNldA0KaGVhZChtYWxhcmlhX2RhdGEpDQpgYGANCg0KIyMjIFJlbmFtaW5nIG9mIHRoZSBjb2x1bW4gbmFtZXMNCmBgYHtyfQ0KY29sbmFtZXMobWFsYXJpYV9kYXRhKSA8LSBjKCJSZXZpZXcgRm91bmQiLCAiQXV0aG9yIiwgIlRpdGxlIiwgIlllYXIiLCAiUmVnaW9uIiwiQ291bnRyeSIsIkxvY2F0aW9uIiwgIlBDUl9OX1Rlc3RlZCIsICJQQ1JfTl9Qb3NpdGl2ZSIsICJQQ1JfUGVyY2VudCIsDQoiTWljcm9zY29weV9OX1Rlc3RlZCIsICJNaWNyb3Njb3B5X05fUG9zaXRpdmUiLCAiTWljcm9zY29weV9QZXJjZW50IiwgIkhpc3RvcmljYWxfVHJhbnNtaXNzaW9uIiwgIkN1cnJlbnRfVHJhbnNtaXNzaW9uIiwgIlNldHRpbmdfMjAiLCAiU2V0dGluZ18xNSIsICJTZXR0aW5nXzEwIiwgIlNldHRpbmdfNSIsICJQQ1JfTWV0aG9kIiwgIk1pY3Jvc2NvcHlfRmllbGRzIiwgIlNhbXBsaW5nX1NlYXNvbiIsICJOb3RlcyIpDQpoZWFkKG1hbGFyaWFfZGF0YSkNCmBgYA0KDQojIyMgVmlzdWFsaXphdGlvbiBvZiBQQ1IgJSBhZ2FpbnN0IG1pY3Jvc2NvcHkgJQ0KYGBge3J9DQpwbG90KG1hbGFyaWFfZGF0YSRQQ1JfUGVyY2VudCwgbWFsYXJpYV9kYXRhJE1pY3Jvc2NvcHlfUGVyY2VudCwNCiAgICAgeGxhYiA9ICJNaWNyb3Njb3B5ICUiLCB5bGFiID0gIlBDUiAlIiwNCiAgICAgbWFpbiA9ICJQQ1IgdnMgTWljcm9zY29weSBQcmV2YWxlbmNlIiwNCiAgICAgY29sID0gImJsdWUiLCBwY2ggPSAxOSkNCmFibGluZSgwLCAxLCBsdHkgPSAyLCBjb2wgPSAicmVkIikNCmBgYA0KDQojIyMgIFByZXZhbGVuY2UgUmF0aW8NCmBgYHtyfQ0KbWFsYXJpYV9kYXRhJFByZXZhbGVuY2VfUmF0aW8gPC0gbWFs
YXJpYV9kYXRhJE1pY3Jvc2NvcHlfTl9Qb3NpdGl2ZSAvIG1hbGFyaWFfZGF0YSRQQ1JfTl9Qb3NpdGl2ZQ0KaGVhZChtYWxhcmlhX2RhdGEpDQpgYGANCg0KIyMjIFBDUiUgdnMgTWljcm9zY29weSUgYnkgUmVnaW9uDQpgYGB7cn0NCmdncGxvdChtYWxhcmlhX2RhdGEsIGFlcyh4ID0gTWljcm9zY29weV9QZXJjZW50LCB5ID0gUENSX1BlcmNlbnQsIGNvbG9yID0gUmVnaW9uKSkgKw0KZ2VvbV9wb2ludCgpICsNCmdlb21fYWJsaW5lKGludGVyY2VwdCA9IDAsIHNsb3BlID0gMSwgbGluZXR5cGUgPSAiZG90dGVkIikgKw0KZmFjZXRfd3JhcCh+UmVnaW9uKSArDQpsYWJzKHRpdGxlID0gIlBDUiUgdnMgTWljcm9zY29weSUgYnkgUmVnaW9uIiwNCnggPSAiTWljcm9zY29weSAlIiwgeSA9ICJQQ1IgJSIpDQoNCmBgYA0KDQoNCiMjIyBQcmV2YWxlbmNlIFJhdGlvIGJ5IFJlZ2lvbg0KYGBge3J9DQpib3hwbG90KFByZXZhbGVuY2VfUmF0aW8gfiBSZWdpb24sIGRhdGEgPSBtYWxhcmlhX2RhdGEsDQogICAgICAgIG1haW4gPSAiUHJldmFsZW5jZSBSYXRpbyBieSBSZWdpb24iLA0KICAgICAgICB4bGFiID0gIkdsb2JhbCBSZWdpb24iLCB5bGFiID0gIlByZXZhbGVuY2UgUmF0aW8iLA0KICAgICAgICBjb2wgPSBjKCJsaWdodGJsdWUiLCJsaWdodGdyZWVuIiwibGlnaHRwaW5rIiwibGlnaHR5ZWxsb3ciKSwNCiAgICAgICAgbGFzID0gMiwgbm90Y2ggPSBUUlVFKQ0KYWJsaW5lKGggPSAxLCBjb2wgPSAicmVkIiwgbHR5ID0gMikgDQoNCmBgYA0KQWNjb3JkaW5nIHRvIHRoZSBib3hwbG90IGFib3ZlLCBXZXN0IEFmcmljYSBoYXMgdGhlIGhpZ2hlc3QgbWVkaWFuIHByZXZhbGVuY2UgcmF0aW8uVGhpcyBzdWdnZXN0cyBtaWNyb3Njb3B5IGlzIHJlbGF0aXZlbHkgYmV0dGVyIGF0IGRldGVjdGluZyBpbmZlY3Rpb25zIGNvbXBhcmVkIHRvIG90aGVyIHJlZ2lvbnMuDQoNCiMjIyBQcmV2YWxlbmNlIFJhdGlvIGJ5IFJlZ2lvbiBVc2luZyBnZ3Bsb3QNCmBgYHtyfQ0KZ2dwbG90KG1hbGFyaWFfZGF0YSwgYWVzKHggPSBSZWdpb24sIHkgPSBQcmV2YWxlbmNlX1JhdGlvLCBmaWxsID0gUmVnaW9uKSkgKw0KZ2VvbV9ib3hwbG90KGFscGhhID0gMC43KSArDQpsYWJzKHRpdGxlID0gIlByZXZhbGVuY2UgUmF0aW8gYnkgUmVnaW9uIiwNCnggPSAiUmVnaW9uIiwgeSA9ICJQcmV2YWxlbmNlIFJhdGlvIikgDQpgYGANCg0K