library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# pacman::p_load(janitor, tidyverse, readr)
#-----------Load database
library(readr)
# Read the measles line list.
# Accented characters were arriving as raw bytes (e.g. "R\xe9f\xe9rence" in
# LaboratoryName), which indicates the file is not UTF-8. Declare the encoding
# so text columns are decoded properly.
# NOTE(review): "latin1" matches the \xe9 byte for "é"; switch to
# "windows-1252" if other characters still look wrong.
db_1 <- read_csv(
  "../Data/Measles_db_on_30_June_year_2025.csv",
  locale = locale(encoding = "latin1")
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 23394 Columns: 45
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (30): Date fBirth naissance, DateIspecimenCollecteds, DateSpecimenSentto...
## dbl (10): SpecimenSource, MeaslesIgm, FinalClassification, RubellaIgm, AgeIn...
## lgl (5): PatientsResidence, EpiLink, VirusIsolation, DateSpecimenRecInLab, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the full table with view().
view(db_1)

# Number of rows
number_of_rows <- nrow(db_1)

# Number of columns
my_nb_col <- ncol(db_1)

# Number of rows and columns
dim(db_1)
## [1] 23394 45
# =========================================================
# Work of 19-02-2026
# =========================================================
# Column names exactly as imported from the CSV
db_1 |> names()
## [1] "Date fBirth naissance" "SpecimenSource"
## [3] "DateIspecimenCollecteds" "DateSpecimenSenttolab"
## [5] "MeaslesIgm" "FinalClassification"
## [7] "RubellaIgm" "Sex"
## [9] "DistrictOfresidence" "DateLabSentResulttodistrict"
## [11] "LabId" "DateOfLastvaccination"
## [13] "AgeInYears" "AgeInMonths"
## [15] "Date OfOn set" "SpecimenCondition"
## [17] "DateLabReceivedSpecimen" "LaboratoryName"
## [19] "IdNumber" "Age s"
## [21] "ReportingDistrict" "Towncity"
## [23] "Urbanrural" "DateSeenHealthFacility"
## [25] "NumberOfVaccinedoses" "DateHealthfacilitynotified"
## [27] "ReportingHealthfacility" "ProvinceOfResidence"
## [29] "DateSentFormtodistrict" "DateDistrictRecLabResults"
## [31] "Inoutpatient" "Outcome"
## [33] "DateRecformdistrict" "DateReceivedNatlevel"
## [35] "OtherLabResults" "PatientsResidence"
## [37] "DataType" "Ward"
## [39] "EpiLink" "VirusIsolation"
## [41] "DateSpecimenRecInLab" "WayofLife"
## [43] "OtherCountry" "CountryCode"
## [45] "DiseaseCondition"
# Per-column summary (type, quartiles for numeric columns, NA counts)
db_1 |> summary()
## Date fBirth naissance SpecimenSource DateIspecimenCollecteds
## Length:23394 Min. :1.00 Length:23394
## Class :character 1st Qu.:2.00 Class :character
## Mode :character Median :2.00 Mode :character
## Mean :1.96
## 3rd Qu.:2.00
## Max. :2.00
## NA's :11379
## DateSpecimenSenttolab MeaslesIgm FinalClassification RubellaIgm
## Length:23394 Min. :1.000 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Mode :character Median :2.000 Median :4.000 Median :2.000
## Mean :2.489 Mean :3.147 Mean :2.708
## 3rd Qu.:3.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :9.000
## NA's :11087
## Sex DistrictOfresidence DateLabSentResulttodistrict
## Length:23394 Length:23394 Length:23394
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## LabId DateOfLastvaccination AgeInYears AgeInMonths
## Length:23394 Length:23394 Min. : 0.000 Min. : 0.000
## Class :character Class :character 1st Qu.: 2.000 1st Qu.: 0.000
## Mode :character Mode :character Median : 4.000 Median : 0.000
## Mean : 6.833 Mean : 6.877
## 3rd Qu.: 8.000 3rd Qu.: 7.000
## Max. :109.000 Max. :600.000
## NA's :1743 NA's :13141
## Date OfOn set SpecimenCondition DateLabReceivedSpecimen
## Length:23394 Min. :1.000 Length:23394
## Class :character 1st Qu.:1.000 Class :character
## Mode :character Median :1.000 Mode :character
## Mean :1.058
## 3rd Qu.:1.000
## Max. :2.000
## NA's :10700
## LaboratoryName IdNumber Age s ReportingDistrict
## Length:23394 Length:23394 Min. : 0.000 Length:23394
## Class :character Class :character 1st Qu.: 2.000 Class :character
## Mode :character Mode :character Median : 5.000 Mode :character
## Mean : 8.072
## 3rd Qu.: 9.000
## Max. :90.000
## NA's :17303
## Towncity Urbanrural DateSeenHealthFacility
## Length:23394 Length:23394 Length:23394
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## NumberOfVaccinedoses DateHealthfacilitynotified ReportingHealthfacility
## Min. : 0.00 Length:23394 Length:23394
## 1st Qu.: 0.00 Class :character Class :character
## Median : 0.00 Mode :character Mode :character
## Mean :13.71
## 3rd Qu.: 1.00
## Max. :99.00
## NA's :5805
## ProvinceOfResidence DateSentFormtodistrict DateDistrictRecLabResults
## Length:23394 Length:23394 Length:23394
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Inoutpatient Outcome DateRecformdistrict DateReceivedNatlevel
## Min. :1.000 Length:23394 Length:23394 Length:23394
## 1st Qu.:2.000 Class :character Class :character Class :character
## Median :2.000 Mode :character Mode :character Mode :character
## Mean :1.934
## 3rd Qu.:2.000
## Max. :2.000
## NA's :7992
## OtherLabResults PatientsResidence DataType Ward
## Length:23394 Mode:logical Length:23394 Length:23394
## Class :character NA's:23394 Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## EpiLink VirusIsolation DateSpecimenRecInLab WayofLife
## Mode:logical Mode:logical Mode:logical Length:23394
## NA's:23394 NA's:23394 NA's:23394 Class :character
## Mode :character
##
##
##
##
## OtherCountry CountryCode DiseaseCondition
## Mode:logical Length:23394 Length:23394
## NA's:23394 Class :character Class :character
## Mode :character Mode :character
##
##
##
##
# Compact structure: column types and the first few values of each column
db_1 |> str()
## spc_tbl_ [23,394 × 45] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Date fBirth naissance : chr [1:23394] "1/19/1999" "8/15/1994" "12/25/2016" "5/7/2024" ...
## $ SpecimenSource : num [1:23394] 1 1 1 1 1 1 1 1 1 1 ...
## $ DateIspecimenCollecteds : chr [1:23394] NA "1/23/2025" "1/21/2025" "1/26/2025" ...
## $ DateSpecimenSenttolab : chr [1:23394] "1/30/2025" "1/30/2025" "1/30/2025" "2/2/2025" ...
## $ MeaslesIgm : num [1:23394] 2 2 2 1 1 1 1 1 1 1 ...
## $ FinalClassification : num [1:23394] 4 4 4 1 1 1 1 1 1 1 ...
## $ RubellaIgm : num [1:23394] 2 2 2 4 4 4 4 4 4 4 ...
## $ Sex : chr [1:23394] "2" "2" "1" "1" ...
## $ DistrictOfresidence : chr [1:23394] "Annaba" "El Hadjar" "El Hadjar" "ADRAR" ...
## $ DateLabSentResulttodistrict: chr [1:23394] "1/30/2025" "1/30/2025" "1/30/2025" "2/2/2025" ...
## $ LabId : chr [1:23394] "041-2025" "042-2025" "043-2025" "044-2025" ...
## $ DateOfLastvaccination : chr [1:23394] NA NA "1/1/2023" NA ...
## $ AgeInYears : num [1:23394] 26 30 8 NA 1 NA 1 4 3 4 ...
## $ AgeInMonths : num [1:23394] NA NA NA 8 NA 9 NA NA NA NA ...
## $ Date OfOn set : chr [1:23394] "1/12/2025" "1/18/2025" "1/19/2025" "1/16/2025" ...
## $ SpecimenCondition : num [1:23394] 1 1 1 1 1 1 1 1 1 1 ...
## $ DateLabReceivedSpecimen : chr [1:23394] "1/30/2025" "1/30/2025" "1/30/2025" "2/2/2025" ...
## $ LaboratoryName : chr [1:23394] "Laboratoire National de R\xe9f\xe9rence pour la Rougeole" "Laboratoire National de R\xe9f\xe9rence pour la Rougeole" "Laboratoire National de R\xe9f\xe9rence pour la Rougeole" "Laboratoire National de R\xe9f\xe9rence pour la Rougeole" ...
## $ IdNumber : chr [1:23394] "ALG-ANB-ANB-25-039" "ALG-ANB-EHD-25-040" "ALG-ANB-EHD-25-041" "ALG-ADR-ADR-25-042" ...
## $ Age s : num [1:23394] NA NA NA NA NA NA NA NA NA NA ...
## $ ReportingDistrict : chr [1:23394] NA NA NA NA ...
## $ Towncity : chr [1:23394] NA NA NA NA ...
## $ Urbanrural : chr [1:23394] NA NA NA NA ...
## $ DateSeenHealthFacility : chr [1:23394] NA NA NA NA ...
## $ NumberOfVaccinedoses : num [1:23394] NA NA 1 NA NA NA NA NA NA NA ...
## $ DateHealthfacilitynotified : chr [1:23394] NA NA NA NA ...
## $ ReportingHealthfacility : chr [1:23394] NA NA NA NA ...
## $ ProvinceOfResidence : chr [1:23394] NA NA NA NA ...
## $ DateSentFormtodistrict : chr [1:23394] NA NA NA NA ...
## $ DateDistrictRecLabResults : chr [1:23394] NA NA NA NA ...
## $ Inoutpatient : num [1:23394] NA NA NA NA NA NA NA NA NA NA ...
## $ Outcome : chr [1:23394] NA NA NA NA ...
## $ DateRecformdistrict : chr [1:23394] NA NA NA NA ...
## $ DateReceivedNatlevel : chr [1:23394] NA NA NA NA ...
## $ OtherLabResults : chr [1:23394] NA NA NA NA ...
## $ PatientsResidence : logi [1:23394] NA NA NA NA NA NA ...
## $ DataType : chr [1:23394] NA NA NA NA ...
## $ Ward : chr [1:23394] NA NA NA NA ...
## $ EpiLink : logi [1:23394] NA NA NA NA NA NA ...
## $ VirusIsolation : logi [1:23394] NA NA NA NA NA NA ...
## $ DateSpecimenRecInLab : logi [1:23394] NA NA NA NA NA NA ...
## $ WayofLife : chr [1:23394] NA NA NA NA ...
## $ OtherCountry : logi [1:23394] NA NA NA NA NA NA ...
## $ CountryCode : chr [1:23394] "ALG" "ALG" "ALG" "ALG" ...
## $ DiseaseCondition : chr [1:23394] "Measles" "Measles" "Measles" "Measles" ...
## - attr(*, "spec")=
## .. cols(
## .. `Date fBirth naissance` = col_character(),
## .. SpecimenSource = col_double(),
## .. DateIspecimenCollecteds = col_character(),
## .. DateSpecimenSenttolab = col_character(),
## .. MeaslesIgm = col_double(),
## .. FinalClassification = col_double(),
## .. RubellaIgm = col_double(),
## .. Sex = col_character(),
## .. DistrictOfresidence = col_character(),
## .. DateLabSentResulttodistrict = col_character(),
## .. LabId = col_character(),
## .. DateOfLastvaccination = col_character(),
## .. AgeInYears = col_double(),
## .. AgeInMonths = col_double(),
## .. `Date OfOn set` = col_character(),
## .. SpecimenCondition = col_double(),
## .. DateLabReceivedSpecimen = col_character(),
## .. LaboratoryName = col_character(),
## .. IdNumber = col_character(),
## .. `Age s` = col_double(),
## .. ReportingDistrict = col_character(),
## .. Towncity = col_character(),
## .. Urbanrural = col_character(),
## .. DateSeenHealthFacility = col_character(),
## .. NumberOfVaccinedoses = col_double(),
## .. DateHealthfacilitynotified = col_character(),
## .. ReportingHealthfacility = col_character(),
## .. ProvinceOfResidence = col_character(),
## .. DateSentFormtodistrict = col_character(),
## .. DateDistrictRecLabResults = col_character(),
## .. Inoutpatient = col_double(),
## .. Outcome = col_character(),
## .. DateRecformdistrict = col_character(),
## .. DateReceivedNatlevel = col_character(),
## .. OtherLabResults = col_character(),
## .. PatientsResidence = col_logical(),
## .. DataType = col_character(),
## .. Ward = col_character(),
## .. EpiLink = col_logical(),
## .. VirusIsolation = col_logical(),
## .. DateSpecimenRecInLab = col_logical(),
## .. WayofLife = col_character(),
## .. OtherCountry = col_logical(),
## .. CountryCode = col_character(),
## .. DiseaseCondition = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# First rows of the table
db_1 |> head()
## # A tibble: 6 × 45
## `Date fBirth naissance` SpecimenSource DateIspecimenCollecteds
## <chr> <dbl> <chr>
## 1 1/19/1999 1 <NA>
## 2 8/15/1994 1 1/23/2025
## 3 12/25/2016 1 1/21/2025
## 4 5/7/2024 1 1/26/2025
## 5 1/10/2024 1 1/26/2025
## 6 <NA> 1 1/26/2025
## # ℹ 42 more variables: DateSpecimenSenttolab <chr>, MeaslesIgm <dbl>,
## # FinalClassification <dbl>, RubellaIgm <dbl>, Sex <chr>,
## # DistrictOfresidence <chr>, DateLabSentResulttodistrict <chr>, LabId <chr>,
## # DateOfLastvaccination <chr>, AgeInYears <dbl>, AgeInMonths <dbl>,
## # `Date OfOn set` <chr>, SpecimenCondition <dbl>,
## # DateLabReceivedSpecimen <chr>, LaboratoryName <chr>, IdNumber <chr>,
## # `Age s` <dbl>, ReportingDistrict <chr>, Towncity <chr>, Urbanrural <chr>, …
# Last rows of the table
db_1 |> tail()
## # A tibble: 6 × 45
## `Date fBirth naissance` SpecimenSource DateIspecimenCollecteds
## <chr> <dbl> <chr>
## 1 2/22/2022 NA 5/23/2025
## 2 5/17/2017 NA 1/21/2025
## 3 10/30/2022 NA 2/18/2025
## 4 10/11/2022 NA 3/26/2025
## 5 5/13/2024 NA 5/19/2025
## 6 7/1/2016 NA 3/17/2025
## # ℹ 42 more variables: DateSpecimenSenttolab <chr>, MeaslesIgm <dbl>,
## # FinalClassification <dbl>, RubellaIgm <dbl>, Sex <chr>,
## # DistrictOfresidence <chr>, DateLabSentResulttodistrict <chr>, LabId <chr>,
## # DateOfLastvaccination <chr>, AgeInYears <dbl>, AgeInMonths <dbl>,
## # `Date OfOn set` <chr>, SpecimenCondition <dbl>,
## # DateLabReceivedSpecimen <chr>, LaboratoryName <chr>, IdNumber <chr>,
## # `Age s` <dbl>, ReportingDistrict <chr>, Towncity <chr>, Urbanrural <chr>, …
# Records sharing the same IdNumber, found with janitor's get_dupes().
# Written as a direct call; the piped form (%>% or |>) is equivalent.
milse <- get_dupes(db_1, IdNumber)
# Share of each raw Sex value (before any recoding)
db_1$Sex |>
  table() |>
  prop.table()
##
## 1 2 f F Female m
## 0.0105608000 0.0098711151 0.0007758955 0.4623475150 0.0002586318 0.0005172637
## M Male
## 0.5152377258 0.0004310531
# Harmonise the Sex column: the raw data mixes numeric codes, single letters
# and full words for the same category.
db_1$Sex[db_1$Sex %in% c("1", "m", "Male", "M", "Masculin")] <- "Male"
# Apply the matching recode for the female variants so the column ends up with
# two consistent labels instead of leaving "2", "f", "F" and "Female" apart.
# NOTE(review): assumes code "2" means female (the counterpart of "1" = male
# above) -- confirm against the data dictionary.
db_1$Sex[db_1$Sex %in% c("2", "f", "Female", "F", "Feminin", "Féminin")] <- "Female"

# Rename the EpiLink column to EpiLink_Milse
names(db_1)[names(db_1) == "EpiLink"] <- "EpiLink_Milse"
# Base-R construction of the per-country case counts used by the plot.

# 1) Count records per country code (missing codes are excluded)
my_tab <- table(db_1$CountryCode, useNA = "no")

# 2) Build the data frame
df <- data.frame(
  CountryCode = names(my_tab),
  n = as.integer(my_tab),
  stringsAsFactors = FALSE
)

# 3) Sort by n in ascending order, like arrange(n)
#    (pass decreasing = TRUE to order() for descending order)
df <- df[order(df$n), ]

# 4) Fix the factor levels in the sorted order, like fct_reorder(CountryCode, n),
#    so the plot keeps this ordering on the axis
df$CountryCode <- factor(x = df$CountryCode, levels = df$CountryCode)
# df <- db_1 %>%
# count(CountryCode) %>%
# ungroup() %>%
# # mutate(prop = n/sum(n), percentage = 100*n/sum(n), label_perc = scales::percent(x = round(prop, digits = 2))) %>%
# arrange(n) %>%
# mutate(CountryCode = fct_reorder(CountryCode, n))
# Horizontal bar chart of the number of records per country code, with each
# bar annotated by its count.
ggplot2::ggplot(data = df, mapping = aes(y = CountryCode, x = n)) +
  geom_col(fill = "tomato") +
  geom_label(
    aes(label = n),
    hjust = -.01, # place the label just past the end of the bar
    size = 4,
    fontface = "bold",
    colour = "white",
    label.size = 0.3,
    fill = "tomato"
  ) +
  # Widen the right-hand side of the x axis so the labels, which are drawn
  # beyond the bar ends, have room and are not cut off at the panel edge.
  # The left-hand padding keeps the default 5%.
  scale_x_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  labs(
    title = "Distribution of suspected cases of Measles in WA countries, 2025",
    subtitle = "These data do not reflect reality, they are for educational purposes only.",
    caption = "Data source: WHO AFRO (IST WA)",
    x = "Cases",
    y = "Countries"
  ) +
  theme_bw() +
  theme(
    title = element_text(size = 14, face = "bold"),
    plot.caption = element_text(size = 12, face = "italic"),
    axis.title.x = element_text(size = 12),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10),
    plot.background = element_blank()
  )
