library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# pacman::p_load(janitor, tidyverse, readr)
#-----------Load database
library(readr)
# Import the measles line list from CSV into a tibble.
# Read with readr's default (UTF-8) encoding, accented text comes back as
# undecoded bytes (e.g. "R\xe9f\xe9rence" in LaboratoryName), so the source
# encoding is declared explicitly and readr converts the text to UTF-8.
# NOTE(review): "latin1" is presumed from the \xe9 byte standing for "é";
# confirm with readr::guess_encoding() on the file.
# readr also reports parsing issues for this file; inspect them with
# problems(db_1).
db_1 <- read_csv(
  "../Data/Measles_db_on_30_June_year_2025.csv",
  locale = locale(encoding = "latin1")
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 23394 Columns: 45
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (30): Date fBirth naissance, DateIspecimenCollecteds, DateSpecimenSentto...
## dbl (10): SpecimenSource, MeaslesIgm, FinalClassification, RubellaIgm, AgeIn...
## lgl (5): PatientsResidence, EpiLink, VirusIsolation, DateSpecimenRecInLab, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the imported table in the viewer for a visual check.
view(db_1)
# Number of rows (assigned once, with the idiomatic `<-` operator)
number_of_rows <- nrow(db_1)
# Number of columns
my_nb_col <- ncol(db_1)
# Number of rows and columns together, as c(rows, columns)
dim(db_1)
## [1] 23394 45
# =========================================================
# Work of 19-02-2026
# =========================================================
# List every column name of the imported table.
colnames(db_1)
## [1] "Date fBirth naissance" "SpecimenSource"
## [3] "DateIspecimenCollecteds" "DateSpecimenSenttolab"
## [5] "MeaslesIgm" "FinalClassification"
## [7] "RubellaIgm" "Sex"
## [9] "DistrictOfresidence" "DateLabSentResulttodistrict"
## [11] "LabId" "DateOfLastvaccination"
## [13] "AgeInYears" "AgeInMonths"
## [15] "Date OfOn set" "SpecimenCondition"
## [17] "DateLabReceivedSpecimen" "LaboratoryName"
## [19] "IdNumber" "Age s"
## [21] "ReportingDistrict" "Towncity"
## [23] "Urbanrural" "DateSeenHealthFacility"
## [25] "NumberOfVaccinedoses" "DateHealthfacilitynotified"
## [27] "ReportingHealthfacility" "ProvinceOfResidence"
## [29] "DateSentFormtodistrict" "DateDistrictRecLabResults"
## [31] "Inoutpatient" "Outcome"
## [33] "DateRecformdistrict" "DateReceivedNatlevel"
## [35] "OtherLabResults" "PatientsResidence"
## [37] "DataType" "Ward"
## [39] "EpiLink" "VirusIsolation"
## [41] "DateSpecimenRecInLab" "WayofLife"
## [43] "OtherCountry" "CountryCode"
## [45] "DiseaseCondition"
# Per-column overview: quartiles, mean and NA counts for numeric columns,
# length/class/mode for character columns.
db_1 %>% summary()
## Date fBirth naissance SpecimenSource DateIspecimenCollecteds
## Length:23394 Min. :1.00 Length:23394
## Class :character 1st Qu.:2.00 Class :character
## Mode :character Median :2.00 Mode :character
## Mean :1.96
## 3rd Qu.:2.00
## Max. :2.00
## NA's :11379
## DateSpecimenSenttolab MeaslesIgm FinalClassification RubellaIgm
## Length:23394 Min. :1.000 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Mode :character Median :2.000 Median :4.000 Median :2.000
## Mean :2.489 Mean :3.147 Mean :2.708
## 3rd Qu.:3.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :9.000
## NA's :11087
## Sex DistrictOfresidence DateLabSentResulttodistrict
## Length:23394 Length:23394 Length:23394
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## LabId DateOfLastvaccination AgeInYears AgeInMonths
## Length:23394 Length:23394 Min. : 0.000 Min. : 0.000
## Class :character Class :character 1st Qu.: 2.000 1st Qu.: 0.000
## Mode :character Mode :character Median : 4.000 Median : 0.000
## Mean : 6.833 Mean : 6.877
## 3rd Qu.: 8.000 3rd Qu.: 7.000
## Max. :109.000 Max. :600.000
## NA's :1743 NA's :13141
## Date OfOn set SpecimenCondition DateLabReceivedSpecimen
## Length:23394 Min. :1.000 Length:23394
## Class :character 1st Qu.:1.000 Class :character
## Mode :character Median :1.000 Mode :character
## Mean :1.058
## 3rd Qu.:1.000
## Max. :2.000
## NA's :10700
## LaboratoryName IdNumber Age s ReportingDistrict
## Length:23394 Length:23394 Min. : 0.000 Length:23394
## Class :character Class :character 1st Qu.: 2.000 Class :character
## Mode :character Mode :character Median : 5.000 Mode :character
## Mean : 8.072
## 3rd Qu.: 9.000
## Max. :90.000
## NA's :17303
## Towncity Urbanrural DateSeenHealthFacility
## Length:23394 Length:23394 Length:23394
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## NumberOfVaccinedoses DateHealthfacilitynotified ReportingHealthfacility
## Min. : 0.00 Length:23394 Length:23394
## 1st Qu.: 0.00 Class :character Class :character
## Median : 0.00 Mode :character Mode :character
## Mean :13.71
## 3rd Qu.: 1.00
## Max. :99.00
## NA's :5805
## ProvinceOfResidence DateSentFormtodistrict DateDistrictRecLabResults
## Length:23394 Length:23394 Length:23394
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Inoutpatient Outcome DateRecformdistrict DateReceivedNatlevel
## Min. :1.000 Length:23394 Length:23394 Length:23394
## 1st Qu.:2.000 Class :character Class :character Class :character
## Median :2.000 Mode :character Mode :character Mode :character
## Mean :1.934
## 3rd Qu.:2.000
## Max. :2.000
## NA's :7992
## OtherLabResults PatientsResidence DataType Ward
## Length:23394 Mode:logical Length:23394 Length:23394
## Class :character NA's:23394 Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## EpiLink VirusIsolation DateSpecimenRecInLab WayofLife
## Mode:logical Mode:logical Mode:logical Length:23394
## NA's:23394 NA's:23394 NA's:23394 Class :character
## Mode :character
##
##
##
##
## OtherCountry CountryCode DiseaseCondition
## Mode:logical Length:23394 Length:23394
## NA's:23394 Class :character Class :character
## Mode :character Mode :character
##
##
##
##
# Print the compact structure of the table: its class and dimensions, then
# each column's type with its first values, followed by the attributes that
# readr attached (column specification and parsing problems).
str(db_1)
## spc_tbl_ [23,394 × 45] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Date fBirth naissance : chr [1:23394] "1/19/1999" "8/15/1994" "12/25/2016" "5/7/2024" ...
## $ SpecimenSource : num [1:23394] 1 1 1 1 1 1 1 1 1 1 ...
## $ DateIspecimenCollecteds : chr [1:23394] NA "1/23/2025" "1/21/2025" "1/26/2025" ...
## $ DateSpecimenSenttolab : chr [1:23394] "1/30/2025" "1/30/2025" "1/30/2025" "2/2/2025" ...
## $ MeaslesIgm : num [1:23394] 2 2 2 1 1 1 1 1 1 1 ...
## $ FinalClassification : num [1:23394] 4 4 4 1 1 1 1 1 1 1 ...
## $ RubellaIgm : num [1:23394] 2 2 2 4 4 4 4 4 4 4 ...
## $ Sex : chr [1:23394] "2" "2" "1" "1" ...
## $ DistrictOfresidence : chr [1:23394] "Annaba" "El Hadjar" "El Hadjar" "ADRAR" ...
## $ DateLabSentResulttodistrict: chr [1:23394] "1/30/2025" "1/30/2025" "1/30/2025" "2/2/2025" ...
## $ LabId : chr [1:23394] "041-2025" "042-2025" "043-2025" "044-2025" ...
## $ DateOfLastvaccination : chr [1:23394] NA NA "1/1/2023" NA ...
## $ AgeInYears : num [1:23394] 26 30 8 NA 1 NA 1 4 3 4 ...
## $ AgeInMonths : num [1:23394] NA NA NA 8 NA 9 NA NA NA NA ...
## $ Date OfOn set : chr [1:23394] "1/12/2025" "1/18/2025" "1/19/2025" "1/16/2025" ...
## $ SpecimenCondition : num [1:23394] 1 1 1 1 1 1 1 1 1 1 ...
## $ DateLabReceivedSpecimen : chr [1:23394] "1/30/2025" "1/30/2025" "1/30/2025" "2/2/2025" ...
## $ LaboratoryName : chr [1:23394] "Laboratoire National de R\xe9f\xe9rence pour la Rougeole" "Laboratoire National de R\xe9f\xe9rence pour la Rougeole" "Laboratoire National de R\xe9f\xe9rence pour la Rougeole" "Laboratoire National de R\xe9f\xe9rence pour la Rougeole" ...
## $ IdNumber : chr [1:23394] "ALG-ANB-ANB-25-039" "ALG-ANB-EHD-25-040" "ALG-ANB-EHD-25-041" "ALG-ADR-ADR-25-042" ...
## $ Age s : num [1:23394] NA NA NA NA NA NA NA NA NA NA ...
## $ ReportingDistrict : chr [1:23394] NA NA NA NA ...
## $ Towncity : chr [1:23394] NA NA NA NA ...
## $ Urbanrural : chr [1:23394] NA NA NA NA ...
## $ DateSeenHealthFacility : chr [1:23394] NA NA NA NA ...
## $ NumberOfVaccinedoses : num [1:23394] NA NA 1 NA NA NA NA NA NA NA ...
## $ DateHealthfacilitynotified : chr [1:23394] NA NA NA NA ...
## $ ReportingHealthfacility : chr [1:23394] NA NA NA NA ...
## $ ProvinceOfResidence : chr [1:23394] NA NA NA NA ...
## $ DateSentFormtodistrict : chr [1:23394] NA NA NA NA ...
## $ DateDistrictRecLabResults : chr [1:23394] NA NA NA NA ...
## $ Inoutpatient : num [1:23394] NA NA NA NA NA NA NA NA NA NA ...
## $ Outcome : chr [1:23394] NA NA NA NA ...
## $ DateRecformdistrict : chr [1:23394] NA NA NA NA ...
## $ DateReceivedNatlevel : chr [1:23394] NA NA NA NA ...
## $ OtherLabResults : chr [1:23394] NA NA NA NA ...
## $ PatientsResidence : logi [1:23394] NA NA NA NA NA NA ...
## $ DataType : chr [1:23394] NA NA NA NA ...
## $ Ward : chr [1:23394] NA NA NA NA ...
## $ EpiLink : logi [1:23394] NA NA NA NA NA NA ...
## $ VirusIsolation : logi [1:23394] NA NA NA NA NA NA ...
## $ DateSpecimenRecInLab : logi [1:23394] NA NA NA NA NA NA ...
## $ WayofLife : chr [1:23394] NA NA NA NA ...
## $ OtherCountry : logi [1:23394] NA NA NA NA NA NA ...
## $ CountryCode : chr [1:23394] "ALG" "ALG" "ALG" "ALG" ...
## $ DiseaseCondition : chr [1:23394] "Measles" "Measles" "Measles" "Measles" ...
## - attr(*, "spec")=
## .. cols(
## .. `Date fBirth naissance` = col_character(),
## .. SpecimenSource = col_double(),
## .. DateIspecimenCollecteds = col_character(),
## .. DateSpecimenSenttolab = col_character(),
## .. MeaslesIgm = col_double(),
## .. FinalClassification = col_double(),
## .. RubellaIgm = col_double(),
## .. Sex = col_character(),
## .. DistrictOfresidence = col_character(),
## .. DateLabSentResulttodistrict = col_character(),
## .. LabId = col_character(),
## .. DateOfLastvaccination = col_character(),
## .. AgeInYears = col_double(),
## .. AgeInMonths = col_double(),
## .. `Date OfOn set` = col_character(),
## .. SpecimenCondition = col_double(),
## .. DateLabReceivedSpecimen = col_character(),
## .. LaboratoryName = col_character(),
## .. IdNumber = col_character(),
## .. `Age s` = col_double(),
## .. ReportingDistrict = col_character(),
## .. Towncity = col_character(),
## .. Urbanrural = col_character(),
## .. DateSeenHealthFacility = col_character(),
## .. NumberOfVaccinedoses = col_double(),
## .. DateHealthfacilitynotified = col_character(),
## .. ReportingHealthfacility = col_character(),
## .. ProvinceOfResidence = col_character(),
## .. DateSentFormtodistrict = col_character(),
## .. DateDistrictRecLabResults = col_character(),
## .. Inoutpatient = col_double(),
## .. Outcome = col_character(),
## .. DateRecformdistrict = col_character(),
## .. DateReceivedNatlevel = col_character(),
## .. OtherLabResults = col_character(),
## .. PatientsResidence = col_logical(),
## .. DataType = col_character(),
## .. Ward = col_character(),
## .. EpiLink = col_logical(),
## .. VirusIsolation = col_logical(),
## .. DateSpecimenRecInLab = col_logical(),
## .. WayofLife = col_character(),
## .. OtherCountry = col_logical(),
## .. CountryCode = col_character(),
## .. DiseaseCondition = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Preview the first two rows in the viewer (piped form).
head(db_1, 2) %>% view()
# Same preview written as a nested call instead of a pipe.
view(head(db_1, 2))
# Preview the last rows; tail() gets no row count here, so its default applies.
tail(db_1) %>% view()
# Pipe operators: %>% and |> (keyboard shortcut: Ctrl+Shift+M)
# Tally the records per country code (one count per distinct CountryCode).
my_measles_table <- table(db_1[["CountryCode"]])
# Turn the tally into a two-column data frame: the code and its count.
my_data_frame <- data.frame(
  country = names(my_measles_table),
  cases = as.integer(my_measles_table)
)
# Sort the rows from the fewest to the most cases.
my_data_frame <- my_data_frame[order(my_data_frame[["cases"]]), ]
# Use the sorted row order as the factor levels of country.
my_data_frame[["country"]] <- factor(
  x = my_data_frame[["country"]],
  levels = my_data_frame[["country"]]
)
#### Plot chart: horizontal bars of suspected cases per country
ggplot(data = my_data_frame, mapping = aes(x = cases, y = country)) +
  geom_col(fill = "tomato") +
  # Print each count in a label placed just past the end of its bar.
  geom_label(
    aes(label = cases),
    hjust = -0.1,
    size = 4,
    colour = "white",
    fontface = "bold",
    fill = "tomato",
    label.size = 0.3
  ) +
  # The labels sit to the right of the bars, so widen the upper end of the
  # x axis; with the default expansion the label of the longest bar is cut
  # off at the panel edge. The lower end keeps the default 5 % expansion.
  scale_x_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  labs(
    title = "Distribution of suspected cases of Measles in WA countries, 2025",
    subtitle = "These data do not reflect reality, they are for educational purposes only.",
    caption = "Data source: WHO AFRO (IST WA)",
    x = "Suspected cases",
    y = "Country (IST WA)"
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 12),
    title = element_text(size = 14, face = "bold")
  )
