library(tidyverse)
Warning: package ‘tidyverse’ was built under R version 4.2.3Warning: package ‘ggplot2’ was built under R version 4.2.3Warning: package ‘tibble’ was built under R version 4.2.3Warning: package ‘tidyr’ was built under R version 4.2.2Warning: package ‘readr’ was built under R version 4.2.3Warning: package ‘purrr’ was built under R version 4.2.2Warning: package ‘dplyr’ was built under R version 4.2.3Warning: package ‘stringr’ was built under R version 4.2.2Warning: package ‘forcats’ was built under R version 4.2.2Warning: package ‘lubridate’ was built under R version 4.2.3── Attaching core tidyverse packages ────────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.2 ✔ readr 2.1.4
✔ forcats 1.0.0 ✔ stringr 1.5.0
✔ ggplot2 3.4.2 ✔ tibble 3.2.1
✔ lubridate 1.9.2 ✔ tidyr 1.3.0
✔ purrr 1.0.1 ── Conflicts ──────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(lubridate)
library(janitor)
Warning: package ‘janitor’ was built under R version 4.2.3
Attaching package: ‘janitor’
The following objects are masked from ‘package:stats’:
chisq.test, fisher.test
library(readr)
# Point the working directory at this notebook's folder so the relative CSV
# path below resolves. rstudioapi only works inside a running RStudio session,
# and an unsaved document (or a focused console) reports an empty path, so both
# cases are guarded: the working directory is then left as it is instead of
# aborting the script with an error.
local({
  if (rstudioapi::isAvailable()) {
    doc_path <- rstudioapi::getActiveDocumentContext()$path
    if (nzchar(doc_path)) {
      setwd(dirname(doc_path))
    }
  }
})
# Read the full child-level export; column types are guessed by readr.
# NOTE(review): the rendered output of this call reports parsing issues and
# 48 columns guessed as logical. Presumably those columns are empty in the
# rows readr uses for guessing; inspect problems(d1) and consider explicit
# `col_types` or a larger `guess_max` -- confirm before relying on them.
d1 <- read_csv("MH_scaleup_child_data_all.csv")
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)Rows: 2475060 Columns: 109── Column specification ──────────────────────────────────────────────────────────
Delimiter: ","
chr (21): district, health_block, health_facility, health_subfacility, villag...
dbl (3): rchid, mobileno, weight
lgl (48): measles2, death_reason, caseno, husbandname, mobileof, motherage, l...
date (37): dob, registrationdate, bcg, opv0, opv1, opv2, opv3, dpt1, dpt2, dpt...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Fix the RNG seed so the random subsample below is reproducible.
set.seed(123)
# Overwrite d1 with a random 100,000-row subsample; everything after this
# point works on the subsample, not the full file.
d1 <- sample_n(d1, size = 100000)
# Print one line per column (name, type, leading values) as a structure check.
glimpse(d1)
Rows: 100,000
Columns: 109
$ rchid <dbl> 227028230817, 227025774728, 227027465164, 2270272…
$ district <chr> "Yavatmal(14)", "Gondiya *(11)", "Mumbai(23)", "R…
$ health_block <chr> "yavatmal(325)", "Gondia(388)", "F North Ward (F/…
$ health_facility <chr> "Sawargad(1710)", "Rawanwadi(2201)", "Transit Cam…
$ health_subfacility <chr> "dorli(8742)", "katangikala(10622)", "--(0)", "SD…
$ village <chr> "pimpalgaon (10000385)*", "Katangikala (10000851)…
$ childname <chr> "Baby Of Sharda Giridhar Raut", "Baby Of Asha Arv…
$ fathername <chr> "Giridhar Raut", "Arvind Chaudhari", "Maniraj", "…
$ mothername <chr> "Sharda Giridhar Raut", "Asha Arvind Chaudhari(12…
$ mobileno <dbl> 9637201148, 9823852774, 8652243436, 7039102848, 9…
$ dob <date> 2023-08-04, 2022-12-07, 2023-03-08, 2023-01-14, …
$ anm_name <chr> "(ID-0)(MobNo.-0)", "P.R.Warkhade(ID-10669)(MobNo…
$ asha_name <chr> "Sujata Nagrale(ID-62916)(MobNo.-9527513166)", "…
$ registrationdate <date> 2023-08-04, 2022-12-07, 2023-06-14, 2023-05-11, …
$ weight <dbl> 2.7, 2.8, 3.1, 3.2, 3.0, 3.0, 3.0, 2.6, 2.8, 2.7,…
$ bcg <date> 2023-08-04, 2022-12-08, NA, NA, 2022-06-24, 2022…
$ opv0 <date> 2023-08-04, 2022-12-08, NA, NA, 2022-06-13, 2022…
$ opv1 <date> NA, 2023-02-01, NA, NA, 2022-08-26, 2022-07-05, …
$ opv2 <date> NA, 2023-03-01, NA, NA, NA, 2022-08-02, 2022-07-…
$ opv3 <date> NA, 2023-04-05, NA, NA, NA, 2022-08-30, 2022-10-…
$ dpt1 <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ dpt2 <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ dpt3 <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ hep0 <date> 2023-08-04, 2022-12-07, NA, NA, 2022-06-13, 2022…
$ hep1 <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ hep2 <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ hep3 <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ penta1 <date> NA, 2023-02-01, NA, NA, 2022-08-26, 2022-07-05, …
$ penta2 <date> NA, 2023-03-01, NA, NA, NA, 2022-08-02, 2022-07-…
$ penta3 <date> NA, 2023-04-05, NA, NA, NA, 2022-08-30, 2022-10-…
$ measles <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ ipv1 <date> NA, 2023-02-01, NA, NA, 2022-08-26, 2022-07-05, …
$ ipv2 <date> NA, 2023-04-05, NA, NA, NA, 2022-08-30, 2022-10-…
$ pcv1 <date> NA, 2023-02-01, NA, NA, 2022-08-26, 2022-07-05, …
$ pcv2 <date> NA, 2023-04-05, NA, NA, NA, 2022-08-30, NA, NA, …
$ pcvb <date> NA, NA, NA, NA, NA, 2023-03-21, NA, NA, NA, NA, …
$ mr1 <date> NA, NA, NA, NA, NA, 2023-03-21, 2023-02-13, NA, …
$ rota1 <date> NA, 2023-02-01, NA, NA, 2022-08-26, 2022-07-05, …
$ rota2 <date> NA, 2023-03-01, NA, NA, NA, 2022-08-02, 2022-07-…
$ rota3 <date> NA, 2023-04-05, NA, NA, NA, 2022-08-30, 2022-10-…
$ je1 <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ measles2 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mr2 <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ dptb <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ je2 <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ opvb <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ childdeath <chr> "No", "No", "No", "No", "No", "No", "No", "No", "…
$ death_date <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ death_reason <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ caseno <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ husbandname <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mobileof <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ motherage <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ lmp <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ edd <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc1 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc2 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc3 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc4 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ tt1 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ tt2 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ ttb <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc_ifa <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ pnc_ifa <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ ifa <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ delivery <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ highrisk1stvisit <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ highrisk2ndvisit <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ highrisk3rdvisit <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ highrisk4thvisit <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hblevel1stvisit <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hblevel2ndvisit <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hblevel3rdvisit <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hblevel4thvisit <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ maternaldeath <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ jsy_beneficiary <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ jsy_beneficiary1 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penta1_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penta2_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penta3_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mr1_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mr2_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc1_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc2_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc3_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc4_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ tt1_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ tt2_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ ttb_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc_ifa_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ date_db_record_created <date> 2023-10-11, 2023-10-10, 2023-10-11, 2023-10-11, …
$ validation_passed <lgl> TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, TRU…
$ telerivet_status <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ relationshiptochild <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ source_type <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ signupmethod <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ groups <chr> "Marathi-Everyone-General, Maha RCH Child, Marath…
$ source <chr> "RCH-Maharashtra-Child", NA, "RCH-Maharashtra-Chi…
$ state <chr> "Maharashtra", "Maharashtra", "Maharashtra", "Mah…
$ date_uploaded <date> 2023-10-12, 2023-10-12, 2023-10-12, 2023-10-12, …
$ welcomedate <date> 2023-10-12, NA, 2023-11-28, 2023-10-12, NA, NA, …
$ roundname <chr> "MAHSMSCYAV231012", NA, "MAHSMSCMUM231012", "MAHS…
$ validation_remarks <chr> NA, "Phone number is of the ANM, so excluding thi…
$ rchid_mother <chr> "127027539483", NA, "Not Available", "Not Availab…
$ caregivername <chr> "Sharda Giridhar Raut", NA, "Tanisha", "Mamta Hem…
$ filename <chr> "c:\\\\Suvita\\\\Data\\\\smsreminder_rawdata\\\\m…
$ eligible_vaccine_date <date> 2023-10-12, NA, 2023-11-30, 2023-10-14, NA, NA, …
$ eligible_vaccine_type <chr> "10 weeks", NA, "9 months", "9 months", NA, NA, N…
$ name <chr> "Baby", NA, "Baby", "Baby", NA, NA, "Rudraksha", …
# One-way frequency table (n and percent) of the validation_passed flag.
# The vector is piped in, so tabyl() labels the value column ".".
d1$validation_passed %>% tabyl()
. n percent
FALSE 38472 0.38472
TRUE 61528 0.61528
# Build validation_remarks_simple: every "RCHID is already added to
# telerivet, ..." remark embeds a contact id, a record id and an upload date,
# which makes each one unique. Replace the matched text with one generic label
# so the remarks can be tabulated; text that does not match is left unchanged.
d1 <- mutate(
  d1,
  validation_remarks_simple = str_replace_all(
    string = validation_remarks,
    pattern = "RCHID is already added to telerivet, telerivet_contactid=[:graph:]* childbirth_recordid=[:graph:]* date_uploaded=[:graph:]*;",
    replacement = "RCHID is already added to telerivet"
  )
)
# Frequency table of the simplified remarks, including a row for NA.
tabyl(d1, validation_remarks_simple)
validation_remarks_simple n percent
Age based on DOB is more than 488 days; 20129 0.20129
Child death recorded; 238 0.00238
Child death recorded;Age based on DOB is more than 488 days; 34 0.00034
Phone number is of the ANM, so excluding this number; 7379 0.07379
RCHID is already added to telerivet 10692 0.10692
<NA> 61528 0.61528
valid_percent
0.5232116864
0.0061863173
0.0008837596
0.1918018299
0.2779164067
NA
# Cross-tabulate recorded child death (rows) against the validation flag
# (columns).
tabyl(d1, childdeath, validation_passed)
childdeath FALSE TRUE
No 38129 61528
Yes 343 0
# Cross-tabulate the simplified validation remark (rows) against recorded
# child death (columns).
tabyl(d1, validation_remarks_simple, childdeath)
validation_remarks_simple No Yes
Age based on DOB is more than 488 days; 20129 0
Child death recorded; 0 238
Child death recorded;Age based on DOB is more than 488 days; 0 34
Phone number is of the ANM, so excluding this number; 7356 23
RCHID is already added to telerivet 10644 48
<NA> 61528 0
# Cross-tabulate the simplified validation remark (rows) against same_anm
# (columns).
# NOTE(review): same_anm is not created by any code visible above this line;
# presumably it flags rows whose contact number equals the ANM's number --
# confirm where it is defined before running this on its own.
tabyl(d1, validation_remarks_simple, same_anm)
validation_remarks_simple FALSE TRUE NA_
Age based on DOB is more than 488 days; 18420 0 1709
Child death recorded; 216 0 22
Child death recorded;Age based on DOB is more than 488 days; 33 0 1
Phone number is of the ANM, so excluding this number; 0 7379 0
RCHID is already added to telerivet 10501 0 191
<NA> 56776 0 4752
# Largest and smallest `age` per validation outcome, ignoring missing values.
# NOTE(review): `age` is not defined in the code visible above; the output
# names imply that larger values mean more recent births -- confirm its
# definition and sign.
summarize(
  group_by(d1, validation_passed),
  most_recent = max(age, na.rm = TRUE),
  oldest = min(age, na.rm = TRUE)
)
# Same age range, broken down by simplified validation remark.
summarize(
  group_by(d1, validation_remarks_simple),
  most_recent = max(age, na.rm = TRUE),
  oldest = min(age, na.rm = TRUE)
)
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShsdWJyaWRhdGUpDQpsaWJyYXJ5KGphbml0b3IpDQpsaWJyYXJ5KHJlYWRyKQ0KDQpzZXR3ZChkaXJuYW1lKHJzdHVkaW9hcGk6OmdldEFjdGl2ZURvY3VtZW50Q29udGV4dCgpJHBhdGgpKQ0KYGBgDQoNCg0KYGBge3J9DQpkMSA8LSByZWFkX2NzdigiTUhfc2NhbGV1cF9jaGlsZF9kYXRhX2FsbC5jc3YiKQ0KDQoNCg0Kc2V0LnNlZWQoMTIzKQ0KZDEgPC0gZDEgJT4lIHNhbXBsZV9uKDEwMDAwMCkNCmBgYA0KDQoNCg0KYGBge3J9DQpnbGltcHNlKGQxKQ0KYGBgDQoNCg0KYGBge3J9DQpkMSR2YWxpZGF0aW9uX3Bhc3NlZCAlPiUgdGFieWwoKQ0KDQoNCg0KYGBgDQpgYGB7cn0NCg0KZDEgPC0gZDEgJT4lIG11dGF0ZSh2YWxpZGF0aW9uX3JlbWFya3Nfc2ltcGxlID0gDQogICAgICAgIHN0cl9yZXBsYWNlX2FsbCh2YWxpZGF0aW9uX3JlbWFya3MsICJSQ0hJRCBpcyBhbHJlYWR5IGFkZGVkIHRvIHRlbGVyaXZldCwgdGVsZXJpdmV0X2NvbnRhY3RpZD1bOmdyYXBoOl0qIGNoaWxkYmlydGhfcmVjb3JkaWQ9WzpncmFwaDpdKiBkYXRlX3VwbG9hZGVkPVs6Z3JhcGg6XSo7IiwgIlJDSElEIGlzIGFscmVhZHkgYWRkZWQgdG8gdGVsZXJpdmV0IikpIA0KDQoNCmQxICU+JSB0YWJ5bCh2YWxpZGF0aW9uX3JlbWFya3Nfc2ltcGxlKQ0KDQoNCg0KYGBgDQoNCg0KYGBge3J9DQpkMSAlPiUgdGFieWwoY2hpbGRkZWF0aCwgdmFsaWRhdGlvbl9wYXNzZWQpDQpkMSAlPiUgdGFieWwodmFsaWRhdGlvbl9yZW1hcmtzX3NpbXBsZSwgY2hpbGRkZWF0aCkNCg0KDQpgYGANCg0KDQpgYGB7cn0NCg0KDQpkMSA8LSBkMSAlPiUgbXV0YXRlKGFubV9udW1iZXIgPSBzdHJfbWF0Y2hfYWxsKGFubV9uYW1lLCAiWzpkaWdpdDpdezEwfSIpKQ0KDQpkMSA8LSBkMSAlPiUgbXV0YXRlKHNhbWVfYW5tID0gYW5tX251bWJlciA9PSBtb2JpbGVubykgDQoNCmQxICU+JSB0YWJ5bCggdmFsaWRhdGlvbl9yZW1hcmtzX3NpbXBsZSwgc2FtZV9hbm0pDQoNCmBgYA0KDQpgYGB7cn0NCg0KZDEgPC0gZDEgJT4lIG11dGF0ZShkb2IgPSB5bWQoZG9iKSkNCmQxIDwtIGQxICU+JSBtdXRhdGUoYWdlID0gYXMubnVtZXJpYyhkb2ItdG9kYXkoKSkpIA0KDQoNCmQxICU+JSBncm91cF9ieSh2YWxpZGF0aW9uX3Bhc3NlZCkgJT4lIA0KICBzdW1tYXJpemUobW9zdF9yZWNlbnQgPSBtYXgoYWdlLCBuYS5ybSA9IFRSVUUpLA0KICAgICAgICAgICAgb2xkZXN0ID0gbWluKGFnZSwgbmEucm0gPSBUUlVFKSkNCg0KDQpkMSAlPiUgZ3JvdXBfYnkodmFsaWRhdGlvbl9yZW1hcmtzX3NpbXBsZSkgJT4lIA0KICBzdW1tYXJpemUobW9zdF9yZWNlbnQgPSBtYXgoYWdlLCBuYS5ybSA9IFRSVUUpLA0KICAgICAgICAgICAgb2xkZXN0ID0gbWluKGFnZSwgbmEucm0gPSBUUlVFKSkNCmBgYA0KDQo=