library(tidyverse)
Warning: package ‘tidyverse’ was built under R version 4.2.3
Warning: package ‘ggplot2’ was built under R version 4.2.3
Warning: package ‘tibble’ was built under R version 4.2.3
Warning: package ‘tidyr’ was built under R version 4.2.2
Warning: package ‘readr’ was built under R version 4.2.3
Warning: package ‘purrr’ was built under R version 4.2.2
Warning: package ‘dplyr’ was built under R version 4.2.3
Warning: package ‘stringr’ was built under R version 4.2.2
Warning: package ‘forcats’ was built under R version 4.2.2
Warning: package ‘lubridate’ was built under R version 4.2.3
── Attaching core tidyverse packages ────────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.2     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ──────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(lubridate)
library(janitor)
Warning: package ‘janitor’ was built under R version 4.2.3
Attaching package: ‘janitor’

The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test
library(readr)

# Point the working directory at this notebook's folder so the relative CSV
# path used below resolves when the code is run interactively in RStudio.
# The lookup is guarded because getActiveDocumentContext() errors when RStudio
# is not running, and an unsaved document reports an empty path, which would
# make setwd() fail. In both cases the working directory is left unchanged.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  notebook_path <- rstudioapi::getActiveDocumentContext()$path
  if (nzchar(notebook_path)) {
    setwd(dirname(notebook_path))
  }
}
# Load the full child-level export.
# With the default guess_max, readr infers column types from the leading rows
# only. Columns that are empty there get typed as logical, and later
# non-logical values are then reported as parsing problems and read as NA.
# Guessing from every row avoids that, at the cost of a slower read on this
# multi-million-row file.
# NOTE(review): this presumes the reported parsing issues come from type
# guessing; run problems(d1) after loading to confirm nothing else remains.
d1 <- read_csv(
  "MH_scaleup_child_data_all.csv",
  guess_max = Inf
)
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
  dat <- vroom(...)
  problems(dat)
Rows: 2475060 Columns: 109
── Column specification ──────────────────────────────────────────────────────────
Delimiter: ","
chr  (21): district, health_block, health_facility, health_subfacility, villag...
dbl   (3): rchid, mobileno, weight
lgl  (48): measles2, death_reason, caseno, husbandname, mobileof, motherage, l...
date (37): dob, registrationdate, bcg, opv0, opv1, opv2, opv3, dpt1, dpt2, dpt...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Fix the RNG seed so the random subsample drawn next is reproducible.
set.seed(123)

# Replace the full table with a random subsample of 100,000 rows; every later
# step in this notebook works on this subsample.
d1 <- sample_n(d1, size = 100000)
# Print a transposed overview of the sample: one line per column with its
# type and first few values.
d1 %>%
  glimpse()
Rows: 100,000
Columns: 109
$ rchid                  <dbl> 227028230817, 227025774728, 227027465164, 2270272…
$ district               <chr> "Yavatmal(14)", "Gondiya *(11)", "Mumbai(23)", "R…
$ health_block           <chr> "yavatmal(325)", "Gondia(388)", "F North Ward (F/…
$ health_facility        <chr> "Sawargad(1710)", "Rawanwadi(2201)", "Transit Cam…
$ health_subfacility     <chr> "dorli(8742)", "katangikala(10622)", "--(0)", "SD…
$ village                <chr> "pimpalgaon (10000385)*", "Katangikala (10000851)…
$ childname              <chr> "Baby Of Sharda Giridhar Raut", "Baby Of Asha Arv…
$ fathername             <chr> "Giridhar Raut", "Arvind Chaudhari", "Maniraj", "…
$ mothername             <chr> "Sharda Giridhar Raut", "Asha Arvind Chaudhari(12…
$ mobileno               <dbl> 9637201148, 9823852774, 8652243436, 7039102848, 9…
$ dob                    <date> 2023-08-04, 2022-12-07, 2023-03-08, 2023-01-14, …
$ anm_name               <chr> "(ID-0)(MobNo.-0)", "P.R.Warkhade(ID-10669)(MobNo…
$ asha_name              <chr> "Sujata  Nagrale(ID-62916)(MobNo.-9527513166)", "…
$ registrationdate       <date> 2023-08-04, 2022-12-07, 2023-06-14, 2023-05-11, …
$ weight                 <dbl> 2.7, 2.8, 3.1, 3.2, 3.0, 3.0, 3.0, 2.6, 2.8, 2.7,…
$ bcg                    <date> 2023-08-04, 2022-12-08, NA, NA, 2022-06-24, 2022…
$ opv0                   <date> 2023-08-04, 2022-12-08, NA, NA, 2022-06-13, 2022…
$ opv1                   <date> NA, 2023-02-01, NA, NA, 2022-08-26, 2022-07-05, …
$ opv2                   <date> NA, 2023-03-01, NA, NA, NA, 2022-08-02, 2022-07-…
$ opv3                   <date> NA, 2023-04-05, NA, NA, NA, 2022-08-30, 2022-10-…
$ dpt1                   <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ dpt2                   <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ dpt3                   <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ hep0                   <date> 2023-08-04, 2022-12-07, NA, NA, 2022-06-13, 2022…
$ hep1                   <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ hep2                   <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ hep3                   <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ penta1                 <date> NA, 2023-02-01, NA, NA, 2022-08-26, 2022-07-05, …
$ penta2                 <date> NA, 2023-03-01, NA, NA, NA, 2022-08-02, 2022-07-…
$ penta3                 <date> NA, 2023-04-05, NA, NA, NA, 2022-08-30, 2022-10-…
$ measles                <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ ipv1                   <date> NA, 2023-02-01, NA, NA, 2022-08-26, 2022-07-05, …
$ ipv2                   <date> NA, 2023-04-05, NA, NA, NA, 2022-08-30, 2022-10-…
$ pcv1                   <date> NA, 2023-02-01, NA, NA, 2022-08-26, 2022-07-05, …
$ pcv2                   <date> NA, 2023-04-05, NA, NA, NA, 2022-08-30, NA, NA, …
$ pcvb                   <date> NA, NA, NA, NA, NA, 2023-03-21, NA, NA, NA, NA, …
$ mr1                    <date> NA, NA, NA, NA, NA, 2023-03-21, 2023-02-13, NA, …
$ rota1                  <date> NA, 2023-02-01, NA, NA, 2022-08-26, 2022-07-05, …
$ rota2                  <date> NA, 2023-03-01, NA, NA, NA, 2022-08-02, 2022-07-…
$ rota3                  <date> NA, 2023-04-05, NA, NA, NA, 2022-08-30, 2022-10-…
$ je1                    <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ measles2               <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mr2                    <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ dptb                   <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ je2                    <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ opvb                   <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ childdeath             <chr> "No", "No", "No", "No", "No", "No", "No", "No", "…
$ death_date             <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ death_reason           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ caseno                 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ husbandname            <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mobileof               <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ motherage              <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ lmp                    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ edd                    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc1                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc2                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc3                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc4                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ tt1                    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ tt2                    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ ttb                    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc_ifa                <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ pnc_ifa                <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ ifa                    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ delivery               <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ highrisk1stvisit       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ highrisk2ndvisit       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ highrisk3rdvisit       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ highrisk4thvisit       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hblevel1stvisit        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hblevel2ndvisit        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hblevel3rdvisit        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hblevel4thvisit        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ maternaldeath          <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ jsy_beneficiary        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ jsy_beneficiary1       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penta1_dateupdated     <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penta2_dateupdated     <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penta3_dateupdated     <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mr1_dateupdated        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mr2_dateupdated        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc1_dateupdated       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc2_dateupdated       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc3_dateupdated       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc4_dateupdated       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ tt1_dateupdated        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ tt2_dateupdated        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ ttb_dateupdated        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc_ifa_dateupdated    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ date_db_record_created <date> 2023-10-11, 2023-10-10, 2023-10-11, 2023-10-11, …
$ validation_passed      <lgl> TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, TRU…
$ telerivet_status       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ relationshiptochild    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ source_type            <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ signupmethod           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ groups                 <chr> "Marathi-Everyone-General, Maha RCH Child, Marath…
$ source                 <chr> "RCH-Maharashtra-Child", NA, "RCH-Maharashtra-Chi…
$ state                  <chr> "Maharashtra", "Maharashtra", "Maharashtra", "Mah…
$ date_uploaded          <date> 2023-10-12, 2023-10-12, 2023-10-12, 2023-10-12, …
$ welcomedate            <date> 2023-10-12, NA, 2023-11-28, 2023-10-12, NA, NA, …
$ roundname              <chr> "MAHSMSCYAV231012", NA, "MAHSMSCMUM231012", "MAHS…
$ validation_remarks     <chr> NA, "Phone number is of the ANM, so excluding thi…
$ rchid_mother           <chr> "127027539483", NA, "Not Available", "Not Availab…
$ caregivername          <chr> "Sharda Giridhar Raut", NA, "Tanisha", "Mamta Hem…
$ filename               <chr> "c:\\\\Suvita\\\\Data\\\\smsreminder_rawdata\\\\m…
$ eligible_vaccine_date  <date> 2023-10-12, NA, 2023-11-30, 2023-10-14, NA, NA, …
$ eligible_vaccine_type  <chr> "10 weeks", NA, "9 months", "9 months", NA, NA, N…
$ name                   <chr> "Baby", NA, "Baby", "Baby", NA, NA, "Rudraksha", …
# Frequency table (count and share) of records that passed or failed
# validation. The data frame plus column name is passed, rather than piping
# the bare vector, so the first column of the table is headed
# `validation_passed` instead of the pipe placeholder `.`.
d1 %>% tabyl(validation_passed)
     .     n percent
 FALSE 38472 0.38472
  TRUE 61528 0.61528

# The "already added to telerivet" remark embeds record-specific values
# (contact id, childbirth record id, upload date). Collapse every such remark
# to one generic label so it tabulates as a single category; all other remark
# text is kept as is.
d1 <- mutate(
  d1,
  validation_remarks_simple = str_replace_all(
    string = validation_remarks,
    pattern = "RCHID is already added to telerivet, telerivet_contactid=[:graph:]* childbirth_recordid=[:graph:]* date_uploaded=[:graph:]*;",
    replacement = "RCHID is already added to telerivet"
  )
)

# Frequency table of the simplified validation remarks.
tabyl(d1, validation_remarks_simple)
                                    validation_remarks_simple     n percent
                      Age based on DOB is more than 488 days; 20129 0.20129
                                        Child death recorded;   238 0.00238
 Child death recorded;Age based on DOB is more than 488 days;    34 0.00034
        Phone number is of the ANM, so excluding this number;  7379 0.07379
                          RCHID is already added to telerivet 10692 0.10692
                                                         <NA> 61528 0.61528
 valid_percent
  0.5232116864
  0.0061863173
  0.0008837596
  0.1918018299
  0.2779164067
            NA
# Cross-tabulate the recorded child-death flag (rows) against the validation
# outcome (columns).
tabyl(d1, childdeath, validation_passed)
 childdeath FALSE  TRUE
         No 38129 61528
        Yes   343     0
# Cross-tabulate the simplified validation remark (rows) against the recorded
# child-death flag (columns).
tabyl(d1, validation_remarks_simple, childdeath)
                                    validation_remarks_simple    No Yes
                      Age based on DOB is more than 488 days; 20129   0
                                        Child death recorded;     0 238
 Child death recorded;Age based on DOB is more than 488 days;     0  34
        Phone number is of the ANM, so excluding this number;  7356  23
                          RCHID is already added to telerivet 10644  48
                                                         <NA> 61528   0
# Cross-tabulate the simplified validation remark (rows) against `same_anm`
# (columns).
# NOTE(review): `same_anm` is not created by any code visible above this line;
# it must already be a column of d1 when this runs. Confirm where it is built.
tabyl(d1, validation_remarks_simple, same_anm)
                                    validation_remarks_simple FALSE TRUE  NA_
                      Age based on DOB is more than 488 days; 18420    0 1709
                                        Child death recorded;   216    0   22
 Child death recorded;Age based on DOB is more than 488 days;    33    0    1
        Phone number is of the ANM, so excluding this number;     0 7379    0
                          RCHID is already added to telerivet 10501    0  191
                                                         <NA> 56776    0 4752
# For each validation outcome, report the largest and smallest non-missing
# value of `age`.
# NOTE(review): `age` is not created by any code visible above this line. The
# column labels presume a larger `age` means a more recent birth; confirm
# against where `age` is defined.
summarise(
  group_by(d1, validation_passed),
  most_recent = max(age, na.rm = TRUE),
  oldest = min(age, na.rm = TRUE)
)



# For each simplified validation remark, report the largest and smallest
# non-missing value of `age`.
# NOTE(review): `age` is not created by any code visible above this line. The
# column labels presume a larger `age` means a more recent birth; confirm
# against where `age` is defined.
summarise(
  group_by(d1, validation_remarks_simple),
  most_recent = max(age, na.rm = TRUE),
  oldest = min(age, na.rm = TRUE)
)
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShsdWJyaWRhdGUpDQpsaWJyYXJ5KGphbml0b3IpDQpsaWJyYXJ5KHJlYWRyKQ0KDQpzZXR3ZChkaXJuYW1lKHJzdHVkaW9hcGk6OmdldEFjdGl2ZURvY3VtZW50Q29udGV4dCgpJHBhdGgpKQ0KYGBgDQoNCg0KYGBge3J9DQpkMSA8LSByZWFkX2NzdigiTUhfc2NhbGV1cF9jaGlsZF9kYXRhX2FsbC5jc3YiKQ0KDQoNCg0Kc2V0LnNlZWQoMTIzKQ0KZDEgPC0gZDEgJT4lIHNhbXBsZV9uKDEwMDAwMCkNCmBgYA0KDQoNCg0KYGBge3J9DQpnbGltcHNlKGQxKQ0KYGBgDQoNCg0KYGBge3J9DQpkMSR2YWxpZGF0aW9uX3Bhc3NlZCAlPiUgdGFieWwoKQ0KDQoNCg0KYGBgDQpgYGB7cn0NCg0KZDEgPC0gZDEgJT4lIG11dGF0ZSh2YWxpZGF0aW9uX3JlbWFya3Nfc2ltcGxlID0gDQogICAgICAgIHN0cl9yZXBsYWNlX2FsbCh2YWxpZGF0aW9uX3JlbWFya3MsICJSQ0hJRCBpcyBhbHJlYWR5IGFkZGVkIHRvIHRlbGVyaXZldCwgdGVsZXJpdmV0X2NvbnRhY3RpZD1bOmdyYXBoOl0qIGNoaWxkYmlydGhfcmVjb3JkaWQ9WzpncmFwaDpdKiBkYXRlX3VwbG9hZGVkPVs6Z3JhcGg6XSo7IiwgIlJDSElEIGlzIGFscmVhZHkgYWRkZWQgdG8gdGVsZXJpdmV0IikpIA0KDQoNCmQxICU+JSB0YWJ5bCh2YWxpZGF0aW9uX3JlbWFya3Nfc2ltcGxlKQ0KDQoNCg0KYGBgDQoNCg0KYGBge3J9DQpkMSAlPiUgdGFieWwoY2hpbGRkZWF0aCwgdmFsaWRhdGlvbl9wYXNzZWQpDQpkMSAlPiUgdGFieWwodmFsaWRhdGlvbl9yZW1hcmtzX3NpbXBsZSwgY2hpbGRkZWF0aCkNCg0KDQpgYGANCg0KDQpgYGB7cn0NCg0KDQpkMSA8LSBkMSAlPiUgbXV0YXRlKGFubV9udW1iZXIgPSBzdHJfbWF0Y2hfYWxsKGFubV9uYW1lLCAiWzpkaWdpdDpdezEwfSIpKQ0KDQpkMSA8LSBkMSAlPiUgbXV0YXRlKHNhbWVfYW5tID0gYW5tX251bWJlciA9PSBtb2JpbGVubykgDQoNCmQxICU+JSB0YWJ5bCggdmFsaWRhdGlvbl9yZW1hcmtzX3NpbXBsZSwgc2FtZV9hbm0pDQoNCmBgYA0KDQpgYGB7cn0NCg0KZDEgPC0gZDEgJT4lIG11dGF0ZShkb2IgPSB5bWQoZG9iKSkNCmQxIDwtIGQxICU+JSBtdXRhdGUoYWdlID0gYXMubnVtZXJpYyhkb2ItdG9kYXkoKSkpIA0KDQoNCmQxICU+JSBncm91cF9ieSh2YWxpZGF0aW9uX3Bhc3NlZCkgJT4lIA0KICBzdW1tYXJpemUobW9zdF9yZWNlbnQgPSBtYXgoYWdlLCBuYS5ybSA9IFRSVUUpLA0KICAgICAgICAgICAgb2xkZXN0ID0gbWluKGFnZSwgbmEucm0gPSBUUlVFKSkNCg0KDQpkMSAlPiUgZ3JvdXBfYnkodmFsaWRhdGlvbl9yZW1hcmtzX3NpbXBsZSkgJT4lIA0KICBzdW1tYXJpemUobW9zdF9yZWNlbnQgPSBtYXgoYWdlLCBuYS5ybSA9IFRSVUUpLA0KICAgICAgICAgICAgb2xkZXN0ID0gbWluKGFnZSwgbmEucm0gPSBUUlVFKSkNCmBgYA0KDQo=