# Load the CSV extract into `d1`, letting readr guess the column types.
# NOTE(review): identifier-like columns (rchid, mobileno) come back as doubles
# and many all-NA columns as logical -- confirm this is acceptable downstream.
d1 <- read_csv(file = "MH_scaleup_preg_data_all.csv")
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
  dat <- vroom(...)
  problems(dat)
Rows: 1021857 Columns: 109
── Column specification ──────────────────────────────────────────────────────────
Delimiter: ","
chr (40): district, health_block, health_facility, health_subfacility, village...
dbl (11): rchid, mobileno, caseno, motherage, anc_ifa, pnc_ifa, ifa, hblevel1s...
lgl (58): childname, fathername, dob, weight, bcg, opv0, opv1, opv2, opv3, dpt...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show every column with its type and first few values.
glimpse(d1)
Rows: 1,021,857
Columns: 109
$ rchid                  <dbl> 127006722820, 127006790829, 127006825927, 1270068…
$ district               <chr> "Ahmadnagar(26)", "Ahmadnagar(26)", "Ahmadnagar(2…
$ health_block           <chr> "Akole(84)", "Kopargaon(89)", "Rahata(99)", "Raha…
$ health_facility        <chr> "Mhaladevi(323)", "Pohegaon Bk.(437)", "Astagaon(…
$ health_subfacility     <chr> "Takli(2004)", "Pohegaon(2998)", "Astgaon(1758)",…
$ village                <chr> "Takali  (32076)", "Pohegaon Bk.  (32390)", "Chol…
$ childname              <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ fathername             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mothername             <chr> "Kishori Nanasaheb Gaikwad", "Sujata Rajendra Mal…
$ mobileno               <dbl> 8308071141, 9970717515, 8484922951, 9922192968, 9…
$ dob                    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anm_name               <chr> "Watane Meena Parshram(107525)", "sarita sukhdeo …
$ asha_name              <chr> "Ghodake Kavita Deepak(59565)", "Vaishali Sachin …
$ registrationdate       <chr> "16-02-2023", "22-02-2023", "09-02-2023", "13-02-…
$ weight                 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ bcg                    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ opv0                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ opv1                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ opv2                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ opv3                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ dpt1                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ dpt2                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ dpt3                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hep0                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hep1                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hep2                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hep3                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penta1                 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penta2                 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penta3                 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ measles                <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ ipv1                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ ipv2                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ pcv1                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ pcv2                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ pcvb                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mr1                    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ rota1                  <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ rota2                  <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ rota3                  <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ je1                    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ measles2               <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mr2                    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ dptb                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ je2                    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ opvb                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ childdeath             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ death_date             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ death_reason           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ caseno                 <dbl> 2, 2, 3, 2, 2, 6, 3, 3, 2, 2, 1, 2, 1, 2, 2, 3, 2…
$ husbandname            <chr> "Nanasaheb Gaikwad", "Rajendra Mali", "GORAKHA SH…
$ mobileof               <chr> "Wife", "Wife", "Others", "Wife", "Wife", "Wife",…
$ motherage              <dbl> 28, 30, 28, 29, 36, 35, 31, 26, 27, 23, 32, 27, 2…
$ lmp                    <chr> "04-12-2022", "15-11-2022", "27-10-2022", "08-12-…
$ edd                    <chr> "10-09-2023", "22-08-2023", "03-08-2023", "14-09-…
$ anc1                   <chr> "16-02-2023", NA, NA, "06-03-2023", "16-02-2023",…
$ anc2                   <chr> NA, "01-05-2023", "09-04-2023", "03-04-2023", NA,…
$ anc3                   <chr> NA, "02-06-2023", "09-06-2023", "19-06-2023", NA,…
$ anc4                   <chr> NA, NA, "10-07-2023", NA, NA, NA, NA, NA, NA, NA,…
$ tt1                    <chr> "16-02-2023", "22-02-2023", "09-02-2023", "17-04-…
$ tt2                    <chr> NA, "29-03-2023", "09-04-2023", "19-06-2023", NA,…
$ ttb                    <chr> NA, NA, NA, NA, NA, "06-03-2023", NA, NA, NA, "04…
$ anc_ifa                <dbl> 0, 60, 180, 120, 0, 120, 360, 0, 60, 120, 120, 0,…
$ pnc_ifa                <dbl> 0, 0, 180, 0, 0, 60, 0, 0, 0, 0, 60, 0, 0, 0, 0, …
$ ifa                    <dbl> 0, 60, 360, 120, 0, 180, 360, 0, 60, 120, 180, 0,…
$ delivery               <chr> NA, "22-06-2023", "06-08-2023", "15-08-2023", NA,…
$ highrisk1stvisit       <chr> NA, NA, NA, "None", "PIH", "Multiple Pregnancy", …
$ highrisk2ndvisit       <chr> NA, "None", "None", "None", NA, NA, "None", NA, N…
$ highrisk3rdvisit       <chr> NA, NA, "None", "None", NA, NA, "None", NA, NA, N…
$ highrisk4thvisit       <chr> NA, NA, "None", NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hblevel1stvisit        <dbl> 9.0, NA, NA, 11.0, 9.8, 9.8, 0.0, 11.0, 11.0, 11.…
$ hblevel2ndvisit        <dbl> NA, 11.0, 12.0, 12.0, NA, 9.9, 9.2, NA, 10.7, 11.…
$ hblevel3rdvisit        <dbl> NA, 10.6, 10.8, 12.0, NA, NA, 0.0, NA, NA, 0.0, 1…
$ hblevel4thvisit        <dbl> NA, NA, 11.6, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ maternaldeath          <chr> "No", "No", "No", "No", "No", "No", "No", "No", "…
$ jsy_beneficiary        <chr> "Yes", "Yes", "No", "No", "Yes", "Yes", "Yes", "Y…
$ jsy_beneficiary1       <chr> "Yes", "Yes", "No", "No", "Yes", "Yes", "Yes", "Y…
$ penta1_dateupdated     <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penta2_dateupdated     <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penta3_dateupdated     <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mr1_dateupdated        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mr2_dateupdated        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc1_dateupdated       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc2_dateupdated       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc3_dateupdated       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc4_dateupdated       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ tt1_dateupdated        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ tt2_dateupdated        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ ttb_dateupdated        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc_ifa_dateupdated    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ date_db_record_created <chr> "10-10-2023", "10-10-2023", "10-10-2023", "10-10-…
$ validation_passed      <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
$ telerivet_status       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ relationshiptochild    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ source_type            <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ signupmethod           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ groups                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ source                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ state                  <chr> "Maharashtra", "Maharashtra", "Maharashtra", "Mah…
$ date_uploaded          <chr> "12-10-2023", "12-10-2023", "12-10-2023", "12-10-…
$ welcomedate            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ roundname              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ validation_remarks     <chr> "Age based on LMP is more than 280 days;", "Age b…
$ rchid_mother           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ caregivername          <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ filename               <chr> "c:\\\\Suvita\\\\Data\\\\smsreminder_rawdata\\\\m…
$ eligible_vaccine_date  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ eligible_vaccine_type  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ name                   <chr> "Kishori Nanasaheb Gaikwad", "Sujata Rajendra Mal…

Renaming validation remarks to just a few categories.

# Counts and shares of records by validation outcome.
tabyl(d1, validation_passed)
 validation_passed      n   percent
             FALSE 463092 0.4531867
              TRUE 558765 0.5468133
# d1 %>% tabyl(validation_remarks)


# Build `validation_remarks_simple`, which the tabulations below rely on.
# The telerivet duplicate remark embeds a contact id, record id and upload
# date, so it is collapsed to one fixed label; every other remark is kept
# unchanged.
d1 <- d1 %>%
  mutate(
    validation_remarks_simple = str_replace_all(
      validation_remarks,
      "RCHID is already added to telerivet, telerivet_contactid=[:graph:]* childbirth_recordid=[:graph:]* date_uploaded=[:graph:]*;",
      "RCHID is already added to telerivet"
    )
  )


# Cross-tabulate the simplified remark against the validation outcome.
tabyl(d1, validation_remarks_simple, validation_passed)
                                     validation_remarks_simple  FALSE   TRUE
                       Age based on LMP is more than 280 days; 314761      0
                                             LMP date missing;  29780      0
                                        Mother death recorded;     30      0
 Mother death recorded;Age based on LMP is more than 280 days;     75      0
                           RCHID is already added to telerivet 118446      0
                                                          <NA>      0 558765

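Maternal death looks calculated correctly: all 120 records with a recorded maternal death failed validation (15 of them carry only the “already added to telerivet” remark).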

# Validation outcome against the recorded maternal-death flag.
tabyl(d1, validation_passed, maternaldeath)
 validation_passed     No Yes
             FALSE 462972 120
              TRUE 558765   0
# Simplified remark against the recorded maternal-death flag.
tabyl(d1, validation_remarks_simple, maternaldeath)
                                     validation_remarks_simple     No Yes
                       Age based on LMP is more than 280 days; 314761   0
                                             LMP date missing;  29780   0
                                        Mother death recorded;      0  30
 Mother death recorded;Age based on LMP is more than 280 days;      0  75
                           RCHID is already added to telerivet 118431  15
                                                          <NA> 558765   0

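Missing LMP looks good: all 29,866 records with a missing LMP failed validation (86 of them carry only the “already added to telerivet” remark).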

# Flag records that have no LMP date; the cross-tabs below use this column.
d1 <- d1 %>% mutate(missing_lmp = is.na(lmp))

# Validation outcome against the missing-LMP flag.
d1 %>% tabyl(validation_passed, missing_lmp)
 validation_passed  FALSE  TRUE
             FALSE 433226 29866
              TRUE 558765     0
# Simplified remark against the missing-LMP flag.
tabyl(d1, validation_remarks_simple, missing_lmp)
                                     validation_remarks_simple  FALSE  TRUE
                       Age based on LMP is more than 280 days; 314761     0
                                             LMP date missing;      0 29780
                                        Mother death recorded;     30     0
 Mother death recorded;Age based on LMP is more than 280 days;     75     0
                           RCHID is already added to telerivet 118360    86
                                                          <NA> 558765     0

LMP dates look good!

# Derive the columns summarised below. `lmp` is a "dd-mm-yyyy" string, so it
# is parsed with dmy(); `days_lmp` is the LMP date minus today's date in days.
d1 <- d1 %>%
  mutate(
    lmp_1 = dmy(lmp),
    days_lmp = as.numeric(lmp_1 - today(), units = "days")
  )

# Largest and smallest `days_lmp` within each simplified remark. A group with
# no non-missing LMP makes max() warn and return -Inf.
d1 %>% group_by(validation_remarks_simple) %>% 
  summarize(most_recent = max(days_lmp, na.rm = TRUE),
            oldest = min(days_lmp, na.rm = TRUE))
Warning: There were 2 warnings in `summarize()`.
The first warning was:
ℹ In argument: `most_recent = max(days_lmp, na.rm = TRUE)`.
ℹ In group 2: `validation_remarks_simple = "LMP date missing;"`.
Caused by warning in `max()`:
! no non-missing arguments to max; returning -Inf
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

# Largest and smallest `days_lmp` for each validation outcome.
summarize(
  group_by(d1, validation_passed),
  most_recent = max(days_lmp, na.rm = TRUE),
  oldest = min(days_lmp, na.rm = TRUE)
)

I’m seeing no repeated RCHIDs in this data.


# Number of distinct RCHID values as tallied by table().
length(table(d1$rchid))
[1] 1021857
# Number of distinct RCHID values via unique(); compare with the row count.
length(unique(d1$rchid))
[1] 1021857
# Record counts and shares per district. The vector is passed directly, so the
# first column of the printed table is labelled with the expression
# `d1$district`.
tabyl(d1$district)
             d1$district     n     percent
          Ahmadnagar(26) 44283 0.043335809
                Akola(5) 16617 0.016261571
             Amravati(7) 25711 0.025161055
          Aurangabad(19) 36535 0.035753535
                Beed(27) 25957 0.025401793
            Bhandara(10)  9436 0.009234169
              Buldana(4) 24935 0.024401653
          Chandrapur(13) 15603 0.015269260
                Dhule(2)  4510 0.004413533
          Gadchiroli(12) 11040 0.010803860
           Gondiya *(11) 11811 0.011558369
           Hingoli *(16) 11600 0.011351882
              Jalgaon(3) 45511 0.044537543
               Jalna(18) 20252 0.019818820
            Kolhapur(34) 35174 0.034421646
               Latur(28) 25273 0.024732423
 Mumbai (Suburban) *(22) 50900 0.049811275
              Mumbai(23) 23332 0.022832940
               Nagpur(9) 42885 0.041967712
              Nanded(15) 36264 0.035488332
            Nandurbar(1) 21274 0.020818960
              Nashik(20) 64710 0.063325886
           Osmanabad(29) 16616 0.016260592
             Palghar(36) 37601 0.036796734
            Parbhani(17)  3959 0.003874319
                Pune(25) 99278 0.097154494
             Raigarh(24) 26890 0.026314837
           Ratnagiri(32)  8222 0.008046136
              Sangli(35) 29443 0.028813229
              Satara(31) 27628 0.027037051
          Sindhudurg(33)  4080 0.003992731
             Solapur(30) 46047 0.045062078
               Thane(21) 70353 0.068848185
               Wardha(8)  9829 0.009618763
             Washim *(6) 11619 0.011370476
            Yavatmal(14) 26679 0.026108350
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShsdWJyaWRhdGUpDQpsaWJyYXJ5KGphbml0b3IpDQpsaWJyYXJ5KHJlYWRyKQ0KDQpzZXR3ZChkaXJuYW1lKHJzdHVkaW9hcGk6OmdldEFjdGl2ZURvY3VtZW50Q29udGV4dCgpJHBhdGgpKQ0KDQpgYGANCg0KDQoNCmBgYHtyfQ0KDQpkMSA8LSByZWFkX2NzdigiTUhfc2NhbGV1cF9wcmVnX2RhdGFfYWxsLmNzdiIpDQpgYGANCg0KDQpgYGB7cn0NCmQxICU+JSBnbGltcHNlKCkNCmBgYA0KDQoNClJlbmFtaW5nIHZhbGlkYXRpb24gcmVtYXJrcyB0byBqdXN0IGEgZmV3IGNhdGVnb3JpZXMuIA0KYGBge3J9DQpkMSAlPiUgdGFieWwodmFsaWRhdGlvbl9wYXNzZWQpDQoNCiNkMSAlPiUgdGFieWwodmFsaWRhdGlvbl9yZW1hcmtzKQ0KDQoNCmQxIDwtIGQxICU+JSBtdXRhdGUodmFsaWRhdGlvbl9yZW1hcmtzX3NpbXBsZSA9IA0KICAgICAgICAjICBzdHJfcmVwbGFjZV9hbGwodmFsaWRhdGlvbl9yZW1hcmtzLCAiUkNISUQgaXMgYWxyZWFkeSBhZGRlZCB0byB0ZWxlcml2ZXQsIHRlbGVyaXZldF9jb250YWN0aWQ9WzpncmFwaDpdKiBjaGlsZGJpcnRoX3JlY29yZGlkPVs6Z3JhcGg6XSogZGF0ZV91cGxvYWRlZD1bOmdyYXBoOl0qOyIsICJSQ0hJRCBpcyBhbHJlYWR5IGFkZGVkIHRvIHRlbGVyaXZldCIpKSANCg0KDQpkMSAlPiUgdGFieWwoIHZhbGlkYXRpb25fcmVtYXJrc19zaW1wbGUsIHZhbGlkYXRpb25fcGFzc2VkKQ0KYGBgDQoNCk1hdGVybmFsIGRlYXRoIGxvb2tzIGNhbGN1bGF0ZWQgY29ycmVjdGx5IQ0KYGBge3J9DQpkMSAlPiUgdGFieWwodmFsaWRhdGlvbl9wYXNzZWQsIG1hdGVybmFsZGVhdGgpDQpkMSAlPiUgdGFieWwodmFsaWRhdGlvbl9yZW1hcmtzX3NpbXBsZSwgbWF0ZXJuYWxkZWF0aCkNCg0KYGBgDQoNCg0KTWlzc2luZyBMTVAgbG9va3MgZ29vZCENCmBgYHtyfQ0KZDEgPC0gZDEgJT4lIG11dGF0ZShtaXNzaW5nX2xtcCA9IGlzLm5hKGxtcCkpIA0KDQpkMSAlPiUgdGFieWwodmFsaWRhdGlvbl9wYXNzZWQsIG1pc3NpbmdfbG1wKQ0KZDEgJT4lIHRhYnlsKHZhbGlkYXRpb25fcmVtYXJrc19zaW1wbGUsIG1pc3NpbmdfbG1wKQ0KDQpgYGANCg0KDQpMTVAgZGF0ZXMgbG9vayBnb29kISANCmBgYHtyfQ0KZDEkbG1wXzEgPC0gZDEkbG1wICU+JSBkbXkoKQ0KDQpkMSRsbXBfMSAlPiUgc3VtbWFyeSgpDQoNCmQxJGRheXNfbG1wIDwtIGQxJGxtcF8xIC0gdG9kYXkoKSANCg0KZDEkZGF5c19sbXAgIDwtIGQxJGRheXNfbG1wICU+JSBhcy5udW1lcmljKCkNCg0KZDEgJT4lIGdyb3VwX2J5KHZhbGlkYXRpb25fcmVtYXJrc19zaW1wbGUpICU+JSANCiAgc3VtbWFyaXplKG1vc3RfcmVjZW50ID0gbWF4KGRheXNfbG1wLCBuYS5ybSA9IFRSVUUpLA0KICAgICAgICAgICAgb2xkZXN0ID0gbWluKGRheXNfbG1wLCBuYS5ybSA9IFRSVUUp
KQ0KDQpkMSAlPiUgZ3JvdXBfYnkodmFsaWRhdGlvbl9wYXNzZWQpICU+JSANCiAgc3VtbWFyaXplKG1vc3RfcmVjZW50ID0gbWF4KGRheXNfbG1wLCBuYS5ybSA9IFRSVUUpLA0KICAgICAgICAgICAgb2xkZXN0ID0gbWluKGRheXNfbG1wLCBuYS5ybSA9IFRSVUUpKQ0KDQpgYGANCg0KSSdtIHNlZWluZyBubyByZXBlYXRlZCBSQ0hJRHMgaW4gdGhpcyBkYXRhLiANCg0KYGBge3J9DQoNCnRhYmxlKGQxJHJjaGlkKSAlPiUgbGVuZ3RoKCkNCnVuaXF1ZShkMSRyY2hpZCkgJT4lIGxlbmd0aCgpDQoNCmBgYA0KDQoNCmBgYHtyfQ0KdGFieWwoZDEkZGlzdHJpY3QpDQoNCmBgYA0KDQo=