# Load the pregnancy scale-up extract; column types are guessed by readr.
# NOTE(review): readr reports parsing issues for this file -- inspect them with
# problems(d1) before relying on the guessed column types.
d1 <- readr::read_csv(file = "MH_scaleup_preg_data_all.csv")
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 1021857 Columns: 109
── Column specification ──────────────────────────────────────────────────────────
Delimiter: ","
chr (40): district, health_block, health_facility, health_subfacility, village...
dbl (11): rchid, mobileno, caseno, motherage, anc_ifa, pnc_ifa, ifa, hblevel1s...
lgl (58): childname, fathername, dob, weight, bcg, opv0, opv1, opv2, opv3, dpt...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a compact, column-by-column overview of the loaded data.
glimpse(d1)
Rows: 1,021,857
Columns: 109
$ rchid <dbl> 127006722820, 127006790829, 127006825927, 1270068…
$ district <chr> "Ahmadnagar(26)", "Ahmadnagar(26)", "Ahmadnagar(2…
$ health_block <chr> "Akole(84)", "Kopargaon(89)", "Rahata(99)", "Raha…
$ health_facility <chr> "Mhaladevi(323)", "Pohegaon Bk.(437)", "Astagaon(…
$ health_subfacility <chr> "Takli(2004)", "Pohegaon(2998)", "Astgaon(1758)",…
$ village <chr> "Takali (32076)", "Pohegaon Bk. (32390)", "Chol…
$ childname <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ fathername <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mothername <chr> "Kishori Nanasaheb Gaikwad", "Sujata Rajendra Mal…
$ mobileno <dbl> 8308071141, 9970717515, 8484922951, 9922192968, 9…
$ dob <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anm_name <chr> "Watane Meena Parshram(107525)", "sarita sukhdeo …
$ asha_name <chr> "Ghodake Kavita Deepak(59565)", "Vaishali Sachin …
$ registrationdate <chr> "16-02-2023", "22-02-2023", "09-02-2023", "13-02-…
$ weight <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ bcg <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ opv0 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ opv1 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ opv2 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ opv3 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ dpt1 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ dpt2 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ dpt3 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hep0 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hep1 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hep2 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hep3 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penta1 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penta2 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penta3 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ measles <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ ipv1 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ ipv2 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ pcv1 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ pcv2 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ pcvb <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mr1 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ rota1 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ rota2 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ rota3 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ je1 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ measles2 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mr2 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ dptb <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ je2 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ opvb <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ childdeath <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ death_date <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ death_reason <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ caseno <dbl> 2, 2, 3, 2, 2, 6, 3, 3, 2, 2, 1, 2, 1, 2, 2, 3, 2…
$ husbandname <chr> "Nanasaheb Gaikwad", "Rajendra Mali", "GORAKHA SH…
$ mobileof <chr> "Wife", "Wife", "Others", "Wife", "Wife", "Wife",…
$ motherage <dbl> 28, 30, 28, 29, 36, 35, 31, 26, 27, 23, 32, 27, 2…
$ lmp <chr> "04-12-2022", "15-11-2022", "27-10-2022", "08-12-…
$ edd <chr> "10-09-2023", "22-08-2023", "03-08-2023", "14-09-…
$ anc1 <chr> "16-02-2023", NA, NA, "06-03-2023", "16-02-2023",…
$ anc2 <chr> NA, "01-05-2023", "09-04-2023", "03-04-2023", NA,…
$ anc3 <chr> NA, "02-06-2023", "09-06-2023", "19-06-2023", NA,…
$ anc4 <chr> NA, NA, "10-07-2023", NA, NA, NA, NA, NA, NA, NA,…
$ tt1 <chr> "16-02-2023", "22-02-2023", "09-02-2023", "17-04-…
$ tt2 <chr> NA, "29-03-2023", "09-04-2023", "19-06-2023", NA,…
$ ttb <chr> NA, NA, NA, NA, NA, "06-03-2023", NA, NA, NA, "04…
$ anc_ifa <dbl> 0, 60, 180, 120, 0, 120, 360, 0, 60, 120, 120, 0,…
$ pnc_ifa <dbl> 0, 0, 180, 0, 0, 60, 0, 0, 0, 0, 60, 0, 0, 0, 0, …
$ ifa <dbl> 0, 60, 360, 120, 0, 180, 360, 0, 60, 120, 180, 0,…
$ delivery <chr> NA, "22-06-2023", "06-08-2023", "15-08-2023", NA,…
$ highrisk1stvisit <chr> NA, NA, NA, "None", "PIH", "Multiple Pregnancy", …
$ highrisk2ndvisit <chr> NA, "None", "None", "None", NA, NA, "None", NA, N…
$ highrisk3rdvisit <chr> NA, NA, "None", "None", NA, NA, "None", NA, NA, N…
$ highrisk4thvisit <chr> NA, NA, "None", NA, NA, NA, NA, NA, NA, NA, NA, N…
$ hblevel1stvisit <dbl> 9.0, NA, NA, 11.0, 9.8, 9.8, 0.0, 11.0, 11.0, 11.…
$ hblevel2ndvisit <dbl> NA, 11.0, 12.0, 12.0, NA, 9.9, 9.2, NA, 10.7, 11.…
$ hblevel3rdvisit <dbl> NA, 10.6, 10.8, 12.0, NA, NA, 0.0, NA, NA, 0.0, 1…
$ hblevel4thvisit <dbl> NA, NA, 11.6, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ maternaldeath <chr> "No", "No", "No", "No", "No", "No", "No", "No", "…
$ jsy_beneficiary <chr> "Yes", "Yes", "No", "No", "Yes", "Yes", "Yes", "Y…
$ jsy_beneficiary1 <chr> "Yes", "Yes", "No", "No", "Yes", "Yes", "Yes", "Y…
$ penta1_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penta2_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penta3_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mr1_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mr2_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc1_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc2_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc3_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc4_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ tt1_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ tt2_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ ttb_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ anc_ifa_dateupdated <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ date_db_record_created <chr> "10-10-2023", "10-10-2023", "10-10-2023", "10-10-…
$ validation_passed <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
$ telerivet_status <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ relationshiptochild <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ source_type <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ signupmethod <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ groups <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ source <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ state <chr> "Maharashtra", "Maharashtra", "Maharashtra", "Mah…
$ date_uploaded <chr> "12-10-2023", "12-10-2023", "12-10-2023", "12-10-…
$ welcomedate <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ roundname <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ validation_remarks <chr> "Age based on LMP is more than 280 days;", "Age b…
$ rchid_mother <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ caregivername <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ filename <chr> "c:\\\\Suvita\\\\Data\\\\smsreminder_rawdata\\\\m…
$ eligible_vaccine_date <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ eligible_vaccine_type <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ name <chr> "Kishori Nanasaheb Gaikwad", "Sujata Rajendra Mal…
Renaming validation remarks to just a few categories.
# One-way frequency table of the validation flag.
tabyl(d1, validation_passed)
validation_passed n percent
FALSE 463092 0.4531867
TRUE 558765 0.5468133
# d1 %>% tabyl(validation_remarks)
# d1 <- d1 %>% mutate(validation_remarks_simple =
# str_replace_all(validation_remarks, "RCHID is already added to telerivet, telerivet_contactid=[:graph:]* childbirth_recordid=[:graph:]* date_uploaded=[:graph:]*;", "RCHID is already added to telerivet"))
# NOTE(review): the step that creates validation_remarks_simple is commented
# out above, so the column is presumably created elsewhere -- confirm.
# Cross-tabulate the simplified remarks against the validation flag.
tabyl(d1, validation_remarks_simple, validation_passed)
validation_remarks_simple FALSE TRUE
Age based on LMP is more than 280 days; 314761 0
LMP date missing; 29780 0
Mother death recorded; 30 0
Mother death recorded;Age based on LMP is more than 280 days; 75 0
RCHID is already added to telerivet 118446 0
<NA> 0 558765
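Maternal death looks correctly handled: all 120 records with a recorded maternal death fail validation, and no record that passed validation has one. Note that 15 of the 120 carry only the "RCHID is already added to telerivet" remark rather than "Mother death recorded;".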
# Cross-tabulate the validation flag against recorded maternal death.
tabyl(d1, validation_passed, maternaldeath)
validation_passed No Yes
FALSE 462972 120
TRUE 558765 0
# Cross-tabulate the simplified remarks against recorded maternal death.
tabyl(d1, validation_remarks_simple, maternaldeath)
validation_remarks_simple No Yes
Age based on LMP is more than 280 days; 314761 0
LMP date missing; 29780 0
Mother death recorded; 0 30
Mother death recorded;Age based on LMP is more than 280 days; 0 75
RCHID is already added to telerivet 118431 15
<NA> 558765 0
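Missing LMP looks good: all 29,866 records with a missing LMP fail validation (29,780 are flagged "LMP date missing;" and the remaining 86 are flagged as already added to telerivet).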
# Cross-tabulate the validation flag against the missing-LMP indicator.
tabyl(d1, validation_passed, missing_lmp)
validation_passed FALSE TRUE
FALSE 433226 29866
TRUE 558765 0
# Cross-tabulate the simplified remarks against the missing-LMP indicator.
tabyl(d1, validation_remarks_simple, missing_lmp)
validation_remarks_simple FALSE TRUE
Age based on LMP is more than 280 days; 314761 0
LMP date missing; 0 29780
Mother death recorded; 30 0
Mother death recorded;Age based on LMP is more than 280 days; 75 0
RCHID is already added to telerivet 118360 86
<NA> 558765 0
LMP dates look good!
# Range of days_lmp within each simplified validation remark.
# Groups whose days_lmp values are all NA (e.g. "LMP date missing;") report NA
# instead of the -Inf/Inf that max()/min() return, with a warning, when nothing
# is left after dropping NAs.
# NOTE(review): whether the maximum is really the "most recent" value depends
# on how days_lmp is defined -- confirm.
d1 %>%
  group_by(validation_remarks_simple) %>%
  summarize(
    most_recent = if (all(is.na(days_lmp))) {
      NA
    } else {
      max(days_lmp, na.rm = TRUE)
    },
    oldest = if (all(is.na(days_lmp))) {
      NA
    } else {
      min(days_lmp, na.rm = TRUE)
    }
  )
Warning: There were 2 warnings in `summarize()`.
The first warning was:
ℹ In argument: `most_recent = max(days_lmp, na.rm = TRUE)`.
ℹ In group 2: `validation_remarks_simple = "LMP date missing;"`.
Caused by warning in `max()`:
! no non-missing arguments to max; returning -Inf
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
# Largest and smallest days_lmp (ignoring NAs) for each validation outcome.
d1 %>%
  group_by(validation_passed) %>%
  summarize(
    most_recent = max(days_lmp, na.rm = TRUE),
    oldest = min(days_lmp, na.rm = TRUE)
  )
I’m seeing no repeated RCHIDs in this data: the number of distinct RCHIDs equals the number of rows (1,021,857).
# Number of distinct rchid values, counted via a frequency table.
length(table(d1$rchid))
[1] 1021857
# Number of distinct rchid values, counted via unique().
length(unique(d1$rchid))
[1] 1021857
# Frequency table (count and share) of records per district.
tabyl(d1$district)
d1$district n percent
Ahmadnagar(26) 44283 0.043335809
Akola(5) 16617 0.016261571
Amravati(7) 25711 0.025161055
Aurangabad(19) 36535 0.035753535
Beed(27) 25957 0.025401793
Bhandara(10) 9436 0.009234169
Buldana(4) 24935 0.024401653
Chandrapur(13) 15603 0.015269260
Dhule(2) 4510 0.004413533
Gadchiroli(12) 11040 0.010803860
Gondiya *(11) 11811 0.011558369
Hingoli *(16) 11600 0.011351882
Jalgaon(3) 45511 0.044537543
Jalna(18) 20252 0.019818820
Kolhapur(34) 35174 0.034421646
Latur(28) 25273 0.024732423
Mumbai (Suburban) *(22) 50900 0.049811275
Mumbai(23) 23332 0.022832940
Nagpur(9) 42885 0.041967712
Nanded(15) 36264 0.035488332
Nandurbar(1) 21274 0.020818960
Nashik(20) 64710 0.063325886
Osmanabad(29) 16616 0.016260592
Palghar(36) 37601 0.036796734
Parbhani(17) 3959 0.003874319
Pune(25) 99278 0.097154494
Raigarh(24) 26890 0.026314837
Ratnagiri(32) 8222 0.008046136
Sangli(35) 29443 0.028813229
Satara(31) 27628 0.027037051
Sindhudurg(33) 4080 0.003992731
Solapur(30) 46047 0.045062078
Thane(21) 70353 0.068848185
Wardha(8) 9829 0.009618763
Washim *(6) 11619 0.011370476
Yavatmal(14) 26679 0.026108350