library(readr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ dplyr   1.0.9
## ✔ tibble  3.1.8     ✔ stringr 1.4.0
## ✔ tidyr   1.2.0     ✔ forcats 0.5.1
## ✔ purrr   0.3.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(gmodels)
library(CGPfunctions)
## Registered S3 method overwritten by 'DescTools':
##   method         from 
##   reorder.factor gdata
# Absolute, machine-specific location of the children5k survey extract.
filepath <- "D:/Data Science/R/John Hopkins/Exploratory data analysis/Data for test task - children5k.csv"
# Parse the CSV into a tibble; readr guesses the column types.
df1 <- read_csv(file = filepath)
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 5003 Columns: 51
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (40): state, type of place of residence, mother's highest educational le...
## dbl (11): child's birth order, mother's current age, mother's highest year o...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

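Data preparation for the analysis: keep children aged one year or older, select the variables of interest, and drop rows with missing values.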

# Build the analysis subset: children whose current age is at least one year,
# restricted to the education, delivery, vaccination and SMS-literacy columns.
# na.omit() then drops every row with a missing value in any kept column.
df2 <- df1 %>%
  filter(`current age of child in years (for alive children only)` >= 1) %>%
  select(all_of(c(
    "mother's literacy level",
    "mother's highest year of education",
    "father's total years of education",
    "place of delivery (i.e. place where child was born)",
    "received DPT-3 vaccine [recommended at 14 weeks]",
    "received measles-1 vaccine [recommended at 9 months]",
    "is the mother able to read SMS messages"
  ))) %>%
  na.omit()
# Collapse the detailed place-of-delivery labels into three groups:
# "home", "private" and "public". Each group is recoded with one vectorised
# %in% assignment instead of one assignment per label.
# The raw column also contains "other home" and
# "other public sector health facility"; they are mapped here too so that no
# unrecoded label can leak into the cross-tabulations below.
# NOTE(review): "other" is treated as a home delivery, exactly as in the
# original recoding -- confirm that this grouping is intended.
df2$`place of delivery (i.e. place where child was born)`[
  df2$`place of delivery (i.e. place where child was born)` %in% c(
    "ngo or trust hospital/clinic",
    "other private sector health facility",
    "private: hospital/maternity home/clinic"
  )
] <- "private"

df2$`place of delivery (i.e. place where child was born)`[
  df2$`place of delivery (i.e. place where child was born)` %in% c(
    "parents' home",
    "respondent's home",
    "other home",
    "other"
  )
] <- "home"

df2$`place of delivery (i.e. place where child was born)`[
  df2$`place of delivery (i.e. place where child was born)` %in% c(
    "public: chc/rural hospital/block phc",
    "public: govt. dispensary",
    "public: govt./munic. hospital",
    "public: phc/additional phc",
    "public: sub-centre",
    "public: uhc/uhp/ufwc",
    "other public sector health facility"
  )
] <- "public"

# Sanity check: only the three collapsed groups should remain.
count(df2, `place of delivery (i.e. place where child was born)`)
## # A tibble: 3 × 2
##   `place of delivery (i.e. place where child was born)`     n
##   <chr>                                                 <int>
## 1 home                                                     21
## 2 private                                                  44
## 3 public                                                  138
# Recode DPT-3 status to a binary yes/no flag: any evidence of vaccination
# (mother's report, or a date / mark on the card) counts as "yes".
df2$`received DPT-3 vaccine [recommended at 14 weeks]`[
  df2$`received DPT-3 vaccine [recommended at 14 weeks]` %in% c(
    "reported by mother",
    "vaccination date on card",
    "vaccination marked on card"
  )
] <- "yes"
# The raw column also has a "don't know" answer. Fold it into "no", the same
# way the measles-1 column is recoded later in this script, so the variable
# is always binary.
df2$`received DPT-3 vaccine [recommended at 14 weeks]`[
  df2$`received DPT-3 vaccine [recommended at 14 weeks]` %in% "don't know"
] <- "no"

# Sanity check: only "no" and "yes" should remain.
count(df2, `received DPT-3 vaccine [recommended at 14 weeks]`)
## # A tibble: 2 × 2
##   `received DPT-3 vaccine [recommended at 14 weeks]`     n
##   <chr>                                              <int>
## 1 no                                                    27
## 2 yes                                                  176

Relationship between place of delivery and DPT-3 vaccination.

# Cross-tabulate DPT-3 status (rows) against place of delivery (columns) in
# SPSS layout, requesting expected counts, row/column/total percentages,
# chi-square contributions and standardised residuals.
CrossTable(
  df2[["received DPT-3 vaccine [recommended at 14 weeks]"]],
  df2[["place of delivery (i.e. place where child was born)"]],
  digits = 2,
  expected = TRUE,
  prop.r = TRUE,
  prop.c = TRUE,
  prop.t = TRUE,
  prop.chisq = TRUE,
  sresid = TRUE,
  format = "SPSS",
  dnn = c("Received DPT Vaccine", "Place of birth")
)
## Warning in chisq.test(t, correct = FALSE, ...): Chi-squared approximation may be
## incorrect
## 
##    Cell Contents
## |-------------------------|
## |                   Count |
## |         Expected Values |
## | Chi-square contribution |
## |             Row Percent |
## |          Column Percent |
## |           Total Percent |
## |            Std Residual |
## |-------------------------|
## 
## Total Observations in Table:  203 
## 
##                      | Place of birth 
## Received DPT Vaccine |     home  |  private  |   public  | Row Total | 
## ---------------------|-----------|-----------|-----------|-----------|
##                   no |        3  |        8  |       16  |       27  | 
##                      |     2.79  |     5.85  |    18.35  |           | 
##                      |     0.02  |     0.79  |     0.30  |           | 
##                      |    11.11% |    29.63% |    59.26% |    13.30% | 
##                      |    14.29% |    18.18% |    11.59% |           | 
##                      |     1.48% |     3.94% |     7.88% |           | 
##                      |     0.12  |     0.89  |    -0.55  |           | 
## ---------------------|-----------|-----------|-----------|-----------|
##                  yes |       18  |       36  |      122  |      176  | 
##                      |    18.21  |    38.15  |   119.65  |           | 
##                      |     0.00  |     0.12  |     0.05  |           | 
##                      |    10.23% |    20.45% |    69.32% |    86.70% | 
##                      |    85.71% |    81.82% |    88.41% |           | 
##                      |     8.87% |    17.73% |    60.10% |           | 
##                      |    -0.05  |    -0.35  |     0.22  |           | 
## ---------------------|-----------|-----------|-----------|-----------|
##         Column Total |       21  |       44  |      138  |      203  | 
##                      |    10.34% |    21.67% |    67.98% |           | 
## ---------------------|-----------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  1.275262     d.f. =  2     p =  0.5285431 
## 
## 
##  
##        Minimum expected frequency: 2.793103 
## Cells with Expected Frequency < 5: 1 of 6 (16.66667%)
# Mosaic plot of the recoded DPT-3 status and place-of-delivery variables.
mosaicplot(
  `received DPT-3 vaccine [recommended at 14 weeks]` ~
    `place of delivery (i.e. place where child was born)`,
  data = df2,
  col = c("Green", "Yellow")
)

# CGPfunctions cross-tab plot of place of delivery and DPT-3 status.
CGPfunctions::PlotXTabs2(
  df2,
  `place of delivery (i.e. place where child was born)`,
  `received DPT-3 vaccine [recommended at 14 weeks]`
)

Data manipulation for Measles vaccine received column.

# Recode measles-1 status to a binary yes/no flag. Any evidence of
# vaccination (card mark, card date, "vaccination identified" or the
# mother's report) becomes "yes".
df2$`received measles-1 vaccine [recommended at 9 months]`[
  df2$`received measles-1 vaccine [recommended at 9 months]` %in% c(
    "vaccination marked on card",
    "vaccination date on card",
    "vaccination identified",
    "reported by mother"
  )
] <- "yes"
# A "don't know" answer is counted as not vaccinated.
df2$`received measles-1 vaccine [recommended at 9 months]`[
  df2$`received measles-1 vaccine [recommended at 9 months]` %in% "don't know"
] <- "no"

# Sanity check: only "no" and "yes" should remain.
count(df2, `received measles-1 vaccine [recommended at 9 months]`)
## # A tibble: 2 × 2
##   `received measles-1 vaccine [recommended at 9 months]`     n
##   <chr>                                                  <int>
## 1 no                                                        22
## 2 yes                                                      181

Relationship of measles vaccination with the mother's years of schooling, the father's years of schooling, and the mother's ability to read SMS messages.

# Cross-tabulate measles-1 status (rows) against the mother's highest year of
# education (columns) in SPSS layout, with expected counts, percentages,
# chi-square contributions and standardised residuals.
# NOTE(review): the run emits a "Chi-squared approximation may be incorrect"
# warning for this table -- interpret the p-value with care.
CrossTable(
  df2[["received measles-1 vaccine [recommended at 9 months]"]],
  df2[["mother's highest year of education"]],
  digits = 2,
  expected = TRUE,
  prop.r = TRUE,
  prop.c = TRUE,
  prop.t = TRUE,
  prop.chisq = TRUE,
  sresid = TRUE,
  format = "SPSS",
  dnn = c("Got Measles Vaccine", "Mother's Highest Year of education")
)
## Warning in chisq.test(t, correct = FALSE, ...): Chi-squared approximation may be
## incorrect
## 
##    Cell Contents
## |-------------------------|
## |                   Count |
## |         Expected Values |
## | Chi-square contribution |
## |             Row Percent |
## |          Column Percent |
## |           Total Percent |
## |            Std Residual |
## |-------------------------|
## 
## Total Observations in Table:  203 
## 
##                     | Mother's Highest Year of education 
## Got Measles Vaccine |        1  |        2  |        3  |        4  |        5  |        6  |        7  |        8  | Row Total | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##                  no |        0  |        1  |        6  |        4  |        7  |        1  |        3  |        0  |       22  | 
##                     |     0.76  |     1.30  |     6.18  |     4.01  |     5.85  |     0.43  |     3.36  |     0.11  |           | 
##                     |     0.76  |     0.07  |     0.01  |     0.00  |     0.23  |     0.74  |     0.04  |     0.11  |           | 
##                     |     0.00% |     4.55% |    27.27% |    18.18% |    31.82% |     4.55% |    13.64% |     0.00% |    10.84% | 
##                     |     0.00% |     8.33% |    10.53% |    10.81% |    12.96% |    25.00% |     9.68% |     0.00% |           | 
##                     |     0.00% |     0.49% |     2.96% |     1.97% |     3.45% |     0.49% |     1.48% |     0.00% |           | 
##                     |    -0.87  |    -0.26  |    -0.07  |    -0.00  |     0.47  |     0.86  |    -0.20  |    -0.33  |           | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##                 yes |        7  |       11  |       51  |       33  |       47  |        3  |       28  |        1  |      181  | 
##                     |     6.24  |    10.70  |    50.82  |    32.99  |    48.15  |     3.57  |    27.64  |     0.89  |           | 
##                     |     0.09  |     0.01  |     0.00  |     0.00  |     0.03  |     0.09  |     0.00  |     0.01  |           | 
##                     |     3.87% |     6.08% |    28.18% |    18.23% |    25.97% |     1.66% |    15.47% |     0.55% |    89.16% | 
##                     |   100.00% |    91.67% |    89.47% |    89.19% |    87.04% |    75.00% |    90.32% |   100.00% |           | 
##                     |     3.45% |     5.42% |    25.12% |    16.26% |    23.15% |     1.48% |    13.79% |     0.49% |           | 
##                     |     0.30  |     0.09  |     0.02  |     0.00  |    -0.17  |    -0.30  |     0.07  |     0.11  |           | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##        Column Total |        7  |       12  |       57  |       37  |       54  |        4  |       31  |        1  |      203  | 
##                     |     3.45% |     5.91% |    28.08% |    18.23% |    26.60% |     1.97% |    15.27% |     0.49% |           | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  2.181927     d.f. =  7     p =  0.9490885 
## 
## 
##  
##        Minimum expected frequency: 0.1083744 
## Cells with Expected Frequency < 5: 8 of 16 (50%)
# Cross-tabulate measles-1 status (rows) against the father's total years of
# education (columns) in SPSS layout, with expected counts, percentages,
# chi-square contributions and standardised residuals.
# NOTE(review): this column is read as character (it contains "don't know"),
# so the table columns come out in text order (0, 1, 10, 11, ...) rather than
# numeric order -- confirm whether a numeric conversion is wanted.
CrossTable(
  df2[["received measles-1 vaccine [recommended at 9 months]"]],
  df2[["father's total years of education"]],
  digits = 2,
  expected = TRUE,
  prop.r = TRUE,
  prop.c = TRUE,
  prop.t = TRUE,
  prop.chisq = TRUE,
  sresid = TRUE,
  format = "SPSS",
  dnn = c("Got Measles Vaccine", "Father's Years of education")
)
## Warning in chisq.test(t, correct = FALSE, ...): Chi-squared approximation may be
## incorrect
## 
##    Cell Contents
## |-------------------------|
## |                   Count |
## |         Expected Values |
## | Chi-square contribution |
## |             Row Percent |
## |          Column Percent |
## |           Total Percent |
## |            Std Residual |
## |-------------------------|
## 
## Total Observations in Table:  203 
## 
##                     | Father's Years of education 
## Got Measles Vaccine |          0  |          1  |         10  |         11  |         12  |         14  |         15  |         16  |         17  |         18  |         19  |         20  |          3  |          4  |          5  |          6  |          7  |          8  |          9  | don't know  |  Row Total | 
## --------------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|
##                  no |         3  |         0  |         5  |         0  |         1  |         0  |         3  |         0  |         0  |         0  |         0  |         0  |         0  |         1  |         1  |         1  |         2  |         3  |         2  |         0  |        22  | 
##                     |      1.41  |      0.11  |      3.36  |      0.43  |      3.03  |      0.22  |      2.93  |      0.22  |      0.76  |      0.33  |      0.11  |      0.11  |      0.11  |      0.54  |      1.41  |      0.22  |      0.87  |      2.82  |      2.82  |      0.22  |            | 
##                     |      1.80  |      0.11  |      0.80  |      0.43  |      1.36  |      0.22  |      0.00  |      0.22  |      0.76  |      0.33  |      0.11  |      0.11  |      0.11  |      0.39  |      0.12  |      2.83  |      1.48  |      0.01  |      0.24  |      0.22  |            | 
##                     |     13.64% |      0.00% |     22.73% |      0.00% |      4.55% |      0.00% |     13.64% |      0.00% |      0.00% |      0.00% |      0.00% |      0.00% |      0.00% |      4.55% |      4.55% |      4.55% |      9.09% |     13.64% |      9.09% |      0.00% |     10.84% | 
##                     |     23.08% |      0.00% |     16.13% |      0.00% |      3.57% |      0.00% |     11.11% |      0.00% |      0.00% |      0.00% |      0.00% |      0.00% |      0.00% |     20.00% |      7.69% |     50.00% |     25.00% |     11.54% |      7.69% |      0.00% |            | 
##                     |      1.48% |      0.00% |      2.46% |      0.00% |      0.49% |      0.00% |      1.48% |      0.00% |      0.00% |      0.00% |      0.00% |      0.00% |      0.00% |      0.49% |      0.49% |      0.49% |      0.99% |      1.48% |      0.99% |      0.00% |            | 
##                     |      1.34  |     -0.33  |      0.89  |     -0.66  |     -1.17  |     -0.47  |      0.04  |     -0.47  |     -0.87  |     -0.57  |     -0.33  |     -0.33  |     -0.33  |      0.62  |     -0.34  |      1.68  |      1.22  |      0.11  |     -0.49  |     -0.47  |            | 
## --------------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|
##                 yes |        10  |         1  |        26  |         4  |        27  |         2  |        24  |         2  |         7  |         3  |         1  |         1  |         1  |         4  |        12  |         1  |         6  |        23  |        24  |         2  |       181  | 
##                     |     11.59  |      0.89  |     27.64  |      3.57  |     24.97  |      1.78  |     24.07  |      1.78  |      6.24  |      2.67  |      0.89  |      0.89  |      0.89  |      4.46  |     11.59  |      1.78  |      7.13  |     23.18  |     23.18  |      1.78  |            | 
##                     |      0.22  |      0.01  |      0.10  |      0.05  |      0.17  |      0.03  |      0.00  |      0.03  |      0.09  |      0.04  |      0.01  |      0.01  |      0.01  |      0.05  |      0.01  |      0.34  |      0.18  |      0.00  |      0.03  |      0.03  |            | 
##                     |      5.52% |      0.55% |     14.36% |      2.21% |     14.92% |      1.10% |     13.26% |      1.10% |      3.87% |      1.66% |      0.55% |      0.55% |      0.55% |      2.21% |      6.63% |      0.55% |      3.31% |     12.71% |     13.26% |      1.10% |     89.16% | 
##                     |     76.92% |    100.00% |     83.87% |    100.00% |     96.43% |    100.00% |     88.89% |    100.00% |    100.00% |    100.00% |    100.00% |    100.00% |    100.00% |     80.00% |     92.31% |     50.00% |     75.00% |     88.46% |     92.31% |    100.00% |            | 
##                     |      4.93% |      0.49% |     12.81% |      1.97% |     13.30% |      0.99% |     11.82% |      0.99% |      3.45% |      1.48% |      0.49% |      0.49% |      0.49% |      1.97% |      5.91% |      0.49% |      2.96% |     11.33% |     11.82% |      0.99% |            | 
##                     |     -0.47  |      0.11  |     -0.31  |      0.23  |      0.41  |      0.16  |     -0.02  |      0.16  |      0.30  |      0.20  |      0.11  |      0.11  |      0.11  |     -0.22  |      0.12  |     -0.59  |     -0.42  |     -0.04  |      0.17  |      0.16  |            | 
## --------------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|
##        Column Total |        13  |         1  |        31  |         4  |        28  |         2  |        27  |         2  |         7  |         3  |         1  |         1  |         1  |         5  |        13  |         2  |         8  |        26  |        26  |         2  |       203  | 
##                     |      6.40% |      0.49% |     15.27% |      1.97% |     13.79% |      0.99% |     13.30% |      0.99% |      3.45% |      1.48% |      0.49% |      0.49% |      0.49% |      2.46% |      6.40% |      0.99% |      3.94% |     12.81% |     12.81% |      0.99% |            | 
## --------------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  13.04462     d.f. =  19     p =  0.836279 
## 
## 
##  
##        Minimum expected frequency: 0.1083744 
## Cells with Expected Frequency < 5: 31 of 40 (77.5%)
# Cross-tabulate measles-1 status (rows) against whether the mother can read
# SMS messages (columns) in SPSS layout, with expected counts, percentages,
# chi-square contributions and standardised residuals.
CrossTable(
  df2[["received measles-1 vaccine [recommended at 9 months]"]],
  df2[["is the mother able to read SMS messages"]],
  digits = 2,
  expected = TRUE,
  prop.r = TRUE,
  prop.c = TRUE,
  prop.t = TRUE,
  prop.chisq = TRUE,
  sresid = TRUE,
  format = "SPSS",
  dnn = c("Got Measles Vaccine", "Mother can Read SMS")
)
## 
##    Cell Contents
## |-------------------------|
## |                   Count |
## |         Expected Values |
## | Chi-square contribution |
## |             Row Percent |
## |          Column Percent |
## |           Total Percent |
## |            Std Residual |
## |-------------------------|
## 
## Total Observations in Table:  203 
## 
##                     | Mother can Read SMS 
## Got Measles Vaccine |       no  |      yes  | Row Total | 
## --------------------|-----------|-----------|-----------|
##                  no |        4  |       18  |       22  | 
##                     |     5.20  |    16.80  |           | 
##                     |     0.28  |     0.09  |           | 
##                     |    18.18% |    81.82% |    10.84% | 
##                     |     8.33% |    11.61% |           | 
##                     |     1.97% |     8.87% |           | 
##                     |    -0.53  |     0.29  |           | 
## --------------------|-----------|-----------|-----------|
##                 yes |       44  |      137  |      181  | 
##                     |    42.80  |   138.20  |           | 
##                     |     0.03  |     0.01  |           | 
##                     |    24.31% |    75.69% |    89.16% | 
##                     |    91.67% |    88.39% |           | 
##                     |    21.67% |    67.49% |           | 
##                     |     0.18  |    -0.10  |           | 
## --------------------|-----------|-----------|-----------|
##        Column Total |       48  |      155  |      203  | 
##                     |    23.65% |    76.35% |           | 
## --------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  0.4079449     d.f. =  1     p =  0.5230144 
## 
## Pearson's Chi-squared test with Yates' continuity correction 
## ------------------------------------------------------------
## Chi^2 =  0.1391399     d.f. =  1     p =  0.7091376 
## 
##  
##        Minimum expected frequency: 5.20197
#install.packages("CGPfunctions")
library(CGPfunctions)
# CGPfunctions cross-tab plot of the mother's highest year of education and
# measles-1 status.
CGPfunctions::PlotXTabs2(
  df2,
  `mother's highest year of education`,
  `received measles-1 vaccine [recommended at 9 months]`
)

# CGPfunctions cross-tab plot (older PlotXTabs interface) of the father's
# total years of education and measles-1 status.
CGPfunctions::PlotXTabs(
  df2,
  `father's total years of education`,
  `received measles-1 vaccine [recommended at 9 months]`
)
## Plotted dataset df2 variables father's total years of education by received measles-1 vaccine [recommended at 9 months]

# CGPfunctions cross-tab plot of the mother's ability to read SMS messages
# and measles-1 status.
CGPfunctions::PlotXTabs2(
  df2,
  `is the mother able to read SMS messages`,
  `received measles-1 vaccine [recommended at 9 months]`
)

#install.packages("vtree")
library(vtree)
# vtree diagram over the mother's highest year of education and measles-1
# status, with the variables passed in that order.
vtree(
  df2,
  c(
    "mother's highest year of education",
    "received measles-1 vaccine [recommended at 9 months]"
  ),
  palette = c(1, 3),
  sortfill = TRUE
)
## Warning in knitr::include_graphics(fullpath): It is highly recommended to use
## relative paths for images. You had absolute paths: "C:/Users/varun/AppData/
## Local/Temp/RtmpK6zxkt/vtree001.png"

# vtree diagram over the father's total years of education and measles-1
# status, with the variables passed in that order.
vtree(
  df2,
  c(
    "father's total years of education",
    "received measles-1 vaccine [recommended at 9 months]"
  ),
  palette = c(1, 3),
  sortfill = TRUE
)
## Warning in knitr::include_graphics(fullpath): It is highly recommended to use
## relative paths for images. You had absolute paths: "C:/Users/varun/AppData/
## Local/Temp/RtmpK6zxkt/vtree002.png"

# vtree diagram over the mother's ability to read SMS messages and measles-1
# status, with the variables passed in that order.
vtree(
  df2,
  c(
    "is the mother able to read SMS messages",
    "received measles-1 vaccine [recommended at 9 months]"
  ),
  palette = c(1, 3),
  sortfill = TRUE
)
## Warning in knitr::include_graphics(fullpath): It is highly recommended to use
## relative paths for images. You had absolute paths: "C:/Users/varun/AppData/
## Local/Temp/RtmpK6zxkt/vtree003.png"

EDA of dataset.

library(sjPlot)
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
# Codebook-style overview of the raw data frame: frequencies, percentages,
# missing-value counts, column types and string values are all switched on.
# TRUE is spelled out because the T shorthand is an ordinary variable that
# can be reassigned.
view_df(
  df1,
  show.frq = TRUE,
  show.prc = TRUE,
  show.na = TRUE,
  show.type = TRUE,
  show.string.values = TRUE
)
Data frame: df1
ID Name Type Label missings Values Value Labels Freq. %
1 child’s birth order numeric 0 (0.00%) range: 1-4
2 mother’s current age numeric 0 (0.00%) range: 16-49
3 state character 0 (0.00%) andaman & nicobar islands
andhra pradesh
arunachal pradesh
assam
bihar
chandigarh
chhattisgarh
dadra & nagar haveli and daman & diu
goa
gujarat
haryana
himachal pradesh
jammu & kashmir
jharkhand
karnataka
<… truncated>
4
63
149
230
486
3
180
16
6
216
158
46
150
199
178
0.08
1.26
2.98
4.60
9.71
0.06
3.60
0.32
0.12
4.32
3.16
0.92
3.00
3.98
3.56
4 type of place of residence character 0 (0.00%) rural
urban
4003
1000
80.01
19.99
5 mother’s highest educational level character 0 (0.00%) higher
no education
primary
secondary
681
1112
648
2562
13.61
22.23
12.95
51.21
6 mother’s highest year of education numeric 1118 (22.35%) range: 1-8
7 household has refrigerator character 0 (0.00%) no
not a dejure resident
yes
3253
244
1506
65.02
4.88
30.10
8 household has car/truck character 0 (0.00%) no
not a dejure resident
yes
4399
244
360
87.93
4.88
7.20
9 religion character 0 (0.00%) buddhist / neo-buddhist
christian
hindu
jain
muslim
no religion
other
sikh
43
397
3663
5
742
3
64
86
0.86
7.94
73.22
0.10
14.83
0.06
1.28
1.72
10 mother’s literacy level character 0 (0.00%) able to read only parts of sentence
able to read whole sentence
blind/visually impaired
cannot read at all
no card with required language
688
2863
8
1419
25
13.75
57.23
0.16
28.36
0.50
11 frequency of listening to radio character 0 (0.00%) at least once a week
less than once a week
not at all
155
460
4388
3.10
9.19
87.71
12 frequency of watching television character 0 (0.00%) at least once a week
less than once a week
not at all
2218
1061
1724
44.33
21.21
34.46
13 has mother been away for more than 1 month in last 12mo character 4207 (84.09%) no
yes
728
68
91.46
8.54
14 mother owns a mobile phone character 4207 (84.09%) no
yes
351
445
44.10
55.90
15 wealth index combined character 0 (0.00%) middle
poorer
poorest
richer
richest
982
1179
1320
875
647
19.63
23.57
26.38
17.49
12.93
16 wealth index factor score combined numeric 0 (0.00%) range: -2501080-2239460
17 mother’s age at birth of first child numeric 0 (0.00%) range: 6-41
18 where is the father currently staying character 66 (1.32%) living with her
staying elsewhere
4365
572
88.41
11.59
19 does the father have any other wives character 66 (1.32%) 1
2
7
don’t know
no other wives
68
8
1
3
4857
1.38
0.16
0.02
0.06
98.38
20 father’s education level character 4207 (84.09%) don’t know
higher
no education
primary
secondary
4
117
135
116
424
0.50
14.70
16.96
14.57
53.27
21 father’s occupation character 4207 (84.09%) accountants, auditors and related workers
administrative and executive officials government and local bodies
administrative, executive and managerial workers, n.e.c.
agricultural labourer
architects, engineers, technologists and surveyors
broadcasting station and sound equipment operators and cinema projectionists
building caretakers, sweepers, cleaners and related workers
carpenters, cabinet & related wood workers
chemical processors and related workers
clerical and other supervisors
clerical and related workers
computing machine operators
cooks, waiters, bartenders and related workers (domestic & institutional)
cultivators
dk (don’t know)
<… truncated>
2
2
3
141
4
1
2
10
3
2
5
2
5
42
7
0.25
0.25
0.38
17.71
0.50
0.13
0.25
1.26
0.38
0.25
0.63
0.25
0.63
5.28
0.88
22 is the mother currently working character 4207 (84.09%) no
yes
641
155
80.53
19.47
23 does the mother have a job (even if currently absent) character 4357 (87.09%) no
yes
641
5
99.23
0.77
24 father’s total years of education character 4207 (84.09%) 0
1
10
11
12
13
14
15
16
17
18
19
2
20
3
<… truncated>
138
4
121
16
94
3
7
79
3
17
5
1
12
2
6
17.34
0.50
15.20
2.01
11.81
0.38
0.88
9.92
0.38
2.14
0.63
0.13
1.51
0.25
0.75
25 mother’s occupation character 4207 (84.09%) accountants, auditors and related workers
agricultural labourer
architects, engineers, technologists and surveyors
building caretakers, sweepers, cleaners and related workers
cooks, waiters, bartenders and related workers (domestic & institutional)
cultivators
farm plantation, dairy and other managers and supervisors
farmers, other than cultivators
house keepers, matron and stewards (domestic & institutional)
labourers (labourers, n.e.c.)
maids and related house keeping service workers, n.e.c.
merchants and shopkeepers, wholesale and retail trade
not working and didn’t work in last 12 months
nursing and other medical and health technicians
other (occupation not found)
<… truncated>
1
65
1
2
2
17
5
24
5
15
3
4
600
2
3
0.13
8.17
0.13
0.25
0.25
2.14
0.63
3.02
0.63
1.88
0.38
0.50
75.38
0.25
0.38
26 father’s current age numeric 4214 (84.23%) range: 19-55
27 person in the household who makes decisions about mother’s healthcare character 4214 (84.23%) husband/partner alone
other
respondent alone
respondent and husband/partner
someone else
118
6
54
596
15
14.96
0.76
6.84
75.54
1.90
28 sex of child character 0 (0.00%) female
male
2388
2615
47.73
52.27
29 current age of child in years (for alive children only) numeric 166 (3.32%) range: 0-4
30 months since birth (including for dead children) numeric 0 (0.00%) range: 0-59
31 place of delivery (i.e. place where child was born) character 0 (0.00%) ngo or trust hospital/clinic
other
other home
other private sector health facility
other public sector health facility
parents’ home
private: hospital/maternity home/clinic
public: chc/rural hospital/block phc
public: govt. dispensary
public: govt./munic. hospital
public: phc/additional phc
public: sub-centre
public: uhc/uhp/ufwc
respondent’s home
15
4
5
35
5
77
1010
1232
98
1416
371
48
84
603
0.30
0.08
0.10
0.70
0.10
1.54
20.19
24.63
1.96
28.30
7.42
0.96
1.68
12.05
32 baby received a postnatal check-up within 2 months of birth character 1231 (24.61%) don’t know
no
yes
3
2090
1679
0.08
55.41
44.51
33 mother has child’s vaccination card character 2144 (42.85%) no card
no longer has card
yes, not seen
yes, seen
93
67
246
2453
3.25
2.34
8.60
85.80
34 received BCG vaccine [recommended at birth] character 2144 (42.85%) don’t know
no
reported by mother
vaccination date on card
vaccination marked on card
2
174
340
2296
47
0.07
6.09
11.89
80.31
1.64
35 received DPT-1 vaccine [recommended at 6 weeks] character 2144 (42.85%) don’t know
no
reported by mother
vaccination date on card
vaccination marked on card
3
371
314
2099
72
0.10
12.98
10.98
73.42
2.52
36 received DPT-2 vaccine [recommended at 10 weeks] character 2144 (42.85%) don’t know
no
reported by mother
vaccination date on card
vaccination marked on card
3
520
301
1967
68
0.10
18.19
10.53
68.80
2.38
37 received DPT-3 vaccine [recommended at 14 weeks] character 2144 (42.85%) don’t know
no
reported by mother
vaccination date on card
vaccination marked on card
3
714
266
1800
76
0.10
24.97
9.30
62.96
2.66
38 received measles-1 vaccine [recommended at 9 months] character 2144 (42.85%) don’t know
no
reported by mother
vaccination date on card
vaccination identified
vaccination marked on card
14
721
223
1234
620
47
0.49
25.22
7.80
43.16
21.69
1.64
39 family belongs to a scheduled caste/tribe/other backward class character 237 (4.74%) don’t know
none of them
obc
schedule caste
schedule tribe
32
769
1915
1036
1014
0.67
16.14
40.18
21.74
21.28
40 in the last 12 months, has your husband been away for 1mo or more character 4214 (84.23%) no
yes
607
182
76.93
23.07
41 in the last 12mo, have you been away from home other than for visiting your parents/in-laws character 4935 (98.64%) no
yes
43
25
63.24
36.76
42 mother is usually allowed to visit a health facility… character 4207 (84.09%) alone
not at all
with someone else only
353
56
387
44.35
7.04
48.62
43 does the mother have a mobile phone that she herself uses character 4207 (84.09%) no
yes
351
445
44.10
55.90
44 is the mother able to read SMS messages character 4244 (84.83%) no
yes
320
439
42.16
57.84
45 wealth index within state character 0 (0.00%) middle
poorer
poorest
richer
richest
1019
1107
1252
951
674
20.37
22.13
25.02
19.01
13.47
46 wealth index factor score within state numeric 0 (0.00%) range: -2501080-2239460
47 wealth index within state - urban character 4003 (80.01%) middle
poorer
poorest
richer
richest
213
226
246
187
128
21.30
22.60
24.60
18.70
12.80
48 wealth index factor score within state - urban numeric 4003 (80.01%) range: -3809560-1679199
49 wealth index within state - rural character 1000 (19.99%) middle
poorer
poorest
richer
richest
807
808
943
738
707
20.16
20.18
23.56
18.44
17.66
50 wealth index factor score within state - rural numeric 1000 (19.99%) range: -2446780-2761580
51 where does the child receive most of their vaccinations character 2245 (44.87%) ngo or trust hospital / clinic
other
other private health faility
other public sector health facility
private: doctor / clinic
private: hospital
private: paramedic
private: pharmacy / drugstore
public ayush
public: anganwadi / icds centre
public: camp
public: chc / rural hospital / block phc
public: govt. / municipal hospital
public: govt. dispensary
public: govt. mobile clinic
<… truncated>
4
21
3
1
13
71
1
1
2
1374
45
198
295
115
2
0.15
0.76
0.11
0.04
0.47
2.57
0.04
0.04
0.07
49.82
1.63
7.18
10.70
4.17
0.07
# Build the analysis subset: 17 columns picked by position from df1, keeping
# only rows that are complete across all of them.
df3 <- na.omit(df1[, c(4, 5, 7, 10, 12, 14, 15, 20, 22, 28, 32, 33, 37, 38, 39, 42, 43)])

# Collapse the measles-1 vaccination status into a yes/no flag.
# Any card-based or mother-reported record counts as "yes"; "don't know" is
# folded into "no"; every other value is left untouched.
measles_col <- "received measles-1 vaccine [recommended at 9 months]"
yes_labels <- c(
  "vaccination marked on card",
  "vaccination date on card",
  "vaccination identified",
  "reported by mother"
)
df3[[measles_col]][df3[[measles_col]] %in% yes_labels] <- "yes"
df3[[measles_col]][df3[[measles_col]] == "don't know"] <- "no"

# Tabulate the recoded flag to confirm only the two levels remain.
count(df3, `received measles-1 vaccine [recommended at 9 months]`)
## # A tibble: 2 × 2
##   `received measles-1 vaccine [recommended at 9 months]`     n
##   <chr>                                                  <int>
## 1 no                                                       114
## 2 yes                                                      266
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
# Draw a GGally pairs plot for a subset of df3 columns, colouring every panel
# by the recoded measles-1 vaccination status.
#
# cols: integer vector of df3 column positions to include in the plot matrix.
# Returns the object produced by ggpairs(); calling this at top level
# auto-prints it, exactly as the bare ggpairs() calls did.
#
# The previous calls ended in `),)`, which passed an empty trailing argument
# to ggpairs(); that stray argument is removed here.
plot_pairs_by_measles <- function(cols) {
  ggpairs(
    data = df3,
    columns = cols,
    mapping = ggplot2::aes(
      color = `received measles-1 vaccine [recommended at 9 months]`
    ),
    upper = list(continuous = "cor"),
    lower = list(continuous = "points"),
    diag = list(continuous = "densityDiag")
  )
}

# Same column groupings as before: df3 is plotted in small batches so each
# plot matrix stays readable.
plot_pairs_by_measles(1:3)

plot_pairs_by_measles(4:5)

plot_pairs_by_measles(6:8)

plot_pairs_by_measles(9:11)

plot_pairs_by_measles(12:14)

plot_pairs_by_measles(15:17)

library(DataExplorer)
# Generate the DataExplorer profiling report for df3; per the render log this
# knits report.rmd and writes report.html.
create_report(data = df3)
## 
## 
## processing file: report.rmd
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |..                                                                    |   2%
##    inline R code fragments
## 
## 
  |                                                                            
  |...                                                                   |   5%
## label: global_options (with options) 
## List of 1
##  $ include: logi FALSE
## 
## 
  |                                                                            
  |.....                                                                 |   7%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.......                                                               |  10%
## label: introduce
## 
  |                                                                            
  |........                                                              |  12%
##   ordinary text without R code
## 
## 
  |                                                                            
  |..........                                                            |  14%
## label: plot_intro
## 
  |                                                                            
  |............                                                          |  17%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.............                                                         |  19%
## label: data_structure
## 
  |                                                                            
  |...............                                                       |  21%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.................                                                     |  24%
## label: missing_profile
## 
  |                                                                            
  |..................                                                    |  26%
##   ordinary text without R code
## 
## 
  |                                                                            
  |....................                                                  |  29%
## label: univariate_distribution_header
## 
  |                                                                            
  |......................                                                |  31%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.......................                                               |  33%
## label: plot_histogram
## 
  |                                                                            
  |.........................                                             |  36%
##   ordinary text without R code
## 
## 
  |                                                                            
  |...........................                                           |  38%
## label: plot_density
## 
  |                                                                            
  |............................                                          |  40%
##   ordinary text without R code
## 
## 
  |                                                                            
  |..............................                                        |  43%
## label: plot_frequency_bar
## 
  |                                                                            
  |................................                                      |  45%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.................................                                     |  48%
## label: plot_response_bar
## 
  |                                                                            
  |...................................                                   |  50%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.....................................                                 |  52%
## label: plot_with_bar
## 
  |                                                                            
  |......................................                                |  55%
##   ordinary text without R code
## 
## 
  |                                                                            
  |........................................                              |  57%
## label: plot_normal_qq
## 
  |                                                                            
  |..........................................                            |  60%
##   ordinary text without R code
## 
## 
  |                                                                            
  |...........................................                           |  62%
## label: plot_response_qq
## 
  |                                                                            
  |.............................................                         |  64%
##   ordinary text without R code
## 
## 
  |                                                                            
  |...............................................                       |  67%
## label: plot_by_qq
## 
  |                                                                            
  |................................................                      |  69%
##   ordinary text without R code
## 
## 
  |                                                                            
  |..................................................                    |  71%
## label: correlation_analysis
## 
  |                                                                            
  |....................................................                  |  74%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.....................................................                 |  76%
## label: principal_component_analysis
## 
  |                                                                            
  |.......................................................               |  79%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.........................................................             |  81%
## label: bivariate_distribution_header
## 
  |                                                                            
  |..........................................................            |  83%
##   ordinary text without R code
## 
## 
  |                                                                            
  |............................................................          |  86%
## label: plot_response_boxplot
## 
  |                                                                            
  |..............................................................        |  88%
##   ordinary text without R code
## 
## 
  |                                                                            
  |...............................................................       |  90%
## label: plot_by_boxplot
## 
  |                                                                            
  |.................................................................     |  93%
##   ordinary text without R code
## 
## 
  |                                                                            
  |...................................................................   |  95%
## label: plot_response_scatterplot
## 
  |                                                                            
  |....................................................................  |  98%
##   ordinary text without R code
## 
## 
  |                                                                            
  |......................................................................| 100%
## label: plot_by_scatterplot
## output file: D:/Data Science/R/John Hopkins/Exploratory data analysis/Assignmentweek1/report.knit.md
## "C:/Program Files/RStudio/bin/quarto/bin/tools/pandoc" +RTS -K512m -RTS "D:/Data Science/R/John Hopkins/Exploratory data analysis/Assignmentweek1/report.knit.md" --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output pandoc3b5c246d4ef7.html --lua-filter "C:\Users\varun\AppData\Local\R\win-library\4.2\rmarkdown\rmarkdown\lua\pagebreak.lua" --lua-filter "C:\Users\varun\AppData\Local\R\win-library\4.2\rmarkdown\rmarkdown\lua\latex-div.lua" --self-contained --variable bs3=TRUE --standalone --section-divs --table-of-contents --toc-depth 6 --template "C:\Users\varun\AppData\Local\R\win-library\4.2\rmarkdown\rmd\h\default.html" --no-highlight --variable highlightjs=1 --variable theme=yeti --mathjax --variable "mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" --include-in-header "C:\Users\varun\AppData\Local\Temp\RtmpK6zxkt\rmarkdown-str3b5c10255f15.html"
## 
## Output created: report.html