library(readr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ dplyr 1.0.9
## ✔ tibble 3.1.8 ✔ stringr 1.4.0
## ✔ tidyr 1.2.0 ✔ forcats 0.5.1
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(gmodels)
library(CGPfunctions)
## Registered S3 method overwritten by 'DescTools':
## method from
## reorder.factor gdata
# Absolute location of the children5k survey extract on the author's machine.
filepath <- "D:/Data Science/R/John Hopkins/Exploratory data analysis/Data for test task - children5k.csv"
# Read the raw survey file; readr guesses the column types.
df1 <- read_csv(file = filepath)
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 5003 Columns: 51
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (40): state, type of place of residence, mother's highest educational le...
## dbl (11): child's birth order, mother's current age, mother's highest year o...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Analysis subset: children whose current age is at least one year, limited to
# the parental-education, delivery-place and vaccination columns, keeping only
# rows that are complete on every selected column.
# NOTE(review): the listwise deletion spans all seven columns, so every
# cross-tab below works on the same reduced sample (203 rows in the printed
# tables); `mother's literacy level` is selected but not used by the analyses
# visible in this file.
df2 <- df1 %>%
  filter(`current age of child in years (for alive children only)` >= 1) %>%
  select(
    all_of(c(
      "mother's literacy level",
      "mother's highest year of education",
      "father's total years of education",
      "place of delivery (i.e. place where child was born)",
      "received DPT-3 vaccine [recommended at 14 weeks]",
      "received measles-1 vaccine [recommended at 9 months]",
      "is the mother able to read SMS messages"
    ))
  ) %>%
  na.omit()
# Collapse the detailed delivery locations into three groups: "home",
# "private" and "public". Each group is assigned in one vectorised step with
# %in% instead of one `==` assignment per category.
# "other home" and "other public sector health facility" occur in the raw
# data (see the view_df codebook) and are mapped as well, so they cannot show
# up as stray extra groups if the filtered sample changes.
df2$`place of delivery (i.e. place where child was born)`[
  df2$`place of delivery (i.e. place where child was born)` %in% c(
    "parents' home",
    "respondent's home",
    "other home",
    "other"
  )
] <- "home"
df2$`place of delivery (i.e. place where child was born)`[
  df2$`place of delivery (i.e. place where child was born)` %in% c(
    "ngo or trust hospital/clinic",
    "other private sector health facility",
    "private: hospital/maternity home/clinic"
  )
] <- "private"
df2$`place of delivery (i.e. place where child was born)`[
  df2$`place of delivery (i.e. place where child was born)` %in% c(
    "public: chc/rural hospital/block phc",
    "public: govt. dispensary",
    "public: govt./munic. hospital",
    "public: phc/additional phc",
    "public: sub-centre",
    "public: uhc/uhp/ufwc",
    "other public sector health facility"
  )
] <- "public"
# Check the resulting group sizes.
count(df2, `place of delivery (i.e. place where child was born)`)
## # A tibble: 3 × 2
## `place of delivery (i.e. place where child was born)` n
## <chr> <int>
## 1 home 21
## 2 private 44
## 3 public 138
# Reduce DPT-3 vaccination status to "yes"/"no": any evidence of vaccination
# (mother's report or a card entry) counts as "yes".
df2$`received DPT-3 vaccine [recommended at 14 weeks]`[
  df2$`received DPT-3 vaccine [recommended at 14 weeks]` %in% c(
    "reported by mother",
    "vaccination date on card",
    "vaccination marked on card"
  )
] <- "yes"
# "don't know" is folded into "no", matching the treatment of the measles-1
# column further down, so the variable stays binary even when such answers
# are present in the sample.
df2$`received DPT-3 vaccine [recommended at 14 weeks]`[
  df2$`received DPT-3 vaccine [recommended at 14 weeks]` %in% "don't know"
] <- "no"
# Check the resulting group sizes.
count(df2, `received DPT-3 vaccine [recommended at 14 weeks]`)
## # A tibble: 2 × 2
## `received DPT-3 vaccine [recommended at 14 weeks]` n
## <chr> <int>
## 1 no 27
## 2 yes 176
# Cross-tabulate DPT-3 status (rows) against grouped place of delivery
# (columns) in SPSS layout, with expected counts, chi-square contributions,
# row/column/total percentages and standardized residuals.
CrossTable(
  x = df2[["received DPT-3 vaccine [recommended at 14 weeks]"]],
  y = df2[["place of delivery (i.e. place where child was born)"]],
  digits = 2,
  expected = TRUE,
  prop.r = TRUE,
  prop.c = TRUE,
  prop.t = TRUE,
  prop.chisq = TRUE,
  sresid = TRUE,
  format = "SPSS",
  dnn = c("Received DPT Vaccine", "Place of birth")
)
## Warning in chisq.test(t, correct = FALSE, ...): Chi-squared approximation may be
## incorrect
##
## Cell Contents
## |-------------------------|
## | Count |
## | Expected Values |
## | Chi-square contribution |
## | Row Percent |
## | Column Percent |
## | Total Percent |
## | Std Residual |
## |-------------------------|
##
## Total Observations in Table: 203
##
## | Place of birth
## Received DPT Vaccine | home | private | public | Row Total |
## ---------------------|-----------|-----------|-----------|-----------|
## no | 3 | 8 | 16 | 27 |
## | 2.79 | 5.85 | 18.35 | |
## | 0.02 | 0.79 | 0.30 | |
## | 11.11% | 29.63% | 59.26% | 13.30% |
## | 14.29% | 18.18% | 11.59% | |
## | 1.48% | 3.94% | 7.88% | |
## | 0.12 | 0.89 | -0.55 | |
## ---------------------|-----------|-----------|-----------|-----------|
## yes | 18 | 36 | 122 | 176 |
## | 18.21 | 38.15 | 119.65 | |
## | 0.00 | 0.12 | 0.05 | |
## | 10.23% | 20.45% | 69.32% | 86.70% |
## | 85.71% | 81.82% | 88.41% | |
## | 8.87% | 17.73% | 60.10% | |
## | -0.05 | -0.35 | 0.22 | |
## ---------------------|-----------|-----------|-----------|-----------|
## Column Total | 21 | 44 | 138 | 203 |
## | 10.34% | 21.67% | 67.98% | |
## ---------------------|-----------|-----------|-----------|-----------|
##
##
## Statistics for All Table Factors
##
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 1.275262 d.f. = 2 p = 0.5285431
##
##
##
## Minimum expected frequency: 2.793103
## Cells with Expected Frequency < 5: 1 of 6 (16.66667%)
# Mosaic plot of DPT-3 status against grouped place of delivery.
mosaicplot(
  `received DPT-3 vaccine [recommended at 14 weeks]` ~
    `place of delivery (i.e. place where child was born)`,
  data = df2,
  col = c("Green", "Yellow")
)
# PlotXTabs2 chart with place of delivery as x and DPT-3 status as y.
CGPfunctions::PlotXTabs2(
  data = df2,
  x = `place of delivery (i.e. place where child was born)`,
  y = `received DPT-3 vaccine [recommended at 14 weeks]`
)
# Reduce measles-1 vaccination status to "yes"/"no": any evidence of
# vaccination counts as "yes", and "don't know" is treated as "no".
df2$`received measles-1 vaccine [recommended at 9 months]`[
  df2$`received measles-1 vaccine [recommended at 9 months]` %in% c(
    "vaccination marked on card",
    "vaccination date on card",
    "vaccination identified",
    "reported by mother"
  )
] <- "yes"
df2$`received measles-1 vaccine [recommended at 9 months]`[
  df2$`received measles-1 vaccine [recommended at 9 months]` %in% "don't know"
] <- "no"
# Check the resulting group sizes.
count(df2, `received measles-1 vaccine [recommended at 9 months]`)
## # A tibble: 2 × 2
## `received measles-1 vaccine [recommended at 9 months]` n
## <chr> <int>
## 1 no 22
## 2 yes 181
# Cross-tabulate measles-1 status (rows) against the mother's highest year of
# education (columns), using the same SPSS-style layout as the other tables.
CrossTable(
  x = df2[["received measles-1 vaccine [recommended at 9 months]"]],
  y = df2[["mother's highest year of education"]],
  digits = 2,
  expected = TRUE,
  prop.r = TRUE,
  prop.c = TRUE,
  prop.t = TRUE,
  prop.chisq = TRUE,
  sresid = TRUE,
  format = "SPSS",
  dnn = c("Got Measles Vaccine", "Mother's Highest Year of education")
)
## Warning in chisq.test(t, correct = FALSE, ...): Chi-squared approximation may be
## incorrect
##
## Cell Contents
## |-------------------------|
## | Count |
## | Expected Values |
## | Chi-square contribution |
## | Row Percent |
## | Column Percent |
## | Total Percent |
## | Std Residual |
## |-------------------------|
##
## Total Observations in Table: 203
##
## | Mother's Highest Year of education
## Got Measles Vaccine | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Row Total |
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
## no | 0 | 1 | 6 | 4 | 7 | 1 | 3 | 0 | 22 |
## | 0.76 | 1.30 | 6.18 | 4.01 | 5.85 | 0.43 | 3.36 | 0.11 | |
## | 0.76 | 0.07 | 0.01 | 0.00 | 0.23 | 0.74 | 0.04 | 0.11 | |
## | 0.00% | 4.55% | 27.27% | 18.18% | 31.82% | 4.55% | 13.64% | 0.00% | 10.84% |
## | 0.00% | 8.33% | 10.53% | 10.81% | 12.96% | 25.00% | 9.68% | 0.00% | |
## | 0.00% | 0.49% | 2.96% | 1.97% | 3.45% | 0.49% | 1.48% | 0.00% | |
## | -0.87 | -0.26 | -0.07 | -0.00 | 0.47 | 0.86 | -0.20 | -0.33 | |
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
## yes | 7 | 11 | 51 | 33 | 47 | 3 | 28 | 1 | 181 |
## | 6.24 | 10.70 | 50.82 | 32.99 | 48.15 | 3.57 | 27.64 | 0.89 | |
## | 0.09 | 0.01 | 0.00 | 0.00 | 0.03 | 0.09 | 0.00 | 0.01 | |
## | 3.87% | 6.08% | 28.18% | 18.23% | 25.97% | 1.66% | 15.47% | 0.55% | 89.16% |
## | 100.00% | 91.67% | 89.47% | 89.19% | 87.04% | 75.00% | 90.32% | 100.00% | |
## | 3.45% | 5.42% | 25.12% | 16.26% | 23.15% | 1.48% | 13.79% | 0.49% | |
## | 0.30 | 0.09 | 0.02 | 0.00 | -0.17 | -0.30 | 0.07 | 0.11 | |
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
## Column Total | 7 | 12 | 57 | 37 | 54 | 4 | 31 | 1 | 203 |
## | 3.45% | 5.91% | 28.08% | 18.23% | 26.60% | 1.97% | 15.27% | 0.49% | |
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##
##
## Statistics for All Table Factors
##
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 2.181927 d.f. = 7 p = 0.9490885
##
##
##
## Minimum expected frequency: 0.1083744
## Cells with Expected Frequency < 5: 8 of 16 (50%)
# Cross-tabulate measles-1 status (rows) against the father's total years of
# education (columns).
# The education column is stored as text (it contains "don't know"), so the
# default ordering is alphabetical and puts "10" before "3". Build a factor
# whose purely numeric levels are ordered by value, with any other answers
# placed last. Counts and the chi-square statistic do not depend on the
# column order; only the table layout changes.
father_years <- df2[["father's total years of education"]]
father_levels <- unique(father_years)
is_numeric_level <- grepl("^[0-9]+$", father_levels)
father_levels <- c(
  father_levels[is_numeric_level][
    order(as.numeric(father_levels[is_numeric_level]))
  ],
  sort(father_levels[!is_numeric_level])
)
CrossTable(
  x = df2[["received measles-1 vaccine [recommended at 9 months]"]],
  y = factor(father_years, levels = father_levels),
  digits = 2,
  expected = TRUE,
  prop.r = TRUE,
  prop.c = TRUE,
  prop.t = TRUE,
  prop.chisq = TRUE,
  sresid = TRUE,
  format = "SPSS",
  dnn = c("Got Measles Vaccine", "Father's Years of education")
)
## Warning in chisq.test(t, correct = FALSE, ...): Chi-squared approximation may be
## incorrect
##
## Cell Contents
## |-------------------------|
## | Count |
## | Expected Values |
## | Chi-square contribution |
## | Row Percent |
## | Column Percent |
## | Total Percent |
## | Std Residual |
## |-------------------------|
##
## Total Observations in Table: 203
##
## | Father's Years of education
## Got Measles Vaccine | 0 | 1 | 10 | 11 | 12 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | don't know | Row Total |
## --------------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|
## no | 3 | 0 | 5 | 0 | 1 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 2 | 3 | 2 | 0 | 22 |
## | 1.41 | 0.11 | 3.36 | 0.43 | 3.03 | 0.22 | 2.93 | 0.22 | 0.76 | 0.33 | 0.11 | 0.11 | 0.11 | 0.54 | 1.41 | 0.22 | 0.87 | 2.82 | 2.82 | 0.22 | |
## | 1.80 | 0.11 | 0.80 | 0.43 | 1.36 | 0.22 | 0.00 | 0.22 | 0.76 | 0.33 | 0.11 | 0.11 | 0.11 | 0.39 | 0.12 | 2.83 | 1.48 | 0.01 | 0.24 | 0.22 | |
## | 13.64% | 0.00% | 22.73% | 0.00% | 4.55% | 0.00% | 13.64% | 0.00% | 0.00% | 0.00% | 0.00% | 0.00% | 0.00% | 4.55% | 4.55% | 4.55% | 9.09% | 13.64% | 9.09% | 0.00% | 10.84% |
## | 23.08% | 0.00% | 16.13% | 0.00% | 3.57% | 0.00% | 11.11% | 0.00% | 0.00% | 0.00% | 0.00% | 0.00% | 0.00% | 20.00% | 7.69% | 50.00% | 25.00% | 11.54% | 7.69% | 0.00% | |
## | 1.48% | 0.00% | 2.46% | 0.00% | 0.49% | 0.00% | 1.48% | 0.00% | 0.00% | 0.00% | 0.00% | 0.00% | 0.00% | 0.49% | 0.49% | 0.49% | 0.99% | 1.48% | 0.99% | 0.00% | |
## | 1.34 | -0.33 | 0.89 | -0.66 | -1.17 | -0.47 | 0.04 | -0.47 | -0.87 | -0.57 | -0.33 | -0.33 | -0.33 | 0.62 | -0.34 | 1.68 | 1.22 | 0.11 | -0.49 | -0.47 | |
## --------------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|
## yes | 10 | 1 | 26 | 4 | 27 | 2 | 24 | 2 | 7 | 3 | 1 | 1 | 1 | 4 | 12 | 1 | 6 | 23 | 24 | 2 | 181 |
## | 11.59 | 0.89 | 27.64 | 3.57 | 24.97 | 1.78 | 24.07 | 1.78 | 6.24 | 2.67 | 0.89 | 0.89 | 0.89 | 4.46 | 11.59 | 1.78 | 7.13 | 23.18 | 23.18 | 1.78 | |
## | 0.22 | 0.01 | 0.10 | 0.05 | 0.17 | 0.03 | 0.00 | 0.03 | 0.09 | 0.04 | 0.01 | 0.01 | 0.01 | 0.05 | 0.01 | 0.34 | 0.18 | 0.00 | 0.03 | 0.03 | |
## | 5.52% | 0.55% | 14.36% | 2.21% | 14.92% | 1.10% | 13.26% | 1.10% | 3.87% | 1.66% | 0.55% | 0.55% | 0.55% | 2.21% | 6.63% | 0.55% | 3.31% | 12.71% | 13.26% | 1.10% | 89.16% |
## | 76.92% | 100.00% | 83.87% | 100.00% | 96.43% | 100.00% | 88.89% | 100.00% | 100.00% | 100.00% | 100.00% | 100.00% | 100.00% | 80.00% | 92.31% | 50.00% | 75.00% | 88.46% | 92.31% | 100.00% | |
## | 4.93% | 0.49% | 12.81% | 1.97% | 13.30% | 0.99% | 11.82% | 0.99% | 3.45% | 1.48% | 0.49% | 0.49% | 0.49% | 1.97% | 5.91% | 0.49% | 2.96% | 11.33% | 11.82% | 0.99% | |
## | -0.47 | 0.11 | -0.31 | 0.23 | 0.41 | 0.16 | -0.02 | 0.16 | 0.30 | 0.20 | 0.11 | 0.11 | 0.11 | -0.22 | 0.12 | -0.59 | -0.42 | -0.04 | 0.17 | 0.16 | |
## --------------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|
## Column Total | 13 | 1 | 31 | 4 | 28 | 2 | 27 | 2 | 7 | 3 | 1 | 1 | 1 | 5 | 13 | 2 | 8 | 26 | 26 | 2 | 203 |
## | 6.40% | 0.49% | 15.27% | 1.97% | 13.79% | 0.99% | 13.30% | 0.99% | 3.45% | 1.48% | 0.49% | 0.49% | 0.49% | 2.46% | 6.40% | 0.99% | 3.94% | 12.81% | 12.81% | 0.99% | |
## --------------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|------------|
##
##
## Statistics for All Table Factors
##
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 13.04462 d.f. = 19 p = 0.836279
##
##
##
## Minimum expected frequency: 0.1083744
## Cells with Expected Frequency < 5: 31 of 40 (77.5%)
# Cross-tabulate measles-1 status (rows) against whether the mother can read
# SMS messages (columns), using the same SPSS-style layout as the other
# tables.
CrossTable(
  x = df2[["received measles-1 vaccine [recommended at 9 months]"]],
  y = df2[["is the mother able to read SMS messages"]],
  digits = 2,
  expected = TRUE,
  prop.r = TRUE,
  prop.c = TRUE,
  prop.t = TRUE,
  prop.chisq = TRUE,
  sresid = TRUE,
  format = "SPSS",
  dnn = c("Got Measles Vaccine", "Mother can Read SMS")
)
##
## Cell Contents
## |-------------------------|
## | Count |
## | Expected Values |
## | Chi-square contribution |
## | Row Percent |
## | Column Percent |
## | Total Percent |
## | Std Residual |
## |-------------------------|
##
## Total Observations in Table: 203
##
## | Mother can Read SMS
## Got Measles Vaccine | no | yes | Row Total |
## --------------------|-----------|-----------|-----------|
## no | 4 | 18 | 22 |
## | 5.20 | 16.80 | |
## | 0.28 | 0.09 | |
## | 18.18% | 81.82% | 10.84% |
## | 8.33% | 11.61% | |
## | 1.97% | 8.87% | |
## | -0.53 | 0.29 | |
## --------------------|-----------|-----------|-----------|
## yes | 44 | 137 | 181 |
## | 42.80 | 138.20 | |
## | 0.03 | 0.01 | |
## | 24.31% | 75.69% | 89.16% |
## | 91.67% | 88.39% | |
## | 21.67% | 67.49% | |
## | 0.18 | -0.10 | |
## --------------------|-----------|-----------|-----------|
## Column Total | 48 | 155 | 203 |
## | 23.65% | 76.35% | |
## --------------------|-----------|-----------|-----------|
##
##
## Statistics for All Table Factors
##
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 0.4079449 d.f. = 1 p = 0.5230144
##
## Pearson's Chi-squared test with Yates' continuity correction
## ------------------------------------------------------------
## Chi^2 = 0.1391399 d.f. = 1 p = 0.7091376
##
##
## Minimum expected frequency: 5.20197
#install.packages("CGPfunctions")
library(CGPfunctions)
# PlotXTabs2 chart with the mother's highest year of education as x and
# measles-1 status as y.
CGPfunctions::PlotXTabs2(
  data = df2,
  x = `mother's highest year of education`,
  y = `received measles-1 vaccine [recommended at 9 months]`
)
# PlotXTabs chart of the father's total years of education by measles-1
# status.
CGPfunctions::PlotXTabs(
  df2,
  `father's total years of education`,
  `received measles-1 vaccine [recommended at 9 months]`
)
## Plotted dataset df2 variables father's total years of education by received measles-1 vaccine [recommended at 9 months]
# PlotXTabs2 chart with the mother's ability to read SMS as x and measles-1
# status as y.
CGPfunctions::PlotXTabs2(
  data = df2,
  x = `is the mother able to read SMS messages`,
  y = `received measles-1 vaccine [recommended at 9 months]`
)
#install.packages("vtree")
library(vtree)
# Variable tree: mother's highest year of education first, then measles-1
# status within each education value.
vtree(
  df2,
  vars = c(
    "mother's highest year of education",
    "received measles-1 vaccine [recommended at 9 months]"
  ),
  palette = c(1, 3),
  sortfill = TRUE
)
## Warning in knitr::include_graphics(fullpath): It is highly recommended to use
## relative paths for images. You had absolute paths: "C:/Users/varun/AppData/
## Local/Temp/RtmpK6zxkt/vtree001.png"
# Variable tree: father's total years of education first, then measles-1
# status within each education value.
vtree(
  df2,
  vars = c(
    "father's total years of education",
    "received measles-1 vaccine [recommended at 9 months]"
  ),
  palette = c(1, 3),
  sortfill = TRUE
)
## Warning in knitr::include_graphics(fullpath): It is highly recommended to use
## relative paths for images. You had absolute paths: "C:/Users/varun/AppData/
## Local/Temp/RtmpK6zxkt/vtree002.png"
# Variable tree: mother's ability to read SMS first, then measles-1 status
# within each answer.
vtree(
  df2,
  vars = c(
    "is the mother able to read SMS messages",
    "received measles-1 vaccine [recommended at 9 months]"
  ),
  palette = c(1, 3),
  sortfill = TRUE
)
## Warning in knitr::include_graphics(fullpath): It is highly recommended to use
## relative paths for images. You had absolute paths: "C:/Users/varun/AppData/
## Local/Temp/RtmpK6zxkt/vtree003.png"
library(sjPlot)
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
# Render a codebook of the raw data: frequencies, percentages, missing-value
# counts, column types and the values of character columns.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
view_df(
  df1,
  show.frq = TRUE,
  show.prc = TRUE,
  show.na = TRUE,
  show.type = TRUE,
  show.string.values = TRUE
)
| ID | Name | Type | Label | missings | Values | Value Labels | Freq. | % |
|---|---|---|---|---|---|---|---|---|
| 1 | child’s birth order | numeric | 0 (0.00%) | range: 1-4 | ||||
| 2 | mother’s current age | numeric | 0 (0.00%) | range: 16-49 | ||||
| 3 | state | character | 0 (0.00%) |
andaman & nicobar islands andhra pradesh arunachal pradesh assam bihar chandigarh chhattisgarh dadra & nagar haveli and daman & diu goa gujarat haryana himachal pradesh jammu & kashmir jharkhand karnataka <… truncated> |
4 63 149 230 486 3 180 16 6 216 158 46 150 199 178 |
0.08 1.26 2.98 4.60 9.71 0.06 3.60 0.32 0.12 4.32 3.16 0.92 3.00 3.98 3.56 |
||
| 4 | type of place of residence | character | 0 (0.00%) |
rural urban |
4003 1000 |
80.01 19.99 |
||
| 5 | mother’s highest educational level | character | 0 (0.00%) |
higher no education primary secondary |
681 1112 648 2562 |
13.61 22.23 12.95 51.21 |
||
| 6 | mother’s highest year of education | numeric | 1118 (22.35%) | range: 1-8 | ||||
| 7 | household has refrigerator | character | 0 (0.00%) |
no not a dejure resident yes |
3253 244 1506 |
65.02 4.88 30.10 |
||
| 8 | household has car/truck | character | 0 (0.00%) |
no not a dejure resident yes |
4399 244 360 |
87.93 4.88 7.20 |
||
| 9 | religion | character | 0 (0.00%) |
buddhist / neo-buddhist christian hindu jain muslim no religion other sikh |
43 397 3663 5 742 3 64 86 |
0.86 7.94 73.22 0.10 14.83 0.06 1.28 1.72 |
||
| 10 | mother’s literacy level | character | 0 (0.00%) |
able to read only parts of sentence able to read whole sentence blind/visually impaired cannot read at all no card with required language |
688 2863 8 1419 25 |
13.75 57.23 0.16 28.36 0.50 |
||
| 11 | frequency of listening to radio | character | 0 (0.00%) |
at least once a week less than once a week not at all |
155 460 4388 |
3.10 9.19 87.71 |
||
| 12 | frequency of watching television | character | 0 (0.00%) |
at least once a week less than once a week not at all |
2218 1061 1724 |
44.33 21.21 34.46 |
||
| 13 | has mother been away for more than 1 month in last 12mo | character | 4207 (84.09%) |
no yes |
728 68 |
91.46 8.54 |
||
| 14 | mother owns a mobile phone | character | 4207 (84.09%) |
no yes |
351 445 |
44.10 55.90 |
||
| 15 | wealth index combined | character | 0 (0.00%) |
middle poorer poorest richer richest |
982 1179 1320 875 647 |
19.63 23.57 26.38 17.49 12.93 |
||
| 16 | wealth index factor score combined | numeric | 0 (0.00%) | range: -2501080-2239460 | ||||
| 17 | mother’s age at birth of first child | numeric | 0 (0.00%) | range: 6-41 | ||||
| 18 | where is the father currently staying | character | 66 (1.32%) |
living with her staying elsewhere |
4365 572 |
88.41 11.59 |
||
| 19 | does the father have any other wives | character | 66 (1.32%) |
1 2 7 don’t know no other wives |
68 8 1 3 4857 |
1.38 0.16 0.02 0.06 98.38 |
||
| 20 | father’s education level | character | 4207 (84.09%) |
don’t know higher no education primary secondary |
4 117 135 116 424 |
0.50 14.70 16.96 14.57 53.27 |
||
| 21 | father’s occupation | character | 4207 (84.09%) |
accountants, auditors and related workers administrative and executive officials government and local bodies administrative, executive and managerial workers, n.e.c. agricultural labourer architects, engineers, technologists and surveyors broadcasting station and sound equipment operators and cinema projectionists building caretakers, sweepers, cleaners and related workers carpenters, cabinet & related wood workers chemical processors and related workers clerical and other supervisors clerical and related workers computing machine operators cooks, waiters, bartenders and related workers (domestic & institutional) cultivators dk (don’t know) <… truncated> |
2 2 3 141 4 1 2 10 3 2 5 2 5 42 7 |
0.25 0.25 0.38 17.71 0.50 0.13 0.25 1.26 0.38 0.25 0.63 0.25 0.63 5.28 0.88 |
||
| 22 | is the mother currently working | character | 4207 (84.09%) |
no yes |
641 155 |
80.53 19.47 |
||
| 23 | does the mother have a job (even if currently absent) | character | 4357 (87.09%) |
no yes |
641 5 |
99.23 0.77 |
||
| 24 | father’s total years of education | character | 4207 (84.09%) |
0 1 10 11 12 13 14 15 16 17 18 19 2 20 3 <… truncated> |
138 4 121 16 94 3 7 79 3 17 5 1 12 2 6 |
17.34 0.50 15.20 2.01 11.81 0.38 0.88 9.92 0.38 2.14 0.63 0.13 1.51 0.25 0.75 |
||
| 25 | mother’s occupation | character | 4207 (84.09%) |
accountants, auditors and related workers agricultural labourer architects, engineers, technologists and surveyors building caretakers, sweepers, cleaners and related workers cooks, waiters, bartenders and related workers (domestic & institutional) cultivators farm plantation, dairy and other managers and supervisors farmers, other than cultivators house keepers, matron and stewards (domestic & institutional) labourers (labourers, n.e.c.) maids and related house keeping service workers, n.e.c. merchants and shopkeepers, wholesale and retail trade not working and didn’t work in last 12 months nursing and other medical and health technicians other (occupation not found) <… truncated> |
1 65 1 2 2 17 5 24 5 15 3 4 600 2 3 |
0.13 8.17 0.13 0.25 0.25 2.14 0.63 3.02 0.63 1.88 0.38 0.50 75.38 0.25 0.38 |
||
| 26 | father’s current age | numeric | 4214 (84.23%) | range: 19-55 | ||||
| 27 | person in the household who makes decisions about mother’s healthcare | character | 4214 (84.23%) |
husband/partner alone other respondent alone respondent and husband/partner someone else |
118 6 54 596 15 |
14.96 0.76 6.84 75.54 1.90 |
||
| 28 | sex of child | character | 0 (0.00%) |
female male |
2388 2615 |
47.73 52.27 |
||
| 29 | current age of child in years (for alive children only) | numeric | 166 (3.32%) | range: 0-4 | ||||
| 30 | months since birth (including for dead children) | numeric | 0 (0.00%) | range: 0-59 | ||||
| 31 | place of delivery (i.e. place where child was born) | character | 0 (0.00%) |
ngo or trust hospital/clinic other other home other private sector health facility other public sector health facility parents’ home private: hospital/maternity home/clinic public: chc/rural hospital/block phc public: govt. dispensary public: govt./munic. hospital public: phc/additional phc public: sub-centre public: uhc/uhp/ufwc respondent’s home |
15 4 5 35 5 77 1010 1232 98 1416 371 48 84 603 |
0.30 0.08 0.10 0.70 0.10 1.54 20.19 24.63 1.96 28.30 7.42 0.96 1.68 12.05 |
||
| 32 | baby received a postnatal check-up within 2 months of birth | character | 1231 (24.61%) |
don’t know no yes |
3 2090 1679 |
0.08 55.41 44.51 |
||
| 33 | mother has child’s vaccination card | character | 2144 (42.85%) |
no card no longer has card yes, not seen yes, seen |
93 67 246 2453 |
3.25 2.34 8.60 85.80 |
||
| 34 | received BCG vaccine [recommended at birth] | character | 2144 (42.85%) |
don’t know no reported by mother vaccination date on card vaccination marked on card |
2 174 340 2296 47 |
0.07 6.09 11.89 80.31 1.64 |
||
| 35 | received DPT-1 vaccine [recommended at 6 weeks] | character | 2144 (42.85%) |
don’t know no reported by mother vaccination date on card vaccination marked on card |
3 371 314 2099 72 |
0.10 12.98 10.98 73.42 2.52 |
||
| 36 | received DPT-2 vaccine [recommended at 10 weeks] | character | 2144 (42.85%) |
don’t know no reported by mother vaccination date on card vaccination marked on card |
3 520 301 1967 68 |
0.10 18.19 10.53 68.80 2.38 |
||
| 37 | received DPT-3 vaccine [recommended at 14 weeks] | character | 2144 (42.85%) |
don’t know no reported by mother vaccination date on card vaccination marked on card |
3 714 266 1800 76 |
0.10 24.97 9.30 62.96 2.66 |
||
| 38 | received measles-1 vaccine [recommended at 9 months] | character | 2144 (42.85%) |
don’t know no reported by mother vaccination date on card vaccination identified vaccination marked on card |
14 721 223 1234 620 47 |
0.49 25.22 7.80 43.16 21.69 1.64 |
||
| 39 | family belongs to a scheduled caste/tribe/other backward class | character | 237 (4.74%) |
don’t know none of them obc schedule caste schedule tribe |
32 769 1915 1036 1014 |
0.67 16.14 40.18 21.74 21.28 |
||
| 40 | in the last 12 months, has your husband been away for 1mo or more | character | 4214 (84.23%) |
no yes |
607 182 |
76.93 23.07 |
||
| 41 | in the last 12mo, have you been away from home other than for visiting your parents/in-laws | character | 4935 (98.64%) |
no yes |
43 25 |
63.24 36.76 |
||
| 42 | mother is usually allowed to visit a health facility… | character | 4207 (84.09%) |
alone not at all with someone else only |
353 56 387 |
44.35 7.04 48.62 |
||
| 43 | does the mother have a mobile phone that she herself uses | character | 4207 (84.09%) |
no yes |
351 445 |
44.10 55.90 |
||
| 44 | is the mother able to read SMS messages | character | 4244 (84.83%) |
no yes |
320 439 |
42.16 57.84 |
||
| 45 | wealth index within state | character | 0 (0.00%) |
middle poorer poorest richer richest |
1019 1107 1252 951 674 |
20.37 22.13 25.02 19.01 13.47 |
||
| 46 | wealth index factor score within state | numeric | 0 (0.00%) | range: -2501080-2239460 | ||||
| 47 | wealth index within state - urban | character | 4003 (80.01%) |
middle poorer poorest richer richest |
213 226 246 187 128 |
21.30 22.60 24.60 18.70 12.80 |
||
| 48 | wealth index factor score within state - urban | numeric | 4003 (80.01%) | range: -3809560-1679199 | ||||
| 49 | wealth index within state - rural | character | 1000 (19.99%) |
middle poorer poorest richer richest |
807 808 943 738 707 |
20.16 20.18 23.56 18.44 17.66 |
||
| 50 | wealth index factor score within state - rural | numeric | 1000 (19.99%) | range: -2446780-2761580 | ||||
| 51 | where does the child receive most of their vaccinations | character | 2245 (44.87%) |
ngo or trust hospital / clinic other other private health faility other public sector health facility private: doctor / clinic private: hospital private: paramedic private: pharmacy / drugstore public ayush public: anganwadi / icds centre public: camp public: chc / rural hospital / block phc public: govt. / municipal hospital public: govt. dispensary public: govt. mobile clinic <… truncated> |
4 21 3 1 13 71 1 1 2 1374 45 198 295 115 2 |
0.15 0.76 0.11 0.04 0.47 2.57 0.04 0.04 0.07 49.82 1.63 7.18 10.70 4.17 0.07 |
||
# Second analysis subset: seventeen columns of df1 picked by position,
# restricted to rows that are complete on all of them.
# NOTE(review): the positions follow df1's column order as listed in the
# view_df codebook; selecting by name would be more robust — confirm the
# exact column names before switching.
df3 <- na.omit(
  df1[, c(4, 5, 7, 10, 12, 14, 15, 20, 22, 28, 32, 33, 37, 38, 39, 42, 43)]
)
# Reduce measles-1 vaccination status to "yes"/"no": any evidence of
# vaccination counts as "yes", and "don't know" is treated as "no".
df3$`received measles-1 vaccine [recommended at 9 months]`[
  df3$`received measles-1 vaccine [recommended at 9 months]` %in% c(
    "vaccination marked on card",
    "vaccination date on card",
    "vaccination identified",
    "reported by mother"
  )
] <- "yes"
df3$`received measles-1 vaccine [recommended at 9 months]`[
  df3$`received measles-1 vaccine [recommended at 9 months]` %in% "don't know"
] <- "no"
# Check the resulting group sizes.
count(df3, `received measles-1 vaccine [recommended at 9 months]`)
## # A tibble: 2 × 2
## `received measles-1 vaccine [recommended at 9 months]` n
## <chr> <int>
## 1 no 114
## 2 yes 266
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Draw a GGally pairs plot for one group of df3 columns, colouring every panel
# by measles-1 vaccination status.
#   cols: integer positions of the df3 columns to include.
# Returns the object created by ggpairs(); calling the helper at top level
# prints the plot, exactly as the individual ggpairs() calls did.
# This replaces six copy-pasted calls that differed only in `columns` and each
# carried a stray trailing comma (an empty extra argument).
plot_pairs_by_measles <- function(cols) {
  ggpairs(
    data = df3,
    columns = cols,
    mapping = ggplot2::aes(
      color = `received measles-1 vaccine [recommended at 9 months]`
    ),
    upper = list(continuous = "cor"),
    lower = list(continuous = "points"),
    diag = list(continuous = "densityDiag")
  )
}

# One plot per group of columns, in the same grouping as before.
plot_pairs_by_measles(1:3)
plot_pairs_by_measles(4:5)
plot_pairs_by_measles(6:8)
plot_pairs_by_measles(9:11)
plot_pairs_by_measles(12:14)
plot_pairs_by_measles(15:17)
library(DataExplorer)
# Generate DataExplorer's automated profiling report for the df3 subset.
DataExplorer::create_report(data = df3)
##
##
## processing file: report.rmd
##
|
| | 0%
|
|.. | 2%
## inline R code fragments
##
##
|
|... | 5%
## label: global_options (with options)
## List of 1
## $ include: logi FALSE
##
##
|
|..... | 7%
## ordinary text without R code
##
##
|
|....... | 10%
## label: introduce
##
|
|........ | 12%
## ordinary text without R code
##
##
|
|.......... | 14%
## label: plot_intro
##
|
|............ | 17%
## ordinary text without R code
##
##
|
|............. | 19%
## label: data_structure
##
|
|............... | 21%
## ordinary text without R code
##
##
|
|................. | 24%
## label: missing_profile
##
|
|.................. | 26%
## ordinary text without R code
##
##
|
|.................... | 29%
## label: univariate_distribution_header
##
|
|...................... | 31%
## ordinary text without R code
##
##
|
|....................... | 33%
## label: plot_histogram
##
|
|......................... | 36%
## ordinary text without R code
##
##
|
|........................... | 38%
## label: plot_density
##
|
|............................ | 40%
## ordinary text without R code
##
##
|
|.............................. | 43%
## label: plot_frequency_bar
##
|
|................................ | 45%
## ordinary text without R code
##
##
|
|................................. | 48%
## label: plot_response_bar
##
|
|................................... | 50%
## ordinary text without R code
##
##
|
|..................................... | 52%
## label: plot_with_bar
##
|
|...................................... | 55%
## ordinary text without R code
##
##
|
|........................................ | 57%
## label: plot_normal_qq
##
|
|.......................................... | 60%
## ordinary text without R code
##
##
|
|........................................... | 62%
## label: plot_response_qq
##
|
|............................................. | 64%
## ordinary text without R code
##
##
|
|............................................... | 67%
## label: plot_by_qq
##
|
|................................................ | 69%
## ordinary text without R code
##
##
|
|.................................................. | 71%
## label: correlation_analysis
##
|
|.................................................... | 74%
## ordinary text without R code
##
##
|
|..................................................... | 76%
## label: principal_component_analysis
##
|
|....................................................... | 79%
## ordinary text without R code
##
##
|
|......................................................... | 81%
## label: bivariate_distribution_header
##
|
|.......................................................... | 83%
## ordinary text without R code
##
##
|
|............................................................ | 86%
## label: plot_response_boxplot
##
|
|.............................................................. | 88%
## ordinary text without R code
##
##
|
|............................................................... | 90%
## label: plot_by_boxplot
##
|
|................................................................. | 93%
## ordinary text without R code
##
##
|
|................................................................... | 95%
## label: plot_response_scatterplot
##
|
|.................................................................... | 98%
## ordinary text without R code
##
##
|
|......................................................................| 100%
## label: plot_by_scatterplot
## output file: D:/Data Science/R/John Hopkins/Exploratory data analysis/Assignmentweek1/report.knit.md
## "C:/Program Files/RStudio/bin/quarto/bin/tools/pandoc" +RTS -K512m -RTS "D:/Data Science/R/John Hopkins/Exploratory data analysis/Assignmentweek1/report.knit.md" --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output pandoc3b5c246d4ef7.html --lua-filter "C:\Users\varun\AppData\Local\R\win-library\4.2\rmarkdown\rmarkdown\lua\pagebreak.lua" --lua-filter "C:\Users\varun\AppData\Local\R\win-library\4.2\rmarkdown\rmarkdown\lua\latex-div.lua" --self-contained --variable bs3=TRUE --standalone --section-divs --table-of-contents --toc-depth 6 --template "C:\Users\varun\AppData\Local\R\win-library\4.2\rmarkdown\rmd\h\default.html" --no-highlight --variable highlightjs=1 --variable theme=yeti --mathjax --variable "mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" --include-in-header "C:\Users\varun\AppData\Local\Temp\RtmpK6zxkt\rmarkdown-str3b5c10255f15.html"
##
## Output created: report.html