library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ purrr 1.2.1
## ✔ ggplot2 4.0.2 ✔ stringr 1.6.0
## ✔ lubridate 1.9.5 ✔ tibble 3.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(GGally)
library(ggthemes)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
# Load the dHealth CSV and normalise its column names.
# The path defaults to the original author's location, but it can be
# overridden with the DHEALTH_CSV environment variable so the script can run
# on other machines without editing the source.
df <- local({
  default_path <- "C:\\Users\\asus\\Downloads\\dHealth.csv"
  csv_path <- Sys.getenv("DHEALTH_CSV", unset = default_path)
  # An empty-but-set variable should behave like an unset one.
  if (!nzchar(csv_path)) {
    csv_path <- default_path
  }
  # clean_names() (janitor) rewrites the raw headers into snake_case.
  read.csv(csv_path) %>%
    clean_names()
})
# Show the cleaned column names so they can be checked against the short
# names assigned later in the script.
print(names(df))
## [1] "district_names"
## [2] "state_ut"
## [3] "number_of_households_surveyed"
## [4] "number_of_women_age_15_49_years_interviewed"
## [5] "number_of_men_age_15_54_years_interviewed"
## [6] "female_population_age_6_years_and_above_who_ever_attended_school"
## [7] "population_below_age_15_years"
## [8] "x_sex_ratio_of_the_total_population_females_per_1_000_males"
## [9] "sex_ratio_at_birth_for_children_born_in_the_last_five_years_females_per_1_000_males"
## [10] "children_under_age_5_years_whose_birth_was_registered_with_the_civil_authority"
## [11] "deaths_in_the_last_3_years_registered_with_the_civil_authority"
## [12] "population_living_in_households_with_electricity"
## [13] "population_living_in_households_with_an_improved_drinking_water_source1"
## [14] "population_living_in_households_that_use_an_improved_sanitation_facility2"
## [15] "households_using_clean_fuel_for_cooking3"
## [16] "households_using_iodized_salt"
## [17] "households_with_any_usual_member_covered_under_a_health_insurance_financing_scheme"
## [18] "children_age_5_years_who_attended_pre_primary_school_during_the_school_year_2019_20"
## [19] "women_age_15_49_who_are_literate4"
## [20] "women_age_15_49_with_10_or_more_years_of_schooling"
## [21] "women_age_20_24_years_married_before_age_18_years"
## [22] "births_in_the_5_years_preceding_the_survey_that_are_third_or_higher_order"
## [23] "women_age_15_19_years_who_were_already_mothers_or_pregnant_at_the_time_of_the_survey"
## [24] "women_age_15_24_years_who_use_hygienic_methods_of_protection_during_their_menstrual_period5"
## [25] "current_use_of_family_planning_methods_currently_married_women_age_15_49_years_any_method6"
## [26] "current_use_of_family_planning_methods_currently_married_women_age_15_49_years_any_modern_method6"
## [27] "current_use_of_family_planning_methods_currently_married_women_age_15_49_years_female_sterilization"
## [28] "current_use_of_family_planning_methods_currently_married_women_age_15_49_years_male_sterilization"
## [29] "current_use_of_family_planning_methods_currently_married_women_age_15_49_years_iud_ppiud"
## [30] "current_use_of_family_planning_methods_currently_married_women_age_15_49_years_pill"
## [31] "current_use_of_family_planning_methods_currently_married_women_age_15_49_years_condom"
## [32] "current_use_of_family_planning_methods_currently_married_women_age_15_49_years_injectables"
## [33] "total_unmet_need_for_family_planning_currently_married_women_age_15_49_years_7"
## [34] "unmet_need_for_spacing_currently_married_women_age_15_49_years_7"
## [35] "health_worker_ever_talked_to_female_non_users_about_family_planning"
## [36] "current_users_ever_told_about_side_effects_of_current_method_of_family_planning8"
## [37] "mothers_who_had_an_antenatal_check_up_in_the_first_trimester_for_last_birth_in_the_5_years_before_the_survey"
## [38] "mothers_who_had_at_least_4_antenatal_care_visits_for_last_birth_in_the_5_years_before_the_survey"
## [39] "mothers_whose_last_birth_was_protected_against_neonatal_tetanus_for_last_birth_in_the_5_years_before_the_survey_9"
## [40] "mothers_who_consumed_iron_folic_acid_for_100_days_or_more_when_they_were_pregnant_for_last_birth_in_the_5_years_before_the_survey"
## [41] "mothers_who_consumed_iron_folic_acid_for_180_days_or_more_when_they_were_pregnant_for_last_birth_in_the_5_years_before_the_survey"
## [42] "registered_pregnancies_for_which_the_mother_received_a_mother_and_child_protection_mcp_card_for_last_birth_in_the_5_years_before_the_survey"
## [43] "mothers_who_received_postnatal_care_from_a_doctor_nurse_lhv_anm_midwife_other_health_personnel_within_2_days_of_delivery_for_last_birth_in_the_5_years_before_the_survey"
## [44] "average_out_of_pocket_expenditure_per_delivery_in_a_public_health_facility_for_last_birth_in_the_5_years_before_the_survey_rs"
## [45] "children_born_at_home_who_were_taken_to_a_health_facility_for_a_check_up_within_24_hours_of_birth_for_last_birth_in_the_5_years_before_the_survey"
## [46] "children_who_received_postnatal_care_from_a_doctor_nurse_lhv_anm_midwife_other_health_personnel_within_2_days_of_delivery_for_last_birth_in_the_5_years_before_the_survey"
## [47] "institutional_births_in_the_5_years_before_the_survey"
## [48] "institutional_births_in_public_facility_in_the_5_years_before_the_survey"
## [49] "home_births_that_were_conducted_by_skilled_health_personnel_in_the_5_years_before_the_survey_10"
## [50] "births_attended_by_skilled_health_personnel_in_the_5_years_before_the_survey_10"
## [51] "births_delivered_by_caesarean_section_in_the_5_years_before_the_survey"
## [52] "births_in_a_private_health_facility_that_were_delivered_by_caesarean_section_in_the_5_years_before_the_survey"
## [53] "births_in_a_public_health_facility_that_were_delivered_by_caesarean_section_in_the_5_years_before_the_survey"
## [54] "children_age_12_23_months_fully_vaccinated_based_on_information_from_either_vaccination_card_or_mother_s_recall11"
## [55] "children_age_12_23_months_fully_vaccinated_based_on_information_from_vaccination_card_only12"
## [56] "children_age_12_23_months_who_have_received_bcg"
## [57] "children_age_12_23_months_who_have_received_3_doses_of_polio_vaccine13"
## [58] "children_age_12_23_months_who_have_received_3_doses_of_penta_or_dpt_vaccine"
## [59] "children_age_12_23_months_who_have_received_the_first_dose_of_measles_containing_vaccine_mcv"
## [60] "children_age_24_35_months_who_have_received_a_second_dose_of_measles_containing_vaccine_mcv"
## [61] "children_age_12_23_months_who_have_received_3_doses_of_rotavirus_vaccine14"
## [62] "children_age_12_23_months_who_have_received_3_doses_of_penta_or_hepatitis_b_vaccine"
## [63] "children_age_9_35_months_who_received_a_vitamin_a_dose_in_the_last_6_months"
## [64] "children_age_12_23_months_who_received_most_of_their_vaccinations_in_a_public_health_facility"
## [65] "children_age_12_23_months_who_received_most_of_their_vaccinations_in_a_private_health_facility"
## [66] "prevalence_of_diarrhoea_in_the_2_weeks_preceding_the_survey_children_under_age_5_years"
## [67] "children_with_diarrhoea_in_the_2_weeks_preceding_the_survey_who_received_oral_rehydration_salts_ors_children_under_age_5_years"
## [68] "children_with_diarrhoea_in_the_2_weeks_preceding_the_survey_who_received_zinc_children_under_age_5_years"
## [69] "children_swith_diarrhoea_in_the_2_weeks_preceding_the_survey_taken_to_a_health_facility_or_health_provider_children_under_age_5_years"
## [70] "children_prevalence_of_symptoms_of_acute_respiratory_infection_ari_in_the_2_weeks_preceding_the_survey_children_under_age_5_years"
## [71] "children_with_fever_or_symptoms_of_ari_in_the_2_weeks_preceding_the_survey_taken_to_a_health_facility_or_health_provider_children_under_age_5_years"
## [72] "children_under_age_3_years_breastfed_within_one_hour_of_birth15"
## [73] "children_under_age_6_months_exclusively_breastfed16"
## [74] "children_age_6_8_months_receiving_solid_or_semi_solid_food_and_breastmilk16"
## [75] "breastfeeding_children_age_6_23_months_receiving_an_adequate_diet16_17"
## [76] "non_breastfeeding_children_age_6_23_months_receiving_an_adequate_diet16_17"
## [77] "total_children_age_6_23_months_receiving_an_adequate_diet16_17"
## [78] "children_under_5_years_who_are_stunted_height_for_age_18"
## [79] "children_under_5_years_who_are_wasted_weight_for_height_18"
## [80] "children_under_5_years_who_are_severely_wasted_weight_for_height_19"
## [81] "children_under_5_years_who_are_underweight_weight_for_age_18"
## [82] "children_under_5_years_who_are_overweight_weight_for_height_20"
## [83] "women_age_15_49_years_whose_body_mass_index_bmi_is_below_normal_bmi_18_5_kg_m2_21"
## [84] "women_age_15_49_years_who_are_overweight_or_obese_bmi_25_0_kg_m2_21"
## [85] "women_age_15_49_years_who_have_high_risk_waist_to_hip_ratio_0_85"
## [86] "children_age_6_59_months_who_are_anaemic_11_0_g_dl_22"
## [87] "non_pregnant_women_age_15_49_years_who_are_anaemic_12_0_g_dl_22"
## [88] "pregnant_women_age_15_49_years_who_are_anaemic_11_0_g_dl_22"
## [89] "all_women_age_15_49_years_who_are_anaemic22"
## [90] "all_women_age_15_19_years_who_are_anaemic22"
## [91] "women_age_15_years_and_above_with_high_141_160_mg_dl_blood_sugar_level23"
## [92] "women_age_15_years_and_above_wih_very_high_160_mg_dl_blood_sugar_level23"
## [93] "women_age_15_years_and_above_wih_high_or_very_high_140_mg_dl_blood_sugar_level_or_taking_medicine_to_control_blood_sugar_level23"
## [94] "men_age_15_years_and_above_wih_high_141_160_mg_dl_blood_sugar_level23"
## [95] "men_age_15_years_and_above_wih_very_high_160_mg_dl_blood_sugar_level23"
## [96] "men_age_15_years_and_above_wih_high_or_very_high_140_mg_dl_blood_sugar_level_or_taking_medicine_to_control_blood_sugar_level23"
## [97] "women_age_15_years_and_above_wih_mildly_elevated_blood_pressure_systolic_140_159_mm_of_hg_and_or_diastolic_90_99_mm_of_hg"
## [98] "women_age_15_years_and_above_wih_moderately_or_severely_elevated_blood_pressure_systolic_160_mm_of_hg_and_or_diastolic_100_mm_of_hg"
## [99] "women_age_15_years_and_above_wih_elevated_blood_pressure_systolic_140_mm_of_hg_and_or_diastolic_90_mm_of_hg_or_taking_medicine_to_control_blood_pressure"
## [100] "men_age_15_years_and_above_wih_mildly_elevated_blood_pressure_systolic_140_159_mm_of_hg_and_or_diastolic_90_99_mm_of_hg"
## [101] "men_age_15_years_and_above_wih_moderately_or_severely_elevated_blood_pressure_systolic_160_mm_of_hg_and_or_diastolic_100_mm_of_hg"
## [102] "men_age_15_years_and_above_wih_elevated_blood_pressure_systolic_140_mm_of_hg_and_or_diastolic_90_mm_of_hg_or_taking_medicine_to_control_blood_pressure"
## [103] "women_age_30_49_years_ever_undergone_a_screening_test_for_cervical_cancer"
## [104] "women_age_30_49_years_ever_undergone_a_breast_examination_for_breast_cancer"
## [105] "women_age_30_49_years_ever_undergone_an_oral_cavity_examination_for_oral_cancer"
## [106] "women_age_15_years_and_above_who_use_any_kind_of_tobacco"
## [107] "men_age_15_years_and_above_who_use_any_kind_of_tobacco"
## [108] "women_age_15_years_and_above_who_consume_alcohol"
## [109] "men_age_15_years_and_above_who_consume_alcohol"
# Short analysis-friendly names, listed in the same order as the columns of
# `df` (see the printed names above). The rename below is positional.
new_names <- c(
  "district", "state", "hh_surveyed", "women_surveyed", "men_surveyed",
  "female_schooling", "pop_under_15", "sex_ratio_total", "sex_ratio_birth",
  "child_birth_reg", "death_reg", "hh_electricity", "hh_water", "hh_sanitation",
  "hh_cooking_fuel", "hh_iodized_salt", "hh_insurance", "child_preprimary",
  "women_literate", "women_10yr_schooling", "women_early_marriage",
  "births_3rd_order", "teen_pregnancy", "menstrual_hygiene", "fp_any",
  "fp_modern", "fp_female_ster", "fp_male_ster", "fp_iud", "fp_pill",
  "fp_condom", "fp_injectable", "fp_unmet_total", "fp_unmet_spacing",
  "fp_hw_talk", "fp_side_effects_info", "anc_1st_tri", "anc_4plus",
  "mother_tetanus", "ifa_100d", "ifa_180d", "mcp_card", "pnc_mother_2d",
  "delivery_cost_public", "home_birth_checkup", "pnc_child_2d", "inst_births",
  "inst_births_public", "home_birth_skilled", "birth_skilled_total",
  "birth_c_section", "birth_c_sec_private", "birth_c_sec_public",
  "vacc_full_all", "vacc_full_card", "vacc_bcg", "vacc_polio3", "vacc_penta_dpt3",
  "vacc_measles1", "vacc_measles2", "vacc_rotavirus3", "vacc_penta_hepb3",
  "vacc_vita", "vacc_facility_public", "vacc_facility_private",
  "child_diarrhea_prev", "child_diarrhea_ors", "child_diarrhea_zinc",
  "child_diarrhea_hfac", "child_ari_prev", "child_ari_hfac", "bf_early",
  "bf_exclusive_6m", "bf_solid_6_8m", "diet_adequate_bf", "diet_adequate_nonbf",
  "diet_adequate_total", "stunting", "wasting", "severe_wasting", "underweight",
  "child_overweight", "women_bmi_low", "women_obese", "women_high_whr",
  "child_anaemia", "women_nonpreg_anaemia", "women_preg_anaemia",
  "women_total_anaemia", "teen_anaemia", "women_sugar_high", "women_sugar_vhigh",
  "women_sugar_elevated", "men_sugar_high", "men_sugar_vhigh", "men_sugar_elevated",
  "women_bp_mild", "women_bp_mod_sev", "women_bp_elevated", "men_bp_mild",
  "men_bp_mod_sev", "men_bp_elevated", "screening_cervical", "screening_breast",
  "screening_oral", "women_tobacco", "men_tobacco", "women_alcohol", "men_alcohol"
)
# Because names are assigned by position, a name vector shorter than the
# number of columns can leave columns with NA names instead of raising an
# error. Fail loudly before touching `df`.
if (length(new_names) != ncol(df)) {
  stop(
    "Expected ", length(new_names), " columns to rename but `df` has ",
    ncol(df), ".",
    call. = FALSE
  )
}
# Duplicate names would make later column selection ambiguous.
if (anyDuplicated(new_names) > 0L) {
  stop("`new_names` contains duplicated column names.", call. = FALSE)
}
df <- df %>%
  setNames(new_names)
# Printing the new names
print(names(df))
## [1] "district" "state" "hh_surveyed"
## [4] "women_surveyed" "men_surveyed" "female_schooling"
## [7] "pop_under_15" "sex_ratio_total" "sex_ratio_birth"
## [10] "child_birth_reg" "death_reg" "hh_electricity"
## [13] "hh_water" "hh_sanitation" "hh_cooking_fuel"
## [16] "hh_iodized_salt" "hh_insurance" "child_preprimary"
## [19] "women_literate" "women_10yr_schooling" "women_early_marriage"
## [22] "births_3rd_order" "teen_pregnancy" "menstrual_hygiene"
## [25] "fp_any" "fp_modern" "fp_female_ster"
## [28] "fp_male_ster" "fp_iud" "fp_pill"
## [31] "fp_condom" "fp_injectable" "fp_unmet_total"
## [34] "fp_unmet_spacing" "fp_hw_talk" "fp_side_effects_info"
## [37] "anc_1st_tri" "anc_4plus" "mother_tetanus"
## [40] "ifa_100d" "ifa_180d" "mcp_card"
## [43] "pnc_mother_2d" "delivery_cost_public" "home_birth_checkup"
## [46] "pnc_child_2d" "inst_births" "inst_births_public"
## [49] "home_birth_skilled" "birth_skilled_total" "birth_c_section"
## [52] "birth_c_sec_private" "birth_c_sec_public" "vacc_full_all"
## [55] "vacc_full_card" "vacc_bcg" "vacc_polio3"
## [58] "vacc_penta_dpt3" "vacc_measles1" "vacc_measles2"
## [61] "vacc_rotavirus3" "vacc_penta_hepb3" "vacc_vita"
## [64] "vacc_facility_public" "vacc_facility_private" "child_diarrhea_prev"
## [67] "child_diarrhea_ors" "child_diarrhea_zinc" "child_diarrhea_hfac"
## [70] "child_ari_prev" "child_ari_hfac" "bf_early"
## [73] "bf_exclusive_6m" "bf_solid_6_8m" "diet_adequate_bf"
## [76] "diet_adequate_nonbf" "diet_adequate_total" "stunting"
## [79] "wasting" "severe_wasting" "underweight"
## [82] "child_overweight" "women_bmi_low" "women_obese"
## [85] "women_high_whr" "child_anaemia" "women_nonpreg_anaemia"
## [88] "women_preg_anaemia" "women_total_anaemia" "teen_anaemia"
## [91] "women_sugar_high" "women_sugar_vhigh" "women_sugar_elevated"
## [94] "men_sugar_high" "men_sugar_vhigh" "men_sugar_elevated"
## [97] "women_bp_mild" "women_bp_mod_sev" "women_bp_elevated"
## [100] "men_bp_mild" "men_bp_mod_sev" "men_bp_elevated"
## [103] "screening_cervical" "screening_breast" "screening_oral"
## [106] "women_tobacco" "men_tobacco" "women_alcohol"
## [109] "men_alcohol"
# Inspect the renamed data: open it in the data viewer, then print the
# first six rows.
tibble::view(df)
head(df, n = 6L)
## district state hh_surveyed women_surveyed
## 1 Nicobars Andaman & Nicobar Islands 882 764
## 2 North & Middle Andaman Andaman & Nicobar Islands 874 789
## 3 South Andaman Andaman & Nicobar Islands 868 844
## 4 Srikakulam Andhra Pradesh 874 780
## 5 Vizianagaram Andhra Pradesh 902 853
## 6 Visakhapatnam Andhra Pradesh 869 818
## men_surveyed female_schooling pop_under_15 sex_ratio_total sex_ratio_birth
## 1 125 78.0 23.0 973 927
## 2 108 82.7 19.8 950 844
## 3 134 84.7 21.0 967 935
## 4 100 60.0 20.7 1140 1163
## 5 134 56.0 20.6 1114 898
## 6 112 66.8 21.4 1066 974
## child_birth_reg death_reg hh_electricity hh_water hh_sanitation
## 1 98.0 83.2 97.9 98.8 83.5
## 2 100.0 (92.6) 93.2 92.2 86.4
## 3 96.5 92.2 99.6 97.9 89.3
## 4 95.0 71.0 99.9 87.7 71.6
## 5 95.4 81.7 99.5 93.1 61.7
## 6 90.5 71.3 99.6 91.8 77.8
## hh_cooking_fuel hh_iodized_salt hh_insurance child_preprimary women_literate
## 1 56.9 99.4 2.7 (29.5) 87.5
## 2 61.3 99.9 2.1 (30.1) 84.0
## 3 91.9 99.7 1.2 (50.8) 86.7
## 4 74.7 76.5 75.6 (0.0) 64.3
## 5 60.3 85.0 76.7 (25.2) 58.3
## 6 72.9 82.2 64.9 (5.0) 69.5
## women_10yr_schooling women_early_marriage births_3rd_order teen_pregnancy
## 1 53.5 11.4 0.0 1.8
## 2 41.0 15.4 1.5 3.8
## 3 57.5 17.1 0.5 2.8
## 4 42.5 25.4 0.0 5.5
## 5 37.6 33.7 2.2 12.7
## 6 46.0 25.4 0.0 9.5
## menstrual_hygiene fp_any fp_modern fp_female_ster fp_male_ster fp_iud fp_pill
## 1 100.0 65.3 57.2 46.4 0.0 2.7 2.0
## 2 100.0 84.1 73.1 48.3 0.6 6.4 7.8
## 3 98.2 57.1 50.5 34.0 0.0 2.8 1.8
## 4 78.0 72.3 72.2 71.0 0.3 0.6 0.0
## 5 72.5 71.2 71.2 69.3 1.3 0.0 0.0
## 6 85.7 68.0 67.7 64.9 1.5 0.3 0.0
## fp_condom fp_injectable fp_unmet_total fp_unmet_spacing fp_hw_talk
## 1 4.9 1.2 9.5 3.3 40.4
## 2 9.3 0.0 5.8 1.3 23.2
## 3 10.6 0.3 17.6 8.6 31.2
## 4 0.3 0.0 5.7 3.6 16.0
## 5 0.6 0.0 6.7 4.7 21.1
## 6 0.8 0.0 4.8 2.4 15.2
## fp_side_effects_info anc_1st_tri anc_4plus mother_tetanus ifa_100d ifa_180d
## 1 49.4 62.8 71.7 78.0 72.6 43.9
## 2 83.2 74.5 79.2 91.1 83.7 24.1
## 3 88.2 79.4 85.9 92.1 81.0 61.9
## 4 45.8 79.7 78.4 94.4 67.5 35.3
## 5 36.4 76.1 71.4 91.3 59.6 32.4
## 6 35.3 79.4 58.6 88.0 75.0 40.1
## mcp_card pnc_mother_2d delivery_cost_public home_birth_checkup pnc_child_2d
## 1 97.9 85.1 2278 * 92.5
## 2 99.2 92.5 1904 * 94.3
## 3 98.9 88.1 3460 * 89.8
## 4 100.0 90.8 3479 * 97.7
## 5 98.8 83.9 1931 * 89.2
## 6 97.0 84.8 2200 * 90.9
## inst_births inst_births_public home_birth_skilled birth_skilled_total
## 1 97.8 96.7 0.8 98.6
## 2 97.7 95.0 0.7 98.3
## 3 99.5 83.8 0.0 96.9
## 4 97.9 52.2 0.5 96.4
## 5 99.0 70.6 0.5 97.6
## 6 95.3 69.3 0.0 94.4
## birth_c_section birth_c_sec_private birth_c_sec_public vacc_full_all
## 1 11.5 * 10.7 (64.2)
## 2 12.9 * 11.4 *
## 3 37.1 (79.1) 29.6 (76.3)
## 4 57.0 73.8 44.5 (82.8)
## 5 41.3 70.3 30.3 (76.8)
## 6 26.5 57.2 16.8 (76.5)
## vacc_full_card vacc_bcg vacc_polio3 vacc_penta_dpt3 vacc_measles1
## 1 (94.1) (80.4) (69.1) (71.9) (67.3)
## 2 * * * * *
## 3 (96.6) (100.0) (79.0) (94.8) (81.7)
## 4 (100.0) (93.3) (82.8) (89.7) (93.3)
## 5 * (100.0) (76.8) (90.3) (92.5)
## 6 (93.5) (97.9) (76.5) (90.5) (95.1)
## vacc_measles2 vacc_rotavirus3 vacc_penta_hepb3 vacc_vita vacc_facility_public
## 1 (20.7) (3.1) (68.6) 94.9 (100.0)
## 2 * * * (89.6) *
## 3 (33.7) (0.0) (85.3) 84.0 (93.1)
## 4 (34.9) (74.8) (89.7) 69.6 (97.0)
## 5 (35.0) (77.3) (83.6) 85.9 (100.0)
## 6 (45.1) (72.9) (79.6) 91.3 (83.8)
## vacc_facility_private child_diarrhea_prev child_diarrhea_ors
## 1 (0.0) 5.7 *
## 2 * 4.5 *
## 3 (4.3) 6.0 *
## 4 (3.0) 11.9 *
## 5 (0.0) 7.5 *
## 6 (9.7) 8.1 *
## child_diarrhea_zinc child_diarrhea_hfac child_ari_prev child_ari_hfac
## 1 * * 1.8 (85.7)
## 2 * * 7.0 *
## 3 * * 0.0 (77.3)
## 4 * * 1.3 (79.7)
## 5 * * 1.4 (83.5)
## 6 * * 2.0 (72.3)
## bf_early bf_exclusive_6m bf_solid_6_8m diet_adequate_bf diet_adequate_nonbf
## 1 55.4 * * (19.4) *
## 2 27.3 * * (6.5) *
## 3 51.1 * * (22.3) *
## 4 42.8 * * (14.0) *
## 5 55.6 * * (2.5) *
## 6 64.3 * * (6.9) *
## diet_adequate_total stunting wasting severe_wasting underweight
## 1 (18.7) 21.6 15.7 7.8 24.6
## 2 (5.9) 27.0 27.0 8.3 42.8
## 3 23.5 21.1 12.6 3.5 17.4
## 4 16.1 19.7 19.5 7.4 21.4
## 5 1.8 36.4 19.2 8.3 32.2
## 6 11.8 31.0 21.5 11.2 33.5
## child_overweight women_bmi_low women_obese women_high_whr child_anaemia
## 1 1.5 8.2 39.1 62.5 37.7
## 2 0.8 8.6 35.9 79.3 30.4
## 3 7.2 10.0 39.0 78.2 43.4
## 4 4.5 13.8 27.2 54.0 59.6
## 5 4.7 16.9 28.8 58.0 66.7
## 6 4.8 17.4 23.8 58.0 72.6
## women_nonpreg_anaemia women_preg_anaemia women_total_anaemia teen_anaemia
## 1 38.4 * 38.3 48.0
## 2 62.5 * 62.1 47.8
## 3 57.6 * 57.7 43.2
## 4 62.8 * 62.6 59.2
## 5 64.6 * 64.0 73.9
## 6 58.6 * 58.0 58.9
## women_sugar_high women_sugar_vhigh women_sugar_elevated men_sugar_high
## 1 7.4 3.9 13.1 9.6
## 2 7.2 6.4 16.7 9.1
## 3 7.5 9.5 18.4 9.3
## 4 8.2 7.8 17.4 6.8
## 5 6.2 7.0 14.3 5.8
## 6 6.1 8.6 17.0 7.3
## men_sugar_vhigh men_sugar_elevated women_bp_mild women_bp_mod_sev
## 1 4.4 15.4 23.2 8.5
## 2 6.9 18.3 18.4 4.0
## 3 7.8 18.1 12.7 4.9
## 4 8.6 17.6 12.8 5.9
## 5 7.5 14.5 12.9 6.6
## 6 8.5 18.2 12.1 5.9
## women_bp_elevated men_bp_mild men_bp_mod_sev men_bp_elevated
## 1 35.4 32.9 11.1 47.0
## 2 27.4 22.6 6.0 32.2
## 3 23.0 17.9 6.1 26.9
## 4 22.1 14.4 5.5 22.9
## 5 25.2 14.8 6.4 25.1
## 6 23.9 17.0 7.0 29.2
## screening_cervical screening_breast screening_oral women_tobacco men_tobacco
## 1 13.4 13.2 5.4 63.5 76.8
## 2 1.7 0.3 15.8 46.8 70.5
## 3 1.3 0.7 8.0 19.6 50.8
## 4 1.0 0.2 3.8 7.1 21.3
## 5 4.9 0.6 7.3 11.4 21.5
## 6 1.7 0.7 4.1 6.3 22.8
## women_alcohol men_alcohol
## 1 29.6 64.5
## 2 5.1 45.3
## 3 1.7 32.8
## 4 0.6 28.3
## 5 0.8 32.3
## 6 1.3 30.2
# Step 2: check the structure (column types and sample values) of the dataset.
utils::str(object = df)
## 'data.frame': 706 obs. of 109 variables:
## $ district : chr "Nicobars" "North & Middle Andaman " "South Andaman " "Srikakulam " ...
## $ state : chr "Andaman & Nicobar Islands" "Andaman & Nicobar Islands" "Andaman & Nicobar Islands" "Andhra Pradesh" ...
## $ hh_surveyed : num 882 874 868 874 902 869 888 884 865 851 ...
## $ women_surveyed : num 764 789 844 780 853 818 824 841 820 807 ...
## $ men_surveyed : num 125 108 134 100 134 112 105 122 119 93 ...
## $ female_schooling : num 78 82.7 84.7 60 56 66.8 75.4 75.4 74 64.9 ...
## $ pop_under_15 : num 23 19.8 21 20.7 20.6 21.4 20.5 21.5 20.4 22.4 ...
## $ sex_ratio_total : num 973 950 967 1140 1114 ...
## $ sex_ratio_birth : chr "927 " "844 " "935 " "1163 " ...
## $ child_birth_reg : num 98 100 96.5 95 95.4 90.5 93 93.5 96.4 92.3 ...
## $ death_reg : chr "83.2 " "(92.6)" "92.2 " "71.0 " ...
## $ hh_electricity : num 97.9 93.2 99.6 99.9 99.5 99.6 98.8 99.3 99.6 99.2 ...
## $ hh_water : num 98.8 92.2 97.9 87.7 93.1 91.8 97.9 99.1 94.4 99.3 ...
## $ hh_sanitation : num 83.5 86.4 89.3 71.6 61.7 77.8 77.7 80.8 79.1 83.4 ...
## $ hh_cooking_fuel : num 56.9 61.3 91.9 74.7 60.3 72.9 80.3 86.8 89.8 91.7 ...
## $ hh_iodized_salt : num 99.4 99.9 99.7 76.5 85 82.2 81.2 83.4 87.5 85.8 ...
## $ hh_insurance : num 2.7 2.1 1.2 75.6 76.7 64.9 66.4 67.6 68.1 71.1 ...
## $ child_preprimary : chr "(29.5)" "(30.1)" "(50.8)" "(0.0)" ...
## $ women_literate : num 87.5 84 86.7 64.3 58.3 69.5 77.9 77 76.9 68.5 ...
## $ women_10yr_schooling : num 53.5 41 57.5 42.5 37.6 46 43.2 46.5 46.2 32.6 ...
## $ women_early_marriage : chr "11.4 " "15.4 " "17.1 " "25.4 " ...
## $ births_3rd_order : chr "0.0 " "1.5 " "0.5 " "0.0 " ...
## $ teen_pregnancy : chr "1.8 " "3.8 " "2.8 " "5.5 " ...
## $ menstrual_hygiene : num 100 100 98.2 78 72.5 85.7 71 84.4 92.6 88 ...
## $ fp_any : num 65.3 84.1 57.1 72.3 71.2 68 66.3 77.8 79.1 73.3 ...
## $ fp_modern : num 57.2 73.1 50.5 72.2 71.2 67.7 66.3 77.2 78.1 73.2 ...
## $ fp_female_ster : num 46.4 48.3 34 71 69.3 64.9 64.1 74.5 76.5 72.9 ...
## $ fp_male_ster : num 0 0.6 0 0.3 1.3 1.5 0.9 0.7 1 0 ...
## $ fp_iud : num 2.7 6.4 2.8 0.6 0 0.3 0.1 0.6 0 0.2 ...
## $ fp_pill : num 2 7.8 1.8 0 0 0 0.3 0.4 0 0 ...
## $ fp_condom : num 4.9 9.3 10.6 0.3 0.6 0.8 1.1 0.6 0.4 0.1 ...
## $ fp_injectable : num 1.2 0 0.3 0 0 0 0 0.4 0 0 ...
## $ fp_unmet_total : num 9.5 5.8 17.6 5.7 6.7 4.8 8 3 2.5 3.2 ...
## $ fp_unmet_spacing : num 3.3 1.3 8.6 3.6 4.7 2.4 4.4 1.8 1.4 1.7 ...
## $ fp_hw_talk : num 40.4 23.2 31.2 16 21.1 15.2 12.5 12.5 16.1 16.6 ...
## $ fp_side_effects_info : chr "49.4 " "83.2 " "88.2 " "45.8 " ...
## $ anc_1st_tri : chr "62.8 " "74.5 " "79.4 " "79.7 " ...
## $ anc_4plus : chr "71.7 " "79.2 " "85.9 " "78.4 " ...
## $ mother_tetanus : chr "78.0 " "91.1 " "92.1 " "94.4 " ...
## $ ifa_100d : chr "72.6 " "83.7 " "81.0 " "67.5 " ...
## $ ifa_180d : chr "43.9 " "24.1 " "61.9 " "35.3 " ...
## $ mcp_card : chr "97.9 " "99.2 " "98.9 " "100.0 " ...
## $ pnc_mother_2d : chr "85.1 " "92.5 " "88.1 " "90.8 " ...
## $ delivery_cost_public : chr "2278 " "1904 " "3460 " "3479 " ...
## $ home_birth_checkup : chr "*" "*" "*" "*" ...
## $ pnc_child_2d : chr "92.5 " "94.3 " "89.8 " "97.7 " ...
## $ inst_births : num 97.8 97.7 99.5 97.9 99 95.3 96.6 98.7 98.9 98.6 ...
## $ inst_births_public : num 96.7 95 83.8 52.2 70.6 69.3 46 48.8 40.1 49.6 ...
## $ home_birth_skilled : num 0.8 0.7 0 0.5 0.5 0 2.2 0.7 0.5 0.9 ...
## $ birth_skilled_total : num 98.6 98.3 96.9 96.4 97.6 94.4 89.9 98.5 98 95.5 ...
## $ birth_c_section : num 11.5 12.9 37.1 57 41.3 26.5 52.2 55.7 66.1 53.8 ...
## $ birth_c_sec_private : chr "*" "*" "(79.1)" "73.8 " ...
## $ birth_c_sec_public : chr "10.7 " "11.4 " "29.6 " "44.5 " ...
## $ vacc_full_all : chr "(64.2)" "*" "(76.3)" "(82.8)" ...
## $ vacc_full_card : chr "(94.1)" "*" "(96.6)" "(100.0)" ...
## $ vacc_bcg : chr "(80.4)" "*" "(100.0)" "(93.3)" ...
## $ vacc_polio3 : chr "(69.1)" "*" "(79.0)" "(82.8)" ...
## $ vacc_penta_dpt3 : chr "(71.9)" "*" "(94.8)" "(89.7)" ...
## $ vacc_measles1 : chr "(67.3)" "*" "(81.7)" "(93.3)" ...
## $ vacc_measles2 : chr "(20.7)" "*" "(33.7)" "(34.9)" ...
## $ vacc_rotavirus3 : chr "(3.1)" "*" "(0.0)" "(74.8)" ...
## $ vacc_penta_hepb3 : chr "(68.6)" "*" "(85.3)" "(89.7)" ...
## $ vacc_vita : chr "94.9 " "(89.6)" "84.0 " "69.6 " ...
## $ vacc_facility_public : chr "(100.0)" "*" "(93.1)" "(97.0)" ...
## $ vacc_facility_private: chr "(0.0)" "*" "(4.3)" "(3.0)" ...
## $ child_diarrhea_prev : num 5.7 4.5 6 11.9 7.5 8.1 13.3 2.7 7.8 10 ...
## $ child_diarrhea_ors : chr "*" "*" "*" "*" ...
## $ child_diarrhea_zinc : chr "*" "*" "*" "*" ...
## $ child_diarrhea_hfac : chr "*" "*" "*" "*" ...
## $ child_ari_prev : num 1.8 7 0 1.3 1.4 2 2.2 1 2.4 1 ...
## $ child_ari_hfac : chr "(85.7)" "*" "(77.3)" "(79.7)" ...
## $ bf_early : chr "55.4 " "27.3 " "51.1 " "42.8 " ...
## $ bf_exclusive_6m : chr "*" "*" "*" "*" ...
## $ bf_solid_6_8m : chr "*" "*" "*" "*" ...
## $ diet_adequate_bf : chr "(19.4)" "(6.5)" "(22.3)" "(14.0)" ...
## $ diet_adequate_nonbf : chr "*" "*" "*" "*" ...
## $ diet_adequate_total : chr "(18.7)" "(5.9)" "23.5 " "16.1 " ...
## $ stunting : chr "21.6 " "27.0 " "21.1 " "19.7 " ...
## $ wasting : chr "15.7 " "27.0 " "12.6 " "19.5 " ...
## $ severe_wasting : chr "7.8 " "8.3 " "3.5 " "7.4 " ...
## $ underweight : chr "24.6 " "42.8 " "17.4 " "21.4 " ...
## $ child_overweight : chr "1.5 " "0.8 " "7.2 " "4.5 " ...
## $ women_bmi_low : num 8.2 8.6 10 13.8 16.9 17.4 10.2 10.1 10.5 9.6 ...
## $ women_obese : num 39.1 35.9 39 27.2 28.8 23.8 44.4 45.3 40.6 46.4 ...
## $ women_high_whr : num 62.5 79.3 78.2 54 58 58 49.2 51.6 53.5 53.4 ...
## $ child_anaemia : chr "37.7 " "30.4 " "43.4 " "59.6 " ...
## $ women_nonpreg_anaemia: num 38.4 62.5 57.6 62.8 64.6 58.6 63.2 63.1 60.4 59.8 ...
## $ women_preg_anaemia : chr "*" "*" "*" "*" ...
## $ women_total_anaemia : num 38.3 62.1 57.7 62.6 64 58 63 63 60.3 59.5 ...
## $ teen_anaemia : chr "48.0 " "47.8 " "43.2 " "59.2 " ...
## $ women_sugar_high : num 7.4 7.2 7.5 8.2 6.2 6.1 7.5 7.3 7.9 7.8 ...
## $ women_sugar_vhigh : num 3.9 6.4 9.5 7.8 7 8.6 12.7 13.1 13.4 13 ...
## $ women_sugar_elevated : num 13.1 16.7 18.4 17.4 14.3 17 21.7 23.8 23.3 22.7 ...
## $ men_sugar_high : num 9.6 9.1 9.3 6.8 5.8 7.3 9.2 7.2 8.5 10.7 ...
## $ men_sugar_vhigh : num 4.4 6.9 7.8 8.6 7.5 8.5 15.5 10.4 11.9 13.4 ...
## $ men_sugar_elevated : num 15.4 18.3 18.1 17.6 14.5 18.2 27.6 18.9 22.5 25.9 ...
## $ women_bp_mild : num 23.2 18.4 12.7 12.8 12.9 12.1 13 14.7 13.4 14.7 ...
## $ women_bp_mod_sev : num 8.5 4 4.9 5.9 6.6 5.9 6.6 6.1 4.3 5.9 ...
## $ women_bp_elevated : num 35.4 27.4 23 22.1 25.2 23.9 29 28.8 24 25.8 ...
## [list output truncated]
#3 # Convert all columns except ‘district’ and ‘state’ to numeric, removing any non-numeric characters
# Clean the raw columns in a single pass.
#   * district / state are identifiers: they stay as text, but the stray
#     leading/trailing whitespace present in the raw values (e.g.
#     " Lakshadweep ", "South Andaman ") is trimmed so that group labels and
#     exact-match filters on these keys behave predictably.
#   * every other column is an indicator: the "(", ")" and "*" marker
#     characters are removed, surrounding whitespace is trimmed, and the result
#     is converted to numeric. A cell that held only "*" is left as an empty
#     string and therefore becomes NA.
df <- df %>%
  mutate(
    across(c(district, state), str_trim),
    across(-c(district, state), ~ {
      cleaned_value <- str_remove_all(.x, "[\\(\\)\\*]")
      cleaned_value <- str_trim(cleaned_value)
      as.numeric(cleaned_value)
    })
  )
# Confirm that every indicator column is now numeric.
str(df)
## 'data.frame': 706 obs. of 109 variables:
## $ district : chr "Nicobars" "North & Middle Andaman " "South Andaman " "Srikakulam " ...
## $ state : chr "Andaman & Nicobar Islands" "Andaman & Nicobar Islands" "Andaman & Nicobar Islands" "Andhra Pradesh" ...
## $ hh_surveyed : num 882 874 868 874 902 869 888 884 865 851 ...
## $ women_surveyed : num 764 789 844 780 853 818 824 841 820 807 ...
## $ men_surveyed : num 125 108 134 100 134 112 105 122 119 93 ...
## $ female_schooling : num 78 82.7 84.7 60 56 66.8 75.4 75.4 74 64.9 ...
## $ pop_under_15 : num 23 19.8 21 20.7 20.6 21.4 20.5 21.5 20.4 22.4 ...
## $ sex_ratio_total : num 973 950 967 1140 1114 ...
## $ sex_ratio_birth : num 927 844 935 1163 898 ...
## $ child_birth_reg : num 98 100 96.5 95 95.4 90.5 93 93.5 96.4 92.3 ...
## $ death_reg : num 83.2 92.6 92.2 71 81.7 71.3 68.2 90.1 86.3 82.5 ...
## $ hh_electricity : num 97.9 93.2 99.6 99.9 99.5 99.6 98.8 99.3 99.6 99.2 ...
## $ hh_water : num 98.8 92.2 97.9 87.7 93.1 91.8 97.9 99.1 94.4 99.3 ...
## $ hh_sanitation : num 83.5 86.4 89.3 71.6 61.7 77.8 77.7 80.8 79.1 83.4 ...
## $ hh_cooking_fuel : num 56.9 61.3 91.9 74.7 60.3 72.9 80.3 86.8 89.8 91.7 ...
## $ hh_iodized_salt : num 99.4 99.9 99.7 76.5 85 82.2 81.2 83.4 87.5 85.8 ...
## $ hh_insurance : num 2.7 2.1 1.2 75.6 76.7 64.9 66.4 67.6 68.1 71.1 ...
## $ child_preprimary : num 29.5 30.1 50.8 0 25.2 5 11.2 2.7 15.1 7.6 ...
## $ women_literate : num 87.5 84 86.7 64.3 58.3 69.5 77.9 77 76.9 68.5 ...
## $ women_10yr_schooling : num 53.5 41 57.5 42.5 37.6 46 43.2 46.5 46.2 32.6 ...
## $ women_early_marriage : num 11.4 15.4 17.1 25.4 33.7 25.4 26 22.1 25.3 35.4 ...
## $ births_3rd_order : num 0 1.5 0.5 0 2.2 0 1.3 0.5 0 1 ...
## $ teen_pregnancy : num 1.8 3.8 2.8 5.5 12.7 9.5 6.2 13.4 9.6 20.7 ...
## $ menstrual_hygiene : num 100 100 98.2 78 72.5 85.7 71 84.4 92.6 88 ...
## $ fp_any : num 65.3 84.1 57.1 72.3 71.2 68 66.3 77.8 79.1 73.3 ...
## $ fp_modern : num 57.2 73.1 50.5 72.2 71.2 67.7 66.3 77.2 78.1 73.2 ...
## $ fp_female_ster : num 46.4 48.3 34 71 69.3 64.9 64.1 74.5 76.5 72.9 ...
## $ fp_male_ster : num 0 0.6 0 0.3 1.3 1.5 0.9 0.7 1 0 ...
## $ fp_iud : num 2.7 6.4 2.8 0.6 0 0.3 0.1 0.6 0 0.2 ...
## $ fp_pill : num 2 7.8 1.8 0 0 0 0.3 0.4 0 0 ...
## $ fp_condom : num 4.9 9.3 10.6 0.3 0.6 0.8 1.1 0.6 0.4 0.1 ...
## $ fp_injectable : num 1.2 0 0.3 0 0 0 0 0.4 0 0 ...
## $ fp_unmet_total : num 9.5 5.8 17.6 5.7 6.7 4.8 8 3 2.5 3.2 ...
## $ fp_unmet_spacing : num 3.3 1.3 8.6 3.6 4.7 2.4 4.4 1.8 1.4 1.7 ...
## $ fp_hw_talk : num 40.4 23.2 31.2 16 21.1 15.2 12.5 12.5 16.1 16.6 ...
## $ fp_side_effects_info : num 49.4 83.2 88.2 45.8 36.4 35.3 32.4 28.8 27.7 28.2 ...
## $ anc_1st_tri : num 62.8 74.5 79.4 79.7 76.1 79.4 76.3 82.2 81.5 89.4 ...
## $ anc_4plus : num 71.7 79.2 85.9 78.4 71.4 58.6 51 62.7 73.3 62.5 ...
## $ mother_tetanus : num 78 91.1 92.1 94.4 91.3 88 87 95.3 95.8 90.3 ...
## $ ifa_100d : num 72.6 83.7 81 67.5 59.6 75 62.7 82.8 56.2 75.1 ...
## $ ifa_180d : num 43.9 24.1 61.9 35.3 32.4 40.1 32.3 43.7 39.2 42.5 ...
## $ mcp_card : num 97.9 99.2 98.9 100 98.8 97 91.4 100 96.4 96 ...
## $ pnc_mother_2d : num 85.1 92.5 88.1 90.8 83.9 84.8 91.9 94.8 88.3 94 ...
## $ delivery_cost_public : num 2278 1904 3460 3479 1931 ...
## $ home_birth_checkup : num NA NA NA NA NA NA NA NA NA NA ...
## $ pnc_child_2d : num 92.5 94.3 89.8 97.7 89.2 90.9 93.5 96.4 90.3 97.2 ...
## $ inst_births : num 97.8 97.7 99.5 97.9 99 95.3 96.6 98.7 98.9 98.6 ...
## $ inst_births_public : num 96.7 95 83.8 52.2 70.6 69.3 46 48.8 40.1 49.6 ...
## $ home_birth_skilled : num 0.8 0.7 0 0.5 0.5 0 2.2 0.7 0.5 0.9 ...
## $ birth_skilled_total : num 98.6 98.3 96.9 96.4 97.6 94.4 89.9 98.5 98 95.5 ...
## $ birth_c_section : num 11.5 12.9 37.1 57 41.3 26.5 52.2 55.7 66.1 53.8 ...
## $ birth_c_sec_private : num NA NA 79.1 73.8 70.3 57.2 72 72 79.7 72.2 ...
## $ birth_c_sec_public : num 10.7 11.4 29.6 44.5 30.3 16.8 34.1 40.4 47.9 37 ...
## $ vacc_full_all : num 64.2 NA 76.3 82.8 76.8 76.5 59 80 90 76.2 ...
## $ vacc_full_card : num 94.1 NA 96.6 100 NA 93.5 67.3 87 97.2 100 ...
## $ vacc_bcg : num 80.4 NA 100 93.3 100 97.9 92.7 94.8 93.9 97.6 ...
## $ vacc_polio3 : num 69.1 NA 79 82.8 76.8 76.5 66.3 80 90 76.2 ...
## $ vacc_penta_dpt3 : num 71.9 NA 94.8 89.7 90.3 90.5 83.9 89 93.9 97.6 ...
## $ vacc_measles1 : num 67.3 NA 81.7 93.3 92.5 95.1 76.7 91.7 90 92.5 ...
## $ vacc_measles2 : num 20.7 NA 33.7 34.9 35 45.1 32.5 26.3 33.8 39.6 ...
## $ vacc_rotavirus3 : num 3.1 NA 0 74.8 77.3 72.9 77.8 77.7 62.1 87.8 ...
## $ vacc_penta_hepb3 : num 68.6 NA 85.3 89.7 83.6 79.6 86.7 89 93.9 97.6 ...
## $ vacc_vita : num 94.9 89.6 84 69.6 85.9 91.3 81.4 73.9 81 80.2 ...
## $ vacc_facility_public : num 100 NA 93.1 97 100 83.8 85.3 100 100 97.4 ...
## $ vacc_facility_private: num 0 NA 4.3 3 0 9.7 7.5 0 0 2.6 ...
## $ child_diarrhea_prev : num 5.7 4.5 6 11.9 7.5 8.1 13.3 2.7 7.8 10 ...
## $ child_diarrhea_ors : num NA NA NA NA NA NA 72.9 NA NA NA ...
## $ child_diarrhea_zinc : num NA NA NA NA NA NA 23 NA NA NA ...
## $ child_diarrhea_hfac : num NA NA NA NA NA NA 69.4 NA NA NA ...
## $ child_ari_prev : num 1.8 7 0 1.3 1.4 2 2.2 1 2.4 1 ...
## $ child_ari_hfac : num 85.7 NA 77.3 79.7 83.5 72.3 63.7 71.9 45.8 NA ...
## $ bf_early : num 55.4 27.3 51.1 42.8 55.6 64.3 51.3 32.8 29.3 69.3 ...
## $ bf_exclusive_6m : num NA NA NA NA NA NA NA NA NA NA ...
## $ bf_solid_6_8m : num NA NA NA NA NA NA NA NA NA NA ...
## $ diet_adequate_bf : num 19.4 6.5 22.3 14 2.5 6.9 3 4.9 26.1 8.6 ...
## $ diet_adequate_nonbf : num NA NA NA NA NA NA NA NA NA NA ...
## $ diet_adequate_total : num 18.7 5.9 23.5 16.1 1.8 11.8 10.2 3.5 23.4 11.2 ...
## $ stunting : num 21.6 27 21.1 19.7 36.4 31 23.1 31.4 29.8 23.8 ...
## $ wasting : num 15.7 27 12.6 19.5 19.2 21.5 14.3 11.7 14.3 17.8 ...
## $ severe_wasting : num 7.8 8.3 3.5 7.4 8.3 11.2 3.8 4.7 5.2 8.1 ...
## $ underweight : num 24.6 42.8 17.4 21.4 32.2 33.5 22.4 22.5 21.1 26.9 ...
## $ child_overweight : num 1.5 0.8 7.2 4.5 4.7 4.8 3.4 5.2 4.2 3.6 ...
## $ women_bmi_low : num 8.2 8.6 10 13.8 16.9 17.4 10.2 10.1 10.5 9.6 ...
## $ women_obese : num 39.1 35.9 39 27.2 28.8 23.8 44.4 45.3 40.6 46.4 ...
## $ women_high_whr : num 62.5 79.3 78.2 54 58 58 49.2 51.6 53.5 53.4 ...
## $ child_anaemia : num 37.7 30.4 43.4 59.6 66.7 72.6 66.8 62.3 65.7 59.3 ...
## $ women_nonpreg_anaemia: num 38.4 62.5 57.6 62.8 64.6 58.6 63.2 63.1 60.4 59.8 ...
## $ women_preg_anaemia : num NA NA NA NA NA NA NA NA NA 51.9 ...
## $ women_total_anaemia : num 38.3 62.1 57.7 62.6 64 58 63 63 60.3 59.5 ...
## $ teen_anaemia : num 48 47.8 43.2 59.2 73.9 58.9 65.2 66.8 59 54.6 ...
## $ women_sugar_high : num 7.4 7.2 7.5 8.2 6.2 6.1 7.5 7.3 7.9 7.8 ...
## $ women_sugar_vhigh : num 3.9 6.4 9.5 7.8 7 8.6 12.7 13.1 13.4 13 ...
## $ women_sugar_elevated : num 13.1 16.7 18.4 17.4 14.3 17 21.7 23.8 23.3 22.7 ...
## $ men_sugar_high : num 9.6 9.1 9.3 6.8 5.8 7.3 9.2 7.2 8.5 10.7 ...
## $ men_sugar_vhigh : num 4.4 6.9 7.8 8.6 7.5 8.5 15.5 10.4 11.9 13.4 ...
## $ men_sugar_elevated : num 15.4 18.3 18.1 17.6 14.5 18.2 27.6 18.9 22.5 25.9 ...
## $ women_bp_mild : num 23.2 18.4 12.7 12.8 12.9 12.1 13 14.7 13.4 14.7 ...
## $ women_bp_mod_sev : num 8.5 4 4.9 5.9 6.6 5.9 6.6 6.1 4.3 5.9 ...
## $ women_bp_elevated : num 35.4 27.4 23 22.1 25.2 23.9 29 28.8 24 25.8 ...
## [list output truncated]
# Inspect the cleaned data: open it with view(), then print the first rows.
view(df)
df %>% head()
## district state hh_surveyed women_surveyed
## 1 Nicobars Andaman & Nicobar Islands 882 764
## 2 North & Middle Andaman Andaman & Nicobar Islands 874 789
## 3 South Andaman Andaman & Nicobar Islands 868 844
## 4 Srikakulam Andhra Pradesh 874 780
## 5 Vizianagaram Andhra Pradesh 902 853
## 6 Visakhapatnam Andhra Pradesh 869 818
## men_surveyed female_schooling pop_under_15 sex_ratio_total sex_ratio_birth
## 1 125 78.0 23.0 973 927
## 2 108 82.7 19.8 950 844
## 3 134 84.7 21.0 967 935
## 4 100 60.0 20.7 1140 1163
## 5 134 56.0 20.6 1114 898
## 6 112 66.8 21.4 1066 974
## child_birth_reg death_reg hh_electricity hh_water hh_sanitation
## 1 98.0 83.2 97.9 98.8 83.5
## 2 100.0 92.6 93.2 92.2 86.4
## 3 96.5 92.2 99.6 97.9 89.3
## 4 95.0 71.0 99.9 87.7 71.6
## 5 95.4 81.7 99.5 93.1 61.7
## 6 90.5 71.3 99.6 91.8 77.8
## hh_cooking_fuel hh_iodized_salt hh_insurance child_preprimary women_literate
## 1 56.9 99.4 2.7 29.5 87.5
## 2 61.3 99.9 2.1 30.1 84.0
## 3 91.9 99.7 1.2 50.8 86.7
## 4 74.7 76.5 75.6 0.0 64.3
## 5 60.3 85.0 76.7 25.2 58.3
## 6 72.9 82.2 64.9 5.0 69.5
## women_10yr_schooling women_early_marriage births_3rd_order teen_pregnancy
## 1 53.5 11.4 0.0 1.8
## 2 41.0 15.4 1.5 3.8
## 3 57.5 17.1 0.5 2.8
## 4 42.5 25.4 0.0 5.5
## 5 37.6 33.7 2.2 12.7
## 6 46.0 25.4 0.0 9.5
## menstrual_hygiene fp_any fp_modern fp_female_ster fp_male_ster fp_iud fp_pill
## 1 100.0 65.3 57.2 46.4 0.0 2.7 2.0
## 2 100.0 84.1 73.1 48.3 0.6 6.4 7.8
## 3 98.2 57.1 50.5 34.0 0.0 2.8 1.8
## 4 78.0 72.3 72.2 71.0 0.3 0.6 0.0
## 5 72.5 71.2 71.2 69.3 1.3 0.0 0.0
## 6 85.7 68.0 67.7 64.9 1.5 0.3 0.0
## fp_condom fp_injectable fp_unmet_total fp_unmet_spacing fp_hw_talk
## 1 4.9 1.2 9.5 3.3 40.4
## 2 9.3 0.0 5.8 1.3 23.2
## 3 10.6 0.3 17.6 8.6 31.2
## 4 0.3 0.0 5.7 3.6 16.0
## 5 0.6 0.0 6.7 4.7 21.1
## 6 0.8 0.0 4.8 2.4 15.2
## fp_side_effects_info anc_1st_tri anc_4plus mother_tetanus ifa_100d ifa_180d
## 1 49.4 62.8 71.7 78.0 72.6 43.9
## 2 83.2 74.5 79.2 91.1 83.7 24.1
## 3 88.2 79.4 85.9 92.1 81.0 61.9
## 4 45.8 79.7 78.4 94.4 67.5 35.3
## 5 36.4 76.1 71.4 91.3 59.6 32.4
## 6 35.3 79.4 58.6 88.0 75.0 40.1
## mcp_card pnc_mother_2d delivery_cost_public home_birth_checkup pnc_child_2d
## 1 97.9 85.1 2278 NA 92.5
## 2 99.2 92.5 1904 NA 94.3
## 3 98.9 88.1 3460 NA 89.8
## 4 100.0 90.8 3479 NA 97.7
## 5 98.8 83.9 1931 NA 89.2
## 6 97.0 84.8 2200 NA 90.9
## inst_births inst_births_public home_birth_skilled birth_skilled_total
## 1 97.8 96.7 0.8 98.6
## 2 97.7 95.0 0.7 98.3
## 3 99.5 83.8 0.0 96.9
## 4 97.9 52.2 0.5 96.4
## 5 99.0 70.6 0.5 97.6
## 6 95.3 69.3 0.0 94.4
## birth_c_section birth_c_sec_private birth_c_sec_public vacc_full_all
## 1 11.5 NA 10.7 64.2
## 2 12.9 NA 11.4 NA
## 3 37.1 79.1 29.6 76.3
## 4 57.0 73.8 44.5 82.8
## 5 41.3 70.3 30.3 76.8
## 6 26.5 57.2 16.8 76.5
## vacc_full_card vacc_bcg vacc_polio3 vacc_penta_dpt3 vacc_measles1
## 1 94.1 80.4 69.1 71.9 67.3
## 2 NA NA NA NA NA
## 3 96.6 100.0 79.0 94.8 81.7
## 4 100.0 93.3 82.8 89.7 93.3
## 5 NA 100.0 76.8 90.3 92.5
## 6 93.5 97.9 76.5 90.5 95.1
## vacc_measles2 vacc_rotavirus3 vacc_penta_hepb3 vacc_vita vacc_facility_public
## 1 20.7 3.1 68.6 94.9 100.0
## 2 NA NA NA 89.6 NA
## 3 33.7 0.0 85.3 84.0 93.1
## 4 34.9 74.8 89.7 69.6 97.0
## 5 35.0 77.3 83.6 85.9 100.0
## 6 45.1 72.9 79.6 91.3 83.8
## vacc_facility_private child_diarrhea_prev child_diarrhea_ors
## 1 0.0 5.7 NA
## 2 NA 4.5 NA
## 3 4.3 6.0 NA
## 4 3.0 11.9 NA
## 5 0.0 7.5 NA
## 6 9.7 8.1 NA
## child_diarrhea_zinc child_diarrhea_hfac child_ari_prev child_ari_hfac
## 1 NA NA 1.8 85.7
## 2 NA NA 7.0 NA
## 3 NA NA 0.0 77.3
## 4 NA NA 1.3 79.7
## 5 NA NA 1.4 83.5
## 6 NA NA 2.0 72.3
## bf_early bf_exclusive_6m bf_solid_6_8m diet_adequate_bf diet_adequate_nonbf
## 1 55.4 NA NA 19.4 NA
## 2 27.3 NA NA 6.5 NA
## 3 51.1 NA NA 22.3 NA
## 4 42.8 NA NA 14.0 NA
## 5 55.6 NA NA 2.5 NA
## 6 64.3 NA NA 6.9 NA
## diet_adequate_total stunting wasting severe_wasting underweight
## 1 18.7 21.6 15.7 7.8 24.6
## 2 5.9 27.0 27.0 8.3 42.8
## 3 23.5 21.1 12.6 3.5 17.4
## 4 16.1 19.7 19.5 7.4 21.4
## 5 1.8 36.4 19.2 8.3 32.2
## 6 11.8 31.0 21.5 11.2 33.5
## child_overweight women_bmi_low women_obese women_high_whr child_anaemia
## 1 1.5 8.2 39.1 62.5 37.7
## 2 0.8 8.6 35.9 79.3 30.4
## 3 7.2 10.0 39.0 78.2 43.4
## 4 4.5 13.8 27.2 54.0 59.6
## 5 4.7 16.9 28.8 58.0 66.7
## 6 4.8 17.4 23.8 58.0 72.6
## women_nonpreg_anaemia women_preg_anaemia women_total_anaemia teen_anaemia
## 1 38.4 NA 38.3 48.0
## 2 62.5 NA 62.1 47.8
## 3 57.6 NA 57.7 43.2
## 4 62.8 NA 62.6 59.2
## 5 64.6 NA 64.0 73.9
## 6 58.6 NA 58.0 58.9
## women_sugar_high women_sugar_vhigh women_sugar_elevated men_sugar_high
## 1 7.4 3.9 13.1 9.6
## 2 7.2 6.4 16.7 9.1
## 3 7.5 9.5 18.4 9.3
## 4 8.2 7.8 17.4 6.8
## 5 6.2 7.0 14.3 5.8
## 6 6.1 8.6 17.0 7.3
## men_sugar_vhigh men_sugar_elevated women_bp_mild women_bp_mod_sev
## 1 4.4 15.4 23.2 8.5
## 2 6.9 18.3 18.4 4.0
## 3 7.8 18.1 12.7 4.9
## 4 8.6 17.6 12.8 5.9
## 5 7.5 14.5 12.9 6.6
## 6 8.5 18.2 12.1 5.9
## women_bp_elevated men_bp_mild men_bp_mod_sev men_bp_elevated
## 1 35.4 32.9 11.1 47.0
## 2 27.4 22.6 6.0 32.2
## 3 23.0 17.9 6.1 26.9
## 4 22.1 14.4 5.5 22.9
## 5 25.2 14.8 6.4 25.1
## 6 23.9 17.0 7.0 29.2
## screening_cervical screening_breast screening_oral women_tobacco men_tobacco
## 1 13.4 13.2 5.4 63.5 76.8
## 2 1.7 0.3 15.8 46.8 70.5
## 3 1.3 0.7 8.0 19.6 50.8
## 4 1.0 0.2 3.8 7.1 21.3
## 5 4.9 0.6 7.3 11.4 21.5
## 6 1.7 0.7 4.1 6.3 22.8
## women_alcohol men_alcohol
## 1 29.6 64.5
## 2 5.1 45.3
## 3 1.7 32.8
## 4 0.6 28.3
## 5 0.8 32.3
## 6 1.3 30.2
#4 # Check for missing values in the dataset
# Number of NA cells in each column.
df %>%
  is.na() %>%
  colSums()
## district state hh_surveyed
## 0 0 0
## women_surveyed men_surveyed female_schooling
## 0 0 0
## pop_under_15 sex_ratio_total sex_ratio_birth
## 0 0 0
## child_birth_reg death_reg hh_electricity
## 0 1 0
## hh_water hh_sanitation hh_cooking_fuel
## 0 0 0
## hh_iodized_salt hh_insurance child_preprimary
## 0 0 3
## women_literate women_10yr_schooling women_early_marriage
## 0 0 0
## births_3rd_order teen_pregnancy menstrual_hygiene
## 1 0 0
## fp_any fp_modern fp_female_ster
## 0 0 0
## fp_male_ster fp_iud fp_pill
## 0 0 0
## fp_condom fp_injectable fp_unmet_total
## 0 0 0
## fp_unmet_spacing fp_hw_talk fp_side_effects_info
## 0 0 2
## anc_1st_tri anc_4plus mother_tetanus
## 0 0 0
## ifa_100d ifa_180d mcp_card
## 0 0 0
## pnc_mother_2d delivery_cost_public home_birth_checkup
## 0 1 422
## pnc_child_2d inst_births inst_births_public
## 0 0 0
## home_birth_skilled birth_skilled_total birth_c_section
## 0 0 0
## birth_c_sec_private birth_c_sec_public vacc_full_all
## 150 0 13
## vacc_full_card vacc_bcg vacc_polio3
## 22 13 13
## vacc_penta_dpt3 vacc_measles1 vacc_measles2
## 13 13 13
## vacc_rotavirus3 vacc_penta_hepb3 vacc_vita
## 13 13 1
## vacc_facility_public vacc_facility_private child_diarrhea_prev
## 16 16 0
## child_diarrhea_ors child_diarrhea_zinc child_diarrhea_hfac
## 492 492 492
## child_ari_prev child_ari_hfac bf_early
## 0 224 0
## bf_exclusive_6m bf_solid_6_8m diet_adequate_bf
## 261 642 5
## diet_adequate_nonbf diet_adequate_total stunting
## 643 1 0
## wasting severe_wasting underweight
## 0 0 0
## child_overweight women_bmi_low women_obese
## 0 0 0
## women_high_whr child_anaemia women_nonpreg_anaemia
## 0 0 0
## women_preg_anaemia women_total_anaemia teen_anaemia
## 134 0 0
## women_sugar_high women_sugar_vhigh women_sugar_elevated
## 0 0 0
## men_sugar_high men_sugar_vhigh men_sugar_elevated
## 0 0 0
## women_bp_mild women_bp_mod_sev women_bp_elevated
## 0 0 0
## men_bp_mild men_bp_mod_sev men_bp_elevated
## 0 0 0
## screening_cervical screening_breast screening_oral
## 0 0 0
## women_tobacco men_tobacco women_alcohol
## 0 0 0
## men_alcohol
## 0
# Mean-impute every column other than district and state: each NA is replaced
# by the mean of the non-missing values of that same column.
df <- df %>%
  mutate(
    across(
      -c(district, state),
      function(col) coalesce(col, mean(col, na.rm = TRUE))
    )
  )
# Total number of NA cells left in the data frame.
df %>%
  is.na() %>%
  sum()
## [1] 0
#1.1 # Finding Mean Literacy by State: from Highest to Lowest
# Mean of women_literate per state, ordered from highest to lowest.
state_literacy <- df %>%
  group_by(state) %>%
  summarise(
    mean_literacy = mean(women_literate, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_literacy))
print(state_literacy)
## # A tibble: 36 × 2
## state mean_literacy
## <chr> <dbl>
## 1 "Kerala" 97.9
## 2 " Lakshadweep " 96.5
## 3 "Goa" 93.1
## 4 "Mizoram" 91.3
## 5 "Puducherry" 91.1
## 6 "Himachal Pradesh" 90.2
## 7 "Sikkim" 86.9
## 8 "Andaman & Nicobar Islands" 86.1
## 9 "Meghalaya" 86.0
## 10 "Tamil Nadu" 85.7
## # ℹ 26 more rows
#1.2 # Finding State-wise Insurance Coverage: from Highest to Lowest
# Mean of hh_insurance per state, ordered from highest to lowest.
state_insurance <- df %>%
  group_by(state) %>%
  summarise(
    mean_insurance = mean(hh_insurance, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_insurance))
print(state_insurance)
## # A tibble: 36 × 2
## state mean_insurance
## <chr> <dbl>
## 1 "Rajasthan" 88.4
## 2 "Chhattisgarh" 73.2
## 3 "Andhra Pradesh" 70.8
## 4 "Meghalaya" 68.3
## 5 "Tamil Nadu" 67.9
## 6 "Uttarakhand" 67.1
## 7 "Goa" 66.2
## 8 "Telangana" 62.3
## 9 "Assam" 61.0
## 10 " Lakshadweep " 60.1
## # ℹ 26 more rows
#1.3 # Identify High Anemia Zones: Top 10 states with the highest average child anemia rates
# Rank states by their mean child_anaemia value and keep the ten highest.
high_anemia_states <- df %>%
  group_by(state) %>%
  summarise(
    mean_child_anemia = mean(child_anaemia, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_child_anemia)) %>%
  head(n = 10)
print("Top 10 High Anemia States (Children):")
## [1] "Top 10 High Anemia States (Children):"
print(high_anemia_states)
## # A tibble: 10 × 2
## state mean_child_anemia
## <chr> <dbl>
## 1 Ladakh 91.7
## 2 Gujarat 79.8
## 3 Dadra and Nagar Haveli & Daman and Diu 73.4
## 4 Jammu & Kashmir 73.2
## 5 Rajasthan 72.2
## 6 Madhya Pradesh 71.7
## 7 Maharastra 70.3
## 8 Telangana 70.2
## 9 Bihar 70.1
## 10 Assam 69.8
#1.4 # Correlation Check: Relationship between Clean_Fuel and Literacy_W
# Correlation (cor() default method, i.e. Pearson) between hh_cooking_fuel and
# women_literate over all rows, using only rows where both values are present.
correlation_value <- with(
  df,
  cor(hh_cooking_fuel, women_literate, use = "complete.obs")
)
print(paste("Pearson Correlation:", round(correlation_value, 4)))
## [1] "Pearson Correlation: 0.4475"
library(dplyr)
# Per-state literacy profile: mean, district count, and min/max of
# women_literate, ordered by the mean from highest to lowest.
#
# df is used as-is. By this point its columns are already renamed and numeric,
# so this block deliberately does NOT re-truncate df to a fixed number of
# columns, re-assign its column names, or re-parse women_literate from text:
#   * truncating to the first 109 columns would silently discard any columns
#     added to df afterwards if this block is re-run later in a session;
#   * re-assigning names only works when df has exactly the expected width;
#   * stripping every character outside [0-9.] from a numeric column is
#     redundant and would corrupt values printed with a sign or an exponent.
state_literacy_analysis <- df %>%
  group_by(state) %>%
  summarize(
    avg_literacy = mean(women_literate, na.rm = TRUE),
    district_count = n(),
    min_literacy = min(women_literate, na.rm = TRUE),
    max_literacy = max(women_literate, na.rm = TRUE)
  ) %>%
  arrange(desc(avg_literacy))
print(state_literacy_analysis)
## # A tibble: 36 × 5
## state avg_literacy district_count min_literacy max_literacy
## <chr> <dbl> <int> <dbl> <dbl>
## 1 "Kerala" 97.9 14 93.7 99.7
## 2 " Lakshadweep " 96.5 1 96.5 96.5
## 3 "Goa" 93.1 2 92.4 93.8
## 4 "Mizoram" 91.3 8 76 99.7
## 5 "Puducherry" 91.1 4 83.3 99.7
## 6 "Himachal Pradesh" 90.2 12 84.1 94.8
## 7 "Sikkim" 86.9 4 81.4 90.5
## 8 "Andaman & Nicobar Isl… 86.1 3 84 87.5
## 9 "Meghalaya" 86.0 11 76 93.7
## 10 "Tamil Nadu" 85.7 32 77.4 98
## # ℹ 26 more rows
#1.5 # Top 25 Sex Ratio Districts: Ranking districts by the Sex Ratio column.
# The 25 rows with the highest sex_ratio_total, with their district and state.
# Note: despite the "top_5" in its name, this object holds 25 rows.
top_5_sex_ratio <- df %>%
  arrange(desc(sex_ratio_total)) %>%
  select(district, state, sex_ratio_total) %>%
  head(n = 25)
print(top_5_sex_ratio)
## district state sex_ratio_total
## 1 Diu Dadra and Nagar Haveli & Daman and Diu 1332
## 2 Almora Uttarakhand 1331
## 3 Rudraprayag Uttarakhand 1242
## 4 Tehri Garhwal Uttarakhand 1236
## 5 Pratapgarh Uttar Pradesh 1229
## 6 Madhubani Bihar 1227
## 7 Sivaganga Tamil Nadu 1223
## 8 Jagitial Telangana 1219
## 9 Perambalur Tamil Nadu 1210
## 10 Sitamarhi Bihar 1209
## 11 Kannur Kerala 1203
## 12 Kishanganj Bihar 1199
## 13 Dharmapuri Tamil Nadu 1192
## 14 Lakshadweep Lakshadweep 1187
## 15 Purba Champaran Bihar 1185
## 16 Sant Kabir Nagar Uttar Pradesh 1185
## 17 Palamu Jharkhand 1183
## 18 Hamirpur Himachal Pradesh 1182
## 19 Siddharthnagar Uttar Pradesh 1177
## 20 Pathanamthitta Kerala 1174
## 21 Bageshwar Uttarakhand 1174
## 22 Kollam Kerala 1171
## 23 Nirmal Telangana 1171
## 24 Amethi Uttar Pradesh 1170
## 25 Kodagu Karnataka 1168
#1.6 # Bottom 15 Immunization Areas: Identifying areas with low vaccination rates.
# The 15 rows with the lowest vacc_full_all, with their district and state.
# Note: despite the "bottom_10" in its name, this object holds 15 rows.
bottom_10_vaccination <- df %>%
  arrange(vacc_full_all) %>%
  select(district, state, vacc_full_all) %>%
  head(n = 15)
print(bottom_10_vaccination)
## district state vacc_full_all
## 1 Udalguri Assam 38.3
## 2 Ukhrul Manipur 39.4
## 3 Tuensang Nagaland 39.9
## 4 Wokha Nagaland 42.8
## 5 Kiphire Nagaland 42.8
## 6 Banas Kantha Gujarat 43.5
## 7 Jhansi Uttar Pradesh 44.5
## 8 North Garo Hills Meghalaya 47.5
## 9 West Karbi Anglong Assam 47.9
## 10 South Tripura Tripura 48.5
## 11 East Siang Arunachal Pradesh 48.8
## 12 East Khasi Hills Meghalaya 49.1
## 13 Kokrajhar Assam 51.1
## 14 Palakkad Kerala 51.8
## 15 Bahraich Uttar Pradesh 51.8
#1.7 # Gender Gap in Tobacco: Compare Tobacco_W vs Tobacco_M
# Gender gap in tobacco use, one row per row of df:
#   tobacco_ratio = men_tobacco / women_tobacco
#   tobacco_gap   = men_tobacco - women_tobacco
# A women_tobacco of exactly 0 is converted to NA before dividing. Without that
# guard the ratio would be Inf, which na.rm = TRUE does not remove from the
# mean below and which would sort to the top of the ranking.
df_tobacco_ratio <- df %>%
  mutate(
    tobacco_ratio = men_tobacco / na_if(women_tobacco, 0),
    tobacco_gap = men_tobacco - women_tobacco
  ) %>%
  select(
    district, state, women_tobacco, men_tobacco, tobacco_ratio, tobacco_gap
  )
# Unweighted mean of the per-row ratios; rows with an undefined ratio are
# skipped.
average_ratio <- mean(df_tobacco_ratio$tobacco_ratio, na.rm = TRUE)
# The 25 rows with the largest ratio (the object keeps its "top_10" name).
top_10_ratios <- df_tobacco_ratio %>%
  arrange(desc(tobacco_ratio)) %>%
  head(25)
cat("Average National Tobacco Ratio (Men:Women):", round(average_ratio, 2), "\n\n")
## Average National Tobacco Ratio (Men:Women): 8
print(top_10_ratios)
## district state women_tobacco men_tobacco
## 1 Faridkot Punjab 0.1 18.6
## 2 Firozpur Punjab 0.1 12.6
## 3 Tarn Taran Punjab 0.1 6.8
## 4 Fatehgarh Sahib Punjab 0.2 12.3
## 5 Kapurthala Punjab 0.2 11.6
## 6 Moga Punjab 0.3 16.9
## 7 Shahid Bhagat Singh Nagar Punjab 0.2 10.9
## 8 Muktsar Punjab 0.4 20.8
## 9 Hoshiarpur Punjab 0.2 10.1
## 10 Hamirpur Himachal Pradesh 0.7 33.0
## 11 Barnala Punjab 0.3 14.0
## 12 Fazilka Punjab 0.4 17.5
## 13 Mansa Punjab 0.4 16.9
## 14 Amritsar Punjab 0.2 8.2
## 15 Rohtas Bihar 1.0 40.6
## 16 Pathankot Punjab 0.4 15.8
## 17 Una Himachal Pradesh 0.8 31.0
## 18 Patiala Punjab 0.3 11.2
## 19 Jammu Jammu & Kashmir 0.8 25.7
## 20 Fatehabad Haryana 0.8 23.5
## 21 Ambala Haryana 0.8 22.0
## 22 Banka Bihar 1.8 49.4
## 23 Kangra Himachal Pradesh 1.2 32.9
## 24 Ludhiana Punjab 0.5 13.5
## 25 Kodarma Jharkhand 1.4 37.3
## tobacco_ratio tobacco_gap
## 1 186.00000 18.5
## 2 126.00000 12.5
## 3 68.00000 6.7
## 4 61.50000 12.1
## 5 58.00000 11.4
## 6 56.33333 16.6
## 7 54.50000 10.7
## 8 52.00000 20.4
## 9 50.50000 9.9
## 10 47.14286 32.3
## 11 46.66667 13.7
## 12 43.75000 17.1
## 13 42.25000 16.5
## 14 41.00000 8.0
## 15 40.60000 39.6
## 16 39.50000 15.4
## 17 38.75000 30.2
## 18 37.33333 10.9
## 19 32.12500 24.9
## 20 29.37500 22.7
## 21 27.50000 21.2
## 22 27.44444 47.6
## 23 27.41667 31.7
## 24 27.00000 13.0
## 25 26.64286 35.9
#1.8 # BMI Analysis: Find the average percentage of women with below-normal BMI per state.
# Mean of women_bmi_low per state, ordered from highest to lowest.
bmi_analysis <- df %>%
  group_by(state) %>%
  summarise(
    avg_women_bmi_low = mean(women_bmi_low, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_women_bmi_low))
print(bmi_analysis)
## # A tibble: 36 × 2
## state avg_women_bmi_low
## <chr> <dbl>
## 1 Jharkhand 26.9
## 2 Gujarat 26.0
## 3 Bihar 25.9
## 4 Chhattisgarh 25.0
## 5 Madhya Pradesh 23.5
## 6 Dadra and Nagar Haveli & Daman and Diu 21.9
## 7 Odisha 21.9
## 8 Maharastra 21.7
## 9 Rajasthan 20.2
## 10 Telangana 20.0
## # ℹ 26 more rows
#1.9 # Alcohol Consumption Ranking: Rank states by their average men_alcohol percentage, with the average women_alcohol percentage shown alongside
# Per-state means of women_alcohol and men_alcohol, ordered by the men's mean
# from highest to lowest.
alcohol_ranking <- df %>%
  group_by(state) %>%
  summarise(
    avg_women_alcohol = mean(women_alcohol, na.rm = TRUE),
    avg_men_alcohol = mean(men_alcohol, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_men_alcohol))
print(alcohol_ranking)
## # A tibble: 36 × 3
## state avg_women_alcohol avg_men_alcohol
## <chr> <dbl> <dbl>
## 1 Arunachal Pradesh 23.9 53.0
## 2 Andaman & Nicobar Islands 12.1 47.5
## 3 Telangana 7.4 45.5
## 4 Sikkim 18.2 42.1
## 5 Chhattisgarh 8.72 37.9
## 6 Goa 5.7 37.4
## 7 Jharkhand 7.49 37.4
## 8 Manipur 1.09 37.4
## 9 Meghalaya 1.57 34.5
## 10 Himachal Pradesh 0.675 33.8
## # ℹ 26 more rows
#1.10 # Urban vs Rural Proxy: Compare districts with high vs low Electricity.
# Label each row by its hh_electricity value: 95 or more is the "High
# Electricity" group, anything below 95 is the "Low Electricity" group.
df_proxy <- df %>%
  mutate(
    area_proxy = if_else(
      hh_electricity < 95,
      "Low Electricity (Rural Proxy)",
      "High Electricity (Urban Proxy)"
    )
  )
# Row count and mean of five indicators within each of the two groups.
urban_rural_comparison <- df_proxy %>%
  group_by(area_proxy) %>%
  summarise(
    count = n(),
    avg_literacy = mean(women_literate, na.rm = TRUE),
    avg_insurance = mean(hh_insurance, na.rm = TRUE),
    avg_stunting = mean(stunting, na.rm = TRUE),
    avg_clean_fuel = mean(hh_cooking_fuel, na.rm = TRUE),
    avg_child_anemia = mean(child_anaemia, na.rm = TRUE),
    .groups = "drop"
  )
print(urban_rural_comparison)
## # A tibble: 2 × 7
## area_proxy count avg_literacy avg_insurance avg_stunting avg_clean_fuel
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 High Electricity… 588 75.8 41.3 32.2 57.6
## 2 Low Electricity … 118 66.9 35.1 40.2 36.7
## # ℹ 1 more variable: avg_child_anemia <dbl>
#Level 5: Feature Engineering & Visualizations
#2.1 # 1. Composite Infrastructure Index (CII)
# The Operation: We aggregate four key household metrics (electricity, improved water, sanitation, and clean fuel) into a single CII score.
# \[CII = \frac{elect + water + sanit + fuel}{4}\]
# Why? Instead of looking at four separate graphs, this single feature tells us the overall “Modernization” level of a district.
# Visualization: A Histogram of the CII to see if development in India is normally distributed or heavily skewed.
# Add cii: the plain average of the four household columns
# (hh_electricity, hh_water, hh_sanitation, hh_cooking_fuel).
df <- df %>%
  mutate(
    cii = (hh_electricity + hh_water + hh_sanitation + hh_cooking_fuel) / 4
  )
# Show the four inputs next to the resulting score for the first 20 rows.
df %>%
  select(
    district, state,
    hh_electricity, hh_water, hh_sanitation, hh_cooking_fuel,
    cii
  ) %>%
  head(n = 20)
## district state hh_electricity hh_water
## 1 Nicobars Andaman & Nicobar Islands 97.9 98.8
## 2 North & Middle Andaman Andaman & Nicobar Islands 93.2 92.2
## 3 South Andaman Andaman & Nicobar Islands 99.6 97.9
## 4 Srikakulam Andhra Pradesh 99.9 87.7
## 5 Vizianagaram Andhra Pradesh 99.5 93.1
## 6 Visakhapatnam Andhra Pradesh 99.6 91.8
## 7 East Godavari Andhra Pradesh 98.8 97.9
## 8 West Godavari Andhra Pradesh 99.3 99.1
## 9 Krishna Andhra Pradesh 99.6 94.4
## 10 Guntur Andhra Pradesh 99.2 99.3
## 11 Prakasam Andhra Pradesh 99.3 98.6
## 12 Sri Potti Sriramulu Nello Andhra Pradesh 99.1 95.6
## 13 Y.S.R. Andhra Pradesh 99.8 99.9
## 14 Kurnool Andhra Pradesh 99.9 97.9
## 15 Anantapur Andhra Pradesh 99.6 98.8
## 16 Chittoor Andhra Pradesh 99.7 98.5
## 17 Tawang Arunachal Pradesh 99.7 99.8
## 18 West Kameng Arunachal Pradesh 97.5 99.8
## 19 East Kameng Arunachal Pradesh 86.3 93.6
## 20 Papum Pare Arunachal Pradesh 98.7 92.9
## hh_sanitation hh_cooking_fuel cii
## 1 83.5 56.9 84.275
## 2 86.4 61.3 83.275
## 3 89.3 91.9 94.675
## 4 71.6 74.7 83.475
## 5 61.7 60.3 78.650
## 6 77.8 72.9 85.525
## 7 77.7 80.3 88.675
## 8 80.8 86.8 91.500
## 9 79.1 89.8 90.725
## 10 83.4 91.7 93.400
## 11 78.4 90.8 91.775
## 12 81.8 84.6 90.275
## 13 85.4 94.3 94.850
## 14 73.5 78.1 87.350
## 15 71.3 86.4 89.025
## 16 74.6 86.6 89.850
## 17 81.7 88.1 92.325
## 18 88.9 88.9 93.775
## 19 75.6 49.5 76.250
## 20 75.7 81.3 87.150
#2.2 # Literacy Tiering (Categorical Binning) Convert the continuous women_literate (Literacy) variable into three categorical “Tiers” (Low, Medium, High) using cut points at its 33rd and 66th percentiles.
# Add literacy_tier: women_literate binned into "Low" / "Medium" / "High" with
# cut points at its 0th, 33rd, 66th and 100th percentiles. include.lowest keeps
# the row holding the minimum value inside the first bin.
df <- df %>%
  mutate(
    literacy_tier = cut(
      women_literate,
      breaks = quantile(
        women_literate,
        probs = c(0, 0.33, 0.66, 1),
        na.rm = TRUE
      ),
      include.lowest = TRUE,
      labels = c("Low", "Medium", "High")
    )
  )
# Show the assigned tier for the first 35 rows.
df %>%
  select(district, state, women_literate, literacy_tier) %>%
  head(n = 35)
## district state women_literate
## 1 Nicobars Andaman & Nicobar Islands 87.5
## 2 North & Middle Andaman Andaman & Nicobar Islands 84.0
## 3 South Andaman Andaman & Nicobar Islands 86.7
## 4 Srikakulam Andhra Pradesh 64.3
## 5 Vizianagaram Andhra Pradesh 58.3
## 6 Visakhapatnam Andhra Pradesh 69.5
## 7 East Godavari Andhra Pradesh 77.9
## 8 West Godavari Andhra Pradesh 77.0
## 9 Krishna Andhra Pradesh 76.9
## 10 Guntur Andhra Pradesh 68.5
## 11 Prakasam Andhra Pradesh 62.8
## 12 Sri Potti Sriramulu Nello Andhra Pradesh 70.5
## 13 Y.S.R. Andhra Pradesh 63.8
## 14 Kurnool Andhra Pradesh 57.0
## 15 Anantapur Andhra Pradesh 63.6
## 16 Chittoor Andhra Pradesh 69.3
## 17 Tawang Arunachal Pradesh 59.1
## 18 West Kameng Arunachal Pradesh 73.7
## 19 East Kameng Arunachal Pradesh 62.2
## 20 Papum Pare Arunachal Pradesh 78.2
## 21 Upper Subansiri Arunachal Pradesh 64.3
## 22 Upper Siang Arunachal Pradesh 74.4
## 23 Changlang Arunachal Pradesh 77.6
## 24 Lower Subansiri Arunachal Pradesh 76.8
## 25 Dibang Valley Arunachal Pradesh 77.8
## 26 Lower Dibang Valley Arunachal Pradesh 81.6
## 27 Anjaw Arunachal Pradesh 67.9
## 28 East Siang Arunachal Pradesh 83.8
## 29 Kra Daadi Arunachal Pradesh 55.3
## 30 Kurung Kumey Arunachal Pradesh 76.5
## 31 Lohit Arunachal Pradesh 73.4
## 32 Longding Arunachal Pradesh 71.9
## 33 Namsai Arunachal Pradesh 67.1
## 34 Siang Arunachal Pradesh 87.4
## 35 Tirap Arunachal Pradesh 75.1
## literacy_tier
## 1 High
## 2 High
## 3 High
## 4 Low
## 5 Low
## 6 Low
## 7 Medium
## 8 Medium
## 9 Medium
## 10 Low
## 11 Low
## 12 Medium
## 13 Low
## 14 Low
## 15 Low
## 16 Low
## 17 Low
## 18 Medium
## 19 Low
## 20 Medium
## 21 Low
## 22 Medium
## 23 Medium
## 24 Medium
## 25 Medium
## 26 High
## 27 Low
## 28 High
## 29 Low
## 30 Medium
## 31 Medium
## 32 Medium
## 33 Low
## 34 High
## 35 Medium
# Number of rows in each literacy tier.
table(df[["literacy_tier"]])
##
## Low Medium High
## 233 233 240
#2.3 # 3. Child Malnutrition “Burden” Score
# Add malnutrition_burden: the sum of the stunting, wasting and underweight
# columns for each row.
df <- df %>%
  mutate(
    malnutrition_burden = stunting + wasting + underweight
  )
# The 35 rows with the largest burden score, shown with their three components.
df %>%
  arrange(desc(malnutrition_burden)) %>%
  select(district, state, stunting, wasting, underweight, malnutrition_burden) %>%
  head(n = 35)
## district state stunting wasting underweight
## 1 Pashchimi Singhbhum Jharkhand 60.6 30.5 62.4
## 2 Dohad Gujarat 55.3 27.8 53.0
## 3 Arwal Bihar 45.6 36.8 52.9
## 4 Panch Mahals Gujarat 47.1 35.7 51.9
## 5 Nandurbar Maharastra 45.8 30.7 57.2
## 6 The Dangs Gujarat 37.6 40.9 53.1
## 7 Tapi Gujarat 41.7 36.6 51.8
## 8 Karimganj Assam 29.1 48.0 52.9
## 9 Jehanabad Bihar 41.3 36.6 51.7
## 10 Adilabad Telangana 45.7 29.5 52.0
## 11 Banda Uttar Pradesh 51.0 25.7 49.8
## 12 Pakur Jharkhand 51.3 23.6 51.4
## 13 Chhota Udaipur Gujarat 48.6 28.4 48.1
## 14 Buldana Maharastra 45.0 31.7 47.2
## 15 Aravali Gujarat 47.1 29.0 47.2
## 16 Narmada Gujarat 47.2 23.0 52.8
## 17 Aurangabad Bihar 41.2 32.9 48.7
## 18 Dhule Maharastra 37.6 38.9 46.0
## 19 Chandrapur Maharastra 37.3 38.5 46.6
## 20 Saraikela-Kharsawan Jharkhand 40.0 32.9 48.7
## 21 Araria Bihar 49.9 23.9 47.8
## 22 Yadgir Karnataka 57.6 17.7 45.2
## 23 Rohtas Bihar 40.0 31.8 48.2
## 24 Bijapur Chhattisgarh 53.8 20.0 46.1
## 25 Banka Bihar 46.7 26.9 45.8
## 26 Kaimur (Bhabua) Bihar 44.1 27.3 47.2
## 27 Mahisagar Gujarat 43.4 26.2 49.0
## 28 Buxar Bihar 39.6 33.2 45.3
## 29 Koppal Karnataka 49.1 23.1 45.8
## 30 Nalanda Bihar 42.6 27.8 46.7
## 31 Purnia Bihar 43.5 25.8 47.1
## 32 Bhojpur Bihar 40.7 31.3 44.0
## 33 Nabarangapur Odisha 44.1 25.2 46.6
## 34 Katihar Bihar 43.9 23.5 48.1
## 35 Zunheboto Nagaland 44.0 26.9 44.5
## malnutrition_burden
## 1 153.5
## 2 136.1
## 3 135.3
## 4 134.7
## 5 133.7
## 6 131.6
## 7 130.1
## 8 130.0
## 9 129.6
## 10 127.2
## 11 126.5
## 12 126.3
## 13 125.1
## 14 123.9
## 15 123.3
## 16 123.0
## 17 122.8
## 18 122.5
## 19 122.4
## 20 121.6
## 21 121.6
## 22 120.5
## 23 120.0
## 24 119.9
## 25 119.4
## 26 118.6
## 27 118.6
## 28 118.1
## 29 118.0
## 30 117.1
## 31 116.4
## 32 116.0
## 33 115.9
## 34 115.5
## 35 115.4
# Correlation between the cii and malnutrition_burden columns, using only rows
# where both values are present.
burden_correlation <- with(
  df,
  cor(cii, malnutrition_burden, use = "complete.obs")
)
cat("Correlation between CII and Malnutrition Burden:", round(burden_correlation, 4))
## Correlation between CII and Malnutrition Burden: -0.4915
#2.4 # 4. Public Health Reliance Ratio
# Share of institutional births that took place in a public facility.
# A zero denominator is mapped to NA first, so the ratio can never become
# Inf/NaN and float to the top of a descending ranking.
df <- df %>%
  mutate(
    public_health_reliance = inst_births_public / na_if(inst_births, 0)
  )
cat("Top 35 Districts: Highest Public Health Reliance\n")
## Top 35 Districts: Highest Public Health Reliance
# Sort districts from most to least reliant on public facilities, keep the
# first 35 and show only the columns relevant to the ratio.
df %>%
  arrange(desc(public_health_reliance)) %>%
  head(n = 35) %>%
  select(
    district, state,
    inst_births, inst_births_public, public_health_reliance
  )
## district state inst_births
## 1 Kargil Ladakh 93.7
## 2 South Garo Hills Meghalaya 88.3
## 3 South West Garo Hills Meghalaya 83.7
## 4 East Garo Hills Meghalaya 73.8
## 5 Tawang Arunachal Pradesh 84.2
## 6 Sukma Chhattisgarh 81.2
## 7 Leh(Ladakh) Ladakh 96.4
## 8 Kishtwar Jammu & Kashmir 91.6
## 9 Malkangiri Odisha 90.7
## 10 Upper Subansiri Arunachal Pradesh 77.0
## 11 Nicobars Andaman & Nicobar Islands 97.8
## 12 Dindori Madhya Pradesh 77.6
## 13 Udhampur Jammu & Kashmir 87.2
## 14 Anjaw Arunachal Pradesh 78.0
## 15 Upper Siang Arunachal Pradesh 76.0
## 16 Kandhamal Odisha 93.9
## 17 Mayurbhanj Odisha 91.7
## 18 West Kameng Arunachal Pradesh 93.6
## 19 North Garo Hills Meghalaya 61.5
## 20 Tuensang Nagaland 34.8
## 21 Bijapur Chhattisgarh 63.6
## 22 Punch Jammu & Kashmir 86.0
## 23 Ramban Jammu & Kashmir 80.4
## 24 Badgam Jammu & Kashmir 96.5
## 25 Longleng Nagaland 38.7
## 26 East Kameng Arunachal Pradesh 76.0
## 27 South Salmara Mancachar Assam 71.7
## 28 Siang Arunachal Pradesh 81.1
## 29 Longding Arunachal Pradesh 64.5
## 30 North & Middle Andaman Andaman & Nicobar Islands 97.7
## 31 Kiphire Nagaland 34.8
## 32 Raisen Madhya Pradesh 96.0
## 33 Shahdol Madhya Pradesh 85.6
## 34 Ganderbal Jammu & Kashmir 98.0
## 35 Dhalai Tripura 87.3
## inst_births_public public_health_reliance
## 1 93.7 1.0000000
## 2 88.3 1.0000000
## 3 83.7 1.0000000
## 4 73.5 0.9959350
## 5 83.8 0.9952494
## 6 80.8 0.9950739
## 7 95.7 0.9927386
## 8 90.8 0.9912664
## 9 89.9 0.9911797
## 10 76.2 0.9896104
## 11 96.7 0.9887526
## 12 76.6 0.9871134
## 13 86.0 0.9862385
## 14 76.9 0.9858974
## 15 74.9 0.9855263
## 16 92.4 0.9840256
## 17 90.2 0.9836423
## 18 92.0 0.9829060
## 19 60.4 0.9821138
## 20 34.1 0.9798851
## 21 62.3 0.9795597
## 22 84.2 0.9790698
## 23 78.7 0.9788557
## 24 94.3 0.9772021
## 25 37.8 0.9767442
## 26 74.2 0.9763158
## 27 69.9 0.9748954
## 28 79.0 0.9741060
## 29 62.8 0.9736434
## 30 95.0 0.9723644
## 31 33.8 0.9712644
## 32 93.2 0.9708333
## 33 83.0 0.9696262
## 34 95.0 0.9693878
## 35 84.5 0.9679267
cat("\nBottom 35 Districts: Lowest Public Health Reliance (High Private Reliance)\n")
##
## Bottom 35 Districts: Lowest Public Health Reliance (High Private Reliance)
# Same ranking as above but ascending: the 35 districts where the smallest
# share of institutional births happens in public facilities.
df %>%
  arrange(public_health_reliance) %>%
  head(n = 35) %>%
  select(
    district, state,
    inst_births, inst_births_public, public_health_reliance
  )
## district state inst_births inst_births_public
## 1 Prakasam Andhra Pradesh 97.4 18.1
## 2 Ernakulam Kerala 99.1 23.2
## 3 Patan Gujarat 98.6 25.2
## 4 Malappuram Kerala 100.0 25.6
## 5 Panch Mahals Gujarat 88.4 24.7
## 6 Mahesana Gujarat 97.3 27.4
## 7 Mahisagar Gujarat 93.0 26.6
## 8 Palakkad Kerala 100.0 29.6
## 9 Kasaragod Kerala 100.0 29.9
## 10 Thrissur Kerala 100.0 30.9
## 11 Banas Kantha Gujarat 92.9 29.1
## 12 Morbi Gujarat 94.8 30.1
## 13 Meerut Uttar Pradesh 80.6 25.7
## 14 Nirmal Telangana 96.7 31.3
## 15 Kannur Kerala 100.0 33.6
## 16 Karimnagar Telangana 98.4 34.2
## 17 Thiruvananthapuram Kerala 99.5 34.7
## 18 Sri Potti Sriramulu Nello Andhra Pradesh 97.0 34.9
## 19 Bijnor Uttar Pradesh 83.7 30.6
## 20 Pathanamthitta Kerala 99.1 36.8
## 21 Gautam Buddha Nagar Uttar Pradesh 86.6 32.4
## 22 Wayanad Kerala 100.0 37.6
## 23 Mancherial Telangana 90.1 33.9
## 24 Aravali Gujarat 92.2 34.8
## 25 Udupi Karnataka 98.9 37.6
## 26 Jyotiba Phule Nagar Uttar Pradesh 81.1 31.3
## 27 Rajanna Sircilla Telangana 97.6 37.7
## 28 Gir Somnath Gujarat 87.1 34.0
## 29 Devbhumi Dwarka Gujarat 94.8 37.2
## 30 Sangli Maharastra 98.0 38.5
## 31 Kanniyakumari Tamil Nadu 100.0 39.4
## 32 Khammam Telangana 97.9 39.0
## 33 Medchal-Malkajgiri Telangana 98.3 39.4
## 34 Gandhinagar Gujarat 97.8 39.5
## 35 Krishna Andhra Pradesh 98.9 40.1
## public_health_reliance
## 1 0.1858316
## 2 0.2341070
## 3 0.2555781
## 4 0.2560000
## 5 0.2794118
## 6 0.2816033
## 7 0.2860215
## 8 0.2960000
## 9 0.2990000
## 10 0.3090000
## 11 0.3132400
## 12 0.3175105
## 13 0.3188586
## 14 0.3236815
## 15 0.3360000
## 16 0.3475610
## 17 0.3487437
## 18 0.3597938
## 19 0.3655914
## 20 0.3713421
## 21 0.3741339
## 22 0.3760000
## 23 0.3762486
## 24 0.3774403
## 25 0.3801820
## 26 0.3859433
## 27 0.3862705
## 28 0.3903559
## 29 0.3924051
## 30 0.3928571
## 31 0.3940000
## 32 0.3983657
## 33 0.4008138
## 34 0.4038855
## 35 0.4054601
# Mean public-health reliance per state/UT (missing ratios ignored),
# ordered from the most to the least reliant.
state_reliance <- df %>%
  group_by(state) %>%
  summarise(
    avg_reliance = mean(public_health_reliance, na.rm = TRUE)
  ) %>%
  arrange(desc(avg_reliance))

print(state_reliance)
## # A tibble: 36 × 2
## state avg_reliance
## <chr> <dbl>
## 1 Ladakh 0.996
## 2 Arunachal Pradesh 0.953
## 3 Jammu & Kashmir 0.948
## 4 Andaman & Nicobar Islands 0.934
## 5 Madhya Pradesh 0.897
## 6 Meghalaya 0.888
## 7 Tripura 0.886
## 8 Assam 0.883
## 9 Sikkim 0.874
## 10 Mizoram 0.869
## # ℹ 26 more rows
#2.5 # 5. The Gender “Risk Delta” (Behavioral Gap)
# Difference between men's and women's tobacco use; positive values mean
# men's use exceeds women's.
df <- mutate(df, tobacco_gap = men_tobacco - women_tobacco)

cat("Top 35 Districts: Largest Gender Gap in Tobacco Use (Men > Women)\n")
## Top 35 Districts: Largest Gender Gap in Tobacco Use (Men > Women)
# The 35 districts with the widest men-minus-women tobacco gap.
df %>%
  arrange(desc(tobacco_gap)) %>%
  head(n = 35) %>%
  select(district, state, women_tobacco, men_tobacco, tobacco_gap)
## district state women_tobacco men_tobacco tobacco_gap
## 1 Sheohar Bihar 4.5 56.6 52.1
## 2 Jamui Bihar 2.8 53.6 50.8
## 3 North Garo Hills Meghalaya 3.8 54.1 50.3
## 4 Mahoba Uttar Pradesh 13.2 63.3 50.1
## 5 Chhatarpur Madhya Pradesh 10.8 60.5 49.7
## 6 Satna Madhya Pradesh 6.3 55.9 49.6
## 7 Darbhanga Bihar 5.6 55.1 49.5
## 8 Sitamarhi Bihar 6.4 55.4 49.0
## 9 Lalitpur Uttar Pradesh 5.5 54.5 49.0
## 10 Madhubani Bihar 5.0 53.9 48.9
## 11 Amreli Gujarat 5.0 53.7 48.7
## 12 Sonbhadra Uttar Pradesh 5.8 54.1 48.3
## 13 Tikamgarh Madhya Pradesh 5.6 53.9 48.3
## 14 Samastipur Bihar 3.5 51.7 48.2
## 15 Gaya Bihar 3.7 51.9 48.2
## 16 Kaimur (Bhabua) Bihar 2.8 50.8 48.0
## 17 East Garo Hills Meghalaya 6.3 54.0 47.7
## 18 Rewa Madhya Pradesh 7.6 55.3 47.7
## 19 Purba Champaran Bihar 7.6 55.2 47.6
## 20 Banka Bihar 1.8 49.4 47.6
## 21 Hamirpur Uttar Pradesh 17.0 64.5 47.5
## 22 Morbi Gujarat 7.3 54.7 47.4
## 23 Sidhi Madhya Pradesh 7.4 54.5 47.1
## 24 Nalanda Bihar 2.8 49.8 47.0
## 25 Dumka Jharkhand 9.6 56.6 47.0
## 26 Chitrakoot Uttar Pradesh 12.7 59.6 46.9
## 27 Rajkot Gujarat 7.2 53.4 46.2
## 28 Longleng Nagaland 7.9 54.0 46.1
## 29 Sitapur Uttar Pradesh 13.8 59.7 45.9
## 30 Katni Madhya Pradesh 11.7 57.4 45.7
## 31 Begusarai Bihar 4.0 49.6 45.6
## 32 Hardoi Uttar Pradesh 6.5 52.1 45.6
## 33 Jhalawar Rajasthan 9.8 55.4 45.6
## 34 Arwal Bihar 2.4 47.9 45.5
## 35 Mon Nagaland 6.9 52.4 45.5
# Mean tobacco gap per state/UT (missing gaps ignored), widest gap first.
state_tobacco_gap <- df %>%
  group_by(state) %>%
  summarise(
    avg_tobacco_gap = mean(tobacco_gap, na.rm = TRUE)
  ) %>%
  arrange(desc(avg_tobacco_gap))

print(state_tobacco_gap)
## # A tibble: 36 × 2
## state avg_tobacco_gap
## <chr> <dbl>
## 1 Bihar 44.1
## 2 Jharkhand 39.5
## 3 Nagaland 36.6
## 4 Madhya Pradesh 36.6
## 5 Uttar Pradesh 36.5
## 6 Rajasthan 35.8
## 7 Jammu & Kashmir 35.8
## 8 West Bengal 35.8
## 9 Gujarat 34.5
## 10 Dadra and Nagar Haveli & Daman and Diu 34.4
## # ℹ 26 more rows
# Plotting # P.1 # 1. Bar Charts (3 Different Types) # P.1.1 The Leaderboard (Horizontal Ranked Bar Chart) # Ranks the top 20 states/UTs by average full vaccination
# Average full-vaccination coverage per state/UT (unweighted mean over the
# rows of `df`, missing values ignored), sorted from highest to lowest.
state_vaccination <- df %>%
  group_by(state) %>%
  summarize(avg_vacc_full = mean(vacc_full_all, na.rm = TRUE)) %>%
  arrange(desc(avg_vacc_full))

# Keep the 20 best-performing states for the leaderboard.
top_20_states_vacc <- state_vaccination %>%
  head(20)

# Horizontal ranked bar chart; reorder() puts the best state at the top
# after coord_flip().
plot_state <- ggplot(
  top_20_states_vacc,
  aes(x = reorder(state, avg_vacc_full), y = avg_vacc_full)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    # The count is derived from the data so the title cannot drift from the
    # number of bars actually drawn (it used to say "Top 10" for 20 bars).
    title = paste0(
      "Top ", nrow(top_20_states_vacc),
      " States: Full Vaccination Leaderboard"
    ),
    subtitle = "State-level average of full vaccination coverage across states",
    x = "State / UT",
    y = "Average Full Vaccination (%)"
  ) +
  theme_minimal()
print(plot_state)
print(top_20_states_vacc)
## # A tibble: 20 × 2
## state avg_vacc_full
## <chr> <dbl>
## 1 "Odisha" 91.8
## 2 "Dadra and Nagar Haveli & Daman and Diu" 90.4
## 3 "Tamil Nadu" 89.0
## 4 "Himachal Pradesh" 88.4
## 5 "Ladakh" 88.2
## 6 "Sikkim" 87.9
## 7 "Jammu & Kashmir" 87.1
## 8 "West Bengal" 86.7
## 9 "Karnataka" 86.4
## 10 " Lakshadweep " 86.1
## 11 "Puducherry" 86
## 12 "Uttarakhand" 83.6
## 13 "Chandigarh" 80.9
## 14 "Rajasthan" 80.4
## 15 "Chhattisgarh" 80.4
## 16 "Madhya Pradesh" 78.8
## 17 "Kerala" 78.8
## 18 "Telangana" 78.5
## 19 "Goa" 78.1
## 20 "Gujarat" 78.1
# Grouped bar chart: average tobacco use of women vs. men.
# The ten states/UTs with the most surveyed households are selected; this is
# a survey-size ranking, not a population count.
top_10_states_list <- df %>%
  group_by(state) %>%
  summarize(total_hh = sum(hh_surveyed, na.rm = TRUE)) %>%
  arrange(desc(total_hh)) %>%
  head(10) %>%
  pull(state)

# Mean tobacco use per gender in those states, reshaped to one row per
# state/gender pair so the bars can be dodged side by side.
tobacco_comp <- df %>%
  filter(state %in% top_10_states_list) %>%
  group_by(state) %>%
  summarize(
    Women = mean(women_tobacco, na.rm = TRUE),
    Men = mean(men_tobacco, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(Women, Men),
    names_to = "Gender",
    values_to = "Tobacco_Use"
  )

plot_b <- ggplot(tobacco_comp, aes(x = state, y = Tobacco_Use, fill = Gender)) +
  geom_col(position = "dodge") +
  labs(
    title = "Gender Disparity in Tobacco Use",
    # The subtitle used to read "Top 5 most populous states", but ten states
    # are plotted and they are picked by households surveyed.
    subtitle = paste0(
      "Comparison across the ", length(top_10_states_list),
      " states with the most surveyed households"
    ),
    x = "State",
    y = "Average Tobacco Use (%)"
  ) +
  scale_fill_manual(values = c("Women" = "#f8766d", "Men" = "#00bfc4")) +
  theme_minimal()
print(plot_b)
# P.1.3 Infrastructure Composition (Stacked Percentage Bar Chart)
# Shows the mix of amenities across literacy tiers
library(dplyr)
library(tidyr)
library(ggplot2)

# Bucket every row into a literacy tier from `women_literate`:
# < 50 -> Low, 50 to < 75 -> Medium, >= 75 -> High; a missing value stays NA.
df <- df %>%
  mutate(
    literacy_tier = case_when(
      women_literate < 50 ~ "Low Literacy",
      women_literate >= 50 & women_literate < 75 ~ "Medium Literacy",
      women_literate >= 75 ~ "High Literacy",
      TRUE ~ NA_character_
    )
  )

# Mean electricity / water / sanitation value per tier, reshaped to long form.
# `Percentage` is each amenity's share of the tier total and matches what
# position = "fill" draws. The result is ungrouped so later verbs on
# `infra_comp` are not silently applied per tier.
infra_comp <- df %>%
  filter(!is.na(literacy_tier)) %>%
  group_by(literacy_tier) %>%
  summarize(
    Electricity = mean(hh_electricity, na.rm = TRUE),
    Water = mean(hh_water, na.rm = TRUE),
    Sanitation = mean(hh_sanitation, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_longer(
    cols = -literacy_tier,
    names_to = "Amenity",
    values_to = "Value"
  ) %>%
  group_by(literacy_tier) %>%
  mutate(Percentage = Value / sum(Value)) %>%
  ungroup()

plot_c <- ggplot(infra_comp, aes(x = literacy_tier, y = Value, fill = Amenity)) +
  geom_col(position = "fill") +
  geom_text(
    aes(label = paste0(round(Percentage * 100, 1), "%")),
    position = position_fill(vjust = 0.5),
    color = "white",
    fontface = "bold",
    size = 4
  ) +
  # Show the tiers in ordinal order (Low -> Medium -> High) instead of the
  # alphabetical default; tiers absent from the data are not drawn.
  scale_x_discrete(
    limits = intersect(
      c("Low Literacy", "Medium Literacy", "High Literacy"),
      infra_comp$literacy_tier
    )
  ) +
  labs(
    title = "Infrastructure Composition by Literacy Tier",
    subtitle = "Relative share of amenities",
    x = "Literacy Tier",
    y = "Proportion"
  ) +
  scale_y_continuous(labels = function(x) paste0(x * 100, "%")) +
  theme_minimal() +
  theme(legend.position = "bottom")
print(plot_c)
# P.2 Histogram # P.2.1 Distribution of the CII (Composite
# Infrastructure Index).
# Composite Infrastructure Index: plain average of the four household
# amenity indicators. Any missing component makes the row's CII NA.
df <- df %>%
  mutate(cii = (hh_electricity + hh_water + hh_sanitation + hh_cooking_fuel) / 4)

# Density-scaled histogram with a kernel density overlay and a dashed red
# line at the mean CII.
plot_histogram <- ggplot(df, aes(x = cii)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30,
    fill = "#69b3a2",
    color = "#e9ecef",
    alpha = 0.7
  ) +
  # `linewidth` replaces the `size` aesthetic for lines, which was deprecated
  # in ggplot2 3.4.0 and triggered a warning here.
  geom_density(color = "#404080", linewidth = 1.2) +
  # The mean is passed as a constant so one line is drawn, rather than one
  # identical line per row of `df` as with an aes() mapping.
  geom_vline(
    xintercept = mean(df$cii, na.rm = TRUE),
    color = "red",
    linetype = "dashed",
    linewidth = 1
  ) +
  labs(
    title = "Distribution of Composite Infrastructure Index (CII)",
    subtitle = "Histogram showing density and distribution shape across districts",
    x = "CII Score (Development Level)",
    y = "Density"
  ) +
  theme_minimal()
print(plot_histogram)
# P.3. Pie Chart # P.3.1 National Proportion of fuel (Clean Fuel)
# vs. Non-Clean Fuel households.
# Two-row summary: the mean of `hh_cooking_fuel` over all rows ("Clean") and
# its complement to 100 ("Non_Clean"), in long form for plotting.
national_fuel_stats <- df %>%
  summarise(Clean = mean(hh_cooking_fuel, na.rm = TRUE)) %>%
  mutate(Non_Clean = 100 - Clean) %>%
  pivot_longer(
    cols = everything(),
    names_to = "Fuel_Type",
    values_to = "Percentage"
  )

# A single stacked bar wrapped into polar coordinates gives the pie; the
# labels sit in the middle of each slice.
plot_pie <- ggplot(
  national_fuel_stats,
  aes(x = "", y = Percentage, fill = Fuel_Type)
) +
  geom_bar(stat = "identity", width = 1, color = "white") +
  coord_polar("y", start = 0) +
  geom_text(
    aes(label = paste0(round(Percentage, 1), "%")),
    position = position_stack(vjust = 0.5),
    color = "white",
    fontface = "bold"
  ) +
  labs(
    title = "National Proportion of Cooking Fuel",
    subtitle = "Clean Fuel vs. Non-Clean Fuel Households",
    fill = "Fuel Category"
  ) +
  theme_void() +
  scale_fill_manual(
    values = c("Clean" = "#23c0d9", "Non_Clean" = "#b47fc9")
  )

print(plot_pie)
# P.4. Pair Plot # P.4.1 Correlation Matrix/Pair Grid of women_literate, cii,
# vacc_full_all, and malnutrition_burden.
library(GGally)
library(dplyr)

# Malnutrition burden = stunting + wasting + underweight, the same explicit
# definition used for the scatter plot later in this script. The previous
# regex-based rowSums() could pick up any column whose name merely contains
# "under", "wast" or "stunt", and na.rm = TRUE turned missing indicators
# into a burden of 0. With `+`, a row missing any component is NA and is
# removed by na.omit() below.
df$malnutrition_burden <- df$stunting + df$wasting + df$underweight

# Complete cases of the four variables shown in the pair grid.
plot_data <- na.omit(
  df[, c("women_literate", "cii", "vacc_full_all", "malnutrition_burden")]
)

# Pairwise scatter plots, densities and correlations.
ggpairs(plot_data, progress = FALSE)
#P.5 Box Plot # P.5.1 Box Plot comparing anemia prevalence
# Reload the raw file with its original column headers.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this file exists.
df <- read.csv("C:\\Users\\asus\\Downloads\\dHealth.csv", check.names = FALSE)

# Rename the two columns of interest, strip every character that is not a
# digit or a dot before converting to numeric, and split female literacy into
# three bands at the 33rd and 66th percentiles.
df <- df %>%
  rename(
    women_literate = `Women (age 15-49) who are literate4 (%)`,
    all_women_anaemic = `All women age 15-49 years who are anaemic22 (%)`
  ) %>%
  mutate(
    women_literate = as.numeric(gsub("[^0-9.]", "", women_literate)),
    all_women_anaemic = as.numeric(gsub("[^0-9.]", "", all_women_anaemic))
  ) %>%
  mutate(
    literacy_tier = cut(
      women_literate,
      breaks = quantile(women_literate, probs = c(0, 0.33, 0.66, 1), na.rm = TRUE),
      labels = c("Low", "Medium", "High"),
      include.lowest = TRUE
    )
  )

# Rows without a tier or without an anaemia value are dropped for the plot
# only (`df` itself is unchanged); otherwise ggplot draws a spurious "NA"
# box and warns about removed points.
df %>%
  filter(!is.na(literacy_tier), !is.na(all_women_anaemic)) %>%
  ggplot(aes(x = literacy_tier, y = all_women_anaemic, fill = literacy_tier)) +
  # Outliers are hidden on the box because the jitter layer already shows
  # every point.
  geom_boxplot(outlier.shape = NA, alpha = 0.7) +
  geom_jitter(width = 0.2, alpha = 0.2, size = 1, color = "#000") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Women's Anemia Distribution by Literacy Tier",
    subtitle = "Analysis of health variance across education levels",
    x = "Literacy Tier (Women)",
    y = "Anemia Prevalence (%)"
  ) +
  theme_wsj() +
  theme(legend.position = "none")
#P.6. Line Chart # P.6.1 The Trend Profile (Ordered Line Chart)
# Reload the raw file, keep at most the first 109 columns and apply the short
# column names held in `new_names` (defined elsewhere in this script).
df <- read.csv("C:\\Users\\asus\\Downloads\\dHealth.csv", check.names = FALSE)
if (ncol(df) > 109) {
  df <- df[, seq_len(109)]
}
colnames(df) <- new_names

# Every column except the two identifiers is converted to numeric after
# removing all characters other than digits and dots.
df <- df %>%
  mutate(
    across(
      -c(district, state),
      function(col) as.numeric(gsub("[^0-9.]", "", col))
    )
  )

# State-level means of female literacy and institutional births.
state_trends_a <- df %>%
  group_by(state) %>%
  summarise(
    avg_lit = mean(women_literate, na.rm = TRUE),
    avg_inst = mean(inst_births, na.rm = TRUE)
  )

message("Success! Data trimmed and state_trends_a created.")
## Success! Data trimmed and state_trends_a created.
# `df` already carries `new_names`, and `state_trends_a` was computed from the
# same data immediately above, so the copy-pasted rename and summary that
# used to sit here were redundant and have been removed.

# Ordered line chart: states sorted by average female literacy on the
# (flipped) x axis, average institutional births as the value.
plot_a <- ggplot(
  state_trends_a,
  aes(x = reorder(state, avg_lit), y = avg_inst, group = 1)
) +
  geom_line(color = "#2c3e50", linewidth = 1) +
  geom_point(color = "#e74c3c", size = 2) +
  coord_flip() +
  labs(
    title = "The Path to Progress: Literacy vs. Institutional Births",
    subtitle = "States ranked from Lowest to Highest Female Literacy",
    x = "State (Ranked by Literacy)",
    y = "Average Institutional Births (%)"
  ) +
  theme_minimal()
print(plot_a)
# Second rendering of the literacy vs. institutional-births chart with a
# different colour scheme. `df` and `state_trends_a` are unchanged since they
# were last built, so the repeated rename and summary were dropped.
plot_a <- ggplot(
  state_trends_a,
  aes(x = reorder(state, avg_lit), y = avg_inst, group = 1)
) +
  geom_line(color = "#f01a1a", linewidth = 1) +
  geom_point(color = "#1a0901", size = 2) +
  coord_flip() +
  labs(
    title = "The Path to Progress: Literacy vs. Institutional Births",
    subtitle = "States ranked from Lowest to Highest Female Literacy",
    x = "State (Ranked by Literacy)",
    y = "Average Institutional Births (%)"
  ) +
  theme_minimal()
print(plot_a)
# Details: this line chart shows the relationship between female
# literacy and institutional births across states, with states ordered by
# literacy to reveal trends in how education may influence healthcare
# utilization. For example, Kerala has the highest literacy and also high
# institutional births, while states with lower literacy tend to have
# lower institutional births, highlighting the potential impact of
# education on health outcomes.
# State-level means of the stunting and wasting indicators.
state_trends_b <- df %>%
  group_by(state) %>%
  summarise(
    avg_stunting = mean(stunting, na.rm = TRUE),
    avg_wasted = mean(wasting, na.rm = TRUE)
  )

# Long form: one row per state/indicator pair so each indicator is drawn as
# its own coloured line.
malnutrition_long <- pivot_longer(
  state_trends_b,
  cols = c(avg_stunting, avg_wasted),
  names_to = "Malnutrition_Type",
  values_to = "Rate"
)

# Note: this reassigns `plot_b`, which held the tobacco chart earlier on.
plot_b <- ggplot(
  malnutrition_long,
  aes(
    x = state,
    y = Rate,
    color = Malnutrition_Type,
    group = Malnutrition_Type
  )
) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  labs(
    title = "Chronic vs. Acute Malnutrition Trends",
    subtitle = "Comparing long-term stunting vs. short-term wasting across States",
    x = "State",
    y = "Prevalence Rate (%)",
    color = "Metric Type"
  ) +
  scale_color_manual(
    values = c("avg_stunting" = "#d35400", "avg_wasted" = "#2980b9"),
    labels = c("avg_stunting" = "Stunting (Chronic)", "avg_wasted" = "Wasting (Acute)")
  ) +
  theme_wsj() +
  # Vertical state names so the labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

print(plot_b)
# P.7 Scatter Plot # P.7.1 cii (Infrastructure)
# vs. malnutrition_burden.
# Derived indicators for the scatter plot:
#   cii                 - average of the four household amenity indicators
#   malnutrition_burden - stunting + wasting + underweight
df <- mutate(
  df,
  cii = (hh_electricity + hh_water + hh_sanitation + hh_cooking_fuel) / 4,
  malnutrition_burden = stunting + wasting + underweight
)

# Bubble chart: point size follows households surveyed, colour follows female
# literacy. The `text` aesthetic is only consumed by ggplotly() for tooltips.
p <- ggplot(
  df,
  aes(
    x = cii,
    y = malnutrition_burden,
    size = hh_surveyed,
    color = women_literate,
    text = paste("District:", district, "<br>State:", state)
  )
) +
  geom_point(alpha = 0.6) +
  scale_color_gradient(
    low = "#ffeda0",
    high = "#800026",
    name = "Literacy (%)"
  ) +
  labs(
    title = "Interactive Development-Health Nexus",
    x = "Composite Infrastructure Index (CII)",
    y = "Total Malnutrition Burden"
  ) +
  theme_minimal()

# Convert to an interactive widget that shows only the custom tooltip text.
interactive_plot <- ggplotly(p, tooltip = "text")
interactive_plot
# Identify which districts are “Doubly Disadvantaged” (Low education + Low healthcare access + High malnutrition) vs. those that are “Resilient” (Low education but High health outcomes).
# Drop any columns beyond the first 109 and re-apply the short column names
# (`new_names` is defined elsewhere in this script).
if (ncol(df) > 109) {
  df <- df[, 1:109]
}
colnames(df) <- new_names

# Numeric versions of the clustering inputs; rows missing any of the four
# clustering variables are removed.
df_clean <- df %>%
  mutate(across(
    c(women_literate, stunting, underweight, anc_4plus, hh_surveyed),
    ~ as.numeric(gsub("[^0-9.]", "", .))
  )) %>%
  filter(
    !is.na(women_literate) & !is.na(stunting) &
      !is.na(underweight) & !is.na(anc_4plus)
  )

# Uttar Pradesh and Bihar are always included; the other four states are the
# ones with the most surveyed households among the rest.
target_states <- c("Uttar Pradesh", "Bihar")
other_states <- df_clean %>%
  filter(!(state %in% target_states)) %>%
  group_by(state) %>%
  summarize(total_hh = sum(hh_surveyed, na.rm = TRUE)) %>%
  arrange(desc(total_hh)) %>%
  head(4) %>%
  pull(state)
top_6_states <- c(target_states, other_states)
df_top6 <- df_clean %>% filter(state %in% top_6_states)

# k-means with 3 centres on the standardised indicators.
set.seed(123)
cluster_input <- scale(
  df_top6 %>% select(women_literate, stunting, underweight, anc_4plus)
)
km_res <- kmeans(cluster_input, centers = 3, nstart = 25)

# k-means numbers its clusters arbitrarily, so a label must not be tied to a
# fixed cluster index. Each cluster is scored from its centroid (higher
# literacy/ANC is better, higher stunting/underweight is worse) and labelled
# by rank: lowest score = high vulnerability, highest = success model.
cluster_centers <- as.data.frame(km_res$centers)
cluster_score <- cluster_centers$women_literate + cluster_centers$anc_4plus -
  cluster_centers$stunting - cluster_centers$underweight
cluster_tier <- rank(cluster_score, ties.method = "first")
tier_labels <- c(
  "High Vulnerability (Low Lit/ANC)",
  "Emerging (Moderate Profile)",
  "Success Model (High Lit/ANC)"
)
df_top6$cluster <- factor(
  tier_labels[cluster_tier[km_res$cluster]],
  # Same level (and legend) order as before.
  levels = c(
    "High Vulnerability (Low Lit/ANC)",
    "Success Model (High Lit/ANC)",
    "Emerging (Moderate Profile)"
  )
)

# Colours are bound to the labels by name rather than by position.
cluster_colors <- c(
  "High Vulnerability (Low Lit/ANC)" = "#d9534f",
  "Success Model (High Lit/ANC)" = "#5cb85c",
  "Emerging (Moderate Profile)" = "#428bca"
)

# Convex hull of each cluster within each state, drawn as a shaded polygon.
hull_data <- df_top6 %>%
  group_by(state, cluster) %>%
  slice(chull(women_literate, anc_4plus)) %>%
  ungroup()

p <- ggplot(
  df_top6,
  aes(x = women_literate, y = anc_4plus, color = cluster, fill = cluster)
) +
  geom_polygon(data = hull_data, alpha = 0.2, show.legend = FALSE) +
  # `text` is not a ggplot2 aesthetic (hence the warning below); it is kept on
  # this layer only so ggplotly() can use it for the tooltip.
  geom_point(
    aes(text = paste0(
      "District: ", district, "<br>State: ", state, "<br>Status: ", cluster
    )),
    size = 1.3, alpha = 0.7
  ) +
  facet_wrap(~state, ncol = 3) + # 2 rows of 3
  scale_color_manual(values = cluster_colors) +
  scale_fill_manual(values = cluster_colors) +
  theme_minimal() +
  labs(
    title = "District Vulnerability Profiles: 6 Key States",
    x = "Female Literacy (%)",
    y = "Mothers with 4+ ANC Visits (%)",
    color = "Cluster Definitions"
  ) +
  theme(
    legend.position = "bottom",
    strip.text = element_text(face = "bold", size = 9),
    panel.spacing = unit(1.5, "lines")
  )
## Warning in geom_point(aes(text = paste0("District: ", district, "<br>State: ",
## : Ignoring unknown aesthetics: text
ggplotly(p, tooltip = "text") %>%
  layout(
    showlegend = TRUE,
    legend = list(
      orientation = "h",
      x = 0.5,
      xanchor = "center",
      y = -0.15,
      font = list(size = 10)
    ),
    margin = list(l = 50, r = 50, b = 100, t = 80),
    autosize = TRUE
  )
library(dplyr)
library(ggplot2)
library(plotly)

# Trim to the first 109 columns if there are more, then re-apply the short
# column names from `new_names` (defined elsewhere in this script).
if (ncol(df) > 109) {
  df <- df[, 1:109]
}
colnames(df) <- new_names

# Numeric target and features; only rows complete in all five are kept.
df_knn <- df %>%
  mutate(
    across(
      c(anc_4plus, women_literate, hh_electricity, hh_sanitation, hh_insurance),
      function(col) as.numeric(gsub("[^0-9.]", "", col))
    )
  ) %>%
  filter(
    complete.cases(
      anc_4plus, women_literate, hh_electricity, hh_sanitation, hh_insurance
    )
  )

# Binary target: at or above the median ANC coverage counts as "High Access".
median_anc <- median(df_knn$anc_4plus, na.rm = TRUE)
df_knn$access_label <- ifelse(
  df_knn$anc_4plus >= median_anc,
  "High Access",
  "Low Access"
)

# Reproducible 70/30 train/test split.
set.seed(123)
n_rows <- nrow(df_knn)
train_indices <- sample(seq_len(n_rows), size = 0.7 * n_rows) # 70% for training
train_data <- df_knn[train_indices, ]
test_data <- df_knn[-train_indices, ]

# Standardise the features; the test set reuses the training centre and scale.
features <- c("women_literate", "hh_electricity", "hh_sanitation", "hh_insurance")
train_scaled <- as.matrix(scale(train_data[, features]))
test_scaled <- as.matrix(
  scale(
    test_data[, features],
    center = attr(train_scaled, "scaled:center"),
    scale = attr(train_scaled, "scaled:scale")
  )
)

# Number of neighbours: square root of the training-set size, rounded.
k_val <- round(sqrt(nrow(train_data)))
# Classify each row of `test_x` by majority vote among its `k` nearest
# training rows (Euclidean distance).
#
# Arguments:
#   train_x  numeric matrix of training features, one row per observation.
#   test_x   numeric matrix of test features with the same columns as train_x.
#   train_y  labels for the rows of train_x (character or factor).
#   k        number of neighbours; values outside 1..nrow(train_x) are clamped.
#
# Returns a character vector with one predicted label per row of `test_x`
# (character(0) for an empty test set). Ties are resolved in favour of the
# label that comes first in table() order.
predict_knn_manual <- function(train_x, test_x, train_y, k) {
  train_x <- as.matrix(train_x)
  test_x <- as.matrix(test_x)
  stopifnot(
    ncol(train_x) == ncol(test_x),
    nrow(train_x) == length(train_y)
  )

  # Asking for more neighbours than exist would index past the end of
  # order() and inject NA labels; clamp instead.
  k <- max(1L, min(as.integer(k), nrow(train_x)))

  # seq_len() yields an empty loop for a 0-row test set (1:0 would not), and
  # vapply() guarantees a character result regardless of input size.
  vapply(seq_len(nrow(test_x)), function(i) {
    # Euclidean distance from test row i to every training row.
    diffs <- sweep(train_x, 2, test_x[i, ])
    distances <- sqrt(rowSums(diffs^2))

    # Labels of the k closest training rows.
    nearest <- order(distances)[seq_len(k)]
    votes <- table(train_y[nearest])

    # No usable labels (e.g. all NA): report NA instead of failing.
    if (length(votes) == 0L) {
      return(NA_character_)
    }

    # Majority vote.
    names(votes)[which.max(votes)]
  }, character(1))
}
# Predict the access label of every held-out row and flag whether the
# prediction matches the actual label.
test_data$predicted <- predict_knn_manual(
  train_x = train_scaled,
  test_x = test_scaled,
  train_y = train_data$access_label,
  k = k_val
)
test_data$is_correct <- ifelse(
  test_data$access_label == test_data$predicted,
  "Correct",
  "Misclassified"
)

# Test rows plotted on two of the features: colour shows the predicted class,
# shape shows whether it was right; `text` feeds the plotly tooltip.
p <- ggplot(
  test_data,
  aes(
    x = women_literate,
    y = hh_sanitation,
    color = predicted,
    shape = is_correct,
    text = paste0(
      "District: ", district,
      "<br>Actual: ", access_label,
      "<br>Predicted: ", predicted
    )
  )
) +
  geom_point(size = 3, alpha = 0.8) +
  scale_color_manual(
    values = c("High Access" = "#2ecc71", "Low Access" = "#e74c3c")
  ) +
  labs(
    title = "Manual KNN: Healthcare Access Prediction",
    subtitle = "Calculated using Euclidean Distance without external ML libraries",
    x = "Female Literacy (%)",
    y = "Improved Sanitation (%)"
  ) +
  theme_minimal()

# Interactive version with a horizontal legend centred under the plot.
ggplotly(p, tooltip = "text") %>%
  layout(
    legend = list(orientation = "h", x = 0.5, xanchor = "center", y = -0.2)
  )