library(haven)
# Read the Stata file into SLDHS.dta (haven returns it as a tibble).
SLDHS.dta <- haven::read_dta(
  file = "C:/Users/Administrator/OneDrive/Datasets/Somaliland DHS/SLBR71.dta"
)
# Look at the first rows, the last rows, and the column-by-column structure.
head(x = SLDHS.dta)
tail(x = SLDHS.dta)
str(object = SLDHS.dta)
## tibble [17,686 × 563] (S3: tbl_df/tbl/data.frame)
##  $ id               : num [1:17686] 770 771 772 773 774 778 779 780 781 782 ...
##   ..- attr(*, "format.stata")= chr "%8.0f"
##  $ CASEID           : num [1:17686] 502 502 502 502 502 ...
##   ..- attr(*, "label")= chr "Case Identification"
##   ..- attr(*, "format.stata")= chr "%7.0f"
##  $ V000             : chr [1:17686] "SO7" "SO7" "SO7" "SO7" ...
##   ..- attr(*, "label")= chr "Country code and phase"
##   ..- attr(*, "format.stata")= chr "%9s"
##  $ V001             : num [1:17686] 8 8 8 8 8 8 8 8 8 8 ...
##   ..- attr(*, "label")= chr "Cluster number"
##   ..- attr(*, "format.stata")= chr "%4.0f"
##  $ V002             : num [1:17686] 65 65 65 65 65 51 51 62 62 62 ...
##   ..- attr(*, "label")= chr "Household number"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V003             : num [1:17686] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "label")= chr "Respondents line number"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V004             : num [1:17686] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "Ultimate area unit"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V005             : num [1:17686] 0.888 0.888 0.888 0.888 0.888 ...
##   ..- attr(*, "label")= chr "Womens individual sample weight (6 decimals)"
##   ..- attr(*, "format.stata")= chr "%10.6f"
##  $ V006             : num [1:17686] 8 8 8 8 8 8 8 8 8 8 ...
##   ..- attr(*, "label")= chr "Month of interview"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V007             : num [1:17686] 2018 2018 2018 2018 2018 ...
##   ..- attr(*, "label")= chr "Year of interview"
##   ..- attr(*, "format.stata")= chr "%4.0f"
##  $ V008             : num [1:17686] 1424 1424 1424 1424 1424 ...
##   ..- attr(*, "label")= chr "Date of interview (CMC)"
##   ..- attr(*, "format.stata")= chr "%4.0f"
##  $ V008A            : num [1:17686] 43117 43117 43117 43117 43117 ...
##   ..- attr(*, "label")= chr "Date of interview (CDC)"
##   ..- attr(*, "format.stata")= chr "%5.0f"
##  $ V009             : dbl+lbl [1:17686] 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
##    ..@ label       : chr "Respondents month of birth"
##    ..@ format.stata: chr "%2.0f"
##    ..@ labels      : Named num [1:13] 1 2 3 4 5 6 7 8 9 10 ...
##    .. ..- attr(*, "names")= chr [1:13] "January" "February" "March" "April" ...
##  $ V010             : num [1:17686] 1980 1980 1980 1980 1980 ...
##   ..- attr(*, "label")= chr "Respondents year of birth"
##   ..- attr(*, "format.stata")= chr "%4.0f"
##  $ V011             : num [1:17686] 964 964 964 964 964 ...
##   ..- attr(*, "label")= chr "Date of birth (CMC)"
##   ..- attr(*, "format.stata")= chr "%4.0f"
##  $ V012             : num [1:17686] 38 38 38 38 38 30 30 38 38 38 ...
##   ..- attr(*, "label")= chr "Respondents current age"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ ACTUAL_EA_CODE   : num [1:17686] 1.1e+11 1.1e+11 1.1e+11 1.1e+11 1.1e+11 ...
##   ..- attr(*, "label")= chr "Actual EA Code"
##   ..- attr(*, "format.stata")= chr "%12.0f"
##  $ VHHID            : num [1:17686] 5 5 5 5 5 10 10 11 11 11 ...
##   ..- attr(*, "label")= chr "Household ID"
##   ..- attr(*, "format.stata")= chr "%5.0f"
##  $ V013             : dbl+lbl [1:17686] 5, 5, 5, 5, 5, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5...
##    ..@ label       : chr "Age in 5-year groups"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:7] 1 2 3 4 5 6 7
##    .. ..- attr(*, "names")= chr [1:7] "15 - 19" "20 - 24" "25 - 29" "30 - 34" ...
##  $ V016             : num [1:17686] 16 16 16 16 16 16 16 16 16 16 ...
##   ..- attr(*, "label")= chr "Day of interview"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V017             : num [1:17686] 1357 1357 1357 1357 1357 ...
##   ..- attr(*, "label")= chr "CMC start of calendar"
##   ..- attr(*, "format.stata")= chr "%4.0f"
##  $ V018             : num [1:17686] 13 13 13 13 13 13 13 13 13 13 ...
##   ..- attr(*, "label")= chr "Row of month of interview"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V019             : num [1:17686] 66 66 66 66 66 66 66 66 66 66 ...
##   ..- attr(*, "label")= chr "Length of calendar"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V024             : dbl+lbl [1:17686] 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, ...
##    ..@ label       : chr "Region"
##    ..@ format.stata: chr "%2.0f"
##    ..@ labels      : Named num [1:6] 11 12 13 14 15 16
##    .. ..- attr(*, "names")= chr [1:6] "Awdal" " Marodijeh" "Sahil" "Togdheer" ...
##  $ V025             : dbl+lbl [1:17686] 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
##    ..@ label       : chr "Type of place of residence"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:6] 1 2 3 4 5 6
##    .. ..- attr(*, "names")= chr [1:6] "Rural" "Urban" "Nomadic" "Rural IDP" ...
##  $ V026             : dbl+lbl [1:17686] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
##    ..@ label       : chr "Distrct"
##    ..@ format.stata: chr "%3.0f"
##    ..@ labels      : Named num [1:103] 1 2 3 4 5 6 7 8 9 10 ...
##    .. ..- attr(*, "names")= chr [1:103] "Borama" "Baki" "Lughaya" "Zeylac" ...
##  $ V027             : num [1:17686] 1 1 1 1 1 2 2 1 1 1 ...
##   ..- attr(*, "label")= chr "Number of visits"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V028             : num [1:17686] 118 118 118 118 118 610 610 610 610 610 ...
##   ..- attr(*, "label")= chr "Interviewer identification"
##   ..- attr(*, "format.stata")= chr "%5.0f"
##  $ V030             : num [1:17686] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "Field supervisor"
##   ..- attr(*, "format.stata")= chr "%5.0f"
##  $ V045A            : num [1:17686] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "label")= chr "Language of questionnaire"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V045B            : num [1:17686] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "label")= chr "Language of interview"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V045C            : dbl+lbl [1:17686] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
##    ..@ label       : chr "Native language of respondent"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num 1
##    .. ..- attr(*, "names")= chr "Somali"
##  $ V101             : dbl+lbl [1:17686] 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, ...
##    ..@ label       : chr "Region"
##    ..@ format.stata: chr "%2.0f"
##    ..@ labels      : Named num [1:6] 11 12 13 14 15 16
##    .. ..- attr(*, "names")= chr [1:6] "Awdal" " Marodijeh" "Sahil" "Togdheer" ...
##  $ V102             : dbl+lbl [1:17686] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
##    ..@ label       : chr "Type of place of residence"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:3] 1 2 3
##    .. ..- attr(*, "names")= chr [1:3] "Urban" "Rural" "Nomadic"
##  $ V106A            : dbl+lbl [1:17686] 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
##    ..@ label       : chr "Ever attended school"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "Yes" "No"
##  $ V106             : dbl+lbl [1:17686] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
##    ..@ label       : chr "Highest educational level"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:4] 0 1 2 3
##    .. ..- attr(*, "names")= chr [1:4] "No Education" "Primary" "Secondary" "Higher"
##  $ V107             : num [1:17686] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "Highest year of education"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V113             : num [1:17686] 11 11 11 11 11 13 13 13 13 13 ...
##   ..- attr(*, "label")= chr "Source of drinking water"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V115             : num [1:17686] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "Time to get to water source"
##   ..- attr(*, "format.stata")= chr "%3.0f"
##  $ V116             : num [1:17686] 23 23 23 23 23 61 61 23 23 23 ...
##   ..- attr(*, "label")= chr "Type of toilet facility"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V119             : num [1:17686] 1 1 1 1 1 1 1 2 2 2 ...
##   ..- attr(*, "label")= chr "Household has: electricity"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V120             : num [1:17686] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "label")= chr "Household has: radio"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V121             : num [1:17686] 1 1 1 1 1 1 1 2 2 2 ...
##   ..- attr(*, "label")= chr "Household has: television"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V122             : num [1:17686] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "label")= chr "Household has: refrigerator"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V123             : num [1:17686] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "label")= chr "Household has: bicycle"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V124             : num [1:17686] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "label")= chr "Household has: motorcycle/scooter"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V125             : num [1:17686] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "label")= chr "Household has: car/truck"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V136             : num [1:17686] 6 6 6 6 6 4 4 6 6 6 ...
##   ..- attr(*, "label")= chr "Number of household members (listed)"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V150             : num [1:17686] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "label")= chr "Relationship to household head"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V151             : num [1:17686] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "label")= chr "Sex of household head"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V152             : num [1:17686] 23 23 23 23 23 61 61 23 23 23 ...
##   ..- attr(*, "label")= chr "Age of household head"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V155             : dbl+lbl [1:17686] 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
##    ..@ label       : chr "Literacy"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:5] 1 2 3 4 5
##    .. ..- attr(*, "names")= chr [1:5] "Cannot Read at all" "Able to read only part of the sentence" "Able to read whole sentence" "No card with available language" ...
##  $ V157             : dbl+lbl [1:17686]  3,  3,  3,  3,  3, NA, NA, NA, NA, NA, NA, NA, NA, ...
##    ..@ label       : chr "Frequency of reading newspaper or magazine"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:3] 1 2 3
##    .. ..- attr(*, "names")= chr [1:3] "At least once a week" "Less than once a week" "Not at all"
##  $ V158             : dbl+lbl [1:17686] 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3...
##    ..@ label       : chr "Frequency of listening to radio"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:3] 1 2 3
##    .. ..- attr(*, "names")= chr [1:3] "At least once a week" "Less than once a week" "Not at all"
##  $ V159             : dbl+lbl [1:17686] 3, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3...
##    ..@ label       : chr "Frequency of watching television"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:3] 1 2 3
##    .. ..- attr(*, "names")= chr [1:3] "At least once a week" "Less than once a week" "Not at all"
##  $ V169A            : dbl+lbl [1:17686] 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
##    ..@ label       : chr "Owns a mobile telephone"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "Yes" "No"
##  $ V169B            : dbl+lbl [1:17686]  1,  1,  1,  1,  1, NA, NA, NA, NA, NA, NA, NA, NA, ...
##    ..@ label       : chr "Use mobile telephone for financial transactions"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "Yes" "No"
##  $ V170             : dbl+lbl [1:17686] 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
##    ..@ label       : chr "Has an account in a bank or other financial institution"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "Yes" "No"
##  $ V171             : num [1:17686] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "label")= chr "Ever used internet"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V171A            : dbl+lbl [1:17686] NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
##    ..@ label       : chr "Use of internet in the last 12 months"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "Yes" "No"
##  $ V171B            : dbl+lbl [1:17686] NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
##    ..@ label       : chr "Frequency of using internet last month"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:4] 1 2 3 4
##    .. ..- attr(*, "names")= chr [1:4] "Almost everyday" "At least once a week" "Less than once a week" "Not at all"
##  $ V190             : dbl+lbl [1:17686] 5, 5, 5, 5, 5, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3...
##    ..@ label       : chr "Wealth index combined"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:5] 1 2 3 4 5
##    .. ..- attr(*, "names")= chr [1:5] "Lowest" "Second" "Middle" "Fourth" ...
##  $ V191             : num [1:17686] 1.37 1.37 1.37 1.37 1.37 ...
##   ..- attr(*, "label")= chr "Wealth index factor score combined (5 decimals)"
##   ..- attr(*, "format.stata")= chr "%9.5f"
##  $ V201             : num [1:17686] 5 5 5 5 5 2 2 4 4 4 ...
##   ..- attr(*, "label")= chr "Total children ever born"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V202             : num [1:17686] 3 3 3 3 3 2 2 4 4 4 ...
##   ..- attr(*, "label")= chr "Sons at home"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V203             : num [1:17686] 1 1 1 1 1 0 0 0 0 0 ...
##   ..- attr(*, "label")= chr "Daughters at home"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V204             : num [1:17686] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "Sons elsewhere"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V205             : num [1:17686] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "Daughters elsewhere"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V206             : num [1:17686] 0 0 0 0 0 NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "Sons who have died"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V207             : num [1:17686] 1 1 1 1 1 NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "Daughters who have died"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V208             : num [1:17686] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "label")= chr "Births in last five years"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V209             : num [1:17686] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "label")= chr "Births in past year"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V210             : num [1:17686] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "label")= chr "Births in month of interview"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V211             : num [1:17686] 1244 1244 1244 1244 1244 ...
##   ..- attr(*, "label")= chr "Date of first birth (CMC)"
##   ..- attr(*, "format.stata")= chr "%4.0f"
##  $ V212             : num [1:17686] 23 23 23 23 23 22 22 26 26 26 ...
##   ..- attr(*, "label")= chr "Age of respondent at 1st birth"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V213             : dbl+lbl [1:17686] 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
##    ..@ label       : chr "Currently pregnant"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "Yes" "No"
##  $ V214             : num [1:17686] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "Duration of current pregnancy"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V215             : num [1:17686] 105 105 105 105 105 115 115 103 103 103 ...
##   ..- attr(*, "label")= chr "Time since last menstrual period"
##   ..- attr(*, "format.stata")= chr "%3.0f"
##  $ V217             : dbl+lbl [1:17686]  3,  3,  3,  3,  3, NA, NA,  1,  1,  1,  1, NA, NA, ...
##    ..@ label       : chr "Knowledge of ovulatory cycle"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:5] 1 2 3 6 8
##    .. ..- attr(*, "names")= chr [1:5] "Just before period begins" "Right after period has ended" "Half way between two periods" "Other" ...
##  $ V218             : num [1:17686] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "Number of living children"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V219             : num [1:17686] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "Living children + current pregnancy"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V222             : num [1:17686] 128 128 128 128 128 65 65 79 79 79 ...
##   ..- attr(*, "label")= chr "Last birth to interview (months)"
##   ..- attr(*, "format.stata")= chr "%4.0f"
##  $ V225             : num [1:17686] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "Current pregnancy wanted"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V226             : dbl+lbl [1:17686]  NA,  NA,  NA,  NA,  NA,  NA,  NA,  NA,  NA,  NA,  N...
##    ..@ label       : chr "Time since last period (comp) (months)"
##    ..@ format.stata: chr "%3.0f"
##    ..@ labels      : Named num [1:3] 994 995 996
##    .. ..- attr(*, "names")= chr [1:3] "In Menopause/Has had Hysterectomy" "Before Last Birth" "Never Menstruated"
##  $ V229             : num [1:17686] 7 7 7 7 7 NA NA 3 3 3 ...
##   ..- attr(*, "label")= chr "Month pregnancy ended"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V230             : num [1:17686] 2015 2015 2015 2015 2015 ...
##   ..- attr(*, "label")= chr "Year pregnancy ended"
##   ..- attr(*, "format.stata")= chr "%4.0f"
##  $ V231             : num [1:17686] 1387 1387 1387 1387 1387 ...
##   ..- attr(*, "label")= chr "CMC pregnancy ended"
##   ..- attr(*, "format.stata")= chr "%4.0f"
##  $ V238             : num [1:17686] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "label")= chr "Births in last three years"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V240             : num [1:17686] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "Month of last termination prior to calendar"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V241             : num [1:17686] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "Year of last termination prior to calendar"
##   ..- attr(*, "format.stata")= chr "%4.0f"
##  $ V242             : num [1:17686] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "label")= chr "CMC termination ended prior to calendar"
##   ..- attr(*, "format.stata")= chr "%4.0f"
##  $ V302             : dbl+lbl [1:17686] NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
##    ..@ label       : chr "Have you ever used anything or tried in any way to delay or avoid getting pregna"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "Yes" "No"
##  $ V312             : dbl+lbl [1:17686] 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
##    ..@ label       : chr "Current contraceptive method"
##    ..@ format.stata: chr "%2.0f"
##    ..@ labels      : Named num [1:14] 0 1 2 3 4 5 6 7 8 9 ...
##    .. ..- attr(*, "names")= chr [1:14] "Not currently using(pregnant)" "IUD" "Injectables" "Implants" ...
##  $ V313             : dbl+lbl [1:17686]  1,  1,  1,  1,  1, NA, NA, NA, NA, NA, NA, NA, NA, ...
##    ..@ label       : chr "Current use by method type"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:3] 1 2 3
##    .. ..- attr(*, "names")= chr [1:3] "Modern method" "Traditional method" "Folkloric method"
##  $ V323             : dbl+lbl [1:17686]  2,  2,  2,  2,  2, NA, NA, NA, NA, NA, NA, NA, NA, ...
##    ..@ label       : chr "Brand of pill used"
##    ..@ format.stata: chr "%2.0f"
##    ..@ labels      : Named num [1:8] 1 2 3 4 5 6 96 98
##    .. ..- attr(*, "names")= chr [1:8] "MICROLUT" "ZINNIA" "MICROGYNON" "CHOICE" ...
##  $ V326             : dbl+lbl [1:17686] 21, 21, 21, 21, 21, NA, NA, NA, NA, NA, NA, NA, NA, ...
##    ..@ label       : chr "Last source for current users"
##    ..@ format.stata: chr "%2.0f"
##    ..@ labels      : Named num [1:13] 11 12 13 14 15 16 17 21 22 26 ...
##    .. ..- attr(*, "names")= chr [1:13] "Government/Hospital" "Refferal Health Center" "MCH/HC" "PHU" ...
##  $ V364             : dbl+lbl [1:17686]  1,  1,  1,  1,  1,  4,  4, NA, NA, NA, NA,  4,  4, ...
##    ..@ label       : chr "Contraceptive use and intention"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:5] 1 2 3 4 5
##    .. ..- attr(*, "names")= chr [1:5] "Using mordern method" "Using traditional method" "Non-User, intends to use later" "Does not intend to use" ...
##  $ V384A            : dbl+lbl [1:17686] 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
##    ..@ label       : chr "Heard family planning on radio last few months"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "Yes" "No"
##  $ V384B            : dbl+lbl [1:17686] 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
##    ..@ label       : chr "Heard family planning on TV last few months"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "Yes" "No"
##   [list output truncated]
#3. Dependent variable: wealth index (V190)
#Independent variables: region (V024), place of residence (V025), education level (V106), sex of household head (V151), age of household head (V152), number of household members (V136), total children ever born (V201), marital status (V501), source of drinking water (V113), toilet facility or sanitation (V116)
# Keep only the dependent variable (V190) and the ten independent variables;
# the column order below is the order of the resulting tibble.
SLDHS.dta <- SLDHS.dta[, c(
  "V190", "V025", "V024", "V106", "V152", "V151",
  "V136", "V201", "V501", "V113", "V116"
)]
names(SLDHS.dta)
##  [1] "V190" "V025" "V024" "V106" "V152" "V151" "V136" "V201" "V501" "V113"
## [11] "V116"
str(SLDHS.dta)
## tibble [17,686 × 11] (S3: tbl_df/tbl/data.frame)
##  $ V190: dbl+lbl [1:17686] 5, 5, 5, 5, 5, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3...
##    ..@ label       : chr "Wealth index combined"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:5] 1 2 3 4 5
##    .. ..- attr(*, "names")= chr [1:5] "Lowest" "Second" "Middle" "Fourth" ...
##  $ V025: dbl+lbl [1:17686] 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
##    ..@ label       : chr "Type of place of residence"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:6] 1 2 3 4 5 6
##    .. ..- attr(*, "names")= chr [1:6] "Rural" "Urban" "Nomadic" "Rural IDP" ...
##  $ V024: dbl+lbl [1:17686] 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, ...
##    ..@ label       : chr "Region"
##    ..@ format.stata: chr "%2.0f"
##    ..@ labels      : Named num [1:6] 11 12 13 14 15 16
##    .. ..- attr(*, "names")= chr [1:6] "Awdal" " Marodijeh" "Sahil" "Togdheer" ...
##  $ V106: dbl+lbl [1:17686] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
##    ..@ label       : chr "Highest educational level"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:4] 0 1 2 3
##    .. ..- attr(*, "names")= chr [1:4] "No Education" "Primary" "Secondary" "Higher"
##  $ V152: num [1:17686] 23 23 23 23 23 61 61 23 23 23 ...
##   ..- attr(*, "label")= chr "Age of household head"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V151: num [1:17686] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "label")= chr "Sex of household head"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V136: num [1:17686] 6 6 6 6 6 4 4 6 6 6 ...
##   ..- attr(*, "label")= chr "Number of household members (listed)"
##   ..- attr(*, "format.stata")= chr "%1.0f"
##  $ V201: num [1:17686] 5 5 5 5 5 2 2 4 4 4 ...
##   ..- attr(*, "label")= chr "Total children ever born"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V501: dbl+lbl [1:17686] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
##    ..@ label       : chr "Current marital status"
##    ..@ format.stata: chr "%1.0f"
##    ..@ labels      : Named num [1:4] 0 1 2 3
##    .. ..- attr(*, "names")= chr [1:4] "Never Married" "Married" "Divorced" "Widowed"
##  $ V113: num [1:17686] 11 11 11 11 11 13 13 13 13 13 ...
##   ..- attr(*, "label")= chr "Source of drinking water"
##   ..- attr(*, "format.stata")= chr "%2.0f"
##  $ V116: num [1:17686] 23 23 23 23 23 61 61 23 23 23 ...
##   ..- attr(*, "label")= chr "Type of toilet facility"
##   ..- attr(*, "format.stata")= chr "%2.0f"
#4.Variable data types:
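#Ordinal variables: V190, V106
#Nominal variables: V024, V113, V116, V501 (V501 has four categories: Never Married, Married, Divorced, Widowed)
#Binary variables: V151, and V025 as observed (six labels are defined for V025, but only codes 1 and 2 occur in the data)
#Integer (count) variables: V152, V136, V201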
#5. Numeric variables are variables that have a measurable quantity and magnitude, are represented as numbers (discrete or continuous), and obey mathematical operations, e.g. 1, 3, 5.5.
#Factor variables (a.k.a. categorical variables) represent distinct categories or groups stored as labelled levels; they imply no inherent magnitude or order (unless the factor is ordinal) and do not support mathematical operations, e.g. gender, residence, or numbers coded as strings (One, Two, Three).

#Question 2: DESCRIPTIVE STATISTICS
#1.Calculate the mean, median and standard deviation of V136
library(psych)
## Warning: package 'psych' was built under R version 4.4.2
# Take the household-size column from the data set itself rather than a
# hand-typed 0-9 sequence, so the statistics describe the survey data.
# The name `V136` is kept because later code (the histogram) refers to it.
V136 <- SLDHS.dta$V136
# V136 contains missing values (summary() reports NA's for it), so they are
# dropped explicitly; without na.rm = TRUE each statistic would be NA.
mean(V136, na.rm = TRUE)
median(V136, na.rm = TRUE)
sd(V136, na.rm = TRUE)
## 2. Frequency table for the variable "V106", built with the table() function.
# Extract the column with [[ ]] and count how often each code occurs.
freq_table <- table(SLDHS.dta[["V106"]])
print(freq_table)
## 
##     0     1     2     3 
## 15287  1991   311    97
# Calculate the proportion of households in each wealth quintile ("V190").
# Step 1: tabulate how often each quintile code occurs.
counts <- table(SLDHS.dta[["V190"]])
# Step 2: divide every count by the grand total to obtain proportions.
proportions <- counts / sum(counts)
print(proportions)
## 
##         1         2         3         4         5 
## 0.3575144 0.1705304 0.1161371 0.1643673 0.1914509
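#Explain how you would use R to calculate the correlation coefficient between age of household head ("V152") and the number of living children ("V218").
#Note: V218 was not kept in the 11-variable subset above, so V201 (total children ever born) is used in its place below.
#Check for missing values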
# Count the missing values in each of the two variables.
sum(is.na(SLDHS.dta[["V152"]]))
## [1] 862
sum(is.na(SLDHS.dta[["V201"]]))
## [1] 13
# Drop every row in which either variable is missing, then compute the
# Pearson correlation on the remaining complete pairs.
data_clean <- na.omit(SLDHS.dta[c("V152", "V201")])
correlation <- with(data_clean, cor(V152, V201, method = "pearson"))
print(correlation)
## [1] 0.04612714
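#Interpretation: r = 0.046 indicates a very weak (near-zero) positive linear relationship between V152 and V201, not a perfect one; a perfect positive correlation would be r = 1.
#NOTE(review): summary() reports exactly the same statistics for V152 as for V116 (type of toilet facility), so confirm that V152 really holds the age of the household head before relying on this result.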
# QUESTION 3: DATA CLEANING:
#1. Create a new variable called "poverty_status" based on the "V190" variable (wealth quintile) and categorize households into two groups:
#group 1_Poor: Poorest, Poorer, and Middle quintiles
#group 2_ Non-poor: Richer, and Richest quintiles
# Load the dplyr package
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Derive poverty_status from the wealth quintile (V190):
# quintiles 1-3 become "Poor", quintiles 4-5 become "Non-Poor",
# and any other value stays NA.
SLDHS.dta$poverty_status <- NA
SLDHS.dta$poverty_status[SLDHS.dta$V190 %in% c(1, 2, 3)] <- "Poor"
SLDHS.dta$poverty_status[SLDHS.dta$V190 %in% c(4, 5)] <- "Non-Poor"
table(SLDHS.dta$poverty_status)
## 
## Non-Poor     Poor 
##     6293    11393
#2. Check for missing values in all variables using the summary() function.
# In the output recorded below, summary() prints an "NA's" row for each numeric
# column that has missing values; for the character column poverty_status it
# prints only Length/Class/Mode, so is.na() is needed to count missing values
# in that column.
summary(SLDHS.dta)
##       V190            V025            V024            V106       
##  Min.   :1.000   Min.   :1.000   Min.   :11.00   Min.   :0.0000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:13.00   1st Qu.:0.0000  
##  Median :2.000   Median :1.000   Median :14.00   Median :0.0000  
##  Mean   :2.662   Mean   :1.498   Mean   :13.95   Mean   :0.1642  
##  3rd Qu.:4.000   3rd Qu.:2.000   3rd Qu.:15.00   3rd Qu.:0.0000  
##  Max.   :5.000   Max.   :2.000   Max.   :16.00   Max.   :3.0000  
##                                                                  
##       V152           V151            V136            V201       
##  Min.   :11.0   Min.   :1.000   Min.   :0.000   Min.   : 0.000  
##  1st Qu.:13.0   1st Qu.:1.000   1st Qu.:3.000   1st Qu.: 4.000  
##  Median :22.0   Median :1.000   Median :5.000   Median : 6.000  
##  Mean   :26.2   Mean   :1.376   Mean   :5.108   Mean   : 6.274  
##  3rd Qu.:23.0   3rd Qu.:2.000   3rd Qu.:7.000   3rd Qu.: 8.000  
##  Max.   :96.0   Max.   :2.000   Max.   :9.000   Max.   :19.000  
##  NA's   :862    NA's   :858     NA's   :2303    NA's   :13      
##       V501            V113            V116      poverty_status    
##  Min.   :1.000   Min.   :11.00   Min.   :11.0   Length:17686      
##  1st Qu.:1.000   1st Qu.:12.00   1st Qu.:13.0   Class :character  
##  Median :1.000   Median :31.00   Median :22.0   Mode  :character  
##  Mean   :1.138   Mean   :33.44   Mean   :26.2                     
##  3rd Qu.:1.000   3rd Qu.:61.00   3rd Qu.:23.0                     
##  Max.   :3.000   Max.   :96.00   Max.   :96.0                     
##                  NA's   :862     NA's   :862
# Recode V113 (source of drinking water) into a new two-category variable,
# 'Source_of_drinking_water' ("improved" / "unimproved"), using base R.
# Codes that appear in neither list, and missing V113 values, are left as NA.
# NOTE(review): the recorded counts classify 14,666 rows while V113 has 16,824
# non-missing values, so some non-missing codes fall in neither list --
# confirm against the codebook that leaving them NA is intended.
SLDHS.dta$Source_of_drinking_water <- ifelse(
  SLDHS.dta$V113 %in% c(11, 12, 13, 14, 21, 51, 61, 71, 72),
  "improved",
  ifelse(SLDHS.dta$V113 %in% c(32, 42, 81, 96), "unimproved", NA)
)

table(SLDHS.dta$Source_of_drinking_water)
## 
##   improved unimproved 
##      12355       2311
# Recode V116 (type of toilet facility) into a new variable 'toilet_facility'
# with the categories "improved" and "unimproved", using base R.
# Create the column first: assigning into a subset of a column that does not
# exist yet makes the tibble warn "Unknown or uninitialised column".
# Codes in neither list, and missing V116 values, stay NA.
SLDHS.dta$toilet_facility <- NA_character_
SLDHS.dta$toilet_facility[SLDHS.dta$V116 %in% c(11, 12, 13, 21, 22, 31)] <- "improved"
SLDHS.dta$toilet_facility[SLDHS.dta$V116 %in% c(14, 15, 23, 41, 51, 61, 96)] <- "unimproved"
table(SLDHS.dta$toilet_facility)
## 
##   improved unimproved 
##       9520       7304
#Explain how you would handle missing values in your dataset.
# To handle missing values in a dataset, follow the steps below:
#1. Identify the missing values:
#is.na(data)
#is.na.data.frame(data)
#any(is.na(data))
#sum(is.na(data))
#2. After identifying the missing values, either remove the incomplete rows (e.g. na.omit(data)) or impute them, for example with a descriptive statistic (mean, median or mode) of the variable.
#DATA VISUALIZATION IN R:
# create a histogram to show distribution of the variable "V136" (number of household members).
# Plot the household-size column from the data set directly, so the histogram
# does not depend on whatever a free-standing `V136` object currently holds.
# hist() ignores the missing values in the column.
hist(SLDHS.dta$V136,
     main = "Distribution of number of household members (V136)",
     xlab = "Number of household members")

# Create a bar chart to visualize the proportion of households in each poverty status category ("poverty_status").
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
# The task asks for proportions, so convert the category counts to shares
# (which sum to 1) before plotting instead of plotting raw counts.
barplot(prop.table(table(SLDHS.dta$poverty_status)),
        main = "Proportion in each poverty status category",
        xlab = "Poverty status",
        ylab = "Proportion")

# Inspect V201 (total children ever born): its structure and value counts.
str(SLDHS.dta$V201)
##  num [1:17686] 5 5 5 5 5 2 2 4 4 4 ...
##  - attr(*, "label")= chr "Total children ever born"
##  - attr(*, "format.stata")= chr "%2.0f"
table(SLDHS.dta$V201)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
##    5  457 1125 1722 2064 2289 2220 1969 1786 1552 1101  599  326  202  150   50 
##   16   18   19 
##   26   18   12
# Box plot of V201. The title names the variable that is actually plotted
# (the previous title referred to poverty_status, which is not drawn here).
boxplot(SLDHS.dta$V201, main = "Total children ever born (V201)", col = "red")