# Mesli Redhouane
# 12/07/2023
# This document describes the various operations to be carried out on
# the "br" and "wm" files from MICS4-2012 (Algeria), in order to be able
# to use the DHS.rates R package in the calculation of the TFR.
# 1.Files and variables of interest "br" file & "wm" file.
# BH4C = CMC date of birth of child
# WDOB = CMC date of birth of respondent
# WDOI = CMC date of interview
# wmweight = Woman’s individual sample weight
# BHLN = Child line number (Numéro de ligne de l'enfant)
# WM7 = Women interview results (Résultat de l'interview de la femme)
# 1. Show the current working directory.
getwd()
## [1] "C:/Users/Snow/Desktop/R_Exercices"
# 2. Change working directory.
# NOTE(review): this is an absolute, machine-specific path; every relative
# file name used below is resolved against it, so the script only runs as-is
# on a machine where this folder exists.
setwd("C:/Users/Snow/Desktop/R_Exercices")
# 3. Attach "haven" to get read_dta() for Stata files.
library(haven)
br <- read_dta("MICS4-ALG_br.dta") # read as table
# 4. Convert the imported table to a plain data frame.
br <- as.data.frame(br)
# View() opens a data-viewer window, which is only useful (and only reliably
# available) in an interactive session; skip it when the script is run with
# Rscript or knitted.
if (interactive()) {
  View(br)
}
# 5. Build one numeric identifier per row from HH1, HH2 and LN:
#    (HH1 * 100 + HH2) * 100 + LN.
br[["ID"]] <- with(br, (HH1 * 100 + HH2) * 100 + LN)
# 6. Attach a variable label to ID (needs the "labelled" package).
# install.packages("labelled")
library(labelled)
var_label(br[["ID"]]) <- "Identifiant"
# var_label(br$AGE_w) <- "Women's age"
# 7. Rename variables as they are in DHS files
library (tidyverse)
## Warning: le package 'tidyverse' a été compilé avec la version R 4.3.2
## Warning: le package 'ggplot2' a été compilé avec la version R 4.3.2
## Warning: le package 'purrr' a été compilé avec la version R 4.3.2
## Warning: le package 'stringr' a été compilé avec la version R 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Give the MICS columns the variable names used in DHS files, in one call.
br <- rename(
  br,
  v008 = WDOI,
  v011 = WDOB,
  b3 = BH4C,
  v005 = wmweight
)
# 8. Put ID first and keep only the variables needed for the birth history.
library(dplyr)
br2 <- br %>%
  select(ID, BHLN, b3, v008, v011, v005)

# 9. Number each ID's births from the highest BHLN down to the lowest.
#    row_number() always yields distinct integers 1, 2, 3, ... within an ID,
#    whereas rank() returns fractional values (e.g. 1.5) when two rows tie,
#    which would later turn into column names such as "b3_01.5".
#    ungroup() stops the per-ID grouping from leaking into later steps.
#    NOTE(review): this presumes the highest BHLN is the most recent birth,
#    so that BHLN_rev == 1 is the last birth - confirm against the MICS
#    questionnaire.
br3 <- br2 %>%
  arrange(ID, desc(as.numeric(BHLN))) %>%
  group_by(ID) %>%
  mutate(BHLN_rev = row_number()) %>%
  ungroup()

# 10. Drop BHLN by name (not by column position) and put BHLN_rev after ID.
#     Rows are already sorted by ID and BHLN_rev from the arrange() above.
br3 <- br3 %>%
  select(ID, BHLN_rev, b3, v008, v011, v005)

# 11. Reshape from long (one row per birth) to wide (one row per ID).
#     The column names are zero-padded to two digits (b3_01 ... b3_09,
#     b3_10, ...), matching the b3_01:b3_20 range DHS.rates refers to. A fixed
#     "b3_0" prefix would name the 10th birth "b3_010" instead of "b3_10".
#     names_sort = TRUE orders the new columns b3_01, b3_02, ... regardless
#     of the order in which they first appear in the data.
br4 <- br3 %>%
  mutate(BHLN_rev = sprintf("b3_%02d", as.integer(BHLN_rev))) %>%
  pivot_wider(
    names_from = BHLN_rev,
    values_from = b3,
    names_sort = TRUE
  )
# 12. Write the wide birth-history table to disk, then read it back in.
save(list = "br4", file = "br4.RData")
load("br4.RData")
# 13. Optional clean-up of intermediate objects (left disabled).
# rm(br, br2, br3, br4)
# 14. Read the "wm" (women) data file.
wm <- read_dta("MICS4-ALG_wm.dta") # read as table
# 15. Convert the imported table to a plain data frame.
wm <- as.data.frame(wm)
# View() opens a data-viewer window; only do so in an interactive session so
# the script can also be run with Rscript or knitted.
if (interactive()) {
  View(wm)
}
# 16. Drop the first column, then build the identifier exactly as for "br":
#     (HH1 * 100 + HH2) * 100 + LN.
# NOTE(review): the first column is removed by position; which variable that
# is cannot be seen from this script - confirm it is still the intended one
# if the input file changes.
wm <- wm[-1]
wm[["ID"]] <- with(wm, (HH1 * 100 + HH2) * 100 + LN)
# 17. Attach a variable label to ID.
var_label(wm[["ID"]]) <- "Identifiant"
# 18. Give the MICS columns the variable names used in DHS files, in one call.
wm <- rename(
  wm,
  v008 = WDOI,
  v011 = WDOB,
  v509 = WDOM,
  v005 = wmweight
)
# 19. Put ID first and keep only the variables needed later.
wm2 <- wm %>%
  select(
    ID, HH1, WM7, v008, v011, v509, v005,
    HH6, HH7, MSTATUS, windex5, Welevel_Reg
  )
# 20. Keep the rows where WM7 equals 1.
#     A bare `wm2[wm2$WM7 == 1, ]` returns an all-NA row for every record
#     whose WM7 is missing (NA == 1 is NA, and an NA row index yields an NA
#     row); the explicit !is.na() guard drops those records instead.
is_wm7_one <- !is.na(wm2$WM7) & wm2$WM7 == 1
wm3 <- wm2[is_wm7_one, ]
# 21. Write the filtered table to disk, then read it back in.
save(wm3, file = "wm3.RData")
load(file = "wm3.RData")
# 22. Merge the women file (wm3) with the wide birth history (br4).
#     v008, v011 and v005 exist in both tables; they are taken from wm3 only,
#     so the copies in br4 are dropped BEFORE merging. This avoids the
#     ".x"/".y" suffixes, the follow-up renames, and having to delete a
#     leftover duplicate column by position afterwards.
br4_births <- as.data.frame(br4)
br4_births <- br4_births[
  , setdiff(names(br4_births), c("v008", "v011", "v005")),
  drop = FALSE
]
#     wm3 is the base table: every row of wm3 is kept, with or without
#     births. Rows present only in br4 would carry no weight, PSU, stratum or
#     interview date (all of these come from wm3), so they are not kept.
wm3_br4 <- merge(x = wm3, y = br4_births, by = "ID", all.x = TRUE)
wm3_br4 <- subset(wm3_br4, select = -c(WM7))
# 23. Create v021 (primary sampling unit) as a copy of HH1.
wm3_br4$v021 <- wm3_br4$HH1
# 24. Create v022 (strata) as the integer code of the HH7 x HH6 interaction.
wm3_br4 <- mutate(
  wm3_br4,
  v022 = as.integer(interaction(HH7, HH6, drop = TRUE))
)
# 25. Put the weight on the scale DHS.rates works with.
#     NOTE(review): the run recorded below reported WN = 0 for N = 110464,
#     which is what one gets if fert() divides v005 by 1,000,000 (the DHS
#     convention) while wmweight is not stored multiplied by 1,000,000.
#     Multiplying here makes WN meaningful; the TFR itself is a ratio of
#     weighted quantities and should not change. Confirm against the
#     DHS.rates documentation.
wm3_br4$v005 <- as.numeric(wm3_br4$v005) * 1e6
#     Save only now, so the file on disk is the data set actually analysed
#     (including v021, v022 and the rescaled weight).
save(wm3_br4, file = "wm3_br4.RData")
# 26. Install and attach DHS.rates, then compute the TFR.
# install.packages("DHS.rates")
library(DHS.rates)
## Warning: le package 'DHS.rates' a été compilé avec la version R 4.3.2
(TFR <- fert(wm3_br4, Indicator = "tfr"))
# The console output below was recorded from the original run of this script
# (before the merge and weight changes above); WN in particular is expected
# to differ when the script is re-run.
## Warning in fert(wm3_br4, Indicator = "tfr"): Birth History variables
## b3_01:b3_20 are not complete; the missing variables were created
##
## The current function calculated TFR based on a reference period of 36 months
## The reference period ended at the time of the interview, in 2013 OR Oct - NA 2012
## The average reference period is 2011.5
## TFR N WN
## [1,] 2.662 110464 0