# Mesli Redhouane
# 12/07/2023

# This document describes the various operations to be carried out on
# the "br" and "wm" files from MICS4-2012 (Algeria), in order to be able
# to use the DHS.rates R package to calculate the total fertility rate (TFR).
  
# 1.Files and variables of interest "br" file & "wm" file.

# BH4C = CMC date of birth of child
# WDOB = CMC date of birth of respondent
# WDOI = CMC date of interview
# wmweight = Woman’s individual sample weight
# BHLN = Child line number (Numéro de ligne de l'enfant)
# WM7 = Women interview results (Résultat de l'interview de la femme)

# 1. Show the current working directory

getwd()
## [1] "C:/Users/Snow/Desktop/R_Exercices"

# 2. Set the working directory to the folder holding the MICS4 .dta files.
# NOTE(review): this absolute path is machine-specific; adjust it (or run the
# script from that folder) on any other computer. The relative file names
# used further down are resolved against this directory.

setwd("C:/Users/Snow/Desktop/R_Exercices")

# 3. Attach "haven" to use its read functions for Stata (.dta) files

library(haven)

br <- read_dta("MICS4-ALG_br.dta") # "br" file, returned by haven as a tibble

# 4. Convert to a plain data frame. The data viewer is opened only in an
# interactive session: View() needs a GUI and is not usable when the script
# is run in batch mode (e.g. through Rscript).

br <- as.data.frame(br)
if (interactive()) {
  View(br)
}

# 5. Build the ID variable by packing HH1, HH2 and LN into a single number:
# HH1 is shifted by four decimal digits, HH2 by two, and LN is added as is.
# NOTE(review): the result is only unique if HH2 and LN never exceed 99 --
# confirm against the data.

br$ID <- with(br, (HH1 * 100 + HH2) * 100 + LN)

# 6. Attach a variable label to ID

# install.packages("labelled")
library(labelled)

var_label(br) <- list(ID = "Identifiant")

# 7. Give the MICS variables the names they carry in DHS files:
#    WDOI -> v008, WDOB -> v011, BH4C -> b3, wmweight -> v005

library(tidyverse)
## When this script was originally run, attaching tidyverse 2.0.0 loaded
## dplyr 1.1.2, readr 2.1.4, forcats 1.0.0, stringr 1.5.1, ggplot2 3.4.4,
## tibble 3.2.1, lubridate 1.9.2, tidyr 1.3.0 and purrr 1.0.2, and reported
## that dplyr::filter() and dplyr::lag() mask stats::filter() and stats::lag().

br <- rename(
  br,
  v008 = WDOI,
  v011 = WDOB,
  b3   = BH4C,
  v005 = wmweight
)

# 8. Keep only the variables of interest, with ID in first position

library(dplyr)

br2 <- select(br, ID, BHLN, b3, v008, v011, v005)

# 9. Number each woman's births in reverse order of BHLN, so that the birth
# with the highest BHLN gets BHLN_rev = 1. row_number() always returns
# distinct integers within a group, whereas rank() averages ties and can
# return fractional ranks that would end up in the column names below.
# The grouping is removed right away so it does not leak into later steps.

br3 <- br2 %>%
  group_by(ID) %>%
  mutate(BHLN_rev = row_number(desc(as.numeric(BHLN)))) %>%
  ungroup()

# 10. Sort by ID then BHLN_rev, drop BHLN by name (not by column position)
# and place BHLN_rev right after ID

br3 <- br3 %>%
  arrange(ID, BHLN_rev) %>%
  select(-BHLN) %>%
  relocate(BHLN_rev, .after = ID)

# 11. Reshape from long to wide: one row per woman, one b3_xx column per
# birth. The column names are built with a zero-padded two-digit index
# (b3_01, ..., b3_09, b3_10, ...). A fixed "b3_0" prefix would name the 10th
# birth "b3_010", which is not one of the b3_01:b3_20 names DHS.rates uses.

br4 <- br3 %>%
  mutate(BHLN_rev = sprintf("b3_%02d", BHLN_rev)) %>%
  pivot_wider(names_from = BHLN_rev, values_from = b3, names_sort = TRUE)

# 12. Save br4 to disk and read it back

save(br4, file = "br4.RData")
load(file = "br4.RData")

# 13. Remove objects not useful for the moment

# rm(br, br2, br3, br4)

# 14. Read the "wm" data file (returned by haven as a tibble)

wm <- read_dta("MICS4-ALG_wm.dta")

# 15. Convert to a plain data frame. The data viewer is opened only in an
# interactive session: View() needs a GUI and is not usable when the script
# is run in batch mode (e.g. through Rscript).

wm <- as.data.frame(wm)
if (interactive()) {
  View(wm)
}

# 16. Create the ID variable
# NOTE(review): the first column of the file is dropped by position here;
# the script does not say which variable that is or why -- confirm, and
# prefer dropping it by name.

wm <- wm[, -1]
wm$ID <- ((wm$HH1 * 100 + wm$HH2) * 100) + wm$LN

# 17. Attach a variable label to ID

var_label(wm$ID) <- "Identifiant"

# 18. Give the MICS variables the names they carry in DHS files:
#     WDOI -> v008, WDOB -> v011, WDOM -> v509, wmweight -> v005

wm <- wm %>%
  rename(v008 = WDOI, v011 = WDOB, v509 = WDOM, v005 = wmweight)

# DHS files store the woman's sample weight multiplied by 1,000,000, and
# DHS.rates divides v005 by 1,000,000 before using it. MICS weights are not
# stored on that scale, so they are rescaled here; otherwise the weighted
# counts reported by fert() (column WN) collapse to 0.

wm <- wm %>%
  mutate(v005 = as.numeric(v005) * 1e6)

# 19. Keep only the variables of interest, with ID in first position

wm2 <- wm %>%
  select(
    ID, HH1, WM7, v008, v011, v509, v005,
    HH6, HH7, MSTATUS, windex5, Welevel_Reg
  )

# 20. Keep the rows where WM7 equals 1. which() discards rows where WM7 is
# missing; a bare logical index would turn each of them into an all-NA row.

wm3 <- wm2[which(wm2$WM7 == 1), , drop = FALSE]

# 21. Save wm3 to disk and read it back

save(wm3, file = "wm3.RData")
load(file = "wm3.RData")

# 22. Merge the wide birth histories (br4) onto the women's file (wm3).
# Every row of wm3 is kept (all.y = TRUE), so women without any birth stay
# in the data with missing b3_xx values. Rows of br4 that have no match in
# wm3 are NOT kept: they would carry NA in v005/v008/v011, which can only
# come from the women's file.

wm3_br4 <- merge(x = br4, y = wm3, by = "ID", all.x = FALSE, all.y = TRUE)

# v008, v011 and v005 exist in both inputs, so merge() suffixes them with
# ".x" (from br4) and ".y" (from wm3). The wm3 versions become the
# analysis variables.

wm3_br4 <- wm3_br4 %>%
  rename(v008 = v008.y, v011 = v011.y, v005 = v005.y) %>%
  select(-WM7)

save(wm3_br4, file = "wm3_br4.RData")

# 23. Create v021 (primary sampling unit) as a copy of HH1

wm3_br4$v021 <- wm3_br4$HH1

# 24. Create v022 (strata variable) as the interaction of HH7 and HH6,
# coded as consecutive integers over the combinations present in the data

wm3_br4 <- wm3_br4 %>%
  mutate(v022 = as.integer(interaction(HH7, HH6, drop = TRUE)))

# Drop the leftover br4 copies of the duplicated variables by name. Removing
# "column 2" only removes v008.x and depends on the column order.

wm3_br4 <- wm3_br4 %>%
  select(-any_of(c("v008.x", "v011.x", "v005.x")))

# 25. Attach DHS.rates and compute the total fertility rate (TFR)

# install.packages("DHS.rates")
library(DHS.rates)
## Warning: le package 'DHS.rates' a été compilé avec la version R 4.3.2

# Store the result, then evaluate it at top level so it is displayed
TFR <- fert(wm3_br4, Indicator = "tfr")
TFR

# Output recorded when the script was originally run (figures may differ
# after re-running):
## Warning in fert(wm3_br4, Indicator = "tfr"): Birth History variables
## b3_01:b3_20 are not complete; the missing variables were created
## 
##  The current function calculated TFR based on a reference period of 36 months 
##  The reference period ended at the time of the interview, in 2013 OR Oct - NA 2012 
##  The average reference period is 2011.5
##        TFR      N WN
## [1,] 2.662 110464  0
# NOTE(review): in this recorded run the weighted count WN is 0 and the
# interview period shows "NA"; both look like input-data problems (weight
# scale, missing v008) rather than genuine results -- verify on re-run.