# Libraries
 library(haven)
## Warning: package 'haven' was built under R version 4.3.3
 library(descr)
## Warning: package 'descr' was built under R version 4.3.3
 library(table1)
## Warning: package 'table1' was built under R version 4.3.3
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
 library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
 library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.3.3
 library(sjPlot)
## Warning: package 'sjPlot' was built under R version 4.3.2
## Learn more about sjPlot with 'browseVignettes("sjPlot")'.
 library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(flextable)
## 
## Attaching package: 'flextable'
## The following object is masked from 'package:gtsummary':
## 
##     continuous_summary
library(officer)
# DHS 2021-23 data Burkina Faso
 DATA<-read_dta("BFIR81FL.dta")
#subset for genital cutting = g102 cut (yes/no)
DATA<-subset(DATA, !(is.na(DATA$g102))) 
# removing NAs for variables to use in the models

DATA<-subset(DATA, !(is.na(DATA$v131)))    #ethnicity  
DATA<-subset(DATA, !(is.na(DATA$v106)))      #education
DATA<-subset(DATA, !(is.na(DATA$v010)))      #year of birth

#attributes(DATA$v106)
#attributes(DATA$v131)

Paper: Figure-2 Section 3.2.1. Age at Circumcision

# Checking g106 "Age at circumcision" 
DATA$age_circ_clean <- as_factor(DATA$g106)

# Check levels if needed
levels(DATA$age_circ_clean)
##  [1] "0"              "1"              "2"              "3"             
##  [5] "4"              "5"              "6"              "7"             
##  [9] "8"              "9"              "10"             "11"            
## [13] "12"             "13"             "14"             "15"            
## [17] "16"             "17"             "18"             "19"            
## [21] "20"             "21"             "22"             "26"            
## [25] "44"             "during infancy" "don't know"
# Optional: Rename long labels if you like
DATA$age_circ_clean <- recode(DATA$age_circ_clean,
                              "during infancy" = "Infancy",
                              "don't know" = "Don't know")

# Step 2: Remove NA values only (not 95 or 98, which are valid responses!)
DATA_clean <- DATA %>% 
  filter(!is.na(age_circ_clean))

# Step 3: Plot using ggplot2
age_plot<-ggplot(DATA_clean, aes(x = age_circ_clean)) +
  geom_bar(fill = "red") +
  theme_minimal() +
  labs(title = "Reported Age at Circumcision",
       x = "Age at Circumcision (in years or category)",
       y = "Number of Women") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

age_plot

Data preparation for analysis

#New Variable: Female genital cutting (factor)
 DATA$FGC<-"NA"
 DATA$FGC[DATA$g102==0]<-"0: FGC: No"
 DATA$FGC[DATA$g102==1]<-"1: FGC: Yes"
 DATA$FGC<- as.factor(DATA$FGC) # I added
#New Variable: Ethnicity (factor)
 DATA$ETHNIC<-"0: Other"
 DATA$ETHNIC[DATA$v131==7]<-"1: Mossi"
 DATA$ETHNIC<- as.factor(DATA$ETHNIC)
 #New Variable: Education (factor)
 DATA$EDU<-"NA"
 DATA$EDU[DATA$v106==0]<-"0: Low"
 DATA$EDU[DATA$v106==1]<-"1: High"
 DATA$EDU[DATA$v106==2]<-"1: High"
 DATA$EDU[DATA$v106==3]<-"1: High"
 DATA$EDU<- as.factor(DATA$EDU)
 #New Variable: AGE (numeric)
 DATA$AGE<-DATA$v007-DATA$v010
 DATA$AGE<- as.numeric(DATA$AGE)

# Paper: Table:1 Section 3.2.2. Descriptive Statistics

label(DATA$FGC) <- "Female Genital Mutilation"
label(DATA$AGE) <- "Age"
label(DATA$EDU) <- "Education Level"
label(DATA$ETHNIC) <- "Ethnicity"
table1(~FGC+AGE+EDU+ETHNIC| FGC, data=DATA)
0: FGC: No
(N=6760)
1: FGC: Yes
(N=10122)
Overall
(N=16882)
Female Genital Mutilation
0: FGC: No 6760 (100%) 0 (0%) 6760 (40.0%)
1: FGC: Yes 0 (0%) 10122 (100%) 10122 (60.0%)
Age
Mean (SD) 25.3 (8.39) 31.8 (9.49) 29.2 (9.60)
Median [Min, Max] 23.0 [15.0, 50.0] 32.0 [15.0, 50.0] 28.0 [15.0, 50.0]
Education Level
0: Low 2865 (42.4%) 6940 (68.6%) 9805 (58.1%)
1: High 3895 (57.6%) 3182 (31.4%) 7077 (41.9%)
Ethnicity
0: Other 3537 (52.3%) 4391 (43.4%) 7928 (47.0%)
1: Mossi 3223 (47.7%) 5731 (56.6%) 8954 (53.0%)

PPT: descriptive plots

# ethnic origin and cutting
#Bar Chart: 2 Dimensional
TABLE1 <- table(DATA$FGC,DATA$ETHNIC) 
TABLE2 <- prop.table(TABLE1,2)
TABLE3 <- as.data.frame(TABLE2)
TABLE3$Percent <- TABLE3$Freq*100
ggplot(TABLE3, aes(fill=Var1, x=Var2, y=Percent))+ 
geom_bar(stat="identity",position="stack")+
 ylab("%")+ 
xlab("Ethnic Origin")+ 
scale_fill_manual(name = "FGC", 
labels = c("No FGC", "FGC"), 
values = c("grey", "red"))

DATA$INT=DATA$ETHNIC:DATA$EDU
TABLE1 <- table(DATA$FGC,DATA$INT) 
TABLE2 <- prop.table(TABLE1,2)
TABLE3 <- as.data.frame(TABLE2)
TABLE3$Percent <- TABLE3$Freq*100
ggplot(TABLE3, aes(fill=Var1, x=Var2, y=Percent))+ 
geom_bar(stat="identity",position="stack")+
 ylab("%")+ 
xlab("Ethnic Origin")+ 
scale_fill_manual(name = "FGC", 
labels = c("No FGC", "FGC"), 
values = c("grey", "red"))

#Bar Chart: 2 Dimensional
 TABLE4 <- table(DATA$FGC,DATA$ETHNIC) 
TABLE5 <- prop.table(TABLE1,2)
 TABLE6 <- as.data.frame(TABLE2)
 TABLE3$Percent <- TABLE3$Freq*100
 ggplot(TABLE3, aes(fill=Var1, x=Var2, y=Percent))+ 
geom_bar(stat="identity",position="stack")+
 ylab("%")+ 
xlab("Ethnic Origin")+ 
scale_fill_manual(name = "FGC", 
labels = c("No FGC", "FGC"), 
values = c("grey", "red"))

Model 1

# recode FGC as numeric
DATA$FGC_num <- ifelse(DATA$FGC == "1: FGC: Yes", 1, 0)
model_fgc <- glm(FGC_num ~ AGE + ETHNIC + EDU, data = DATA, family = binomial())
summary(model_fgc)
## 
## Call:
## glm(formula = FGC_num ~ AGE + ETHNIC + EDU, family = binomial(), 
##     data = DATA)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -1.39214    0.06857  -20.30   <2e-16 ***
## AGE             0.06556    0.00203   32.29   <2e-16 ***
## ETHNIC1: Mossi  0.43722    0.03398   12.87   <2e-16 ***
## EDU1: High     -0.66137    0.03604  -18.35   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 22729  on 16881  degrees of freedom
## Residual deviance: 20293  on 16878  degrees of freedom
## AIC: 20301
## 
## Number of Fisher Scoring iterations: 4
tbl_model_fgc <- tbl_regression(model_fgc, 
               exponentiate = TRUE, # to show odds ratios
               label = list(
                 AGE ~ "Age (Proxy for birth cohorts)",
                 ETHNIC ~ "Ethnicity (1 = Mossi, 0 = Others)",
                 EDU ~ "Education Level (1 = High, 0 = Low)"
               )) %>%
  bold_labels() %>%
  modify_header(label = "**Variable**", estimate = "**OR**", conf.low = "**95% CI Lower**", conf.high = "**95% CI Upper**")

# Convert to flextable
flextbl <- as_flex_table(tbl_model_fgc)
tbl_model_fgc
Variable OR 95% CI Lower 95% CI Upper p-value
Age (Proxy for birth cohorts) 1.07 1.06, 1.07 1.07 <0.001
Ethnicity (1 = Mossi, 0 = Others)



    0: Other

    1: Mossi 1.55 1.45, 1.66 1.66 <0.001
Education Level (1 = High, 0 = Low)



    0: Low

    1: High 0.52 0.48, 0.55 0.55 <0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio
# Save as Word document
save_as_docx(flextbl, path = "Model_1.docx")

Model 2

model_interact <- glm(FGC_num ~ AGE + AGE * EDU + ETHNIC, data = DATA, family = binomial())
summary(model_interact)
## 
## Call:
## glm(formula = FGC_num ~ AGE + AGE * EDU + ETHNIC, family = binomial(), 
##     data = DATA)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -1.188530   0.084614 -14.047  < 2e-16 ***
## AGE             0.058821   0.002599  22.637  < 2e-16 ***
## EDU1: High     -1.115505   0.117519  -9.492  < 2e-16 ***
## ETHNIC1: Mossi  0.440740   0.034035  12.949  < 2e-16 ***
## AGE:EDU1: High  0.016912   0.004170   4.056 4.99e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 22729  on 16881  degrees of freedom
## Residual deviance: 20276  on 16877  degrees of freedom
## AIC: 20286
## 
## Number of Fisher Scoring iterations: 4
# Create the regression table
tbl_model_interact <- tbl_regression(model_interact, 
  exponentiate = TRUE,
  label = list(
    AGE ~ "Age (Proxy for birth cohort)",
    ETHNIC ~ "Ethnicity (1 = Mossi, 0 = Others)",
    EDU ~ "Education Level (1 = High, 0 = Low)",
    `AGE:EDU` ~ "Interaction: Age × Education"
  )) %>%
  bold_labels() %>%
  modify_header(
    label = "**Variable**",
    estimate = "**OR**",
    conf.low = "**95% CI Lower**",
    conf.high = "**95% CI Upper**"
  )

# Convert to flextable
flextbl <- as_flex_table(tbl_model_interact)
tbl_model_interact
Variable OR 95% CI Lower 95% CI Upper p-value
Age (Proxy for birth cohort) 1.06 1.06, 1.07 1.07 <0.001
Education Level (1 = High, 0 = Low)



    0: Low

    1: High 0.33 0.26, 0.41 0.41 <0.001
Ethnicity (1 = Mossi, 0 = Others)



    0: Other

    1: Mossi 1.55 1.45, 1.66 1.66 <0.001
Interaction: Age × Education



    Age (Proxy for birth cohort) * NA * NA 1.02 1.01, 1.03 1.03 <0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio
# Save as Word document
save_as_docx(flextbl, path = "Model_FGC_Table.docx")

Model 2 Probability Plot

plot_model(model_interact, type = "pred", terms = c("AGE [all]", "EDU")) +
  theme_minimal() +
  labs(title = "Interaction Effect of Age and Education on Female Genital Mutilation (FGM)",
       x = "Age", y = "Predicted Probability of FGM")