# Libraries
library(haven)
## Warning: package 'haven' was built under R version 4.3.3
library(descr)
## Warning: package 'descr' was built under R version 4.3.3
library(table1)
## Warning: package 'table1' was built under R version 4.3.3
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.3.3
library(sjPlot)
## Warning: package 'sjPlot' was built under R version 4.3.2
## Learn more about sjPlot with 'browseVignettes("sjPlot")'.
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(flextable)
##
## Attaching package: 'flextable'
## The following object is masked from 'package:gtsummary':
##
## continuous_summary
library(officer)
# DHS 2021-23 data Burkina Faso
DATA<-read_dta("BFIR81FL.dta")
#subset for genital cutting = g102 cut (yes/no)
DATA<-subset(DATA, !(is.na(DATA$g102)))
# removing NAs for variables to use in the models
DATA<-subset(DATA, !(is.na(DATA$v131))) #ethnicity
DATA<-subset(DATA, !(is.na(DATA$v106))) #education
DATA<-subset(DATA, !(is.na(DATA$v010))) #year of birth
#attributes(DATA$v106)
#attributes(DATA$v131)
# Checking g106 "Age at circumcision"
DATA$age_circ_clean <- as_factor(DATA$g106)
# Check levels if needed
levels(DATA$age_circ_clean)
## [1] "0" "1" "2" "3"
## [5] "4" "5" "6" "7"
## [9] "8" "9" "10" "11"
## [13] "12" "13" "14" "15"
## [17] "16" "17" "18" "19"
## [21] "20" "21" "22" "26"
## [25] "44" "during infancy" "don't know"
# Optional: Rename long labels if you like
DATA$age_circ_clean <- recode(DATA$age_circ_clean,
"during infancy" = "Infancy",
"don't know" = "Don't know")
# Step 2: Remove NA values only (not 95 or 98, which are valid responses!)
DATA_clean <- DATA %>%
filter(!is.na(age_circ_clean))
# Step 3: Plot using ggplot2
age_plot<-ggplot(DATA_clean, aes(x = age_circ_clean)) +
geom_bar(fill = "red") +
theme_minimal() +
labs(title = "Reported Age at Circumcision",
x = "Age at Circumcision (in years or category)",
y = "Number of Women") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
age_plot
#New Variable: Female genital cutting (factor)
DATA$FGC<-"NA"
DATA$FGC[DATA$g102==0]<-"0: FGC: No"
DATA$FGC[DATA$g102==1]<-"1: FGC: Yes"
DATA$FGC<- as.factor(DATA$FGC) # I added
#New Variable: Ethnicity (factor)
DATA$ETHNIC<-"0: Other"
DATA$ETHNIC[DATA$v131==7]<-"1: Mossi"
DATA$ETHNIC<- as.factor(DATA$ETHNIC)
#New Variable: Education (factor)
DATA$EDU<-"NA"
DATA$EDU[DATA$v106==0]<-"0: Low"
DATA$EDU[DATA$v106==1]<-"1: High"
DATA$EDU[DATA$v106==2]<-"1: High"
DATA$EDU[DATA$v106==3]<-"1: High"
DATA$EDU<- as.factor(DATA$EDU)
#New Variable: AGE (numeric)
DATA$AGE<-DATA$v007-DATA$v010
DATA$AGE<- as.numeric(DATA$AGE)
label(DATA$FGC) <- "Female Genital Mutilation"
label(DATA$AGE) <- "Age"
label(DATA$EDU) <- "Education Level"
label(DATA$ETHNIC) <- "Ethnicity"
table1(~FGC+AGE+EDU+ETHNIC| FGC, data=DATA)
| 0: FGC: No (N=6760) |
1: FGC: Yes (N=10122) |
Overall (N=16882) |
|
|---|---|---|---|
| Female Genital Mutilation | |||
| 0: FGC: No | 6760 (100%) | 0 (0%) | 6760 (40.0%) |
| 1: FGC: Yes | 0 (0%) | 10122 (100%) | 10122 (60.0%) |
| Age | |||
| Mean (SD) | 25.3 (8.39) | 31.8 (9.49) | 29.2 (9.60) |
| Median [Min, Max] | 23.0 [15.0, 50.0] | 32.0 [15.0, 50.0] | 28.0 [15.0, 50.0] |
| Education Level | |||
| 0: Low | 2865 (42.4%) | 6940 (68.6%) | 9805 (58.1%) |
| 1: High | 3895 (57.6%) | 3182 (31.4%) | 7077 (41.9%) |
| Ethnicity | |||
| 0: Other | 3537 (52.3%) | 4391 (43.4%) | 7928 (47.0%) |
| 1: Mossi | 3223 (47.7%) | 5731 (56.6%) | 8954 (53.0%) |
# ethnic origin and cutting
#Bar Chart: 2 Dimensional
TABLE1 <- table(DATA$FGC,DATA$ETHNIC)
TABLE2 <- prop.table(TABLE1,2)
TABLE3 <- as.data.frame(TABLE2)
TABLE3$Percent <- TABLE3$Freq*100
ggplot(TABLE3, aes(fill=Var1, x=Var2, y=Percent))+
geom_bar(stat="identity",position="stack")+
ylab("%")+
xlab("Ethnic Origin")+
scale_fill_manual(name = "FGC",
labels = c("No FGC", "FGC"),
values = c("grey", "red"))
DATA$INT=DATA$ETHNIC:DATA$EDU
TABLE1 <- table(DATA$FGC,DATA$INT)
TABLE2 <- prop.table(TABLE1,2)
TABLE3 <- as.data.frame(TABLE2)
TABLE3$Percent <- TABLE3$Freq*100
ggplot(TABLE3, aes(fill=Var1, x=Var2, y=Percent))+
geom_bar(stat="identity",position="stack")+
ylab("%")+
xlab("Ethnic Origin")+
scale_fill_manual(name = "FGC",
labels = c("No FGC", "FGC"),
values = c("grey", "red"))
#Bar Chart: 2 Dimensional
TABLE4 <- table(DATA$FGC,DATA$ETHNIC)
TABLE5 <- prop.table(TABLE1,2)
TABLE6 <- as.data.frame(TABLE2)
TABLE3$Percent <- TABLE3$Freq*100
ggplot(TABLE3, aes(fill=Var1, x=Var2, y=Percent))+
geom_bar(stat="identity",position="stack")+
ylab("%")+
xlab("Ethnic Origin")+
scale_fill_manual(name = "FGC",
labels = c("No FGC", "FGC"),
values = c("grey", "red"))
# recode FGC as numeric
DATA$FGC_num <- ifelse(DATA$FGC == "1: FGC: Yes", 1, 0)
model_fgc <- glm(FGC_num ~ AGE + ETHNIC + EDU, data = DATA, family = binomial())
summary(model_fgc)
##
## Call:
## glm(formula = FGC_num ~ AGE + ETHNIC + EDU, family = binomial(),
## data = DATA)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.39214 0.06857 -20.30 <2e-16 ***
## AGE 0.06556 0.00203 32.29 <2e-16 ***
## ETHNIC1: Mossi 0.43722 0.03398 12.87 <2e-16 ***
## EDU1: High -0.66137 0.03604 -18.35 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 22729 on 16881 degrees of freedom
## Residual deviance: 20293 on 16878 degrees of freedom
## AIC: 20301
##
## Number of Fisher Scoring iterations: 4
tbl_model_fgc <- tbl_regression(model_fgc,
exponentiate = TRUE, # to show odds ratios
label = list(
AGE ~ "Age (Proxy for birth cohorts)",
ETHNIC ~ "Ethnicity (1 = Mossi, 0 = Others)",
EDU ~ "Education Level (1 = High, 0 = Low)"
)) %>%
bold_labels() %>%
modify_header(label = "**Variable**", estimate = "**OR**", conf.low = "**95% CI Lower**", conf.high = "**95% CI Upper**")
# Convert to flextable
flextbl <- as_flex_table(tbl_model_fgc)
tbl_model_fgc
| Variable | OR | 95% CI Lower | 95% CI Upper | p-value |
|---|---|---|---|---|
| Age (Proxy for birth cohorts) | 1.07 | 1.06, 1.07 | 1.07 | <0.001 |
| Ethnicity (1 = Mossi, 0 = Others) | ||||
| 0: Other | — | — | ||
| 1: Mossi | 1.55 | 1.45, 1.66 | 1.66 | <0.001 |
| Education Level (1 = High, 0 = Low) | ||||
| 0: Low | — | — | ||
| 1: High | 0.52 | 0.48, 0.55 | 0.55 | <0.001 |
| Abbreviations: CI = Confidence Interval, OR = Odds Ratio | ||||
# Save as Word document
save_as_docx(flextbl, path = "Model_1.docx")
model_interact <- glm(FGC_num ~ AGE + AGE * EDU + ETHNIC, data = DATA, family = binomial())
summary(model_interact)
##
## Call:
## glm(formula = FGC_num ~ AGE + AGE * EDU + ETHNIC, family = binomial(),
## data = DATA)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.188530 0.084614 -14.047 < 2e-16 ***
## AGE 0.058821 0.002599 22.637 < 2e-16 ***
## EDU1: High -1.115505 0.117519 -9.492 < 2e-16 ***
## ETHNIC1: Mossi 0.440740 0.034035 12.949 < 2e-16 ***
## AGE:EDU1: High 0.016912 0.004170 4.056 4.99e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 22729 on 16881 degrees of freedom
## Residual deviance: 20276 on 16877 degrees of freedom
## AIC: 20286
##
## Number of Fisher Scoring iterations: 4
# Create the regression table
tbl_model_interact <- tbl_regression(model_interact,
exponentiate = TRUE,
label = list(
AGE ~ "Age (Proxy for birth cohort)",
ETHNIC ~ "Ethnicity (1 = Mossi, 0 = Others)",
EDU ~ "Education Level (1 = High, 0 = Low)",
`AGE:EDU` ~ "Interaction: Age × Education"
)) %>%
bold_labels() %>%
modify_header(
label = "**Variable**",
estimate = "**OR**",
conf.low = "**95% CI Lower**",
conf.high = "**95% CI Upper**"
)
# Convert to flextable
flextbl <- as_flex_table(tbl_model_interact)
tbl_model_interact
| Variable | OR | 95% CI Lower | 95% CI Upper | p-value |
|---|---|---|---|---|
| Age (Proxy for birth cohort) | 1.06 | 1.06, 1.07 | 1.07 | <0.001 |
| Education Level (1 = High, 0 = Low) | ||||
| 0: Low | — | — | ||
| 1: High | 0.33 | 0.26, 0.41 | 0.41 | <0.001 |
| Ethnicity (1 = Mossi, 0 = Others) | ||||
| 0: Other | — | — | ||
| 1: Mossi | 1.55 | 1.45, 1.66 | 1.66 | <0.001 |
| Interaction: Age × Education | ||||
| Age (Proxy for birth cohort) * NA * NA | 1.02 | 1.01, 1.03 | 1.03 | <0.001 |
| Abbreviations: CI = Confidence Interval, OR = Odds Ratio | ||||
# Save as Word document
save_as_docx(flextbl, path = "Model_FGC_Table.docx")
plot_model(model_interact, type = "pred", terms = c("AGE [all]", "EDU")) +
theme_minimal() +
labs(title = "Interaction Effect of Age and Education on Female Genital Mutilation (FGM)",
x = "Age", y = "Predicted Probability of FGM")