Overview
Propensity score matching (PSM) has been widely used in the
evaluation landscape as a statistical approach of measuring effects of a
treatment/exposure, in which a treatment case is matched with one or
more control cases based on a computed propensity score. Propensity
score is defined as the conditional probability of assignment to
treatment given a set of observed covariates and when used
appropriately, the results groups tend to have similar characteristics
to those created through random assignment. The conditional
probabilities are estimated using logistic/probit regression in which
the treatment variable is the dependent variable and baseline
characteristics are independent variable. The goal of this procedure is
to create propensity score estimates that balance the treatment and
control conditions on a set of baseline covariates.
In other words, the initial step will help determine the likelihood
of a beneficiary receiving treatment based on baseline characteristics
like sex, age, marital status, and others. The subsequent step involves
matching individuals in the control group with those in the treatment
group according to the similarity or closeness of their propensity
scores. The efficacy of propensity score matching (PSM) hinges on the
covariates chosen, the balancing of baseline characteristics, and the
selected PSM algorithm.
In this analysis, we will compare outcome indicators on PrEP
knowledge and mental health empowerment between the treatment group and
control group. We will utilise 4 matching algorithms.
- Nearest neighbour
- Optimal match
- Inverse probability weighting
- Statification method
Check for potential
covariates
With PSM, the set of covariates selected for the model needs to
ensure elimination of selection bias . A number of techniques have been
recommended in selection of covariates:
- Covariates that predict selection: This approach involves
identifying covariates associated with the selection process, either by
examining the relationship between the treatment variable and covariates
in baseline or pilot data, or through interviews with informants
involved in selecting program participants.
- Covariates that predict outcomes: Another approach to identifying a
sufficient set of covariates for use in propensity scores is to focus on
variables that are predictive of the outcomes of interest. An effective
approach to identifying these covariates, when possible, is to utilise
secondary data from previous studies or through the baseline/pilot
data.
Covariate (s) 1: Level of
education
For the level of education, we will create two binary variables. The
first variable will be for AYP with primary level of education and coded
(1 primary, 0 other). The second variable will be AYP who have attended
secondary level of education and coded (1- secondary, 0 other)
Primary
attr(mydata$B_1, "labels")
## Never attended
## 1
## Primary
## 2
## Secondary
## 3
## Technical & vocational
## 4
## Higher
## 5
## Madrasa (religious trainings)
## 6
## Informal education (literacy and numeracy) education
## 7
## Other (Specify)
## 8
mydata$primary <- ifelse(mydata$B_1 == 2, 1, 0)
mydata$primary <- factor(mydata$primary, levels = c(0, 1), labels = c("primary", "other"))
table(mydata$primary)
##
## primary other
## 685 407
sjPlot::tab_xtab(var.row = mydata$primary, var.col = mydata$treatment, title = "Education level: Primary", show.col.prc = TRUE)
Education level: Primary
|
primary
|
treatment
|
Total
|
|
control
|
treatment
|
|
primary
|
249 46.4 %
|
436 78.6 %
|
685 62.7 %
|
|
other
|
288 53.6 %
|
119 21.4 %
|
407 37.3 %
|
|
Total
|
537 100 %
|
555 100 %
|
1092 100 %
|
χ2=119.587 · df=1 · φ=0.333 · p=0.000
|
Secondary
mydata$secondary <- ifelse(mydata$B_1 == 3, 1, 0)
mydata$secondary <- factor(mydata$secondary, levels = c(0, 1), labels = c("other", "secondary"))
table(mydata$secondary)
##
## other secondary
## 622 470
sjPlot::tab_xtab(var.row = mydata$secondary, var.col = mydata$treatment, title = "Education level: Secondary", show.col.prc = TRUE)
Education level: Secondary
|
secondary
|
treatment
|
Total
|
|
control
|
treatment
|
|
other
|
355 66.1 %
|
267 48.1 %
|
622 57 %
|
|
secondary
|
182 33.9 %
|
288 51.9 %
|
470 43 %
|
|
Total
|
537 100 %
|
555 100 %
|
1092 100 %
|
χ2=35.339 · df=1 · φ=0.182 · p=0.000
|
Covariate 2: Member of KP:
Create a variable on key population code 1-yes; 0-no
attr(mydata$A_8_alternative, "labels")
## General population Key population
## 0 1
table(mydata$A_8_alternative,useNA = "ifany")
##
## 0 1
## 984 108
mydata$kp <- mydata$A_8_alternative
mydata$kp <- factor(mydata$kp, levels = c(0, 1), labels = c("No", "Yes"))
sjPlot::tab_xtab(var.row = mydata$kp, var.col = mydata$treatment, title = "Member of key population", show.col.prc = TRUE)
Member of key population
|
kp
|
treatment
|
Total
|
|
control
|
treatment
|
|
No
|
502 93.5 %
|
482 86.8 %
|
984 90.1 %
|
|
Yes
|
35 6.5 %
|
73 13.2 %
|
108 9.9 %
|
|
Total
|
537 100 %
|
555 100 %
|
1092 100 %
|
χ2=12.750 · df=1 · φ=0.111 · p=0.000
|
Covariate 3: Poverty status:
Create a variable poverty_status coded 1 “Poor” and 0 “Non-poor”
attr(mydata$PPI_Category_alternative, "labels")
## Poor Non-poor
## 0 1
table(mydata$PPI_Category_alternative,useNA = "ifany")
##
## 0 1
## 400 692
mydata$poverty_status <- ifelse(mydata$PPI_Category_alternative == 1, 0, 1)
mydata$poverty_status <- factor(mydata$poverty_status, levels = c(0, 1), labels = c("non-poor", "poor"))
sjPlot::tab_xtab(var.row = mydata$poverty_status, var.col = mydata$treatment, title = "Poverty status(PPI)", show.col.prc = TRUE)
Poverty status(PPI)
|
poverty_status
|
treatment
|
Total
|
|
control
|
treatment
|
|
non-poor
|
257 47.9 %
|
435 78.4 %
|
692 63.4 %
|
|
poor
|
280 52.1 %
|
120 21.6 %
|
400 36.6 %
|
|
Total
|
537 100 %
|
555 100 %
|
1092 100 %
|
χ2=108.208 · df=1 · φ=0.317 · p=0.000
|
Covariate 4: Age category :
This is a continuous variable of AYP aged 15 to 24 years
#attr(mydata$age_category, "labels")
#table(mydata$age_category,useNA = "ifany")
#mydata$age<- mydata$age_category
#mydata$age <- factor(mydata$age, levels = c(0, 1), labels = c("15-19", "20-24"))
#sjPlot::tab_xtab(var.row = mydata$age, var.col = mydata$treatment, title = "Age", show.col.prc = TRUE)
mydata$age<-mydata$A_1
mydata %>%
group_by(treatment) %>%
summarise(
mean = mean(age, na.rm = TRUE),
sd = sd(age, na.rm = TRUE)
) %>%
kable(format = "html", caption = "Age of the respondent") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Age of the respondent
|
treatment
|
mean
|
sd
|
|
control
|
20.83985
|
2.905934
|
|
treatment
|
21.64504
|
2.132385
|
Covariate 5: Biological sex :
Create a variable called sex coded 1 “Male” and 0 “Female”
attr(mydata$A_3, "labels")
## Female Male
## 0 1
table(mydata$A_3,useNA = "ifany")
##
## 0 1
## 888 204
mydata$sex<- mydata$A_3
mydata$sex <- factor(mydata$sex, levels = c(0, 1), labels = c("female", "male"))
sjPlot::tab_xtab(var.row = mydata$sex, var.col = mydata$treatment, title = "Sex of the respondent", show.col.prc = TRUE)
Sex of the respondent
|
sex
|
treatment
|
Total
|
|
control
|
treatment
|
|
female
|
426 79.3 %
|
462 83.2 %
|
888 81.3 %
|
|
male
|
111 20.7 %
|
93 16.8 %
|
204 18.7 %
|
|
Total
|
537 100 %
|
555 100 %
|
1092 100 %
|
χ2=2.500 · df=1 · φ=0.050 · p=0.114
|
Covariate 4: Decision making:
Create a variable about the person who make decision on accessing a
service from a health facility and code(1-myself, 0-other)
attr(mydata$A_4_2new, "labels")
## Myself My family my partner
## 1 2 3
table(mydata$A_4_2new,useNA = "ifany")
##
## 1 2 3 <NA>
## 937 78 28 49
mydata$decider <- ifelse(is.na(mydata$A_4_2new) | mydata$A_4_2new != 1, 1, 0)
mydata$decider <- factor(mydata$decider, levels = c(0, 1), labels = c("myself", "other"))
table(mydata$decider)
##
## myself other
## 937 155
sjPlot::tab_xtab(var.row = mydata$decider, var.col = mydata$treatment, title = "Who decides if or when you can access services at a health clinic", show.col.prc = TRUE)
Who decides if or when you can access services at a health clinic
|
decider
|
treatment
|
Total
|
|
control
|
treatment
|
|
myself
|
434 80.8 %
|
503 90.6 %
|
937 85.8 %
|
|
other
|
103 19.2 %
|
52 9.4 %
|
155 14.2 %
|
|
Total
|
537 100 %
|
555 100 %
|
1092 100 %
|
χ2=20.773 · df=1 · φ=0.141 · p=0.000
|
Covariate 7: Relationship
status:
On relationship status, we will create two variables, the first
variable, we will have AYP who are married/union and will be coded
(1-married/union, 0 -other). The second variable will have AYP who are
single/not currently in a union and coded (1-single, 0-other)
Married/in a union
attr(mydata$A_9, "labels")
## Yes, currently married Yes, living with a partner
## 1 2
## Divorced / separated Widow
## 3 4
## No, never in union No response
## 5 6
mydata$married <- ifelse(mydata$A_9<=2, 1, 0)
mydata$married <- factor(mydata$married, levels = c(0, 1), labels = c("other", "Married/union"))
table(mydata$married,useNA = "ifany")
##
## other Married/union
## 634 458
sjPlot::tab_xtab(var.row = mydata$married, var.col = mydata$treatment, title = "Relationship status", show.col.prc = TRUE)
Relationship status
|
married
|
treatment
|
Total
|
|
control
|
treatment
|
|
other
|
309 57.5 %
|
325 58.6 %
|
634 58.1 %
|
|
Married/union
|
228 42.5 %
|
230 41.4 %
|
458 41.9 %
|
|
Total
|
537 100 %
|
555 100 %
|
1092 100 %
|
χ2=0.078 · df=1 · φ=0.010 · p=0.780
|
Single/not in a union
attr(mydata$A_9, "labels")
## Yes, currently married Yes, living with a partner
## 1 2
## Divorced / separated Widow
## 3 4
## No, never in union No response
## 5 6
mydata$single <- ifelse(mydata$A_9==5, 1, 0)
mydata$single <- factor(mydata$single, levels = c(0, 1), labels = c("other", "Single"))
table(mydata$single,useNA = "ifany")
##
## other Single
## 530 562
sjPlot::tab_xtab(var.row = mydata$single, var.col = mydata$treatment, title = "Relationship status", show.col.prc = TRUE)
Relationship status
|
single
|
treatment
|
Total
|
|
control
|
treatment
|
|
other
|
262 48.8 %
|
268 48.3 %
|
530 48.5 %
|
|
Single
|
275 51.2 %
|
287 51.7 %
|
562 51.5 %
|
|
Total
|
537 100 %
|
555 100 %
|
1092 100 %
|
χ2=0.011 · df=1 · φ=0.005 · p=0.916
|
Covariate 8: Use of a condom:
In this variable, we are looking for AYP who used a condom during the
previous sex and code 1 yes and 0 for no
attr(mydata$D_37, "labels")
## No Yes Refused
## 0 1 99
table(mydata$D_37,useNA = "ifany")
##
## 0 1 99 <NA>
## 389 273 7 423
mydata$D_37 <- ifelse(is.na(mydata$D_37) | mydata$D_37==0|mydata$D_37==99,0,1)
mydata$condom_use <- factor(mydata$D_37, levels = c(0, 1), labels = c("no", "yes"))
sjPlot::tab_xtab(var.row = mydata$condom_use, var.col = mydata$treatment, title = "Used condom during the last sex", show.col.prc = TRUE)
Used condom during the last sex
|
condom_use
|
treatment
|
Total
|
|
control
|
treatment
|
|
no
|
456 84.9 %
|
363 65.4 %
|
819 75 %
|
|
yes
|
81 15.1 %
|
192 34.6 %
|
273 25 %
|
|
Total
|
537 100 %
|
555 100 %
|
1092 100 %
|
χ2=54.375 · df=1 · φ=0.225 · p=0.000
|
Covariate 9: Transactional sex:
In this variable we are looking for users who have engaged previously
with transactional sex and we will code 1 for yes and 0 for no
attr(mydata$D_41, "labels")
## No Yes Refused
## 0 1 99
table(mydata$D_41,useNA = "ifany")
##
## 0 1 99 <NA>
## 525 140 4 423
mydata$D_41 <- ifelse(is.na(mydata$D_41) | mydata$D_41==0|mydata$D_41==99,0,1)
mydata$transcational_sex <- ifelse(mydata$D_41==1, 1, 0)
mydata$transcational_sex <- factor(mydata$transcational_sex, levels = c(0, 1), labels = c("no", "yes"))
sjPlot::tab_xtab(var.row = mydata$transcational_sex, var.col = mydata$treatment, title = "User engaged with transcational sex", show.col.prc = TRUE)
User engaged with transcational sex
|
transcational_sex
|
treatment
|
Total
|
|
control
|
treatment
|
|
no
|
506 94.2 %
|
446 80.4 %
|
952 87.2 %
|
|
yes
|
31 5.8 %
|
109 19.6 %
|
140 12.8 %
|
|
Total
|
537 100 %
|
555 100 %
|
1092 100 %
|
χ2=45.722 · df=1 · φ=0.207 · p=0.000
|
Covariate 11: Awareness of the
programme:
In this we are looking for AYP aware of the programme and coded 1
“Yes” and 0 “No”
attr(mydata$Exp_a, "labels")
## No Yes
## 0 1
table(mydata$Exp_a,useNA = "ifany")
##
## 0 1
## 829 263
mydata$aware_tiko<- mydata$Exp_a
mydata$aware_tiko <- factor(mydata$aware_tiko, levels = c(0, 1), labels = c("no", "yes"))
sjPlot::tab_xtab(var.row = mydata$aware_tiko, var.col = mydata$treatment, title = "Aware of Tiko platform", show.col.prc = TRUE)
Aware of Tiko platform
|
aware_tiko
|
treatment
|
Total
|
|
control
|
treatment
|
|
no
|
530 98.7 %
|
299 53.9 %
|
829 75.9 %
|
|
yes
|
7 1.3 %
|
256 46.1 %
|
263 24.1 %
|
|
Total
|
537 100 %
|
555 100 %
|
1092 100 %
|
χ2=297.451 · df=1 · φ=0.524 · p=0.000
|
Outcome indicators
On PrEP knowledge, we utilise principal component analysis (PCA) on
PrEP questions on knowledge. We save the first principal component and
re-scale from 0-100.
\[ score = \frac{(x -
\text{min})}{(\text{max} - \text{min})} \times 100 \]
PrEP knowledge
score(PCA)
mydata <- mydata %>%
mutate(across(.cols = starts_with("F4_"),
.fns = ~ ifelse(.x == 1, 1, 0),
.names = "F4{.col}")) %>%
mutate(across(starts_with("F4F4_"), ~ replace_na(.x, 0))) %>%
mutate(across(starts_with("F4F4_"), ~ factor(.x, levels = c(0, 1), labels = c("False", "True"))))
# Subset the data to include only the new variables
data_pca <- select(mydata, starts_with("F4F4_"))
data_pca <- data_pca %>%
mutate(across(everything(), as.numeric))
pca_results <- prcomp(data_pca, center = TRUE, scale. = FALSE)
# Extract the first principal component scores
mydata$pca_knowledge <- pca_results$x[, 1]
# Rescale from 0-100
mydata=mydata %>%
mutate(PrEP_Knowledge = round(100*(pca_knowledge - min(pca_knowledge)) / (max(pca_knowledge) - min(pca_knowledge)),1))
mydata %>%
select(treatment, pca_knowledge,PrEP_Knowledge) %>%
datatable()
mydata %>%
group_by(treatment) %>%
summarise(
mean = mean(PrEP_Knowledge, na.rm = TRUE),
sd = sd(PrEP_Knowledge, na.rm = TRUE)
) %>%
kable(format = "html", caption = "Summary Statistics of PrEP Knowledge by Treatment") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Summary Statistics of PrEP Knowledge by Treatment
|
treatment
|
mean
|
sd
|
|
control
|
65.95438
|
40.44159
|
|
treatment
|
36.84216
|
41.50238
|
Mental health empowerment:
The mental health empowerment is computed using principal component
analysis (PCA) based on relevant questions. We save the first principal
component and re-scale from 0-100.
mydata <- mydata %>%
mutate(
D10_1_1 = case_when(
D10_1_1 == 1 ~ 1,
D10_1_1 == 2 ~ 2,
D10_1_1 == 3 ~ 0,
D10_1_1 == 4 ~ 3,
D10_1_1 == 5 ~ 4,
TRUE ~ 0 # Set all other cases including NA to 0
),
D10_1_2 = case_when(
D10_1_2 == 1 ~ 1,
D10_1_2 == 2 ~ 2,
D10_1_2 == 3 ~ 0,
D10_1_2 == 4 ~ 3,
D10_1_2 == 5 ~ 4,
TRUE ~ 0
),
D10_1_3 = case_when(
D10_1_3 == 1 ~ 4,
D10_1_3 == 2 ~ 3,
D10_1_3 == 3 ~ 0,
D10_1_3 == 4 ~ 2,
D10_1_3 == 5 ~ 1,
TRUE ~ 0
),
D10_1_4 = case_when(
D10_1_4 == 1 ~ 1,
D10_1_4 == 2 ~ 2,
D10_1_4 == 3 ~ 0,
D10_1_4 == 4 ~ 3,
D10_1_4 == 5 ~ 4,
TRUE ~ 0
),
D10_1_5 = case_when(
D10_1_5 == 1 ~ 4,
D10_1_5 == 2 ~ 3,
D10_1_5 == 3 ~ 0,
D10_1_5 == 4 ~ 2,
D10_1_5 == 5 ~ 1,
TRUE ~ 0
),
D10_1_6 = case_when(
D10_1_6 == 1 ~ 1,
D10_1_6 == 2 ~ 2,
D10_1_6 == 3 ~ 0,
D10_1_6 == 4 ~ 3,
D10_1_6 == 5 ~ 4,
TRUE ~ 0
),
D10_1_7 = case_when(
D10_1_7 == 1 ~ 1,
D10_1_7 == 2 ~ 2,
D10_1_7 == 3 ~ 0,
D10_1_7 == 4 ~ 3,
D10_1_7 == 5 ~ 4,
TRUE ~ 0
),
D10_1_8 = case_when(
D10_1_8 == 1 ~ 4,
D10_1_8 == 2 ~ 3,
D10_1_8 == 3 ~ 0,
D10_1_8 == 4 ~ 2,
D10_1_8 == 5 ~ 1,
TRUE ~ 0
),
D10_1_9 = case_when(
D10_1_9 == 1 ~ 4,
D10_1_9 == 2 ~ 3,
D10_1_9 == 3 ~ 0,
D10_1_9 == 4 ~ 2,
D10_1_9 == 5 ~ 1,
TRUE ~ 0
),
D10_1_10 = case_when(
D10_1_10 == 1 ~ 4,
D10_1_10 == 2 ~ 3,
D10_1_10 == 3 ~ 0,
D10_1_10 == 4 ~ 2,
D10_1_10 == 5 ~ 1,
TRUE ~ 0
),
across(starts_with("D10_1_"), ~ replace_na(.x, 0))
)
mydata %>%
select(treatment,D10_1_1,D10_1_2,D10_1_3,D10_1_4,D10_1_5,D10_1_6,D10_1_7,D10_1_8,D10_1_9,D10_1_10) %>%
datatable()
# Subset the data to include only the new variables
data_pca2 <- select(mydata, starts_with("D10_1_"))
data_pca2 <- data_pca2 %>%
mutate(across(everything(), as.numeric))
pca_results2 <- prcomp(data_pca2, center = TRUE, scale. = FALSE)
# Extract the first principal component scores
mydata$mental_pca <- pca_results2$x[, 1]
# Re-scale from 0-100
mydata=mydata %>%
mutate(mh_empowerment = round(100*(mental_pca - min(mental_pca)) / (max(mental_pca) - min(mental_pca)),1))
mydata %>%
select(treatment, mental_pca,mh_empowerment) %>%
datatable()
Covariates selection
To identify covariates, we will first run a binary logistic
regression of the treatment variable against all potential covariates.
Then identify those covariates that have a significant relationship with
the treatment (p-value)
The second step will be to run a multiple linear regression of
outcome variables (PrEP knowledge score and mental health empowerment
score) against all covariates. Then select covariates that have a
significant relationship.
mydata<-mydata %>%
select(treatment,primary,secondary,kp,poverty_status,age,sex,decider,married,single,condom_use,transcational_sex,sources,PrEP_Knowledge,mh_empowerment,psu, aware_tiko,PrEP_Knowledge,mh_empowerment,A_1)
head(mydata,10) %>%
kable("html") %>%
kable_styling(font_size=12) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
|
treatment
|
primary
|
secondary
|
kp
|
poverty_status
|
age
|
sex
|
decider
|
married
|
single
|
condom_use
|
transcational_sex
|
sources
|
PrEP_Knowledge
|
mh_empowerment
|
psu
|
aware_tiko
|
A_1
|
|
treatment
|
primary
|
secondary
|
No
|
poor
|
22
|
female
|
myself
|
other
|
Single
|
yes
|
yes
|
1
|
0.0
|
25.4
|
1
|
yes
|
22
|
|
treatment
|
other
|
other
|
No
|
non-poor
|
22
|
female
|
other
|
Married/union
|
other
|
no
|
no
|
2
|
0.0
|
33.0
|
1
|
no
|
22
|
|
treatment
|
primary
|
other
|
No
|
non-poor
|
22
|
female
|
myself
|
Married/union
|
other
|
no
|
no
|
1
|
32.4
|
30.6
|
1
|
no
|
22
|
|
treatment
|
primary
|
secondary
|
No
|
non-poor
|
18
|
female
|
myself
|
other
|
Single
|
no
|
no
|
1
|
15.9
|
44.9
|
1
|
no
|
18
|
|
treatment
|
primary
|
secondary
|
No
|
poor
|
24
|
female
|
myself
|
Married/union
|
other
|
no
|
no
|
3
|
66.4
|
33.6
|
1
|
no
|
24
|
|
treatment
|
primary
|
secondary
|
No
|
non-poor
|
18
|
female
|
myself
|
other
|
Single
|
yes
|
no
|
1
|
83.6
|
9.2
|
1
|
no
|
18
|
|
treatment
|
primary
|
secondary
|
No
|
non-poor
|
19
|
female
|
myself
|
other
|
Single
|
no
|
no
|
1
|
32.4
|
23.9
|
1
|
no
|
19
|
|
treatment
|
primary
|
secondary
|
No
|
non-poor
|
24
|
female
|
myself
|
Married/union
|
other
|
no
|
no
|
1
|
100.0
|
37.0
|
1
|
no
|
24
|
|
treatment
|
primary
|
other
|
No
|
non-poor
|
24
|
female
|
myself
|
Married/union
|
other
|
yes
|
no
|
0
|
32.4
|
53.3
|
1
|
no
|
24
|
|
treatment
|
primary
|
secondary
|
No
|
non-poor
|
22
|
female
|
myself
|
Married/union
|
other
|
no
|
no
|
0
|
66.3
|
26.6
|
1
|
no
|
22
|
#Treatment variable run against covariates using binary logistic regression
fit1 <- glm(treatment ~ primary + secondary+poverty_status + kp + age +
sex + decider + married + single + condom_use + transcational_sex+aware_tiko+sources,
family = "binomial", data = mydata)
# Outcome variables run against covariates using multiple linear regression
fit2=lm(PrEP_Knowledge~primary + secondary+poverty_status + kp + age +
sex + decider + married + single + condom_use + transcational_sex+aware_tiko+sources+A_1, data=mydata)
summary(fit1)
##
## Call:
## glm(formula = treatment ~ primary + secondary + poverty_status +
## kp + age + sex + decider + married + single + condom_use +
## transcational_sex + aware_tiko + sources, family = "binomial",
## data = mydata)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.51695 0.91010 1.667 0.0956 .
## primaryother -1.12610 0.23665 -4.759 1.95e-06 ***
## secondarysecondary -0.23878 0.21285 -1.122 0.2619
## poverty_statuspoor -1.13620 0.17912 -6.343 2.25e-10 ***
## kpYes -0.55398 0.43188 -1.283 0.1996
## age -0.02200 0.03621 -0.608 0.5435
## sexmale -0.38640 0.22982 -1.681 0.0927 .
## deciderother -0.50201 0.23986 -2.093 0.0364 *
## marriedMarried/union 0.07675 0.34302 0.224 0.8230
## singleSingle -0.26792 0.34301 -0.781 0.4347
## condom_useyes 0.28976 0.19623 1.477 0.1398
## transcational_sexyes 0.72623 0.40163 1.808 0.0706 .
## aware_tikoyes 4.12750 0.41188 10.021 < 2e-16 ***
## sources -0.51285 0.12843 -3.993 6.52e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1513.5 on 1091 degrees of freedom
## Residual deviance: 990.8 on 1078 degrees of freedom
## AIC: 1018.8
##
## Number of Fisher Scoring iterations: 6
fit3=lm(mh_empowerment~primary + secondary+poverty_status + kp + age +
sex + decider + married + single + condom_use + transcational_sex+aware_tiko+sources, data=mydata)
summary(fit2)
##
## Call:
## lm(formula = PrEP_Knowledge ~ primary + secondary + poverty_status +
## kp + age + sex + decider + married + single + condom_use +
## transcational_sex + aware_tiko + sources + A_1, data = mydata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -84.452 -28.998 5.987 29.709 89.304
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 98.1020 13.2247 7.418 2.40e-13 ***
## primaryother 8.6996 3.4823 2.498 0.012630 *
## secondarysecondary 1.3599 3.1214 0.436 0.663156
## poverty_statuspoor 10.4836 2.5354 4.135 3.83e-05 ***
## kpYes -17.0202 4.9380 -3.447 0.000589 ***
## age -2.0294 0.5257 -3.861 0.000120 ***
## sexmale 4.0326 3.0635 1.316 0.188342
## deciderother -4.9517 3.3951 -1.458 0.145003
## marriedMarried/union 1.7819 4.7679 0.374 0.708678
## singleSingle -1.0532 4.7373 -0.222 0.824102
## condom_useyes -6.8953 2.8002 -2.462 0.013955 *
## transcational_sexyes -12.9653 4.5513 -2.849 0.004474 **
## aware_tikoyes -30.7512 2.8944 -10.624 < 2e-16 ***
## sources 0.6908 1.7903 0.386 0.699677
## A_1 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37.09 on 1078 degrees of freedom
## Multiple R-squared: 0.281, Adjusted R-squared: 0.2724
## F-statistic: 32.42 on 13 and 1078 DF, p-value: < 2.2e-16
summary(fit3)
##
## Call:
## lm(formula = mh_empowerment ~ primary + secondary + poverty_status +
## kp + age + sex + decider + married + single + condom_use +
## transcational_sex + aware_tiko + sources, data = mydata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -63.842 -15.399 1.298 16.636 44.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 67.8506 7.8287 8.667 < 2e-16 ***
## primaryother -0.2733 2.0615 -0.133 0.894555
## secondarysecondary -1.0252 1.8478 -0.555 0.579148
## poverty_statuspoor 5.3112 1.5009 3.539 0.000419 ***
## kpYes -8.4831 2.9232 -2.902 0.003783 **
## age -0.3130 0.3112 -1.006 0.314751
## sexmale -0.2326 1.8135 -0.128 0.897962
## deciderother -0.8861 2.0099 -0.441 0.659406
## marriedMarried/union 2.3031 2.8225 0.816 0.414700
## singleSingle 2.8136 2.8044 1.003 0.315941
## condom_useyes -3.0325 1.6576 -1.829 0.067612 .
## transcational_sexyes 5.2405 2.6943 1.945 0.052030 .
## aware_tikoyes -5.7348 1.7134 -3.347 0.000845 ***
## sources 0.5714 1.0598 0.539 0.589917
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.95 on 1078 degrees of freedom
## Multiple R-squared: 0.05113, Adjusted R-squared: 0.03969
## F-statistic: 4.468 on 13 and 1078 DF, p-value: 1.898e-07
PSM algorithm
Nearest neighbour approach
Without a caliper:
model=matchit(treatment ~ primary+poverty_status + kp + age + sex + decider + condom_use + transcational_sex+sources+aware_tiko,
data=mydata,
distance = "glm",
method="nearest",
m.order="largest",
replace=FALSE)
balance_table <- bal.tab(model, un = TRUE, mean.diff = TRUE, var.ratio = TRUE, ks = TRUE, s.d.denom = "pooled", m.threshold = 0.1)
summary(model)
##
## Call:
## matchit(formula = treatment ~ primary + poverty_status + kp +
## age + sex + decider + condom_use + transcational_sex + sources +
## aware_tiko, data = mydata, method = "nearest", distance = "glm",
## replace = FALSE, m.order = "largest")
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.7008 0.3092 1.3682 2.1975
## primaryprimary 0.7856 0.4637 0.7843 .
## primaryother 0.2144 0.5363 -0.7843 .
## poverty_statusnon-poor 0.7838 0.4786 0.7414 .
## poverty_statuspoor 0.2162 0.5214 -0.7414 .
## kpNo 0.8685 0.9348 -0.1963 .
## kpYes 0.1315 0.0652 0.1963 .
## age 21.6450 20.8399 0.3776 0.5385
## sexfemale 0.8324 0.7933 0.1048 .
## sexmale 0.1676 0.2067 -0.1048 .
## decidermyself 0.9063 0.8082 0.3367 .
## deciderother 0.0937 0.1918 -0.3367 .
## condom_useno 0.6541 0.8492 -0.4102 .
## condom_useyes 0.3459 0.1508 0.4102 .
## transcational_sexno 0.8036 0.9423 -0.3491 .
## transcational_sexyes 0.1964 0.0577 0.3491 .
## sources 1.0072 1.1136 -0.1436 2.2963
## aware_tikono 0.5387 0.9870 -0.8992 .
## aware_tikoyes 0.4613 0.0130 0.8992 .
## eCDF Mean eCDF Max
## distance 0.3384 0.5313
## primaryprimary 0.3219 0.3219
## primaryother 0.3219 0.3219
## poverty_statusnon-poor 0.3052 0.3052
## poverty_statuspoor 0.3052 0.3052
## kpNo 0.0664 0.0664
## kpYes 0.0664 0.0664
## age 0.0806 0.1412
## sexfemale 0.0391 0.0391
## sexmale 0.0391 0.0391
## decidermyself 0.0981 0.0981
## deciderother 0.0981 0.0981
## condom_useno 0.1951 0.1951
## condom_useyes 0.1951 0.1951
## transcational_sexno 0.1387 0.1387
## transcational_sexyes 0.1387 0.1387
## sources 0.0352 0.1589
## aware_tikono 0.4482 0.4482
## aware_tikoyes 0.4482 0.4482
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.7203 0.3092 1.4363 1.9554
## primaryprimary 0.8101 0.4637 0.8439 .
## primaryother 0.1899 0.5363 -0.8439 .
## poverty_statusnon-poor 0.8101 0.4786 0.8052 .
## poverty_statuspoor 0.1899 0.5214 -0.8052 .
## kpNo 0.8659 0.9348 -0.2039 .
## kpYes 0.1341 0.0652 0.2039 .
## age 21.7020 20.8399 0.4043 0.5255
## sexfemale 0.8343 0.7933 0.1097 .
## sexmale 0.1657 0.2067 -0.1097 .
## decidermyself 0.9106 0.8082 0.3515 .
## deciderother 0.0894 0.1918 -0.3515 .
## condom_useno 0.6443 0.8492 -0.4306 .
## condom_useyes 0.3557 0.1508 0.4306 .
## transcational_sexno 0.7970 0.9423 -0.3656 .
## transcational_sexyes 0.2030 0.0577 0.3656 .
## sources 1.0019 1.1136 -0.1508 2.3346
## aware_tikono 0.5233 0.9870 -0.9302 .
## aware_tikoyes 0.4767 0.0130 0.9302 .
## eCDF Mean eCDF Max Std. Pair Dist.
## distance 0.3565 0.5549 1.4363
## primaryprimary 0.3464 0.3464 1.1162
## primaryother 0.3464 0.3464 1.1162
## poverty_statusnon-poor 0.3315 0.3315 1.1490
## poverty_statuspoor 0.3315 0.3315 1.1490
## kpNo 0.0689 0.0689 0.5455
## kpYes 0.0689 0.0689 0.5455
## age 0.0862 0.1490 1.3265
## sexfemale 0.0410 0.0410 0.7579
## sexmale 0.0410 0.0410 0.7579
## decidermyself 0.1024 0.1024 0.8499
## deciderother 0.1024 0.1024 0.8499
## condom_useno 0.2048 0.2048 0.8534
## condom_useyes 0.2048 0.2048 0.8534
## transcational_sexno 0.1453 0.1453 0.5719
## transcational_sexyes 0.1453 0.1453 0.5719
## sources 0.0366 0.1657 0.7592
## aware_tikono 0.4637 0.4637 0.9302
## aware_tikoyes 0.4637 0.4637 0.9302
##
## Sample Sizes:
## Control Treated
## All 537 555
## Matched 537 537
## Unmatched 0 18
## Discarded 0 0
balance_table
## Balance Measures
## Type Diff.Un Diff.Adj M.Threshold
## distance Distance 1.6041 1.6839
## primary_other Binary -0.3219 -0.3464 Not Balanced, >0.1
## poverty_status_poor Binary -0.3052 -0.3315 Not Balanced, >0.1
## kp_Yes Binary 0.0664 0.0689 Balanced, <0.1
## age Contin. 0.3159 0.3383 Not Balanced, >0.1
## sex_male Binary -0.0391 -0.0410 Balanced, <0.1
## decider_other Binary -0.0981 -0.1024 Not Balanced, >0.1
## condom_use_yes Binary 0.1951 0.2048 Not Balanced, >0.1
## transcational_sex_yes Binary 0.1387 0.1453 Not Balanced, >0.1
## sources Contin. -0.1695 -0.1780 Not Balanced, >0.1
## aware_tiko_yes Binary 0.4482 0.4637 Not Balanced, >0.1
##
## Balance tally for mean differences
## count
## Balanced, <0.1 2
## Not Balanced, >0.1 8
##
## Variable with the greatest mean difference
## Variable Diff.Adj M.Threshold
## aware_tiko_yes 0.4637 Not Balanced, >0.1
##
## Sample sizes
## Control Treated
## All 537 555
## Matched 537 537
## Unmatched 0 18
With a caliper:
model=matchit(treatment ~ primary +poverty_status + kp + age + sex + decider + condom_use + transcational_sex+sources+aware_tiko,
data=mydata,
distance = "glm",
caliper=0.2,
method="nearest",
m.order="largest",
replace=FALSE)
balance_table <- bal.tab(model, un = TRUE, mean.diff = TRUE, var.ratio = TRUE, ks = TRUE, s.d.denom = "pooled", m.threshold = 0.1)
summary(model)
##
## Call:
## matchit(formula = treatment ~ primary + poverty_status + kp +
## age + sex + decider + condom_use + transcational_sex + sources +
## aware_tiko, data = mydata, method = "nearest", distance = "glm",
## replace = FALSE, m.order = "largest", caliper = 0.2)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.7008 0.3092 1.3682 2.1975
## primaryprimary 0.7856 0.4637 0.7843 .
## primaryother 0.2144 0.5363 -0.7843 .
## poverty_statusnon-poor 0.7838 0.4786 0.7414 .
## poverty_statuspoor 0.2162 0.5214 -0.7414 .
## kpNo 0.8685 0.9348 -0.1963 .
## kpYes 0.1315 0.0652 0.1963 .
## age 21.6450 20.8399 0.3776 0.5385
## sexfemale 0.8324 0.7933 0.1048 .
## sexmale 0.1676 0.2067 -0.1048 .
## decidermyself 0.9063 0.8082 0.3367 .
## deciderother 0.0937 0.1918 -0.3367 .
## condom_useno 0.6541 0.8492 -0.4102 .
## condom_useyes 0.3459 0.1508 0.4102 .
## transcational_sexno 0.8036 0.9423 -0.3491 .
## transcational_sexyes 0.1964 0.0577 0.3491 .
## sources 1.0072 1.1136 -0.1436 2.2963
## aware_tikono 0.5387 0.9870 -0.8992 .
## aware_tikoyes 0.4613 0.0130 0.8992 .
## eCDF Mean eCDF Max
## distance 0.3384 0.5313
## primaryprimary 0.3219 0.3219
## primaryother 0.3219 0.3219
## poverty_statusnon-poor 0.3052 0.3052
## poverty_statuspoor 0.3052 0.3052
## kpNo 0.0664 0.0664
## kpYes 0.0664 0.0664
## age 0.0806 0.1412
## sexfemale 0.0391 0.0391
## sexmale 0.0391 0.0391
## decidermyself 0.0981 0.0981
## deciderother 0.0981 0.0981
## condom_useno 0.1951 0.1951
## condom_useyes 0.1951 0.1951
## transcational_sexno 0.1387 0.1387
## transcational_sexyes 0.1387 0.1387
## sources 0.0352 0.1589
## aware_tikono 0.4482 0.4482
## aware_tikoyes 0.4482 0.4482
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.4541 0.4407 0.0469 1.1397
## primaryprimary 0.7074 0.7074 0.0000 .
## primaryother 0.2926 0.2926 0.0000 .
## poverty_statusnon-poor 0.7630 0.7667 -0.0090 .
## poverty_statuspoor 0.2370 0.2333 0.0090 .
## kpNo 0.9481 0.9407 0.0219 .
## kpYes 0.0519 0.0593 -0.0219 .
## age 21.5852 21.6222 -0.0174 0.9333
## sexfemale 0.8630 0.8296 0.0893 .
## sexmale 0.1370 0.1704 -0.0893 .
## decidermyself 0.8852 0.8630 0.0763 .
## deciderother 0.1148 0.1370 -0.0763 .
## condom_useno 0.8222 0.7704 0.1090 .
## condom_useyes 0.1778 0.2296 -0.1090 .
## transcational_sexno 0.9185 0.9333 -0.0373 .
## transcational_sexyes 0.0815 0.0667 0.0373 .
## sources 1.0000 1.0926 -0.1250 1.7899
## aware_tikono 0.9667 0.9741 -0.0149 .
## aware_tikoyes 0.0333 0.0259 0.0149 .
## eCDF Mean eCDF Max Std. Pair Dist.
## distance 0.0121 0.0815 0.0481
## primaryprimary 0.0000 0.0000 0.1407
## primaryother 0.0000 0.0000 0.1407
## poverty_statusnon-poor 0.0037 0.0037 0.1889
## poverty_statuspoor 0.0037 0.0037 0.1889
## kpNo 0.0074 0.0074 0.2630
## kpYes 0.0074 0.0074 0.2630
## age 0.0141 0.0444 0.6600
## sexfemale 0.0333 0.0333 0.5058
## sexmale 0.0333 0.0333 0.5058
## decidermyself 0.0222 0.0222 0.6101
## deciderother 0.0222 0.0222 0.6101
## condom_useno 0.0519 0.0519 0.6385
## condom_useyes 0.0519 0.0519 0.6385
## transcational_sexno 0.0148 0.0148 0.2983
## transcational_sexyes 0.0148 0.0148 0.2983
## sources 0.0302 0.1370 0.6450
## aware_tikono 0.0074 0.0074 0.0149
## aware_tikoyes 0.0074 0.0074 0.0149
##
## Sample Sizes:
## Control Treated
## All 537 555
## Matched 270 270
## Unmatched 267 285
## Discarded 0 0
balance_table
## Balance Measures
## Type Diff.Un Diff.Adj M.Threshold
## distance Distance 1.6041 0.0550 Balanced, <0.1
## primary_other Binary -0.3219 0.0000 Balanced, <0.1
## poverty_status_poor Binary -0.3052 0.0037 Balanced, <0.1
## kp_Yes Binary 0.0664 -0.0074 Balanced, <0.1
## age Contin. 0.3159 -0.0145 Balanced, <0.1
## sex_male Binary -0.0391 -0.0333 Balanced, <0.1
## decider_other Binary -0.0981 -0.0222 Balanced, <0.1
## condom_use_yes Binary 0.1951 -0.0519 Balanced, <0.1
## transcational_sex_yes Binary 0.1387 0.0148 Balanced, <0.1
## sources Contin. -0.1695 -0.1475 Not Balanced, >0.1
## aware_tiko_yes Binary 0.4482 0.0074 Balanced, <0.1
##
## Balance tally for mean differences
## count
## Balanced, <0.1 10
## Not Balanced, >0.1 1
##
## Variable with the greatest mean difference
## Variable Diff.Adj M.Threshold
## sources -0.1475 Not Balanced, >0.1
##
## Sample sizes
## Control Treated
## All 537 555
## Matched 270 270
## Unmatched 267 285
Stratification approach
ps_model <- glm(treatment ~ primary+poverty_status + kp + age +
sex + decider + condom_use + transcational_sex+aware_tiko+sources,
family = "binomial", data = mydata)
model2 <- matchit(treatment ~ primary+poverty_status + kp + age +
sex + decider + condom_use + transcational_sex+sources+aware_tiko,
data =mydata,
method = "subclass",
distance = "glm",
nsubclass = 5,
model = ps_model)
summary(model2)
##
## Call:
## matchit(formula = treatment ~ primary + poverty_status + kp +
## age + sex + decider + condom_use + transcational_sex + sources +
## aware_tiko, data = mydata, method = "subclass", distance = "glm",
## nsubclass = 5, model = ps_model)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.7008 0.3092 1.3682 2.1975
## primaryprimary 0.7856 0.4637 0.7843 .
## primaryother 0.2144 0.5363 -0.7843 .
## poverty_statusnon-poor 0.7838 0.4786 0.7414 .
## poverty_statuspoor 0.2162 0.5214 -0.7414 .
## kpNo 0.8685 0.9348 -0.1963 .
## kpYes 0.1315 0.0652 0.1963 .
## age 21.6450 20.8399 0.3776 0.5385
## sexfemale 0.8324 0.7933 0.1048 .
## sexmale 0.1676 0.2067 -0.1048 .
## decidermyself 0.9063 0.8082 0.3367 .
## deciderother 0.0937 0.1918 -0.3367 .
## condom_useno 0.6541 0.8492 -0.4102 .
## condom_useyes 0.3459 0.1508 0.4102 .
## transcational_sexno 0.8036 0.9423 -0.3491 .
## transcational_sexyes 0.1964 0.0577 0.3491 .
## sources 1.0072 1.1136 -0.1436 2.2963
## aware_tikono 0.5387 0.9870 -0.8992 .
## aware_tikoyes 0.4613 0.0130 0.8992 .
## eCDF Mean eCDF Max
## distance 0.3384 0.5313
## primaryprimary 0.3219 0.3219
## primaryother 0.3219 0.3219
## poverty_statusnon-poor 0.3052 0.3052
## poverty_statuspoor 0.3052 0.3052
## kpNo 0.0664 0.0664
## kpYes 0.0664 0.0664
## age 0.0806 0.1412
## sexfemale 0.0391 0.0391
## sexmale 0.0391 0.0391
## decidermyself 0.0981 0.0981
## deciderother 0.0981 0.0981
## condom_useno 0.1951 0.1951
## condom_useyes 0.1951 0.1951
## transcational_sexno 0.1387 0.1387
## transcational_sexyes 0.1387 0.1387
## sources 0.0352 0.1589
## aware_tikono 0.4482 0.4482
## aware_tikoyes 0.4482 0.4482
##
## Summary of Balance Across Subclasses
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.7008 0.6721 0.1005 0.9319
## primaryprimary 0.7856 0.6530 0.3232 .
## primaryother 0.2144 0.3470 -0.3232 .
## poverty_statusnon-poor 0.7838 0.8132 -0.0715 .
## poverty_statuspoor 0.2162 0.1868 0.0715 .
## kpNo 0.8685 0.9201 -0.1527 .
## kpYes 0.1315 0.0799 0.1527 .
## age 21.6450 21.8075 -0.0762 0.8027
## sexfemale 0.8324 0.8874 -0.1472 .
## sexmale 0.1676 0.1126 0.1472 .
## decidermyself 0.9063 0.9040 0.0079 .
## deciderother 0.0937 0.0960 -0.0079 .
## condom_useno 0.6541 0.8423 -0.3957 .
## condom_useyes 0.3459 0.1577 0.3957 .
## transcational_sexno 0.8036 0.9184 -0.2889 .
## transcational_sexyes 0.1964 0.0816 0.2889 .
## sources 1.0072 0.9352 0.0972 2.3847
## aware_tikono 0.5387 0.6008 -0.1246 .
## aware_tikoyes 0.4613 0.3992 0.1246 .
## eCDF Mean eCDF Max
## distance 0.0400 0.1802
## primaryprimary 0.1326 0.1326
## primaryother 0.1326 0.1326
## poverty_statusnon-poor 0.0294 0.0294
## poverty_statuspoor 0.0294 0.0294
## kpNo 0.0516 0.0516
## kpYes 0.0516 0.0516
## age 0.0353 0.1255
## sexfemale 0.0550 0.0550
## sexmale 0.0550 0.0550
## decidermyself 0.0023 0.0023
## deciderother 0.0023 0.0023
## condom_useno 0.1882 0.1882
## condom_useyes 0.1882 0.1882
## transcational_sexno 0.1148 0.1148
## transcational_sexyes 0.1148 0.1148
## sources 0.0342 0.0900
## aware_tikono 0.0621 0.0621
## aware_tikoyes 0.0621 0.0621
##
## Sample Sizes:
## Control Treated
## All 537. 555
## Matched (ESS) 17.06 555
## Matched 537. 555
## Unmatched 0. 0
## Discarded 0. 0
balance_table <- bal.tab(model2, un = TRUE, mean.diff = TRUE, var.ratio = TRUE, ks = TRUE, s.d.denom = "pooled", m.threshold = 0.1)
## Warning: Not enough control units in subclasses 5 and 6.
balance_table
## Balance measures across subclasses
## Type Diff.Un Diff.Adj M.Threshold
## distance Distance 1.6041 0.1178
## primary_other Binary -0.3219 -0.1326 Not Balanced, >0.1
## poverty_status_poor Binary -0.3052 0.0294 Balanced, <0.1
## kp_Yes Binary 0.0664 0.0516 Balanced, <0.1
## age Contin. 0.3159 -0.0638 Balanced, <0.1
## sex_male Binary -0.0391 0.0550 Balanced, <0.1
## decider_other Binary -0.0981 -0.0023 Balanced, <0.1
## condom_use_yes Binary 0.1951 0.1882 Not Balanced, >0.1
## transcational_sex_yes Binary 0.1387 0.1148 Not Balanced, >0.1
## sources Contin. -0.1695 0.1147 Not Balanced, >0.1
## aware_tiko_yes Binary 0.4482 0.0621 Balanced, <0.1
##
## Balance tally for mean differences across subclasses
## Subclass 1 Subclass 2 Subclass 3 Subclass 4 Subclass 5
## Balanced, <0.1 7 8 8 4 3
## Not Balanced, >0.1 3 2 2 6 7
## Subclass 6
## Balanced, <0.1 5
## Not Balanced, >0.1 5
##
## Variable with the greatest mean difference across subclasses
## Variable Diff.Adj M.Threshold
## age 0.3515 Not Balanced, >0.1
## sources -0.2227 Not Balanced, >0.1
## age -0.2313 Not Balanced, >0.1
## sources 1.2490 Not Balanced, >0.1
## primary_other -0.7935 Not Balanced, >0.1
## age -0.7257 Not Balanced, >0.1
##
## Sample sizes by subclass
## 1 2 3 4 5 6 All
## Control 367 93 62 13 1 1 537
## Treated 92 93 90 95 92 93 555
## Total 459 186 152 108 93 94 1092
plot(model2, type = "jitter")

## To identify the units, use first mouse button; to stop, use second.
plot(model2, type = "hist")

Optimal match
ps_model <- glm(treatment ~ primary + secondary + poverty_status + kp + age +
sex + decider + married + single + condom_use + transcational_sex,
family = "binomial", data = mydata)
model3 <- matchit(treatment ~ primary + poverty_status + kp + age +
sex + decider + condom_use + transcational_sex+sources+aware_tiko,
data = mydata,
method = "optimal",
distance = "glm",
model = ps_model)
## Warning: Fewer control units than treated units; not all treated units will get
## a match.
summary(model3)
##
## Call:
## matchit(formula = treatment ~ primary + poverty_status + kp +
## age + sex + decider + condom_use + transcational_sex + sources +
## aware_tiko, data = mydata, method = "optimal", distance = "glm",
## model = ps_model)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.7008 0.3092 1.3682 2.1975
## primaryprimary 0.7856 0.4637 0.7843 .
## primaryother 0.2144 0.5363 -0.7843 .
## poverty_statusnon-poor 0.7838 0.4786 0.7414 .
## poverty_statuspoor 0.2162 0.5214 -0.7414 .
## kpNo 0.8685 0.9348 -0.1963 .
## kpYes 0.1315 0.0652 0.1963 .
## age 21.6450 20.8399 0.3776 0.5385
## sexfemale 0.8324 0.7933 0.1048 .
## sexmale 0.1676 0.2067 -0.1048 .
## decidermyself 0.9063 0.8082 0.3367 .
## deciderother 0.0937 0.1918 -0.3367 .
## condom_useno 0.6541 0.8492 -0.4102 .
## condom_useyes 0.3459 0.1508 0.4102 .
## transcational_sexno 0.8036 0.9423 -0.3491 .
## transcational_sexyes 0.1964 0.0577 0.3491 .
## sources 1.0072 1.1136 -0.1436 2.2963
## aware_tikono 0.5387 0.9870 -0.8992 .
## aware_tikoyes 0.4613 0.0130 0.8992 .
## eCDF Mean eCDF Max
## distance 0.3384 0.5313
## primaryprimary 0.3219 0.3219
## primaryother 0.3219 0.3219
## poverty_statusnon-poor 0.3052 0.3052
## poverty_statuspoor 0.3052 0.3052
## kpNo 0.0664 0.0664
## kpYes 0.0664 0.0664
## age 0.0806 0.1412
## sexfemale 0.0391 0.0391
## sexmale 0.0391 0.0391
## decidermyself 0.0981 0.0981
## deciderother 0.0981 0.0981
## condom_useno 0.1951 0.1951
## condom_useyes 0.1951 0.1951
## transcational_sexno 0.1387 0.1387
## transcational_sexyes 0.1387 0.1387
## sources 0.0352 0.1589
## aware_tikono 0.4482 0.4482
## aware_tikoyes 0.4482 0.4482
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.6909 0.3092 1.3337 2.1905
## primaryprimary 0.7784 0.4637 0.7668 .
## primaryother 0.2216 0.5363 -0.7668 .
## poverty_statusnon-poor 0.7765 0.4786 0.7238 .
## poverty_statuspoor 0.2235 0.5214 -0.7238 .
## kpNo 0.8641 0.9348 -0.2094 .
## kpYes 0.1359 0.0652 0.2094 .
## age 21.6313 20.8399 0.3711 0.5415
## sexfemale 0.8268 0.7933 0.0897 .
## sexmale 0.1732 0.2067 -0.0897 .
## decidermyself 0.9032 0.8082 0.3259 .
## deciderother 0.0968 0.1918 -0.3259 .
## condom_useno 0.6760 0.8492 -0.3641 .
## condom_useyes 0.3240 0.1508 0.3641 .
## transcational_sexno 0.8268 0.9423 -0.2906 .
## transcational_sexyes 0.1732 0.0577 0.2906 .
## sources 1.0130 1.1136 -0.1358 2.3496
## aware_tikono 0.5568 0.9870 -0.8629 .
## aware_tikoyes 0.4432 0.0130 0.8629 .
## eCDF Mean eCDF Max Std. Pair Dist.
## distance 0.3261 0.5214 1.3337
## primaryprimary 0.3147 0.3147 1.1026
## primaryother 0.3147 0.3147 1.1026
## poverty_statusnon-poor 0.2980 0.2980 1.0495
## poverty_statuspoor 0.2980 0.2980 1.0495
## kpNo 0.0708 0.0708 0.5620
## kpYes 0.0708 0.0708 0.5620
## age 0.0799 0.1397 1.2881
## sexfemale 0.0335 0.0335 0.8676
## sexmale 0.0335 0.0335 0.8676
## decidermyself 0.0950 0.0950 0.8627
## deciderother 0.0950 0.0950 0.8627
## condom_useno 0.1732 0.1732 0.8025
## condom_useyes 0.1732 0.1732 0.8025
## transcational_sexno 0.1155 0.1155 0.5344
## transcational_sexyes 0.1155 0.1155 0.5344
## sources 0.0366 0.1601 0.7592
## aware_tikono 0.4302 0.4302 0.8629
## aware_tikoyes 0.4302 0.4302 0.8629
##
## Sample Sizes:
## Control Treated
## All 537 555
## Matched 537 537
## Unmatched 0 18
## Discarded 0 0
balance_table <- bal.tab(model3, un = TRUE, mean.diff = TRUE, var.ratio = TRUE, ks = TRUE, s.d.denom = "pooled", m.threshold = 0.1)
balance_table
## Balance Measures
## Type Diff.Un Diff.Adj M.Threshold
## distance Distance 1.6041 1.5636
## primary_other Binary -0.3219 -0.3147 Not Balanced, >0.1
## poverty_status_poor Binary -0.3052 -0.2980 Not Balanced, >0.1
## kp_Yes Binary 0.0664 0.0708 Balanced, <0.1
## age Contin. 0.3159 0.3105 Not Balanced, >0.1
## sex_male Binary -0.0391 -0.0335 Balanced, <0.1
## decider_other Binary -0.0981 -0.0950 Balanced, <0.1
## condom_use_yes Binary 0.1951 0.1732 Not Balanced, >0.1
## transcational_sex_yes Binary 0.1387 0.1155 Not Balanced, >0.1
## sources Contin. -0.1695 -0.1602 Not Balanced, >0.1
## aware_tiko_yes Binary 0.4482 0.4302 Not Balanced, >0.1
##
## Balance tally for mean differences
## count
## Balanced, <0.1 3
## Not Balanced, >0.1 7
##
## Variable with the greatest mean difference
## Variable Diff.Adj M.Threshold
## aware_tiko_yes 0.4302 Not Balanced, >0.1
##
## Sample sizes
## Control Treated
## All 537 555
## Matched 537 537
## Unmatched 0 18
plot(model2, type = "hist")

Inverse probability weighting
ps_model <- glm(treatment ~ primary+poverty_status +
kp + age + sex + decider +
condom_use + transcational_sex+sources+aware_tiko,
family = "binomial",
data = mydata)
ipwdata<-augment_columns(ps_model,
mydata,
type.predict = "response") %>%
rename(pscore=.fitted)
ipwdata$wgt<- ifelse(ipwdata$treatment == 1, 1/ipwdata$pscore, 1/(1-ipwdata$pscore))
head(ipwdata,6) %>%
kable("html") %>%
kable_styling(font_size=12) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
|
treatment
|
primary
|
secondary
|
kp
|
poverty_status
|
age
|
sex
|
decider
|
married
|
single
|
condom_use
|
transcational_sex
|
sources
|
PrEP_Knowledge
|
mh_empowerment
|
psu
|
aware_tiko
|
A_1
|
pscore
|
.se.fit
|
.resid
|
.hat
|
.sigma
|
.cooksd
|
.std.resid
|
wgt
|
|
treatment
|
primary
|
secondary
|
No
|
poor
|
22
|
female
|
myself
|
other
|
Single
|
yes
|
yes
|
1
|
0.0
|
25.4
|
1
|
yes
|
22
|
0.9856870
|
0.0081194
|
0.1698024
|
0.0046731
|
0.9598513
|
0.0000062
|
0.1702006
|
69.866482
|
|
treatment
|
other
|
other
|
No
|
non-poor
|
22
|
female
|
other
|
Married/union
|
other
|
no
|
no
|
2
|
0.0
|
33.0
|
1
|
no
|
22
|
0.1689857
|
0.0404332
|
1.8857048
|
0.0116417
|
0.9581284
|
0.0053279
|
1.8967780
|
1.203349
|
|
treatment
|
primary
|
other
|
No
|
non-poor
|
22
|
female
|
myself
|
Married/union
|
other
|
no
|
no
|
1
|
32.4
|
30.6
|
1
|
no
|
22
|
0.5673811
|
0.0307298
|
1.0646352
|
0.0038472
|
0.9593163
|
0.0002687
|
1.0666891
|
2.311503
|
|
treatment
|
primary
|
secondary
|
No
|
non-poor
|
18
|
female
|
myself
|
other
|
Single
|
no
|
no
|
1
|
15.9
|
44.9
|
1
|
no
|
18
|
0.5610279
|
0.0450852
|
1.0751602
|
0.0082537
|
0.9593029
|
0.0005969
|
1.0796249
|
2.278049
|
|
treatment
|
primary
|
secondary
|
No
|
poor
|
24
|
female
|
myself
|
Married/union
|
other
|
no
|
no
|
3
|
66.4
|
33.6
|
1
|
no
|
24
|
0.1347143
|
0.0382607
|
2.0022984
|
0.0125583
|
0.9579050
|
0.0075208
|
2.0149908
|
1.155688
|
|
treatment
|
primary
|
secondary
|
No
|
non-poor
|
18
|
female
|
myself
|
other
|
Single
|
yes
|
no
|
1
|
83.6
|
9.2
|
1
|
no
|
18
|
0.6346567
|
0.0543322
|
0.9535943
|
0.0127313
|
0.9594209
|
0.0006836
|
0.9597232
|
2.737152
|
# Create survey design object with weights
design <- svydesign(ids = ~psu, data = ipwdata, weights = ~wgt)
# Perform weighted logistic regression
weighted_model <- svyglm(PrEP_Knowledge ~ treatment+primary+poverty_status +
kp + age + sex + decider +
condom_use + transcational_sex+sources+sources+aware_tiko,
design = design)
summary(weighted_model)
##
## Call:
## svyglm(formula = PrEP_Knowledge ~ treatment + primary + poverty_status +
## kp + age + sex + decider + condom_use + transcational_sex +
## sources + sources + aware_tiko, design = design)
##
## Survey design:
## svydesign(ids = ~psu, data = ipwdata, weights = ~wgt)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.4111 26.6999 2.300 0.0329 *
## treatmenttreatment -29.8025 12.2398 -2.435 0.0249 *
## primaryother -4.3454 5.2116 -0.834 0.4148
## poverty_statuspoor 6.0182 7.2397 0.831 0.4161
## kpYes -1.3378 5.3202 -0.251 0.8042
## age 0.8873 1.2894 0.688 0.4997
## sexmale -1.0608 4.1150 -0.258 0.7993
## deciderother -9.1111 6.1818 -1.474 0.1569
## condom_useyes -6.4174 6.9235 -0.927 0.3656
## transcational_sexyes -12.6398 6.9783 -1.811 0.0859 .
## sources -6.0266 4.6380 -1.299 0.2094
## aware_tikoyes -20.7789 8.6292 -2.408 0.0264 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 779.5596)
##
## Number of Fisher Scoring iterations: 2
# Basic check for balance using weighted means
bal.tab(treatment ~ primary+poverty_status + kp + age + sex + decider + condom_use + transcational_sex+sources+aware_tiko,
data = ipwdata,
weights = ipwdata$weights,
method = "weighting",
un = TRUE,
mean.diff = TRUE,
s.d.denom = "pooled",
ks = TRUE,
m.threshold = 0.1)
## Warning: Unknown or uninitialised column: `weights`.
## Balance Measures
## Type Diff.Un M.Threshold.Un
## primary_other Binary -0.3219 Not Balanced, >0.1
## poverty_status_poor Binary -0.3052 Not Balanced, >0.1
## kp_Yes Binary 0.0664 Balanced, <0.1
## age Contin. 0.3159 Not Balanced, >0.1
## sex_male Binary -0.0391 Balanced, <0.1
## decider_other Binary -0.0981 Balanced, <0.1
## condom_use_yes Binary 0.1951 Not Balanced, >0.1
## transcational_sex_yes Binary 0.1387 Not Balanced, >0.1
## sources Contin. -0.1695 Not Balanced, >0.1
## aware_tiko_yes Binary 0.4482 Not Balanced, >0.1
##
## Balance tally for mean differences
## count
## Balanced, <0.1 3
## Not Balanced, >0.1 7
##
## Variable with the greatest mean difference
## Variable Diff.Un M.Threshold.Un
## aware_tiko_yes 0.4482 Not Balanced, >0.1
##
## Sample sizes
## control treatment
## All 537 555
Sample size
determination
# Define the sample size calculation function
calculate_sample_size <- function(cluster_size) {
# Define fixed parameters
alpha <- 0.05
power <- 0.8
mde <- 0.125
icc <- 0.01
# Calculate design effect
Deff <- 1 + (cluster_size - 1) * icc
# Calculate required sample size for a simple random sample using a t-test approximation
sample_size_simple <- pwr.t.test(d = mde, power = power, sig.level = alpha, type = "one.sample", alternative = "two.sided")$n
# Adjust sample size for design effect (PSM algorithm: Stratified, IPW and nn)
total_sample_size_v1 <- ceiling(sample_size_simple * Deff)
num_clusters <- ceiling(total_sample_size_v1 / cluster_size)
# Adjust sample size for design effect (PSM algorithm: Stratified, IPW and nn)
total_sample_size_v2 <- ceiling(sample_size_simple * Deff/0.5)
# Return results as a list
list(cluster_size = cluster_size, num_clusters = num_clusters, total_sample_size_v1 = total_sample_size_v1, total_sample_size_v2 = total_sample_size_v2, per_group_v1 = total_sample_size_v1/2,per_group_v2 = total_sample_size_v2/2)
}
# Example cluster sizes to analyze
cluster_sizes <- seq(from = 10, to = 30, by = 2)
# Apply the function to each cluster size
results <- lapply(cluster_sizes, calculate_sample_size)
# Convert the results to a data frame
results_df <- do.call(rbind, results) %>%
tibble::as_tibble() %>%
mutate(cluster_size = as.integer(cluster_size),
num_clusters = as.integer(num_clusters),
total_sample_size_v1 = as.integer(total_sample_size_v1),
total_sample_size_v2 = as.integer(total_sample_size_v2))
selected_data <- results_df[c("cluster_size", "num_clusters", "total_sample_size_v1","total_sample_size_v2")]
results_table <- knitr::kable(selected_data,
format = "html",
caption = "Sample Size Calculation for Various Cluster Sizes",
col.names = c("Cluster Size", "Total number of Clusters per arm","Sample Size per arm(stratified, IPW and nn)","Sample size per arm(caliper)")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
column_spec(1, bold = T, color = "red") %>%
scroll_box(width = "100%", height = "500px")
# Print styled table
results_table
Sample Size Calculation for Various Cluster Sizes
|
Cluster Size
|
Total number of Clusters per arm
|
Sample Size per arm(stratified, IPW and nn)
|
Sample size per arm(caliper)
|
|
10
|
55
|
550
|
1100
|
|
12
|
47
|
560
|
1120
|
|
14
|
41
|
570
|
1140
|
|
16
|
37
|
580
|
1160
|
|
18
|
33
|
590
|
1180
|
|
20
|
31
|
601
|
1201
|
|
22
|
28
|
611
|
1221
|
|
24
|
26
|
621
|
1241
|
|
26
|
25
|
631
|
1261
|
|
28
|
23
|
641
|
1281
|
|
30
|
22
|
651
|
1301
|