Ch. 3 fertility, fertility autonomy and medical decision involvement cluster analysis

Author

R.Luttinen

Ch. 3 spatial chapter

#read in PMA data


library(ipumsr)
library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

#person-period dataset


pmauglong <- read_ipums_micro(
  ddi = "C:/Users/Rebecca/Downloads/pma_00021.xml",
  data = "C:/Users/Rebecca/Downloads/pma_00021.dat.gz")

Use of data from IPUMS PMA is subject to conditions including that users should cite the data appropriately. Use command `ipums_conditions()` for more details.

#read in spatial data

library(sf)

Linking to GEOS 3.13.1, GDAL 3.10.2, PROJ 9.5.1; sf_use_s2() is TRUE

UG_PMA_GPS<-read_sf("C:/Users/Rebecca/Downloads/PMA_UG_GPS_v2_06July2022/UGANDA/PMA_UG_GPS_v2_06July2022.csv")

#read in boundary data

UG_boundary<-read_sf("C:/Users/Rebecca/Downloads/geouggen/geouggen.shp")

UG_sub_boundary<-read_sf("C:/Users/Rebecca/Downloads/uganda-adm-boundaries/UGANDA BOUNDARIES SHAPEFILES AS OF 17 08 2018/COUNTIES_2018_UTM_36N.shp")

Filter for 2022 only.

pmaug2022<-pmauglong%>%
  filter(YEAR==2022)

UG_PMA_GPS_2022<-UG_PMA_GPS%>%
  filter(PMAYEAR==2022)

library(janitor)


Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test

UG_PMA_GPS_2022<-UG_PMA_GPS%>%
  rename('EAID'='EA_ID')

pmaug2022wgps<-merge(pmaug2022, UG_PMA_GPS_2022, by=c("EAID"))

tabyl(pmaug2022wgps, EAID)

      EAID  n      percent
 800111001 34 0.0065184049
 800111002 38 0.0072852761
 800111003 33 0.0063266871
 800111004 61 0.0116947853
 800111005 38 0.0072852761
 800111006 47 0.0090107362
 800111007 35 0.0067101227
 800121001 26 0.0049846626
 800121002 51 0.0097776074
 800121003 31 0.0059432515
 800121004 32 0.0061349693
 800121005 26 0.0049846626
 800121006 54 0.0103527607
 800121007 41 0.0078604294
 800121008 26 0.0049846626
 800121009 37 0.0070935583
 800121010 37 0.0070935583
 800121011 39 0.0074769939
 800121012 37 0.0070935583
 800131001 33 0.0063266871
 800131002 41 0.0078604294
 800131003 35 0.0067101227
 800131004 33 0.0063266871
 800131005 41 0.0078604294
 800131006 26 0.0049846626
 800141001 53 0.0101610429
 800141003 34 0.0065184049
 800141004 42 0.0080521472
 800141005 34 0.0065184049
 800141006 48 0.0092024540
 800141007 36 0.0069018405
 800141008 52 0.0099693252
 800141009 53 0.0101610429
 800141010 36 0.0069018405
 800141011 43 0.0082438650
 800141012 54 0.0103527607
 800141013 38 0.0072852761
 800141014 27 0.0051763804
 800151001 35 0.0067101227
 800151002 39 0.0074769939
 800151003 33 0.0063266871
 800151004 35 0.0067101227
 800151005 44 0.0084355828
 800151006 40 0.0076687117
 800151007 26 0.0049846626
 800151008 41 0.0078604294
 800151009 31 0.0059432515
 800161001 24 0.0046012270
 800161002 27 0.0051763804
 800161003 29 0.0055598160
 800161004 43 0.0082438650
 800161005 41 0.0078604294
 800161006 51 0.0097776074
 800161007 39 0.0074769939
 800161008 37 0.0070935583
 800161009 36 0.0069018405
 800171001 30 0.0057515337
 800171002  1 0.0001917178
 800171003 28 0.0053680982
 800171004 36 0.0069018405
 800171005 11 0.0021088957
 800171006 61 0.0116947853
 800171007 27 0.0051763804
 800171008 32 0.0061349693
 800171009 31 0.0059432515
 800171010 22 0.0042177914
 800181001 37 0.0070935583
 800181002 23 0.0044095092
 800181003 28 0.0053680982
 800181004 39 0.0074769939
 800181005 36 0.0069018405
 800191001 28 0.0053680982
 800191002 28 0.0053680982
 800191003 36 0.0069018405
 800191004 34 0.0065184049
 800191005 26 0.0049846626
 800191006 49 0.0093941718
 800201001 41 0.0078604294
 800201002 35 0.0067101227
 800201003 42 0.0080521472
 800201004 38 0.0072852761
 800201005 55 0.0105444785
 800201006 31 0.0059432515
 800201007 51 0.0097776074
 800201008 30 0.0057515337
 800201009 41 0.0078604294
 800211001 33 0.0063266871
 800211002 34 0.0065184049
 800211003 42 0.0080521472
 800211004 26 0.0049846626
 800211005 29 0.0055598160
 800211006 34 0.0065184049
 800211007 28 0.0053680982
 800211008 39 0.0074769939
 800211009 49 0.0093941718
 800211010 28 0.0053680982
 800211011 37 0.0070935583
 800211012 29 0.0055598160
 800211013 42 0.0080521472
 800221001 41 0.0078604294
 800221002 33 0.0063266871
 800221003 37 0.0070935583
 800221004 29 0.0055598160
 800221005 31 0.0059432515
 800221006 37 0.0070935583
 800221007 34 0.0065184049
 800221008 55 0.0105444785
 800221009 34 0.0065184049
 800221010 30 0.0057515337
 800221011 24 0.0046012270
 800221012 32 0.0061349693
 800221013 30 0.0057515337
 800221014 24 0.0046012270
 800221015 29 0.0055598160
 800221016 38 0.0072852761
 800221017 30 0.0057515337
 800221018 35 0.0067101227
 800221019 25 0.0047929448
 800231001 57 0.0109279141
 800231002 34 0.0065184049
 800231003 43 0.0082438650
 800231004 43 0.0082438650
 800231005 39 0.0074769939
 800231006 31 0.0059432515
 800241001 39 0.0074769939
 800241002 45 0.0086273006
 800241003 57 0.0109279141
 800241004 53 0.0101610429
 800241005 41 0.0078604294
 800241006 37 0.0070935583
 800241007 41 0.0078604294
 800241008 48 0.0092024540
 800241009 35 0.0067101227
 800241010 67 0.0128450920
 800241011 33 0.0063266871
 800251001 63 0.0120782209
 800251002 39 0.0074769939
 800251003 35 0.0067101227
 800251004 41 0.0078604294
 800251005 45 0.0086273006
 800251006 32 0.0061349693

#drop EAID with only two people listed

pmaug2022wgps<-pmaug2022wgps%>%
  filter(EAID!= 800171002)

tabyl(pmaug2022wgps, STARTKIDDECWILL)

 STARTKIDDECWILL    n      percent
               1   26 0.0049856184
               2   87 0.0166826462
               3   25 0.0047938639
               4  625 0.1198465964
               5  422 0.0809204219
              97   28 0.0053691275
              98    2 0.0003835091
              99 4000 0.7670182167

tabyl(pmaug2022wgps, STARTKIDDEC)

 STARTKIDDEC    n      percent
           1  230 0.0441035475
           2  581 0.1114093960
           3  137 0.0262703739
           4 2153 0.4128475551
           5  876 0.1679769895
          97   19 0.0036433365
          98    4 0.0007670182
          99 1215 0.2329817833

pmaug2022wgps <- pmaug2022wgps %>%
  mutate(fertautonomy3pt = case_when(
    STARTKIDDECWILL == 1 | STARTKIDDEC== 1 ~ 'strongly disagree/ disagree',
    STARTKIDDECWILL == 2 | STARTKIDDEC== 2 ~ 'strongly disagree/ disagree',
    STARTKIDDECWILL == 3 | STARTKIDDEC== 3 ~ 'neutral',
    STARTKIDDECWILL == 4 | STARTKIDDEC== 4~ 'strongly agree/ agree',
    STARTKIDDECWILL == 5 | STARTKIDDEC== 5 ~ 'strongly agree/ agree'
  ))

tabyl(pmaug2022wgps$fertautonomy3pt)

 pmaug2022wgps$fertautonomy3pt    n    percent valid_percent
                       neutral  162 0.03106424    0.03138318
         strongly agree/ agree 4076 0.78159156    0.78961643
   strongly disagree/ disagree  924 0.17718121    0.17900039
                          <NA>   53 0.01016299            NA

pmaug2022wgps <- pmaug2022wgps %>%
  mutate(fertautonomyscale = case_when(
    STARTKIDDECWILL == 1 | STARTKIDDEC== 1 ~ '1',
    STARTKIDDECWILL == 2 | STARTKIDDEC== 2 ~ '2',
    STARTKIDDECWILL == 3 | STARTKIDDEC== 3 ~ '3',
    STARTKIDDECWILL == 4 | STARTKIDDEC== 4~ '4',
    STARTKIDDECWILL == 5 | STARTKIDDEC== 5 ~ '5'
  ))

tabyl(pmaug2022wgps$fertautonomyscale)

 pmaug2022wgps$fertautonomyscale    n    percent valid_percent
                               1  256 0.04908917    0.04959318
                               2  668 0.12809204    0.12940721
                               3  162 0.03106424    0.03138318
                               4 2778 0.53269415    0.53816350
                               5 1298 0.24889741    0.25145293
                            <NA>   53 0.01016299            NA

library(srvyr)


Attaching package: 'srvyr'

The following object is masked from 'package:stats':

    filter

pmaug2022wgps<-pmaug2022wgps%>%
  filter(BIRTHEVENT<90)


pmaug2022groupedweighted<-pmaug2022wgps%>%
  group_by(EAID)%>%
  summarise(meanbirthsperwoman=weighted.mean(BIRTHEVENT, w=FQWEIGHT, strata=STRATA, na.rm=TRUE))


pmaug2022wgps<-pmaug2022wgps%>%
  filter(fertautonomyscale!='NA')

pmaug2022wgps$fertautonomyscale <- as.numeric(pmaug2022wgps$fertautonomyscale)

pmaug2022groupedweighted2<-pmaug2022wgps%>%
  group_by(EAID)%>%
  summarise(fertilityautonomymean=weighted.mean(fertautonomyscale, w=FQWEIGHT, strata=STRATA, na.rm=TRUE))

#join together


spatialpmaug<-left_join(pmaug2022groupedweighted, pmaug2022groupedweighted2)

Joining with `by = join_by(EAID)`

#do the same for medicial decision-making


library(janitor)

pmaug2022wgps<-pmaug2022wgps%>%
  mutate(HHDECMED=as.factor(HHDECMED))%>%
   mutate(decidemedical= recode(HHDECMED, '1' = "respondent" ,'2' = "husband/partner", '3'="someone else",.default = NA_character_))

tabyl(pmaug2022wgps, decidemedical)

   decidemedical    n   percent valid_percent
      respondent  801 0.1552025     0.2585539
 husband/partner 1407 0.2726216     0.4541640
    someone else  890 0.1724472     0.2872821
            <NA> 2063 0.3997287            NA

pmaug2022wgps<-pmaug2022wgps%>%
  mutate(decidemeddich= recode(decidemedical, 'respondent' = "1" ,'husband/partner' = "0", 'someone else'="0",.default = NA_character_))

tabyl(pmaug2022wgps, decidemeddich)

 decidemeddich    n   percent valid_percent
             1  801 0.1552025     0.2585539
             0 2297 0.4450688     0.7414461
          <NA> 2063 0.3997287            NA

pmaug2022dropNA<-pmaug2022wgps%>%
  filter(decidemeddich!='NA')

class(pmaug2022dropNA$decidemeddich)

[1] "factor"

pmaug2022dropNA$decidemeddichnum<-as.numeric(as.character(pmaug2022dropNA$decidemeddich))


tabyl(pmaug2022dropNA, decidemeddichnum)

 decidemeddichnum    n   percent
                0 2297 0.7414461
                1  801 0.2585539

pmaug2022groupeddecidemedweighted<-pmaug2022dropNA%>%
  group_by(EAID)%>%
  summarise(decidemedrateweighted=weighted.mean(decidemeddichnum, w=FQWEIGHT, strata=STRATA, na.rm=TRUE))

#join with the others

spatialpmaugfull<-left_join(spatialpmaug, pmaug2022groupeddecidemedweighted)

Joining with `by = join_by(EAID)`

Now let’s check to see if these variables are correlated. There is a significant negative correlation between births per woman and a higher score of fertility autonomy.

cor.test(spatialpmaugfull$meanbirthsperwoman, spatialpmaugfull$decidemedrateweighted)


    Pearson's product-moment correlation

data:  spatialpmaugfull$meanbirthsperwoman and spatialpmaugfull$decidemedrateweighted
t = -0.84357, df = 138, p-value = 0.4004
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.23473880  0.09541217
sample estimates:
        cor 
-0.07162487

cor.test(spatialpmaugfull$meanbirthsperwoman, spatialpmaugfull$fertilityautonomymean)


    Pearson's product-moment correlation

data:  spatialpmaugfull$meanbirthsperwoman and spatialpmaugfull$fertilityautonomymean
t = -2.4144, df = 138, p-value = 0.01707
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.3553528 -0.0366385
sample estimates:
       cor 
-0.2013181

cor.test(spatialpmaugfull$decidemedrateweighted, spatialpmaugfull$fertilityautonomymean)


    Pearson's product-moment correlation

data:  spatialpmaugfull$decidemedrateweighted and spatialpmaugfull$fertilityautonomymean
t = 1.263, df = 138, p-value = 0.2087
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.0600731  0.2680452
sample estimates:
      cor 
0.1068955

Now let’s map these distributions out.

library(sf)

#merge GPS data and decision-making data


mapdata<-merge(spatialpmaugfull, UG_PMA_GPS_2022, by=c("EAID"))

#convert to a spatial frame

mapdata <- st_as_sf(mapdata, coords = c("GPSLONG", "GPSLAT"), crs=4326)

ggplot() +
  geom_sf(data = UG_sub_boundary, color = "darkgrey", fill = NA, linewidth = 0.4) +
  geom_sf(data = UG_boundary, color = "black", fill = NA, linewidth = 0.4) +
  geom_sf(data = mapdata, aes(color = meanbirthsperwoman), size = 5, alpha = .8) +
  scale_color_viridis_c(option = "magma", direction = -1, name = "Number of Children Ever Born") +
  ggtitle("PMA Enumeration Area Average Number of Children Ever Born, Uganda 2022")

 ggplot() +
  geom_sf(data = UG_sub_boundary, color = "darkgrey", fill = NA, linewidth = 0.4) +
  geom_sf(data = UG_boundary, color = "black", fill = NA, linewidth = 0.4)+
  geom_sf(data = mapdata, aes(color = decidemedrateweighted), size = 5, alpha = .8) +
  scale_color_viridis_c(option = "magma", direction = -1, name = "Involved in medicial decisions") +
  ggtitle("PMA Enumeration Area Medical Decision-making Rate, Uganda 2022")

  ggplot() +
  geom_sf(data = UG_sub_boundary, color = "darkgrey", fill = NA, linewidth = 0.4) +
  geom_sf(data = UG_boundary, color = "black", fill = NA, linewidth = 0.4)+
  geom_sf(data = mapdata, aes(color = fertilityautonomymean), size = 5, alpha = .8) +
  scale_color_viridis_c(option = "magma", direction = -1, name = "Fertility autonomy scale") +
  ggtitle("PMA Enumeration Area Fertility Autonomy Rate, Uganda 2022")

The maps showcase: 1) overall low involvement in women’s medical decision-making; 2) varied total births per woman and varied levels of fertility autonomy.

library(dplyr)

# Select relevant columns and remove missing values
cluster_data <- mapdata %>%
  select(decidemedrateweighted, meanbirthsperwoman, fertilityautonomymean, geometry) %>%
  na.omit()                  


# Extract and z-score numeric columns for clustering
numeric_data<- cluster_data%>%
  st_drop_geometry() %>%  # Removes the geometry column
  select(decidemedrateweighted, meanbirthsperwoman, fertilityautonomymean) %>%
  na.omit() %>%
  scale()  # Z-score standardization

# Run k-means
set.seed(123)
kmeans_result <- kmeans(numeric_data, centers = 3)

# Add cluster labels back to original sf object
cluster_data$cluster <- as.factor(kmeans_result$cluster)

# Elbow method to find optimal number of clusters
wcss <- vector()
for (i in 1:10) {
  kmeans_model <- kmeans(numeric_data, centers = i)
  wcss[i] <- kmeans_model$tot.withinss
}
plot(1:10, wcss, type = "b", pch = 19, frame = FALSE,
     xlab = "Number of clusters K",
     ylab = "Total within-clusters sum of squares")

cluster_summary <- cluster_data%>%
  st_drop_geometry() %>%
  group_by(cluster) %>%
  summarise(
    avg_decision = mean(decidemedrateweighted, na.rm = TRUE),
    avg_fertility = mean(meanbirthsperwoman, na.rm = TRUE),
    avg_fertaut = mean(fertilityautonomymean, na.rm = TRUE),
    count = n()
  )

print(cluster_summary)

# A tibble: 3 × 5
  cluster avg_decision avg_fertility avg_fertaut count
  <fct>          <dbl>         <dbl>       <dbl> <int>
1 1              0.278          2.21        4.12    57
2 2              0.169          3.45        3.58    61
3 3              0.558          3.46        3.94    22

#recode cluster labels for map

cluster_data<- cluster_data %>%
  mutate(cluster_level = case_when(
    cluster == 1 ~ 'low fertility- high fertility autonomy- mid involvement in medical decision-making',
    cluster == 2 ~ 'high fertility- mid fertility autonomy-low involvement in medical decision-making',
    cluster == 3 ~ 'low fertility- mid fertility autonomy- high involvement in medical decision-making'
  ))


ggplot(data = cluster_data) +
  geom_sf(data = UG_sub_boundary, color = "darkgrey", fill = NA, linewidth = 0.4) +
  geom_sf(aes(color = cluster_level), size = 5, alpha = 0.5) +
  geom_sf(data = UG_boundary, color = "black", fill = NA, linewidth = 0.4) +
  scale_color_brewer(palette = "Set1", name = "Cluster") +
  ggtitle("Clusters of Medical Decision-making, Fertility Autonomy, and Number of Births per Woman, Uganda 2022")

Next, I perform ANOVA/ MANOVA testing to see if the clusters are significantly different from each other. These tests are significant.

# Drop geometry for analysis
df<- cluster_data %>% st_drop_geometry()

# ANOVA for medical decision-making rate
anova_decision <- aov(decidemedrateweighted~ cluster, data = df)
summary(anova_decision)

             Df Sum Sq Mean Sq F value Pr(>F)    
cluster       2  2.441  1.2204   78.12 <2e-16 ***
Residuals   137  2.140  0.0156                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# ANOVA for fertility rate
anova_fertility <- aov(meanbirthsperwoman ~ cluster, data = df)
summary(anova_fertility)

             Df Sum Sq Mean Sq F value Pr(>F)    
cluster       2  51.82   25.91   74.13 <2e-16 ***
Residuals   137  47.89    0.35                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Multivariate ANOVA
manova_result <- manova(cbind(decidemedrateweighted, meanbirthsperwoman) ~ cluster, data = df)
summary(manova_result)

           Df Pillai approx F num Df den Df    Pr(>F)    
cluster     2  1.057   76.788      4    274 < 2.2e-16 ***
Residuals 137                                            
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Now we will begin testing for univariate clustering.

First, we calculate Moran’s I. Medical decision-making and total births per woman show significant univariate spatial clustering.

#univariate for medical decision making
library(spdep)

Warning: package 'spdep' was built under R version 4.5.1

Loading required package: spData

Warning: package 'spData' was built under R version 4.5.1

To access larger datasets in this package, install the spDataLarge
package with: `install.packages('spDataLarge',
repos='https://nowosad.github.io/drat/', type='source')`

library(sf)

# Transform to UTM Zone 36N
mapdata_proj <- st_transform(mapdata, crs = 32636)

# Use centroids for coordinates
coords_proj <- st_coordinates(st_centroid(mapdata_proj))

# Create neighbors within 50 km
nb <- dnearneigh(coords_proj, 0, 50000)

Warning in dnearneigh(coords_proj, 0, 50000): neighbour object has 13
sub-graphs

lw <- nb2listw(nb, style = "W", zero.policy = TRUE)

# Check column existence and run Moran's I (univariate example)
moran_result <- moran.mc(mapdata_proj$decidemedrateweighted, lw, nsim = 499)

print(moran_result)


    Monte-Carlo simulation of Moran I

data:  mapdata_proj$decidemedrateweighted 
weights: lw  
number of simulations + 1: 500 

statistic = 0.11413, observed rank = 485, p-value = 0.03
alternative hypothesis: greater

p_value <- mean(abs(moran_result$res) >= abs(moran_result$statistic))
p_value

[1] 0.072

#univariate for fertility
library(spdep)
library(sf)

# Transform to UTM Zone 36N
mapdata_proj <- st_transform(mapdata, crs = 32636)

# Use centroids for coordinates
coords_proj <- st_coordinates(st_centroid(mapdata_proj))

# Create neighbors within 50 km
nb <- dnearneigh(coords_proj, 0, 50000)

Warning in dnearneigh(coords_proj, 0, 50000): neighbour object has 13
sub-graphs

lw <- nb2listw(nb, style = "W", zero.policy = TRUE)

# Check column existence and run Moran's I (univariate example)
moran_result <- moran.mc(mapdata_proj$meanbirthsperwoman, lw, nsim = 499)

print(moran_result)


    Monte-Carlo simulation of Moran I

data:  mapdata_proj$meanbirthsperwoman 
weights: lw  
number of simulations + 1: 500 

statistic = 0.22011, observed rank = 500, p-value = 0.002
alternative hypothesis: greater

p_value <- mean(abs(moran_result$res) >= abs(moran_result$statistic))
p_value

[1] 0.002

#univariate for fertility autonomy
library(spdep)
library(sf)

# Transform to UTM Zone 36N
mapdata_proj <- st_transform(mapdata, crs = 32636)

# Use centroids for coordinates
coords_proj <- st_coordinates(st_centroid(mapdata_proj))

# Create neighbors within 100 km
nb <- dnearneigh(coords_proj, 0, 50000)

Warning in dnearneigh(coords_proj, 0, 50000): neighbour object has 13
sub-graphs

lw <- nb2listw(nb, style = "W", zero.policy = TRUE)

# Check column existence and run Moran's I (univariate example)
moran_result <- moran.mc(mapdata_proj$fertilityautonomymean, lw, nsim = 499)

print(moran_result)


    Monte-Carlo simulation of Moran I

data:  mapdata_proj$fertilityautonomymean 
weights: lw  
number of simulations + 1: 500 

statistic = 0.064198, observed rank = 427, p-value = 0.146
alternative hypothesis: greater

p_value <- mean(abs(moran_result$res) >= abs(moran_result$statistic))
p_value

[1] 0.3

Now, let’s use the Getis Ord approach.

# Run Getis-Ord Gi* for fertilityrate
gi_fertility <- localG(mapdata_proj$meanbirthsperwoman, lw)

# Run Getis-Ord Gi* for decidemedrate
gi_decision <- localG(mapdata_proj$decidemedrateweighted, lw)

# Run Getis-Ord Gi* for MCPuserate
gi_fertaut <- localG(mapdata_proj$fertilityautonomymean, lw)

# Add Gi* results as new columns
mapdata$gi_fertility <- gi_fertility
mapdata$gi_decision <- gi_decision
mapdata$gi_fertaut <- gi_fertaut

mapdata$gi_fertility <- as.numeric(gi_fertility)
mapdata$gi_decision <- as.numeric(gi_decision)
mapdata$gi_fertaut <- as.numeric(gi_fertaut)

We can visualize the hot and cold spots using this approach.

scale_color_gradient2(
  low = "blue",
  mid = "white",
  high = "red",
  midpoint = 0,
  name = "Gi* Score"
)

<ScaleContinuous>
 Range:  
 Limits:    0 --    1

scale_color_viridis_c(option = "D", name = "Gi* Score")

<ScaleContinuous>
 Range:  
 Limits:    0 --    1

ggplot(data = mapdata) +
  geom_sf(data = UG_sub_boundary, color = "darkgrey", fill = NA, linewidth = 0.4) +
  geom_sf(aes(color = gi_fertility), size = 5, alpha = 0.5) +
  geom_sf(data = UG_boundary, color = "black", fill = NA, linewidth = 0.4) +
  scale_color_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0, name = "Gi* Score") +
  ggtitle("Cluster Analysis of Births per Woman, Uganda 2022")

ggplot(data = mapdata) +
  geom_sf(data = UG_sub_boundary, color = "darkgrey", fill = NA, linewidth = 0.4) +
  geom_sf(aes(color = gi_fertaut), size = 5, alpha = 0.5) +
  geom_sf(data = UG_boundary, color = "black", fill = NA, linewidth = 0.4) +
  scale_color_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0, name = "Gi* Score") +
  ggtitle("Cluster Analysis of Fertility Autonomy, Uganda 2022")

ggplot(data = mapdata) +
  geom_sf(data = UG_sub_boundary, color = "darkgrey", fill = NA, linewidth = 0.4) +
  geom_sf(aes(color = gi_decision), size = 5, alpha = 0.5) +
  geom_sf(data = UG_boundary, color = "black", fill = NA, linewidth = 0.4) +
  scale_color_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0, name = "Gi* Score") +
  ggtitle("Cluster Analysis of Medical Decision-making, Uganda 2022")

Now let’s consider if these gi scores cluster together. We can use the k-means procedure again.

#scale the variables

# Normalize variables (e.g., z-scores)
mapdata$decide_z <- scale(mapdata$gi_decision)
mapdata$fertaut_z <- scale(mapdata$gi_fertaut)
mapdata$fert_z <- scale(mapdata$gi_fertility)

library(dplyr)
library(sf)   # since you're working with sf objects

# Drop geometry and keep only the z-scored variables
gi_data_scaled <- mapdata %>%
  st_drop_geometry() %>%
  select(decide_z, fertaut_z, fert_z) %>%
  mutate(across(everything(), as.numeric))

# Remove rows with NA, NaN, or Inf
gi_data_scaled <- gi_data_scaled %>%
  filter(if_all(everything(), ~ !is.na(.) & !is.infinite(.) & !is.nan(.)))

# Check again
summary(gi_data_scaled)

    decide_z         fertaut_z            fert_z       
 Min.   :-2.1511   Min.   :-2.69672   Min.   :-2.1503  
 1st Qu.:-0.6461   1st Qu.:-0.72928   1st Qu.:-0.0925  
 Median :-0.1344   Median : 0.04732   Median : 0.3615  
 Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000  
 3rd Qu.: 0.5952   3rd Qu.: 0.67110   3rd Qu.: 0.6431  
 Max.   : 2.7545   Max.   : 1.73915   Max.   : 1.3479

# Now run kmeans
set.seed(123)
kmeans_result <- kmeans(gi_data_scaled, centers = 3, nstart = 25)

# Add cluster assignments back to your sf object
mapdata$cluster <- NA
mapdata$cluster[as.numeric(rownames(gi_data_scaled))] <- kmeans_result$cluster



# Print cluster means directly from kmeans result
kmeans_result$centers

     decide_z   fertaut_z     fert_z
1  1.42794759  1.45765177 -1.9881117
2 -0.08803052  0.03247503  0.4261668
3 -0.91871570 -1.26142730  0.4760361

Create a map for this

#recode cluster labels for map

mapdata<- mapdata %>%
  mutate(cluster_level = case_when(
    cluster == 1 ~ 'low fertility- high fertility autonomy- high involvement in medical decision-making',
    cluster == 2 ~ 'high fertility- mid fertility autonomy-low involvement in medical decision-making',
    cluster == 3 ~ 'high fertility- low fertility autonomy- low involvement in medical decision-making',
  ))

mapdata<- mapdata %>%
  na.omit(cluster_level)

ggplot(data = mapdata) +
  geom_sf(data = UG_sub_boundary, color = "darkgrey", fill = NA, linewidth = 0.4) +
  geom_sf(aes(color = factor(cluster_level)), size = 5, alpha = 0.5) +
  geom_sf(data = UG_boundary, color = "black", fill = NA, linewidth = 0.4) +
  scale_color_brewer(palette = "Set1", name = "Cluster") +
  ggtitle("Cluster Analysis of Gi* Variables, Uganda 2022")

Blue & green values are clustering in urban areas. Could argue that diffusion is happening…