Packages

library( geojsonio )   # read shapefiles
library( sp )          # work with shapefiles
library( sf )          # work with shapefiles - simple features format
library( mclust )      # cluster analysis 
library( tmap )        # theme maps
library( ggplot2 )     # graphing 
library( ggthemes )    # nice formats for ggplots
library( dplyr )       # data wrangling 
library( pander )      # formatting RMD tables
library( tidycensus )

library( cartogram )  # spatial maps w/ tract size bias reduction
library( maptools )   # spatial object manipulation 
library( corrplot )

census_api_key("49d90e91867ec8c4c1eab210c0c0636ad21f5b6e")

crosswalk <- read.csv( "https://raw.githubusercontent.com/DS4PS/cpp-529-master/master/data/cbsatocountycrosswalk.csv",  stringsAsFactors=F, colClasses="character" )

# search for cities names San Diego 
# grep( "^SAN DIEGO, CA", crosswalk$msaname, value=TRUE ) 

these.msa <- crosswalk$msaname == "SAN DIEGO, CA"

# Get the FIPS codes for the selected MSA
these.fips <- crosswalk$fipscounty[these.msa]
these.fips <- na.omit(these.fips)

# Get San Diego
sd_data <- get_acs(geography = "tract", 
                   variables = "B01003_001",
                   state = "06", 
                   county = "073", 
                   geometry = TRUE) %>% 
  select(GEOID, estimate) %>%
  rename(POP = estimate)

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |=========                                                             |  14%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |===================================                                   |  51%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================|  99%
  |                                                                            
  |======================================================================| 100%

# Load Census data
URL <- "https://github.com/DS4PS/cpp-529-master/raw/master/data/ltdb_std_2010_sample.rds"
census_data <- readRDS(gzcon(url(URL)))

# remove leading 0s
sd_data$GEOID <- substr(sd_data$GEOID, 2, nchar(sd_data$GEOID))



# Merge sd_data (San Diego tracts) with census_data
sd_data <- merge(sd_data, census_data, by.x = "GEOID", by.y = "tractid")


# Remove any empty polygons
sd_merged <- sd_data[!st_is_empty(sd_data), ]

Data Source

This exercise uses Census data from the 2012 American Communities Survey made available through the Diversity and Disparities Project.

DATA DICTIONARY

data.dictionary <- 
structure(list(LABEL = c("tractid", "pnhwht12", "pnhblk12", "phisp12", 
"pntv12", "pfb12", "polang12", "phs12", "pcol12", "punemp12", 
"pflabf12", "pprof12", "pmanuf12", "pvet12", "psemp12", "hinc12", 
"incpc12", "ppov12", "pown12", "pvac12", "pmulti12", "mrent12", 
"mhmval12", "p30old12", "p10yrs12", "p18und12", "p60up12", "p75up12", 
"pmar12", "pwds12", "pfhh12"), VARIABLE = c("GEOID", "Percent white, non-Hispanic", 
"Percent black, non-Hispanic", "Percent Hispanic", "Percent Native American race", 
"Percent foreign born", "Percent speaking other language at home, age 5 plus", 
"Percent with high school degree or less", "Percent with 4-year college degree or more", 
"Percent unemployed", "Percent female labor force participation", 
"Percent professional employees", "Percent manufacturing employees", 
"Percent veteran", "Percent self-employed", "Median HH income, total", 
"Per capita income", "Percent in poverty, total", "Percent owner-occupied units", 
"Percent vacant units", "Percent multi-family units", "Median rent", 
"Median home value", "Percent structures more than 30 years old", 
"Percent HH in neighborhood 10 years or less", "Percent 17 and under, total", 
"Percent 60 and older, total", "Percent 75 and older, total", 
"Percent currently married, not separated", "Percent widowed, divorced and separated", 
"Percent female-headed families with children")), class = "data.frame", row.names = c(NA, 
-31L))

Load San Diego Shapefile

# dorling cartogram of San Diego Census Tracts

# Convert the sf map object to an sp version
sd_sp <- as_Spatial(sd_merged)

# Project the map and remove empty tracts
sd_sp <- spTransform(sd_sp, CRS("+init=epsg:3395"))
sd_sp <- sd_sp[sd_sp$POP != 0 & (!is.na(sd_sp$POP)), ]

# Convert census tract polygons to a Dorling cartogram

sd_sp$pop_w <- sd_sp$POP / 9000  # Adjust the scaling factor as needed
sd_dorling <- cartogram_dorling(x = sd_sp, weight = "pop_w", k = 1) 

# Plot the Dorling cartogram
plot(sd_dorling)

library(tmap)

# Transform the CRS of sd_dorling
sd_dorling <- spTransform(sd_dorling, CRS("+init=epsg:3395"))

# Define a bounding box for the San Diego area
bb <- st_bbox(c(xmin = -13100000, xmax = -12850000,
                ymax = 4025000, ymin = 3775000),
              crs = st_crs("+init=epsg:3395"))

# Create the tmap visualization for the Dorling cartogram
tm_shape(sd_dorling, bbox = bb) +
  tm_polygons(col = "hinc12", n = 10, style = "quantile", palette = "Spectral") +
  tm_layout("San Diego Dorling Cartogram", title.position = c("right", "top"))

tmap_mode("view")
tm_basemap( "HikeBike.HillShading"  ) +
  tm_shape( sd_dorling, bbox=bb ) + 
  tm_polygons( col="phisp12", n=7, style="quantile", palette="-inferno" )

tm_basemap( "Stamen.Watercolor" ) +
tm_shape( sd_dorling, bbox=bb ) + 
  tm_polygons( col="hinc12", n=7, style="quantile", palette="RdYlBu" ) +
  tm_legend( show=FALSE )

Census Variables

We are going to extract the data from the shapefile and save it as a separate data frame so we can use it for analysis independent of the shapefile.

d1 <- sd_dorling@data

Prepare Data for Clustering

We transform all of the variables to z scores so they are on the same scale while clustering. This ensures that each census variable has equal weight. Z-scores typically range from about -3 to +3 with a mean of zero.

keep.these <- c("pnhwht12", "pnhblk12", "phisp12", "pntv12", "pfb12", "polang12", 
"phs12", "pcol12", "punemp12", "pflabf12", "pprof12", "pmanuf12", 
"pvet12", "psemp12", "hinc12", "incpc12", "ppov12", "pown12", 
"pvac12", "pmulti12", "mrent12", "mhmval12", "p30old12", "p10yrs12", 
"p18und12", "p60up12", "p75up12", "pmar12", "pwds12", "pfhh12")

d2 <- select( d1, keep.these )
d3 <- apply( d2, 2, scale )

Perform Cluster Analysis

set.seed( 1234 )
fit <- Mclust( d3 )
sd_dorling$cluster <- as.factor( fit$classification )
summary( fit )

## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust VVE (ellipsoidal, equal orientation) model with 6 components: 
## 
##  log-likelihood   n  df       BIC       ICL
##       -11080.09 534 800 -27184.49 -27224.52
## 
## Clustering table:
##   1   2   3   4   5   6 
##  38  62 123 141 117  53

Some visuals of model fit statistics (doesn’t work well with a lot of variables):

plot( fit, what = "classification" )

Identifying Neighborhood Clusters

d2$cluster <- d2$cluster <- as.factor( paste0("GROUP-",fit$classification) )

ggplot( d2, aes( x=phisp12 ) ) + 
        geom_density( alpha = 0.5, fill="blue" ) + # xlim( -3, 3 ) +
        xlab( "Percent Hispanic" ) + facet_wrap( ~ cluster, nrow=2 ) + theme_minimal()

ggplot( d2, aes( x=pcol12) ) +        
  geom_density( alpha = 0.5, fill="blue" ) + # xlim( -3, 3 ) +
  xlab( "Percent with a College Degree" ) + facet_wrap( ~ cluster, nrow=2 ) + theme_minimal()

df.pct <- sapply( d2, ntile, 100 )
d3 <- as.data.frame( df.pct )
d3$cluster <- as.factor( paste0("GROUP-",fit$classification) )

stats <- 
d3 %>% 
  group_by( cluster ) %>% 
  summarise_each( funs(mean) )

t <- data.frame( t(stats), stringsAsFactors=F )
names(t) <- paste0( "GROUP.", 1:6 )
t <- t[-1,]

for( i in 1:6 )
{
  z <- t[,i]
  plot( rep(1,30), 1:30, bty="n", xlim=c(-75,100), 
        type="n", xaxt="n", yaxt="n",
        xlab="Percentile", ylab="",
        main=paste("GROUP",i) )
  abline( v=seq(0,100,25), lty=3, lwd=1.5, col="gray90" )
  segments( y0=1:30, x0=0, x1=100, col="gray70", lwd=2 )
  text( -0.2, 1:30, data.dictionary$VARIABLE[-1], cex=0.85, pos=2 )
  points( z, 1:30, pch=19, col="firebrick", cex=1.5 )
  axis( side=1, at=c(0,50,100), col.axis="gray", col="gray" )
}

tmap_mode("plot")
tmap_style("cobalt")
tm_shape( sd_dorling, bbox=bb ) + 
  tm_polygons( col="cluster", palette="Accent"  )

Labels

GROUP-1: Mixed-Ethnicity Older Neighborhoods with Moderate Socioeconomic Status GROUP-2: Urban Highly Educated Neighborhoods with Diverse Populations GROUP-3: Economically Disadvantaged and Diverse Urban Communities GROUP-4: Affluent and Educated Older Communities GROUP-5: Majority Hispanic, Low-Income Urban Communities GROUP-6: Affluent, Educated, and Diverse Urban Communities

Part 2

d3 <- as.data.frame(d3)

df.dim1 <- dplyr::select( d3, pown12, pmulti12, p10yrs12, pwds12, pfhh12 )

corrplot( cor(df.dim1, use="complete.obs"), 
          order = "hclust", tl.col='black', tl.cex=.75 )

# flip the signs 
df.dim1$pmulti12  <-  - df.dim1$pmulti12
df.dim1$p10yrs12  <-  - df.dim1$p10yrs12
df.dim1$pwds12    <-  - df.dim1$pwds12
df.dim1$pfhh12    <-  - df.dim1$pfhh12

corrplot( cor(df.dim1, use="complete.obs"), 
          order = "hclust", tl.col='black', tl.cex=.75 )

df.dim1 <- dplyr::select(d3, pnhwht12, pnhblk12, phisp12, pfb12,polang12 )

corrplot( cor(df.dim1, use="complete.obs"), 
          order = "hclust", tl.col='black', tl.cex=.75 )

df.dim2 <- d3[ c("pnhwht12", "pnhblk12", "phisp12", "pfb12", "polang12") ]
# Flip the sign of the percent white variable
df.dim2$pnhwht12 <- -df.dim2$pnhwht12

# Generate the correlation plot
corrplot(cor(df.dim2, use = "complete.obs"), order = "hclust", tl.col = 'black', tl.cex = .75)

# Flip the sign of phs12

df.dim3 <- select( d3, pcol12, phs12, pprof12, hinc12, mhmval12 )

df.dim3$phs12 <- -df.dim3$phs12

# Create the correlation plot with the updated sign
corrplot(cor(df.dim3, use="complete.obs"), 
         order = "hclust", tl.col='black', tl.cex=.75)

dim1 = d3$pown12 - d3$pmulti12 - d3$p10yrs12 - d3$pwds12 - d3$pfhh12
dim2 = -d3$pnhwht12 + d3$pnhblk12 + d3$phisp12 + d3$pfb12 + d3$polang12
dim3 = d3$pcol12 - d3$phs12 + d3$pprof12 + d3$hinc12 + d3$mhmval12

df.nhood.metrics <- data.frame(dim1,dim2,dim3)
summary( df.nhood.metrics )

##       dim1              dim2             dim3       
##  Min.   :-372.00   Min.   :-95.00   Min.   :-90.00  
##  1st Qu.:-234.00   1st Qu.: 44.25   1st Qu.: 27.25  
##  Median :-149.00   Median :143.00   Median :144.50  
##  Mean   :-145.20   Mean   :145.20   Mean   :145.20  
##  3rd Qu.: -54.25   3rd Qu.:253.75   3rd Qu.:260.50  
##  Max.   :  95.00   Max.   :369.00   Max.   :397.00

corrplot( cor( df.nhood.metrics, use="complete.obs" ), 
          order = "hclust", tl.col='black', tl.cex=.75 )

# Fit the Mclust model using the df.nhood.
fit2 <- Mclust(df.nhood.metrics)
summary(fit2)

## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust VEE (ellipsoidal, equal shape and orientation) model with 5 components: 
## 
##  log-likelihood   n df       BIC       ICL
##       -9451.964 534 29 -19086.06 -19226.04
## 
## Clustering table:
##   1   2   3   4   5 
##  35 271 120  58  50

# Update the sd_dorling data frame with the new classifications
sd_dorling$cluster2 <- as.factor(fit2$classification)

# cluster with dataset of three census variables 
d33 <- data.frame( d3$p18und12, d3$pflabf12, d3$hinc12 )
fit3 <- Mclust( d33 )
summary( fit3 )

## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust VVI (diagonal, varying volume and shape) model with 6 components: 
## 
##  log-likelihood   n df       BIC       ICL
##       -7341.632 534 41 -14940.76 -15101.73
## 
## Clustering table:
##   1   2   3   4   5   6 
##  81  40 156  64  98  95

sd_dorling$cluster3 <- as.factor( fit3$classification )



tmap_mode("plot")
tmap_style("cobalt")

tm1 <- 
tm_shape( sd_dorling, bbox=bb ) + 
  tm_polygons( col="cluster", palette="Accent"  )

tm2 <- 
tm_shape( sd_dorling, bbox=bb ) + 
  tm_polygons( col="cluster2", palette="Accent"  )

tm3 <- 
tm_shape( sd_dorling, bbox=bb ) + 
  tm_polygons( col="cluster3", palette="Accent"  )


tmap_arrange( tm1, tm2, tm3 )

plot( fit2, what = "classification" )

plot( fit3, what = "classification" )

plot( fit2, what = "classification" )

plot( fit3, what = "classification" )

Question 1

Compare that set of groups to the groups identified by the model using only the three indices above. Are they identifying the same groups? Which group is missing?

They do not perfectly align with the original six groups. There is some overlap, but the new groups capture different aspects of the data.

From the six original groups, Group 3 and Group 5 seems to be the ones least represented in the new groups formed. These group have unique combinations of characteristics that make it difficult to be accurately represented. For example, differences in foreign-born residents, language spoken at home, housing characteristics, percentage 18 and under, and female labor force participation.

# use dput( data.dictionary ) to create reproducible data frames for RMD files
data.dictionary1 <- 
structure(list(LABEL = c("pnhwht12", "punemp12","p30old12", "p10yrs12"), VARIABLE = c("Percent white","Percent unemployed","Percent structures more than 30 years old")), class = "data.frame", row.names = c(NA, 
-30L))


# list variables for clustering
use.these <- c("pnhwht12", "punemp12", "p30old12")

dd.cluster1 <- data.dictionary[ data.dictionary$LABEL %in% use.these , ]


# cluster 2
LABEL <- c("dim1","dim2","dim3")
VARIABLE <- c("Neighborhood transitivity","Neighborhood diversity","Human capital")
dd.cluster2 <- data.frame( LABEL, VARIABLE )


# cluster 3 - update with your variables
use.these <- c("pnhwht12", "punemp12", "p30old12")
dd.cluster3 <- data.dictionary[ data.dictionary$LABEL %in% use.these , ]

d5 <- select( d1, use.these )
d6 <- apply( d5, 2, scale )

Perform Cluster Analysis

set.seed( 1234 )
fit <- Mclust( d6 )
sd_dorling$cluster <- as.factor( fit$classification )
summary( fit )

## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust VVI (diagonal, varying volume and shape) model with 4 components: 
## 
##  log-likelihood   n df       BIC       ICL
##        -2025.49 534 27 -4220.551 -4391.385
## 
## Clustering table:
##   1   2   3   4 
## 126 189 110 109

Some visuals of model fit statistics (doesn’t work well with a lot of variables):

plot( fit, what = "classification" )

Identifying Neighborhood Clusters

d5$cluster <- d5$cluster <- as.factor( paste0("GROUP-",fit$classification) )

ggplot( d5, aes( x=pnhwht12) ) + 
        geom_density( alpha = 0.5, fill="blue" ) + # xlim( -3, 3 ) +
        xlab( "Percent White" ) + facet_wrap( ~ cluster, nrow=2 ) + theme_minimal()

ggplot( d5, aes( x=punemp12) ) +        
  geom_density( alpha = 0.5, fill="blue" ) + # xlim( -3, 3 ) +
  xlab( "Percent Unemployed" ) + facet_wrap( ~ cluster, nrow=2 ) + theme_minimal()

df.pct <- sapply( d5, ntile, 100 )
d6 <- as.data.frame( df.pct )
d6$cluster <- as.factor( paste0("GROUP-",fit$classification) )

stats <- 
d6 %>% 
  group_by( cluster ) %>% 
  summarise_each( funs(mean) )

t <- data.frame( t(stats), stringsAsFactors=F )
names(t) <- paste0( "GROUP.", 1:4 )
t <- t[-1,]

for (i in 1:4) {
  z <- t[, i]
  plot(rep(1, 3), 1:3, bty = "n", xlim = c(-75, 100),
       type = "n", xaxt = "n", yaxt = "n",
       xlab = "Percentile", ylab = "",
       main = paste("GROUP", i))
  abline(v = seq(0, 100, 25), lty = 3, lwd = 1.5, col = "gray90")
  
  # Modify the 'segments' function call to display only 3 segments instead of 30
  segments(y0 = 1:3, x0 = 0, x1 = 100, col = "gray70", lwd = 2)
  
  # Use the filtered data dictionary for the labels
  text(-0.2, 1:3, data.dictionary1$VARIABLE, cex = 0.85, pos = 2)
  points(z, 1:3, pch = 19, col = "firebrick", cex = 1.5)
  axis(side = 1, at = c(0, 50, 100), col.axis = "gray", col = "gray")
}

print(stats)

## # A tibble: 4 × 4
##   cluster pnhwht12 punemp12 p30old12
##   <fct>      <dbl>    <dbl>    <dbl>
## 1 GROUP-1    82.9      26.8     62.1
## 2 GROUP-2    42.3      56.9     55.3
## 3 GROUP-3     9.76     72.5     58.9
## 4 GROUP-4    58.1      34.3     10.1