Importing, setting up data

The code in this section imports the data and cleans it. It is hidden by default. Many code chunks are. If you’d like to view them, click the “Show” button.

dat <- read.csv("~/Desktop/Junk/Google Drive/Research/--Research -- GLP1/01 Initial short study/GLP1 - Short_August 6, 2025_11.52.csv", comment.char="#")

# First 10 lines are extra headers, preview data, labbies doing test runs
dat=dat[-c(1:10),]

#Recode functions

likely_map=c(
  "Extremely unlikely" = 1,
  "Somewhat unlikely" = 2, 
  "Neither likely nor unlikely" = 3, 
  "Somewhat likely" = 4, 
  "Extremely likely" = 5
)

agree_map=c(
  "Strognly disagree" = 1,
  "Somewhat disagree" = 2, 
  "Neither agree nor disagree" = 3, 
  "Somewhat agree" = 4, 
  "Strongly agree" = 5
)

wtp_map=c(
  "Nothing more" = 0,
  "$0.50 more" = .5, 
  "$1.00 more" = 1, 
  "$1.50 more" = 1.5, 
  "$2.00 more" = 2,
  "$2.50 more" = 2.5,
  "$3.00 more" = 3,
  "$3.50 more" = 3.5,
  "$4.00 more" = 4,
  "$4.50 more" = 4.5,
  "$5.00+ more" = 5
)

yes_no_map=c(
  "Definitely no" = 1,
  "Probably no" = 2, 
  "Not sure" = 3, 
  "Probably yes" = 4, 
  "Definitely yes" = 5
)

For each of the 40 participants, here’s a list of ingredients they believe are in the smoothie powder:

# All of the participants correctly ID'd the product
dat$contains.glp1 # Everyone thinks it has GLP-1?
##  [1] "GLP-1,Protein,Ashwagandha" "GLP-1"                    
##  [3] "GLP-1"                     "GLP-1,Protein"            
##  [5] "GLP-1"                     "GLP-1"                    
##  [7] "GLP-1"                     "GLP-1"                    
##  [9] "GLP-1,Protein,Not sure"    "GLP-1"                    
## [11] "GLP-1"                     "Protein"                  
## [13] "GLP-1"                     "GLP-1"                    
## [15] "GLP-1"                     "GLP-1,Protein,Fiber"      
## [17] "GLP-1"                     "GLP-1,Protein"            
## [19] "GLP-1"                     "GLP-1"                    
## [21] "GLP-1"                     "GLP-1"                    
## [23] "GLP-1,Protein"             "GLP-1"                    
## [25] "GLP-1,Protein,Fiber"       "GLP-1"                    
## [27] "GLP-1"                     "GLP-1,Protein"            
## [29] "GLP-1"                     "GLP-1"                    
## [31] "GLP-1"                     "GLP-1"                    
## [33] "GLP-1"                     "GLP-1"                    
## [35] "GLP-1"                     "GLP-1"                    
## [37] "GLP-1"                     "GLP-1"                    
## [39] "GLP-1"                     "GLP-1"

Each participant indicated the product contains GLP-1.

Descriptive statistics

Below are the distributions of Likert ratings for various statements. All of them are on a 5-point scale, with 1 being most negative (“Strongly disagree”, “Very unlikely”) and 5 being the most positive (“Strongly agree”, “Very likely”).

Product attitudes

“How likely are you to buy a product like the one you saw on the previous page?”

# Most participants are unlikely to purchase this product
dat$purchase1=likely_map[dat$purchase1]
barplot(table(factor(dat$purchase1,levels=1:5)), main="Likely to buy")

“I would consider buying this product in the future.”

# Weirdly, more people are willing to "consider" it
dat$purchase2=agree_map[dat$purchase2]
barplot(table(factor(dat$purchase2, levels=1:5)),main="Willing to consider buying")

“This product is appealing to me.”

# Good bit of people find it "Appealing"
dat$appealing=agree_map[dat$appealing]
barplot(table(factor(dat$appealing, levels=1:5)),main="Appealing to me")

“Compared to a normal product in this category, how much more would you be willing to pay for this product?”

# Most won't pay more for GLP-1 smoothie
dat$wtp=wtp_map[dat$wtp]
hist(dat$wtp,main="Willing to pay X more",xlab="Willingness to Pay")

GLP-1 Questions

“I am familiar with GLP-1.”

# "familiar with GLP-1"
dat$understand1=agree_map[dat$understand1]
barplot(table(factor(dat$understand1, levels=1:5)),main="Familiar with GLP-1")

“I understand how GLP-1 affects weight loss.”

# "understand how GLP-1 affects weight loss"
dat$understand2=agree_map[dat$understand2]
barplot(table(factor(dat$understand2, levels=1:5)),main="Understand how GLP-1 works")

“I am aware of the potential side effects of GLP-1.”

# "aware of potential side effects of GLP-1"
dat$understand3=agree_map[dat$understand3]
barplot(table(factor(dat$understand3, levels=1:5)),main="Aware of side effects")

“Would you consider taking any of these weight loss drugs: Semaglutide drugs with brand names Ozempic, Wegovy, Zepbound, Mounjaro? These have been found to lead to up to approximately 15% of body weight lost across time.”

# "would you consider taking any of these meds"?
dat$wttGLP1=yes_no_map[dat$wttGLP1]
barplot(table(factor(dat$wttGLP1, levels=1:5)),main="Willing to take GLP-1")

Demographics

Below is the demographic data. For means and SDs, I put them to the right of the line that summons them following a hashtag/comment:

# mean(as.numeric(dat$age)) # Mean age = 40.925
# sd(as.numeric(dat$age)) # SD age = 12.36908
# 
# 
dat$bmi = (as.numeric(dat$weight) * 703) / (as.numeric(dat$height_1)^2)
# mean(dat$bmi) # Mean BMI = 29.02529
# sd(dat$bmi) # SD BMI = 14.28639

dat$ses=ifelse(dat$ses=="1 Worst off",1,dat$ses)
dat$ses=as.numeric(dat$ses)
barplot(table(factor(dat$ses, levels=1:10)),main="SES Ladder")

dat$income = factor(dat$income, levels=c(
  "$0 - $19,999",
  "$20,000 - $39,999",
  "$40,000 - $59,999",
  "$60,000 - $89,999",
  "$90,000 - $119,999",
  "$120,000 - $149,999",
  "$150,000+"
  ),
  ordered = TRUE
)

par(mar = c(10, 4, 4, 2))
barplot(table(dat$income), las=2,main="Annual House Income")

par(mar = c(5.1, 4.1, 4.1, 2.1))
barplot(table(dat$gender),las=2,main="Gender")

dat$race=as.character(dat$race)
dat$race <- ifelse(grepl(",", dat$race), "Multiple", dat$race)

par(mar = c(15, 4, 4, 2))
barplot(table(dat$race),las=2,main="Race")

par(mar = c(5.1, 4.1, 4.1, 2.1))
table(dat$ethnicity)
## 
##     Hispanic or Latino or Spanish Origin 
##                                        3 
## Not Hispanic or Latino or Spanish Origin 
##                                       37

Factor analyses

Product attitudes

First, we’ll combine the “attitudes towards the product” items into an isolated data frame. Then we’ll run a parallel analysis.

product.attitudes=cbind(dat$purchase1,dat$purchase2,dat$appealing,dat$wtp)
library(psych)
fa.parallel(product.attitudes,fm='minres',fa='fa')

This looks like pretty clear evidence of a single underlying “appealingness” or “attitude towards product” dimension. Let’s look at the factor loadings after fitting these data to a one factor solution:

product.attitudes.1=fa(product.attitudes,nfactors=1,rotate="oblimin",fm="minres")
print(product.attitudes.1)
## Factor Analysis using method =  minres
## Call: fa(r = product.attitudes, nfactors = 1, rotate = "oblimin", fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
##    MR1   h2    u2 com
## 1 0.79 0.62 0.377   1
## 2 0.97 0.94 0.063   1
## 3 0.94 0.88 0.125   1
## 4 0.47 0.22 0.779   1
## 
##                 MR1
## SS loadings    2.66
## Proportion Var 0.66
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  6  with the objective function =  3.02 with Chi Square =  111.26
## df of  the model are 2  and the objective function was  0.04 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.05 
## 
## The harmonic n.obs is  33 with the empirical chi square  0.27  with prob <  0.87 
## The total n.obs was  40  with Likelihood Chi Square =  1.55  with prob <  0.46 
## 
## Tucker Lewis Index of factoring reliability =  1.013
## RMSEA index =  0  and the 90 % confidence intervals are  0 0.295
## BIC =  -5.82
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.98
## Multiple R square of scores with factors          0.96
## Minimum correlation of possible factor scores     0.91

Model fit indices look excellent, except the “willingness to pay” (WTP) item loads weakly onto the main factor. Its low communality is concerning. With SS = 2.66, this factor explains about 2.66 variables worth of variance. Seems to me like WTP has to go. Let’s check a few other things while we’re at it.

product.attitudes.r=cbind(dat$purchase1,dat$purchase2,dat$appealing)
product.attitudes.1.r=fa(product.attitudes.r,nfactors=1,rotate="oblimin",fm="minres")
print(product.attitudes.1.r)
## Factor Analysis using method =  minres
## Call: fa(r = product.attitudes.r, nfactors = 1, rotate = "oblimin", 
##     fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
##    MR1   h2    u2 com
## 1 0.77 0.59 0.409   1
## 2 0.98 0.96 0.044   1
## 3 0.95 0.89 0.105   1
## 
##                 MR1
## SS loadings    2.44
## Proportion Var 0.81
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  3  with the objective function =  2.79 with Chi Square =  103.52
## df of  the model are 0  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic n.obs is  32 with the empirical chi square  0  with prob <  NA 
## The total n.obs was  40  with Likelihood Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.98
## Multiple R square of scores with factors          0.97
## Minimum correlation of possible factor scores     0.94
# psych::alpha(product.attitudes)[1] # Alpha = 0.8504186
# psych::alpha(product.attitudes.r)[1] # Alpha = 0.9169921

That seals it for me. We’ll treat “product attitudes” as separate from WTP

dat$product.attitude=rowMeans(product.attitudes.r,na.rm=TRUE)

Familiarity with GLP-1

First, we do a parallel analysis on all the items together.

familiarity=cbind(dat$understand1,dat$understand2,dat$understand3)
fa.parallel(product.attitudes,fm='minres',fa='fa')

Another one factor solution emerges as the top prospect.

familiarity.1=fa(familiarity,nfactors=1,rotate="oblimin",fm="minres")
print(familiarity.1)
## Factor Analysis using method =  minres
## Call: fa(r = familiarity, nfactors = 1, rotate = "oblimin", fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
##    MR1   h2   u2 com
## 1 0.89 0.79 0.21   1
## 2 0.65 0.42 0.58   1
## 3 0.77 0.60 0.40   1
## 
##                 MR1
## SS loadings    1.81
## Proportion Var 0.60
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  3  with the objective function =  1.07 with Chi Square =  39.94
## df of  the model are 0  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic n.obs is  30 with the empirical chi square  0  with prob <  NA 
## The total n.obs was  40  with Likelihood Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.93
## Multiple R square of scores with factors          0.86
## Minimum correlation of possible factor scores     0.72

This 3-item trio is “just identified” in a one factor solution, which means that our fit indices are not going to be helpful here. My takeaway is: This is an OK “scale”, but there is room for improvement.

In hindsight, the scale is mixing familiarity, understanding and awareness of specific aspects of GLP-1. I didn’t think this would matter much because I assumed these would all reflect a more general, diffuse sense of “familiarity”. I underestimate people! In the future, we’ll want to isolate these constructs, which might imply a longer survey. Or, we can pick the construct we care most about and focus on that one. Generally, we should start having at least 4 items per construct.

dat$familiarity=rowMeans(familiarity)

Note about row means

Usually, I prefer to use estimated factor scores rather than row means. Since we have so few items per scale, and one of them has really high factor loadings across the board, there’s little upside to doing it this time around.

Exploratory analyses

Here, I’m going to select a few “dependent variables” and examine how our basic demographic variables relate to them. There could very will be some type 1 errors popping up, so let’s remember to treat these analyses as purely exploratory.

dat$income=as.numeric(dat$income)
dat$is.female=ifelse(dat$gender=="Female",1,0)
dat$is.white=ifelse(dat$race=="White or European American",1,0)
dat$age=as.numeric(dat$age)

Attitudes towards GLP-1 smoothie

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gtsummary)

mod1 <- lm(product.attitude ~ age + bmi + ses + income + is.female + is.white, dat)

tbl <- tbl_regression(mod1) %>%
  add_global_p() %>%
  modify_header(label = "**Predictor**",
                estimate = "**Estimate**",
                std.error = "**SE**",
                p.value = "**p-value**") %>%
  modify_footnote(estimate ~ paste0("R² = ", round(summary(mod1)$r.squared, 3),
                                   ", Adj. R² = ", round(summary(mod1)$adj.r.squared, 3)))

tbl
Predictor Estimate1 SE 95% CI p-value
age 0.02 0.016 -0.01, 0.06 0.14
bmi 0.03 0.013 0.00, 0.05 0.062
ses 0.11 0.149 -0.19, 0.42 0.5
income -0.04 0.140 -0.32, 0.25 0.8
is.female 0.18 0.374 -0.58, 0.94 0.6
is.white 0.74 0.455 -0.19, 1.7 0.11
Abbreviations: CI = Confidence Interval, SE = Standard Error
1 R² = 0.257, Adj. R² = 0.122

None of the predictors appear to have unique associations with attitudes toward the GLP-1 smoothie (while adjusting for each other), though, collectively, they do account for ~25% of the variability in those attitudes.

Willingness to pay for GLP-1 smoothie

mod1 <- lm(wtp ~ age + bmi + ses + income + is.female + is.white, dat)

tbl <- tbl_regression(mod1) %>%
  add_global_p() %>%
  modify_header(label = "**Predictor**",
                estimate = "**Estimate**",
                std.error = "**SE**",
                p.value = "**p-value**") %>%
  modify_footnote(estimate ~ paste0("R² = ", round(summary(mod1)$r.squared, 3),
                                   ", Adj. R² = ", round(summary(mod1)$adj.r.squared, 3)))

tbl
Predictor Estimate1 SE 95% CI p-value
age 0.03 0.019 -0.01, 0.07 0.12
bmi 0.02 0.016 -0.01, 0.05 0.2
ses -0.03 0.180 -0.40, 0.33 0.9
income 0.07 0.170 -0.28, 0.41 0.7
is.female -0.15 0.452 -1.1, 0.77 0.7
is.white 0.56 0.551 -0.56, 1.7 0.3
Abbreviations: CI = Confidence Interval, SE = Standard Error
1 R² = 0.17, Adj. R² = 0.019

Similarly takeaway here with willingness to pay.

Familiarity with GLP-1

mod1 <- lm(familiarity ~ age + bmi + ses + income + is.female + is.white, dat)

tbl <- tbl_regression(mod1) %>%
  add_global_p() %>%
  modify_header(label = "**Predictor**",
                estimate = "**Estimate**",
                std.error = "**SE**",
                p.value = "**p-value**") %>%
  modify_footnote(estimate ~ paste0("R² = ", round(summary(mod1)$r.squared, 3),
                                   ", Adj. R² = ", round(summary(mod1)$adj.r.squared, 3)))

tbl
Predictor Estimate1 SE 95% CI p-value
age -0.01 0.015 -0.04, 0.03 0.7
bmi 0.02 0.011 0.00, 0.04 0.10
ses 0.04 0.137 -0.25, 0.32 0.8
income -0.08 0.125 -0.34, 0.18 0.5
is.female 0.18 0.360 -0.57, 0.93 0.6
is.white -0.31 0.491 -1.3, 0.72 0.5
Abbreviations: CI = Confidence Interval, SE = Standard Error
1 R² = 0.14, Adj. R² = -0.106

Ditto with “familiarity with GLP-1”, but we know now that we have measurement issues with this variable.

Willingness to take GLP-1 medication

mod1 <- lm(wttGLP1 ~ age + bmi + ses + income + is.female + is.white, dat)

tbl <- tbl_regression(mod1) %>%
  add_global_p() %>%
  modify_header(label = "**Predictor**",
                estimate = "**Estimate**",
                std.error = "**SE**",
                p.value = "**p-value**") %>%
  modify_footnote(estimate ~ paste0("R² = ", round(summary(mod1)$r.squared, 3),
                                   ", Adj. R² = ", round(summary(mod1)$adj.r.squared, 3)))

tbl
Predictor Estimate1 SE 95% CI p-value
age 0.02 0.020 -0.02, 0.06 0.3
bmi 0.04 0.016 0.00, 0.07 0.027
ses 0.07 0.184 -0.31, 0.44 0.7
income -0.18 0.174 -0.53, 0.18 0.3
is.female -0.07 0.462 -1.0, 0.87 0.9
is.white 0.21 0.563 -0.94, 1.4 0.7
Abbreviations: CI = Confidence Interval, SE = Standard Error
1 R² = 0.184, Adj. R² = 0.036

People with higher BMI tend to indicate more strongly they are willing to take GLP-1s, while adjusting for age, SES, and income. This does not appear to vary by sex or race.