Sweet Foods Liking and Diet Quality

Author

May

Published

March 8, 2026

Functions

Useful function that will be used throughout the analyses.

Code
## Correlation function
corrfunc <- function(data, Cor1, Cor2, title) {
  ggplot(data = data, aes(x = {{Cor1}}, y = {{Cor2}})) +
  geom_point()+
  geom_smooth(method = lm)+
  ggtitle(title) +
  stat_cor(method = "pearson") +
  theme_light() 
}

## Interaction visualization function
intfunc <- function(data, Cor1, Cor2, Group, title) {
  ggplot(data = data, aes(x = {{Cor1}}, y = {{Cor2}}, color = {{Group}})) +
  geom_point()+
  geom_smooth(method = lm, se = FALSE)+
  ggtitle(title) +
  stat_cor(method = "pearson") +
  theme_light() 
}

Load packages and call data

Cronbach’s alpha for food groups

Code
## Vegetable 
veg.cor <- survey.df %>%
  select("FL-01_3":"FL-06_3") %>%
  mutate(overall = rowMeans(select(., "FL-01_3":"FL-06_3"), na.rm = TRUE)) %>%
  cor(., use = "complete.obs")

survey.df %>% 
  select("FL-01_3":"FL-06_3")%>%
  na.omit() %>%
  cronbach.alpha(., standardized = TRUE)

Standardized Cronbach's alpha for the '.' data-set

Items: 6
Sample units: 387
alpha: 0.705
Code
corrplot(veg.cor, method = "number", title = "Corrplot: liking for vegetables")

Code
## Fruit 
fruit.cor <- survey.df %>%
  select("FL-07_3":"FL-11_3") %>%
  mutate(overall = rowMeans(select(., "FL-07_3":"FL-11_3"), na.rm = TRUE)) %>%
  cor(., use = "complete.obs")

survey.df %>% 
  select("FL-07_3":"FL-11_3")%>%
  na.omit() %>%
  cronbach.alpha(., standardized = TRUE)

Standardized Cronbach's alpha for the '.' data-set

Items: 5
Sample units: 410
alpha: 0.56
Code
corrplot(fruit.cor, method = "number", title = "Corrplot: liking for fruits")

Code
## Salty/fat 
saltyfat.cor <- survey.df %>%
  select("FL-12_3":"FL-15_3") %>%
  mutate(overall = rowMeans(select(., "FL-12_3":"FL-15_3"), na.rm = TRUE)) %>%
  cor(., use = "complete.obs")

survey.df %>% 
  select("FL-12_3":"FL-15_3")%>%
  na.omit() %>%
  cronbach.alpha(., standardized = TRUE)

Standardized Cronbach's alpha for the '.' data-set

Items: 4
Sample units: 407
alpha: 0.56
Code
corrplot(saltyfat.cor, method = "number", title = "Corrplot: liking for salty & fat foods")

Code
## High-fat proteins
saltyfat.cor <- survey.df %>%
  select("FL-16_3":"FL-20_3") %>%
  mutate(overall = rowMeans(select(., "FL-16_3":"FL-20_3"), na.rm = TRUE)) %>%
  cor(., use = "complete.obs")

survey.df %>% 
  select("FL-16_3":"FL-20_3")%>%
  na.omit() %>%
  cronbach.alpha(., standardized = TRUE)

Standardized Cronbach's alpha for the '.' data-set

Items: 5
Sample units: 380
alpha: 0.688
Code
corrplot(saltyfat.cor, method = "number", title = "Corrplot: liking for salty & fat foods")

Code
## Alcohol 
alcohol.cor <- survey.df %>%
  select("FL-21_3":"FL-25_3") %>%
  mutate(overall = rowMeans(select(., "FL-21_3":"FL-25_3"), na.rm = TRUE)) %>%
  cor(., use = "complete.obs")

survey.df %>% 
  select("FL-21_3":"FL-25_3")%>%
  na.omit() %>%
  cronbach.alpha(., standardized = TRUE)

Standardized Cronbach's alpha for the '.' data-set

Items: 5
Sample units: 249
alpha: 0.76
Code
corrplot(alcohol.cor, method = "number", title = "Corrplot: liking for alcohol")

Code
## Whole grain 
wg.cor <- survey.df %>%
  select("FL-26_3":"FL-29_3") %>%
  mutate(overall = rowMeans(select(., "FL-26_3":"FL-29_3"), na.rm = TRUE)) %>%
  cor(., use = "complete.obs")

survey.df %>% 
  select("FL-26_3":"FL-29_3")%>%
  na.omit() %>%
  cronbach.alpha(., standardized = TRUE)

Standardized Cronbach's alpha for the '.' data-set

Items: 4
Sample units: 401
alpha: 0.702
Code
corrplot(wg.cor, method = "number", title = "Corrplot: liking for whole grains")

Code
## Carbs
carbs.cor <- survey.df %>%
  select("FL-30_3":"FL-34_3") %>%
  mutate(overall = rowMeans(select(., "FL-30_3":"FL-34_3"), na.rm = TRUE)) %>%
  cor(., use = "complete.obs")

survey.df %>% 
  select("FL-30_3":"FL-34_3")%>%
  na.omit() %>%
  cronbach.alpha(., standardized = TRUE)

Standardized Cronbach's alpha for the '.' data-set

Items: 5
Sample units: 399
alpha: 0.568
Code
corrplot(carbs.cor, method = "number", title = "Corrplot: liking for carbohydrates")

Code
## Healthy fats
healthyfat.cor <- survey.df %>%
  select("FL-35_3":"FL-38_3") %>%
  mutate(overall = rowMeans(select(., "FL-35_3":"FL-38_3"), na.rm = TRUE)) %>%
  cor(., use = "complete.obs")

survey.df %>% 
  select("FL-35_3":"FL-38_3")%>%
  na.omit() %>%
  cronbach.alpha(., standardized = TRUE)

Standardized Cronbach's alpha for the '.' data-set

Items: 4
Sample units: 375
alpha: 0.505
Code
corrplot(healthyfat.cor, method = "number", title = "Corrplot: liking for healthy fats")

Code
## Unhealthy fats
unhealthyfat.cor <- survey.df %>%
  select("FL-48_3":"FL-52_3") %>%
  mutate(overall = rowMeans(select(., "FL-48_3":"FL-52_3"), na.rm = TRUE)) %>%
  cor(., use = "complete.obs")

survey.df %>% 
  select("FL-48_3":"FL-52_3")%>%
  na.omit() %>%
  cronbach.alpha(., standardized = TRUE)

Standardized Cronbach's alpha for the '.' data-set

Items: 5
Sample units: 397
alpha: 0.564
Code
corrplot(unhealthyfat.cor, method = "number", title = "Corrplot: liking for unhealthy fats")

Code
## All fat
allfat.cor <- survey.df %>%
  select("FL-35_3":"FL-38_3", "FL-48_3":"FL-52_3") %>%
  mutate(overall = rowMeans(select(.,"FL-35_3":"FL-38_3", "FL-48_3":"FL-52_3"), na.rm = TRUE)) %>%
  cor(., use = "complete.obs")

survey.df %>% 
  select("FL-35_3":"FL-38_3", "FL-48_3":"FL-52_3")%>%
  na.omit() %>%
  cronbach.alpha(., standardized = TRUE)

Standardized Cronbach's alpha for the '.' data-set

Items: 9
Sample units: 367
alpha: 0.613
Code
corrplot(allfat.cor, method = "number", title = "Corrplot: liking for all fats")

Code
## Sweets
sweets.cor <- survey.df %>%
  select("FL-39_3":"FL-43_3") %>%
  mutate(overall = rowMeans(select(., "FL-39_3":"FL-43_3"), na.rm = TRUE)) %>%
  cor(., use = "complete.obs")

survey.df %>% 
  select("FL-39_3":"FL-43_3")%>%
  na.omit() %>%
  cronbach.alpha(., standardized = TRUE)

Standardized Cronbach's alpha for the '.' data-set

Items: 5
Sample units: 411
alpha: 0.763
Code
corrplot(sweets.cor, method = "number", title = "Corrplot: liking for sweets")

Code
## Sugar-sweetened beverages 
ssb.cor <- survey.df %>%
  select("FL-44_3":"FL-47_3") %>%
  mutate(ssb.overall = rowMeans(select(., "FL-44_3":"FL-47_3"), na.rm = TRUE)) %>% 
  cor(., use = "complete.obs")

survey.df %>% 
  select("FL-44_3":"FL-47_3")%>%
  na.omit() %>%
  cronbach.alpha(., standardized = TRUE)

Standardized Cronbach's alpha for the '.' data-set

Items: 4
Sample units: 363
alpha: 0.589
Code
corrplot(ssb.cor, method = "number", title = "Corrplot: liking for SSB")

Code
## Sweet foods and sugar-sweetened beverages 
sfbl.cor <- survey.df %>%
  select("FL-39_3":"FL-47_3") %>%
  mutate(ssb.overall = rowMeans(select(., "FL-39_3":"FL-47_3"), na.rm = TRUE)) %>% 
  cor(., use = "complete.obs")

survey.df %>% 
  select("FL-39_3":"FL-47_3")%>%
  na.omit() %>%
  cronbach.alpha(., standardized = TRUE)

Standardized Cronbach's alpha for the '.' data-set

Items: 9
Sample units: 360
alpha: 0.778
Code
corrplot(sfbl.cor, method = "number", title = "Corrplot: liking for SSB")

Code
## Protein
protein.cor <- survey.df %>%
  select("FL-53_3":"FL-57_3") %>%
  mutate(overall = rowMeans(select(., "FL-53_3":"FL-57_3"), na.rm = TRUE)) %>%
  cor(., use = "complete.obs") ## soy protein does not fit

survey.df %>% 
  select("FL-53_3":"FL-57_3")%>%
  na.omit() %>%
  cronbach.alpha(., standardized = TRUE)

Standardized Cronbach's alpha for the '.' data-set

Items: 5
Sample units: 306
alpha: 0.514
Code
corrplot(protein.cor, method = "number", title = "Corrplot: liking for protein")

Code
## Spicy
spicy.cor <- survey.df %>%
  select("FL-58_3":"FL-59_3") %>%
  mutate(overall = rowMeans(select(., "FL-58_3":"FL-59_3"), na.rm = TRUE)) %>%
  cor(., use = "complete.obs")

survey.df %>% 
  select("FL-58_3":"FL-59_3")%>%
  na.omit() %>%
  cronbach.alpha(., standardized = TRUE)

Standardized Cronbach's alpha for the '.' data-set

Items: 2
Sample units: 393
alpha: 0.842
Code
corrplot(spicy.cor, method = "number", title = "Corrplot: liking for spicy foods")

Decoding survey

Code
## Liking score
liking.df <- survey.df %>% 
  select("ID", "FL-01_3":"FL-60_3") %>%
  mutate(veg.liking = rowMeans(select(., "FL-01_3":"FL-06_3"), na.rm = TRUE)/2, 
         fruit.liking = rowMeans(select(., "FL-07_3":"FL-11_3"), na.rm = TRUE)/2, 
         saltyfat.liking = rowMeans(select(., "FL-12_3":"FL-15_3"), na.rm = TRUE)/2, 
         hfprotein.liking = rowMeans(select(., "FL-16_3":"FL-20_3"), na.rm = TRUE)/2,   
         alcohol.liking = rowMeans(select(., "FL-22_3":"FL-25_3"), na.rm = TRUE)/2, 
         wg.liking = rowMeans(select(., "FL-26_3":"FL-29_3"), na.rm = TRUE)/2, 
         carbs.liking = rowMeans(select(., "FL-30_3":"FL-34_3"), na.rm = TRUE)/2, 
         healthyfat.liking = rowMeans(select(., "FL-35_3":"FL-38_3"), na.rm = TRUE)/2, 
        sweets.liking = rowMeans(select(., "FL-39_3":"FL-43_3"), na.rm = TRUE)/2,
        ssb.liking = rowMeans(select(., "FL-44_3":"FL-47_3"), na.rm = TRUE)/2,
        sfbl.liking = rowMeans(select(., "FL-39_3":"FL-47_3"), na.rm = TRUE)/2,
         unhealthyfat.liking = rowMeans(select(., "FL-48_3":"FL-52_3"), na.rm = TRUE)/2,
         protein.liking = rowMeans(select(., "FL-53_3":"FL-57_3"), na.rm = TRUE)/2, 
         spicy.liking = rowMeans(select(., "FL-58_3":"FL-60_3"), na.rm = TRUE)/2) %>%
  select(c("ID", ends_with(".liking"))) 

## Three factor questionnaire
TFQ.df <- survey.df %>% 
  select("ID", "TFQ-01":"TFQ-21") %>%
  mutate(uncontrolled = rowSums(select(., "TFQ-03", "TFQ-06", "TFQ-08", "TFQ-09", "TFQ-12", "TFQ-13", "TFQ-15", "TFQ-19", "TFQ-20"), na.rm = TRUE)) %>%  
  mutate(uncontrolled = ((uncontrolled-1)/36)*100, na.rm = TRUE) %>%
  mutate(restraint = rowSums(select(., "TFQ-01", "TFQ-05", "TFQ-11"), na.rm = TRUE)) %>%
  mutate(restraint = ((restraint-1)/12)*100, na.rm = TRUE) %>%
  mutate(emotional = rowSums(select(., "TFQ-02", "TFQ-04", "TFQ-07", "TFQ-10", "TFQ-14", "TFQ-16"), na.rm = TRUE)) %>%
  mutate(emotional = ((emotional-1)/24)*100, na.rm = TRUE) %>%
    select(c("ID", "uncontrolled", "restraint", "emotional")) 

## Adult eating behavior questionnaire
AEBQ.df <- survey.df %>% 
  select("ID", "AEBQ-01":"AEBQ-35") %>%
  mutate(EF = rowSums(select(., "AEBQ-01", "AEBQ-03", "AEBQ-04"), na.rm = TRUE)) %>%
  mutate(EOE = rowSums(select(., "AEBQ-05", "AEBQ-08", "AEBQ-10", "AEBQ-16"), na.rm = TRUE)) %>%
  mutate(EUE = rowSums(select(., "AEBQ-15", "AEBQ-20", "AEBQ-27", "AEBQ-35"), na.rm = TRUE)) %>%
  mutate(FF = rowSums(select(., "AEBQ-02", "AEBQ-07", "AEBQ-12", "AEBQ-19", "AEBQ-24"), na.rm = TRUE)) %>%
  mutate(FR = rowSums(select(., "AEBQ-13", "AEBQ-22", "AEBQ-33"), na.rm = TRUE)) %>%
  mutate(SE = rowSums(select(., "AEBQ-14", "AEBQ-25", "AEBQ-26", "AEBQ-29"), na.rm = TRUE)) %>%
  mutate(H = rowSums(select(., "AEBQ-06", "AEBQ-09", "AEBQ-28", "AEBQ-32", "AEBQ-34"), na.rm = TRUE)) %>%
  mutate(SR = rowSums(select(., "AEBQ-11", "AEBQ-23", "AEBQ-30", "AEBQ-31"), na.rm = TRUE)) %>%
  select(c("ID", "EF":"SR")) 

## Body appreciation scale 
BAS.df <- survey.df %>% 
  select("ID", starts_with("BAS")) %>%
  mutate(BAS = rowSums(select(., 2:11), na.rm = TRUE))%>%
  select(c("ID", "BAS")) 

## Intuitive eating scale 
IES.df <- survey.df %>% 
  select("ID", starts_with("IES")) %>%
  mutate(IES = (rowSums(select(., 2:24), na.rm = TRUE))/23) %>%
  select(c("ID", "IES"))

## sHEI score
HEI.df <- survey.df %>%
  select("ID", "sex", "Fruit":"Water") %>%
  mutate(fruit.1 = case_when(
    Fruit == 1 ~ 0,
    Fruit == 2 ~ 2, 
    Fruit == 3 ~ 3.5,
    Fruit >= 4 ~ 5)) %>%
  mutate(fruit.2 = case_when (
    Juice == 1 ~ 0,
    Juice == 2 ~ 2,
    Juice == 3 ~ 3.5,
    Juice >= 4 ~ 5)) %>% 
  mutate(tfruitHEI = case_when(
    fruit.1 + fruit.2 <5 ~ 0,
    fruit.1 + fruit.2 >= 5 ~ 5)) %>%
  mutate(wfruitHEI = case_when(
    Fruit == 1 ~ 0,
    Fruit == 2 ~ 2.5, 
    Fruit >= 3 ~ 5)) %>%
  mutate(vegHEI = case_when(
    `Green veg` == 1 ~ 1.6,
    `Green veg` == 2 & `Starchy veg` >= 2 ~ 2.46,
    `Green veg` >= 2 & `Starchy veg` >= 2 ~ 3.24,
    `Green veg` >= 2 & `Starchy veg` == 1 ~ 3.56)) %>%
  mutate(bean.1 = case_when(
    `Green veg` == 1 ~ 0,
    `Green veg` >= 2 ~5)) %>%
  mutate(bean.2 = case_when(
    Beans == 1 ~ 0,
    Beans >= 2 ~ 5)) %>%
  mutate(beanHEI = case_when(
    bean.1 + bean.2 < 5 ~ 0,
    bean.1 + bean.2 >= 5 ~ 5)) %>%
  mutate(wgHEI = case_when(
    `Whole grains.day` == 1 ~ 0.51,
    sex == 1 & `Whole grains.day` >= 2 ~ 2.97,
    sex == 2 & `Whole grains.day` >= 2 & `Whole grains.day` <= 3 ~ 5.20,
    sex == 2 & `Whole grains.day` >= 4 ~ 6.94)) %>%
  mutate(dairyHEI = case_when(
    sex == 1 & `Milk.day` <= 3 ~ 3.22,
    sex == 2 & `Milk.day` <= 3 & `Low-fat milk.day` == 1 ~ 3.32,
    sex == 2 & `Milk.day` <= 3 & `Low-fat milk.day` >= 2 ~ 4.81,
    `Milk.day` >= 4 ~ 6.51)) %>%
  mutate(tproteinHEI = case_when(
    sex == 1 & `Seafood.freq` <= 4 ~ 4.11,
    sex == 1 & `Seafood.freq` >= 5 ~ 4.98,
    sex == 1 & is.na(`Seafood.freq`) ~ 4.11,
    sex == 2 ~ 4.97)) %>%
  mutate(sfpproteinHEI = case_when(
    sex == 1 & Nuts <= 2 ~ 0.49,
    sex == 2 & Nuts <= 2 ~ 1.50,
    Nuts >= 3 ~ 4.20)) %>%
  mutate(fatHEI = case_when(
    `Milk.day` >= 4 ~ 2.56,
    `Saturated fats` >= 2 & `Saturated fats` <= 3 & `Milk.freq` >= 1 & `Low-fat milk.freq` <= 2 ~ 2.63,
    `Saturated fats` >= 2 & `Saturated fats` <= 3 & is.na(`Milk.freq`) | is.na(`Low-fat milk.freq`) ~ 2.63,
    `Saturated fats` >= 2 & `Saturated fats` <= 3 & `Milk.freq` >= 1 & `Low-fat milk.freq` >= 3 ~ 4.54,
    `Saturated fats` == 1 & `Milk.freq` >= 1 ~ 5.93,
    `Saturated fats` == 1 & is.na(`Milk.freq`) ~ 5.93)) %>%
  mutate(grainHEI = case_when(
    `Green veg` == 1 ~ 2.13,
    `Grains.day` >= 3 & `Seafood.day` >= 2 & `Green veg` >= 2 ~ 2.27,
    `Grains.day` >= 3 & Nuts >= 1 & Nuts <= 2 & `Seafood.day` == 1 & `Green veg` >= 2 ~ 4.73,
    `Grains.day` >= 3 & Nuts >= 3 & `Seafood.day` == 1 & `Green veg` >= 2 ~ 8.45,
    `Grains.day` >= 1 & `Grains.day` <= 2 & `Green veg` >= 2 ~ 9.25)) %>%
  mutate(sodiumHEI = case_when(
    Fruit >= 1 & Fruit <= 2 & `Grains.day` >= 3 & Water == 3 ~ 0.70,
    Fruit >= 3 & `Grains.day` >= 3 & Water == 3 ~ 2.30,
    `Grains.day` >= 3 & Water >= 1 & Water <= 2 ~ 4.94,
    `Grains.day` >= 1 & `Grains.day` <= 2 ~ 6.07)) %>%
  mutate(ssb.cal = case_when(
    `SSB.day` == 1  ~ 0,
    `SSB.day` == 2 ~ 156,
    `SSB.day` == 3 ~ 312,
    `SSB.day` == 4 ~ 468,
    `SSB.day` == 5 ~ 624,
    `SSB.day` == 6 ~ 780,
    `SSB.day` == 7 ~ 936),
    sugar.cal = case_when(
    `Added sugars` == 1 ~ 130,
    `Added sugars` == 2 ~ 260,
    `Added sugars` == 3 ~ 520)) %>%
  mutate(sugar.intake = (ssb.cal + sugar.cal)) %>%
  mutate(sugarHEI = case_when(
    sugar.intake <= 130 ~ 10,
    sugar.intake > 130 & sugar.cal < 520 ~ 5,
    sugar.intake >= 520 ~ 0)) %>%
  mutate(sugar.tsp = case_when(
    `SSB.freq` <= 4 ~ 13.26,
    `SSB.freq` >= 5 & `SSB.freq` <=6 ~ 16.00,
    `SSB.day` ==2 ~ 16.00,
    `SSB.day` >=3 ~ 26.87)) %>%
  mutate(satfatHEI = case_when(
    `SSB.day` >= 3 ~ 1.82,
    `SSB.day` <= 2 & `Grains.day` <= 2 ~ 3.20,
    `SSB.day` <= 2 & `Grains.day` >= 3 & Nuts <= 2 ~ 4.64,
    `SSB.day` <= 2 & `Grains.day` >= 3 & Nuts >= 3 ~ 6.56)) %>%
  select("ID", "sugar.intake", "sugar.tsp", ends_with("HEI")) %>%
  mutate(sHEI = rowSums(select(., "tfruitHEI":"satfatHEI"), na.rm = TRUE)) %>%
  mutate(SuFatHEI = rowSums(select(., c("sugarHEI", "satfatHEI")))) 


## Combining AFR and ASI genotype files
AFR.geno <- AFR.geno.gwas.df %>%
  select(-c("CHR:POS", genetic.ancestry, phenotype.cat, effect_allele, other_allele, major_allele, minor_allele, effect, source)) %>%
  pivot_longer(cols = c(starts_with("1")), names_to = "ID", values_to = "genotype") %>%
  pivot_wider(names_from = rsID, values_from = genotype)

ASI.geno <- ASI.geno.gwas.df %>%
  select(-c("CHR:POS", genetic.ancestry, phenotype.cat, effect_allele, other_allele, major_allele, minor_allele, effect, source, "0_1001a-1-0424948735_1001a-1-0424948735")) %>%
  pivot_longer(cols = c(starts_with("1")), names_to = "ID", values_to = "genotype") %>%
  pivot_wider(names_from = rsID, values_from = genotype)

genotypes.df <- bind_rows(AFR.geno, ASI.geno)

## Putting scores together
survey.df$ID <- as.character(survey.df$ID)
anthro.df$ID <- as.character(anthro.df$ID)
SST.df$ID <- as.character(SST.df$ID)
redjade.df$ID <- as.character(redjade.df$ID)
liking.df$ID <- as.character(liking.df$ID)
HEI.df$ID <- as.character(HEI.df$ID)
TFQ.df$ID <- as.character(TFQ.df$ID)
BAS.df$ID <- as.character(BAS.df$ID)
IES.df$ID <- as.character(IES.df$ID)
AEBQ.df$ID <- as.character(AEBQ.df$ID)

survey.cleaned <- survey.df %>%
  select(c("ID", "sex", "age", "race", "ethn", "usb", "education", "hinc", "exec", "atus", "tius", "dcus", "scus")) 

full.predropout <- anthro.df %>%
  left_join(SST.df, by = "ID") %>%
  left_join(survey.cleaned, by = "ID") %>%
  left_join(redjade.df, by = "ID") %>%
  left_join(liking.df, by = "ID") %>%
  left_join(HEI.df, by = "ID") %>%
  left_join(TFQ.df, by = "ID") %>%
  left_join(BAS.df, by = "ID") %>%
  left_join(IES.df, by = "ID") %>%
  left_join(AEBQ.df, by = "ID") %>% 
  left_join(genetic.ancestry, by = "ID") %>%
  left_join(genotypes.df, by = "ID") %>%
  mutate(ageCat = case_when(
      age >= 18 & age < 21 ~ "1",
      age >= 21 & age < 25 ~ "2",
      age >= 25 & age < 35  ~ "3",
      age >= 35 & age < 45  ~ "4",
      age >= 45 & age < 65  ~ "5",
      age >= 65 ~ "5")) %>%
  mutate(BMIcat = case_when(
      BMI < 18.5 ~ "0",
      BMI >= 18.5 & BMI  < 25 & race != "Asian" ~ "1",
      BMI >= 25 & BMI < 30 & race != "Asian" ~ "2",
      BMI > 30 & race != "Asian" ~ "3",
      BMI >= 18.5 & BMI  < 23 & race == "Asian" ~ "1",
      BMI >= 23 & BMI < 25 & race == "Asian" ~ "2",
      BMI > 25 & race == "Asian" ~ "3"))  %>%
  mutate(BAScat = ntile(BAS, 2)) %>%
  mutate(UEcat = ntile(uncontrolled, 3)) %>%
  mutate(CRcat = ntile(restraint, 3)) %>%
  mutate(EMcat = ntile(emotional, 3)) %>%
  mutate(SFBLcat = ntile(sfbl.liking, 3)) %>%
  mutate(UHFcat = ntile(unhealthyfat.liking, 3)) %>%
  mutate(sugarcat = ntile(sugar.intake, 3)) %>%
  mutate(DQ = ntile(sHEI, 2)) %>%  
  mutate(atusCat = case_when(
      atus == 2 ~ "< 4 years",
      atus >= 3 & atus <= 4 ~ "4 to 12 years",
      atus == 5 ~ "13 to 18 years",
      atus >= 6 & atus <= 7 ~ "> 18 years")) %>%
  mutate(tiusCat = case_when(
      tius <= 4 ~ "< 5 years",
      tius == 5 ~ "5 to 10 years",
      tius >= 6 & tius <= 7 ~ "10 to 20 years", 
      tius == 8 ~ "> 20 years")) %>%  
    mutate(dcusCat = case_when(
      dcus <= 3 ~ "little to no change",
      dcus == 4 ~ "changed moderately",
      dcus >= 5 ~ "changed a lot")) 
  

full.df <- full.predropout %>%
  filter(!is.na(age) & !is.na(SST))

full.df$group <- paste(full.df$race, full.df$usb) 

African.df <- full.df %>%
  filter(race == "African")
  
Asian.df <- full.df %>%
  filter(race == "Asian")

USB.df <- full.df %>%
  filter(usb == "U.S. Born")
  
Immigrant.df <- full.df %>%
  filter(usb == "Immigrant")

Data distribution

Code
## Histogram function
histfunc <- function(data, x, title) {
  ggplot(data = data, aes(x = {{x}})) +
    geom_histogram(aes(y=..density..), colour="black", fill="white")+
    geom_density(alpha=.2, fill="#FF6666") +
    theme_light()
}

## QQ plot function
qqfunc <- function(data, sample, title) {
  ggplot(data = data, aes(sample = {{sample}})) +
  stat_qq() +
  stat_qq_line() +
    ggtitle(title) +
    theme_light()
}

## Age
histfunc(full.df, age, "Age histogram") 
Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(density)` instead.
`stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Code
qqfunc(full.df, age, "Age QQ plot")

Code
shapiro.test(log10(full.df$age)) ## Not normal

    Shapiro-Wilk normality test

data:  log10(full.df$age)
W = 0.78719, p-value < 2.2e-16
Code
## BMI
histfunc(full.df, BMI, "BMI histogram")
`stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Code
qqfunc(full.df, BMI, "BMI QQ plot") 

Code
shapiro.test(full.df$BMI) ## Not normal

    Shapiro-Wilk normality test

data:  full.df$BMI
W = 0.85012, p-value < 2.2e-16
Code
## SST
histfunc(full.df, SST, "Simple Sweet Test histogram") +
  xlim(-130, 130) +
  labs(y= "Density", x = "Sweet Taste Liking Score")
`stat_bin()` using `bins = 30`. Pick better value `binwidth`.
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_bar()`).

Code
qqfunc(full.df, SST, "Simple Sweet QQ plot") +
  labs(y= "Sample Quantiles", x = "Theoretical Quantiles")

Code
shapiro.test(full.df$SST) ## Normal

    Shapiro-Wilk normality test

data:  full.df$SST
W = 0.99529, p-value = 0.2492
Code
full.df %>%
  filter(!is.na(genetic.ancestry)) %>%
  ggplot(aes(SST)) +
  geom_density(aes(fill= genetic.ancestry), alpha = 0.4) + 
    xlim(-150, 180) +
    labs(title="Sweet Taste Liking by Ancestry Groups") +
  theme_classic()

Code
## Sweet foods and beverages liking
histfunc(full.df, sfbl.liking, "Sweet Foods Liking histogram")
`stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Code
qqfunc(full.df, sfbl.liking, "Sweet Liking QQ plot")

Code
shapiro.test(full.df$sfbl.liking) ## Not normal

    Shapiro-Wilk normality test

data:  full.df$sfbl.liking
W = 0.98388, p-value = 0.000154
Code
full.df %>%
  filter(!is.na(genetic.ancestry)) %>%
  ggplot(aes(sfbl.liking)) +
  geom_density(aes(fill= genetic.ancestry), alpha = 0.4) + 
    xlim(-150, 180) +
    labs(title="Sweet Food and Beverage Liking by Ancestry Groups") +
  theme_classic()

Code
## Added sugars intake
histfunc(full.df, sugar.intake, "Added sugars intake histogram") ## transform
`stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Code
qqfunc(full.df, sugar.intake, "Added sugars intake QQ plot")

Code
shapiro.test(full.df$sugar.intake) ## Not normal

    Shapiro-Wilk normality test

data:  full.df$sugar.intake
W = 0.90785, p-value = 4.19e-15
Code
## Total HEI
histfunc(full.df, sHEI, "Total HEI intake histogram")
`stat_bin()` using `bins = 30`. Pick better value `binwidth`.

Code
qqfunc(full.df, sHEI, "Overall HEI intake QQ plot")

Code
shapiro.test(full.df$sHEI) ## Normal

    Shapiro-Wilk normality test

data:  full.df$sHEI
W = 0.99209, p-value = 0.02786

Demographics and counts

Code
## Counts - by ancestry and USB

full.df %>%
  filter(race %in% c("African", "Asian")) %>%
  group_by(race, usb) %>%
  count()
# A tibble: 4 × 3
# Groups:   race, usb [4]
  race    usb           n
  <chr>   <chr>     <int>
1 African Immigrant    84
2 African U.S. Born   116
3 Asian   Immigrant    95
4 Asian   U.S. Born   112
Code
full.df %>%
  filter(race %in% c("African", "Asian")) %>%
  group_by(race, atus) %>%
  count()
# A tibble: 14 × 3
# Groups:   race, atus [14]
   race     atus     n
   <chr>   <dbl> <int>
 1 African     2    11
 2 African     3    12
 3 African     4    13
 4 African     5    28
 5 African     6    14
 6 African     7     6
 7 African    NA   116
 8 Asian       2    12
 9 Asian       3    13
10 Asian       4    11
11 Asian       5    27
12 Asian       6    26
13 Asian       7     6
14 Asian      NA   112
Code
full.df %>%
  filter(race %in% c("African", "Asian")) %>%
  group_by(race, tius) %>%
  count()
# A tibble: 16 × 3
# Groups:   race, tius [16]
   race     tius     n
   <chr>   <dbl> <int>
 1 African     2     6
 2 African     3     6
 3 African     4    13
 4 African     5    20
 5 African     6    12
 6 African     7    16
 7 African     8    11
 8 African    NA   116
 9 Asian       2    12
10 Asian       3     8
11 Asian       4    10
12 Asian       5    22
13 Asian       6    14
14 Asian       7    18
15 Asian       8    11
16 Asian      NA   112
Code
full.df %>%
  filter(race %in% c("African", "Asian")) %>%
  group_by(race, dcus) %>%
  count()
# A tibble: 12 × 3
# Groups:   race, dcus [12]
   race     dcus     n
   <chr>   <dbl> <int>
 1 African     2     8
 2 African     3    19
 3 African     4    18
 4 African     7    28
 5 African     8    10
 6 African    NA   117
 7 Asian       2    14
 8 Asian       3    20
 9 Asian       4    36
10 Asian       7    23
11 Asian       8     2
12 Asian      NA   112
Code
full.df %>%
  group_by(race, scus) %>%
  count()
# A tibble: 14 × 3
# Groups:   race, scus [14]
   race     scus     n
   <chr>   <dbl> <int>
 1 African     2     4
 2 African     3     3
 3 African     4    13
 4 African     5    34
 5 African     6    29
 6 African    NA   117
 7 Asian       2     3
 8 Asian       3    11
 9 Asian       4    21
10 Asian       5    48
11 Asian       6    12
12 Asian      NA   112
13 <NA>        6     1
14 <NA>       NA     3
Code
## Graph, time living in US
      full.df %>%
        group_by(race, usb, tius) %>%
        filter(!is.na(tius) & !is.na(usb) & !is.na(race)) %>% 
        count() %>%
        group_by(race, usb) %>%
        filter(race %in% c("African", "Asian")) %>%
        mutate(percent = (n / sum(n)) * 100) %>%
        ggplot(aes(x = usb, y = percent, fill = as.factor(tius))) +
        geom_bar(stat = "identity", position = position_dodge()) +
        labs(title = "Duration living in the US (percentages)", y = "Percentage", fill = "ageCat") +
        scale_fill_discrete(labels = c("< 1 year", "1 to 2 years", "3 to 5 years", "5 to 10 years", "10 to 15 years", "15 to 20 years", "> 20 years")) +
        theme_classic() +
        facet_wrap(~race) +
        theme(strip.text.x = element_text(size = 12, face = "bold")) 

Code
## Graph, age when moved to the US
      full.df %>%
        group_by(race, usb, atus) %>%
        filter(!is.na(atus) & !is.na(usb) & !is.na(race)) %>% 
        count() %>%
        group_by(race, usb) %>%
        filter(race %in% c("African", "Asian")) %>%
        mutate(percent = (n / sum(n)) * 100) %>%
        ggplot(aes(x = usb, y = percent, fill = as.factor(atus))) +
        geom_bar(stat = "identity", position = position_dodge()) +
        labs(title = "Age when moved to the US (percentages)", y = "Percentage", fill = "ageCat") +
        scale_fill_discrete(labels = c("< 1 year", "1 to 2 years", "3 to 5 years", "5 to 10 years", "10 to 15 years", "15 to 20 years", "> 20 years")) +
        theme_classic() +
        facet_wrap(~race) +
        theme(strip.text.x = element_text(size = 12, face = "bold")) 

Code
## Graph, diet change after moving to the US
      full.df %>%
        group_by(race, usb, dcus) %>%
        filter(!is.na(dcus) & !is.na(usb) & !is.na(race)) %>% 
        count() %>%
        group_by(race, usb) %>%
        filter(race %in% c("African", "Asian")) %>%
        mutate(percent = (n / sum(n)) * 100) %>%
        ggplot(aes(x = usb, y = percent, fill = as.factor(dcus))) +
        geom_bar(stat = "identity", position = position_dodge()) +
        labs(title = "Diet change after moving to the US (percentages)", y = "Percentage", fill = "ageCat") +
        scale_fill_discrete(labels = c("No change", "Changed a little", "Changed moderately", "Changed a lot", "Changed completely")) +
        theme_classic() +
        facet_wrap(~race) +
        theme(strip.text.x = element_text(size = 12, face = "bold"))       

Correlations - Corrplot

Code
adjust.cor.pmat <- function(p.mat, method = "fdr") {
  p.mat[upper.tri(p.mat)] <- p.adjust(p.mat[upper.tri(p.mat)], method = method)
  p.mat[lower.tri(p.mat)] <- t(p.mat)[lower.tri(p.mat)]
  diag(p.mat) <- 0
  return(p.mat)
}

## All corr
corr.df <- full.df %>%
  select(age, sex, BMI, hinc, restraint, emotional, uncontrolled, sugar.intake, sHEI, sfbl.liking, SST) %>%
  rename("household income" = hinc, "sugar intake" = sugar.intake, 
         "healthy eating index" = sHEI, "sweet foods liking" = sfbl.liking, "sweet taste liking" = SST) 

all.corr <- corr.df %>%
      cor(use = "pairwise.complete.obs")

p.mat <- cor_pmat(corr.df) %>%
  column_to_rownames()
p.mat.adj <- adjust.cor.pmat(p.mat, method = "fdr")

ggcorrplot(all.corr, type = "lower", p.mat = p.mat.adj, insig = "blank", lab = TRUE, lab_size = 2.5, colors = c("#E46726", "white", "#6D9EC1"))

Code
ggsave("All.cor.png", heigh = 5, width = 5)


## Corr - diet change
dc.corr.df <- full.df %>%
  filter(usb == "Immigrant") %>%
  select(age, sex, BMI, sHEI, sugar.intake, hinc, tius, atus, dcus, scus) %>%
  rename("healthy eating index" = sHEI, "household income" = hinc, "sugar intake" = sugar.intake, "duration in US" = tius, "age when moved to US" = atus, 
         "diet change after moving to US" = dcus, "sugar intake change after moving to US" = scus) 

dc.corr <- dc.corr.df %>%
      cor(use = "pairwise.complete.obs")

p.mat <- cor_pmat(dc.corr.df) %>%
  column_to_rownames()

p.mat.adj <- adjust.cor.pmat(p.mat, method = "fdr")

ggcorrplot(dc.corr, type = "lower", p.mat = p.mat.adj, insig = "blank", lab = TRUE, lab_size = 2.5, colors = c("#E46726", "white", "#6D9EC1"))

Code
## African diet change corr
afr.dc.corr.df <- full.df %>%
  filter(usb == "Immigrant", race == "African") %>%
  select(age, sex, BMI, sHEI, sugar.intake, hinc, tius, atus, dcus, scus) %>%
  rename("healthy eating index" = sHEI, "household income" = hinc, "sugar intake" = sugar.intake, "duration in US" = tius, "age when moved to US" = atus, "diet change after moving to US" = dcus, "sugar intake change after moving to US" = scus) 

afr.dc.corr <- afr.dc.corr.df %>%
      cor(use = "pairwise.complete.obs")

p.mat <- cor_pmat(afr.dc.corr.df) %>%
  column_to_rownames()

p.mat.adj <- adjust.cor.pmat(p.mat, method = "fdr")

ggcorrplot(afr.dc.corr, type = "lower", p.mat = p.mat.adj, insig = "blank", lab = TRUE, lab_size = 2.5, colors = c("#E46726", "white", "#6D9EC1")) + ggtitle("African")

Code
## Asian corr
asi.dc.corr.df <- full.df %>%
  filter(usb == "Immigrant", race == "Asian") %>%
  select(age, sex, BMI, sHEI, sugar.intake, hinc, tius, atus, dcus, scus) %>%
  rename("healthy eating index" = sHEI, "household income" = hinc, "sugar intake" = sugar.intake, "duration in US" = tius, "age when moved to US" = atus, "diet change after moving to US" = dcus, "sugar intake change after moving to US" = scus) 

asi.dc.corr <- asi.dc.corr.df %>%
      cor(use = "pairwise.complete.obs")

p.mat <- cor_pmat(asi.dc.corr.df) %>%
  column_to_rownames()

p.mat.adj <- adjust.cor.pmat(p.mat, method = "fdr")

ggcorrplot(asi.dc.corr, type = "lower", p.mat = p.mat.adj, insig = "blank", lab = TRUE, lab_size = 2.5, colors = c("#E46726", "white", "#6D9EC1")) + ggtitle("Asian")

Differences - BMI

Code
## Assumptions
  ## linearity
  full.df %>%
  group_by(group) %>%
  ggplot(aes(x = as.numeric(ageCat), y = BMI, color = group)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw() +
  stat_cor(method = "pearson", p.accuracy = 0.001, r.accuracy = 0.01) +
  scale_color_npg() 
`geom_smooth()` using formula = 'y ~ x'

Code
  ## Homogeneity of regression slopes
  summary(full.df %>%
    aov(BMI ~ group*ageCat, .))
              Df Sum Sq Mean Sq F value   Pr(>F)    
group          5   3057   611.5  16.069  2.1e-14 ***
ageCat         4    740   185.0   4.861 0.000779 ***
group:ageCat  13   1013    77.9   2.047 0.016468 *  
Residuals    388  14765    38.1                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
  ## Normality of residuals
  model <- lm(BMI ~ ageCat + group, data = full.df)
  model.metrics <- augment(model) 
  shapiro_test(model.metrics$.resid)
# A tibble: 1 × 3
  variable             statistic  p.value
  <chr>                    <dbl>    <dbl>
1 model.metrics$.resid     0.911 7.33e-15
Code
  ## Homogeneity of variances
  model.metrics %>% levene_test(.resid ~ group)
Warning in leveneTest.default(y = y, group = group, ...): group coerced to
factor.
# A tibble: 1 × 4
    df1   df2 statistic            p
  <int> <int>     <dbl>        <dbl>
1     5   405      9.23 0.0000000245
Code
  ## Outliers
  model.metrics %>% 
  filter(abs(.std.resid) > 3) %>%
  as.data.frame()
       BMI ageCat             group  .fitted   .resid       .hat   .sigma
1 49.05577      2 African U.S. Born 28.06191 20.99387 0.01362718 6.190880
2 56.44815      3 African U.S. Born 30.87822 25.56993 0.02221344 6.145924
3 48.91270      1 African U.S. Born 27.54191 21.37079 0.01160997 6.187801
4 52.18245      3 African U.S. Born 30.87822 21.30423 0.02221344 6.187376
5 63.07895      1 African Immigrant 26.72928 36.34967 0.01642098 6.007136
6 49.65202      2 African Immigrant 27.24928 22.40275 0.01628167 6.178074
     .cooksd .std.resid
1 0.01568958   3.369948
2 0.03860907   4.122482
3 0.01379489   3.426950
4 0.02680167   3.434749
5 0.05700120   5.843152
6 0.02146162   3.600951
Code
## Robust linear model
  mBMI <- rlm(BMI ~ race*usb + ageCat, data = full.df)
  summary(mBMI) ## t-value > 2 in absolute value typically suggest statistical significance

Call: rlm(formula = BMI ~ race * usb + ageCat, data = full.df)
Residuals:
     Min       1Q   Median       3Q      Max 
-11.0996  -3.2811  -0.4405   3.1977  37.2955 

Coefficients:
                       Value   Std. Error t value
(Intercept)            25.7835  0.6435    40.0685
raceAsian              -3.7128  0.7564    -4.9083
usbU.S. Born            0.1758  0.7226     0.2432
ageCat2                 0.6700  0.5697     1.1761
ageCat3                 2.5214  0.7581     3.3258
ageCat4                 3.5690  1.3649     2.6148
ageCat5                 4.2537  1.5737     2.7029
raceAsian:usbU.S. Born -0.0399  1.0070    -0.0396

Residual standard error: 4.85 on 399 degrees of freedom
  (4 observations deleted due to missingness)
Code
  emmeans(mBMI, ~race*usb)
 race    usb       emmean    SE df asymp.LCL asymp.UCL
 African Immigrant   28.0 0.639 NA      26.7      29.2
 Asian   Immigrant   24.3 0.605 NA      23.1      25.5
 African U.S. Born   28.2 0.601 NA      27.0      29.3
 Asian   U.S. Born   24.4 0.626 NA      23.2      25.6

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
  pairs(emmeans(mBMI, ~race*usb), adjust = "tukey")
 contrast                              estimate    SE df z.ratio p.value
 African Immigrant - Asian Immigrant      3.713 0.756 NA   4.908  <.0001
 African Immigrant - African U.S. Born   -0.176 0.723 NA  -0.243  0.9949
 African Immigrant - Asian U.S. Born      3.577 0.734 NA   4.873  <.0001
 Asian Immigrant - African U.S. Born     -3.889 0.705 NA  -5.519  <.0001
 Asian Immigrant - Asian U.S. Born       -0.136 0.714 NA  -0.190  0.9976
 African U.S. Born - Asian U.S. Born      3.753 0.667 NA   5.627  <.0001

Results are averaged over the levels of: ageCat 
P value adjustment: tukey method for comparing a family of 4 estimates 
Code
## ANCOVA, African
bmi.ancova <- full.df %>%
  aov(BMI ~ race + ageCat, data = .)
summary(bmi.ancova)
             Df Sum Sq Mean Sq F value   Pr(>F)    
race          1   2623  2622.6  67.060 3.55e-15 ***
ageCat        4    730   182.6   4.669  0.00108 ** 
Residuals   401  15682    39.1                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
4 observations deleted due to missingness
Code
TukeyHSD(bmi.ancova, "race")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = BMI ~ race + ageCat, data = .)

$race
                   diff       lwr       upr p adj
Asian-African -5.077624 -6.296584 -3.858664     0
Code
emmeans(bmi.ancova, ~race)
 race    emmean    SE  df lower.CL upper.CL
 African   29.5 0.624 401     28.3     30.8
 Asian     24.7 0.624 401     23.4     25.9

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## ANCOVA, African
bmi.afr.ancova <- full.df %>%
  filter(race == "African") %>%
  aov(BMI ~ usb + ageCat, data = .)
summary(bmi.afr.ancova)
             Df Sum Sq Mean Sq F value  Pr(>F)   
usb           1     13   12.56   0.197 0.65784   
ageCat        4    926  231.44   3.625 0.00713 **
Residuals   194  12387   63.85                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
TukeyHSD(bmi.afr.ancova, "usb")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = BMI ~ usb + ageCat, data = .)

$usb
                         diff       lwr      upr     p adj
U.S. Born-Immigrant 0.5078115 -1.750044 2.765667 0.6578404
Code
emmeans(bmi.afr.ancova, ~usb)
 usb       emmean   SE  df lower.CL upper.CL
 Immigrant   29.7 1.16 194     27.4     32.0
 U.S. Born   30.6 1.14 194     28.3     32.8

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## ANCOVA, Asian
bmi.asi.ancova <- full.df %>%
  filter(race == "Asian") %>%
  aov(BMI ~ usb + ageCat, data = .)
summary(bmi.asi.ancova)
             Df Sum Sq Mean Sq F value Pr(>F)
usb           1   20.1   20.08   1.348  0.247
ageCat        4   72.8   18.19   1.221  0.303
Residuals   201 2994.4   14.90               
Code
TukeyHSD(bmi.asi.ancova, "usb")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = BMI ~ usb + ageCat, data = .)

$usb
                          diff       lwr       upr     p adj
U.S. Born-Immigrant -0.6249863 -1.686541 0.4365685 0.2470549
Code
emmeans(bmi.asi.ancova, ~usb)
 usb       emmean    SE  df lower.CL upper.CL
 Immigrant   24.1 0.528 201     23.0     25.1
 U.S. Born   23.7 0.577 201     22.6     24.9

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 

Differences - Healthy Eating Index

Code
## Assumptions
  ## linearity
  full.df %>%
  group_by(group) %>%
  filter(race %in% c("African", "Asian")) %>%
  ggplot(aes(x = age, y = sHEI, color = group)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_classic() +
  stat_cor(method = "pearson", p.accuracy = 0.001, r.accuracy = 0.01) +
  scale_color_npg() 
`geom_smooth()` using formula = 'y ~ x'

Code
  ## Homogeneity of regression slopes
  summary(full.df %>%
    aov(sHEI ~ group*age, .))
             Df Sum Sq Mean Sq F value  Pr(>F)   
group         5    537   107.5   1.249 0.28555   
age           1    657   657.1   7.634 0.00599 **
group:age     4    543   135.6   1.576 0.17982   
Residuals   400  34427    86.1                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
  ## Normality of residuals
  model <- lm(sHEI ~ age + group, data = full.df)
  model.metrics <- augment(model) 
  shapiro_test(model.metrics$.resid)
# A tibble: 1 × 3
  variable             statistic p.value
  <chr>                    <dbl>   <dbl>
1 model.metrics$.resid     0.992  0.0273
Code
  ## Homogeneity of variances
  model.metrics %>% levene_test(.resid ~ group)
Warning in leveneTest.default(y = y, group = group, ...): group coerced to
factor.
# A tibble: 1 × 4
    df1   df2 statistic     p
  <int> <int>     <dbl> <dbl>
1     5   405      1.46 0.201
Code
  ## Outliers
  model.metrics %>% 
  filter(abs(.std.resid) > 3) %>%
  as.data.frame()
   sHEI age             group  .fitted    .resid        .hat   .sigma
1 14.76  23 African U.S. Born 45.27729 -30.51729 0.008631544 9.189261
2 12.69  18   Asian U.S. Born 42.78082 -30.09082 0.009622897 9.192657
     .cooksd .std.resid
1 0.01349899  -3.294374
2 0.01466100  -3.249961
Code
## Robust linear model, by race only
mHEI.race <- full.df %>%
    filter(race %in% c("African", "Asian")) %>%
    rlm(sHEI ~ race + age, data = .)
  summary(mHEI.race) ## t-value > 2 in absolute value typically suggest statistical significance

Call: rlm(formula = sHEI ~ race + age, data = .)
Residuals:
     Min       1Q   Median       3Q      Max 
-31.0032  -6.6759   0.5265   6.5620  24.1650 

Coefficients:
            Value   Std. Error t value
(Intercept) 41.1667  1.7275    23.8308
raceAsian   -0.6748  0.9647    -0.6995
age          0.1778  0.0680     2.6172

Residual standard error: 9.749 on 404 degrees of freedom
Code
  emmeans.HEI.race <- emmeans(mHEI.race, ~race) %>% 
    as.data.frame() %>%
    mutate(group = paste(race))
  p.values <- pairs(emmeans(mHEI.race, ~race), adjust = "tukey") %>%
    as.data.frame() %>%
    mutate(group1 = sub(" - .*", "", contrast), group2 = sub(".* - ", "", contrast),
    group1 = gsub(" ", " ", group1), group2 = gsub(" ", " ", group2),
    p.adj = p.value,
    p.adj.signif = case_when(p.adj < 0.001 ~ "***", p.adj < 0.01 ~ "**", p.adj < 0.05 ~ "*", TRUE ~ "ns"))

## Robust linear model, by race and usb
  mHEI.race.usb <- full.df %>%
    filter(race %in% c("African", "Asian")) %>%
    rlm(sHEI ~ race*usb + age, data = .)
  summary(mHEI.race.usb) ## t-value > 2 in absolute value typically suggest statistical significance

Call: rlm(formula = sHEI ~ race * usb + age, data = .)
Residuals:
    Min      1Q  Median      3Q     Max 
-30.843  -6.828   0.469   6.757  24.675 

Coefficients:
                       Value   Std. Error t value
(Intercept)            40.7588  1.9594    20.8013
raceAsian               0.5676  1.4359     0.3953
usbU.S. Born            0.8516  1.3795     0.6174
age                     0.1736  0.0679     2.5569
raceAsian:usbU.S. Born -2.2430  1.9179    -1.1695

Residual standard error: 10.09 on 402 degrees of freedom
Code
  emmeans.HEI.race.usb <- emmeans(mHEI.race.usb, ~race*usb) %>% 
    as.data.frame() %>%
    mutate(group = paste(race, usb))
  p.values <- pairs(emmeans(mHEI.race.usb, ~race*usb), adjust = "tukey") %>%
    as.data.frame() %>%
    mutate(group1 = sub(" - .*", "", contrast), group2 = sub(".* - ", "", contrast),
    group1 = gsub(" ", " ", group1), group2 = gsub(" ", " ", group2),
    p.adj = p.value,
    p.adj.signif = case_when(p.adj < 0.001 ~ "***", p.adj < 0.01 ~ "**", p.adj < 0.05 ~ "*", TRUE ~ "ns"))
  
## ANCOVA, African, usb vs immigrant
HEI.afr.ancova <- full.df %>%
  filter(!is.na(sHEI) & !is.na(usb) & !is.na(ageCat)) %>%
  filter(race == "African") %>%
  aov(sHEI ~ usb + ageCat, data = .)
summary(HEI.afr.ancova)
             Df Sum Sq Mean Sq F value Pr(>F)  
usb           1      6    6.05   0.061 0.8056  
ageCat        4    819  204.69   2.054 0.0884 .
Residuals   194  19331   99.64                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
TukeyHSD(HEI.afr.ancova, "usb")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = sHEI ~ usb + ageCat, data = .)

$usb
                         diff       lwr      upr     p adj
U.S. Born-Immigrant 0.3524877 -2.468067 3.173043 0.8055741
Code
emmeans(HEI.afr.ancova, ~usb)
 usb       emmean   SE  df lower.CL upper.CL
 Immigrant   46.5 1.45 194     43.7     49.4
 U.S. Born   47.2 1.43 194     44.4     50.0

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## ANCOVA, African, atus
HEI.afr.atus.ancova <- full.df %>%
  filter(race == "African", usb == "Immigrant") %>%
  filter(!is.na(sHEI) & !is.na(atusCat) & !is.na(ageCat)) %>%
  aov(sHEI ~ atusCat + ageCat, data = .)
summary(HEI.afr.atus.ancova)
            Df Sum Sq Mean Sq F value Pr(>F)
atusCat      3     83   27.68   0.266  0.849
ageCat       4    229   57.13   0.550  0.700
Residuals   76   7900  103.95               
Code
TukeyHSD(HEI.afr.atus.ancova, "atusCat")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = sHEI ~ atusCat + ageCat, data = .)

$atusCat
                                   diff        lwr       upr     p adj
> 18 years-< 4 years         -1.3040000 -11.357168  8.749168 0.9862845
13 to 18 years-< 4 years      0.8878571  -8.642088 10.417802 0.9948062
4 to 12 years-< 4 years      -1.2604000 -10.950279  8.429479 0.9861723
13 to 18 years-> 18 years     2.1918571  -5.648937 10.032651 0.8829852
4 to 12 years-> 18 years      0.0436000  -7.990823  8.078023 0.9999989
4 to 12 years-13 to 18 years -2.1482571  -9.517491  5.220977 0.8695891
Code
emmeans(HEI.afr.atus.ancova, ~atusCat)
 atusCat        emmean   SE df lower.CL upper.CL
 < 4 years        45.8 3.58 76     38.7     53.0
 > 18 years       43.2 2.42 76     38.4     48.1
 13 to 18 years   47.1 2.56 76     42.0     52.2
 4 to 12 years    44.4 2.74 76     38.9     49.8

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## ANCOVA, African, tius
HEI.afr.tius.ancova <- full.df %>%
  filter(race == "African", usb == "Immigrant") %>%
  filter(!is.na(sHEI) & !is.na(tiusCat) & !is.na(ageCat)) %>%
  aov(sHEI ~ tiusCat + ageCat, data = .)
summary(HEI.afr.tius.ancova)
            Df Sum Sq Mean Sq F value Pr(>F)
tiusCat      3    294   97.93   0.959  0.417
ageCat       4    155   38.84   0.380  0.822
Residuals   76   7762  102.14               
Code
TukeyHSD(HEI.afr.tius.ancova, "tiusCat")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = sHEI ~ tiusCat + ageCat, data = .)

$tiusCat
                                    diff        lwr      upr     p adj
> 20 years-< 5 years         -1.32210909 -10.927233 8.283015 0.9837021
10 to 20 years-< 5 years     -1.36977143  -8.674549 5.935006 0.9605218
5 to 10 years-< 5 years      -5.00270000 -12.966848 2.961448 0.3572667
10 to 20 years-> 20 years    -0.04766234  -9.494251 9.398927 0.9999992
5 to 10 years-> 20 years     -3.68059091 -13.645827 6.284645 0.7667433
5 to 10 years-10 to 20 years -3.63292857 -11.405141 4.139284 0.6112599
Code
emmeans(HEI.afr.tius.ancova, ~tiusCat)
 tiusCat        emmean   SE df lower.CL upper.CL
 < 5 years        46.6 2.57 76     41.5     51.7
 > 20 years       44.2 3.60 76     37.0     51.3
 10 to 20 years   45.1 2.57 76     40.0     50.3
 5 to 10 years    41.4 2.94 76     35.5     47.2

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## ANCOVA, African, dcus
HEI.afr.dcus.ancova <- full.df %>%
  filter(race == "African", usb == "Immigrant") %>%
    filter(!is.na(sHEI) & !is.na(dcusCat) & !is.na(ageCat)) %>%
  aov(sHEI ~ dcusCat + ageCat, data = .)
summary(HEI.afr.dcus.ancova)
            Df Sum Sq Mean Sq F value Pr(>F)
dcusCat      2    450  224.86   2.223  0.115
ageCat       4     27    6.74   0.067  0.992
Residuals   76   7687  101.15               
Code
TukeyHSD(HEI.afr.dcus.ancova, "dcusCat")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = sHEI ~ dcusCat + ageCat, data = .)

$dcusCat
                                           diff        lwr       upr     p adj
changed moderately-changed a lot       3.738246 -3.1407666 10.617258 0.4001136
little to no change-changed a lot      5.115283 -0.9359397 11.166505 0.1141469
little to no change-changed moderately 1.377037 -5.9385341  8.692608 0.8945662
Code
emmeans(HEI.afr.dcus.ancova, ~dcusCat)
 dcusCat             emmean   SE df lower.CL upper.CL
 changed a lot         42.3 2.17 76     38.0     46.7
 changed moderately    45.9 2.71 76     40.5     51.3
 little to no change   47.1 2.36 76     42.4     51.8

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## ANCOVA, Asian
HEI.asi.ancova <- full.df %>%
  filter(race == "Asian") %>%
  aov(sHEI ~ usb + ageCat, data = .)
summary(HEI.asi.ancova)
             Df Sum Sq Mean Sq F value Pr(>F)  
usb           1    247  246.61   3.339 0.0692 .
ageCat        4    545  136.21   1.844 0.1218  
Residuals   201  14847   73.87                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
TukeyHSD(HEI.asi.ancova, "usb")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = sHEI ~ usb + ageCat, data = .)

$usb
                         diff       lwr       upr     p adj
U.S. Born-Immigrant -2.190382 -4.554184 0.1734213 0.0691569
Code
emmeans(HEI.asi.ancova, ~usb)
 usb       emmean   SE  df lower.CL upper.CL
 Immigrant   47.3 1.18 201     45.0     49.6
 U.S. Born   45.3 1.28 201     42.8     47.8

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## ANCOVA, Asian, atus
HEI.asi.atus.ancova <- full.df %>%
  filter(race == "Asian", usb == "Immigrant") %>%
  filter(!is.na(sHEI) & !is.na(atusCat) & !is.na(ageCat)) %>%
  aov(sHEI ~ atusCat + ageCat, data = .)
summary(HEI.asi.atus.ancova)
            Df Sum Sq Mean Sq F value Pr(>F)  
atusCat      3     95   31.63   0.485 0.6934  
ageCat       4    705  176.24   2.704 0.0355 *
Residuals   87   5671   65.18                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
TukeyHSD(HEI.asi.atus.ancova, "atusCat")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = sHEI ~ atusCat + ageCat, data = .)

$atusCat
                                   diff        lwr      upr     p adj
> 18 years-< 4 years         -0.4304167  -7.588965 6.728132 0.9985984
13 to 18 years-< 4 years     -0.7267593  -8.063852 6.610334 0.9938353
4 to 12 years-< 4 years      -2.7154167 -10.192276 4.761443 0.7772156
13 to 18 years-> 18 years    -0.2963426  -5.822629 5.229943 0.9990037
4 to 12 years-> 18 years     -2.2850000  -7.995546 3.425546 0.7216854
4 to 12 years-13 to 18 years -1.9886574  -7.921486 3.944171 0.8162293
Code
emmeans(HEI.asi.atus.ancova, ~atusCat)
 atusCat        emmean   SE df lower.CL upper.CL
 < 4 years        49.9 2.71 87     44.5     55.3
 > 18 years       47.0 1.56 87     43.9     50.1
 13 to 18 years   49.0 1.90 87     45.2     52.7
 4 to 12 years    46.9 2.09 87     42.8     51.1

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## ANCOVA, Asian, tius
HEI.asi.tius.ancova <- full.df %>%
  filter(race == "Asian") %>%
  filter(!is.na(sHEI) & !is.na(tiusCat) & !is.na(ageCat)) %>%
  aov(sHEI ~ tiusCat + ageCat, data = .)
summary(HEI.asi.tius.ancova)
            Df Sum Sq Mean Sq F value Pr(>F)  
tiusCat      3    236   78.71   1.255 0.2951  
ageCat       4    776  194.09   3.094 0.0197 *
Residuals   87   5458   62.74                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
TukeyHSD(HEI.asi.tius.ancova, "tiusCat")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = sHEI ~ tiusCat + ageCat, data = .)

$tiusCat
                                   diff        lwr       upr     p adj
> 20 years-< 5 years          4.0139394  -3.299156 11.327035 0.4795563
10 to 20 years-< 5 years      1.1119792  -4.160641  6.384600 0.9456562
5 to 10 years-< 5 years       3.6012121  -2.222448  9.424872 0.3729905
10 to 20 years-> 20 years    -2.9019602 -10.153482  4.349562 0.7216085
5 to 10 years-> 20 years     -0.4127273  -8.074258  7.248804 0.9989900
5 to 10 years-10 to 20 years  2.4892330  -3.256915  8.235381 0.6691844
Code
emmeans(HEI.asi.tius.ancova, ~tiusCat)
 tiusCat        emmean   SE df lower.CL upper.CL
 < 5 years        48.1 2.09 87     44.0     52.2
 > 20 years       44.2 2.72 87     38.8     49.6
 10 to 20 years   49.5 2.10 87     45.3     53.7
 5 to 10 years    52.3 2.40 87     47.6     57.1

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## ANCOVA, Asian, dcus
HEI.asi.dcus.ancova <- full.df %>%
  filter(race == "Asian", usb == "Immigrant") %>%
  filter(!is.na(sHEI) & !is.na(dcusCat) & !is.na(ageCat)) %>%
  aov(sHEI ~ dcusCat + ageCat, data = .)
summary(HEI.asi.dcus.ancova)
            Df Sum Sq Mean Sq F value Pr(>F)  
dcusCat      2     44   21.83   0.338 0.7141  
ageCat       4    746  186.40   2.887 0.0269 *
Residuals   88   5681   64.56                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
TukeyHSD(HEI.asi.dcus.ancova, "dcusCat")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = sHEI ~ dcusCat + ageCat, data = .)

$dcusCat
                                             diff       lwr      upr     p adj
changed moderately-changed a lot       1.51775556 -3.469239 6.504750 0.7490421
little to no change-changed a lot      1.56102353 -3.485726 6.607774 0.7419811
little to no change-changed moderately 0.04326797 -4.537661 4.624197 0.9997205
Code
emmeans(HEI.asi.dcus.ancova, ~dcusCat)
 dcusCat             emmean   SE df lower.CL upper.CL
 changed a lot         46.1 1.78 88     42.6     49.6
 changed moderately    48.9 1.66 88     45.6     52.1
 little to no change   47.9 1.66 88     44.6     51.2

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## Graph
emmeans.HEI.race.usb %>%
    filter(race %in% c("African", "Asian")) %>%
  ggplot(aes(x = group, y = emmean, fill = race)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black") +
  geom_errorbar(aes(ymin = emmean-SE, ymax = emmean+SE), 
                width = 0.2, position = position_dodge(0.9)) +
  labs(x = " ", 
       y = "Healthy Eating Index (Adjusted Mean)",
       title = " ",
       fill = "Ancestry") +
    ylim(0, 50) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Code
## Graph
emmeans.HEI.race %>%
    filter(race %in% c("African", "Asian")) %>%
  ggplot(aes(x = group, y = emmean, fill = race)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black") +
  geom_errorbar(aes(ymin = emmean-SE, ymax = emmean+SE), 
                width = 0.2, position = position_dodge(0.9)) +
  labs(x = " ", 
       y = "Healthy Eating Index (Adjusted Mean)",
       title = " ",
       fill = "Ancestry") +
  ylim(0, 50) +
  theme_classic() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 14), axis.text.y = element_text(size = 14), axis.title.y = element_text(size = 16), plot.title = element_text(size = 18), legend.text = element_text(size = 14), legend.title = element_text(size = 16))

Differences - sugar intake

Code
## Assumptions
  ## linearity
  full.df %>%
  group_by(group) %>%
  ggplot(aes(x = age, y = sugar.intake, color = group)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_classic() +
  stat_cor(method = "pearson", p.accuracy = 0.001, r.accuracy = 0.01) +
  scale_color_npg() 
`geom_smooth()` using formula = 'y ~ x'

Code
  ## Homogeneity of regression slopes
  summary(full.df %>%
    aov(sugar.intake ~ group*age, .))
             Df   Sum Sq Mean Sq F value   Pr(>F)    
group         5  3268614  653723  10.741 1.05e-09 ***
age           1   388012  388012   6.375    0.012 *  
group:age     4   103769   25942   0.426    0.790    
Residuals   400 24344754   60862                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
  ## Normality of residuals
  model <- lm(sugar.intake ~ age + group, data = full.df)
  model.metrics <- augment(model) 
  shapiro_test(model.metrics$.resid)
# A tibble: 1 × 3
  variable             statistic  p.value
  <chr>                    <dbl>    <dbl>
1 model.metrics$.resid     0.953 3.44e-10
Code
  ## Homogeneity of variances
  model.metrics %>% levene_test(.resid ~ group)
Warning in leveneTest.default(y = y, group = group, ...): group coerced to
factor.
# A tibble: 1 × 4
    df1   df2 statistic         p
  <int> <int>     <dbl>     <dbl>
1     5   405      5.84 0.0000321
Code
  ## Outliers
  model.metrics %>% 
  filter(abs(.std.resid) > 3) %>%
  as.data.frame()
  sugar.intake age             group  .fitted    .resid        .hat   .sigma
1         1196  34   Asian U.S. Born 362.8070  833.1930 0.016477689 242.7238
2         1300  26 African U.S. Born 549.5500  750.4500 0.009222228 243.4252
3         1456  18   Asian U.S. Born 433.3422 1022.6578 0.009622897 240.9273
4         1456  18 African U.S. Born 584.8176  871.1824 0.009650560 242.4143
     .cooksd .std.resid
1 0.02791579   3.415214
2 0.01248986   3.064771
3 0.02422122   4.177288
4 0.01762887   3.558600
Code
## Robust linear model, by race and usb
  mSugar.race.usb <- full.df %>%
    filter(race %in% c("African", "Asian")) %>%
    rlm(sugar.intake ~ race*usb + age, data = .)
  summary(mSugar.race.usb) ## t-value > 2 in absolute value typically suggest statistical significance

Call: rlm(formula = sugar.intake ~ race * usb + age, data = .)
Residuals:
    Min      1Q  Median      3Q     Max 
-436.33 -134.23   13.51  166.85 1049.67 

Coefficients:
                       Value     Std. Error t value  
(Intercept)             639.1869   49.6627    12.8706
raceAsian              -165.0181   36.3947    -4.5341
usbU.S. Born            -23.5955   34.9628    -0.6749
age                      -3.8345    1.7207    -2.2285
raceAsian:usbU.S. Born   24.7728   48.6091     0.5096

Residual standard error: 205.8 on 402 degrees of freedom
Code
  emmeans.sugar.race.usb <- emmeans(mSugar.race.usb, ~race*usb) %>% 
    as.data.frame() %>%
    mutate(group = paste(race, usb))
  p.values <- pairs(emmeans(mSugar.race.usb, ~race*usb), adjust = "tukey") %>%
    as.data.frame() %>%
    mutate(group1 = sub(" - .*", "", contrast), group2 = sub(".* - ", "", contrast),
    group1 = gsub(" ", " ", group1), group2 = gsub(" ", " ", group2),
    p.adj = p.value,
    p.adj.signif = case_when(p.adj < 0.001 ~ "***", p.adj < 0.01 ~ "**", p.adj < 0.05 ~ "*", TRUE ~ "ns"))

## Graph
emmeans.sugar.race.usb %>%
    filter(race %in% c("African", "Asian")) %>%
  ggplot(aes(x = group, y = emmean, fill = race)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black") +
  geom_errorbar(aes(ymin = emmean-SE, ymax = emmean+SE), 
                width = 0.2, position = position_dodge(0.9)) +
    stat_pvalue_manual(p.values, hide.ns = T, label = "p.adj.signif", label.size = 5, bracket.size = 0.5, tip.length = 0.05, step.increase = 0.2, y.position = 650) +
  labs(x = " ", 
       y = "Sugar Intake in Calories (Adjusted Mean)",
       title = "Sugar Intake by Ancestry and USB (Adjusted for Age)",
       fill = "Ancestry") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Code
## Robust linear model, by race only
  mSugar.race <- full.df %>%
    filter(race %in% c("African", "Asian")) %>%
    rlm(sugar.intake ~ race + age, data = .)
  summary(mSugar.race) ## t-value > 2 in absolute value typically suggest statistical significance

Call: rlm(formula = sugar.intake ~ race + age, data = .)
Residuals:
    Min      1Q  Median      3Q     Max 
-425.72 -133.79   14.69  171.48 1050.93 

Coefficients:
            Value     Std. Error t value  
(Intercept)  623.4076   44.8495    13.9000
raceAsian   -150.6554   25.0453    -6.0153
age           -3.7602    1.7642    -2.1314

Residual standard error: 203.9 on 404 degrees of freedom
Code
  emmeans.sugar.race <- emmeans(mSugar.race, ~race) %>% 
    as.data.frame() %>%
    mutate(group = paste(race))
  p.values <- pairs(emmeans(mSugar.race, ~race), adjust = "tukey") %>%
    as.data.frame() %>%
    mutate(group1 = sub(" - .*", "", contrast), group2 = sub(".* - ", "", contrast),
    group1 = gsub(" ", " ", group1), group2 = gsub(" ", " ", group2),
    p.adj = p.value,
    p.adj.signif = case_when(p.adj < 0.001 ~ "***", p.adj < 0.01 ~ "**", p.adj < 0.05 ~ "*", TRUE ~ "ns"))

  ## ANCOVA, African, usb vs immigrant
sugar.afr.ancova <- full.df %>%
  filter(!is.na(sHEI) & !is.na(usb) & !is.na(ageCat)) %>%
  filter(race == "African") %>%
  aov(sugar.intake ~ usb + ageCat, data = .)
summary(sugar.afr.ancova)
             Df   Sum Sq Mean Sq F value Pr(>F)
usb           1      653     653   0.008  0.928
ageCat        4   603927  150982   1.906  0.111
Residuals   194 15364267   79197               
Code
TukeyHSD(sugar.afr.ancova, "usb")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = sugar.intake ~ usb + ageCat, data = .)

$usb
                       diff       lwr      upr     p adj
U.S. Born-Immigrant 3.66092 -75.85741 83.17925 0.9277447
Code
emmeans(sugar.afr.ancova, ~usb)
 usb       emmean   SE  df lower.CL upper.CL
 Immigrant    503 40.8 194      422      583
 U.S. Born    497 40.2 194      417      576

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## ANCOVA, African, atus
sugar.afr.atus.ancova <- full.df %>%
  filter(race == "African", usb == "Immigrant") %>%
  filter(!is.na(sugar.intake) & !is.na(atusCat) & !is.na(ageCat)) %>%
  aov(sugar.intake ~ atusCat + ageCat, data = .)
summary(sugar.afr.atus.ancova)
            Df  Sum Sq Mean Sq F value Pr(>F)
atusCat      3  103170   34390   0.453  0.716
ageCat       4  417353  104338   1.374  0.251
Residuals   76 5769263   75911               
Code
TukeyHSD(sugar.afr.atus.ancova, "atusCat")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = sugar.intake ~ atusCat + ageCat, data = .)

$atusCat
                                  diff       lwr      upr     p adj
> 18 years-< 4 years         -72.20909 -343.8839 199.4657 0.8974707
13 to 18 years-< 4 years     -49.55195 -307.0873 207.9834 0.9575589
4 to 12 years-< 4 years       13.33091 -248.5264 275.1882 0.9991386
13 to 18 years-> 18 years     22.65714 -189.2309 234.5452 0.9922049
4 to 12 years-> 18 years      85.54000 -131.5806 302.6606 0.7295773
4 to 12 years-13 to 18 years  62.88286 -136.2618 262.0276 0.8403227
Code
emmeans(sugar.afr.atus.ancova, ~atusCat)
 atusCat        emmean   SE df lower.CL upper.CL
 < 4 years         517 96.8 76      324      710
 > 18 years        531 65.3 76      401      661
 13 to 18 years    466 69.3 76      328      604
 4 to 12 years     535 74.1 76      388      683

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## ANCOVA, African, tius
sugar.afr.tius.ancova <- full.df %>%
  filter(race == "African", usb == "Immigrant") %>%
  filter(!is.na(sugar.intake) & !is.na(tiusCat) & !is.na(ageCat)) %>%
  aov(sugar.intake ~ tiusCat + ageCat, data = .)
summary(sugar.afr.tius.ancova)
            Df  Sum Sq Mean Sq F value Pr(>F)
tiusCat      3   85507   28502   0.372  0.773
ageCat       4  382142   95536   1.247  0.298
Residuals   76 5822137   76607               
Code
TukeyHSD(sugar.afr.tius.ancova, "tiusCat")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = sugar.intake ~ tiusCat + ageCat, data = .)

$tiusCat
                                  diff       lwr      upr     p adj
> 20 years-< 5 years         -61.64364 -324.6982 201.4109 0.9268237
10 to 20 years-< 5 years      41.00571 -159.0495 241.0609 0.9493571
5 to 10 years-< 5 years        9.62000 -208.4933 227.7333 0.9994389
10 to 20 years-> 20 years    102.64935 -156.0634 361.3621 0.7252544
5 to 10 years-> 20 years      71.26364 -201.6532 344.1805 0.9021683
5 to 10 years-10 to 20 years -31.38571 -244.2425 181.4711 0.9801138
Code
emmeans(sugar.afr.tius.ancova, ~tiusCat)
 tiusCat        emmean   SE df lower.CL upper.CL
 < 5 years         496 70.4 76      356      637
 > 20 years        521 98.6 76      324      717
 10 to 20 years    530 70.4 76      390      671
 5 to 10 years     498 80.5 76      337      658

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## ANCOVA, African, dcus
sugar.afr.dcus.ancova <- full.df %>%
  filter(race == "African", usb == "Immigrant") %>%
    filter(!is.na(sugar.intake) & !is.na(dcusCat) & !is.na(ageCat)) %>%
  aov(sugar.intake ~ dcusCat + ageCat, data = .)
summary(sugar.afr.dcus.ancova)
            Df  Sum Sq Mean Sq F value Pr(>F)  
dcusCat      2  498337  249168   3.644 0.0308 *
ageCat       4  251154   62789   0.918 0.4578  
Residuals   76 5196507   68375                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
TukeyHSD(sugar.afr.dcus.ancova, "dcusCat")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = sugar.intake ~ dcusCat + ageCat, data = .)

$dcusCat
                                             diff       lwr       upr     p adj
changed moderately-changed a lot       -199.94152 -378.7961 -21.08699 0.0247106
little to no change-changed a lot       -87.75634 -245.0883  69.57564 0.3812740
little to no change-changed moderately  112.18519  -78.0199 302.39027 0.3409110
Code
emmeans(sugar.afr.dcus.ancova, ~dcusCat)
 dcusCat             emmean   SE df lower.CL upper.CL
 changed a lot          567 56.4 76      454      679
 changed moderately     386 70.4 76      246      526
 little to no change    519 61.2 76      397      641

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## ANCOVA, Asian, usb vs immigrant
sugar.asi.ancova <- full.df %>%
  filter(!is.na(sHEI) & !is.na(usb) & !is.na(ageCat)) %>%
  filter(race == "Asian") %>%
  aov(sugar.intake ~ usb + ageCat, data = .)
summary(sugar.asi.ancova)
             Df  Sum Sq Mean Sq F value Pr(>F)
usb           1   38035   38035   0.880  0.349
ageCat        4  161913   40478   0.936  0.444
Residuals   201 8690203   43235               
Code
TukeyHSD(sugar.asi.ancova, "usb")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = sugar.intake ~ usb + ageCat, data = .)

$usb
                        diff       lwr     upr     p adj
U.S. Born-Immigrant 27.20226 -29.98539 84.3899 0.3494025
Code
emmeans(sugar.asi.ancova, ~usb)
 usb       emmean   SE  df lower.CL upper.CL
 Immigrant    377 28.5 201      321      433
 U.S. Born    395 31.1 201      334      456

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## ANCOVA, Asian, atus
sugar.asi.atus.ancova <- full.df %>%
  filter(race == "Asian", usb == "Immigrant") %>%
  filter(!is.na(sugar.intake) & !is.na(atus) & !is.na(ageCat)) %>%
  aov(sugar.intake ~ atusCat + ageCat, data = .)
summary(sugar.asi.atus.ancova)
            Df  Sum Sq Mean Sq F value Pr(>F)
atusCat      3  161444   53815   1.704  0.172
ageCat       4  107845   26961   0.854  0.495
Residuals   87 2747692   31583               
Code
TukeyHSD(sugar.asi.atus.ancova, "atusCat")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = sugar.intake ~ atusCat + ageCat, data = .)

$atusCat
                                    diff        lwr       upr     p adj
> 18 years-< 4 years         -71.5000000 -229.07419  86.07419 0.6356759
13 to 18 years-< 4 years     -70.7777778 -232.28211  90.72655 0.6610146
4 to 12 years-< 4 years       19.5000000 -145.08088 184.08088 0.9895665
13 to 18 years-> 18 years      0.7222222 -120.92257 122.36701 0.9999986
4 to 12 years-> 18 years      91.0000000  -34.70072 216.70072 0.2371984
4 to 12 years-13 to 18 years  90.2777778  -40.31584 220.87140 0.2752991
Code
emmeans(sugar.asi.atus.ancova, ~atusCat)
 atusCat        emmean   SE df lower.CL upper.CL
 < 4 years         414 59.7 87      295      532
 > 18 years        364 34.4 87      295      432
 13 to 18 years    347 41.8 87      264      430
 4 to 12 years     441 45.9 87      350      533

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## ANCOVA, Asian, tius
sugar.asi.tius.ancova <- full.df %>%
  filter(race == "Asian", usb == "Immigrant") %>%
  filter(!is.na(sugar.intake) & !is.na(tiusCat) & !is.na(ageCat)) %>%
  aov(sugar.intake ~ tiusCat + ageCat, data = .)
summary(sugar.asi.tius.ancova)
            Df  Sum Sq Mean Sq F value Pr(>F)
tiusCat      3  116533   38844   1.222  0.307
ageCat       4  134856   33714   1.061  0.381
Residuals   87 2765591   31788               
Code
TukeyHSD(sugar.asi.tius.ancova, "tiusCat")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = sugar.intake ~ tiusCat + ageCat, data = .)

$tiusCat
                                   diff        lwr       upr     p adj
> 20 years-< 5 years         -54.678788 -219.29375 109.93617 0.8202760
10 to 20 years-< 5 years      56.116667  -62.56799 174.80132 0.6042578
5 to 10 years-< 5 years        4.412121 -126.67622 135.50046 0.9997525
10 to 20 years-> 20 years    110.795455  -52.43352 274.02443 0.2908610
5 to 10 years-> 20 years      59.090909 -113.36721 231.54902 0.8061421
5 to 10 years-10 to 20 years -51.704545 -181.04813  77.63904 0.7222927
Code
emmeans(sugar.asi.tius.ancova, ~tiusCat)
 tiusCat        emmean   SE df lower.CL upper.CL
 < 5 years         393 47.0 87      300      487
 > 20 years        309 61.3 87      187      430
 10 to 20 years    448 47.2 87      354      542
 5 to 10 years     390 54.1 87      282      497

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## ANCOVA, Asian, dcus
sugar.asi.dcus.ancova <- full.df %>%
  filter(race == "Asian", usb == "Immigrant") %>%
    filter(!is.na(sugar.intake) & !is.na(dcusCat) & !is.na(ageCat)) %>%
  aov(sugar.intake ~ dcusCat + ageCat, data = .)
summary(sugar.asi.dcus.ancova)
            Df  Sum Sq Mean Sq F value Pr(>F)
dcusCat      2   83680   41840   1.310  0.275
ageCat       4  122501   30625   0.959  0.434
Residuals   88 2810801   31941               
Code
TukeyHSD(sugar.asi.dcus.ancova, "dcusCat")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = sugar.intake ~ dcusCat + ageCat, data = .)

$dcusCat
                                             diff       lwr       upr     p adj
changed moderately-changed a lot         8.637778 -102.2862 119.56173 0.9811836
little to no change-changed a lot      -56.404706 -168.6578  55.84836 0.4575812
little to no change-changed moderately -65.042484 -166.9345  36.84949 0.2856983
Code
emmeans(sugar.asi.dcus.ancova, ~dcusCat)
 dcusCat             emmean   SE df lower.CL upper.CL
 changed a lot          396 39.5 88      318      475
 changed moderately     394 36.9 88      321      468
 little to no change    339 36.9 88      266      413

Results are averaged over the levels of: ageCat 
Confidence level used: 0.95 
Code
## Graph
emmeans.sugar.race %>%
    filter(race %in% c("African", "Asian")) %>%
  ggplot(aes(x = group, y = emmean, fill = race)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black") +
  geom_errorbar(aes(ymin = emmean-SE, ymax = emmean+SE), 
                width = 0.2, position = position_dodge(0.9)) +
    stat_pvalue_manual(p.values, hide.ns = T, label = "p.adj.signif", label.size = 5, bracket.size = 0.5, tip.length = 0.05, step.increase = 0.2, y.position = 650) +
  labs(x = " ", 
       y = "Sugar Intake in Calories (Adjusted Mean)",
       title = " ",
       fill = "Ancestry") +
  ylim(0, 800) +
  theme_classic() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 14), axis.text.y = element_text(size = 14), axis.title.y = element_text(size = 16), plot.title = element_text(size = 18), legend.text = element_text(size = 14), legend.title = element_text(size = 16))