Objective

The objective of this exercise is to use logistic regression to model a dichotomous outcome variable, i.e., physical activity or exercise, for Texas’s Hispanic and non Hispanic r categories. The binary outcome variable is categorized as participation in any form of physical activities or exercises or not during the past month. Also, the age of respondents is added to the model. Marginal effects are calculated and explained in the exercise.The data source is 2021 BRFSS survey data.

Loading libraries

library(viridis)
## Loading required package: viridisLite
library(purrr)
library(haven)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(plyr)
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
## 
##     compact
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:viridis':
## 
##     viridis_pal
library(summarytools)
library(Rmisc)
## Loading required package: lattice
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
library(usmap)
library(ggthemes)
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble  3.1.8     ✔ stringr 1.4.1
## ✔ tidyr   1.2.0     ✔ forcats 0.5.2
## ✔ readr   2.1.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::arrange()    masks plyr::arrange()
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ plyr::compact()     masks purrr::compact()
## ✖ dplyr::count()      masks plyr::count()
## ✖ scales::discard()   masks purrr::discard()
## ✖ dplyr::failwith()   masks plyr::failwith()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::id()         masks plyr::id()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ dplyr::mutate()     masks plyr::mutate()
## ✖ car::recode()       masks dplyr::recode()
## ✖ dplyr::rename()     masks plyr::rename()
## ✖ car::some()         masks purrr::some()
## ✖ dplyr::summarise()  masks plyr::summarise()
## ✖ dplyr::summarize()  masks plyr::summarize()
## ✖ tibble::view()      masks summarytools::view()
library(tidycensus)
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library("ggpubr")
## 
## Attaching package: 'ggpubr'
## 
## The following object is masked from 'package:plyr':
## 
##     mutate
library(readxl)
library("tinytex")
library("lubridate")
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library("splitstackshape")
library(psych)
## 
## Attaching package: 'psych'
## 
## The following object is masked from 'package:car':
## 
##     logit
## 
## The following objects are masked from 'package:scales':
## 
##     alpha, rescale
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(glmmTMB)
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.5.0
## Current Matrix version is 1.5.1
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
library(haven)
library(foreign)
library(WriteXLS)

Loading the dataset and selecting required variables

brfss21 = read.xport("C:\\Users\\anami\\OneDrive\\Documents\\Stat ll\\Assignment 4\\LLCP2021.XPT_")
head(brfss21)
names(brfss21)
names(brfss21)<-tolower(names(brfss21))
data1<-subset(brfss21,x_state==48,select=c(x_age80,x_hispanc,exerany2,iyear,x_state,sexvar)) 
summary(data1)
##     x_age80       x_hispanc        exerany2        iyear              x_state  
##  Min.   :18.0   Min.   :1.000   Min.   :1.000   Length:10815       Min.   :48  
##  1st Qu.:37.0   1st Qu.:1.000   1st Qu.:1.000   Class :character   1st Qu.:48  
##  Median :53.0   Median :2.000   Median :1.000   Mode  :character   Median :48  
##  Mean   :52.1   Mean   :1.887   Mean   :1.278                      Mean   :48  
##  3rd Qu.:67.0   3rd Qu.:2.000   3rd Qu.:2.000                      3rd Qu.:48  
##  Max.   :80.0   Max.   :9.000   Max.   :9.000                      Max.   :48  
##      sexvar     
##  Min.   :1.000  
##  1st Qu.:1.000  
##  Median :2.000  
##  Mean   :1.517  
##  3rd Qu.:2.000  
##  Max.   :2.000

Recoding variables and description of data

library(dplyr)
data2<-data1%>%
mutate(noexercise=car::Recode(exerany2, recodes = "1=0 ; 2=1; else=NA" ))%>%
mutate(hispanic=car::Recode(x_hispanc, recodes = "1=1 ; 2=0; else=NA") )
summary(data2)
##     x_age80       x_hispanc        exerany2        iyear              x_state  
##  Min.   :18.0   Min.   :1.000   Min.   :1.000   Length:10815       Min.   :48  
##  1st Qu.:37.0   1st Qu.:1.000   1st Qu.:1.000   Class :character   1st Qu.:48  
##  Median :53.0   Median :2.000   Median :1.000   Mode  :character   Median :48  
##  Mean   :52.1   Mean   :1.887   Mean   :1.278                      Mean   :48  
##  3rd Qu.:67.0   3rd Qu.:2.000   3rd Qu.:2.000                      3rd Qu.:48  
##  Max.   :80.0   Max.   :9.000   Max.   :9.000                      Max.   :48  
##                                                                                
##      sexvar        noexercise        hispanic    
##  Min.   :1.000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :2.000   Median :0.0000   Median :0.000  
##  Mean   :1.517   Mean   :0.2604   Mean   :0.286  
##  3rd Qu.:2.000   3rd Qu.:1.0000   3rd Qu.:1.000  
##  Max.   :2.000   Max.   :1.0000   Max.   :1.000  
##                  NA's   :29       NA's   :257
library(dplyr)
nrow(data2)
## [1] 10815
data3<-data2%>%
filter(!is.na(noexercise),!is.na(hispanic))
nrow(data3)
## [1] 10531
tabyl(data3$x_age80)
##  data3$x_age80   n     percent
##             18  78 0.007406704
##             19 108 0.010255436
##             20  97 0.009210901
##             21  98 0.009305859
##             22 123 0.011679802
##             23 127 0.012059633
##             24 105 0.009970563
##             25 146 0.013863831
##             26 120 0.011394929
##             27 117 0.011110056
##             28 135 0.012819295
##             29 140 0.013294084
##             30 169 0.016047859
##             31 126 0.011964676
##             32 173 0.016427690
##             33 175 0.016617605
##             34 152 0.014433577
##             35 170 0.016142816
##             36 155 0.014718450
##             37 164 0.015573070
##             38 174 0.016522647
##             39 159 0.015098281
##             40 195 0.018516760
##             41 161 0.015288197
##             42 197 0.018706676
##             43 180 0.017092394
##             44 145 0.013768873
##             45 159 0.015098281
##             46 147 0.013958788
##             47 173 0.016427690
##             48 168 0.015952901
##             49 146 0.013863831
##             50 246 0.023359605
##             51 175 0.016617605
##             52 163 0.015478112
##             53 183 0.017377267
##             54 172 0.016332732
##             55 170 0.016142816
##             56 178 0.016902478
##             57 157 0.014908366
##             58 168 0.015952901
##             59 162 0.015383154
##             60 222 0.021080619
##             61 145 0.013768873
##             62 176 0.016712563
##             63 180 0.017092394
##             64 200 0.018991549
##             65 220 0.020890704
##             66 176 0.016712563
##             67 170 0.016142816
##             68 184 0.017472225
##             69 185 0.017567183
##             70 177 0.016807521
##             71 160 0.015193239
##             72 173 0.016427690
##             73 169 0.016047859
##             74 177 0.016807521
##             75 142 0.013484000
##             76 108 0.010255436
##             77 107 0.010160479
##             78 123 0.011679802
##             79 112 0.010635267
##             80 769 0.073022505
histogram(data3$x_age80)

data3$female<- recode(data3$sexvar, "2=1; else=0")

tabyl(data3$noexercise)
##  data3$noexercise    n   percent
##                 0 7792 0.7399107
##                 1 2739 0.2600893
tabyl(data3$hispanic)
##  data3$hispanic    n   percent
##               0 7518 0.7138923
##               1 3013 0.2861077

Calculating averages of variables

data3$x_age80sq<- data3$x_age80*data3$x_age80
data3$x_age80cu<- data3$x_age80*data3$x_age80*data3$x_age80
mean(data3$noexercise)
## [1] 0.2600893
mean(data3$x_age80)
## [1] 52.04197
mean(data3$x_age80sq)
## [1] 3029.052
mean(data3$x_age80cu)
## [1] 190491.5

Calculating statistics for specific ages to compare actual values to model-based values

ageprobs<- data3 %>%
    group_by(x_age80) %>%
    summarize(p=mean(noexercise),n=n())
              
ageprobs$num <- ageprobs$p*(1-ageprobs$p)

ageprobs$sep <- sqrt(ageprobs$num/ageprobs$n)

ageprobs$me <- 2*ageprobs$sep
  
ggplot(data =ageprobs, aes(x = x_age80, y = p, ymin=p-me, ymax=p+me)) +
     geom_line() + ylim(0,1) + geom_ribbon(alpha=0.3,aes(color=NULL)) +
labs(title="No exercise by Age", 
       subtitle="Respondents in Sample", 
       caption="Source: 2021 BRFSS Data") +
       xlab(label="Age at Survey") +
  ylab(label="Proportion of Respondents with no exercise") 

Model 1: physical exercise by age

model1 <- glm(noexercise ~ x_age80, family = "binomial", data = data3)
summary(model1)
## 
## Call:
## glm(formula = noexercise ~ x_age80, family = "binomial", data = data3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9798  -0.8223  -0.6830   1.3888   1.9788  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.189371   0.075033  -29.18   <2e-16 ***
## x_age80      0.021312   0.001295   16.46   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 12072  on 10530  degrees of freedom
## Residual deviance: 11790  on 10529  degrees of freedom
## AIC: 11794
## 
## Number of Fisher Scoring iterations: 4

Interpretation

For every year of increase in age, the log odds of not doing physical activity/ exercise increases by (.02). This estimate is statistically highly significant (p<0).

model1Exp = model1
model1Exp$coefficients = exp(model1Exp$coefficients)
summary(model1Exp)
## 
## Call:
## glm(formula = noexercise ~ x_age80, family = "binomial", data = data3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9798  -0.8223  -0.6830   1.3888   1.9788  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 0.111987   0.075033   1.493    0.136    
## x_age80     1.021541   0.001295 788.882   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 12072  on 10530  degrees of freedom
## Residual deviance: 11790  on 10529  degrees of freedom
## AIC: 11794
## 
## Number of Fisher Scoring iterations: 4

Interpretation:

Age- change in Odds %: (1.022 – 1) * 100 = 2.2%

This means that each additional increase of one year in age is associated with a 2.2% increase in the odds of not doing exercise.

library(margins)

MERs<-margins(model1,variables="x_age80", at=list(x_age80=c(18,20,30,40,50,60,70,80)))

summary(MERs)
##   factor x_age80    AME     SE       z      p  lower  upper
##  x_age80 18.0000 0.0026 0.0001 32.5598 0.0000 0.0024 0.0027
##  x_age80 20.0000 0.0027 0.0001 30.6665 0.0000 0.0025 0.0028
##  x_age80 30.0000 0.0031 0.0001 23.7449 0.0000 0.0028 0.0033
##  x_age80 40.0000 0.0035 0.0002 19.6329 0.0000 0.0032 0.0039
##  x_age80 50.0000 0.0039 0.0002 17.1182 0.0000 0.0035 0.0044
##  x_age80 60.0000 0.0044 0.0003 15.6029 0.0000 0.0038 0.0049
##  x_age80 70.0000 0.0047 0.0003 14.7983 0.0000 0.0041 0.0054
##  x_age80 80.0000 0.0050 0.0003 14.5828 0.0000 0.0044 0.0057
library(margins)
MERs<-margins(model1,variables="x_age80", at=list(x_age80=c(18,20,30,40,50,60,70,80)))
summary(MERs)
##   factor x_age80    AME     SE       z      p  lower  upper
##  x_age80 18.0000 0.0026 0.0001 32.5598 0.0000 0.0024 0.0027
##  x_age80 20.0000 0.0027 0.0001 30.6665 0.0000 0.0025 0.0028
##  x_age80 30.0000 0.0031 0.0001 23.7449 0.0000 0.0028 0.0033
##  x_age80 40.0000 0.0035 0.0002 19.6329 0.0000 0.0032 0.0039
##  x_age80 50.0000 0.0039 0.0002 17.1182 0.0000 0.0035 0.0044
##  x_age80 60.0000 0.0044 0.0003 15.6029 0.0000 0.0038 0.0049
##  x_age80 70.0000 0.0047 0.0003 14.7983 0.0000 0.0041 0.0054
##  x_age80 80.0000 0.0050 0.0003 14.5828 0.0000 0.0044 0.0057
MERs<-margins(model1,variables="x_age80", at=list(x_age80=c(39,40,41)))

summary(MERs)
##   factor x_age80    AME     SE       z      p  lower  upper
##  x_age80 39.0000 0.0035 0.0002 19.9580 0.0000 0.0031 0.0038
##  x_age80 40.0000 0.0035 0.0002 19.6329 0.0000 0.0032 0.0039
##  x_age80 41.0000 0.0036 0.0002 19.3226 0.0000 0.0032 0.0039
AMEs<-margins(model1,variables=c("x_age80"))

summary(AMEs)
##   factor    AME     SE       z      p  lower  upper
##  x_age80 0.0040 0.0002 17.0938 0.0000 0.0035 0.0045

Model 2: physical exercise by age and squared age

model2 <- glm(noexercise ~ x_age80 + x_age80sq, family = "binomial", data = data3)
summary(model2)
## 
## Call:
## glm(formula = noexercise ~ x_age80 + x_age80sq, family = "binomial", 
##     data = data3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0001  -0.8117  -0.6775   1.3658   1.9330  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.898e+00  2.028e-01  -9.357   <2e-16 ***
## x_age80      8.837e-03  8.199e-03   1.078    0.281    
## x_age80sq    1.185e-04  7.699e-05   1.539    0.124    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 12072  on 10530  degrees of freedom
## Residual deviance: 11787  on 10528  degrees of freedom
## AIC: 11793
## 
## Number of Fisher Scoring iterations: 4
model2Exp = model2
model2Exp$coefficients = exp(model2Exp$coefficients)
summary(model2Exp)
## 
## Call:
## glm(formula = noexercise ~ x_age80 + x_age80sq, family = "binomial", 
##     data = data3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0001  -0.8117  -0.6775   1.3658   1.9330  
## 
## Coefficients:
##              Estimate Std. Error   z value Pr(>|z|)    
## (Intercept) 1.499e-01  2.028e-01     0.739     0.46    
## x_age80     1.009e+00  8.199e-03   123.054   <2e-16 ***
## x_age80sq   1.000e+00  7.699e-05 12990.190   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 12072  on 10530  degrees of freedom
## Residual deviance: 11787  on 10528  degrees of freedom
## AIC: 11793
## 
## Number of Fisher Scoring iterations: 4

Model 3

model3 <- glm(noexercise ~ x_age80 + x_age80sq + x_age80cu, family = "binomial", data = data3)
summary(model3)
## 
## Call:
## glm(formula = noexercise ~ x_age80 + x_age80sq + x_age80cu, family = "binomial", 
##     data = data3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0354  -0.7922  -0.7019   1.3264   2.0429  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.411e+00  5.564e-01  -6.132  8.7e-10 ***
## x_age80      1.137e-01  3.657e-02   3.109  0.00188 ** 
## x_age80sq   -2.072e-03  7.461e-04  -2.777  0.00548 ** 
## x_age80cu    1.413e-05  4.779e-06   2.957  0.00311 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 12072  on 10530  degrees of freedom
## Residual deviance: 11779  on 10527  degrees of freedom
## AIC: 11787
## 
## Number of Fisher Scoring iterations: 4
model3Exp = model3
model3Exp$coefficients = exp(model3Exp$coefficients)
summary(model3Exp)
## 
## Call:
## glm(formula = noexercise ~ x_age80 + x_age80sq + x_age80cu, family = "binomial", 
##     data = data3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0354  -0.7922  -0.7019   1.3264   2.0429  
## 
## Coefficients:
##              Estimate Std. Error   z value Pr(>|z|)    
## (Intercept) 3.299e-02  5.564e-01 5.900e-02    0.953    
## x_age80     1.120e+00  3.657e-02 3.064e+01   <2e-16 ***
## x_age80sq   9.979e-01  7.461e-04 1.337e+03   <2e-16 ***
## x_age80cu   1.000e+00  4.779e-06 2.092e+05   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 12072  on 10530  degrees of freedom
## Residual deviance: 11779  on 10527  degrees of freedom
## AIC: 11787
## 
## Number of Fisher Scoring iterations: 4
ageprobs<- data3 %>%
    group_by(hispanic, female, x_age80) %>%
    summarize(pnoexercise=mean(noexercise),n=n())
## `summarise()` has grouped output by 'hispanic', 'female'. You can override
## using the `.groups` argument.
ggplot(data =ageprobs, aes(x = x_age80, y = pnoexercise, color = hispanic, group = hispanic)) + geom_line() + facet_wrap(~ female) + ylim(0,.5)

Model 4: Physical activity by age, gender and race-ethnicity group

model4 <- glm(noexercise ~ x_age80 + hispanic + female, family = "binomial", data = data3)
summary(model4)
## 
## Call:
## glm(formula = noexercise ~ x_age80 + hispanic + female, family = "binomial", 
##     data = data3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1894  -0.8169  -0.6695   1.2628   2.1298  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.589833   0.085521 -30.283  < 2e-16 ***
## x_age80      0.023949   0.001346  17.786  < 2e-16 ***
## hispanic     0.499407   0.050414   9.906  < 2e-16 ***
## female       0.202664   0.045577   4.447 8.72e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 12072  on 10530  degrees of freedom
## Residual deviance: 11670  on 10527  degrees of freedom
## AIC: 11678
## 
## Number of Fisher Scoring iterations: 4

Interpretation:

model4Exp = model4
model4Exp$coefficients = exp(model4Exp$coefficients)
summary(model4Exp)
## 
## Call:
## glm(formula = noexercise ~ x_age80 + hispanic + female, family = "binomial", 
##     data = data3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1894  -0.8169  -0.6695   1.2628   2.1298  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 0.075033   0.085521   0.877     0.38    
## x_age80     1.024238   0.001346 760.671   <2e-16 ***
## hispanic    1.647745   0.050414  32.684   <2e-16 ***
## female      1.224661   0.045577  26.870   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 12072  on 10530  degrees of freedom
## Residual deviance: 11670  on 10527  degrees of freedom
## AIC: 11678
## 
## Number of Fisher Scoring iterations: 4

Interpretation

MEMs: marginal effects at the mean

library(emmeans)
emmeans(model4, specs = "hispanic",
regrid = "response")
##  hispanic  prob      SE  df asymp.LCL asymp.UCL
##         0 0.225 0.00496 Inf     0.215     0.234
##         1 0.323 0.00888 Inf     0.305     0.340
## 
## Results are averaged over the levels of: female 
## Confidence level used: 0.95
emmeans(model4, specs = "hispanic",
regrid = "response") |>
contrast(method = "revpairwise") |>
confint()
##  contrast              estimate     SE  df asymp.LCL asymp.UCL
##  hispanic1 - hispanic0   0.0982 0.0103 Inf    0.0781     0.118
## 
## Results are averaged over the levels of: female 
## Confidence level used: 0.95

Interpretation

MERs: marginal effects at representative values

emmeans(model4, specs = "hispanic", by = "female", at = list(x_age80=20),
regrid = "response") |>
contrast(method = "revpairwise") |>
confint()
## female = 0:
##  contrast              estimate      SE  df asymp.LCL asymp.UCL
##  hispanic1 - hispanic0   0.0583 0.00632 Inf    0.0460    0.0707
## 
## female = 1:
##  contrast              estimate      SE  df asymp.LCL asymp.UCL
##  hispanic1 - hispanic0   0.0672 0.00714 Inf    0.0532    0.0812
## 
## Confidence level used: 0.95
emmeans(model4, specs = "hispanic", by = "female", at = list(x_age80=40),
regrid = "response") |>
contrast(method = "revpairwise") |>
confint()
## female = 0:
##  contrast              estimate      SE  df asymp.LCL asymp.UCL
##  hispanic1 - hispanic0   0.0801 0.00853 Inf    0.0634    0.0969
## 
## female = 1:
##  contrast              estimate      SE  df asymp.LCL asymp.UCL
##  hispanic1 - hispanic0   0.0897 0.00938 Inf    0.0714    0.1081
## 
## Confidence level used: 0.95
emmeans(model4, specs = "hispanic", by = "female", at = list(x_age80=60),
regrid = "response") |>
contrast(method = "revpairwise") |>
confint()
## female = 0:
##  contrast              estimate     SE  df asymp.LCL asymp.UCL
##  hispanic1 - hispanic0    0.102 0.0109 Inf    0.0809     0.124
## 
## female = 1:
##  contrast              estimate     SE  df asymp.LCL asymp.UCL
##  hispanic1 - hispanic0    0.110 0.0115 Inf    0.0878     0.133
## 
## Confidence level used: 0.95
emmeans(model4, specs = "hispanic", by = "female", at = list(x_age80=80),
regrid = "response") |>
contrast(method = "revpairwise") |>
confint()
## female = 0:
##  contrast              estimate     SE  df asymp.LCL asymp.UCL
##  hispanic1 - hispanic0    0.119 0.0124 Inf    0.0946     0.143
## 
## female = 1:
##  contrast              estimate     SE  df asymp.LCL asymp.UCL
##  hispanic1 - hispanic0    0.123 0.0125 Inf    0.0982     0.147
## 
## Confidence level used: 0.95

Interpretation

For male, being in the Hispanic group, increases the probability of not doing exercises by .06,.08,.10 and .12 percentage points at age 20,40,60 and 80 years respectively. However, the marginal effects for females are slightly higher than men at different ages.For female, being in the Hispanic group, increases the probability of not doing exercises by .07,.09,.11 and .122 percentage points at age 20,40,60 and 80 years respectively.

AMEs: average marginal effects

hispanic_eff <- predict(model4, newdata = data.frame(hispanic=1, female=data3$female, x_age80=data3$x_age80), 
type = "response")

non_hispanic_eff <- predict(model4, newdata = data.frame(hispanic=0, female=data3$female, x_age80=data3$x_age80), 
type = "response")

mean(hispanic_eff - non_hispanic_eff)
## [1] 0.09667336

Interpretation

The average marginal effect of the model is .096. Being in the Hispanic category, increases the probability of not doing exercises by .096 percentage points.