On SARI data from Brazil National Surveillance System
### Carregar pacotes
if (!require(viridis)) {
install.packages("viridis")
library(viridis)
}
if (!require(dplyr)) {
install.packages("dplyr")
library(dplyr)
}
if (!require(tidyr)) {
install.packages("tidyr")
library(tidyr)
}
if (!require(lubridate)) {
install.packages("lubridate")
library(lubridate)
}
if (!require(readr)) {
install.packages("readr")
library(readr)
}
if (!require(binom)) {
install.packages("binom")
library(binom)
}
if (!require(ggplot2)) {
install.packages("ggplot2")
library(ggplot2)
}
if (!require(scales)) {
install.packages("scales")
library(scales)
}
if (!require(nnet)) {
install.packages("nnet")
library(nnet)
}
if (!require(broom)) {
install.packages("broom")
library(broom)
}
if (!require(ggstance)) {
install.packages("ggstance")
library(ggstance)
}
if (!require(MNLpred)) {
install.packages("MNLpred")
library(MNLpred)
}
if (!require(MASS)) {
install.packages("MASS")
library(MASS)
}
if (!require(DescTools)) {
install.packages("DescTools")
library(DescTools)
}
if (!require(geofacet)) {
install.packages("geofacet")
library(geofacet)
}
if (!require(epiR)) {
install.packages("epiR")
library(epiR)
}
if (!require(rms)) {
install.packages("rms")
library(rms)
}
if (!require(splines)) {
install.packages("splines")
library(splines)
}
if (!require(ggpubr)) {
install.packages("ggpubr")
library(ggpubr)
}
if (!require(stats)) {
install.packages("stats")
library(stats)
}
if (!require(formatR)) {
install.packages("formatR")
library(formatR)
}
if (!require(purrr)) {
install.packages("purrr")
library(purrr)
}
if (!require(stringr)) {
install.packages("stringr")
library(stringr)
}
if (!require(gtsummary)) {
install.packages("gtsummary")
library(gtsummary)
}
if (!require(parameters)) {
install.packages("parameters")
library(parameters)
}
# setwd('~/Área de Trabalho/central_covid/Codes_sandwich/')
source("../../nowcasting/fct/get.last.date.R")
source("../../nowcasting/fct/read.sivep.R")
source("../../modelo-variante/functions/dados_estados.R")
source("../Scripts/functions.R")
source("../Scripts/functions_multinomial.R")
source("../Scripts/misc/theme.publication.R")
last.date <- get.last.date("../CSV/")
data.dir <- "../CSV/"
Over the year of 2020 the world has seen an unprecedented severe pandemic due to the Novel Coronavirus-2 (SARS-CoV-2), this has put challenges over each and every surveillance system on how to produce and curate data about the pandemic. Brazil has one of the biggest and most well established public health system, in Portuguese, Sistema Único de Saúde (SUS).
From the national surveillance system to the influenza (SIVEP-Gripe) has coming the most of the data about severe acute respiratory syndrome SARS cases due to the SARS-Cov-2. We compiled those data and run Logistic Regression Models, aiming to seen how can be associated to mortality the age factor. SIVEP-Gripe disposes the data of all hospitalizations with Severe Acute Respiratory Illness SARI.
dados<-read_csv(paste0("../CSV/sivep_mortality_cut_", last.date, ".csv"))
dados<-dados %>%
filter(idade >= 0 & !is.na(idade) & raca != "Não Informado" & sexo != "Não Informado" & !is.na(sg_uf)) %>%
mutate(outcome = case_when(evolucao == 1 ~ "Discharge",
evolucao == 2 ~ "Death"),
# age = cut(idade, breaks = breaks, labels = labels, right = T),
age = idade - mean(idade),
state = sg_uf
) %>%
mutate(raca = factor(raca),
sexo = factor(sexo),
state = factor(state),
outcome = factor(outcome)) %>%
mutate(raca = relevel(factor(raca), ref = "Branca"), # Releveling the race to the white people
sexo = relevel(factor(sexo), ref = "Feminino"), # Releveling the sex to the female
state = relevel(factor(sg_uf), ref = "SP"), # Releveling to the state with most cases
# age = relevel(factor(age), ref = "60-69"),
outcome = relevel(factor(outcome), ref = "Discharge")
)
explanatory_vars <- c("age", "sexo", "raca", "state") # The explanatory variables
dados_glm<-dados %>%
dplyr::filter(!is.na(outcome) & !is.na(raca) & !is.na(sexo) & !is.na(age) & !is.na(state)) %>%
dplyr::select(explanatory_vars, outcome) ## select variables of interest
dados_tab<-dados_glm %>%
tbl_summary() %>%
bold_labels()
dados_tab
| Characteristic | N = 1,229,8741 |
|---|---|
| age | 3 (-12, 16) |
| sexo | |
| Feminino | 560,719 (46%) |
| Masculino | 669,155 (54%) |
| raca | |
| Branca | 634,957 (52%) |
| Amarela | 14,626 (1.2%) |
| Indígena | 3,430 (0.3%) |
| Parda | 507,044 (41%) |
| Preta | 69,817 (5.7%) |
| state | |
| SP | 388,890 (32%) |
| AC | 3,699 (0.3%) |
| AL | 10,215 (0.8%) |
| AM | 34,078 (2.8%) |
| AP | 3,923 (0.3%) |
| BA | 38,961 (3.2%) |
| CE | 35,391 (2.9%) |
| DF | 15,239 (1.2%) |
| ES | 8,146 (0.7%) |
| GO | 40,273 (3.3%) |
| MA | 14,780 (1.2%) |
| MG | 147,616 (12%) |
| MS | 23,795 (1.9%) |
| MT | 13,782 (1.1%) |
| PA | 37,219 (3.0%) |
| PB | 19,629 (1.6%) |
| PE | 32,338 (2.6%) |
| PI | 11,058 (0.9%) |
| PR | 84,227 (6.8%) |
| RJ | 85,452 (6.9%) |
| RN | 11,991 (1.0%) |
| RO | 9,011 (0.7%) |
| RR | 3,055 (0.2%) |
| RS | 89,771 (7.3%) |
| SC | 54,541 (4.4%) |
| SE | 6,192 (0.5%) |
| TO | 6,602 (0.5%) |
| outcome | |
| Discharge | 822,784 (67%) |
| Death | 407,090 (33%) |
|
1
Median (IQR); n (%)
|
|
In this last tutorial we selected only age, in continuous and categorical scales, and outcome, respectively identified by, idade and evolucao.
Now we gonna take other variables to help to explain the outcome, being discharged or dying, first we start with social-demographical variables such as Sex, Race, State, being on ICU and having or not Ventilation help.
We start by producing a univariate model for every single explanatory variable we selected on the database, the aim here is to see which, between all explanatory variables are the most explanatory variable as a single factor, from the last tutorial we hope to be probably Age, but we test it to make sure.
models <- explanatory_vars %>% # begin with variables of interest
str_c("outcome ~ ", .) %>% # combine each variable into formula ("outcome ~ variables)
map(
.f = ~glm( # pass the formulas one-by-one to glm()
formula = as.formula(.x), # within glm(), the string formula is .x
family = binomial(link = logit), # specify type of glm (logistic)
data = dados_glm)) # dataset
models<- models%>%
setNames(explanatory_vars) # Naming the output list with the input explanatory variables
AIC(models$age, models$sexo, models$raca, models$state)
df AIC
models$age 2 1418677
models$sexo 2 1561218
models$raca 5 1560415
models$state 27 1539624
As though, the most explanatory variable, to a univariate model, is as expected the Age variable, we studied it in a more detailed way in the last tutorial. We used a variety of transformation on this variable to select the better way to display it. It were, as we used above, as categorical variable, on age bins. To compare each effect for each of the explanatory variable, we can construct a OR table, to do so:
age_tab <- models$age %>%
parameters(exponentiate = TRUE, df_method = "wald") # Create a table
age_aug <- models$age %>%
augment(se_fit = TRUE, exponetiate = TRUE, type.predict = "response")
sex_tab <- models$sexo %>%
parameters(exponentiate = TRUE, df_method = "wald") # Create a table
sex_aug <- models$sexo %>%
augment(se_fit = TRUE, exponetiate = TRUE, type.predict = "response")
race_tab <- models$raca %>%
parameters(exponentiate = TRUE, df_method = "wald") # Create a table
race_aug <- models$raca %>%
augment(se_fit = TRUE, exponetiate = TRUE, type.predict = "response")
state_tab <- models$state %>%
parameters(exponentiate = TRUE, df_method = "wald") # Create a table
state_aug <- models$state %>%
augment(se_fit = TRUE, exponetiate = TRUE, type.predict = "response")
plt_univar <- function(x) {
plt <- x %>%
filter(Parameter != "(Intercept)") %>%
mutate(color = ifelse(Coefficient > 1, "cyan3", "firebrick1")) %>%
ggplot(aes(y = reorder(Parameter, Coefficient), x = Coefficient,
xmin = CI_low, xmax = CI_high, col = color)) + geom_pointrangeh(alpha = 0.5) +
geom_vline(xintercept = 1, lty = "dotted") + theme_minimal() +
theme(plot.title = element_text(face = "bold"), axis.title.y = element_blank(),
legend.position = "none") + scale_color_manual(values = c("firebrick1",
"cyan3")) + labs(x = "Odds Ratio")
return(plt)
}
plt_aug <- function(x, var1, var2 = NULL) {
data <- x %>%
group_by(outcome, {
{
var1
}
}, {
{
var2
}
}, .fitted, .se.fit) %>%
summarise(fit = first(.fitted), se_fit = first(.se.fit)) %>%
mutate(lwr = fit - 2 * se_fit, upr = fit + 2 * se_fit)
plt <- data %>%
ggplot(aes(y = fit, x = {
{
var1
}
}, col = {
{
var2
}
})) + geom_pointrange(aes(ymin = lwr, ymax = upr, x = {
{
var1
}
}), fatten = 0.75) + geom_line() + theme_minimal()
return(plt)
}
Now for each explanatory variable we took a plot of each Odds Ratio of the term in the model and a plot of the fitted model in function to the variable.
Age:
plt_aug_age
Sex:
plt_sex <- sex_tab %>%
plt_univar() + labs(title = "Sex", subtitle = "As categorical factor, reference sex 'Feminino'",
caption = "Female in Portuguese")
plt_aug_sex <- sex_aug %>%
plt_aug(var1 = sexo) + labs(title = "Sex", subtitle = "As categorical factor, reference sex 'Feminino'",
caption = "Female in Portuguese")
plt_sex
plt_aug_sex
Race:
plt_race <- race_tab %>%
plt_univar() + labs(title = "Race", subtitle = "As categorical factor, reference race 'Branco'",
caption = "White in Portuguese")
plt_aug_race <- race_aug %>%
plt_aug(var1 = raca) + labs(title = "Race", subtitle = "As categorical factor, reference race 'Branco'",
caption = "White in Portuguese")
plt_race
plt_aug_race
State:
plt_state <- state_tab %>%
mutate(across(where(is.numeric), round, digits = 2)) %>%
plt_univar() + labs(title = "State", subtitle = "As categorical factor, reference state 'São Paulo'",
caption = "Labelled as SP")
plt_aug_state <- state_aug %>%
plt_aug(var1 = state) + labs(title = "State", subtitle = "As categorical factor, reference state 'São Paulo'",
caption = "Labelled as SP")
plt_state
plt_aug_state
Using only univariate models, we can assess explanatory variables to mortality. This suggest that univariate models do not fulfill the entire range of interactions that can happens in an outcome like mortality. Besides, from the univariate models, we could saw how are more severe conditions to expect less or more mortality, Age is clear a strong factor to contributing to mortality.
It is well known that age is a strong factor, but it is well know that it can be modulated by others factor, this was explored in the first tutorial. To overcome the explanatories limitations of the univariate models, we propose models that allows more than single variable as explanatory variable. Interaction between the explanatory variables, we considers too, after the pair models.
Now, we construct models that makes pairs between all of explanatory variables, to do so, let those formulas as follows:
## run a regression with all variables of interest
mv_reg <- explanatory_vars %>% ## begin with vector of explanatory column names
str_c(collapse = "+") %>% ## combine all names of the variables of interest separated by a plus
str_c("outcome ~ ", .)%>% ## combine the names of variables of interest with outcome in formula style
glm(family = binomial(link = logit), ## define type of glm as logistic,
data = dados_glm) ## define your dataset
mv_tab<-mv_reg %>%
parameters(exponentiate = TRUE, df_method = "wald") # Create a table
mv_aug<-mv_reg %>%
augment(se_fit = TRUE, exponetiate = TRUE, type.predict = "response")
plt_mv<- mv_tab %>% # Plotting the model
plt_univar()+
labs(title = "Multivariate Model",
subtitle = "All variable as categorical, 'age+sexo+raca+state'",
caption = "References Values: Age '60-69'; Sex 'Feminino'; Race 'Branca'; State 'SP'")
plt_mv
As we can seen consider now the effects of all possible does not guarantee a clear explanation, to do so we can select a which ones of this variables are most additive in the explanation. We proceed with an model selection, by stepwise algorithm via the AIC.
final_reg_mv <- mv_reg %>%
stepAIC(direction = "both", trace = FALSE)
final_reg_mv$anova
Stepwise Model Path
Analysis of Deviance Table
Initial Model:
outcome ~ age + sexo + raca + state
Final Model:
outcome ~ age + sexo + raca + state
Step Df Deviance Resid. Df Resid. Dev AIC
1 1229841 1393083 1393149
And the stepwise selected the full model, so with all variables.
Now we allow the interaction between variables, or multiplicative effects between explanatories variables, the way to do it is by given the formula interaction terms:
summ_func<-function(x, var1, var2){
data<-x %>%
group_by(outcome, {{var1}}, {{var2}}, .fitted, .se.fit) %>%
summarise(fit = first(.fitted),
se_fit = first(.se.fit)) %>%
mutate(lwr = fit - 2*se_fit,
upr = fit + 2*se_fit)
return(data)
}
plt_aug<-function(x, var1, var2 = NULL){
data<-summ_func(x, var1 = {{var1}}, var2 = {{var2}})
plt<-data %>%
ggplot(aes(y = fit, x = {{var1}}, col = {{var2}}))+
# geom_pointrange(aes(ymin = lwr, ymax = upr, x = {{var1}}), fatten = 0.75)+
geom_line()+
theme_minimal()
return(plt)
}
# All pairs of intercations possibles
## Age*Sex = Age + Sex + Age*Sex
mv_reg_int_age_sex <- dados_glm %>%
select(outcome, age, sexo) %>% ## define your dataset
glm(formula = outcome ~ age*sexo,
family = binomial(link = logit))
## Age*Race = Age + Race + Age*Race
mv_reg_int_age_raca <- dados_glm %>%
select(outcome, age, raca) %>% ## define your dataset
glm(formula = outcome ~ age + raca + age:raca,
family = binomial(link = logit))
# ## Age*State = Age + State + Age*State
mv_reg_int_age_state <- dados_glm %>%
dplyr::select(outcome, age, state) %>% ## define your dataset
glm(formula = outcome ~ age + state + age:state,
family = binomial(link = logit))
## Sex*Race = Sex + Race + Sex*Race
mv_reg_int_sex_raca <- dados_glm %>%
select(outcome, raca, sexo) %>% ## define your dataset
glm(formula = outcome ~ sexo + raca + sexo:raca,
family = binomial(link = logit))
## Sex*State = Sex + State + Sex*State
mv_reg_int_sex_state <- dados_glm %>%
select(outcome, state, sexo) %>% ## define your dataset
glm(formula = outcome ~ sexo + state + sexo:state,
family = binomial(link = logit))
# ## Race*State = Race + State + Race*State
mv_reg_int_raca_state <- dados_glm %>%
select(outcome, state, raca) %>% ## define your dataset
glm(formula = outcome ~ raca + state + raca:state,
family = binomial(link = logit))
# Tables
mv_tab_age_sex<-mv_reg_int_age_sex %>%
parameters(exponentiate = TRUE, df_method = "wald") # Create a table
mv_aug_age_sex<-mv_reg_int_age_sex %>%
augment(se_fit = TRUE, exponetiate = TRUE, type.predict = "response") %>%
summ_func(var1 = age, var2 = sexo)
mv_tab_age_raca<-mv_reg_int_age_raca %>%
parameters(exponentiate = TRUE, df_method = "wald") # Create a table
mv_aug_age_raca<-mv_reg_int_age_raca %>%
augment(se_fit = TRUE, exponetiate = TRUE, type.predict = "response") %>%
summ_func(var1 = age, var2 = raca)
mv_tab_age_state<-mv_reg_int_age_state %>%
parameters(exponentiate = TRUE, df_method = "wald") # Create a table
mv_aug_age_state<-mv_reg_int_age_state %>%
augment(se_fit = TRUE, exponetiate = TRUE, type.predict = "response") %>%
summ_func(var1 = age, var2 = state)
mv_tab_sex_raca<-mv_reg_int_sex_raca %>%
parameters(exponentiate = TRUE, df_method = "wald") # Create a table
mv_aug_sex_raca<-mv_reg_int_sex_raca %>%
augment(se_fit = TRUE, exponetiate = TRUE, type.predict = "response") %>%
summ_func(var1 = sexo, var2 = raca)
mv_tab_sex_state<-mv_reg_int_sex_state %>%
parameters(exponentiate = TRUE, df_method = "wald") # Create a table
mv_aug_sex_state<-mv_reg_int_sex_state %>%
augment(se_fit = TRUE, exponetiate = TRUE, type.predict = "response") %>%
summ_func(var1 = sexo, var2 = state)
mv_tab_raca_state<-mv_reg_int_raca_state %>%
parameters(exponentiate = TRUE, df_method = "wald") # Create a table
mv_aug_raca_state<-mv_reg_int_raca_state %>%
augment(se_fit = TRUE, exponetiate = TRUE, type.predict = "response") %>%
summ_func(var1 = raca, var2 = state)
We proceed by plotting the Odds Ratio and the model fit for each of the interaction model, by pairs.
plt_mv_int_age_sex<- mv_tab_age_sex %>% # Plotting the model
plt_univar()+
labs(title = "Multivariate Model, Interaction Terms",
subtitle = "Age + Sex + Age:Sex",
caption = "References Values: Age deviance from mean; Sex 'Feminino'")
plt_aug_mv_int_age_sex <- mv_aug_age_sex %>%
plt_aug(var1 = age, var2 = sexo)+
labs(title = "Multivariate Model, Interaction Terms",
subtitle = "Age + Sex + Age:Sex",
caption = "References Values: Age deviance from mean; Sex 'Feminino'")
plt_mv_int_age_sex
plt_aug_mv_int_age_sex
From this we saw, there are a difference in Odds due to age from each of the sexes. This suggest, after 60 years old, men has an odds greater to die when hospitalized by a SARI. This is greater than of the odds to female sex, men has an odds of 1.1645157.
plt_mv_int_age_raca<- mv_tab_age_raca %>% # Plotting the model
plt_univar()+
labs(title = "Multivariate Model, Interaction Terms",
subtitle = "Age + Race + Age:Race",
caption = "References Values: Age deviance from mean; Race 'Branca'")
plt_aug_mv_int_age_raca<-mv_aug_age_raca %>%
plt_aug(var1 = age, var2 = raca)+
scale_color_viridis_d_interactive(option = "cividis")+
labs(title = "Multivariate Model, Interaction Terms",
subtitle = "Age + Race + Age:Race",
caption = "References Values: Age deviance from mean; Race 'Branca'")
plt_mv_int_age_raca
plt_aug_mv_int_age_raca
In the model with Race interaction we have different levels of odds ratio fo reach of Race, this is not surprisingly due to different prevalence of commodities on each population. Every interaction terms suggest no effects between Age and Race. But for each of the Races has different Odds Ratios, black people, in Brazil has an Odds of dying due to an hospitalization of 1.39, for indigenous 1.34, with to a upper bound to confidence interval of 1.4444977. Only for the yellow people, is there an Odds that is less due to Odds of Age.
plt_mv_int_age_state<- mv_tab_age_state %>% # Plotting the model
plt_univar()+
labs(title = "Multivariate Model, Interaction Terms",
subtitle = "Age + State + Age:State",
caption = "References Values: Age deviance from mean; State 'São Paulo, SP'")
plt_aug_mv_int_age_state<-mv_aug_age_state %>%
plt_aug(var1 = age, var2 = state)+
scale_color_viridis_d(option = "viridis")+
labs(title = "Multivariate Model, Interaction Terms",
subtitle = "Age + State + Age:State",
caption = "References Values: Age deviance from mean; State 'São Paulo, SP'")
plt_mv_int_age_state
plt_aug_mv_int_age_state
For this model of interaction between Age and State variables, we have a huge variety in the odds of mortality for each State. This is mostly showed when look over the line of Age 0, in a hospitalization by SARI for patients with 60 years old, in different States will departure in its odds to die, only by being hospitalized in different States.
plt_mv_int_sex_raca<- mv_tab_sex_raca %>% # Plotting the model
plt_univar()+
labs(title = "Multivariate Model, Interaction Terms",
subtitle = "Sex + Race + Sex:Race",
caption = "References Values: Sex 'Feminino'; Race 'Branca'")
plt_aug_mv_int_sex_raca<-mv_aug_sex_raca %>%
plt_aug(var1 = raca, var2 = sexo)+
geom_pointrange(aes(ymin = lwr, ymax = upr, x = raca, fatten = 0.75))+
labs(title = "Multivariate Model, Interaction Terms",
subtitle = "Sex + Race + Sex:Race",
caption = "References Values: Sex 'Feminino'; Race 'Branca'")
plt_mv_int_sex_raca
plt_aug_mv_int_sex_raca
In this model we see there Races that has great differences between each sexes, this suggest two things, for example, for Black people, the Odds difference between sexes are little, so Black people has in practice the same odds of dying due to a hospitalization of SARI. On the other side, we have for Indigenous people a huge difference in the odds of dying by an hospitalization.
plt_mv_int_sex_state<- mv_tab_sex_state %>% # Plotting the model
plt_univar()+
labs(title = "Multivariate Model, Interaction Terms",
subtitle = "Sex + State + Sex:State",
caption = "References Values: Sex 'Feminino'; State 'São Paulo, SP'")
plt_aug_int_sex_state<-mv_aug_sex_state %>%
plt_aug(var1 = state, var2 = sexo)+
geom_pointrange(aes(ymin = lwr, ymax = upr, x = state),
position = position_jitterdodge(),
fatten = 0.75)+
labs(title = "Multivariate Model, Interaction Terms",
subtitle = "Sex + State + Sex:State",
caption = "References Values: Sex 'Feminino'; State 'São Paulo, SP'")
plt_mv_int_sex_state
plt_aug_int_sex_state
The differences of fit between sexes varies greatly over states, there states alone has a greater effect on mortality than only being of different sex. The contrary occurs too, the state variable is not more determinant in the mortality than the differences in sexes, of the same state. The state variable and sex variable, account for two types of effects. The state variable accounts for the effects of access to public health systems on the mortality and the sex variable accounts for social differences on the mortality outcome, the interaction between those variables suggests on how public policies can or can not increase (decrease) the social determinants on mortality.
plt_mv_int_raca_state<- mv_tab_raca_state %>% # Plotting the model
plt_univar()+
labs(title = "Multivariate Model, Interaction Terms",
subtitle = "Race + State + Race*State",
caption = "References Values: Race 'Branca'; State 'São Paulo, SP'")
plt_aug_mv_int_raca_state<-mv_aug_raca_state %>%
plt_aug(var1 = state, var2 = raca)+
geom_pointrange(aes(ymin = lwr, ymax = upr, x = state),
position = position_jitterdodge(),
fatten = 0.75)+
scale_color_viridis_d_interactive(option = "cividis")+
labs(title = "Multivariate Model, Interaction Terms",
subtitle = "Race + State + Race*State",
caption = "References Values: Race 'Branca'; State 'São Paulo, SP'")
plt_mv_int_raca_state
plt_aug_mv_int_raca_state
In this model we access again on how social and public health system access can interact, producing a sort of different odds to the outcome of mortality. Through all States we have a very great range of situation, but we have too some consistent odds, for the Black people we have consistently higher odds over all the States. For the Indigenous people we have, all over the States, broad confidence intervals, which is related to less incidence on this population, which is already carries greater vulnerabilities in general.
We produce so a selection of variables to this models with the stepwise algorithm:
AIC(mv_reg_int_age_sex, mv_reg_int_age_raca, mv_reg_int_age_state, mv_reg_int_sex_raca,
mv_reg_int_sex_state, mv_reg_int_raca_state)
df AIC
mv_reg_int_age_sex 4 1416383
mv_reg_int_age_raca 10 1414177
mv_reg_int_age_state 54 1394917
mv_reg_int_sex_raca 10 1559976
mv_reg_int_sex_state 54 1539060
mv_reg_int_raca_state 135 1537396
In this we see Age has more interaction effects with Race than with Sex, this is not surprisingly. As we saw, Race plays effects modification on the exposure to age for the outcome of mortality, in the tutorials for simple Odds Ratio calculations.
Now we want to know how the other levels of each categories, like Sex, Race and State, can act on each of the models. A very common question to address is: In a model, like outcome ~ age+sex+age:sex, where the reference level to sex is the female, which are and how much it is the coefficient of Age on the male sex? To do so we just need to sum the age coefficient with the interaction term of age and sex, like age+age:sex, if this is 0 (The Hypothesis to test here is: age+age:sexMale = 0), we can say there is no effect of age on the male sex if this difference is not significant. Or in other way, if we repeat the model but changing the reference level of sex to male, the linear coefficient will be the effect of age on the male sex now. Sometimes it is not easy to repeat this for each level of categorical variable, for Sex which is 2 leveled it its but for State which in Brazil will have 27 levels, or Race, or others variables?
A very clever way to do this is by the function glht from multicomp package, it is literally sum the coefficients but gives a confidence interval and test if this difference is significant.
library(multcomp)
sexo_ref <- as.character(unique(dados_glm$sexo))
sexo_ref <- sexo_ref[sexo_ref != "Feminino"]
raca_ref <- as.character(unique(dados_glm$raca))
raca_ref <- raca_ref[raca_ref != "Branca"]
state_ref <- as.character(unique(dados_glm$state))
state_ref <- state_ref[state_ref != "SP"]
glht_age_sex <- exp(confint(glht(model = mv_reg_int_age_sex, linfct = c(str_c("age+age:sexo",
sexo_ref, "=0"), str_c("sexo", sexo_ref, "+age:sexo", sexo_ref, "=0"))))$confint) %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "term")
glht_age_raca <- exp(confint(glht(model = mv_reg_int_age_raca, linfct = c(str_c("age+age:raca",
raca_ref, "=0"), str_c("raca", raca_ref, "+age:raca", raca_ref, "=0"))))$confint) %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "term")
glht_age_state <- exp(confint(glht(model = mv_reg_int_age_state, linfct = c(str_c("age+age:state",
state_ref, "=0"), str_c("state", state_ref, "+age:state", state_ref,
"=0"))))$confint) %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "term")
glht_sex_raca <- exp(confint(glht(model = mv_reg_int_sex_raca, linfct = c(str_c("sexoMasculino+sexoMasculino:raca",
raca_ref, "=0"), str_c("raca", raca_ref, "+sexoMasculino:raca", raca_ref,
"=0"))))$confint) %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "term")
glht_sex_state <- exp(confint(glht(model = mv_reg_int_sex_state, linfct = c(str_c("sexoMasculino+sexoMasculino:state",
state_ref, "=0"), str_c("state", state_ref, "+sexoMasculino:state",
state_ref, "=0"))))$confint) %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "term")
glht_raca_state <- exp(confint(glht(model = mv_reg_int_raca_state, linfct = c(str_c("raca",
raca_ref, "+raca", raca_ref, ":state", state_ref, "=0"), str_c("state",
state_ref, "+raca", raca_ref, ":state", state_ref, "=0"))))$confint) %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "term")
plt_glht <- function(x) {
plt <- x %>%
mutate(color = ifelse(Estimate > 1, "cyan3", "firebrick1")) %>%
ggplot(aes(y = reorder(term, Estimate), x = Estimate, xmin = lwr,
xmax = upr, col = color)) + geom_pointrangeh() + geom_vline(xintercept = 1,
lty = "dotted") + theme_minimal() + theme(plot.title = element_text(face = "bold"),
axis.title.y = element_blank(), legend.position = "none") + scale_color_manual(values = c("firebrick1",
"cyan3")) + labs(x = "Odds Ratio")
return(plt)
}
plt_glht_age_sex <- glht_age_sex %>%
plt_glht() + labs(title = "Contrats Terms", subtitle = "Age + Sex + Age:Sex",
caption = "References Values: Sex 'Feminino'")
plt_glht_age_raca <- glht_age_raca %>%
plt_glht() + labs(title = "Contrats Terms", subtitle = "Age + Race + Age:Race",
caption = "References Values: Race 'Branca'")
plt_glht_age_state <- glht_age_state %>%
plt_glht() + labs(title = "Contrats Terms", subtitle = "Age + State + Age:State",
caption = "References Values: State 'São Paulo'")
plt_glht_sex_raca <- glht_sex_raca %>%
plt_glht() + labs(title = "Contrats Terms", subtitle = "Sex + Race + Sex:Race",
caption = "References Values: Sex 'Feminino'; Race 'Branco'")
plt_glht_sex_state <- glht_sex_state %>%
plt_glht() + labs(title = "Contrats Terms", subtitle = "Sex + State + Sex:State",
caption = "References Values: Sex 'Feminino'; State 'São Paulo'")
plt_glht_raca_state <- glht_raca_state %>%
plt_glht() + labs(title = "Contrats Terms", subtitle = "Race + State + Race:State",
caption = "References Values: Race 'Branco'; State 'São Paulo'")
plt_glht_age_sex
plt_glht_age_raca
plt_glht_age_state
plt_glht_sex_raca
plt_glht_sex_state
After we test all the contrasts for each of the level of each of pairwise interactions we have found, how and how much are the effect. If the estimate for this are in blue, it is protective, if it is in red is non-protective.