Packages

# if (!require("pacman")) install.packages("pacman") # run in a new installation
pacman::p_load(tidyverse,  # for data science
               janitor,    # for data cleaning 
               gt,         # for tables
               ggpubr,     # for graphs publication themes
               gtsummary,  # for data summaries
            #  epitool,    # for Epi
            #  pubh,       # for public health
               finalfit,   # for model reporting
               naniar,     # for NAs management
               cowplot,    # for various graphs 
               santoku,    # to create new vars 
               ggsignif,   # to add sign values to ggplots
            nlme, 
               GGally      # for regression diagnostic plots
               )

Graph theme

theme_set(theme_minimal())

Dataset

df <- read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vQx0EAzcq52X0nGCMkBjG7xWj8mvMIFaoVFIJynfeVuH4bYMW4Hua2zQmefpAb1mLT7S5ZP07ILmOx4/pub?gid=1811895835&single=true&output=csv")
## Parsed with column specification:
## cols(
##   n = col_double(),
##   genero = col_character(),
##   edad = col_double(),
##   aguja = col_double(),
##   biotipo = col_character(),
##   lado = col_character(),
##   `Ancho (cm)` = col_double(),
##   `Alto (cm)` = col_double()
## )
df <- df %>% 
  janitor::clean_names(case = "upper_camel") 
table(df$Biotipo)
## 
## euriporosopo leptoprosopo  mesoprosopo 
##           58           58           66
df <- df %>%
  mutate(
    Biotipo = str_replace_all(Biotipo, "euriporosopo", "Euriprosopo"),
    Biotipo = str_replace_all(Biotipo, "leptoprosopo", "Leptoprosopo"),
    Biotipo = str_replace_all(Biotipo, "mesoprosopo", "Mesoprosopo"),
    Biotipo = fct_relevel(Biotipo, "Euriprosopo", "Mesoprosopo")
  ) %>%
  mutate(
    Genero = str_replace_all(Genero, "fem", "Femenino"),
    Genero = str_replace_all(Genero, "masc", "Masculino")
  )

EDA

df %>% 
  select(-N) %>% 
  gtsummary::tbl_summary(by = Biotipo, 
                         # change statistics printed in table
    statistic = list(all_continuous() ~ "{mean} ({sd})",
                     all_categorical() ~ "{n}  ({p}%)")) %>% 
  gtsummary::bold_labels() 
Characteristic Euriprosopo, N = 581 Mesoprosopo, N = 661 Leptoprosopo, N = 581
Genero
Femenino 33 (57%) 34 (52%) 26 (45%)
Masculino 25 (43%) 32 (48%) 32 (55%)
Edad 39 (14) 38 (15) 25 (9)
Aguja 26.1 (3.3) 25.3 (3.6) 22.5 (3.4)
Lado
derecho 25 (43%) 35 (53%) 36 (62%)
izquierdo 33 (57%) 31 (47%) 22 (38%)
AnchoCm 14.11 (1.41) 12.97 (1.03) 12.05 (0.90)
AltoCm 12.99 (1.20) 13.26 (0.95) 13.68 (1.02)

1 Statistics presented: n (%); mean (SD)

df %>%
  ggplot(aes(x = Aguja,
             color = Biotipo,
             fill = Biotipo)) +
  geom_histogram(position = "identity", bins = 12) +
  facet_grid(Biotipo ~ Genero)

## Aguja por edad

df %>% 
  ggplot(aes(x = Edad, 
             y = Aguja, 
             fill = Biotipo, 
             color = Biotipo)) + 
  geom_jitter() +
  facet_grid(.~Genero) + 
  geom_smooth() + 
  labs(title = "Long aguja por edad y sexo", 
       y = "Long aguja (mm)", 
       x = "Edad")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

df %>% 
  ggplot(aes(x = Aguja, 
             fill = Lado)) + 
  geom_histogram(bins = 11) + 
  facet_grid(Lado~Genero)

Efecto del Biotipo

df %>% 
  ggplot(aes(x = Biotipo, 
             y = Aguja, 
             fill = Genero)) + 
  geom_boxplot() + 
  labs(title = "Longitud Aguja según Biotipo y sexo", 
       x = "Biotipo", 
       y = "Long Aguja mm", 
       fill = "Sexo") + 
  stat_compare_means(comparisons = list( c("Euriprosopo", "Mesoprosopo"), c("Mesoprosopo", "Leptoprosopo"), c("Euriprosopo", "Leptoprosopo") ))

Modelo sin ajustar

df %>% 
  with(lm(Aguja ~ Biotipo)) %>% 
  summary()
## 
## Call:
## lm(formula = Aguja ~ Biotipo)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2879  -2.2879  -0.4569   2.5431   9.9310 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          26.0690     0.4544  57.374  < 2e-16 ***
## BiotipoMesoprosopo   -0.7811     0.6228  -1.254    0.211    
## BiotipoLeptoprosopo  -3.6121     0.6426  -5.621 7.17e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.46 on 179 degrees of freedom
## Multiple R-squared:  0.1647, Adjusted R-squared:  0.1553 
## F-statistic: 17.64 on 2 and 179 DF,  p-value: 1.014e-07

Ajustado por sexo

df %>% 
  with(lm(Aguja ~ Biotipo +  Genero)) %>% 
  summary()
## 
## Call:
## lm(formula = Aguja ~ Biotipo + Genero)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.8475  -1.9439  -0.2723   2.2388   9.3130 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          25.6007     0.5009  51.111  < 2e-16 ***
## BiotipoMesoprosopo   -0.8395     0.6174  -1.360   0.1756    
## BiotipoLeptoprosopo  -3.7432     0.6393  -5.855 2.25e-08 ***
## GeneroMasculino       1.0863     0.5105   2.128   0.0347 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.427 on 178 degrees of freedom
## Multiple R-squared:  0.1854, Adjusted R-squared:  0.1717 
## F-statistic:  13.5 on 3 and 178 DF,  p-value: 5.59e-08

ajustado por Genero y lado

df %>% 
  with(lm(Aguja ~ Biotipo +  Genero +  Lado)) %>% 
  summary()
## 
## Call:
## lm(formula = Aguja ~ Biotipo + Genero + Lado)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.6380  -2.0495  -0.2833   2.3620   9.1366 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          25.3631     0.5769  43.968  < 2e-16 ***
## BiotipoMesoprosopo   -0.7961     0.6201  -1.284   0.2009    
## BiotipoLeptoprosopo  -3.6599     0.6476  -5.651 6.27e-08 ***
## GeneroMasculino       1.0710     0.5113   2.095   0.0376 *  
## Ladoizquierdo         0.4293     0.5155   0.833   0.4061    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.43 on 177 degrees of freedom
## Multiple R-squared:  0.1886, Adjusted R-squared:  0.1702 
## F-statistic: 10.28 on 4 and 177 DF,  p-value: 1.645e-07

Ajustado por sexo y lado

df %>% 
  with(lm(Aguja ~ Biotipo +  Genero +  Lado)) %>% 
  gtsummary::tbl_regression()
Characteristic Beta 95% CI1 p-value
Biotipo
Euriprosopo
Mesoprosopo -0.80 -2.0, 0.43 0.2
Leptoprosopo -3.7 -4.9, -2.4 <0.001
Genero
Femenino
Masculino 1.1 0.06, 2.1 0.038
Lado
derecho
izquierdo 0.43 -0.59, 1.4 0.4

1 CI = Confidence Interval

Interaction plot

df  %>% 
  ggpubr::ggline(x = "Biotipo", 
                 y = "Aguja", 
                 color = "Genero",
       add = c("mean_se", "dotplot"),
       palette = c("#00AFBB", "#E7B800")) + 
  labs(title = "Longitud Aguja por Biotipo y sexo", 
       y = "Long Aguja (mm)", 
       x = "Biotipo", 
       color = "Sexo")
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.