MODELOS MIXTOS

#1.- BIBLIOTECAS

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.2.3

## Warning: package 'ggplot2' was built under R version 4.2.3

## Warning: package 'tibble' was built under R version 4.2.3

## Warning: package 'tidyr' was built under R version 4.2.3

## Warning: package 'readr' was built under R version 4.2.3

## Warning: package 'purrr' was built under R version 4.2.3

## Warning: package 'dplyr' was built under R version 4.2.3

## Warning: package 'forcats' was built under R version 4.2.3

## Warning: package 'lubridate' was built under R version 4.2.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors

library(broom.mixed)

## Warning: package 'broom.mixed' was built under R version 4.2.3

library(ggplot2)

#2.- BASE DE DATOS

Modelo lineal mixto proporciona la flexibilidad necesaria para modelar no sólo las medias, sino también las varianzas y covarianzas de los datos.

Es un experimentos con plantas, con diferentes dosis de carbono y cuando CO2 se va a captar ésa es la variable respuesta.

A diferentes concentraciones, la que capta mayor Co2 se usa ruido para tener una variablidad en los datos, además de que se pondra distribución uniforme.

set.seed(123)
data("CO2")
uptake2 = CO2$uptake + runif(84, 0.02, 0.04)
CO2$uptake = uptake2

head(CO2, n = 10)

## Grouped Data: uptake ~ conc | Plant
##    Plant   Type  Treatment conc   uptake
## 1    Qn1 Quebec nonchilled   95 16.02575
## 2    Qn1 Quebec nonchilled  175 30.43577
## 3    Qn1 Quebec nonchilled  250 34.82818
## 4    Qn1 Quebec nonchilled  350 37.23766
## 5    Qn1 Quebec nonchilled  500 35.33881
## 6    Qn1 Quebec nonchilled  675 39.22091
## 7    Qn1 Quebec nonchilled 1000 39.73056
## 8    Qn2 Quebec nonchilled   95 13.63785
## 9    Qn2 Quebec nonchilled  175 27.33103
## 10   Qn2 Quebec nonchilled  250 37.12913

summary(CO2)

##      Plant             Type         Treatment       conc          uptake      
##  Qn1    : 7   Quebec     :42   nonchilled:42   Min.   :  95   Min.   : 7.735  
##  Qn2    : 7   Mississippi:42   chilled   :42   1st Qu.: 175   1st Qu.:17.931  
##  Qn3    : 7                                    Median : 350   Median :28.328  
##  Qc1    : 7                                    Mean   : 435   Mean   :27.243  
##  Qc3    : 7                                    3rd Qu.: 675   3rd Qu.:37.156  
##  Qc2    : 7                                    Max.   :1000   Max.   :45.538  
##  (Other):42

ggplot(CO2, aes(x=conc, y=uptake2)) + geom_point(color="blue", size=1)

ggplot(CO2, aes(x=conc, y=uptake2, color = Type))+
  geom_point(aes(shape = Treatment))+
  geom_path(aes(group = Plant, lty= Treatment))+
  theme_light()

fit1 <- lm(uptake2 ~conc + Type + Treatment, data = CO2)

library(lme4)

## Warning: package 'lme4' was built under R version 4.2.3

## Loading required package: Matrix

## Warning: package 'Matrix' was built under R version 4.2.3

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

fit2 <- lmer(uptake2 ~conc + Type + Treatment + (1 | Plant), data = CO2)

broom::glance(fit1)

## # A tibble: 1 × 12
##   r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1     0.684         0.672  6.19      57.7 5.88e-20     3  -270.  551.  563.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

broom::tidy(fit1)

## # A tibble: 4 × 5
##   term             estimate std.error statistic  p.value
##   <chr>               <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)       29.3      1.54        19.0  1.94e-31
## 2 conc               0.0177   0.00230      7.72 2.88e-11
## 3 TypeMississippi  -12.7      1.35        -9.37 1.66e-14
## 4 Treatmentchilled  -6.86     1.35        -5.08 2.47e- 6

Todos los efectos son significativos, dado que todos esos p valores son 0.000 < 0.05.

broom.mixed::glance(fit1)

## # A tibble: 1 × 12
##   r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1     0.684         0.672  6.19      57.7 5.88e-20     3  -270.  551.  563.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

broom.mixed::glance(fit2)

## # A tibble: 1 × 7
##    nobs sigma logLik   AIC   BIC REMLcrit df.residual
##   <int> <dbl>  <dbl> <dbl> <dbl>    <dbl>       <int>
## 1    84  6.00  -272.  556.  571.     544.          78

broom.mixed::tidy(fit2)

## # A tibble: 6 × 6
##   effect   group    term             estimate std.error statistic
##   <chr>    <chr>    <chr>               <dbl>     <dbl>     <dbl>
## 1 fixed    <NA>     (Intercept)       29.3      1.72        17.0 
## 2 fixed    <NA>     conc               0.0177   0.00223      7.96
## 3 fixed    <NA>     TypeMississippi  -12.7      1.64        -7.72
## 4 fixed    <NA>     Treatmentchilled  -6.86     1.64        -4.18
## 5 ran_pars Plant    sd__(Intercept)    1.71    NA           NA   
## 6 ran_pars Residual sd__Observation    6.00    NA           NA

Se va a poder ver las variables aleatorias para cada planta e intercepto para efectos fijos y aleatorios.

broom.mixed::tidy(fit2) %>%view()

Fit1 <- lm(uptake2 ~I(log(conc))+ Type: Treatment, data = CO2)

Fit2 <- lmer(uptake2 ~I(log(conc))+ Type: Treatment + (1 | Plant), data = CO2)

## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient

Conclusión: El tipo con el tratamiento tiene una interacción, hay diferencias entre los tratamientos según el ggplot.

broom::glance(Fit1)

## # A tibble: 1 × 12
##   r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1     0.823         0.814  4.67      91.7 7.02e-29     4  -246.  504.  519.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

broom::tidy(Fit1)

## # A tibble: 6 × 5
##   term                                estimate std.error statistic   p.value
##   <chr>                                  <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)                           -33.5      4.08      -8.22  3.21e-12
## 2 I(log(conc))                            8.48     0.678     12.5   2.04e-20
## 3 TypeQuebec:Treatmentnonchilled         19.5      1.44      13.6   2.61e-22
## 4 TypeMississippi:Treatmentnonchilled    10.1      1.44       7.04  6.35e-10
## 5 TypeQuebec:Treatmentchilled            15.9      1.44      11.1   9.95e-18
## 6 TypeMississippi:Treatmentchilled       NA       NA         NA    NA

broom.mixed::glance(Fit1)

## # A tibble: 1 × 12
##   r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1     0.823         0.814  4.67      91.7 7.02e-29     4  -246.  504.  519.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

broom.mixed::glance(Fit2)

## # A tibble: 1 × 7
##    nobs sigma logLik   AIC   BIC REMLcrit df.residual
##   <int> <dbl>  <dbl> <dbl> <dbl>    <dbl>       <int>
## 1    84  4.50  -241.  496.  513.     482.          77

broom::tidy(Fit2)

## # A tibble: 7 × 6
##   effect   group    term                            estimate std.error statistic
##   <chr>    <chr>    <chr>                              <dbl>     <dbl>     <dbl>
## 1 fixed    <NA>     (Intercept)                       -33.5      4.02      -8.34
## 2 fixed    <NA>     I(log(conc))                        8.48     0.654     13.0 
## 3 fixed    <NA>     TypeQuebec:Treatmentnonchilled     19.5      1.83      10.6 
## 4 fixed    <NA>     TypeMississippi:Treatmentnonch…    10.1      1.83       5.52
## 5 fixed    <NA>     TypeQuebec:Treatmentchilled        15.9      1.83       8.69
## 6 ran_pars Plant    sd__(Intercept)                     1.47    NA         NA   
## 7 ran_pars Residual sd__Observation                     4.50    NA         NA

broom.mixed::tidy(Fit2) %>% view()

broom.mixed::tidy(Fit2)

## # A tibble: 7 × 6
##   effect   group    term                            estimate std.error statistic
##   <chr>    <chr>    <chr>                              <dbl>     <dbl>     <dbl>
## 1 fixed    <NA>     (Intercept)                       -33.5      4.02      -8.34
## 2 fixed    <NA>     I(log(conc))                        8.48     0.654     13.0 
## 3 fixed    <NA>     TypeQuebec:Treatmentnonchilled     19.5      1.83      10.6 
## 4 fixed    <NA>     TypeMississippi:Treatmentnonch…    10.1      1.83       5.52
## 5 fixed    <NA>     TypeQuebec:Treatmentchilled        15.9      1.83       8.69
## 6 ran_pars Plant    sd__(Intercept)                     1.47    NA         NA   
## 7 ran_pars Residual sd__Observation                     4.50    NA         NA

Se pueden agregar familiar con lmer4: se escoge otra base de datos de peso de pollos, se va a tener también la identidad de un pollo, además de peso, tiempo y dieta, como se muestra a continuación:

data("ChickWeight")
view(ChickWeight)
ChickWeight$Chick %>% length()

## [1] 578

ChickWeight$Chick %>% unique() %>% length()

## [1] 50

Fit1_Poisson <- glm(weight ~Diet:Time, data = ChickWeight, family = poisson)

Fit1_Poisson

## 
## Call:  glm(formula = weight ~ Diet:Time, family = poisson, data = ChickWeight)
## 
## Coefficients:
## (Intercept)   Diet1:Time   Diet2:Time   Diet3:Time   Diet4:Time  
##     3.85899      0.06557      0.07537      0.08624      0.08228  
## 
## Degrees of Freedom: 577 Total (i.e. Null);  573 Residual
## Null Deviance:       22440 
## Residual Deviance: 4038  AIC: 7791

summary(Fit1_Poisson)

## 
## Call:
## glm(formula = weight ~ Diet:Time, family = poisson, data = ChickWeight)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -12.0552   -1.1072   -0.3559    1.2662    8.3815  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 3.8589865  0.0093064  414.66   <2e-16 ***
## Diet1:Time  0.0655659  0.0007219   90.82   <2e-16 ***
## Diet2:Time  0.0753719  0.0007680   98.14   <2e-16 ***
## Diet3:Time  0.0862448  0.0007292  118.28   <2e-16 ***
## Diet4:Time  0.0822816  0.0007564  108.78   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 22444.4  on 577  degrees of freedom
## Residual deviance:  4037.8  on 573  degrees of freedom
## AIC: 7790.6
## 
## Number of Fisher Scoring iterations: 4

ggplot(ChickWeight, aes(x=Time, y=weight))+ geom_point(aes(color = Diet))+
  geom_path(aes(color=Diet, group=Chick))

Se observa que el grupo se va a dar por el pollo. Cada pollo, a pesar de tener dietas similares desde el punto de partida, van a tener comportamientos distintos.

Fit2_Poisson <- glm(weight ~ Diet:Time, data = ChickWeight, family=poisson)

Fit1_Poisson <- glmer(weight ~ Diet:Time+ (1|Chick), data = ChickWeight, family = poisson)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?

broom::tidy(Fit1_Poisson)

## # A tibble: 6 × 7
##   effect   group term            estimate std.error statistic p.value
##   <chr>    <chr> <chr>              <dbl>     <dbl>     <dbl>   <dbl>
## 1 fixed    <NA>  (Intercept)       3.84     0.0315      122.        0
## 2 fixed    <NA>  Diet1:Time        0.0666   0.00105      63.7       0
## 3 fixed    <NA>  Diet2:Time        0.0749   0.00129      57.9       0
## 4 fixed    <NA>  Diet3:Time        0.0868   0.00123      70.5       0
## 5 fixed    <NA>  Diet4:Time        0.0763   0.00126      60.4       0
## 6 ran_pars Chick sd__(Intercept)   0.213   NA            NA        NA

broom.mixed::tidy(Fit2_Poisson)

## # A tibble: 5 × 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)   3.86    0.00931      415.        0
## 2 Diet1:Time    0.0656  0.000722      90.8       0
## 3 Diet2:Time    0.0754  0.000768      98.1       0
## 4 Diet3:Time    0.0862  0.000729     118.        0
## 5 Diet4:Time    0.0823  0.000756     109.        0

view(broom.mixed::tidy(Fit2_Poisson))

view(broom::tidy(Fit1_Poisson))

#3.- INFO DE LA SESIÓN

sesion_info <- devtools::session_info()
dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
)

## # A tibble: 91 × 3
##    package     loadedversion source        
##    <chr>       <chr>         <chr>         
##  1 backports   1.4.1         CRAN (R 4.2.0)
##  2 boot        1.3-28        CRAN (R 4.2.2)
##  3 broom       1.0.5         CRAN (R 4.2.3)
##  4 broom.mixed 0.2.9.4       CRAN (R 4.2.3)
##  5 bslib       0.4.2         CRAN (R 4.2.2)
##  6 cachem      1.0.6         CRAN (R 4.2.2)
##  7 callr       3.7.3         CRAN (R 4.2.3)
##  8 cli         3.6.0         CRAN (R 4.2.2)
##  9 codetools   0.2-18        CRAN (R 4.2.2)
## 10 colorspace  2.1-0         CRAN (R 4.2.3)
## # ℹ 81 more rows

MODELOS MIXTOS

2023-09-21