#1.- BIBLIOTECAS
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'purrr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'forcats' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(broom.mixed)
## Warning: package 'broom.mixed' was built under R version 4.2.3
library(ggplot2)
#2.- BASE DE DATOS
Modelo lineal mixto proporciona la flexibilidad necesaria para modelar no sólo las medias, sino también las varianzas y covarianzas de los datos.
Es un experimentos con plantas, con diferentes dosis de carbono y cuando CO2 se va a captar ésa es la variable respuesta.
A diferentes concentraciones, la que capta mayor Co2 se usa ruido para tener una variablidad en los datos, además de que se pondra distribución uniforme.
set.seed(123)
data("CO2")
uptake2 = CO2$uptake + runif(84, 0.02, 0.04)
CO2$uptake = uptake2
head(CO2, n = 10)
## Grouped Data: uptake ~ conc | Plant
## Plant Type Treatment conc uptake
## 1 Qn1 Quebec nonchilled 95 16.02575
## 2 Qn1 Quebec nonchilled 175 30.43577
## 3 Qn1 Quebec nonchilled 250 34.82818
## 4 Qn1 Quebec nonchilled 350 37.23766
## 5 Qn1 Quebec nonchilled 500 35.33881
## 6 Qn1 Quebec nonchilled 675 39.22091
## 7 Qn1 Quebec nonchilled 1000 39.73056
## 8 Qn2 Quebec nonchilled 95 13.63785
## 9 Qn2 Quebec nonchilled 175 27.33103
## 10 Qn2 Quebec nonchilled 250 37.12913
summary(CO2)
## Plant Type Treatment conc uptake
## Qn1 : 7 Quebec :42 nonchilled:42 Min. : 95 Min. : 7.735
## Qn2 : 7 Mississippi:42 chilled :42 1st Qu.: 175 1st Qu.:17.931
## Qn3 : 7 Median : 350 Median :28.328
## Qc1 : 7 Mean : 435 Mean :27.243
## Qc3 : 7 3rd Qu.: 675 3rd Qu.:37.156
## Qc2 : 7 Max. :1000 Max. :45.538
## (Other):42
ggplot(CO2, aes(x=conc, y=uptake2)) + geom_point(color="blue", size=1)
ggplot(CO2, aes(x=conc, y=uptake2, color = Type))+
geom_point(aes(shape = Treatment))+
geom_path(aes(group = Plant, lty= Treatment))+
theme_light()
fit1 <- lm(uptake2 ~conc + Type + Treatment, data = CO2)
library(lme4)
## Warning: package 'lme4' was built under R version 4.2.3
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 4.2.3
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
fit2 <- lmer(uptake2 ~conc + Type + Treatment + (1 | Plant), data = CO2)
broom::glance(fit1)
## # A tibble: 1 × 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.684 0.672 6.19 57.7 5.88e-20 3 -270. 551. 563.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
broom::tidy(fit1)
## # A tibble: 4 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 29.3 1.54 19.0 1.94e-31
## 2 conc 0.0177 0.00230 7.72 2.88e-11
## 3 TypeMississippi -12.7 1.35 -9.37 1.66e-14
## 4 Treatmentchilled -6.86 1.35 -5.08 2.47e- 6
Todos los efectos son significativos, dado que todos esos p valores son 0.000 < 0.05.
broom.mixed::glance(fit1)
## # A tibble: 1 × 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.684 0.672 6.19 57.7 5.88e-20 3 -270. 551. 563.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
broom.mixed::glance(fit2)
## # A tibble: 1 × 7
## nobs sigma logLik AIC BIC REMLcrit df.residual
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 84 6.00 -272. 556. 571. 544. 78
broom.mixed::tidy(fit2)
## # A tibble: 6 × 6
## effect group term estimate std.error statistic
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 fixed <NA> (Intercept) 29.3 1.72 17.0
## 2 fixed <NA> conc 0.0177 0.00223 7.96
## 3 fixed <NA> TypeMississippi -12.7 1.64 -7.72
## 4 fixed <NA> Treatmentchilled -6.86 1.64 -4.18
## 5 ran_pars Plant sd__(Intercept) 1.71 NA NA
## 6 ran_pars Residual sd__Observation 6.00 NA NA
Se va a poder ver las variables aleatorias para cada planta e intercepto para efectos fijos y aleatorios.
broom.mixed::tidy(fit2) %>%view()
Fit1 <- lm(uptake2 ~I(log(conc))+ Type: Treatment, data = CO2)
Fit2 <- lmer(uptake2 ~I(log(conc))+ Type: Treatment + (1 | Plant), data = CO2)
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
Conclusión: El tipo con el tratamiento tiene una interacción, hay diferencias entre los tratamientos según el ggplot.
broom::glance(Fit1)
## # A tibble: 1 × 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.823 0.814 4.67 91.7 7.02e-29 4 -246. 504. 519.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
broom::tidy(Fit1)
## # A tibble: 6 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -33.5 4.08 -8.22 3.21e-12
## 2 I(log(conc)) 8.48 0.678 12.5 2.04e-20
## 3 TypeQuebec:Treatmentnonchilled 19.5 1.44 13.6 2.61e-22
## 4 TypeMississippi:Treatmentnonchilled 10.1 1.44 7.04 6.35e-10
## 5 TypeQuebec:Treatmentchilled 15.9 1.44 11.1 9.95e-18
## 6 TypeMississippi:Treatmentchilled NA NA NA NA
broom.mixed::glance(Fit1)
## # A tibble: 1 × 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.823 0.814 4.67 91.7 7.02e-29 4 -246. 504. 519.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
broom.mixed::glance(Fit2)
## # A tibble: 1 × 7
## nobs sigma logLik AIC BIC REMLcrit df.residual
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 84 4.50 -241. 496. 513. 482. 77
broom::tidy(Fit2)
## # A tibble: 7 × 6
## effect group term estimate std.error statistic
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 fixed <NA> (Intercept) -33.5 4.02 -8.34
## 2 fixed <NA> I(log(conc)) 8.48 0.654 13.0
## 3 fixed <NA> TypeQuebec:Treatmentnonchilled 19.5 1.83 10.6
## 4 fixed <NA> TypeMississippi:Treatmentnonch… 10.1 1.83 5.52
## 5 fixed <NA> TypeQuebec:Treatmentchilled 15.9 1.83 8.69
## 6 ran_pars Plant sd__(Intercept) 1.47 NA NA
## 7 ran_pars Residual sd__Observation 4.50 NA NA
broom.mixed::tidy(Fit2) %>% view()
broom.mixed::tidy(Fit2)
## # A tibble: 7 × 6
## effect group term estimate std.error statistic
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 fixed <NA> (Intercept) -33.5 4.02 -8.34
## 2 fixed <NA> I(log(conc)) 8.48 0.654 13.0
## 3 fixed <NA> TypeQuebec:Treatmentnonchilled 19.5 1.83 10.6
## 4 fixed <NA> TypeMississippi:Treatmentnonch… 10.1 1.83 5.52
## 5 fixed <NA> TypeQuebec:Treatmentchilled 15.9 1.83 8.69
## 6 ran_pars Plant sd__(Intercept) 1.47 NA NA
## 7 ran_pars Residual sd__Observation 4.50 NA NA
Se pueden agregar familiar con lmer4: se escoge otra base de datos de peso de pollos, se va a tener también la identidad de un pollo, además de peso, tiempo y dieta, como se muestra a continuación:
data("ChickWeight")
view(ChickWeight)
ChickWeight$Chick %>% length()
## [1] 578
ChickWeight$Chick %>% unique() %>% length()
## [1] 50
Fit1_Poisson <- glm(weight ~Diet:Time, data = ChickWeight, family = poisson)
Fit1_Poisson
##
## Call: glm(formula = weight ~ Diet:Time, family = poisson, data = ChickWeight)
##
## Coefficients:
## (Intercept) Diet1:Time Diet2:Time Diet3:Time Diet4:Time
## 3.85899 0.06557 0.07537 0.08624 0.08228
##
## Degrees of Freedom: 577 Total (i.e. Null); 573 Residual
## Null Deviance: 22440
## Residual Deviance: 4038 AIC: 7791
summary(Fit1_Poisson)
##
## Call:
## glm(formula = weight ~ Diet:Time, family = poisson, data = ChickWeight)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -12.0552 -1.1072 -0.3559 1.2662 8.3815
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.8589865 0.0093064 414.66 <2e-16 ***
## Diet1:Time 0.0655659 0.0007219 90.82 <2e-16 ***
## Diet2:Time 0.0753719 0.0007680 98.14 <2e-16 ***
## Diet3:Time 0.0862448 0.0007292 118.28 <2e-16 ***
## Diet4:Time 0.0822816 0.0007564 108.78 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 22444.4 on 577 degrees of freedom
## Residual deviance: 4037.8 on 573 degrees of freedom
## AIC: 7790.6
##
## Number of Fisher Scoring iterations: 4
ggplot(ChickWeight, aes(x=Time, y=weight))+ geom_point(aes(color = Diet))+
geom_path(aes(color=Diet, group=Chick))
Se observa que el grupo se va a dar por el pollo. Cada pollo, a pesar de
tener dietas similares desde el punto de partida, van a tener
comportamientos distintos.
Fit2_Poisson <- glm(weight ~ Diet:Time, data = ChickWeight, family=poisson)
Fit1_Poisson <- glmer(weight ~ Diet:Time+ (1|Chick), data = ChickWeight, family = poisson)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
broom::tidy(Fit1_Poisson)
## # A tibble: 6 × 7
## effect group term estimate std.error statistic p.value
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed <NA> (Intercept) 3.84 0.0315 122. 0
## 2 fixed <NA> Diet1:Time 0.0666 0.00105 63.7 0
## 3 fixed <NA> Diet2:Time 0.0749 0.00129 57.9 0
## 4 fixed <NA> Diet3:Time 0.0868 0.00123 70.5 0
## 5 fixed <NA> Diet4:Time 0.0763 0.00126 60.4 0
## 6 ran_pars Chick sd__(Intercept) 0.213 NA NA NA
broom.mixed::tidy(Fit2_Poisson)
## # A tibble: 5 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 3.86 0.00931 415. 0
## 2 Diet1:Time 0.0656 0.000722 90.8 0
## 3 Diet2:Time 0.0754 0.000768 98.1 0
## 4 Diet3:Time 0.0862 0.000729 118. 0
## 5 Diet4:Time 0.0823 0.000756 109. 0
view(broom.mixed::tidy(Fit2_Poisson))
view(broom::tidy(Fit1_Poisson))
#3.- INFO DE LA SESIÓN
sesion_info <- devtools::session_info()
dplyr::select(
tibble::as_tibble(sesion_info$packages),
c(package, loadedversion, source)
)
## # A tibble: 91 × 3
## package loadedversion source
## <chr> <chr> <chr>
## 1 backports 1.4.1 CRAN (R 4.2.0)
## 2 boot 1.3-28 CRAN (R 4.2.2)
## 3 broom 1.0.5 CRAN (R 4.2.3)
## 4 broom.mixed 0.2.9.4 CRAN (R 4.2.3)
## 5 bslib 0.4.2 CRAN (R 4.2.2)
## 6 cachem 1.0.6 CRAN (R 4.2.2)
## 7 callr 3.7.3 CRAN (R 4.2.3)
## 8 cli 3.6.0 CRAN (R 4.2.2)
## 9 codetools 0.2-18 CRAN (R 4.2.2)
## 10 colorspace 2.1-0 CRAN (R 4.2.3)
## # ℹ 81 more rows