if (!require("pacman")) install.packages("pacman")
## Loading required package: pacman
pacman::p_load("tidyverse", "lme4","readxl","beanplot","knitr", "kableExtra","stargazer", "tableone","sjPlot","sjlabelled","sjmisc","gridExtra","emmeans","sjstats","finalfit")

df=read_xlsx(path = "ajedrez por rival.xlsx",sheet = 1) %>% mutate(Player=as.factor(str_c("player ",JUGADOR)))

####
#Estudio1
#Queremos estudiar so T y C de un jugador es afectado por 
# si mi rival es un alfa, cuando yo no lo soy. Para ello excluimos el individuo 1
df1= df %>% filter(ALPHA==0) %>% mutate(Player=as.factor(as.character(Player)))

Estudiamos el efecto de jugar contra un alpha usando un modelo mixto con componente aleatoria el individuo al que medimos repetidamente T

modelo2=lmer(formula = T ~ ALPHARIVAL + RONDA +(1|Player),  data=df1)
summary(modelo2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: T ~ ALPHARIVAL + RONDA + (1 | Player)
##    Data: df1
## 
## REML criterion at convergence: 283.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.98554 -0.67822 -0.03209  0.45029  1.81214 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Player   (Intercept) 34763    186.45  
##  Residual              9242     96.14  
## Number of obs: 25, groups:  Player, 5
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   240.50      95.28   2.524
## ALPHARIVAL    264.80      48.07   5.509
## RONDA          43.90      13.60   3.229
## 
## Correlation of Fixed Effects:
##            (Intr) ALPHAR
## ALPHARIVAL -0.101       
## RONDA      -0.428  0.000
modelo3=lmer(formula = C ~ ALPHARIVAL +(1|Player),  data=df1)
## singular fit
summary(modelo3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: C ~ ALPHARIVAL + (1 | Player)
##    Data: df1
## 
## REML criterion at convergence: 120.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.9138 -0.6436 -0.1822  0.4307  3.9039 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Player   (Intercept) 0.000    0.000   
##  Residual             9.209    3.035   
## Number of obs: 25, groups:  Player, 5
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   4.0530     0.6786   5.973
## ALPHARIVAL    0.1950     1.5173   0.129
## 
## Correlation of Fixed Effects:
##            (Intr)
## ALPHARIVAL -0.447
## convergence code: 0
## singular fit
modelo4=lmer(formula = C ~ ALPHARIVAL + RONDA +(1|Player),  data=df1)
## singular fit
summary(modelo4)
## Linear mixed model fit by REML ['lmerMod']
## Formula: C ~ ALPHARIVAL + RONDA + (1 | Player)
##    Data: df1
## 
## REML criterion at convergence: 120.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.9064 -0.6294 -0.1909  0.4339  3.8182 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. 
##  Player   (Intercept) 6.025e-14 2.455e-07
##  Residual             9.627e+00 3.103e+00
## Number of obs: 25, groups:  Player, 5
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   3.9942     1.4880   2.684
## ALPHARIVAL    0.1950     1.5514   0.126
## RONDA         0.0196     0.4388   0.045
## 
## Correlation of Fixed Effects:
##            (Intr) ALPHAR
## ALPHARIVAL -0.209       
## RONDA      -0.885  0.000
## convergence code: 0
## singular fit
beanplot(T ~ ALPHARIVAL*RONDA,side="both",col=list("red","blue"),log="y",data=df1)

boxplot(C ~ ALPHARIVAL,data=df1,col="red")

####
#Estudio2
#Estudio1
#Queremos estudiar so T y C de un jugador es afectado por 
#la diferencia de ELO con el rival

df2=df %>% mutate(diffELO=ELO_RIVAL-ELO)

modelo21=lmer(formula = T ~ diffELO +(1|Player),  data=df2)
summary(modelo21)
## Linear mixed model fit by REML ['lmerMod']
## Formula: T ~ diffELO + (1 | Player)
##    Data: df2
## 
## REML criterion at convergence: 380.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8141 -0.7616  0.1860  0.6131  1.8227 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Player   (Intercept) 42582    206.4   
##  Residual             16178    127.2   
## Number of obs: 30, groups:  Player, 6
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 396.2000    87.3853   4.534
## diffELO       0.7239     0.2189   3.307
## 
## Correlation of Fixed Effects:
##         (Intr)
## diffELO 0.000
modelo21 %>% tab_model(show.se = TRUE) %>% return() %$% 
    knitr %>% 
    asis_output()
## Computing p-values via Wald-statistics approximation (treating t as Wald z).
  T
Predictors Estimates std. Error CI p
(Intercept) 396.20 87.39 224.93 – 567.47 <0.001
diff ELO 0.72 0.22 0.29 – 1.15 0.001
Random Effects
σ2 16178.11
τ00 Player 42581.54
ICC Player 0.72
Observations 30
Marginal R2 / Conditional R2 0.190 / 0.777

E incluyendo la ronda en la partida, es aún mucho más interesante:

modelo21b=lmer(formula = T ~ RONDA+diffELO +(1|Player),  data=df2)
summary(modelo21)
## Linear mixed model fit by REML ['lmerMod']
## Formula: T ~ diffELO + (1 | Player)
##    Data: df2
## 
## REML criterion at convergence: 380.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8141 -0.7616  0.1860  0.6131  1.8227 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Player   (Intercept) 42582    206.4   
##  Residual             16178    127.2   
## Number of obs: 30, groups:  Player, 6
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 396.2000    87.3853   4.534
## diffELO       0.7239     0.2189   3.307
## 
## Correlation of Fixed Effects:
##         (Intr)
## diffELO 0.000

La forma de enviarlo a una revista es más o menos esta:

modelo21b %>% tab_model(show.se = TRUE) %>% return() %$% 
    knitr %>% 
    asis_output()
## Computing p-values via Wald-statistics approximation (treating t as Wald z).
  T
Predictors Estimates std. Error CI p
(Intercept) 294.30 98.66 100.93 – 487.67 0.003
RONDA 33.97 15.14 4.28 – 63.65 0.025
diff ELO 0.74 0.20 0.34 – 1.13 <0.001
Random Effects
σ2 13762.24
τ00 Player 43263.79
ICC Player 0.76
Observations 30
Marginal R2 / Conditional R2 0.226 / 0.813

Comparemos los dos modelos por si ayuda a tomar una decisión:

tab_model(modelo21,modelo21b,show.se = FALSE,show.aic = TRUE) %>% return() %$% 
    knitr %>% 
    asis_output()
## Computing p-values via Wald-statistics approximation (treating t as Wald z).
## Computing p-values via Wald-statistics approximation (treating t as Wald z).
  T T
Predictors Estimates CI p Estimates CI p
(Intercept) 396.20 224.93 – 567.47 <0.001 294.30 100.93 – 487.67 0.003
diff ELO 0.72 0.29 – 1.15 0.001 0.74 0.34 – 1.13 <0.001
RONDA 33.97 4.28 – 63.65 0.025
Random Effects
σ2 16178.11 13762.24
τ00 42581.54 Player 43263.79 Player
ICC 0.72 Player 0.76 Player
Observations 30 30
Marginal R2 / Conditional R2 0.190 / 0.777 0.226 / 0.813
AIC 388.203 378.217

Todo lleva a pensar que es más interesante usar el modelo con la RONDA (mejor R2, mejor estimaciones, AIC(Akaike information Criteria) más bajo).

ggplot(df2,aes(x=diffELO,y=T,col=Player,shape=Player),alpha=0.75)+geom_point()+geom_smooth(method="lm",se=TRUE,alpha=0.05)+theme_classic()+
  coord_cartesian(ylim=c(0,1000))

ggplot(df2,aes(x=diffELO,y=T),alpha=0.75)+geom_point(aes(col=Player))+geom_smooth(method="lm")+theme_bw()

Y ahora queremos una tabla con el valor de la tasa de cambio de T en función de diffELO. Queremos mostrar una tasa de cambio diferente para cada jugador. No es que es lo que me parezca mejor para generalizar a los jugadores del mundo (para eso prefiero el modelo anterior). Pero si lo que pretendemos es describir a nuestros jugadores, lo que sigue está bien:

####
#Estudio2
#Estudio1
#Queremos estudiar so T y C de un jugador es afectado por 
#la diferencia de ELO con el rival

df2=df %>% mutate(diffELO=ELO_RIVAL-ELO)

modelo21b=lm(formula = T ~  Player + diffELO:Player,  data=df2)
summary(modelo21b)
## 
## Call:
## lm(formula = T ~ Player + diffELO:Player, data = df2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -244.56  -79.99   21.05   70.32  214.64 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)  
## (Intercept)             330.4956   133.1421   2.482   0.0231 *
## Playerplayer 2          145.6536   152.6036   0.954   0.3525  
## Playerplayer 3          196.4772   149.3890   1.315   0.2049  
## Playerplayer 4          369.3878   147.5947   2.503   0.0222 *
## Playerplayer 5          -16.1616   149.2922  -0.108   0.9150  
## Playerplayer 6         -167.5945   231.6763  -0.723   0.4787  
## Playerplayer 1:diffELO    0.4535     0.6824   0.665   0.5148  
## Playerplayer 2:diffELO    1.5736     0.5534   2.843   0.0108 *
## Playerplayer 3:diffELO    0.7968     0.5418   1.471   0.1586  
## Playerplayer 4:diffELO    0.5557     0.5353   1.038   0.3130  
## Playerplayer 5:diffELO    0.7888     0.5415   1.457   0.1624  
## Playerplayer 6:diffELO    0.2365     0.8394   0.282   0.7814  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 133.4 on 18 degrees of freedom
## Multiple R-squared:  0.7754, Adjusted R-squared:  0.6382 
## F-statistic:  5.65 on 11 and 18 DF,  p-value: 0.0006427

Para la revista quedamos en que le interesaría las estimaciones de las interacciones (no el resto posiblemente), y el \(R^2\)

modelo21b %>% tab_model(show.se = TRUE) %>% return() %$% 
    knitr %>% 
    asis_output()
  T
Predictors Estimates std. Error CI p
(Intercept) 330.50 133.14 69.54 – 591.45 0.023
Playerplayer 2 145.65 152.60 -153.44 – 444.75 0.352
Playerplayer 3 196.48 149.39 -96.32 – 489.27 0.205
Playerplayer 4 369.39 147.59 80.11 – 658.67 0.022
Playerplayer 5 -16.16 149.29 -308.77 – 276.45 0.915
Playerplayer 6 -167.59 231.68 -621.67 – 286.48 0.479
Playerplayer 1:diffELO 0.45 0.68 -0.88 – 1.79 0.515
Playerplayer 2:diffELO 1.57 0.55 0.49 – 2.66 0.011
Playerplayer 3:diffELO 0.80 0.54 -0.27 – 1.86 0.159
Playerplayer 4:diffELO 0.56 0.54 -0.49 – 1.60 0.313
Playerplayer 5:diffELO 0.79 0.54 -0.27 – 1.85 0.162
Playerplayer 6:diffELO 0.24 0.84 -1.41 – 1.88 0.781
Observations 30
R2 / adjusted R2 0.775 / 0.638

Me pregunto si realmente cada jugador tiene su propia pendiente, o hay una pendiente común para todos ellos:

modelo21c=lm(formula = T ~  Player + diffELO*Player,  data=df2)

tab_model(modelo21b,modelo21c, modelo2,show.se = FALSE,show.aic = TRUE) %>% return() %$% 
    knitr %>% 
    asis_output()
## Computing p-values via Wald-statistics approximation (treating t as Wald z).
  T T T
Predictors Estimates CI p Estimates CI p Estimates CI p
(Intercept) 330.50 69.54 – 591.45 0.023 330.50 69.54 – 591.45 0.023 240.50 53.75 – 427.25 0.012
Playerplayer 2 145.65 -153.44 – 444.75 0.352 145.65 -153.44 – 444.75 0.352
Playerplayer 3 196.48 -96.32 – 489.27 0.205 196.48 -96.32 – 489.27 0.205
Playerplayer 4 369.39 80.11 – 658.67 0.022 369.39 80.11 – 658.67 0.022
Playerplayer 5 -16.16 -308.77 – 276.45 0.915 -16.16 -308.77 – 276.45 0.915
Playerplayer 6 -167.59 -621.67 – 286.48 0.479 -167.59 -621.67 – 286.48 0.479
Playerplayer 1:diffELO 0.45 -0.88 – 1.79 0.515
Playerplayer 2:diffELO 1.57 0.49 – 2.66 0.011 1.12 -0.60 – 2.84 0.219
Playerplayer 3:diffELO 0.80 -0.27 – 1.86 0.159 0.34 -1.36 – 2.05 0.698
Playerplayer 4:diffELO 0.56 -0.49 – 1.60 0.313 0.10 -1.60 – 1.80 0.908
Playerplayer 5:diffELO 0.79 -0.27 – 1.85 0.162 0.34 -1.37 – 2.04 0.705
Playerplayer 6:diffELO 0.24 -1.41 – 1.88 0.781 -0.22 -2.34 – 1.90 0.843
diff ELO 0.45 -0.88 – 1.79 0.515
ALPHARIVAL 264.80 170.59 – 359.01 <0.001
RONDA 43.90 17.25 – 70.55 0.001
Random Effects
σ2     9242.47
τ00     34763.01 Player
ICC     0.79 Player
Observations 30 30 25
R2 / adjusted R2 0.775 / 0.638 0.775 / 0.638 0.263 / 0.845
AIC 389.432 389.432 293.789

Es interesante ver \(R^2\) ajustada del modelo es bastante alta. Una gran parte de la variabilidad observada en la smediciones de testosterona es explicada por unos niveles y diferencia de elo con rival, que dependen del individuo.

Vamos a ver qué pendiente tiene cada juhgador que se pueda atribuir a la diffELO con el rival, tras ajustar por la RONDA en la que se ha enfrentado a él.

####
#Estudio2
#Estudio1
#Queremos estudiar so T y C de un jugador es afectado por 
#la diferencia de ELO con el rival

df2=df %>% mutate(diffELO=ELO_RIVAL-ELO)

modelo21b=lm(formula = T ~  RONDA+Player + diffELO:Player,  data=df2)
summary(modelo21b)
## 
## Call:
## lm(formula = T ~ RONDA + Player + diffELO:Player, data = df2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -227.17  -39.18   25.13   61.27  131.41 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)   
## (Intercept)             187.1776   132.4408   1.413  0.17562   
## RONDA                    38.5193    15.9991   2.408  0.02770 * 
## Playerplayer 2          177.0537   136.2281   1.300  0.21106   
## Playerplayer 3          230.7029   133.5049   1.728  0.10210   
## Playerplayer 4          406.4312   132.0509   3.078  0.00682 **
## Playerplayer 5            8.9318   133.0687   0.067  0.94727   
## Playerplayer 6         -207.1440   206.5201  -1.003  0.32991   
## Playerplayer 1:diffELO    0.2944     0.6100   0.483  0.63557   
## Playerplayer 2:diffELO    1.6187     0.4921   3.289  0.00433 **
## Playerplayer 3:diffELO    0.9061     0.4836   1.874  0.07827 . 
## Playerplayer 4:diffELO    0.3325     0.4846   0.686  0.50187   
## Playerplayer 5:diffELO    0.8345     0.4815   1.733  0.10119   
## Playerplayer 6:diffELO    0.5504     0.7572   0.727  0.47716   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 118.6 on 17 degrees of freedom
## Multiple R-squared:  0.8325, Adjusted R-squared:  0.7143 
## F-statistic: 7.043 on 12 and 17 DF,  p-value: 0.000182
modelo21b %>% tab_model(show.se = TRUE) %>% return() %$% 
    knitr %>% 
    asis_output()
  T
Predictors Estimates std. Error CI p
(Intercept) 187.18 132.44 -72.40 – 446.76 0.176
RONDA 38.52 16.00 7.16 – 69.88 0.028
Playerplayer 2 177.05 136.23 -89.95 – 444.06 0.211
Playerplayer 3 230.70 133.50 -30.96 – 492.37 0.102
Playerplayer 4 406.43 132.05 147.62 – 665.25 0.007
Playerplayer 5 8.93 133.07 -251.88 – 269.74 0.947
Playerplayer 6 -207.14 206.52 -611.92 – 197.63 0.330
Playerplayer 1:diffELO 0.29 0.61 -0.90 – 1.49 0.636
Playerplayer 2:diffELO 1.62 0.49 0.65 – 2.58 0.004
Playerplayer 3:diffELO 0.91 0.48 -0.04 – 1.85 0.078
Playerplayer 4:diffELO 0.33 0.48 -0.62 – 1.28 0.502
Playerplayer 5:diffELO 0.83 0.48 -0.11 – 1.78 0.101
Playerplayer 6:diffELO 0.55 0.76 -0.93 – 2.03 0.477
Observations 30
R2 / adjusted R2 0.833 / 0.714

Al tener en cuenta la ronda, ha mejorado aún más \(R^2\) y han mejorado las estimaciones de error de los coeficientes relevantes (las interacciones con diffELO).

modelo22=lmer(C ~ diffELO +(1|Player),  data=df2)
## singular fit
summary(modelo22)
## Linear mixed model fit by REML ['lmerMod']
## Formula: C ~ diffELO + (1 | Player)
##    Data: df2
## 
## REML criterion at convergence: 150.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.2136 -0.6532 -0.1319  0.4076  3.9879 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Player   (Intercept) 0.000    0.000   
##  Residual             6.829    2.613   
## Number of obs: 30, groups:  Player, 6
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 3.942333   0.477122   8.263
## diffELO     0.006245   0.002992   2.087
## 
## Correlation of Fixed Effects:
##         (Intr)
## diffELO 0.000 
## convergence code: 0
## singular fit

En formato para revista sería:

modelo22 %>% tab_model(show.se = TRUE) %>% return() %$% 
    knitr %>% 
    asis_output()
## Computing p-values via Wald-statistics approximation (treating t as Wald z).
  C
Predictors Estimates std. Error CI p
(Intercept) 3.94 0.48 3.01 – 4.88 <0.001
diff ELO 0.01 0.00 0.00 – 0.01 0.037
Random Effects
σ2 6.83
τ00 Player 0.00
ICC Player 0.00
Observations 30
ggplot(df2,aes(x=diffELO,y=C,col=Player,shape=Player),alpha=0.75)+geom_point()+geom_smooth(method="lm",se=TRUE,alpha=0.05)+theme_classic()+
  coord_cartesian(ylim=c(0,20))

Al ver la gráfica, observo la heterogeneidad de las pendientes, y aunque en el modelo aparezca que diffELO tiene efecto significativo, preferiría considerarlo como un falso positivo. No hablaría de la variable C.

ggplot(df2,aes(x=diffELO,y=C,lty=Player),alpha=0.75)+geom_point()+geom_smooth(method="lm",se = T)+theme_bw()

ggplot(df2,aes(x=diffELO,y=C,lty=Player),alpha=0.75)+geom_point()+geom_smooth(method="lm",se = F)+theme_bw()

ggplot(df2,aes(x=diffELO,y=T),alpha=0.75)+geom_point(aes(col=Player))+geom_smooth(method="lm")

####
#Estudio2
#Estudio1
#Queremos estudiar so T y C de un jugador es afectado por 
#la diferencia de ELO con el rival

df2=df %>% mutate(diffELO=ELO_RIVAL-ELO)

modelo21b=lm(formula = C ~ Player + diffELO:Player,  data=df2)
summary(modelo21b)
## 
## Call:
## lm(formula = C ~ Player + diffELO:Player, data = df2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6392 -0.8665 -0.2109  0.7776  8.7227 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)  
## (Intercept)             2.0841039  2.7382239   0.761   0.4564  
## Playerplayer 2          0.4952781  3.1384728   0.158   0.8764  
## Playerplayer 3          1.1556519  3.0723613   0.376   0.7112  
## Playerplayer 4          0.8912676  3.0354594   0.294   0.7724  
## Playerplayer 5          2.3146183  3.0703706   0.754   0.4607  
## Playerplayer 6         -3.9005228  4.7646977  -0.819   0.4237  
## Playerplayer 1:diffELO -0.0063641  0.0140353  -0.453   0.6557  
## Playerplayer 2:diffELO -0.0083245  0.0113818  -0.731   0.4740  
## Playerplayer 3:diffELO  0.0017526  0.0111429   0.157   0.8768  
## Playerplayer 4:diffELO  0.0149190  0.0110096   1.355   0.1922  
## Playerplayer 5:diffELO  0.0009465  0.0111357   0.085   0.9332  
## Playerplayer 6:diffELO  0.0365598  0.0172626   2.118   0.0484 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.744 on 18 degrees of freedom
## Multiple R-squared:  0.3865, Adjusted R-squared:  0.01153 
## F-statistic: 1.031 on 11 and 18 DF,  p-value: 0.4606
modelo21b %>% tab_model(show.se = TRUE) %>% return() %$% 
    knitr %>% 
    asis_output()
  C
Predictors Estimates std. Error CI p
(Intercept) 2.08 2.74 -3.28 – 7.45 0.456
Playerplayer 2 0.50 3.14 -5.66 – 6.65 0.876
Playerplayer 3 1.16 3.07 -4.87 – 7.18 0.711
Playerplayer 4 0.89 3.04 -5.06 – 6.84 0.772
Playerplayer 5 2.31 3.07 -3.70 – 8.33 0.461
Playerplayer 6 -3.90 4.76 -13.24 – 5.44 0.424
Playerplayer 1:diffELO -0.01 0.01 -0.03 – 0.02 0.656
Playerplayer 2:diffELO -0.01 0.01 -0.03 – 0.01 0.474
Playerplayer 3:diffELO 0.00 0.01 -0.02 – 0.02 0.877
Playerplayer 4:diffELO 0.01 0.01 -0.01 – 0.04 0.192
Playerplayer 5:diffELO 0.00 0.01 -0.02 – 0.02 0.933
Playerplayer 6:diffELO 0.04 0.02 0.00 – 0.07 0.048
Observations 30
R2 / adjusted R2 0.386 / 0.012

El valor de \(R^2\) ajustado y el de los coeficientes de la interaccion convencen aún más de que mejor nohablar mucho de C.