packages

library(googlesheets4)
library(googledrive)
## 
## Attaching package: 'googledrive'
## The following objects are masked from 'package:googlesheets4':
## 
##     request_generate, request_make
library(ggplot2)
library(DescTools)

Tabla Aspirin $ Infarto

gs4_deauth()
asp <- read_sheet("https://docs.google.com/spreadsheets/d/1NHTLGSXJ0a9UtWRPN3uf-5_7kEbuFgP5UIwNEgnGqs0/edit?usp=sharing",
           range= "B2:D4",
           col_names= TRUE
           )
asp$group <- as.factor(asp$group)
#
asp$Pr <- asp$YES/ (asp$YES + asp$NO)
asp$Odds <- asp$YES/ asp$NO

plots

p <- ggplot(aes(x=group, y= Pr), data=asp)
p + geom_col()

asp
## # A tibble: 2 × 5
##   group     YES    NO      Pr    Odds
##   <fct>   <dbl> <dbl>   <dbl>   <dbl>
## 1 Placebo   189 10845 0.0171  0.0174 
## 2 Aspirin   104 10933 0.00942 0.00951
obs <- cbind(asp$YES, asp$NO)
obs
##      [,1]  [,2]
## [1,]  189 10845
## [2,]  104 10933

Chi-squared and G-test

chi2 <- chisq.test(obs)
chi2
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  obs
## X-squared = 24.429, df = 1, p-value = 7.71e-07
expected <- chi2$expected

expected
##          [,1]     [,2]
## [1,] 146.4801 10887.52
## [2,] 146.5199 10890.48

Gtest

Deviance Residual (ajustando modelo Nulo))

2*sum(o*log(o/e)) (Agresti, 2007; pag.36)

GTest(obs)
## 
##  Log likelihood ratio (G-test) test of independence without correction
## 
## data:  obs
## G = 25.372, X-squared df = 1, p-value = 4.727e-07
#### Odds Ratio `Placebo/Aspirin` 
OR.placebo <- asp$Odds[1]/ asp$Odds[2]
OR.placebo  
## [1] 1.832054

Table Politics & Sex

Table Politics & Sex

ss= "https://docs.google.com/spreadsheets/d/1N0OsCXyn3-81dFbJh_KpwkoKpJlGl3JXf3DiHIRP_uk/edit?usp=sharing"
hoja= "or.polit"
rango = "C5:F7"
#
gs4_deauth()
politics <- read_sheet(ss,
                  sheet= hoja,
                  range= rango,
                  col_names= TRUE)
politics$sexo <- as.factor(politics$sexo)
politic <- politics
# Not republican
politic$Not.repu <- (politics$demo+politic$inde )
politic$Pr.NotRepub <- politic$Not.repu/
  (politics$demo+ politics$inde+politics$repu)
# Proportion of Republican
politic$Pr.repu <- politic$repu/
  (politic$demo+ politic$inde + politic$repu)
# Odds
politic$Odds.repu <- politic$repu/ (politic$demo + politic$inde)

Plots

p <- ggplot(aes(x=sexo, y= Pr.repu), data=politic)
p + geom_col()

Chi-squared and G-test

politic
## # A tibble: 2 × 8
##   sexo     demo  inde  repu Not.repu Pr.NotRepub Pr.repu Odds.repu
##   <fct>   <dbl> <dbl> <dbl>    <dbl>       <dbl>   <dbl>     <dbl>
## 1 mujeres   762   327   468     1089       0.699   0.301     0.430
## 2 hombres   484   239   477      723       0.602   0.398     0.660
obs <- cbind(politic$demo, 
             politic$inde,
             politic$repu)
obs
##      [,1] [,2] [,3]
## [1,]  762  327  468
## [2,]  484  239  477
mosaicplot(obs, color= TRUE, main="Distribución de frecc. orientación política")

chi2 <- chisq.test(obs)
chi2
## 
##  Pearson's Chi-squared test
## 
## data:  obs
## X-squared = 30.07, df = 2, p-value = 2.954e-07
expected <- chi2$expected

politic$expected.Repu <- expected[,3]
GTest
o <- obs
e <- expected

2*sum(o*log(o/e))     # G test 
## [1] 30.01669
GTest(obs)
## 
##  Log likelihood ratio (G-test) test of independence without correction
## 
## data:  obs
## G = 30.017, X-squared df = 2, p-value = 3.034e-07

==================================

GLMs

Table aspirin

Y.asp <- cbind(asp$YES, asp$NO)

glm.asp <- glm( Y.asp ~ asp$group, family=binomial)
summary(glm.asp)
## 
## Call:
## glm(formula = Y.asp ~ asp$group, family = binomial)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -4.65515    0.09852 -47.249  < 2e-16 ***
## asp$groupPlacebo  0.60544    0.12284   4.929 8.28e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance:  2.5372e+01  on 1  degrees of freedom
## Residual deviance: -7.3030e-13  on 0  degrees of freedom
## AIC: 17.538
## 
## Number of Fisher Scoring iterations: 3

Table Politics & Sex

glm

Response variable = republican/ not republican

politic
## # A tibble: 2 × 9
##   sexo     demo  inde  repu Not.repu Pr.NotRepub Pr.repu Odds.repu expected.Repu
##   <fct>   <dbl> <dbl> <dbl>    <dbl>       <dbl>   <dbl>     <dbl>         <dbl>
## 1 mujeres   762   327   468     1089       0.699   0.301     0.430          534.
## 2 hombres   484   239   477      723       0.602   0.398     0.660          411.
Y.politic <- cbind(politic$repu, politic$Not.repu)

glm.politic <- glm( Y.politic ~ politic$sexo, family=binomial)
summary(glm.politic)
## 
## Call:
## glm(formula = Y.politic ~ politic$sexo, family = binomial)
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -0.41589    0.05899  -7.050 1.78e-12 ***
## politic$sexomujeres -0.42865    0.08084  -5.303 1.14e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance:  2.8166e+01  on 1  degrees of freedom
## Residual deviance: -2.4380e-13  on 0  degrees of freedom
## AIC: 19.128
## 
## Number of Fisher Scoring iterations: 2

Table Treatment & Clinics

ss= "https://docs.google.com/spreadsheets/d/1wkFSvv6po7-TXA-eAf5yf9XpmR3VmVB31OLRqrKFHSI/edit?usp=sharing"
hoja= "clinicTratModific"
rango = "B4:E8"
#
gs4_deauth()
trat <- read_sheet(ss,
                   sheet=hoja,
                   range=rango,
                   col_names=TRUE
                   )
trat$Clinic <- as.factor(trat$Clinic)
trat$T <- as.factor(trat$T) 
#
trat$pr.exito <- trat$Posit / 
  (trat$Posit+trat$Negat)
trat
## # A tibble: 4 × 5
##   Clinic T     Posit Negat pr.exito
##   <fct>  <fct> <dbl> <dbl>    <dbl>
## 1 dos    A         4    16    0.2  
## 2 dos    B        22    32    0.407
## 3 uno    A         8    32    0.2  
## 4 uno    B        12     8    0.6

ggplot

ggplot( aes( x= T, y= pr.exito, fill = Clinic), data= trat) + 
  geom_bar(stat="identity", position= "dodge")

# Odds
trat$Odds.exito <- trat$Posit / trat$Negat

glm

Y.trat <- cbind(trat$Posit, trat$Negat)

glm.trat <- glm( Y.trat ~ trat$T + trat$Clinic, 
                 family=binomial)
summary(glm.trat)
## 
## Call:
## glm(formula = Y.trat ~ trat$T + trat$Clinic, family = binomial)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -1.7352     0.4507  -3.850 0.000118 ***
## trat$TB          1.4369     0.4458   3.223 0.001268 ** 
## trat$Clinicuno   0.4987     0.4277   1.166 0.243581    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 12.40871  on 3  degrees of freedom
## Residual deviance:  0.79129  on 1  degrees of freedom
## AIC: 21.4
## 
## Number of Fisher Scoring iterations: 4