packages

if(!require(googlesheets4)){install.packages("googlesheets4")}
if(!require(AICcmodavg)){install.packages("AICcmodavg")}
if(!require(tidyverse)){install.packages("tidyverse")}
if(!require(DescTools)){install.packages("DescTools")}
if(!require(car)){install.packages("car")}
if(!require(lme4)){install.packages("lme4")}
if(!require(ggpubr)){install.packages("ggpubr")}
if(!require(gridExtra)){install.packages("gridExtra")}
#
library(googlesheets4)
library(AICcmodavg)
library(tidyverse)
library(DescTools)
library(car)
library(lme4)
library(ggpubr)
library(gridExtra)

Cuatro tipos de diseño

Según Gotelli & Ellison (2013)

Cuatro tipos de Diseño

Ejemplos

Regresión

Import data

ss="https://docs.google.com/spreadsheets/d/1CMPY0Bjm6Kc3XUq0mZtgIoyXAacdWVkv2KONh0T-inU/edit?usp=sharing"
hoja="Hoja1"
rango="A1:E18"
latsp <- data.frame(read_sheet(ss=ss,
               sheet=hoja,
               range=rango,
               col_names = TRUE,
               na= "NA")
               )
latsp$group <- factor(latsp$group)

Explore

modelo respuesta -> species
predictora -> latitude

head(latsp)
##                town state latitude species group
## 1 Seaford-Nanticoke    DE   38.583      94     a
## 2            Denton    MD   38.900      96     a
## 3            Elkton    MD   39.533      98     a
## 4        Middletown    DE   39.467     108     a
## 5         Salisbury    MD   38.333     108     a
## 6        Wilmington    DE   39.733     113     a
summary(latsp)
##      town              state              latitude        species    group
##  Length:17          Length:17          Min.   :37.20   Min.   : 94   a:9  
##  Class :character   Class :character   1st Qu.:38.32   1st Qu.:108   b:8  
##  Mode  :character   Mode  :character   Median :38.60   Median :118        
##                                        Mean   :38.64   Mean   :120        
##                                        3rd Qu.:39.13   3rd Qu.:128        
##                                        Max.   :39.73   Max.   :157
# ggplot
ggplot(data= latsp, aes(x=latitude, y= species)) +
  geom_point()

#### Analysis

reg1 <- lm(data=latsp, species ~ latitude)
summary(reg1)
## 
## Call:
## lm(formula = species ~ latitude, data = latsp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.635 -11.198  -1.993  14.569  28.162 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  585.145    230.024   2.544   0.0225 *
## latitude     -12.039      5.953  -2.022   0.0613 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.37 on 15 degrees of freedom
## Multiple R-squared:  0.2143, Adjusted R-squared:  0.1619 
## F-statistic:  4.09 on 1 and 15 DF,  p-value: 0.06134

ggplot

p.reg <- ggplot(data= latsp, aes(x=latitude, y= species, col=)) +
  geom_point() + 
  geom_smooth(method="lm")
p.reg

Supuestos

plot(reg1, which=c(1))

#
hist(reg1$residuals)

shapiro.test(reg1$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  reg1$residuals
## W = 0.97504, p-value = 0.899

ANDEVA

data

head(latsp)
##                town state latitude species group
## 1 Seaford-Nanticoke    DE   38.583      94     a
## 2            Denton    MD   38.900      96     a
## 3            Elkton    MD   39.533      98     a
## 4        Middletown    DE   39.467     108     a
## 5         Salisbury    MD   38.333     108     a
## 6        Wilmington    DE   39.733     113     a

Explore

modelo respuesta -> species
predictora -> group

summary(latsp)
##      town              state              latitude        species    group
##  Length:17          Length:17          Min.   :37.20   Min.   : 94   a:9  
##  Class :character   Class :character   1st Qu.:38.32   1st Qu.:108   b:8  
##  Mode  :character   Mode  :character   Median :38.60   Median :118        
##                                        Mean   :38.64   Mean   :120        
##                                        3rd Qu.:39.13   3rd Qu.:128        
##                                        Max.   :39.73   Max.   :157
# ggplot
p.aov<- ggplot(data= latsp, aes(x=group, y= species)) +
  geom_boxplot() +
  geom_point(col=6,
             position = position_jitter(width=0.1, height=0)) +
  geom_hline(yintercept = mean(latsp$species), lty="dashed", lwd=1)
p.aov

Analysis

aov1 <- aov(data=latsp, species ~ group)
summary(aov1)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## group        1   3015  3014.9    21.5 0.000322 ***
## Residuals   15   2103   140.2                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Supuestos

y= latsp$species
binwidth=4
ggplot(data=latsp, aes(x=y)) + geom_histogram(aes(y=..density..), binwidth=binwidth,colour="black", fill="white") + geom_density(alpha=.2, fill="#FF6666")

shapiro.test(latsp$species)
## 
##  Shapiro-Wilk normality test
## 
## data:  latsp$species
## W = 0.95152, p-value = 0.4809
shapiro.test(reg1$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  reg1$residuals
## W = 0.97504, p-value = 0.899
leveneTest(aov1)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1  1.2805 0.2756
##       15

===============================================

Análisis de Frecuencias (tablas de contingencia)

Import Data

ss="https://docs.google.com/spreadsheets/d/1sd5Bkkhs2CBl-5mpRUNPv0WfqZr3KQwjbgScY27Tf-s/edit?usp=sharing"
hoja="Sheet3"
rango="A2:C5"
dogs <- read_sheet(ss=ss,
               sheet=hoja,
               range=rango,
               col_names = TRUE,
               na= "NA"
               )
## Reading from "TresDiseños_Clase"
## Range "'Sheet3'!A2:C5"
dogs$total <- dogs$imig+dogs$local
dogs$Pr.imig <- dogs$imig/dogs$total
obs <- cbind(dogs$imig, dogs$local)

Explore

modelo
respuesta -> origen
predictora -> edad

dogs
## # A tibble: 3 x 5
##   edad      imig local total Pr.imig
##   <chr>    <dbl> <dbl> <dbl>   <dbl>
## 1 adulto      20    19    39  0.513 
## 2 Juv          3    16    19  0.158 
## 3 cachorro     4   203   207  0.0193
# ggplot
pr.gen <- sum(dogs$imig)/sum(dogs$total)
p.tc <- ggplot(data= dogs, 
       aes(x=edad, y= Pr.imig, fill=edad)) +
  geom_col() + 
  geom_hline(yintercept = pr.gen, lty="dashed", lwd=1) +
  ylim(0,0.8)
p.tc

Analysis

chi2 <- chisq.test(obs)
## Warning in chisq.test(obs): Chi-squared approximation may be incorrect
chi2
## 
##  Pearson's Chi-squared test
## 
## data:  obs
## X-squared = 88.043, df = 2, p-value < 2.2e-16
chi2$stdres
##            [,1]       [,2]
## [1,]  9.1864627 -9.1864627
## [2,]  0.8376399 -0.8376399
## [3,] -8.3937442  8.3937442
chi2$expected
##           [,1]      [,2]
## [1,]  3.973585  35.02642
## [2,]  1.935849  17.06415
## [3,] 21.090566 185.90943
dogs[, c(1,2,3)]
## # A tibble: 3 x 3
##   edad      imig local
##   <chr>    <dbl> <dbl>
## 1 adulto      20    19
## 2 Juv          3    16
## 3 cachorro     4   203

==============================================================

Regresión logística

Import Data

ss="https://docs.google.com/spreadsheets/d/1CMPY0Bjm6Kc3XUq0mZtgIoyXAacdWVkv2KONh0T-inU/edit?usp=sharing"
hoja="Hoja3"
rango="A1:D32"
fever <- data.frame(read_sheet(ss=ss,
               sheet=hoja,
               range=rango,
               col_names = TRUE,
               na= "NA")
               )
## Reading from "Regres_datos"
## Range "'Hoja3'!A1:D32"

Explore

modelo respuesta -> fever
predictora -> Pulse

summary(fever)
##    Species               Temp           Pulse            fever       
##  Length:31          Min.   :17.20   Min.   : 44.30   Min.   :0.0000  
##  Class :character   1st Qu.:20.80   1st Qu.: 59.45   1st Qu.:0.0000  
##  Mode  :character   Median :24.00   Median : 76.20   Median :0.0000  
##                     Mean   :23.76   Mean   : 72.89   Mean   :0.3871  
##                     3rd Qu.:26.35   3rd Qu.: 85.25   3rd Qu.:1.0000  
##                     Max.   :30.40   Max.   :101.70   Max.   :1.0000
# ggplot
ggplot(data= fever, 
       aes(x=Pulse, y= fever)) +
  geom_point(col=6,
             position = position_jitter(width=0.1, height=0.02)) +
  geom_hline(yintercept = 0.5, lty="dashed", lwd=0.6)

#### Analysis

reglog <- glm(data=fever, 
              fever ~ Pulse, 
              family= binomial)
summary(reglog)
## 
## Call:
## glm(formula = fever ~ Pulse, family = binomial, data = fever)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.38513  -0.16483  -0.00855   0.14584   1.62815  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -27.4351    13.0573  -2.101   0.0356 *
## Pulse         0.3472     0.1657   2.095   0.0362 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 41.381  on 30  degrees of freedom
## Residual deviance: 14.002  on 29  degrees of freedom
## AIC: 18.002
## 
## Number of Fisher Scoring iterations: 8

ggplot

p.rlog <- ggplot(data= fever, 
       aes(x=Pulse, y= fever)) +
  geom_point(col=6,
             position = position_jitter(width=0.1, height=0.02)) +
  geom_hline(yintercept = 0.5, lty="dashed", lwd=0.6) +
  geom_smooth(method="glm",method.args=list(family=binomial))
p.rlog
## `geom_smooth()` using formula 'y ~ x'

===============================================

Referencias