4_diseños

Cuatro tipos de diseño

Según Gotelli & Ellison (2013)

Cuatro tipos de Diseño

Ejemplos

Regresión

Import data

ss="https://docs.google.com/spreadsheets/d/1CMPY0Bjm6Kc3XUq0mZtgIoyXAacdWVkv2KONh0T-inU/edit?usp=sharing"
hoja="Hoja1"
rango="A1:E18"
latsp <- data.frame(read_sheet(ss=ss,
               sheet=hoja,
               range=rango,
               col_names = TRUE,
               na= "NA")
               )
latsp$group <- factor(latsp$group)

Explore

modelo respuesta -> species
predictora -> latitude

head(latsp)

##                town state latitude species group
## 1 Seaford-Nanticoke    DE   38.583      94     a
## 2            Denton    MD   38.900      96     a
## 3            Elkton    MD   39.533      98     a
## 4        Middletown    DE   39.467     108     a
## 5         Salisbury    MD   38.333     108     a
## 6        Wilmington    DE   39.733     113     a

summary(latsp)

##      town              state              latitude        species    group
##  Length:17          Length:17          Min.   :37.20   Min.   : 94   a:9  
##  Class :character   Class :character   1st Qu.:38.32   1st Qu.:108   b:8  
##  Mode  :character   Mode  :character   Median :38.60   Median :118        
##                                        Mean   :38.64   Mean   :120        
##                                        3rd Qu.:39.13   3rd Qu.:128        
##                                        Max.   :39.73   Max.   :157

# ggplot
ggplot(data= latsp, aes(x=latitude, y= species)) +
  geom_point()

#### Analysis

reg1 <- lm(data=latsp, species ~ latitude)
summary(reg1)

## 
## Call:
## lm(formula = species ~ latitude, data = latsp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.635 -11.198  -1.993  14.569  28.162 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  585.145    230.024   2.544   0.0225 *
## latitude     -12.039      5.953  -2.022   0.0613 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.37 on 15 degrees of freedom
## Multiple R-squared:  0.2143, Adjusted R-squared:  0.1619 
## F-statistic:  4.09 on 1 and 15 DF,  p-value: 0.06134

ggplot

p.reg <- ggplot(data= latsp, aes(x=latitude, y= species, col=)) +
  geom_point() + 
  geom_smooth(method="lm")
p.reg

Supuestos

plot(reg1, which=c(1))

#
hist(reg1$residuals)

shapiro.test(reg1$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  reg1$residuals
## W = 0.97504, p-value = 0.899

ANDEVA

data

head(latsp)

##                town state latitude species group
## 1 Seaford-Nanticoke    DE   38.583      94     a
## 2            Denton    MD   38.900      96     a
## 3            Elkton    MD   39.533      98     a
## 4        Middletown    DE   39.467     108     a
## 5         Salisbury    MD   38.333     108     a
## 6        Wilmington    DE   39.733     113     a

Explore

modelo respuesta -> species
predictora -> group

summary(latsp)

##      town              state              latitude        species    group
##  Length:17          Length:17          Min.   :37.20   Min.   : 94   a:9  
##  Class :character   Class :character   1st Qu.:38.32   1st Qu.:108   b:8  
##  Mode  :character   Mode  :character   Median :38.60   Median :118        
##                                        Mean   :38.64   Mean   :120        
##                                        3rd Qu.:39.13   3rd Qu.:128        
##                                        Max.   :39.73   Max.   :157

# ggplot
p.aov<- ggplot(data= latsp, aes(x=group, y= species)) +
  geom_boxplot() +
  geom_point(col=6,
             position = position_jitter(width=0.1, height=0)) +
  geom_hline(yintercept = mean(latsp$species), lty="dashed", lwd=1)
p.aov

Analysis

aov1 <- aov(data=latsp, species ~ group)
summary(aov1)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## group        1   3015  3014.9    21.5 0.000322 ***
## Residuals   15   2103   140.2                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Supuestos

y= latsp$species
binwidth=4
ggplot(data=latsp, aes(x=y)) + geom_histogram(aes(y=..density..), binwidth=binwidth,colour="black", fill="white") + geom_density(alpha=.2, fill="#FF6666")

shapiro.test(latsp$species)

## 
##  Shapiro-Wilk normality test
## 
## data:  latsp$species
## W = 0.95152, p-value = 0.4809

shapiro.test(reg1$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  reg1$residuals
## W = 0.97504, p-value = 0.899

leveneTest(aov1)

## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1  1.2805 0.2756
##       15

===============================================

Análisis de Frecuencias (tablas de contingencia)

Import Data

ss="https://docs.google.com/spreadsheets/d/1sd5Bkkhs2CBl-5mpRUNPv0WfqZr3KQwjbgScY27Tf-s/edit?usp=sharing"
hoja="Sheet3"
rango="A2:C5"
dogs <- read_sheet(ss=ss,
               sheet=hoja,
               range=rango,
               col_names = TRUE,
               na= "NA"
               )

## Reading from "TresDiseños_Clase"

## Range "'Sheet3'!A2:C5"

dogs$total <- dogs$imig+dogs$local
dogs$Pr.imig <- dogs$imig/dogs$total
obs <- cbind(dogs$imig, dogs$local)

Explore

modelo
respuesta -> origen
predictora -> edad

dogs

## # A tibble: 3 x 5
##   edad      imig local total Pr.imig
##   <chr>    <dbl> <dbl> <dbl>   <dbl>
## 1 adulto      20    19    39  0.513 
## 2 Juv          3    16    19  0.158 
## 3 cachorro     4   203   207  0.0193

# ggplot
pr.gen <- sum(dogs$imig)/sum(dogs$total)
p.tc <- ggplot(data= dogs, 
       aes(x=edad, y= Pr.imig, fill=edad)) +
  geom_col() + 
  geom_hline(yintercept = pr.gen, lty="dashed", lwd=1) +
  ylim(0,0.8)
p.tc

Analysis

chi2 <- chisq.test(obs)

## Warning in chisq.test(obs): Chi-squared approximation may be incorrect

chi2

## 
##  Pearson's Chi-squared test
## 
## data:  obs
## X-squared = 88.043, df = 2, p-value < 2.2e-16

chi2$stdres

##            [,1]       [,2]
## [1,]  9.1864627 -9.1864627
## [2,]  0.8376399 -0.8376399
## [3,] -8.3937442  8.3937442

chi2$expected

##           [,1]      [,2]
## [1,]  3.973585  35.02642
## [2,]  1.935849  17.06415
## [3,] 21.090566 185.90943

dogs[, c(1,2,3)]

## # A tibble: 3 x 3
##   edad      imig local
##   <chr>    <dbl> <dbl>
## 1 adulto      20    19
## 2 Juv          3    16
## 3 cachorro     4   203

==============================================================

Regresión logística

Import Data

ss="https://docs.google.com/spreadsheets/d/1CMPY0Bjm6Kc3XUq0mZtgIoyXAacdWVkv2KONh0T-inU/edit?usp=sharing"
hoja="Hoja3"
rango="A1:D32"
fever <- data.frame(read_sheet(ss=ss,
               sheet=hoja,
               range=rango,
               col_names = TRUE,
               na= "NA")
               )

## Reading from "Regres_datos"

## Range "'Hoja3'!A1:D32"

Explore

modelo respuesta -> fever
predictora -> Pulse

summary(fever)

##    Species               Temp           Pulse            fever       
##  Length:31          Min.   :17.20   Min.   : 44.30   Min.   :0.0000  
##  Class :character   1st Qu.:20.80   1st Qu.: 59.45   1st Qu.:0.0000  
##  Mode  :character   Median :24.00   Median : 76.20   Median :0.0000  
##                     Mean   :23.76   Mean   : 72.89   Mean   :0.3871  
##                     3rd Qu.:26.35   3rd Qu.: 85.25   3rd Qu.:1.0000  
##                     Max.   :30.40   Max.   :101.70   Max.   :1.0000

# ggplot
ggplot(data= fever, 
       aes(x=Pulse, y= fever)) +
  geom_point(col=6,
             position = position_jitter(width=0.1, height=0.02)) +
  geom_hline(yintercept = 0.5, lty="dashed", lwd=0.6)

#### Analysis

reglog <- glm(data=fever, 
              fever ~ Pulse, 
              family= binomial)
summary(reglog)

## 
## Call:
## glm(formula = fever ~ Pulse, family = binomial, data = fever)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.38513  -0.16483  -0.00855   0.14584   1.62815  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -27.4351    13.0573  -2.101   0.0356 *
## Pulse         0.3472     0.1657   2.095   0.0362 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 41.381  on 30  degrees of freedom
## Residual deviance: 14.002  on 29  degrees of freedom
## AIC: 18.002
## 
## Number of Fisher Scoring iterations: 8

ggplot

p.rlog <- ggplot(data= fever, 
       aes(x=Pulse, y= fever)) +
  geom_point(col=6,
             position = position_jitter(width=0.1, height=0.02)) +
  geom_hline(yintercept = 0.5, lty="dashed", lwd=0.6) +
  geom_smooth(method="glm",method.args=list(family=binomial))
p.rlog

## `geom_smooth()` using formula 'y ~ x'

===============================================

4_diseños

F.Villatoro

10/15/2020

Cuatro tipos de diseño

Ejemplos

Regresión

Import data

Explore

ggplot

Supuestos

ANDEVA

data

Explore

Analysis

Supuestos

Análisis de Frecuencias (tablas de contingencia)

Import Data

Explore

Analysis

Regresión logística

Import Data

Explore

ggplot

Referencias