Etan A. Dieppa Colón

Curso INTD 4055, Departamento de Biología, UPRH, Humacao, Puerto Rico 00791

Tabla 1. Datos de hambrunas

library(readxl)
famine <- read_excel("famine.xlsx")
kable(famine)
TM RP LAT CCA DENS VOAP
6.815892 3 -4.32 3.303226 22.500000 1.7099876
20.604396 0 2.03 0.000000 19.773528 2.6647727
59.268833 -7 11.55 0.000000 38.345366 2.2049677
17.885008 -8 39.90 3.389171 70.335792 6.8190909
1.292247 9 28.60 3.151057 163.464007 7.9982843
21.820721 -2 23.73 0.000000 536.870400 51.7993567
1.628095 -9 33.33 0.000000 46.399070 4.7293443
6.331117 -10 39.09 4.075114 183.652089 24.3959701
55.113937 -20 -8.57 0.000000 42.777135 7.2485371
23.547881 -7 15.63 0.000000 9.487595 0.7873961

Matriz de relación entre variables

cor(famine)
##              TM          RP         LAT           CCA          DENS
## TM    1.0000000 -0.55322082 -0.52329935 -0.5511275085 -0.1497974790
## RP   -0.5532208  1.00000000  0.07145836  0.3169423390  0.1937318165
## LAT  -0.5232993  0.07145836  1.00000000  0.4110549694  0.3370688268
## CCA  -0.5511275  0.31694234  0.41105497  1.0000000000 -0.0006213453
## DENS -0.1497975  0.19373182  0.33706883 -0.0006213453  1.0000000000
## VOAP -0.1105358  0.02569331  0.32438247 -0.0008632122  0.9736209880
##               VOAP
## TM   -0.1105357750
## RP    0.0256933058
## LAT   0.3243824730
## CCA  -0.0008632122
## DENS  0.9736209880
## VOAP  1.0000000000
## Warning in applyDefaults(legend, defaults = list(coords = NULL), type =
## "legend"): unnamed legend arguments, will be ignored

Modelo de regresión múltiple (se utiliza la tasa de mortalidad como variable dependiente)

Modelo A. Contiene todas las variables, no hay interacción

modRMA <- lm(TM ~ RP + LAT , 
              data = famine)
summary(modRMA)
## 
## Call:
## lm(formula = TM ~ RP + LAT, data = famine)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.516  -8.084  -1.573   4.870  31.589 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  24.8864     8.2680   3.010   0.0197 *
## RP           -1.3348     0.6607  -2.020   0.0831 .
## LAT          -0.5671     0.2993  -1.895   0.1000 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.83 on 7 degrees of freedom
## Multiple R-squared:  0.5413, Adjusted R-squared:  0.4102 
## F-statistic:  4.13 on 2 and 7 DF,  p-value: 0.06537
ncvTest(modRMA)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.1415305, Df = 1, p = 0.70676
spreadLevelPlot(modRMA)
## Warning in spreadLevelPlot.lm(modRMA): 
## 1 negative fitted value removed

## 
## Suggested power transformation:  1.756077

Modelo B. Interacción entre RP y CCA, variables: LAT, DENS y VOAP sin interacción

modRMB <- lm(TM ~ RP*CCA + LAT + DENS + VOAP, 
              data = famine)
summary(modRMB)
## 
## Call:
## lm(formula = TM ~ RP * CCA + LAT + DENS + VOAP, data = famine)
## 
## Residuals:
##       1       2       3       4       5       6       7       8       9 
##  -3.821  -2.409  14.532 -10.149   3.890  -3.114  -3.414   8.531  -1.310 
##      10 
##  -2.734 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  31.5602    10.5042   3.005   0.0575 .
## RP           -1.9669     0.9037  -2.176   0.1177  
## CCA          -3.1633     3.1440  -1.006   0.3885  
## LAT          -1.3137     0.4300  -3.055   0.0552 .
## DENS          0.8846     0.3250   2.722   0.0724 .
## VOAP         -8.7706     3.2486  -2.700   0.0738 .
## RP:CCA       -1.5293     0.6809  -2.246   0.1104  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.29 on 3 degrees of freedom
## Multiple R-squared:  0.8815, Adjusted R-squared:  0.6445 
## F-statistic:  3.72 on 6 and 3 DF,  p-value: 0.1541
ncvTest(modRMB)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.7916513, Df = 1, p = 0.3736
spreadLevelPlot(modRMB)
## Warning in spreadLevelPlot.lm(modRMB): 
## 2 negative fitted values removed

## 
## Suggested power transformation:  0.7086425

Modelo C. Interacción entre DENS y VOAP, variables: LAT, RP y CCA sin interacción

modRMC <- lm(TM ~ RP + CCA + LAT + DENS * VOAP, 
              data = famine)
summary(modRMC)
## 
## Call:
## lm(formula = TM ~ RP + CCA + LAT + DENS * VOAP, data = famine)
## 
## Residuals:
##        1        2        3        4        5        6        7        8 
##  -7.0875   3.8729  26.0546   5.6738  -4.0120  -0.4252 -18.6732   4.1285 
##        9       10 
##  -6.9842  -2.5476 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.1958890 14.7410508   1.302    0.284
## RP          -2.2218530  1.5499389  -1.434    0.247
## CCA         -0.9910776  6.4467204  -0.154    0.888
## LAT         -0.5982616  0.4945416  -1.210    0.313
## DENS         0.2920760  0.3232736   0.903    0.433
## VOAP        -2.6308630  3.4169640  -0.770    0.497
## DENS:VOAP   -0.0002778  0.0051597  -0.054    0.960
## 
## Residual standard error: 20.11 on 3 degrees of freedom
## Multiple R-squared:  0.6826, Adjusted R-squared:  0.04782 
## F-statistic: 1.075 on 6 and 3 DF,  p-value: 0.5181
ncvTest(modRMC)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.8932903, Df = 1, p = 0.34459
spreadLevelPlot(modRMC)

## 
## Suggested power transformation:  0.8421274

Modelo D. Interacción entre DENS y VOAP, variableS: RP y CCA sin interacción, se eliminó la variable LAT

modRMD <- lm(TM ~ RP + CCA  + DENS * VOAP, 
              data = famine)
summary(modRMD)
## 
## Call:
## lm(formula = TM ~ RP + CCA + DENS * VOAP, data = famine)
## 
## Residuals:
##         1         2         3         4         5         6         7 
##   1.33717   5.42483  26.18738  -4.11377  -0.09692  -0.25381 -30.39403 
##         8         9        10 
##   2.41237   8.16851  -8.67173 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.938260  15.569987   1.216    0.291
## RP          -1.979438   1.623527  -1.219    0.290
## CCA         -1.998184   6.752930  -0.296    0.782
## DENS         0.151114   0.318533   0.474    0.660
## VOAP        -2.568148   3.609072  -0.712    0.516
## DENS:VOAP    0.001837   0.005128   0.358    0.738
## 
## Residual standard error: 21.25 on 4 degrees of freedom
## Multiple R-squared:  0.5278, Adjusted R-squared:  -0.0625 
## F-statistic: 0.8941 on 5 and 4 DF,  p-value: 0.5587
ncvTest(modRMD)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 3.03505, Df = 1, p = 0.081484
spreadLevelPlot(modRMD)

## 
## Suggested power transformation:  0.01915271

Comparación de modelos

library(ggplot2)
library(coefplot)
#cálculo para todos los modelos
modRMA <- modRMA <- lm(TM ~ RP + LAT, 
              data = famine)
modRMB <- lm(TM ~ RP*LAT, 
              data = famine)
modRMC <- lm(TM ~ RP + CCA + LAT, 
              data = famine)
modRMD <- lm(TM ~ RP + LAT + CCA  + DENS * VOAP, 
              data = famine)
#comparando coeficientes de todos los modelos
multiplot(modRMA, modRMB, modRMC, modRMD, pointSize = 2, intercept=FALSE)

Selección de modelos por pasos

#formulación de un modelo nulo y un modelo completo
modNulo <- lm(TM ~ 1, data = famine)
modFull <- lm(TM ~ RP + LAT  + CCA + DENS + VOAP+ RP*CCA + RP * LAT, 
              data = famine)
#procedimiento stepwise
faminestep <- step(modNulo,
                scope = list(lower=modNulo, upper=modFull,
                             direction="both"))
## Start:  AIC=61.46
## TM ~ 1
## 
##        Df Sum of Sq    RSS    AIC
## + RP    1   1170.18 2653.3 59.810
## + CCA   1   1161.34 2662.1 59.843
## + LAT   1   1047.03 2776.4 60.263
## <none>              3823.5 61.463
## + DENS  1     85.80 3737.7 63.236
## + VOAP  1     46.72 3776.7 63.340
## 
## Step:  AIC=59.81
## TM ~ RP
## 
##        Df Sum of Sq    RSS    AIC
## + LAT   1    899.40 1753.9 57.670
## + CCA   1    600.23 2053.0 59.245
## <none>              2653.3 59.810
## - RP    1   1170.18 3823.5 61.463
## + VOAP  1     35.50 2617.8 61.675
## + DENS  1      7.22 2646.1 61.782
## 
## Step:  AIC=57.67
## TM ~ RP + LAT
## 
##          Df Sum of Sq    RSS    AIC
## <none>                1753.9 57.670
## + CCA     1    178.63 1575.2 58.596
## + RP:LAT  1    175.47 1578.4 58.616
## + DENS    1     58.51 1695.4 59.331
## + VOAP    1     15.65 1738.2 59.580
## - LAT     1    899.40 2653.3 59.810
## - RP      1   1022.56 2776.4 60.263
faminestep 
## 
## Call:
## lm(formula = TM ~ RP + LAT, data = famine)
## 
## Coefficients:
## (Intercept)           RP          LAT  
##     24.8864      -1.3348      -0.5671

Interpretación y selección del modelo

modST <- lm(formula = TM ~ RP + LAT, data = famine)
summary(modST)
## 
## Call:
## lm(formula = TM ~ RP + LAT, data = famine)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.516  -8.084  -1.573   4.870  31.589 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  24.8864     8.2680   3.010   0.0197 *
## RP           -1.3348     0.6607  -2.020   0.0831 .
## LAT          -0.5671     0.2993  -1.895   0.1000 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.83 on 7 degrees of freedom
## Multiple R-squared:  0.5413, Adjusted R-squared:  0.4102 
## F-statistic:  4.13 on 2 and 7 DF,  p-value: 0.06537

Selección usando R-cuadrado ajustado y Mallow’s Cp para mejores modelos

library(leaps)
modCp <- regsubsets(TM ~ RP + LAT  + CCA + DENS, data = famine, nbest = 2)
plot(modCp, scale="adjr2")

plot(modCp, scale="Cp")