Etan A. Dieppa Colón
Curso INTD 4055, Departamento de Biología, UPRH, Humacao, Puerto Rico 00791
Tabla 1. Datos de hambrunas
library(readxl)
famine <- read_excel("famine.xlsx")
kable(famine)
| 6.815892 |
3 |
-4.32 |
3.303226 |
22.500000 |
1.7099876 |
| 20.604396 |
0 |
2.03 |
0.000000 |
19.773528 |
2.6647727 |
| 59.268833 |
-7 |
11.55 |
0.000000 |
38.345366 |
2.2049677 |
| 17.885008 |
-8 |
39.90 |
3.389171 |
70.335792 |
6.8190909 |
| 1.292247 |
9 |
28.60 |
3.151057 |
163.464007 |
7.9982843 |
| 21.820721 |
-2 |
23.73 |
0.000000 |
536.870400 |
51.7993567 |
| 1.628095 |
-9 |
33.33 |
0.000000 |
46.399070 |
4.7293443 |
| 6.331117 |
-10 |
39.09 |
4.075114 |
183.652089 |
24.3959701 |
| 55.113937 |
-20 |
-8.57 |
0.000000 |
42.777135 |
7.2485371 |
| 23.547881 |
-7 |
15.63 |
0.000000 |
9.487595 |
0.7873961 |
Matriz de relación entre variables
cor(famine)
## TM RP LAT CCA DENS
## TM 1.0000000 -0.55322082 -0.52329935 -0.5511275085 -0.1497974790
## RP -0.5532208 1.00000000 0.07145836 0.3169423390 0.1937318165
## LAT -0.5232993 0.07145836 1.00000000 0.4110549694 0.3370688268
## CCA -0.5511275 0.31694234 0.41105497 1.0000000000 -0.0006213453
## DENS -0.1497975 0.19373182 0.33706883 -0.0006213453 1.0000000000
## VOAP -0.1105358 0.02569331 0.32438247 -0.0008632122 0.9736209880
## VOAP
## TM -0.1105357750
## RP 0.0256933058
## LAT 0.3243824730
## CCA -0.0008632122
## DENS 0.9736209880
## VOAP 1.0000000000
## Warning in applyDefaults(legend, defaults = list(coords = NULL), type =
## "legend"): unnamed legend arguments, will be ignored

Modelo de regresión múltiple (se utiliza la tasa de mortalidad como variable dependiente)
Modelo A. Contiene todas las variables, no hay interacción
modRMA <- lm(TM ~ RP + LAT ,
data = famine)
summary(modRMA)
##
## Call:
## lm(formula = TM ~ RP + LAT, data = famine)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.516 -8.084 -1.573 4.870 31.589
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.8864 8.2680 3.010 0.0197 *
## RP -1.3348 0.6607 -2.020 0.0831 .
## LAT -0.5671 0.2993 -1.895 0.1000 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.83 on 7 degrees of freedom
## Multiple R-squared: 0.5413, Adjusted R-squared: 0.4102
## F-statistic: 4.13 on 2 and 7 DF, p-value: 0.06537
ncvTest(modRMA)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 0.1415305, Df = 1, p = 0.70676
spreadLevelPlot(modRMA)
## Warning in spreadLevelPlot.lm(modRMA):
## 1 negative fitted value removed

##
## Suggested power transformation: 1.756077
Modelo B. Interacción entre RP y CCA, variables: LAT, DENS y VOAP sin interacción
modRMB <- lm(TM ~ RP*CCA + LAT + DENS + VOAP,
data = famine)
summary(modRMB)
##
## Call:
## lm(formula = TM ~ RP * CCA + LAT + DENS + VOAP, data = famine)
##
## Residuals:
## 1 2 3 4 5 6 7 8 9
## -3.821 -2.409 14.532 -10.149 3.890 -3.114 -3.414 8.531 -1.310
## 10
## -2.734
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.5602 10.5042 3.005 0.0575 .
## RP -1.9669 0.9037 -2.176 0.1177
## CCA -3.1633 3.1440 -1.006 0.3885
## LAT -1.3137 0.4300 -3.055 0.0552 .
## DENS 0.8846 0.3250 2.722 0.0724 .
## VOAP -8.7706 3.2486 -2.700 0.0738 .
## RP:CCA -1.5293 0.6809 -2.246 0.1104
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.29 on 3 degrees of freedom
## Multiple R-squared: 0.8815, Adjusted R-squared: 0.6445
## F-statistic: 3.72 on 6 and 3 DF, p-value: 0.1541
ncvTest(modRMB)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 0.7916513, Df = 1, p = 0.3736
spreadLevelPlot(modRMB)
## Warning in spreadLevelPlot.lm(modRMB):
## 2 negative fitted values removed

##
## Suggested power transformation: 0.7086425
Modelo C. Interacción entre DENS y VOAP, variables: LAT, RP y CCA sin interacción
modRMC <- lm(TM ~ RP + CCA + LAT + DENS * VOAP,
data = famine)
summary(modRMC)
##
## Call:
## lm(formula = TM ~ RP + CCA + LAT + DENS * VOAP, data = famine)
##
## Residuals:
## 1 2 3 4 5 6 7 8
## -7.0875 3.8729 26.0546 5.6738 -4.0120 -0.4252 -18.6732 4.1285
## 9 10
## -6.9842 -2.5476
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.1958890 14.7410508 1.302 0.284
## RP -2.2218530 1.5499389 -1.434 0.247
## CCA -0.9910776 6.4467204 -0.154 0.888
## LAT -0.5982616 0.4945416 -1.210 0.313
## DENS 0.2920760 0.3232736 0.903 0.433
## VOAP -2.6308630 3.4169640 -0.770 0.497
## DENS:VOAP -0.0002778 0.0051597 -0.054 0.960
##
## Residual standard error: 20.11 on 3 degrees of freedom
## Multiple R-squared: 0.6826, Adjusted R-squared: 0.04782
## F-statistic: 1.075 on 6 and 3 DF, p-value: 0.5181
ncvTest(modRMC)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 0.8932903, Df = 1, p = 0.34459
spreadLevelPlot(modRMC)

##
## Suggested power transformation: 0.8421274
Modelo D. Interacción entre DENS y VOAP, variableS: RP y CCA sin interacción, se eliminó la variable LAT
modRMD <- lm(TM ~ RP + CCA + DENS * VOAP,
data = famine)
summary(modRMD)
##
## Call:
## lm(formula = TM ~ RP + CCA + DENS * VOAP, data = famine)
##
## Residuals:
## 1 2 3 4 5 6 7
## 1.33717 5.42483 26.18738 -4.11377 -0.09692 -0.25381 -30.39403
## 8 9 10
## 2.41237 8.16851 -8.67173
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.938260 15.569987 1.216 0.291
## RP -1.979438 1.623527 -1.219 0.290
## CCA -1.998184 6.752930 -0.296 0.782
## DENS 0.151114 0.318533 0.474 0.660
## VOAP -2.568148 3.609072 -0.712 0.516
## DENS:VOAP 0.001837 0.005128 0.358 0.738
##
## Residual standard error: 21.25 on 4 degrees of freedom
## Multiple R-squared: 0.5278, Adjusted R-squared: -0.0625
## F-statistic: 0.8941 on 5 and 4 DF, p-value: 0.5587
ncvTest(modRMD)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 3.03505, Df = 1, p = 0.081484
spreadLevelPlot(modRMD)

##
## Suggested power transformation: 0.01915271
Comparación de modelos
library(ggplot2)
library(coefplot)
#cálculo para todos los modelos
modRMA <- modRMA <- lm(TM ~ RP + LAT,
data = famine)
modRMB <- lm(TM ~ RP*LAT,
data = famine)
modRMC <- lm(TM ~ RP + CCA + LAT,
data = famine)
modRMD <- lm(TM ~ RP + LAT + CCA + DENS * VOAP,
data = famine)
#comparando coeficientes de todos los modelos
multiplot(modRMA, modRMB, modRMC, modRMD, pointSize = 2, intercept=FALSE)

Selección de modelos por pasos
#formulación de un modelo nulo y un modelo completo
modNulo <- lm(TM ~ 1, data = famine)
modFull <- lm(TM ~ RP + LAT + CCA + DENS + VOAP+ RP*CCA + RP * LAT,
data = famine)
#procedimiento stepwise
faminestep <- step(modNulo,
scope = list(lower=modNulo, upper=modFull,
direction="both"))
## Start: AIC=61.46
## TM ~ 1
##
## Df Sum of Sq RSS AIC
## + RP 1 1170.18 2653.3 59.810
## + CCA 1 1161.34 2662.1 59.843
## + LAT 1 1047.03 2776.4 60.263
## <none> 3823.5 61.463
## + DENS 1 85.80 3737.7 63.236
## + VOAP 1 46.72 3776.7 63.340
##
## Step: AIC=59.81
## TM ~ RP
##
## Df Sum of Sq RSS AIC
## + LAT 1 899.40 1753.9 57.670
## + CCA 1 600.23 2053.0 59.245
## <none> 2653.3 59.810
## - RP 1 1170.18 3823.5 61.463
## + VOAP 1 35.50 2617.8 61.675
## + DENS 1 7.22 2646.1 61.782
##
## Step: AIC=57.67
## TM ~ RP + LAT
##
## Df Sum of Sq RSS AIC
## <none> 1753.9 57.670
## + CCA 1 178.63 1575.2 58.596
## + RP:LAT 1 175.47 1578.4 58.616
## + DENS 1 58.51 1695.4 59.331
## + VOAP 1 15.65 1738.2 59.580
## - LAT 1 899.40 2653.3 59.810
## - RP 1 1022.56 2776.4 60.263
faminestep
##
## Call:
## lm(formula = TM ~ RP + LAT, data = famine)
##
## Coefficients:
## (Intercept) RP LAT
## 24.8864 -1.3348 -0.5671
Interpretación y selección del modelo
modST <- lm(formula = TM ~ RP + LAT, data = famine)
summary(modST)
##
## Call:
## lm(formula = TM ~ RP + LAT, data = famine)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.516 -8.084 -1.573 4.870 31.589
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.8864 8.2680 3.010 0.0197 *
## RP -1.3348 0.6607 -2.020 0.0831 .
## LAT -0.5671 0.2993 -1.895 0.1000 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.83 on 7 degrees of freedom
## Multiple R-squared: 0.5413, Adjusted R-squared: 0.4102
## F-statistic: 4.13 on 2 and 7 DF, p-value: 0.06537
Selección usando R-cuadrado ajustado y Mallow’s Cp para mejores modelos
library(leaps)
modCp <- regsubsets(TM ~ RP + LAT + CCA + DENS, data = famine, nbest = 2)
plot(modCp, scale="adjr2")

plot(modCp, scale="Cp")
