source("~/desktop/cursos R/2013/funciones.r")
DATOS <- read.table("~/desktop/cursos R/2013/cancer.csv", header = T, sep = ",")
attach(DATOS)
names(DATOS)
## [1] "death" "treatment" "status"
DATOS$status <- as.factor(status)
attach(DATOS)
## The following object(s) are masked from 'DATOS (position 3)':
##
## death, status, treatment
M <- glm(death ~ treatment/status, poisson("sqrt"))
par(mfrow = c(2, 2))
plot(M)
M <- glm(death ~ treatment/status, Gamma("sqrt"))
anova(M, test = "Chi")
## Analysis of Deviance Table
##
## Model: Gamma, link: sqrt
##
## Response: death
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 119 83.3
## treatment 3 5.27 116 78.0 0.042 *
## treatment:status 4 2.15 112 75.9 0.503
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(M)
##
## Call:
## glm(formula = death ~ treatment/status, family = Gamma("sqrt"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.646 -0.729 -0.255 0.340 2.210
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.225 0.578 5.58 1.7e-07 ***
## treatmentDrugB -0.192 0.793 -0.24 0.809
## treatmentDrugC -1.328 0.671 -1.98 0.050 .
## treatmentplacebo -0.558 0.679 -0.82 0.413
## treatmentDrugA:status1 -0.146 0.628 -0.23 0.817
## treatmentDrugB:status1 -0.142 0.591 -0.24 0.811
## treatmentDrugC:status1 0.710 0.399 1.78 0.078 .
## treatmentplacebo:status1 -0.378 0.409 -0.93 0.357
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.6424)
##
## Null deviance: 83.297 on 119 degrees of freedom
## Residual deviance: 75.883 on 112 degrees of freedom
## AIC: 717.9
##
## Number of Fisher Scoring iterations: 6
par(mfrow = c(2, 2))
plot(M)
anova(M, test = "Chi")
## Analysis of Deviance Table
##
## Model: Gamma, link: sqrt
##
## Response: death
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 119 83.3
## treatment 3 5.27 116 78.0 0.042 *
## treatment:status 4 2.15 112 75.9 0.503
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
M2 <- update(M, ~. - treatment:status)
anova(M2, M, test = "Chi")
## Analysis of Deviance Table
##
## Model 1: death ~ treatment
## Model 2: death ~ treatment/status
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 116 78.0
## 2 112 75.9 4 2.15 0.5
anova(M2, test = "Chi")
## Analysis of Deviance Table
##
## Model: Gamma, link: sqrt
##
## Response: death
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 119 83.3
## treatment 3 5.27 116 78.0 0.04 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(M2)
##
## Call:
## glm(formula = death ~ treatment, family = Gamma("sqrt"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.655 -0.694 -0.260 0.322 2.180
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.104 0.226 13.76 <2e-16 ***
## treatmentDrugB -0.188 0.310 -0.61 0.544
## treatmentDrugC -0.600 0.290 -2.07 0.041 *
## treatmentplacebo -0.695 0.286 -2.44 0.016 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.6339)
##
## Null deviance: 83.297 on 119 degrees of freedom
## Residual deviance: 78.029 on 116 degrees of freedom
## AIC: 713.6
##
## Number of Fisher Scoring iterations: 5
A <- c(1, 0, 0, 0)
B <- c(1, 1, 0, 0)
C <- c(1, 0, 1, 0)
P <- c(1, 0, 0, 1)
DAB <- A - B
DAC <- A - C
DAP <- A - P
DBC <- B - C
DBP <- B - P
DCP <- C - P
library(gmodels)
estimable(M2, rbind(DAB, DAC, DAP, DBC, DBP, DCP))
## Estimate Std. Error t value DF Pr(>|t|)
## DAB 0.18829 0.3095 0.6083 116 0.54415
## DAC 0.60043 0.2898 2.0718 116 0.04051
## DAP 0.69544 0.2855 2.4356 116 0.01639
## DBC 0.41214 0.2793 1.4757 116 0.14274
## DBP 0.50716 0.2748 1.8453 116 0.06755
## DCP 0.09501 0.2525 0.3763 116 0.70735
MEDIAS <- estimable(M2, rbind(A, B, C, P))[, 1]
ERROR <- estimable(M2, rbind(A, B, C, P))[, 2]
par(mfrow = c(1, 1), mar = c(4, 4, 1, 1))
barplot.error(MEDIAS, ERROR, func = "sqrt", names.arg = c("A", "B", "C", "P"),
xlab = "Droga", ylab = "Muertes")
M <- glm(death ~ status/treatment, Gamma("sqrt"))
anova(M, test = "Chi")
## Analysis of Deviance Table
##
## Model: Gamma, link: sqrt
##
## Response: death
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 119 83.3
## status 1 0.00 118 83.3 0.964
## status:treatment 6 7.41 112 75.9 0.073 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
M2 <- update(M, ~. - treatment:status)
anova(M2, M, test = "Chi")
## Analysis of Deviance Table
##
## Model 1: death ~ status
## Model 2: death ~ status/treatment
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 118 83.3
## 2 112 75.9 6 7.41 0.073 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(M2, test = "Chi")
## Analysis of Deviance Table
##
## Model: Gamma, link: sqrt
##
## Response: death
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 119 83.3
## status 1 0.00132 118 83.3 0.97
###
DATOS <- read.table("~/desktop/cursos R/2013/species.csv", header = T, sep = ",")
attach(DATOS)
names(DATOS)
## [1] "pH" "Biomass" "Species"
M <- glm(Biomass ~ pH * Species, gaussian("log"))
par(mfrow = c(2, 2))
plot(M)
anova(M, test = "Chi")
## Analysis of Deviance Table
##
## Model: gaussian, link: log
##
## Response: Biomass
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 89 596
## pH 2 110 87 486 3.3e-16 ***
## Species 1 333 86 153 < 2e-16 ***
## pH:Species 2 23 84 130 6e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
M2 <- update(M, ~. - Species:pH)
anova(M2, M, test = "Chi")
## Analysis of Deviance Table
##
## Model 1: Biomass ~ pH + Species
## Model 2: Biomass ~ pH * Species
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 86 153
## 2 84 130 2 22.9 6e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow = c(1, 1), mar = c(4, 4, 1, 1))
plot(Biomass ~ Species, type = "n", xlab = "Riqueza de especies", ylab = "Biomasa (mg)")
COLOR <- c("red", "orange", "green")
k <- 0
for (i in levels(pH)) {
k <- k + 1
points(Biomass[pH == i] ~ Species[pH == i], col = COLOR[k], pch = 19)
SEC <- Species[pH == i]
SEC <- seq(min(SEC), max(SEC), length = 100)
ND <- data.frame(pH = i, Species = SEC)
PRED <- predict(M, ND, se = T)
lines(SEC, exp(PRED$fit), col = COLOR[k], lwd = 2)
lines(SEC, exp(PRED$fit + 2 * PRED$se.fit), col = COLOR[k], lwd = 2, lty = 2)
lines(SEC, exp(PRED$fit - 2 * PRED$se.fit), col = COLOR[k], lwd = 2, lty = 2)
}
PL <- c(0, 0, 0, 1, 1, 0)
PM <- c(0, 0, 0, 1, 0, 1)
PH <- c(0, 0, 0, 1, 0, 0)
DPLPM <- PL - PM
DPLPH <- PL - PH
DPMPH <- PM - PH
estimable(M, rbind(PL, PM, PH, DPLPM, DPLPH, DPMPH))
## Estimate Std. Error t value DF Pr(>|t|)
## PL -0.09666 0.019266 -5.0170 84 2.890e-06
## PM -0.07700 0.009152 -8.4138 84 8.971e-13
## PH -0.04765 0.004695 -10.1477 84 4.441e-16
## DPLPM -0.01966 0.021329 -0.9216 84 3.594e-01
## DPLPH -0.04901 0.019830 -2.4716 84 1.547e-02
## DPMPH -0.02936 0.010286 -2.8540 84 5.436e-03
############
DATOS <- read.table("~/desktop/cursos R/2013/droga.csv", header = T, sep = ",")
attach(DATOS)
names(DATOS)
## [1] "N" "MUERTES" "DROGA" "DOSIS"
VIVOS <- N - MUERTES
Y <- cbind(MUERTES, VIVOS)
M <- glm(Y ~ DOSIS * DROGA, binomial("logit"))
summary(M)
##
## Call:
## glm(formula = Y ~ DOSIS * DROGA, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3980 -0.6073 0.0969 0.6592 1.6930
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.041 0.303 -6.73 1.7e-11 ***
## DOSIS 2.877 0.343 8.38 < 2e-16 ***
## DROGAB 1.292 0.385 3.36 0.00078 ***
## DROGAC -0.308 0.431 -0.71 0.47469
## DOSIS:DROGAB -1.646 0.422 -3.90 9.6e-05 ***
## DOSIS:DROGAC 0.432 0.490 0.88 0.37793
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 311.241 on 50 degrees of freedom
## Residual deviance: 42.305 on 45 degrees of freedom
## AIC: 201.3
##
## Number of Fisher Scoring iterations: 4
# Checando la sobre dispersión devianza residual / grados de libertatd
# residuales
42.305/45
## [1] 0.9401
# No hay sobredispersión ya que el cociente es menor a 1 Si el cociente es
# mayor a una usar quasibinomial
M2 <- update(M, ~. - DROGA:DOSIS)
anova(M2, M, test = "Chi")
## Analysis of Deviance Table
##
## Model 1: Y ~ DOSIS + DROGA
## Model 2: Y ~ DOSIS * DROGA
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 47 72.2
## 2 45 42.3 2 29.9 3.3e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pchisq(29.876, 2, lower = FALSE)
## [1] 3.255e-07
anova(M, test = "Chi")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Y
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 50 311.2
## DOSIS 1 238.9 49 72.4 < 2e-16 ***
## DROGA 2 0.2 47 72.2 0.9
## DOSIS:DROGA 2 29.9 45 42.3 3.3e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(M)
##
## Call:
## glm(formula = Y ~ DOSIS * DROGA, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3980 -0.6073 0.0969 0.6592 1.6930
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.041 0.303 -6.73 1.7e-11 ***
## DOSIS 2.877 0.343 8.38 < 2e-16 ***
## DROGAB 1.292 0.385 3.36 0.00078 ***
## DROGAC -0.308 0.431 -0.71 0.47469
## DOSIS:DROGAB -1.646 0.422 -3.90 9.6e-05 ***
## DOSIS:DROGAC 0.432 0.490 0.88 0.37793
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 311.241 on 50 degrees of freedom
## Residual deviance: 42.305 on 45 degrees of freedom
## AIC: 201.3
##
## Number of Fisher Scoring iterations: 4
# Contrastes para las pendientes
A <- c(0, 1, 0, 0, 0, 0)
B <- c(0, 1, 0, 0, 1, 0)
C <- c(0, 1, 0, 0, 0, 1)
DAB <- A - B
DAC <- A - C
DBC <- B - C
EST <- estimable(M, rbind(A, B, C, DAB, DAC, DBC))
library(boot)
COLOR <- c("orchid", "red3", "blue4")
k <- 0
par(mfrow = c(1, 1), mar = c(4, 4, 1, 1))
plot(DOSIS, MUERTES/N, type = "n", ylab = "Probabilida de morir", xlab = "Dosis",
xlim = c(0, 1.9))
for (i in levels(DROGA)) {
k <- k + 1
KK <- DOSIS[DROGA == i]
SEQ <- seq(min(KK), max(KK), length = 30)
ND <- data.frame(DOSIS = SEQ, DROGA = i)
PRED <- predict(M, ND)
points(KK, (MUERTES/N)[DROGA == i], col = COLOR[k], pch = 17, cex = 0.8)
lines(SEQ, inv.logit(PRED), col = COLOR[k])
text(1.8, max(inv.logit(PRED)), round(EST[k, 1], 2), cex = 0.7)
legend(0, 1, levels(DROGA), title = "Droga", bty = "n", col = COLOR, pch = 17)
}