Roger Guevara — May 22, 2014, 6:01 PM
rm(list=ls()) #elimina TODOS los objetos del ambiente de trabajo
source("~/desktop/cursos R/2013/funciones.r")
DATOS <- read.table("~/desktop/cursos R/2014/productivity.csv", header = TRUE, sep =",")
attach(DATOS)
names(DATOS)
[1] "productivity" "mammals" "region"
sc(productivity, region)
M <- lm(productivity ~ region)
summary(M)
Call:
lm(formula = productivity ~ region)
Residuals:
Min 1Q Median 3Q Max
-7.00 -2.25 0.00 2.25 7.00
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.50 1.03 4.39 6.4e-05 ***
regionb 3.00 1.45 2.07 0.044 *
regionc 7.00 1.45 4.83 1.5e-05 ***
regiond 12.00 1.45 8.27 1.0e-10 ***
regione 10.00 1.45 6.89 1.2e-08 ***
regionf 18.50 1.45 12.75 < 2e-16 ***
regiong 14.00 1.57 8.94 1.1e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.9 on 47 degrees of freedom
Multiple R-squared: 0.829, Adjusted R-squared: 0.807
F-statistic: 38 on 6 and 47 DF, p-value: <2e-16
summary.aov(M)
Df Sum Sq Mean Sq F value Pr(>F)
region 6 1920 320 38 <2e-16 ***
Residuals 47 396 8
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(M)
Call:
lm(formula = productivity ~ region)
Residuals:
Min 1Q Median 3Q Max
-7.00 -2.25 0.00 2.25 7.00
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.50 1.03 4.39 6.4e-05 ***
regionb 3.00 1.45 2.07 0.044 *
regionc 7.00 1.45 4.83 1.5e-05 ***
regiond 12.00 1.45 8.27 1.0e-10 ***
regione 10.00 1.45 6.89 1.2e-08 ***
regionf 18.50 1.45 12.75 < 2e-16 ***
regiong 14.00 1.57 8.94 1.1e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.9 on 47 degrees of freedom
Multiple R-squared: 0.829, Adjusted R-squared: 0.807
F-statistic: 38 on 6 and 47 DF, p-value: <2e-16
par(mfrow=c(2, 2))
plot(M)
#Cambiar order y naombres de las etiquetas en un factor
#levels(region) <- list("G" = "g", "B" = "b", "C" = "c", "D" = "d", "E" = "e", "F" = "f", "A" = "a")
#Generar vectores para contrastes
A <- c(1,0,0,0,0,0,0)
B <- c(1,1,0,0,0,0,0)
C <- c(1,0,1,0,0,0,0)
D <- c(1,0,0,1,0,0,0)
E <- c(1,0,0,0,1,0,0)
F <- c(1,0,0,0,0,1,0)
G <- c(1,0,0,0,0,0,1)
DB_C <- B-C
DB_D <- B-D
DB_E <- B-E
DD_E <- D-E
library(gmodels)
Warning: package 'gmodels' was built under R version 2.15.3
estimable(M, rbind(A,B,C,D,E,F,G,DB_C,DB_D, DB_E, DD_E))
Estimate Std. Error t value DF Pr(>|t|)
A 4.5 1.026 4.388 47 6.436e-05
B 7.5 1.026 7.313 47 2.747e-09
C 11.5 1.026 11.213 47 7.105e-15
D 16.5 1.026 16.088 47 0.000e+00
E 14.5 1.026 14.138 47 0.000e+00
F 23.0 1.026 22.426 47 0.000e+00
G 18.5 1.184 15.622 47 0.000e+00
DB_C -4.0 1.450 -2.758 47 8.262e-03
DB_D -9.0 1.450 -6.205 47 1.316e-07
DB_E -7.0 1.450 -4.826 47 1.510e-05
DD_E 2.0 1.450 1.379 47 1.745e-01
tapply(productivity, region, ee)
a b c d e f g
0.8660 0.8660 0.8660 0.8660 0.8660 1.7321 0.7638
MEDIAS <- estimable(M, rbind(A,B,C,D,E,F,G))[, 1]
ERRORES <- estimable(M, rbind(A,B,C,D,E,F,G))[, 2]
par(mfrow=c(1, 1))
barplot.error(MEDIAS, ERRORES, col = "red", names.arg=LETTERS[1:7], ylim = c(0,25))
text(c(2,5,8,11,14,17,20), c(6,9,13,18,16,25,20), c("a", "b", "c", "ed", "d", "f", "e"))
####COVARIANZA
M <- lm(mammals ~ productivity + region + productivity:region)
M <- lm(mammals ~ productivity * region)
summary.aov(M)
Df Sum Sq Mean Sq F value Pr(>F)
productivity 1 1359 1359 455.8 <2e-16 ***
region 6 1097 183 61.3 <2e-16 ***
productivity:region 6 32 5 1.8 0.12
Residuals 40 119 3
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(M)
Call:
lm(formula = mammals ~ productivity * region)
Residuals:
Min 1Q Median 3Q Max
-5.964 -0.664 0.125 0.927 3.143
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.464 1.345 2.57 0.0138 *
productivity -0.298 0.266 -1.12 0.2706
regionb 6.250 2.485 2.52 0.0160 *
regionc 17.476 3.402 5.14 7.6e-06 ***
regiond 22.964 4.638 4.95 1.4e-05 ***
regione 19.833 4.136 4.80 2.3e-05 ***
regionf 27.429 3.402 8.06 6.5e-10 ***
regiong 24.355 7.785 3.13 0.0033 **
productivity:regionb -0.464 0.377 -1.23 0.2251
productivity:regionc -0.893 0.377 -2.37 0.0227 *
productivity:regiond -0.524 0.377 -1.39 0.1722
productivity:regione -0.714 0.377 -1.90 0.0652 .
productivity:regionf -0.149 0.298 -0.50 0.6201
productivity:regiong -0.188 0.491 -0.38 0.7038
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.73 on 40 degrees of freedom
Multiple R-squared: 0.954, Adjusted R-squared: 0.939
F-statistic: 64.2 on 13 and 40 DF, p-value: <2e-16
plot(productivity, mammals, type = "n")
LEV <- levels(region)
for(i in 1:7){
points(productivity[region==LEV[i]], mammals[region==LEV[i]], col =i, pch = 19)}
PA <- c(0,1,0,0,0,0,0,0,0,0,0,0,0,0)
PB <- c(0,1,0,0,0,0,0,0,1,0,0,0,0,0)
PC <- c(0,1,0,0,0,0,0,0,0,1,0,0,0,0)
PD <- c(0,1,0,0,0,0,0,0,0,0,1,0,0,0)
PE <- c(0,1,0,0,0,0,0,0,0,0,0,1,0,0)
PF <- c(0,1,0,0,0,0,0,0,0,0,0,0,1,0)
PG <- c(0,1,0,0,0,0,0,0,0,0,0,0,0,1)
LEV2 <- character()
CONT <- matrix(NA, 21, 5)
MATRIZ <- rbind(PA,PB,PC,PD,PE,PF,PG)
k <- 2
CC <- 0
for(i in (1:6)){
for(j in k:7){
CC <- CC +1
CONT[CC, ] <- unlist(estimable(M, MATRIZ[i,]- MATRIZ[j,])[,1:5])
LEV2 <- c(LEV2, paste(LEV[i], LEV[j]))
}
k<-k+1
}
rownames(CONT) <- LEV2
colnames(CONT) <- c("Estimate", "se", "tval", "df", "Pval")
abline
function (a = NULL, b = NULL, h = NULL, v = NULL, reg = NULL,
coef = NULL, untf = FALSE, ...)
{
int_abline <- function(a, b, h, v, untf, col = par("col"),
lty = par("lty"), lwd = par("lwd"), ...) .Internal(abline(a,
b, h, v, untf, col, lty, lwd, ...))
if (!is.null(reg)) {
if (!is.null(a))
warning("'a' is overridden by 'reg'")
a <- reg
}
if (is.object(a) || is.list(a)) {
p <- length(coefa <- as.vector(coef(a)))
if (p > 2)
warning(gettextf("only using the first two of %d regression coefficients",
p), domain = NA)
islm <- inherits(a, "lm")
noInt <- if (islm)
!as.logical(attr(stats::terms(a), "intercept"))
else p == 1
if (noInt) {
a <- 0
b <- coefa[1L]
}
else {
a <- coefa[1L]
b <- if (p >= 2)
coefa[2L]
else 0
}
}
if (!is.null(coef)) {
if (!is.null(a))
warning("'a' and 'b' are overridden by 'coef'")
a <- coef[1L]
b <- coef[2L]
}
int_abline(a = a, b = b, h = h, v = v, untf = untf, ...)
invisible()
}
<bytecode: 0x100f68f90>
<environment: namespace:graphics>
summary(M)
Call:
lm(formula = mammals ~ productivity * region)
Residuals:
Min 1Q Median 3Q Max
-5.964 -0.664 0.125 0.927 3.143
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.464 1.345 2.57 0.0138 *
productivity -0.298 0.266 -1.12 0.2706
regionb 6.250 2.485 2.52 0.0160 *
regionc 17.476 3.402 5.14 7.6e-06 ***
regiond 22.964 4.638 4.95 1.4e-05 ***
regione 19.833 4.136 4.80 2.3e-05 ***
regionf 27.429 3.402 8.06 6.5e-10 ***
regiong 24.355 7.785 3.13 0.0033 **
productivity:regionb -0.464 0.377 -1.23 0.2251
productivity:regionc -0.893 0.377 -2.37 0.0227 *
productivity:regiond -0.524 0.377 -1.39 0.1722
productivity:regione -0.714 0.377 -1.90 0.0652 .
productivity:regionf -0.149 0.298 -0.50 0.6201
productivity:regiong -0.188 0.491 -0.38 0.7038
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.73 on 40 degrees of freedom
Multiple R-squared: 0.954, Adjusted R-squared: 0.939
F-statistic: 64.2 on 13 and 40 DF, p-value: <2e-16
abline(3.4643, -0.2976)
abline(3.4643+6.2500, -0.2976 + -0.4643, col = "red")
abline(3.4643+17.4762, -0.2976 + -0.8929, col = "green")
abline(3.4643+22.9643, -0.2976 + -0.5238, col = "purple")
abline(3.4643+19.8333, -0.2976 + -0.7143, col = "cyan")
abline(3.4643+27.4286, -0.2976 + -0.1488, col = "pink")
abline(3.4643+24.3548, -0.2976 + -0.1881, col = "yellow")
plot(productivity, mammals, type = "n")
LEV <- levels(region)
for(i in 1:7){
points(productivity[region==LEV[i]], mammals[region==LEV[i]], col =i, pch = 19)}
K <- 0
for(i in LEV){
K <- K + 1
SEQ <- seq(min(productivity[region== i]), max(productivity[region== i]), length = 30)
PRED <- predict(M, data.frame(region = i, productivity = SEQ))
lines(SEQ, PRED, col = K, lwd =3)}
par(bg="darkgrey", fg = "red")
plot(productivity, mammals, type = "n")
LEV <- levels(region)
for(i in 1:7){
points(productivity[region==LEV[i]], mammals[region==LEV[i]], col =i, pch = 19)}
K <- 0
for(i in LEV){
K <- K + 1
SEQ <- seq(min(productivity[region== i]), max(productivity[region == i]), length = 30)
PRED <- predict(M, data.frame(region = i, productivity = SEQ), se = TRUE)
lines(SEQ, PRED$fit, col = K, lwd =3)
lines(SEQ, PRED$fit - PRED$se.fit*2, col = K, lwd =1, lty= 2)
lines(SEQ, PRED$fit + PRED$se.fit*2, col = K, lwd =1, lty= 2)
}
###SIMPLIFICAR
M2 <- lm(mammals ~ productivity + region)
anova(M2)
Analysis of Variance Table
Response: mammals
Df Sum Sq Mean Sq F value Pr(>F)
productivity 1 1359 1359 412.8 <2e-16 ***
region 6 1097 183 55.6 <2e-16 ***
Residuals 46 151 3
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(M2, M)
Analysis of Variance Table
Model 1: mammals ~ productivity + region
Model 2: mammals ~ productivity * region
Res.Df RSS Df Sum of Sq F Pr(>F)
1 46 151
2 40 119 6 32.2 1.8 0.12
AIC(M2, M)
df AIC
M2 9 226.9
M 15 226.0
par(bg="darkgrey", fg = "red")
plot(productivity, mammals, type = "n")
LEV <- levels(region)
for(i in 1:7){
points(productivity[region==LEV[i]], mammals[region==LEV[i]], col =i, pch = 19)}
K <- 0
for(i in LEV){
K <- K + 1
SEQ <- seq(min(productivity[region== i]), max(productivity[region == i]), length = 30)
PRED <- predict(M2, data.frame(region = i, productivity = SEQ), se = TRUE)
lines(SEQ, PRED$fit, col = K, lwd =3)
lines(SEQ, PRED$fit - PRED$se.fit*2, col = K, lwd =1, lty= 2)
lines(SEQ, PRED$fit + PRED$se.fit*2, col = K, lwd =1, lty= 2)
}
#####
DATOS$TV <- factor(rep(c("CD", "PN"), each =27))
DATOS$CONTI <- factor(rep(c("AM", "SEA","SEA", "AM"), c(13, 14, 14, 13)))
attach(DATOS)
The following object(s) are masked from 'DATOS (position 4)':
mammals, productivity, region
M <- lm(productivity ~ TV*CONTI)
summary(M)
Call:
lm(formula = productivity ~ TV * CONTI)
Residuals:
Min 1Q Median 3Q Max
-5.462 -2.022 0.033 2.088 8.538
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.077 0.829 6.12 1.4e-07 ***
TVPN 16.385 1.172 13.98 < 2e-16 ***
CONTISEA 6.637 1.151 5.77 5.1e-07 ***
TVPN:CONTISEA -12.242 1.628 -7.52 9.3e-10 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.99 on 50 degrees of freedom
Multiple R-squared: 0.807, Adjusted R-squared: 0.795
F-statistic: 69.7 on 3 and 50 DF, p-value: <2e-16
plot(M)