Linnerrud.csv
# Lectura de los datos
Linnerud <- read.csv("Linnerud.csv", row.names = 1);head(Linnerud)
## Weight Waist Pulse Pulls Squats Jumps
## 1 191 36 50 5 162 60
## 2 189 37 52 2 110 60
## 3 193 38 58 12 101 101
## 4 162 35 62 12 105 37
## 5 189 35 46 13 155 58
## 6 182 36 56 4 101 42
attach(Linnerud) #archivo en uso
lab = rownames(Linnerud) #etiquetas de las unidades en lab
1) Pulls ~ Weight,Waist y Pulse
# regresión a mano
# definición de los datos para el modelo
n <- dim(Linnerud)[1]; n # n es el número de observaciones
## [1] 20
x <- as.matrix(Linnerud[,1:3]); x # x es el caracter descriptivo
## Weight Waist Pulse
## 1 191 36 50
## 2 189 37 52
## 3 193 38 58
## 4 162 35 62
## 5 189 35 46
## 6 182 36 56
## 7 211 38 56
## 8 167 34 60
## 9 176 31 74
## 10 154 33 56
## 11 169 34 50
## 12 166 33 52
## 13 154 34 64
## 14 247 46 50
## 15 193 36 46
## 16 202 37 62
## 17 176 37 54
## 18 157 32 52
## 19 156 33 54
## 20 138 33 68
p <- dim(x)[2]; p # numero de regresores
## [1] 3
y <- Pulls # y es el caracter respuesta
nomy <- deparse(substitute(Pulls)); nomy # nomy es su nombre
## [1] "Pulls"
nom <- c(colnames(x),nomy) ; nom # etiquetas de las variables en nom
## [1] "Weight" "Waist" "Pulse" "Pulls"
# estadísticas
xm <- apply(x,2,mean); xm # cálculo de xm, promedio de x
## Weight Waist Pulse
## 178.6 35.4 56.1
ym <- sum(y)/n; ym # cálculo de ym, promedio de y
## [1] 9.45
ssx <- apply(x^2,2,sum); ssx # ssx es la suma de los x cuadros
## Weight Waist Pulse
## 649542 25258 63932
ssy <- sum(y^2); ssy # ssy es la suma de los y cuadros
## [1] 2317
sxy <- t(x)%*%y ; sxy # sxy es la suma de los productos xy
## [,1]
## Weight 32789
## Waist 6513
## Pulse 10712
ssxc <- ssx-n*xm^2 ; ssxc # ssxc es ssx centrado sobre el promedio
## Weight Waist Pulse
## 11582.8 194.8 987.8
ssyc <- ssy-n*ym^2 ; ssyc # ssyc es ssy centrado sobre el promedio
## [1] 530.95
sxyc <- sxy-n*xm*ym; sxyc # sxyc es sxy centrado sobre el promedio
## [,1]
## Weight -966.4
## Waist -177.6
## Pulse 109.1
varx <- ssxc/n; varx # varx es la varianza de x
## Weight Waist Pulse
## 579.14 9.74 49.39
vary <- ssyc/n; vary # vary es la varianza de y
## [1] 26.5475
covxy <- sxyc/n; covxy # covxy es la covarianza de xy
## [,1]
## Weight -48.320
## Waist -8.880
## Pulse 5.455
stats <- matrix(8*(2*p+1)*0,nrow=8,ncol=(2*p+1)) # construcción de tabla de estadísticas
rownames(stats) <- c("Mínimo","Máximo","Promedio","Total","Suma de cuadrados",
"Cuadrados centrados","Varianza covarianza","Desvío estándar")
colnames(stats) <- c(nom,"x1y","x2y","x3y")
stats[,1:p] <- rbind(apply(x,2,min),apply(x,2,max),xm,apply(x,2,sum),ssx,ssxc,varx,sqrt(varx))
stats[,p+1] <- rbind(min(y),max(y),ym,sum(y),ssy,ssyc,vary,sqrt(vary))
stats[,c((p+2):(2*p+1))] <- rbind(t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(sxy),t(sxyc),t(covxy),t(rep(0,p)))
stats # impresión de la tabla
## Weight Waist Pulse Pulls x1y x2y x3y
## Mínimo 138.00000 31.000000 46.000000 1.000000 0.00 0.00 0.000
## Máximo 247.00000 46.000000 74.000000 17.000000 0.00 0.00 0.000
## Promedio 178.60000 35.400000 56.100000 9.450000 0.00 0.00 0.000
## Total 3572.00000 708.000000 1122.000000 189.000000 0.00 0.00 0.000
## Suma de cuadrados 649542.00000 25258.000000 63932.000000 2317.000000 32789.00 6513.00 10712.000
## Cuadrados centrados 11582.80000 194.800000 987.800000 530.950000 -966.40 -177.60 109.100
## Varianza covarianza 579.14000 9.740000 49.390000 26.547500 -48.32 -8.88 5.455
## Desvío estándar 24.06533 3.120897 7.027802 5.152427 0.00 0.00 0.000
print("Matriz de varianza y covarianza de los regresores")
## [1] "Matriz de varianza y covarianza de los regresores"
vcovx <- (n-1)/n * cov(x) ; vcovx # matriz de varianza/covarianza de los xy
## Weight Waist Pulse
## Weight 579.14 65.36 -61.86
## Waist 65.36 9.74 -7.74
## Pulse -61.86 -7.74 49.39
# estimación
X <- cbind(rep(1,n),x); X # se necesita ajuntar la columna de unos
## Weight Waist Pulse
## 1 1 191 36 50
## 2 1 189 37 52
## 3 1 193 38 58
## 4 1 162 35 62
## 5 1 189 35 46
## 6 1 182 36 56
## 7 1 211 38 56
## 8 1 167 34 60
## 9 1 176 31 74
## 10 1 154 33 56
## 11 1 169 34 50
## 12 1 166 33 52
## 13 1 154 34 64
## 14 1 247 46 50
## 15 1 193 36 46
## 16 1 202 37 62
## 17 1 176 37 54
## 18 1 157 32 52
## 19 1 156 33 54
## 20 1 138 33 68
p1 <- p + 1; p1 # un regresor más
## [1] 4
XX <- t(X)%*%X; XX # matriz X'X
## Weight Waist Pulse
## 20 3572 708 1122
## Weight 3572 649542 127756 199152
## Waist 708 127756 25258 39564
## Pulse 1122 199152 39564 63932
XXm1 <- solve(XX); XXm1 # su inversa
## Weight Waist Pulse
## 15.25501668 1.432897e-02 -0.3526671297 -9.411326e-02
## Weight 0.01432897 3.616185e-04 -0.0023606976 8.297072e-05
## Waist -0.35266713 -2.360698e-03 0.0212746646 3.772656e-04
## Pulse -0.09411326 8.297072e-05 0.0003772656 1.175392e-03
bh <- XXm1 %*% t(X) %*% y # estimación de los betas
print("Estimaciones de los beta")
## [1] "Estimaciones de los beta"
bh
## [,1]
## 47.96841291
## Weight 0.07884384
## Waist -1.45584256
## Pulse -0.01895002
eta <- X %*% bh; eta # valores estimados
## [,1]
## 1 9.6697532
## 2 8.0183229
## 3 6.7641556
## 4 8.6117242
## 5 11.0437082
## 6 8.8464585
## 7 8.2212448
## 8 10.4996860
## 9 15.3115079
## 10 11.0063587
## 11 10.8468738
## 12 12.0282848
## 13 9.3989160
## 14 -0.4734174
## 15 9.9032410
## 16 8.8537926
## 17 6.9554530
## 18 12.7745328
## 19 11.2019464
## 20 9.5174570
res <- y - eta; res # residuos
## [,1]
## 1 -4.6697532
## 2 -6.0183229
## 3 5.2358444
## 4 3.3882758
## 5 1.9562918
## 6 -4.8464585
## 7 -0.2212448
## 8 -4.4996860
## 9 -0.3115079
## 10 5.9936413
## 11 6.1531262
## 12 0.9717152
## 13 4.6010840
## 14 1.4734174
## 15 -3.9032410
## 16 3.1462074
## 17 -2.9554530
## 18 -1.7745328
## 19 3.7980536
## 20 -7.5174570
por_res <- (res/y)*100
unidad <- cbind(x,y,eta,res,por_res) # tabla de resultados
rownames(unidad) <- lab
colnames(unidad) <- c(nom,"eta","residuos","% res")
unidad # impresión de la tabla
## Weight Waist Pulse Pulls eta residuos % res
## 1 191 36 50 5 9.6697532 -4.6697532 -93.395064
## 2 189 37 52 2 8.0183229 -6.0183229 -300.916146
## 3 193 38 58 12 6.7641556 5.2358444 43.632037
## 4 162 35 62 12 8.6117242 3.3882758 28.235632
## 5 189 35 46 13 11.0437082 1.9562918 15.048399
## 6 182 36 56 4 8.8464585 -4.8464585 -121.161463
## 7 211 38 56 8 8.2212448 -0.2212448 -2.765560
## 8 167 34 60 6 10.4996860 -4.4996860 -74.994766
## 9 176 31 74 15 15.3115079 -0.3115079 -2.076720
## 10 154 33 56 17 11.0063587 5.9936413 35.256714
## 11 169 34 50 17 10.8468738 6.1531262 36.194860
## 12 166 33 52 13 12.0282848 0.9717152 7.474732
## 13 154 34 64 14 9.3989160 4.6010840 32.864886
## 14 247 46 50 1 -0.4734174 1.4734174 147.341736
## 15 193 36 46 6 9.9032410 -3.9032410 -65.054016
## 16 202 37 62 12 8.8537926 3.1462074 26.218395
## 17 176 37 54 4 6.9554530 -2.9554530 -73.886324
## 18 157 32 52 11 12.7745328 -1.7745328 -16.132117
## 19 156 33 54 15 11.2019464 3.7980536 25.320357
## 20 138 33 68 2 9.5174570 -7.5174570 -375.872850
# gráficos clásicos: ejemplo con el primero regresor
plot(x[,1],y,xlab="Weight",ylab="Pulls",ylim=c(min(y,eta),max(y,eta)),pch="*")
text(x[,1],y,labels=lab,pos=2)
points(x[,1],eta,col="red",pch="*")
text(x[,1],eta,labels=lab,col="red",pos=2)
# Añadiendo una leyenda
legend("topright",
legend=c("Y", "Eta"),
pch=c("*", "*"),
col=c("black", "red"),
title="Legend Title")

# gráficos de valores estimados y residuos
plot(eta,y,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(eta,y,labels=lab,pos=2)
points(eta,eta,col="red")

plot(eta,res,asp=1,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(eta,res,labels=lab,pos=2)

# datos para anova
ssr <- t(eta) %*% eta; ssr # suma de cuadrados por regresión
## [,1]
## [1,] 1966.346
sse <- ssy - ssr; sse # suma por residuos
## [,1]
## [1,] 350.6545
msr <- ssr/p1; msr # cuadrado promedio por regresión
## [,1]
## [1,] 491.5864
mse <- sse/(n-p1); mse # cuadrado promedio por residuos
## [,1]
## [1,] 21.91591
df <- c(p1,n-p1,n) # grados de libertad
ssq <- c(ssr,sse,ssy) # sumas de cuadrados
msq <- c(msr,mse,0) # suma de residuos
anova <- (cbind(df,ssq,msq)) # tabla de anova
colnames(anova) <- c("Degrees of Freedom","Sum of squares","Mean squares")
rownames(anova) <- c("Regression","Residuals","Total")
anova
## Degrees of Freedom Sum of squares Mean squares
## Regression 4 1966.3455 491.58638
## Residuals 16 350.6545 21.91591
## Total 20 2317.0000 0.00000
# apalancamiento
Hat <- X %*% XXm1 %*% t(X) # matriz H sombrero
rownames(Hat) <- lab # inclusivo de su etiquetas
colnames(Hat) <- lab
Hat
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 1 0.10655710 0.070795483 0.0267442663 -0.019894425 0.143901992 0.04801517 0.084681195 0.02355179 0.020227354 0.03271984 0.0878217868 0.087116980 -0.033406882 0.061911065 0.135338940 0.044948432 0.025666204 0.082872382 0.049672292 -0.079240967
## 2 0.07079548 0.072744656 0.0591447046 0.032960464 0.076977336 0.05388813 0.059433539 0.02994190 -0.044026733 0.04004485 0.0627548367 0.049177635 0.021165578 0.130247731 0.083492901 0.034584771 0.070673506 0.044444385 0.046037095 0.005517226
## 3 0.02674427 0.059144705 0.1045436224 0.080027350 -0.011382549 0.06015351 0.081814272 0.04530832 0.033966692 0.01568300 -0.0003265344 -0.011227007 0.072991190 0.203832839 0.007562888 0.093186628 0.078009743 -0.026308132 0.005319444 0.080955755
## 4 -0.01989443 0.032960464 0.0800273504 0.144583882 -0.065286252 0.05366668 -0.013028942 0.07802181 0.013881794 0.08288966 0.0148177663 0.006434553 0.159045977 0.053273944 -0.050658916 0.027628581 0.103171448 0.014652526 0.062938353 0.220873748
## 5 0.14390199 0.076977336 -0.0113825494 -0.065286252 0.217677155 0.04214121 0.080544327 0.01323973 0.002425648 0.04447671 0.1325686301 0.135519874 -0.081670547 -0.008255014 0.196274082 0.015655049 0.004385998 0.137587408 0.074529859 -0.151310641
## 6 0.04801517 0.053888135 0.0601535085 0.053666680 0.042141211 0.05211761 0.055857841 0.04720808 0.036821951 0.04348146 0.0429084448 0.039575224 0.051309522 0.084090266 0.046061061 0.055259577 0.057207740 0.036631437 0.042309191 0.051295906
## 7 0.08468120 0.059433539 0.0818142724 -0.013028942 0.080544327 0.05585784 0.174977008 0.02892970 0.192415717 -0.03648079 0.0045543385 0.016156756 -0.029278175 0.184517658 0.081615445 0.167363855 -0.005877547 -0.012766013 -0.032443340 -0.082986844
## 8 0.02355179 0.029941901 0.0453083249 0.078021808 0.013239728 0.04720808 0.028929704 0.06993305 0.110931992 0.06584934 0.0378666682 0.046681200 0.089667404 -0.017447145 0.010045775 0.053515097 0.043489458 0.052706217 0.058530119 0.112029493
## 9 0.02022735 -0.044026733 0.0339666916 0.013881794 0.002425648 0.03682195 0.192415717 0.11093199 0.719766801 -0.02712212 -0.0588421522 0.027407036 0.045470797 -0.174755123 -0.034563859 0.289727970 -0.147815572 0.009736357 -0.043585692 0.027931145
## 10 0.03271984 0.040044849 0.0156829955 0.082889660 0.044476706 0.04348146 -0.036480794 0.06584934 -0.027122123 0.11322903 0.0900130217 0.086623986 0.095692819 -0.078870340 0.038499190 -0.032695421 0.076016444 0.108793697 0.112880264 0.128275379
## 11 0.08782179 0.062754837 -0.0003265344 0.014817766 0.132568630 0.04290844 0.004554338 0.03786667 -0.058842152 0.09001302 0.1214671139 0.115919153 0.012633219 -0.044079980 0.120454746 -0.030935850 0.054510550 0.131396451 0.105656818 -0.001159028
## 12 0.08711698 0.049177635 -0.0112270070 0.006434553 0.135519874 0.03957522 0.016156756 0.04668120 0.027407036 0.08662399 0.1159191532 0.122933095 0.009602991 -0.098425320 0.115735082 -0.008523808 0.025639376 0.138872382 0.101702138 -0.006921327
## 13 -0.03340688 0.021165578 0.0729911898 0.159045977 -0.081670547 0.05130952 -0.029278175 0.08966740 0.045470797 0.09569282 0.0126332194 0.009602991 0.180692603 0.002902642 -0.070143001 0.024169199 0.098758089 0.022752300 0.072389389 0.255254885
## 14 0.06191106 0.130247731 0.2038328389 0.053273944 -0.008255014 0.08409027 0.184517658 -0.01744714 -0.174755123 -0.07887034 -0.0440799805 -0.098425320 0.002902642 0.634785903 0.050304161 0.144958452 0.145588874 -0.153010458 -0.085468600 -0.036101554
## 15 0.13533894 0.083492901 0.0075628877 -0.050658916 0.196274082 0.04606106 0.081615445 0.01004578 -0.034563859 0.03849919 0.1204547458 0.115735082 -0.070143001 0.050304161 0.183045989 0.017377195 0.024204766 0.114198754 0.065305599 -0.134150800
## 16 0.04494843 0.034584771 0.0931866280 0.027628581 0.015655049 0.05525958 0.167363855 0.05351510 0.289727970 -0.03269542 -0.0309358502 -0.008523808 0.024169199 0.144958452 0.017377195 0.196649995 -0.013721030 -0.036117379 -0.041306754 -0.001724560
## 17 0.02566620 0.070673506 0.0780097427 0.103171448 0.004385998 0.05720774 -0.005877547 0.04348946 -0.147815572 0.07601644 0.0545105501 0.025639376 0.098758089 0.145588874 0.024204766 -0.013721030 0.130102980 0.030278423 0.070394162 0.129316389
## 18 0.08287238 0.044444385 -0.0263081321 0.014652526 0.137587408 0.03663144 -0.012766013 0.05270622 0.009736357 0.10879370 0.1313964513 0.138872382 0.022752300 -0.153010458 0.114198754 -0.036117379 0.030278423 0.162884880 0.124332114 0.016062269
## 19 0.04967229 0.046037095 0.0053194439 0.062938353 0.074529859 0.04230919 -0.032443340 0.05853012 -0.043585692 0.11288026 0.1056568184 0.101702138 0.072389389 -0.085468600 0.065305599 -0.041306754 0.070394162 0.124332114 0.118015774 0.092791775
## 20 -0.07924097 0.005517226 0.0809557552 0.220873748 -0.151310641 0.05129591 -0.082986844 0.11202949 0.027931145 0.12827538 -0.0011590278 -0.006921327 0.255254885 -0.036101554 -0.134150800 -0.001724560 0.129316389 0.016062269 0.092791775 0.373291750
lev <- diag(Hat); lev # apalancamientos
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 0.10655710 0.07274466 0.10454362 0.14458388 0.21767716 0.05211761 0.17497701 0.06993305 0.71976680 0.11322903 0.12146711 0.12293309 0.18069260 0.63478590 0.18304599 0.19664999 0.13010298 0.16288488 0.11801577 0.37329175
pesos <- cbind(x,y,lev) # construcción de una salida conjunta
rownames(pesos) <- lab
pesos # salida
## Weight Waist Pulse y lev
## 1 191 36 50 5 0.10655710
## 2 189 37 52 2 0.07274466
## 3 193 38 58 12 0.10454362
## 4 162 35 62 12 0.14458388
## 5 189 35 46 13 0.21767716
## 6 182 36 56 4 0.05211761
## 7 211 38 56 8 0.17497701
## 8 167 34 60 6 0.06993305
## 9 176 31 74 15 0.71976680
## 10 154 33 56 17 0.11322903
## 11 169 34 50 17 0.12146711
## 12 166 33 52 13 0.12293309
## 13 154 34 64 14 0.18069260
## 14 247 46 50 1 0.63478590
## 15 193 36 46 6 0.18304599
## 16 202 37 62 12 0.19664999
## 17 176 37 54 4 0.13010298
## 18 157 32 52 11 0.16288488
## 19 156 33 54 15 0.11801577
## 20 138 33 68 2 0.37329175
niv <- 2 * p1 / n; niv # nivel para apalancamiento
## [1] 0.4
level <- which(lev>niv) # unidades con fuerte apalancamiento
out <- rbind(level,lev[level]) # salida
rownames(out) <- c("unidad","apalancamiento"); out
## 9 14
## unidad 9.0000000 14.0000000
## apalancamiento 0.7197668 0.6347859
# De acuerdo al gráfico de residuos vs estimados y Resultados del apalancamiento
# podemos ver que los puntos 14 y 9 son los que tienen mayor apalancamiento ya que se alejan de donde están la mayor concentración de puntos si observamos el gráfico de residuos vs estimados. Por otro lado los valores más altos de apalancamiento corresponden a las observaciones 9 (apal= 0.71976680) y 14 (apal= 0.63478590).
# comparación entre modelo total y modelo sin unidades con apalancamiento
# modelo completo
lm1 <- lm(y~Weight+Waist+Pulse,data=Linnerud); summary(lm1)
##
## Call:
## lm(formula = y ~ Weight + Waist + Pulse, data = Linnerud)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5175 -4.0524 0.3752 3.4907 6.1531
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 47.96841 18.28462 2.623 0.0184 *
## Weight 0.07884 0.08902 0.886 0.3889
## Waist -1.45584 0.68283 -2.132 0.0488 *
## Pulse -0.01895 0.16050 -0.118 0.9075
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.681 on 16 degrees of freedom
## Multiple R-squared: 0.3396, Adjusted R-squared: 0.2157
## F-statistic: 2.742 on 3 and 16 DF, p-value: 0.07742
residuals(lm1)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## -4.6697532 -6.0183229 5.2358444 3.3882758 1.9562918 -4.8464585 -0.2212448 -4.4996860 -0.3115079 5.9936413 6.1531262 0.9717152 4.6010840 1.4734174 -3.9032410 3.1462074 -2.9554530 -1.7745328 3.7980536 -7.5174570
# y su plot estimados observados
plot(lm1$fitted.values,y,pch=20,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm1$fitted.values,y,labels=lab,pos=3,offset(0.5))
points(lm1$fitted.values,lm1$fitted.values,pch=20,col="red")

# y su plot estimados residuos
plot(lm1$fitted.values,lm1$residuals,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm1$fitted.values,lm1$residuals,labels=lab,pos=3,offset(0.5))

# Se ve en el gráfico de residuos vs estimados: se nota observaciones con alto valor residual como la 20,10.
# Por otro lado también un alto grado de apalancamiento en las observaciones 9 y 14
# Se procederá a evaluar el retiro de las observaciones 9 y 14
# modelo sin unidades con fuerte apalancamiento (Todas las observaciones menos la 9 y 14)
lm2 <- lm(y[-level]~Weight+Waist+Pulse,data=Linnerud[-level,]); summary(lm2)
##
## Call:
## lm(formula = y[-level] ~ Weight + Waist + Pulse, data = Linnerud[-level,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.8968 -4.1278 0.0871 3.6142 6.3868
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.23659 27.79140 2.203 0.0448 *
## Weight 0.14054 0.15682 0.896 0.3853
## Waist -2.27499 1.50604 -1.511 0.1531
## Pulse 0.06383 0.26214 0.244 0.8111
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.924 on 14 degrees of freedom
## Multiple R-squared: 0.2074, Adjusted R-squared: 0.03755
## F-statistic: 1.221 on 3 and 14 DF, p-value: 0.3387
residuals(lm2)
## 1 2 3 4 5 6 7 8 10 11 12 13 15 16 17 18 19 20
## -4.37139407 -4.94299365 6.38684624 3.66322962 1.89002397 -4.48954741 -0.01517551 -5.18678537 5.62055277 6.17046670 0.18942586 4.38487974 -3.39713907 2.59168142 -1.24366285 -2.82072014 3.46714208 -7.89683030
# hay que tirar las unidades también de la representación gráfica
plot(lm2$fitted.values,y[-level],pch=20,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm2$fitted.values,y[-level],labels=lab,pos=3,offset(0.5))
points(lm2$fitted.values,lm2$fitted.values,pch=20,col="red")

plot(lm2$fitted.values,lm2$residuals,asp=1,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm2$fitted.values,lm2$residuals,labels=lab,pos=3,offset(0.5))

# En el gráfico de residuos vs estimados:
#Luego de retirar las observaciones con alto grado de apalancamiento (9 y 14) se puede ver que la nube de puntos está más concentrada en la línea con residuo 0. Además ya no tener observaciones con un alto apalancamiento
# gráficos de comparación de estimados y residuos
# ambos contra y y no contra los estimados
plot(y,lm1$fitted.values,pch=20,asp=1,ylab="fitted values")
abline(0,1)
text(y,lm1$fitted.values,y,labels=lab,pos=4,offset(0.5))
points(y[-out[1,]],lm2$fitted.values,pch=20,col="red")
text(y[-out[1,]],lm2$fitted.values,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))
legend("bottomright", # Escoge la posición adecuada para tu gráfico
legend=c("Modelo 1", "Modelo 2 sin Obs 9 y 14"),
pch=c(2, 2),
col=c("black", "red"),
title="Leyenda")

# por esto hay que cambiar el signo a los residuos
plot(y,-lm1$residuals,pch=20,asp=1,ylab="residuals")
abline(h=0)
text(y,-lm1$residuals,labels=lab,pch=20,pos=4,offset(0.5))
points(y[-out[1,]],-lm2$residuals,pch=20,col="red")
text(y[-out[1,]],-lm2$residuals,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))

# Comentarios:
# Mostrar la diferencia entre betas y entre los residuos de los dos modelos.
# Modelo 1: Con todas las observaciones
# Pulls ~ Weight,Waist,Pulse
# lm1 <- lm(y~Weight+Waist+Pulse,data=Linnerud)
# Estimate(betas) Std. Error t value Pr(>|t|)
# (Intercept) 47.96841 18.28462 2.623 0.0184 *
# Weight 0.07884 0.08902 0.886 0.3889
# Waist -1.45584 0.68283 -2.132 0.0488 *
# Pulse -0.01895 0.16050 -0.118 0.9075
# beta = [47.96841 (intercepto) ,0.07884(Weight), -1.45584(Waist), -0.01895(Pulse)]
# residuals(lm1):
# 1 2 3 4 5 6 7
# -4.6697532 -6.0183229 5.2358444 3.3882758 1.9562918 -4.8464585 -0.2212448
# 8 9 10 11 12 13 14
# -4.4996860 -0.3115079 5.9936413 6.1531262 0.9717152 4.6010840 1.4734174
# 15 16 17 18 19 20
# -3.9032410 3.1462074 -2.9554530 -1.7745328 3.7980536 -7.5174570
# Modelo 2: Sin observaciones con alto apalancamiento (observaciones 9 y 14)
# Pulls ~ Weight,Waist,Pulse
# lm2 <- lm(y[-level]~Weight+Waist+Pulse,data=Linnerud[-level,]);
# Estimate(Betas) Std. Error t value Pr(>|t|)
# (Intercept) 61.23659 27.79140 2.203 0.0448 *
# Weight 0.14054 0.15682 0.896 0.3853
# Waist -2.27499 1.50604 -1.511 0.1531
# Pulse 0.06383 0.26214 0.244 0.8111
# beta = [61.23659 (intercepto) ,0.14054 (Weight), -2.27499(Waist), 0.06383(Pulse)]
# residuals(lm2):
# 1 2 3 4 5 6
# -4.37139407 -4.94299365 6.38684624 3.66322962 1.89002397 -4.48954741
# 7 8 10 11 12 13
# -0.01517551 -5.18678537 5.62055277 6.17046670 0.18942586 4.38487974
# 15 16 17 18 19 20
# -3.39713907 2.59168142 -1.24366285 -2.82072014 3.46714208 -7.89683030
# Comentarios:
# La eliminación de observaciones con alto apalancamiento (9 y 14) ha afectado notablemente las estimaciones de los coeficientes y su significancia estadística. Esto es particularmente evidente en el cambio de la significancia de "Waist" y en los cambios en los coeficientes de "Intercept" y "Pulse"
# Dada la eliminación de las observaciones que generan un alto apalancamiento del modelo, el rango en el cual oscilan los residuos no sifre una variación significativa, lo que quiere decir que la variabilidad en el residuos no se reduce significativamente.Esto sugiere que las observaciones 9 y 14 no estaban ejerciendo una influencia dramática en la mayoría de las otras predicciones, aunque sí afectaron las estimaciones de los coeficientes como se vio previamente.
2) Squats ~ Weight,Waist y Pulse
head(Linnerud)
## Weight Waist Pulse Pulls Squats Jumps
## 1 191 36 50 5 162 60
## 2 189 37 52 2 110 60
## 3 193 38 58 12 101 101
## 4 162 35 62 12 105 37
## 5 189 35 46 13 155 58
## 6 182 36 56 4 101 42
# regresión a mano
# definición de los datos para el modelo
n <- dim(Linnerud)[1]; n # n es el número de observaciones
## [1] 20
x <- as.matrix(Linnerud[,1:3]); x # x es el caracter descriptivo
## Weight Waist Pulse
## 1 191 36 50
## 2 189 37 52
## 3 193 38 58
## 4 162 35 62
## 5 189 35 46
## 6 182 36 56
## 7 211 38 56
## 8 167 34 60
## 9 176 31 74
## 10 154 33 56
## 11 169 34 50
## 12 166 33 52
## 13 154 34 64
## 14 247 46 50
## 15 193 36 46
## 16 202 37 62
## 17 176 37 54
## 18 157 32 52
## 19 156 33 54
## 20 138 33 68
p <- dim(x)[2]; p # numero de regresores
## [1] 3
y <- Squats # y es el caracter respuesta
nomy <- deparse(substitute(Squats)); nomy # nomy es su nombre
## [1] "Squats"
nom <- c(colnames(x),nomy) ; nom # etiquetas de las variables en nom
## [1] "Weight" "Waist" "Pulse" "Squats"
# estadísticas
xm <- apply(x,2,mean); xm # cálculo de xm, promedio de x
## Weight Waist Pulse
## 178.6 35.4 56.1
ym <- sum(y)/n; ym # cálculo de ym, promedio de y
## [1] 145.55
ssx <- apply(x^2,2,sum); ssx # ssx es la suma de los x cuadros
## Weight Waist Pulse
## 649542 25258 63932
ssy <- sum(y^2); ssy # ssy es la suma de los y cuadros
## [1] 498073
sxy <- t(x)%*%y ; sxy # sxy es la suma de los productos xy
## [,1]
## Weight 505432
## Waist 100592
## Pulse 165236
ssxc <- ssx-n*xm^2 ; ssxc # ssxc es ssx centrado sobre el promedio
## Weight Waist Pulse
## 11582.8 194.8 987.8
ssyc <- ssy-n*ym^2 ; ssyc # ssyc es ssy centrado sobre el promedio
## [1] 74376.95
sxyc <- sxy-n*xm*ym; sxyc # sxyc es sxy centrado sobre el promedio
## [,1]
## Weight -14472.6
## Waist -2457.4
## Pulse 1928.9
varx <- ssxc/n; varx # varx es la varianza de x
## Weight Waist Pulse
## 579.14 9.74 49.39
vary <- ssyc/n; vary # vary es la varianza de y
## [1] 3718.847
covxy <- sxyc/n; covxy # covxy es la covarianza de xy
## [,1]
## Weight -723.630
## Waist -122.870
## Pulse 96.445
stats <- matrix(8*(2*p+1)*0,nrow=8,ncol=(2*p+1)) # construcción de tabla de estadísticas
rownames(stats) <- c("Mínimo","Máximo","Promedio","Total","Suma de cuadrados",
"Cuadrados centrados","Varianza covarianza","Desvío estándar")
colnames(stats) <- c(nom,"x1y","x2y","x3y")
stats[,1:p] <- rbind(apply(x,2,min),apply(x,2,max),xm,apply(x,2,sum),ssx,ssxc,varx,sqrt(varx))
stats[,p+1] <- rbind(min(y),max(y),ym,sum(y),ssy,ssyc,vary,sqrt(vary))
stats[,c((p+2):(2*p+1))] <- rbind(t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(sxy),t(sxyc),t(covxy),t(rep(0,p)))
stats # impresión de la tabla
## Weight Waist Pulse Squats x1y x2y x3y
## Mínimo 138.00000 31.000000 46.000000 50.00000 0.00 0.00 0.000
## Máximo 247.00000 46.000000 74.000000 251.00000 0.00 0.00 0.000
## Promedio 178.60000 35.400000 56.100000 145.55000 0.00 0.00 0.000
## Total 3572.00000 708.000000 1122.000000 2911.00000 0.00 0.00 0.000
## Suma de cuadrados 649542.00000 25258.000000 63932.000000 498073.00000 505432.00 100592.00 165236.000
## Cuadrados centrados 11582.80000 194.800000 987.800000 74376.95000 -14472.60 -2457.40 1928.900
## Varianza covarianza 579.14000 9.740000 49.390000 3718.84750 -723.63 -122.87 96.445
## Desvío estándar 24.06533 3.120897 7.027802 60.98235 0.00 0.00 0.000
print("Matriz de varianza y covarianza de los regresores")
## [1] "Matriz de varianza y covarianza de los regresores"
vcovx <- (n-1)/n * cov(x) ; vcovx # matriz de varianza/covarianza de los xy
## Weight Waist Pulse
## Weight 579.14 65.36 -61.86
## Waist 65.36 9.74 -7.74
## Pulse -61.86 -7.74 49.39
# estimación
X <- cbind(rep(1,n),x); X # se necesita adjuntar la columna de unos
## Weight Waist Pulse
## 1 1 191 36 50
## 2 1 189 37 52
## 3 1 193 38 58
## 4 1 162 35 62
## 5 1 189 35 46
## 6 1 182 36 56
## 7 1 211 38 56
## 8 1 167 34 60
## 9 1 176 31 74
## 10 1 154 33 56
## 11 1 169 34 50
## 12 1 166 33 52
## 13 1 154 34 64
## 14 1 247 46 50
## 15 1 193 36 46
## 16 1 202 37 62
## 17 1 176 37 54
## 18 1 157 32 52
## 19 1 156 33 54
## 20 1 138 33 68
p1 <- p + 1; p1 # un regresor más
## [1] 4
XX <- t(X)%*%X; XX # matriz X'X
## Weight Waist Pulse
## 20 3572 708 1122
## Weight 3572 649542 127756 199152
## Waist 708 127756 25258 39564
## Pulse 1122 199152 39564 63932
XXm1 <- solve(XX); XXm1 # su inversa
## Weight Waist Pulse
## 15.25501668 1.432897e-02 -0.3526671297 -9.411326e-02
## Weight 0.01432897 3.616185e-04 -0.0023606976 8.297072e-05
## Waist -0.35266713 -2.360698e-03 0.0212746646 3.772656e-04
## Pulse -0.09411326 8.297072e-05 0.0003772656 1.175392e-03
bh <- XXm1 %*% t(X) %*% y # estimación de los betas
print("Estimaciones de los beta")
## [1] "Estimaciones de los beta"
bh
## [,1]
## 623.2817463
## Weight 0.7276600
## Waist -17.3872206
## Pulse 0.1393189
eta <- X %*% bh; eta # valores estimados
## [,1]
## 1 143.29081
## 2 124.72690
## 3 111.08624
## 4 141.24771
## 5 158.66543
## 6 137.57778
## 7 123.90548
## 8 161.99460
## 9 222.65566
## 10 169.36496
## 11 162.05673
## 12 177.53961
## 13 153.09229
## 14 10.16756
## 15 144.18885
## 16 135.57967
## 17 115.54596
## 18 188.37789
## 19 170.54164
## 20 159.39423
res <- y - eta; res # residuos
## [,1]
## 1 18.709194
## 2 -14.726904
## 3 -10.086236
## 4 -36.247714
## 5 -3.665431
## 6 -36.577780
## 7 -22.905478
## 8 -36.994597
## 9 -22.655662
## 10 81.635038
## 11 -42.056728
## 12 32.460394
## 13 61.907708
## 14 39.832440
## 15 -74.188851
## 16 74.420328
## 17 -55.545962
## 18 41.622113
## 19 54.458356
## 20 -49.394229
por_res <- (res/y)*100
unidad <- cbind(x,y,eta,res,por_res) # tabla de resultados
rownames(unidad) <- lab
colnames(unidad) <- c(nom,"eta","residuos","% res")
unidad # impresión de la tabla
## Weight Waist Pulse Squats eta residuos % res
## 1 191 36 50 162 143.29081 18.709194 11.548885
## 2 189 37 52 110 124.72690 -14.726904 -13.388094
## 3 193 38 58 101 111.08624 -10.086236 -9.986372
## 4 162 35 62 105 141.24771 -36.247714 -34.521632
## 5 189 35 46 155 158.66543 -3.665431 -2.364794
## 6 182 36 56 101 137.57778 -36.577780 -36.215623
## 7 211 38 56 101 123.90548 -22.905478 -22.678691
## 8 167 34 60 125 161.99460 -36.994597 -29.595677
## 9 176 31 74 200 222.65566 -22.655662 -11.327831
## 10 154 33 56 251 169.36496 81.635038 32.523920
## 11 169 34 50 120 162.05673 -42.056728 -35.047273
## 12 166 33 52 210 177.53961 32.460394 15.457330
## 13 154 34 64 215 153.09229 61.907708 28.794283
## 14 247 46 50 50 10.16756 39.832440 79.664881
## 15 193 36 46 70 144.18885 -74.188851 -105.984072
## 16 202 37 62 210 135.57967 74.420328 35.438251
## 17 176 37 54 60 115.54596 -55.545962 -92.576603
## 18 157 32 52 230 188.37789 41.622113 18.096571
## 19 156 33 54 225 170.54164 54.458356 24.203714
## 20 138 33 68 110 159.39423 -49.394229 -44.903844
# gráficos clásicos: ejemplo con el primero regresor
plot(x[,1],y,xlab="Weight",ylab="Squats",ylim=c(min(y,eta),max(y,eta)),pch="*")
text(x[,1],y,labels=lab,pos=2)
points(x[,1],eta,col="red",pch="*")
text(x[,1],eta,labels=lab,col="red",pos=2)
legend("topright",
legend=c("Y", "Eta"),
pch=c("*", "*"),
col=c("black", "red"))

# gráficos de valores estimados y residuos
plot(eta,y,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(eta,y,labels=lab,pos=2)
points(eta,eta,col="red")

plot(eta,res,asp=1,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(eta,res,labels=lab,pos=2)

# Se ve en el gráfico de residuos vs estimados: se nota observaciones con alto valor residual como la 15 y 10.
# Por otro lado también un alto grado de apalancamiento en las observaciones 9 y 14
# Se procederá a evaluar el retiro de las observaciones 9 y 14
# datos para anova
ssr <- t(eta) %*% eta; ssr # suma de cuadrados por regresión
## [,1]
## [1,] 456161
sse <- ssy - ssr; sse # suma por residuos
## [,1]
## [1,] 41911.99
msr <- ssr/p1; msr # cuadrado promedio por regresión
## [,1]
## [1,] 114040.3
mse <- sse/(n-p1); mse # cuadrado promedio por residuos
## [,1]
## [1,] 2619.5
df <- c(p1,n-p1,n) # grados de libertad
ssq <- c(ssr,sse,ssy) # sumas de cuadrados
msq <- c(msr,mse,0) # suma de residuos
anova <- (cbind(df,ssq,msq)) # tabla de anova
colnames(anova) <- c("Degrees of Freedom","Sum of squares","Mean squares")
rownames(anova) <- c("Regression","Residuals","Total")
anova
## Degrees of Freedom Sum of squares Mean squares
## Regression 4 456161.01 114040.3
## Residuals 16 41911.99 2619.5
## Total 20 498073.00 0.0
# apalancamiento
Hat <- X %*% XXm1 %*% t(X) # matriz H sombrero
rownames(Hat) <- lab # inclusivo de su etiquetas
colnames(Hat) <- lab
Hat
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 1 0.10655710 0.070795483 0.0267442663 -0.019894425 0.143901992 0.04801517 0.084681195 0.02355179 0.020227354 0.03271984 0.0878217868 0.087116980 -0.033406882 0.061911065 0.135338940 0.044948432 0.025666204 0.082872382 0.049672292 -0.079240967
## 2 0.07079548 0.072744656 0.0591447046 0.032960464 0.076977336 0.05388813 0.059433539 0.02994190 -0.044026733 0.04004485 0.0627548367 0.049177635 0.021165578 0.130247731 0.083492901 0.034584771 0.070673506 0.044444385 0.046037095 0.005517226
## 3 0.02674427 0.059144705 0.1045436224 0.080027350 -0.011382549 0.06015351 0.081814272 0.04530832 0.033966692 0.01568300 -0.0003265344 -0.011227007 0.072991190 0.203832839 0.007562888 0.093186628 0.078009743 -0.026308132 0.005319444 0.080955755
## 4 -0.01989443 0.032960464 0.0800273504 0.144583882 -0.065286252 0.05366668 -0.013028942 0.07802181 0.013881794 0.08288966 0.0148177663 0.006434553 0.159045977 0.053273944 -0.050658916 0.027628581 0.103171448 0.014652526 0.062938353 0.220873748
## 5 0.14390199 0.076977336 -0.0113825494 -0.065286252 0.217677155 0.04214121 0.080544327 0.01323973 0.002425648 0.04447671 0.1325686301 0.135519874 -0.081670547 -0.008255014 0.196274082 0.015655049 0.004385998 0.137587408 0.074529859 -0.151310641
## 6 0.04801517 0.053888135 0.0601535085 0.053666680 0.042141211 0.05211761 0.055857841 0.04720808 0.036821951 0.04348146 0.0429084448 0.039575224 0.051309522 0.084090266 0.046061061 0.055259577 0.057207740 0.036631437 0.042309191 0.051295906
## 7 0.08468120 0.059433539 0.0818142724 -0.013028942 0.080544327 0.05585784 0.174977008 0.02892970 0.192415717 -0.03648079 0.0045543385 0.016156756 -0.029278175 0.184517658 0.081615445 0.167363855 -0.005877547 -0.012766013 -0.032443340 -0.082986844
## 8 0.02355179 0.029941901 0.0453083249 0.078021808 0.013239728 0.04720808 0.028929704 0.06993305 0.110931992 0.06584934 0.0378666682 0.046681200 0.089667404 -0.017447145 0.010045775 0.053515097 0.043489458 0.052706217 0.058530119 0.112029493
## 9 0.02022735 -0.044026733 0.0339666916 0.013881794 0.002425648 0.03682195 0.192415717 0.11093199 0.719766801 -0.02712212 -0.0588421522 0.027407036 0.045470797 -0.174755123 -0.034563859 0.289727970 -0.147815572 0.009736357 -0.043585692 0.027931145
## 10 0.03271984 0.040044849 0.0156829955 0.082889660 0.044476706 0.04348146 -0.036480794 0.06584934 -0.027122123 0.11322903 0.0900130217 0.086623986 0.095692819 -0.078870340 0.038499190 -0.032695421 0.076016444 0.108793697 0.112880264 0.128275379
## 11 0.08782179 0.062754837 -0.0003265344 0.014817766 0.132568630 0.04290844 0.004554338 0.03786667 -0.058842152 0.09001302 0.1214671139 0.115919153 0.012633219 -0.044079980 0.120454746 -0.030935850 0.054510550 0.131396451 0.105656818 -0.001159028
## 12 0.08711698 0.049177635 -0.0112270070 0.006434553 0.135519874 0.03957522 0.016156756 0.04668120 0.027407036 0.08662399 0.1159191532 0.122933095 0.009602991 -0.098425320 0.115735082 -0.008523808 0.025639376 0.138872382 0.101702138 -0.006921327
## 13 -0.03340688 0.021165578 0.0729911898 0.159045977 -0.081670547 0.05130952 -0.029278175 0.08966740 0.045470797 0.09569282 0.0126332194 0.009602991 0.180692603 0.002902642 -0.070143001 0.024169199 0.098758089 0.022752300 0.072389389 0.255254885
## 14 0.06191106 0.130247731 0.2038328389 0.053273944 -0.008255014 0.08409027 0.184517658 -0.01744714 -0.174755123 -0.07887034 -0.0440799805 -0.098425320 0.002902642 0.634785903 0.050304161 0.144958452 0.145588874 -0.153010458 -0.085468600 -0.036101554
## 15 0.13533894 0.083492901 0.0075628877 -0.050658916 0.196274082 0.04606106 0.081615445 0.01004578 -0.034563859 0.03849919 0.1204547458 0.115735082 -0.070143001 0.050304161 0.183045989 0.017377195 0.024204766 0.114198754 0.065305599 -0.134150800
## 16 0.04494843 0.034584771 0.0931866280 0.027628581 0.015655049 0.05525958 0.167363855 0.05351510 0.289727970 -0.03269542 -0.0309358502 -0.008523808 0.024169199 0.144958452 0.017377195 0.196649995 -0.013721030 -0.036117379 -0.041306754 -0.001724560
## 17 0.02566620 0.070673506 0.0780097427 0.103171448 0.004385998 0.05720774 -0.005877547 0.04348946 -0.147815572 0.07601644 0.0545105501 0.025639376 0.098758089 0.145588874 0.024204766 -0.013721030 0.130102980 0.030278423 0.070394162 0.129316389
## 18 0.08287238 0.044444385 -0.0263081321 0.014652526 0.137587408 0.03663144 -0.012766013 0.05270622 0.009736357 0.10879370 0.1313964513 0.138872382 0.022752300 -0.153010458 0.114198754 -0.036117379 0.030278423 0.162884880 0.124332114 0.016062269
## 19 0.04967229 0.046037095 0.0053194439 0.062938353 0.074529859 0.04230919 -0.032443340 0.05853012 -0.043585692 0.11288026 0.1056568184 0.101702138 0.072389389 -0.085468600 0.065305599 -0.041306754 0.070394162 0.124332114 0.118015774 0.092791775
## 20 -0.07924097 0.005517226 0.0809557552 0.220873748 -0.151310641 0.05129591 -0.082986844 0.11202949 0.027931145 0.12827538 -0.0011590278 -0.006921327 0.255254885 -0.036101554 -0.134150800 -0.001724560 0.129316389 0.016062269 0.092791775 0.373291750
lev <- diag(Hat); lev # apalancamientos
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 0.10655710 0.07274466 0.10454362 0.14458388 0.21767716 0.05211761 0.17497701 0.06993305 0.71976680 0.11322903 0.12146711 0.12293309 0.18069260 0.63478590 0.18304599 0.19664999 0.13010298 0.16288488 0.11801577 0.37329175
pesos <- cbind(x,y,lev) # construcción de una salida conjunta
rownames(pesos) <- lab
pesos # salida
## Weight Waist Pulse y lev
## 1 191 36 50 162 0.10655710
## 2 189 37 52 110 0.07274466
## 3 193 38 58 101 0.10454362
## 4 162 35 62 105 0.14458388
## 5 189 35 46 155 0.21767716
## 6 182 36 56 101 0.05211761
## 7 211 38 56 101 0.17497701
## 8 167 34 60 125 0.06993305
## 9 176 31 74 200 0.71976680
## 10 154 33 56 251 0.11322903
## 11 169 34 50 120 0.12146711
## 12 166 33 52 210 0.12293309
## 13 154 34 64 215 0.18069260
## 14 247 46 50 50 0.63478590
## 15 193 36 46 70 0.18304599
## 16 202 37 62 210 0.19664999
## 17 176 37 54 60 0.13010298
## 18 157 32 52 230 0.16288488
## 19 156 33 54 225 0.11801577
## 20 138 33 68 110 0.37329175
niv <- 2 * p1 / n; niv # nivel para apalancamiento
## [1] 0.4
level <- which(lev>niv) # unidades con fuerte apalancamiento
out <- rbind(level,lev[level]) # salida
rownames(out) <- c("unidad","apalancamiento"); out
## 9 14
## unidad 9.0000000 14.0000000
## apalancamiento 0.7197668 0.6347859
# comparación entre modelo total y modelo sin unidades con apalancamiento
# modelo completo
lm1 <- lm(y~Weight+Waist+Pulse,data=Linnerud); summary(lm1) # regresión con y sin
##
## Call:
## lm(formula = y ~ Weight + Waist + Pulse, data = Linnerud)
##
## Residuals:
## Min 1Q Median 3Q Max
## -74.19 -36.68 -12.41 40.28 81.64
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 623.2817 199.9013 3.118 0.00663 **
## Weight 0.7277 0.9733 0.748 0.46552
## Waist -17.3872 7.4652 -2.329 0.03328 *
## Pulse 0.1393 1.7547 0.079 0.93770
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 51.18 on 16 degrees of freedom
## Multiple R-squared: 0.4365, Adjusted R-squared: 0.3308
## F-statistic: 4.131 on 3 and 16 DF, p-value: 0.02394
residuals(lm1)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 18.709194 -14.726904 -10.086236 -36.247714 -3.665431 -36.577780 -22.905478 -36.994597 -22.655662 81.635038 -42.056728 32.460394 61.907708 39.832440 -74.188851 74.420328 -55.545962 41.622113 54.458356 -49.394229
# y su plot estimados observados
plot(lm1$fitted.values,y,pch=20,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm1$fitted.values,y,labels=lab,pos=3,offset(0.5))
points(lm1$fitted.values,lm1$fitted.values,pch=20,col="red")

# y su plot estimados residuos
plot(lm1$fitted.values,lm1$residuals,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm1$fitted.values,lm1$residuals,labels=lab,pos=3,offset(0.5))

# Del gráfico de observa que las muestras 9 y 14 tiene un alto grado de apalancamiento, se procederá a retirar
# modelo sin unidades con fuerte apalancamiento (sin la muestra 9 y 14)
lm2 <- lm(y[-level]~Weight+Waist+Pulse,data=Linnerud[-level,]); summary(lm2)
##
## Call:
## lm(formula = y[-level] ~ Weight + Waist + Pulse, data = Linnerud[-level,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.205 -27.651 6.197 27.000 70.780
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1047.205 255.616 4.097 0.00109 **
## Weight 3.214 1.442 2.229 0.04274 *
## Waist -47.515 13.852 -3.430 0.00406 **
## Pulse 3.678 2.411 1.525 0.14943
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 45.29 on 14 degrees of freedom
## Multiple R-squared: 0.5382, Adjusted R-squared: 0.4393
## F-statistic: 5.439 on 3 and 14 DF, p-value: 0.01085
residuals(lm2)
## 1 2 3 4 5 6 7 8 10 11 12 13 15 16 17 18 19 20
## 27.455381 22.043138 25.632151 -27.974074 -5.918506 -26.681767 -24.872939 -64.205325 70.780268 -38.854935 5.918045 52.871397 -56.261861 43.475275 6.475736 7.333891 45.707149 -62.923025
# hay que tirar las unidades también de la representación gráfica
plot(lm2$fitted.values,y[-level],pch=20,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm2$fitted.values,y[-level],labels=lab,pos=3,offset(0.5))
points(lm2$fitted.values,lm2$fitted.values,pch=20,col="red")

plot(lm2$fitted.values,lm2$residuals,asp=1,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm2$fitted.values,lm2$residuals,labels=lab,pos=3,offset(0.5))

# En el gráfico de residuos vs estimados:
#Luego de retirar las observaciones con alto grado de apalancamiento (9 y 14) se puede ver que la nube de puntos está más concentrada en la línea con residuo 0. Además ya no tener observaciones con un alto apalancamiento (aunque podría evaluarse la observacióm 15 y 16)
# gráficos de comparación de estimados y residuos
# ambos contra y y no contra los estimados
plot(y,lm1$fitted.values,pch=20,asp=1,ylab="fitted values")
abline(0,1)
text(y,lm1$fitted.values,y,labels=lab,pos=4,offset(0.5))
points(y[-out[1,]],lm2$fitted.values,pch=20,col="red")
text(y[-out[1,]],lm2$fitted.values,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))
legend("bottomright", # Escoge la posición adecuada para tu gráfico
legend=c("Modelo 1", "Modelo 2 sin Obs 9 y 14"),
pch=c(2, 2),
col=c("black", "red"),
title="Leyenda")

# por esto hay que cambiar el signo a los residuos
plot(y,-lm1$residuals,pch=20,asp=1,ylab="residuals")
abline(h=0)
text(y,-lm1$residuals,labels=lab,pch=20,pos=4,offset(0.5))
points(y[-out[1,]],-lm2$residuals,pch=20,col="red")
text(y[-out[1,]],-lm2$residuals,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))

# Análisis gráfico:
#Al comparar los puntos negros con los rojos(lm1 vs lm2), puedes evaluar cómo cada modelo se ajusta a los datos. Los puntos rojos son los valores ajustados del segundo modelo después de remover algunos puntos influyentes o atípicos. Si los puntos rojos están más cerca de la línea de identidad que los negros, podríamos inferir que el segundo modelo (lm2) tiene un mejor ajuste después de excluir las observaciones con alto apalancamiento
# Los residuos del modelo lm2 (puntos rojos) están en general más cerca de la línea horizontal que los de lm1 (puntos negros), se podría concluir que lm2 se ajusta mejor a los datos, especialmente después de excluir los outliers.
# Comentarios:
# Mostrar la diferencia entre betas y entre los residuos de los dos modelos.
# Modelo 1: Con todas las observaciones
# Squats ~ Weight,Waist,Pulse
# lm1 <- lm(y~Weight+Waist+Pulse,data=Linnerud)
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 623.2817 199.9013 3.118 0.00663 **
# Weight 0.7277 0.9733 0.748 0.46552
# Waist -17.3872 7.4652 -2.329 0.03328 *
#Pulse 0.1393 1.7547 0.079 0.93770
# beta = [623.2817 (intercepto) ,0.7277(Weight), -17.3872(Waist), 0.1393(Pulse)]
# residuals(lm1):
# 1 2 3 4 5 6 7
# 18.709194 -14.726904 -10.086236 -36.247714 -3.665431 -36.577780 -22.905478
# 8 9 10 11 12 13 14
# -36.994597 -22.655662 81.635038 -42.056728 32.460394 61.907708 39.832440
# 15 16 17 18 19 20
# -74.188851 74.420328 -55.545962 41.622113 54.458356 -49.394229
# Modelo 2: Sin observaciones con alto apalancamiento (observaciones 9 y 14)
# Squats ~ Weight,Waist,Pulse
# lm2 <- lm(y[-level]~Weight+Waist+Pulse,data=Linnerud[-level,]);
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 1047.205 255.616 4.097 0.00109 **
# Weight 3.214 1.442 2.229 0.04274 *
# Waist -47.515 13.852 -3.430 0.00406 **
# Pulse 3.678 2.411 1.525 0.14943
# beta = [1047.205 (intercepto) ,3.214 (Weight), -47.515(Waist), 3.678(Pulse)]
# residuals(lm2):
# 1 2 3 4 5 6 7
# 27.455381 22.043138 25.632151 -27.974074 -5.918506 -26.681767 -24.872939
# 8 10 11 12 13 15 16
# -64.205325 70.780268 -38.854935 5.918045 52.871397 -56.261861 43.475275
# 17 18 19 20
# 6.475736 7.333891 45.707149 -62.923025
# Comentarios:
# La eliminación de las observaciones 9 y 14 tiene un impacto notable en los coeficientes y su significancia
# Es especialmente notable que "Weight" se vuelva significativo al eliminar dos puntos de datos. Esto podría indicar que estas observaciones eran atípicas en términos de su relación entre el peso y "Squats".
# La consistencia del efecto negativo de "Waist" en ambos modelos subraya que es un predictor relevante y robusto para "Squats"
# Los cambios en la magnitud y significancia de los coeficientes tras la eliminación de las observaciones de alto apalancamiento sugieren que los resultados del modelo pueden ser más confiables sin estas observaciones
# Al remover las observaciones identificadas como de alto apalancamiento, los residuos todavía muestran variabilidad significativa, pero el rango de estos residuos es algo más moderado comparado con el modelo completo, especialmente en el extremo superior. Los residuos oscilan entre -64.20 y 70.78, lo cual todavía es considerable.
Decathlon.csv
1) Points ~ X100m,Shot.put,Discus
# Lectura de los datos
Decathlon <- read.csv("Decathlon.csv", row.names = 1);head(Decathlon)
## X100m Long.jump Shot.put High.jump X400m X110m.hurdle Discus Pole.vault Javeline X1500m Points
## Sebrle 10.85 7.84 16.36 2.12 48.36 14.05 48.72 5.0 70.52 280.01 8893
## Clay 10.44 7.96 15.23 2.06 49.19 14.13 50.11 4.9 69.71 282.00 8820
## Karpov 10.50 7.81 15.93 2.09 46.81 13.97 51.65 4.6 55.54 278.11 8725
## Macey 10.89 7.47 15.73 2.15 48.97 14.56 48.34 4.4 58.46 265.42 8414
## Warners 10.62 7.74 14.48 1.97 47.97 14.01 43.73 4.9 55.39 278.05 8343
## Zsivoczky 10.91 7.14 15.31 2.12 49.40 14.95 45.62 4.7 63.45 269.54 8287
attach(Decathlon) #archivo en uso
lab = rownames(Decathlon) #etiquetas de las unidades en lab
# regresión a mano
# definición de los datos para el modelo
n <- dim(Decathlon)[1]; n # n es el número de observaciones
## [1] 28
x <- as.matrix(Decathlon[,c(1,3,7)]); x # x es el caracter descriptivo
## X100m Shot.put Discus
## Sebrle 10.85 16.36 48.72
## Clay 10.44 15.23 50.11
## Karpov 10.50 15.93 51.65
## Macey 10.89 15.73 48.34
## Warners 10.62 14.48 43.73
## Zsivoczky 10.91 15.31 45.62
## Hernu 10.97 14.65 44.72
## Nool 10.80 14.26 42.05
## Bernard 10.69 14.80 44.75
## Schwarzl 10.98 14.01 42.43
## Pogorelov 10.95 15.10 44.60
## Schoenbeck 10.90 14.77 44.41
## Barras 11.14 14.91 44.83
## Smith 10.85 15.24 49.02
## Averyanov 10.55 14.44 39.88
## Ojaniemi 10.68 14.97 40.35
## Smirnov 10.89 13.88 42.47
## Qi 11.06 13.55 45.13
## Drews 10.87 13.07 40.11
## Parkhomenko 11.14 15.69 41.90
## Terek 10.92 15.15 45.62
## Gomez 11.08 14.57 40.95
## Turi 11.08 13.62 39.83
## Lorenzo 11.10 13.22 40.22
## Karlivans 11.33 13.30 43.34
## Korkizoglou 10.86 14.81 46.07
## Uldal 11.23 13.53 43.01
## Casarsa 11.36 14.92 48.66
p <- dim(x)[2]; p # numero de regresores
## [1] 3
y <- Points # y es el caracter respuesta
nomy <- deparse(substitute(Points)); nomy # nomy es su nombre
## [1] "Points"
nom <- c(colnames(x),nomy) ; nom # etiquetas de las variables en nom
## [1] "X100m" "Shot.put" "Discus" "Points"
# estadísticas
xm <- apply(x,2,mean); xm # cálculo de xm, promedio de x
## X100m Shot.put Discus
## 10.91571 14.62500 44.37571
ym <- sum(y)/n; ym # cálculo de ym, promedio de y
## [1] 8051.536
ssx <- apply(x^2,2,sum); ssx # ssx es la suma de los x cuadros
## X100m Shot.put Discus
## 3337.720 6008.732 55431.669
ssy <- sum(y^2); ssy # ssy es la suma de los y cuadros
## [1] 1818910257
sxy <- t(x)%*%y ; sxy # sxy es la suma de los productos xy
## [,1]
## X100m 2459212
## Shot.put 3302452
## Discus 10020967
ssxc <- ssx-n*xm^2 ; ssxc # ssxc es ssx centrado sobre el promedio
## X100m Shot.put Discus
## 1.441086 19.794100 293.956286
ssyc <- ssy-n*ym^2 ; ssyc # ssyc es ssy centrado sobre el promedio
## [1] 3747891
sxyc <- sxy-n*xm*ym; sxyc # sxyc es sxy centrado sobre el promedio
## [,1]
## X100m -1658.966
## Shot.put 5347.735
## Discus 16772.364
varx <- ssxc/n; varx # varx es la varianza de x
## X100m Shot.put Discus
## 0.05146735 0.70693214 10.49843878
vary <- ssyc/n; vary # vary es la varianza de y
## [1] 133853.2
covxy <- sxyc/n; covxy # covxy es la covarianza de xy
## [,1]
## X100m -59.24878
## Shot.put 190.99054
## Discus 599.01301
stats <- matrix(8*(2*p+1)*0,nrow=8,ncol=(2*p+1)) # construcción de tabla de estadísticas
rownames(stats) <- c("Mínimo","Máximo","Promedio","Total","Suma de cuadrados",
"Cuadrados centrados","Varianza covarianza","Desvío estándar")
colnames(stats) <- c(nom,"x1y","x2y","x3y")
stats[,1:p] <- rbind(apply(x,2,min),apply(x,2,max),xm,apply(x,2,sum),ssx,ssxc,varx,sqrt(varx))
stats[,p+1] <- rbind(min(y),max(y),ym,sum(y),ssy,ssyc,vary,sqrt(vary))
stats[,c((p+2):(2*p+1))] <- rbind(t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(sxy),t(sxyc),t(covxy),t(rep(0,p)))
stats # impresión de la tabla
## X100m Shot.put Discus Points x1y x2y x3y
## Mínimo 1.044000e+01 13.0700000 39.830000 7.404000e+03 0.00000 0.0000 0.000
## Máximo 1.136000e+01 16.3600000 51.650000 8.893000e+03 0.00000 0.0000 0.000
## Promedio 1.091571e+01 14.6250000 44.375714 8.051536e+03 0.00000 0.0000 0.000
## Total 3.056400e+02 409.5000000 1242.520000 2.254430e+05 0.00000 0.0000 0.000
## Suma de cuadrados 3.337720e+03 6008.7316000 55431.668800 1.818910e+09 2459212.41000 3302451.6100 10020966.520
## Cuadrados centrados 1.441086e+00 19.7941000 293.956286 3.747891e+06 -1658.96571 5347.7350 16772.364
## Varianza covarianza 5.146735e-02 0.7069321 10.498439 1.338532e+05 -59.24878 190.9905 599.013
## Desvío estándar 2.268642e-01 0.8407926 3.240129 3.658596e+02 0.00000 0.0000 0.000
print("Matriz de varianza y covarianza de los regresores")
## [1] "Matriz de varianza y covarianza de los regresores"
vcovx <- (n-1)/n * cov(x) ; vcovx # matriz de varianza/covarianza de los xy
## X100m Shot.put Discus
## X100m 0.05146735 -0.07051786 -0.1715041
## Shot.put -0.07051786 0.70693214 1.8137536
## Discus -0.17150408 1.81375357 10.4984388
# estimación
X <- cbind(rep(1,n),x); X # se necesita ajuntar la columna de unos
## X100m Shot.put Discus
## Sebrle 1 10.85 16.36 48.72
## Clay 1 10.44 15.23 50.11
## Karpov 1 10.50 15.93 51.65
## Macey 1 10.89 15.73 48.34
## Warners 1 10.62 14.48 43.73
## Zsivoczky 1 10.91 15.31 45.62
## Hernu 1 10.97 14.65 44.72
## Nool 1 10.80 14.26 42.05
## Bernard 1 10.69 14.80 44.75
## Schwarzl 1 10.98 14.01 42.43
## Pogorelov 1 10.95 15.10 44.60
## Schoenbeck 1 10.90 14.77 44.41
## Barras 1 11.14 14.91 44.83
## Smith 1 10.85 15.24 49.02
## Averyanov 1 10.55 14.44 39.88
## Ojaniemi 1 10.68 14.97 40.35
## Smirnov 1 10.89 13.88 42.47
## Qi 1 11.06 13.55 45.13
## Drews 1 10.87 13.07 40.11
## Parkhomenko 1 11.14 15.69 41.90
## Terek 1 10.92 15.15 45.62
## Gomez 1 11.08 14.57 40.95
## Turi 1 11.08 13.62 39.83
## Lorenzo 1 11.10 13.22 40.22
## Karlivans 1 11.33 13.30 43.34
## Korkizoglou 1 10.86 14.81 46.07
## Uldal 1 11.23 13.53 43.01
## Casarsa 1 11.36 14.92 48.66
p1 <- p + 1; p1 # un regresor más
## [1] 4
XX <- t(X)%*%X; XX # matriz X'X
## X100m Shot.put Discus
## 28.00 305.640 409.500 1242.52
## X100m 305.64 3337.720 4468.011 13558.19
## Shot.put 409.50 4468.011 6008.732 18222.64
## Discus 1242.52 13558.191 18222.640 55431.67
XXm1 <- solve(XX); XXm1 # su inversa
## X100m Shot.put Discus
## 134.03355845 -9.940932497 -1.66416391 -0.025849005
## X100m -9.94093250 0.804052240 0.08353098 -0.001296037
## Shot.put -1.66416391 0.083530978 0.09941981 -0.015811602
## Discus -0.02584901 -0.001296037 -0.01581160 0.006112372
bh <- XXm1 %*% t(X) %*% y # estimación de los betas
print("Estimaciones de los beta")
## [1] "Estimaciones de los beta"
bh
## [,1]
## 15210.14536
## X100m -908.93117
## Shot.put 127.89783
## Discus 20.11275
eta <- X %*% bh; eta # valores estimados
## [,1]
## Sebrle 8420.544
## Clay 8676.638
## Karpov 8742.604
## Macey 8295.968
## Warners 8288.787
## Zsivoczky 8169.366
## Hernu 8012.316
## Nool 8063.253
## Bernard 8286.604
## Schwarzl 7875.314
## Pogorelov 8085.635
## Schoenbeck 8085.054
## Barras 7893.263
## Smith 8283.332
## Averyanov 8269.863
## Ojaniemi 8228.940
## Smirnov 7941.295
## Qi 7798.071
## Drews 7808.410
## Parkhomenko 7934.093
## Terek 8139.813
## Gomez 7826.276
## Turi 7682.247
## Lorenzo 7620.753
## Karlivans 7484.683
## Korkizoglou 8159.914
## Uldal 7598.355
## Casarsa 7771.609
res <- y - eta; res # residuos
## [,1]
## Sebrle 472.456260
## Clay 143.362309
## Karpov -17.603935
## Macey 118.031986
## Warners 54.212627
## Zsivoczky 117.634373
## Hernu 224.684286
## Nool 171.747178
## Bernard -61.604500
## Schwarzl 226.686403
## Pogorelov -1.634832
## Schoenbeck -8.053684
## Barras 173.736747
## Smith -260.331992
## Averyanov -248.862561
## Ojaniemi -222.940351
## Smirnov 51.704806
## Qi 135.929480
## Drews 117.589512
## Parkhomenko -16.093210
## Terek -246.812662
## Gomez 38.723602
## Turi 25.752820
## Lorenzo -28.753395
## Karlivans 98.317174
## Korkizoglou -586.914006
## Uldal -103.355238
## Casarsa -367.609198
por_res <- (res/y)*100
unidad <- cbind(x,y,eta,res,por_res) # tabla de resultados
rownames(unidad) <- lab
colnames(unidad) <- c(nom,"eta","residuos","% res")
unidad # impresión de la tabla
## X100m Shot.put Discus Points eta residuos % res
## Sebrle 10.85 16.36 48.72 8893 8420.544 472.456260 5.31267582
## Clay 10.44 15.23 50.11 8820 8676.638 143.362309 1.62542300
## Karpov 10.50 15.93 51.65 8725 8742.604 -17.603935 -0.20176430
## Macey 10.89 15.73 48.34 8414 8295.968 118.031986 1.40280468
## Warners 10.62 14.48 43.73 8343 8288.787 54.212627 0.64979776
## Zsivoczky 10.91 15.31 45.62 8287 8169.366 117.634373 1.41950493
## Hernu 10.97 14.65 44.72 8237 8012.316 224.684286 2.72774416
## Nool 10.80 14.26 42.05 8235 8063.253 171.747178 2.08557594
## Bernard 10.69 14.80 44.75 8225 8286.604 -61.604500 -0.74899088
## Schwarzl 10.98 14.01 42.43 8102 7875.314 226.686403 2.79790673
## Pogorelov 10.95 15.10 44.60 8084 8085.635 -1.634832 -0.02022306
## Schoenbeck 10.90 14.77 44.41 8077 8085.054 -8.053684 -0.09971133
## Barras 11.14 14.91 44.83 8067 7893.263 173.736747 2.15367233
## Smith 10.85 15.24 49.02 8023 8283.332 -260.331992 -3.24482105
## Averyanov 10.55 14.44 39.88 8021 8269.863 -248.862561 -3.10263759
## Ojaniemi 10.68 14.97 40.35 8006 8228.940 -222.940351 -2.78466589
## Smirnov 10.89 13.88 42.47 7993 7941.295 51.704806 0.64687609
## Qi 11.06 13.55 45.13 7934 7798.071 135.929480 1.71325284
## Drews 10.87 13.07 40.11 7926 7808.410 117.589512 1.48359212
## Parkhomenko 11.14 15.69 41.90 7918 7934.093 -16.093210 -0.20324842
## Terek 10.92 15.15 45.62 7893 8139.813 -246.812662 -3.12698165
## Gomez 11.08 14.57 40.95 7865 7826.276 38.723602 0.49235349
## Turi 11.08 13.62 39.83 7708 7682.247 25.752820 0.33410509
## Lorenzo 11.10 13.22 40.22 7592 7620.753 -28.753395 -0.37873281
## Karlivans 11.33 13.30 43.34 7583 7484.683 98.317174 1.29654720
## Korkizoglou 10.86 14.81 46.07 7573 8159.914 -586.914006 -7.75008591
## Uldal 11.23 13.53 43.01 7495 7598.355 -103.355238 -1.37898916
## Casarsa 11.36 14.92 48.66 7404 7771.609 -367.609198 -4.96500808
# gráficos clásicos: ejemplo con el primero regresor
plot(x[,1],y,xlab="X100m",ylab="Points",ylim=c(min(y,eta),max(y,eta)),pch=".")
text(x[,1],y,labels=lab,pos=2)
points(x[,1],eta,col="red",pch=".")
text(x[,1],eta,labels=lab,col="red",pos=2)
legend("topright",
legend=c("Y", "Eta"),
pch=c("*", "*"),
col=c("black", "red"))

# gráficos de valores estimados y residuos
plot(eta,y,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(eta,y,labels=lab,pos=2)
points(eta,eta,col="red")

plot(eta,res,asp=1,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(eta,res,labels=lab,pos=2)

# Del gráfico residuos vs estimados. Se puede ver que hay observaciones como Sebrle y Korkizoglou con alto valor residual. Además de observaciones como Clay Parkhomenko Casarsa tienen un valor alto de apalancamiento. Se evaluará los valores para posteriormente analizar el retiro de estas observaciones
# datos para anova
ssr <- t(eta) %*% eta; ssr # suma de cuadrados por regresión
## [,1]
## [1,] 1817691554
sse <- ssy - ssr; sse # suma por residuos
## [,1]
## [1,] 1218703
msr <- ssr/p1; msr # cuadrado promedio por regresión
## [,1]
## [1,] 454422888
mse <- sse/(n-p1); mse # cuadrado promedio por residuos
## [,1]
## [1,] 50779.3
df <- c(p1,n-p1,n) # grados de libertad
ssq <- c(ssr,sse,ssy) # sumas de cuadrados
msq <- c(msr,mse,0) # suma de residuos
anova <- (cbind(df,ssq,msq)) # tabla de anova
colnames(anova) <- c("Degrees of Freedom","Sum of squares","Mean squares")
rownames(anova) <- c("Regression","Residuals","Total")
anova
## Degrees of Freedom Sum of squares Mean squares
## Regression 4 1817691554 454422888.4
## Residuals 24 1218703 50779.3
## Total 28 1818910257 0.0
# apalancamiento
Hat <- X %*% XXm1 %*% t(X) # matriz H sombrero
rownames(Hat) <- lab # inclusivo de su etiquetas
colnames(Hat) <- lab
Hat
## Sebrle Clay Karpov Macey Warners Zsivoczky Hernu Nool Bernard Schwarzl Pogorelov Schoenbeck Barras Smith Averyanov Ojaniemi Smirnov Qi Drews Parkhomenko Terek Gomez
## Sebrle 0.197157998 0.049511478 0.12229595 0.138980203 -0.0035954283 0.101577330 0.04259219 -0.0083280130 0.033107195 -0.017645809 0.085199654 0.04858397 0.082764395 0.0868080078 -0.01052288 0.052449926 -0.038239638 -0.058097628 -0.11772919 0.161776009 0.0867116592 0.0472311193
## Clay 0.049511478 0.304332605 0.27498456 0.070278822 0.1294117632 0.022004846 0.02451966 0.0399280350 0.109794789 0.006319247 -0.003439777 0.03175537 -0.048572972 0.1360285338 0.05549229 -0.013597490 0.047042096 0.081957137 0.04914002 -0.179847545 0.0298518808 -0.1055925664
## Karpov 0.122295952 0.274984563 0.28443020 0.116249337 0.0922766963 0.053675579 0.03086421 0.0134972414 0.094304434 -0.014484582 0.023633471 0.03733748 -0.011550294 0.1520065950 0.01568807 -0.013967080 0.010210989 0.041734482 -0.02640323 -0.098548101 0.0545288655 -0.0852166309
## Macey 0.138980203 0.070278822 0.11624934 0.110690084 0.0051373833 0.074631438 0.04278801 -0.0042128936 0.031129466 -0.000921098 0.060906278 0.04143160 0.066545807 0.0905852564 -0.02746984 0.008231702 -0.012487134 0.002024782 -0.06632134 0.081766024 0.0680916412 0.0208909160
## Warners -0.003595428 0.129411763 0.09227670 0.005137383 0.1143726228 0.015754460 0.02103447 0.0780392997 0.086392790 0.039955131 0.013159544 0.03539270 -0.028958881 0.0283995866 0.13785505 0.089560970 0.066076294 0.029897498 0.09747152 -0.047782896 0.0178891952 0.0007434437
## Zsivoczky 0.101577330 0.022004846 0.05367558 0.074631438 0.0157544599 0.064265178 0.03857435 0.0197935347 0.031387252 0.015764987 0.059518272 0.04175525 0.059359606 0.0469066955 0.02265493 0.053188045 0.004811589 -0.010900213 -0.02745454 0.106190218 0.0571031171 0.0524813998
## Hernu 0.042592193 0.024519663 0.03086421 0.042788006 0.0210344678 0.038574346 0.03877653 0.0260868775 0.026380766 0.034467835 0.038383411 0.03528733 0.047066040 0.0413183358 0.01149186 0.018985309 0.030252307 0.041790615 0.02420221 0.043494019 0.0387750397 0.0374542798
## Nool -0.008328013 0.039928035 0.01349724 -0.004212894 0.0780392997 0.019793535 0.02608688 0.0723009996 0.058205120 0.049751604 0.025361196 0.03599251 0.002299974 -0.0005339676 0.11877611 0.094345024 0.061459660 0.021938579 0.09088116 0.019441702 0.0200572959 0.0448346744
## Bernard 0.033107195 0.109794789 0.09430443 0.031129466 0.0863927904 0.031387252 0.02638077 0.0582051202 0.072128014 0.029854013 0.026432062 0.03726853 -0.004006993 0.0413097498 0.09912135 0.073369594 0.045866920 0.019353515 0.05562737 -0.009211401 0.0308935155 0.0092657523
## Schwarzl -0.017645809 0.006319247 -0.01448458 -0.000921098 0.0399551309 0.015764987 0.03446783 0.0497516040 0.029854013 0.055658984 0.023427489 0.03196622 0.028200569 0.0096880507 0.04942735 0.035483465 0.058564470 0.061309324 0.08407953 0.015292641 0.0197947353 0.0452712848
## Pogorelov 0.085199654 -0.003439777 0.02363347 0.060906278 0.0131595442 0.059518272 0.03838341 0.0253611965 0.026432062 0.023427489 0.058730320 0.04119872 0.061186278 0.0312173903 0.03041959 0.060884391 0.011103802 -0.009322393 -0.01334000 0.115608253 0.0527410899 0.0653387829
## Schoenbeck 0.048583973 0.031755370 0.03733748 0.041431603 0.0353927009 0.041755255 0.03528733 0.0359925113 0.037268530 0.031966225 0.041198719 0.03747385 0.038229837 0.0338966819 0.04287157 0.048485694 0.030301847 0.020573278 0.02500677 0.054071374 0.0397397981 0.0419965717
## Barras 0.082764395 -0.048572972 -0.01155029 0.066545807 -0.0289588811 0.059359606 0.04706604 0.0022999740 -0.004006993 0.028200569 0.061186278 0.03822984 0.091818645 0.0374854847 -0.03702532 0.009627151 0.004614571 0.020682583 -0.02699683 0.128849351 0.0550133208 0.0738824242
## Smith 0.086808008 0.136028534 0.15200659 0.090585256 0.0283995866 0.046906695 0.04131834 -0.0005339676 0.041309750 0.009688051 0.031217390 0.03389668 0.037485485 0.1123457403 -0.04254749 -0.044131914 0.013423023 0.067889146 -0.01627258 -0.031316092 0.0496765768 -0.0287650313
## Averyanov -0.010522885 0.055492293 0.01568807 -0.027469835 0.1378550493 0.022654929 0.01149186 0.1187761095 0.099121349 0.049427348 0.030419592 0.04287157 -0.037025318 -0.0425474946 0.25093627 0.211876056 0.072916704 -0.050069922 0.11788385 0.050801890 0.0160752000 0.0670985187
## Ojaniemi 0.052449926 -0.013597490 -0.01396708 0.008231702 0.0895609704 0.053188045 0.01898531 0.0943450235 0.073369594 0.035483465 0.060884391 0.04848569 0.009627151 -0.0441319139 0.21187606 0.219156427 0.038113162 -0.093298625 0.04805420 0.157858168 0.0391109945 0.1078009972
## Smirnov -0.038239638 0.047042096 0.01021099 -0.012487134 0.0660762935 0.004811589 0.03025231 0.0614596600 0.045866920 0.058564470 0.011103802 0.03030185 0.004614571 0.0134230231 0.07291670 0.038113162 0.071801151 0.073772131 0.11034905 -0.031812497 0.0113805542 0.0244706733
## Qi -0.058097628 0.081957137 0.04173448 0.002024782 0.0298974980 -0.010900213 0.04179062 0.0219385791 0.019353515 0.061309324 -0.009322393 0.02057328 0.020682583 0.0678891459 -0.05006992 -0.093298625 0.073772131 0.170270272 0.10918175 -0.125346133 0.0064322557 -0.0276499352
## Drews -0.117729194 0.049140023 -0.02640323 -0.066321345 0.0974715235 -0.027454542 0.02420221 0.0908811613 0.055627368 0.084079531 -0.013340001 0.02500677 -0.026996833 -0.0162725758 0.11788385 0.048054203 0.110349048 0.109181752 0.19062451 -0.093770220 -0.0145107554 0.0191377055
## Parkhomenko 0.161776009 -0.179847545 -0.09854810 0.081766024 -0.0477828959 0.106190218 0.04349402 0.0194417018 -0.009211401 0.015292641 0.115608253 0.05407137 0.128849351 -0.0313160922 0.05080189 0.157858168 -0.031812497 -0.125346133 -0.09377022 0.351112842 0.0827133773 0.1819985039
## Terek 0.086711659 0.029851881 0.05452887 0.068091641 0.0178891952 0.057103117 0.03877504 0.0200572959 0.030893516 0.019794735 0.052741090 0.03973980 0.055013321 0.0496765768 0.01607520 0.039110994 0.011380554 0.006432256 -0.01451076 0.082713377 0.0522993092 0.0438134573
## Gomez 0.047231119 -0.105592566 -0.08521663 0.020890916 0.0007434437 0.052481400 0.03745428 0.0448346744 0.009265752 0.045271285 0.065338783 0.04199657 0.073882424 -0.0287650313 0.06709852 0.107800997 0.024470673 -0.027649935 0.01913771 0.181998504 0.0438134573 0.1234391080
## Turi -0.045277895 -0.068081919 -0.09350828 -0.029494684 0.0296288882 0.010532019 0.03412136 0.0628460865 0.016479920 0.071551701 0.028049982 0.03237378 0.038252934 -0.0328728887 0.07303237 0.066777694 0.068065531 0.049771519 0.10715649 0.062558713 0.0133634203 0.0868556885
## Lorenzo -0.083184051 -0.036588122 -0.08069825 -0.043527467 0.0357152971 -0.008883213 0.03503576 0.0608732382 0.016009523 0.080733553 0.008360486 0.02653350 0.025581559 -0.0185987919 0.04871011 0.020757616 0.084954295 0.101330661 0.13976489 -0.010078307 0.0008520327 0.0566159490
## Karlivans -0.057911051 -0.038835501 -0.06024624 -0.003439627 -0.0278428404 -0.003354656 0.05069163 0.0065417432 -0.023654196 0.072359316 0.008189106 0.02097208 0.069287018 0.0367529497 -0.09449552 -0.101583479 0.063280628 0.165429150 0.09097676 -0.034959926 0.0118029367 0.0286743415
## Korkizoglou 0.047740049 0.085847926 0.08637501 0.051849116 0.0420884942 0.036290951 0.03625909 0.0266771347 0.043357751 0.027113813 0.030116750 0.03457483 0.028329521 0.0646076430 0.01593112 0.008440624 0.031949099 0.050852438 0.02544339 -0.003834202 0.0380636447 0.0055468241
## Uldal -0.043682586 -0.029668140 -0.04942009 -0.001973313 -0.0091676574 0.003634889 0.04598433 0.0192188374 -0.008553210 0.067066639 0.014239095 0.02459860 0.058772081 0.0272251282 -0.05108805 -0.058214271 0.060670804 0.131283651 0.08663689 -0.013898707 0.0150278218 0.0365303830
## Casarsa 0.099727036 -0.023977041 0.02994961 0.107639530 -0.0889053958 0.058736687 0.06332217 -0.0560727654 -0.041613593 0.019949535 0.052696755 0.03033026 0.129262141 0.1074681807 -0.19584123 -0.137764352 -0.012912082 0.107210077 -0.06881938 0.066872940 0.0627080200 0.0258513650
## Turi Lorenzo Karlivans Korkizoglou Uldal Casarsa
## Sebrle -0.045277895 -0.0831840509 -0.057911051 0.047740049 -0.043682586 0.09972704
## Clay -0.068081919 -0.0365881217 -0.038835501 0.085847926 -0.029668140 -0.02397704
## Karpov -0.093508280 -0.0806982521 -0.060246245 0.086375009 -0.049420093 0.02994961
## Macey -0.029494684 -0.0435274668 -0.003439627 0.051849116 -0.001973313 0.10763953
## Warners 0.029628888 0.0357152971 -0.027842840 0.042088494 -0.009167657 -0.08890540
## Zsivoczky 0.010532019 -0.0088832132 -0.003354656 0.036290951 0.003634889 0.05873669
## Hernu 0.034121364 0.0350357611 0.050691627 0.036259092 0.045984331 0.06332217
## Nool 0.062846086 0.0608732382 0.006541743 0.026677135 0.019218837 -0.05607277
## Bernard 0.016479920 0.0160095234 -0.023654196 0.043357751 -0.008553210 -0.04161359
## Schwarzl 0.071551701 0.0807335527 0.072359316 0.027113813 0.067066639 0.01994953
## Pogorelov 0.028049982 0.0083604863 0.008189106 0.030116750 0.014239095 0.05269676
## Schoenbeck 0.032373776 0.0265335005 0.020972078 0.034574830 0.024598600 0.03033026
## Barras 0.038252934 0.0255815590 0.069287018 0.028329521 0.058772081 0.12926214
## Smith -0.032872889 -0.0185987919 0.036752950 0.064607643 0.027225128 0.10746818
## Averyanov 0.073032369 0.0487101090 -0.094495523 0.015931118 -0.051088045 -0.19584123
## Ojaniemi 0.066777694 0.0207576162 -0.101583479 0.008440624 -0.058214271 -0.13776435
## Smirnov 0.068065531 0.0849542954 0.063280628 0.031949099 0.060670804 -0.01291208
## Qi 0.049771519 0.1013306606 0.165429150 0.050852438 0.131283651 0.10721008
## Drews 0.107156490 0.1397648863 0.090976762 0.025443390 0.086636891 -0.06881938
## Parkhomenko 0.062558713 -0.0100783066 -0.034959926 -0.003834202 -0.013898707 0.06687294
## Terek 0.013363420 0.0008520327 0.011802937 0.038063645 0.015027822 0.06270802
## Gomez 0.086855688 0.0566159490 0.028674341 0.005546824 0.036530383 0.02585136
## Turi 0.114018919 0.1161053682 0.089613432 0.009541325 0.084911795 0.00362673
## Lorenzo 0.116105368 0.1389268056 0.131880839 0.017056993 0.115916962 0.01984277
## Karlivans 0.089613432 0.1318808390 0.220826509 0.032168664 0.175229751 0.17164619
## Korkizoglou 0.009541325 0.0170569931 0.032168664 0.047769654 0.029844969 0.04999738
## Uldal 0.084911795 0.1159169616 0.175229751 0.029844969 0.142070699 0.12680270
## Casarsa 0.003626730 0.0198427681 0.171646193 0.049997378 0.126802695 0.29226576
lev <- diag(Hat); lev # apalancamientos
## Sebrle Clay Karpov Macey Warners Zsivoczky Hernu Nool Bernard Schwarzl Pogorelov Schoenbeck Barras Smith Averyanov Ojaniemi Smirnov Qi Drews Parkhomenko Terek Gomez Turi Lorenzo Karlivans
## 0.19715800 0.30433260 0.28443020 0.11069008 0.11437262 0.06426518 0.03877653 0.07230100 0.07212801 0.05565898 0.05873032 0.03747385 0.09181864 0.11234574 0.25093627 0.21915643 0.07180115 0.17027027 0.19062451 0.35111284 0.05229931 0.12343911 0.11401892 0.13892681 0.22082651
## Korkizoglou Uldal Casarsa
## 0.04776965 0.14207070 0.29226576
pesos <- cbind(x,y,lev) # construcción de una salida conjunta
rownames(pesos) <- lab
pesos # salida
## X100m Shot.put Discus y lev
## Sebrle 10.85 16.36 48.72 8893 0.19715800
## Clay 10.44 15.23 50.11 8820 0.30433260
## Karpov 10.50 15.93 51.65 8725 0.28443020
## Macey 10.89 15.73 48.34 8414 0.11069008
## Warners 10.62 14.48 43.73 8343 0.11437262
## Zsivoczky 10.91 15.31 45.62 8287 0.06426518
## Hernu 10.97 14.65 44.72 8237 0.03877653
## Nool 10.80 14.26 42.05 8235 0.07230100
## Bernard 10.69 14.80 44.75 8225 0.07212801
## Schwarzl 10.98 14.01 42.43 8102 0.05565898
## Pogorelov 10.95 15.10 44.60 8084 0.05873032
## Schoenbeck 10.90 14.77 44.41 8077 0.03747385
## Barras 11.14 14.91 44.83 8067 0.09181864
## Smith 10.85 15.24 49.02 8023 0.11234574
## Averyanov 10.55 14.44 39.88 8021 0.25093627
## Ojaniemi 10.68 14.97 40.35 8006 0.21915643
## Smirnov 10.89 13.88 42.47 7993 0.07180115
## Qi 11.06 13.55 45.13 7934 0.17027027
## Drews 10.87 13.07 40.11 7926 0.19062451
## Parkhomenko 11.14 15.69 41.90 7918 0.35111284
## Terek 10.92 15.15 45.62 7893 0.05229931
## Gomez 11.08 14.57 40.95 7865 0.12343911
## Turi 11.08 13.62 39.83 7708 0.11401892
## Lorenzo 11.10 13.22 40.22 7592 0.13892681
## Karlivans 11.33 13.30 43.34 7583 0.22082651
## Korkizoglou 10.86 14.81 46.07 7573 0.04776965
## Uldal 11.23 13.53 43.01 7495 0.14207070
## Casarsa 11.36 14.92 48.66 7404 0.29226576
niv <- 2 * p1 / n; niv # nivel para apalancamiento
## [1] 0.2857143
level <- which(lev>niv) # unidades con fuerte apalancamiento
out <- rbind(level,lev[level]) # salida
rownames(out) <- c("unidad","apalancamiento"); out
## Clay Parkhomenko Casarsa
## unidad 2.0000000 20.0000000 28.0000000
## apalancamiento 0.3043326 0.3511128 0.2922658
# De acuerdo al gráfico de residuos vs estimados y Resultados del apalancamiento
# podemos ver que las observaciones que tienen mayor apalancamiento es Clay (0.3043326), Parkhomenko(0.3511128) y Casarsa(0.2922658)
# comparación entre modelo total y modelo sin unidades con apalancamiento
# modelo completo X100m,Shot.put,Discus
lm1 <- lm(y~X100m+Shot.put+Discus,data=Decathlon); summary(lm1)
##
## Call:
## lm(formula = y ~ X100m + Shot.put + Discus, data = Decathlon)
##
## Residuals:
## Min 1Q Median 3Q Max
## -586.91 -72.04 32.24 122.51 472.46
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15210.15 2608.86 5.830 5.18e-06 ***
## X100m -908.93 202.06 -4.498 0.000149 ***
## Shot.put 127.90 71.05 1.800 0.084438 .
## Discus 20.11 17.62 1.142 0.264873
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 225.3 on 24 degrees of freedom
## Multiple R-squared: 0.6748, Adjusted R-squared: 0.6342
## F-statistic: 16.6 on 3 and 24 DF, p-value: 4.71e-06
residuals(lm1)
## Sebrle Clay Karpov Macey Warners Zsivoczky Hernu Nool Bernard Schwarzl Pogorelov Schoenbeck Barras Smith Averyanov Ojaniemi Smirnov Qi Drews Parkhomenko Terek Gomez Turi Lorenzo Karlivans
## 472.456260 143.362309 -17.603935 118.031986 54.212627 117.634373 224.684286 171.747178 -61.604500 226.686403 -1.634832 -8.053684 173.736747 -260.331992 -248.862561 -222.940351 51.704806 135.929480 117.589512 -16.093210 -246.812662 38.723602 25.752820 -28.753395 98.317174
## Korkizoglou Uldal Casarsa
## -586.914006 -103.355238 -367.609198
# y su plot estimados observados
plot(lm1$fitted.values,y,pch=20,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm1$fitted.values,y,labels=lab,pos=3,offset(0.5))
points(lm1$fitted.values,lm1$fitted.values,pch=20,col="red")

# y su plot estimados residuos
plot(lm1$fitted.values,lm1$residuals,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm1$fitted.values,lm1$residuals,labels=lab,pos=3,offset(0.5))

# modelo sin unidades con fuerte apalancamiento (Todas las observaciones menos la Clay Parkhomenko, Casarsa)
lm2 <- lm(y[-level]~X100m+Shot.put+Discus,data=Decathlon[-level,]); summary(lm2)
##
## Call:
## lm(formula = y[-level] ~ X100m + Shot.put + Discus, data = Decathlon[-level,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -590.75 -48.97 0.23 118.72 406.48
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10948.31 3330.56 3.287 0.00351 **
## X100m -572.80 260.14 -2.202 0.03899 *
## Shot.put 171.02 86.95 1.967 0.06255 .
## Discus 19.61 21.54 0.910 0.37298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 215.9 on 21 degrees of freedom
## Multiple R-squared: 0.6402, Adjusted R-squared: 0.5888
## F-statistic: 12.46 on 3 and 21 DF, p-value: 6.76e-05
residuals(lm2)
## Sebrle Karpov Macey Warners Zsivoczky Hernu Nool Bernard Schwarzl Pogorelov Schoenbeck Barras Smith Averyanov Ojaniemi Smirnov Qi Drews Terek Gomez Turi Lorenzo Karlivans
## 406.4824456 54.0910697 65.5875086 144.0930353 75.2027494 190.0901008 209.7617099 -8.5360580 215.1705846 -48.9718939 -24.4503278 70.8448971 -277.8578489 -135.6755612 -176.0666810 76.0665048 118.7249799 182.4086760 -285.7060995 -31.3016138 -3.8733022 -47.6561660 0.2333375
## Korkizoglou Uldal
## -590.7508443 -177.9112029
# hay que tirar las unidades también de la representación gráfica
plot(lm2$fitted.values,y[-level],pch=20,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm2$fitted.values,y[-level],labels=lab,pos=3,offset(0.5))
points(lm2$fitted.values,lm2$fitted.values,pch=20,col="red")

plot(lm2$fitted.values,lm2$residuals,asp=1,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm2$fitted.values,lm2$residuals,labels=lab,pos=3,offset(0.5))

# Gráfico observados vs estimados: Se ve que las observaciones se ajustan en la recta identidad por lo que se tiene un buen ajuste de los datos respecto al modelo.Por otro lado, en el gráfico residuos vs estimados, la mayor parte de las observaciones están cerca de la recta horizontal 0. Sin embargo, aún persisten observaciones con alto grado residual como Lorenzo
# gráficos de comparación de estimados y residuos
# ambos contra y y no contra los estimados
plot(y,lm1$fitted.values,pch=20,asp=1,ylab="fitted values")
abline(0,1)
text(y,lm1$fitted.values,y,labels=lab,pos=4,offset(0.5))
points(y[-out[1,]],lm2$fitted.values,pch=20,col="red")
text(y[-out[1,]],lm2$fitted.values,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))
legend("bottomright", # Escoge la posición adecuada para tu gráfico
legend=c("Modelo 1", "Modelo 2 sin Obs Clay,Parkhomenko,Casarsa"),
pch=c(2, 2),
col=c("black", "red"),
title="Leyenda",
cex=0.5)

# por esto hay que cambiar el signo a los residuos
plot(y,-lm1$residuals,pch=20,asp=1,ylab="residuals")
abline(h=0)
text(y,-lm1$residuals,labels=lab,pch=20,pos=4,offset(0.5))
points(y[-out[1,]],-lm2$residuals,pch=20,col="red")
text(y[-out[1,]],-lm2$residuals,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))

# Análisis de residuales
# Al eliminar los outliers (como parece ser el caso con lm2) los residuos se reducen (los puntos rojos están más cerca de cero), esto sugeriría que estos puntos tenían una gran influencia y que su eliminación mejora la bondad de ajuste del modelo.
2) Points ~ X110m.hurdle,Javeline,X1500m
# Lectura de los datos
Decathlon <- read.csv("Decathlon.csv", row.names = 1);head(Decathlon)
## X100m Long.jump Shot.put High.jump X400m X110m.hurdle Discus Pole.vault Javeline X1500m Points
## Sebrle 10.85 7.84 16.36 2.12 48.36 14.05 48.72 5.0 70.52 280.01 8893
## Clay 10.44 7.96 15.23 2.06 49.19 14.13 50.11 4.9 69.71 282.00 8820
## Karpov 10.50 7.81 15.93 2.09 46.81 13.97 51.65 4.6 55.54 278.11 8725
## Macey 10.89 7.47 15.73 2.15 48.97 14.56 48.34 4.4 58.46 265.42 8414
## Warners 10.62 7.74 14.48 1.97 47.97 14.01 43.73 4.9 55.39 278.05 8343
## Zsivoczky 10.91 7.14 15.31 2.12 49.40 14.95 45.62 4.7 63.45 269.54 8287
attach(Decathlon) #archivo en uso
## The following objects are masked from Decathlon (pos = 3):
##
## Discus, High.jump, Javeline, Long.jump, Points, Pole.vault, Shot.put, X100m, X110m.hurdle, X1500m, X400m
lab = rownames(Decathlon) #etiquetas de las unidades en lab
# regresión a mano
# definición de los datos para el modelo
n <- dim(Decathlon)[1]; n # n es el número de observaciones
## [1] 28
x <- as.matrix(Decathlon[,c(6,9,10)]); x # x es el caracter descriptivo
## X110m.hurdle Javeline X1500m
## Sebrle 14.05 70.52 280.01
## Clay 14.13 69.71 282.00
## Karpov 13.97 55.54 278.11
## Macey 14.56 58.46 265.42
## Warners 14.01 55.39 278.05
## Zsivoczky 14.95 63.45 269.54
## Hernu 14.25 57.76 264.35
## Nool 14.80 61.33 276.33
## Bernard 14.17 55.27 276.31
## Schwarzl 14.25 56.32 273.56
## Pogorelov 14.21 53.45 287.63
## Schoenbeck 14.34 60.89 278.82
## Barras 14.37 64.55 267.09
## Smith 14.01 61.52 272.74
## Averyanov 14.39 54.51 271.02
## Ojaniemi 15.01 59.26 275.71
## Smirnov 14.77 60.88 263.31
## Qi 14.78 60.79 272.63
## Drews 14.01 51.53 274.21
## Parkhomenko 14.88 65.82 277.94
## Terek 15.12 50.62 290.36
## Gomez 14.41 60.71 269.70
## Turi 14.26 59.34 290.01
## Lorenzo 15.38 58.36 263.08
## Karlivans 14.98 52.92 278.67
## Korkizoglou 14.96 53.05 317.00
## Uldal 15.09 60.00 281.70
## Casarsa 15.39 58.62 296.12
p <- dim(x)[2]; p # numero de regresores
## [1] 3
y <- Points # y es el caracter respuesta
nomy <- deparse(substitute(Points)); nomy # nomy es su nombre
## [1] "Points"
nom <- c(colnames(x),nomy) ; nom # etiquetas de las variables en nom
## [1] "X110m.hurdle" "Javeline" "X1500m" "Points"
# estadísticas
xm <- apply(x,2,mean); xm # cálculo de xm, promedio de x
## X110m.hurdle Javeline X1500m
## 14.55357 58.94893 277.55071
ym <- sum(y)/n; ym # cálculo de ym, promedio de y
## [1] 8051.536
ssx <- apply(x^2,2,sum); ssx # ssx es la suma de los x cuadros
## X110m.hurdle Javeline X1500m
## 5935.872 97967.828 2160424.133
ssy <- sum(y^2); ssy # ssy es la suma de los y cuadros
## [1] 1818910257
sxy <- t(x)%*%y ; sxy # sxy es la suma de los productos xy
## [,1]
## X110m.hurdle 3278092
## Javeline 13310718
## X1500m 62538969
ssxc <- ssx-n*xm^2 ; ssxc # ssxc es ssx centrado sobre el promedio
## X110m.hurdle Javeline X1500m
## 5.291443 668.495068 3460.961386
ssyc <- ssy-n*ym^2 ; ssyc # ssyc es ssy centrado sobre el promedio
## [1] 3747891
sxyc <- sxy-n*xm*ym; sxyc # sxyc es sxy centrado sobre el promedio
## [,1]
## X110m.hurdle -2908.684
## Javeline 21094.966
## X1500m -32896.191
varx <- ssxc/n; varx # varx es la varianza de x
## X110m.hurdle Javeline X1500m
## 0.1889801 23.8748239 123.6057638
vary <- ssyc/n; vary # vary es la varianza de y
## [1] 133853.2
covxy <- sxyc/n; covxy # covxy es la covarianza de xy
## [,1]
## X110m.hurdle -103.8816
## Javeline 753.3916
## X1500m -1174.8640
stats <- matrix(8*(2*p+1)*0,nrow=8,ncol=(2*p+1)) # construcción de tabla de estadísticas
rownames(stats) <- c("Mínimo","Máximo","Promedio","Total","Suma de cuadrados",
"Cuadrados centrados","Varianza covarianza","Desvío estándar")
colnames(stats) <- c(nom,"x1y","x2y","x3y")
stats[,1:p] <- rbind(apply(x,2,min),apply(x,2,max),xm,apply(x,2,sum),ssx,ssxc,varx,sqrt(varx))
stats[,p+1] <- rbind(min(y),max(y),ym,sum(y),ssy,ssyc,vary,sqrt(vary))
stats[,c((p+2):(2*p+1))] <- rbind(t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(sxy),t(sxyc),t(covxy),t(rep(0,p)))
stats # impresión de la tabla
## X110m.hurdle Javeline X1500m Points x1y x2y x3y
## Mínimo 13.9700000 50.620000 2.630800e+02 7.404000e+03 0.0000 0.000000e+00 0.000
## Máximo 15.3900000 70.520000 3.170000e+02 8.893000e+03 0.0000 0.000000e+00 0.000
## Promedio 14.5535714 58.948929 2.775507e+02 8.051536e+03 0.0000 0.000000e+00 0.000
## Total 407.5000000 1650.570000 7.771420e+03 2.254430e+05 0.0000 0.000000e+00 0.000
## Suma de cuadrados 5935.8718000 97967.828100 2.160424e+06 1.818910e+09 3278092.1200 1.331072e+07 62538969.490
## Cuadrados centrados 5.2914429 668.495068 3.460961e+03 3.747891e+06 -2908.6836 2.109497e+04 -32896.191
## Varianza covarianza 0.1889801 23.874824 1.236058e+02 1.338532e+05 -103.8816 7.533916e+02 -1174.864
## Desvío estándar 0.4347184 4.886187 1.111781e+01 3.658596e+02 0.0000 0.000000e+00 0.000
print("Matriz de varianza y covarianza de los regresores")
## [1] "Matriz de varianza y covarianza de los regresores"
vcovx <- (n-1)/n * cov(x) ; vcovx # matriz de varianza/covarianza de los xy
## X110m.hurdle Javeline X1500m
## X110m.hurdle 0.1889801 -0.1695462 0.8653474
## Javeline -0.1695462 23.8748239 -13.6627207
## X1500m 0.8653474 -13.6627207 123.6057638
# estimación
X <- cbind(rep(1,n),x); X # se necesita ajuntar la columna de unos
## X110m.hurdle Javeline X1500m
## Sebrle 1 14.05 70.52 280.01
## Clay 1 14.13 69.71 282.00
## Karpov 1 13.97 55.54 278.11
## Macey 1 14.56 58.46 265.42
## Warners 1 14.01 55.39 278.05
## Zsivoczky 1 14.95 63.45 269.54
## Hernu 1 14.25 57.76 264.35
## Nool 1 14.80 61.33 276.33
## Bernard 1 14.17 55.27 276.31
## Schwarzl 1 14.25 56.32 273.56
## Pogorelov 1 14.21 53.45 287.63
## Schoenbeck 1 14.34 60.89 278.82
## Barras 1 14.37 64.55 267.09
## Smith 1 14.01 61.52 272.74
## Averyanov 1 14.39 54.51 271.02
## Ojaniemi 1 15.01 59.26 275.71
## Smirnov 1 14.77 60.88 263.31
## Qi 1 14.78 60.79 272.63
## Drews 1 14.01 51.53 274.21
## Parkhomenko 1 14.88 65.82 277.94
## Terek 1 15.12 50.62 290.36
## Gomez 1 14.41 60.71 269.70
## Turi 1 14.26 59.34 290.01
## Lorenzo 1 15.38 58.36 263.08
## Karlivans 1 14.98 52.92 278.67
## Korkizoglou 1 14.96 53.05 317.00
## Uldal 1 15.09 60.00 281.70
## Casarsa 1 15.39 58.62 296.12
p1 <- p + 1; p1 # un regresor más
## [1] 4
XX <- t(X)%*%X; XX # matriz X'X
## X110m.hurdle Javeline X1500m
## 28.00 407.500 1650.57 7771.42
## X110m.hurdle 407.50 5935.872 24016.94 113126.15
## Javeline 1650.57 24016.941 97967.83 457734.33
## X1500m 7771.42 113126.146 457734.33 2160424.13
XXm1 <- solve(XX); XXm1 # su inversa
## X110m.hurdle Javeline X1500m
## 67.68929468 -2.5232989465 -0.1514649671 -0.0792715359
## X110m.hurdle -2.52329895 0.1955042881 0.0006459688 -0.0012972975
## Javeline -0.15146497 0.0006459688 0.0015990444 0.0001722275
## X1500m -0.07927154 -0.0012972975 0.0001722275 0.0003170564
bh <- XXm1 %*% t(X) %*% y # estimación de los betas
print("Estimaciones de los beta")
## [1] "Estimaciones de los beta"
bh
## [,1]
## 14803.597126
## X110m.hurdle -512.357276
## Javeline 26.187240
## X1500m -3.023386
eta <- X %*% bh; eta # valores estimados
## [,1]
## Sebrle 8605.123
## Clay 8536.906
## Karpov 8259.571
## Macey 8072.114
## Warners 8235.330
## Zsivoczky 7990.513
## Hernu 8215.849
## Nool 7991.321
## Bernard 8155.471
## Schwarzl 8150.294
## Pogorelov 8053.092
## Schoenbeck 8207.954
## Barras 8323.893
## Smith 8411.912
## Averyanov 8038.844
## Ojaniemi 7831.393
## Smirnov 8034.272
## Qi 7998.613
## Drews 8145.857
## Parkhomenko 8063.045
## Terek 7504.483
## Gomez 8194.949
## Turi 8174.521
## Lorenzo 7656.437
## Karlivans 7671.787
## Korkizoglou 7569.552
## Uldal 7791.672
## Casarsa 7558.230
res <- y - eta; res # residuos
## [,1]
## Sebrle 287.876742
## Clay 283.093527
## Karpov 465.428587
## Macey 341.885869
## Warners 107.669561
## Zsivoczky 296.487228
## Hernu 21.151158
## Nool 243.679377
## Bernard 69.528502
## Schwarzl -48.293830
## Pogorelov 30.908300
## Schoenbeck -130.954353
## Barras -256.893253
## Smith -388.912403
## Averyanov -17.844307
## Ojaniemi 174.607493
## Smirnov -41.271570
## Qi -64.613187
## Drews -219.857494
## Parkhomenko -145.045099
## Terek 388.517156
## Gomez -329.948921
## Turi -466.521023
## Lorenzo -64.437164
## Karlivans -88.786899
## Korkizoglou 3.448002
## Uldal -296.672400
## Casarsa -154.229599
por_res <- (res/y)*100
unidad <- cbind(x,y,eta,res,por_res) # tabla de resultados
rownames(unidad) <- lab
colnames(unidad) <- c(nom,"eta","residuos","% res")
unidad # impresión de la tabla
## X110m.hurdle Javeline X1500m Points eta residuos % res
## Sebrle 14.05 70.52 280.01 8893 8605.123 287.876742 3.2371162
## Clay 14.13 69.71 282.00 8820 8536.906 283.093527 3.2096772
## Karpov 13.97 55.54 278.11 8725 8259.571 465.428587 5.3344251
## Macey 14.56 58.46 265.42 8414 8072.114 341.885869 4.0632977
## Warners 14.01 55.39 278.05 8343 8235.330 107.669561 1.2905377
## Zsivoczky 14.95 63.45 269.54 8287 7990.513 296.487228 3.5777390
## Hernu 14.25 57.76 264.35 8237 8215.849 21.151158 0.2567823
## Nool 14.80 61.33 276.33 8235 7991.321 243.679377 2.9590695
## Bernard 14.17 55.27 276.31 8225 8155.471 69.528502 0.8453313
## Schwarzl 14.25 56.32 273.56 8102 8150.294 -48.293830 -0.5960729
## Pogorelov 14.21 53.45 287.63 8084 8053.092 30.908300 0.3823392
## Schoenbeck 14.34 60.89 278.82 8077 8207.954 -130.954353 -1.6213242
## Barras 14.37 64.55 267.09 8067 8323.893 -256.893253 -3.1844955
## Smith 14.01 61.52 272.74 8023 8411.912 -388.912403 -4.8474686
## Averyanov 14.39 54.51 271.02 8021 8038.844 -17.844307 -0.2224699
## Ojaniemi 15.01 59.26 275.71 8006 7831.393 174.607493 2.1809579
## Smirnov 14.77 60.88 263.31 7993 8034.272 -41.271570 -0.5163464
## Qi 14.78 60.79 272.63 7934 7998.613 -64.613187 -0.8143835
## Drews 14.01 51.53 274.21 7926 8145.857 -219.857494 -2.7738770
## Parkhomenko 14.88 65.82 277.94 7918 8063.045 -145.045099 -1.8318401
## Terek 15.12 50.62 290.36 7893 7504.483 388.517156 4.9223002
## Gomez 14.41 60.71 269.70 7865 8194.949 -329.948921 -4.1951548
## Turi 14.26 59.34 290.01 7708 8174.521 -466.521023 -6.0524263
## Lorenzo 15.38 58.36 263.08 7592 7656.437 -64.437164 -0.8487508
## Karlivans 14.98 52.92 278.67 7583 7671.787 -88.786899 -1.1708677
## Korkizoglou 14.96 53.05 317.00 7573 7569.552 3.448002 0.0455302
## Uldal 15.09 60.00 281.70 7495 7791.672 -296.672400 -3.9582708
## Casarsa 15.39 58.62 296.12 7404 7558.230 -154.229599 -2.0830578
# gráficos clásicos: ejemplo con el primero regresor
plot(x[,1],y,xlab="X110m.hurdle",ylab="Points",ylim=c(min(y,eta),max(y,eta)),pch=".")
text(x[,1],y,labels=lab,pos=2)
points(x[,1],eta,col="red",pch=".")
text(x[,1],eta,labels=lab,col="red",pos=2)
legend("topright",
legend=c("Y", "Eta"),
pch=c("*", "*"),
col=c("black", "red"))

# gráficos de valores estimados y residuos
plot(eta,y,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(eta,y,labels=lab,pos=2)
points(eta,eta,col="red")

plot(eta,res,asp=1,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(eta,res,labels=lab,pos=2)

# datos para anova
ssr <- t(eta) %*% eta; ssr # suma de cuadrados por regresión
## [,1]
## [1,] 1817304528
sse <- ssy - ssr; sse # suma por residuos
## [,1]
## [1,] 1605729
msr <- ssr/p1; msr # cuadrado promedio por regresión
## [,1]
## [1,] 454326132
mse <- sse/(n-p1); mse # cuadrado promedio por residuos
## [,1]
## [1,] 66905.37
df <- c(p1,n-p1,n) # grados de libertad
ssq <- c(ssr,sse,ssy) # sumas de cuadrados
msq <- c(msr,mse,0) # suma de residuos
anova <- (cbind(df,ssq,msq)) # tabla de anova
colnames(anova) <- c("Degrees of Freedom","Sum of squares","Mean squares")
rownames(anova) <- c("Regression","Residuals","Total")
anova
## Degrees of Freedom Sum of squares Mean squares
## Regression 4 1817304528 454326132.01
## Residuals 24 1605729 66905.37
## Total 28 1818910257 0.00
# apalancamiento
Hat <- X %*% XXm1 %*% t(X) # matriz H sombrero
rownames(Hat) <- lab # inclusivo de su etiquetas
colnames(Hat) <- lab
Hat
## Sebrle Clay Karpov Macey Warners Zsivoczky Hernu Nool Bernard Schwarzl Pogorelov Schoenbeck Barras Smith Averyanov Ojaniemi Smirnov Qi Drews Parkhomenko Terek
## Sebrle 0.3067915102 0.291008946 0.029173843 -0.015543849 0.022411503 0.054664503 -0.003038639 0.052617147 -0.0008482223 0.001728302 0.0003123053 0.0962796328 0.1213485575 0.118243782 -0.053824353 -0.0077857139 0.002466883 0.031780252 -0.0625433995 0.134117639 -0.128666963
## Clay 0.2910089461 0.277730313 0.025146118 -0.019725350 0.018997060 0.052473905 -0.010890360 0.053088127 -0.0028234079 -0.001255950 0.0048662685 0.0923459072 0.1099467279 0.107248895 -0.054407802 -0.0030579383 -0.002080090 0.031052643 -0.0639692855 0.132170918 -0.109098103
## Karpov 0.0291738434 0.025146118 0.123735936 0.033551653 0.119894116 -0.039255753 0.073467920 -0.007193827 0.1012547163 0.084919974 0.1109368328 0.0500212704 0.0214592079 0.082914908 0.078029644 -0.0201184294 -0.005625606 -0.003043292 0.1406848190 -0.041731385 0.021618852
## Macey -0.0155438489 -0.019725350 0.033551653 0.085002059 0.034885134 0.060973348 0.086053049 0.037805926 0.0447509440 0.053906275 0.0060501322 0.0215881473 0.0577961702 0.038225309 0.071434530 0.0496848061 0.089877135 0.053593562 0.0610758812 0.019926505 0.018588016
## Warners 0.0224115029 0.018997060 0.119894116 0.034885134 0.116403799 -0.036397959 0.072643623 -0.005687504 0.0992078436 0.083527682 0.1085151212 0.0477968903 0.0197826050 0.078563055 0.078381328 -0.0164494794 -0.002992680 -0.001213868 0.1384320165 -0.040764077 0.026666944
## Zsivoczky 0.0546645025 0.052473905 -0.039255753 0.060973348 -0.036397959 0.117305872 0.031011091 0.075334954 -0.0186329574 0.001275873 -0.0518526299 0.0252184187 0.0769045270 0.012935704 0.008783179 0.0832437592 0.099549242 0.078671045 -0.0510918624 0.106201638 0.007367987
## Hernu -0.0030386391 -0.010890360 0.073467920 0.086053049 0.072643623 0.031011091 0.106716251 0.019590996 0.0732429394 0.076204110 0.0342397777 0.0313397597 0.0609297287 0.067071214 0.088246929 0.0220878340 0.074879624 0.037597422 0.1048606541 -0.009907222 -0.003423841
## Nool 0.0526171466 0.053088127 -0.007193827 0.037805926 -0.005687504 0.075334954 0.019590996 0.057662296 0.0025858158 0.011449745 -0.0055567895 0.0316730506 0.0504399941 0.018914288 0.012586098 0.0608434090 0.058295195 0.055705427 -0.0190480448 0.078154833 0.029685425
## Bernard -0.0008482223 -0.002823408 0.101254716 0.044750944 0.099207844 -0.018632957 0.073242939 0.002585816 0.0887691961 0.077503016 0.0912928127 0.0389072222 0.0196242957 0.063128734 0.079724759 0.0001376711 0.014605985 0.009485551 0.1257294025 -0.032812625 0.038863121
## Schwarzl 0.0017283018 -0.001255950 0.084919974 0.053906275 0.083527682 0.001275873 0.076204110 0.011449745 0.0775030162 0.071333469 0.0695365500 0.0360912320 0.0313292785 0.059374933 0.076082938 0.0110684146 0.032656604 0.020213896 0.1082492574 -0.017993362 0.029475893
## Pogorelov 0.0003123053 0.004866269 0.110936833 0.006050132 0.108515121 -0.051852630 0.034239778 -0.005556790 0.0912928127 0.069536550 0.1316887904 0.0429018387 -0.0178580868 0.049587627 0.064136288 -0.0097571867 -0.034847723 -0.009919249 0.1262659033 -0.040605577 0.083388173
## Schoenbeck 0.0962796328 0.092345907 0.050021270 0.021588147 0.047796890 0.025218419 0.031339760 0.031673051 0.0389072222 0.036091232 0.0429018387 0.0521838209 0.0506827495 0.061934591 0.021851643 0.0156017166 0.018304668 0.027045271 0.0316116843 0.044231954 -0.001696297
## Barras 0.1213485575 0.109946728 0.021459208 0.057796170 0.019782605 0.076904527 0.060929729 0.050439994 0.0196242957 0.031329279 -0.0178580868 0.0506827495 0.1006693176 0.074138696 0.021344931 0.0332586748 0.075356808 0.054835648 0.0007401459 0.077133058 -0.060558378
## Smith 0.1182437820 0.107248895 0.082914908 0.038225309 0.078563055 0.012935704 0.067071214 0.018914288 0.0631287337 0.059374933 0.0495876268 0.0619345914 0.0741386955 0.098537083 0.041255428 -0.0075776316 0.025458382 0.020694558 0.0686958906 0.023600196 -0.049254355
## Averyanov -0.0538243533 -0.054407802 0.078029644 0.071434530 0.078381328 0.008783179 0.088246929 0.012586098 0.0797247594 0.076082938 0.0641362881 0.0218516435 0.0213449315 0.041255428 0.094127300 0.0259136938 0.051275411 0.027315719 0.1206012102 -0.031141560 0.056544947
## Ojaniemi -0.0077857139 -0.003057938 -0.020118429 0.049684806 -0.016449479 0.083243759 0.022087834 0.060843409 0.0001376711 0.011068415 -0.0097571867 0.0156017166 0.0332586748 -0.007577632 0.025913694 0.0798381672 0.072485382 0.062902317 -0.0139747931 0.068516345 0.069393009
## Smirnov 0.0024668833 -0.002080090 -0.005625606 0.089877135 -0.002992680 0.099549242 0.074879624 0.058295195 0.0146059852 0.032656604 -0.0348477231 0.0183046679 0.0753568083 0.025458382 0.051275411 0.0724853818 0.114197532 0.073150188 0.0111547575 0.059551948 0.007225588
## Qi 0.0317802523 0.031052643 -0.003043292 0.053593562 -0.001213868 0.078671045 0.037597422 0.055705427 0.0094855514 0.020213896 -0.0099192493 0.0270452714 0.0548356477 0.020694558 0.027315719 0.0629023173 0.073150188 0.059143717 -0.0039695040 0.067448319 0.026713323
## Drews -0.0625433995 -0.063969286 0.140684819 0.061075881 0.138432017 -0.051091862 0.104860654 -0.019048045 0.1257294025 0.108249257 0.1262659033 0.0316116843 0.0007401459 0.068695891 0.120601210 -0.0139747931 0.011154758 -0.003969504 0.1940661914 -0.087639213 0.060882580
## Parkhomenko 0.1341176385 0.132170918 -0.041731385 0.019926505 -0.040764077 0.106201638 -0.009907222 0.078154833 -0.0328126253 -0.017993362 -0.0406055769 0.0442319538 0.0771330575 0.023600196 -0.031141560 0.0685163454 0.059551948 0.067448319 -0.0876392127 0.135577244 -0.008420161
## Terek -0.1286669629 -0.109098103 0.021618852 0.018588016 0.026666944 0.007367987 -0.003423841 0.029685425 0.0388631214 0.029475893 0.0833881734 -0.0016962965 -0.0605583779 -0.049254355 0.056544947 0.0693930094 0.007225588 0.026713323 0.0608825796 -0.008420161 0.199720130
## Gomez 0.0550958892 0.048666389 0.039694497 0.059192302 0.038938933 0.051265163 0.065563426 0.037293232 0.0395741811 0.045172986 0.0137737623 0.0394144104 0.0673853632 0.057961926 0.045425518 0.0321778472 0.063212858 0.044286095 0.0413244857 0.038890819 -0.010804808
## Turi 0.1133169959 0.114114545 0.072152889 -0.019376078 0.068569539 -0.016961458 -0.003164917 0.018427284 0.0488775271 0.033627846 0.0910379295 0.0619640867 0.0176189547 0.061038315 0.007520348 -0.0050376641 -0.037803701 0.002224915 0.0417145834 0.031235534 0.029032309
## Lorenzo -0.1026368017 -0.097356233 -0.062642851 0.108181294 -0.054663628 0.140157371 0.060587838 0.080190074 -0.0154964177 0.011699823 -0.0682408195 -0.0156630685 0.0467047335 -0.041847409 0.056778268 0.1275576985 0.151112094 0.099474756 -0.0213446352 0.072289353 0.088003498
## Karlivans -0.1127499952 -0.101539656 0.020758873 0.055702206 0.025329800 0.035265968 0.039259213 0.034610895 0.0410658215 0.041077536 0.0468811140 -0.0003227692 -0.0170464995 -0.024329611 0.071743631 0.0707489142 0.052499276 0.042579190 0.0654836848 -0.002321983 0.140462662
## Korkizoglou 0.0228650713 0.048566377 0.035665352 -0.095973158 0.036342686 -0.053111718 -0.113627868 0.022703004 0.0214649669 -0.009224225 0.1508852074 0.0397933630 -0.0967809335 -0.036442447 -0.029347701 0.0259618960 -0.119718868 -0.017071839 0.0034100810 0.031645782 0.209809977
## Uldal 0.0189666337 0.026352693 -0.031641129 0.025304929 -0.028093639 0.081351893 -0.008523842 0.065950222 -0.0137886024 -0.005097900 -0.0057083334 0.0206591999 0.0243075664 -0.015537693 0.001929473 0.0808139205 0.051286517 0.059503729 -0.0417517544 0.087563814 0.079872948
## Casarsa 0.0144485415 0.032428345 -0.053829150 -0.022530877 -0.049026847 0.067348898 -0.073016710 0.071878728 -0.0353942904 -0.033934198 0.0180499607 0.0182396052 -0.0214938412 -0.054534367 -0.032311798 0.0915233598 0.004466588 0.049800207 -0.0796507368 0.105081268 0.148607522
## Gomez Turi Lorenzo Karlivans Korkizoglou Uldal Casarsa
## Sebrle 0.05509589 0.113316996 -0.10263680 -0.1127499952 0.022865071 0.018966634 0.014448542
## Clay 0.04866639 0.114114545 -0.09735623 -0.1015396561 0.048566377 0.026352693 0.032428345
## Karpov 0.03969450 0.072152889 -0.06264285 0.0207588733 0.035665352 -0.031641129 -0.053829150
## Macey 0.05919230 -0.019376078 0.10818129 0.0557022062 -0.095973158 0.025304929 -0.022530877
## Warners 0.03893893 0.068569539 -0.05466363 0.0253298005 0.036342686 -0.028093639 -0.049026847
## Zsivoczky 0.05126516 -0.016961458 0.14015737 0.0352659681 -0.053111718 0.081351893 0.067348898
## Hernu 0.06556343 -0.003164917 0.06058784 0.0392592128 -0.113627868 -0.008523842 -0.073016710
## Nool 0.03729323 0.018427284 0.08019007 0.0346108945 0.022703004 0.065950222 0.071878728
## Bernard 0.03957418 0.048877527 -0.01549642 0.0410658215 0.021464967 -0.013788602 -0.035394290
## Schwarzl 0.04517299 0.033627846 0.01169982 0.0410775360 -0.009224225 -0.005097900 -0.033934198
## Pogorelov 0.01377376 0.091037930 -0.06824082 0.0468811140 0.150885207 -0.005708333 0.018049961
## Schoenbeck 0.03941441 0.061964087 -0.01566307 -0.0003227692 0.039793363 0.020659200 0.018239605
## Barras 0.06738536 0.017618955 0.04670473 -0.0170464995 -0.096780934 0.024307566 -0.021493841
## Smith 0.05796193 0.061038315 -0.04184741 -0.0243296109 -0.036442447 -0.015537693 -0.054534367
## Averyanov 0.04542552 0.007520348 0.05677827 0.0717436312 -0.029347701 0.001929473 -0.032311798
## Ojaniemi 0.03217785 -0.005037664 0.12755770 0.0707489142 0.025961896 0.080813921 0.091523360
## Smirnov 0.06321286 -0.037803701 0.15111209 0.0524992759 -0.119718868 0.051286517 0.004466588
## Qi 0.04428609 0.002224915 0.09947476 0.0425791898 -0.017071839 0.059503729 0.049800207
## Drews 0.04132449 0.041714583 -0.02134464 0.0654836848 0.003410081 -0.041751754 -0.079650737
## Parkhomenko 0.03889082 0.031235534 0.07228935 -0.0023219833 0.031645782 0.087563814 0.105081268
## Terek -0.01080481 0.029032309 0.08800350 0.1404626622 0.209809977 0.079872948 0.148607522
## Gomez 0.05623132 0.016253735 0.05000201 0.0180682665 -0.058061655 0.019875439 -0.015874390
## Turi 0.01625374 0.113046329 -0.09003703 -0.0061858761 0.164210281 0.017355258 0.055227515
## Lorenzo 0.05000201 -0.090037026 0.26952285 0.1234913514 -0.101326493 0.105296722 0.080205655
## Karlivans 0.01806827 -0.006185876 0.12349135 0.1229000319 0.072745431 0.062795498 0.081027026
## Korkizoglou -0.05806165 0.164210281 -0.10132649 0.0727454308 0.492213797 0.091838347 0.260565288
## Uldal 0.01987544 0.017355258 0.10529672 0.0627954981 0.091838347 0.095652536 0.133465554
## Casarsa -0.01587439 0.055227515 0.08020566 0.0810270262 0.260565288 0.133465554 0.239233142
lev <- diag(Hat); lev # apalancamientos
## Sebrle Clay Karpov Macey Warners Zsivoczky Hernu Nool Bernard Schwarzl Pogorelov Schoenbeck Barras Smith Averyanov Ojaniemi Smirnov Qi Drews Parkhomenko Terek Gomez Turi Lorenzo Karlivans
## 0.30679151 0.27773031 0.12373594 0.08500206 0.11640380 0.11730587 0.10671625 0.05766230 0.08876920 0.07133347 0.13168879 0.05218382 0.10066932 0.09853708 0.09412730 0.07983817 0.11419753 0.05914372 0.19406619 0.13557724 0.19972013 0.05623132 0.11304633 0.26952285 0.12290003
## Korkizoglou Uldal Casarsa
## 0.49221380 0.09565254 0.23923314
pesos <- cbind(x,y,lev) # construcción de una salida conjunta
rownames(pesos) <- lab
pesos # salida
## X110m.hurdle Javeline X1500m y lev
## Sebrle 14.05 70.52 280.01 8893 0.30679151
## Clay 14.13 69.71 282.00 8820 0.27773031
## Karpov 13.97 55.54 278.11 8725 0.12373594
## Macey 14.56 58.46 265.42 8414 0.08500206
## Warners 14.01 55.39 278.05 8343 0.11640380
## Zsivoczky 14.95 63.45 269.54 8287 0.11730587
## Hernu 14.25 57.76 264.35 8237 0.10671625
## Nool 14.80 61.33 276.33 8235 0.05766230
## Bernard 14.17 55.27 276.31 8225 0.08876920
## Schwarzl 14.25 56.32 273.56 8102 0.07133347
## Pogorelov 14.21 53.45 287.63 8084 0.13168879
## Schoenbeck 14.34 60.89 278.82 8077 0.05218382
## Barras 14.37 64.55 267.09 8067 0.10066932
## Smith 14.01 61.52 272.74 8023 0.09853708
## Averyanov 14.39 54.51 271.02 8021 0.09412730
## Ojaniemi 15.01 59.26 275.71 8006 0.07983817
## Smirnov 14.77 60.88 263.31 7993 0.11419753
## Qi 14.78 60.79 272.63 7934 0.05914372
## Drews 14.01 51.53 274.21 7926 0.19406619
## Parkhomenko 14.88 65.82 277.94 7918 0.13557724
## Terek 15.12 50.62 290.36 7893 0.19972013
## Gomez 14.41 60.71 269.70 7865 0.05623132
## Turi 14.26 59.34 290.01 7708 0.11304633
## Lorenzo 15.38 58.36 263.08 7592 0.26952285
## Karlivans 14.98 52.92 278.67 7583 0.12290003
## Korkizoglou 14.96 53.05 317.00 7573 0.49221380
## Uldal 15.09 60.00 281.70 7495 0.09565254
## Casarsa 15.39 58.62 296.12 7404 0.23923314
niv <- 2 * p1 / n; niv # nivel para apalancamiento
## [1] 0.2857143
level <- which(lev>niv) # unidades con fuerte apalancamiento
out <- rbind(level,lev[level]) # salida
rownames(out) <- c("unidad","apalancamiento"); out
## Sebrle Korkizoglou
## unidad 1.0000000 26.0000000
## apalancamiento 0.3067915 0.4922138
# De acuerdo al gráfico de residuos vs estimados y Resultados del apalancamiento
# podemos ver que las observaciones que tienen mayor apalancamiento es Sebrle (0.3067915), Korkizoglou(0.4922138)
# comparación entre modelo total y modelo sin unidades con apalancamiento
# modelo completo X110m.hurdle,Javeline,X1500m
lm1 <- lm(y~X110m.hurdle +Javeline+X1500m,data=Decathlon); summary(lm1)
##
## Call:
## lm(formula = y ~ X110m.hurdle + Javeline + X1500m, data = Decathlon)
##
## Residuals:
## Min 1Q Median 3Q Max
## -466.52 -147.34 -29.56 191.88 465.43
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14803.597 2128.092 6.956 3.41e-07 ***
## X110m.hurdle -512.357 114.369 -4.480 0.000156 ***
## Javeline 26.187 10.343 2.532 0.018308 *
## X1500m -3.023 4.606 -0.656 0.517786
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 258.7 on 24 degrees of freedom
## Multiple R-squared: 0.5716, Adjusted R-squared: 0.518
## F-statistic: 10.67 on 3 and 24 DF, p-value: 0.0001198
residuals(lm1)
## Sebrle Clay Karpov Macey Warners Zsivoczky Hernu Nool Bernard Schwarzl Pogorelov Schoenbeck Barras Smith Averyanov Ojaniemi Smirnov Qi Drews Parkhomenko Terek Gomez Turi Lorenzo Karlivans
## 287.876742 283.093527 465.428587 341.885869 107.669561 296.487228 21.151158 243.679377 69.528502 -48.293830 30.908300 -130.954353 -256.893253 -388.912403 -17.844307 174.607493 -41.271570 -64.613187 -219.857494 -145.045099 388.517156 -329.948921 -466.521023 -64.437164 -88.786899
## Korkizoglou Uldal Casarsa
## 3.448002 -296.672400 -154.229599
# y su plot estimados observados
plot(lm1$fitted.values,y,pch=20,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm1$fitted.values,y,labels=lab,pos=3,offset(0.5))
points(lm1$fitted.values,lm1$fitted.values,pch=20,col="red")

# y su plot estimados residuos
plot(lm1$fitted.values,lm1$residuals,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm1$fitted.values,lm1$residuals,labels=lab,pos=3,offset(0.5))

# modelo sin unidades con fuerte apalancamiento (Todas las observaciones menos la Sebrle,Korkizoglou)
lm2 <- lm(y[-level]~X110m.hurdle +Javeline+X1500m,data=Decathlon[-level,]); summary(lm2)
##
## Call:
## lm(formula = y[-level] ~ X110m.hurdle + Javeline + X1500m, data = Decathlon[-level,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -415.18 -139.64 -45.56 158.50 478.48
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15152.701 2452.361 6.179 3.22e-06 ***
## X110m.hurdle -473.797 118.865 -3.986 0.000624 ***
## Javeline 18.508 11.921 1.552 0.134819
## X1500m -4.729 6.237 -0.758 0.456392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 259.9 on 22 degrees of freedom
## Multiple R-squared: 0.4704, Adjusted R-squared: 0.3982
## F-statistic: 6.514 on 3 and 22 DF, p-value: 0.002547
residuals(lm2)
## Clay Karpov Macey Warners Zsivoczky Hernu Nool Bernard Schwarzl Pogorelov Schoenbeck Barras Smith Averyanov Ojaniemi Smirnov Qi Drews Parkhomenko Terek Gomez Turi Lorenzo Karlivans Uldal
## 405.42902 478.47897 332.96772 117.92327 317.87859 16.98603 266.15417 69.72349 -47.81012 34.89004 -89.87428 -208.86786 -340.63867 -40.99108 172.03043 -43.30121 -51.82448 -245.79623 -88.42773 340.33179 -308.50441 -415.17520 -109.73341 -133.84780 -286.43550
## Casarsa
## -141.56554
# hay que tirar las unidades también de la representación gráfica
plot(lm2$fitted.values,y[-level],pch=20,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm2$fitted.values,y[-level],labels=lab,pos=3,offset(0.5))
points(lm2$fitted.values,lm2$fitted.values,pch=20,col="red")

plot(lm2$fitted.values,lm2$residuals,asp=1,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm2$fitted.values,lm2$residuals,labels=lab,pos=3,offset(0.5))

# gráficos de comparación de estimados y residuos
# ambos contra y y no contra los estimados
plot(y,lm1$fitted.values,pch=20,asp=1,ylab="fitted values")
abline(0,1)
text(y,lm1$fitted.values,y,labels=lab,pos=4,offset(0.5))
points(y[-out[1,]],lm2$fitted.values,pch=20,col="red")
text(y[-out[1,]],lm2$fitted.values,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))
legend("bottomright", # Escoge la posición adecuada para tu gráfico
legend=c("Modelo 1", "Modelo 2 sin Obs Sebrle y Korkizoglou"),
pch=c(2, 2),
col=c("black", "red"),
title="Leyenda",
cex=0.5)

# por esto hay que cambiar el signo a los residuos
plot(y,-lm1$residuals,pch=20,asp=1,ylab="residuals")
abline(h=0)
text(y,-lm1$residuals,labels=lab,pch=20,pos=4,offset(0.5))
points(y[-out[1,]],-lm2$residuals,pch=20,col="red")
text(y[-out[1,]],-lm2$residuals,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))

3) Points ~ Long.jump,High.jump,X400m
# Lectura de los datos
Decathlon <- read.csv("Decathlon.csv", row.names = 1);head(Decathlon)
## X100m Long.jump Shot.put High.jump X400m X110m.hurdle Discus Pole.vault Javeline X1500m Points
## Sebrle 10.85 7.84 16.36 2.12 48.36 14.05 48.72 5.0 70.52 280.01 8893
## Clay 10.44 7.96 15.23 2.06 49.19 14.13 50.11 4.9 69.71 282.00 8820
## Karpov 10.50 7.81 15.93 2.09 46.81 13.97 51.65 4.6 55.54 278.11 8725
## Macey 10.89 7.47 15.73 2.15 48.97 14.56 48.34 4.4 58.46 265.42 8414
## Warners 10.62 7.74 14.48 1.97 47.97 14.01 43.73 4.9 55.39 278.05 8343
## Zsivoczky 10.91 7.14 15.31 2.12 49.40 14.95 45.62 4.7 63.45 269.54 8287
attach(Decathlon) #archivo en uso
## The following objects are masked from Decathlon (pos = 3):
##
## Discus, High.jump, Javeline, Long.jump, Points, Pole.vault, Shot.put, X100m, X110m.hurdle, X1500m, X400m
## The following objects are masked from Decathlon (pos = 4):
##
## Discus, High.jump, Javeline, Long.jump, Points, Pole.vault, Shot.put, X100m, X110m.hurdle, X1500m, X400m
lab = rownames(Decathlon) #etiquetas de las unidades en lab
# regresión a mano
# definición de los datos para el modelo
n <- dim(Decathlon)[1]; n # n es el número de observaciones
## [1] 28
x <- as.matrix(Decathlon[,c(2,4,5)]); x # x es el caracter descriptivo
## Long.jump High.jump X400m
## Sebrle 7.84 2.12 48.36
## Clay 7.96 2.06 49.19
## Karpov 7.81 2.09 46.81
## Macey 7.47 2.15 48.97
## Warners 7.74 1.97 47.97
## Zsivoczky 7.14 2.12 49.40
## Hernu 7.19 2.03 48.73
## Nool 7.53 1.88 48.81
## Bernard 7.48 2.12 49.13
## Schwarzl 7.49 1.94 49.76
## Pogorelov 7.31 2.06 50.79
## Schoenbeck 7.30 1.88 50.30
## Barras 6.99 1.94 49.41
## Smith 6.81 1.91 49.27
## Averyanov 7.34 1.94 49.72
## Ojaniemi 7.50 1.94 49.12
## Smirnov 7.07 1.94 49.11
## Qi 7.34 1.97 49.65
## Drews 7.38 1.88 48.51
## Parkhomenko 6.61 2.03 51.04
## Terek 6.94 1.94 49.56
## Gomez 7.26 1.85 48.61
## Turi 6.91 2.03 51.67
## Lorenzo 7.03 1.85 49.34
## Karlivans 7.26 1.97 50.54
## Korkizoglou 7.07 1.94 51.16
## Uldal 6.99 1.85 50.95
## Casarsa 6.68 1.94 53.20
p <- dim(x)[2]; p # numero de regresores
## [1] 3
y <- Points # y es el caracter respuesta
nomy <- deparse(substitute(Points)); nomy # nomy es su nombre
## [1] "Points"
nom <- c(colnames(x),nomy) ; nom # etiquetas de las variables en nom
## [1] "Long.jump" "High.jump" "X400m" "Points"
# estadísticas
xm <- apply(x,2,mean); xm # cálculo de xm, promedio de x
## Long.jump High.jump X400m
## 7.265714 1.976429 49.610000
ym <- sum(y)/n; ym # cálculo de ym, promedio de y
## [1] 8051.536
ssx <- apply(x^2,2,sum); ssx # ssx es la suma de los x cuadros
## Long.jump High.jump X400m
## 1481.279 109.594 68955.701
ssy <- sum(y^2); ssy # ssy es la suma de los y cuadros
## [1] 1818910257
sxy <- t(x)%*%y ; sxy # sxy es la suma de los productos xy
## [,1]
## Long.jump 1640639.7
## High.jump 446139.3
## X400m 11175238.0
ssxc <- ssx-n*xm^2 ; ssxc # ssxc es ssx centrado sobre el promedio
## Long.jump High.jump X400m
## 3.1420857 0.2184429 43.4424000
ssyc <- ssy-n*ym^2 ; ssyc # ssyc es ssy centrado sobre el promedio
## [1] 3747891
sxyc <- sxy-n*xm*ym; sxyc # sxyc es sxy centrado sobre el promedio
## [,1]
## Long.jump 2635.2743
## High.jump 567.3236
## X400m -8989.2700
varx <- ssxc/n; varx # varx es la varianza de x
## Long.jump High.jump X400m
## 0.112217347 0.007801531 1.551514286
vary <- ssyc/n; vary # vary es la varianza de y
## [1] 133853.2
covxy <- sxyc/n; covxy # covxy es la covarianza de xy
## [,1]
## Long.jump 94.11694
## High.jump 20.26156
## X400m -321.04536
stats <- matrix(8*(2*p+1)*0,nrow=8,ncol=(2*p+1)) # construcción de tabla de estadísticas
rownames(stats) <- c("Mínimo","Máximo","Promedio","Total","Suma de cuadrados",
"Cuadrados centrados","Varianza covarianza","Desvío estándar")
colnames(stats) <- c(nom,"x1y","x2y","x3y")
stats[,1:p] <- rbind(apply(x,2,min),apply(x,2,max),xm,apply(x,2,sum),ssx,ssxc,varx,sqrt(varx))
stats[,p+1] <- rbind(min(y),max(y),ym,sum(y),ssy,ssyc,vary,sqrt(vary))
stats[,c((p+2):(2*p+1))] <- rbind(t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(sxy),t(sxyc),t(covxy),t(rep(0,p)))
stats # impresión de la tabla
## Long.jump High.jump X400m Points x1y x2y x3y
## Mínimo 6.6100000 1.850000e+00 46.810000 7.404000e+03 0.000000e+00 0.00000 0.0000
## Máximo 7.9600000 2.150000e+00 53.200000 8.893000e+03 0.000000e+00 0.00000 0.0000
## Promedio 7.2657143 1.976429e+00 49.610000 8.051536e+03 0.000000e+00 0.00000 0.0000
## Total 203.4400000 5.534000e+01 1389.080000 2.254430e+05 0.000000e+00 0.00000 0.0000
## Suma de cuadrados 1481.2790000 1.095940e+02 68955.701200 1.818910e+09 1.640640e+06 446139.31000 11175237.9600
## Cuadrados centrados 3.1420857 2.184429e-01 43.442400 3.747891e+06 2.635274e+03 567.32357 -8989.2700
## Varianza covarianza 0.1122173 7.801531e-03 1.551514 1.338532e+05 9.411694e+01 20.26156 -321.0454
## Desvío estándar 0.3349886 8.832627e-02 1.245598 3.658596e+02 0.000000e+00 0.00000 0.0000
print("Matriz de varianza y covarianza de los regresores")
## [1] "Matriz de varianza y covarianza de los regresores"
vcovx <- (n-1)/n * cov(x) ; vcovx # matriz de varianza/covarianza de los xy
## Long.jump High.jump X400m
## Long.jump 0.11221735 0.010227551 -0.28002857
## High.jump 0.01022755 0.007801531 -0.01861071
## X400m -0.28002857 -0.018610714 1.55151429
# estimación
X <- cbind(rep(1,n),x); X # se necesita ajuntar la columna de unos
## Long.jump High.jump X400m
## Sebrle 1 7.84 2.12 48.36
## Clay 1 7.96 2.06 49.19
## Karpov 1 7.81 2.09 46.81
## Macey 1 7.47 2.15 48.97
## Warners 1 7.74 1.97 47.97
## Zsivoczky 1 7.14 2.12 49.40
## Hernu 1 7.19 2.03 48.73
## Nool 1 7.53 1.88 48.81
## Bernard 1 7.48 2.12 49.13
## Schwarzl 1 7.49 1.94 49.76
## Pogorelov 1 7.31 2.06 50.79
## Schoenbeck 1 7.30 1.88 50.30
## Barras 1 6.99 1.94 49.41
## Smith 1 6.81 1.91 49.27
## Averyanov 1 7.34 1.94 49.72
## Ojaniemi 1 7.50 1.94 49.12
## Smirnov 1 7.07 1.94 49.11
## Qi 1 7.34 1.97 49.65
## Drews 1 7.38 1.88 48.51
## Parkhomenko 1 6.61 2.03 51.04
## Terek 1 6.94 1.94 49.56
## Gomez 1 7.26 1.85 48.61
## Turi 1 6.91 2.03 51.67
## Lorenzo 1 7.03 1.85 49.34
## Karlivans 1 7.26 1.97 50.54
## Korkizoglou 1 7.07 1.94 51.16
## Uldal 1 6.99 1.85 50.95
## Casarsa 1 6.68 1.94 53.20
p1 <- p + 1; p1 # un regresor más
## [1] 4
XX <- t(X)%*%X; XX # matriz X'X
## Long.jump High.jump X400m
## 28.00 203.440 55.340 1389.080
## Long.jump 203.44 1481.279 402.371 10084.818
## High.jump 55.34 402.371 109.594 2744.896
## X400m 1389.08 10084.818 2744.896 68955.701
XXm1 <- solve(XX); XXm1 # su inversa
## Long.jump High.jump X400m
## 212.097615 -8.9455701 -4.01099846 -2.80464609
## Long.jump -8.945570 0.6440754 -0.58375520 0.10924516
## High.jump -4.010998 -0.5837552 5.24179360 -0.04248405
## X400m -2.804646 0.1092452 -0.04248405 0.04222674
bh <- XXm1 %*% t(X) %*% y # estimación de los betas
print("Estimaciones de los beta")
## [1] "Estimaciones de los beta"
bh
## [,1]
## 7413.6919
## Long.jump 384.1031
## High.jump 1817.3386
## X400m -115.7988
eta <- X %*% bh; eta # valores estimados
## [,1]
## Sebrle 8677.787
## Clay 8518.726
## Karpov 8791.232
## Macey 8519.552
## Warners 8411.938
## Zsivoczky 8288.484
## Hernu 8221.714
## Nool 8070.444
## Bernard 8450.345
## Schwarzl 8054.112
## Pogorelov 8083.781
## Schoenbeck 7809.560
## Barras 7902.590
## Smith 7795.143
## Averyanov 8001.128
## Ojaniemi 8132.064
## Smirnov 7968.058
## Qi 8063.754
## Drews 8047.569
## Parkhomenko 7731.439
## Terek 7866.015
## Gomez 7935.376
## Turi 7773.717
## Lorenzo 7762.499
## Karlivans 7929.965
## Korkizoglou 7730.670
## Uldal 7560.699
## Casarsa 7344.640
res <- y - eta; res # residuos
## [,1]
## Sebrle 215.2129057
## Clay 301.2738710
## Karpov -66.2320229
## Macey -105.5518060
## Warners -68.9375316
## Zsivoczky -1.4841156
## Hernu 15.2859881
## Nool 164.5556170
## Bernard -225.3448670
## Schwarzl 47.8883114
## Pogorelov 0.2190354
## Schoenbeck 267.4395910
## Barras 164.4102930
## Smith 227.8571810
## Averyanov 19.8718296
## Ojaniemi -126.0639689
## Smirnov 24.9423937
## Qi -129.7542464
## Drews -121.5685597
## Parkhomenko 186.5610994
## Terek 26.9852741
## Gomez -70.3761419
## Turi -65.7165824
## Lorenzo -170.4992760
## Karlivans -346.9650395
## Korkizoglou -157.6700120
## Uldal -65.6990397
## Casarsa 59.3598192
por_res <- (res/y)*100
unidad <- cbind(x,y,eta,res,por_res) # tabla de resultados
rownames(unidad) <- lab
colnames(unidad) <- c(nom,"eta","residuos","% res")
unidad # impresión de la tabla
## Long.jump High.jump X400m Points eta residuos % res
## Sebrle 7.84 2.12 48.36 8893 8677.787 215.2129057 2.420025927
## Clay 7.96 2.06 49.19 8820 8518.726 301.2738710 3.415803526
## Karpov 7.81 2.09 46.81 8725 8791.232 -66.2320229 -0.759106280
## Macey 7.47 2.15 48.97 8414 8519.552 -105.5518060 -1.254478321
## Warners 7.74 1.97 47.97 8343 8411.938 -68.9375316 -0.826291880
## Zsivoczky 7.14 2.12 49.40 8287 8288.484 -1.4841156 -0.017908962
## Hernu 7.19 2.03 48.73 8237 8221.714 15.2859881 0.185577129
## Nool 7.53 1.88 48.81 8235 8070.444 164.5556170 1.998246715
## Bernard 7.48 2.12 49.13 8225 8450.345 -225.3448670 -2.739755221
## Schwarzl 7.49 1.94 49.76 8102 8054.112 47.8883114 0.591067779
## Pogorelov 7.31 2.06 50.79 8084 8083.781 0.2190354 0.002709493
## Schoenbeck 7.30 1.88 50.30 8077 7809.560 267.4395910 3.311125307
## Barras 6.99 1.94 49.41 8067 7902.590 164.4102930 2.038059911
## Smith 6.81 1.91 49.27 8023 7795.143 227.8571810 2.840049620
## Averyanov 7.34 1.94 49.72 8021 8001.128 19.8718296 0.247747533
## Ojaniemi 7.50 1.94 49.12 8006 8132.064 -126.0639689 -1.574618648
## Smirnov 7.07 1.94 49.11 7993 7968.058 24.9423937 0.312052968
## Qi 7.34 1.97 49.65 7934 8063.754 -129.7542464 -1.635420298
## Drews 7.38 1.88 48.51 7926 8047.569 -121.5685597 -1.533794596
## Parkhomenko 6.61 2.03 51.04 7918 7731.439 186.5610994 2.356164428
## Terek 6.94 1.94 49.56 7893 7866.015 26.9852741 0.341888687
## Gomez 7.26 1.85 48.61 7865 7935.376 -70.3761419 -0.894801549
## Turi 6.91 2.03 51.67 7708 7773.717 -65.7165824 -0.852576316
## Lorenzo 7.03 1.85 49.34 7592 7762.499 -170.4992760 -2.245775500
## Karlivans 7.26 1.97 50.54 7583 7929.965 -346.9650395 -4.575564282
## Korkizoglou 7.07 1.94 51.16 7573 7730.670 -157.6700120 -2.082002007
## Uldal 6.99 1.85 50.95 7495 7560.699 -65.6990397 -0.876571577
## Casarsa 6.68 1.94 53.20 7404 7344.640 59.3598192 0.801726354
# gráficos clásicos: ejemplo con el primero regresor
plot(x[,1],y,xlab="Long.jump",ylab="Points",ylim=c(min(y,eta),max(y,eta)),pch=".")
text(x[,1],y,labels=lab,pos=2)
points(x[,1],eta,col="red",pch=".")
text(x[,1],eta,labels=lab,col="red",pos=2)
legend("bottomright",
legend=c("Y", "Eta"),
pch=c("*", "*"),
col=c("black", "red"))

# gráficos de valores estimados y residuos
plot(eta,y,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(eta,y,labels=lab,pos=2)
points(eta,eta,col="red")

plot(eta,res,asp=1,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(eta,res,labels=lab,pos=2)

# Se puede ver que la mayoria de las observaciones pasan por la recta horizontal 0, lo que es bueno ya que indica que tienen un valor residual bajo. Sin embargo hay observaciones como Karlivans o Clay con un alto valor residual. Por otro, también se puede observar la muestra Casarsa con un alto valor de apalancamiento
# datos para anova
ssr <- t(eta) %*% eta; ssr # suma de cuadrados por regresión
## [,1]
## [1,] 1818246549
sse <- ssy - ssr; sse # suma por residuos
## [,1]
## [1,] 663707.9
msr <- ssr/p1; msr # cuadrado promedio por regresión
## [,1]
## [1,] 454561637
mse <- sse/(n-p1); mse # cuadrado promedio por residuos
## [,1]
## [1,] 27654.5
df <- c(p1,n-p1,n) # grados de libertad
ssq <- c(ssr,sse,ssy) # sumas de cuadrados
msq <- c(msr,mse,0) # suma de residuos
anova <- (cbind(df,ssq,msq)) # tabla de anova
colnames(anova) <- c("Degrees of Freedom","Sum of squares","Mean squares")
rownames(anova) <- c("Regression","Residuals","Total")
anova
## Degrees of Freedom Sum of squares Mean squares
## Regression 4 1818246549.1 454561637.3
## Residuals 24 663707.9 27654.5
## Total 28 1818910257.0 0.0
# apalancamiento
Hat <- X %*% XXm1 %*% t(X) # matriz H sombrero
rownames(Hat) <- lab # inclusivo de su etiquetas
colnames(Hat) <- lab
Hat
## Sebrle Clay Karpov Macey Warners Zsivoczky Hernu Nool Bernard Schwarzl Pogorelov Schoenbeck Barras Smith Averyanov Ojaniemi Smirnov Qi Drews Parkhomenko Terek Gomez
## Sebrle 0.184301568 0.17721711 0.159727774 0.145445128 0.097281295 0.083649363 0.046203208 0.02678197 0.1334440555 0.052689650 0.086199527 -0.001862818 -0.023417759 -0.0649834345 0.030108003 0.0517176057 -0.012612958 0.04395119 0.003198016 -0.031611176 -0.030315323 -0.028471478
## Clay 0.177217110 0.26176692 0.080552960 0.081591627 0.113095455 -0.012791665 -0.036278088 0.08034607 0.0923280984 0.121116366 0.119937887 0.080567002 -0.074232297 -0.1468401173 0.066058275 0.0897219391 -0.062400466 0.06375740 0.011101947 -0.114693044 -0.083673255 -0.027260823
## Karpov 0.159727774 0.08055296 0.247059032 0.140829148 0.127212262 0.108720837 0.114562250 0.04264083 0.1185407205 0.006879078 -0.007150733 -0.047148726 0.039950470 0.0408502498 0.012666543 0.0473663788 0.057300388 0.02901480 0.064964107 -0.019810556 0.031491744 0.049302664
## Macey 0.145445128 0.08159163 0.140829148 0.177283985 0.031459161 0.160645244 0.093158257 -0.04395868 0.1504213568 -0.004784150 0.088042640 -0.052837837 0.019276874 0.0035751124 0.001648833 0.0025516229 0.019728269 0.02702744 -0.034384401 0.088254586 0.019447823 -0.055365703
## Warners 0.097281295 0.11309545 0.127212262 0.031459161 0.127102651 -0.011617148 0.028067056 0.10704994 0.0372400775 0.071086814 0.001087751 0.051558324 0.012061461 -0.0017206463 0.052263462 0.0833732869 0.027616374 0.04623833 0.092689573 -0.087023379 0.002983341 0.082591773
## Zsivoczky 0.083649363 -0.01279166 0.108720837 0.160645244 -0.011617148 0.185205618 0.119909886 -0.07144347 0.1291295085 -0.041107570 0.063306071 -0.071031693 0.062798474 0.0755600660 -0.011801308 -0.0246162808 0.056390987 0.01525414 -0.034675004 0.162489555 0.067879432 -0.040064893
## Hernu 0.046203208 -0.03627809 0.114562250 0.093158257 0.028067056 0.119909886 0.110449250 -0.00762572 0.0728915667 -0.024156600 0.001904033 -0.038188944 0.080627835 0.1081462802 0.004177783 0.0046140647 0.080845976 0.01838911 0.033112453 0.102427300 0.082280505 0.038610803
## Nool 0.026781966 0.08034607 0.042640831 -0.043958680 0.107049940 -0.071443471 -0.007625720 0.13347062 -0.0239250930 0.089588925 -0.011378800 0.100263245 0.020316153 0.0141618219 0.068754263 0.0915002504 0.031689147 0.05003867 0.112847284 -0.090189651 0.013238511 0.114844754
## Bernard 0.133444056 0.09232810 0.118540720 0.150421357 0.037240077 0.129129509 0.072891567 -0.02392509 0.1305294391 0.012065893 0.086445101 -0.028740111 0.012217460 -0.0071227665 0.011919050 0.0139771108 0.013246490 0.03156237 -0.023302685 0.065031679 0.011685266 -0.043246913
## Schwarzl 0.052689650 0.12111637 0.006879078 -0.004784150 0.071086814 -0.041107570 -0.024156600 0.08958892 0.0120658925 0.093374050 0.054559512 0.095955662 -0.009014802 -0.0364806132 0.064762340 0.0744695261 -0.004161209 0.05264793 0.052557434 -0.054973341 -0.013262689 0.043790272
## Pogorelov 0.086199527 0.11993789 -0.007150733 0.088042640 0.001087751 0.063306071 0.001904033 -0.01137880 0.0864451009 0.054559512 0.131101258 0.039793807 -0.017654461 -0.0552295321 0.036217802 0.0229323200 -0.024297202 0.04350217 -0.043010436 0.056964443 -0.015419564 -0.061799041
## Schoenbeck -0.001862818 0.08056700 -0.047148726 -0.052837837 0.051558324 -0.071031693 -0.038188944 0.10026324 -0.0287401108 0.095955662 0.039793807 0.119998414 0.006136912 -0.0100719252 0.071413651 0.0738268338 0.007343504 0.05218153 0.066106778 -0.041944608 0.003996093 0.067997968
## Barras -0.023417759 -0.07423230 0.039950470 0.019276874 0.012061461 0.062798474 0.080627835 0.02031615 0.0122174602 -0.009014802 -0.017654461 0.006136912 0.093023789 0.1309210740 0.019190595 0.0128951798 0.089876082 0.02113673 0.058146273 0.098451261 0.096379287 0.076469330
## Smith -0.064983435 -0.14684012 0.040850250 0.003575112 -0.001720646 0.075560066 0.108146280 0.01416182 -0.0071227665 -0.036480613 -0.055229532 -0.010071925 0.130921074 0.1940761957 0.009754124 -0.0001549192 0.125966552 0.01201445 0.076339634 0.135788428 0.136317132 0.107265289
## Averyanov 0.030108003 0.06605828 0.012666543 0.001648833 0.052263462 -0.011801308 0.004177783 0.06875426 0.0119190503 0.064762340 0.036217802 0.071413651 0.019190595 0.0097541243 0.052020832 0.0564165443 0.021388451 0.04384961 0.052292693 -0.009825105 0.017280387 0.051157802
## Ojaniemi 0.051717606 0.08972194 0.047366379 0.002551623 0.083373287 -0.024616281 0.004614065 0.09150025 0.0139771108 0.074469526 0.022932320 0.073826834 0.012895180 -0.0001549192 0.056416544 0.0715271448 0.020450480 0.04675799 0.071769980 -0.049290962 0.007931201 0.067386122
## Smirnov -0.012612958 -0.06240047 0.057300388 0.019728269 0.027616374 0.056390987 0.080845976 0.03168915 0.0132464905 -0.004161209 -0.024297202 0.007343504 0.089876082 0.1259665522 0.021388451 0.0204504801 0.089407096 0.02259092 0.067884917 0.078718333 0.091704694 0.084583490
## Qi 0.043951190 0.06375740 0.029014803 0.027027443 0.046238330 0.015254141 0.018389113 0.05003867 0.0315623683 0.052647927 0.043502174 0.052181530 0.021136728 0.0120144495 0.043849608 0.0467579855 0.022590917 0.04078134 0.038620190 0.009206710 0.019849953 0.035274608
## Drews 0.003198016 0.01110195 0.064964107 -0.034384401 0.092689573 -0.034675004 0.033112453 0.11284728 -0.0233026850 0.052557434 -0.043010436 0.066106778 0.058146273 0.0763396339 0.052292693 0.0717699801 0.067884917 0.03862019 0.120348114 -0.041524837 0.053179655 0.131957033
## Parkhomenko -0.031611176 -0.11469304 -0.019810556 0.088254586 -0.087023379 0.162489555 0.102427300 -0.09018965 0.0650316789 -0.054973341 0.056964443 -0.041944608 0.098451261 0.1357884282 -0.009825105 -0.0492909621 0.078718333 0.00920671 -0.041524837 0.243665118 0.111291541 -0.025276632
## Terek -0.030315323 -0.08367325 0.031491744 0.019447823 0.002983341 0.067879432 0.082280505 0.01323851 0.0116852659 -0.013262689 -0.015419564 0.003996093 0.096379287 0.1363171320 0.017280387 0.0079312005 0.091704694 0.01984995 0.053179655 0.111291541 0.100656399 0.072803472
## Gomez -0.028471478 -0.02726082 0.049302664 -0.055365703 0.082591773 -0.040064893 0.038610803 0.11484475 -0.0432469134 0.043790272 -0.061799041 0.067997968 0.076469330 0.1072652887 0.051157802 0.0673861224 0.084583490 0.03527461 0.131957033 -0.025276632 0.072803472 0.151410504
## Turi 0.015672375 0.02543206 -0.066365086 0.068743571 -0.058817604 0.088091597 0.019519748 -0.04896736 0.0636981033 0.020061155 0.121761170 0.027477686 0.021680477 0.0095932186 0.023527273 -0.0096368882 0.005098396 0.03234600 -0.057422523 0.145929730 0.030324861 -0.060625584
## Lorenzo -0.060046071 -0.06850676 0.007848553 -0.055062611 0.040145805 -0.017840516 0.044304810 0.08225509 -0.0458133537 0.025545341 -0.049473912 0.059629344 0.090423896 0.1296343749 0.042943164 0.0448098621 0.091357245 0.02975854 0.107915887 0.033247641 0.091032328 0.133048367
## Karlivans 0.035420845 0.08411695 -0.025858685 0.019447723 0.020555875 0.004728162 -0.009976089 0.03818613 0.0287877318 0.066900760 0.080302825 0.072792358 0.002443718 -0.0192095440 0.050093430 0.0430088402 -0.001098577 0.04527290 0.011259675 0.020959711 0.003198162 0.005047364
## Korkizoglou -0.004710184 0.04945096 -0.073078119 -0.005039640 -0.007572545 -0.002445630 -0.016953135 0.03002294 0.0071812482 0.062225526 0.080488758 0.083149790 0.013988841 0.0002615416 0.050719706 0.0336752556 0.005466615 0.04324987 0.006656380 0.050991463 0.017604548 0.007749896
## Uldal -0.059820155 0.00523736 -0.093681342 -0.072927892 0.007306973 -0.056539924 -0.025456310 0.07538193 -0.0506475048 0.070398854 0.028475508 0.113014916 0.037951227 0.0425851404 0.062733813 0.0504508326 0.031809736 0.04374465 0.059439900 0.023367164 0.040595861 0.074270776
## Casarsa -0.055157322 0.02328014 -0.194387548 -0.014217491 -0.093315719 0.012216160 -0.045567296 -0.02588978 0.0004561026 0.057265889 0.142391092 0.098622844 0.007955923 -0.0109971860 0.048267986 0.0005686830 -0.015893728 0.04198047 -0.058068034 0.139378628 0.019518636 -0.053451221
## Turi Lorenzo Karlivans Korkizoglou Uldal Casarsa
## Sebrle 0.015672375 -0.060046071 0.035420845 -0.0047101844 -0.059820155 -0.0551573220
## Clay 0.025432056 -0.068506761 0.084116949 0.0494509566 0.005237360 0.0232801404
## Karpov -0.066365086 0.007848553 -0.025858685 -0.0730781187 -0.093681342 -0.1943875480
## Macey 0.068743571 -0.055062611 0.019447723 -0.0050396399 -0.072927892 -0.0142174912
## Warners -0.058817604 0.040145805 0.020555875 -0.0075725449 0.007306973 -0.0933157194
## Zsivoczky 0.088091597 -0.017840516 0.004728162 -0.0024456295 -0.056539924 0.0122161603
## Hernu 0.019519748 0.044304810 -0.009976089 -0.0169531346 -0.025456310 -0.0455672960
## Nool -0.048967357 0.082255090 0.038186134 0.0300229436 0.075381931 -0.0258897835
## Bernard 0.063698103 -0.045813354 0.028787732 0.0071812482 -0.050647505 0.0004561026
## Schwarzl 0.020061155 0.025545341 0.066900760 0.0622255262 0.070398854 0.0572658888
## Pogorelov 0.121761170 -0.049473912 0.080302825 0.0804887584 0.028475508 0.1423910918
## Schoenbeck 0.027477686 0.059629344 0.072792358 0.0831497904 0.113014916 0.0986228440
## Barras 0.021680477 0.090423896 0.002443718 0.0139888408 0.037951227 0.0079559232
## Smith 0.009593219 0.129634375 -0.019209544 0.0002615416 0.042585140 -0.0109971860
## Averyanov 0.023527273 0.042943164 0.050093430 0.0507197065 0.062733813 0.0482679862
## Ojaniemi -0.009636888 0.044809862 0.043008840 0.0336752556 0.050450833 0.0005686830
## Smirnov 0.005098396 0.091357245 -0.001098577 0.0054666154 0.031809736 -0.0158937284
## Qi 0.032345995 0.029758541 0.045272899 0.0432498664 0.043744649 0.0419804722
## Drews -0.057422523 0.107915887 0.011259675 0.0066563803 0.059439900 -0.0580680344
## Parkhomenko 0.145929730 0.033247641 0.020959711 0.0509914630 0.023367164 0.1393786284
## Terek 0.030324861 0.091032328 0.003198162 0.0176045475 0.040595861 0.0195186360
## Gomez -0.060625584 0.133048367 0.005047364 0.0077498958 0.074270776 -0.0534512210
## Turi 0.164215596 -0.019027372 0.075980221 0.0990931370 0.056206264 0.2064097858
## Lorenzo -0.019027372 0.134575928 0.010074598 0.0262068766 0.087709479 0.0033034653
## Karlivans 0.075980221 0.010074598 0.071777831 0.0786868562 0.068668542 0.1184316834
## Korkizoglou 0.099093137 0.026206877 0.078686856 0.0989840170 0.097070627 0.1668744024
## Uldal 0.056206264 0.087709479 0.068668542 0.0970706266 0.137258712 0.1453948624
## Casarsa 0.206409786 0.003303465 0.118431683 0.1668744024 0.145394862 0.3346285739
lev <- diag(Hat); lev # apalancamientos
## Sebrle Clay Karpov Macey Warners Zsivoczky Hernu Nool Bernard Schwarzl Pogorelov Schoenbeck Barras Smith Averyanov Ojaniemi Smirnov Qi Drews Parkhomenko Terek Gomez Turi Lorenzo Karlivans
## 0.18430157 0.26176692 0.24705903 0.17728399 0.12710265 0.18520562 0.11044925 0.13347062 0.13052944 0.09337405 0.13110126 0.11999841 0.09302379 0.19407620 0.05202083 0.07152714 0.08940710 0.04078134 0.12034811 0.24366512 0.10065640 0.15141050 0.16421560 0.13457593 0.07177783
## Korkizoglou Uldal Casarsa
## 0.09898402 0.13725871 0.33462857
pesos <- cbind(x,y,lev) # construcción de una salida conjunta
rownames(pesos) <- lab
pesos # salida
## Long.jump High.jump X400m y lev
## Sebrle 7.84 2.12 48.36 8893 0.18430157
## Clay 7.96 2.06 49.19 8820 0.26176692
## Karpov 7.81 2.09 46.81 8725 0.24705903
## Macey 7.47 2.15 48.97 8414 0.17728399
## Warners 7.74 1.97 47.97 8343 0.12710265
## Zsivoczky 7.14 2.12 49.40 8287 0.18520562
## Hernu 7.19 2.03 48.73 8237 0.11044925
## Nool 7.53 1.88 48.81 8235 0.13347062
## Bernard 7.48 2.12 49.13 8225 0.13052944
## Schwarzl 7.49 1.94 49.76 8102 0.09337405
## Pogorelov 7.31 2.06 50.79 8084 0.13110126
## Schoenbeck 7.30 1.88 50.30 8077 0.11999841
## Barras 6.99 1.94 49.41 8067 0.09302379
## Smith 6.81 1.91 49.27 8023 0.19407620
## Averyanov 7.34 1.94 49.72 8021 0.05202083
## Ojaniemi 7.50 1.94 49.12 8006 0.07152714
## Smirnov 7.07 1.94 49.11 7993 0.08940710
## Qi 7.34 1.97 49.65 7934 0.04078134
## Drews 7.38 1.88 48.51 7926 0.12034811
## Parkhomenko 6.61 2.03 51.04 7918 0.24366512
## Terek 6.94 1.94 49.56 7893 0.10065640
## Gomez 7.26 1.85 48.61 7865 0.15141050
## Turi 6.91 2.03 51.67 7708 0.16421560
## Lorenzo 7.03 1.85 49.34 7592 0.13457593
## Karlivans 7.26 1.97 50.54 7583 0.07177783
## Korkizoglou 7.07 1.94 51.16 7573 0.09898402
## Uldal 6.99 1.85 50.95 7495 0.13725871
## Casarsa 6.68 1.94 53.20 7404 0.33462857
niv <- 2 * p1 / n; niv # nivel para apalancamiento
## [1] 0.2857143
level <- which(lev>niv) # unidades con fuerte apalancamiento
out <- rbind(level,lev[level]) # salida
rownames(out) <- c("unidad","apalancamiento"); out
## Casarsa
## unidad 28.0000000
## apalancamiento 0.3346286
# De acuerdo al gráfico de residuos vs estimados y Resultados del apalancamiento
# podemos ver que la observación Casarsa tiene un alto valor de apalancamiento (0.3346286) respecto a las demás observaciones
# comparación entre modelo total y modelo sin unidades con apalancamiento
# modelo completo Long.jump,High.jump,X400m
lm1 <- lm(y~Long.jump+High.jump+X400m,data=Decathlon); summary(lm1)
##
## Call:
## lm(formula = y ~ Long.jump + High.jump + X400m, data = Decathlon)
##
## Residuals:
## Min 1Q Median 3Q Max
## -346.97 -109.56 -0.63 85.62 301.27
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7413.69 2421.87 3.061 0.00536 **
## Long.jump 384.10 133.46 2.878 0.00828 **
## High.jump 1817.34 380.74 4.773 7.39e-05 ***
## X400m -115.80 34.17 -3.389 0.00242 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 166.3 on 24 degrees of freedom
## Multiple R-squared: 0.8229, Adjusted R-squared: 0.8008
## F-statistic: 37.18 on 3 and 24 DF, p-value: 3.506e-09
residuals(lm1)
## Sebrle Clay Karpov Macey Warners Zsivoczky Hernu Nool Bernard Schwarzl Pogorelov Schoenbeck Barras Smith Averyanov Ojaniemi Smirnov Qi Drews Parkhomenko Terek Gomez Turi
## 215.2129057 301.2738710 -66.2320229 -105.5518060 -68.9375316 -1.4841157 15.2859881 164.5556170 -225.3448670 47.8883114 0.2190354 267.4395910 164.4102930 227.8571810 19.8718296 -126.0639689 24.9423937 -129.7542464 -121.5685597 186.5610994 26.9852740 -70.3761419 -65.7165824
## Lorenzo Karlivans Korkizoglou Uldal Casarsa
## -170.4992760 -346.9650395 -157.6700120 -65.6990397 59.3598192
# y su plot estimados observados
plot(lm1$fitted.values,y,pch=20,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm1$fitted.values,y,labels=lab,pos=3,offset(0.5))
points(lm1$fitted.values,lm1$fitted.values,pch=20,col="red")

# y su plot estimados residuos
plot(lm1$fitted.values,lm1$residuals,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm1$fitted.values,lm1$residuals,labels=lab,pos=3,offset(0.5))

# modelo sin unidades con fuerte apalancamiento (Todas las observaciones menos la Casarsa)
lm2 <- lm(y[-level]~Long.jump+High.jump+X400m,data=Decathlon[-level,]); summary(lm2)
##
## Call:
## lm(formula = y[-level] ~ Long.jump + High.jump + X400m, data = Decathlon[-level,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -336.40 -116.41 -0.39 107.62 303.35
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7828.29 2645.91 2.959 0.00704 **
## Long.jump 380.87 135.99 2.801 0.01015 *
## High.jump 1817.48 387.37 4.692 0.00010 ***
## X400m -123.75 39.38 -3.143 0.00456 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 169.2 on 23 degrees of freedom
## Multiple R-squared: 0.8013, Adjusted R-squared: 0.7753
## F-statistic: 30.91 on 3 and 23 DF, p-value: 3.039e-08
residuals(lm2)
## Sebrle Clay Karpov Macey Warners Zsivoczky Hernu Nool Bernard Schwarzl Pogorelov Schoenbeck Barras Smith Averyanov Ojaniemi Smirnov Qi Drews Parkhomenko Terek Gomez Turi
## 210.2921523 303.3507635 -83.5739304 -106.8201918 -77.2625122 -0.3942746 11.2207903 162.2459102 -225.3041767 52.9971765 12.9221801 276.2380365 165.1200652 226.8760884 24.1779642 -126.0132350 23.5244656 -126.0090402 -126.7489866 198.9954930 28.7265912 -75.1446887 -47.3021343
## Lorenzo Karlivans Korkizoglou Uldal
## -170.2045637 -336.3993870 -142.7826364 -52.7279195
# hay que tirar las unidades también de la representación gráfica
plot(lm2$fitted.values,y[-level],pch=20,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm2$fitted.values,y[-level],labels=lab,pos=3,offset(0.5))
points(lm2$fitted.values,lm2$fitted.values,pch=20,col="red")

plot(lm2$fitted.values,lm2$residuals,asp=1,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm2$fitted.values,lm2$residuals,labels=lab,pos=3,offset(0.5))

# gráficos de comparación de estimados y residuos
# ambos contra y y no contra los estimados
plot(y,lm1$fitted.values,pch=20,asp=1,ylab="fitted values")
abline(0,1)
text(y,lm1$fitted.values,y,labels=lab,pos=4,offset(0.5))
points(y[-out[1,]],lm2$fitted.values,pch=20,col="red")
text(y[-out[1,]],lm2$fitted.values,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))
legend("bottomright", # Escoge la posición adecuada para tu gráfico
legend=c("Modelo 1", "Modelo 2 sin Casarsa"),
pch=c(2, 2),
col=c("black", "red"),
title="Leyenda",
cex=0.5)

# por esto hay que cambiar el signo a los residuos
plot(y,-lm1$residuals,pch=20,asp=1,ylab="residuals")
abline(h=0)
text(y,-lm1$residuals,labels=lab,pch=20,pos=4,offset(0.5))
points(y[-out[1,]],-lm2$residuals,pch=20,col="red")
text(y[-out[1,]],-lm2$residuals,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))

# Comentarios de Modelos (Elección de Modelo):
# Análisis de Varianza
# Modelo 1: Points ~ X100m + Shot.put + Discus
# Degrees of Freedom Sum of squares Mean squares
# Regression 4 1817691554 454422888.4
# Residuals 24 1218703 50779.3
# Total 28 1818910257 0.0
# Modelo 2: Points ~ X110m.hurdle + Javeline + X1500m
# Degrees of Freedom Sum of squares Mean squares
# Regression 4 1817304528 454326132.01
# Residuals 24 1605729 66905.37
# Total 28 1818910257 0.00
# Modelo 3: Points ~ Long.jump + High.jump + X400m
# Degrees of Freedom Sum of squares Mean squares
# Regression 4 1818246549.1 454561637.3
# Residuals 24 663707.9 27654.5
# Total 28 1818910257.0 0.0
# Conclusión:
# Basado en la suma de cuadrados de la regresión y el error cuadrático medio, el Modelo 3 parece ser el mejor de los tres para modelar la variable "Points" en función de "Long.jump", "High.jump", y "X400m". Este modelo no solo explica más variabilidad en los puntos sino que también tiene el menor MSE, lo que sugiere un ajuste más preciso y menos variabilidad en los errores de predicción.