Linnerrud.csv

# Lectura de los datos
Linnerud <- read.csv("Linnerud.csv", row.names = 1);head(Linnerud)
##   Weight Waist Pulse Pulls Squats Jumps
## 1    191    36    50     5    162    60
## 2    189    37    52     2    110    60
## 3    193    38    58    12    101   101
## 4    162    35    62    12    105    37
## 5    189    35    46    13    155    58
## 6    182    36    56     4    101    42
attach(Linnerud) #archivo en uso
lab = rownames(Linnerud) #etiquetas de las unidades en lab
1) Pulls ~ Weight,Waist y Pulse
# regresión a mano
# definición de los datos para el modelo
n     <- dim(Linnerud)[1]; n                 # n es el número de observaciones
## [1] 20
x     <- as.matrix(Linnerud[,1:3]); x        # x es el caracter descriptivo
##    Weight Waist Pulse
## 1     191    36    50
## 2     189    37    52
## 3     193    38    58
## 4     162    35    62
## 5     189    35    46
## 6     182    36    56
## 7     211    38    56
## 8     167    34    60
## 9     176    31    74
## 10    154    33    56
## 11    169    34    50
## 12    166    33    52
## 13    154    34    64
## 14    247    46    50
## 15    193    36    46
## 16    202    37    62
## 17    176    37    54
## 18    157    32    52
## 19    156    33    54
## 20    138    33    68
p     <- dim(x)[2]; p                        # numero de regresores
## [1] 3
y     <- Pulls                               # y es el caracter respuesta   
nomy  <- deparse(substitute(Pulls)); nomy    # nomy es su nombre
## [1] "Pulls"
nom   <- c(colnames(x),nomy) ; nom           # etiquetas de las variables en nom
## [1] "Weight" "Waist"  "Pulse"  "Pulls"
# estadísticas
xm    <- apply(x,2,mean); xm                 # cálculo de xm, promedio de x
## Weight  Waist  Pulse 
##  178.6   35.4   56.1
ym    <- sum(y)/n; ym                        # cálculo de ym, promedio de y
## [1] 9.45
ssx   <- apply(x^2,2,sum); ssx               # ssx es la suma de los x cuadros
## Weight  Waist  Pulse 
## 649542  25258  63932
ssy   <- sum(y^2); ssy                       # ssy es la suma de los y cuadros
## [1] 2317
sxy   <- t(x)%*%y ; sxy                      # sxy es la suma de los productos xy
##         [,1]
## Weight 32789
## Waist   6513
## Pulse  10712
ssxc  <- ssx-n*xm^2 ; ssxc                   # ssxc es ssx centrado sobre el promedio
##  Weight   Waist   Pulse 
## 11582.8   194.8   987.8
ssyc  <- ssy-n*ym^2 ; ssyc                   # ssyc es ssy centrado sobre el promedio
## [1] 530.95
sxyc  <- sxy-n*xm*ym; sxyc                   # sxyc es sxy centrado sobre el promedio
##          [,1]
## Weight -966.4
## Waist  -177.6
## Pulse   109.1
varx  <- ssxc/n; varx                        # varx es la varianza de x
## Weight  Waist  Pulse 
## 579.14   9.74  49.39
vary  <- ssyc/n; vary                        # vary es la varianza de y
## [1] 26.5475
covxy <- sxyc/n; covxy                       # covxy es la covarianza de xy
##           [,1]
## Weight -48.320
## Waist   -8.880
## Pulse    5.455
stats <- matrix(8*(2*p+1)*0,nrow=8,ncol=(2*p+1)) # construcción de tabla de estadísticas
rownames(stats) <- c("Mínimo","Máximo","Promedio","Total","Suma de cuadrados",
                     "Cuadrados centrados","Varianza covarianza","Desvío estándar")
colnames(stats) <- c(nom,"x1y","x2y","x3y")
stats[,1:p] <- rbind(apply(x,2,min),apply(x,2,max),xm,apply(x,2,sum),ssx,ssxc,varx,sqrt(varx))
stats[,p+1] <- rbind(min(y),max(y),ym,sum(y),ssy,ssyc,vary,sqrt(vary))
stats[,c((p+2):(2*p+1))] <- rbind(t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(sxy),t(sxyc),t(covxy),t(rep(0,p)))
stats                                        # impresión de la tabla
##                           Weight        Waist        Pulse       Pulls      x1y     x2y       x3y
## Mínimo                 138.00000    31.000000    46.000000    1.000000     0.00    0.00     0.000
## Máximo                 247.00000    46.000000    74.000000   17.000000     0.00    0.00     0.000
## Promedio               178.60000    35.400000    56.100000    9.450000     0.00    0.00     0.000
## Total                 3572.00000   708.000000  1122.000000  189.000000     0.00    0.00     0.000
## Suma de cuadrados   649542.00000 25258.000000 63932.000000 2317.000000 32789.00 6513.00 10712.000
## Cuadrados centrados  11582.80000   194.800000   987.800000  530.950000  -966.40 -177.60   109.100
## Varianza covarianza    579.14000     9.740000    49.390000   26.547500   -48.32   -8.88     5.455
## Desvío estándar         24.06533     3.120897     7.027802    5.152427     0.00    0.00     0.000
print("Matriz de varianza y covarianza de los regresores")
## [1] "Matriz de varianza y covarianza de los regresores"
vcovx <- (n-1)/n * cov(x) ; vcovx            # matriz de varianza/covarianza de los xy
##        Weight Waist  Pulse
## Weight 579.14 65.36 -61.86
## Waist   65.36  9.74  -7.74
## Pulse  -61.86 -7.74  49.39
# estimación
X      <- cbind(rep(1,n),x); X               # se necesita ajuntar la columna de unos
##      Weight Waist Pulse
## 1  1    191    36    50
## 2  1    189    37    52
## 3  1    193    38    58
## 4  1    162    35    62
## 5  1    189    35    46
## 6  1    182    36    56
## 7  1    211    38    56
## 8  1    167    34    60
## 9  1    176    31    74
## 10 1    154    33    56
## 11 1    169    34    50
## 12 1    166    33    52
## 13 1    154    34    64
## 14 1    247    46    50
## 15 1    193    36    46
## 16 1    202    37    62
## 17 1    176    37    54
## 18 1    157    32    52
## 19 1    156    33    54
## 20 1    138    33    68
p1     <- p + 1; p1                          # un regresor más
## [1] 4
XX     <- t(X)%*%X; XX                       # matriz X'X
##             Weight  Waist  Pulse
##          20   3572    708   1122
## Weight 3572 649542 127756 199152
## Waist   708 127756  25258  39564
## Pulse  1122 199152  39564  63932
XXm1   <- solve(XX); XXm1                    # su inversa
##                           Weight         Waist         Pulse
##        15.25501668  1.432897e-02 -0.3526671297 -9.411326e-02
## Weight  0.01432897  3.616185e-04 -0.0023606976  8.297072e-05
## Waist  -0.35266713 -2.360698e-03  0.0212746646  3.772656e-04
## Pulse  -0.09411326  8.297072e-05  0.0003772656  1.175392e-03
bh     <- XXm1 %*% t(X) %*% y                # estimación de los betas
print("Estimaciones de los beta")
## [1] "Estimaciones de los beta"
bh 
##               [,1]
##        47.96841291
## Weight  0.07884384
## Waist  -1.45584256
## Pulse  -0.01895002
eta    <- X %*% bh; eta                      # valores estimados
##          [,1]
## 1   9.6697532
## 2   8.0183229
## 3   6.7641556
## 4   8.6117242
## 5  11.0437082
## 6   8.8464585
## 7   8.2212448
## 8  10.4996860
## 9  15.3115079
## 10 11.0063587
## 11 10.8468738
## 12 12.0282848
## 13  9.3989160
## 14 -0.4734174
## 15  9.9032410
## 16  8.8537926
## 17  6.9554530
## 18 12.7745328
## 19 11.2019464
## 20  9.5174570
res    <- y - eta; res                       # residuos
##          [,1]
## 1  -4.6697532
## 2  -6.0183229
## 3   5.2358444
## 4   3.3882758
## 5   1.9562918
## 6  -4.8464585
## 7  -0.2212448
## 8  -4.4996860
## 9  -0.3115079
## 10  5.9936413
## 11  6.1531262
## 12  0.9717152
## 13  4.6010840
## 14  1.4734174
## 15 -3.9032410
## 16  3.1462074
## 17 -2.9554530
## 18 -1.7745328
## 19  3.7980536
## 20 -7.5174570
por_res <- (res/y)*100
unidad <- cbind(x,y,eta,res,por_res)                 # tabla de resultados
rownames(unidad) <- lab
colnames(unidad) <- c(nom,"eta","residuos","% res")
unidad                                       # impresión de la tabla
##    Weight Waist Pulse Pulls        eta   residuos       % res
## 1     191    36    50     5  9.6697532 -4.6697532  -93.395064
## 2     189    37    52     2  8.0183229 -6.0183229 -300.916146
## 3     193    38    58    12  6.7641556  5.2358444   43.632037
## 4     162    35    62    12  8.6117242  3.3882758   28.235632
## 5     189    35    46    13 11.0437082  1.9562918   15.048399
## 6     182    36    56     4  8.8464585 -4.8464585 -121.161463
## 7     211    38    56     8  8.2212448 -0.2212448   -2.765560
## 8     167    34    60     6 10.4996860 -4.4996860  -74.994766
## 9     176    31    74    15 15.3115079 -0.3115079   -2.076720
## 10    154    33    56    17 11.0063587  5.9936413   35.256714
## 11    169    34    50    17 10.8468738  6.1531262   36.194860
## 12    166    33    52    13 12.0282848  0.9717152    7.474732
## 13    154    34    64    14  9.3989160  4.6010840   32.864886
## 14    247    46    50     1 -0.4734174  1.4734174  147.341736
## 15    193    36    46     6  9.9032410 -3.9032410  -65.054016
## 16    202    37    62    12  8.8537926  3.1462074   26.218395
## 17    176    37    54     4  6.9554530 -2.9554530  -73.886324
## 18    157    32    52    11 12.7745328 -1.7745328  -16.132117
## 19    156    33    54    15 11.2019464  3.7980536   25.320357
## 20    138    33    68     2  9.5174570 -7.5174570 -375.872850
# gráficos clásicos: ejemplo con el primero regresor
plot(x[,1],y,xlab="Weight",ylab="Pulls",ylim=c(min(y,eta),max(y,eta)),pch="*")
text(x[,1],y,labels=lab,pos=2)
points(x[,1],eta,col="red",pch="*")
text(x[,1],eta,labels=lab,col="red",pos=2)
# Añadiendo una leyenda
legend("topright",            
       legend=c("Y", "Eta"),  
       pch=c("*", "*"),       
       col=c("black", "red"),
       title="Legend Title")

# gráficos de valores estimados y residuos
plot(eta,y,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(eta,y,labels=lab,pos=2)
points(eta,eta,col="red")

plot(eta,res,asp=1,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(eta,res,labels=lab,pos=2)

# datos para anova
ssr   <- t(eta) %*% eta; ssr       # suma de  cuadrados por regresión
##          [,1]
## [1,] 1966.346
sse   <- ssy - ssr; sse            # suma por residuos
##          [,1]
## [1,] 350.6545
msr   <- ssr/p1; msr               # cuadrado promedio por regresión
##          [,1]
## [1,] 491.5864
mse   <- sse/(n-p1); mse           # cuadrado promedio por residuos
##          [,1]
## [1,] 21.91591
df    <- c(p1,n-p1,n)              # grados de libertad
ssq   <- c(ssr,sse,ssy)            # sumas de cuadrados
msq   <- c(msr,mse,0)              # suma de residuos
anova <- (cbind(df,ssq,msq))       # tabla de anova
colnames(anova) <- c("Degrees of Freedom","Sum of squares","Mean squares")
rownames(anova) <- c("Regression","Residuals","Total")
anova
##            Degrees of Freedom Sum of squares Mean squares
## Regression                  4      1966.3455    491.58638
## Residuals                  16       350.6545     21.91591
## Total                      20      2317.0000      0.00000
# apalancamiento
Hat   <- X %*% XXm1 %*% t(X)                  # matriz H sombrero
rownames(Hat) <- lab                          # inclusivo de su etiquetas
colnames(Hat) <- lab
Hat
##              1            2             3            4            5          6            7           8            9          10            11           12           13           14           15           16           17           18           19           20
## 1   0.10655710  0.070795483  0.0267442663 -0.019894425  0.143901992 0.04801517  0.084681195  0.02355179  0.020227354  0.03271984  0.0878217868  0.087116980 -0.033406882  0.061911065  0.135338940  0.044948432  0.025666204  0.082872382  0.049672292 -0.079240967
## 2   0.07079548  0.072744656  0.0591447046  0.032960464  0.076977336 0.05388813  0.059433539  0.02994190 -0.044026733  0.04004485  0.0627548367  0.049177635  0.021165578  0.130247731  0.083492901  0.034584771  0.070673506  0.044444385  0.046037095  0.005517226
## 3   0.02674427  0.059144705  0.1045436224  0.080027350 -0.011382549 0.06015351  0.081814272  0.04530832  0.033966692  0.01568300 -0.0003265344 -0.011227007  0.072991190  0.203832839  0.007562888  0.093186628  0.078009743 -0.026308132  0.005319444  0.080955755
## 4  -0.01989443  0.032960464  0.0800273504  0.144583882 -0.065286252 0.05366668 -0.013028942  0.07802181  0.013881794  0.08288966  0.0148177663  0.006434553  0.159045977  0.053273944 -0.050658916  0.027628581  0.103171448  0.014652526  0.062938353  0.220873748
## 5   0.14390199  0.076977336 -0.0113825494 -0.065286252  0.217677155 0.04214121  0.080544327  0.01323973  0.002425648  0.04447671  0.1325686301  0.135519874 -0.081670547 -0.008255014  0.196274082  0.015655049  0.004385998  0.137587408  0.074529859 -0.151310641
## 6   0.04801517  0.053888135  0.0601535085  0.053666680  0.042141211 0.05211761  0.055857841  0.04720808  0.036821951  0.04348146  0.0429084448  0.039575224  0.051309522  0.084090266  0.046061061  0.055259577  0.057207740  0.036631437  0.042309191  0.051295906
## 7   0.08468120  0.059433539  0.0818142724 -0.013028942  0.080544327 0.05585784  0.174977008  0.02892970  0.192415717 -0.03648079  0.0045543385  0.016156756 -0.029278175  0.184517658  0.081615445  0.167363855 -0.005877547 -0.012766013 -0.032443340 -0.082986844
## 8   0.02355179  0.029941901  0.0453083249  0.078021808  0.013239728 0.04720808  0.028929704  0.06993305  0.110931992  0.06584934  0.0378666682  0.046681200  0.089667404 -0.017447145  0.010045775  0.053515097  0.043489458  0.052706217  0.058530119  0.112029493
## 9   0.02022735 -0.044026733  0.0339666916  0.013881794  0.002425648 0.03682195  0.192415717  0.11093199  0.719766801 -0.02712212 -0.0588421522  0.027407036  0.045470797 -0.174755123 -0.034563859  0.289727970 -0.147815572  0.009736357 -0.043585692  0.027931145
## 10  0.03271984  0.040044849  0.0156829955  0.082889660  0.044476706 0.04348146 -0.036480794  0.06584934 -0.027122123  0.11322903  0.0900130217  0.086623986  0.095692819 -0.078870340  0.038499190 -0.032695421  0.076016444  0.108793697  0.112880264  0.128275379
## 11  0.08782179  0.062754837 -0.0003265344  0.014817766  0.132568630 0.04290844  0.004554338  0.03786667 -0.058842152  0.09001302  0.1214671139  0.115919153  0.012633219 -0.044079980  0.120454746 -0.030935850  0.054510550  0.131396451  0.105656818 -0.001159028
## 12  0.08711698  0.049177635 -0.0112270070  0.006434553  0.135519874 0.03957522  0.016156756  0.04668120  0.027407036  0.08662399  0.1159191532  0.122933095  0.009602991 -0.098425320  0.115735082 -0.008523808  0.025639376  0.138872382  0.101702138 -0.006921327
## 13 -0.03340688  0.021165578  0.0729911898  0.159045977 -0.081670547 0.05130952 -0.029278175  0.08966740  0.045470797  0.09569282  0.0126332194  0.009602991  0.180692603  0.002902642 -0.070143001  0.024169199  0.098758089  0.022752300  0.072389389  0.255254885
## 14  0.06191106  0.130247731  0.2038328389  0.053273944 -0.008255014 0.08409027  0.184517658 -0.01744714 -0.174755123 -0.07887034 -0.0440799805 -0.098425320  0.002902642  0.634785903  0.050304161  0.144958452  0.145588874 -0.153010458 -0.085468600 -0.036101554
## 15  0.13533894  0.083492901  0.0075628877 -0.050658916  0.196274082 0.04606106  0.081615445  0.01004578 -0.034563859  0.03849919  0.1204547458  0.115735082 -0.070143001  0.050304161  0.183045989  0.017377195  0.024204766  0.114198754  0.065305599 -0.134150800
## 16  0.04494843  0.034584771  0.0931866280  0.027628581  0.015655049 0.05525958  0.167363855  0.05351510  0.289727970 -0.03269542 -0.0309358502 -0.008523808  0.024169199  0.144958452  0.017377195  0.196649995 -0.013721030 -0.036117379 -0.041306754 -0.001724560
## 17  0.02566620  0.070673506  0.0780097427  0.103171448  0.004385998 0.05720774 -0.005877547  0.04348946 -0.147815572  0.07601644  0.0545105501  0.025639376  0.098758089  0.145588874  0.024204766 -0.013721030  0.130102980  0.030278423  0.070394162  0.129316389
## 18  0.08287238  0.044444385 -0.0263081321  0.014652526  0.137587408 0.03663144 -0.012766013  0.05270622  0.009736357  0.10879370  0.1313964513  0.138872382  0.022752300 -0.153010458  0.114198754 -0.036117379  0.030278423  0.162884880  0.124332114  0.016062269
## 19  0.04967229  0.046037095  0.0053194439  0.062938353  0.074529859 0.04230919 -0.032443340  0.05853012 -0.043585692  0.11288026  0.1056568184  0.101702138  0.072389389 -0.085468600  0.065305599 -0.041306754  0.070394162  0.124332114  0.118015774  0.092791775
## 20 -0.07924097  0.005517226  0.0809557552  0.220873748 -0.151310641 0.05129591 -0.082986844  0.11202949  0.027931145  0.12827538 -0.0011590278 -0.006921327  0.255254885 -0.036101554 -0.134150800 -0.001724560  0.129316389  0.016062269  0.092791775  0.373291750
lev   <- diag(Hat); lev                       # apalancamientos
##          1          2          3          4          5          6          7          8          9         10         11         12         13         14         15         16         17         18         19         20 
## 0.10655710 0.07274466 0.10454362 0.14458388 0.21767716 0.05211761 0.17497701 0.06993305 0.71976680 0.11322903 0.12146711 0.12293309 0.18069260 0.63478590 0.18304599 0.19664999 0.13010298 0.16288488 0.11801577 0.37329175
pesos <- cbind(x,y,lev)                       # construcción de una salida conjunta
rownames(pesos) <- lab
pesos                                         # salida
##    Weight Waist Pulse  y        lev
## 1     191    36    50  5 0.10655710
## 2     189    37    52  2 0.07274466
## 3     193    38    58 12 0.10454362
## 4     162    35    62 12 0.14458388
## 5     189    35    46 13 0.21767716
## 6     182    36    56  4 0.05211761
## 7     211    38    56  8 0.17497701
## 8     167    34    60  6 0.06993305
## 9     176    31    74 15 0.71976680
## 10    154    33    56 17 0.11322903
## 11    169    34    50 17 0.12146711
## 12    166    33    52 13 0.12293309
## 13    154    34    64 14 0.18069260
## 14    247    46    50  1 0.63478590
## 15    193    36    46  6 0.18304599
## 16    202    37    62 12 0.19664999
## 17    176    37    54  4 0.13010298
## 18    157    32    52 11 0.16288488
## 19    156    33    54 15 0.11801577
## 20    138    33    68  2 0.37329175
niv   <- 2 * p1 / n; niv                      # nivel para apalancamiento
## [1] 0.4
level <- which(lev>niv)                       # unidades con fuerte apalancamiento
out   <- rbind(level,lev[level])              # salida
rownames(out) <- c("unidad","apalancamiento"); out
##                        9         14
## unidad         9.0000000 14.0000000
## apalancamiento 0.7197668  0.6347859
# De acuerdo al gráfico de residuos vs estimados y Resultados del apalancamiento

# podemos ver que los puntos 14 y 9 son los que tienen mayor apalancamiento ya que se alejan de donde están la mayor concentración de puntos si observamos el gráfico de residuos vs estimados. Por otro lado los valores más altos de apalancamiento corresponden a las observaciones 9 (apal= 0.71976680) y 14 (apal= 0.63478590).
# comparación entre modelo total y modelo sin unidades con apalancamiento
# modelo completo
lm1 <- lm(y~Weight+Waist+Pulse,data=Linnerud); summary(lm1)
## 
## Call:
## lm(formula = y ~ Weight + Waist + Pulse, data = Linnerud)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.5175 -4.0524  0.3752  3.4907  6.1531 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 47.96841   18.28462   2.623   0.0184 *
## Weight       0.07884    0.08902   0.886   0.3889  
## Waist       -1.45584    0.68283  -2.132   0.0488 *
## Pulse       -0.01895    0.16050  -0.118   0.9075  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.681 on 16 degrees of freedom
## Multiple R-squared:  0.3396, Adjusted R-squared:  0.2157 
## F-statistic: 2.742 on 3 and 16 DF,  p-value: 0.07742
residuals(lm1)
##          1          2          3          4          5          6          7          8          9         10         11         12         13         14         15         16         17         18         19         20 
## -4.6697532 -6.0183229  5.2358444  3.3882758  1.9562918 -4.8464585 -0.2212448 -4.4996860 -0.3115079  5.9936413  6.1531262  0.9717152  4.6010840  1.4734174 -3.9032410  3.1462074 -2.9554530 -1.7745328  3.7980536 -7.5174570
# y su plot estimados observados
plot(lm1$fitted.values,y,pch=20,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm1$fitted.values,y,labels=lab,pos=3,offset(0.5))
points(lm1$fitted.values,lm1$fitted.values,pch=20,col="red")

# y su plot estimados residuos
plot(lm1$fitted.values,lm1$residuals,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm1$fitted.values,lm1$residuals,labels=lab,pos=3,offset(0.5))

# Se ve en el gráfico de residuos vs estimados: se nota observaciones con alto valor residual como la 20,10.

# Por otro lado también un alto grado de apalancamiento en las observaciones 9 y 14

# Se procederá a evaluar el retiro de las observaciones 9 y 14
# modelo sin unidades con fuerte apalancamiento (Todas las observaciones menos la 9 y 14)
lm2 <- lm(y[-level]~Weight+Waist+Pulse,data=Linnerud[-level,]); summary(lm2)
## 
## Call:
## lm(formula = y[-level] ~ Weight + Waist + Pulse, data = Linnerud[-level, 
##     ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.8968 -4.1278  0.0871  3.6142  6.3868 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 61.23659   27.79140   2.203   0.0448 *
## Weight       0.14054    0.15682   0.896   0.3853  
## Waist       -2.27499    1.50604  -1.511   0.1531  
## Pulse        0.06383    0.26214   0.244   0.8111  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.924 on 14 degrees of freedom
## Multiple R-squared:  0.2074, Adjusted R-squared:  0.03755 
## F-statistic: 1.221 on 3 and 14 DF,  p-value: 0.3387
residuals(lm2)
##           1           2           3           4           5           6           7           8          10          11          12          13          15          16          17          18          19          20 
## -4.37139407 -4.94299365  6.38684624  3.66322962  1.89002397 -4.48954741 -0.01517551 -5.18678537  5.62055277  6.17046670  0.18942586  4.38487974 -3.39713907  2.59168142 -1.24366285 -2.82072014  3.46714208 -7.89683030
# hay que tirar las unidades también de la representación gráfica
plot(lm2$fitted.values,y[-level],pch=20,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm2$fitted.values,y[-level],labels=lab,pos=3,offset(0.5))
points(lm2$fitted.values,lm2$fitted.values,pch=20,col="red")

plot(lm2$fitted.values,lm2$residuals,asp=1,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm2$fitted.values,lm2$residuals,labels=lab,pos=3,offset(0.5))

# En el gráfico de residuos vs estimados:
#Luego de retirar las observaciones con alto grado de apalancamiento (9 y 14) se puede ver que la nube de puntos está más concentrada en la línea con residuo 0. Además ya no tener observaciones con un alto apalancamiento
# gráficos de comparación de estimados y residuos
# ambos contra y y no contra los estimados
plot(y,lm1$fitted.values,pch=20,asp=1,ylab="fitted values")
abline(0,1)
text(y,lm1$fitted.values,y,labels=lab,pos=4,offset(0.5))
points(y[-out[1,]],lm2$fitted.values,pch=20,col="red")
text(y[-out[1,]],lm2$fitted.values,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))

legend("bottomright", # Escoge la posición adecuada para tu gráfico
       legend=c("Modelo 1", "Modelo 2 sin Obs 9 y 14"), 
       pch=c(2, 2),
       col=c("black", "red"),
       title="Leyenda")

# por esto hay que cambiar el signo a los residuos
plot(y,-lm1$residuals,pch=20,asp=1,ylab="residuals")
abline(h=0)
text(y,-lm1$residuals,labels=lab,pch=20,pos=4,offset(0.5))
points(y[-out[1,]],-lm2$residuals,pch=20,col="red")
text(y[-out[1,]],-lm2$residuals,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))

# Comentarios:
# Mostrar la diferencia entre betas y entre los residuos de los dos modelos.

# Modelo 1: Con todas las observaciones
# Pulls ~ Weight,Waist,Pulse
# lm1 <- lm(y~Weight+Waist+Pulse,data=Linnerud)

#         Estimate(betas) Std. Error t value Pr(>|t|)  
# (Intercept) 47.96841   18.28462   2.623   0.0184 *
# Weight       0.07884    0.08902   0.886   0.3889  
# Waist       -1.45584    0.68283  -2.132   0.0488 *
# Pulse       -0.01895    0.16050  -0.118   0.9075  

# beta = [47.96841 (intercepto) ,0.07884(Weight), -1.45584(Waist), -0.01895(Pulse)]

# residuals(lm1):
#        1          2          3          4          5          6          7 
# -4.6697532 -6.0183229  5.2358444  3.3882758  1.9562918 -4.8464585 -0.2212448 
#         8          9         10         11         12         13         14 
# -4.4996860 -0.3115079  5.9936413  6.1531262  0.9717152  4.6010840  1.4734174 
#        15         16         17         18         19         20 
# -3.9032410  3.1462074 -2.9554530 -1.7745328  3.7980536 -7.5174570 


# Modelo 2: Sin observaciones con alto apalancamiento (observaciones 9 y 14)
# Pulls ~ Weight,Waist,Pulse
# lm2 <- lm(y[-level]~Weight+Waist+Pulse,data=Linnerud[-level,]);

#        Estimate(Betas) Std. Error t value Pr(>|t|)  
# (Intercept) 61.23659   27.79140   2.203   0.0448 *
# Weight       0.14054    0.15682   0.896   0.3853  
# Waist       -2.27499    1.50604  -1.511   0.1531  
# Pulse        0.06383    0.26214   0.244   0.8111  

# beta = [61.23659 (intercepto) ,0.14054 (Weight), -2.27499(Waist), 0.06383(Pulse)]


# residuals(lm2):
#       1           2           3           4           5           6 
# -4.37139407 -4.94299365  6.38684624  3.66322962  1.89002397 -4.48954741 
#       7           8          10          11          12          13 
# -0.01517551 -5.18678537  5.62055277  6.17046670  0.18942586  4.38487974 
#      15          16          17          18          19          20 
# -3.39713907  2.59168142 -1.24366285 -2.82072014  3.46714208 -7.89683030

# Comentarios:

# La eliminación de observaciones con alto apalancamiento (9 y 14) ha afectado notablemente las estimaciones de los coeficientes y su significancia estadística. Esto es particularmente evidente en el cambio de la significancia de "Waist" y en los cambios en los coeficientes de "Intercept" y "Pulse" 

# Dada la eliminación de las observaciones que generan un alto apalancamiento del modelo, el rango en el cual oscilan los residuos no sifre una variación significativa, lo que quiere decir que la variabilidad en el residuos no se reduce significativamente.Esto sugiere que las observaciones 9 y 14 no estaban ejerciendo una influencia dramática en la mayoría de las otras predicciones, aunque sí afectaron las estimaciones de los coeficientes como se vio previamente.
2) Squats ~ Weight,Waist y Pulse
head(Linnerud)
##   Weight Waist Pulse Pulls Squats Jumps
## 1    191    36    50     5    162    60
## 2    189    37    52     2    110    60
## 3    193    38    58    12    101   101
## 4    162    35    62    12    105    37
## 5    189    35    46    13    155    58
## 6    182    36    56     4    101    42
# regresión a mano
# definición de los datos para el modelo
n     <- dim(Linnerud)[1]; n                 # n es el número de observaciones
## [1] 20
x     <- as.matrix(Linnerud[,1:3]); x        # x es el caracter descriptivo
##    Weight Waist Pulse
## 1     191    36    50
## 2     189    37    52
## 3     193    38    58
## 4     162    35    62
## 5     189    35    46
## 6     182    36    56
## 7     211    38    56
## 8     167    34    60
## 9     176    31    74
## 10    154    33    56
## 11    169    34    50
## 12    166    33    52
## 13    154    34    64
## 14    247    46    50
## 15    193    36    46
## 16    202    37    62
## 17    176    37    54
## 18    157    32    52
## 19    156    33    54
## 20    138    33    68
p     <- dim(x)[2]; p                        # numero de regresores
## [1] 3
y     <- Squats                               # y es el caracter respuesta   
nomy  <- deparse(substitute(Squats)); nomy    # nomy es su nombre
## [1] "Squats"
nom   <- c(colnames(x),nomy) ; nom           # etiquetas de las variables en nom
## [1] "Weight" "Waist"  "Pulse"  "Squats"
# estadísticas
xm    <- apply(x,2,mean); xm                 # cálculo de xm, promedio de x
## Weight  Waist  Pulse 
##  178.6   35.4   56.1
ym    <- sum(y)/n; ym                        # cálculo de ym, promedio de y
## [1] 145.55
ssx   <- apply(x^2,2,sum); ssx               # ssx es la suma de los x cuadros
## Weight  Waist  Pulse 
## 649542  25258  63932
ssy   <- sum(y^2); ssy                       # ssy es la suma de los y cuadros
## [1] 498073
sxy   <- t(x)%*%y ; sxy                      # sxy es la suma de los productos xy
##          [,1]
## Weight 505432
## Waist  100592
## Pulse  165236
ssxc  <- ssx-n*xm^2 ; ssxc                   # ssxc es ssx centrado sobre el promedio
##  Weight   Waist   Pulse 
## 11582.8   194.8   987.8
ssyc  <- ssy-n*ym^2 ; ssyc                   # ssyc es ssy centrado sobre el promedio
## [1] 74376.95
sxyc  <- sxy-n*xm*ym; sxyc                   # sxyc es sxy centrado sobre el promedio
##            [,1]
## Weight -14472.6
## Waist   -2457.4
## Pulse    1928.9
varx  <- ssxc/n; varx                        # varx es la varianza de x
## Weight  Waist  Pulse 
## 579.14   9.74  49.39
vary  <- ssyc/n; vary                        # vary es la varianza de y
## [1] 3718.847
covxy <- sxyc/n; covxy                       # covxy es la covarianza de xy
##            [,1]
## Weight -723.630
## Waist  -122.870
## Pulse    96.445
stats <- matrix(8*(2*p+1)*0,nrow=8,ncol=(2*p+1)) # construcción de tabla de estadísticas
rownames(stats) <- c("Mínimo","Máximo","Promedio","Total","Suma de cuadrados",
                     "Cuadrados centrados","Varianza covarianza","Desvío estándar")
colnames(stats) <- c(nom,"x1y","x2y","x3y")
stats[,1:p] <- rbind(apply(x,2,min),apply(x,2,max),xm,apply(x,2,sum),ssx,ssxc,varx,sqrt(varx))
stats[,p+1] <- rbind(min(y),max(y),ym,sum(y),ssy,ssyc,vary,sqrt(vary))
stats[,c((p+2):(2*p+1))] <- rbind(t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(sxy),t(sxyc),t(covxy),t(rep(0,p)))
stats                                        # impresión de la tabla
##                           Weight        Waist        Pulse       Squats       x1y       x2y        x3y
## Mínimo                 138.00000    31.000000    46.000000     50.00000      0.00      0.00      0.000
## Máximo                 247.00000    46.000000    74.000000    251.00000      0.00      0.00      0.000
## Promedio               178.60000    35.400000    56.100000    145.55000      0.00      0.00      0.000
## Total                 3572.00000   708.000000  1122.000000   2911.00000      0.00      0.00      0.000
## Suma de cuadrados   649542.00000 25258.000000 63932.000000 498073.00000 505432.00 100592.00 165236.000
## Cuadrados centrados  11582.80000   194.800000   987.800000  74376.95000 -14472.60  -2457.40   1928.900
## Varianza covarianza    579.14000     9.740000    49.390000   3718.84750   -723.63   -122.87     96.445
## Desvío estándar         24.06533     3.120897     7.027802     60.98235      0.00      0.00      0.000
print("Matriz de varianza y covarianza de los regresores")
## [1] "Matriz de varianza y covarianza de los regresores"
vcovx <- (n-1)/n * cov(x) ; vcovx            # matriz de varianza/covarianza de los xy
##        Weight Waist  Pulse
## Weight 579.14 65.36 -61.86
## Waist   65.36  9.74  -7.74
## Pulse  -61.86 -7.74  49.39
# estimación
X      <- cbind(rep(1,n),x); X               # se necesita adjuntar la columna de unos
##      Weight Waist Pulse
## 1  1    191    36    50
## 2  1    189    37    52
## 3  1    193    38    58
## 4  1    162    35    62
## 5  1    189    35    46
## 6  1    182    36    56
## 7  1    211    38    56
## 8  1    167    34    60
## 9  1    176    31    74
## 10 1    154    33    56
## 11 1    169    34    50
## 12 1    166    33    52
## 13 1    154    34    64
## 14 1    247    46    50
## 15 1    193    36    46
## 16 1    202    37    62
## 17 1    176    37    54
## 18 1    157    32    52
## 19 1    156    33    54
## 20 1    138    33    68
p1     <- p + 1; p1                          # un regresor más
## [1] 4
XX     <- t(X)%*%X; XX                       # matriz X'X
##             Weight  Waist  Pulse
##          20   3572    708   1122
## Weight 3572 649542 127756 199152
## Waist   708 127756  25258  39564
## Pulse  1122 199152  39564  63932
XXm1   <- solve(XX); XXm1                    # su inversa
##                           Weight         Waist         Pulse
##        15.25501668  1.432897e-02 -0.3526671297 -9.411326e-02
## Weight  0.01432897  3.616185e-04 -0.0023606976  8.297072e-05
## Waist  -0.35266713 -2.360698e-03  0.0212746646  3.772656e-04
## Pulse  -0.09411326  8.297072e-05  0.0003772656  1.175392e-03
bh     <- XXm1 %*% t(X) %*% y                # estimación de los betas
print("Estimaciones de los beta")
## [1] "Estimaciones de los beta"
bh 
##               [,1]
##        623.2817463
## Weight   0.7276600
## Waist  -17.3872206
## Pulse    0.1393189
eta    <- X %*% bh; eta                      # valores estimados
##         [,1]
## 1  143.29081
## 2  124.72690
## 3  111.08624
## 4  141.24771
## 5  158.66543
## 6  137.57778
## 7  123.90548
## 8  161.99460
## 9  222.65566
## 10 169.36496
## 11 162.05673
## 12 177.53961
## 13 153.09229
## 14  10.16756
## 15 144.18885
## 16 135.57967
## 17 115.54596
## 18 188.37789
## 19 170.54164
## 20 159.39423
res    <- y - eta; res                       # residuos
##          [,1]
## 1   18.709194
## 2  -14.726904
## 3  -10.086236
## 4  -36.247714
## 5   -3.665431
## 6  -36.577780
## 7  -22.905478
## 8  -36.994597
## 9  -22.655662
## 10  81.635038
## 11 -42.056728
## 12  32.460394
## 13  61.907708
## 14  39.832440
## 15 -74.188851
## 16  74.420328
## 17 -55.545962
## 18  41.622113
## 19  54.458356
## 20 -49.394229
por_res <- (res/y)*100
unidad <- cbind(x,y,eta,res,por_res)                 # tabla de resultados
rownames(unidad) <- lab
colnames(unidad) <- c(nom,"eta","residuos","% res")
unidad                                       # impresión de la tabla
##    Weight Waist Pulse Squats       eta   residuos       % res
## 1     191    36    50    162 143.29081  18.709194   11.548885
## 2     189    37    52    110 124.72690 -14.726904  -13.388094
## 3     193    38    58    101 111.08624 -10.086236   -9.986372
## 4     162    35    62    105 141.24771 -36.247714  -34.521632
## 5     189    35    46    155 158.66543  -3.665431   -2.364794
## 6     182    36    56    101 137.57778 -36.577780  -36.215623
## 7     211    38    56    101 123.90548 -22.905478  -22.678691
## 8     167    34    60    125 161.99460 -36.994597  -29.595677
## 9     176    31    74    200 222.65566 -22.655662  -11.327831
## 10    154    33    56    251 169.36496  81.635038   32.523920
## 11    169    34    50    120 162.05673 -42.056728  -35.047273
## 12    166    33    52    210 177.53961  32.460394   15.457330
## 13    154    34    64    215 153.09229  61.907708   28.794283
## 14    247    46    50     50  10.16756  39.832440   79.664881
## 15    193    36    46     70 144.18885 -74.188851 -105.984072
## 16    202    37    62    210 135.57967  74.420328   35.438251
## 17    176    37    54     60 115.54596 -55.545962  -92.576603
## 18    157    32    52    230 188.37789  41.622113   18.096571
## 19    156    33    54    225 170.54164  54.458356   24.203714
## 20    138    33    68    110 159.39423 -49.394229  -44.903844
# gráficos clásicos: ejemplo con el primero regresor
plot(x[,1],y,xlab="Weight",ylab="Squats",ylim=c(min(y,eta),max(y,eta)),pch="*")
text(x[,1],y,labels=lab,pos=2)
points(x[,1],eta,col="red",pch="*")
text(x[,1],eta,labels=lab,col="red",pos=2)

legend("topright",            
       legend=c("Y", "Eta"), 
       pch=c("*", "*"),       
       col=c("black", "red"))  

# gráficos de valores estimados y residuos
plot(eta,y,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(eta,y,labels=lab,pos=2)
points(eta,eta,col="red")

plot(eta,res,asp=1,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(eta,res,labels=lab,pos=2)

# Se ve en el gráfico de residuos vs estimados: se nota observaciones con alto valor residual como la 15 y 10.

# Por otro lado también un alto grado de apalancamiento en las observaciones 9 y 14

# Se procederá a evaluar el retiro de las observaciones 9 y 14
# datos para anova
ssr   <- t(eta) %*% eta; ssr       # suma de  cuadrados por regresión
##        [,1]
## [1,] 456161
sse   <- ssy - ssr; sse            # suma por residuos
##          [,1]
## [1,] 41911.99
msr   <- ssr/p1; msr               # cuadrado promedio por regresión
##          [,1]
## [1,] 114040.3
mse   <- sse/(n-p1); mse           # cuadrado promedio por residuos
##        [,1]
## [1,] 2619.5
df    <- c(p1,n-p1,n)              # grados de libertad
ssq   <- c(ssr,sse,ssy)            # sumas de cuadrados
msq   <- c(msr,mse,0)              # suma de residuos
anova <- (cbind(df,ssq,msq))       # tabla de anova
colnames(anova) <- c("Degrees of Freedom","Sum of squares","Mean squares")
rownames(anova) <- c("Regression","Residuals","Total")
anova
##            Degrees of Freedom Sum of squares Mean squares
## Regression                  4      456161.01     114040.3
## Residuals                  16       41911.99       2619.5
## Total                      20      498073.00          0.0
# apalancamiento
Hat   <- X %*% XXm1 %*% t(X)                  # matriz H sombrero
rownames(Hat) <- lab                          # inclusivo de su etiquetas
colnames(Hat) <- lab
Hat
##              1            2             3            4            5          6            7           8            9          10            11           12           13           14           15           16           17           18           19           20
## 1   0.10655710  0.070795483  0.0267442663 -0.019894425  0.143901992 0.04801517  0.084681195  0.02355179  0.020227354  0.03271984  0.0878217868  0.087116980 -0.033406882  0.061911065  0.135338940  0.044948432  0.025666204  0.082872382  0.049672292 -0.079240967
## 2   0.07079548  0.072744656  0.0591447046  0.032960464  0.076977336 0.05388813  0.059433539  0.02994190 -0.044026733  0.04004485  0.0627548367  0.049177635  0.021165578  0.130247731  0.083492901  0.034584771  0.070673506  0.044444385  0.046037095  0.005517226
## 3   0.02674427  0.059144705  0.1045436224  0.080027350 -0.011382549 0.06015351  0.081814272  0.04530832  0.033966692  0.01568300 -0.0003265344 -0.011227007  0.072991190  0.203832839  0.007562888  0.093186628  0.078009743 -0.026308132  0.005319444  0.080955755
## 4  -0.01989443  0.032960464  0.0800273504  0.144583882 -0.065286252 0.05366668 -0.013028942  0.07802181  0.013881794  0.08288966  0.0148177663  0.006434553  0.159045977  0.053273944 -0.050658916  0.027628581  0.103171448  0.014652526  0.062938353  0.220873748
## 5   0.14390199  0.076977336 -0.0113825494 -0.065286252  0.217677155 0.04214121  0.080544327  0.01323973  0.002425648  0.04447671  0.1325686301  0.135519874 -0.081670547 -0.008255014  0.196274082  0.015655049  0.004385998  0.137587408  0.074529859 -0.151310641
## 6   0.04801517  0.053888135  0.0601535085  0.053666680  0.042141211 0.05211761  0.055857841  0.04720808  0.036821951  0.04348146  0.0429084448  0.039575224  0.051309522  0.084090266  0.046061061  0.055259577  0.057207740  0.036631437  0.042309191  0.051295906
## 7   0.08468120  0.059433539  0.0818142724 -0.013028942  0.080544327 0.05585784  0.174977008  0.02892970  0.192415717 -0.03648079  0.0045543385  0.016156756 -0.029278175  0.184517658  0.081615445  0.167363855 -0.005877547 -0.012766013 -0.032443340 -0.082986844
## 8   0.02355179  0.029941901  0.0453083249  0.078021808  0.013239728 0.04720808  0.028929704  0.06993305  0.110931992  0.06584934  0.0378666682  0.046681200  0.089667404 -0.017447145  0.010045775  0.053515097  0.043489458  0.052706217  0.058530119  0.112029493
## 9   0.02022735 -0.044026733  0.0339666916  0.013881794  0.002425648 0.03682195  0.192415717  0.11093199  0.719766801 -0.02712212 -0.0588421522  0.027407036  0.045470797 -0.174755123 -0.034563859  0.289727970 -0.147815572  0.009736357 -0.043585692  0.027931145
## 10  0.03271984  0.040044849  0.0156829955  0.082889660  0.044476706 0.04348146 -0.036480794  0.06584934 -0.027122123  0.11322903  0.0900130217  0.086623986  0.095692819 -0.078870340  0.038499190 -0.032695421  0.076016444  0.108793697  0.112880264  0.128275379
## 11  0.08782179  0.062754837 -0.0003265344  0.014817766  0.132568630 0.04290844  0.004554338  0.03786667 -0.058842152  0.09001302  0.1214671139  0.115919153  0.012633219 -0.044079980  0.120454746 -0.030935850  0.054510550  0.131396451  0.105656818 -0.001159028
## 12  0.08711698  0.049177635 -0.0112270070  0.006434553  0.135519874 0.03957522  0.016156756  0.04668120  0.027407036  0.08662399  0.1159191532  0.122933095  0.009602991 -0.098425320  0.115735082 -0.008523808  0.025639376  0.138872382  0.101702138 -0.006921327
## 13 -0.03340688  0.021165578  0.0729911898  0.159045977 -0.081670547 0.05130952 -0.029278175  0.08966740  0.045470797  0.09569282  0.0126332194  0.009602991  0.180692603  0.002902642 -0.070143001  0.024169199  0.098758089  0.022752300  0.072389389  0.255254885
## 14  0.06191106  0.130247731  0.2038328389  0.053273944 -0.008255014 0.08409027  0.184517658 -0.01744714 -0.174755123 -0.07887034 -0.0440799805 -0.098425320  0.002902642  0.634785903  0.050304161  0.144958452  0.145588874 -0.153010458 -0.085468600 -0.036101554
## 15  0.13533894  0.083492901  0.0075628877 -0.050658916  0.196274082 0.04606106  0.081615445  0.01004578 -0.034563859  0.03849919  0.1204547458  0.115735082 -0.070143001  0.050304161  0.183045989  0.017377195  0.024204766  0.114198754  0.065305599 -0.134150800
## 16  0.04494843  0.034584771  0.0931866280  0.027628581  0.015655049 0.05525958  0.167363855  0.05351510  0.289727970 -0.03269542 -0.0309358502 -0.008523808  0.024169199  0.144958452  0.017377195  0.196649995 -0.013721030 -0.036117379 -0.041306754 -0.001724560
## 17  0.02566620  0.070673506  0.0780097427  0.103171448  0.004385998 0.05720774 -0.005877547  0.04348946 -0.147815572  0.07601644  0.0545105501  0.025639376  0.098758089  0.145588874  0.024204766 -0.013721030  0.130102980  0.030278423  0.070394162  0.129316389
## 18  0.08287238  0.044444385 -0.0263081321  0.014652526  0.137587408 0.03663144 -0.012766013  0.05270622  0.009736357  0.10879370  0.1313964513  0.138872382  0.022752300 -0.153010458  0.114198754 -0.036117379  0.030278423  0.162884880  0.124332114  0.016062269
## 19  0.04967229  0.046037095  0.0053194439  0.062938353  0.074529859 0.04230919 -0.032443340  0.05853012 -0.043585692  0.11288026  0.1056568184  0.101702138  0.072389389 -0.085468600  0.065305599 -0.041306754  0.070394162  0.124332114  0.118015774  0.092791775
## 20 -0.07924097  0.005517226  0.0809557552  0.220873748 -0.151310641 0.05129591 -0.082986844  0.11202949  0.027931145  0.12827538 -0.0011590278 -0.006921327  0.255254885 -0.036101554 -0.134150800 -0.001724560  0.129316389  0.016062269  0.092791775  0.373291750
lev   <- diag(Hat); lev                       # apalancamientos
##          1          2          3          4          5          6          7          8          9         10         11         12         13         14         15         16         17         18         19         20 
## 0.10655710 0.07274466 0.10454362 0.14458388 0.21767716 0.05211761 0.17497701 0.06993305 0.71976680 0.11322903 0.12146711 0.12293309 0.18069260 0.63478590 0.18304599 0.19664999 0.13010298 0.16288488 0.11801577 0.37329175
pesos <- cbind(x,y,lev)                       # construcción de una salida conjunta
rownames(pesos) <- lab
pesos                                         # salida
##    Weight Waist Pulse   y        lev
## 1     191    36    50 162 0.10655710
## 2     189    37    52 110 0.07274466
## 3     193    38    58 101 0.10454362
## 4     162    35    62 105 0.14458388
## 5     189    35    46 155 0.21767716
## 6     182    36    56 101 0.05211761
## 7     211    38    56 101 0.17497701
## 8     167    34    60 125 0.06993305
## 9     176    31    74 200 0.71976680
## 10    154    33    56 251 0.11322903
## 11    169    34    50 120 0.12146711
## 12    166    33    52 210 0.12293309
## 13    154    34    64 215 0.18069260
## 14    247    46    50  50 0.63478590
## 15    193    36    46  70 0.18304599
## 16    202    37    62 210 0.19664999
## 17    176    37    54  60 0.13010298
## 18    157    32    52 230 0.16288488
## 19    156    33    54 225 0.11801577
## 20    138    33    68 110 0.37329175
niv   <- 2 * p1 / n; niv                      # nivel para apalancamiento
## [1] 0.4
level <- which(lev>niv)                       # unidades con fuerte apalancamiento
out   <- rbind(level,lev[level])              # salida
rownames(out) <- c("unidad","apalancamiento"); out
##                        9         14
## unidad         9.0000000 14.0000000
## apalancamiento 0.7197668  0.6347859
# comparación entre modelo total y modelo sin unidades con apalancamiento
# modelo completo
lm1 <- lm(y~Weight+Waist+Pulse,data=Linnerud); summary(lm1) # regresión con y sin
## 
## Call:
## lm(formula = y ~ Weight + Waist + Pulse, data = Linnerud)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -74.19 -36.68 -12.41  40.28  81.64 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 623.2817   199.9013   3.118  0.00663 **
## Weight        0.7277     0.9733   0.748  0.46552   
## Waist       -17.3872     7.4652  -2.329  0.03328 * 
## Pulse         0.1393     1.7547   0.079  0.93770   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 51.18 on 16 degrees of freedom
## Multiple R-squared:  0.4365, Adjusted R-squared:  0.3308 
## F-statistic: 4.131 on 3 and 16 DF,  p-value: 0.02394
residuals(lm1)
##          1          2          3          4          5          6          7          8          9         10         11         12         13         14         15         16         17         18         19         20 
##  18.709194 -14.726904 -10.086236 -36.247714  -3.665431 -36.577780 -22.905478 -36.994597 -22.655662  81.635038 -42.056728  32.460394  61.907708  39.832440 -74.188851  74.420328 -55.545962  41.622113  54.458356 -49.394229
# y su plot estimados observados
plot(lm1$fitted.values,y,pch=20,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm1$fitted.values,y,labels=lab,pos=3,offset(0.5))
points(lm1$fitted.values,lm1$fitted.values,pch=20,col="red")

# y su plot estimados residuos
plot(lm1$fitted.values,lm1$residuals,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm1$fitted.values,lm1$residuals,labels=lab,pos=3,offset(0.5))

# Del gráfico de observa que las muestras 9 y 14 tiene un alto grado de apalancamiento, se procederá a retirar
# modelo sin unidades con fuerte apalancamiento (sin la muestra 9 y 14)
lm2 <- lm(y[-level]~Weight+Waist+Pulse,data=Linnerud[-level,]); summary(lm2)
## 
## Call:
## lm(formula = y[-level] ~ Weight + Waist + Pulse, data = Linnerud[-level, 
##     ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.205 -27.651   6.197  27.000  70.780 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 1047.205    255.616   4.097  0.00109 **
## Weight         3.214      1.442   2.229  0.04274 * 
## Waist        -47.515     13.852  -3.430  0.00406 **
## Pulse          3.678      2.411   1.525  0.14943   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 45.29 on 14 degrees of freedom
## Multiple R-squared:  0.5382, Adjusted R-squared:  0.4393 
## F-statistic: 5.439 on 3 and 14 DF,  p-value: 0.01085
residuals(lm2)
##          1          2          3          4          5          6          7          8         10         11         12         13         15         16         17         18         19         20 
##  27.455381  22.043138  25.632151 -27.974074  -5.918506 -26.681767 -24.872939 -64.205325  70.780268 -38.854935   5.918045  52.871397 -56.261861  43.475275   6.475736   7.333891  45.707149 -62.923025
# hay que tirar las unidades también de la representación gráfica
plot(lm2$fitted.values,y[-level],pch=20,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm2$fitted.values,y[-level],labels=lab,pos=3,offset(0.5))
points(lm2$fitted.values,lm2$fitted.values,pch=20,col="red")

plot(lm2$fitted.values,lm2$residuals,asp=1,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm2$fitted.values,lm2$residuals,labels=lab,pos=3,offset(0.5))

# En el gráfico de residuos vs estimados:
#Luego de retirar las observaciones con alto grado de apalancamiento (9 y 14) se puede ver que la nube de puntos está más concentrada en la línea con residuo 0. Además ya no tener observaciones con un alto apalancamiento (aunque podría evaluarse la observacióm 15 y 16)
# gráficos de comparación de estimados y residuos
# ambos contra y y no contra los estimados
plot(y,lm1$fitted.values,pch=20,asp=1,ylab="fitted values")
abline(0,1)
text(y,lm1$fitted.values,y,labels=lab,pos=4,offset(0.5))
points(y[-out[1,]],lm2$fitted.values,pch=20,col="red")
text(y[-out[1,]],lm2$fitted.values,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))
legend("bottomright", # Escoge la posición adecuada para tu gráfico
       legend=c("Modelo 1", "Modelo 2 sin Obs 9 y 14"), 
       pch=c(2, 2),
       col=c("black", "red"),
       title="Leyenda")

# por esto hay que cambiar el signo a los residuos
plot(y,-lm1$residuals,pch=20,asp=1,ylab="residuals")
abline(h=0)
text(y,-lm1$residuals,labels=lab,pch=20,pos=4,offset(0.5))
points(y[-out[1,]],-lm2$residuals,pch=20,col="red")
text(y[-out[1,]],-lm2$residuals,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))

 # Análisis gráfico:
#Al comparar los puntos negros con los rojos(lm1 vs lm2), puedes evaluar cómo cada modelo se ajusta a los datos. Los puntos rojos son los valores ajustados del segundo modelo después de remover algunos puntos influyentes o atípicos. Si los puntos rojos están más cerca de la línea de identidad que los negros, podríamos inferir que el segundo modelo (lm2) tiene un mejor ajuste después de excluir las observaciones con alto apalancamiento

# Los residuos del modelo lm2 (puntos rojos) están en general más cerca de la línea horizontal que los de lm1 (puntos negros), se podría concluir que lm2 se ajusta mejor a los datos, especialmente después de excluir los outliers.
# Comentarios:
# Mostrar la diferencia entre betas y entre los residuos de los dos modelos.

# Modelo 1: Con todas las observaciones
# Squats ~ Weight,Waist,Pulse
# lm1 <- lm(y~Weight+Waist+Pulse,data=Linnerud)

#             Estimate  Std. Error t value Pr(>|t|)   
# (Intercept) 623.2817   199.9013  3.118  0.00663 **
# Weight        0.7277    0.9733   0.748  0.46552   
# Waist       -17.3872    7.4652  -2.329  0.03328 * 
#Pulse         0.1393     1.7547   0.079  0.93770   

# beta = [623.2817 (intercepto) ,0.7277(Weight), -17.3872(Waist), 0.1393(Pulse)]

# residuals(lm1):
#       1          2          3          4          5          6          7 
#  18.709194 -14.726904 -10.086236 -36.247714  -3.665431 -36.577780 -22.905478 
#       8          9         10         11         12         13         14 
# -36.994597 -22.655662  81.635038 -42.056728  32.460394  61.907708  39.832440 
#      15         16         17         18         19         20 
# -74.188851  74.420328 -55.545962  41.622113  54.458356 -49.394229 


# Modelo 2: Sin observaciones con alto apalancamiento (observaciones 9 y 14)
# Squats ~ Weight,Waist,Pulse
# lm2 <- lm(y[-level]~Weight+Waist+Pulse,data=Linnerud[-level,]);

#            Estimate    Std. Error t value Pr(>|t|)   
# (Intercept) 1047.205    255.616   4.097  0.00109 **
# Weight         3.214      1.442   2.229  0.04274 * 
# Waist        -47.515     13.852  -3.430  0.00406 **
# Pulse          3.678      2.411   1.525  0.14943   

# beta = [1047.205 (intercepto) ,3.214 (Weight), -47.515(Waist), 3.678(Pulse)]


# residuals(lm2):
#       1          2          3          4          5          6          7 
# 27.455381  22.043138  25.632151 -27.974074  -5.918506 -26.681767 -24.872939 
#       8         10         11         12         13         15         16 
# -64.205325  70.780268 -38.854935   5.918045  52.871397 -56.261861  43.475275 
#      17         18         19         20 
#  6.475736   7.333891  45.707149 -62.923025 

# Comentarios:

# La eliminación de las observaciones 9 y 14 tiene un impacto notable en los coeficientes y su significancia

# Es especialmente notable que "Weight" se vuelva significativo al eliminar dos puntos de datos. Esto podría indicar que estas observaciones eran atípicas en términos de su relación entre el peso y "Squats".

# La consistencia del efecto negativo de "Waist" en ambos modelos subraya que es un predictor relevante y robusto para "Squats"

#  Los cambios en la magnitud y significancia de los coeficientes tras la eliminación de las observaciones de alto apalancamiento sugieren que los resultados del modelo pueden ser más confiables sin estas observaciones


# Al remover las observaciones identificadas como de alto apalancamiento, los residuos todavía muestran variabilidad significativa, pero el rango de estos residuos es algo más moderado comparado con el modelo completo, especialmente en el extremo superior. Los residuos oscilan entre -64.20 y 70.78, lo cual todavía es considerable.

Linnerrud.csv

3) Jumps ~ Weight,Waist,Pulse

# Lectura de los datos
Linnerud <- read.csv("Linnerud.csv", row.names = 1);head(Linnerud)
##   Weight Waist Pulse Pulls Squats Jumps
## 1    191    36    50     5    162    60
## 2    189    37    52     2    110    60
## 3    193    38    58    12    101   101
## 4    162    35    62    12    105    37
## 5    189    35    46    13    155    58
## 6    182    36    56     4    101    42
attach(Linnerud) #archivo en uso
## The following objects are masked from Linnerud (pos = 3):
## 
##     Jumps, Pulls, Pulse, Squats, Waist, Weight
lab = rownames(Linnerud) #etiquetas de las unidades en lab
Jumps ~ Weight,Waist y Pulse
# regresión a mano
# definición de los datos para el modelo
n     <- dim(Linnerud)[1]; n                 # n es el número de observaciones
## [1] 20
x     <- as.matrix(Linnerud[,1:3]); x        # x es el caracter descriptivo
##    Weight Waist Pulse
## 1     191    36    50
## 2     189    37    52
## 3     193    38    58
## 4     162    35    62
## 5     189    35    46
## 6     182    36    56
## 7     211    38    56
## 8     167    34    60
## 9     176    31    74
## 10    154    33    56
## 11    169    34    50
## 12    166    33    52
## 13    154    34    64
## 14    247    46    50
## 15    193    36    46
## 16    202    37    62
## 17    176    37    54
## 18    157    32    52
## 19    156    33    54
## 20    138    33    68
p     <- dim(x)[2]; p                        # numero de regresores
## [1] 3
y     <- Jumps                               # y es el caracter respuesta   
nomy  <- deparse(substitute(Jumps)); nomy    # nomy es su nombre
## [1] "Jumps"
nom   <- c(colnames(x),nomy) ; nom           # etiquetas de las variables en nom
## [1] "Weight" "Waist"  "Pulse"  "Jumps"
# estadísticas
xm    <- apply(x,2,mean); xm                 # cálculo de xm, promedio de x
## Weight  Waist  Pulse 
##  178.6   35.4   56.1
ym    <- sum(y)/n; ym                        # cálculo de ym, promedio de y
## [1] 70.3
ssx   <- apply(x^2,2,sum); ssx               # ssx es la suma de los x cuadros
## Weight  Waist  Pulse 
## 649542  25258  63932
ssy   <- sum(y^2); ssy                       # ssy es la suma de los y cuadros
## [1] 148800
sxy   <- t(x)%*%y ; sxy                      # sxy es la suma de los productos xy
##          [,1]
## Weight 245668
## Waist   49175
## Pulse   79122
ssxc  <- ssx-n*xm^2 ; ssxc                   # ssxc es ssx centrado sobre el promedio
##  Weight   Waist   Pulse 
## 11582.8   194.8   987.8
ssyc  <- ssy-n*ym^2 ; ssyc                   # ssyc es ssy centrado sobre el promedio
## [1] 49958.2
sxyc  <- sxy-n*xm*ym; sxyc                   # sxyc es sxy centrado sobre el promedio
##           [,1]
## Weight -5443.6
## Waist   -597.4
## Pulse    245.4
varx  <- ssxc/n; varx                        # varx es la varianza de x
## Weight  Waist  Pulse 
## 579.14   9.74  49.39
vary  <- ssyc/n; vary                        # vary es la varianza de y
## [1] 2497.91
covxy <- sxyc/n; covxy                       # covxy es la covarianza de xy
##           [,1]
## Weight -272.18
## Waist   -29.87
## Pulse    12.27
stats <- matrix(8*(2*p+1)*0,nrow=8,ncol=(2*p+1)) # construcción de tabla de estadísticas
rownames(stats) <- c("Mínimo","Máximo","Promedio","Total","Suma de cuadrados",
                     "Cuadrados centrados","Varianza covarianza","Desvío estándar")
colnames(stats) <- c(nom,"x1y","x2y","x3y")
stats[,1:p] <- rbind(apply(x,2,min),apply(x,2,max),xm,apply(x,2,sum),ssx,ssxc,varx,sqrt(varx))
stats[,p+1] <- rbind(min(y),max(y),ym,sum(y),ssy,ssyc,vary,sqrt(vary))
stats[,c((p+2):(2*p+1))] <- rbind(t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(sxy),t(sxyc),t(covxy),t(rep(0,p)))
stats                                        # impresión de la tabla
##                           Weight        Waist        Pulse       Jumps       x1y      x2y      x3y
## Mínimo                 138.00000    31.000000    46.000000     25.0000      0.00     0.00     0.00
## Máximo                 247.00000    46.000000    74.000000    250.0000      0.00     0.00     0.00
## Promedio               178.60000    35.400000    56.100000     70.3000      0.00     0.00     0.00
## Total                 3572.00000   708.000000  1122.000000   1406.0000      0.00     0.00     0.00
## Suma de cuadrados   649542.00000 25258.000000 63932.000000 148800.0000 245668.00 49175.00 79122.00
## Cuadrados centrados  11582.80000   194.800000   987.800000  49958.2000  -5443.60  -597.40   245.40
## Varianza covarianza    579.14000     9.740000    49.390000   2497.9100   -272.18   -29.87    12.27
## Desvío estándar         24.06533     3.120897     7.027802     49.9791      0.00     0.00     0.00
print("Matriz de varianza y covarianza de los regresores")
## [1] "Matriz de varianza y covarianza de los regresores"
vcovx <- (n-1)/n * cov(x) ; vcovx            # matriz de varianza/covarianza de los xy
##        Weight Waist  Pulse
## Weight 579.14 65.36 -61.86
## Waist   65.36  9.74  -7.74
## Pulse  -61.86 -7.74  49.39
# estimación
X      <- cbind(rep(1,n),x); X               # se necesita ajuntar la columna de unos
##      Weight Waist Pulse
## 1  1    191    36    50
## 2  1    189    37    52
## 3  1    193    38    58
## 4  1    162    35    62
## 5  1    189    35    46
## 6  1    182    36    56
## 7  1    211    38    56
## 8  1    167    34    60
## 9  1    176    31    74
## 10 1    154    33    56
## 11 1    169    34    50
## 12 1    166    33    52
## 13 1    154    34    64
## 14 1    247    46    50
## 15 1    193    36    46
## 16 1    202    37    62
## 17 1    176    37    54
## 18 1    157    32    52
## 19 1    156    33    54
## 20 1    138    33    68
p1     <- p + 1; p1                          # un regresor más
## [1] 4
XX     <- t(X)%*%X; XX                       # matriz X'X
##             Weight  Waist  Pulse
##          20   3572    708   1122
## Weight 3572 649542 127756 199152
## Waist   708 127756  25258  39564
## Pulse  1122 199152  39564  63932
XXm1   <- solve(XX); XXm1                    # su inversa
##                           Weight         Waist         Pulse
##        15.25501668  1.432897e-02 -0.3526671297 -9.411326e-02
## Weight  0.01432897  3.616185e-04 -0.0023606976  8.297072e-05
## Waist  -0.35266713 -2.360698e-03  0.0212746646  3.772656e-04
## Pulse  -0.09411326  8.297072e-05  0.0003772656  1.175392e-03
bh     <- XXm1 %*% t(X) %*% y                # estimación de los betas
print("Estimaciones de los beta")
## [1] "Estimaciones de los beta"
bh 
##               [,1]
##        179.8867890
## Weight  -0.5378649
## Waist    0.2337900
## Pulse   -0.3885967
eta    <- X %*% bh; eta                      # valores estimados
##        [,1]
## 1  66.14119
## 2  66.67352
## 3  62.42426
## 4  76.84232
## 5  68.53752
## 6  68.65039
## 7  53.51989
## 8  74.69640
## 9  63.71389
## 10 83.00924
## 11 77.50664
## 12 78.10925
## 13 80.13426
## 14 38.35865
## 15 66.61985
## 16 55.79530
## 17 72.88857
## 18 82.71624
## 19 82.71070
## 20 86.95192
res    <- y - eta; res                       # residuos
##          [,1]
## 1   -6.141189
## 2   -6.673515
## 3   38.575735
## 4  -39.842322
## 5  -10.537515
## 6  -26.650393
## 7  -15.519889
## 8  -34.696400
## 9  -23.713892
## 10 166.990759
## 11 -39.506637
## 12  36.890751
## 13  24.865742
## 14  11.641349
## 15 -35.619845
## 16  64.204696
## 17 -47.888566
## 18  -2.716243
## 19  -9.710705
## 20 -43.951920
por_res <- (res/y)*100
unidad <- cbind(x,y,eta,res,por_res)                 # tabla de resultados
rownames(unidad) <- lab
colnames(unidad) <- c(nom,"eta","residuos","% res")
unidad                                       # impresión de la tabla
##    Weight Waist Pulse Jumps      eta   residuos       % res
## 1     191    36    50    60 66.14119  -6.141189  -10.235314
## 2     189    37    52    60 66.67352  -6.673515  -11.122525
## 3     193    38    58   101 62.42426  38.575735   38.193797
## 4     162    35    62    37 76.84232 -39.842322 -107.681950
## 5     189    35    46    58 68.53752 -10.537515  -18.168130
## 6     182    36    56    42 68.65039 -26.650393  -63.453316
## 7     211    38    56    38 53.51989 -15.519889  -40.841814
## 8     167    34    60    40 74.69640 -34.696400  -86.741001
## 9     176    31    74    40 63.71389 -23.713892  -59.284730
## 10    154    33    56   250 83.00924 166.990759   66.796303
## 11    169    34    50    38 77.50664 -39.506637 -103.964835
## 12    166    33    52   115 78.10925  36.890751   32.078914
## 13    154    34    64   105 80.13426  24.865742   23.681659
## 14    247    46    50    50 38.35865  11.641349   23.282697
## 15    193    36    46    31 66.61985 -35.619845 -114.902727
## 16    202    37    62   120 55.79530  64.204696   53.503914
## 17    176    37    54    25 72.88857 -47.888566 -191.554264
## 18    157    32    52    80 82.71624  -2.716243   -3.395304
## 19    156    33    54    73 82.71070  -9.710705  -13.302336
## 20    138    33    68    43 86.95192 -43.951920 -102.213768
# gráficos clásicos: ejemplo con el primero regresor
plot(x[,1],y,xlab="Weight",ylab="Jumps",ylim=c(min(y,eta),max(y,eta)),pch=".")
text(x[,1],y,labels=lab,pos=2)
points(x[,1],eta,col="red",pch=".")
text(x[,1],eta,labels=lab,col="red",pos=2)
legend("topright",            
       legend=c("Y", "Eta"), 
       pch=c("*", "*"),       
       col=c("black", "red"))  

# gráficos de valores estimados y residuos
plot(eta,y,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(eta,y,labels=lab,pos=2)
points(eta,eta,col="red")

plot(eta,res,asp=1,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(eta,res,labels=lab,pos=2)

# Del gráfico residuos vs estimados podemos ver que hay observaciones que tienen un alto valor residual como la observación 10.
# datos para anova
ssr   <- t(eta) %*% eta; ssr       # suma de  cuadrados por regresión
##          [,1]
## [1,] 101534.7
sse   <- ssy - ssr; sse            # suma por residuos
##          [,1]
## [1,] 47265.31
msr   <- ssr/p1; msr               # cuadrado promedio por regresión
##          [,1]
## [1,] 25383.67
mse   <- sse/(n-p1); mse           # cuadrado promedio por residuos
##          [,1]
## [1,] 2954.082
df    <- c(p1,n-p1,n)              # grados de libertad
ssq   <- c(ssr,sse,ssy)            # sumas de cuadrados
msq   <- c(msr,mse,0)              # suma de residuos
anova <- (cbind(df,ssq,msq))       # tabla de anova
colnames(anova) <- c("Degrees of Freedom","Sum of squares","Mean squares")
rownames(anova) <- c("Regression","Residuals","Total")
anova
##            Degrees of Freedom Sum of squares Mean squares
## Regression                  4      101534.69    25383.673
## Residuals                  16       47265.31     2954.082
## Total                      20      148800.00        0.000
# apalancamiento
Hat   <- X %*% XXm1 %*% t(X)                  # matriz H sombrero
rownames(Hat) <- lab                          # inclusivo de su etiquetas
colnames(Hat) <- lab
Hat
##              1            2             3            4            5          6            7           8            9          10            11           12           13           14           15           16           17           18           19           20
## 1   0.10655710  0.070795483  0.0267442663 -0.019894425  0.143901992 0.04801517  0.084681195  0.02355179  0.020227354  0.03271984  0.0878217868  0.087116980 -0.033406882  0.061911065  0.135338940  0.044948432  0.025666204  0.082872382  0.049672292 -0.079240967
## 2   0.07079548  0.072744656  0.0591447046  0.032960464  0.076977336 0.05388813  0.059433539  0.02994190 -0.044026733  0.04004485  0.0627548367  0.049177635  0.021165578  0.130247731  0.083492901  0.034584771  0.070673506  0.044444385  0.046037095  0.005517226
## 3   0.02674427  0.059144705  0.1045436224  0.080027350 -0.011382549 0.06015351  0.081814272  0.04530832  0.033966692  0.01568300 -0.0003265344 -0.011227007  0.072991190  0.203832839  0.007562888  0.093186628  0.078009743 -0.026308132  0.005319444  0.080955755
## 4  -0.01989443  0.032960464  0.0800273504  0.144583882 -0.065286252 0.05366668 -0.013028942  0.07802181  0.013881794  0.08288966  0.0148177663  0.006434553  0.159045977  0.053273944 -0.050658916  0.027628581  0.103171448  0.014652526  0.062938353  0.220873748
## 5   0.14390199  0.076977336 -0.0113825494 -0.065286252  0.217677155 0.04214121  0.080544327  0.01323973  0.002425648  0.04447671  0.1325686301  0.135519874 -0.081670547 -0.008255014  0.196274082  0.015655049  0.004385998  0.137587408  0.074529859 -0.151310641
## 6   0.04801517  0.053888135  0.0601535085  0.053666680  0.042141211 0.05211761  0.055857841  0.04720808  0.036821951  0.04348146  0.0429084448  0.039575224  0.051309522  0.084090266  0.046061061  0.055259577  0.057207740  0.036631437  0.042309191  0.051295906
## 7   0.08468120  0.059433539  0.0818142724 -0.013028942  0.080544327 0.05585784  0.174977008  0.02892970  0.192415717 -0.03648079  0.0045543385  0.016156756 -0.029278175  0.184517658  0.081615445  0.167363855 -0.005877547 -0.012766013 -0.032443340 -0.082986844
## 8   0.02355179  0.029941901  0.0453083249  0.078021808  0.013239728 0.04720808  0.028929704  0.06993305  0.110931992  0.06584934  0.0378666682  0.046681200  0.089667404 -0.017447145  0.010045775  0.053515097  0.043489458  0.052706217  0.058530119  0.112029493
## 9   0.02022735 -0.044026733  0.0339666916  0.013881794  0.002425648 0.03682195  0.192415717  0.11093199  0.719766801 -0.02712212 -0.0588421522  0.027407036  0.045470797 -0.174755123 -0.034563859  0.289727970 -0.147815572  0.009736357 -0.043585692  0.027931145
## 10  0.03271984  0.040044849  0.0156829955  0.082889660  0.044476706 0.04348146 -0.036480794  0.06584934 -0.027122123  0.11322903  0.0900130217  0.086623986  0.095692819 -0.078870340  0.038499190 -0.032695421  0.076016444  0.108793697  0.112880264  0.128275379
## 11  0.08782179  0.062754837 -0.0003265344  0.014817766  0.132568630 0.04290844  0.004554338  0.03786667 -0.058842152  0.09001302  0.1214671139  0.115919153  0.012633219 -0.044079980  0.120454746 -0.030935850  0.054510550  0.131396451  0.105656818 -0.001159028
## 12  0.08711698  0.049177635 -0.0112270070  0.006434553  0.135519874 0.03957522  0.016156756  0.04668120  0.027407036  0.08662399  0.1159191532  0.122933095  0.009602991 -0.098425320  0.115735082 -0.008523808  0.025639376  0.138872382  0.101702138 -0.006921327
## 13 -0.03340688  0.021165578  0.0729911898  0.159045977 -0.081670547 0.05130952 -0.029278175  0.08966740  0.045470797  0.09569282  0.0126332194  0.009602991  0.180692603  0.002902642 -0.070143001  0.024169199  0.098758089  0.022752300  0.072389389  0.255254885
## 14  0.06191106  0.130247731  0.2038328389  0.053273944 -0.008255014 0.08409027  0.184517658 -0.01744714 -0.174755123 -0.07887034 -0.0440799805 -0.098425320  0.002902642  0.634785903  0.050304161  0.144958452  0.145588874 -0.153010458 -0.085468600 -0.036101554
## 15  0.13533894  0.083492901  0.0075628877 -0.050658916  0.196274082 0.04606106  0.081615445  0.01004578 -0.034563859  0.03849919  0.1204547458  0.115735082 -0.070143001  0.050304161  0.183045989  0.017377195  0.024204766  0.114198754  0.065305599 -0.134150800
## 16  0.04494843  0.034584771  0.0931866280  0.027628581  0.015655049 0.05525958  0.167363855  0.05351510  0.289727970 -0.03269542 -0.0309358502 -0.008523808  0.024169199  0.144958452  0.017377195  0.196649995 -0.013721030 -0.036117379 -0.041306754 -0.001724560
## 17  0.02566620  0.070673506  0.0780097427  0.103171448  0.004385998 0.05720774 -0.005877547  0.04348946 -0.147815572  0.07601644  0.0545105501  0.025639376  0.098758089  0.145588874  0.024204766 -0.013721030  0.130102980  0.030278423  0.070394162  0.129316389
## 18  0.08287238  0.044444385 -0.0263081321  0.014652526  0.137587408 0.03663144 -0.012766013  0.05270622  0.009736357  0.10879370  0.1313964513  0.138872382  0.022752300 -0.153010458  0.114198754 -0.036117379  0.030278423  0.162884880  0.124332114  0.016062269
## 19  0.04967229  0.046037095  0.0053194439  0.062938353  0.074529859 0.04230919 -0.032443340  0.05853012 -0.043585692  0.11288026  0.1056568184  0.101702138  0.072389389 -0.085468600  0.065305599 -0.041306754  0.070394162  0.124332114  0.118015774  0.092791775
## 20 -0.07924097  0.005517226  0.0809557552  0.220873748 -0.151310641 0.05129591 -0.082986844  0.11202949  0.027931145  0.12827538 -0.0011590278 -0.006921327  0.255254885 -0.036101554 -0.134150800 -0.001724560  0.129316389  0.016062269  0.092791775  0.373291750
lev   <- diag(Hat); lev                       # apalancamientos
##          1          2          3          4          5          6          7          8          9         10         11         12         13         14         15         16         17         18         19         20 
## 0.10655710 0.07274466 0.10454362 0.14458388 0.21767716 0.05211761 0.17497701 0.06993305 0.71976680 0.11322903 0.12146711 0.12293309 0.18069260 0.63478590 0.18304599 0.19664999 0.13010298 0.16288488 0.11801577 0.37329175
pesos <- cbind(x,y,lev)                       # construcción de una salida conjunta
rownames(pesos) <- lab
pesos                                         # salida
##    Weight Waist Pulse   y        lev
## 1     191    36    50  60 0.10655710
## 2     189    37    52  60 0.07274466
## 3     193    38    58 101 0.10454362
## 4     162    35    62  37 0.14458388
## 5     189    35    46  58 0.21767716
## 6     182    36    56  42 0.05211761
## 7     211    38    56  38 0.17497701
## 8     167    34    60  40 0.06993305
## 9     176    31    74  40 0.71976680
## 10    154    33    56 250 0.11322903
## 11    169    34    50  38 0.12146711
## 12    166    33    52 115 0.12293309
## 13    154    34    64 105 0.18069260
## 14    247    46    50  50 0.63478590
## 15    193    36    46  31 0.18304599
## 16    202    37    62 120 0.19664999
## 17    176    37    54  25 0.13010298
## 18    157    32    52  80 0.16288488
## 19    156    33    54  73 0.11801577
## 20    138    33    68  43 0.37329175
niv   <- 2 * p1 / n; niv                      # nivel para apalancamiento
## [1] 0.4
level <- which(lev>niv)                       # unidades con fuerte apalancamiento
out   <- rbind(level,lev[level])              # salida
rownames(out) <- c("unidad","apalancamiento"); out
##                        9         14
## unidad         9.0000000 14.0000000
## apalancamiento 0.7197668  0.6347859
# De acuerdo al gráfico de residuos vs estimados y Resultados del apalancamiento

# podemos ver que los puntos 14 y 9 son los que tienen mayor apalancamiento ya que se alejan de donde están la mayor concentración de puntos si observamos el gráfico de residuos vs estimados. Por otro lado los valores más altos de apalancamiento corresponden a las observaciones 9 (apal= 0.71976680) y 14 (apal= 0.63478590).
# comparación entre modelo total y modelo sin unidades con apalancamiento
# modelo completo
lm1 <- lm(y~Weight+Waist+Pulse,data=Linnerud); summary(lm1)
## 
## Call:
## lm(formula = y ~ Weight + Waist + Pulse, data = Linnerud)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -47.89 -34.93 -10.12  14.95 166.99 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 179.8868   212.2842   0.847    0.409
## Weight       -0.5379     1.0336  -0.520    0.610
## Waist         0.2338     7.9276   0.029    0.977
## Pulse        -0.3886     1.8634  -0.209    0.837
## 
## Residual standard error: 54.35 on 16 degrees of freedom
## Multiple R-squared:  0.0539, Adjusted R-squared:  -0.1235 
## F-statistic: 0.3039 on 3 and 16 DF,  p-value: 0.8222
residuals(lm1)
##          1          2          3          4          5          6          7          8          9         10         11         12         13         14         15         16         17         18         19         20 
##  -6.141189  -6.673515  38.575735 -39.842322 -10.537515 -26.650393 -15.519889 -34.696400 -23.713892 166.990759 -39.506637  36.890751  24.865742  11.641349 -35.619845  64.204696 -47.888566  -2.716243  -9.710705 -43.951920
# y su plot estimados observados
plot(lm1$fitted.values,y,pch=20,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm1$fitted.values,y,labels=lab,pos=3,offset(0.5))
points(lm1$fitted.values,lm1$fitted.values,pch=20,col="red")

# y su plot estimados residuos
plot(lm1$fitted.values,lm1$residuals,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm1$fitted.values,lm1$residuals,labels=lab,pos=3,offset(0.5))

# modelo sin unidades con fuerte apalancamiento (Todas las observaciones menos la 9 y 14)
lm2 <- lm(y[-level]~Weight+Waist+Pulse,data=Linnerud[-level,]); summary(lm2)
## 
## Call:
## lm(formula = y[-level] ~ Weight + Waist + Pulse, data = Linnerud[-level, 
##     ])
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -53.02 -25.25 -11.89  17.12 162.90 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  382.112    311.086   1.228    0.240
## Weight         1.172      1.755   0.668    0.515
## Waist        -18.157     16.858  -1.077    0.300
## Pulse          2.207      2.934   0.752    0.464
## 
## Residual standard error: 55.12 on 14 degrees of freedom
## Multiple R-squared:  0.1228, Adjusted R-squared:  -0.06519 
## F-statistic: 0.6532 on 3 and 14 DF,  p-value: 0.5941
residuals(lm2)
##          1          2          3          4          5          6          7          8         10         11         12         13         15         16         17         18         19         20 
##  -2.768104  13.318422  54.540972 -36.414981 -11.750266 -23.461296 -25.146942 -53.018637 162.895341 -35.289092  22.656544  18.392536 -25.283138  36.003234 -10.855545 -19.948702 -12.034552 -51.835795
# hay que tirar las unidades también de la representación gráfica
plot(lm2$fitted.values,y[-level],pch=20,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm2$fitted.values,y[-level],labels=lab,pos=3,offset(0.5))
points(lm2$fitted.values,lm2$fitted.values,pch=20,col="red")

plot(lm2$fitted.values,lm2$residuals,asp=1,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm2$fitted.values,lm2$residuals,labels=lab,pos=3,offset(0.5))

# Se observa que los puntos se ajustan a la recta indentidad por lo que sugiere que los datos tienen buen ajuste al modelo.

# Por otro lado el gráfico residuos vs estimados se observa que los puntos están sobre la recta 0. Aunque hay observaciones que aún mantienen un alto valor residual. Sin embargo, ya no persiste las observaciones 9 y 14 que tenian un alto valor de apalancamiento
# gráficos de comparación de estimados y residuos
# ambos contra y y no contra los estimados
plot(y,lm1$fitted.values,pch=20,asp=1,ylab="fitted values")
abline(0,1)
text(y,lm1$fitted.values,y,labels=lab,pos=4,offset(0.5))
points(y[-out[1,]],lm2$fitted.values,pch=20,col="red")
text(y[-out[1,]],lm2$fitted.values,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))

legend("bottomright", # Escoge la posición adecuada para tu gráfico
       legend=c("Modelo 1", "Modelo 2 sin Obs 9 y 14"), 
       pch=c(2, 2),
       col=c("black", "red"),
       title="Leyenda")

# por esto hay que cambiar el signo a los residuos
plot(y,-lm1$residuals,pch=20,asp=1,ylab="residuals")
abline(h=0)
text(y,-lm1$residuals,labels=lab,pch=20,pos=4,offset(0.5))
points(y[-out[1,]],-lm2$residuals,pch=20,col="red")
text(y[-out[1,]],-lm2$residuals,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))

# Los puntos rojos (lm2) están sistemáticamente más cerca de la línea diagonal que los puntos negros, podría concluirse que la exclusión de la observación 9 y 14 (alto grado de apalancamiento) mejora la bondad de ajuste del modelo. Esto también podría sugerir que las observaciones excluidas son puntos influyentes y que el modelo original podría estar siendo distorsionado por estos.
# Comentarios:
# Mostrar la diferencia entre betas y entre los residuos de los dos modelos.

# Modelo 1: Con todas las observaciones
# Jumps ~ Weight,Waist,Pulse
# lm1 <- lm(y~Weight+Waist+Pulse,data=Linnerud)

#             Estimate Std. Error t value Pr(>|t|)
# (Intercept) 179.8868   212.2842   0.847    0.409
# Weight       -0.5379     1.0336  -0.520    0.610
# Waist         0.2338     7.9276   0.029    0.977
# Pulse        -0.3886     1.8634  -0.209    0.837

# beta = [179.8868 (intercepto) ,-0.5379 (Weight), 0.2338(Waist), -0.3886(Pulse)]

# residuals(lm1):
#       1          2          3          4          5          6          7 
# -6.141189  -6.673515  38.575735 -39.842322 -10.537515 -26.650393 -15.519889 
#         8          9         10         11         12         13         14 
# -34.696400 -23.713892 166.990759 -39.506637  36.890751  24.865742  11.641349 
#        15         16         17         18         19         20 
# -35.619845  64.204696 -47.888566  -2.716243  -9.710705 -43.951920 


# Modelo 2: Sin observaciones con alto apalancamiento (observaciones 9 y 14)
# Jumps ~ Weight,Waist,Pulse
# lm2 <- lm(y[-level]~Weight+Waist+Pulse,data=Linnerud[-level,]);

#             Estimate Std. Error t value Pr(>|t|)
# (Intercept)  382.112    311.086   1.228    0.240
# Weight         1.172      1.755   0.668    0.515
# Waist        -18.157     16.858  -1.077    0.300
# Pulse          2.207      2.934   0.752    0.464

# beta = [382.112 (intercepto) ,1.172 (Weight), -18.157 (Waist), 2.207(Pulse)]


# residuals(lm2):
#     1          2          3          4          5          6          7 
# -2.768104  13.318422  54.540972 -36.414981 -11.750266 -23.461296 -25.146942 
#         8         10         11         12         13         15         16 
# -53.018637 162.895341 -35.289092  22.656544  18.392536 -25.283138  36.003234 
#        17         18         19         20 
# -10.855545 -19.948702 -12.034552 -51.835795 

# Comentarios:

# Los cambios en los coeficientes y sus significancias entre los dos modelos sugieren que los resultados son sensibles a las observaciones de alto apalancamiento. Este cambio indica que las observaciones eliminadas tenían características atípicas que influían significativamente en las estimaciones del modelo.

# Ninguno de los modelos logra establecer relaciones significativas entre los predictores y la variable respuesta. Esto podría sugerir que otras variables no examinadas podrían ser más relevantes para predecir "Jumps".


# Eliminando las observaciones 9 y 14, que tienen un alto apalancamiento y potencialmente podrían estar sesgando el modelo, se observa una reducción en la magnitud de algunos residuos, aunque sigue habiendo algunos residuos considerablemente altos, como 162.89. Esto indica que, aunque se ha mejorado el ajuste del modelo en algunos casos, aún persisten desafíos con la capacidad del modelo para capturar todas las variabilidades en los datos.
# Comentarios sobre análisis de varianza de modelos:

# Modelo 1:
# Pulls ~ Weight,Waist,Pulse
#                Degrees of Freedom Sum of squares Mean squares
# Regression                  4      1966.3455    491.58638
# Residuals                  16       350.6545     21.91591
# Total                      20      2317.0000      0.00000

# Tiene un MSE más bajo, lo que indica que, en promedio, los residuos están más cerca de cero, sugiriendo un mejor ajuste relativo de las variables al modelo

# MSR = 491.58638(Regresión)
# MSE = 21.91591 (residual)
# F-statistic: MSR/MSE = 491.58638/21.91591 = 22.43


# Modelo 2:
# Squats ~ Weight,Waist, Pulse
#               Degrees of Freedom Sum of squares Mean squares
# Regression                  4      456161.01     114040.3
# Residuals                  16       41911.99       2619.5
# Total                      20      498073.00          0.0

# MSR = 114040.3(Regresión)
# MSE = 2619.5 (residual)
# F-statistic: MSR/MSE = 114040.3/2619.5 = 43.55

# Modelo 3:
# Jumps ~ Weight,Waist, Pulse
#               Degrees of Freedom Sum of squares Mean squares
# Regression                  4      101534.69    25383.673
# Residuals                  16       47265.31     2954.082
# Total                      20      148800.00        0.000

# MSR = 25383.673(Regresión)
# MSE = 2954.082 (residual)
# F-statistic: MSR/MSE = 25383.673/2954.082 = 8.592744

# El Modelo 2 es el que más variabilidad explica, como se evidencia en su SSR relativamente alta. Sin embargo, también tiene un MSE alto, lo que sugiere que, aunque explica más variabilidad, también es propenso a mayores errores en las predicciones individuales.

# El Modelo 1 tiene el balance más favorable entre SSR y MSE, indicando que mientras que no explica tanta variabilidad como los otros modelos, es más consistente en sus predicciones por dato.

Decathlon.csv

1) Points ~ X100m,Shot.put,Discus

# Lectura de los datos
Decathlon <- read.csv("Decathlon.csv", row.names = 1);head(Decathlon)
##           X100m Long.jump Shot.put High.jump X400m X110m.hurdle Discus Pole.vault Javeline X1500m Points
## Sebrle    10.85      7.84    16.36      2.12 48.36        14.05  48.72        5.0    70.52 280.01   8893
## Clay      10.44      7.96    15.23      2.06 49.19        14.13  50.11        4.9    69.71 282.00   8820
## Karpov    10.50      7.81    15.93      2.09 46.81        13.97  51.65        4.6    55.54 278.11   8725
## Macey     10.89      7.47    15.73      2.15 48.97        14.56  48.34        4.4    58.46 265.42   8414
## Warners   10.62      7.74    14.48      1.97 47.97        14.01  43.73        4.9    55.39 278.05   8343
## Zsivoczky 10.91      7.14    15.31      2.12 49.40        14.95  45.62        4.7    63.45 269.54   8287
attach(Decathlon) #archivo en uso
lab = rownames(Decathlon) #etiquetas de las unidades en lab
# regresión a mano
# definición de los datos para el modelo
n     <- dim(Decathlon)[1]; n                 # n es el número de observaciones
## [1] 28
x     <- as.matrix(Decathlon[,c(1,3,7)]); x        # x es el caracter descriptivo
##             X100m Shot.put Discus
## Sebrle      10.85    16.36  48.72
## Clay        10.44    15.23  50.11
## Karpov      10.50    15.93  51.65
## Macey       10.89    15.73  48.34
## Warners     10.62    14.48  43.73
## Zsivoczky   10.91    15.31  45.62
## Hernu       10.97    14.65  44.72
## Nool        10.80    14.26  42.05
## Bernard     10.69    14.80  44.75
## Schwarzl    10.98    14.01  42.43
## Pogorelov   10.95    15.10  44.60
## Schoenbeck  10.90    14.77  44.41
## Barras      11.14    14.91  44.83
## Smith       10.85    15.24  49.02
## Averyanov   10.55    14.44  39.88
## Ojaniemi    10.68    14.97  40.35
## Smirnov     10.89    13.88  42.47
## Qi          11.06    13.55  45.13
## Drews       10.87    13.07  40.11
## Parkhomenko 11.14    15.69  41.90
## Terek       10.92    15.15  45.62
## Gomez       11.08    14.57  40.95
## Turi        11.08    13.62  39.83
## Lorenzo     11.10    13.22  40.22
## Karlivans   11.33    13.30  43.34
## Korkizoglou 10.86    14.81  46.07
## Uldal       11.23    13.53  43.01
## Casarsa     11.36    14.92  48.66
p     <- dim(x)[2]; p                        # numero de regresores
## [1] 3
y     <- Points                               # y es el caracter respuesta   
nomy  <- deparse(substitute(Points)); nomy    # nomy es su nombre
## [1] "Points"
nom   <- c(colnames(x),nomy) ; nom           # etiquetas de las variables en nom
## [1] "X100m"    "Shot.put" "Discus"   "Points"
# estadísticas
xm    <- apply(x,2,mean); xm                 # cálculo de xm, promedio de x
##    X100m Shot.put   Discus 
## 10.91571 14.62500 44.37571
ym    <- sum(y)/n; ym                        # cálculo de ym, promedio de y
## [1] 8051.536
ssx   <- apply(x^2,2,sum); ssx               # ssx es la suma de los x cuadros
##     X100m  Shot.put    Discus 
##  3337.720  6008.732 55431.669
ssy   <- sum(y^2); ssy                       # ssy es la suma de los y cuadros
## [1] 1818910257
sxy   <- t(x)%*%y ; sxy                      # sxy es la suma de los productos xy
##              [,1]
## X100m     2459212
## Shot.put  3302452
## Discus   10020967
ssxc  <- ssx-n*xm^2 ; ssxc                   # ssxc es ssx centrado sobre el promedio
##      X100m   Shot.put     Discus 
##   1.441086  19.794100 293.956286
ssyc  <- ssy-n*ym^2 ; ssyc                   # ssyc es ssy centrado sobre el promedio
## [1] 3747891
sxyc  <- sxy-n*xm*ym; sxyc                   # sxyc es sxy centrado sobre el promedio
##               [,1]
## X100m    -1658.966
## Shot.put  5347.735
## Discus   16772.364
varx  <- ssxc/n; varx                        # varx es la varianza de x
##       X100m    Shot.put      Discus 
##  0.05146735  0.70693214 10.49843878
vary  <- ssyc/n; vary                        # vary es la varianza de y
## [1] 133853.2
covxy <- sxyc/n; covxy                       # covxy es la covarianza de xy
##               [,1]
## X100m    -59.24878
## Shot.put 190.99054
## Discus   599.01301
stats <- matrix(8*(2*p+1)*0,nrow=8,ncol=(2*p+1)) # construcción de tabla de estadísticas
rownames(stats) <- c("Mínimo","Máximo","Promedio","Total","Suma de cuadrados",
                     "Cuadrados centrados","Varianza covarianza","Desvío estándar")
colnames(stats) <- c(nom,"x1y","x2y","x3y")
stats[,1:p] <- rbind(apply(x,2,min),apply(x,2,max),xm,apply(x,2,sum),ssx,ssxc,varx,sqrt(varx))
stats[,p+1] <- rbind(min(y),max(y),ym,sum(y),ssy,ssyc,vary,sqrt(vary))
stats[,c((p+2):(2*p+1))] <- rbind(t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(sxy),t(sxyc),t(covxy),t(rep(0,p)))
stats                                        # impresión de la tabla
##                            X100m     Shot.put       Discus       Points           x1y          x2y          x3y
## Mínimo              1.044000e+01   13.0700000    39.830000 7.404000e+03       0.00000       0.0000        0.000
## Máximo              1.136000e+01   16.3600000    51.650000 8.893000e+03       0.00000       0.0000        0.000
## Promedio            1.091571e+01   14.6250000    44.375714 8.051536e+03       0.00000       0.0000        0.000
## Total               3.056400e+02  409.5000000  1242.520000 2.254430e+05       0.00000       0.0000        0.000
## Suma de cuadrados   3.337720e+03 6008.7316000 55431.668800 1.818910e+09 2459212.41000 3302451.6100 10020966.520
## Cuadrados centrados 1.441086e+00   19.7941000   293.956286 3.747891e+06   -1658.96571    5347.7350    16772.364
## Varianza covarianza 5.146735e-02    0.7069321    10.498439 1.338532e+05     -59.24878     190.9905      599.013
## Desvío estándar     2.268642e-01    0.8407926     3.240129 3.658596e+02       0.00000       0.0000        0.000
print("Matriz de varianza y covarianza de los regresores")
## [1] "Matriz de varianza y covarianza de los regresores"
vcovx <- (n-1)/n * cov(x) ; vcovx            # matriz de varianza/covarianza de los xy
##                X100m    Shot.put     Discus
## X100m     0.05146735 -0.07051786 -0.1715041
## Shot.put -0.07051786  0.70693214  1.8137536
## Discus   -0.17150408  1.81375357 10.4984388
# estimación
X      <- cbind(rep(1,n),x); X               # se necesita ajuntar la columna de unos
##               X100m Shot.put Discus
## Sebrle      1 10.85    16.36  48.72
## Clay        1 10.44    15.23  50.11
## Karpov      1 10.50    15.93  51.65
## Macey       1 10.89    15.73  48.34
## Warners     1 10.62    14.48  43.73
## Zsivoczky   1 10.91    15.31  45.62
## Hernu       1 10.97    14.65  44.72
## Nool        1 10.80    14.26  42.05
## Bernard     1 10.69    14.80  44.75
## Schwarzl    1 10.98    14.01  42.43
## Pogorelov   1 10.95    15.10  44.60
## Schoenbeck  1 10.90    14.77  44.41
## Barras      1 11.14    14.91  44.83
## Smith       1 10.85    15.24  49.02
## Averyanov   1 10.55    14.44  39.88
## Ojaniemi    1 10.68    14.97  40.35
## Smirnov     1 10.89    13.88  42.47
## Qi          1 11.06    13.55  45.13
## Drews       1 10.87    13.07  40.11
## Parkhomenko 1 11.14    15.69  41.90
## Terek       1 10.92    15.15  45.62
## Gomez       1 11.08    14.57  40.95
## Turi        1 11.08    13.62  39.83
## Lorenzo     1 11.10    13.22  40.22
## Karlivans   1 11.33    13.30  43.34
## Korkizoglou 1 10.86    14.81  46.07
## Uldal       1 11.23    13.53  43.01
## Casarsa     1 11.36    14.92  48.66
p1     <- p + 1; p1                          # un regresor más
## [1] 4
XX     <- t(X)%*%X; XX                       # matriz X'X
##                      X100m  Shot.put   Discus
##            28.00   305.640   409.500  1242.52
## X100m     305.64  3337.720  4468.011 13558.19
## Shot.put  409.50  4468.011  6008.732 18222.64
## Discus   1242.52 13558.191 18222.640 55431.67
XXm1   <- solve(XX); XXm1                    # su inversa
##                              X100m    Shot.put       Discus
##          134.03355845 -9.940932497 -1.66416391 -0.025849005
## X100m     -9.94093250  0.804052240  0.08353098 -0.001296037
## Shot.put  -1.66416391  0.083530978  0.09941981 -0.015811602
## Discus    -0.02584901 -0.001296037 -0.01581160  0.006112372
bh     <- XXm1 %*% t(X) %*% y                # estimación de los betas
print("Estimaciones de los beta")
## [1] "Estimaciones de los beta"
bh 
##                 [,1]
##          15210.14536
## X100m     -908.93117
## Shot.put   127.89783
## Discus      20.11275
eta    <- X %*% bh; eta                      # valores estimados
##                 [,1]
## Sebrle      8420.544
## Clay        8676.638
## Karpov      8742.604
## Macey       8295.968
## Warners     8288.787
## Zsivoczky   8169.366
## Hernu       8012.316
## Nool        8063.253
## Bernard     8286.604
## Schwarzl    7875.314
## Pogorelov   8085.635
## Schoenbeck  8085.054
## Barras      7893.263
## Smith       8283.332
## Averyanov   8269.863
## Ojaniemi    8228.940
## Smirnov     7941.295
## Qi          7798.071
## Drews       7808.410
## Parkhomenko 7934.093
## Terek       8139.813
## Gomez       7826.276
## Turi        7682.247
## Lorenzo     7620.753
## Karlivans   7484.683
## Korkizoglou 8159.914
## Uldal       7598.355
## Casarsa     7771.609
res    <- y - eta; res                       # residuos
##                    [,1]
## Sebrle       472.456260
## Clay         143.362309
## Karpov       -17.603935
## Macey        118.031986
## Warners       54.212627
## Zsivoczky    117.634373
## Hernu        224.684286
## Nool         171.747178
## Bernard      -61.604500
## Schwarzl     226.686403
## Pogorelov     -1.634832
## Schoenbeck    -8.053684
## Barras       173.736747
## Smith       -260.331992
## Averyanov   -248.862561
## Ojaniemi    -222.940351
## Smirnov       51.704806
## Qi           135.929480
## Drews        117.589512
## Parkhomenko  -16.093210
## Terek       -246.812662
## Gomez         38.723602
## Turi          25.752820
## Lorenzo      -28.753395
## Karlivans     98.317174
## Korkizoglou -586.914006
## Uldal       -103.355238
## Casarsa     -367.609198
por_res <- (res/y)*100
unidad <- cbind(x,y,eta,res,por_res)                 # tabla de resultados
rownames(unidad) <- lab
colnames(unidad) <- c(nom,"eta","residuos","% res")
unidad                                       # impresión de la tabla
##             X100m Shot.put Discus Points      eta    residuos       % res
## Sebrle      10.85    16.36  48.72   8893 8420.544  472.456260  5.31267582
## Clay        10.44    15.23  50.11   8820 8676.638  143.362309  1.62542300
## Karpov      10.50    15.93  51.65   8725 8742.604  -17.603935 -0.20176430
## Macey       10.89    15.73  48.34   8414 8295.968  118.031986  1.40280468
## Warners     10.62    14.48  43.73   8343 8288.787   54.212627  0.64979776
## Zsivoczky   10.91    15.31  45.62   8287 8169.366  117.634373  1.41950493
## Hernu       10.97    14.65  44.72   8237 8012.316  224.684286  2.72774416
## Nool        10.80    14.26  42.05   8235 8063.253  171.747178  2.08557594
## Bernard     10.69    14.80  44.75   8225 8286.604  -61.604500 -0.74899088
## Schwarzl    10.98    14.01  42.43   8102 7875.314  226.686403  2.79790673
## Pogorelov   10.95    15.10  44.60   8084 8085.635   -1.634832 -0.02022306
## Schoenbeck  10.90    14.77  44.41   8077 8085.054   -8.053684 -0.09971133
## Barras      11.14    14.91  44.83   8067 7893.263  173.736747  2.15367233
## Smith       10.85    15.24  49.02   8023 8283.332 -260.331992 -3.24482105
## Averyanov   10.55    14.44  39.88   8021 8269.863 -248.862561 -3.10263759
## Ojaniemi    10.68    14.97  40.35   8006 8228.940 -222.940351 -2.78466589
## Smirnov     10.89    13.88  42.47   7993 7941.295   51.704806  0.64687609
## Qi          11.06    13.55  45.13   7934 7798.071  135.929480  1.71325284
## Drews       10.87    13.07  40.11   7926 7808.410  117.589512  1.48359212
## Parkhomenko 11.14    15.69  41.90   7918 7934.093  -16.093210 -0.20324842
## Terek       10.92    15.15  45.62   7893 8139.813 -246.812662 -3.12698165
## Gomez       11.08    14.57  40.95   7865 7826.276   38.723602  0.49235349
## Turi        11.08    13.62  39.83   7708 7682.247   25.752820  0.33410509
## Lorenzo     11.10    13.22  40.22   7592 7620.753  -28.753395 -0.37873281
## Karlivans   11.33    13.30  43.34   7583 7484.683   98.317174  1.29654720
## Korkizoglou 10.86    14.81  46.07   7573 8159.914 -586.914006 -7.75008591
## Uldal       11.23    13.53  43.01   7495 7598.355 -103.355238 -1.37898916
## Casarsa     11.36    14.92  48.66   7404 7771.609 -367.609198 -4.96500808
# gráficos clásicos: ejemplo con el primero regresor
plot(x[,1],y,xlab="X100m",ylab="Points",ylim=c(min(y,eta),max(y,eta)),pch=".")
text(x[,1],y,labels=lab,pos=2)
points(x[,1],eta,col="red",pch=".")
text(x[,1],eta,labels=lab,col="red",pos=2)

legend("topright",            
       legend=c("Y", "Eta"), 
       pch=c("*", "*"),       
       col=c("black", "red"))  

# gráficos de valores estimados y residuos
plot(eta,y,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(eta,y,labels=lab,pos=2)
points(eta,eta,col="red")

plot(eta,res,asp=1,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(eta,res,labels=lab,pos=2)

# Del gráfico residuos vs estimados. Se puede ver que hay observaciones como Sebrle y Korkizoglou con alto valor residual. Además de observaciones como  Clay Parkhomenko    Casarsa tienen un valor alto de apalancamiento. Se evaluará los valores para posteriormente analizar el retiro de estas observaciones
# datos para anova
ssr   <- t(eta) %*% eta; ssr       # suma de  cuadrados por regresión
##            [,1]
## [1,] 1817691554
sse   <- ssy - ssr; sse            # suma por residuos
##         [,1]
## [1,] 1218703
msr   <- ssr/p1; msr               # cuadrado promedio por regresión
##           [,1]
## [1,] 454422888
mse   <- sse/(n-p1); mse           # cuadrado promedio por residuos
##         [,1]
## [1,] 50779.3
df    <- c(p1,n-p1,n)              # grados de libertad
ssq   <- c(ssr,sse,ssy)            # sumas de cuadrados
msq   <- c(msr,mse,0)              # suma de residuos
anova <- (cbind(df,ssq,msq))       # tabla de anova
colnames(anova) <- c("Degrees of Freedom","Sum of squares","Mean squares")
rownames(anova) <- c("Regression","Residuals","Total")
anova
##            Degrees of Freedom Sum of squares Mean squares
## Regression                  4     1817691554  454422888.4
## Residuals                  24        1218703      50779.3
## Total                      28     1818910257          0.0
# apalancamiento
Hat   <- X %*% XXm1 %*% t(X)                  # matriz H sombrero
rownames(Hat) <- lab                          # inclusivo de su etiquetas
colnames(Hat) <- lab
Hat
##                   Sebrle         Clay      Karpov        Macey       Warners    Zsivoczky      Hernu          Nool      Bernard     Schwarzl    Pogorelov Schoenbeck       Barras         Smith   Averyanov     Ojaniemi      Smirnov           Qi       Drews  Parkhomenko         Terek         Gomez
## Sebrle       0.197157998  0.049511478  0.12229595  0.138980203 -0.0035954283  0.101577330 0.04259219 -0.0083280130  0.033107195 -0.017645809  0.085199654 0.04858397  0.082764395  0.0868080078 -0.01052288  0.052449926 -0.038239638 -0.058097628 -0.11772919  0.161776009  0.0867116592  0.0472311193
## Clay         0.049511478  0.304332605  0.27498456  0.070278822  0.1294117632  0.022004846 0.02451966  0.0399280350  0.109794789  0.006319247 -0.003439777 0.03175537 -0.048572972  0.1360285338  0.05549229 -0.013597490  0.047042096  0.081957137  0.04914002 -0.179847545  0.0298518808 -0.1055925664
## Karpov       0.122295952  0.274984563  0.28443020  0.116249337  0.0922766963  0.053675579 0.03086421  0.0134972414  0.094304434 -0.014484582  0.023633471 0.03733748 -0.011550294  0.1520065950  0.01568807 -0.013967080  0.010210989  0.041734482 -0.02640323 -0.098548101  0.0545288655 -0.0852166309
## Macey        0.138980203  0.070278822  0.11624934  0.110690084  0.0051373833  0.074631438 0.04278801 -0.0042128936  0.031129466 -0.000921098  0.060906278 0.04143160  0.066545807  0.0905852564 -0.02746984  0.008231702 -0.012487134  0.002024782 -0.06632134  0.081766024  0.0680916412  0.0208909160
## Warners     -0.003595428  0.129411763  0.09227670  0.005137383  0.1143726228  0.015754460 0.02103447  0.0780392997  0.086392790  0.039955131  0.013159544 0.03539270 -0.028958881  0.0283995866  0.13785505  0.089560970  0.066076294  0.029897498  0.09747152 -0.047782896  0.0178891952  0.0007434437
## Zsivoczky    0.101577330  0.022004846  0.05367558  0.074631438  0.0157544599  0.064265178 0.03857435  0.0197935347  0.031387252  0.015764987  0.059518272 0.04175525  0.059359606  0.0469066955  0.02265493  0.053188045  0.004811589 -0.010900213 -0.02745454  0.106190218  0.0571031171  0.0524813998
## Hernu        0.042592193  0.024519663  0.03086421  0.042788006  0.0210344678  0.038574346 0.03877653  0.0260868775  0.026380766  0.034467835  0.038383411 0.03528733  0.047066040  0.0413183358  0.01149186  0.018985309  0.030252307  0.041790615  0.02420221  0.043494019  0.0387750397  0.0374542798
## Nool        -0.008328013  0.039928035  0.01349724 -0.004212894  0.0780392997  0.019793535 0.02608688  0.0723009996  0.058205120  0.049751604  0.025361196 0.03599251  0.002299974 -0.0005339676  0.11877611  0.094345024  0.061459660  0.021938579  0.09088116  0.019441702  0.0200572959  0.0448346744
## Bernard      0.033107195  0.109794789  0.09430443  0.031129466  0.0863927904  0.031387252 0.02638077  0.0582051202  0.072128014  0.029854013  0.026432062 0.03726853 -0.004006993  0.0413097498  0.09912135  0.073369594  0.045866920  0.019353515  0.05562737 -0.009211401  0.0308935155  0.0092657523
## Schwarzl    -0.017645809  0.006319247 -0.01448458 -0.000921098  0.0399551309  0.015764987 0.03446783  0.0497516040  0.029854013  0.055658984  0.023427489 0.03196622  0.028200569  0.0096880507  0.04942735  0.035483465  0.058564470  0.061309324  0.08407953  0.015292641  0.0197947353  0.0452712848
## Pogorelov    0.085199654 -0.003439777  0.02363347  0.060906278  0.0131595442  0.059518272 0.03838341  0.0253611965  0.026432062  0.023427489  0.058730320 0.04119872  0.061186278  0.0312173903  0.03041959  0.060884391  0.011103802 -0.009322393 -0.01334000  0.115608253  0.0527410899  0.0653387829
## Schoenbeck   0.048583973  0.031755370  0.03733748  0.041431603  0.0353927009  0.041755255 0.03528733  0.0359925113  0.037268530  0.031966225  0.041198719 0.03747385  0.038229837  0.0338966819  0.04287157  0.048485694  0.030301847  0.020573278  0.02500677  0.054071374  0.0397397981  0.0419965717
## Barras       0.082764395 -0.048572972 -0.01155029  0.066545807 -0.0289588811  0.059359606 0.04706604  0.0022999740 -0.004006993  0.028200569  0.061186278 0.03822984  0.091818645  0.0374854847 -0.03702532  0.009627151  0.004614571  0.020682583 -0.02699683  0.128849351  0.0550133208  0.0738824242
## Smith        0.086808008  0.136028534  0.15200659  0.090585256  0.0283995866  0.046906695 0.04131834 -0.0005339676  0.041309750  0.009688051  0.031217390 0.03389668  0.037485485  0.1123457403 -0.04254749 -0.044131914  0.013423023  0.067889146 -0.01627258 -0.031316092  0.0496765768 -0.0287650313
## Averyanov   -0.010522885  0.055492293  0.01568807 -0.027469835  0.1378550493  0.022654929 0.01149186  0.1187761095  0.099121349  0.049427348  0.030419592 0.04287157 -0.037025318 -0.0425474946  0.25093627  0.211876056  0.072916704 -0.050069922  0.11788385  0.050801890  0.0160752000  0.0670985187
## Ojaniemi     0.052449926 -0.013597490 -0.01396708  0.008231702  0.0895609704  0.053188045 0.01898531  0.0943450235  0.073369594  0.035483465  0.060884391 0.04848569  0.009627151 -0.0441319139  0.21187606  0.219156427  0.038113162 -0.093298625  0.04805420  0.157858168  0.0391109945  0.1078009972
## Smirnov     -0.038239638  0.047042096  0.01021099 -0.012487134  0.0660762935  0.004811589 0.03025231  0.0614596600  0.045866920  0.058564470  0.011103802 0.03030185  0.004614571  0.0134230231  0.07291670  0.038113162  0.071801151  0.073772131  0.11034905 -0.031812497  0.0113805542  0.0244706733
## Qi          -0.058097628  0.081957137  0.04173448  0.002024782  0.0298974980 -0.010900213 0.04179062  0.0219385791  0.019353515  0.061309324 -0.009322393 0.02057328  0.020682583  0.0678891459 -0.05006992 -0.093298625  0.073772131  0.170270272  0.10918175 -0.125346133  0.0064322557 -0.0276499352
## Drews       -0.117729194  0.049140023 -0.02640323 -0.066321345  0.0974715235 -0.027454542 0.02420221  0.0908811613  0.055627368  0.084079531 -0.013340001 0.02500677 -0.026996833 -0.0162725758  0.11788385  0.048054203  0.110349048  0.109181752  0.19062451 -0.093770220 -0.0145107554  0.0191377055
## Parkhomenko  0.161776009 -0.179847545 -0.09854810  0.081766024 -0.0477828959  0.106190218 0.04349402  0.0194417018 -0.009211401  0.015292641  0.115608253 0.05407137  0.128849351 -0.0313160922  0.05080189  0.157858168 -0.031812497 -0.125346133 -0.09377022  0.351112842  0.0827133773  0.1819985039
## Terek        0.086711659  0.029851881  0.05452887  0.068091641  0.0178891952  0.057103117 0.03877504  0.0200572959  0.030893516  0.019794735  0.052741090 0.03973980  0.055013321  0.0496765768  0.01607520  0.039110994  0.011380554  0.006432256 -0.01451076  0.082713377  0.0522993092  0.0438134573
## Gomez        0.047231119 -0.105592566 -0.08521663  0.020890916  0.0007434437  0.052481400 0.03745428  0.0448346744  0.009265752  0.045271285  0.065338783 0.04199657  0.073882424 -0.0287650313  0.06709852  0.107800997  0.024470673 -0.027649935  0.01913771  0.181998504  0.0438134573  0.1234391080
## Turi        -0.045277895 -0.068081919 -0.09350828 -0.029494684  0.0296288882  0.010532019 0.03412136  0.0628460865  0.016479920  0.071551701  0.028049982 0.03237378  0.038252934 -0.0328728887  0.07303237  0.066777694  0.068065531  0.049771519  0.10715649  0.062558713  0.0133634203  0.0868556885
## Lorenzo     -0.083184051 -0.036588122 -0.08069825 -0.043527467  0.0357152971 -0.008883213 0.03503576  0.0608732382  0.016009523  0.080733553  0.008360486 0.02653350  0.025581559 -0.0185987919  0.04871011  0.020757616  0.084954295  0.101330661  0.13976489 -0.010078307  0.0008520327  0.0566159490
## Karlivans   -0.057911051 -0.038835501 -0.06024624 -0.003439627 -0.0278428404 -0.003354656 0.05069163  0.0065417432 -0.023654196  0.072359316  0.008189106 0.02097208  0.069287018  0.0367529497 -0.09449552 -0.101583479  0.063280628  0.165429150  0.09097676 -0.034959926  0.0118029367  0.0286743415
## Korkizoglou  0.047740049  0.085847926  0.08637501  0.051849116  0.0420884942  0.036290951 0.03625909  0.0266771347  0.043357751  0.027113813  0.030116750 0.03457483  0.028329521  0.0646076430  0.01593112  0.008440624  0.031949099  0.050852438  0.02544339 -0.003834202  0.0380636447  0.0055468241
## Uldal       -0.043682586 -0.029668140 -0.04942009 -0.001973313 -0.0091676574  0.003634889 0.04598433  0.0192188374 -0.008553210  0.067066639  0.014239095 0.02459860  0.058772081  0.0272251282 -0.05108805 -0.058214271  0.060670804  0.131283651  0.08663689 -0.013898707  0.0150278218  0.0365303830
## Casarsa      0.099727036 -0.023977041  0.02994961  0.107639530 -0.0889053958  0.058736687 0.06332217 -0.0560727654 -0.041613593  0.019949535  0.052696755 0.03033026  0.129262141  0.1074681807 -0.19584123 -0.137764352 -0.012912082  0.107210077 -0.06881938  0.066872940  0.0627080200  0.0258513650
##                     Turi       Lorenzo    Karlivans  Korkizoglou        Uldal     Casarsa
## Sebrle      -0.045277895 -0.0831840509 -0.057911051  0.047740049 -0.043682586  0.09972704
## Clay        -0.068081919 -0.0365881217 -0.038835501  0.085847926 -0.029668140 -0.02397704
## Karpov      -0.093508280 -0.0806982521 -0.060246245  0.086375009 -0.049420093  0.02994961
## Macey       -0.029494684 -0.0435274668 -0.003439627  0.051849116 -0.001973313  0.10763953
## Warners      0.029628888  0.0357152971 -0.027842840  0.042088494 -0.009167657 -0.08890540
## Zsivoczky    0.010532019 -0.0088832132 -0.003354656  0.036290951  0.003634889  0.05873669
## Hernu        0.034121364  0.0350357611  0.050691627  0.036259092  0.045984331  0.06332217
## Nool         0.062846086  0.0608732382  0.006541743  0.026677135  0.019218837 -0.05607277
## Bernard      0.016479920  0.0160095234 -0.023654196  0.043357751 -0.008553210 -0.04161359
## Schwarzl     0.071551701  0.0807335527  0.072359316  0.027113813  0.067066639  0.01994953
## Pogorelov    0.028049982  0.0083604863  0.008189106  0.030116750  0.014239095  0.05269676
## Schoenbeck   0.032373776  0.0265335005  0.020972078  0.034574830  0.024598600  0.03033026
## Barras       0.038252934  0.0255815590  0.069287018  0.028329521  0.058772081  0.12926214
## Smith       -0.032872889 -0.0185987919  0.036752950  0.064607643  0.027225128  0.10746818
## Averyanov    0.073032369  0.0487101090 -0.094495523  0.015931118 -0.051088045 -0.19584123
## Ojaniemi     0.066777694  0.0207576162 -0.101583479  0.008440624 -0.058214271 -0.13776435
## Smirnov      0.068065531  0.0849542954  0.063280628  0.031949099  0.060670804 -0.01291208
## Qi           0.049771519  0.1013306606  0.165429150  0.050852438  0.131283651  0.10721008
## Drews        0.107156490  0.1397648863  0.090976762  0.025443390  0.086636891 -0.06881938
## Parkhomenko  0.062558713 -0.0100783066 -0.034959926 -0.003834202 -0.013898707  0.06687294
## Terek        0.013363420  0.0008520327  0.011802937  0.038063645  0.015027822  0.06270802
## Gomez        0.086855688  0.0566159490  0.028674341  0.005546824  0.036530383  0.02585136
## Turi         0.114018919  0.1161053682  0.089613432  0.009541325  0.084911795  0.00362673
## Lorenzo      0.116105368  0.1389268056  0.131880839  0.017056993  0.115916962  0.01984277
## Karlivans    0.089613432  0.1318808390  0.220826509  0.032168664  0.175229751  0.17164619
## Korkizoglou  0.009541325  0.0170569931  0.032168664  0.047769654  0.029844969  0.04999738
## Uldal        0.084911795  0.1159169616  0.175229751  0.029844969  0.142070699  0.12680270
## Casarsa      0.003626730  0.0198427681  0.171646193  0.049997378  0.126802695  0.29226576
lev   <- diag(Hat); lev                       # apalancamientos
##      Sebrle        Clay      Karpov       Macey     Warners   Zsivoczky       Hernu        Nool     Bernard    Schwarzl   Pogorelov  Schoenbeck      Barras       Smith   Averyanov    Ojaniemi     Smirnov          Qi       Drews Parkhomenko       Terek       Gomez        Turi     Lorenzo   Karlivans 
##  0.19715800  0.30433260  0.28443020  0.11069008  0.11437262  0.06426518  0.03877653  0.07230100  0.07212801  0.05565898  0.05873032  0.03747385  0.09181864  0.11234574  0.25093627  0.21915643  0.07180115  0.17027027  0.19062451  0.35111284  0.05229931  0.12343911  0.11401892  0.13892681  0.22082651 
## Korkizoglou       Uldal     Casarsa 
##  0.04776965  0.14207070  0.29226576
pesos <- cbind(x,y,lev)                       # construcción de una salida conjunta
rownames(pesos) <- lab
pesos                                         # salida
##             X100m Shot.put Discus    y        lev
## Sebrle      10.85    16.36  48.72 8893 0.19715800
## Clay        10.44    15.23  50.11 8820 0.30433260
## Karpov      10.50    15.93  51.65 8725 0.28443020
## Macey       10.89    15.73  48.34 8414 0.11069008
## Warners     10.62    14.48  43.73 8343 0.11437262
## Zsivoczky   10.91    15.31  45.62 8287 0.06426518
## Hernu       10.97    14.65  44.72 8237 0.03877653
## Nool        10.80    14.26  42.05 8235 0.07230100
## Bernard     10.69    14.80  44.75 8225 0.07212801
## Schwarzl    10.98    14.01  42.43 8102 0.05565898
## Pogorelov   10.95    15.10  44.60 8084 0.05873032
## Schoenbeck  10.90    14.77  44.41 8077 0.03747385
## Barras      11.14    14.91  44.83 8067 0.09181864
## Smith       10.85    15.24  49.02 8023 0.11234574
## Averyanov   10.55    14.44  39.88 8021 0.25093627
## Ojaniemi    10.68    14.97  40.35 8006 0.21915643
## Smirnov     10.89    13.88  42.47 7993 0.07180115
## Qi          11.06    13.55  45.13 7934 0.17027027
## Drews       10.87    13.07  40.11 7926 0.19062451
## Parkhomenko 11.14    15.69  41.90 7918 0.35111284
## Terek       10.92    15.15  45.62 7893 0.05229931
## Gomez       11.08    14.57  40.95 7865 0.12343911
## Turi        11.08    13.62  39.83 7708 0.11401892
## Lorenzo     11.10    13.22  40.22 7592 0.13892681
## Karlivans   11.33    13.30  43.34 7583 0.22082651
## Korkizoglou 10.86    14.81  46.07 7573 0.04776965
## Uldal       11.23    13.53  43.01 7495 0.14207070
## Casarsa     11.36    14.92  48.66 7404 0.29226576
niv   <- 2 * p1 / n; niv                      # nivel para apalancamiento
## [1] 0.2857143
level <- which(lev>niv)                       # unidades con fuerte apalancamiento
out   <- rbind(level,lev[level])              # salida
rownames(out) <- c("unidad","apalancamiento"); out
##                     Clay Parkhomenko    Casarsa
## unidad         2.0000000  20.0000000 28.0000000
## apalancamiento 0.3043326   0.3511128  0.2922658
# De acuerdo al gráfico de residuos vs estimados y Resultados del apalancamiento

# podemos ver que las observaciones que tienen mayor apalancamiento es Clay (0.3043326), Parkhomenko(0.3511128) y Casarsa(0.2922658)
# comparación entre modelo total y modelo sin unidades con apalancamiento
# modelo completo X100m,Shot.put,Discus
lm1 <- lm(y~X100m+Shot.put+Discus,data=Decathlon); summary(lm1)
## 
## Call:
## lm(formula = y ~ X100m + Shot.put + Discus, data = Decathlon)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -586.91  -72.04   32.24  122.51  472.46 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 15210.15    2608.86   5.830 5.18e-06 ***
## X100m        -908.93     202.06  -4.498 0.000149 ***
## Shot.put      127.90      71.05   1.800 0.084438 .  
## Discus         20.11      17.62   1.142 0.264873    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 225.3 on 24 degrees of freedom
## Multiple R-squared:  0.6748, Adjusted R-squared:  0.6342 
## F-statistic:  16.6 on 3 and 24 DF,  p-value: 4.71e-06
residuals(lm1)
##      Sebrle        Clay      Karpov       Macey     Warners   Zsivoczky       Hernu        Nool     Bernard    Schwarzl   Pogorelov  Schoenbeck      Barras       Smith   Averyanov    Ojaniemi     Smirnov          Qi       Drews Parkhomenko       Terek       Gomez        Turi     Lorenzo   Karlivans 
##  472.456260  143.362309  -17.603935  118.031986   54.212627  117.634373  224.684286  171.747178  -61.604500  226.686403   -1.634832   -8.053684  173.736747 -260.331992 -248.862561 -222.940351   51.704806  135.929480  117.589512  -16.093210 -246.812662   38.723602   25.752820  -28.753395   98.317174 
## Korkizoglou       Uldal     Casarsa 
## -586.914006 -103.355238 -367.609198
# y su plot estimados observados
plot(lm1$fitted.values,y,pch=20,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm1$fitted.values,y,labels=lab,pos=3,offset(0.5))
points(lm1$fitted.values,lm1$fitted.values,pch=20,col="red")

# y su plot estimados residuos
plot(lm1$fitted.values,lm1$residuals,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm1$fitted.values,lm1$residuals,labels=lab,pos=3,offset(0.5))

# modelo sin unidades con fuerte apalancamiento (Todas las observaciones menos la Clay Parkhomenko, Casarsa)
lm2 <- lm(y[-level]~X100m+Shot.put+Discus,data=Decathlon[-level,]); summary(lm2)
## 
## Call:
## lm(formula = y[-level] ~ X100m + Shot.put + Discus, data = Decathlon[-level, 
##     ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -590.75  -48.97    0.23  118.72  406.48 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 10948.31    3330.56   3.287  0.00351 **
## X100m        -572.80     260.14  -2.202  0.03899 * 
## Shot.put      171.02      86.95   1.967  0.06255 . 
## Discus         19.61      21.54   0.910  0.37298   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 215.9 on 21 degrees of freedom
## Multiple R-squared:  0.6402, Adjusted R-squared:  0.5888 
## F-statistic: 12.46 on 3 and 21 DF,  p-value: 6.76e-05
residuals(lm2)
##       Sebrle       Karpov        Macey      Warners    Zsivoczky        Hernu         Nool      Bernard     Schwarzl    Pogorelov   Schoenbeck       Barras        Smith    Averyanov     Ojaniemi      Smirnov           Qi        Drews        Terek        Gomez         Turi      Lorenzo    Karlivans 
##  406.4824456   54.0910697   65.5875086  144.0930353   75.2027494  190.0901008  209.7617099   -8.5360580  215.1705846  -48.9718939  -24.4503278   70.8448971 -277.8578489 -135.6755612 -176.0666810   76.0665048  118.7249799  182.4086760 -285.7060995  -31.3016138   -3.8733022  -47.6561660    0.2333375 
##  Korkizoglou        Uldal 
## -590.7508443 -177.9112029
# hay que tirar las unidades también de la representación gráfica
plot(lm2$fitted.values,y[-level],pch=20,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm2$fitted.values,y[-level],labels=lab,pos=3,offset(0.5))
points(lm2$fitted.values,lm2$fitted.values,pch=20,col="red")

plot(lm2$fitted.values,lm2$residuals,asp=1,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm2$fitted.values,lm2$residuals,labels=lab,pos=3,offset(0.5))

# Gráfico observados vs estimados: Se ve que las observaciones se ajustan en la recta identidad por lo que se tiene un buen ajuste de los datos respecto al modelo.Por otro lado, en el gráfico residuos vs estimados, la mayor parte de las observaciones están cerca de la recta horizontal 0. Sin embargo, aún persisten observaciones con alto grado residual como Lorenzo
# gráficos de comparación de estimados y residuos
# ambos contra y y no contra los estimados
plot(y,lm1$fitted.values,pch=20,asp=1,ylab="fitted values")
abline(0,1)
text(y,lm1$fitted.values,y,labels=lab,pos=4,offset(0.5))
points(y[-out[1,]],lm2$fitted.values,pch=20,col="red")
text(y[-out[1,]],lm2$fitted.values,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))

legend("bottomright", # Escoge la posición adecuada para tu gráfico
       legend=c("Modelo 1", "Modelo 2 sin Obs Clay,Parkhomenko,Casarsa"), 
       pch=c(2, 2),
       col=c("black", "red"),
       title="Leyenda",
       cex=0.5)

# por esto hay que cambiar el signo a los residuos
plot(y,-lm1$residuals,pch=20,asp=1,ylab="residuals")
abline(h=0)
text(y,-lm1$residuals,labels=lab,pch=20,pos=4,offset(0.5))
points(y[-out[1,]],-lm2$residuals,pch=20,col="red")
text(y[-out[1,]],-lm2$residuals,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))

# Análisis de residuales 
# Al eliminar los outliers (como parece ser el caso con lm2) los residuos se reducen (los puntos rojos están más cerca de cero), esto sugeriría que estos puntos tenían una gran influencia y que su eliminación mejora la bondad de ajuste del modelo.

2) Points ~ X110m.hurdle,Javeline,X1500m

# Lectura de los datos
Decathlon <- read.csv("Decathlon.csv", row.names = 1);head(Decathlon)
##           X100m Long.jump Shot.put High.jump X400m X110m.hurdle Discus Pole.vault Javeline X1500m Points
## Sebrle    10.85      7.84    16.36      2.12 48.36        14.05  48.72        5.0    70.52 280.01   8893
## Clay      10.44      7.96    15.23      2.06 49.19        14.13  50.11        4.9    69.71 282.00   8820
## Karpov    10.50      7.81    15.93      2.09 46.81        13.97  51.65        4.6    55.54 278.11   8725
## Macey     10.89      7.47    15.73      2.15 48.97        14.56  48.34        4.4    58.46 265.42   8414
## Warners   10.62      7.74    14.48      1.97 47.97        14.01  43.73        4.9    55.39 278.05   8343
## Zsivoczky 10.91      7.14    15.31      2.12 49.40        14.95  45.62        4.7    63.45 269.54   8287
attach(Decathlon) #archivo en uso
## The following objects are masked from Decathlon (pos = 3):
## 
##     Discus, High.jump, Javeline, Long.jump, Points, Pole.vault, Shot.put, X100m, X110m.hurdle, X1500m, X400m
lab = rownames(Decathlon) #etiquetas de las unidades en lab
# regresión a mano
# definición de los datos para el modelo
n     <- dim(Decathlon)[1]; n                 # n es el número de observaciones
## [1] 28
x     <- as.matrix(Decathlon[,c(6,9,10)]); x        # x es el caracter descriptivo
##             X110m.hurdle Javeline X1500m
## Sebrle             14.05    70.52 280.01
## Clay               14.13    69.71 282.00
## Karpov             13.97    55.54 278.11
## Macey              14.56    58.46 265.42
## Warners            14.01    55.39 278.05
## Zsivoczky          14.95    63.45 269.54
## Hernu              14.25    57.76 264.35
## Nool               14.80    61.33 276.33
## Bernard            14.17    55.27 276.31
## Schwarzl           14.25    56.32 273.56
## Pogorelov          14.21    53.45 287.63
## Schoenbeck         14.34    60.89 278.82
## Barras             14.37    64.55 267.09
## Smith              14.01    61.52 272.74
## Averyanov          14.39    54.51 271.02
## Ojaniemi           15.01    59.26 275.71
## Smirnov            14.77    60.88 263.31
## Qi                 14.78    60.79 272.63
## Drews              14.01    51.53 274.21
## Parkhomenko        14.88    65.82 277.94
## Terek              15.12    50.62 290.36
## Gomez              14.41    60.71 269.70
## Turi               14.26    59.34 290.01
## Lorenzo            15.38    58.36 263.08
## Karlivans          14.98    52.92 278.67
## Korkizoglou        14.96    53.05 317.00
## Uldal              15.09    60.00 281.70
## Casarsa            15.39    58.62 296.12
p     <- dim(x)[2]; p                        # numero de regresores
## [1] 3
y     <- Points                               # y es el caracter respuesta   
nomy  <- deparse(substitute(Points)); nomy    # nomy es su nombre
## [1] "Points"
nom   <- c(colnames(x),nomy) ; nom           # etiquetas de las variables en nom
## [1] "X110m.hurdle" "Javeline"     "X1500m"       "Points"
# estadísticas
xm    <- apply(x,2,mean); xm                 # cálculo de xm, promedio de x
## X110m.hurdle     Javeline       X1500m 
##     14.55357     58.94893    277.55071
ym    <- sum(y)/n; ym                        # cálculo de ym, promedio de y
## [1] 8051.536
ssx   <- apply(x^2,2,sum); ssx               # ssx es la suma de los x cuadros
## X110m.hurdle     Javeline       X1500m 
##     5935.872    97967.828  2160424.133
ssy   <- sum(y^2); ssy                       # ssy es la suma de los y cuadros
## [1] 1818910257
sxy   <- t(x)%*%y ; sxy                      # sxy es la suma de los productos xy
##                  [,1]
## X110m.hurdle  3278092
## Javeline     13310718
## X1500m       62538969
ssxc  <- ssx-n*xm^2 ; ssxc                   # ssxc es ssx centrado sobre el promedio
## X110m.hurdle     Javeline       X1500m 
##     5.291443   668.495068  3460.961386
ssyc  <- ssy-n*ym^2 ; ssyc                   # ssyc es ssy centrado sobre el promedio
## [1] 3747891
sxyc  <- sxy-n*xm*ym; sxyc                   # sxyc es sxy centrado sobre el promedio
##                    [,1]
## X110m.hurdle  -2908.684
## Javeline      21094.966
## X1500m       -32896.191
varx  <- ssxc/n; varx                        # varx es la varianza de x
## X110m.hurdle     Javeline       X1500m 
##    0.1889801   23.8748239  123.6057638
vary  <- ssyc/n; vary                        # vary es la varianza de y
## [1] 133853.2
covxy <- sxyc/n; covxy                       # covxy es la covarianza de xy
##                    [,1]
## X110m.hurdle  -103.8816
## Javeline       753.3916
## X1500m       -1174.8640
stats <- matrix(8*(2*p+1)*0,nrow=8,ncol=(2*p+1)) # construcción de tabla de estadísticas
rownames(stats) <- c("Mínimo","Máximo","Promedio","Total","Suma de cuadrados",
                     "Cuadrados centrados","Varianza covarianza","Desvío estándar")
colnames(stats) <- c(nom,"x1y","x2y","x3y")
stats[,1:p] <- rbind(apply(x,2,min),apply(x,2,max),xm,apply(x,2,sum),ssx,ssxc,varx,sqrt(varx))
stats[,p+1] <- rbind(min(y),max(y),ym,sum(y),ssy,ssyc,vary,sqrt(vary))
stats[,c((p+2):(2*p+1))] <- rbind(t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(sxy),t(sxyc),t(covxy),t(rep(0,p)))
stats                                        # impresión de la tabla
##                     X110m.hurdle     Javeline       X1500m       Points          x1y          x2y          x3y
## Mínimo                13.9700000    50.620000 2.630800e+02 7.404000e+03       0.0000 0.000000e+00        0.000
## Máximo                15.3900000    70.520000 3.170000e+02 8.893000e+03       0.0000 0.000000e+00        0.000
## Promedio              14.5535714    58.948929 2.775507e+02 8.051536e+03       0.0000 0.000000e+00        0.000
## Total                407.5000000  1650.570000 7.771420e+03 2.254430e+05       0.0000 0.000000e+00        0.000
## Suma de cuadrados   5935.8718000 97967.828100 2.160424e+06 1.818910e+09 3278092.1200 1.331072e+07 62538969.490
## Cuadrados centrados    5.2914429   668.495068 3.460961e+03 3.747891e+06   -2908.6836 2.109497e+04   -32896.191
## Varianza covarianza    0.1889801    23.874824 1.236058e+02 1.338532e+05    -103.8816 7.533916e+02    -1174.864
## Desvío estándar        0.4347184     4.886187 1.111781e+01 3.658596e+02       0.0000 0.000000e+00        0.000
print("Matriz de varianza y covarianza de los regresores")
## [1] "Matriz de varianza y covarianza de los regresores"
vcovx <- (n-1)/n * cov(x) ; vcovx            # matriz de varianza/covarianza de los xy
##              X110m.hurdle    Javeline      X1500m
## X110m.hurdle    0.1889801  -0.1695462   0.8653474
## Javeline       -0.1695462  23.8748239 -13.6627207
## X1500m          0.8653474 -13.6627207 123.6057638
# estimación
X      <- cbind(rep(1,n),x); X               # se necesita ajuntar la columna de unos
##               X110m.hurdle Javeline X1500m
## Sebrle      1        14.05    70.52 280.01
## Clay        1        14.13    69.71 282.00
## Karpov      1        13.97    55.54 278.11
## Macey       1        14.56    58.46 265.42
## Warners     1        14.01    55.39 278.05
## Zsivoczky   1        14.95    63.45 269.54
## Hernu       1        14.25    57.76 264.35
## Nool        1        14.80    61.33 276.33
## Bernard     1        14.17    55.27 276.31
## Schwarzl    1        14.25    56.32 273.56
## Pogorelov   1        14.21    53.45 287.63
## Schoenbeck  1        14.34    60.89 278.82
## Barras      1        14.37    64.55 267.09
## Smith       1        14.01    61.52 272.74
## Averyanov   1        14.39    54.51 271.02
## Ojaniemi    1        15.01    59.26 275.71
## Smirnov     1        14.77    60.88 263.31
## Qi          1        14.78    60.79 272.63
## Drews       1        14.01    51.53 274.21
## Parkhomenko 1        14.88    65.82 277.94
## Terek       1        15.12    50.62 290.36
## Gomez       1        14.41    60.71 269.70
## Turi        1        14.26    59.34 290.01
## Lorenzo     1        15.38    58.36 263.08
## Karlivans   1        14.98    52.92 278.67
## Korkizoglou 1        14.96    53.05 317.00
## Uldal       1        15.09    60.00 281.70
## Casarsa     1        15.39    58.62 296.12
p1     <- p + 1; p1                          # un regresor más
## [1] 4
XX     <- t(X)%*%X; XX                       # matriz X'X
##                      X110m.hurdle  Javeline     X1500m
##                28.00      407.500   1650.57    7771.42
## X110m.hurdle  407.50     5935.872  24016.94  113126.15
## Javeline     1650.57    24016.941  97967.83  457734.33
## X1500m       7771.42   113126.146 457734.33 2160424.13
XXm1   <- solve(XX); XXm1                    # su inversa
##                           X110m.hurdle      Javeline        X1500m
##              67.68929468 -2.5232989465 -0.1514649671 -0.0792715359
## X110m.hurdle -2.52329895  0.1955042881  0.0006459688 -0.0012972975
## Javeline     -0.15146497  0.0006459688  0.0015990444  0.0001722275
## X1500m       -0.07927154 -0.0012972975  0.0001722275  0.0003170564
bh     <- XXm1 %*% t(X) %*% y                # estimación de los betas
print("Estimaciones de los beta")
## [1] "Estimaciones de los beta"
bh 
##                      [,1]
##              14803.597126
## X110m.hurdle  -512.357276
## Javeline        26.187240
## X1500m          -3.023386
eta    <- X %*% bh; eta                      # valores estimados
##                 [,1]
## Sebrle      8605.123
## Clay        8536.906
## Karpov      8259.571
## Macey       8072.114
## Warners     8235.330
## Zsivoczky   7990.513
## Hernu       8215.849
## Nool        7991.321
## Bernard     8155.471
## Schwarzl    8150.294
## Pogorelov   8053.092
## Schoenbeck  8207.954
## Barras      8323.893
## Smith       8411.912
## Averyanov   8038.844
## Ojaniemi    7831.393
## Smirnov     8034.272
## Qi          7998.613
## Drews       8145.857
## Parkhomenko 8063.045
## Terek       7504.483
## Gomez       8194.949
## Turi        8174.521
## Lorenzo     7656.437
## Karlivans   7671.787
## Korkizoglou 7569.552
## Uldal       7791.672
## Casarsa     7558.230
res    <- y - eta; res                       # residuos
##                    [,1]
## Sebrle       287.876742
## Clay         283.093527
## Karpov       465.428587
## Macey        341.885869
## Warners      107.669561
## Zsivoczky    296.487228
## Hernu         21.151158
## Nool         243.679377
## Bernard       69.528502
## Schwarzl     -48.293830
## Pogorelov     30.908300
## Schoenbeck  -130.954353
## Barras      -256.893253
## Smith       -388.912403
## Averyanov    -17.844307
## Ojaniemi     174.607493
## Smirnov      -41.271570
## Qi           -64.613187
## Drews       -219.857494
## Parkhomenko -145.045099
## Terek        388.517156
## Gomez       -329.948921
## Turi        -466.521023
## Lorenzo      -64.437164
## Karlivans    -88.786899
## Korkizoglou    3.448002
## Uldal       -296.672400
## Casarsa     -154.229599
por_res <- (res/y)*100
unidad <- cbind(x,y,eta,res,por_res)                 # tabla de resultados
rownames(unidad) <- lab
colnames(unidad) <- c(nom,"eta","residuos","% res")
unidad                                       # impresión de la tabla
##             X110m.hurdle Javeline X1500m Points      eta    residuos      % res
## Sebrle             14.05    70.52 280.01   8893 8605.123  287.876742  3.2371162
## Clay               14.13    69.71 282.00   8820 8536.906  283.093527  3.2096772
## Karpov             13.97    55.54 278.11   8725 8259.571  465.428587  5.3344251
## Macey              14.56    58.46 265.42   8414 8072.114  341.885869  4.0632977
## Warners            14.01    55.39 278.05   8343 8235.330  107.669561  1.2905377
## Zsivoczky          14.95    63.45 269.54   8287 7990.513  296.487228  3.5777390
## Hernu              14.25    57.76 264.35   8237 8215.849   21.151158  0.2567823
## Nool               14.80    61.33 276.33   8235 7991.321  243.679377  2.9590695
## Bernard            14.17    55.27 276.31   8225 8155.471   69.528502  0.8453313
## Schwarzl           14.25    56.32 273.56   8102 8150.294  -48.293830 -0.5960729
## Pogorelov          14.21    53.45 287.63   8084 8053.092   30.908300  0.3823392
## Schoenbeck         14.34    60.89 278.82   8077 8207.954 -130.954353 -1.6213242
## Barras             14.37    64.55 267.09   8067 8323.893 -256.893253 -3.1844955
## Smith              14.01    61.52 272.74   8023 8411.912 -388.912403 -4.8474686
## Averyanov          14.39    54.51 271.02   8021 8038.844  -17.844307 -0.2224699
## Ojaniemi           15.01    59.26 275.71   8006 7831.393  174.607493  2.1809579
## Smirnov            14.77    60.88 263.31   7993 8034.272  -41.271570 -0.5163464
## Qi                 14.78    60.79 272.63   7934 7998.613  -64.613187 -0.8143835
## Drews              14.01    51.53 274.21   7926 8145.857 -219.857494 -2.7738770
## Parkhomenko        14.88    65.82 277.94   7918 8063.045 -145.045099 -1.8318401
## Terek              15.12    50.62 290.36   7893 7504.483  388.517156  4.9223002
## Gomez              14.41    60.71 269.70   7865 8194.949 -329.948921 -4.1951548
## Turi               14.26    59.34 290.01   7708 8174.521 -466.521023 -6.0524263
## Lorenzo            15.38    58.36 263.08   7592 7656.437  -64.437164 -0.8487508
## Karlivans          14.98    52.92 278.67   7583 7671.787  -88.786899 -1.1708677
## Korkizoglou        14.96    53.05 317.00   7573 7569.552    3.448002  0.0455302
## Uldal              15.09    60.00 281.70   7495 7791.672 -296.672400 -3.9582708
## Casarsa            15.39    58.62 296.12   7404 7558.230 -154.229599 -2.0830578
# gráficos clásicos: ejemplo con el primero regresor
plot(x[,1],y,xlab="X110m.hurdle",ylab="Points",ylim=c(min(y,eta),max(y,eta)),pch=".")
text(x[,1],y,labels=lab,pos=2)
points(x[,1],eta,col="red",pch=".")
text(x[,1],eta,labels=lab,col="red",pos=2)

legend("topright",            
       legend=c("Y", "Eta"), 
       pch=c("*", "*"),       
       col=c("black", "red"))

# gráficos de valores estimados y residuos
plot(eta,y,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(eta,y,labels=lab,pos=2)
points(eta,eta,col="red")

plot(eta,res,asp=1,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(eta,res,labels=lab,pos=2)

# datos para anova
ssr   <- t(eta) %*% eta; ssr       # suma de  cuadrados por regresión
##            [,1]
## [1,] 1817304528
sse   <- ssy - ssr; sse            # suma por residuos
##         [,1]
## [1,] 1605729
msr   <- ssr/p1; msr               # cuadrado promedio por regresión
##           [,1]
## [1,] 454326132
mse   <- sse/(n-p1); mse           # cuadrado promedio por residuos
##          [,1]
## [1,] 66905.37
df    <- c(p1,n-p1,n)              # grados de libertad
ssq   <- c(ssr,sse,ssy)            # sumas de cuadrados
msq   <- c(msr,mse,0)              # suma de residuos
anova <- (cbind(df,ssq,msq))       # tabla de anova
colnames(anova) <- c("Degrees of Freedom","Sum of squares","Mean squares")
rownames(anova) <- c("Regression","Residuals","Total")
anova
##            Degrees of Freedom Sum of squares Mean squares
## Regression                  4     1817304528 454326132.01
## Residuals                  24        1605729     66905.37
## Total                      28     1818910257         0.00
# apalancamiento
Hat   <- X %*% XXm1 %*% t(X)                  # matriz H sombrero
rownames(Hat) <- lab                          # inclusivo de su etiquetas
colnames(Hat) <- lab
Hat
##                    Sebrle         Clay       Karpov        Macey      Warners    Zsivoczky        Hernu         Nool       Bernard     Schwarzl     Pogorelov    Schoenbeck        Barras        Smith    Averyanov      Ojaniemi      Smirnov           Qi         Drews  Parkhomenko        Terek
## Sebrle       0.3067915102  0.291008946  0.029173843 -0.015543849  0.022411503  0.054664503 -0.003038639  0.052617147 -0.0008482223  0.001728302  0.0003123053  0.0962796328  0.1213485575  0.118243782 -0.053824353 -0.0077857139  0.002466883  0.031780252 -0.0625433995  0.134117639 -0.128666963
## Clay         0.2910089461  0.277730313  0.025146118 -0.019725350  0.018997060  0.052473905 -0.010890360  0.053088127 -0.0028234079 -0.001255950  0.0048662685  0.0923459072  0.1099467279  0.107248895 -0.054407802 -0.0030579383 -0.002080090  0.031052643 -0.0639692855  0.132170918 -0.109098103
## Karpov       0.0291738434  0.025146118  0.123735936  0.033551653  0.119894116 -0.039255753  0.073467920 -0.007193827  0.1012547163  0.084919974  0.1109368328  0.0500212704  0.0214592079  0.082914908  0.078029644 -0.0201184294 -0.005625606 -0.003043292  0.1406848190 -0.041731385  0.021618852
## Macey       -0.0155438489 -0.019725350  0.033551653  0.085002059  0.034885134  0.060973348  0.086053049  0.037805926  0.0447509440  0.053906275  0.0060501322  0.0215881473  0.0577961702  0.038225309  0.071434530  0.0496848061  0.089877135  0.053593562  0.0610758812  0.019926505  0.018588016
## Warners      0.0224115029  0.018997060  0.119894116  0.034885134  0.116403799 -0.036397959  0.072643623 -0.005687504  0.0992078436  0.083527682  0.1085151212  0.0477968903  0.0197826050  0.078563055  0.078381328 -0.0164494794 -0.002992680 -0.001213868  0.1384320165 -0.040764077  0.026666944
## Zsivoczky    0.0546645025  0.052473905 -0.039255753  0.060973348 -0.036397959  0.117305872  0.031011091  0.075334954 -0.0186329574  0.001275873 -0.0518526299  0.0252184187  0.0769045270  0.012935704  0.008783179  0.0832437592  0.099549242  0.078671045 -0.0510918624  0.106201638  0.007367987
## Hernu       -0.0030386391 -0.010890360  0.073467920  0.086053049  0.072643623  0.031011091  0.106716251  0.019590996  0.0732429394  0.076204110  0.0342397777  0.0313397597  0.0609297287  0.067071214  0.088246929  0.0220878340  0.074879624  0.037597422  0.1048606541 -0.009907222 -0.003423841
## Nool         0.0526171466  0.053088127 -0.007193827  0.037805926 -0.005687504  0.075334954  0.019590996  0.057662296  0.0025858158  0.011449745 -0.0055567895  0.0316730506  0.0504399941  0.018914288  0.012586098  0.0608434090  0.058295195  0.055705427 -0.0190480448  0.078154833  0.029685425
## Bernard     -0.0008482223 -0.002823408  0.101254716  0.044750944  0.099207844 -0.018632957  0.073242939  0.002585816  0.0887691961  0.077503016  0.0912928127  0.0389072222  0.0196242957  0.063128734  0.079724759  0.0001376711  0.014605985  0.009485551  0.1257294025 -0.032812625  0.038863121
## Schwarzl     0.0017283018 -0.001255950  0.084919974  0.053906275  0.083527682  0.001275873  0.076204110  0.011449745  0.0775030162  0.071333469  0.0695365500  0.0360912320  0.0313292785  0.059374933  0.076082938  0.0110684146  0.032656604  0.020213896  0.1082492574 -0.017993362  0.029475893
## Pogorelov    0.0003123053  0.004866269  0.110936833  0.006050132  0.108515121 -0.051852630  0.034239778 -0.005556790  0.0912928127  0.069536550  0.1316887904  0.0429018387 -0.0178580868  0.049587627  0.064136288 -0.0097571867 -0.034847723 -0.009919249  0.1262659033 -0.040605577  0.083388173
## Schoenbeck   0.0962796328  0.092345907  0.050021270  0.021588147  0.047796890  0.025218419  0.031339760  0.031673051  0.0389072222  0.036091232  0.0429018387  0.0521838209  0.0506827495  0.061934591  0.021851643  0.0156017166  0.018304668  0.027045271  0.0316116843  0.044231954 -0.001696297
## Barras       0.1213485575  0.109946728  0.021459208  0.057796170  0.019782605  0.076904527  0.060929729  0.050439994  0.0196242957  0.031329279 -0.0178580868  0.0506827495  0.1006693176  0.074138696  0.021344931  0.0332586748  0.075356808  0.054835648  0.0007401459  0.077133058 -0.060558378
## Smith        0.1182437820  0.107248895  0.082914908  0.038225309  0.078563055  0.012935704  0.067071214  0.018914288  0.0631287337  0.059374933  0.0495876268  0.0619345914  0.0741386955  0.098537083  0.041255428 -0.0075776316  0.025458382  0.020694558  0.0686958906  0.023600196 -0.049254355
## Averyanov   -0.0538243533 -0.054407802  0.078029644  0.071434530  0.078381328  0.008783179  0.088246929  0.012586098  0.0797247594  0.076082938  0.0641362881  0.0218516435  0.0213449315  0.041255428  0.094127300  0.0259136938  0.051275411  0.027315719  0.1206012102 -0.031141560  0.056544947
## Ojaniemi    -0.0077857139 -0.003057938 -0.020118429  0.049684806 -0.016449479  0.083243759  0.022087834  0.060843409  0.0001376711  0.011068415 -0.0097571867  0.0156017166  0.0332586748 -0.007577632  0.025913694  0.0798381672  0.072485382  0.062902317 -0.0139747931  0.068516345  0.069393009
## Smirnov      0.0024668833 -0.002080090 -0.005625606  0.089877135 -0.002992680  0.099549242  0.074879624  0.058295195  0.0146059852  0.032656604 -0.0348477231  0.0183046679  0.0753568083  0.025458382  0.051275411  0.0724853818  0.114197532  0.073150188  0.0111547575  0.059551948  0.007225588
## Qi           0.0317802523  0.031052643 -0.003043292  0.053593562 -0.001213868  0.078671045  0.037597422  0.055705427  0.0094855514  0.020213896 -0.0099192493  0.0270452714  0.0548356477  0.020694558  0.027315719  0.0629023173  0.073150188  0.059143717 -0.0039695040  0.067448319  0.026713323
## Drews       -0.0625433995 -0.063969286  0.140684819  0.061075881  0.138432017 -0.051091862  0.104860654 -0.019048045  0.1257294025  0.108249257  0.1262659033  0.0316116843  0.0007401459  0.068695891  0.120601210 -0.0139747931  0.011154758 -0.003969504  0.1940661914 -0.087639213  0.060882580
## Parkhomenko  0.1341176385  0.132170918 -0.041731385  0.019926505 -0.040764077  0.106201638 -0.009907222  0.078154833 -0.0328126253 -0.017993362 -0.0406055769  0.0442319538  0.0771330575  0.023600196 -0.031141560  0.0685163454  0.059551948  0.067448319 -0.0876392127  0.135577244 -0.008420161
## Terek       -0.1286669629 -0.109098103  0.021618852  0.018588016  0.026666944  0.007367987 -0.003423841  0.029685425  0.0388631214  0.029475893  0.0833881734 -0.0016962965 -0.0605583779 -0.049254355  0.056544947  0.0693930094  0.007225588  0.026713323  0.0608825796 -0.008420161  0.199720130
## Gomez        0.0550958892  0.048666389  0.039694497  0.059192302  0.038938933  0.051265163  0.065563426  0.037293232  0.0395741811  0.045172986  0.0137737623  0.0394144104  0.0673853632  0.057961926  0.045425518  0.0321778472  0.063212858  0.044286095  0.0413244857  0.038890819 -0.010804808
## Turi         0.1133169959  0.114114545  0.072152889 -0.019376078  0.068569539 -0.016961458 -0.003164917  0.018427284  0.0488775271  0.033627846  0.0910379295  0.0619640867  0.0176189547  0.061038315  0.007520348 -0.0050376641 -0.037803701  0.002224915  0.0417145834  0.031235534  0.029032309
## Lorenzo     -0.1026368017 -0.097356233 -0.062642851  0.108181294 -0.054663628  0.140157371  0.060587838  0.080190074 -0.0154964177  0.011699823 -0.0682408195 -0.0156630685  0.0467047335 -0.041847409  0.056778268  0.1275576985  0.151112094  0.099474756 -0.0213446352  0.072289353  0.088003498
## Karlivans   -0.1127499952 -0.101539656  0.020758873  0.055702206  0.025329800  0.035265968  0.039259213  0.034610895  0.0410658215  0.041077536  0.0468811140 -0.0003227692 -0.0170464995 -0.024329611  0.071743631  0.0707489142  0.052499276  0.042579190  0.0654836848 -0.002321983  0.140462662
## Korkizoglou  0.0228650713  0.048566377  0.035665352 -0.095973158  0.036342686 -0.053111718 -0.113627868  0.022703004  0.0214649669 -0.009224225  0.1508852074  0.0397933630 -0.0967809335 -0.036442447 -0.029347701  0.0259618960 -0.119718868 -0.017071839  0.0034100810  0.031645782  0.209809977
## Uldal        0.0189666337  0.026352693 -0.031641129  0.025304929 -0.028093639  0.081351893 -0.008523842  0.065950222 -0.0137886024 -0.005097900 -0.0057083334  0.0206591999  0.0243075664 -0.015537693  0.001929473  0.0808139205  0.051286517  0.059503729 -0.0417517544  0.087563814  0.079872948
## Casarsa      0.0144485415  0.032428345 -0.053829150 -0.022530877 -0.049026847  0.067348898 -0.073016710  0.071878728 -0.0353942904 -0.033934198  0.0180499607  0.0182396052 -0.0214938412 -0.054534367 -0.032311798  0.0915233598  0.004466588  0.049800207 -0.0796507368  0.105081268  0.148607522
##                   Gomez         Turi     Lorenzo     Karlivans  Korkizoglou        Uldal      Casarsa
## Sebrle       0.05509589  0.113316996 -0.10263680 -0.1127499952  0.022865071  0.018966634  0.014448542
## Clay         0.04866639  0.114114545 -0.09735623 -0.1015396561  0.048566377  0.026352693  0.032428345
## Karpov       0.03969450  0.072152889 -0.06264285  0.0207588733  0.035665352 -0.031641129 -0.053829150
## Macey        0.05919230 -0.019376078  0.10818129  0.0557022062 -0.095973158  0.025304929 -0.022530877
## Warners      0.03893893  0.068569539 -0.05466363  0.0253298005  0.036342686 -0.028093639 -0.049026847
## Zsivoczky    0.05126516 -0.016961458  0.14015737  0.0352659681 -0.053111718  0.081351893  0.067348898
## Hernu        0.06556343 -0.003164917  0.06058784  0.0392592128 -0.113627868 -0.008523842 -0.073016710
## Nool         0.03729323  0.018427284  0.08019007  0.0346108945  0.022703004  0.065950222  0.071878728
## Bernard      0.03957418  0.048877527 -0.01549642  0.0410658215  0.021464967 -0.013788602 -0.035394290
## Schwarzl     0.04517299  0.033627846  0.01169982  0.0410775360 -0.009224225 -0.005097900 -0.033934198
## Pogorelov    0.01377376  0.091037930 -0.06824082  0.0468811140  0.150885207 -0.005708333  0.018049961
## Schoenbeck   0.03941441  0.061964087 -0.01566307 -0.0003227692  0.039793363  0.020659200  0.018239605
## Barras       0.06738536  0.017618955  0.04670473 -0.0170464995 -0.096780934  0.024307566 -0.021493841
## Smith        0.05796193  0.061038315 -0.04184741 -0.0243296109 -0.036442447 -0.015537693 -0.054534367
## Averyanov    0.04542552  0.007520348  0.05677827  0.0717436312 -0.029347701  0.001929473 -0.032311798
## Ojaniemi     0.03217785 -0.005037664  0.12755770  0.0707489142  0.025961896  0.080813921  0.091523360
## Smirnov      0.06321286 -0.037803701  0.15111209  0.0524992759 -0.119718868  0.051286517  0.004466588
## Qi           0.04428609  0.002224915  0.09947476  0.0425791898 -0.017071839  0.059503729  0.049800207
## Drews        0.04132449  0.041714583 -0.02134464  0.0654836848  0.003410081 -0.041751754 -0.079650737
## Parkhomenko  0.03889082  0.031235534  0.07228935 -0.0023219833  0.031645782  0.087563814  0.105081268
## Terek       -0.01080481  0.029032309  0.08800350  0.1404626622  0.209809977  0.079872948  0.148607522
## Gomez        0.05623132  0.016253735  0.05000201  0.0180682665 -0.058061655  0.019875439 -0.015874390
## Turi         0.01625374  0.113046329 -0.09003703 -0.0061858761  0.164210281  0.017355258  0.055227515
## Lorenzo      0.05000201 -0.090037026  0.26952285  0.1234913514 -0.101326493  0.105296722  0.080205655
## Karlivans    0.01806827 -0.006185876  0.12349135  0.1229000319  0.072745431  0.062795498  0.081027026
## Korkizoglou -0.05806165  0.164210281 -0.10132649  0.0727454308  0.492213797  0.091838347  0.260565288
## Uldal        0.01987544  0.017355258  0.10529672  0.0627954981  0.091838347  0.095652536  0.133465554
## Casarsa     -0.01587439  0.055227515  0.08020566  0.0810270262  0.260565288  0.133465554  0.239233142
lev   <- diag(Hat); lev                       # apalancamientos
##      Sebrle        Clay      Karpov       Macey     Warners   Zsivoczky       Hernu        Nool     Bernard    Schwarzl   Pogorelov  Schoenbeck      Barras       Smith   Averyanov    Ojaniemi     Smirnov          Qi       Drews Parkhomenko       Terek       Gomez        Turi     Lorenzo   Karlivans 
##  0.30679151  0.27773031  0.12373594  0.08500206  0.11640380  0.11730587  0.10671625  0.05766230  0.08876920  0.07133347  0.13168879  0.05218382  0.10066932  0.09853708  0.09412730  0.07983817  0.11419753  0.05914372  0.19406619  0.13557724  0.19972013  0.05623132  0.11304633  0.26952285  0.12290003 
## Korkizoglou       Uldal     Casarsa 
##  0.49221380  0.09565254  0.23923314
pesos <- cbind(x,y,lev)                       # construcción de una salida conjunta
rownames(pesos) <- lab
pesos                                         # salida
##             X110m.hurdle Javeline X1500m    y        lev
## Sebrle             14.05    70.52 280.01 8893 0.30679151
## Clay               14.13    69.71 282.00 8820 0.27773031
## Karpov             13.97    55.54 278.11 8725 0.12373594
## Macey              14.56    58.46 265.42 8414 0.08500206
## Warners            14.01    55.39 278.05 8343 0.11640380
## Zsivoczky          14.95    63.45 269.54 8287 0.11730587
## Hernu              14.25    57.76 264.35 8237 0.10671625
## Nool               14.80    61.33 276.33 8235 0.05766230
## Bernard            14.17    55.27 276.31 8225 0.08876920
## Schwarzl           14.25    56.32 273.56 8102 0.07133347
## Pogorelov          14.21    53.45 287.63 8084 0.13168879
## Schoenbeck         14.34    60.89 278.82 8077 0.05218382
## Barras             14.37    64.55 267.09 8067 0.10066932
## Smith              14.01    61.52 272.74 8023 0.09853708
## Averyanov          14.39    54.51 271.02 8021 0.09412730
## Ojaniemi           15.01    59.26 275.71 8006 0.07983817
## Smirnov            14.77    60.88 263.31 7993 0.11419753
## Qi                 14.78    60.79 272.63 7934 0.05914372
## Drews              14.01    51.53 274.21 7926 0.19406619
## Parkhomenko        14.88    65.82 277.94 7918 0.13557724
## Terek              15.12    50.62 290.36 7893 0.19972013
## Gomez              14.41    60.71 269.70 7865 0.05623132
## Turi               14.26    59.34 290.01 7708 0.11304633
## Lorenzo            15.38    58.36 263.08 7592 0.26952285
## Karlivans          14.98    52.92 278.67 7583 0.12290003
## Korkizoglou        14.96    53.05 317.00 7573 0.49221380
## Uldal              15.09    60.00 281.70 7495 0.09565254
## Casarsa            15.39    58.62 296.12 7404 0.23923314
niv   <- 2 * p1 / n; niv                      # nivel para apalancamiento
## [1] 0.2857143
level <- which(lev>niv)                       # unidades con fuerte apalancamiento
out   <- rbind(level,lev[level])              # salida
rownames(out) <- c("unidad","apalancamiento"); out
##                   Sebrle Korkizoglou
## unidad         1.0000000  26.0000000
## apalancamiento 0.3067915   0.4922138
# De acuerdo al gráfico de residuos vs estimados y Resultados del apalancamiento

# podemos ver que las observaciones que tienen mayor apalancamiento es Sebrle (0.3067915), Korkizoglou(0.4922138)
# comparación entre modelo total y modelo sin unidades con apalancamiento
# modelo completo X110m.hurdle,Javeline,X1500m
lm1 <- lm(y~X110m.hurdle +Javeline+X1500m,data=Decathlon); summary(lm1)
## 
## Call:
## lm(formula = y ~ X110m.hurdle + Javeline + X1500m, data = Decathlon)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -466.52 -147.34  -29.56  191.88  465.43 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  14803.597   2128.092   6.956 3.41e-07 ***
## X110m.hurdle  -512.357    114.369  -4.480 0.000156 ***
## Javeline        26.187     10.343   2.532 0.018308 *  
## X1500m          -3.023      4.606  -0.656 0.517786    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 258.7 on 24 degrees of freedom
## Multiple R-squared:  0.5716, Adjusted R-squared:  0.518 
## F-statistic: 10.67 on 3 and 24 DF,  p-value: 0.0001198
residuals(lm1)
##      Sebrle        Clay      Karpov       Macey     Warners   Zsivoczky       Hernu        Nool     Bernard    Schwarzl   Pogorelov  Schoenbeck      Barras       Smith   Averyanov    Ojaniemi     Smirnov          Qi       Drews Parkhomenko       Terek       Gomez        Turi     Lorenzo   Karlivans 
##  287.876742  283.093527  465.428587  341.885869  107.669561  296.487228   21.151158  243.679377   69.528502  -48.293830   30.908300 -130.954353 -256.893253 -388.912403  -17.844307  174.607493  -41.271570  -64.613187 -219.857494 -145.045099  388.517156 -329.948921 -466.521023  -64.437164  -88.786899 
## Korkizoglou       Uldal     Casarsa 
##    3.448002 -296.672400 -154.229599
# y su plot estimados observados
plot(lm1$fitted.values,y,pch=20,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm1$fitted.values,y,labels=lab,pos=3,offset(0.5))
points(lm1$fitted.values,lm1$fitted.values,pch=20,col="red")

# y su plot estimados residuos
plot(lm1$fitted.values,lm1$residuals,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm1$fitted.values,lm1$residuals,labels=lab,pos=3,offset(0.5))

# modelo sin unidades con fuerte apalancamiento (Todas las observaciones menos la Sebrle,Korkizoglou)
lm2 <- lm(y[-level]~X110m.hurdle +Javeline+X1500m,data=Decathlon[-level,]); summary(lm2)
## 
## Call:
## lm(formula = y[-level] ~ X110m.hurdle + Javeline + X1500m, data = Decathlon[-level, 
##     ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -415.18 -139.64  -45.56  158.50  478.48 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  15152.701   2452.361   6.179 3.22e-06 ***
## X110m.hurdle  -473.797    118.865  -3.986 0.000624 ***
## Javeline        18.508     11.921   1.552 0.134819    
## X1500m          -4.729      6.237  -0.758 0.456392    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 259.9 on 22 degrees of freedom
## Multiple R-squared:  0.4704, Adjusted R-squared:  0.3982 
## F-statistic: 6.514 on 3 and 22 DF,  p-value: 0.002547
residuals(lm2)
##        Clay      Karpov       Macey     Warners   Zsivoczky       Hernu        Nool     Bernard    Schwarzl   Pogorelov  Schoenbeck      Barras       Smith   Averyanov    Ojaniemi     Smirnov          Qi       Drews Parkhomenko       Terek       Gomez        Turi     Lorenzo   Karlivans       Uldal 
##   405.42902   478.47897   332.96772   117.92327   317.87859    16.98603   266.15417    69.72349   -47.81012    34.89004   -89.87428  -208.86786  -340.63867   -40.99108   172.03043   -43.30121   -51.82448  -245.79623   -88.42773   340.33179  -308.50441  -415.17520  -109.73341  -133.84780  -286.43550 
##     Casarsa 
##  -141.56554
# hay que tirar las unidades también de la representación gráfica
plot(lm2$fitted.values,y[-level],pch=20,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm2$fitted.values,y[-level],labels=lab,pos=3,offset(0.5))
points(lm2$fitted.values,lm2$fitted.values,pch=20,col="red")

plot(lm2$fitted.values,lm2$residuals,asp=1,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm2$fitted.values,lm2$residuals,labels=lab,pos=3,offset(0.5))

# gráficos de comparación de estimados y residuos
# ambos contra y y no contra los estimados
plot(y,lm1$fitted.values,pch=20,asp=1,ylab="fitted values")
abline(0,1)
text(y,lm1$fitted.values,y,labels=lab,pos=4,offset(0.5))
points(y[-out[1,]],lm2$fitted.values,pch=20,col="red")
text(y[-out[1,]],lm2$fitted.values,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))
legend("bottomright", # Escoge la posición adecuada para tu gráfico
       legend=c("Modelo 1", "Modelo 2 sin Obs Sebrle y Korkizoglou"), 
       pch=c(2, 2),
       col=c("black", "red"),
       title="Leyenda",
       cex=0.5)

# por esto hay que cambiar el signo a los residuos
plot(y,-lm1$residuals,pch=20,asp=1,ylab="residuals")
abline(h=0)
text(y,-lm1$residuals,labels=lab,pch=20,pos=4,offset(0.5))
points(y[-out[1,]],-lm2$residuals,pch=20,col="red")
text(y[-out[1,]],-lm2$residuals,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))

3) Points ~ Long.jump,High.jump,X400m

# Lectura de los datos
Decathlon <- read.csv("Decathlon.csv", row.names = 1);head(Decathlon)
##           X100m Long.jump Shot.put High.jump X400m X110m.hurdle Discus Pole.vault Javeline X1500m Points
## Sebrle    10.85      7.84    16.36      2.12 48.36        14.05  48.72        5.0    70.52 280.01   8893
## Clay      10.44      7.96    15.23      2.06 49.19        14.13  50.11        4.9    69.71 282.00   8820
## Karpov    10.50      7.81    15.93      2.09 46.81        13.97  51.65        4.6    55.54 278.11   8725
## Macey     10.89      7.47    15.73      2.15 48.97        14.56  48.34        4.4    58.46 265.42   8414
## Warners   10.62      7.74    14.48      1.97 47.97        14.01  43.73        4.9    55.39 278.05   8343
## Zsivoczky 10.91      7.14    15.31      2.12 49.40        14.95  45.62        4.7    63.45 269.54   8287
attach(Decathlon) #archivo en uso
## The following objects are masked from Decathlon (pos = 3):
## 
##     Discus, High.jump, Javeline, Long.jump, Points, Pole.vault, Shot.put, X100m, X110m.hurdle, X1500m, X400m
## The following objects are masked from Decathlon (pos = 4):
## 
##     Discus, High.jump, Javeline, Long.jump, Points, Pole.vault, Shot.put, X100m, X110m.hurdle, X1500m, X400m
lab = rownames(Decathlon) #etiquetas de las unidades en lab
# regresión a mano
# definición de los datos para el modelo
n     <- dim(Decathlon)[1]; n                 # n es el número de observaciones
## [1] 28
x     <- as.matrix(Decathlon[,c(2,4,5)]); x        # x es el caracter descriptivo
##             Long.jump High.jump X400m
## Sebrle           7.84      2.12 48.36
## Clay             7.96      2.06 49.19
## Karpov           7.81      2.09 46.81
## Macey            7.47      2.15 48.97
## Warners          7.74      1.97 47.97
## Zsivoczky        7.14      2.12 49.40
## Hernu            7.19      2.03 48.73
## Nool             7.53      1.88 48.81
## Bernard          7.48      2.12 49.13
## Schwarzl         7.49      1.94 49.76
## Pogorelov        7.31      2.06 50.79
## Schoenbeck       7.30      1.88 50.30
## Barras           6.99      1.94 49.41
## Smith            6.81      1.91 49.27
## Averyanov        7.34      1.94 49.72
## Ojaniemi         7.50      1.94 49.12
## Smirnov          7.07      1.94 49.11
## Qi               7.34      1.97 49.65
## Drews            7.38      1.88 48.51
## Parkhomenko      6.61      2.03 51.04
## Terek            6.94      1.94 49.56
## Gomez            7.26      1.85 48.61
## Turi             6.91      2.03 51.67
## Lorenzo          7.03      1.85 49.34
## Karlivans        7.26      1.97 50.54
## Korkizoglou      7.07      1.94 51.16
## Uldal            6.99      1.85 50.95
## Casarsa          6.68      1.94 53.20
p     <- dim(x)[2]; p                        # numero de regresores
## [1] 3
y     <- Points                               # y es el caracter respuesta   
nomy  <- deparse(substitute(Points)); nomy    # nomy es su nombre
## [1] "Points"
nom   <- c(colnames(x),nomy) ; nom           # etiquetas de las variables en nom
## [1] "Long.jump" "High.jump" "X400m"     "Points"
# estadísticas
xm    <- apply(x,2,mean); xm                 # cálculo de xm, promedio de x
## Long.jump High.jump     X400m 
##  7.265714  1.976429 49.610000
ym    <- sum(y)/n; ym                        # cálculo de ym, promedio de y
## [1] 8051.536
ssx   <- apply(x^2,2,sum); ssx               # ssx es la suma de los x cuadros
## Long.jump High.jump     X400m 
##  1481.279   109.594 68955.701
ssy   <- sum(y^2); ssy                       # ssy es la suma de los y cuadros
## [1] 1818910257
sxy   <- t(x)%*%y ; sxy                      # sxy es la suma de los productos xy
##                 [,1]
## Long.jump  1640639.7
## High.jump   446139.3
## X400m     11175238.0
ssxc  <- ssx-n*xm^2 ; ssxc                   # ssxc es ssx centrado sobre el promedio
##  Long.jump  High.jump      X400m 
##  3.1420857  0.2184429 43.4424000
ssyc  <- ssy-n*ym^2 ; ssyc                   # ssyc es ssy centrado sobre el promedio
## [1] 3747891
sxyc  <- sxy-n*xm*ym; sxyc                   # sxyc es sxy centrado sobre el promedio
##                 [,1]
## Long.jump  2635.2743
## High.jump   567.3236
## X400m     -8989.2700
varx  <- ssxc/n; varx                        # varx es la varianza de x
##   Long.jump   High.jump       X400m 
## 0.112217347 0.007801531 1.551514286
vary  <- ssyc/n; vary                        # vary es la varianza de y
## [1] 133853.2
covxy <- sxyc/n; covxy                       # covxy es la covarianza de xy
##                 [,1]
## Long.jump   94.11694
## High.jump   20.26156
## X400m     -321.04536
stats <- matrix(8*(2*p+1)*0,nrow=8,ncol=(2*p+1)) # construcción de tabla de estadísticas
rownames(stats) <- c("Mínimo","Máximo","Promedio","Total","Suma de cuadrados",
                     "Cuadrados centrados","Varianza covarianza","Desvío estándar")
colnames(stats) <- c(nom,"x1y","x2y","x3y")
stats[,1:p] <- rbind(apply(x,2,min),apply(x,2,max),xm,apply(x,2,sum),ssx,ssxc,varx,sqrt(varx))
stats[,p+1] <- rbind(min(y),max(y),ym,sum(y),ssy,ssyc,vary,sqrt(vary))
stats[,c((p+2):(2*p+1))] <- rbind(t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(rep(0,p)),t(sxy),t(sxyc),t(covxy),t(rep(0,p)))
stats                                        # impresión de la tabla
##                        Long.jump    High.jump        X400m       Points          x1y          x2y           x3y
## Mínimo                 6.6100000 1.850000e+00    46.810000 7.404000e+03 0.000000e+00      0.00000        0.0000
## Máximo                 7.9600000 2.150000e+00    53.200000 8.893000e+03 0.000000e+00      0.00000        0.0000
## Promedio               7.2657143 1.976429e+00    49.610000 8.051536e+03 0.000000e+00      0.00000        0.0000
## Total                203.4400000 5.534000e+01  1389.080000 2.254430e+05 0.000000e+00      0.00000        0.0000
## Suma de cuadrados   1481.2790000 1.095940e+02 68955.701200 1.818910e+09 1.640640e+06 446139.31000 11175237.9600
## Cuadrados centrados    3.1420857 2.184429e-01    43.442400 3.747891e+06 2.635274e+03    567.32357    -8989.2700
## Varianza covarianza    0.1122173 7.801531e-03     1.551514 1.338532e+05 9.411694e+01     20.26156     -321.0454
## Desvío estándar        0.3349886 8.832627e-02     1.245598 3.658596e+02 0.000000e+00      0.00000        0.0000
print("Matriz de varianza y covarianza de los regresores")
## [1] "Matriz de varianza y covarianza de los regresores"
vcovx <- (n-1)/n * cov(x) ; vcovx            # matriz de varianza/covarianza de los xy
##             Long.jump    High.jump       X400m
## Long.jump  0.11221735  0.010227551 -0.28002857
## High.jump  0.01022755  0.007801531 -0.01861071
## X400m     -0.28002857 -0.018610714  1.55151429
# estimación
X      <- cbind(rep(1,n),x); X               # se necesita ajuntar la columna de unos
##               Long.jump High.jump X400m
## Sebrle      1      7.84      2.12 48.36
## Clay        1      7.96      2.06 49.19
## Karpov      1      7.81      2.09 46.81
## Macey       1      7.47      2.15 48.97
## Warners     1      7.74      1.97 47.97
## Zsivoczky   1      7.14      2.12 49.40
## Hernu       1      7.19      2.03 48.73
## Nool        1      7.53      1.88 48.81
## Bernard     1      7.48      2.12 49.13
## Schwarzl    1      7.49      1.94 49.76
## Pogorelov   1      7.31      2.06 50.79
## Schoenbeck  1      7.30      1.88 50.30
## Barras      1      6.99      1.94 49.41
## Smith       1      6.81      1.91 49.27
## Averyanov   1      7.34      1.94 49.72
## Ojaniemi    1      7.50      1.94 49.12
## Smirnov     1      7.07      1.94 49.11
## Qi          1      7.34      1.97 49.65
## Drews       1      7.38      1.88 48.51
## Parkhomenko 1      6.61      2.03 51.04
## Terek       1      6.94      1.94 49.56
## Gomez       1      7.26      1.85 48.61
## Turi        1      6.91      2.03 51.67
## Lorenzo     1      7.03      1.85 49.34
## Karlivans   1      7.26      1.97 50.54
## Korkizoglou 1      7.07      1.94 51.16
## Uldal       1      6.99      1.85 50.95
## Casarsa     1      6.68      1.94 53.20
p1     <- p + 1; p1                          # un regresor más
## [1] 4
XX     <- t(X)%*%X; XX                       # matriz X'X
##                   Long.jump High.jump     X400m
##             28.00   203.440    55.340  1389.080
## Long.jump  203.44  1481.279   402.371 10084.818
## High.jump   55.34   402.371   109.594  2744.896
## X400m     1389.08 10084.818  2744.896 68955.701
XXm1   <- solve(XX); XXm1                    # su inversa
##                       Long.jump   High.jump       X400m
##           212.097615 -8.9455701 -4.01099846 -2.80464609
## Long.jump  -8.945570  0.6440754 -0.58375520  0.10924516
## High.jump  -4.010998 -0.5837552  5.24179360 -0.04248405
## X400m      -2.804646  0.1092452 -0.04248405  0.04222674
bh     <- XXm1 %*% t(X) %*% y                # estimación de los betas
print("Estimaciones de los beta")
## [1] "Estimaciones de los beta"
bh 
##                [,1]
##           7413.6919
## Long.jump  384.1031
## High.jump 1817.3386
## X400m     -115.7988
eta    <- X %*% bh; eta                      # valores estimados
##                 [,1]
## Sebrle      8677.787
## Clay        8518.726
## Karpov      8791.232
## Macey       8519.552
## Warners     8411.938
## Zsivoczky   8288.484
## Hernu       8221.714
## Nool        8070.444
## Bernard     8450.345
## Schwarzl    8054.112
## Pogorelov   8083.781
## Schoenbeck  7809.560
## Barras      7902.590
## Smith       7795.143
## Averyanov   8001.128
## Ojaniemi    8132.064
## Smirnov     7968.058
## Qi          8063.754
## Drews       8047.569
## Parkhomenko 7731.439
## Terek       7866.015
## Gomez       7935.376
## Turi        7773.717
## Lorenzo     7762.499
## Karlivans   7929.965
## Korkizoglou 7730.670
## Uldal       7560.699
## Casarsa     7344.640
res    <- y - eta; res                       # residuos
##                     [,1]
## Sebrle       215.2129057
## Clay         301.2738710
## Karpov       -66.2320229
## Macey       -105.5518060
## Warners      -68.9375316
## Zsivoczky     -1.4841156
## Hernu         15.2859881
## Nool         164.5556170
## Bernard     -225.3448670
## Schwarzl      47.8883114
## Pogorelov      0.2190354
## Schoenbeck   267.4395910
## Barras       164.4102930
## Smith        227.8571810
## Averyanov     19.8718296
## Ojaniemi    -126.0639689
## Smirnov       24.9423937
## Qi          -129.7542464
## Drews       -121.5685597
## Parkhomenko  186.5610994
## Terek         26.9852741
## Gomez        -70.3761419
## Turi         -65.7165824
## Lorenzo     -170.4992760
## Karlivans   -346.9650395
## Korkizoglou -157.6700120
## Uldal        -65.6990397
## Casarsa       59.3598192
por_res <- (res/y)*100
unidad <- cbind(x,y,eta,res,por_res)                 # tabla de resultados
rownames(unidad) <- lab
colnames(unidad) <- c(nom,"eta","residuos","% res")
unidad                                       # impresión de la tabla
##             Long.jump High.jump X400m Points      eta     residuos        % res
## Sebrle           7.84      2.12 48.36   8893 8677.787  215.2129057  2.420025927
## Clay             7.96      2.06 49.19   8820 8518.726  301.2738710  3.415803526
## Karpov           7.81      2.09 46.81   8725 8791.232  -66.2320229 -0.759106280
## Macey            7.47      2.15 48.97   8414 8519.552 -105.5518060 -1.254478321
## Warners          7.74      1.97 47.97   8343 8411.938  -68.9375316 -0.826291880
## Zsivoczky        7.14      2.12 49.40   8287 8288.484   -1.4841156 -0.017908962
## Hernu            7.19      2.03 48.73   8237 8221.714   15.2859881  0.185577129
## Nool             7.53      1.88 48.81   8235 8070.444  164.5556170  1.998246715
## Bernard          7.48      2.12 49.13   8225 8450.345 -225.3448670 -2.739755221
## Schwarzl         7.49      1.94 49.76   8102 8054.112   47.8883114  0.591067779
## Pogorelov        7.31      2.06 50.79   8084 8083.781    0.2190354  0.002709493
## Schoenbeck       7.30      1.88 50.30   8077 7809.560  267.4395910  3.311125307
## Barras           6.99      1.94 49.41   8067 7902.590  164.4102930  2.038059911
## Smith            6.81      1.91 49.27   8023 7795.143  227.8571810  2.840049620
## Averyanov        7.34      1.94 49.72   8021 8001.128   19.8718296  0.247747533
## Ojaniemi         7.50      1.94 49.12   8006 8132.064 -126.0639689 -1.574618648
## Smirnov          7.07      1.94 49.11   7993 7968.058   24.9423937  0.312052968
## Qi               7.34      1.97 49.65   7934 8063.754 -129.7542464 -1.635420298
## Drews            7.38      1.88 48.51   7926 8047.569 -121.5685597 -1.533794596
## Parkhomenko      6.61      2.03 51.04   7918 7731.439  186.5610994  2.356164428
## Terek            6.94      1.94 49.56   7893 7866.015   26.9852741  0.341888687
## Gomez            7.26      1.85 48.61   7865 7935.376  -70.3761419 -0.894801549
## Turi             6.91      2.03 51.67   7708 7773.717  -65.7165824 -0.852576316
## Lorenzo          7.03      1.85 49.34   7592 7762.499 -170.4992760 -2.245775500
## Karlivans        7.26      1.97 50.54   7583 7929.965 -346.9650395 -4.575564282
## Korkizoglou      7.07      1.94 51.16   7573 7730.670 -157.6700120 -2.082002007
## Uldal            6.99      1.85 50.95   7495 7560.699  -65.6990397 -0.876571577
## Casarsa          6.68      1.94 53.20   7404 7344.640   59.3598192  0.801726354
# gráficos clásicos: ejemplo con el primero regresor
plot(x[,1],y,xlab="Long.jump",ylab="Points",ylim=c(min(y,eta),max(y,eta)),pch=".")
text(x[,1],y,labels=lab,pos=2)
points(x[,1],eta,col="red",pch=".")
text(x[,1],eta,labels=lab,col="red",pos=2)
legend("bottomright",            
       legend=c("Y", "Eta"), 
       pch=c("*", "*"),       
       col=c("black", "red"))

# gráficos de valores estimados y residuos
plot(eta,y,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(eta,y,labels=lab,pos=2)
points(eta,eta,col="red")

plot(eta,res,asp=1,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(eta,res,labels=lab,pos=2)

# Se puede ver que la mayoria de las observaciones pasan por la recta horizontal 0, lo que es bueno ya que indica que tienen un valor residual bajo. Sin embargo hay observaciones como Karlivans o Clay con un alto valor residual. Por otro, también se puede observar la muestra Casarsa con un alto valor de apalancamiento
# datos para anova
ssr   <- t(eta) %*% eta; ssr       # suma de  cuadrados por regresión
##            [,1]
## [1,] 1818246549
sse   <- ssy - ssr; sse            # suma por residuos
##          [,1]
## [1,] 663707.9
msr   <- ssr/p1; msr               # cuadrado promedio por regresión
##           [,1]
## [1,] 454561637
mse   <- sse/(n-p1); mse           # cuadrado promedio por residuos
##         [,1]
## [1,] 27654.5
df    <- c(p1,n-p1,n)              # grados de libertad
ssq   <- c(ssr,sse,ssy)            # sumas de cuadrados
msq   <- c(msr,mse,0)              # suma de residuos
anova <- (cbind(df,ssq,msq))       # tabla de anova
colnames(anova) <- c("Degrees of Freedom","Sum of squares","Mean squares")
rownames(anova) <- c("Regression","Residuals","Total")
anova
##            Degrees of Freedom Sum of squares Mean squares
## Regression                  4   1818246549.1  454561637.3
## Residuals                  24       663707.9      27654.5
## Total                      28   1818910257.0          0.0
# apalancamiento
Hat   <- X %*% XXm1 %*% t(X)                  # matriz H sombrero
rownames(Hat) <- lab                          # inclusivo de su etiquetas
colnames(Hat) <- lab
Hat
##                   Sebrle        Clay       Karpov        Macey      Warners    Zsivoczky        Hernu        Nool       Bernard     Schwarzl    Pogorelov   Schoenbeck       Barras         Smith    Averyanov      Ojaniemi      Smirnov         Qi        Drews  Parkhomenko        Terek        Gomez
## Sebrle       0.184301568  0.17721711  0.159727774  0.145445128  0.097281295  0.083649363  0.046203208  0.02678197  0.1334440555  0.052689650  0.086199527 -0.001862818 -0.023417759 -0.0649834345  0.030108003  0.0517176057 -0.012612958 0.04395119  0.003198016 -0.031611176 -0.030315323 -0.028471478
## Clay         0.177217110  0.26176692  0.080552960  0.081591627  0.113095455 -0.012791665 -0.036278088  0.08034607  0.0923280984  0.121116366  0.119937887  0.080567002 -0.074232297 -0.1468401173  0.066058275  0.0897219391 -0.062400466 0.06375740  0.011101947 -0.114693044 -0.083673255 -0.027260823
## Karpov       0.159727774  0.08055296  0.247059032  0.140829148  0.127212262  0.108720837  0.114562250  0.04264083  0.1185407205  0.006879078 -0.007150733 -0.047148726  0.039950470  0.0408502498  0.012666543  0.0473663788  0.057300388 0.02901480  0.064964107 -0.019810556  0.031491744  0.049302664
## Macey        0.145445128  0.08159163  0.140829148  0.177283985  0.031459161  0.160645244  0.093158257 -0.04395868  0.1504213568 -0.004784150  0.088042640 -0.052837837  0.019276874  0.0035751124  0.001648833  0.0025516229  0.019728269 0.02702744 -0.034384401  0.088254586  0.019447823 -0.055365703
## Warners      0.097281295  0.11309545  0.127212262  0.031459161  0.127102651 -0.011617148  0.028067056  0.10704994  0.0372400775  0.071086814  0.001087751  0.051558324  0.012061461 -0.0017206463  0.052263462  0.0833732869  0.027616374 0.04623833  0.092689573 -0.087023379  0.002983341  0.082591773
## Zsivoczky    0.083649363 -0.01279166  0.108720837  0.160645244 -0.011617148  0.185205618  0.119909886 -0.07144347  0.1291295085 -0.041107570  0.063306071 -0.071031693  0.062798474  0.0755600660 -0.011801308 -0.0246162808  0.056390987 0.01525414 -0.034675004  0.162489555  0.067879432 -0.040064893
## Hernu        0.046203208 -0.03627809  0.114562250  0.093158257  0.028067056  0.119909886  0.110449250 -0.00762572  0.0728915667 -0.024156600  0.001904033 -0.038188944  0.080627835  0.1081462802  0.004177783  0.0046140647  0.080845976 0.01838911  0.033112453  0.102427300  0.082280505  0.038610803
## Nool         0.026781966  0.08034607  0.042640831 -0.043958680  0.107049940 -0.071443471 -0.007625720  0.13347062 -0.0239250930  0.089588925 -0.011378800  0.100263245  0.020316153  0.0141618219  0.068754263  0.0915002504  0.031689147 0.05003867  0.112847284 -0.090189651  0.013238511  0.114844754
## Bernard      0.133444056  0.09232810  0.118540720  0.150421357  0.037240077  0.129129509  0.072891567 -0.02392509  0.1305294391  0.012065893  0.086445101 -0.028740111  0.012217460 -0.0071227665  0.011919050  0.0139771108  0.013246490 0.03156237 -0.023302685  0.065031679  0.011685266 -0.043246913
## Schwarzl     0.052689650  0.12111637  0.006879078 -0.004784150  0.071086814 -0.041107570 -0.024156600  0.08958892  0.0120658925  0.093374050  0.054559512  0.095955662 -0.009014802 -0.0364806132  0.064762340  0.0744695261 -0.004161209 0.05264793  0.052557434 -0.054973341 -0.013262689  0.043790272
## Pogorelov    0.086199527  0.11993789 -0.007150733  0.088042640  0.001087751  0.063306071  0.001904033 -0.01137880  0.0864451009  0.054559512  0.131101258  0.039793807 -0.017654461 -0.0552295321  0.036217802  0.0229323200 -0.024297202 0.04350217 -0.043010436  0.056964443 -0.015419564 -0.061799041
## Schoenbeck  -0.001862818  0.08056700 -0.047148726 -0.052837837  0.051558324 -0.071031693 -0.038188944  0.10026324 -0.0287401108  0.095955662  0.039793807  0.119998414  0.006136912 -0.0100719252  0.071413651  0.0738268338  0.007343504 0.05218153  0.066106778 -0.041944608  0.003996093  0.067997968
## Barras      -0.023417759 -0.07423230  0.039950470  0.019276874  0.012061461  0.062798474  0.080627835  0.02031615  0.0122174602 -0.009014802 -0.017654461  0.006136912  0.093023789  0.1309210740  0.019190595  0.0128951798  0.089876082 0.02113673  0.058146273  0.098451261  0.096379287  0.076469330
## Smith       -0.064983435 -0.14684012  0.040850250  0.003575112 -0.001720646  0.075560066  0.108146280  0.01416182 -0.0071227665 -0.036480613 -0.055229532 -0.010071925  0.130921074  0.1940761957  0.009754124 -0.0001549192  0.125966552 0.01201445  0.076339634  0.135788428  0.136317132  0.107265289
## Averyanov    0.030108003  0.06605828  0.012666543  0.001648833  0.052263462 -0.011801308  0.004177783  0.06875426  0.0119190503  0.064762340  0.036217802  0.071413651  0.019190595  0.0097541243  0.052020832  0.0564165443  0.021388451 0.04384961  0.052292693 -0.009825105  0.017280387  0.051157802
## Ojaniemi     0.051717606  0.08972194  0.047366379  0.002551623  0.083373287 -0.024616281  0.004614065  0.09150025  0.0139771108  0.074469526  0.022932320  0.073826834  0.012895180 -0.0001549192  0.056416544  0.0715271448  0.020450480 0.04675799  0.071769980 -0.049290962  0.007931201  0.067386122
## Smirnov     -0.012612958 -0.06240047  0.057300388  0.019728269  0.027616374  0.056390987  0.080845976  0.03168915  0.0132464905 -0.004161209 -0.024297202  0.007343504  0.089876082  0.1259665522  0.021388451  0.0204504801  0.089407096 0.02259092  0.067884917  0.078718333  0.091704694  0.084583490
## Qi           0.043951190  0.06375740  0.029014803  0.027027443  0.046238330  0.015254141  0.018389113  0.05003867  0.0315623683  0.052647927  0.043502174  0.052181530  0.021136728  0.0120144495  0.043849608  0.0467579855  0.022590917 0.04078134  0.038620190  0.009206710  0.019849953  0.035274608
## Drews        0.003198016  0.01110195  0.064964107 -0.034384401  0.092689573 -0.034675004  0.033112453  0.11284728 -0.0233026850  0.052557434 -0.043010436  0.066106778  0.058146273  0.0763396339  0.052292693  0.0717699801  0.067884917 0.03862019  0.120348114 -0.041524837  0.053179655  0.131957033
## Parkhomenko -0.031611176 -0.11469304 -0.019810556  0.088254586 -0.087023379  0.162489555  0.102427300 -0.09018965  0.0650316789 -0.054973341  0.056964443 -0.041944608  0.098451261  0.1357884282 -0.009825105 -0.0492909621  0.078718333 0.00920671 -0.041524837  0.243665118  0.111291541 -0.025276632
## Terek       -0.030315323 -0.08367325  0.031491744  0.019447823  0.002983341  0.067879432  0.082280505  0.01323851  0.0116852659 -0.013262689 -0.015419564  0.003996093  0.096379287  0.1363171320  0.017280387  0.0079312005  0.091704694 0.01984995  0.053179655  0.111291541  0.100656399  0.072803472
## Gomez       -0.028471478 -0.02726082  0.049302664 -0.055365703  0.082591773 -0.040064893  0.038610803  0.11484475 -0.0432469134  0.043790272 -0.061799041  0.067997968  0.076469330  0.1072652887  0.051157802  0.0673861224  0.084583490 0.03527461  0.131957033 -0.025276632  0.072803472  0.151410504
## Turi         0.015672375  0.02543206 -0.066365086  0.068743571 -0.058817604  0.088091597  0.019519748 -0.04896736  0.0636981033  0.020061155  0.121761170  0.027477686  0.021680477  0.0095932186  0.023527273 -0.0096368882  0.005098396 0.03234600 -0.057422523  0.145929730  0.030324861 -0.060625584
## Lorenzo     -0.060046071 -0.06850676  0.007848553 -0.055062611  0.040145805 -0.017840516  0.044304810  0.08225509 -0.0458133537  0.025545341 -0.049473912  0.059629344  0.090423896  0.1296343749  0.042943164  0.0448098621  0.091357245 0.02975854  0.107915887  0.033247641  0.091032328  0.133048367
## Karlivans    0.035420845  0.08411695 -0.025858685  0.019447723  0.020555875  0.004728162 -0.009976089  0.03818613  0.0287877318  0.066900760  0.080302825  0.072792358  0.002443718 -0.0192095440  0.050093430  0.0430088402 -0.001098577 0.04527290  0.011259675  0.020959711  0.003198162  0.005047364
## Korkizoglou -0.004710184  0.04945096 -0.073078119 -0.005039640 -0.007572545 -0.002445630 -0.016953135  0.03002294  0.0071812482  0.062225526  0.080488758  0.083149790  0.013988841  0.0002615416  0.050719706  0.0336752556  0.005466615 0.04324987  0.006656380  0.050991463  0.017604548  0.007749896
## Uldal       -0.059820155  0.00523736 -0.093681342 -0.072927892  0.007306973 -0.056539924 -0.025456310  0.07538193 -0.0506475048  0.070398854  0.028475508  0.113014916  0.037951227  0.0425851404  0.062733813  0.0504508326  0.031809736 0.04374465  0.059439900  0.023367164  0.040595861  0.074270776
## Casarsa     -0.055157322  0.02328014 -0.194387548 -0.014217491 -0.093315719  0.012216160 -0.045567296 -0.02588978  0.0004561026  0.057265889  0.142391092  0.098622844  0.007955923 -0.0109971860  0.048267986  0.0005686830 -0.015893728 0.04198047 -0.058068034  0.139378628  0.019518636 -0.053451221
##                     Turi      Lorenzo    Karlivans   Korkizoglou        Uldal       Casarsa
## Sebrle       0.015672375 -0.060046071  0.035420845 -0.0047101844 -0.059820155 -0.0551573220
## Clay         0.025432056 -0.068506761  0.084116949  0.0494509566  0.005237360  0.0232801404
## Karpov      -0.066365086  0.007848553 -0.025858685 -0.0730781187 -0.093681342 -0.1943875480
## Macey        0.068743571 -0.055062611  0.019447723 -0.0050396399 -0.072927892 -0.0142174912
## Warners     -0.058817604  0.040145805  0.020555875 -0.0075725449  0.007306973 -0.0933157194
## Zsivoczky    0.088091597 -0.017840516  0.004728162 -0.0024456295 -0.056539924  0.0122161603
## Hernu        0.019519748  0.044304810 -0.009976089 -0.0169531346 -0.025456310 -0.0455672960
## Nool        -0.048967357  0.082255090  0.038186134  0.0300229436  0.075381931 -0.0258897835
## Bernard      0.063698103 -0.045813354  0.028787732  0.0071812482 -0.050647505  0.0004561026
## Schwarzl     0.020061155  0.025545341  0.066900760  0.0622255262  0.070398854  0.0572658888
## Pogorelov    0.121761170 -0.049473912  0.080302825  0.0804887584  0.028475508  0.1423910918
## Schoenbeck   0.027477686  0.059629344  0.072792358  0.0831497904  0.113014916  0.0986228440
## Barras       0.021680477  0.090423896  0.002443718  0.0139888408  0.037951227  0.0079559232
## Smith        0.009593219  0.129634375 -0.019209544  0.0002615416  0.042585140 -0.0109971860
## Averyanov    0.023527273  0.042943164  0.050093430  0.0507197065  0.062733813  0.0482679862
## Ojaniemi    -0.009636888  0.044809862  0.043008840  0.0336752556  0.050450833  0.0005686830
## Smirnov      0.005098396  0.091357245 -0.001098577  0.0054666154  0.031809736 -0.0158937284
## Qi           0.032345995  0.029758541  0.045272899  0.0432498664  0.043744649  0.0419804722
## Drews       -0.057422523  0.107915887  0.011259675  0.0066563803  0.059439900 -0.0580680344
## Parkhomenko  0.145929730  0.033247641  0.020959711  0.0509914630  0.023367164  0.1393786284
## Terek        0.030324861  0.091032328  0.003198162  0.0176045475  0.040595861  0.0195186360
## Gomez       -0.060625584  0.133048367  0.005047364  0.0077498958  0.074270776 -0.0534512210
## Turi         0.164215596 -0.019027372  0.075980221  0.0990931370  0.056206264  0.2064097858
## Lorenzo     -0.019027372  0.134575928  0.010074598  0.0262068766  0.087709479  0.0033034653
## Karlivans    0.075980221  0.010074598  0.071777831  0.0786868562  0.068668542  0.1184316834
## Korkizoglou  0.099093137  0.026206877  0.078686856  0.0989840170  0.097070627  0.1668744024
## Uldal        0.056206264  0.087709479  0.068668542  0.0970706266  0.137258712  0.1453948624
## Casarsa      0.206409786  0.003303465  0.118431683  0.1668744024  0.145394862  0.3346285739
lev   <- diag(Hat); lev                       # apalancamientos
##      Sebrle        Clay      Karpov       Macey     Warners   Zsivoczky       Hernu        Nool     Bernard    Schwarzl   Pogorelov  Schoenbeck      Barras       Smith   Averyanov    Ojaniemi     Smirnov          Qi       Drews Parkhomenko       Terek       Gomez        Turi     Lorenzo   Karlivans 
##  0.18430157  0.26176692  0.24705903  0.17728399  0.12710265  0.18520562  0.11044925  0.13347062  0.13052944  0.09337405  0.13110126  0.11999841  0.09302379  0.19407620  0.05202083  0.07152714  0.08940710  0.04078134  0.12034811  0.24366512  0.10065640  0.15141050  0.16421560  0.13457593  0.07177783 
## Korkizoglou       Uldal     Casarsa 
##  0.09898402  0.13725871  0.33462857
pesos <- cbind(x,y,lev)                       # construcción de una salida conjunta
rownames(pesos) <- lab
pesos                                         # salida
##             Long.jump High.jump X400m    y        lev
## Sebrle           7.84      2.12 48.36 8893 0.18430157
## Clay             7.96      2.06 49.19 8820 0.26176692
## Karpov           7.81      2.09 46.81 8725 0.24705903
## Macey            7.47      2.15 48.97 8414 0.17728399
## Warners          7.74      1.97 47.97 8343 0.12710265
## Zsivoczky        7.14      2.12 49.40 8287 0.18520562
## Hernu            7.19      2.03 48.73 8237 0.11044925
## Nool             7.53      1.88 48.81 8235 0.13347062
## Bernard          7.48      2.12 49.13 8225 0.13052944
## Schwarzl         7.49      1.94 49.76 8102 0.09337405
## Pogorelov        7.31      2.06 50.79 8084 0.13110126
## Schoenbeck       7.30      1.88 50.30 8077 0.11999841
## Barras           6.99      1.94 49.41 8067 0.09302379
## Smith            6.81      1.91 49.27 8023 0.19407620
## Averyanov        7.34      1.94 49.72 8021 0.05202083
## Ojaniemi         7.50      1.94 49.12 8006 0.07152714
## Smirnov          7.07      1.94 49.11 7993 0.08940710
## Qi               7.34      1.97 49.65 7934 0.04078134
## Drews            7.38      1.88 48.51 7926 0.12034811
## Parkhomenko      6.61      2.03 51.04 7918 0.24366512
## Terek            6.94      1.94 49.56 7893 0.10065640
## Gomez            7.26      1.85 48.61 7865 0.15141050
## Turi             6.91      2.03 51.67 7708 0.16421560
## Lorenzo          7.03      1.85 49.34 7592 0.13457593
## Karlivans        7.26      1.97 50.54 7583 0.07177783
## Korkizoglou      7.07      1.94 51.16 7573 0.09898402
## Uldal            6.99      1.85 50.95 7495 0.13725871
## Casarsa          6.68      1.94 53.20 7404 0.33462857
niv   <- 2 * p1 / n; niv                      # nivel para apalancamiento
## [1] 0.2857143
level <- which(lev>niv)                       # unidades con fuerte apalancamiento
out   <- rbind(level,lev[level])              # salida
rownames(out) <- c("unidad","apalancamiento"); out
##                   Casarsa
## unidad         28.0000000
## apalancamiento  0.3346286
# De acuerdo al gráfico de residuos vs estimados y Resultados del apalancamiento

# podemos ver que la observación Casarsa tiene un alto valor de apalancamiento (0.3346286) respecto a las demás observaciones
# comparación entre modelo total y modelo sin unidades con apalancamiento
# modelo completo Long.jump,High.jump,X400m
lm1 <- lm(y~Long.jump+High.jump+X400m,data=Decathlon); summary(lm1)
## 
## Call:
## lm(formula = y ~ Long.jump + High.jump + X400m, data = Decathlon)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -346.97 -109.56   -0.63   85.62  301.27 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7413.69    2421.87   3.061  0.00536 ** 
## Long.jump     384.10     133.46   2.878  0.00828 ** 
## High.jump    1817.34     380.74   4.773 7.39e-05 ***
## X400m        -115.80      34.17  -3.389  0.00242 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 166.3 on 24 degrees of freedom
## Multiple R-squared:  0.8229, Adjusted R-squared:  0.8008 
## F-statistic: 37.18 on 3 and 24 DF,  p-value: 3.506e-09
residuals(lm1)
##       Sebrle         Clay       Karpov        Macey      Warners    Zsivoczky        Hernu         Nool      Bernard     Schwarzl    Pogorelov   Schoenbeck       Barras        Smith    Averyanov     Ojaniemi      Smirnov           Qi        Drews  Parkhomenko        Terek        Gomez         Turi 
##  215.2129057  301.2738710  -66.2320229 -105.5518060  -68.9375316   -1.4841157   15.2859881  164.5556170 -225.3448670   47.8883114    0.2190354  267.4395910  164.4102930  227.8571810   19.8718296 -126.0639689   24.9423937 -129.7542464 -121.5685597  186.5610994   26.9852740  -70.3761419  -65.7165824 
##      Lorenzo    Karlivans  Korkizoglou        Uldal      Casarsa 
## -170.4992760 -346.9650395 -157.6700120  -65.6990397   59.3598192
# y su plot estimados observados
plot(lm1$fitted.values,y,pch=20,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm1$fitted.values,y,labels=lab,pos=3,offset(0.5))
points(lm1$fitted.values,lm1$fitted.values,pch=20,col="red")

# y su plot estimados residuos
plot(lm1$fitted.values,lm1$residuals,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm1$fitted.values,lm1$residuals,labels=lab,pos=3,offset(0.5))

# modelo sin unidades con fuerte apalancamiento (Todas las observaciones menos la Casarsa)
lm2 <- lm(y[-level]~Long.jump+High.jump+X400m,data=Decathlon[-level,]); summary(lm2)
## 
## Call:
## lm(formula = y[-level] ~ Long.jump + High.jump + X400m, data = Decathlon[-level, 
##     ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -336.40 -116.41   -0.39  107.62  303.35 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7828.29    2645.91   2.959  0.00704 ** 
## Long.jump     380.87     135.99   2.801  0.01015 *  
## High.jump    1817.48     387.37   4.692  0.00010 ***
## X400m        -123.75      39.38  -3.143  0.00456 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 169.2 on 23 degrees of freedom
## Multiple R-squared:  0.8013, Adjusted R-squared:  0.7753 
## F-statistic: 30.91 on 3 and 23 DF,  p-value: 3.039e-08
residuals(lm2)
##       Sebrle         Clay       Karpov        Macey      Warners    Zsivoczky        Hernu         Nool      Bernard     Schwarzl    Pogorelov   Schoenbeck       Barras        Smith    Averyanov     Ojaniemi      Smirnov           Qi        Drews  Parkhomenko        Terek        Gomez         Turi 
##  210.2921523  303.3507635  -83.5739304 -106.8201918  -77.2625122   -0.3942746   11.2207903  162.2459102 -225.3041767   52.9971765   12.9221801  276.2380365  165.1200652  226.8760884   24.1779642 -126.0132350   23.5244656 -126.0090402 -126.7489866  198.9954930   28.7265912  -75.1446887  -47.3021343 
##      Lorenzo    Karlivans  Korkizoglou        Uldal 
## -170.2045637 -336.3993870 -142.7826364  -52.7279195
# hay que tirar las unidades también de la representación gráfica
plot(lm2$fitted.values,y[-level],pch=20,asp=1,xlab="estimados",ylab="observados")
abline(0,1,col="red")
text(lm2$fitted.values,y[-level],labels=lab,pos=3,offset(0.5))
points(lm2$fitted.values,lm2$fitted.values,pch=20,col="red")

plot(lm2$fitted.values,lm2$residuals,asp=1,pch=20,xlab="estimados",ylab="residuos")
abline(0,0,col="red")
text(lm2$fitted.values,lm2$residuals,labels=lab,pos=3,offset(0.5))

# gráficos de comparación de estimados y residuos
# ambos contra y y no contra los estimados
plot(y,lm1$fitted.values,pch=20,asp=1,ylab="fitted values")
abline(0,1)
text(y,lm1$fitted.values,y,labels=lab,pos=4,offset(0.5))
points(y[-out[1,]],lm2$fitted.values,pch=20,col="red")
text(y[-out[1,]],lm2$fitted.values,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))
legend("bottomright", # Escoge la posición adecuada para tu gráfico
       legend=c("Modelo 1", "Modelo 2 sin Casarsa"), 
       pch=c(2, 2),
       col=c("black", "red"),
       title="Leyenda",
       cex=0.5)

# por esto hay que cambiar el signo a los residuos
plot(y,-lm1$residuals,pch=20,asp=1,ylab="residuals")
abline(h=0)
text(y,-lm1$residuals,labels=lab,pch=20,pos=4,offset(0.5))
points(y[-out[1,]],-lm2$residuals,pch=20,col="red")
text(y[-out[1,]],-lm2$residuals,labels=lab[-out[1,]],col="red",pos=3,offset(0.5))

# Comentarios de Modelos (Elección de Modelo):
# Análisis de Varianza

# Modelo 1: Points ~ X100m + Shot.put + Discus

#              Degrees of Freedom Sum of squares Mean squares
# Regression                  4     1817691554  454422888.4
# Residuals                  24        1218703      50779.3
# Total                      28     1818910257          0.0

# Modelo 2: Points ~ X110m.hurdle + Javeline + X1500m

#             Degrees of Freedom Sum of squares Mean squares
# Regression                  4     1817304528 454326132.01
# Residuals                  24        1605729     66905.37
# Total                      28     1818910257         0.00

# Modelo 3: Points ~ Long.jump + High.jump + X400m

#             Degrees of Freedom Sum of squares Mean squares
# Regression                  4   1818246549.1  454561637.3
# Residuals                  24       663707.9      27654.5
# Total                      28   1818910257.0          0.0

# Conclusión:

# Basado en la suma de cuadrados de la regresión y el error cuadrático medio, el Modelo 3 parece ser el mejor de los tres para modelar la variable "Points" en función de "Long.jump", "High.jump", y "X400m". Este modelo no solo explica más variabilidad en los puntos sino que también tiene el menor MSE, lo que sugiere un ajuste más preciso y menos variabilidad en los errores de predicción.