Examen final

library(PerformanceAnalytics)

## Cargando paquete requerido: xts

## Cargando paquete requerido: zoo

## 
## Adjuntando el paquete: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## 
## Adjuntando el paquete: 'PerformanceAnalytics'

## The following object is masked from 'package:graphics':
## 
##     legend

library(corrplot)

## corrplot 0.95 loaded

# Store the help information of the "datasets" package in `paquetes`, then
# preview the first entries of the second element of its `info` component.
paquetes <- library(help = "datasets")
head(paquetes[["info"]][[2L]])

## [1] "AirPassengers           Monthly Airline Passenger Numbers 1949-1960"   
## [2] "BJsales                 Sales Data with Leading Indicator"             
## [3] "BOD                     Biochemical Oxygen Demand"                     
## [4] "CO2                     Carbon Dioxide Uptake in Grass Plants"         
## [5] "ChickWeight             Weight versus age of chicks on different diets"
## [6] "DNase                   Elisa assay of DNase"

library(MASS)
# Load the Boston data set into the workspace and keep a working copy in `datos`.
data(Boston)
datos <- Boston

# Open the help page of the data set (variable descriptions).
help("Boston")

## starting httpd help server ... done

# Check what kind of object the working copy is.
class(datos)

## [1] "data.frame"

# Number of rows (observations) and columns (variables).
dim(datos)

## [1] 506  14

Como se observa, el conjunto de datos tiene 14 variables. En este análisis nos enfocamos solo en las variables útiles: se busca explicar el comportamiento de la variable medv, que es el valor medio de las viviendas ocupadas por sus propietarios (en miles de dólares), por medio de las variables rm, que es el número de habitaciones; lstat, que es el porcentaje de la población de bajo estrato; y dis, que es la media ponderada de las distancias a cinco centros de empleo de Boston.

# Predictors and response used in the rest of the analysis.
x1 <- datos[["rm"]]     # number of rooms
x2 <- datos[["lstat"]]  # percentage of lower-status population
x3 <- datos[["dis"]]    # weighted mean distance to five employment centres
y <- datos[["medv"]]    # median value of owner-occupied homes (thousands of USD)

Normalidad

# Shapiro-Wilk normality test for x1 (rm).
shapiro.test(x1)

## 
##  Shapiro-Wilk normality test
## 
## data:  x1
## W = 0.96087, p-value = 2.412e-10

# Shapiro-Wilk normality test for x2 (lstat).
shapiro.test(x2)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.93691, p-value = 8.287e-14

# Shapiro-Wilk normality test for x3 (dis).
shapiro.test(x3)

## 
##  Shapiro-Wilk normality test
## 
## data:  x3
## W = 0.90323, p-value < 2.2e-16

# Shapiro-Wilk normality test for the response y (medv).
shapiro.test(y)

## 
##  Shapiro-Wilk normality test
## 
## data:  y
## W = 0.91718, p-value = 4.941e-16

Dado que el p-valor de todas las variables es menor que 0.05, se rechaza la hipótesis nula, por tanto no se cumple el supuesto de normalidad, debido a esto se calcula el coeficiente de correlación de Spearman:

# Pairwise Spearman correlation chart (with histograms) over every column of
# `datos`; each pairwise test reports a warning about ties.
chart.Correlation(datos, method = "spearman", histogram = TRUE)

## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(as.numeric(x), as.numeric(y), method = method):
## Cannot compute exact p-value with ties

# The row variable plays the role of x and the column variable the role of y.

# Spearman rank-correlation matrix for every pair of columns in `datos`.
mat_cor <- cor(datos, method = "spearman")

# corrplot(mat_cor, method = "circle")

# The p-values overlaid on the plot must come from the same test as the
# coefficients in `mat_cor`. cor.mtest() forwards extra arguments to
# cor.test(), whose default method is Pearson, so the method is set explicitly.
# exact = FALSE requests the asymptotic p-value directly, which avoids the
# "Cannot compute exact p-value with ties" warning for each pair.
significancia1 <- cor.mtest(
  datos,
  conf.level = 0.95,
  method = "spearman",
  exact = FALSE
)

corrplot(
  mat_cor,
  p.mat = significancia1$p, # p-value of each correlation coefficient
  sig.level = 0.05          # significance level used to flag coefficients
)

# Spearman correlation between x1 (rm) and y (medv).
cor(x1,y, method="spearman")

## [1] 0.6335764

# Spearman correlation between x2 (lstat) and y (medv).
cor(x2,y, method="spearman")

## [1] -0.8529141

# Spearman correlation between x3 (dis) and y (medv).
cor(x3,y, method="spearman")

## [1] 0.4458569

# Test whether the Spearman correlation between x1 and y differs from zero.
# The data contain ties, so R warns that no exact p-value can be computed.
cor.test(x1,y, method="spearman")

## Warning in cor.test.default(x1, y, method = "spearman"): Cannot compute exact
## p-value with ties

## 
##  Spearman's rank correlation rho
## 
## data:  x1 and y
## S = 7911922, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6335764

# Test whether the Spearman correlation between x2 and y differs from zero.
# The data contain ties, so R warns that no exact p-value can be computed.
cor.test(x2,y, method="spearman")

## Warning in cor.test.default(x2, y, method = "spearman"): Cannot compute exact
## p-value with ties

## 
##  Spearman's rank correlation rho
## 
## data:  x2 and y
## S = 40008650, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.8529141

# Test whether the Spearman correlation between x3 and y differs from zero.
# The data contain ties, so R warns that no exact p-value can be computed.
cor.test(x3,y, method="spearman")

## Warning in cor.test.default(x3, y, method = "spearman"): Cannot compute exact
## p-value with ties

## 
##  Spearman's rank correlation rho
## 
## data:  x3 and y
## S = 11965217, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.4458569

# Spearman correlation between the predictors x1 (rm) and x2 (lstat).
cor(x1,x2, method="spearman")

## [1] -0.6408316

# Spearman correlation between the predictors x2 (lstat) and x3 (dis).
cor(x2,x3, method="spearman")

## [1] -0.5642622

# Spearman correlation between the predictors x1 (rm) and x3 (dis).
cor(x1,x3, method="spearman")

## [1] 0.2631682

Como se confirmó previamente, ninguna variable sigue una distribución normal. También se observa que el número de habitaciones y el precio medio de las viviendas tienen un coeficiente de correlación moderadamente alto de 0.63 con una alta significancia. El porcentaje de población de bajo estrato tiene un alto coeficiente de correlación con el precio de las viviendas, de -0.85, pero es negativo; es decir, a medida que este porcentaje aumenta, el precio de las viviendas disminuye, y es significativo. Finalmente, la media ponderada de las distancias a cinco centros de empleo de Boston tiene un coeficiente de correlación moderado positivo (0.45) con el precio medio. Cabe resaltar un coeficiente de correlación moderado entre el número de habitaciones y el porcentaje de población de estrato bajo, de -0.64, así que hay que tenerlo en cuenta y comprobar si se está presentando multicolinealidad.

# Stepwise selection by AIC (penalty k = 2) in both directions. The search
# starts from the intercept-only model and may add or drop terms up to the
# full formula y ~ x1 + x2 + x3.
modelo_inicial <- lm(y ~ 1)
scope <- list(lower = ~1, upper = y ~ x1 + x2 + x3)
modelo_both <- stepAIC(
  modelo_inicial,
  scope = scope,
  direction = "both",
  trace = TRUE,
  k = 2
)

## Start:  AIC=2246.51
## y ~ 1
## 
##        Df Sum of Sq   RSS    AIC
## + x2    1   23243.9 19472 1851.0
## + x1    1   20654.4 22062 1914.2
## + x3    1    2668.2 40048 2215.9
## <none>              42716 2246.5
## 
## Step:  AIC=1851.01
## y ~ x2
## 
##        Df Sum of Sq   RSS    AIC
## + x1    1    4033.1 15439 1735.6
## + x3    1     772.4 18700 1832.5
## <none>              19472 1851.0
## - x2    1   23243.9 42716 2246.5
## 
## Step:  AIC=1735.58
## y ~ x2 + x1
## 
##        Df Sum of Sq   RSS    AIC
## + x3    1     351.2 15088 1725.9
## <none>              15439 1735.6
## - x1    1    4033.1 19472 1851.0
## - x2    1    6622.6 22062 1914.2
## 
## Step:  AIC=1725.94
## y ~ x2 + x1 + x3
## 
##        Df Sum of Sq   RSS    AIC
## <none>              15088 1725.9
## - x3    1     351.2 15439 1735.6
## - x1    1    3611.8 18700 1832.5
## - x2    1    6461.2 21549 1904.3

# Show the path followed by the stepwise search (terms added at each step).
modelo_both$anova

## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## y ~ 1
## 
## Final Model:
## y ~ x2 + x1 + x3
## 
## 
##   Step Df   Deviance Resid. Df Resid. Dev      AIC
## 1                          505   42716.30 2246.514
## 2 + x2  1 23243.9140       504   19472.38 1851.009
## 3 + x1  1  4033.0722       503   15439.31 1735.577
## 4 + x3  1   351.1509       502   15088.16 1725.935

Se observa que al aplicar el método both el mejor modelo es con las variables que se habían seleccionado previamente para hacer el análisis. Por tanto proseguimos con este modelo para verificar los supuestos.

# Fit the model chosen by the stepwise search and print its coefficient table
# and goodness-of-fit statistics.
modelo <- lm(formula = y ~ x2 + x1 + x3)
summary(modelo)

## 
## Call:
## lm(formula = y ~ x2 + x1 + x3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.992  -3.133  -0.871   1.910  25.944 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.80829    3.36800   0.834 0.404781    
## x2          -0.72333    0.04933 -14.662  < 2e-16 ***
## x1           4.87339    0.44456  10.962  < 2e-16 ***
## x3          -0.46128    0.13495  -3.418 0.000682 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.482 on 502 degrees of freedom
## Multiple R-squared:  0.6468, Adjusted R-squared:  0.6447 
## F-statistic: 306.4 on 3 and 502 DF,  p-value: < 2.2e-16

\[ y = 2.8 + 4.87x_1 - 0.72x_2 - 0.46x_3 \] Cuando rm, lstat y dis valen 0, el modelo predice un precio medio de las viviendas de 2.8 miles de dólares (aunque este intercepto no es estadísticamente significativo, p = 0.40). Manteniendo constantes las demás variables, por cada unidad que aumenta el número de habitaciones el precio aumenta en promedio 4.87 miles de dólares. Por cada unidad (punto porcentual) que aumenta el porcentaje de población de bajo estrato el precio disminuye en promedio 0.72 miles de dólares, y por cada unidad que aumenta la media ponderada de las distancias a cinco centros de empleo de Boston el precio disminuye en promedio 0.46 miles de dólares.

# 95% confidence intervals for the coefficients of `modelo`.
IC <- confint(modelo, level = 0.95)
print(IC)

##                  2.5 %     97.5 %
## (Intercept) -3.8088107  9.4253959
## x2          -0.8202577 -0.6264039
## x1           3.9999512  5.7468201
## x3          -0.7264256 -0.1961375

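Ningún intervalo de confianza de los coeficientes de x1, x2 y x3 incluye el 0, por tanto estos coeficientes son significativos, lo que respalda una relación lineal entre cada variable explicativa y la respuesta. El intervalo del intercepto (-3.81, 9.43) sí incluye el 0, por lo que el intercepto no es significativo.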

Evaluación de supuestos

# NOTE(review): `m` (the model without x3) is not referenced anywhere else in
# the visible document; every diagnostic below is based on `modelo`.
m <- lm(y ~ x2 + x1)

# Residuals of the three-predictor model, used by the normality test.
ress <- residuals(modelo)

# Standard diagnostic plots for the fitted linear model.
plot(modelo)

En el gráfico de residuals vs fitted se nota una acumulación de los puntos pero ningún patrón en específico; parecen estar aleatoriamente alrededor del 0, lo que confirma el supuesto de media 0 y linealidad. En el gráfico Q-Q de los residuos los puntos en su mayoría están sobre la recta; sin embargo, se realiza el test de Shapiro-Wilk:

# Shapiro-Wilk normality test on the residuals of `modelo`.
shapiro.test(ress)

## 
##  Shapiro-Wilk normality test
## 
## data:  ress
## W = 0.92521, p-value = 3.542e-15

El test muestra que los residuos no siguen una distribución normal, por tanto se viola este supuesto.

En el grafico de scale-location parecen estar aleatoriamente alrededor de la linea, sin embargo se realiza el test

# lmtest provides bptest(); run the studentized Breusch-Pagan test on `modelo`
# to check the constant-variance (homoscedasticity) assumption.
library(lmtest)
bptest(modelo)

## 
##  studentized Breusch-Pagan test
## 
## data:  modelo
## BP = 31.442, df = 3, p-value = 6.86e-07

Un p-valor menor que 0.05 indica que no se cumple el supuesto de homocedasticidad.

En el gráfico de residuals vs leverage el punto 366 se destaca por su alto leverage, además de parecer ser un outlier. Los puntos 369 y 365 no parecen tener alto leverage pero parecen ser atípicos en y. Realizamos el siguiente comando para observar qué puntos destaca R como influyentes:

# Compute the influence diagnostics of `modelo` and list the observations
# flagged as potentially influential.
medidas_influencia <- influence.measures(modelo)
summary(medidas_influencia)

## Potentially influential observations of
##   lm(formula = y ~ x2 + x1 + x3) :
## 
##     dfb.1_ dfb.x2 dfb.x1 dfb.x3 dffit   cov.r   cook.d hat    
## 9   -0.17   0.30   0.10   0.25   0.35_*  1.01    0.03   0.03_*
## 11   0.02  -0.02  -0.01  -0.02  -0.03    1.02_*  0.00   0.02  
## 49  -0.14   0.28   0.07   0.23   0.33_*  1.01    0.03   0.03_*
## 142 -0.06   0.25   0.02   0.05   0.34_*  0.99    0.03   0.02  
## 153 -0.06   0.04   0.05   0.03  -0.06    1.02_*  0.00   0.02  
## 162  0.00  -0.15   0.08  -0.19   0.29_*  0.98    0.02   0.01  
## 163 -0.06  -0.09   0.13  -0.15   0.27_*  0.99    0.02   0.02  
## 164 -0.15   0.00   0.21  -0.10   0.28_*  1.01    0.02   0.02  
## 167 -0.11  -0.05   0.18  -0.14   0.29_*  0.98    0.02   0.02  
## 187 -0.14  -0.01   0.20  -0.08   0.29_*  0.97_*  0.02   0.01  
## 196 -0.18   0.03   0.21   0.06   0.28_*  0.97_*  0.02   0.01  
## 205 -0.18   0.03   0.22   0.03   0.28_*  0.98    0.02   0.01  
## 215 -0.14   0.35   0.07   0.19   0.42_*  0.94_*  0.04   0.02  
## 226 -0.22   0.06   0.27  -0.05   0.30_*  1.01    0.02   0.03_*
## 227  0.01   0.00  -0.01   0.00  -0.01    1.02_*  0.00   0.02  
## 233 -0.03   0.00   0.04  -0.01   0.05    1.03_*  0.00   0.02  
## 254 -0.16   0.09   0.16   0.13   0.21    1.03_*  0.01   0.03_*
## 255  0.00   0.00   0.01  -0.03  -0.04    1.02_*  0.00   0.02  
## 256  0.00   0.00   0.00   0.01   0.01    1.03_*  0.00   0.02  
## 258 -0.20   0.05   0.26  -0.09   0.31_*  1.02    0.02   0.03_*
## 263 -0.20   0.05   0.25  -0.08   0.30_*  1.00    0.02   0.02_*
## 268 -0.26   0.09   0.32  -0.08   0.37_*  0.98    0.03   0.02  
## 284 -0.20   0.05   0.22   0.08   0.30_*  0.97_*  0.02   0.01  
## 287  0.00   0.01   0.00   0.01   0.01    1.03_*  0.00   0.02  
## 352  0.01  -0.01   0.00  -0.05  -0.05    1.03_*  0.00   0.02_*
## 353  0.00  -0.01   0.01  -0.06  -0.07    1.03_*  0.00   0.03_*
## 354 -0.03   0.03   0.01   0.11   0.12    1.04_*  0.00   0.04_*
## 355 -0.01   0.00   0.01  -0.04  -0.05    1.04_*  0.00   0.03_*
## 356 -0.01   0.00   0.02  -0.06  -0.07    1.03_*  0.00   0.03_*
## 365  0.45  -0.12  -0.56   0.18  -0.66_*  0.94_*  0.11   0.03_*
## 366  0.72  -0.55  -0.68  -0.29   0.72_*  1.04_*  0.13   0.08_*
## 368  0.47  -0.32  -0.45  -0.19   0.48_*  1.02    0.06   0.05_*
## 369  0.97  -0.90  -0.84  -0.59   1.03_*  0.87_*  0.26   0.04_*
## 370  0.18  -0.30  -0.08  -0.31   0.40_*  0.94_*  0.04   0.01  
## 371  0.10  -0.24  -0.01  -0.28   0.36_*  0.96_*  0.03   0.02  
## 372  0.24  -0.29  -0.15  -0.36   0.43_*  0.87_*  0.05   0.01  
## 373  0.41  -0.41  -0.30  -0.42   0.54_*  0.85_*  0.07   0.01  
## 374 -0.04   0.24   0.00   0.04   0.35_*  0.99    0.03   0.02  
## 375  0.08   0.33  -0.17   0.07   0.62_*  0.94_*  0.09   0.03_*
## 376  0.15  -0.07  -0.19   0.11  -0.26    0.97_*  0.02   0.01  
## 381  0.15  -0.11  -0.18   0.08  -0.24    0.97_*  0.01   0.01  
## 413  0.02   0.30  -0.08   0.07   0.50_*  0.94_*  0.06   0.02  
## 415 -0.01   0.20  -0.03   0.06   0.30_*  1.01    0.02   0.03_*
## 439 -0.04   0.06   0.03   0.02   0.07    1.03_*  0.00   0.03_*
## 506 -0.17   0.18   0.13   0.14  -0.22    0.97_*  0.01   0.01

La cantidad de puntos potencialmente influyentes es muy grande. El modelo no cumple los supuestos de normalidad y homocedasticidad, y hay demasiados puntos influyentes que pueden estar jalando la recta. El R² de 0.65 indica que el modelo explica aproximadamente el 65% de la variabilidad del precio medio de las viviendas, lo cual, a pesar de no superar el 70%, sigue siendo alto; sin embargo, como no se cumplen los supuestos, se concluye que el modelo no es estadísticamente válido para hacer inferencias y predicciones dentro del rango de los datos analizados.

Examen final

2025-05-26

Normalidad

Evaluación de supuestos