Cargar datos
# Attach readxl (Excel import) and MVN (multivariate normality tests) first.
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(MVN)
## Warning: package 'MVN' was built under R version 4.4.3
# NOTE(review): the working directory is a hard-coded, machine-specific path;
# the script only runs as-is on a machine where this folder exists.
setwd("C:/Users/prestamour/Downloads")
# Read the workbook from the working directory and print the resulting tibble.
datos <- read_excel(path = "punto1.xlsx")
datos
## # A tibble: 205 × 3
## citympg highwaympg price
## <dbl> <dbl> <dbl>
## 1 21.1 27 13495
## 2 21.1 27 16500
## 3 19.1 26 16500
## 4 24.1 30 13950
## 5 18.1 22 17450
## 6 19.1 25 15250
## 7 19.1 25 17710
## 8 19.1 25 18920
## 9 17.1 20 23875
## 10 16.1 22 17859.
## # ℹ 195 more rows
#PUNTO 1
# Sample mean vector: one mean per column of datos.
vector_medias <- colMeans(datos)
vector_medias
## citympg highwaympg price
## 25.35366 30.75122 13276.71057
# Sample covariance matrix of the columns of datos.
S <- cov(datos)
S
## citympg highwaympg price
## citympg 43.91573 44.18106 -36079.82
## highwaympg 44.18106 47.42310 -38378.26
## price -36079.81594 -38378.25865 63821761.58
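Las covarianzas de price con citympg y highwaympg son negativas y de gran magnitud (-36079.82 y -38378.26): a mayor precio, menor rendimiento de combustible. Price es, además, la variable con mayor varianza, es decir, la que más aporta a la variabilidad total.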
#PUNTO 2
# Total variance: trace of the covariance matrix (sum of the variances).
VT <- sum(diag(S))
VT
## [1] 63821853
# Generalized variance: determinant of the covariance matrix.
VG <- det(S)
VG
## [1] 4275632967
# Largest single variance, taken from the diagonal of S instead of a
# hard-coded position, so it stays correct if the column order changes.
mayor_varianza <- max(diag(S))
mayor_varianza
## [1] 63821762
# Share of the total variance explained by that variable, as a percentage.
# With the covariance matrix printed above, the other two variances are tiny
# relative to the largest one, so the share rounds to 100 at three decimals.
porcentaje <- round(mayor_varianza / VT * 100, 3)
print(paste0(porcentaje, "%"))
## [1] "100%"
¿Cuál variable aporta más? La variable price es la que aporta más, dado que tiene la mayor de las varianzas (63821761.58), prácticamente el 100% de la varianza total.
¿Cuál variable aporta menos? La variable citympg es la que aporta menos, pues tiene la menor varianza (43.92).
#PUNTO 3
# Squared Mahalanobis distance of every row of datos from the mean vector,
# computed with the sample covariance matrix S.
D2 <- mahalanobis(x = datos, center = vector_medias, cov = S)
D2
## [1] 0.7640070 0.5060553 1.7093386 0.1232181 2.1484778 1.2259349
## [7] 0.9918857 1.0124621 3.7004890 2.2659659 0.2896380 0.3474236
## [13] 2.0447083 2.0887230 2.0512727 5.2784451 15.3085386 9.2188685
## [19] 13.8704170 28.8477985 4.1572655 3.9908234 1.5165580 1.2207455
## [25] 1.5276503 1.4974061 1.4761751 1.0116560 0.8960605 1.9796623
## [31] 17.5738986 1.4898766 4.6010412 1.7644367 1.6318570 1.5990622
## [37] 1.5990622 0.5474599 0.3272238 0.3658488 0.1949941 0.9585428
## [43] 0.3243068 2.0064119 4.8068424 4.8068424 0.5128064 5.8710174
## [49] 8.1289016 8.2551620 9.0487022 1.5388481 1.4924593 1.4972528
## [55] 1.4765326 3.9807507 3.5255128 2.7635479 3.2910175 0.4640364
## [61] 0.5420120 0.1864584 0.2270007 4.0466120 0.1310282 3.1418069
## [67] 6.8498566 4.1972269 5.7159594 5.6696265 8.2237304 9.5605178
## [73] 10.1645419 14.6986149 19.6533572 1.1139598 3.9823529 1.5308781
## [79] 1.4985992 1.3211001 1.4041606 1.2347701 2.1012669 1.4185459
## [85] 1.5127664 1.6675294 1.3122472 1.5890586 1.5890586 1.0531168
## [91] 11.8310537 0.9016837 0.8835981 0.8490793 0.8518436 0.8310759
## [97] 0.8417031 0.8270469 0.8254484 0.9221136 0.8652867 3.0615477
## [103] 2.7083265 1.6178453 1.0100695 1.6537725 0.9926330 2.3895954
## [109] 0.3618283 2.1729000 10.5220970 1.2659582 1.1917303 1.0888834
## [115] 10.1564513 1.0971205 1.5796502 1.3135522 3.9908234 1.2207455
## [121] 1.5276503 1.4974061 1.4761751 0.8960605 2.0514370 3.9438553
## [127] 9.9504270 11.3259993 14.4896745 18.9970179 2.7506192 2.6122364
## [133] 1.6505196 1.5783000 1.2103462 1.1980586 1.7080817 1.7381715
## [139] 1.2906943 1.1834685 0.9923204 1.1634004 0.6577197 0.2655439
## [145] 8.8734744 0.4677264 1.5935362 0.3500117 1.5992626 11.8387530
## [151] 3.0084582 1.5193511 1.5091173 0.8779257 0.7210505 0.5134913
## [157] 1.3036651 1.2833468 5.9928387 9.3071869 9.2847158 0.4159384
## [163] 0.3129348 0.6022364 0.5756739 2.9164856 2.8432171 1.0479387
## [169] 0.6911348 0.6026631 0.3544849 0.2993824 0.5458549 0.4902089
## [175] 2.6641416 0.3053750 0.2078099 0.1837609 1.4803726 1.1906721
## [181] 1.6126745 1.2340511 8.4100158 1.0612101 8.4995397 1.0247230
## [187] 0.9797350 4.1651947 0.2605432 0.4015565 0.7827249 1.8662461
## [193] 2.7777072 0.1085357 0.3894556 0.3230014 0.7943758 0.8235731
## [199] 1.7348703 1.6802673 0.2524276 1.0196871 1.3813178 8.1801102
## [205] 1.6319265
# Table pairing each observation's row number with its Mahalanobis distance.
# seq_len() is used instead of 1:nrow() so an empty data set yields an empty
# index rather than c(1, 0).
tabla_resultados <- data.frame(
  Observacion = seq_len(nrow(datos)),
  D2_Mahalanobis = D2
)
# Farthest observation: the row with the largest distance.
indice_max <- which.max(D2)
observacion_mas_lejana <- datos[indice_max, ]
observacion_mas_lejana
## # A tibble: 1 × 3
## citympg highwaympg price
## <dbl> <dbl> <dbl>
## 1 45.1 43 6295
# Closest observation: the row with the smallest distance.
indice_min <- which.min(D2)
observacion_mas_cercana <- datos[indice_min, ]
observacion_mas_cercana
## # A tibble: 1 × 3
## citympg highwaympg price
## <dbl> <dbl> <dbl>
## 1 25.1 31 12290
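La observación que está a mayor distancia del vector de medias es la número 20 (citympg = 45.1, highwaympg = 43, price = 6295), con D² ≈ 28.85; por el contrario, la más cercana es la número 194 (citympg = 25.1, highwaympg = 31, price = 12290), con D² ≈ 0.11.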
# Mardia's multivariate skewness and kurtosis test (original note: n >= 20).
library(MVN)
resultado_mardia <- mvn(datos, mvn_test = "mardia")