library(ISLR)
library(openxlsx)
# Read the heart dataset from the Excel workbook (path is relative to the
# working directory) and preview its first six rows.
conectar <- read.xlsx(xlsxFile = "heart.xlsx")
head(conectar, n = 6L)
## age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal
## 1 52 1 0 125 212 0 1 168 0 1.0 2 2 3
## 2 53 1 0 140 203 1 0 155 1 3.1 0 0 3
## 3 70 1 0 145 174 0 1 125 1 2.6 0 0 3
## 4 61 1 0 148 203 0 1 161 0 0.0 2 1 3
## 5 62 0 0 138 294 1 1 106 0 1.9 1 3 2
## 6 58 0 0 100 248 0 0 122 0 1.0 1 0 2
## target
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 1
# Standardize every column: subtract the column mean, divide by the column
# standard deviation.
# NOTE(review): `target` is standardized together with the other columns, so
# it also enters the distance and clustering steps further down -- confirm
# that is intended.
conest <- scale(x = conectar, center = TRUE, scale = TRUE)
head(conest) # preview the first rows of the standardized matrix
## age sex cp trestbps chol fbs restecg
## 1 -0.2683056 0.6611813 -0.9153086 -0.3774513 -0.65901038 -0.4186735 0.890820
## 2 -0.1580799 0.6611813 -0.9153086 0.4788735 -0.83345431 2.3861656 -1.003559
## 3 1.7157579 0.6611813 -0.9153086 0.7643151 -1.39555140 -0.4186735 0.890820
## 4 0.7237261 0.6611813 -0.9153086 0.9355801 -0.83345431 -0.4186735 0.890820
## 5 0.8339519 -1.5109689 -0.9153086 0.3646969 0.93036760 2.3861656 0.890820
## 6 0.3930489 -1.5109689 -0.9153086 -1.8046593 0.03876532 -0.4186735 -1.003559
## thalach exang oldpeak slope ca thal target
## 1 0.8209198 -0.7119396 -0.06085868 0.9949476 1.2086307 1.0893199 -1.0261968
## 2 0.2558430 1.4032432 1.72629436 -2.2425804 -0.7316143 1.0893199 -1.0261968
## 3 -1.0481803 1.4032432 1.30078173 -2.2425804 -0.7316143 1.0893199 -1.0261968
## 4 0.5166477 -0.7119396 -0.91188394 0.9949476 0.2385082 1.0893199 -1.0261968
## 5 -1.8740617 -0.7119396 0.70506405 -0.6238164 2.1787531 -0.5218676 -1.0261968
## 6 -1.1785826 -0.7119396 -0.06085868 -0.6238164 -0.7316143 -0.5218676 0.9735213
# Frequency table of the label column.
# NOTE(review): the original used `conectar$labs`, a column that does not
# exist in this data (table() returned "table of extent 0" and the dendrogram
# received labels = NULL). `target` is assumed to be the intended label
# column -- confirm.
table(conectar$target)

## Dendrogram
# Euclidean distance between every pair of standardized rows.
conestdist <- dist(conest, method = "euclidean")
# Fit the complete-linkage tree once and reuse it for both plots.
conesthc <- hclust(conestdist, method = "complete")
plot(conesthc) # default dendrogram, leaves labelled by row
plot(
  conesthc,
  labels = as.character(conectar$target), # label each leaf with its class
  xlab = "",
  ylab = "",
  cex = 0.5, # draw the text at half the default size
  main = "Metodo Complete"
)
# k-means with 4 clusters on the standardized data; nstart = 20 runs 20
# random initialisations and keeps the best one.
# The starting centres are drawn at random, so fix the RNG seed to make the
# cluster numbering (and the plot colours derived from it) reproducible.
set.seed(123)
konect <- kmeans(x = conest, centers = 4, nstart = 20)
# Función PCA: ponerle parámetros
library(FactoMineR)
# PCA on the standardized matrix, keeping every component and suppressing
# FactoMineR's automatic plots (graph = FALSE).
# The original hard-coded ncp = 64, which is larger than the number of
# columns in the data; ncol(conest) expresses "keep all components" without
# a magic number.
# scale.unit = TRUE is kept from the original even though `conest` has
# already been standardized by scale().
konectplot <- PCA(
  X = conest,
  scale.unit = TRUE,
  ncp = ncol(conest),
  graph = FALSE
)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Plot the observations on the first two principal components as points,
# coloured by k-means cluster (the fviz_* functions are factoextra's
# visualisation helpers).
fviz_pca_ind(
  konectplot,
  axes = c(1, 2),
  geom.ind = "point",
  pointsize = 2,
  col.ind = factor(konect$cluster)
)
## ANOVA: batting average by position
# NOTE(review): this binds `posicion` to the base function `c`, not to a
# vector of positions -- the values appear to have been lost. The str()
# output recorded further down shows a character column ("OF", "IF", ...)
# with 327 entries; the original c("OF", "IF", ...) vector must be restored
# here before the data.frame() call below can work.
posicion <- c
bateo <- c(0.359, 0.34, 0.33, 0.341, 0.366, 0.333, 0.37, 0.331, 0.381, 0.332, 0.365, 0.345, 0.313, 0.325, 0.327, 0.337, 0.336, 0.291, 0.34, 0.31, 0.365, 0.356, 0.35, 0.39, 0.388, 0.345, 0.27, 0.306, 0.393, 0.331, 0.365, 0.369, 0.342, 0.329, 0.376, 0.414, 0.327, 0.354, 0.321, 0.37, 0.313, 0.341, 0.325, 0.312, 0.346, 0.34, 0.401, 0.372, 0.352, 0.354, 0.341, 0.365, 0.333, 0.378, 0.385, 0.287, 0.303, 0.334, 0.359, 0.352, 0.321, 0.323, 0.302, 0.349, 0.32, 0.356, 0.34, 0.393, 0.288, 0.339, 0.388, 0.283, 0.311, 0.401, 0.353, 0.42, 0.393, 0.347, 0.424, 0.378, 0.346, 0.355, 0.322, 0.341, 0.306, 0.329, 0.271, 0.32, 0.308, 0.322, 0.388, 0.351, 0.341, 0.31, 0.393, 0.411, 0.323, 0.37, 0.364, 0.321, 0.351, 0.329, 0.327, 0.402, 0.32, 0.353, 0.319, 0.319, 0.343, 0.288, 0.32, 0.338, 0.322, 0.303, 0.356, 0.303, 0.351, 0.325, 0.325, 0.361, 0.375, 0.341, 0.383, 0.328, 0.3, 0.277, 0.359, 0.358, 0.381, 0.324, 0.293, 0.324, 0.329, 0.294, 0.32, 0.361, 0.347, 0.317, 0.316, 0.342, 0.368, 0.319, 0.317, 0.302, 0.321, 0.336, 0.347, 0.279, 0.309, 0.358, 0.318, 0.342, 0.299, 0.332, 0.349, 0.387, 0.335, 0.358, 0.312, 0.307, 0.28, 0.344, 0.314, 0.24, 0.331, 0.357, 0.346, 0.351, 0.293, 0.308, 0.374, 0.362, 0.294, 0.314, 0.374, 0.315, 0.324, 0.382, 0.353, 0.305, 0.338, 0.366, 0.357, 0.326, 0.332, 0.323, 0.306, 0.31, 0.31, 0.333, 0.34, 0.4, 0.389, 0.308, 0.411, 0.278, 0.326, 0.335, 0.316, 0.371, 0.314, 0.384, 0.379, 0.32, 0.395, 0.347, 0.307, 0.326, 0.316, 0.341, 0.308, 0.327, 0.337, 0.36, 0.32, 0.372, 0.306, 0.305, 0.347, 0.281, 0.281, 0.296, 0.306, 0.343, 0.378, 0.393, 0.337, 0.327, 0.336, 0.32, 0.381, 0.306, 0.358, 0.311, 0.284, 0.364, 0.315, 0.342, 0.367, 0.307, 0.351, 0.372, 0.304, 0.296, 0.332, 0.312, 0.437, 0.295, 0.316, 0.298, 0.302, 0.342, 0.364, 0.304, 0.295, 0.305, 0.359, 0.335, 0.338, 0.341, 0.3, 0.378, 0.412, 0.273, 0.308, 0.309, 0.263, 0.291, 0.359, 0.352, 0.262, 0.274, 0.334, 0.343, 0.267, 0.321, 0.3, 0.327, 0.313, 0.316, 0.337, 0.268, 0.342, 0.292, 0.39, 0.332, 0.315, 0.298, 0.298, 
0.331, 0.361, 0.272, 0.287, 0.34, 0.317, 0.327, 0.354, 0.317, 0.311, 0.174, 0.302, 0.302, 0.291, 0.29, 0.268, 0.352, 0.341, 0.265, 0.307, 0.36, 0.305, 0.254, 0.279, 0.321, 0.305, 0.35, 0.308, 0.326, 0.219, 0.23, 0.322, 0.405, 0.321, 0.291, 0.312, 0.357, 0.324)
# Combine positions and batting averages into one data frame and inspect
# its structure.
datos2 <- data.frame(
  posicion = posicion,
  bateo = bateo
)
str(datos2)
## 'data.frame': 327 obs. of 2 variables:
## $ posicion: chr "OF" "IF" "IF" "OF" ...
## $ bateo : num 0.359 0.34 0.33 0.341 0.366 0.333 0.37 0.331 0.381 0.332 ...
# Number of observations per position.
table(datos2[["posicion"]])
##
## C DH IF OF
## 39 14 154 120
# Summary table: mean batting average for each position.
aggregate(bateo ~ posicion, data = datos2, FUN = mean)
## posicion bateo
## 1 C 0.3226154
## 2 DH 0.3477857
## 3 IF 0.3315260
## 4 OF 0.3342500
# Box plot of batting average by position (ggplot2 is attached as a
# dependency of factoextra).
ggplot(data = datos2, aes(x = posicion, y = bateo, color = posicion)) +
  geom_boxplot() +
  theme_bw()

# One-way ANOVA of batting average on position.
# The formula uses bare column names with `data =` instead of `datos2$...`,
# so the term is labelled `posicion` and the fit can be used with
# update()/predict()-style tooling. The grouping variable is passed as an
# explicit factor without modifying `datos2` itself.
# NOTE: the object keeps its original name `anova`, which shadows the
# stats::anova() function name.
anova <- aov(
  bateo ~ posicion,
  data = transform(datos2, posicion = factor(posicion))
)
summary(anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## posicion 3 0.0076 0.002519 1.994 0.115
## Residuals 323 0.4080 0.001263
# Tukey HSD confidence intervals for every pairwise difference between
# positions. No pair differs significantly when every interval crosses the
# dashed vertical line at zero.
plot(TukeyHSD(anova))