# Librerías necesarias
library(readr)
library(ggplot2)
library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(corrplot)
## corrplot 0.84 loaded
library("gplots")
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
Para el conjunto de datos de consumo de proteína,
protein <- read.csv("protein.csv")
protein.data <- protein[2:10]
rownames(protein.data) <- protein[,1]
head(protein.data, 2)
## RedMeat WhiteMeat Eggs Milk Fish Cereals Starch Nuts Fr.Veg
## Albania 10.1 1.4 0.5 8.9 0.2 42.3 0.6 5.5 1.7
## Austria 8.9 14.0 4.3 19.9 2.1 28.0 3.6 1.3 4.3
protein.dist <- dist(protein.data, method = "euclidian")
protein.cmd <- cmdscale(protein.dist)
protein.cmd <- as.data.frame(protein.cmd)
ggplot(protein.cmd, aes(x = V1, y = V2, label = rownames(protein.data))) +
geom_text(alpha = 0.8, size=3, col = "salmon")
protein.dist <- dist(scale(protein.data), method = "euclidian")
protein.cmd <- cmdscale(protein.dist)
protein.cmd <- as.data.frame(protein.cmd)
ggplot(protein.cmd, aes(x = V1, y = V2, label = rownames(protein.data))) +
geom_text(alpha = 0.8, size=3, col = "salmon")
protein.dist <- dist(scale(protein.data), method = "euclidian")
protein.cmd <- cmdscale(protein.dist)
protein.cmd <- protein.cmd%*%matrix(c(0.707,-0.707,0.707,0.707),2,2)
protein.cmd <- as.data.frame(protein.cmd)
ggplot(protein.cmd,aes(x = V1, y = V2, label = rownames(protein.data))) +
geom_text(alpha = 0.7, size = 3, col = "red")
Para la siguiente tabla de contingencia realice un análisis de correspondencia
productos <- matrix(c(66, 60, 67, 45, 35, 275,
12, 13, 13, 15, 12, 65,
95, 84, 94, 63, 49, 365,
30, 32, 32, 36, 28, 158,
205, 189, 206, 159, 124, 883), nrow = 5, byrow = T)
rownames(productos) <- c("Producto A", "Producto B", "Producto C", "Producto D", "Margen activo")
colnames(productos) <- c("Brillo", "Duración", "Olor", "Comodidad", "Limpieza", "Margen activo")
productos
## Brillo Duración Olor Comodidad Limpieza Margen activo
## Producto A 66 60 67 45 35 275
## Producto B 12 13 13 15 12 65
## Producto C 95 84 94 63 49 365
## Producto D 30 32 32 36 28 158
## Margen activo 205 189 206 159 124 883
# Representacion de las filas
ca_ht <- CA(productos[1:4,1:5], graph = F)
filas <- get_ca_row(ca_ht)
head(filas$coord)
## Dim 1 Dim 2 Dim 3
## Producto A -0.06082855 -0.0059168299 -0.0001023606
## Producto B 0.20643585 -0.0004477021 0.0072633936
## Producto C -0.06724154 0.0040817460 0.0001801080
## Producto D 0.18402441 0.0004615380 -0.0032501122
head(filas$cos2)
## Dim 1 Dim 2 Dim 3
## Producto A 0.9906243 9.372864e-03 2.805175e-06
## Producto B 0.9987589 4.697520e-06 1.236430e-03
## Producto C 0.9963216 3.671268e-03 7.148094e-06
## Producto D 0.9996819 6.288193e-06 3.118226e-04
fviz_ca_row(ca_ht, col.row = "cos2",
gradient.cols = c("orange", "yellow","salmon"),
repel = TRUE)
# Representacion de las columnas
ca_ht <- CA(productos[1:4,1:5], graph = F)
columnas <- get_ca_col(ca_ht)
head(columnas$coord)
## Dim 1 Dim 2 Dim 3
## Brillo -0.10663528 0.0066182903 2.416632e-05
## Duración -0.03455815 -0.0039632982 -1.300210e-03
## Olor -0.07969751 -0.0043196923 9.923927e-04
## Comodidad 0.15557791 0.0011688864 -3.426479e-03
## Limpieza 0.16015525 0.0008835009 4.687188e-03
head(columnas$cos2)
## Dim 1 Dim 2 Dim 3
## Brillo 0.9961627 3.837250e-03 5.116222e-08
## Duración 0.9856410 1.296377e-02 1.395228e-03
## Olor 0.9969167 2.928701e-03 1.545740e-04
## Comodidad 0.9994588 5.641747e-05 4.848029e-04
## Limpieza 0.9991138 3.040511e-05 8.557706e-04
fviz_ca_col(ca_ht, col.col = "cos2",
gradient.cols = c("yellow","salmon","steelblue"),
repel = TRUE)
# Representacion de las filas y columnas
fviz_ca_biplot(ca_ht, repel = TRUE)
chisq <- chisq.test(productos)
chisq
##
## Pearson's Chi-squared test
##
## data: productos
## X-squared = 11.335, df = 20, p-value = 0.9371
balloonplot(t(as.table(productos)), xlab ="", ylab="",label = FALSE, show.margins = FALSE)
Según los resultados los datos son independientes.
Para el conjunto de datos iris utilice análisis de clúster jerárquico.
data <-scale(iris[,1:4])
data_d <- dist(data,method = "euclidean")
clust1 <-hclust(data_d,method = "single")
plot(clust1, hang = -0.01, cex = 0.6)
clust1 <-hclust(data_d,method = "complete")
plot(clust1, hang = -0.01, cex = 0.6)
Para los siguientes incisos utilice el conjunto de datos position (posiciones iniciales de jugadores de futbol con 6 integrantes para cada equipo).
position <- read.csv("position.csv")
position <- position[,2:3]
position
## x y
## 1 -1 1
## 2 -2 -3
## 3 8 6
## 4 7 -8
## 5 -12 8
## 6 -15 0
## 7 -13 -10
## 8 15 16
## 9 21 2
## 10 12 -15
## 11 -25 1
## 12 26 0
plot(position, main = 'Posición de jugadores')
labels <- 1:12
text(position$x, position$y, labels, cex = 0.7, pos = 1)
data_d <- dist(position, method = "euclidean")
# k = 3
clust3 <-hclust(data_d, method = "complete")
plot(clust3, hang = -0.01, cex = 0.6)
rect.hclust(clust3, k=3, border = "orange")
# k = 4
clust4 <-hclust(data_d, method = "complete")
plot(clust4, hang = -0.01, cex = 0.6)
rect.hclust(clust4, k=4, border = "orange")
df_analisis <- function(k, m, ajuste) {
matriz <- matrix(data = 1:3, nrow = 1, ncol = 3)
fila <- 1
for (i in 1:k) {
for (elemento in rownames(m)[ajuste==i]) {
matriz <- rbind(matriz, c(m[fila,1], m[fila,2], i))
fila <- fila + 1
}
}
colnames(matriz) <- c("x", "y", "Cluster")
matriz <- matriz[2:13, ]
matriz
}
# k = 3
ajuste3 <- cutree(clust3, k = 3)
table(ajuste3)
## ajuste3
## 1 2 3
## 6 4 2
print(rownames(position)[ajuste3==1])
## [1] "1" "2" "5" "6" "7" "11"
print(rownames(position)[ajuste3==2])
## [1] "3" "8" "9" "12"
print(rownames(position)[ajuste3==3])
## [1] "4" "10"
df_analisis3 <- df_analisis(3, position, ajuste3)
df_analisis3
## x y Cluster
## [1,] -1 1 1
## [2,] -2 -3 1
## [3,] 8 6 1
## [4,] 7 -8 1
## [5,] -12 8 1
## [6,] -15 0 1
## [7,] -13 -10 2
## [8,] 15 16 2
## [9,] 21 2 2
## [10,] 12 -15 2
## [11,] -25 1 3
## [12,] 26 0 3
# k = 4
ajuste4 <- cutree(clust3, k = 4)
table(ajuste4)
## ajuste4
## 1 2 3 4
## 3 4 2 3
print(rownames(position)[ajuste4==1])
## [1] "1" "2" "7"
print(rownames(position)[ajuste4==2])
## [1] "3" "8" "9" "12"
print(rownames(position)[ajuste4==3])
## [1] "4" "10"
print(rownames(position)[ajuste4==4])
## [1] "5" "6" "11"
df_analisis4 <- df_analisis(4, position, ajuste4)
df_analisis4
## x y Cluster
## [1,] -1 1 1
## [2,] -2 -3 1
## [3,] 8 6 1
## [4,] 7 -8 2
## [5,] -12 8 2
## [6,] -15 0 2
## [7,] -13 -10 2
## [8,] 15 16 3
## [9,] 21 2 3
## [10,] 12 -15 4
## [11,] -25 1 4
## [12,] 26 0 4
# k = 3
fviz_cluster(list(data = position, cluster = ajuste3), palette = c("red", "steelblue", "green"),
ellipse.type = "convex",
show.clust.cent = FALSE,
ggtheme = theme_minimal())
# k = 4
fviz_cluster(list(data = position, cluster = ajuste4), palette = c("red", "steelblue", "green", "orange"),
ellipse.type = "convex",
show.clust.cent = FALSE,
ggtheme = theme_minimal())
clust3 <-hclust(data_d, method = "ward.D2")
plot(clust3, hang = -0.01, cex=0.6)
rect.hclust(clust3, k = 2, border = "orange")
kposition <- position
df_d <- dist(kposition)
set.seed(2)
km_p <- kmeans(df_d,nstart = 25,centers = 2)
rownames(kposition)[km_p$cluster==1]
## [1] "3" "4" "8" "9" "10" "12"
rownames(kposition)[km_p$cluster==2]
## [1] "1" "2" "5" "6" "7" "11"
cluster = km_p$cluster
head(cbind(kposition, cluster))
## x y cluster
## 1 -1 1 2
## 2 -2 -3 2
## 3 8 6 1
## 4 7 -8 1
## 5 -12 8 2
## 6 -15 0 2
fviz_cluster(km_p, data = kposition, palette=c("orange", "steelblue"),
star.plot = TRUE,ellipse.type = "eclid",
repel=T,
ggtheme = theme_minimal())