library(dplyr)
# Candidate coordinates: a 0.01-step grid spanning the observed range of each
# petal measurement.
petal_length <- with(
  iris,
  seq(from = min(Petal.Length), to = max(Petal.Length), by = 0.01)
)
petal_width <- with(
  iris,
  seq(from = min(Petal.Width), to = max(Petal.Width), by = 0.01)
)

# Fixed seed so the three starting centroids are reproducible. The x
# coordinates are drawn first, then the y coordinates, each without
# replacement from its own grid.
set.seed(161)
cluster_x <- sample(x = petal_length, size = 3)
cluster_y <- sample(x = petal_width, size = 3)
# Scatter plot of the raw petal measurements, with the three current
# centroids overlaid as large triangles (one color per centroid index).
plot(
  x = iris$Petal.Length,
  y = iris$Petal.Width,
  main = "Iris Dataset",
  xlab = "Petal length",
  ylab = "Petal Width",
  pch = 16
)
points(x = cluster_x, y = cluster_y, pch = 17, cex = 2, col = 1:3)
# Assignment step: label every flower with the index (1-3) of its nearest
# centroid, using Euclidean distance in the (petal length, petal width) plane.
# The distance arithmetic runs on whole columns, so no rowwise() grouping is
# needed; only the per-row which.min() goes through apply().
df <- tibble(
  petal_length = iris$Petal.Length,
  petal_width = iris$Petal.Width
) %>%
  mutate(
    distance_1 = sqrt(
      (petal_length - cluster_x[1])^2 + (petal_width - cluster_y[1])^2
    ),
    distance_2 = sqrt(
      (petal_length - cluster_x[2])^2 + (petal_width - cluster_y[2])^2
    ),
    distance_3 = sqrt(
      (petal_length - cluster_x[3])^2 + (petal_width - cluster_y[3])^2
    ),
    # which.min() keeps the first centroid on ties and skips NA distances.
    cluster = apply(cbind(distance_1, distance_2, distance_3), 1, which.min),
    # Squared distance from each point to the centroid it was assigned to.
    centroid_dist = pmin(distance_1, distance_2, distance_3, na.rm = TRUE)^2
  )
# Show the current assignment: points colored by their cluster index, with
# the centroids overlaid as large triangles in the matching colors.
plot(
  x = df$petal_length,
  y = df$petal_width,
  main = "Iris Dataset",
  xlab = "Petal length",
  ylab = "Petal Width",
  col = df$cluster,
  pch = 16
)
points(x = cluster_x, y = cluster_y, pch = 17, cex = 2, col = 1:3)
# Update step: move each centroid to the mean position of the points that
# are currently assigned to it.
new_clusters <- df %>%
  group_by(cluster) %>%
  summarise(
    avg_x = mean(petal_length),
    avg_y = mean(petal_width)
  ) %>%
  ungroup()

# Write the means back by cluster id rather than by row position. A cluster
# that received no points has no row in new_clusters; indexing by id keeps
# its previous centroid and stops the remaining centroids from shifting to a
# different index (which would also leave cluster_x[3] / cluster_y[3] as NA).
cluster_x[new_clusters$cluster] <- new_clusters$avg_x
cluster_y[new_clusters$cluster] <- new_clusters$avg_y
# Assignment step: label every flower with the index (1-3) of its nearest
# centroid, using Euclidean distance in the (petal length, petal width) plane.
# The distance arithmetic runs on whole columns, so no rowwise() grouping is
# needed; only the per-row which.min() goes through apply().
df <- tibble(
  petal_length = iris$Petal.Length,
  petal_width = iris$Petal.Width
) %>%
  mutate(
    distance_1 = sqrt(
      (petal_length - cluster_x[1])^2 + (petal_width - cluster_y[1])^2
    ),
    distance_2 = sqrt(
      (petal_length - cluster_x[2])^2 + (petal_width - cluster_y[2])^2
    ),
    distance_3 = sqrt(
      (petal_length - cluster_x[3])^2 + (petal_width - cluster_y[3])^2
    ),
    # which.min() keeps the first centroid on ties and skips NA distances.
    cluster = apply(cbind(distance_1, distance_2, distance_3), 1, which.min)
  )
# Show the current assignment: points colored by their cluster index, with
# the centroids overlaid as large triangles in the matching colors.
plot(
  x = df$petal_length,
  y = df$petal_width,
  main = "Iris Dataset",
  xlab = "Petal length",
  ylab = "Petal Width",
  col = df$cluster,
  pch = 16
)
points(x = cluster_x, y = cluster_y, pch = 17, cex = 2, col = 1:3)
# Update step: move each centroid to the mean position of the points that
# are currently assigned to it.
new_clusters <- df %>%
  group_by(cluster) %>%
  summarise(
    avg_x = mean(petal_length),
    avg_y = mean(petal_width)
  ) %>%
  ungroup()

# Write the means back by cluster id rather than by row position. A cluster
# that received no points has no row in new_clusters; indexing by id keeps
# its previous centroid and stops the remaining centroids from shifting to a
# different index (which would also leave cluster_x[3] / cluster_y[3] as NA).
cluster_x[new_clusters$cluster] <- new_clusters$avg_x
cluster_y[new_clusters$cluster] <- new_clusters$avg_y
# Assignment step: label every flower with the index (1-3) of its nearest
# centroid, using Euclidean distance in the (petal length, petal width) plane.
# The distance arithmetic runs on whole columns, so no rowwise() grouping is
# needed; only the per-row which.min() goes through apply().
df <- tibble(
  petal_length = iris$Petal.Length,
  petal_width = iris$Petal.Width
) %>%
  mutate(
    distance_1 = sqrt(
      (petal_length - cluster_x[1])^2 + (petal_width - cluster_y[1])^2
    ),
    distance_2 = sqrt(
      (petal_length - cluster_x[2])^2 + (petal_width - cluster_y[2])^2
    ),
    distance_3 = sqrt(
      (petal_length - cluster_x[3])^2 + (petal_width - cluster_y[3])^2
    ),
    # which.min() keeps the first centroid on ties and skips NA distances.
    cluster = apply(cbind(distance_1, distance_2, distance_3), 1, which.min)
  )
# Show the current assignment: points colored by their cluster index, with
# the centroids overlaid as large triangles in the matching colors.
plot(
  x = df$petal_length,
  y = df$petal_width,
  main = "Iris Dataset",
  xlab = "Petal length",
  ylab = "Petal Width",
  col = df$cluster,
  pch = 16
)
points(x = cluster_x, y = cluster_y, pch = 17, cex = 2, col = 1:3)
# Update step: move each centroid to the mean position of the points that
# are currently assigned to it.
new_clusters <- df %>%
  group_by(cluster) %>%
  summarise(
    avg_x = mean(petal_length),
    avg_y = mean(petal_width)
  ) %>%
  ungroup()

# Write the means back by cluster id rather than by row position. A cluster
# that received no points has no row in new_clusters; indexing by id keeps
# its previous centroid and stops the remaining centroids from shifting to a
# different index (which would also leave cluster_x[3] / cluster_y[3] as NA).
cluster_x[new_clusters$cluster] <- new_clusters$avg_x
cluster_y[new_clusters$cluster] <- new_clusters$avg_y
# Assignment step: label every flower with the index (1-3) of its nearest
# centroid, using Euclidean distance in the (petal length, petal width) plane.
# The distance arithmetic runs on whole columns, so no rowwise() grouping is
# needed; only the per-row which.min() goes through apply().
df <- tibble(
  petal_length = iris$Petal.Length,
  petal_width = iris$Petal.Width
) %>%
  mutate(
    distance_1 = sqrt(
      (petal_length - cluster_x[1])^2 + (petal_width - cluster_y[1])^2
    ),
    distance_2 = sqrt(
      (petal_length - cluster_x[2])^2 + (petal_width - cluster_y[2])^2
    ),
    distance_3 = sqrt(
      (petal_length - cluster_x[3])^2 + (petal_width - cluster_y[3])^2
    ),
    # which.min() keeps the first centroid on ties and skips NA distances.
    cluster = apply(cbind(distance_1, distance_2, distance_3), 1, which.min),
    # Squared distance from each point to the centroid it was assigned to.
    centroid_dist = pmin(distance_1, distance_2, distance_3, na.rm = TRUE)^2
  )
# Show the current assignment: points colored by their cluster index, with
# the centroids overlaid as large triangles in the matching colors.
plot(
  x = df$petal_length,
  y = df$petal_width,
  main = "Iris Dataset",
  xlab = "Petal length",
  ylab = "Petal Width",
  col = df$cluster,
  pch = 16
)
points(x = cluster_x, y = cluster_y, pch = 17, cex = 2, col = 1:3)
# Comparing against the dataset colored by its Species label:
# Reference plot: the same petal scatter, colored by the Species factor
# instead of by a computed cluster index.
plot(
  x = iris$Petal.Length,
  y = iris$Petal.Width,
  main = "Iris Dataset",
  xlab = "Petal length",
  ylab = "Petal Width",
  col = iris$Species,
  pch = 16
)
# Sum the centroid_dist column of the current assignment and print the
# resulting one-row summary.
summarise(df, sum(centroid_dist))