library(pacman)
p_load(rmdformats, readr, readxl, ggplot2, plotly, DT, xfun, gridExtra, leaflet, GGally, psych, corrplot, cluster)
# Load the honey-production records from the CSV in the working directory.
pMiel <- read_csv(file = "EstadoProdMiel.csv")
## Rows: 51 Columns: 3
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): Estado
## dbl (2): ProduccionMiel, Year
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Interactive table of the raw records.
datatable(data = pMiel)

# Honey production over the years, with lines and points coloured by Estado.
ggplot(
  data = pMiel,
  mapping = aes(x = Year, y = ProduccionMiel, color = Estado)
) +
  geom_line() +
  geom_point(size = 4)

# Fix the RNG seed so the random starting centers (and therefore the cluster
# numbering) are reproducible between runs.
set.seed(101)

# K-means with 3 clusters on the production column only. The column is
# selected by name rather than by position, and `centers` is spelled out in
# full instead of relying on partial argument matching (`center`).
# nstart = 20 tries 20 random initialisations and keeps the best one.
mielCluster <- kmeans(pMiel["ProduccionMiel"], centers = 3, nstart = 20)

# Print the fitted model (sizes, centers, assignments, sums of squares).
mielCluster
## K-means clustering with 3 clusters of sizes 14, 17, 20
## 
## Cluster means:
##   ProduccionMiel
## 1       9485.879
## 2       2698.033
## 3       5860.918
## 
## Clustering vector:
##  [1] 1 1 3 1 1 1 1 1 1 1 1 1 1 3 3 1 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 2 2
## [39] 2 2 2 2 2 2 2 2 2 2 2 2 2
## 
## Within cluster sum of squares by cluster:
## [1] 19545446  4938183 10719279
##  (between_SS / total_SS =  91.0 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Cross-tabulate the assigned cluster (rows) against the Estado label
# (columns) to see how well the clusters line up with the states.
table(mielCluster[["cluster"]], pMiel[["Estado"]])
##    
##     JALISCO QUINTANA_ROO YUCATAN
##   1       0            0      14
##   2       0           17       0
##   3      17            0       3
# Cluster plot of the full data set, coloured and shaded by the k-means
# assignment; lines = 0 suppresses the connecting lines between clusters.
# NOTE(review): all columns of pMiel are passed here, although the clustering
# itself used only ProduccionMiel -- confirm this is intended.
clusplot(
  pMiel,
  mielCluster[["cluster"]],
  lines = 0,
  shade = TRUE,
  color = TRUE
)

# Elbow method: total within-cluster sum of squares for k = 1..15 clusters,
# fitted on the production column (column 2 of pMiel).
#
# The result is collected as a numeric vector (the previous character
# accumulator silently turned every value into a string), and each fit stays
# local to the anonymous function so the 3-cluster model stored in
# `mielCluster` is no longer overwritten by the last fit of the loop.
tot.withinss <- vapply(
  1:15,
  function(k) kmeans(pMiel[2], centers = k, nstart = 20)$tot.withinss,
  numeric(1)
)

# Plot WSS against k; the "elbow" of the curve suggests a suitable k.
plot(1:15, tot.withinss, type = "b", pch = 19)