This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

plot(cars)

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

## analisis cluster
install.packages("rattle.data")#instalar paquete
Error in install.packages : Updating loaded packages
install.packages("reshape")
Error in install.packages : Updating loaded packages
install.packages("ggplot2")
Error in install.packages : Updating loaded packages
install.packages("factoextra")
Error in install.packages : Updating loaded packages
install.packages("cluster")
Error in install.packages : Updating loaded packages
library("cluster")
library("factoextra")
library(rattle.data)#cargar la libreria
library(reshape)
library(ggplot2)
# Load the datasets
data(wine, package="rattle.data")# load the wine dataset shipped with rattle.data
head(wine,10)# show the first 10 rows
help(wine)# open the help page describing the data
# The dataset contains the results of a chemical analysis of wines grown in a
# specific area of Italy.
# Three types of wine are represented in the 178 samples, with the results of 13 chemical analyses recorded for each sample.
# There is one categorical variable (Type); the other variables are the numeric measurements.
# Basic statistics
# Mean of Proanthocyanins within each wine Type, ignoring missing values
with(wine, tapply(Proanthocyanins, list(Type), mean, na.rm=TRUE))
       1        2        3 
1.899322 1.630282 1.153542 
# Descriptive statistics for the 13 chemical measurements.
# The earlier call also passed groups=, statistics= and quantiles=. Those look
# like RcmdrMisc::numSummary() arguments; summary() on a data frame ignores
# them, so no per-type statistics were produced. They are dropped here and the
# per-type summary is computed explicitly with by().
chem.vars <- c("Alcalinity", "Alcohol", "Ash", "Color", "Dilution", "Flavanoids",
  "Hue", "Magnesium", "Malic", "Nonflavanoids", "Phenols", "Proanthocyanins", "Proline")
# Overall summary (min, quartiles, mean, max) of every measurement
summary(wine[, chem.vars, drop=FALSE])
# The same summary computed separately for each wine Type
by(wine[, chem.vars, drop=FALSE], wine$Type, summary)
   Alcalinity       Alcohol           Ash            Color       
 Min.   :10.60   Min.   :11.03   Min.   :1.360   Min.   : 1.280  
 1st Qu.:17.20   1st Qu.:12.36   1st Qu.:2.210   1st Qu.: 3.220  
 Median :19.50   Median :13.05   Median :2.360   Median : 4.690  
 Mean   :19.49   Mean   :13.00   Mean   :2.367   Mean   : 5.058  
 3rd Qu.:21.50   3rd Qu.:13.68   3rd Qu.:2.558   3rd Qu.: 6.200  
 Max.   :30.00   Max.   :14.83   Max.   :3.230   Max.   :13.000  
    Dilution       Flavanoids         Hue           Magnesium     
 Min.   :1.270   Min.   :0.340   Min.   :0.4800   Min.   : 70.00  
 1st Qu.:1.938   1st Qu.:1.205   1st Qu.:0.7825   1st Qu.: 88.00  
 Median :2.780   Median :2.135   Median :0.9650   Median : 98.00  
 Mean   :2.612   Mean   :2.029   Mean   :0.9574   Mean   : 99.74  
 3rd Qu.:3.170   3rd Qu.:2.875   3rd Qu.:1.1200   3rd Qu.:107.00  
 Max.   :4.000   Max.   :5.080   Max.   :1.7100   Max.   :162.00  
     Malic       Nonflavanoids       Phenols      Proanthocyanins
 Min.   :0.740   Min.   :0.1300   Min.   :0.980   Min.   :0.410  
 1st Qu.:1.603   1st Qu.:0.2700   1st Qu.:1.742   1st Qu.:1.250  
 Median :1.865   Median :0.3400   Median :2.355   Median :1.555  
 Mean   :2.336   Mean   :0.3619   Mean   :2.295   Mean   :1.591  
 3rd Qu.:3.083   3rd Qu.:0.4375   3rd Qu.:2.800   3rd Qu.:1.950  
 Max.   :5.800   Max.   :0.6600   Max.   :3.880   Max.   :3.580  
    Proline      
 Min.   : 278.0  
 1st Qu.: 500.5  
 Median : 673.5  
 Mean   : 746.9  
 3rd Qu.: 985.0  
 Max.   :1680.0  
data(weather, package="rattle.data")# load the weather dataset
head(weather)
# Normality test for the Alcohol variable. Note: the active call below tests the
# pooled sample of all wines; the per-type variant is the commented-out line.
#normalityTest(Alcohol ~ Type, test="shapiro.test", data=wine)
prueba<- shapiro.test(wine$Alcohol)
prueba

    Shapiro-Wilk normality test

data:  wine$Alcohol
W = 0.9818, p-value = 0.02005
# Plots
#with(wine, dotplot(Alcohol, by=Type, bin=FALSE))
# Box plot of Alcohol for each wine Type.
# id.method= was removed: it is not an argument of graphics::boxplot() (it
# appears to come from the car/Rcmdr Boxplot() wrapper) and does nothing useful here.
boxplot(Alcohol~Type, data=wine, xlab="químico", ylab="", main="boxplot de vino")

### Cluster analysis
# Standardize every variable with scale() defaults (centre and scale each column)
# Column 1 (Type) is the class label, not a measurement, so it is excluded
wine.stand <- scale(wine[, -1])
print(wine.stand) # show the standardized data
           Alcohol       Malic         Ash   Alcalinity   Magnesium
  [1,]  1.51434077 -0.56066822  0.23139979 -1.166303174  1.90852151
  [2,]  0.24559683 -0.49800856 -0.82566722 -2.483840525  0.01809398
  [3,]  0.19632522  0.02117152  1.10621386 -0.267982252  0.08810981
  [4,]  1.68679140 -0.34583508  0.48655389 -0.806974805  0.92829983
  [5,]  0.29486844  0.22705328  1.83522559  0.450674485  1.27837900
  [6,]  1.47738706 -0.51591132  0.30430096 -1.286079296  0.85828399
  [7,]  1.71142720 -0.41744613  0.30430096 -1.465743481 -0.26196936
  [8,]  1.30493643 -0.16680747  0.88751034 -0.567422559  1.48842650
  [9,]  2.25341491 -0.62332789 -0.71631546 -1.645407665 -0.19195352
 [10,]  1.05857838 -0.88291793 -0.35180959 -1.046527051 -0.12193769
 [11,]  1.35420804 -0.15785609 -0.24245783 -0.447646437  0.36817315
 [12,]  1.37884384 -0.76654998 -0.16955666 -0.806974805 -0.33198519
 [13,]  0.92308146 -0.54276546  0.15849862 -1.046527051 -0.75208020
 [14,]  2.15487169 -0.54276546  0.08559744 -2.423952463 -0.61204853
 [15,]  1.69910930 -0.41744613  0.04914686 -2.244288279  0.15812565
 [16,]  0.77526663 -0.47115441  1.21556562 -0.687198682  0.85828399
 [17,]  1.60056608 -0.37268923  1.28846679  0.151234178  1.41841067
 [18,]  1.02162467 -0.68598755  0.92396093  0.151234178  1.06833150
 [19,]  1.46506916 -0.66808479  0.41365272 -0.896806897  0.57822065
 [20,]  0.78758453  0.68357369  0.70525741 -1.286079296  1.13834733
 [21,]  1.30493643 -0.63227927 -0.31535901 -1.046527051  1.83850567
 [22,] -0.08698653  1.31017034  1.03331269 -0.267982252  0.15812565
 [23,]  0.87380985 -0.42639751 -0.02375431 -0.866862867  0.08810981
 [24,] -0.18552975 -0.65913341  0.55945507 -0.507534498 -0.33198519
 [25,]  0.61513390 -0.47115441  0.88751034  0.151234178 -0.26196936
 [26,]  0.06082829 -0.25632128  3.11099611  1.648435713  1.69847400
 [27,]  0.47963697 -0.50695994  0.92396093 -1.016583020 -0.47201686
 [28,]  0.36877585 -0.55171684 -0.82566722 -0.747086744 -0.40200103
 [29,]  1.07089628 -0.39059199  1.58007149 -0.028430007  0.50820482
 [30,]  1.25566482 -0.58752236 -0.57051311 -1.046527051 -0.26196936
 [31,]  0.89844565 -0.74864721  1.21556562  0.899834945  0.08810981
 [32,]  0.71367712 -0.60542512 -0.02375431 -0.118262099  0.43818899
 [33,]  0.83685614 -0.45325165 -0.02375431 -0.687198682  0.29815732
 [34,]  0.93539936 -0.72179307  1.21556562  0.001514024  2.25860068
 [35,]  0.62745180 -0.48010579  1.03331269 -0.148206130  0.71825232
 [36,]  0.59049809 -0.47115441  0.15849862  0.300954331  0.01809398
 [37,]  0.34414005 -0.62332789  1.72587383 -1.196247204  0.71825232
 [38,]  0.06082829 -0.61437650  0.66880683 -0.447646437 -0.12193769
 [39,]  0.08546410 -0.74864721 -0.97146956 -1.196247204 -0.12193769
 [40,]  1.50202286  1.48024658  0.52300448 -1.884959911  1.97853734
 [41,]  0.68904131 -0.56066822 -0.20600725 -0.986638989  1.20836316
 [42,]  0.50427278  1.34597587 -0.89856839 -0.208094191 -0.68206436
 [43,]  1.08321419 -0.39954337  0.81460917 -1.345967358  0.08810981
 [44,]  0.29486844  1.47129519 -0.27890842 -0.597366590  0.22814148
 [45,]  0.06082829 -0.50695994 -0.97146956 -0.747086744  0.50820482
 [46,]  1.48970496  1.52500348  0.26785038 -0.178150160  0.78826816
 [47,]  1.69910930  1.12219135 -0.31535901 -1.046527051  0.15812565
 [48,]  1.10784999 -0.58752236 -0.89856839 -1.046527051  0.08810981
 [49,]  1.35420804 -0.28317542  0.12204803 -0.208094191  0.22814148
 [50,]  1.15712160 -0.54276546 -0.35180959 -0.627310621  0.57822065
 [51,]  0.06082829 -0.54276546 -1.19017308 -2.124512156 -0.54203270
 [52,]  1.02162467 -0.61437650  0.85105976 -0.687198682 -0.40200103
 [53,]  1.00930677 -0.52486270  0.19494920 -1.645407665  0.78826816
 [54,]  0.94771726 -0.39059199  1.14266445 -0.717142713  1.06833150
 [55,]  0.91076355 -0.59647374 -0.42471076 -0.926750928  1.27837900
 [56,]  0.68904131 -0.54276546  0.34075155  0.300954331  1.13834733
 [57,]  1.50202286 -0.56961960 -0.24245783 -0.956694959  1.27837900
 [58,]  0.35645795 -0.32793232  1.14266445 -0.806974805  0.15812565
 [59,]  0.88612775 -0.81130688  0.48655389 -0.836918836  0.57822065
 [60,] -0.77678907 -1.24992453 -3.66881295 -2.663504709 -0.82209603
 [61,] -0.82606067 -1.10670244 -0.31535901 -1.046527051  0.08810981
 [62,] -0.44420570 -0.87396654 -1.26307425 -0.806974805  0.01809398
 [63,]  0.82453824 -0.97243173 -1.62758012 -0.447646437 -0.40200103
 [64,] -0.77678907 -1.07984830 -0.75276604 -0.148206130 -0.89211187
 [65,] -1.02314711 -0.79340412  0.59590565 -0.148206130  0.29815732
 [66,] -0.77678907 -1.00823725  0.70525741 -0.417702406 -0.12193769
 [67,]  0.13473571 -1.18726487 -2.42949302 -1.345967358 -1.52225438
 [68,] -0.77678907 -1.04404278 -1.62758012  0.031458055 -1.52225438
 [69,]  0.41804746 -1.24992453 -0.02375431 -0.747086744  0.71825232
 [70,] -0.97387550 -1.02614002 -2.24724008 -0.806974805  3.58890153
 [71,] -0.87533228 -0.65018203 -0.57051311  0.271010300  0.22814148
 [72,]  1.05857838 -0.73969583  1.10621386  1.648435713 -0.96212770
 [73,]  0.60281600 -0.60542512 -0.46116135  1.348995406 -0.89211187
 [74,] -0.01307912 -0.59647374  0.85105976  3.145637249  2.74871152
 [75,] -1.28182306 -1.11565382 -0.24245783  0.450674485  0.08810981
 [76,] -1.65136013 -0.40849475 -1.62758012 -1.046527051 -0.19195352
            Phenols    Flavanoids Nonflavanoids Proanthocyanins
  [1,]  0.806721729  1.0319080692   -0.65770780      1.22143845
  [2,]  0.567048088  0.7315652835   -0.81841060     -0.54318872
  [3,]  0.806721729  1.2121137407   -0.49700500      2.12995937
  [4,]  2.484437221  1.4623993954   -0.97911340      1.02925134
  [5,]  0.806721729  0.6614853002    0.22615759      0.40027531
  [6,]  1.557699140  1.3622851335   -0.17559941      0.66234866
  [7,]  0.327374446  0.4912910549   -0.49700500      0.67982021
  [8,]  0.487156874  0.4812796287   -0.41665360     -0.59560339
  [9,]  0.806721729  0.9518166597   -0.57735640      0.67982021
 [10,]  1.094330099  1.1220109049   -1.13981619      0.45268998
 [11,]  1.046395371  1.2922051502   -1.13981619      1.37868246
 [12,] -0.151972837  0.4011882192   -0.81841060     -0.03651359
 [13,]  0.487156874  0.7315652835   -0.57735640      0.38280376
 [14,]  1.286069013  1.6626279192    0.54756319      2.12995937
 [15,]  1.605633868  1.6125707883   -0.57735640      2.39203271
 [16,]  0.886612943  0.8817366764   -0.49700500     -0.22870071
 [17,]  0.806721729  1.1119994787   -0.25595080      0.66234866
 [18,]  1.046395371  1.3722965597    0.30650899      0.22555975
 [19,]  1.605633868  1.9029021478   -0.33630220      0.47016154
 [20,]  0.646939302  1.0018737906   -1.54157319      0.12073042
 [21,]  1.126286585  1.1420337573   -0.97911340      0.88947889
 [22,]  0.183570261  0.3811653668   -0.89876200      0.67982021
 [23,]  0.503135117  0.8517023978   -0.73805920      0.17314508
 [24,]  0.295417961  0.3411196621   -0.81841060     -0.22870071
 [25,]  0.375309174  0.5813938906   -0.65770780      0.12073042
 [26,]  0.535091602  0.6514738740    0.86896878      0.57499088
 [27,]  0.886612943  0.9117709549   -0.17559941     -0.24617226
 [28,]  0.167592018  0.1609139906   -0.73805920     -0.42088782
 [29,]  1.046395371  0.9418052335    0.06545479      0.29544598
 [30,]  0.567048088  0.3010739573   -0.81841060      0.67982021
 [31,]  1.126286585  1.2221251668   -0.57735640      1.37868246
 [32,]  0.902591186  1.1620566097   -1.13981619      0.62740554
 [33,]  0.199548504  0.6614853002    0.46721179      0.66234866
 [34,]  1.046395371  0.7115424311    1.11002298     -0.42088782
 [35,]  0.087700804  0.5013024811   -0.57735640     -0.08892826
 [36,]  0.646939302  0.9518166597   -0.81841060      0.47016154
 [37,]  0.487156874  0.6514738740   -0.17559941     -0.40341627
 [38,]  0.247483232  0.4011882192   -0.57735640     -0.26364382
 [39,]  0.167592018  0.6114281692   -0.65770780     -0.38594471
 [40,]  1.126286585  1.0118852168   -1.30051899      0.85453577
 [41,]  1.365960227  1.2621708716   -0.17559941      1.30879623
 [42,]  0.247483232  0.6514738740   -0.73805920     -0.19375759
 [43,]  1.525742654  1.5324793788   -1.54157319      0.19061664
 [44,]  0.551069845  0.6014167430   -0.33630220      0.12073042
 [45,]  1.126286585  0.9718395121   -0.65770780      0.76717799
 [46,]  0.886612943  0.6214395954   -0.49700500     -0.59560339
 [47,]  1.525742654  1.1420337573   -0.73805920      1.04672289
 [48,]  1.286069013  1.3622851335   -1.22016759      0.95936511
 [49,]  0.726830515  0.8917481025   -0.33630220      1.37868246
 [50,]  0.934547672  1.5124565264   -0.33630220      0.85453577
 [51,]  0.678895787  1.2421480192   -1.54157319      2.30467493
 [52,]  0.247483232  0.9618280859   -1.13981619      1.22143845
 [53,]  2.532371949  1.7126850502   -0.33630220      0.48763309
 [54,]  1.126286585  0.7615995621    0.22615759      0.15567353
 [55,]  0.487156874  0.8717252502   -1.22016759      0.05084419
 [56,]  1.062373614  0.7515881359   -1.30051899      1.50098335
 [57,]  1.445851440  0.9718395121   -0.81841060      0.76717799
 [58,]  1.126286585  1.2021023145   -0.41665360      0.12073042
 [59,]  1.765416296  1.6426050669   -1.38087039      0.78464955
 [60,] -0.503494178 -1.4609370523   -0.65770780     -2.04574255
 [61,] -0.391646479 -0.9403428904    2.15459116     -2.06321410
 [62,] -0.439581207 -0.6199772522    1.35107717     -1.69631142
 [63,] -0.311755265 -0.2395430570   -0.33630220     -1.50412430
 [64,]  1.925198724  1.0719537740   -1.38087039      0.48763309
 [65,] -0.647298363 -0.2795887618    0.70826598     -0.97997762
 [66,]  0.199548504  0.6214395954    0.06545479      0.85453577
 [67,]  1.094330099  1.1520451835   -0.81841060      1.20396690
 [68,] -0.295777022 -0.0293031070   -0.73805920     -0.96250606
 [69,]  0.375309174 -0.7301029403    1.51177997     -2.04574255
 [70,] -0.711211334 -0.7501257927   -1.78262739      1.58834113
 [71,] -1.909579543 -1.0104228737    0.06545479     -0.22870071
 [72,]  1.046395371  0.8316795454   -1.22016759      0.48763309
 [73,] -0.663276606 -0.1894859260   -0.73805920     -0.97997762
 [74,]  1.605633868  0.8617138240   -1.22016759      0.64487710
 [75,]  1.733459810  0.1108568597   -1.86297878      0.10325886
 [76,] -1.094689161 -0.4597944332   -0.17559941     -0.77031895
              Color         Hue    Dilution      Proline
  [1,]  0.251008784  0.36115849  1.84272147  1.010159388
  [2,] -0.292496232  0.40490846  1.11031723  0.962526349
  [3,]  0.268262912  0.31740852  0.78636920  1.391223700
  [4,]  1.182731669 -0.42634104  1.18074072  2.328006800
  [5,] -0.318377423  0.36115849  0.44833648 -0.037767469
  [6,]  0.729810822  0.40490846  0.33565890  2.232740722
  [7,]  0.082781041  0.27365854  1.36384178  1.724654973
  [8,] -0.003489596  0.44865844  1.36384178  1.740532653
  [9,]  0.061213382  0.53615839  0.33565890  0.946648670
 [10,]  0.932546820  0.22990857  1.32158768  0.946648670
 [11,]  0.298457635  1.27990794  0.78636920  2.423272878
 [12,] -0.025057256  0.92990815  0.29340481  1.692899614
 [13,]  0.233754657  0.84240820  0.40608239  1.819921051
 [14,]  0.147484019  1.27990794  0.16664254  1.280079943
 [15,]  1.053325713  1.06115807  0.54692935  2.540767708
 [16,]  0.967055075  1.41115786  0.37791299  1.788165692
 [17,]  0.492566569  0.49240841  0.05396496  1.692899614
 [18,]  0.665107844  0.75490825 -0.05871261  1.216569224
 [19,]  1.570949537  1.19240799  0.29340481  2.963113987
 [20,]  0.018078063  0.01115870  1.05397844  0.311541483
 [21,]  0.255322316  0.57990836  1.54694284  0.105131647
 [22,] -0.240733849  0.31740852  1.27933359  0.073376288
 [23,] -0.542681081  0.66740831  1.95539905  0.914893310
 [24,] -0.486605166  0.57990836  1.43426526  0.851382592
 [25,] -0.663459973  0.71115828  1.70187450  0.311541483
 [26,] -0.637578782  0.75490825  0.82862329  0.263908444
 [27,] -0.111327893 -0.16384119  0.85679269  1.422979059
 [28,] -0.477978102  0.27365854  0.22298133  1.708777293
 [29,] -0.240733849  1.27990794  1.11031723  0.533828998
 [30,] -0.154463212  0.36115849  1.37792647  0.914893310
 [31,]  0.276889975  1.01740810  0.13847314  1.708777293
 [32,]  0.794513800  0.57990836  0.37791299  2.439150558
 [33,] -0.525426953  1.19240799  0.36382829  0.771994193
 [34,]  0.147484019  1.27990794  0.54692935  1.550000497
 [35,] -0.370139806  0.62365833  0.36382829  1.105425466
 [36,]  0.018078063  0.36115849  1.20891011  0.549706678
 [37,] -0.197598531  0.57990836  0.23706602  0.422685241
 [38,] -0.348572146  0.71115828 -0.14322079  1.137180826
 [39,] -0.585816399  0.97365812  0.11030375  0.867260271
 [40,]  0.018078063 -0.29509111  1.29341829  0.041620929
 [41,]  0.462371846 -0.03259127  1.08214784  0.152764686
 [42,] -0.335631551 -0.20759117  0.54692935  0.914893310
 [43,]  0.160424615 -0.33884109  1.33567238  1.105425466
 [44,] -0.301123296 -0.60134093  0.54692935 -0.212421946
 [45,] -0.007803128 -0.33884109  1.03989375  0.438562920
 [46,]  0.078467509 -0.38259106  1.01172435  1.057792427
 [47,] -0.068192574  0.36115849  1.16665602  1.010159388
 [48,]  0.449431250 -0.20759117  1.01172435  0.756116514
 [49,]  0.492566569  0.49240841  0.19481193  0.994281709
 [50,]  1.657220175  0.71115828  0.68777632  1.629388895
 [51,]  0.923919756  0.71115828  0.42016708  1.280079943
 [52,]  0.233754657  1.23615797  1.06806314  1.645266575
 [53,]  0.859216778  0.22990857  0.91313147  1.407101380
 [54,]  0.535701888  0.75490825  0.44833648  1.994575527
 [55,]  0.341592953 -0.16384119  0.82862329  0.994281709
 [56,]  0.514134228  0.09865865  0.58918345  1.184813865
 [57,]  0.570210143 -0.07634125  0.98355496  0.708483475
 [58,]  0.406295932  0.49240841  0.32157420  1.661144254
 [59,]  0.751378481 -0.29509111  0.36382829  1.708777293
 [60,] -1.340684477  0.40490846 -1.11506488 -0.720507695
 [61,] -0.771298270  1.27990794 -1.32633534 -0.212421946
 [62,]  0.298457635  0.09865865 -1.43901291 -0.942795210
 [63,] -0.542681081  1.19240799 -0.21364428 -0.371198742
 [64,] -0.262301509  1.14865802  0.36382829 -1.038061288
 [65,] -0.909331290  2.15490741 -0.53759231 -1.244471124
 [66,] -0.197598531  1.01740810 -0.43899943 -0.218773018
 [67,]  0.104348700  0.71115828  0.80045390 -0.777667342
 [68,] -0.163090276  0.71115828  1.22299481 -0.752263054
 [69,] -0.814433589  0.27365854 -0.96013322  0.009865569
 [70,] -0.952466609  1.41115786  0.64552223 -0.091751580
 [71,] -0.866195971 -0.22509116 -1.11506488  0.390929881
 [72,] -0.723849419  1.76115765  0.77228450 -1.069816648
 [73,] -0.568562272  0.09865865  0.23706602 -0.872933420
 [74,] -0.736790015  1.54240778  1.25116420  0.756116514
 [75,] -0.797179461  0.14240862  0.73003041  0.441738456
 [76,] -0.542681081  1.19240799 -0.66435458 -1.012657001
 [ reached getOption("max.print") -- omitted 102 rows ]
attr(,"scaled:center")
        Alcohol           Malic             Ash      Alcalinity 
     13.0006180       2.3363483       2.3665169      19.4949438 
      Magnesium         Phenols      Flavanoids   Nonflavanoids 
     99.7415730       2.2951124       2.0292697       0.3618539 
Proanthocyanins           Color             Hue        Dilution 
      1.5908989       5.0580899       0.9574494       2.6116854 
        Proline 
    746.8932584 
attr(,"scaled:scale")
        Alcohol           Malic             Ash      Alcalinity 
      0.8118265       1.1171461       0.2743440       3.3395638 
      Magnesium         Phenols      Flavanoids   Nonflavanoids 
     14.2824835       0.6258510       0.9988587       0.1244533 
Proanthocyanins           Color             Hue        Dilution 
      0.5723589       2.3182859       0.2285716       0.7099904 
        Proline 
    314.9074743 
## Why must the data be standardized?
# Choosing the cluster-analysis method
# A fixed seed plus several random starts make the k-means solution
# reproducible and independent of a single random initialisation.
set.seed(123)
k.means.fit <- kmeans(wine.stand, 3, nstart = 25) # k = 3
k.means.fit
K-means clustering with 3 clusters of sizes 65, 62, 51

Cluster means:
     Alcohol      Malic        Ash Alcalinity   Magnesium     Phenols
1 -0.9234669 -0.3929331 -0.4931257  0.1701220 -0.49032869 -0.07576891
2  0.8328826 -0.3029551  0.3636801 -0.6084749  0.57596208  0.88274724
3  0.1644436  0.8690954  0.1863726  0.5228924 -0.07526047 -0.97657548
   Flavanoids Nonflavanoids Proanthocyanins      Color        Hue
1  0.02075402   -0.03343924      0.05810161 -0.8993770  0.4605046
2  0.97506900   -0.56050853      0.57865427  0.1705823  0.4726504
3 -1.21182921    0.72402116     -0.77751312  0.9388902 -1.1615122
    Dilution    Proline
1  0.2700025 -0.7517257
2  0.7770551  1.1220202
3 -1.2887761 -0.4059428

Clustering vector:
  [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 [36] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 3 1 1 1 1 1 1 1 1
 [71] 1 1 1 2 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1
[106] 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 2 1 1 1 1 1 1 1 1 3 3 3 3 3 3 3 3 3 3
[141] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
[176] 3 3 3

Within cluster sum of squares by cluster:
[1] 558.6971 385.6983 326.3537
 (between_SS / total_SS =  44.8 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"    
[5] "tot.withinss" "betweenss"    "size"         "iter"        
[9] "ifault"      
attributes(k.means.fit)
$names
[1] "cluster"      "centers"      "totss"        "withinss"    
[5] "tot.withinss" "betweenss"    "size"         "iter"        
[9] "ifault"      

$class
[1] "kmeans"
k.means.fit$size
[1] 65 62 51
k.means.fit$betweenss
[1] 1030.251
# K-Means
# Refit with a fixed seed and several random starts so the cluster numbering
# used by the plot that follows is reproducible between runs.
set.seed(123)
k.means.fit <- kmeans(wine.stand, 3, nstart = 25) # k = 3
attributes(k.means.fit)
$names
[1] "cluster"      "centers"      "totss"        "withinss"    
[5] "tot.withinss" "betweenss"    "size"         "iter"        
[9] "ifault"      

$class
[1] "kmeans"
# 2D cluster plot of the k-means solution.
# The standardized matrix the model was fitted on is plotted: the raw `wine`
# data frame also contains the Type label and unscaled variables, which would
# distort the projection and leak the class label into it.
clusplot(wine.stand, k.means.fit$cluster, main='2D representation of the Cluster solution',
         color=TRUE, shade=TRUE,
         labels=2, lines=0)

# Two-cluster solution; seeded and multi-start so the result is reproducible
set.seed(123)
k.means.fit2 <- kmeans(wine.stand, 2, nstart = 25) # k = 2
k.means.fit2
K-means clustering with 2 clusters of sizes 91, 87

Cluster means:
     Alcohol      Malic         Ash Alcalinity  Magnesium    Phenols
1 -0.3106038  0.3374209 -0.04979045  0.4684435 -0.3065948 -0.7482598
2  0.3248845 -0.3529345  0.05207966 -0.4899811  0.3206911  0.7826625
  Flavanoids Nonflavanoids Proanthocyanins      Color        Hue   Dilution
1 -0.7873111     0.5661058      -0.6098110  0.0979495 -0.5385525 -0.6832374
2  0.8235093    -0.5921337       0.6378483 -0.1024529  0.5633135  0.7146506
     Proline
1 -0.5785857
2  0.6051873

Clustering vector:
  [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 [36] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 2 1 2 2 1 1 2
 [71] 1 2 1 2 2 1 2 1 2 2 2 2 1 1 2 2 1 1 1 1 1 1 1 2 2 2 1 2 2 2 2 1 1 1 2
[106] 1 1 1 1 2 2 1 1 1 1 2 1 1 1 1 2 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[141] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[176] 1 1 1

Within cluster sum of squares by cluster:
[1] 884.3435 765.0965
 (between_SS / total_SS =  28.3 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"    
[5] "tot.withinss" "betweenss"    "size"         "iter"        
[9] "ifault"      
# Inspect the two-cluster fit created just above (previously inspected k.means.fit)
attributes(k.means.fit2)
$names
[1] "cluster"      "centers"      "totss"        "withinss"    
[5] "tot.withinss" "betweenss"    "size"         "iter"        
[9] "ifault"      

$class
[1] "kmeans"
# Five-cluster solution; seeded and multi-start so the result is reproducible
set.seed(123)
k.means.fit5 <- kmeans(wine.stand, 5, nstart = 25) # k = 5
k.means.fit5
K-means clustering with 5 clusters of sizes 46, 18, 39, 50, 25

Cluster means:
     Alcohol      Malic        Ash Alcalinity   Magnesium    Phenols
1  1.0751808 -0.3606243  0.1664227 -0.8929012  0.46102024  0.9849139
2 -0.1095694 -0.3174890  1.1973403  0.4656465  0.89329191  0.5643850
3 -0.7966872 -0.3203581 -1.0967100 -0.2810348 -0.45585936  0.2102007
4  0.1766166  0.9039567  0.2153615  0.5494898 -0.07712756 -0.9873154
5 -1.0098438 -0.4160139  0.1118419  0.6471073 -0.62605170 -0.5718811
  Flavanoids Nonflavanoids Proanthocyanins      Color        Hue   Dilution
1  1.0567190    -0.6716820       0.6866569  0.3596910  0.4267835  0.7906558
2  0.6559234    -0.2827346       0.4575432 -0.4935547  0.7378944  0.6713442
3  0.2240630    -0.5773564       0.2488552 -0.8242773  0.3353572  0.4826454
4 -1.2236663     0.7114800      -0.7591372  0.9516989 -1.1867156 -1.2857714
5 -0.3188336     0.9171796      -0.4628196 -0.9239973  0.5337084 -0.1195585
     Proline
1  1.3330286
2  0.2159226
3 -0.6938820
4 -0.3952058
5 -0.7353692

Clustering vector:
  [1] 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 2 2 1 1 2 1 1 1 1 2 2
 [36] 1 2 2 3 1 1 3 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 5 5 3 3 5 2 3 3 5 3
 [71] 5 2 5 2 3 3 3 5 3 2 3 3 5 4 3 3 5 5 5 5 5 5 5 3 3 2 2 3 3 3 3 3 3 3 3
[106] 5 3 5 3 2 3 3 5 5 5 5 3 3 4 3 3 2 5 3 3 3 3 5 5 5 4 4 4 4 4 4 4 4 4 4
[141] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
[176] 4 4 4

Within cluster sum of squares by cluster:
[1] 196.5301 151.3741 289.7646 314.6524 152.0411
 (between_SS / total_SS =  52.0 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"    
[5] "tot.withinss" "betweenss"    "size"         "iter"        
[9] "ifault"      
# Inspect the five-cluster fit created just above (previously inspected k.means.fit)
attributes(k.means.fit5)
$names
[1] "cluster"      "centers"      "totss"        "withinss"    
[5] "tot.withinss" "betweenss"    "size"         "iter"        
[9] "ifault"      

$class
[1] "kmeans"
# Hierarchical clustering of the wines under three distance metrics.
# Distances are computed on the standardized matrix: the raw `wine` data frame
# includes the Type factor and variables on very different scales (Proline is
# in the hundreds, Hue below 2), so the largest-scale variable would dominate.
distancias1<-dist(wine.stand,method="manhattan")
cluster1<-hclust(distancias1)
plot(cluster1)

distancias2<-dist(wine.stand,method="euclidean")
cluster2<-hclust(distancias2)
distancias3<-dist(wine.stand,method="maximum")
cluster3<-hclust(distancias3)
install.packages("rattle.data")
Installing package into <U+393C><U+3E31>C:/Users/user/Documents/R/win-library/3.3<U+393C><U+3E32>
(as <U+393C><U+3E31>lib<U+393C><U+3E32> is unspecified)
Warning in install.packages :
  package ‘rattle.data’ is in use and will not be installed
wine.stand <- scale(wine[-1])  # To standarize the variables
install.packages("reshape")
Error in install.packages : Updating loaded packages

2 Ejercicio con las frutas

Leemos los datos sin cabecera.

Descargar el fichero desde http://analisisydecision.es/wp-content/uploads/2009/06/alimentos2.txt

# Read the locally downloaded copy (no header row, whitespace separated) and print it.
# Typographic quotes were replaced by plain ASCII quotes and the two statements
# were put on separate lines so that the code parses.
# NOTE(review): the path is machine specific; adjust it to where the file was saved.
frutas<-read.table("C:/Users/lenovo/Documents/alimentos2.txt",header=FALSE,sep="")
frutas

# Read the food table straight from the web: tab separated, no header row.
url <- "http://analisisydecision.es/wp-content/uploads/2009/06/alimentos2.txt"
frutas1<-read.table(url,header=FALSE,sep="\t")
head(frutas1,10)

# Give the columns meaningful names
nombres<-c("nombre","inter_hidratos","kcal","proteinas","grasas")
names(frutas1)<- nombres
names(frutas1)
# Standardize every column except the first one (the food name)
frutas1.stand <- scale(frutas1[-1]) 
frutas1.stand
# k-means with 3 and 4 clusters. A fixed seed and several random starts make
# the solutions reproducible instead of depending on one random initialisation.
set.seed(123)
k.means.fit <- kmeans(frutas1.stand, 3, nstart = 25) 
k.means.fit
k.means.fit1 <- kmeans(frutas1.stand, 4, nstart = 25) 
attributes(k.means.fit1)
attributes(k.means.fit)
# Components of the 3-cluster solution: centres, sizes, assignments and
# within/between sums of squares
k.means.fit$centers
k.means.fit$size
k.means.fit$cluster
k.means.fit$withinss
k.means.fit$betweenss

# Elbow plot: total within-group sum of squares of k-means for k = 1..nc.
#
# data: numeric matrix or data frame, one observation per row.
# nc:   largest number of clusters to try (must be >= 1).
# seed: RNG seed re-applied before every k-means run so each fit is reproducible.
#
# Draws the plot and invisibly returns the vector of sums of squares
# (element k corresponds to k clusters).
wssplot <- function(data, nc=15, seed=1234){
  stopifnot(is.numeric(nc), length(nc) == 1, nc >= 1)
  # Preallocate instead of growing the vector inside the loop
  wss <- numeric(nc)
  # With one cluster the within-group SS equals the total sum of squares
  wss[1] <- (nrow(data)-1)*sum(apply(data,2,var))
  # seq_len(nc)[-1] is empty when nc == 1, whereas 2:nc would count 2, 1
  for (i in seq_len(nc)[-1]){
    set.seed(seed)
    wss[i] <- kmeans(data, centers=i)$tot.withinss
  }
  plot(seq_len(nc), wss, type="b", xlab="Number of Clusters",
       ylab="Within groups sum of squares")
  invisible(wss)
}

# Elbow plot for one to six clusters on the standardized food data
wssplot(data = frutas1.stand, nc = 6)

# Same 2D cluster plot for the 3-cluster fit and then the 4-cluster fit
invisible(lapply(
  list(k.means.fit, k.means.fit1),
  function(ajuste) {
    clusplot(
      frutas1.stand, ajuste$cluster,
      main = '2D representation of the Cluster solution',
      color = TRUE, shade = TRUE,
      labels = 2, lines = 0
    )
  }
))


# Cross-tabulate the food names (column 1) against the 3-cluster assignment
table(frutas1[[1]], k.means.fit$cluster)

plot(cluster2) # display dendrogram
groups <- cutree(cluster2, k=4) # cut tree into 4 clusters
# draw red borders around the 4 clusters; this annotates the dendrogram, so it
# has to run while plot(cluster2) is still the current plot
rect.hclust(cluster2, k=4, border="red")
plot(groups) # cluster membership of each observation, in row order
 
# 2x2 grid: one pie per cluster of the 4-cluster fit, showing the column totals
# of the non-name columns for the foods in that cluster
par(mfrow = c(2, 2))

invisible(Map(
  function(k, tam) {
    pie(colSums(frutas1[k.means.fit1$cluster == k, -1]), cex = tam)
  },
  1:4,
  c(0.685, 0.7, 0.8, 0.82) # label size used for clusters 1 to 4
))


# Hierarchical clustering of the foods under four distance metrics.
# Every metric uses the standardized numeric matrix `frutas1.stand`, so all four
# see the same features, the text name column never enters a distance, and
# nothing depends on the local-file copy `frutas`.
distancias1<-dist(frutas1.stand,method="manhattan")
distancias1
cluster1<-hclust(distancias1)
cluster1
distancias2<-dist(frutas1.stand,method="euclidean")
cluster2<-hclust(distancias2)
cluster2
distancias3<-dist(frutas1.stand,method="maximum")
cluster3<-hclust(distancias3)
distancias4<-dist(frutas1.stand,method="canberra")
cluster4<-hclust(distancias4)

# Show the four dendrograms in a 2x2 grid with horizontal axis labels.
# Both settings are changed in one par() call so `op` can restore them later.
op <- par(mfcol = c(2, 2), las = 1)
plot(cluster1,main="Método Manhatan")
plot(cluster2,main="Distancia euclídea")
plot(cluster3,main="Distancia por máximos")
plot(cluster4,main="Método Camberra")
# PAM (partitioning around medoids) on the euclidean distances for k = 2..5
paso1<-pam(distancias2,2)
paso2<-pam(distancias2,3)
paso3<-pam(distancias2,4)
paso4<-pam(distancias2,5)
par(mfrow=c(2,2))
plot(paso1)
plot(paso2)
plot(paso3)
plot(paso4)
par(op) # restore the graphics settings saved above
# k-means on the standardized features. k-means expects an observations x
# variables matrix; passing the distance object made it cluster the rows of the
# n x n distance matrix instead. Seeded and multi-start for reproducibility.
set.seed(123)
cluster.final<- kmeans(frutas1.stand,3,nstart=25)
cluster.final$size # cluster sizes
cluster.final1<- kmeans(frutas1.stand,4,nstart=25)
cluster.final1$size # cluster sizes
cluster.final2<- kmeans(frutas1.stand,5,nstart=25)
cluster.final2$size # cluster sizes
cluster.final<- kmeans(frutas1.stand,4,nstart=25) # final solution: 4 clusters
# Attach the cluster label to the food table that was actually loaded
# (`frutas1`), name the columns, and sort the rows by cluster.
clus<-as.factor(cluster.final$cluster)
grupos<-cbind(frutas1,clus)
nombres<-c("nombre","inter_hidratos","kcal","proteinas","grasas","clus")
names(grupos)<-nombres
grupos<-grupos[order(grupos$clus), , drop=FALSE]
grupos
# Mean of each nutrient within each cluster
aggregate(grupos$inter_hidratos,list(grupos$clus),mean)
aggregate(grupos$kcal,list(grupos$clus),mean)
aggregate(grupos$proteinas,list(grupos$clus),mean)
aggregate(grupos$grasas,list(grupos$clus),mean)




## Why must the data be standardized?

# Choosing the cluster-analysis method
# A fixed seed plus several random starts make every k-means solution below
# reproducible and independent of a single random initialisation.
set.seed(123)
k.means.fit <- kmeans(wine.stand, 3, nstart = 25) # k = 3
k.means.fit
attributes(k.means.fit)
k.means.fit$size
k.means.fit$betweenss
# K-Means
# The second, identical fit was removed: refitting without a seed could only
# renumber the clusters printed above.
# The plot uses the standardized matrix the model was fitted on; the raw `wine`
# data frame also holds the Type label and unscaled variables.
clusplot(wine.stand, k.means.fit$cluster, main='2D representation of the Cluster solution',
         color=TRUE, shade=TRUE,
         labels=2, lines=0)


k.means.fit2 <- kmeans(wine.stand, 2, nstart = 25) # k = 2
k.means.fit2
attributes(k.means.fit2) # inspect the k = 2 fit (was k.means.fit)

k.means.fit5 <- kmeans(wine.stand, 5, nstart = 25) # k = 5
k.means.fit5
attributes(k.means.fit5) # inspect the k = 5 fit (was k.means.fit)





# Hierarchical clustering of the wines under three distance metrics.
# Distances are computed on the standardized matrix: the raw `wine` data frame
# includes the Type factor and variables on very different scales, so the
# largest-scale variable would dominate every distance.
distancias1<-dist(wine.stand,method="manhattan")
cluster1<-hclust(distancias1)
plot(cluster1)

distancias2<-dist(wine.stand,method="euclidean")
cluster2<-hclust(distancias2)

distancias3<-dist(wine.stand,method="maximum")
cluster3<-hclust(distancias3)

# Food data: dendrograms, PAM and k-means on the standardized matrix
# `frutas1.stand`.
# NOTE(review): distancias1-3 / cluster1-3 are recomputed here for the foods.
# The statements just above build them from the 178-row wine data, which cannot
# be combined with the food table used for `grupos` below; confirm this section
# is meant to analyse the foods.
distancias1<-dist(frutas1.stand,method="manhattan")
cluster1<-hclust(distancias1)
distancias2<-dist(frutas1.stand,method="euclidean")
cluster2<-hclust(distancias2)
distancias3<-dist(frutas1.stand,method="maximum")
cluster3<-hclust(distancias3)
distancias4<-dist(frutas1.stand,method="canberra")
cluster4<-hclust(distancias4)

# Show the four dendrograms in a 2x2 grid with horizontal axis labels.
# Both settings are changed in one par() call so `op` can restore them later.
op <- par(mfcol = c(2, 2), las = 1)
plot(cluster1,main="Método Manhatan")
plot(cluster2,main="Distancia euclídea")
plot(cluster3,main="Distancia por máximos")
plot(cluster4,main="Método Camberra")
# PAM (partitioning around medoids) on the euclidean distances for k = 2..5
paso1<-pam(distancias2,2)
paso2<-pam(distancias2,3)
paso3<-pam(distancias2,4)
paso4<-pam(distancias2,5)
par(mfrow=c(2,2))
plot(paso1)
plot(paso2)
plot(paso3)
plot(paso4)
par(op) # restore the graphics settings saved above
# k-means on the standardized features (an observations x variables matrix);
# the distance object used before made it cluster the rows of the n x n
# distance matrix instead. Seeded and multi-start for reproducibility.
set.seed(123)
cluster.final<- kmeans(frutas1.stand,3,nstart=25)
cluster.final$size # cluster sizes
cluster.final1<- kmeans(frutas1.stand,4,nstart=25)
cluster.final1$size # cluster sizes
cluster.final2<- kmeans(frutas1.stand,5,nstart=25)
cluster.final2$size # cluster sizes
cluster.final<- kmeans(frutas1.stand,4,nstart=25) # final solution: 4 clusters
# Attach the cluster label to the food table that was actually loaded
# (`frutas1`), name the columns, and sort the rows by cluster.
clus<-as.factor(cluster.final$cluster)
grupos<-cbind(frutas1,clus)
nombres<-c("nombre","inter_hidratos","kcal","proteinas","grasas","clus")
names(grupos)<-nombres
grupos<-grupos[order(grupos$clus), , drop=FALSE]
grupos
# Mean of each nutrient within each cluster
aggregate(grupos$inter_hidratos,list(grupos$clus),mean)
aggregate(grupos$kcal,list(grupos$clus),mean)
aggregate(grupos$proteinas,list(grupos$clus),mean)
aggregate(grupos$grasas,list(grupos$clus),mean)


# Exercise 3: US states
data("USArrests")
# Open the help page that describes the dataset
help("USArrests")
# Print the data in the console
USArrests


# Work on a copy of the data
my_data <- USArrests

# Remove rows with missing values (NA), if any
my_data <- na.omit(my_data)

# Standardize the variables with scale() defaults
my_data <- scale(my_data)
# View the first 6 rows (this line was missing its comment marker, which made
# the script fail to parse)
head(my_data, n = 6)

# Note on distance measures
# Classifying observations into groups requires a way to measure the distance
# between them; get_dist() computes a distance matrix between the rows of a
# data matrix.
# The method can also be "kendall" or "spearman"
res.dist <- get_dist(USArrests, stand = TRUE, method = "pearson")
# Visualize the distance matrix as a heat map
fviz_dist(res.dist,gradient = list(low = "#00AFBB", mid = "white", high = "#FC4E07"))

# Plot used to choose the number of clusters (gap statistic).
# The seed is fixed because both the gap statistic and the k-means fit below
# use random numbers; without it the plot and cluster numbering change per run.
set.seed(123)
fviz_nbclust(my_data, kmeans, method = "gap_stat")

# Compute and visualize the k-means clustering
km.res <- kmeans(my_data, 4, nstart = 25)
# Show the components and the cluster each state was assigned to
km.res 
# ellipse.type replaces frame.type, which factoextra has deprecated
fviz_cluster(km.res, data = my_data, ellipse.type = "convex")+
theme_minimal()


# PAM clustering: partitioning around medoids. A robust alternative to k-means
# clustering, less sensitive to outliers.
# Compute PAM with 4 clusters
library("cluster")
pam.res <- pam(my_data, 4)
# Visualize the PAM partition
fviz_cluster(pam.res)

# Using dendrograms
# Load and prepare the data
data("USArrests")

my_data <- scale(USArrests)

# Compute the dissimilarity matrix (euclidean distance between states)
d <- dist(my_data, method = "euclidean")
d

# Hierarchical clustering using Ward's method
res.hc <- hclust(d, method = "ward.D2" )

# Cut the tree into 4 groups
grp <- cutree(res.hc, k = 4)

# Visualize
plot(res.hc, cex = 0.6) # plot tree
rect.hclust(res.hc, k = 4, border = 2:5) # add a rectangle around each of the 4 groups

# A more polished dendrogram
library("factoextra")
# Compute hierarchical clustering and cut into 4 clusters
res <- hcut(USArrests, k = 4, stand = TRUE)

# Visualize the tree with one colour per cluster
fviz_dend(res, rect = TRUE, cex = 0.5,k_colors = c("#00AFBB","#2E9FDF", "#E7B800", "#FC4E07"))

 
# Validation
my_data <- scale(USArrests)
# Compute clValid
# Install the package only when it is missing, instead of reinstalling it on
# every run of the script.
if (!requireNamespace("clValid", quietly = TRUE)) install.packages("clValid")
library("clValid")
# Internal validation measures for 2 to 6 clusters with three clustering methods
intern <- clValid(my_data,nClust = 2:6,clMethods = c("hierarchical","kmeans","pam"),validation = "internal")
# Summary
summary(intern)

# Packages needed to compute the optimal number of clusters.
# Each package is installed only when it is missing: reinstalling a package that
# is already loaded fails ("Updating loaded packages") and reinstalling on
# every run is slow. requireNamespace() checks availability without attaching.
if (!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools")
if (!requireNamespace("factoextra", quietly = TRUE)) {
  devtools::install_github("kassambara/factoextra")
}
pkgs <- c("cluster",  "NbClust")
faltantes <- pkgs[!vapply(pkgs, requireNamespace, logical(1), quietly = TRUE)]
if (length(faltantes) > 0) install.packages(faltantes)
library(factoextra)
library(cluster)
library(NbClust)

## Load the data
data(iris)
head(iris)

# Remove species column (5) and scale the data
# (the closing parenthesis of scale() was missing, a syntax error)
iris.scaled <- scale(iris[, -5])

# K-means clustering with 3 clusters; seeded and multi-start for reproducibility
set.seed(123)
km.res <- kmeans(iris.scaled, 3, nstart = 25)
# k-means group number of each observation
km.res$cluster

# Visualize k-means clusters as points with a normal-theory ellipse per cluster.
# ellipse.type replaces frame.type, which factoextra has deprecated.
fviz_cluster(km.res, data = iris.scaled, geom = "point",stand = FALSE, ellipse.type = "norm")
