This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# Example chunk: default plot of the `cars` data frame.
plot(cars)
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
## Análisis cluster
# Packages used by the cluster-analysis sections of this notebook.
paquetes <- c("rattle.data", "reshape", "ggplot2", "factoextra", "cluster")
# Install only what is missing. The original run called install.packages() for
# every package on each execution and reported
# "Error in install.packages : Updating loaded packages" after each call.
faltantes <- paquetes[!vapply(paquetes, requireNamespace, logical(1),
                              quietly = TRUE)]
if (length(faltantes) > 0) {
  install.packages(faltantes)
}
library("cluster")
library("factoextra")
library(rattle.data) # provides the wine and weather datasets
library(reshape)
library(ggplot2)
# Load the datasets
data("wine", package = "rattle.data") # bring the wine dataset into the workspace
head(wine, n = 10)                    # first 10 observations
help("wine")                          # description of the variables
#El conjunto de datos contiene los resultados de un análisis químico de vinos cultivados en una zona
# específica de Italia.
# Se presentan tres tipos de vino en las 178 muestras, con los resultados de 13 análisis químicos registrados para cada muestra.
# Hay una variable categórica (Type, el tipo de vino); las otras 13 variables son numéricas.
# Basic statistics
# Mean of Proanthocyanins within each wine type, ignoring missing values.
tapply(wine$Proanthocyanins, list(wine$Type), mean, na.rm = TRUE)
1 2 3
1.899322 1.630282 1.153542
# Overall summary of the 13 chemical measurements.
# The original call also passed `groups=`, `statistics=` and `quantiles=`.
# Those are not arguments of summary() and were silently ignored, so the
# printed table was never split by wine type. They are removed here; the
# output is unchanged.
# NOTE(review): if per-type statistics were intended, something like
# by(wine[-1], wine$Type, summary) would be needed - confirm.
summary(wine[, c("Alcalinity", "Alcohol", "Ash", "Color", "Dilution",
                 "Flavanoids", "Hue", "Magnesium", "Malic", "Nonflavanoids",
                 "Phenols", "Proanthocyanins", "Proline"),
             drop = FALSE])
Alcalinity Alcohol Ash Color
Min. :10.60 Min. :11.03 Min. :1.360 Min. : 1.280
1st Qu.:17.20 1st Qu.:12.36 1st Qu.:2.210 1st Qu.: 3.220
Median :19.50 Median :13.05 Median :2.360 Median : 4.690
Mean :19.49 Mean :13.00 Mean :2.367 Mean : 5.058
3rd Qu.:21.50 3rd Qu.:13.68 3rd Qu.:2.558 3rd Qu.: 6.200
Max. :30.00 Max. :14.83 Max. :3.230 Max. :13.000
Dilution Flavanoids Hue Magnesium
Min. :1.270 Min. :0.340 Min. :0.4800 Min. : 70.00
1st Qu.:1.938 1st Qu.:1.205 1st Qu.:0.7825 1st Qu.: 88.00
Median :2.780 Median :2.135 Median :0.9650 Median : 98.00
Mean :2.612 Mean :2.029 Mean :0.9574 Mean : 99.74
3rd Qu.:3.170 3rd Qu.:2.875 3rd Qu.:1.1200 3rd Qu.:107.00
Max. :4.000 Max. :5.080 Max. :1.7100 Max. :162.00
Malic Nonflavanoids Phenols Proanthocyanins
Min. :0.740 Min. :0.1300 Min. :0.980 Min. :0.410
1st Qu.:1.603 1st Qu.:0.2700 1st Qu.:1.742 1st Qu.:1.250
Median :1.865 Median :0.3400 Median :2.355 Median :1.555
Mean :2.336 Mean :0.3619 Mean :2.295 Mean :1.591
3rd Qu.:3.083 3rd Qu.:0.4375 3rd Qu.:2.800 3rd Qu.:1.950
Max. :5.800 Max. :0.6600 Max. :3.880 Max. :3.580
Proline
Min. : 278.0
1st Qu.: 500.5
Median : 673.5
Mean : 746.9
3rd Qu.: 985.0
Max. :1680.0
data("weather", package = "rattle.data") # load a second dataset
head(weather)
# Normality test of Alcohol. The commented call below would test it per wine
# type; the active call tests the whole sample at once.
# normalityTest(Alcohol ~ Type, test="shapiro.test", data=wine)
prueba <- shapiro.test(wine$Alcohol)
print(prueba)
Shapiro-Wilk normality test
data: wine$Alcohol
W = 0.9818, p-value = 0.02005
# Plots
# with(wine, dotplot(Alcohol, by = Type, bin = FALSE))
# Box plot of Alcohol for each wine type. `id.method` was dropped: it is not an
# argument of graphics::boxplot(), which would forward it as an unknown
# graphical parameter and emit warnings.
# NOTE(review): the x axis shows the wine Type, so the "químico" label may be
# unintended - confirm.
boxplot(Alcohol ~ Type, data = wine, xlab = "químico", ylab = "",
        main = "boxplot de vino")
### Cluster analysis
# Standardise every variable (centre and scale by column).
# Column 1 is dropped because it holds the wine Type label, not a measurement.
wine.stand <- scale(wine[, -1])
print(wine.stand) # show the standardised data
Alcohol Malic Ash Alcalinity Magnesium
[1,] 1.51434077 -0.56066822 0.23139979 -1.166303174 1.90852151
[2,] 0.24559683 -0.49800856 -0.82566722 -2.483840525 0.01809398
[3,] 0.19632522 0.02117152 1.10621386 -0.267982252 0.08810981
[4,] 1.68679140 -0.34583508 0.48655389 -0.806974805 0.92829983
[5,] 0.29486844 0.22705328 1.83522559 0.450674485 1.27837900
[6,] 1.47738706 -0.51591132 0.30430096 -1.286079296 0.85828399
[7,] 1.71142720 -0.41744613 0.30430096 -1.465743481 -0.26196936
[8,] 1.30493643 -0.16680747 0.88751034 -0.567422559 1.48842650
[9,] 2.25341491 -0.62332789 -0.71631546 -1.645407665 -0.19195352
[10,] 1.05857838 -0.88291793 -0.35180959 -1.046527051 -0.12193769
[11,] 1.35420804 -0.15785609 -0.24245783 -0.447646437 0.36817315
[12,] 1.37884384 -0.76654998 -0.16955666 -0.806974805 -0.33198519
[13,] 0.92308146 -0.54276546 0.15849862 -1.046527051 -0.75208020
[14,] 2.15487169 -0.54276546 0.08559744 -2.423952463 -0.61204853
[15,] 1.69910930 -0.41744613 0.04914686 -2.244288279 0.15812565
[16,] 0.77526663 -0.47115441 1.21556562 -0.687198682 0.85828399
[17,] 1.60056608 -0.37268923 1.28846679 0.151234178 1.41841067
[18,] 1.02162467 -0.68598755 0.92396093 0.151234178 1.06833150
[19,] 1.46506916 -0.66808479 0.41365272 -0.896806897 0.57822065
[20,] 0.78758453 0.68357369 0.70525741 -1.286079296 1.13834733
[21,] 1.30493643 -0.63227927 -0.31535901 -1.046527051 1.83850567
[22,] -0.08698653 1.31017034 1.03331269 -0.267982252 0.15812565
[23,] 0.87380985 -0.42639751 -0.02375431 -0.866862867 0.08810981
[24,] -0.18552975 -0.65913341 0.55945507 -0.507534498 -0.33198519
[25,] 0.61513390 -0.47115441 0.88751034 0.151234178 -0.26196936
[26,] 0.06082829 -0.25632128 3.11099611 1.648435713 1.69847400
[27,] 0.47963697 -0.50695994 0.92396093 -1.016583020 -0.47201686
[28,] 0.36877585 -0.55171684 -0.82566722 -0.747086744 -0.40200103
[29,] 1.07089628 -0.39059199 1.58007149 -0.028430007 0.50820482
[30,] 1.25566482 -0.58752236 -0.57051311 -1.046527051 -0.26196936
[31,] 0.89844565 -0.74864721 1.21556562 0.899834945 0.08810981
[32,] 0.71367712 -0.60542512 -0.02375431 -0.118262099 0.43818899
[33,] 0.83685614 -0.45325165 -0.02375431 -0.687198682 0.29815732
[34,] 0.93539936 -0.72179307 1.21556562 0.001514024 2.25860068
[35,] 0.62745180 -0.48010579 1.03331269 -0.148206130 0.71825232
[36,] 0.59049809 -0.47115441 0.15849862 0.300954331 0.01809398
[37,] 0.34414005 -0.62332789 1.72587383 -1.196247204 0.71825232
[38,] 0.06082829 -0.61437650 0.66880683 -0.447646437 -0.12193769
[39,] 0.08546410 -0.74864721 -0.97146956 -1.196247204 -0.12193769
[40,] 1.50202286 1.48024658 0.52300448 -1.884959911 1.97853734
[41,] 0.68904131 -0.56066822 -0.20600725 -0.986638989 1.20836316
[42,] 0.50427278 1.34597587 -0.89856839 -0.208094191 -0.68206436
[43,] 1.08321419 -0.39954337 0.81460917 -1.345967358 0.08810981
[44,] 0.29486844 1.47129519 -0.27890842 -0.597366590 0.22814148
[45,] 0.06082829 -0.50695994 -0.97146956 -0.747086744 0.50820482
[46,] 1.48970496 1.52500348 0.26785038 -0.178150160 0.78826816
[47,] 1.69910930 1.12219135 -0.31535901 -1.046527051 0.15812565
[48,] 1.10784999 -0.58752236 -0.89856839 -1.046527051 0.08810981
[49,] 1.35420804 -0.28317542 0.12204803 -0.208094191 0.22814148
[50,] 1.15712160 -0.54276546 -0.35180959 -0.627310621 0.57822065
[51,] 0.06082829 -0.54276546 -1.19017308 -2.124512156 -0.54203270
[52,] 1.02162467 -0.61437650 0.85105976 -0.687198682 -0.40200103
[53,] 1.00930677 -0.52486270 0.19494920 -1.645407665 0.78826816
[54,] 0.94771726 -0.39059199 1.14266445 -0.717142713 1.06833150
[55,] 0.91076355 -0.59647374 -0.42471076 -0.926750928 1.27837900
[56,] 0.68904131 -0.54276546 0.34075155 0.300954331 1.13834733
[57,] 1.50202286 -0.56961960 -0.24245783 -0.956694959 1.27837900
[58,] 0.35645795 -0.32793232 1.14266445 -0.806974805 0.15812565
[59,] 0.88612775 -0.81130688 0.48655389 -0.836918836 0.57822065
[60,] -0.77678907 -1.24992453 -3.66881295 -2.663504709 -0.82209603
[61,] -0.82606067 -1.10670244 -0.31535901 -1.046527051 0.08810981
[62,] -0.44420570 -0.87396654 -1.26307425 -0.806974805 0.01809398
[63,] 0.82453824 -0.97243173 -1.62758012 -0.447646437 -0.40200103
[64,] -0.77678907 -1.07984830 -0.75276604 -0.148206130 -0.89211187
[65,] -1.02314711 -0.79340412 0.59590565 -0.148206130 0.29815732
[66,] -0.77678907 -1.00823725 0.70525741 -0.417702406 -0.12193769
[67,] 0.13473571 -1.18726487 -2.42949302 -1.345967358 -1.52225438
[68,] -0.77678907 -1.04404278 -1.62758012 0.031458055 -1.52225438
[69,] 0.41804746 -1.24992453 -0.02375431 -0.747086744 0.71825232
[70,] -0.97387550 -1.02614002 -2.24724008 -0.806974805 3.58890153
[71,] -0.87533228 -0.65018203 -0.57051311 0.271010300 0.22814148
[72,] 1.05857838 -0.73969583 1.10621386 1.648435713 -0.96212770
[73,] 0.60281600 -0.60542512 -0.46116135 1.348995406 -0.89211187
[74,] -0.01307912 -0.59647374 0.85105976 3.145637249 2.74871152
[75,] -1.28182306 -1.11565382 -0.24245783 0.450674485 0.08810981
[76,] -1.65136013 -0.40849475 -1.62758012 -1.046527051 -0.19195352
Phenols Flavanoids Nonflavanoids Proanthocyanins
[1,] 0.806721729 1.0319080692 -0.65770780 1.22143845
[2,] 0.567048088 0.7315652835 -0.81841060 -0.54318872
[3,] 0.806721729 1.2121137407 -0.49700500 2.12995937
[4,] 2.484437221 1.4623993954 -0.97911340 1.02925134
[5,] 0.806721729 0.6614853002 0.22615759 0.40027531
[6,] 1.557699140 1.3622851335 -0.17559941 0.66234866
[7,] 0.327374446 0.4912910549 -0.49700500 0.67982021
[8,] 0.487156874 0.4812796287 -0.41665360 -0.59560339
[9,] 0.806721729 0.9518166597 -0.57735640 0.67982021
[10,] 1.094330099 1.1220109049 -1.13981619 0.45268998
[11,] 1.046395371 1.2922051502 -1.13981619 1.37868246
[12,] -0.151972837 0.4011882192 -0.81841060 -0.03651359
[13,] 0.487156874 0.7315652835 -0.57735640 0.38280376
[14,] 1.286069013 1.6626279192 0.54756319 2.12995937
[15,] 1.605633868 1.6125707883 -0.57735640 2.39203271
[16,] 0.886612943 0.8817366764 -0.49700500 -0.22870071
[17,] 0.806721729 1.1119994787 -0.25595080 0.66234866
[18,] 1.046395371 1.3722965597 0.30650899 0.22555975
[19,] 1.605633868 1.9029021478 -0.33630220 0.47016154
[20,] 0.646939302 1.0018737906 -1.54157319 0.12073042
[21,] 1.126286585 1.1420337573 -0.97911340 0.88947889
[22,] 0.183570261 0.3811653668 -0.89876200 0.67982021
[23,] 0.503135117 0.8517023978 -0.73805920 0.17314508
[24,] 0.295417961 0.3411196621 -0.81841060 -0.22870071
[25,] 0.375309174 0.5813938906 -0.65770780 0.12073042
[26,] 0.535091602 0.6514738740 0.86896878 0.57499088
[27,] 0.886612943 0.9117709549 -0.17559941 -0.24617226
[28,] 0.167592018 0.1609139906 -0.73805920 -0.42088782
[29,] 1.046395371 0.9418052335 0.06545479 0.29544598
[30,] 0.567048088 0.3010739573 -0.81841060 0.67982021
[31,] 1.126286585 1.2221251668 -0.57735640 1.37868246
[32,] 0.902591186 1.1620566097 -1.13981619 0.62740554
[33,] 0.199548504 0.6614853002 0.46721179 0.66234866
[34,] 1.046395371 0.7115424311 1.11002298 -0.42088782
[35,] 0.087700804 0.5013024811 -0.57735640 -0.08892826
[36,] 0.646939302 0.9518166597 -0.81841060 0.47016154
[37,] 0.487156874 0.6514738740 -0.17559941 -0.40341627
[38,] 0.247483232 0.4011882192 -0.57735640 -0.26364382
[39,] 0.167592018 0.6114281692 -0.65770780 -0.38594471
[40,] 1.126286585 1.0118852168 -1.30051899 0.85453577
[41,] 1.365960227 1.2621708716 -0.17559941 1.30879623
[42,] 0.247483232 0.6514738740 -0.73805920 -0.19375759
[43,] 1.525742654 1.5324793788 -1.54157319 0.19061664
[44,] 0.551069845 0.6014167430 -0.33630220 0.12073042
[45,] 1.126286585 0.9718395121 -0.65770780 0.76717799
[46,] 0.886612943 0.6214395954 -0.49700500 -0.59560339
[47,] 1.525742654 1.1420337573 -0.73805920 1.04672289
[48,] 1.286069013 1.3622851335 -1.22016759 0.95936511
[49,] 0.726830515 0.8917481025 -0.33630220 1.37868246
[50,] 0.934547672 1.5124565264 -0.33630220 0.85453577
[51,] 0.678895787 1.2421480192 -1.54157319 2.30467493
[52,] 0.247483232 0.9618280859 -1.13981619 1.22143845
[53,] 2.532371949 1.7126850502 -0.33630220 0.48763309
[54,] 1.126286585 0.7615995621 0.22615759 0.15567353
[55,] 0.487156874 0.8717252502 -1.22016759 0.05084419
[56,] 1.062373614 0.7515881359 -1.30051899 1.50098335
[57,] 1.445851440 0.9718395121 -0.81841060 0.76717799
[58,] 1.126286585 1.2021023145 -0.41665360 0.12073042
[59,] 1.765416296 1.6426050669 -1.38087039 0.78464955
[60,] -0.503494178 -1.4609370523 -0.65770780 -2.04574255
[61,] -0.391646479 -0.9403428904 2.15459116 -2.06321410
[62,] -0.439581207 -0.6199772522 1.35107717 -1.69631142
[63,] -0.311755265 -0.2395430570 -0.33630220 -1.50412430
[64,] 1.925198724 1.0719537740 -1.38087039 0.48763309
[65,] -0.647298363 -0.2795887618 0.70826598 -0.97997762
[66,] 0.199548504 0.6214395954 0.06545479 0.85453577
[67,] 1.094330099 1.1520451835 -0.81841060 1.20396690
[68,] -0.295777022 -0.0293031070 -0.73805920 -0.96250606
[69,] 0.375309174 -0.7301029403 1.51177997 -2.04574255
[70,] -0.711211334 -0.7501257927 -1.78262739 1.58834113
[71,] -1.909579543 -1.0104228737 0.06545479 -0.22870071
[72,] 1.046395371 0.8316795454 -1.22016759 0.48763309
[73,] -0.663276606 -0.1894859260 -0.73805920 -0.97997762
[74,] 1.605633868 0.8617138240 -1.22016759 0.64487710
[75,] 1.733459810 0.1108568597 -1.86297878 0.10325886
[76,] -1.094689161 -0.4597944332 -0.17559941 -0.77031895
Color Hue Dilution Proline
[1,] 0.251008784 0.36115849 1.84272147 1.010159388
[2,] -0.292496232 0.40490846 1.11031723 0.962526349
[3,] 0.268262912 0.31740852 0.78636920 1.391223700
[4,] 1.182731669 -0.42634104 1.18074072 2.328006800
[5,] -0.318377423 0.36115849 0.44833648 -0.037767469
[6,] 0.729810822 0.40490846 0.33565890 2.232740722
[7,] 0.082781041 0.27365854 1.36384178 1.724654973
[8,] -0.003489596 0.44865844 1.36384178 1.740532653
[9,] 0.061213382 0.53615839 0.33565890 0.946648670
[10,] 0.932546820 0.22990857 1.32158768 0.946648670
[11,] 0.298457635 1.27990794 0.78636920 2.423272878
[12,] -0.025057256 0.92990815 0.29340481 1.692899614
[13,] 0.233754657 0.84240820 0.40608239 1.819921051
[14,] 0.147484019 1.27990794 0.16664254 1.280079943
[15,] 1.053325713 1.06115807 0.54692935 2.540767708
[16,] 0.967055075 1.41115786 0.37791299 1.788165692
[17,] 0.492566569 0.49240841 0.05396496 1.692899614
[18,] 0.665107844 0.75490825 -0.05871261 1.216569224
[19,] 1.570949537 1.19240799 0.29340481 2.963113987
[20,] 0.018078063 0.01115870 1.05397844 0.311541483
[21,] 0.255322316 0.57990836 1.54694284 0.105131647
[22,] -0.240733849 0.31740852 1.27933359 0.073376288
[23,] -0.542681081 0.66740831 1.95539905 0.914893310
[24,] -0.486605166 0.57990836 1.43426526 0.851382592
[25,] -0.663459973 0.71115828 1.70187450 0.311541483
[26,] -0.637578782 0.75490825 0.82862329 0.263908444
[27,] -0.111327893 -0.16384119 0.85679269 1.422979059
[28,] -0.477978102 0.27365854 0.22298133 1.708777293
[29,] -0.240733849 1.27990794 1.11031723 0.533828998
[30,] -0.154463212 0.36115849 1.37792647 0.914893310
[31,] 0.276889975 1.01740810 0.13847314 1.708777293
[32,] 0.794513800 0.57990836 0.37791299 2.439150558
[33,] -0.525426953 1.19240799 0.36382829 0.771994193
[34,] 0.147484019 1.27990794 0.54692935 1.550000497
[35,] -0.370139806 0.62365833 0.36382829 1.105425466
[36,] 0.018078063 0.36115849 1.20891011 0.549706678
[37,] -0.197598531 0.57990836 0.23706602 0.422685241
[38,] -0.348572146 0.71115828 -0.14322079 1.137180826
[39,] -0.585816399 0.97365812 0.11030375 0.867260271
[40,] 0.018078063 -0.29509111 1.29341829 0.041620929
[41,] 0.462371846 -0.03259127 1.08214784 0.152764686
[42,] -0.335631551 -0.20759117 0.54692935 0.914893310
[43,] 0.160424615 -0.33884109 1.33567238 1.105425466
[44,] -0.301123296 -0.60134093 0.54692935 -0.212421946
[45,] -0.007803128 -0.33884109 1.03989375 0.438562920
[46,] 0.078467509 -0.38259106 1.01172435 1.057792427
[47,] -0.068192574 0.36115849 1.16665602 1.010159388
[48,] 0.449431250 -0.20759117 1.01172435 0.756116514
[49,] 0.492566569 0.49240841 0.19481193 0.994281709
[50,] 1.657220175 0.71115828 0.68777632 1.629388895
[51,] 0.923919756 0.71115828 0.42016708 1.280079943
[52,] 0.233754657 1.23615797 1.06806314 1.645266575
[53,] 0.859216778 0.22990857 0.91313147 1.407101380
[54,] 0.535701888 0.75490825 0.44833648 1.994575527
[55,] 0.341592953 -0.16384119 0.82862329 0.994281709
[56,] 0.514134228 0.09865865 0.58918345 1.184813865
[57,] 0.570210143 -0.07634125 0.98355496 0.708483475
[58,] 0.406295932 0.49240841 0.32157420 1.661144254
[59,] 0.751378481 -0.29509111 0.36382829 1.708777293
[60,] -1.340684477 0.40490846 -1.11506488 -0.720507695
[61,] -0.771298270 1.27990794 -1.32633534 -0.212421946
[62,] 0.298457635 0.09865865 -1.43901291 -0.942795210
[63,] -0.542681081 1.19240799 -0.21364428 -0.371198742
[64,] -0.262301509 1.14865802 0.36382829 -1.038061288
[65,] -0.909331290 2.15490741 -0.53759231 -1.244471124
[66,] -0.197598531 1.01740810 -0.43899943 -0.218773018
[67,] 0.104348700 0.71115828 0.80045390 -0.777667342
[68,] -0.163090276 0.71115828 1.22299481 -0.752263054
[69,] -0.814433589 0.27365854 -0.96013322 0.009865569
[70,] -0.952466609 1.41115786 0.64552223 -0.091751580
[71,] -0.866195971 -0.22509116 -1.11506488 0.390929881
[72,] -0.723849419 1.76115765 0.77228450 -1.069816648
[73,] -0.568562272 0.09865865 0.23706602 -0.872933420
[74,] -0.736790015 1.54240778 1.25116420 0.756116514
[75,] -0.797179461 0.14240862 0.73003041 0.441738456
[76,] -0.542681081 1.19240799 -0.66435458 -1.012657001
[ reached getOption("max.print") -- omitted 102 rows ]
attr(,"scaled:center")
Alcohol Malic Ash Alcalinity
13.0006180 2.3363483 2.3665169 19.4949438
Magnesium Phenols Flavanoids Nonflavanoids
99.7415730 2.2951124 2.0292697 0.3618539
Proanthocyanins Color Hue Dilution
1.5908989 5.0580899 0.9574494 2.6116854
Proline
746.8932584
attr(,"scaled:scale")
Alcohol Malic Ash Alcalinity
0.8118265 1.1171461 0.2743440 3.3395638
Magnesium Phenols Flavanoids Nonflavanoids
14.2824835 0.6258510 0.9988587 0.1244533
Proanthocyanins Color Hue Dilution
0.5723589 2.3182859 0.2285716 0.7099904
Proline
314.9074743
## Why standardise? k-means works on Euclidean distances, so without scaling
## the variables with the largest spread (Proline here) would dominate.
# Choosing the cluster-analysis method
# kmeans() starts from random centres. Fixing the seed and using several
# random starts makes the partition reproducible and less likely to be a poor
# local optimum.
set.seed(123)
k.means.fit <- kmeans(wine.stand, centers = 3, nstart = 25) # k = 3
k.means.fit
K-means clustering with 3 clusters of sizes 65, 62, 51
Cluster means:
Alcohol Malic Ash Alcalinity Magnesium Phenols
1 -0.9234669 -0.3929331 -0.4931257 0.1701220 -0.49032869 -0.07576891
2 0.8328826 -0.3029551 0.3636801 -0.6084749 0.57596208 0.88274724
3 0.1644436 0.8690954 0.1863726 0.5228924 -0.07526047 -0.97657548
Flavanoids Nonflavanoids Proanthocyanins Color Hue
1 0.02075402 -0.03343924 0.05810161 -0.8993770 0.4605046
2 0.97506900 -0.56050853 0.57865427 0.1705823 0.4726504
3 -1.21182921 0.72402116 -0.77751312 0.9388902 -1.1615122
Dilution Proline
1 0.2700025 -0.7517257
2 0.7770551 1.1220202
3 -1.2887761 -0.4059428
Clustering vector:
[1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[36] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 3 1 1 1 1 1 1 1 1
[71] 1 1 1 2 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1
[106] 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 2 1 1 1 1 1 1 1 1 3 3 3 3 3 3 3 3 3 3
[141] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
[176] 3 3 3
Within cluster sum of squares by cluster:
[1] 558.6971 385.6983 326.3537
(between_SS / total_SS = 44.8 %)
Available components:
[1] "cluster" "centers" "totss" "withinss"
[5] "tot.withinss" "betweenss" "size" "iter"
[9] "ifault"
# List the component names and the class of the fitted k-means object.
attributes(k.means.fit)
$names
[1] "cluster" "centers" "totss" "withinss"
[5] "tot.withinss" "betweenss" "size" "iter"
[9] "ifault"
$class
[1] "kmeans"
# Number of observations assigned to each cluster.
k.means.fit$size
[1] 65 62 51
# Between-cluster sum of squares.
k.means.fit$betweenss
[1] 1030.251
# K-Means
# Refit with k = 3. A fixed seed plus several random starts keeps this fit
# reproducible from run to run.
set.seed(123)
k.means.fit <- kmeans(wine.stand, centers = 3, nstart = 25) # k = 3
attributes(k.means.fit)
$names
[1] "cluster" "centers" "totss" "withinss"
[5] "tot.withinss" "betweenss" "size" "iter"
[9] "ifault"
$class
[1] "kmeans"
# Draw the clusters on the standardised matrix that kmeans() was fitted on.
# The raw `wine` data frame is unscaled and still carries the Type label in
# column 1, which would leak the true classes into the projection.
clusplot(wine.stand, k.means.fit$cluster,
         main = '2D representation of the Cluster solution',
         color = TRUE, shade = TRUE,
         labels = 2, lines = 0)
# Two-cluster solution. Seeded and with several starts for reproducibility.
set.seed(123)
k.means.fit2 <- kmeans(wine.stand, centers = 2, nstart = 25) # k = 2
k.means.fit2
K-means clustering with 2 clusters of sizes 91, 87
Cluster means:
Alcohol Malic Ash Alcalinity Magnesium Phenols
1 -0.3106038 0.3374209 -0.04979045 0.4684435 -0.3065948 -0.7482598
2 0.3248845 -0.3529345 0.05207966 -0.4899811 0.3206911 0.7826625
Flavanoids Nonflavanoids Proanthocyanins Color Hue Dilution
1 -0.7873111 0.5661058 -0.6098110 0.0979495 -0.5385525 -0.6832374
2 0.8235093 -0.5921337 0.6378483 -0.1024529 0.5633135 0.7146506
Proline
1 -0.5785857
2 0.6051873
Clustering vector:
[1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[36] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 2 1 2 2 1 1 2
[71] 1 2 1 2 2 1 2 1 2 2 2 2 1 1 2 2 1 1 1 1 1 1 1 2 2 2 1 2 2 2 2 1 1 1 2
[106] 1 1 1 1 2 2 1 1 1 1 2 1 1 1 1 2 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[141] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[176] 1 1 1
Within cluster sum of squares by cluster:
[1] 884.3435 765.0965
(between_SS / total_SS = 28.3 %)
Available components:
[1] "cluster" "centers" "totss" "withinss"
[5] "tot.withinss" "betweenss" "size" "iter"
[9] "ifault"
# Inspect the k = 2 fit (the original inspected the k = 3 object here).
attributes(k.means.fit2)
$names
[1] "cluster" "centers" "totss" "withinss"
[5] "tot.withinss" "betweenss" "size" "iter"
[9] "ifault"
$class
[1] "kmeans"
# Five-cluster solution. Seeded and with several starts for reproducibility.
set.seed(123)
k.means.fit5 <- kmeans(wine.stand, centers = 5, nstart = 25) # k = 5
k.means.fit5
K-means clustering with 5 clusters of sizes 46, 18, 39, 50, 25
Cluster means:
Alcohol Malic Ash Alcalinity Magnesium Phenols
1 1.0751808 -0.3606243 0.1664227 -0.8929012 0.46102024 0.9849139
2 -0.1095694 -0.3174890 1.1973403 0.4656465 0.89329191 0.5643850
3 -0.7966872 -0.3203581 -1.0967100 -0.2810348 -0.45585936 0.2102007
4 0.1766166 0.9039567 0.2153615 0.5494898 -0.07712756 -0.9873154
5 -1.0098438 -0.4160139 0.1118419 0.6471073 -0.62605170 -0.5718811
Flavanoids Nonflavanoids Proanthocyanins Color Hue Dilution
1 1.0567190 -0.6716820 0.6866569 0.3596910 0.4267835 0.7906558
2 0.6559234 -0.2827346 0.4575432 -0.4935547 0.7378944 0.6713442
3 0.2240630 -0.5773564 0.2488552 -0.8242773 0.3353572 0.4826454
4 -1.2236663 0.7114800 -0.7591372 0.9516989 -1.1867156 -1.2857714
5 -0.3188336 0.9171796 -0.4628196 -0.9239973 0.5337084 -0.1195585
Proline
1 1.3330286
2 0.2159226
3 -0.6938820
4 -0.3952058
5 -0.7353692
Clustering vector:
[1] 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 2 2 1 1 2 1 1 1 1 2 2
[36] 1 2 2 3 1 1 3 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 5 5 3 3 5 2 3 3 5 3
[71] 5 2 5 2 3 3 3 5 3 2 3 3 5 4 3 3 5 5 5 5 5 5 5 3 3 2 2 3 3 3 3 3 3 3 3
[106] 5 3 5 3 2 3 3 5 5 5 5 3 3 4 3 3 2 5 3 3 3 3 5 5 5 4 4 4 4 4 4 4 4 4 4
[141] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
[176] 4 4 4
Within cluster sum of squares by cluster:
[1] 196.5301 151.3741 289.7646 314.6524 152.0411
(between_SS / total_SS = 52.0 %)
Available components:
[1] "cluster" "centers" "totss" "withinss"
[5] "tot.withinss" "betweenss" "size" "iter"
[9] "ifault"
# Inspect the k = 5 fit (the original inspected the k = 3 object here).
attributes(k.means.fit5)
$names
[1] "cluster" "centers" "totss" "withinss"
[5] "tot.withinss" "betweenss" "size" "iter"
[9] "ifault"
$class
[1] "kmeans"
# Hierarchical clustering of the wines under three distance measures.
# Distances are computed on the standardised matrix: the raw `wine` data frame
# contains the Type factor in column 1 and variables on very different scales.
distancias1 <- dist(wine.stand, method = "manhattan")
cluster1 <- hclust(distancias1)
plot(cluster1)
distancias2 <- dist(wine.stand, method = "euclidean")
cluster2 <- hclust(distancias2)
distancias3 <- dist(wine.stand, method = "maximum")
cluster3 <- hclust(distancias3)
# Install rattle.data and reshape only when they are missing. The original
# calls ran unconditionally and produced the console messages "package
# rattle.data is in use and will not be installed" and
# "Error in install.packages : Updating loaded packages".
if (!requireNamespace("rattle.data", quietly = TRUE)) {
  install.packages("rattle.data")
}
wine.stand <- scale(wine[-1]) # standardise the variables (Type column dropped)
if (!requireNamespace("reshape", quietly = TRUE)) {
  install.packages("reshape")
}
# Read the local copy of the food table. The original line used typographic
# quotes and had two statements on one line, so it could not be parsed.
# NOTE(review): the path is specific to one machine - confirm it exists.
frutas <- read.table("C:/Users/lenovo/Documents/alimentos2.txt",
                     header = FALSE, sep = "")
frutas
# Download the tab-separated food table (no header row) and preview it.
url <- "http://analisisydecision.es/wp-content/uploads/2009/06/alimentos2.txt"
frutas1 <- read.table(file = url, header = FALSE, sep = "\t")
head(frutas1, n = 10)
# Give the five columns descriptive names and confirm them.
nombres <- c("nombre", "inter_hidratos", "kcal", "proteinas", "grasas")
names(frutas1) <- nombres
print(names(frutas1))
# Standardise the numeric columns; column 1 holds the food name.
frutas1.stand <- scale(frutas1[, -1])
print(frutas1.stand)
# k-means on the standardised food data with k = 3 and k = 4.
# A fixed seed and several random starts make both partitions reproducible.
# Note that `k.means.fit` now replaces the wine fit stored under the same name.
set.seed(123)
k.means.fit <- kmeans(frutas1.stand, centers = 3, nstart = 25)
k.means.fit
k.means.fit1 <- kmeans(frutas1.stand, centers = 4, nstart = 25)
attributes(k.means.fit1)
attributes(k.means.fit)
# Components of the k = 3 solution.
k.means.fit$centers   # cluster centres (standardised units)
k.means.fit$size      # observations per cluster
k.means.fit$cluster   # cluster label of each observation
k.means.fit$withinss  # within-cluster sum of squares, one per cluster
k.means.fit$betweenss # between-cluster sum of squares
# Elbow plot: total within-cluster sum of squares for 1..nc clusters.
#   data: numeric matrix or data frame, one observation per row.
#   nc:   largest number of clusters to try (at least 1).
#   seed: RNG seed reset before every kmeans() run so the curve is repeatable.
# Draws the plot and invisibly returns the vector of sums of squares.
wssplot <- function(data, nc = 15, seed = 1234) {
  stopifnot(is.numeric(nc), length(nc) == 1, nc >= 1)
  wss <- numeric(nc)
  # One cluster: total sum of squares around the column means.
  wss[1] <- (nrow(data) - 1) * sum(apply(data, 2, var))
  # seq_len(nc)[-1] is empty when nc == 1, whereas 2:nc would count down 2, 1.
  for (i in seq_len(nc)[-1]) {
    set.seed(seed)
    wss[i] <- sum(kmeans(data, centers = i)$withinss)
  }
  plot(seq_len(nc), wss, type = "b", xlab = "Number of Clusters",
       ylab = "Within groups sum of squares")
  invisible(wss)
}
# Elbow plot for one to six clusters on the standardised food data.
wssplot(data = frutas1.stand, nc = 6)
# 2D cluster plot of the solution stored in k.means.fit.
clusplot(x = frutas1.stand, clus = k.means.fit$cluster,
         main = '2D representation of the Cluster solution',
         color = TRUE, shade = TRUE, labels = 2, lines = 0)
# Same plot for the solution stored in k.means.fit1.
clusplot(x = frutas1.stand, clus = k.means.fit1$cluster,
         main = '2D representation of the Cluster solution',
         color = TRUE, shade = TRUE, labels = 2, lines = 0)
# Cross-tabulate the first column of frutas1 against the k.means.fit labels.
table(frutas1[[1]], k.means.fit$cluster)
# Display the dendrogram and mark the four groups on it with red borders.
# The original comments mentioned 5 clusters while the code cut at k = 4, and
# the borders were never drawn because cutree() was simply repeated.
plot(cluster2)
rect.hclust(cluster2, k = 4, border = "red")
# Cut the tree into 4 clusters and plot the group label of each observation.
groups <- cutree(cluster2, k = 4)
plot(groups)
# One pie per cluster of the four-cluster solution, on a 2 x 2 grid. Each pie
# shows the column totals of the numeric variables, with its own label size.
par(mfrow = c(2, 2))
invisible(Map(
  function(grupo, tam) {
    pie(colSums(frutas1[k.means.fit1$cluster == grupo, -1]), cex = tam)
  },
  1:4,
  c(0.685, 0.7, 0.8, 0.82)
))
# Hierarchical clustering of the foods under four distance measures.
# dist() needs numeric input, so the first column (the food name) is dropped
# before computing distances; the original passed the whole data frame.
distancias1 <- dist(frutas1[-1], method = "manhattan")
distancias1
cluster1 <- hclust(distancias1)
cluster1
distancias2 <- dist(frutas1.stand, method = "euclidean")
cluster2 <- hclust(distancias2)
cluster2
# NOTE(review): `frutas` is assumed to have the same layout as `frutas1`
# (name in column 1) - confirm against the local file.
distancias3 <- dist(frutas[-1], method = "maximum")
cluster3 <- hclust(distancias3)
distancias4 <- dist(frutas[-1], method = "canberra")
cluster4 <- hclust(distancias4)
# Show the four dendrograms on a 2 x 2 grid with horizontal axis labels.
# Both settings are saved in `op` and restored afterwards so they do not leak
# into later plots; the original saved `op` but never restored it.
op <- par(mfcol = c(2, 2), las = 1)
plot(cluster1, main = "Método Manhatan")
plot(cluster2, main = "Distancia euclídea")
plot(cluster3, main = "Distancia por máximos")
plot(cluster4, main = "Método Camberra")
par(op)
# PAM (partitioning around medoids) on the Euclidean distances, k = 2 to 5.
paso1 <- pam(distancias2, k = 2)
paso2 <- pam(distancias2, k = 3)
paso3 <- pam(distancias2, k = 4)
paso4 <- pam(distancias2, k = 5)
# Plot each solution on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(paso1)
plot(paso2)
plot(paso3)
plot(paso4)
# Compare cluster sizes for k = 3, 4 and 5, then keep the k = 4 solution.
# kmeans() expects observations, so it is run on the standardised data. The
# original passed the dist object, which kmeans() coerces to a full n x n
# matrix and then clusters the rows of distances as if they were variables.
set.seed(123)
cluster.final <- kmeans(frutas1.stand, centers = 3, nstart = 25)
cluster.final$size # cluster sizes
cluster.final1 <- kmeans(frutas1.stand, centers = 4, nstart = 25)
cluster.final1$size # cluster sizes
cluster.final2 <- kmeans(frutas1.stand, centers = 5, nstart = 25)
cluster.final2$size # cluster sizes
# Reuse the k = 4 fit so its labels match the sizes printed above.
cluster.final <- cluster.final1
# Attach the final cluster label to every food and sort the table by cluster.
clus <- factor(cluster.final$cluster)
grupos <- cbind(data.frame(frutas), clus)
grupos <- sort_df(grupos, vars = "clus")
print(grupos)
# Name the columns, then print the mean of each numeric variable per cluster.
nombres <- c("nombre", "inter_hidratos", "kcal", "proteinas", "grasas", "clus")
names(grupos) <- nombres
invisible(lapply(
  c("inter_hidratos", "kcal", "proteinas", "grasas"),
  function(columna) {
    print(aggregate(grupos[[columna]], list(grupos$clus), mean))
  }
))
## Why standardise? k-means works on Euclidean distances, so without scaling
## the variables with the largest spread would dominate the result.
# Choosing the cluster-analysis method
# Seeded, with several random starts, so every fit below is reproducible.
set.seed(123)
k.means.fit <- kmeans(wine.stand, centers = 3, nstart = 25) # k = 3
k.means.fit
attributes(k.means.fit)
k.means.fit$size
k.means.fit$betweenss
# K-Means
# The original refitted the same k = 3 model a second time here. With a fixed
# seed that refit is redundant, so the fit above is reused.
# The clusters are plotted on the standardised matrix they were fitted on; the
# raw `wine` data frame still contains the Type label.
clusplot(wine.stand, k.means.fit$cluster,
         main = '2D representation of the Cluster solution',
         color = TRUE, shade = TRUE,
         labels = 2, lines = 0)
k.means.fit2 <- kmeans(wine.stand, centers = 2, nstart = 25) # k = 2
k.means.fit2
attributes(k.means.fit2) # was attributes(k.means.fit): copy-paste slip
k.means.fit5 <- kmeans(wine.stand, centers = 5, nstart = 25) # k = 5
k.means.fit5
attributes(k.means.fit5) # was attributes(k.means.fit): copy-paste slip
# Hierarchical clustering on the standardised data (the Type factor and the
# unscaled variables are left out of the distances).
distancias1 <- dist(wine.stand, method = "manhattan")
cluster1 <- hclust(distancias1)
plot(cluster1)
distancias2 <- dist(wine.stand, method = "euclidean")
cluster2 <- hclust(distancias2)
distancias3 <- dist(wine.stand, method = "maximum")
cluster3 <- hclust(distancias3)
# Canberra distances and hierarchical clustering for the food table.
# NOTE(review): `frutas` appears to include the name column (it is labelled
# "nombre" further down) - confirm dist() should receive it.
distancias4<-dist(frutas,method="canberra")
cluster4<-hclust(distancias4)
op <- par(mfcol = c(2, 2)) # lets us present
par(las =1) # the plot in 4 panels
# NOTE(review): cluster1-cluster3 were just rebuilt from the wine data while
# cluster4 comes from `frutas`; the four panels mix two datasets - confirm.
plot(cluster1,main="Método Manhatan")
plot(cluster2,main="Distancia euclídea")
plot(cluster3,main="Distancia por máximos")
plot(cluster4,main="Método Camberra")
# PAM on distancias2 with 2 to 5 clusters.
paso1<-pam(distancias2,2)
paso2<-pam(distancias2,3)
paso3<-pam(distancias2,4)
paso4<-pam(distancias2,5)
par(mfrow=c(2,2))
plot(paso1)
plot(paso2)
plot(paso3)
plot(paso4)
# NOTE(review): kmeans() is given a dist object here, not the observations -
# confirm this is intended.
cluster.final<- kmeans(distancias2,3)
cluster.final$size # cluster sizes
cluster.final1<- kmeans(distancias2,4)
cluster.final1$size # cluster sizes
cluster.final2<- kmeans(distancias2,5)
cluster.final2$size # cluster sizes
cluster.final<- kmeans(distancias2,4)
# NOTE(review): the cluster labels come from distancias2, which at this point
# holds the wine distances, but they are bound to the `frutas` table. cbind()
# will fail unless the row counts are compatible - confirm which dataset was
# meant.
grupos<-data.frame(frutas)
clus<-as.factor(cluster.final$cluster)
grupos<-cbind(data.frame(frutas),clus)
grupos<-sort_df(grupos,vars='clus')
grupos
nombres<-c("nombre","inter_hidratos","kcal","proteinas","grasas","clus")
names(grupos)<-nombres
# Mean of each numeric variable within each cluster.
aggregate(grupos$inter_hidratos,list(grupos$clus),mean)
aggregate(grupos$kcal,list(grupos$clus),mean)
aggregate(grupos$proteinas,list(grupos$clus),mean)
aggregate(grupos$grasas,list(grupos$clus),mean)
# Exercise 3: US states
data("USArrests")
# Show the documentation of the dataset
help("USArrests")
# Print the data in the console
USArrests
# Work on a copy
my_data <- USArrests
# Remove any rows with missing values (NA)
my_data <- na.omit(my_data)
# Standardise the variables
my_data <- scale(my_data)
# Show the first 6 rows (this line was bare prose in the original, which is a
# syntax error)
head(my_data, n = 6)
# Distance measures
# Grouping observations requires a way to measure the distance between them.
# get_dist() computes a distance matrix between the rows of a data matrix;
# the method may also be "kendall" or "spearman".
res.dist <- get_dist(USArrests, stand = TRUE, method = "pearson")
# Visualise the distance matrix
fviz_dist(res.dist,
          gradient = list(low = "#00AFBB", mid = "white", high = "#FC4E07"))
# Plot used to choose the number of clusters. The gap statistic relies on
# random reference data, so the seed is fixed for reproducibility.
set.seed(123)
fviz_nbclust(my_data, kmeans, method = "gap_stat")
# Compute and visualise the k-means clustering
km.res <- kmeans(my_data, 4, nstart = 25)
# Show the components and the cluster assigned to each state
km.res
# `ellipse.type` replaces the deprecated `frame.type` argument.
fviz_cluster(km.res, data = my_data, ellipse.type = "convex") +
  theme_minimal()
# PAM clustering: partitioning around medoids. A robust alternative to
# k-means that is less sensitive to outliers.
# Compute PAM with four clusters
library(cluster)
pam.res <- pam(my_data, k = 4)
# Visualise the result
fviz_cluster(pam.res)
# Using dendrograms
# Load and standardise the data
data(USArrests)
my_data <- scale(USArrests)
# Dissimilarity matrix (Euclidean)
d <- dist(my_data, method = "euclidean")
print(d)
# Hierarchical clustering with Ward's method
res.hc <- hclust(d, method = "ward.D2")
# Cut the tree into 4 groups
grp <- cutree(tree = res.hc, k = 4)
# Draw the tree, then outline the 4 groups with rectangles
plot(res.hc, cex = 0.6)
rect.hclust(res.hc, k = 4, border = 2:5)
# A more polished dendrogram
library(factoextra)
# Hierarchical clustering on the standardised data, cut into 4 clusters
res <- hcut(USArrests, k = 4, stand = TRUE)
# Draw the tree with a rectangle and a colour for each cluster
fviz_dend(
  res,
  rect = TRUE,
  cex = 0.5,
  k_colors = c("#00AFBB", "#2E9FDF", "#E7B800", "#FC4E07")
)
# Validation
my_data <- scale(USArrests)
# Install clValid only when it is missing, instead of on every run.
if (!requireNamespace("clValid", quietly = TRUE)) {
  install.packages("clValid")
}
library("clValid")
# Internal validation measures for 2 to 6 clusters and three methods
intern <- clValid(my_data, nClust = 2:6,
                  clMethods = c("hierarchical", "kmeans", "pam"),
                  validation = "internal")
# Summary
summary(intern)
# Packages for choosing the optimal number of clusters.
# Each package is installed only when missing; reinstalling a loaded package
# on every run is slow and fails while the package is in use.
if (!requireNamespace("factoextra", quietly = TRUE)) {
  # requireNamespace() is used instead of require(): it only tests
  # availability and does not attach devtools.
  if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools")
  }
  devtools::install_github("kassambara/factoextra")
}
pkgs <- c("cluster", "NbClust")
faltantes <- pkgs[!vapply(pkgs, requireNamespace, logical(1), quietly = TRUE)]
if (length(faltantes) > 0) {
  install.packages(faltantes)
}
library(factoextra)
library(cluster)
library(NbClust)
## Load the data
data(iris)
head(iris)
# Remove species column (5) and scale the data
# (the closing parenthesis was missing in the original, a syntax error)
iris.scaled <- scale(iris[, -5])
# K-means clustering, seeded for reproducibility
set.seed(123)
km.res <- kmeans(iris.scaled, 3, nstart = 25)
# k-means group number of each observation
km.res$cluster
# Visualize k-means clusters; `ellipse.type` replaces the deprecated
# `frame.type` argument.
fviz_cluster(km.res, data = iris.scaled, geom = "point",
             stand = FALSE, ellipse.type = "norm")