## Crear un nuevo dataframe con las variables c, h 

library(readxl)
# Load the CIELAB colour measurements (L, a, b) and the roast label (tueste).
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists.
dfcie <- read_excel("C:/Users/CAROLINA/OneDrive - FEDERACION NACIONAL DE ARROCEROS FEDEARROZ/Escritorio/Cielab_tueste_cafe.xlsx")

# Chroma: distance from the neutral axis in the a-b plane.
dfcie$c <- sqrt(dfcie$a^2 + dfcie$b^2)
# Hue angle in degrees. atan2() uses the signs of both coordinates, so the
# angle lands in the correct quadrant and a == 0 does not divide by zero.
# atan(b / a) only gives the same value when a > 0.
dfcie$h <- atan2(dfcie$b, dfcie$a) * 180 / pi

head(dfcie)
## # A tibble: 6 x 6
##       L     a     b tueste     c     h
##   <dbl> <dbl> <dbl> <chr>  <dbl> <dbl>
## 1  15.2  23.7  22.0 verde   32.3  42.9
## 2  12.3  20.8  21.0 verde   29.6  45.3
## 3  13.5  23.3  23.9 verde   33.4  45.7
## 4  13.6  21.1  20.4 verde   29.3  44.1
## 5  13.3  19.5  20.7 verde   28.5  46.7
## 6  12.4  21.9  20.8 verde   30.2  43.6
## estadísticas descriptivas
library(rgl)
# 3D scatter plot of the samples in CIELAB space (L, a, b), one sphere per row.
# Only three coordinates can be drawn: the 4th and 5th positional arguments of
# plot3d() are xlab and ylab, so passing dfcie$c and dfcie$h there turned the
# numeric vectors into axis labels instead of adding dimensions. They are
# dropped here (c and h are computed from a and b) and the axes are labelled
# explicitly.
# Colours follow the roast level in order of first appearance, so the plot no
# longer depends on the rows being sorted in four blocks of 30.
roast_group <- factor(dfcie$tueste, levels = unique(dfcie$tueste))
roast_palette <- c("red", "orange", "green", "blue", "yellow")
plot3d(
  x = dfcie$L,
  y = dfcie$a,
  z = dfcie$b,
  xlab = "L",
  ylab = "a",
  zlab = "b",
  type = "s",
  col = roast_palette[as.integer(roast_group)]
)
## Componentes principales - con los argumentos center y scale. de prcomp se estandarizan los datos 

# Principal component analysis on the five numeric variables; the roast label
# (tueste, column 4) is left out. prcomp() centres each variable and scales it
# to unit variance before rotating.
dfcie.pca <- prcomp(
  dfcie[, c("L", "a", "b", "c", "h")],
  center = TRUE,
  scale. = TRUE
)
summary(dfcie.pca)
## Importance of components:
##                           PC1    PC2     PC3     PC4      PC5
## Standard deviation     1.9631 0.8961 0.58485 0.03379 0.002343
## Proportion of Variance 0.7708 0.1606 0.06841 0.00023 0.000000
## Cumulative Proportion  0.7708 0.9314 0.99977 1.00000 1.000000
## Con los dos primeros componentes se explica el 93% de la variabilidad (se observa en la fila Cumulative Proportion)
## Para ver la estructura: la función str muestra qué elementos se pueden extraer después de hacer componentes principales 

# Show which components the prcomp result holds and their dimensions.
str(object = dfcie.pca)
## List of 5
##  $ sdev    : num [1:5] 1.96312 0.89607 0.58485 0.03379 0.00234
##  $ rotation: num [1:5, 1:5] 0.424 0.47 0.504 0.495 0.317 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:5] "L" "a" "b" "c" ...
##   .. ..$ : chr [1:5] "PC1" "PC2" "PC3" "PC4" ...
##  $ center  : Named num [1:5] 19.4 20.3 21.2 29.4 46
##   ..- attr(*, "names")= chr [1:5] "L" "a" "b" "c" ...
##  $ scale   : Named num [1:5] 5.92 4 4.99 6.3 2.14
##   ..- attr(*, "names")= chr [1:5] "L" "a" "b" "c" ...
##  $ x       : num [1:120, 1:5] -0.07 -0.561 0.467 -0.697 -0.557 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:5] "PC1" "PC2" "PC3" "PC4" ...
##  - attr(*, "class")= chr "prcomp"
## extraigo las desviaciones estándar; si las elevo al cuadrado son varianzas
# Standard deviation of each principal component (its square is the variance).
dfcie.pca[["sdev"]]
## [1] 1.963123832 0.896070585 0.584854454 0.033794989 0.002343376
# The first component carries the largest share of the variability.
# Variance per component, then its percentage of the total and the running sum.
var.pca <- dfcie.pca[["sdev"]]^2
100 * var.pca / sum(var.pca)
## [1] 7.707710e+01 1.605885e+01 6.841095e+00 2.284203e-02 1.098283e-04
cumsum(100 * var.pca / sum(var.pca))
## [1]  77.07710  93.13595  99.97705  99.99989 100.00000
# Extraemos el segundo elemento que devuelve la función: la matriz de rotación. Acá se elimina la correlación (haciendo una rotación de los datos sin cambiar su estructura inicial) y se generan las ecuaciones. El componente PC1 es importante porque todas las cargas de las variables son balanceadas. PC1 y PC2 explican el 93% de la variabilidad.  

## Para el PC1 = 0.42 L + 0.47 a + 0.50 b + 0.49 c + 0.32 h. Este componente recoge la información de las variables L, a, b y c. Para el componente 2 la variable mejor representada es h. 

# Loadings: each column holds the coefficients of one principal component.
dfcie.pca[["rotation"]]
##         PC1        PC2        PC3         PC4           PC5
## L 0.4238963 -0.2445950 -0.8709525 -0.04389454 -0.0003608497
## a 0.4701343  0.4181110  0.1460228 -0.68428529 -0.3385083918
## b 0.5042066  0.1018846  0.1824241  0.68577797 -0.4814823787
## c 0.4947461  0.2424953  0.1661286  0.12361619  0.8084198326
## h 0.3166822 -0.8343693  0.3990511 -0.21036762  0.0066356413
# The last element, x, holds the scores: the samples expressed in the rotated
# (latent) variables.

dfcie.pca[["x"]]
##                PC1         PC2         PC3           PC4           PC5
##   [1,] -0.06997655  1.88359826  0.26259310 -0.0715136017  5.904148e-03
##   [2,] -0.56115576  0.61493200  0.93057688  0.0184093035 -1.148660e-03
##   [3,]  0.46670360  0.88580732  1.12157671  0.0093385979 -1.088106e-03
##   [4,] -0.69679231  1.05906606  0.49498429 -0.0072661399  6.588990e-04
##   [5,] -0.55650763 -0.12785190  0.94375436  0.0280271522 -1.316539e-03
##   [6,] -0.64969630  1.43508460  0.65372136 -0.0132957642  2.399979e-03
##   [7,]  0.78227993  1.26025548  0.61441254 -0.0418584162  1.861738e-04
##   [8,]  0.55409123 -0.28749731  1.37147148  0.0645081934  5.283263e-04
##   [9,] -1.16276793  0.92971927  0.65890018  0.0179087299  1.833226e-04
##  [10,] -0.00218129  1.40977826  0.53334313 -0.0385326946  1.835492e-03
##  [11,]  0.34201104  1.29943093  0.99927364 -0.0157344230  2.918394e-04
##  [12,]  0.34839252  0.89262618  0.81424686 -0.0083150696 -8.860905e-04
##  [13,]  0.43517856  0.67797467  0.93150834  0.0063975557 -1.348424e-03
##  [14,] -0.10178527  0.05783727  0.89103662  0.0219895786 -1.422835e-03
##  [15,] -0.12933686  1.24349425  0.46760370 -0.0314554382  1.156582e-03
##  [16,]  0.25568211  1.19229095  0.89867546 -0.0151661614  5.198468e-05
##  [17,] -0.32199723  1.29657018  0.74662270 -0.0148784606  1.193001e-03
##  [18,] -0.60914247  0.82209564  0.76640938  0.0075150898 -5.521160e-04
##  [19,] -0.18676624  0.78463941  0.77834024  0.0004258328 -8.636452e-04
##  [20,]  0.50261613 -0.04224386  1.26654292  0.0507009739 -5.274241e-04
##  [21,]  0.13044928  0.08702426  0.98216962  0.0267843784 -1.309514e-03
##  [22,]  0.13328246 -0.52217297  1.07078256  0.0413516946  3.729081e-04
##  [23,]  0.35618119  1.26713398  0.87234690 -0.0216047457  2.950235e-04
##  [24,] -1.57528309  1.06672001  0.34470947  0.0235941841  1.360512e-03
##  [25,]  0.47440478  0.55383323  1.06804864  0.0192709013 -1.433091e-03
##  [26,] -1.18621262  1.46931512  0.25000492 -0.0057359906  3.803494e-03
##  [27,]  0.37385592  0.99108023  0.59640223 -0.0249452578 -4.658537e-04
##  [28,] -1.15177828  0.20505286  1.14173231  0.0426894256 -1.496160e-03
##  [29,] -1.73918791  0.23259810  0.90133600  0.0479743288 -1.579366e-03
##  [30,] -0.25261702  1.51095526  0.74089377 -0.0255939926  2.327419e-03
##  [31,]  2.81400169 -1.14020031  0.03890400  0.0750805692  6.034250e-03
##  [32,]  2.45543699  0.72260536  0.08018136 -0.0397158518 -1.925288e-03
##  [33,]  3.06066163 -0.75261042  0.04355249  0.0704039622  3.607534e-03
##  [34,]  4.09204901  0.55594230 -0.35418600 -0.0157371876 -2.058176e-03
##  [35,]  2.49529807 -0.32282007 -0.06296394  0.0137905911 -1.517480e-04
##  [36,]  3.55760417  0.28056363 -0.49889617 -0.0258120238 -1.917096e-03
##  [37,]  3.28769137  0.71218739 -0.38401118 -0.0591991504 -2.222976e-03
##  [38,]  3.75181952 -0.41082981 -0.14166861  0.0689287769  2.102913e-03
##  [39,]  3.45213766 -0.26719121 -0.44395598  0.0154575250 -6.378968e-05
##  [40,]  3.72715446 -0.01604487 -0.29668728  0.0232344581 -5.035231e-04
##  [41,]  2.53523182 -0.69182490  0.01822740  0.0399419932  2.019294e-03
##  [42,]  4.18111585 -0.09095679 -0.38078903  0.0437253122  2.688213e-04
##  [43,]  3.28136184 -0.09337195 -0.33048771  0.0066025198 -7.579216e-04
##  [44,]  3.16973615 -0.36555101 -0.25554589  0.0263597149  4.850984e-04
##  [45,]  2.24282790 -1.00920965 -0.35973844  0.0095086902  2.269173e-03
##  [46,]  3.04992186 -0.35156760 -0.23049364  0.0225652274  3.052283e-04
##  [47,]  3.03916223 -0.44850731 -0.15046268  0.0348934945  9.998832e-04
##  [48,]  3.06190660  0.18315934 -0.27557071 -0.0157313781 -1.672774e-03
##  [49,]  2.53659878 -1.15925704 -0.26214151  0.0353737326  4.266268e-03
##  [50,]  3.31653942  0.13376843 -0.33081679 -0.0082245787 -1.474123e-03
##  [51,]  2.95349865  0.38777425 -0.56317141 -0.0573469934 -2.232979e-03
##  [52,]  2.89467799  0.15425848 -0.40929867 -0.0301465710 -1.853159e-03
##  [53,]  2.82391325  0.18898058 -0.30715838 -0.0259955495 -1.830014e-03
##  [54,]  2.34714798 -0.40053063 -0.19276390  0.0021684897 -2.781437e-04
##  [55,]  2.89859889 -0.98777403 -0.37450061  0.0364766146  3.446096e-03
##  [56,]  3.03231424 -0.19527554 -0.24036587  0.0113943902 -4.406309e-04
##  [57,]  2.71679911 -0.32622835 -0.24847124  0.0065268120 -2.791496e-04
##  [58,]  2.66851269  0.04473793 -0.70004481 -0.0525752478 -2.056679e-03
##  [59,]  2.82032034 -0.34733764 -0.41576593 -0.0025312896 -4.300798e-04
##  [60,]  3.84679796  0.11636851 -0.28468964  0.0192462430 -8.807923e-04
##  [61,] -3.03807263  0.33112044 -0.57616262  0.0888181498 -1.374541e-03
##  [62,] -1.23391130 -1.66956336  0.49201966 -0.0201289057  2.508416e-03
##  [63,] -1.18216635 -0.86151104  0.17914746 -0.0070542939 -1.231257e-03
##  [64,] -1.22303161 -2.38454198  0.55677997 -0.0513110896  7.378110e-03
##  [65,] -2.36895435  0.38071819 -0.50602047  0.0457055310 -9.315999e-04
##  [66,] -2.58872166  0.72276136 -0.81347111  0.0634605816  8.177009e-04
##  [67,] -2.01223916  0.30662425 -0.50629485  0.0236153176 -1.136483e-03
##  [68,] -0.97834325 -0.92338479  0.07308833 -0.0146935968 -1.083667e-03
##  [69,] -1.60736540 -1.25643182  0.04576205 -0.0187696780 -7.066607e-04
##  [70,] -0.18471099  0.79393943 -0.52665170 -0.0597972246  3.272275e-04
##  [71,] -2.20346765 -1.46064308 -0.13132288 -0.0227806507 -9.475510e-04
##  [72,] -2.19982221  0.14747117 -0.73015111  0.0272163801 -1.594828e-03
##  [73,] -2.74531763  0.01029974 -0.44543750  0.0596557953 -2.214471e-03
##  [74,] -1.47775828 -0.47270254 -0.18960631 -0.0050243385 -2.258890e-03
##  [75,] -0.82368216  0.80202211 -0.50604079 -0.0346629170  7.207909e-04
##  [76,] -1.08484100 -0.42625954  0.39629015  0.0077816736 -1.685378e-03
##  [77,] -0.90762167 -0.43961854  0.08095074 -0.0076458034 -1.883840e-03
##  [78,] -1.51263927 -1.51457530  0.13760833 -0.0272606355  5.189252e-04
##  [79,] -1.83912714 -2.72883728  0.72039555 -0.0783392652  9.080823e-03
##  [80,] -1.61659895 -0.25634007 -0.25697529  0.0018715319 -2.273710e-03
##  [81,] -2.10058628 -0.55812407 -0.20621682  0.0126147779 -2.493860e-03
##  [82,] -2.60057337  0.36695994 -0.33440870  0.0622696928 -1.129937e-03
##  [83,] -2.33096305  0.72234714 -0.90814314  0.0434850879  9.991679e-04
##  [84,] -1.17510687 -0.51900439 -0.02495956 -0.0078293608 -2.004729e-03
##  [85,] -1.53706321  1.10603577 -1.09227260 -0.0139243523  3.811011e-03
##  [86,] -1.73986271  0.04223681 -0.41738016  0.0079198731 -1.874099e-03
##  [87,] -2.19011085 -1.51563966  0.21799987 -0.0171366354 -8.929095e-05
##  [88,] -2.69636595  1.16715663 -1.08518464  0.0764940718  4.029189e-03
##  [89,] -0.86117372 -1.01670961 -0.08952725 -0.0242224823 -9.926170e-04
##  [90,] -2.05592800 -1.57202714  0.18946703 -0.0230108077  2.200656e-04
##  [91,] -0.33432668  0.13431729 -0.10537712 -0.0257947996 -1.810559e-03
##  [92,] -1.39192581 -0.10293702 -0.28036403 -0.0041753672 -2.091599e-03
##  [93,] -2.15839086  0.09640228 -0.35155324  0.0304657264 -1.822349e-03
##  [94,] -1.68245087 -0.64164734 -0.04555089  0.0007501729 -2.134270e-03
##  [95,] -0.47961785  0.57957462 -0.26343209 -0.0350314242 -6.517068e-04
##  [96,] -2.17355219  0.51345028 -0.76666226  0.0317755682 -1.635997e-04
##  [97,] -0.79367692 -1.45550165  0.03461921 -0.0302939540  9.330347e-04
##  [98,] -1.36393708  0.28641850 -0.70432479 -0.0138300109 -1.100243e-03
##  [99,] -1.39436110 -0.64796568 -0.37221564 -0.0169084704 -2.280011e-03
## [100,] -1.40656402 -0.24478844 -0.49354510 -0.0119867270 -2.286416e-03
## [101,] -0.93399336  0.97178606 -0.37578263 -0.0281065957  1.514638e-03
## [102,] -1.50551333  0.62542699 -0.67630352 -0.0050649604  2.786647e-04
## [103,] -1.05105593  0.43426049 -0.71878975 -0.0281522132 -6.133852e-04
## [104,] -1.67341531  1.90408063 -1.24071354 -0.0176523034  1.170999e-02
## [105,] -0.83740252  0.74375474 -0.69500944 -0.0394256554  6.676923e-04
## [106,] -1.06885412 -0.35803133 -0.46658636 -0.0232992533 -2.293448e-03
## [107,] -0.22402563 -1.10107861  0.09693768 -0.0172312041  2.514540e-04
## [108,] -0.58217189 -0.98563237  0.40981051 -0.0021422885  4.985083e-05
## [109,] -0.79678821 -1.11493405  0.16032465 -0.0161038735 -1.844173e-04
## [110,] -0.95652448 -0.59729812 -0.46539278 -0.0293777103 -2.219714e-03
## [111,] -1.61929212 -0.63660275  0.12902809  0.0045974312 -1.944914e-03
## [112,] -2.48325962  0.84017809 -1.27746094  0.0533629965  2.068447e-03
## [113,]  0.14305481 -0.31691199 -0.44122986 -0.0433311915 -2.066760e-03
## [114,] -1.36718744 -1.44920322  0.27706151 -0.0201700463  7.265948e-04
## [115,] -0.63863471 -1.24912981  0.29735505 -0.0124724110  8.136792e-04
## [116,] -1.13995101 -0.70056092  0.06004137 -0.0086584952 -1.678985e-03
## [117,] -0.87071934  0.19085848 -0.19322340 -0.0165985564 -1.634083e-03
## [118,] -1.58367059 -0.33504574 -0.39775425 -0.0047654242 -2.366771e-03
## [119,] -0.25746355  0.37494834 -0.49554224 -0.0475909636 -1.234822e-03
## [120,] -1.57098369 -1.33437135 -0.18188538 -0.0297574826 -8.268043e-04
## Matriz de correlación de las variables originales y de las variables rotadas. Se comprueba que no hay correlaciones entre las variables rotadas o latentes 

# Correlations among the original numeric variables (roast label excluded).
cor(dfcie[, c("L", "a", "b", "c", "h")])
##           L         a         b         c         h
## L 1.0000000 0.6424449 0.7492989 0.7111114 0.5623375
## a 0.6424449 1.0000000 0.9563176 0.9860056 0.3137566
## b 0.7492989 0.9563176 1.0000000 0.9916594 0.5718356
## c 0.7111114 0.9860056 0.9916594 1.0000000 0.4639981
## h 0.5623375 0.3137566 0.5718356 0.4639981 1.0000000
# Correlations among the PCA scores; off-diagonal entries are numerically zero.
cor(dfcie.pca$x)
##               PC1          PC2           PC3           PC4           PC5
## PC1  1.000000e+00 1.785553e-16 -1.408711e-16  4.790013e-15 -1.848179e-13
## PC2  1.785553e-16 1.000000e+00  1.472744e-14  9.436019e-15  2.398657e-14
## PC3 -1.408711e-16 1.472744e-14  1.000000e+00  2.543555e-15  2.901817e-14
## PC4  4.790013e-15 9.436019e-15  2.543555e-15  1.000000e+00 -2.839428e-14
## PC5 -1.848179e-13 2.398657e-14  2.901817e-14 -2.839428e-14  1.000000e+00
## mapas de color para observar las correlaciones. variables no rotadas 
# Heat map of the correlations among the original variables.
heatmap(x = cor(dfcie[, -4]))

# Heat map of the correlations among the rotated variables (uncorrelated).
heatmap(x = cor(dfcie.pca$x))

## matriz de correlaciones de las variables rotadas y originales. puedo observar como todas las variables quedan mejor representadas en el componente principal 1. 
# Put the original variables next to the PCA scores and plot the correlation
# matrix of the combined table.
dfcie_join <- cbind(
  dfcie[, c("L", "a", "b", "c", "h")],
  dfcie.pca$x
)
corrplot::corrplot(cor(dfcie_join))

## Gráfico plot con una sola dimensión; permite observar tres grupos claramente definidos. Este gráfico sirve descriptivamente para observar los niveles de tostión; se evidencian tres grupos: claro, medio/oscuro y verde. Sirve para analizar los clusters 

# Scores on the first component against row index, coloured by roast level.
plot(
  dfcie.pca$x[, 1],
  pch = 16,
  col = factor(dfcie$tueste)
)

# Scores on the first two components, coloured by roast level.
plot(
  dfcie.pca$x[, c(1, 2)],
  pch = 16,
  col = factor(dfcie$tueste)
)

### Los análisis se harán con los componentes 1 y 2. La variable que queda menos representada es L. Para el componente 1 la variable que tiene mayor peso es b y para el componente 2 es h. 

library(ggbiplot)
## Loading required package: ggplot2
## Loading required package: plyr
## Loading required package: scales
## Loading required package: grid
# Biplot of the first two components with reference lines through the origin.
ggbiplot(dfcie.pca) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0)

# Same biplot with samples labelled by row name and grouped by roast level;
# it shows three groups, with the medium and dark roasts falling together.
ggbiplot(
  dfcie.pca,
  ellipse = TRUE,
  circle = TRUE,
  labels = rownames(dfcie),
  groups = dfcie$tueste
)

# MANOVA: do the five colour variables jointly differ between roast levels?
# Column 4 (tueste) is the grouping factor, so it is excluded from the response.
Y <- as.matrix(dfcie[, -4])
mod1 <- manova(Y ~ dfcie$tueste)
summary(mod1, test = "Pillai")
##               Df Pillai approx F num Df den Df    Pr(>F)    
## dfcie$tueste   3 1.8919   38.928     15    342 < 2.2e-16 ***
## Residuals    116                                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ANOVA - SOLO CON B (LA MEJOR componente 1)

# One-way ANOVA of b (largest loading on PC1) across roast levels.
mod2 <- aov(formula = b ~ tueste, data = dfcie)
summary(mod2)
##              Df Sum Sq Mean Sq F value Pr(>F)    
## tueste        3 2663.1   887.7   343.1 <2e-16 ***
## Residuals   116  300.1     2.6                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ANOVA - SOLO CON h (LA MEJOR componente 2 )

# One-way ANOVA of h (largest absolute loading on PC2) across roast levels.
# This overwrites the mod2 object fitted for b.
mod2 <- aov(formula = h ~ tueste, data = dfcie)
summary(mod2)
##              Df Sum Sq Mean Sq F value  Pr(>F)    
## tueste        3  194.1   64.69   21.26 4.8e-11 ***
## Residuals   116  352.9    3.04                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## análisis de cluster - tres cluster optimos 
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Gap statistic for choosing the number of k-means clusters, computed on all
# five numeric variables after standardising them.
M <- dfcie[, -4]
Ms <- scale(M)
fviz_nbclust(
  Ms,
  FUNcluster = kmeans,
  method = "gap_stat",
  diss = get_dist(Ms, "euclidean")
)

## cluster con b mejor variable representada en el componente principal 1
# k-means with three clusters on the standardised b coordinate, cross-tabulated
# against the roast level.
# kmeans() starts from random centres: the seed makes the table reproducible
# when this block is run on its own, and nstart keeps the best of several
# starts instead of relying on a single one. Cluster numbers are arbitrary
# labels.
M <- dfcie[, c("b")]
Ms <- scale(M)
set.seed(123)
clus1 <- kmeans(Ms, centers = 3, nstart = 25)
table(dfcie$tueste, clus1$cluster)
##         
##           1  2  3
##   claro   0  0 30
##   medio   1 29  0
##   oscuro  6 24  0
##   verde  27  3  0
## correctas clasificaciones 91,6%. se observa como medio y oscuro quedan en un mismo cluster

# Percentage of samples that fall in the most frequent cluster of their roast
# level. Computed from the current clustering instead of from counts typed in
# by hand, which go stale whenever kmeans() returns a different partition.
tab_tueste <- table(dfcie$tueste, clus1$cluster)
100 * sum(apply(tab_tueste, 1, max)) / sum(tab_tueste)
## [1] 91.66667
# Gap statistic for the number of k-means clusters using the hue angle h only.
# The original call clustered the unscaled dfcie[, -4] (all five variables)
# while the distance matrix was built from the standardised h alone; the
# standardised h is now used for both, in line with the M / Ms prepared here.
M <- dfcie[, c("h")]
Ms <- scale(M)
fviz_nbclust(
  Ms,
  FUNcluster = kmeans,
  method = "gap_stat",
  diss = get_dist(Ms, "euclidean")
)

## cluster con h, la variable mejor representada en el componente principal 2
# k-means with three clusters on the standardised hue angle h, cross-tabulated
# against the roast level.
# kmeans() starts from random centres: the seed makes the table reproducible
# when this block is run on its own, and nstart keeps the best of several
# starts instead of relying on a single one. Cluster numbers are arbitrary
# labels.
M <- dfcie[, c("h")]
Ms <- scale(M)
set.seed(123)
clus1 <- kmeans(Ms, centers = 3, nstart = 25)
table(dfcie$tueste, clus1$cluster)
##         
##           1  2  3
##   claro   3 27  0
##   medio   7 10 13
##   oscuro 13  7 10
##   verde  17  3 10
# Percentage of samples that fall in the most frequent cluster of their roast
# level. Computed from the current clustering instead of from counts typed in
# by hand, which go stale whenever kmeans() returns a different partition.
tab_tueste <- table(dfcie$tueste, clus1$cluster)
100 * sum(apply(tab_tueste, 1, max)) / sum(tab_tueste)
## [1] 58.33333
# k-means with three clusters on the standardised b and h variables,
# cross-tabulated against the roast level.
# kmeans() starts from random centres: the seed makes the table reproducible
# when this block is run on its own, and nstart keeps the best of several
# starts instead of relying on a single one. Cluster numbers are arbitrary
# labels.
M <- dfcie[, c("b", "h")]
Ms <- scale(M)
set.seed(123)
clus1 <- kmeans(Ms, centers = 3, nstart = 25)
table(dfcie$tueste, clus1$cluster)
##         
##           1  2  3
##   claro   0  0 30
##   medio  15 15  0
##   oscuro 15 15  0
##   verde  17 12  1
# Percentage of samples that fall in the most frequent cluster of their roast
# level, computed from the current clustering instead of hand-typed counts.
# The original sum used 20 for "verde" although the table recorded above shows
# 17; with the recorded table the row maxima are 30 + 15 + 15 + 17 = 77.
tab_tueste <- table(dfcie$tueste, clus1$cluster)
100 * sum(apply(tab_tueste, 1, max)) / sum(tab_tueste)
## [1] 64.16667
# k-means with three clusters on the standardised b, h, a and c variables
# (everything except L), cross-tabulated against the roast level.
# kmeans() starts from random centres: the seed makes the table reproducible
# when this block is run on its own, and nstart keeps the best of several
# starts instead of relying on a single one. Cluster numbers are arbitrary
# labels.
M <- dfcie[, c("b", "h", "a", "c")]
Ms <- scale(M)
set.seed(123)
clus1 <- kmeans(Ms, centers = 3, nstart = 25)
table(dfcie$tueste, clus1$cluster)
##         
##           1  2  3
##   claro   0  0 30
##   medio   2 28  0
##   oscuro  7 23  0
##   verde  29  1  0
# Percentage of samples that fall in the most frequent cluster of their roast
# level. Computed from the current clustering instead of from counts typed in
# by hand, which go stale whenever kmeans() returns a different partition.
tab_tueste <- table(dfcie$tueste, clus1$cluster)
100 * sum(apply(tab_tueste, 1, max)) / sum(tab_tueste)
## [1] 91.66667
### Con la clusterización se evidencia que tres grados de tostión son suficientes: claro, medio/oscuro y verde. En cuanto a las variables, la mejor representada en el componente 1 fue la variable b, y en el componente 2 la h. La variable L es la que menos información está aportando al análisis.