## Crear un nuevo dataframe con las variables c, h
library(readxl)
# Load the CIELAB readings of the coffee roast samples and derive the polar
# colour coordinates from the a/b axes:
#   c = chroma    = sqrt(a^2 + b^2)
#   h = hue angle = atan2(b, a), expressed in degrees
# NOTE(review): the workbook path is absolute and machine-specific, so the
# script only runs where this exact file exists.
dfcie <- read_excel("C:/Users/CAROLINA/OneDrive - FEDERACION NACIONAL DE ARROCEROS FEDEARROZ/Escritorio/Cielab_tueste_cafe.xlsx")
dfcie$c <- with(dfcie, sqrt(a^2 + b^2))
# atan2() keeps the correct quadrant when a is negative and returns 0 when a
# and b are both 0; atan(b / a) shifts those hues by 180 degrees and yields NaN
# for 0 / 0. For a > 0 both forms give the same angle.
dfcie$h <- with(dfcie, atan2(b, a) * 180 / pi)
head(dfcie)
## # A tibble: 6 x 6
## L a b tueste c h
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 15.2 23.7 22.0 verde 32.3 42.9
## 2 12.3 20.8 21.0 verde 29.6 45.3
## 3 13.5 23.3 23.9 verde 33.4 45.7
## 4 13.6 21.1 20.4 verde 29.3 44.1
## 5 13.3 19.5 20.7 verde 28.5 46.7
## 6 12.4 21.9 20.8 verde 30.2 43.6
## estadísticas descriptivas
library(rgl)
# 3D scatter of the samples in L/a/b space, drawn as spheres and coloured by
# roast level (tueste).
# plot3d() takes three coordinates; in the previous call the two extra
# positional vectors (c and h) fell into the xlab/ylab positions, so they are
# dropped and the axes are labelled explicitly.
# Colours are looked up from the tueste column, in order of first appearance,
# instead of assuming four consecutive groups of exactly 30 rows.
tueste_id <- match(dfcie$tueste, unique(dfcie$tueste))
plot3d(dfcie$L,
       dfcie$a,
       dfcie$b,
       xlab = "L", ylab = "a", zlab = "b",
       type = 's',
       col = c('red', 'orange', 'green', 'blue', 'yellow')[tueste_id])
## Componentes principales - con el argumento scale. = TRUE de prcomp se estandarizan los datos
# Principal component analysis of the five numeric colour variables
# (L, a, b, c, h; the tueste column is left out). Each variable is centred and
# scaled to unit variance before the decomposition.
dfcie.pca <- prcomp(
  dfcie[, c("L", "a", "b", "c", "h")],
  center = TRUE,
  scale. = TRUE
)
summary(dfcie.pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5
## Standard deviation 1.9631 0.8961 0.58485 0.03379 0.002343
## Proportion of Variance 0.7708 0.1606 0.06841 0.00023 0.000000
## Cumulative Proportion 0.7708 0.9314 0.99977 1.00000 1.000000
## Con los dos primeros componentes se explica el 93% de la variabilidad ( se observa proportion of variance)
## para ver la estructura. funcion str muestra que cosas se pueden extraer después de hacer componentes principales
# Show the structure of the prcomp result, i.e. the elements that can be
# extracted from it afterwards.
utils::str(dfcie.pca)
## List of 5
## $ sdev : num [1:5] 1.96312 0.89607 0.58485 0.03379 0.00234
## $ rotation: num [1:5, 1:5] 0.424 0.47 0.504 0.495 0.317 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:5] "L" "a" "b" "c" ...
## .. ..$ : chr [1:5] "PC1" "PC2" "PC3" "PC4" ...
## $ center : Named num [1:5] 19.4 20.3 21.2 29.4 46
## ..- attr(*, "names")= chr [1:5] "L" "a" "b" "c" ...
## $ scale : Named num [1:5] 5.92 4 4.99 6.3 2.14
## ..- attr(*, "names")= chr [1:5] "L" "a" "b" "c" ...
## $ x : num [1:120, 1:5] -0.07 -0.561 0.467 -0.697 -0.557 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:5] "PC1" "PC2" "PC3" "PC4" ...
## - attr(*, "class")= chr "prcomp"
## extraigo las desviaciones estándar; si las elevo al cuadrado son varianzas
# Standard deviation of each principal component.
dfcie.pca[["sdev"]]
## [1] 1.963123832 0.896070585 0.584854454 0.033794989 0.002343376
# The first component carries the largest variability.
# Squaring the standard deviations gives the component variances, shown below
# as a percentage of the total and as a cumulative percentage.
var.pca <- dfcie.pca[["sdev"]]^2
100 * var.pca / sum(var.pca)
## [1] 7.707710e+01 1.605885e+01 6.841095e+00 2.284203e-02 1.098283e-04
cumsum(100 * var.pca / sum(var.pca))
## [1] 77.07710 93.13595 99.97705 99.99989 100.00000
# extraemos el segundo elemento que devuelve la función: la matriz de rotación. Acá se elimina la correlación (haciendo una rotación de los datos sin cambiar su estructura inicial) y se generan las ecuaciones. El componente PC1 es importante porque las cargas de todas las variables están balanceadas. PC1 y PC2 explican el 93% de la variabilidad.
## para el PC1 = 0.42 L + 0.47 a + 0.50 b + 0.49 c + 0.32 h. Este componente recoge la información de las variables L, a, b, c. Para el componente 2 la variable mejor representada es h.
# Rotation (loadings) matrix: one row per variable, one column per component.
dfcie.pca[["rotation"]]
## PC1 PC2 PC3 PC4 PC5
## L 0.4238963 -0.2445950 -0.8709525 -0.04389454 -0.0003608497
## a 0.4701343 0.4181110 0.1460228 -0.68428529 -0.3385083918
## b 0.5042066 0.1018846 0.1824241 0.68577797 -0.4814823787
## c 0.4947461 0.2424953 0.1661286 0.12361619 0.8084198326
## h 0.3166822 -0.8343693 0.3990511 -0.21036762 0.0066356413
### el ultimo atributo x son las variables latentes = variables rotadas= nuevas variables
# PCA scores: the samples expressed in the rotated (principal component) axes.
dfcie.pca[["x"]]
## PC1 PC2 PC3 PC4 PC5
## [1,] -0.06997655 1.88359826 0.26259310 -0.0715136017 5.904148e-03
## [2,] -0.56115576 0.61493200 0.93057688 0.0184093035 -1.148660e-03
## [3,] 0.46670360 0.88580732 1.12157671 0.0093385979 -1.088106e-03
## [4,] -0.69679231 1.05906606 0.49498429 -0.0072661399 6.588990e-04
## [5,] -0.55650763 -0.12785190 0.94375436 0.0280271522 -1.316539e-03
## [6,] -0.64969630 1.43508460 0.65372136 -0.0132957642 2.399979e-03
## [7,] 0.78227993 1.26025548 0.61441254 -0.0418584162 1.861738e-04
## [8,] 0.55409123 -0.28749731 1.37147148 0.0645081934 5.283263e-04
## [9,] -1.16276793 0.92971927 0.65890018 0.0179087299 1.833226e-04
## [10,] -0.00218129 1.40977826 0.53334313 -0.0385326946 1.835492e-03
## [11,] 0.34201104 1.29943093 0.99927364 -0.0157344230 2.918394e-04
## [12,] 0.34839252 0.89262618 0.81424686 -0.0083150696 -8.860905e-04
## [13,] 0.43517856 0.67797467 0.93150834 0.0063975557 -1.348424e-03
## [14,] -0.10178527 0.05783727 0.89103662 0.0219895786 -1.422835e-03
## [15,] -0.12933686 1.24349425 0.46760370 -0.0314554382 1.156582e-03
## [16,] 0.25568211 1.19229095 0.89867546 -0.0151661614 5.198468e-05
## [17,] -0.32199723 1.29657018 0.74662270 -0.0148784606 1.193001e-03
## [18,] -0.60914247 0.82209564 0.76640938 0.0075150898 -5.521160e-04
## [19,] -0.18676624 0.78463941 0.77834024 0.0004258328 -8.636452e-04
## [20,] 0.50261613 -0.04224386 1.26654292 0.0507009739 -5.274241e-04
## [21,] 0.13044928 0.08702426 0.98216962 0.0267843784 -1.309514e-03
## [22,] 0.13328246 -0.52217297 1.07078256 0.0413516946 3.729081e-04
## [23,] 0.35618119 1.26713398 0.87234690 -0.0216047457 2.950235e-04
## [24,] -1.57528309 1.06672001 0.34470947 0.0235941841 1.360512e-03
## [25,] 0.47440478 0.55383323 1.06804864 0.0192709013 -1.433091e-03
## [26,] -1.18621262 1.46931512 0.25000492 -0.0057359906 3.803494e-03
## [27,] 0.37385592 0.99108023 0.59640223 -0.0249452578 -4.658537e-04
## [28,] -1.15177828 0.20505286 1.14173231 0.0426894256 -1.496160e-03
## [29,] -1.73918791 0.23259810 0.90133600 0.0479743288 -1.579366e-03
## [30,] -0.25261702 1.51095526 0.74089377 -0.0255939926 2.327419e-03
## [31,] 2.81400169 -1.14020031 0.03890400 0.0750805692 6.034250e-03
## [32,] 2.45543699 0.72260536 0.08018136 -0.0397158518 -1.925288e-03
## [33,] 3.06066163 -0.75261042 0.04355249 0.0704039622 3.607534e-03
## [34,] 4.09204901 0.55594230 -0.35418600 -0.0157371876 -2.058176e-03
## [35,] 2.49529807 -0.32282007 -0.06296394 0.0137905911 -1.517480e-04
## [36,] 3.55760417 0.28056363 -0.49889617 -0.0258120238 -1.917096e-03
## [37,] 3.28769137 0.71218739 -0.38401118 -0.0591991504 -2.222976e-03
## [38,] 3.75181952 -0.41082981 -0.14166861 0.0689287769 2.102913e-03
## [39,] 3.45213766 -0.26719121 -0.44395598 0.0154575250 -6.378968e-05
## [40,] 3.72715446 -0.01604487 -0.29668728 0.0232344581 -5.035231e-04
## [41,] 2.53523182 -0.69182490 0.01822740 0.0399419932 2.019294e-03
## [42,] 4.18111585 -0.09095679 -0.38078903 0.0437253122 2.688213e-04
## [43,] 3.28136184 -0.09337195 -0.33048771 0.0066025198 -7.579216e-04
## [44,] 3.16973615 -0.36555101 -0.25554589 0.0263597149 4.850984e-04
## [45,] 2.24282790 -1.00920965 -0.35973844 0.0095086902 2.269173e-03
## [46,] 3.04992186 -0.35156760 -0.23049364 0.0225652274 3.052283e-04
## [47,] 3.03916223 -0.44850731 -0.15046268 0.0348934945 9.998832e-04
## [48,] 3.06190660 0.18315934 -0.27557071 -0.0157313781 -1.672774e-03
## [49,] 2.53659878 -1.15925704 -0.26214151 0.0353737326 4.266268e-03
## [50,] 3.31653942 0.13376843 -0.33081679 -0.0082245787 -1.474123e-03
## [51,] 2.95349865 0.38777425 -0.56317141 -0.0573469934 -2.232979e-03
## [52,] 2.89467799 0.15425848 -0.40929867 -0.0301465710 -1.853159e-03
## [53,] 2.82391325 0.18898058 -0.30715838 -0.0259955495 -1.830014e-03
## [54,] 2.34714798 -0.40053063 -0.19276390 0.0021684897 -2.781437e-04
## [55,] 2.89859889 -0.98777403 -0.37450061 0.0364766146 3.446096e-03
## [56,] 3.03231424 -0.19527554 -0.24036587 0.0113943902 -4.406309e-04
## [57,] 2.71679911 -0.32622835 -0.24847124 0.0065268120 -2.791496e-04
## [58,] 2.66851269 0.04473793 -0.70004481 -0.0525752478 -2.056679e-03
## [59,] 2.82032034 -0.34733764 -0.41576593 -0.0025312896 -4.300798e-04
## [60,] 3.84679796 0.11636851 -0.28468964 0.0192462430 -8.807923e-04
## [61,] -3.03807263 0.33112044 -0.57616262 0.0888181498 -1.374541e-03
## [62,] -1.23391130 -1.66956336 0.49201966 -0.0201289057 2.508416e-03
## [63,] -1.18216635 -0.86151104 0.17914746 -0.0070542939 -1.231257e-03
## [64,] -1.22303161 -2.38454198 0.55677997 -0.0513110896 7.378110e-03
## [65,] -2.36895435 0.38071819 -0.50602047 0.0457055310 -9.315999e-04
## [66,] -2.58872166 0.72276136 -0.81347111 0.0634605816 8.177009e-04
## [67,] -2.01223916 0.30662425 -0.50629485 0.0236153176 -1.136483e-03
## [68,] -0.97834325 -0.92338479 0.07308833 -0.0146935968 -1.083667e-03
## [69,] -1.60736540 -1.25643182 0.04576205 -0.0187696780 -7.066607e-04
## [70,] -0.18471099 0.79393943 -0.52665170 -0.0597972246 3.272275e-04
## [71,] -2.20346765 -1.46064308 -0.13132288 -0.0227806507 -9.475510e-04
## [72,] -2.19982221 0.14747117 -0.73015111 0.0272163801 -1.594828e-03
## [73,] -2.74531763 0.01029974 -0.44543750 0.0596557953 -2.214471e-03
## [74,] -1.47775828 -0.47270254 -0.18960631 -0.0050243385 -2.258890e-03
## [75,] -0.82368216 0.80202211 -0.50604079 -0.0346629170 7.207909e-04
## [76,] -1.08484100 -0.42625954 0.39629015 0.0077816736 -1.685378e-03
## [77,] -0.90762167 -0.43961854 0.08095074 -0.0076458034 -1.883840e-03
## [78,] -1.51263927 -1.51457530 0.13760833 -0.0272606355 5.189252e-04
## [79,] -1.83912714 -2.72883728 0.72039555 -0.0783392652 9.080823e-03
## [80,] -1.61659895 -0.25634007 -0.25697529 0.0018715319 -2.273710e-03
## [81,] -2.10058628 -0.55812407 -0.20621682 0.0126147779 -2.493860e-03
## [82,] -2.60057337 0.36695994 -0.33440870 0.0622696928 -1.129937e-03
## [83,] -2.33096305 0.72234714 -0.90814314 0.0434850879 9.991679e-04
## [84,] -1.17510687 -0.51900439 -0.02495956 -0.0078293608 -2.004729e-03
## [85,] -1.53706321 1.10603577 -1.09227260 -0.0139243523 3.811011e-03
## [86,] -1.73986271 0.04223681 -0.41738016 0.0079198731 -1.874099e-03
## [87,] -2.19011085 -1.51563966 0.21799987 -0.0171366354 -8.929095e-05
## [88,] -2.69636595 1.16715663 -1.08518464 0.0764940718 4.029189e-03
## [89,] -0.86117372 -1.01670961 -0.08952725 -0.0242224823 -9.926170e-04
## [90,] -2.05592800 -1.57202714 0.18946703 -0.0230108077 2.200656e-04
## [91,] -0.33432668 0.13431729 -0.10537712 -0.0257947996 -1.810559e-03
## [92,] -1.39192581 -0.10293702 -0.28036403 -0.0041753672 -2.091599e-03
## [93,] -2.15839086 0.09640228 -0.35155324 0.0304657264 -1.822349e-03
## [94,] -1.68245087 -0.64164734 -0.04555089 0.0007501729 -2.134270e-03
## [95,] -0.47961785 0.57957462 -0.26343209 -0.0350314242 -6.517068e-04
## [96,] -2.17355219 0.51345028 -0.76666226 0.0317755682 -1.635997e-04
## [97,] -0.79367692 -1.45550165 0.03461921 -0.0302939540 9.330347e-04
## [98,] -1.36393708 0.28641850 -0.70432479 -0.0138300109 -1.100243e-03
## [99,] -1.39436110 -0.64796568 -0.37221564 -0.0169084704 -2.280011e-03
## [100,] -1.40656402 -0.24478844 -0.49354510 -0.0119867270 -2.286416e-03
## [101,] -0.93399336 0.97178606 -0.37578263 -0.0281065957 1.514638e-03
## [102,] -1.50551333 0.62542699 -0.67630352 -0.0050649604 2.786647e-04
## [103,] -1.05105593 0.43426049 -0.71878975 -0.0281522132 -6.133852e-04
## [104,] -1.67341531 1.90408063 -1.24071354 -0.0176523034 1.170999e-02
## [105,] -0.83740252 0.74375474 -0.69500944 -0.0394256554 6.676923e-04
## [106,] -1.06885412 -0.35803133 -0.46658636 -0.0232992533 -2.293448e-03
## [107,] -0.22402563 -1.10107861 0.09693768 -0.0172312041 2.514540e-04
## [108,] -0.58217189 -0.98563237 0.40981051 -0.0021422885 4.985083e-05
## [109,] -0.79678821 -1.11493405 0.16032465 -0.0161038735 -1.844173e-04
## [110,] -0.95652448 -0.59729812 -0.46539278 -0.0293777103 -2.219714e-03
## [111,] -1.61929212 -0.63660275 0.12902809 0.0045974312 -1.944914e-03
## [112,] -2.48325962 0.84017809 -1.27746094 0.0533629965 2.068447e-03
## [113,] 0.14305481 -0.31691199 -0.44122986 -0.0433311915 -2.066760e-03
## [114,] -1.36718744 -1.44920322 0.27706151 -0.0201700463 7.265948e-04
## [115,] -0.63863471 -1.24912981 0.29735505 -0.0124724110 8.136792e-04
## [116,] -1.13995101 -0.70056092 0.06004137 -0.0086584952 -1.678985e-03
## [117,] -0.87071934 0.19085848 -0.19322340 -0.0165985564 -1.634083e-03
## [118,] -1.58367059 -0.33504574 -0.39775425 -0.0047654242 -2.366771e-03
## [119,] -0.25746355 0.37494834 -0.49554224 -0.0475909636 -1.234822e-03
## [120,] -1.57098369 -1.33437135 -0.18188538 -0.0297574826 -8.268043e-04
## matriz de correlación de las variables originales y de las variables rotadas. Se comprueba que no hay correlaciones entre las variables rotadas o latentes
# Correlation matrix of the original numeric variables (tueste is excluded
# because it is categorical).
cor(dfcie[, c("L", "a", "b", "c", "h")])
## L a b c h
## L 1.0000000 0.6424449 0.7492989 0.7111114 0.5623375
## a 0.6424449 1.0000000 0.9563176 0.9860056 0.3137566
## b 0.7492989 0.9563176 1.0000000 0.9916594 0.5718356
## c 0.7111114 0.9860056 0.9916594 1.0000000 0.4639981
## h 0.5623375 0.3137566 0.5718356 0.4639981 1.0000000
# Correlation matrix of the PCA scores; the off-diagonal entries come out
# numerically zero.
cor(dfcie.pca[["x"]])
## PC1 PC2 PC3 PC4 PC5
## PC1 1.000000e+00 1.785553e-16 -1.408711e-16 4.790013e-15 -1.848179e-13
## PC2 1.785553e-16 1.000000e+00 1.472744e-14 9.436019e-15 2.398657e-14
## PC3 -1.408711e-16 1.472744e-14 1.000000e+00 2.543555e-15 2.901817e-14
## PC4 4.790013e-15 9.436019e-15 2.543555e-15 1.000000e+00 -2.839428e-14
## PC5 -1.848179e-13 2.398657e-14 2.901817e-14 -2.839428e-14 1.000000e+00
## mapas de color para observar las correlaciones. variables no rotadas
# Heat map of the correlations among the original (unrotated) variables.
heatmap(x = cor(dfcie[, c("L", "a", "b", "c", "h")]))

# Heat map of the correlations among the rotated variables (PCA scores), which
# are uncorrelated.
heatmap(x = cor(dfcie.pca[["x"]]))

## matriz de correlaciones de las variables rotadas y originales. puedo observar como todas las variables quedan mejor representadas en el componente principal 1.
# Put the five original colour variables side by side with the PCA scores and
# plot the correlation matrix of the combined table.
dfcie_join <- cbind(
  dfcie[, c("L", "a", "b", "c", "h")],
  dfcie.pca[["x"]]
)
corrplot::corrplot(cor(dfcie_join))

## gráfico plot con una sola dimensión; permite observar tres grupos claramente definidos. Este gráfico sirve descriptivamente para observar los niveles de tostión: se evidencian tres grupos: claro, medio/oscuro y verde. Sirve para analizar los clusters
# Index plot of the PC1 scores, one point per sample, coloured by roast level.
plot(dfcie.pca$x[, 1],
     col = factor(dfcie$tueste),
     pch = 16)

# Scatter plot of the PC1 and PC2 scores with the same colouring.
plot(dfcie.pca$x[, 1:2],
     col = factor(dfcie$tueste),
     pch = 16)

### los análisis se harán con el componente 1 y 2. la variable que queda menos representada es L. para el componente1 la variable que tiene mayor peso es b y para el componentes 2 es h.
library(ggbiplot)
## Loading required package: ggplot2
## Loading required package: plyr
## Loading required package: scales
## Loading required package: grid
# Biplot of the PCA with horizontal and vertical reference lines through 0.
ggbiplot(dfcie.pca) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0)

# Biplot grouped by roast level, with ellipses and the circle switched on and
# the row names as point labels. It shows three groups associated with roast
# degree, with the medium and dark roasts falling in the same group.
ggbiplot(
  dfcie.pca,
  groups = dfcie$tueste,
  labels = rownames(dfcie),
  ellipse = TRUE,
  circle = TRUE
)

# One-way MANOVA: the five colour variables form the response matrix and roast
# level (tueste) is the factor.
Y <- as.matrix(dfcie[, c("L", "a", "b", "c", "h")])
mod1 <- manova(Y ~ dfcie$tueste)
summary(mod1)
## Df Pillai approx F num Df den Df Pr(>F)
## dfcie$tueste 3 1.8919 38.928 15 342 < 2.2e-16 ***
## Residuals 116
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ANOVA - SOLO CON B (LA MEJOR componente 1)
# One-way ANOVA of b against roast level.
mod2 <- aov(formula = b ~ tueste, data = dfcie)
summary(mod2)
## Df Sum Sq Mean Sq F value Pr(>F)
## tueste 3 2663.1 887.7 343.1 <2e-16 ***
## Residuals 116 300.1 2.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA of h only (the variable best represented on component 2).
# mod2 is reassigned here, so the model for b above is no longer available
# after this point.
mod2 <- aov(formula = h ~ tueste, data = dfcie)
summary(mod2)
## Df Sum Sq Mean Sq F value Pr(>F)
## tueste 3 194.1 64.69 21.26 4.8e-11 ***
## Residuals 116 352.9 3.04
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## análisis de cluster - tres cluster optimos
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Estimate the number of clusters with the gap statistic on the standardised
# colour variables (every column except tueste).
# The gap statistic draws random reference data sets and k-means starts from
# random centres, so the RNG is seeded to make the curve reproducible.
set.seed(2024)
M <- dfcie[, -4]
Ms <- scale(M)
fviz_nbclust(Ms,
             FUNcluster = kmeans,
             method = 'gap_stat',
             diss = get_dist(Ms, 'euclidean'))

## cluster con b mejor variable representada en el componente principal 1
# k-means with 3 clusters on standardised b (the variable with the largest
# PC1 loading), cross-tabulated against roast level.
# The RNG is seeded and several random starts are tried so the partition is
# reproducible; drop = FALSE keeps M a one-column table whatever its class.
set.seed(2024)
M <- dfcie[, "b", drop = FALSE]
Ms <- scale(M)
clus1 <- kmeans(Ms, centers = 3, nstart = 25)
tab_b <- table(dfcie$tueste,
               clus1$cluster)
tab_b
## Recorded output of the original, unseeded run (cluster numbering is
## arbitrary and may differ on a re-run):
##
## 1 2 3
## claro 0 0 30
## medio 1 29 0
## oscuro 6 24 0
## verde 27 3 0
# Percentage of samples that fall in the dominant cluster of their roast level
# (recorded run: 91.6%). Medium and dark roasts end up in the same cluster.
# The counts are taken from the table instead of being typed in by hand.
100 * sum(apply(tab_b, 1, max)) / sum(tab_b)
## [1] 91.66667
# Estimate the number of clusters with the gap statistic using h only.
# The previous call built M/Ms from h but then evaluated the unscaled full
# table (dfcie[, -4]); the standardised h matrix is now the data being
# clustered, consistent with the distance matrix passed as diss.
# The RNG is seeded because the gap statistic and k-means are stochastic.
set.seed(2024)
M <- dfcie[, 'h', drop = FALSE]
Ms <- scale(M)
fviz_nbclust(Ms,
             FUNcluster = kmeans,
             method = 'gap_stat',
             diss = get_dist(Ms,
                             'euclidean'))

## cluster con h, la variable mejor representada en el componente principal 2
# k-means with 3 clusters on standardised h, cross-tabulated against roast
# level. Seeded and run with several random starts so the partition is
# reproducible.
set.seed(2024)
M <- dfcie[, "h", drop = FALSE]
Ms <- scale(M)
clus1 <- kmeans(Ms, centers = 3, nstart = 25)
tab_h <- table(dfcie$tueste,
               clus1$cluster)
tab_h
## Recorded output of the original, unseeded run (cluster numbering is
## arbitrary and may differ on a re-run):
##
## 1 2 3
## claro 3 27 0
## medio 7 10 13
## oscuro 13 7 10
## verde 17 3 10
# Percentage of samples that fall in the dominant cluster of their roast
# level, computed from the table instead of hand-copied counts.
100 * sum(apply(tab_h, 1, max)) / sum(tab_h)
## [1] 58.33333
# k-means with 3 clusters on standardised b and h, cross-tabulated against
# roast level. Seeded and run with several random starts so the partition is
# reproducible.
set.seed(2024)
M <- dfcie[, c("b", "h")]
Ms <- scale(M)
clus1 <- kmeans(Ms, centers = 3, nstart = 25)
tab_bh <- table(dfcie$tueste,
                clus1$cluster)
tab_bh
## Recorded output of the original, unseeded run (cluster numbering is
## arbitrary and may differ on a re-run):
##
## 1 2 3
## claro 0 0 30
## medio 15 15 0
## oscuro 15 15 0
## verde 17 12 1
# Percentage of samples that fall in the dominant cluster of their roast
# level, computed from the table. The original hand-typed sum used 20 for
# verde, which does not appear in the recorded table (its largest count is
# 17), so the recorded 66.66667 below overstates that run; the recorded table
# gives 100 * 77 / 120 = 64.16667.
100 * sum(apply(tab_bh, 1, max)) / sum(tab_bh)
## [1] 66.66667
# k-means with 3 clusters on standardised b, h, a and c, cross-tabulated
# against roast level. Seeded and run with several random starts so the
# partition is reproducible.
set.seed(2024)
M <- dfcie[, c("b", "h", "a", "c")]
Ms <- scale(M)
clus1 <- kmeans(Ms, centers = 3, nstart = 25)
tab_bhac <- table(dfcie$tueste,
                  clus1$cluster)
tab_bhac
## Recorded output of the original, unseeded run (cluster numbering is
## arbitrary and may differ on a re-run):
##
## 1 2 3
## claro 0 0 30
## medio 2 28 0
## oscuro 7 23 0
## verde 29 1 0
# Percentage of samples that fall in the dominant cluster of their roast
# level, computed from the table instead of hand-copied counts.
100 * sum(apply(tab_bhac, 1, max)) / sum(tab_bhac)
## [1] 91.66667
### con la clusterización se evidencia que tres grados de tostión son suficientes: claro, medio/oscuro y verde. Para el caso de las variables, la mejor representada en el componente 1 fue la variable b, y en el componente 2 la h. La variable L es la que menos información está aportando al análisis.