pacman::p_load(pacman,dplyr,GGally,ggplot2,ggthemes,ggvis,httr,lubridate,plotly,rio,rmarkdown,shiny,stringr,tidyr,tidyverse,lattice,caret,pls,MASS,yarrr,psych,ggcorrplot,GGally,CCA,CCP,rpart,rpart.plot,ggrepel)
library(tidyverse)
library(rpart)
library(rpart.plot)
library(caret)
library(rio)
library(stats)
# Read the measurements workbook through rio::import() and echo the resulting table.
df <- rio::import(file = "CafesFincasTMod.xlsx")
df
##       Finca Dias    T Puntaje       pH     Brix   Acidez_T  cafeina      ACQA
## 1  Corpachi    3 20.0   86.63 5.013333 1.200000 0.01552704 676.8915  412.7191
## 2  Corpachi    5 20.0   86.88 5.023333 1.270000 0.01522152 639.2415  432.7061
## 3  Corpachi    7 20.0   87.19 4.953333 1.070000 0.01521936 613.4221  395.6890
## 4  Corpachi    3 24.0   86.28 5.076667 1.200000 0.01318979 705.0421  426.3423
## 5  Corpachi    5 24.0   87.09 5.026667 2.000000 0.01250129 665.7463  347.7119
## 6  Corpachi    7 24.0   85.79 4.976667 1.930000 0.01251373 684.4820  432.7155
## 7  Corpachi    3 30.0   86.32 5.013333 2.000000 0.01387144 737.6643  490.8567
## 8  Corpachi    5 30.0   86.21 4.900833 1.933333 0.01382723 644.5686  444.3011
## 9  Corpachi    7 30.0   86.61 4.973333 2.000000 0.01487416 665.9543  794.6219
## 10 Lamastus    3 24.0   88.56 5.126667 1.666667 0.01486505 642.1583  358.4777
## 11 Lamastus    5 24.0   89.22 5.120000 1.733333 0.01487919 687.7630  387.1427
## 12 Lamastus    7 24.0   88.92 5.133333 1.733333 0.01759923 754.7001  455.3662
## 13 Lamastus    3 17.4   88.67 5.156667 1.533333 0.01385646 593.4005  337.9377
## 14 Lamastus    5 17.4   89.06 5.116667 1.600000 0.01283884 666.6153  365.9894
## 15 Lamastus    7 17.4   88.78 5.140000 1.866667 0.01522806 698.9411  424.4354
## 16 Lamastus    3 30.0   89.17 5.136667 1.666667 0.01351684 707.1914  395.0926
## 17 Lamastus    5 30.0   89.06 5.126667 1.800000 0.01792097 728.4771  445.4386
## 18 Lamastus    7 30.0   89.22 5.136667 1.600000 0.01655830 663.1918  427.2639
## 19    Nuguo    3 24.0   88.64 4.803333 1.666667 0.01418219 637.0126  962.5415
## 20    Nuguo    5 24.0   88.83 4.717083 1.400000 0.01455186 545.7977  722.3411
## 21    Nuguo    7 24.0   88.89 4.810000 1.466667 0.01354944 557.9616  545.4887
## 22    Nuguo    3 16.8   88.75 4.813333 2.000000 0.01628978 713.2795 1079.2788
## 23    Nuguo    5 16.8   88.64 4.793333 1.600000 0.01628978 603.5466  936.4135
## 24    Nuguo    7 16.8   88.86 4.816667 1.600000 0.01628978 638.5900  822.7184
## 25    Nuguo    3 30.0   88.83 4.756667 1.000000 0.01187095 496.6943  665.4603
## 26    Nuguo    5 30.0   89.36 4.773333 1.200000 0.01454032 621.4622  352.6826
## 27    Nuguo    7 30.0   89.44 4.773333 1.000000 0.01375357 549.8571  335.5529
## 28  Hartman    3 22.0   89.44 5.123333 1.466667 0.01253475 616.1912  334.2410
## 29  Hartman    5 22.0   88.17 5.146667 1.733333 0.01352358 698.7151  340.8357
## 30  Hartman    7 22.0   89.31 5.123333 1.600000 0.01624853 660.3540  312.9145
## 31  Hartman    3 24.0   89.06 5.106667 1.533333 0.01318423 634.0289  349.6764
## 32  Hartman    5 24.0   89.14 5.113333 1.466667 0.01624368 638.4986  339.9476
## 33  Hartman    7 24.0   88.86 5.120000 1.533333 0.01442571 602.5066  393.0651
## 34  Hartman    3 30.0   88.81 5.126667 1.600000 0.01488552 634.9193  382.7102
## 35  Hartman    5 30.0   88.58 5.113333 1.466667 0.01284032 565.5702  342.1963
## 36  Hartman    7 30.0   88.72 5.110000 1.800000 0.01487284 652.2148  356.7956
##         CCQA      BCQA Polifenoles     DPPH      ABTS Altitud
## 1   692.0006  397.6602    2605.202 9.558333  9.272007    1375
## 2   751.4449  431.4551    2655.128 8.116667  8.922589    1375
## 3   714.3046  401.5600    2419.110 8.341667  9.428944    1375
## 4   796.3562  452.5129    2523.502 7.916667  9.850906    1375
## 5   530.6221  316.1868    2512.155 8.191667  9.682600    1375
## 6   867.3671  471.4100    2646.050 8.408333  9.369725    1375
## 7   982.9008  544.0083    2861.355 8.258333  8.848266    1375
## 8   887.8658  493.3681    2514.427 7.450000  9.074013    1375
## 9   613.9056  563.8531    2394.150 6.941667  9.598452    1375
## 10  791.6082  400.7497    2398.688 6.533333 11.485583    1725
## 11  839.1843  435.6342    2600.663 8.950000 10.424579    1725
## 12 1011.9601  515.0036    2859.372 9.091667 12.159479    1725
## 13  818.8040  388.6442    2162.672 7.875000 10.441773    1725
## 14  814.6953  422.3483    2385.072 8.966667 10.525564    1725
## 15  934.7048  469.6386    2564.352 8.483333 10.658000    1725
## 16  861.2222  445.8483    2416.843 7.225000 10.287493    1725
## 17 1021.1566  483.3067    2301.103 8.441667 11.028307    1725
## 18  970.3016  492.1124    2455.422 8.166667  9.828609    1725
## 19 1438.7022  799.8410    2382.803 7.275000  8.802599    1850
## 20 1273.0713  668.2987    2051.472 6.416667  8.034790    1850
## 21 1150.4129  579.1414    2115.015 6.366667  8.212699    1850
## 22 2364.1469 1154.8671    2802.638 8.475000 11.703458    1850
## 23 1707.4406  881.4282    2357.838 6.533333  9.865311    1850
## 24 1955.6328  965.0719    2559.813 6.641667  9.656431    1850
## 25 1093.1416  584.1859    1972.043 6.191667  8.563113    1850
## 26  808.2782  399.2524    2257.983 7.358333  9.521237    1850
## 27  703.9463  363.8275    1997.005 6.983333  7.875763    1850
## 28  778.5748  383.9267    2112.743 6.166667 11.198028    1800
## 29  777.0782  393.8884    2169.478 7.083333 11.367056    1800
## 30  709.6745  351.0742    2455.422 7.766667 12.571379    1800
## 31  805.8314  399.1329    2407.765 7.825000 11.345927    1800
## 32  787.0173  383.5201    2382.800 7.475000 11.895268    1800
## 33  861.9407  425.5310    2267.063 7.091667 10.517353    1800
## 34  832.5573  432.7403    2498.540 8.000000  9.151861    1800
## 35  799.2656  390.4530    2205.788 5.208333  8.752717    1800
## 36  824.1285  402.3845    2484.923 6.058333  8.689695    1800

Pretratamiento de los datos

Las variables Finca, Dias (guardada como Tiempo), Altura y Temp se convirtieron al tipo factor.

# Keep only the rows with a positive number of days (Dias > 0).
df <- df %>% filter(Dias > 0)

# Build the categorical design variables used by the plots further down:
#   Temp   - "T24" when T equals 24, "Finca_T" when T is below 24, "T30" when above
#   Altura - "H1" when Altitud is below 1700, "H2" otherwise
#   Finca  - converted to a factor in place
#   Tiempo - Dias as a factor
# near() replaces the exact `T == 24.0` comparison so that floating-point noise
# in the imported spreadsheet cannot move a 24 row into another class; it is
# tested first so a value such as 23.9999999 is not captured by `T < 24`.
# Rows with a missing T stay NA, exactly as they did with the nested ifelse().
# NOTE: the column is literally named `T`; inside mutate() it resolves to the
# column, not to the TRUE shorthand.
df0 <- df %>%
  mutate(
    Finca = factor(Finca),
    Temp = factor(
      case_when(
        near(T, 24) ~ "T24",
        T < 24 ~ "Finca_T",
        T > 24 ~ "T30"
      )
    ),
    Altura = factor(ifelse(Altitud < 1700, "H1", "H2")),
    Tiempo = factor(Dias)
  )

# Subset with Dias >= 2; not referenced in the visible part of the script but
# kept because later code may rely on it.
df3 <- df0 %>% filter(Dias >= 2)

# Distribution of Puntaje per Finca, one colour per factor level.
boxplot(
  Puntaje ~ Finca,
  data = df0,
  col = c("blue", "orange", "red", "green")
)

# lm() lives in stats; the package is attached explicitly here.
library(stats)

# Alternative specification with the categorical factors from df0 (disabled):
# modelo <- lm(Puntaje ~ Temp + Altitud + Tiempo + Brix, data = df0)

# Multiple linear regression of Puntaje on T, Altitud, Dias and Brix,
# fitted on df, followed by the coefficient table and fit statistics.
modelo <- lm(
  formula = Puntaje ~ T + Altitud + Dias + Brix,
  data = df
)
summary(modelo)
## 
## Call:
## lm(formula = Puntaje ~ T + Altitud + Dias + Brix, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.87854 -0.39225 -0.00819  0.33329  0.72275 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 79.5015449  0.9878009  80.483  < 2e-16 ***
## T            0.0034453  0.0162057   0.213    0.833    
## Altitud      0.0052903  0.0004095  12.918 5.12e-14 ***
## Dias         0.0328103  0.0461337   0.711    0.482    
## Brix        -0.2173427  0.2745752  -0.792    0.435    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4505 on 31 degrees of freedom
## Multiple R-squared:   0.85,  Adjusted R-squared:  0.8307 
## F-statistic: 43.92 on 4 and 31 DF,  p-value: 2.402e-12

PRUEBAS DE NORMALIDAD

# Normal Q-Q plot of the model residuals with a blue reference line.
qqnorm(modelo$residuals)
qqline(modelo$residuals, col = "blue")

# Observed versus fitted Puntaje with the identity line (intercept 0, slope 1).
# The observed response is read from the model frame stored inside `modelo`
# instead of from df0: the model was fitted on df, and the model frame is
# row-aligned with the fitted values even if lm() had to drop incomplete rows.
plot(
  modelo$fitted.values,
  modelo$model$Puntaje,
  xlim = c(85, 90),
  ylim = c(85, 90),
  xlab = "Puntaje ajustado",
  ylab = "Puntaje observado"
)
abline(0, 1, col = "blue")

Prueba de normalidad de Shapiro-Wilk

# Shapiro-Wilk normality test applied to the residuals of the fitted model.
shapiro.test(x = modelo$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  modelo$residuals
## W = 0.96513, p-value = 0.3078

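El p-value > 0.05 indica que no se rechaza la hipótesis nula de normalidad: la distribución de los residuos del modelo no es significativamente diferente de una distribución normal. Esto no demuestra que los residuos sean normales; solo indica que no hay evidencia suficiente en contra, por lo que el supuesto de normalidad se considera razonable.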

Efecto del Tiempo de Fermentación a diferentes temperaturas sobre el puntaje del café

library(ggrepel)
library(ggplot2)

# Mean Puntaje for every Altura x Temp x Tiempo combination.
# As the message below reports, the result stays grouped by Altura and Temp.
df1 <- df0 %>%
  group_by(Altura, Temp, Tiempo) %>%
  summarise(Puntaje = mean(Puntaje))
## `summarise()` has grouped output by 'Altura', 'Temp'. You can override using
## the `.groups` argument.
df1
## # A tibble: 18 × 4
## # Groups:   Altura, Temp [6]
##    Altura Temp    Tiempo Puntaje
##    <fct>  <fct>   <fct>    <dbl>
##  1 H1     Finca_T 3         86.6
##  2 H1     Finca_T 5         86.9
##  3 H1     Finca_T 7         87.2
##  4 H1     T24     3         86.3
##  5 H1     T24     5         87.1
##  6 H1     T24     7         85.8
##  7 H1     T30     3         86.3
##  8 H1     T30     5         86.2
##  9 H1     T30     7         86.6
## 10 H2     Finca_T 3         89.0
## 11 H2     Finca_T 5         88.6
## 12 H2     Finca_T 7         89.0
## 13 H2     T24     3         88.8
## 14 H2     T24     5         89.1
## 15 H2     T24     7         88.9
## 16 H2     T30     3         88.9
## 17 H2     T30     5         89  
## 18 H2     T30     7         89.1
# Base plot: mean Puntaje against Tiempo, one line and colour per Altura level.
g <- ggplot(
  data = df1,
  mapping = aes(x = Tiempo, y = Puntaje, color = Altura)
) +
  geom_line(mapping = aes(group = Altura)) +
  geom_point()
# Labelled version with one panel per Temp level; it is auto-printed while
# `g` itself keeps the unlabelled base plot.
g +
  labs(x = "Tiempo de Fermentación", y = "Puntaje") +
  scale_color_manual(values = c("orange", "blue")) +
  facet_wrap(~ Temp)

# Mean cafeina for every Altura x Temp x Tiempo combination.
# As the message below reports, the result stays grouped by Altura and Temp.
df1 <- df0 %>%
  group_by(Altura, Temp, Tiempo) %>%
  summarise(cafeina = mean(cafeina))
## `summarise()` has grouped output by 'Altura', 'Temp'. You can override using
## the `.groups` argument.
df1
## # A tibble: 18 × 4
## # Groups:   Altura, Temp [6]
##    Altura Temp    Tiempo cafeina
##    <fct>  <fct>   <fct>    <dbl>
##  1 H1     Finca_T 3         677.
##  2 H1     Finca_T 5         639.
##  3 H1     Finca_T 7         613.
##  4 H1     T24     3         705.
##  5 H1     T24     5         666.
##  6 H1     T24     7         684.
##  7 H1     T30     3         738.
##  8 H1     T30     5         645.
##  9 H1     T30     7         666.
## 10 H2     Finca_T 3         641.
## 11 H2     Finca_T 5         656.
## 12 H2     Finca_T 7         666.
## 13 H2     T24     3         638.
## 14 H2     T24     5         624.
## 15 H2     T24     7         638.
## 16 H2     T30     3         613.
## 17 H2     T30     5         639.
## 18 H2     T30     7         622.
# Base plot: mean cafeina against Tiempo, one line and colour per Altura level.
g <- ggplot(
  data = df1,
  mapping = aes(x = Tiempo, y = cafeina, color = Altura)
) +
  geom_line(mapping = aes(group = Altura)) +
  geom_point()
# Labelled version with one panel per Temp level; it is auto-printed while
# `g` itself keeps the unlabelled base plot.
g +
  labs(x = "Tiempo de Fermentación", y = "Cafeina") +
  scale_color_manual(values = c("orange", "blue")) +
  facet_wrap(~ Temp)

# Mean Brix for every Altura x Temp x Tiempo combination.
# As the message below reports, the result stays grouped by Altura and Temp.
df1 <- df0 %>%
  group_by(Altura, Temp, Tiempo) %>%
  summarise(Brix = mean(Brix))
## `summarise()` has grouped output by 'Altura', 'Temp'. You can override using
## the `.groups` argument.
df1
## # A tibble: 18 × 4
## # Groups:   Altura, Temp [6]
##    Altura Temp    Tiempo  Brix
##    <fct>  <fct>   <fct>  <dbl>
##  1 H1     Finca_T 3       1.2 
##  2 H1     Finca_T 5       1.27
##  3 H1     Finca_T 7       1.07
##  4 H1     T24     3       1.2 
##  5 H1     T24     5       2   
##  6 H1     T24     7       1.93
##  7 H1     T30     3       2   
##  8 H1     T30     5       1.93
##  9 H1     T30     7       2   
## 10 H2     Finca_T 3       1.67
## 11 H2     Finca_T 5       1.64
## 12 H2     Finca_T 7       1.69
## 13 H2     T24     3       1.62
## 14 H2     T24     5       1.53
## 15 H2     T24     7       1.58
## 16 H2     T30     3       1.42
## 17 H2     T30     5       1.49
## 18 H2     T30     7       1.47
# Base plot: mean Brix against Tiempo, one line and colour per Altura level.
g <- ggplot(
  data = df1,
  mapping = aes(x = Tiempo, y = Brix, color = Altura)
) +
  geom_line(mapping = aes(group = Altura)) +
  geom_point()
# Labelled version with one panel per Temp level; it is auto-printed while
# `g` itself keeps the unlabelled base plot.
g +
  labs(x = "Tiempo de Fermentación", y = "Brix") +
  scale_color_manual(values = c("orange", "blue")) +
  facet_wrap(~ Temp)

# Mean pH for every Altura x Temp x Tiempo combination.
# As the message below reports, the result stays grouped by Altura and Temp.
df1 <- df0 %>%
  group_by(Altura, Temp, Tiempo) %>%
  summarise(pH = mean(pH))
## `summarise()` has grouped output by 'Altura', 'Temp'. You can override using
## the `.groups` argument.
df1
## # A tibble: 18 × 4
## # Groups:   Altura, Temp [6]
##    Altura Temp    Tiempo    pH
##    <fct>  <fct>   <fct>  <dbl>
##  1 H1     Finca_T 3       5.01
##  2 H1     Finca_T 5       5.02
##  3 H1     Finca_T 7       4.95
##  4 H1     T24     3       5.08
##  5 H1     T24     5       5.03
##  6 H1     T24     7       4.98
##  7 H1     T30     3       5.01
##  8 H1     T30     5       4.90
##  9 H1     T30     7       4.97
## 10 H2     Finca_T 3       5.03
## 11 H2     Finca_T 5       5.02
## 12 H2     Finca_T 7       5.03
## 13 H2     T24     3       5.01
## 14 H2     T24     5       4.98
## 15 H2     T24     7       5.02
## 16 H2     T30     3       5.01
## 17 H2     T30     5       5.00
## 18 H2     T30     7       5.01
# Base plot: mean pH against Tiempo, one line and colour per Altura level.
g <- ggplot(
  data = df1,
  mapping = aes(x = Tiempo, y = pH, color = Altura)
) +
  geom_line(mapping = aes(group = Altura)) +
  geom_point()
# Labelled version with one panel per Temp level; it is auto-printed while
# `g` itself keeps the unlabelled base plot.
g +
  labs(x = "Tiempo de Fermentación", y = "pH") +
  scale_color_manual(values = c("orange", "blue")) +
  facet_wrap(~ Temp)

# Mean Acidez_T, stored as Acidez, for every Altura x Temp x Tiempo combination.
# As the message below reports, the result stays grouped by Altura and Temp.
df1 <- df0 %>%
  group_by(Altura, Temp, Tiempo) %>%
  summarise(Acidez = mean(Acidez_T))
## `summarise()` has grouped output by 'Altura', 'Temp'. You can override using
## the `.groups` argument.
df1
## # A tibble: 18 × 4
## # Groups:   Altura, Temp [6]
##    Altura Temp    Tiempo Acidez
##    <fct>  <fct>   <fct>   <dbl>
##  1 H1     Finca_T 3      0.0155
##  2 H1     Finca_T 5      0.0152
##  3 H1     Finca_T 7      0.0152
##  4 H1     T24     3      0.0132
##  5 H1     T24     5      0.0125
##  6 H1     T24     7      0.0125
##  7 H1     T30     3      0.0139
##  8 H1     T30     5      0.0138
##  9 H1     T30     7      0.0149
## 10 H2     Finca_T 3      0.0142
## 11 H2     Finca_T 5      0.0142
## 12 H2     Finca_T 7      0.0159
## 13 H2     T24     3      0.0141
## 14 H2     T24     5      0.0152
## 15 H2     T24     7      0.0152
## 16 H2     T30     3      0.0134
## 17 H2     T30     5      0.0151
## 18 H2     T30     7      0.0151
# Base plot: mean Acidez against Tiempo, one line and colour per Altura level.
g <- ggplot(
  data = df1,
  mapping = aes(x = Tiempo, y = Acidez, color = Altura)
) +
  geom_line(mapping = aes(group = Altura)) +
  geom_point()
# Labelled version with one panel per Temp level; it is auto-printed while
# `g` itself keeps the unlabelled base plot.
g +
  labs(x = "Tiempo de Fermentación", y = "Acidez") +
  scale_color_manual(values = c("orange", "blue")) +
  facet_wrap(~ Temp)

# Mean ABTS for every Altura x Temp x Tiempo combination.
# As the message below reports, the result stays grouped by Altura and Temp.
df1 <- df0 %>%
  group_by(Altura, Temp, Tiempo) %>%
  summarise(ABTS = mean(ABTS))
## `summarise()` has grouped output by 'Altura', 'Temp'. You can override using
## the `.groups` argument.
df1
## # A tibble: 18 × 4
## # Groups:   Altura, Temp [6]
##    Altura Temp    Tiempo  ABTS
##    <fct>  <fct>   <fct>  <dbl>
##  1 H1     Finca_T 3       9.27
##  2 H1     Finca_T 5       8.92
##  3 H1     Finca_T 7       9.43
##  4 H1     T24     3       9.85
##  5 H1     T24     5       9.68
##  6 H1     T24     7       9.37
##  7 H1     T30     3       8.85
##  8 H1     T30     5       9.07
##  9 H1     T30     7       9.60
## 10 H2     Finca_T 3      11.1 
## 11 H2     Finca_T 5      10.6 
## 12 H2     Finca_T 7      11.0 
## 13 H2     T24     3      10.5 
## 14 H2     T24     5      10.1 
## 15 H2     T24     7      10.3 
## 16 H2     T30     3       9.33
## 17 H2     T30     5       9.77
## 18 H2     T30     7       8.80
# Base plot: mean ABTS against Tiempo, one line and colour per Altura level.
g <- ggplot(
  data = df1,
  mapping = aes(x = Tiempo, y = ABTS, color = Altura)
) +
  geom_line(mapping = aes(group = Altura)) +
  geom_point()
# Labelled version with one panel per Temp level; it is auto-printed while
# `g` itself keeps the unlabelled base plot.
g +
  labs(x = "Tiempo de Fermentación", y = "ABTS") +
  scale_color_manual(values = c("orange", "blue")) +
  facet_wrap(~ Temp)

# Mean CCQA for every Altura x Temp x Tiempo combination.
# As the message below reports, the result stays grouped by Altura and Temp.
df1 <- df0 %>%
  group_by(Altura, Temp, Tiempo) %>%
  summarise(CCQA = mean(CCQA))
## `summarise()` has grouped output by 'Altura', 'Temp'. You can override using
## the `.groups` argument.
df1
## # A tibble: 18 × 4
## # Groups:   Altura, Temp [6]
##    Altura Temp    Tiempo  CCQA
##    <fct>  <fct>   <fct>  <dbl>
##  1 H1     Finca_T 3       692.
##  2 H1     Finca_T 5       751.
##  3 H1     Finca_T 7       714.
##  4 H1     T24     3       796.
##  5 H1     T24     5       531.
##  6 H1     T24     7       867.
##  7 H1     T30     3       983.
##  8 H1     T30     5       888.
##  9 H1     T30     7       614.
## 10 H2     Finca_T 3      1321.
## 11 H2     Finca_T 5      1100.
## 12 H2     Finca_T 7      1200.
## 13 H2     T24     3      1012.
## 14 H2     T24     5       966.
## 15 H2     T24     7      1008.
## 16 H2     T30     3       929.
## 17 H2     T30     5       876.
## 18 H2     T30     7       833.
# Base plot: mean CCQA against Tiempo, one line and colour per Altura level.
g <- ggplot(
  data = df1,
  mapping = aes(x = Tiempo, y = CCQA, color = Altura)
) +
  geom_line(mapping = aes(group = Altura)) +
  geom_point()
# Labelled version with one panel per Temp level; it is auto-printed while
# `g` itself keeps the unlabelled base plot.
g +
  labs(x = "Tiempo de Fermentación", y = "CCQA") +
  scale_color_manual(values = c("orange", "blue")) +
  facet_wrap(~ Temp)

# Mean DPPH for every Altura x Temp x Tiempo combination.
# As the message below reports, the result stays grouped by Altura and Temp.
df1 <- df0 %>%
  group_by(Altura, Temp, Tiempo) %>%
  summarise(DPPH = mean(DPPH))
## `summarise()` has grouped output by 'Altura', 'Temp'. You can override using
## the `.groups` argument.
df1
## # A tibble: 18 × 4
## # Groups:   Altura, Temp [6]
##    Altura Temp    Tiempo  DPPH
##    <fct>  <fct>   <fct>  <dbl>
##  1 H1     Finca_T 3       9.56
##  2 H1     Finca_T 5       8.12
##  3 H1     Finca_T 7       8.34
##  4 H1     T24     3       7.92
##  5 H1     T24     5       8.19
##  6 H1     T24     7       8.41
##  7 H1     T30     3       8.26
##  8 H1     T30     5       7.45
##  9 H1     T30     7       6.94
## 10 H2     Finca_T 3       7.51
## 11 H2     Finca_T 5       7.53
## 12 H2     Finca_T 7       7.63
## 13 H2     T24     3       7.21
## 14 H2     T24     5       7.61
## 15 H2     T24     7       7.52
## 16 H2     T30     3       7.14
## 17 H2     T30     5       7.00
## 18 H2     T30     7       7.07
# Base plot: mean DPPH against Tiempo, one line and colour per Altura level.
g <- ggplot(
  data = df1,
  mapping = aes(x = Tiempo, y = DPPH, color = Altura)
) +
  geom_line(mapping = aes(group = Altura)) +
  geom_point()
# Labelled version with one panel per Temp level; it is auto-printed while
# `g` itself keeps the unlabelled base plot.
g +
  labs(x = "Tiempo de Fermentación", y = "DPPH") +
  scale_color_manual(values = c("orange", "blue")) +
  facet_wrap(~ Temp)

# Mean Polifenoles for every Altura x Temp x Tiempo combination.
# As the message below reports, the result stays grouped by Altura and Temp.
df1 <- df0 %>%
  group_by(Altura, Temp, Tiempo) %>%
  summarise(Polifenoles = mean(Polifenoles))
## `summarise()` has grouped output by 'Altura', 'Temp'. You can override using
## the `.groups` argument.
df1
## # A tibble: 18 × 4
## # Groups:   Altura, Temp [6]
##    Altura Temp    Tiempo Polifenoles
##    <fct>  <fct>   <fct>        <dbl>
##  1 H1     Finca_T 3            2605.
##  2 H1     Finca_T 5            2655.
##  3 H1     Finca_T 7            2419.
##  4 H1     T24     3            2524.
##  5 H1     T24     5            2512.
##  6 H1     T24     7            2646.
##  7 H1     T30     3            2861.
##  8 H1     T30     5            2514.
##  9 H1     T30     7            2394.
## 10 H2     Finca_T 3            2359.
## 11 H2     Finca_T 5            2304.
## 12 H2     Finca_T 7            2527.
## 13 H2     T24     3            2396.
## 14 H2     T24     5            2345.
## 15 H2     T24     7            2414.
## 16 H2     T30     3            2296.
## 17 H2     T30     5            2255.
## 18 H2     T30     7            2312.
# Base plot: mean Polifenoles against Tiempo, one line and colour per Altura level.
g <- ggplot(
  data = df1,
  mapping = aes(x = Tiempo, y = Polifenoles, color = Altura)
) +
  geom_line(mapping = aes(group = Altura)) +
  geom_point()
# Labelled version with one panel per Temp level; it is auto-printed while
# `g` itself keeps the unlabelled base plot.
g +
  labs(x = "Tiempo de Fermentación", y = "Polifenoles") +
  scale_color_manual(values = c("orange", "blue")) +
  facet_wrap(~ Temp)