# Load the coffee data set from a semicolon-separated file that uses "." as
# the decimal mark; text columns are converted to factors.
coffeData <- read.table(
  file = "coffeData (2).csv",
  sep = ";",
  dec = ".",
  header = TRUE,
  stringsAsFactors = TRUE
)

# Print a per-column summary of the loaded data.
summary(coffeData)
## X Species Country.of.Origin Fragrance...Aroma
## Min. : 1 Arabica:1309 Mexico :237 Min. :5.080
## 1st Qu.: 335 Robusta: 28 Colombia :183 1st Qu.:7.420
## Median : 669 Guatemala :180 Median :7.580
## Mean : 669 Brazil :132 Mean :7.572
## 3rd Qu.:1003 Taiwan : 75 3rd Qu.:7.750
## Max. :1337 United States (Hawaii): 73 Max. :8.750
## (Other) :457
## Flavor Aftertaste Salt...Acid Mouthfeel
## Min. :6.080 Min. :6.170 Min. :5.250 Min. :5.080
## 1st Qu.:7.330 1st Qu.:7.250 1st Qu.:7.330 1st Qu.:7.330
## Median :7.580 Median :7.420 Median :7.580 Median :7.500
## Mean :7.527 Mean :7.407 Mean :7.541 Mean :7.524
## 3rd Qu.:7.750 3rd Qu.:7.580 3rd Qu.:7.750 3rd Qu.:7.750
## Max. :8.830 Max. :8.670 Max. :8.750 Max. :8.750
##
## Balance Bitter...Sweet Uniform.Cup Clean.Cup
## Min. : 5.250 Min. :5.250 Min. : 6.000 Min. : 0.000
## 1st Qu.:10.000 1st Qu.:7.330 1st Qu.:10.000 1st Qu.:10.000
## Median :10.000 Median :7.500 Median :10.000 Median :10.000
## Mean : 9.868 Mean :7.527 Mean : 9.844 Mean : 9.849
## 3rd Qu.:10.000 3rd Qu.:7.670 3rd Qu.:10.000 3rd Qu.:10.000
## Max. :10.000 Max. :8.580 Max. :10.000 Max. :10.000
##
## Cupper.Points quality_score
## Min. : 5.17 Min. :63.08
## 1st Qu.: 7.25 1st Qu.:81.17
## Median : 7.50 Median :82.50
## Mean : 7.51 Mean :82.17
## 3rd Qu.: 7.75 3rd Qu.:83.67
## Max. :10.00 Max. :90.58
##
library(ggplot2)
# Histograms of the flavor score (g1) and the overall quality score (g2).
# The bin count is stated explicitly: 30 is the value ggplot2 falls back to
# when none is given, so the plots are unchanged, but the choice is now
# visible in the code and ggplot2 no longer has to report its default.
g1 <- ggplot(coffeData, aes(x = Flavor)) +
  geom_histogram(fill = "blue", bins = 30)
g2 <- ggplot(coffeData, aes(x = quality_score)) +
  geom_histogram(fill = "pink", bins = 30)
### Box plots
# g3: distribution of the flavor score; g4: distribution of the quality score.
# Both map the variable to the x axis.
g3 <- ggplot(data = coffeData, mapping = aes(x = Flavor)) +
  geom_boxplot(fill = "blue")
g4 <- ggplot(data = coffeData, mapping = aes(x = quality_score)) +
  geom_boxplot(fill = "pink")
### Plot panel
# Draw the two histograms and the two box plots together in a single grid.
library(gridExtra)
grid.arrange(grobs = list(g1, g2, g3, g4))
### Analysis with two numeric variables
# Jittered scatter plot of flavor against quality score, with a linear-model
# fit drawn in red.
ggplot(data = coffeData) +
  aes(x = Flavor, y = quality_score) +
  geom_jitter() +
  geom_smooth(method = "lm", colour = "red")

# Correlation coefficient between the two variables (cor() default method).
with(coffeData, cor(Flavor, quality_score))
## [1] 0.8348271
El coeficiente de correlación es \(r=0.83\), lo cual indica una relación lineal positiva fuerte entre las variables: a mayor calificación del sabor, mayor calidad del café.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the rows whose country of origin is one of the three countries
# being compared.
nuevosDatos <- filter(
  coffeData,
  Country.of.Origin %in% c("Colombia", "Brazil", "Mexico")
)

# Box plot of the quality score per country, filled by country.
# The y-axis label must be passed as lowercase `y`: labs() matches labels to
# aesthetics by name, so an uppercase `Y` is ignored and the axis keeps the
# raw column name.
ggplot(
  nuevosDatos,
  aes(x = Country.of.Origin, y = quality_score, fill = Country.of.Origin)
) +
  geom_boxplot() +
  labs(
    title = "Diagrama de calidad vs Pais",
    x = "Pais",
    y = "Calidad del cafe"
  )
Se observa que Colombia presenta una mayor calidad de café en comparación con Brasil y México.