Archivos Excel
# Attach readxl, which provides read_excel().
library(readxl)
# Read the workbook "ExperimentoMaiz.xlsx" (path relative to the working
# directory) and keep the result in datos_excel.
datos_excel <- read_excel(path = "ExperimentoMaiz.xlsx")
# Auto-print the imported data to inspect it.
datos_excel
Archivos planos
# Attach the tidyverse; read_csv() and the %>% / group_by() / summarise()
# calls used later in this document come from it.
library(tidyverse)
# Read "ExperimentoMaiz.csv" (path relative to the working directory) and keep
# the result in datos_csv.
datos_csv <- read_csv(file = "ExperimentoMaiz.csv")
# Auto-print the imported data to inspect it.
datos_csv
Tipos de datos
- Cuantitativas:
- Continuas: numeric
- Discretas: integer
- Cualitativas:
- Nominales: character (chr) o string (texto)
- Ordinales: factor
Manipulando las variables
- Puedo acceder a una columna de los datos con el signo de pesos ($):
datos_csv$yield
## [1] 59.8 64.5 59.5 65.1 64.2 56.4 63.5 58.3 61.9 58.9 57.2 58.0 62.0 51.6 62.9
## [16] 60.2 55.4 53.7 54.5 56.1 61.0 70.3 68.2 71.9 68.2 65.2 65.6 65.7 66.1 64.8
## [31] 64.1 66.1 71.8 62.5 64.8 63.2 63.3 68.1 67.3 59.4 75.6 81.5 79.9 82.1 81.2
## [46] 79.6 74.3 72.7 83.4 80.9 81.2 85.5 69.1 53.7 67.2 73.1 66.9 71.5 76.7 76.3
## [61] 58.8 60.4 60.0 58.5 60.1 58.0 60.7 60.0 62.9 67.7 56.2 62.2 62.8 55.7 61.2
## [76] 60.1 58.3 64.1 60.2 62.5 64.4 73.2 72.3 71.9 74.2 68.0 71.6 74.6 74.5 71.5
## [91] 75.4 74.5 71.4 59.6 64.5 74.9 73.8 76.7 82.2 70.4 62.7 78.3 76.9 83.2 85.4
## [106] 80.6 73.2 73.7 75.6 72.0 72.4 82.0 77.0 71.8 77.9 86.0 76.9 90.2 81.0 85.9
## [121] 53.4 70.8 71.5 71.5 78.4 73.0 69.4 66.1 74.4 70.0 65.7 70.0 75.6 67.3 65.5
## [136] 71.7 65.0 72.5 73.1 65.4
- Puedo solicitar el tipo o clase de dato:
class(datos_csv$yield)
## [1] "numeric"
- Puedo ver el tipo de dato para todas las columnas:
str(datos_csv)
## spec_tbl_df [140 x 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ gen : chr [1:140] "G01" "G02" "G03" "G04" ...
## $ loc : chr [1:140] "L1" "L1" "L1" "L1" ...
## $ yield: num [1:140] 59.8 64.5 59.5 65.1 64.2 56.4 63.5 58.3 61.9 58.9 ...
## - attr(*, "spec")=
## .. cols(
## .. gen = col_character(),
## .. loc = col_character(),
## .. yield = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
Categórica ordinal
meses <- c("enero", "febrero", "marzo", "abril", "mayo", "enero")
class(meses)
## [1] "character"
- Podemos convertir a tipo factor (ordinal) la variable “meses”:
meses_desordenados <- factor(meses)
meses
## [1] "enero" "febrero" "marzo" "abril" "mayo" "enero"
meses_desordenados
## [1] enero febrero marzo abril mayo enero
## Levels: abril enero febrero marzo mayo
- Podemos controlar el orden de los niveles del factor (ordinales):
# Pass the levels explicitly so they follow calendar order; without `levels`,
# factor() sorts the levels alphabetically.
meses_ordenado <- factor(
  x = meses,
  levels = c("enero", "febrero", "marzo", "abril", "mayo")
)
meses_ordenado
## [1] enero febrero marzo abril mayo enero
## Levels: enero febrero marzo abril mayo
Medidas de tendencia central
Moda
# Mode(s) of a vector.
#
# Returns every distinct value of `x` that reaches the highest frequency, in
# order of first appearance, so ties produce more than one value.
moda <- function(x) {
  valores <- unique(x)
  # match() maps each element to the position of its distinct value;
  # tabulate() then counts how many elements fall on each position.
  posiciones <- match(x, valores)
  conteos <- tabulate(posiciones)
  conteo_maximo <- max(conteos)
  valores[conteos == conteo_maximo]
}
moda(datos_csv$yield)
## [1] 71.5
Medidas de posición
Percentiles
- probs: probabilidad (entre 0 y 1) del percentil, decil o cuartil que se
quiere calcular. Por ejemplo, el percentil 25 corresponde a 0.25 en probs.
quantile(datos_csv$yield, probs = 0.25)
## 25%
## 62.425
- Puedo calcular varios percentiles al tiempo:
quantile(datos_csv$yield, probs = c(0.25, 0.30, 0.35))
## 25% 30% 35%
## 62.425 63.270 64.500
- Calculando todos los percentiles:
quantile(datos_csv$yield, probs = seq(from = 0, to = 1, by = 0.01))
## 0% 1% 2% 3% 4% 5% 6% 7% 8% 9% 10%
## 51.600 53.517 53.700 54.653 55.568 56.080 56.268 56.984 58.000 58.153 58.300
## 11% 12% 13% 14% 15% 16% 17% 18% 19% 20% 21%
## 58.587 58.868 59.407 59.546 59.770 60.000 60.063 60.102 60.200 60.360 60.757
## 22% 23% 24% 25% 26% 27% 28% 29% 30% 31% 32%
## 61.116 61.879 62.072 62.425 62.528 62.753 62.892 62.993 63.270 63.554 64.100
## 33% 34% 35% 36% 37% 38% 39% 40% 41% 42% 43%
## 64.187 64.426 64.500 64.800 64.886 65.082 65.242 65.460 65.599 65.700 66.008
## 44% 45% 46% 47% 48% 49% 50% 51% 52% 53% 54%
## 66.100 66.540 67.182 67.300 67.588 68.011 68.150 68.200 69.184 69.802 70.018
## 55% 56% 57% 58% 59% 60% 61% 62% 63% 64% 65%
## 70.345 70.736 71.423 71.500 71.500 71.540 71.679 71.800 71.857 71.900 72.105
## 66% 67% 68% 69% 70% 71% 72% 73% 74% 75% 76%
## 72.374 72.526 72.856 73.091 73.130 73.200 73.708 73.988 74.286 74.425 74.500
## 77% 78% 79% 80% 81% 82% 83% 84% 85% 86% 87%
## 74.609 75.110 75.562 75.600 76.013 76.692 76.774 76.900 77.135 78.116 78.393
## 88% 89% 90% 91% 92% 93% 94% 95% 96% 97% 98%
## 79.696 80.397 80.910 81.098 81.200 81.635 82.066 82.250 83.288 85.060 85.588
## 99% 100%
## 85.961 90.200
- Puedo calcular el percentil 90 para cada genotipo:
# Per-genotype summary of yield: mean, median and 90th percentile.
datos_csv %>%
  group_by(gen) %>%
  summarise(
    promedio = mean(yield),
    mediana = median(yield),
    percentil_90 = quantile(yield, probs = 0.90)
  )
Deciles
quantile(datos_csv$yield, probs = seq(from = 0, to = 1, by = 0.1))
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 51.60 58.30 60.36 63.27 65.46 68.15 71.54 73.13 75.60 80.91 90.20
Cuartiles
quantile(datos_csv$yield, probs = c(0, 0.25, 0.50, 0.75, 1))
## 0% 25% 50% 75% 100%
## 51.600 62.425 68.150 74.425 90.200
Medidas de dispersión
Varianza
var(datos_csv$yield)
## [1] 70.12358
Desviación estándar
sd(datos_csv$yield)
## [1] 8.373982
# Per-genotype summary of yield: mean, median, 90th percentile and standard
# deviation.
datos_csv %>%
  group_by(gen) %>%
  summarise(
    promedio = mean(yield),
    mediana = median(yield),
    percentil_90 = quantile(yield, probs = 0.90),
    desviacion = sd(yield)
  )
Coeficiente de variación
(sd(datos_csv$yield) / mean(datos_csv$yield)) * 100
## [1] 12.19085
# Per-genotype summary of yield, now including the coefficient of variation.
datos_csv %>%
  group_by(gen) %>%
  summarise(
    promedio = mean(yield),
    mediana = median(yield),
    percentil_90 = quantile(yield, probs = 0.90),
    desviacion = sd(yield),
    # Reuses the promedio and desviacion summaries defined just above in this
    # same summarise() call; expressed as a percentage.
    coef_var = (desviacion / promedio) * 100
  )
Rango
range(datos_csv$yield)
## [1] 51.6 90.2
min(datos_csv$yield)
## [1] 51.6
max(datos_csv$yield)
## [1] 90.2
Rango intercuartílico
IQR(datos_csv$yield)
## [1] 12
# Per-genotype summary of yield: central tendency, dispersion, extremes and
# interquartile range.
datos_csv %>%
  group_by(gen) %>%
  summarise(
    promedio = mean(yield),
    mediana = median(yield),
    percentil_90 = quantile(yield, probs = 0.90),
    desviacion = sd(yield),
    # Coefficient of variation (%), built from the two summaries above.
    coef_var = (desviacion / promedio) * 100,
    maximo = max(yield),
    minimo = min(yield),
    RIC = IQR(yield)
  )
Medidas de distribución
Coeficiente de asimetría
library(moments)
skewness(datos_csv$yield)
## [1] 0.206373
Coeficiente de curtosis
kurtosis(datos_csv$yield)
## [1] 2.337344
# Full per-genotype summary of yield: central tendency, position, dispersion
# and distribution shape.
datos_csv %>%
  group_by(gen) %>%
  summarise(
    promedio = mean(yield),
    mediana = median(yield),
    percentil_90 = quantile(yield, probs = 0.90),
    desviacion = sd(yield),
    # Coefficient of variation (%), built from the two summaries above.
    coef_var = (desviacion / promedio) * 100,
    maximo = max(yield),
    minimo = min(yield),
    RIC = IQR(yield),
    # skewness() and kurtosis() come from the moments package attached
    # earlier in the document.
    CAsimetria = skewness(yield),
    CCurtosis = kurtosis(yield)
  )