# Funciones básicas de R
Largo de sépalos
# Overall mean sepal length across every observation in iris.
mean(iris[["Sepal.Length"]])
## [1] 5.843333
# Mean sepal length for each species.
with(iris, tapply(X = Sepal.Length, INDEX = Species, FUN = mean))
## setosa versicolor virginica
## 5.006 5.936 6.588
# Mean sepal width for each species.
with(iris, tapply(X = Sepal.Width, INDEX = Species, FUN = mean))
## setosa versicolor virginica
## 3.428 2.770 2.974
# Mean petal length for each species.
with(iris, tapply(X = Petal.Length, INDEX = Species, FUN = mean))
## setosa versicolor virginica
## 1.462 4.260 5.552
# Mean petal width for each species.
with(iris, tapply(X = Petal.Width, INDEX = Species, FUN = mean))
## setosa versicolor virginica
## 0.246 1.326 2.026
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# dplyr is already attached by the earlier library(dplyr) call, so the two
# repeated library(dplyr) calls that stood here were redundant and are gone.
# Preview the first six rows of the iris data set.
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Overall mean sepal length computed with dplyr. The result is a one-row data
# frame whose single column is named after the summary expression.
iris %>%
  summarise(mean(Sepal.Length))
## mean(Sepal.Length)
## 1 5.843333
# Per-species mean sepal length written as nested calls (no pipe): the
# group_by() result is handed straight to summarise() as its data argument.
summarise(.data = group_by(.data = iris, Species), media = mean(Sepal.Length))
## # A tibble: 3 × 2
## Species media
## <fct> <dbl>
## 1 setosa 5.01
## 2 versicolor 5.94
## 3 virginica 6.59
# Same per-species mean, expressed as a pipeline: group, then summarise.
group_by(iris, Species) %>%
  summarise(media = mean(Sepal.Length))
## # A tibble: 3 × 2
## Species media
## <fct> <dbl>
## 1 setosa 5.01
## 2 versicolor 5.94
## 3 virginica 6.59
# Several per-species summaries in one call: mean and standard deviation of
# sepal length (media_v1, desv_v1) and mean petal length (media_v2).
group_by(iris, Species) %>%
  summarise(
    media_v1 = mean(Sepal.Length),
    desv_v1 = sd(Sepal.Length),
    media_v2 = mean(Petal.Length)
  )
## # A tibble: 3 × 4
## Species media_v1 desv_v1 media_v2
## <fct> <dbl> <dbl> <dbl>
## 1 setosa 5.01 0.352 1.46
## 2 versicolor 5.94 0.516 4.26
## 3 virginica 6.59 0.636 5.55
# Mean of every non-grouping column, per species.
# across(everything(), ...) is the current replacement for the superseded
# scoped verb summarise_all(); the grouping column Species is excluded
# automatically and the output columns keep their original names.
iris %>%
  group_by(Species) %>%
  summarise(across(everything(), mean))
## # A tibble: 3 × 5
## Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 setosa 5.01 3.43 1.46 0.246
## 2 versicolor 5.94 2.77 4.26 1.33
## 3 virginica 6.59 2.97 5.55 2.03
# Mean and standard deviation of every measurement, per species. The names in
# the function list become column suffixes (e.g. Sepal.Length_media,
# Sepal.Length_desv).
group_by(iris, Species) %>%
  summarise_all(list(media = mean, desv = sd))
## # A tibble: 3 × 9
## Species Sepal.Len…¹ Sepal…² Petal…³ Petal…⁴ Sepal…⁵ Sepal…⁶ Petal…⁷ Petal…⁸
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 setosa 5.01 3.43 1.46 0.246 0.352 0.379 0.174 0.105
## 2 versicolor 5.94 2.77 4.26 1.33 0.516 0.314 0.470 0.198
## 3 virginica 6.59 2.97 5.55 2.03 0.636 0.322 0.552 0.275
## # … with abbreviated variable names ¹Sepal.Length_media, ²Sepal.Width_media,
## # ³Petal.Length_media, ⁴Petal.Width_media, ⁵Sepal.Length_desv,
## # ⁶Sepal.Width_desv, ⁷Petal.Length_desv, ⁸Petal.Width_desv
# Per-species coefficient of variation (in percent) of sepal length:
# cv = 100 * standard deviation / mean.
group_by(iris, Species) %>%
  summarise(
    media = mean(Sepal.Length),
    desv = sd(Sepal.Length)
  ) %>%
  mutate(cv = 100 * desv / media)
## # A tibble: 3 × 4
## Species media desv cv
## <fct> <dbl> <dbl> <dbl>
## 1 setosa 5.01 0.352 7.04
## 2 versicolor 5.94 0.516 8.70
## 3 virginica 6.59 0.636 9.65
# Simulated trial data: 4 treatments (t1..t4) x 3 blocks (b1..b3) with 10
# observations per treatment/block cell, 120 rows in total. The yield column
# rto is drawn from a normal distribution with mean 3 and sd 0.3; the seed is
# fixed so the draw is reproducible.
set.seed(123)
datos <- data.frame(
  trt = gl(n = 4, k = 30, length = 120, labels = c("t1", "t2", "t3", "t4")),
  blq = gl(n = 3, k = 10, length = 120, labels = c("b1", "b2", "b3")),
  rto = rnorm(n = 120, mean = 3, sd = 0.3)
)
print(datos)
## trt blq rto
## 1 t1 b1 2.831857
## 2 t1 b1 2.930947
## 3 t1 b1 3.467612
## 4 t1 b1 3.021153
## 5 t1 b1 3.038786
## 6 t1 b1 3.514519
## 7 t1 b1 3.138275
## 8 t1 b1 2.620482
## 9 t1 b1 2.793944
## 10 t1 b1 2.866301
## 11 t1 b2 3.367225
## 12 t1 b2 3.107944
## 13 t1 b2 3.120231
## 14 t1 b2 3.033205
## 15 t1 b2 2.833248
## 16 t1 b2 3.536074
## 17 t1 b2 3.149355
## 18 t1 b2 2.410015
## 19 t1 b2 3.210407
## 20 t1 b2 2.858163
## 21 t1 b3 2.679653
## 22 t1 b3 2.934608
## 23 t1 b3 2.692199
## 24 t1 b3 2.781333
## 25 t1 b3 2.812488
## 26 t1 b3 2.493992
## 27 t1 b3 3.251336
## 28 t1 b3 3.046012
## 29 t1 b3 2.658559
## 30 t1 b3 3.376144
## 31 t2 b1 3.127939
## 32 t2 b1 2.911479
## 33 t2 b1 3.268538
## 34 t2 b1 3.263440
## 35 t2 b1 3.246474
## 36 t2 b1 3.206592
## 37 t2 b1 3.166175
## 38 t2 b1 2.981426
## 39 t2 b1 2.908211
## 40 t2 b1 2.885859
## 41 t2 b2 2.791588
## 42 t2 b2 2.937625
## 43 t2 b2 2.620381
## 44 t2 b2 3.650687
## 45 t2 b2 3.362389
## 46 t2 b2 2.663067
## 47 t2 b2 2.879135
## 48 t2 b2 2.860003
## 49 t2 b2 3.233990
## 50 t2 b2 2.974989
## 51 t2 b3 3.075996
## 52 t2 b3 2.991436
## 53 t2 b3 2.987139
## 54 t2 b3 3.410581
## 55 t2 b3 2.932269
## 56 t2 b3 3.454941
## 57 t2 b3 2.535374
## 58 t2 b3 3.175384
## 59 t2 b3 3.037156
## 60 t2 b3 3.064782
## 61 t3 b1 3.113892
## 62 t3 b1 2.849303
## 63 t3 b1 2.900038
## 64 t3 b1 2.694427
## 65 t3 b1 2.678463
## 66 t3 b1 3.091059
## 67 t3 b1 3.134463
## 68 t3 b1 3.015901
## 69 t3 b1 3.276680
## 70 t3 b1 3.615025
## 71 t3 b2 2.852691
## 72 t3 b2 2.307249
## 73 t3 b2 3.301722
## 74 t3 b2 2.787240
## 75 t3 b2 2.793597
## 76 t3 b2 3.307671
## 77 t3 b2 2.914568
## 78 t3 b2 2.633785
## 79 t3 b2 3.054391
## 80 t3 b2 2.958333
## 81 t3 b3 3.001729
## 82 t3 b3 3.115584
## 83 t3 b3 2.888802
## 84 t3 b3 3.193313
## 85 t3 b3 2.933854
## 86 t3 b3 3.099535
## 87 t3 b3 3.329052
## 88 t3 b3 3.130554
## 89 t3 b3 2.902221
## 90 t3 b3 3.344642
## 91 t4 b1 3.298051
## 92 t4 b1 3.164519
## 93 t4 b1 3.071620
## 94 t4 b1 2.811628
## 95 t4 b1 3.408196
## 96 t4 b1 2.819922
## 97 t4 b1 3.656200
## 98 t4 b1 3.459783
## 99 t4 b1 2.929290
## 100 t4 b1 2.692074
## 101 t4 b2 2.786878
## 102 t4 b2 3.077065
## 103 t4 b2 2.925992
## 104 t4 b2 2.895737
## 105 t4 b2 2.714514
## 106 t4 b2 2.986492
## 107 t4 b2 2.764529
## 108 t4 b2 2.499617
## 109 t4 b2 2.885932
## 110 t4 b2 3.275699
## 111 t4 b3 2.827396
## 112 t4 b3 3.182389
## 113 t4 b3 2.514635
## 114 t4 b3 2.983331
## 115 t4 b3 3.155822
## 116 t4 b3 3.090346
## 117 t4 b3 3.031703
## 118 t4 b3 2.807788
## 119 t4 b3 2.745089
## 120 t4 b3 2.692761
# Mean yield (rto) for every treatment x block combination.
# `.groups = "drop_last"` states the resulting grouping explicitly: the result
# stays grouped by trt (summarise()'s default here), and the informational
# "grouped output" message is no longer emitted.
t1 <- datos %>%
  group_by(trt, blq) %>%
  summarise(media = mean(rto), .groups = "drop_last")
# Total of the block means within each treatment. The summary column is left
# unnamed, so it is labelled `sum(media)` in the result.
t2 <- group_by(t1, trt) %>%
  summarise(sum(media))
# Add to every row its treatment total of block means (tot) and the row's share
# of that total in percent (porc). mutate() keeps all rows of t1.
group_by(t1, trt) %>%
  mutate(
    tot = sum(media),
    porc = 100 * media / tot
  )
## # A tibble: 12 × 5
## # Groups: trt [4]
## trt blq media tot porc
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 t1 b1 3.02 8.96 33.7
## 2 t1 b2 3.06 8.96 34.2
## 3 t1 b3 2.87 8.96 32.1
## 4 t2 b1 3.10 9.16 33.8
## 5 t2 b2 3.00 9.16 32.7
## 6 t2 b3 3.07 9.16 33.5
## 7 t3 b1 3.04 9.02 33.7
## 8 t3 b2 2.89 9.02 32.0
## 9 t3 b3 3.09 9.02 34.3
## 10 t4 b1 3.13 8.92 35.1
## 11 t4 b2 2.88 8.92 32.3
## 12 t4 b3 2.90 8.92 32.6
Se conservan las mismas filas y ahora sí se obtiene el total por tratamiento: para cada tratamiento tenemos sus bloques. A continuación se realizará la unión hacia la izquierda (left join): se toma la tabla 1 (t1) y se le agrega la tabla de totales (t2), usando como variable de unión el tratamiento (trt).
# Show the treatment x block means.
print(t1)
## # A tibble: 12 × 3
## # Groups: trt [4]
## trt blq media
## <fct> <fct> <dbl>
## 1 t1 b1 3.02
## 2 t1 b2 3.06
## 3 t1 b3 2.87
## 4 t2 b1 3.10
## 5 t2 b2 3.00
## 6 t2 b3 3.07
## 7 t3 b1 3.04
## 8 t3 b2 2.89
## 9 t3 b3 3.09
## 10 t4 b1 3.13
## 11 t4 b2 2.88
## 12 t4 b3 2.90
# Show the per-treatment totals.
print(t2)
## # A tibble: 4 × 2
## trt `sum(media)`
## <fct> <dbl>
## 1 t1 8.96
## 2 t2 9.16
## 3 t3 9.02
## 4 t4 8.92
# Left join: keep every row of t1 and attach the matching treatment total
# from t2, matching on trt.
left_join(x = t1, y = t2, by = "trt")
## # A tibble: 12 × 4
## # Groups: trt [4]
## trt blq media `sum(media)`
## <fct> <fct> <dbl> <dbl>
## 1 t1 b1 3.02 8.96
## 2 t1 b2 3.06 8.96
## 3 t1 b3 2.87 8.96
## 4 t2 b1 3.10 9.16
## 5 t2 b2 3.00 9.16
## 6 t2 b3 3.07 9.16
## 7 t3 b1 3.04 9.02
## 8 t3 b2 2.89 9.02
## 9 t3 b3 3.09 9.02
## 10 t4 b1 3.13 8.92
## 11 t4 b2 2.88 8.92
## 12 t4 b3 2.90 8.92
# Right join: keep every row of t2 and attach the matching rows of t1,
# matching on trt.
right_join(x = t1, y = t2, by = "trt")
## # A tibble: 12 × 4
## # Groups: trt [4]
## trt blq media `sum(media)`
## <fct> <fct> <dbl> <dbl>
## 1 t1 b1 3.02 8.96
## 2 t1 b2 3.06 8.96
## 3 t1 b3 2.87 8.96
## 4 t2 b1 3.10 9.16
## 5 t2 b2 3.00 9.16
## 6 t2 b3 3.07 9.16
## 7 t3 b1 3.04 9.02
## 8 t3 b2 2.89 9.02
## 9 t3 b3 3.09 9.02
## 10 t4 b1 3.13 8.92
## 11 t4 b2 2.88 8.92
## 12 t4 b3 2.90 8.92
# Two small tables for the join examples. df2 holds the same rows as df1 plus
# one extra treatment (t3) that has no counterpart in df1.
df1 <- data.frame(trt = c("t1", "t2"), tot = c(12, 15))
df2 <- data.frame(trt = c("t1", "t2", "t3"), tot = c(12, 15, 18))
# Left join on trt: only the treatments present in df1 are kept. Both tables
# have a tot column, so the result carries them as tot.x and tot.y.
left_join(x = df1, y = df2, by = "trt")
## trt tot.x tot.y
## 1 t1 12 12
## 2 t2 15 15
# Right join on trt: every treatment of df2 is kept; t3 has no match in df1,
# so its tot.x is NA.
right_join(x = df1, y = df2, by = "trt")
## trt tot.x tot.y
## 1 t1 12 12
## 2 t2 15 15
## 3 t3 NA 18
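Con left_join, df1 sigue con dos filas: solo se trae de df2 la información de los tratamientos que se repiten en ambas tablas. El sufijo .x hace referencia a la columna de la tabla 1 (df1) y a la columna de la tabla 2 (df2) se le asigna el sufijo .y.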
# Stack df2 underneath df1; columns are matched by name.
df1 %>%
  bind_rows(df2)
## trt tot
## 1 t1 12
## 2 t2 15
## 3 t1 12
## 4 t2 15
## 5 t3 18
# Place df2 side by side with itself. The duplicated column names are repaired
# by dplyr (trt...1, tot...2, ...), which is reported in a "New names" message.
df2 %>%
  bind_cols(df2)
## New names:
## • `trt` -> `trt...1`
## • `tot` -> `tot...2`
## • `trt` -> `trt...3`
## • `tot` -> `tot...4`
## trt...1 tot...2 trt...3 tot...4
## 1 t1 12 t1 12
## 2 t2 15 t2 15
## 3 t3 18 t3 18
# Base R equivalent of the column bind above; here the duplicated column names
# (trt, tot, trt, tot) are kept as they are.
cbind(df2, df2)
## trt tot trt tot
## 1 t1 12 t1 12
## 2 t2 15 t2 15
## 3 t3 18 t3 18
# Draw 10 rows of datos at random.
sample_n(datos, size = 10)
## trt blq rto
## 1 t1 b2 3.033205
## 2 t4 b3 3.031703
## 3 t1 b2 3.536074
## 4 t3 b3 3.329052
## 5 t2 b1 3.268538
## 6 t2 b1 2.885859
## 7 t4 b3 3.155822
## 8 t1 b1 2.866301
## 9 t3 b3 2.902221
## 10 t3 b2 2.307249
# Draw a random 10% of the rows of datos (12 of its 120 observations).
# The original call was sample_frac(0, 1): the decimal comma was parsed as an
# argument separator, so the fraction was 0 (and 1 was taken as `replace`),
# which always returned a data frame with zero rows.
datos %>%
  sample_frac(size = 0.1)
# Summary table: the three highest yields within each treatment, sorted by
# treatment and then by descending yield, with the columns renamed to Spanish
# labels and finally upper-cased.
# NOTE(review): the original comment said "of each block", but the code groups
# by treatment (trt) -- confirm which one was intended.
datos %>%
  group_by(trt) %>%
  # Rank on rto explicitly instead of letting top_n() fall back to the last
  # column, which would silently change if the columns were ever reordered.
  top_n(3, rto) %>%
  arrange(trt, desc(rto)) %>%
  rename(
    "tratamiento" = trt,
    "bloque" = blq,
    "rendimiento" = rto
  ) %>%
  rename_all(toupper)
## # A tibble: 12 × 3
## # Groups: TRATAMIENTO [4]
## TRATAMIENTO BLOQUE RENDIMIENTO
## <fct> <fct> <dbl>
## 1 t1 b2 3.54
## 2 t1 b1 3.51
## 3 t1 b1 3.47
## 4 t2 b2 3.65
## 5 t2 b3 3.45
## 6 t2 b3 3.41
## 7 t3 b1 3.62
## 8 t3 b3 3.34
## 9 t3 b3 3.33
## 10 t4 b1 3.66
## 11 t4 b1 3.46
## 12 t4 b1 3.41
Se obtienen 3 filas para el tratamiento 1, 3 para el tratamiento 2 y así sucesivamente: se escogen los 3 mejores rendimientos (top 3) de cada tratamiento, ordenados de forma descendente.
library(readxl)
## Warning: package 'readxl' was built under R version 4.2.3
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.2.3