library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Preview the first six rows of the built-in iris data set.
head(iris, n = 6)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Overall mean of Sepal.Length across every row of iris.
mean(iris[["Sepal.Length"]])
## [1] 5.843333
# Base-R grouped summary: mean Sepal.Length within each Species level.
tapply(
  X = iris$Sepal.Length,
  INDEX = iris$Species,
  FUN = mean
)
## setosa versicolor virginica
## 5.006 5.936 6.588
# Base-R grouped summary: mean Sepal.Width within each Species level.
tapply(
  X = iris$Sepal.Width,
  INDEX = iris$Species,
  FUN = mean
)
## setosa versicolor virginica
## 3.428 2.770 2.974
# Base-R grouped summary: mean Petal.Length within each Species level.
tapply(
  X = iris$Petal.Length,
  INDEX = iris$Species,
  FUN = mean
)
## setosa versicolor virginica
## 1.462 4.260 5.552
# Base-R grouped summary: mean Petal.Width within each Species level.
tapply(
  X = iris$Petal.Width,
  INDEX = iris$Species,
  FUN = mean
)
## setosa versicolor virginica
## 0.246 1.326 2.026
# dplyr equivalent of mean(iris$Sepal.Length): a one-row data frame whose
# column is auto-named after the (unnamed) summary expression.
summarise(.data = iris, mean(Sepal.Length))
## mean(Sepal.Length)
## 1 5.843333
# Grouped mean written as nested calls (no pipe): group iris by Species,
# then reduce each group to its mean Sepal.Length in column `media`.
summarise(
  .data = group_by(.data = iris, Species),
  media = mean(Sepal.Length)
)
## # A tibble: 3 × 2
## Species media
## <fct> <dbl>
## 1 setosa 5.01
## 2 versicolor 5.94
## 3 virginica 6.59
# Pipe operator %>% (RStudio keyboard shortcut: Ctrl + Shift + M)
# Same grouped mean as the nested call above, written as a pipeline.
# With a single grouping variable the result is ungrouped either way;
# `.groups = "drop"` only makes that explicit.
iris %>%
  group_by(Species) %>%
  summarise(
    media = mean(Sepal.Length),
    .groups = "drop"
  )
## # A tibble: 3 × 2
## Species media
## <fct> <dbl>
## 1 setosa 5.01
## 2 versicolor 5.94
## 3 virginica 6.59
# Several summaries per species in one summarise() call:
# mean and standard deviation of Sepal.Length, plus mean Petal.Length.
iris %>%
  group_by(Species) %>%
  summarise(
    media_v1 = mean(Sepal.Length),
    desv_v1 = sd(Sepal.Length),
    media_v2 = mean(Petal.Length),
    .groups = "drop"
  )
## # A tibble: 3 × 4
## Species media_v1 desv_v1 media_v2
## <fct> <dbl> <dbl> <dbl>
## 1 setosa 5.01 0.352 1.46
## 2 versicolor 5.94 0.516 4.26
## 3 virginica 6.59 0.636 5.55
# Apply mean and sd to every non-grouping column; the list names become the
# suffixes of the output columns (<column>_media, <column>_desv).
# NOTE(review): summarise_all() is superseded by summarise(across(...)) in
# current dplyr; it is kept because across() would order the result columns
# differently from the output shown below.
iris %>%
  group_by(Species) %>%
  summarise_all(list(media = mean, desv = sd))
## # A tibble: 3 × 9
## Species Sepal.Length_media Sepal.Width_media Petal.Length_media
## <fct> <dbl> <dbl> <dbl>
## 1 setosa 5.01 3.43 1.46
## 2 versicolor 5.94 2.77 4.26
## 3 virginica 6.59 2.97 5.55
## # ℹ 5 more variables: Petal.Width_media <dbl>, Sepal.Length_desv <dbl>,
## # Sepal.Width_desv <dbl>, Petal.Length_desv <dbl>, Petal.Width_desv <dbl>
# Per-species mean and standard deviation of Sepal.Length, followed by the
# coefficient of variation in percent (cv = 100 * sd / mean).
iris %>%
  group_by(Species) %>%
  summarise(
    media = mean(Sepal.Length),
    desv = sd(Sepal.Length),
    .groups = "drop"
  ) %>%
  mutate(cv = 100 * desv / media)
## # A tibble: 3 × 4
## Species media desv cv
## <fct> <dbl> <dbl> <dbl>
## 1 setosa 5.01 0.352 7.04
## 2 versicolor 5.94 0.516 8.70
## 3 virginica 6.59 0.636 9.65
# Simulated trial: 4 treatments (trt) x 3 blocks (blq) with 10 observations
# per cell, 120 rows in total. `rto` is drawn from N(mean = 3, sd = 0.3).
# No seed is set in the visible script, so the values change on every run.
datos <- data.frame(
  trt = gl(n = 4, k = 30, length = 120, labels = c("t1", "t2", "t3", "t4")),
  blq = gl(n = 3, k = 10, length = 120, labels = c("b1", "b2", "b3")),
  rto = rnorm(n = 120, mean = 3, sd = 0.3)
)
datos
## trt blq rto
## 1 t1 b1 3.772825
## 2 t1 b1 3.268873
## 3 t1 b1 2.980482
## 4 t1 b1 2.843886
## 5 t1 b1 3.047816
## 6 t1 b1 3.488423
## 7 t1 b1 3.192405
## 8 t1 b1 2.704778
## 9 t1 b1 3.527524
## 10 t1 b1 2.792789
## 11 t1 b2 3.066748
## 12 t1 b2 2.809357
## 13 t1 b2 2.922661
## 14 t1 b2 2.445063
## 15 t1 b2 2.199840
## 16 t1 b2 3.053641
## 17 t1 b2 3.133348
## 18 t1 b2 2.566307
## 19 t1 b2 2.594735
## 20 t1 b2 3.205023
## 21 t1 b3 3.195768
## 22 t1 b3 3.253186
## 23 t1 b3 3.243581
## 24 t1 b3 3.433685
## 25 t1 b3 2.561771
## 26 t1 b3 2.748606
## 27 t1 b3 2.992749
## 28 t1 b3 2.756285
## 29 t1 b3 3.461040
## 30 t1 b3 3.002031
## 31 t2 b1 3.459532
## 32 t2 b1 3.262394
## 33 t2 b1 3.119684
## 34 t2 b1 2.137728
## 35 t2 b1 2.859659
## 36 t2 b1 3.170586
## 37 t2 b1 3.571267
## 38 t2 b1 2.812300
## 39 t2 b1 2.785066
## 40 t2 b1 3.029665
## 41 t2 b2 3.334420
## 42 t2 b2 2.738489
## 43 t2 b2 3.242817
## 44 t2 b2 2.940265
## 45 t2 b2 2.926465
## 46 t2 b2 3.129247
## 47 t2 b2 2.916225
## 48 t2 b2 3.109695
## 49 t2 b2 3.453330
## 50 t2 b2 2.597413
## 51 t2 b3 3.200790
## 52 t2 b3 3.061165
## 53 t2 b3 2.998505
## 54 t2 b3 3.273917
## 55 t2 b3 3.116882
## 56 t2 b3 3.210602
## 57 t2 b3 3.348717
## 58 t2 b3 3.060147
## 59 t2 b3 2.804424
## 60 t2 b3 3.095830
## 61 t3 b1 3.093261
## 62 t3 b1 2.816742
## 63 t3 b1 3.008248
## 64 t3 b1 3.057734
## 65 t3 b1 3.213256
## 66 t3 b1 3.548793
## 67 t3 b1 3.142374
## 68 t3 b1 2.917751
## 69 t3 b1 2.671271
## 70 t3 b1 3.166420
## 71 t3 b2 3.493289
## 72 t3 b2 3.101556
## 73 t3 b2 2.767183
## 74 t3 b2 2.828658
## 75 t3 b2 2.988608
## 76 t3 b2 2.807803
## 77 t3 b2 2.773988
## 78 t3 b2 2.896395
## 79 t3 b2 3.005557
## 80 t3 b2 3.397154
## 81 t3 b3 2.442622
## 82 t3 b3 2.963006
## 83 t3 b3 2.934073
## 84 t3 b3 3.195663
## 85 t3 b3 4.011779
## 86 t3 b3 2.989733
## 87 t3 b3 2.513602
## 88 t3 b3 3.327901
## 89 t3 b3 2.631882
## 90 t3 b3 3.242029
## 91 t4 b1 3.022146
## 92 t4 b1 2.688779
## 93 t4 b1 3.088977
## 94 t4 b1 2.816304
## 95 t4 b1 3.065372
## 96 t4 b1 2.797100
## 97 t4 b1 2.849695
## 98 t4 b1 3.002501
## 99 t4 b1 3.266520
## 100 t4 b1 3.393023
## 101 t4 b2 2.881650
## 102 t4 b2 3.387901
## 103 t4 b2 3.050226
## 104 t4 b2 2.631520
## 105 t4 b2 2.957621
## 106 t4 b2 2.681866
## 107 t4 b2 3.836270
## 108 t4 b2 3.384324
## 109 t4 b2 2.706179
## 110 t4 b2 2.473571
## 111 t4 b3 3.442172
## 112 t4 b3 3.338651
## 113 t4 b3 3.277279
## 114 t4 b3 2.847723
## 115 t4 b3 2.559592
## 116 t4 b3 3.453699
## 117 t4 b3 2.545247
## 118 t4 b3 3.300916
## 119 t4 b3 2.942004
## 120 t4 b3 2.941820
# Mean of `rto` for every treatment x block cell (12 rows).
# `.groups = "drop_last"` spells out dplyr's default regrouping: the result
# stays grouped by `trt`, exactly as before, and the
# "summarise() has grouped output by 'trt'" message is no longer emitted.
t1 <- datos %>%
  group_by(trt, blq) %>%
  summarise(media = mean(rto), .groups = "drop_last")
# Per-treatment total of the cell means. The summary column keeps the
# auto-generated name `sum(media)`, written out explicitly here.
t2 <- t1 %>%
  group_by(trt) %>%
  summarise(`sum(media)` = sum(media))
# Grouped mutate: unlike summarise(), every row of t1 is kept and the
# per-treatment total is repeated on each row in the new column `tot`.
t1 %>%
  group_by(trt) %>%
  mutate(tot = sum(media))
## # A tibble: 12 × 4
## # Groups: trt [4]
## trt blq media tot
## <fct> <fct> <dbl> <dbl>
## 1 t1 b1 3.16 9.03
## 2 t1 b2 2.80 9.03
## 3 t1 b3 3.06 9.03
## 4 t2 b1 3.02 9.18
## 5 t2 b2 3.04 9.18
## 6 t2 b3 3.12 9.18
## 7 t3 b1 3.06 9.09
## 8 t3 b2 3.01 9.09
## 9 t3 b3 3.03 9.09
## 10 t4 b1 3.00 9.06
## 11 t4 b2 3.00 9.06
## 12 t4 b3 3.06 9.06
# Attach each treatment's total (t2) to the cell means (t1), matching on trt.
left_join(x = t1, y = t2, by = "trt")
## # A tibble: 12 × 4
## # Groups: trt [4]
## trt blq media `sum(media)`
## <fct> <fct> <dbl> <dbl>
## 1 t1 b1 3.16 9.03
## 2 t1 b2 2.80 9.03
## 3 t1 b3 3.06 9.03
## 4 t2 b1 3.02 9.18
## 5 t2 b2 3.04 9.18
## 6 t2 b3 3.12 9.18
## 7 t3 b1 3.06 9.09
## 8 t3 b2 3.01 9.09
## 9 t3 b3 3.03 9.09
## 10 t4 b1 3.00 9.06
## 11 t4 b2 3.00 9.06
## 12 t4 b3 3.06 9.06
# Self-join of t1 on `trt`: each treatment has three block rows on both
# sides, so every treatment expands to 3 x 3 = 9 rows (36 in total).
# The many-to-many relationship is declared explicitly so dplyr does not
# warn about an "unexpected" fan-out.
# NOTE(review): joining t1 to itself looks like a deliberate fan-out demo;
# if t2 was intended as the second table, change `y` and drop `relationship`.
right_join(x = t1, y = t1, by = "trt", relationship = "many-to-many")
## # A tibble: 36 × 5
## # Groups: trt [4]
## trt blq.x media.x blq.y media.y
## <fct> <fct> <dbl> <fct> <dbl>
## 1 t1 b1 3.16 b1 3.16
## 2 t1 b1 3.16 b2 2.80
## 3 t1 b1 3.16 b3 3.06
## 4 t1 b2 2.80 b1 3.16
## 5 t1 b2 2.80 b2 2.80
## 6 t1 b2 2.80 b3 3.06
## 7 t1 b3 3.06 b1 3.16
## 8 t1 b3 3.06 b2 2.80
## 9 t1 b3 3.06 b3 3.06
## 10 t2 b1 3.02 b1 3.02
## # ℹ 26 more rows
# Two small lookup tables for the join / bind examples that follow.
# df2 contains the same rows as df1 plus one extra treatment, t3.
df1 <- data.frame(
  trt = c("t1", "t2"),
  tot = c(12, 15)
)
df2 <- data.frame(
  trt = c("t1", "t2", "t3"),
  tot = c(12, 15, 18)
)
# Left join keeps only the treatments present in df1 (t1, t2); the shared
# non-key column `tot` is disambiguated as tot.x / tot.y.
left_join(x = df1, y = df2, by = "trt")
## trt tot.x tot.y
## 1 t1 12 12
## 2 t2 15 15
# Right join keeps every treatment of df2; t3 has no match in df1, so its
# tot.x is NA.
right_join(x = df1, y = df2, by = "trt")
## trt tot.x tot.y
## 1 t1 12 12
## 2 t2 15 15
## 3 t3 NA 18
# Base-R row bind: stacks the rows of df2 below those of df1 (both have the
# columns trt and tot), giving five rows.
rbind(df1, df2)
## trt tot
## 1 t1 12
## 2 t2 15
## 3 t1 12
## 4 t2 15
## 5 t3 18
# dplyr row bind: the data frames are passed as a list and stacked in order.
bind_rows(list(df1, df2))
## trt tot
## 1 t1 12
## 2 t2 15
## 3 t1 12
## 4 t2 15
## 5 t3 18
# dplyr column bind of df2 with itself. Duplicate names are repaired to be
# unique (trt...1, tot...2, ...); "unique" is the default repair strategy,
# stated explicitly here.
bind_cols(df2, df2, .name_repair = "unique")
## New names:
## • `trt` -> `trt...1`
## • `tot` -> `tot...2`
## • `trt` -> `trt...3`
## • `tot` -> `tot...4`
## trt...1 tot...2 trt...3 tot...4
## 1 t1 12 t1 12
## 2 t2 15 t2 15
## 3 t3 18 t3 18
# Base-R column bind of df2 with itself: the duplicated column names
# (trt, tot, trt, tot) are kept as-is, with no renaming.
cbind(df2, df2)
## trt tot trt tot
## 1 t1 12 t1 12
## 2 t2 15 t2 15
## 3 t3 18 t3 18
# Random sample of three rows from every treatment x block cell
# (12 cells x 3 = 36 rows). slice_sample() replaces the superseded
# sample_n(). The rows drawn differ on every run because no seed is set in
# the visible script.
datos %>%
  group_by(trt, blq) %>%
  slice_sample(n = 3)
## # A tibble: 36 × 3
## # Groups: trt, blq [12]
## trt blq rto
## <fct> <fct> <dbl>
## 1 t1 b1 3.77
## 2 t1 b1 3.49
## 3 t1 b1 2.84
## 4 t1 b2 3.05
## 5 t1 b2 2.57
## 6 t1 b2 2.92
## 7 t1 b3 3.20
## 8 t1 b3 2.75
## 9 t1 b3 2.99
## 10 t2 b1 3.17
## # ℹ 26 more rows
# Random 10 % sample of the whole data set (12 of the 120 rows).
# slice_sample(prop = ) replaces the superseded sample_frac().
datos %>%
  slice_sample(prop = 0.1)
## trt blq rto
## 1 t3 b1 3.057734
## 2 t3 b3 3.195663
## 3 t3 b2 3.005557
## 4 t2 b3 3.200790
## 5 t1 b3 3.002031
## 6 t4 b1 3.088977
## 7 t1 b3 3.243581
## 8 t2 b2 3.453330
## 9 t3 b1 2.816742
## 10 t3 b3 3.242029
## 11 t1 b3 3.461040
## 12 t3 b2 2.896395
# Three largest `rto` values per treatment, sorted by treatment and
# descending value, then relabelled with upper-case column names.
# slice_max() replaces the superseded top_n(); naming `rto` explicitly means
# no "Selecting by rto" message is emitted. rename_with() replaces the
# superseded rename_all(). The result stays grouped by the renamed
# treatment column, as before.
datos %>%
  group_by(trt) %>%
  slice_max(order_by = rto, n = 3) %>%
  arrange(trt, desc(rto)) %>%
  rename(
    "tratamiento" = trt,
    "bloque" = blq,
    "rendimiento" = rto
  ) %>%
  rename_with(toupper)
## # A tibble: 12 × 3
## # Groups: TRATAMIENTO [4]
## TRATAMIENTO BLOQUE RENDIMIENTO
## <fct> <fct> <dbl>
## 1 t1 b1 3.77
## 2 t1 b1 3.53
## 3 t1 b1 3.49
## 4 t2 b1 3.57
## 5 t2 b1 3.46
## 6 t2 b2 3.45
## 7 t3 b3 4.01
## 8 t3 b1 3.55
## 9 t3 b2 3.49
## 10 t4 b2 3.84
## 11 t4 b3 3.45
## 12 t4 b3 3.44