How to do aggregation/summarization
Summarization after grouping
library(tidyverse)
# Mean sepal length per species, listed from the largest mean to the smallest.
iris %>%
  group_by(Species) %>%
  summarise(Support = mean(Sepal.Length)) %>%
  arrange(desc(Support))
## # A tibble: 3 × 2
## Species Support
## <fct> <dbl>
## 1 virginica 6.59
## 2 versicolor 5.94
## 3 setosa 5.01
# Per-species mean sepal width, mean petal length, and the mean of their
# row-wise difference; species with the largest difference are listed first.
iris %>%
  group_by(Species) %>%
  summarise(
    mean_s = mean(Sepal.Width),
    meas_p = mean(Petal.Length),
    diff = mean(Sepal.Width - Petal.Length)
  ) %>%
  arrange(desc(diff))
## # A tibble: 3 × 4
## Species mean_s meas_p diff
## <fct> <dbl> <dbl> <dbl>
## 1 setosa 3.43 1.46 1.97
## 2 versicolor 2.77 4.26 -1.49
## 3 virginica 2.97 5.55 -2.58
# One row per species: number of rows, mean petal length, and its standard
# deviation.
iris %>%
  group_by(Species) %>%
  summarise(
    n = n(),
    meas_p = mean(Petal.Length),
    sd = sd(Petal.Length)
  )
## # A tibble: 3 × 4
## Species n meas_p sd
## <fct> <int> <dbl> <dbl>
## 1 setosa 50 1.46 0.174
## 2 versicolor 50 4.26 0.470
## 3 virginica 50 5.55 0.552
Summarization with ungroup
# Same statistics computed over the whole table: with no grouping in effect,
# summarise() collapses everything into a single row.
iris %>%
  ungroup() %>%
  summarise(
    n = n(),
    meas_p = mean(Petal.Length),
    sd = sd(Petal.Length)
  )
## n meas_p sd
## 1 150 3.758 1.765298
Mutate new variables after grouping
# mutate() keeps every original row and attaches the per-species statistics
# as new columns, repeated on each row of the group.
iris %>%
  group_by(Species) %>%
  mutate(
    n = n(),
    meas_p = mean(Petal.Length),
    sd = sd(Petal.Length)
  )
## # A tibble: 150 × 8
## # Groups: Species [3]
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species n meas_p sd
## <dbl> <dbl> <dbl> <dbl> <fct> <int> <dbl> <dbl>
## 1 5.1 3.5 1.4 0.2 setosa 50 1.46 0.174
## 2 4.9 3 1.4 0.2 setosa 50 1.46 0.174
## 3 4.7 3.2 1.3 0.2 setosa 50 1.46 0.174
## 4 4.6 3.1 1.5 0.2 setosa 50 1.46 0.174
## 5 5 3.6 1.4 0.2 setosa 50 1.46 0.174
## 6 5.4 3.9 1.7 0.4 setosa 50 1.46 0.174
## 7 4.6 3.4 1.4 0.3 setosa 50 1.46 0.174
## 8 5 3.4 1.5 0.2 setosa 50 1.46 0.174
## 9 4.4 2.9 1.4 0.2 setosa 50 1.46 0.174
## 10 4.9 3.1 1.5 0.1 setosa 50 1.46 0.174
## # … with 140 more rows
# Attach per-species statistics as columns, then collapse to one row per
# species. The group size `n` added by mutate() is turned into a text label;
# `meas_p` and `sd` are recomputed from Petal.Length inside summarize().
iris %>%
  group_by(Species) %>%
  mutate(
    n = n(),
    # spell out TRUE: the shorthand `T` is an ordinary variable that can be
    # reassigned, which would silently change the meaning of na.rm
    meas_p = mean(Petal.Length, na.rm = TRUE),
    sd = sd(Petal.Length)
  ) %>%
  summarize(
    n_mean = paste("sample size:", mean(n)),
    meas_p = mean(Petal.Length),
    sd = sd(Petal.Length)
  )
## # A tibble: 3 × 4
## Species n_mean meas_p sd
## <fct> <chr> <dbl> <dbl>
## 1 setosa sample size: 50 1.46 0.174
## 2 versicolor sample size: 50 4.26 0.470
## 3 virginica sample size: 50 5.55 0.552
Recode and generate new variables, then label the values
# Recode: copy Species as text and turn "setosa" into a missing value.
# as.character() keeps the species names; ifelse() applied to the factor
# itself drops the factor attributes and returns the underlying integer codes.
irisifelse <- iris %>%
  mutate(
    Species2 = ifelse(Species == "setosa", NA_character_, as.character(Species))
  )
# relabel values: explicit `levels` tie each label to a named species instead
# of relying on the sort order of the recoded values
irisifelse$Species2 <- factor(
  irisifelse$Species2,
  levels = c("versicolor", "virginica"),
  labels = c("versi", "virg")
)
irisifelse
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species Species2
## 1 5.1 3.5 1.4 0.2 setosa <NA>
## 2 4.9 3.0 1.4 0.2 setosa <NA>
## 3 4.7 3.2 1.3 0.2 setosa <NA>
## 4 4.6 3.1 1.5 0.2 setosa <NA>
## 5 5.0 3.6 1.4 0.2 setosa <NA>
## 6 5.4 3.9 1.7 0.4 setosa <NA>
## 7 4.6 3.4 1.4 0.3 setosa <NA>
## 8 5.0 3.4 1.5 0.2 setosa <NA>
## 9 4.4 2.9 1.4 0.2 setosa <NA>
## 10 4.9 3.1 1.5 0.1 setosa <NA>
## 11 5.4 3.7 1.5 0.2 setosa <NA>
## 12 4.8 3.4 1.6 0.2 setosa <NA>
## 13 4.8 3.0 1.4 0.1 setosa <NA>
## 14 4.3 3.0 1.1 0.1 setosa <NA>
## 15 5.8 4.0 1.2 0.2 setosa <NA>
## 16 5.7 4.4 1.5 0.4 setosa <NA>
## 17 5.4 3.9 1.3 0.4 setosa <NA>
## 18 5.1 3.5 1.4 0.3 setosa <NA>
## 19 5.7 3.8 1.7 0.3 setosa <NA>
## 20 5.1 3.8 1.5 0.3 setosa <NA>
## 21 5.4 3.4 1.7 0.2 setosa <NA>
## 22 5.1 3.7 1.5 0.4 setosa <NA>
## 23 4.6 3.6 1.0 0.2 setosa <NA>
## 24 5.1 3.3 1.7 0.5 setosa <NA>
## 25 4.8 3.4 1.9 0.2 setosa <NA>
## 26 5.0 3.0 1.6 0.2 setosa <NA>
## 27 5.0 3.4 1.6 0.4 setosa <NA>
## 28 5.2 3.5 1.5 0.2 setosa <NA>
## 29 5.2 3.4 1.4 0.2 setosa <NA>
## 30 4.7 3.2 1.6 0.2 setosa <NA>
## 31 4.8 3.1 1.6 0.2 setosa <NA>
## 32 5.4 3.4 1.5 0.4 setosa <NA>
## 33 5.2 4.1 1.5 0.1 setosa <NA>
## 34 5.5 4.2 1.4 0.2 setosa <NA>
## 35 4.9 3.1 1.5 0.2 setosa <NA>
## 36 5.0 3.2 1.2 0.2 setosa <NA>
## 37 5.5 3.5 1.3 0.2 setosa <NA>
## 38 4.9 3.6 1.4 0.1 setosa <NA>
## 39 4.4 3.0 1.3 0.2 setosa <NA>
## 40 5.1 3.4 1.5 0.2 setosa <NA>
## 41 5.0 3.5 1.3 0.3 setosa <NA>
## 42 4.5 2.3 1.3 0.3 setosa <NA>
## 43 4.4 3.2 1.3 0.2 setosa <NA>
## 44 5.0 3.5 1.6 0.6 setosa <NA>
## 45 5.1 3.8 1.9 0.4 setosa <NA>
## 46 4.8 3.0 1.4 0.3 setosa <NA>
## 47 5.1 3.8 1.6 0.2 setosa <NA>
## 48 4.6 3.2 1.4 0.2 setosa <NA>
## 49 5.3 3.7 1.5 0.2 setosa <NA>
## 50 5.0 3.3 1.4 0.2 setosa <NA>
## 51 7.0 3.2 4.7 1.4 versicolor versi
## 52 6.4 3.2 4.5 1.5 versicolor versi
## 53 6.9 3.1 4.9 1.5 versicolor versi
## 54 5.5 2.3 4.0 1.3 versicolor versi
## 55 6.5 2.8 4.6 1.5 versicolor versi
## 56 5.7 2.8 4.5 1.3 versicolor versi
## 57 6.3 3.3 4.7 1.6 versicolor versi
## 58 4.9 2.4 3.3 1.0 versicolor versi
## 59 6.6 2.9 4.6 1.3 versicolor versi
## 60 5.2 2.7 3.9 1.4 versicolor versi
## 61 5.0 2.0 3.5 1.0 versicolor versi
## 62 5.9 3.0 4.2 1.5 versicolor versi
## 63 6.0 2.2 4.0 1.0 versicolor versi
## 64 6.1 2.9 4.7 1.4 versicolor versi
## 65 5.6 2.9 3.6 1.3 versicolor versi
## 66 6.7 3.1 4.4 1.4 versicolor versi
## 67 5.6 3.0 4.5 1.5 versicolor versi
## 68 5.8 2.7 4.1 1.0 versicolor versi
## 69 6.2 2.2 4.5 1.5 versicolor versi
## 70 5.6 2.5 3.9 1.1 versicolor versi
## 71 5.9 3.2 4.8 1.8 versicolor versi
## 72 6.1 2.8 4.0 1.3 versicolor versi
## 73 6.3 2.5 4.9 1.5 versicolor versi
## 74 6.1 2.8 4.7 1.2 versicolor versi
## 75 6.4 2.9 4.3 1.3 versicolor versi
## 76 6.6 3.0 4.4 1.4 versicolor versi
## 77 6.8 2.8 4.8 1.4 versicolor versi
## 78 6.7 3.0 5.0 1.7 versicolor versi
## 79 6.0 2.9 4.5 1.5 versicolor versi
## 80 5.7 2.6 3.5 1.0 versicolor versi
## 81 5.5 2.4 3.8 1.1 versicolor versi
## 82 5.5 2.4 3.7 1.0 versicolor versi
## 83 5.8 2.7 3.9 1.2 versicolor versi
## 84 6.0 2.7 5.1 1.6 versicolor versi
## 85 5.4 3.0 4.5 1.5 versicolor versi
## 86 6.0 3.4 4.5 1.6 versicolor versi
## 87 6.7 3.1 4.7 1.5 versicolor versi
## 88 6.3 2.3 4.4 1.3 versicolor versi
## 89 5.6 3.0 4.1 1.3 versicolor versi
## 90 5.5 2.5 4.0 1.3 versicolor versi
## 91 5.5 2.6 4.4 1.2 versicolor versi
## 92 6.1 3.0 4.6 1.4 versicolor versi
## 93 5.8 2.6 4.0 1.2 versicolor versi
## 94 5.0 2.3 3.3 1.0 versicolor versi
## 95 5.6 2.7 4.2 1.3 versicolor versi
## 96 5.7 3.0 4.2 1.2 versicolor versi
## 97 5.7 2.9 4.2 1.3 versicolor versi
## 98 6.2 2.9 4.3 1.3 versicolor versi
## 99 5.1 2.5 3.0 1.1 versicolor versi
## 100 5.7 2.8 4.1 1.3 versicolor versi
## 101 6.3 3.3 6.0 2.5 virginica virg
## 102 5.8 2.7 5.1 1.9 virginica virg
## 103 7.1 3.0 5.9 2.1 virginica virg
## 104 6.3 2.9 5.6 1.8 virginica virg
## 105 6.5 3.0 5.8 2.2 virginica virg
## 106 7.6 3.0 6.6 2.1 virginica virg
## 107 4.9 2.5 4.5 1.7 virginica virg
## 108 7.3 2.9 6.3 1.8 virginica virg
## 109 6.7 2.5 5.8 1.8 virginica virg
## 110 7.2 3.6 6.1 2.5 virginica virg
## 111 6.5 3.2 5.1 2.0 virginica virg
## 112 6.4 2.7 5.3 1.9 virginica virg
## 113 6.8 3.0 5.5 2.1 virginica virg
## 114 5.7 2.5 5.0 2.0 virginica virg
## 115 5.8 2.8 5.1 2.4 virginica virg
## 116 6.4 3.2 5.3 2.3 virginica virg
## 117 6.5 3.0 5.5 1.8 virginica virg
## 118 7.7 3.8 6.7 2.2 virginica virg
## 119 7.7 2.6 6.9 2.3 virginica virg
## 120 6.0 2.2 5.0 1.5 virginica virg
## 121 6.9 3.2 5.7 2.3 virginica virg
## 122 5.6 2.8 4.9 2.0 virginica virg
## 123 7.7 2.8 6.7 2.0 virginica virg
## 124 6.3 2.7 4.9 1.8 virginica virg
## 125 6.7 3.3 5.7 2.1 virginica virg
## 126 7.2 3.2 6.0 1.8 virginica virg
## 127 6.2 2.8 4.8 1.8 virginica virg
## 128 6.1 3.0 4.9 1.8 virginica virg
## 129 6.4 2.8 5.6 2.1 virginica virg
## 130 7.2 3.0 5.8 1.6 virginica virg
## 131 7.4 2.8 6.1 1.9 virginica virg
## 132 7.9 3.8 6.4 2.0 virginica virg
## 133 6.4 2.8 5.6 2.2 virginica virg
## 134 6.3 2.8 5.1 1.5 virginica virg
## 135 6.1 2.6 5.6 1.4 virginica virg
## 136 7.7 3.0 6.1 2.3 virginica virg
## 137 6.3 3.4 5.6 2.4 virginica virg
## 138 6.4 3.1 5.5 1.8 virginica virg
## 139 6.0 3.0 4.8 1.8 virginica virg
## 140 6.9 3.1 5.4 2.1 virginica virg
## 141 6.7 3.1 5.6 2.4 virginica virg
## 142 6.9 3.1 5.1 2.3 virginica virg
## 143 5.8 2.7 5.1 1.9 virginica virg
## 144 6.8 3.2 5.9 2.3 virginica virg
## 145 6.7 3.3 5.7 2.5 virginica virg
## 146 6.7 3.0 5.2 2.3 virginica virg
## 147 6.3 2.5 5.0 1.9 virginica virg
## 148 6.5 3.0 5.2 2.0 virginica virg
## 149 6.2 3.4 5.4 2.3 virginica virg
## 150 5.9 3.0 5.1 1.8 virginica virg
# Compact overview of each column's type in the recoded data frame.
irisifelse %>% str()
## 'data.frame': 150 obs. of 6 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Species2 : Factor w/ 2 levels "versi","virg": NA NA NA NA NA NA NA NA NA NA ...