Data
# Evaluating the bare name at top level prints the iris data set, the input
# for the per-species summary and histograms below.
iris
Mean and standard deviation for each group
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.5 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Per-species mean and standard deviation of Sepal.Length (one row per
# Species). NOTE: the object shares its name with base::summary(); the name
# is kept because the plotting code refers to this table as `summary`.
summary <-
  iris %>%
  group_by(Species) %>%
  summarise(
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    Sepal.Length_m = mean(Sepal.Length, na.rm = TRUE),
    Sepal.Length_sd = sd(Sepal.Length, na.rm = TRUE)
  )
# Print the resulting table.
summary
## # A tibble: 3 × 3
## Species Sepal.Length_m Sepal.Length_sd
## <fct> <dbl> <dbl>
## 1 setosa 5.01 0.352
## 2 versicolor 5.94 0.516
## 3 virginica 6.59 0.636
Histogram plot
library(ggplot2)
# Density-scaled histogram of Sepal.Length, one facet row per Species, with a
# normal curve overlaid in each facet using that species' mean and sd taken
# from the `summary` table.
iris %>%
  ggplot() +
  # `bins` is left at its default, so stat_bin() reports `bins = 30`.
  geom_histogram(
    # after_stat(density) replaces the older `..density..` notation.
    aes(x = Sepal.Length, y = after_stat(density), fill = Species),
    alpha = 0.15
  ) +
  facet_grid(Species ~ .) +
  # One stat_function() layer per row of `summary`. Passing that single row
  # as the layer's data carries the Species value, so the curve is drawn only
  # in the matching facet. Adding a list to a ggplot adds each layer in turn.
  lapply(seq_len(nrow(summary)), function(i) {
    stat_function(
      data = summary[i, ],
      fun = dnorm,
      args = list(
        mean = summary$Sepal.Length_m[i],
        sd = summary$Sepal.Length_sd[i]
      )
    )
  }) +
  labs(title = "histograms with a normal curve") +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.