Histograms with a normal curve using ggplot2

Data
iris
Mean and standard deviation for each group

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.5     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.2     ✓ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

# Per-species mean and standard deviation of Sepal.Length in `iris`.
# The result has one row per Species with the columns Sepal.Length_m (mean)
# and Sepal.Length_sd (standard deviation).
# NOTE: this object shares its name with base::summary(); the name is kept
# because the plotting code further down reads `summary`.
summary <-
  iris %>%
  group_by(Species) %>%
  summarise(
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    Sepal.Length_m = mean(Sepal.Length, na.rm = TRUE),
    Sepal.Length_sd = sd(Sepal.Length, na.rm = TRUE)
  )
summary

## # A tibble: 3 × 3
##   Species    Sepal.Length_m Sepal.Length_sd
##   <fct>               <dbl>           <dbl>
## 1 setosa               5.01           0.352
## 2 versicolor           5.94           0.516
## 3 virginica            6.59           0.636

Histogram plot

library(ggplot2)

# Build one normal-density layer per row of `summary` (one row per Species)
# instead of repeating the same stat_function() call for every species name.
# Each layer receives its one-row summary as `data`; that row carries the
# Species value, which is presumably what ties the curve to the matching
# facet (same mechanism the per-species filter() calls relied on).
normal_curves <- lapply(seq_len(nrow(summary)), function(i) {
  group_stats <- summary[i, ]
  stat_function(
    data = group_stats,
    fun = dnorm,
    args = list(
      mean = group_stats$Sepal.Length_m,
      sd = group_stats$Sepal.Length_sd
    )
  )
})

# Density-scaled histogram of Sepal.Length, one facet row per Species, with
# the fitted normal curve overlaid on each facet.
iris %>%
  ggplot() +
  geom_histogram(
    # after_stat(density) replaces the deprecated `..density..` notation and
    # puts the histogram on the same scale as dnorm().
    aes(x = Sepal.Length, y = after_stat(density), fill = Species),
    alpha = 0.15
  ) +
  facet_grid(Species ~ .) +
  normal_curves +
  labs(title = "histograms with a normal curve") +
  theme_bw()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Histograms with a normal curve using ggplot2

by majeda mobajer

11/7/2021