In this example we will calculate the mean and 95% confidence interval of a numerical column, grouped by another column. For example, we will use the Species and Sepal.Length fields and calculate the mean and the confidence interval of Sepal.Length for each Species.

Watch the YouTube video linked below for an easier understanding of the following examples:

https://youtu.be/29P6zkCkrJY

Plot the mean and confidence intervals in ggplot

The following packages are needed:

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

Preparing the data

From the built-in iris data set we will use the Species and Sepal.Length columns for our example. Sepal.Length is the numerical column for which we will calculate the mean and the 95% confidence interval for the mean.

As we are using Species in our data, we will group the data by this column. This means that we will calculate the mean and the confidence interval for each Species.

# Keep only the grouping column (Species) and the measurement (Sepal.Length)
# from the built-in iris data set.
data <- dplyr::select(iris, Species, Sepal.Length)


# One-sample t-test on all Sepal.Length values (not split by Species). The
# printed result reports the overall mean and its 95% confidence interval.
t.test(data$Sepal.Length,conf.level = 0.95)
## 
##  One Sample t-test
## 
## data:  data$Sepal.Length
## t = 86.425, df = 149, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  5.709732 5.976934
## sample estimates:
## mean of x 
##  5.843333
# Per-species mean of Sepal.Length with the lower (lci) and upper (uci) bounds
# of its 95% confidence interval, taken from a one-sample t-test.
dt <- data %>%
  dplyr::group_by(Species) %>%
  dplyr::summarise(
    mean = mean(Sepal.Length),
    # Run t.test() once per group and keep the interval in a temporary list
    # column, instead of repeating the test for each bound.
    ci = list(t.test(Sepal.Length, conf.level = 0.95)$conf.int),
    lci = ci[[1]][1],
    uci = ci[[1]][2]
  ) %>%
  # Drop the helper column so the result has Species, mean, lci and uci only.
  dplyr::select(-ci)
dt

Plotting in ggplot

Bar Chart

# Bar chart of the mean per Species with the 95% confidence interval drawn as
# red error bars. Uses the Species, mean, lci and uci columns of `dt`.
pl1 <- ggplot(data = dt, mapping = aes(x = Species)) +
  # geom_col() uses the y value as the bar height (same as stat = "identity").
  geom_col(aes(y = mean, fill = Species)) +
  # Line thickness is set with `linewidth`; using `size` for lines is
  # deprecated since ggplot2 3.4.0.
  geom_errorbar(
    aes(ymin = lci, ymax = uci),
    width = 0.4, color = "red", linewidth = 1
  ) +
  # Rounded interval bounds: lower label under the bar end, upper label above.
  geom_text(aes(y = lci, label = round(lci, 1)), size = 2, vjust = 1) +
  geom_text(aes(y = uci, label = round(uci, 1)), size = 2, vjust = -1) +
  theme_classic() +
  labs(
    title = "Bar chart with 95% confidence intervals",
    x = "Species",
    y = "Mean Sepal Length"
  )
pl1

Line Chart

# Line chart of the mean per Species with the 95% confidence interval drawn as
# red error bars. Uses the Species, mean, lci and uci columns of `dt`.
pl2 <- ggplot(data = dt, mapping = aes(x = Species)) +
  # group = 1 puts all rows in one group so a single line joins the species.
  geom_line(aes(y = mean), group = 1) +
  geom_point(aes(y = mean), color = "red") +
  # Line thickness is set with `linewidth`; using `size` for lines is
  # deprecated since ggplot2 3.4.0.
  geom_errorbar(
    aes(ymin = lci, ymax = uci),
    width = 0.4, color = "red", linewidth = 1
  ) +
  # Rounded interval bounds: lower label under the bar end, upper label above.
  geom_text(aes(y = lci, label = round(lci, 1)), size = 2, vjust = 1) +
  geom_text(aes(y = uci, label = round(uci, 1)), size = 2, vjust = -1) +
  theme_classic() +
  labs(
    title = "Line chart with 95% confidence intervals",
    x = "Species",
    y = "Mean Sepal Length"
  )
pl2

Dot Plot

# Dot plot of the mean per Species with the 95% confidence interval drawn as
# red error bars. Uses the Species, mean, lci and uci columns of `dt`.
pl3 <- ggplot(data = dt, mapping = aes(x = Species)) +
  geom_point(aes(y = mean), color = "red", size = 3) +
  # Line thickness is set with `linewidth`; using `size` for lines is
  # deprecated since ggplot2 3.4.0. (`size` on geom_point above is still the
  # point size and is unchanged.)
  geom_errorbar(
    aes(ymin = lci, ymax = uci),
    width = 0.4, color = "red", linewidth = 1
  ) +
  # Rounded interval bounds: lower label under the bar end, upper label above.
  geom_text(aes(y = lci, label = round(lci, 1)), size = 2, vjust = 1) +
  geom_text(aes(y = uci, label = round(uci, 1)), size = 2, vjust = -1) +
  theme_classic() +
  labs(
    title = "Point chart with 95% confidence intervals",
    x = "Species",
    y = "Mean Sepal Length"
  )
pl3

How to show the mean and the standard deviations

In this example we will show the mean plus one standard deviation (Mean + 1 SD) and the mean minus one standard deviation (Mean - 1 SD).

# Per-species mean and standard deviation of Sepal.Length, plus the values one
# standard deviation above and below the mean.
dt <- data %>%
  dplyr::group_by(Species) %>%
  dplyr::summarise(
    mean = mean(Sepal.Length),
    sd = sd(Sepal.Length),
    # `mean` and `sd` here are the summary columns computed just above.
    mean_pls_sd = mean + sd,
    mean_mns_sd = mean - sd
  )
dt
# Bar chart of the mean per Species with error bars spanning mean - 1 SD to
# mean + 1 SD. Uses the Species, mean, mean_mns_sd and mean_pls_sd columns
# of `dt`.
pl1 <- ggplot(data = dt, mapping = aes(x = Species)) +
  # geom_col() uses the y value as the bar height (same as stat = "identity").
  geom_col(aes(y = mean, fill = Species)) +
  # Line thickness is set with `linewidth`; using `size` for lines is
  # deprecated since ggplot2 3.4.0.
  geom_errorbar(
    aes(ymin = mean_mns_sd, ymax = mean_pls_sd),
    width = 0.4, color = "red", linewidth = 1
  ) +
  # Rounded bounds: lower label under the bar end, upper label above.
  geom_text(
    aes(y = mean_mns_sd, label = round(mean_mns_sd, 1)),
    size = 2, vjust = 1
  ) +
  geom_text(
    aes(y = mean_pls_sd, label = round(mean_pls_sd, 1)),
    size = 2, vjust = -1
  ) +
  theme_classic() +
  labs(
    title = "Bar chart showing mean and the SD",
    x = "Species",
    y = "Mean Sepal Length"
  )
pl1