Introduction

ggpubr is an R package that provides a convenient interface for creating publication-ready plots using the ggplot2 package. It extends the functionality of ggplot2 by providing additional themes, scales, and geoms that can be used to create more complex and visually appealing plots. ggpubr also includes tools for creating complex figures by combining multiple plots and adding annotations and captions. Overall, ggpubr is a useful tool for data scientists and researchers looking to create high-quality plots for publication and presentation purposes.

ggpubr figures

First, load the required packages and the data

library(ggplot2)
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.4.1
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the GapMinder table from the working directory and show its structure.
gap <- read.csv("gapminderData5.csv")
str(gap)
## 'data.frame':    1704 obs. of  6 variables:
##  $ country  : chr  "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
##  $ year     : int  1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
##  $ pop      : num  8425333 9240934 10267083 11537966 13079460 ...
##  $ continent: chr  "Asia" "Asia" "Asia" "Asia" ...
##  $ lifeExp  : num  28.8 30.3 32 34 36.1 ...
##  $ gdpPercap: num  779 821 853 836 740 ...
# Keep only the 2007 rows and leave Oceania out.
gap07 <- gap %>%
  filter(
    year == 2007,
    continent != "Oceania"
  )

Scatter plots

Simple

# Life expectancy against GDP per capita for gap07, points coloured by
# continent. `color` is spelled out in full instead of the abbreviated `col`
# so the call does not rely on partial argument matching.
ggscatter(
  gap07,
  x = "gdpPercap",
  y = "lifeExp",
  color = "continent",
  xlab = "GDP per capita ($)",
  ylab = "Life Expectancy (yrs)",
  main = "GapMinder Data 2007"
) +
  # Put the x axis on a log10 scale with formatted tick labels.
  xscale("log10", .format = TRUE)

With regression line

# Same scatter plot with a regression line and confidence band added, plus
# the fitted equation coloured by continent. `color` is spelled out in full
# instead of the abbreviated `col` so the call does not rely on partial
# argument matching.
ggscatter(
  gap07,
  x = "gdpPercap",
  y = "lifeExp",
  color = "continent",
  add = "reg.line",
  conf.int = TRUE,
  xlab = "GDP per capita ($)",
  ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007"
) +
  # Put the x axis on a log10 scale with formatted tick labels.
  xscale("log10", .format = TRUE) +
  stat_regline_equation(aes(color = continent))

Histograms

# Histogram of life expectancy with bars filled by continent. `bins = 30` is
# passed explicitly: it is the value gghistogram() falls back to when no bin
# count is given, so the plot is unchanged, but the
# "Using `bins = 30` by default" warning is no longer raised.
gghistogram(
  gap07,
  x = "lifeExp",
  fill = "continent",
  bins = 30,
  main = "GapMinder Life Expectancy",
  palette = "npg"
)

Density plots

Simple

# Life-expectancy density curves, one per continent, drawn on a single panel.
ggdensity(
  gap07,
  x = "lifeExp",
  fill = "continent",
  palette = "jco",
  main = "GapMinder Life Expectancy"
)

Multiple

# Same density curves, split into one facet per continent.
ggdensity(
  gap07,
  x = "lifeExp",
  fill = "continent",
  facet.by = "continent",
  palette = "jco",
  main = "GapMinder Life Expectancy"
)

Violin Plots

Vertical

# Violin per continent with a boxplot and jittered points overlaid.
ggviolin(
  gap07,
  x = "continent",
  y = "lifeExp",
  ylab = "Life expectancy (yrs)",
  add = c("boxplot", "jitter"),
  fill = "continent",
  palette = "jco"
)

Horizontal

# Same violin plot, rotated via `rotate = TRUE`.
ggviolin(
  gap07,
  x = "continent",
  y = "lifeExp",
  ylab = "Life expectancy (yrs)",
  add = c("boxplot", "jitter"),
  fill = "continent",
  palette = "jco",
  rotate = TRUE
)

Bar Plots

Simple

# One bar per country, filled by continent, with vertical x tick labels.
ggbarplot(
  gap07,
  x = "country", y = "lifeExp",
  xlab = "Country", ylab = "Life expectancy (yrs)",
  fill = "continent", palette = "jco",
  x.text.angle = 90
) +
  # Shrink the x tick labels.
  font("x.text", size = 4)

Sorted by value

# Same bar plot, with bars sorted by descending life expectancy across all
# continents (`sort.by.groups = FALSE`).
ggbarplot(
  gap07,
  x = "country", y = "lifeExp",
  xlab = "Country", ylab = "Life expectancy (yrs)",
  fill = "continent", palette = "jco",
  sort.val = "desc", sort.by.groups = FALSE,
  x.text.angle = 90
) +
  # Shrink the x tick labels.
  font("x.text", size = 4)

Cleveland Plots

# Rotated Cleveland dot chart of life expectancy by country, coloured and
# grouped by continent, with a segment drawn to each dot.
ggdotchart(
  gap07,
  x = "country", y = "lifeExp",
  xlab = "Country", ylab = "Life expectancy (yrs)",
  color = "continent", palette = "jco",
  group = "continent",
  sorting = "descending",
  add = "segments",
  rotate = TRUE
) +
  # Shrink the y tick labels.
  font("y.text", size = 4)

Adding comparisons

Subset new data to make the comparisons

# Asia and Africa only, at three time points (1957, 1982, 2007).
gap_sub <- gap %>%
  filter(
    continent %in% c("Asia", "Africa"),
    year %in% c(1957, 1982, 2007)
  )

Between 2 groups

# Life expectancy in the two continents of gap_sub, compared with a t-test.
# `color` is spelled out in full instead of the abbreviated `col` so the call
# does not rely on partial argument matching.
ggboxplot(
  gap_sub,
  x = "continent",
  y = "lifeExp",
  ylab = "Years",
  color = "continent",
  add = "jitter"
) +
  # label.y places the test label at y = 90.
  stat_compare_means(method = "t.test", label.y = 90)

# Same two-continent comparison, faceted by year. `color` is spelled out in
# full instead of the abbreviated `col` so the call does not rely on partial
# argument matching.
ggboxplot(
  gap_sub,
  x = "continent",
  y = "lifeExp",
  ylab = "Years",
  color = "continent",
  add = "jitter",
  facet.by = "year"
) +
  # label.y places the test label at y = 90.
  stat_compare_means(method = "t.test", label.y = 90)

Between 2+ groups

# Life expectancy across the three years, with an overall ANOVA label.
ggboxplot(
  gap_sub,
  x = "year",
  y = "lifeExp"
) +
  stat_compare_means(method = "anova", label.y = 80)

# Every pair of the three years, as a list of length-2 character vectors, for
# the `comparisons` argument of stat_compare_means().
comps <- combn(c("1957", "1982", "2007"), 2, simplify = FALSE)
comps
## [[1]]
## [1] "1957" "1982"
## 
## [[2]]
## [1] "1957" "2007"
## 
## [[3]]
## [1] "1982" "2007"
# Boxplots by year with a t-test bracket for each pair listed in `comps`.
ggboxplot(
  gap_sub,
  x = "year",
  y = "lifeExp",
  ylab = "Years"
) +
  stat_compare_means(
    comparisons = comps,
    method = "t.test",
    size = 4,
    bracket.size = 0.6
  )

Choosing one of the groups as the reference

# Per-continent facets: t-test of each year against the 1957 reference group,
# shown as significance symbols, plus an ANOVA label.
ggboxplot(
  gap_sub,
  x = "year",
  y = "lifeExp",
  ylab = "Years",
  facet.by = "continent"
) +
  stat_compare_means(
    method = "t.test",
    ref.group = "1957",
    label = "p.signif"
  ) +
  stat_compare_means(method = "anova", label.y = 110)

Thank you for reading!