# load the package 
library(pubrplot)

We will use the diamonds dataset, so load it from ggplot2.

# Keep a local copy of the diamonds data set that ships with ggplot2.
diamonds <- ggplot2::diamonds

See which functions the package provides.

# Open the help index of the pubrplot package.
help(package = "pubrplot")

Help Pages

Normality Assessment Plot with Shapiro-Wilk and Kolmogorov–Smirnov Tests

This function visualizes the distribution of multiple numeric variables using boxplots or histograms with overlaid normal distribution curves. It automatically selects the appropriate normality test based on sample size: the Shapiro–Wilk test is applied when sample size is <= 5000, while the Kolmogorov–Smirnov test is used for larger samples (> 5000). The resulting p-values are displayed directly on the plots.

# Example 1: box plots with the Shapiro-Wilk test (n <= 5000)
plot_norm(
  data = diamonds[seq_len(4000), ], # keep only the first 4000 rows
  geom = "box",
  vars = c("carat", "price"),
  color_bar = "firebrick"
)

# Example 2: histograms with the Shapiro-Wilk test (n <= 5000)
plot_norm(
  data = diamonds[seq_len(4000), ],
  geom = "hist",
  vars = c("carat", "x", "y"),
  p.ypos = 2,
  bins = 20 # adjust the number of bins
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the pubrplot package.
##   Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## ℹ The deprecated feature was likely used in the pubrplot package.
##   Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Example 3: the Kolmogorov-Smirnov test is applied automatically (n > 5000)
plot_norm(
  data = diamonds[seq_len(6000), ],
  geom = "hist",
  vars = c("carat", "x"),
  bins = 25,
  p.ypos = 2.24, # adjust the p-value position
  color_line = "darkgreen"
)

# Bar plot: creates a publication-quality bar plot for a categorical variable, with optional grouping by another variable. It automatically calculates counts and percentages and can display them on the bars. It also performs a Chi-square or Fisher's exact test if `by` is provided.

# CO2 data set: plant Type grouped by Treatment
plot_bar(
  CO2,
  var = "Type",
  by = "Treatment",
  label = c("Quebec", "Mississippi"),
  fill.lab = "Plant Type",
  color.bar = c("lightblue", "lightgreen"),
  border.color = "black",
  bar.width = 0.5,
  text.size = 3
)

# diamonds data set: cut grouped by color
plot_bar(
  ggplot2::diamonds,
  var = "cut",
  by = "color",
  fill.lab = "Cut",
  y.lab = "Distribution (%)",
  color.bar = c("#a465db", "steelblue", "darkgreen", "darkred", "#fcba03"),
  bar.width = 0.9,
  text.size = 2
)

# Ungrouped bar plot of a single categorical variable
plot_bar(
  ggplot2::diamonds,
  var = "cut"
)

Correlation Heatmap Plot

Creates a publication-ready correlation heatmap for numeric variables in a data frame. Each tile shows the correlation coefficient, with optional significance stars.

# Correlation heatmap of mtcars with the function's default arguments
plot_cor(mtcars)

Change the variable labels

# Supply one descriptive label for each of the 11 mtcars columns
plot_cor(
  mtcars,
  var.labels = c(
    "Miles per Gallon",
    "Cylinders",
    "Displacement (cu.in.)",
    "Gross Horsepower",
    "Rear Axle Ratio",
    "Weight ",
    "Mile Time (sec)",
    "Engine",
    "Transmission ",
    "Gears",
    "Carburetors"
  )
)

Turn off the significance stars

# Spearman correlations with the significance stars switched off
plot_cor(
  mtcars,
  stars = FALSE,
  method = "spearman"
)

Error Bars Plot

This function creates a line plot with points and customizable error bars (standard deviation, standard error, or confidence interval) for a numeric variable grouped by a categorical variable. Mean values can optionally be displayed above the points.

# Sepal.Length by Species with standard-error bars and mean labels shown
plot_errorbar(
  data = iris,
  var = Sepal.Length,
  by = Species,
  title = "Mean Sepal Length by Species",
  rotate = TRUE,
  # error bars
  error = "se",
  err.mult = 1,
  err.width = 0.05,
  color.error = "blue",
  # points and connecting line
  point.shape = 19,
  point.size = 3,
  color.point = "blue",
  line.color = "red",
  line.size = 0.5,
  # mean labels
  show.mean = TRUE,
  text.size = 3
)

showing mean value

SD instead of standard error (SE)

change x and y labels

# Sepal.Length by Species with standard-deviation error bars, mean labels
# hidden, and custom axis labels.
plot_errorbar(
  data = iris,
  var = Sepal.Length,
  by = Species,
  error = "sd",
  err.mult = 1,
  point.shape = 19,
  point.size = 3,
  line.color = "red",
  line.size = 0.5,
  color.point = "blue",
  color.error = "blue",
  show.mean = FALSE, # spelled out: `F` is an ordinary variable and can be reassigned
  text.size = 3,
  err.width = 0.05,
  title = "Mean Sepal Length by Species",
  rotate = TRUE,
  y.lab = "Sepal Length (cm)", # iris measurements are recorded in centimetres
  x.lab = "species"
)

Line Plot with Error Bars by Group and Time

This function creates a line plot showing the mean of a numeric variable over time for different groups, with optional error bars (standard deviation, standard error, or 95% confidence interval). Multiple groups are displayed on the same plot with customizable colors, point shapes, and line thickness.

# Build a hypothetical data set for the plot:
# 10 subject ids x 3 time points x 2 treatment groups
set.seed(123)
n_subj <- 10
time_points <- c("T1", "T2", "T3")
groups <- c("DrugA", "DrugB")

df <- expand.grid(
  id = seq_len(n_subj),
  time = time_points,
  group = groups
)

# Sort rows by group, then id, then time
df <- dplyr::arrange(df, group, id, time)

# Add a BMI column: a group- and time-specific centre plus Gaussian noise.
# The branch order is kept as-is so the random number stream is unchanged.
df <- dplyr::mutate(
  df,
  BMI = dplyr::case_when(
    group == "DrugA" & time == "T1" ~
      29 + stats::rnorm(dplyr::n(), mean = 0, sd = 0.3),
    group == "DrugA" & time == "T2" ~
      26 + stats::rnorm(dplyr::n(), mean = 0, sd = 0.3),
    group == "DrugA" & time == "T3" ~
      22 + stats::rnorm(dplyr::n(), mean = 0, sd = 0.3),
    group == "DrugB" & time == "T1" ~
      28 + stats::rnorm(dplyr::n(), mean = 0, sd = 0.3),
    group == "DrugB" & time == "T2" ~
      25 + stats::rnorm(dplyr::n(), mean = 0, sd = 0.2),
    group == "DrugB" & time == "T3" ~
      21 + stats::rnorm(dplyr::n(), mean = 0, sd = 0.2)
  )
)

Now run the plot

# BMI over time, one line per treatment group
plot_line(
  df,
  var = BMI,
  time = time,
  group = group,
  title = "BMI change over follow up",
  x.lab = "Time",
  y.lab = "BMI change (Kg/m^2)",
  color.lines = c("red", "blue"),
  line.size = 1,
  point.shape = 19, # shape of the points
  point.size = 3,
  err.mult = 1.5,
  err.width = 0.05,
  show.mean = FALSE,
  text.size = 3.5
)

Universal plot function for numeric comparisons

Publication-Quality Numeric Plot with Optional Grouping and Statistical Tests

Creates a publication-ready plot for numeric variables, including bar plots, violin plots, boxplots, and combinations (violin + box, violin + jitter, box + jitter). Supports error bars (SD, SE, CI), group comparisons, and automatic or specified statistical tests with optional post-hoc annotations.

Violin + Box plot for iris dataset

# Violin + box plot of Sepal.Length by Species with post-hoc annotations on
plot_numeric(
  data = iris,
  var = Sepal.Length,
  by = Species,
  geom_type = "violin_box",
  color.violin = c("#377eb8", "#ff7f00", "#4daf4a"),
  color.box = c("darkgreen", "#a65628", "#f781bf"),
  box.color = "black",
  box.width = 0.1,
  color.jitter = "red",
  jitter.size = 2,
  position.p = c(0.5, 9),
  ptext.size = 4,
  show.posthoc = TRUE
)

Bar plot with standard-error (SE) bars

Control the bar width with the `bar.width` argument

# Bar plot of Sepal.Length by Species with standard-error bars
plot_numeric(
  data = iris,
  var = Sepal.Length,
  by = Species,
  geom_type = "bar",
  error = "se",
  bar.width = 0.7,
  position.p = c(0.1, 7)
)

Violin plot with jitter points

# Violin plot of Sepal.Length by Species with jittered points
plot_numeric(
  data = iris,
  var = Sepal.Length,
  by = Species,
  geom_type = "violin_jitter",
  position.p = c(0.5, 9)
)

Scatter Plot with Linear Regression and Equation Annotation

This function creates a scatter plot of a numeric outcome against a numeric predictor, optionally grouped by a factor (by). A linear regression line is added with optional standard error (SE) shading, and the regression equation and R² value are displayed on the plot.

# Minimal call: scatter plot with a regression line and its equation
plot_scatter(
  mtcars, "mpg", "wt"
)
## `geom_smooth()` using formula = 'y ~ x'

# Scatter plot with regression line and equation, grouped by cylinder,
# with the SE band shown
plot_scatter(
  mtcars, "mpg", "wt",
  by = "cyl",
  ncol_by = 2,
  se = TRUE,
  se_fill = "#ff000055",
  line_color = "red",
  line_size = 0.9,
  eq_position = c(0.5, 0.95)
)
## `geom_smooth()` using formula = 'y ~ x'

Turn off the SE band

# Scatter plot with regression line and equation, grouped by cylinder,
# without the SE band
plot_scatter(
  mtcars, "mpg", "wt",
  by = "cyl",
  ncol_by = 2,
  se = FALSE,
  point_color = "blue",
  line_color = "darkgreen",
  line_size = 0.9,
  eq_position = c(0.5, 0.95)
)
## `geom_smooth()` using formula = 'y ~ x'

Plot Linear Regression Estimates with Confidence Intervals

This function fits univariate and multivariate linear regression models for a given outcome and a set of predictors. It returns a ggplot showing point estimates and 95% confidence intervals for each predictor. Reference levels of factors can optionally be added, and univariate and multivariate results are plotted side by side.

# Convert the categorical mtcars columns to factors; `am` gets readable labels
mtcars2 <- dplyr::mutate(
  mtcars,
  am = factor(am, labels = c("Automatic", "Manual")),
  cyl = factor(cyl),
  gear = factor(gear)
)

# Plot the linear-regression estimates for mpg
plot_lm(
  data = mtcars2,
  predictors = c("cyl", "hp", "wt", "am", "gear"),
  outcome = "mpg",
  point_shape = 18
)
## `height` was translated to `width`.

Turn off the reference category

# Same regression plot without the reference levels.
plot_lm(
  data = mtcars2,
  outcome = "mpg",
  ref = FALSE, # spelled out: `F` is an ordinary variable and can be reassigned
  predictors = c("cyl", "hp", "wt", "am", "gear"),
  point_shape = 18
)
## `height` was translated to `width`.

# Plot Odds Ratios from Logistic Regression with CI

This function fits univariate and multivariate logistic regression models and plots odds ratios with 95% confidence intervals. Reference levels can optionally be displayed.

 #Load built-in infertility dataset
# Work on a copy so the built-in data set itself is left untouched.
infert1 <- datasets::infert

# Recode on `infert1`, the object that is passed to plot_or() further down.
infert1$case <- factor(
  infert1$case,
  levels = c(0, 1),
  labels = c("Control", "Infertile")
)

# `induced` and `spontaneous` are coded 0, 1, 2 (2 = two or more). Collapse
# them to none vs. any so that rows coded 2 do not become NA.
infert1$induced <- factor(
  infert1$induced > 0,
  levels = c(FALSE, TRUE),
  labels = c("No", "Yes")
)
infert1$spontaneous <- factor(
  infert1$spontaneous > 0,
  levels = c(FALSE, TRUE),
  labels = c("No", "Yes")
)

# Odds-ratio plot with the reference levels displayed
plot_or(
  data = infert1,
  predictors = c("parity", "induced", "spontaneous", "age"),
  outcome = "case",
  ref = TRUE
)
## `height` was translated to `width`.

# Odds-ratio plot with the reference levels hidden
plot_or(
  data = infert1,
  predictors = c("parity", "induced", "spontaneous", "age"),
  outcome = "case",
  ref = FALSE
)
## `height` was translated to `width`.