# load the package
library(pubrplot)
# We will use the diamonds dataset, so load it from ggplot2.
# Bind ggplot2's diamonds dataset to a local name used by the plot_norm examples
diamonds <- ggplot2::diamonds
# See which functions the package provides.
# Open the package index listing the documented functions
help(package = "pubrplot")
# This function visualizes the distribution of multiple numeric variables
# using boxplots or histograms with overlaid normal distribution curves. It
# automatically selects the appropriate normality test based on sample size:
# the Shapiro–Wilk test is applied when the sample size is <= 5000, while the
# Kolmogorov–Smirnov test is used for larger samples (> 5000). The resulting
# p-values are displayed directly on the plots.
# Example 1: boxplots; n <= 5000, so the Shapiro-Wilk test is used
plot_norm(
  data = diamonds[seq_len(4000), ], # keep only the first 4000 rows
  vars = c("carat", "price"),
  geom = "box",
  color_bar = "firebrick"
)
# Example 2: histograms; n <= 5000, so the Shapiro-Wilk test is used
plot_norm(
  data = diamonds[seq_len(4000), ],
  vars = c("carat", "x", "y"),
  geom = "hist",
  bins = 20, # number of histogram bins
  p.ypos = 2
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the pubrplot package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## ℹ The deprecated feature was likely used in the pubrplot package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Example 3: n > 5000, so the Kolmogorov-Smirnov test is applied automatically
plot_norm(
  data = diamonds[seq_len(6000), ],
  vars = c("carat", "x"),
  geom = "hist",
  bins = 25,
  color_line = "darkgreen",
  p.ypos = 2.24 # vertical position of the p-value label
)
# Bar plot
# Creates a publication-quality bar plot for a categorical variable, with
# optional grouping by another variable. Counts and percentages are calculated
# automatically and can be displayed on the bars. A Chi-square or Fisher's
# exact test is also performed if `by` is provided.
# Grouped bar plot on the built-in CO2 dataset: Type split by Treatment
plot_bar(
  CO2,
  var = "Type",
  by = "Treatment",
  fill.lab = "Plant Type",
  color.bar = c("lightblue", "lightgreen"),
  border.color = "black",
  bar.width = 0.5,
  text.size = 3,
  label = c("Quebec", "Mississippi")
)
# Grouped bar plot on the diamonds dataset: cut split by color,
# with one fill colour supplied per cut level
plot_bar(
  ggplot2::diamonds,
  var = "cut",
  by = "color",
  y.lab = "Distribution (%)",
  fill.lab = "Cut",
  text.size = 2,
  bar.width = 0.9,
  color.bar = c(
    "#a465db",
    "steelblue",
    "darkgreen",
    "darkred",
    "#fcba03"
  )
)
# Ungrouped bar plot: only `var` is supplied, no `by`
plot_bar(
  ggplot2::diamonds,
  var = "cut"
)
# Creates a publication-ready correlation heatmap for the numeric variables in
# a data frame. Each tile shows the correlation coefficient, with optional
# significance stars.
# Correlation heatmap with default settings
plot_cor(mtcars)

# Same heatmap with a descriptive label for each of the 11 mtcars columns
plot_cor(
  mtcars,
  var.labels = c(
    "Miles per Gallon",
    "Cylinders",
    "Displacement (cu.in.)",
    "Gross Horsepower",
    "Rear Axle Ratio",
    "Weight ",
    "Mile Time (sec)",
    "Engine",
    "Transmission ",
    "Gears",
    "Carburetors"
  )
)

# Spearman correlation with the significance stars switched off
plot_cor(
  mtcars,
  method = "spearman",
  stars = FALSE
)
# This function creates a line plot with points and customizable error bars
# (standard deviation, standard error, or confidence interval) for a numeric
# variable grouped by a categorical variable. Mean values can optionally be
# displayed above the points.
# Mean sepal length per species with standard-error bars and mean labels shown
plot_errorbar(
  data = iris,
  var = Sepal.Length,
  by = Species,
  error = "se",
  err.mult = 1,
  point.shape = 19,
  point.size = 3,
  line.color = "red",
  line.size = 0.5,
  color.point = "blue",
  color.error = "blue",
  show.mean = TRUE,
  text.size = 3,
  err.width = 0.05,
  title = "Mean Sepal Length by Species",
  rotate = TRUE
)
# Mean sepal length per species with standard-deviation bars, mean labels
# hidden, and custom axis labels
plot_errorbar(
  data = iris,
  var = Sepal.Length,
  by = Species,
  error = "sd",
  err.mult = 1,
  point.shape = 19,
  point.size = 3,
  line.color = "red",
  line.size = 0.5,
  color.point = "blue",
  color.error = "blue",
  show.mean = FALSE, # spelled out: `F` is an ordinary variable that can be reassigned
  text.size = 3,
  err.width = 0.05,
  title = "Mean Sepal Length by Species",
  rotate = TRUE,
  y.lab = "Sepal Length (cm)", # iris measurements are recorded in centimetres
  x.lab = "species"
)
# This function creates a line plot showing the mean of a numeric variable over
# time for different groups, with optional error bars (standard deviation,
# standard error, or 95% confidence interval). Multiple groups are displayed on
# the same plot with customizable colors, point shapes, and line thickness.
# Build a hypothetical repeated-measures dataset for the line plot:
# 10 subjects x 3 time points x 2 treatment groups
set.seed(123)
n_subj <- 10
time_points <- c("T1", "T2", "T3")
groups <- c("DrugA", "DrugB")

# One row per subject / time / group combination
df <- expand.grid(
  id = seq_len(n_subj),
  time = time_points,
  group = groups
)

# Sort rows by group, then subject, then time
df <- dplyr::arrange(df, group, id, time)

# Simulate BMI: a baseline per (time, group) cell plus Gaussian noise.
# The branches are kept in their original order so the random draws match.
df <- dplyr::mutate(
  df,
  BMI = dplyr::case_when(
    time == "T1" & group == "DrugA" ~ 29 + stats::rnorm(dplyr::n(), 0, 0.3),
    time == "T2" & group == "DrugA" ~ 26 + stats::rnorm(dplyr::n(), 0, 0.3),
    time == "T3" & group == "DrugA" ~ 22 + stats::rnorm(dplyr::n(), 0, 0.3),
    time == "T1" & group == "DrugB" ~ 28 + stats::rnorm(dplyr::n(), 0, 0.3),
    time == "T2" & group == "DrugB" ~ 25 + stats::rnorm(dplyr::n(), 0, 0.2),
    time == "T3" & group == "DrugB" ~ 21 + stats::rnorm(dplyr::n(), 0, 0.2)
  )
)
# Now run the plot.
# Mean BMI over time, one line per treatment group
plot_line(
  df,
  var = BMI,
  time = time,
  group = group,
  err.mult = 1.5,
  point.shape = 19, # point shape code
  point.size = 3,
  line.size = 1,
  color.lines = c("red", "blue"),
  show.mean = FALSE,
  text.size = 3.5,
  err.width = 0.05,
  x.lab = "Time",
  y.lab = "BMI change (Kg/m^2)",
  title = "BMI change over follow up"
)
# Creates a publication-ready plot for numeric variables, including bar plots,
# violin plots, boxplots, and combinations (violin + box, violin + jitter,
# box + jitter). Supports error bars (SD, SE, CI), group comparisons, and
# automatic or specified statistical tests with optional post-hoc annotations.
# Violin + box plot of sepal length by species with post-hoc annotations
plot_numeric(
  data = iris,
  var = Sepal.Length,
  by = Species,
  geom_type = "violin_box",
  box.width = 0.1,
  color.violin = c("#377eb8", "#ff7f00", "#4daf4a"),
  color.box = c("darkgreen", "#a65628", "#f781bf"),
  box.color = "black",
  color.jitter = "red",
  position.p = c(.5, 9),
  jitter.size = 2,
  ptext.size = 4,
  show.posthoc = TRUE
)
# Control the bar width with the `bar.width` argument.
# Bar plot of sepal length by species with standard-error bars
plot_numeric(
  data = iris,
  var = Sepal.Length,
  by = Species,
  geom_type = "bar",
  bar.width = .7,
  position.p = c(.1, 7),
  error = "se"
)
# Violin + jitter plot of sepal length by species
plot_numeric(
  data = iris,
  var = Sepal.Length,
  by = Species,
  position.p = c(.5, 9),
  geom_type = "violin_jitter"
)
# This function creates a scatter plot of a numeric outcome against a numeric
# predictor, optionally grouped by a factor (`by`). A linear regression line is
# added with optional standard error (SE) shading, and the regression equation
# and R² value are displayed on the plot.
# Basic scatter plot of mpg and wt with regression line and equation
plot_scatter(
  mtcars,
  "mpg",
  "wt"
)
## `geom_smooth()` using formula = 'y ~ x'
# Scatter plot with regression line and equation, grouped by cylinder,
# with SE shading enabled
plot_scatter(
  mtcars,
  "mpg",
  "wt",
  by = "cyl",
  line_color = "red",
  se_fill = "#ff000055",
  line_size = 0.9,
  se = TRUE,
  eq_position = c(0.5, 0.95),
  ncol_by = 2
)
## `geom_smooth()` using formula = 'y ~ x'
# Turn off the SE shading.
# Scatter plot with regression line and equation, grouped by cylinder,
# with SE shading disabled
plot_scatter(
  mtcars,
  "mpg",
  "wt",
  by = "cyl",
  point_color = "blue",
  line_color = "darkgreen",
  line_size = 0.9,
  se = FALSE,
  eq_position = c(0.5, 0.95),
  ncol_by = 2
)
## `geom_smooth()` using formula = 'y ~ x'
# This function fits univariate and multivariate linear regression models for a
# given outcome and a set of predictors. It returns a ggplot showing point
# estimates and 95% confidence intervals for each predictor. Reference levels
# of factors can optionally be added, and univariate and multivariate results
# are plotted side by side.
# Copy mtcars with cyl, am and gear converted to factors;
# am additionally gets the labels "Automatic" / "Manual"
mtcars2 <- dplyr::mutate(
  mtcars,
  cyl = factor(cyl),
  am = factor(
    am,
    labels = c("Automatic", "Manual")
  ),
  gear = factor(gear)
)
# Linear-regression plot of mpg on five predictors, with `ref` left at its default
plot_lm(
  data = mtcars2,
  outcome = "mpg",
  predictors = c("cyl", "hp", "wt", "am", "gear"),
  point_shape = 18
)
## `height` was translated to `width`.
# Turn off the reference category.
# Same linear-regression plot with `ref` switched off
plot_lm(
  data = mtcars2,
  outcome = "mpg",
  ref = FALSE, # spelled out: `F` is an ordinary variable that can be reassigned
  predictors = c("cyl", "hp", "wt", "am", "gear"),
  point_shape = 18
)
## `height` was translated to `width`.
# Plot Odds Ratios from Logistic Regression with CI
# This function fits univariate and multivariate logistic regression models and
# plots odds ratios with 95% confidence intervals. Reference levels can
# optionally be displayed.
# Load the built-in infertility dataset into a working copy. All recoding is
# applied to `infert1`, because that is the object passed to plot_or().
infert1 <- datasets::infert

# Outcome: 0 = control, 1 = infertile case
infert1$case <- factor(
  infert1$case,
  levels = c(0, 1),
  labels = c("Control", "Infertile")
)

# `induced` and `spontaneous` are counts of prior abortions coded 0, 1, 2
# (2 = two or more). Recode them as none vs. any, so that rows with a value
# of 2 fall under "Yes" instead of silently becoming NA.
infert1$induced <- factor(
  infert1$induced > 0,
  levels = c(FALSE, TRUE),
  labels = c("No", "Yes")
)
infert1$spontaneous <- factor(
  infert1$spontaneous > 0,
  levels = c(FALSE, TRUE),
  labels = c("No", "Yes")
)
# Odds-ratio plot with reference levels shown
plot_or(
  data = infert1,
  outcome = "case",
  predictors = c("parity", "induced", "spontaneous", "age"),
  ref = TRUE
)
## `height` was translated to `width`.
# Odds-ratio plot with reference levels hidden
plot_or(
  data = infert1,
  outcome = "case",
  predictors = c("parity", "induced", "spontaneous", "age"),
  ref = FALSE
)
## `height` was translated to `width`.