# Load the ggplot2 package
library(ggplot2)Warning: package 'ggplot2' was built under R version 4.5.3
Create **multiple histograms** using ggplot2::facet_wrap() to visualize how a variable (e.g., Sepal.Length) is distributed across different groups (e.g., Species) in a built-in R dataset.
# Load the ggplot2 package
library(ggplot2)Warning: package 'ggplot2' was built under R version 4.5.3
# Load the iris dataset
data(iris)
# View the first few rows of the dataset
head(iris) Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
# Create histograms using facet_wrap for grouped data
ggplot(iris, aes(x = Sepal.Length)) +
geom_histogram(binwidth = 0.3,fill = "skyblue", color = "black") +
facet_wrap( ~ Species) +
labs(title = "Distribution of Sepal Length by Species",
x ="Sepal Length(cm)",
y = "Frequency") +
theme_minimal()# Load the ggplot2 for plotting
library(ggplot2)

#' Density plot of a continuous variable, split by a grouping variable
#'
#' Draws one semi-transparent density curve per level of `group_var`, using
#' the same variable for both the outline colour and the fill.
#'
#' @param data A data frame containing both columns.
#' @param continous_var Name (string) of the column to plot on the x axis.
#'   (The spelling of this argument is kept for backward compatibility.)
#' @param group_var Name (string) of the column that defines the groups.
#' @param fill_colors Optional vector of colours passed as `values` to
#'   `scale_fill_manual()` and `scale_color_manual()`. When `NULL`, the
#'   default ggplot2 palette is used.
#' @return A ggplot object.
plot_density_by_group <- function(data, continous_var, group_var,
                                  fill_colors = NULL) {
  # Check if the specified columns exist
  if (!(continous_var %in% names(data)) || !(group_var %in% names(data))) {
    stop("Invalid column names. Make sure both variables exist in the dataset.")
  }

  # Columns arrive as strings, so look them up through the `.data` pronoun.
  # This replaces aes_string(), which is deprecated since ggplot2 3.0.0.
  p <- ggplot(
    data,
    aes(
      x = .data[[continous_var]],
      color = .data[[group_var]],
      fill = .data[[group_var]]
    )
  ) +
    geom_density(alpha = 0.4) +
    labs(
      title = paste("Density plot of", continous_var, "by", group_var),
      x = continous_var,
      y = "Density",
      # Pin the legend titles to the column name so they do not depend on
      # how ggplot2 derives a default label from the `.data` expression.
      color = group_var,
      fill = group_var
    ) +
    theme_minimal()

  # Apply custom colours to both fill and outline if provided
  if (!is.null(fill_colors)) {
    p <- p +
      scale_fill_manual(values = fill_colors) +
      scale_color_manual(values = fill_colors)
  }

  p
}
# Basic usage
plot_density_by_group(iris,"Sepal.Length", "Species")Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.
# Define custom colors
custom_colors <- c("setosa" = "steelblue",
"versicolor" = "forestgreen",
"virginica" = "darkorange")
# Plot with correct argument name
plot_density_by_group(iris, "Petal.Length", "Species", fill_colors = custom_colors)plot_density_by_group(iris, "Sepal.Length", "Species", fill_colors = custom_colors)plot_density_by_group(iris, "Petal.Width", "Species", fill_colors = custom_colors)plot_density_by_group(iris, "Sepal.Width", "Species", fill_colors = custom_colors)To generate a basic box plot using ggplot2,enhanced with notches and outliers, and grouped by a categorical variable using an in built dataset in R.
We use the ggplot2 package for data visualization
#install.packages("ggplot2") # Uncomment if needed
library(ggplot2)iris dataset.This dataset#load and preview the dataset
data(iris)
head(iris) Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
str(iris)'data.frame': 150 obs. of 5 variables:
$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
Sepal.Length,grouped by Species.We’ll enhance the plot using:- Notches to show the confidence interval around the median.ggplot(iris, aes(x = Species,y = Sepal.Length))ggplot(iris, aes(x = Species,y = Sepal.Length)) +
geom_boxplot(
notch = TRUE,
notchwidth = 0.6,
outlier.colour = "red",
outlier.shape = 16,
fill = "skyblue",
alpha = 0.7
)ggplot(iris, aes(x = Species,y = Sepal.Length)) +
geom_boxplot(
notch = TRUE,
notchwidth = 0.6,
outlier.colour = "red",
outlier.shape = 16,
fill = "skyblue",
alpha = 0.7
) +
labs(
title = "Sepal length distribution by iris Species",
subtitle = "Box plot with notches and outlier highlighting",
x = "Species",
y = "Sepal.Length (cm)"
) +
theme_minimal()Box plot: each box summarizes the distribution of Sepal.Length for a speciesshowing the interquartile range(IQR),median, and potential outliers.
Notches: The notches give a rough 95% confidence interval around the median. If the notches of two boxes do not overlap, this is strong evidence that the medians differ.
outliers :points that fall outside 1.5 x IQR from the quartiles are considered outliers and shown in red.
grouping: the plot groups values based on the categorical variable species,helping compare between groups.
Aesthetics: theme_minimal() provides a clean background, while colors and transparency make the plot easier to read.
#Box plots: six values are usually displayed: the lowest value, the lower quartile (Q1), the median (Q2), the upper quartile (Q3), the highest value, and the mean
##Percentile: the sample 100p-th percentile is a value such that at least 100p% of the observations are at or below this value, and at least 100(1 - p)% are at or above this value.
##The following rule simplifies the calculation of sample percentiles. Calculate the sample 100 pth percentile: 1.Order the n observations from smallest to largest. 2.Determine the product np. If np is not an integer,round it up to the next integer and find the corresponding ordered value. If np is an integer,say k,calculate the mean of the kth and (k+1)st ordered observations.
136 143 147 151 158 160 161 163 165 167 173 174 181 181 185 188 190 205
# Notched box plots for the remaining three iris measurements.
# Each plot maps the variable named in its own title and y-axis label.

# Petal length by species
ggplot(iris, aes(x = Species, y = Petal.Length)) +
  geom_boxplot(
    notch = TRUE,
    notchwidth = 0.6,
    outlier.colour = "red",
    outlier.shape = 16,
    fill = "skyblue",
    alpha = 0.7
  ) +
  labs(
    title = "Petal length distribution by iris Species",
    subtitle = "Box plot with notches and outlier highlighting",
    x = "Species",
    y = "Petal.Length (cm)"
  ) +
  theme_minimal()

# Sepal width by species
ggplot(iris, aes(x = Species, y = Sepal.Width)) +
  geom_boxplot(
    notch = TRUE,
    notchwidth = 0.6,
    outlier.colour = "red",
    outlier.shape = 16,
    fill = "skyblue",
    alpha = 0.7
  ) +
  labs(
    title = "Sepal width distribution by iris Species",
    subtitle = "Box plot with notches and outlier highlighting",
    x = "Species",
    y = "Sepal.Width (cm)"
  ) +
  theme_minimal()

# Petal width by species
ggplot(iris, aes(x = Species, y = Petal.Width)) +
  geom_boxplot(
    notch = TRUE,
    notchwidth = 0.6,
    outlier.colour = "red",
    outlier.shape = 16,
    fill = "skyblue",
    alpha = 0.7
  ) +
  labs(
    title = "Petal width distribution by iris Species",
    subtitle = "Box plot with notches and outlier highlighting",
    x = "Species",
    y = "Petal.Width (cm)"
  ) +
theme_minimal()To generate a basic box plot using ggplot2,enhanced with notches and outliers, and grouped by a categorical variable using an in built dataset in R.
We use the ggplot2 package for data visualization
#install.packages("ggplot2") # Uncomment if needed
library(ggplot2)iris dataset.This dataset#load and preview the dataset
data(iris)
head(iris) Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
str(iris)'data.frame': 150 obs. of 5 variables:
$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
We create a violin plot for Sepal.Length grouped by Species.
# Empty canvas: only the data and the aesthetic mapping, no geom yet
ggplot(iris, aes(x = Species, y = Sepal.Length))

# Basic violin plot. geom_violin() has no notch or outlier parameters
# (those belong to geom_boxplot()), so only fill and alpha are set here.
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_violin(
    fill = "skyblue",
    alpha = 0.7
  )
# Violin plot with titles; the labels describe the variable actually plotted
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_violin(fill = "green", alpha = 0.7) +
  labs(
    title = "Sepal length distribution by iris Species",
    subtitle = "Violin plot of Sepal.Length for each species",
    x = "Species",
    y = "Sepal.Length (cm)"
  ) +
  theme_minimal()

# Violin plot coloured by species, with a legend
ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
  geom_violin(alpha = 0.7) +
  labs(
    title = "Sepal Length Distribution by Iris Species",
    subtitle = "Violin Plot with Legend",
    x = "Species",
    y = "Sepal Length (cm)",
    fill = "Species" # Legend title
  ) +
theme_minimal()To create multiple dot plots for grouped data and compare the distribution of tooth length across different supplement types and dosage levels using ggplot2 and dplyr.
We use ggplot2 for visualization and dplyr for data manipulation.
#install.packages("ggplot2") # Uncomment if needed
#install.packages("dplyr") # Uncomment if needed
library(ggplot2)
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
We will use the built-in ToothGrowth dataset. It contains:
# len → Tooth length
# supp → Supplement type (VC or OJ)
# dose → Dosage level
# Load and preview dataset
data(ToothGrowth)
head(ToothGrowth) len supp dose
1 4.2 VC 0.5
2 11.5 VC 0.5
3 7.3 VC 0.5
4 5.8 VC 0.5
5 6.4 VC 0.5
6 10.0 VC 0.5
str(ToothGrowth)'data.frame': 60 obs. of 3 variables:
$ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
$ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
$ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
Convert categorical variables into factors for better grouping.
ToothGrowth <- ToothGrowth %>%
mutate(
supp = as.factor(supp),
dose = as.factor(dose)
)We first create a simple dot plot of tooth length grouped by supplement type.
ggplot(ToothGrowth, aes(x = supp, y = len))ggplot(ToothGrowth, aes(x = supp, y = len, color = supp)) +
geom_dotplot(
binaxis = "y",
stackdir = "center",
dotsize = 0.8
)Bin width defaults to 1/30 of the range of the data. Pick better value with
`binwidth`.
We now create multiple dot plots based on dosage levels.
ggplot(ToothGrowth, aes(x = supp, y = len, color = supp)) +
geom_dotplot(
binaxis = "y",
stackdir = "center",
dotsize = 0.8
) +
facet_wrap(~ dose)Bin width defaults to 1/30 of the range of the data. Pick better value with
`binwidth`.
Enhance the plot with titles and clean styling.
ggplot(ToothGrowth, aes(x = supp, y = len, color = supp)) +
geom_dotplot(
binaxis = "y",
stackdir = "center",
dotsize = 0.8
) +
facet_wrap(~ dose) +
labs(
title = "Multiple Dot Plots of Tooth Growth",
subtitle = "Comparison across supplement types and dosage levels",
x = "Supplement Type",
y = "Tooth Length"
) +
theme_minimal()Bin width defaults to 1/30 of the range of the data. Pick better value with
`binwidth`.
# Load required Libraries
library(ggplot2)
library(tidyr)
library(dplyr)we use the built-in mtcars dataset
#Preview the dataset
head(mtcars) mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Use built-in mtcars dataset
data(mtcars)# Compute correlation matrix
cor_matrix <- cor(mtcars)
cor_matrix mpg cyl disp hp drat wt
mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684 0.68117191 -0.8676594
cyl -0.8521620 1.0000000 0.9020329 0.8324475 -0.69993811 0.7824958
disp -0.8475514 0.9020329 1.0000000 0.7909486 -0.71021393 0.8879799
hp -0.7761684 0.8324475 0.7909486 1.0000000 -0.44875912 0.6587479
drat 0.6811719 -0.6999381 -0.7102139 -0.4487591 1.00000000 -0.7124406
wt -0.8676594 0.7824958 0.8879799 0.6587479 -0.71244065 1.0000000
qsec 0.4186840 -0.5912421 -0.4336979 -0.7082234 0.09120476 -0.1747159
vs 0.6640389 -0.8108118 -0.7104159 -0.7230967 0.44027846 -0.5549157
am 0.5998324 -0.5226070 -0.5912270 -0.2432043 0.71271113 -0.6924953
gear 0.4802848 -0.4926866 -0.5555692 -0.1257043 0.69961013 -0.5832870
carb -0.5509251 0.5269883 0.3949769 0.7498125 -0.09078980 0.4276059
qsec vs am gear carb
mpg 0.41868403 0.6640389 0.59983243 0.4802848 -0.55092507
cyl -0.59124207 -0.8108118 -0.52260705 -0.4926866 0.52698829
disp -0.43369788 -0.7104159 -0.59122704 -0.5555692 0.39497686
hp -0.70822339 -0.7230967 -0.24320426 -0.1257043 0.74981247
drat 0.09120476 0.4402785 0.71271113 0.6996101 -0.09078980
wt -0.17471588 -0.5549157 -0.69249526 -0.5832870 0.42760594
qsec 1.00000000 0.7445354 -0.22986086 -0.2126822 -0.65624923
vs 0.74453544 1.0000000 0.16834512 0.2060233 -0.56960714
am -0.22986086 0.1683451 1.00000000 0.7940588 0.05753435
gear -0.21268223 0.2060233 0.79405876 1.0000000 0.27407284
carb -0.65624923 -0.5696071 0.05753435 0.2740728 1.00000000
# Convert matrix to a data frame for plotting
cor_df <- as.data.frame(as.table(cor_matrix))
head(cor_df) Var1 Var2 Freq
1 mpg mpg 1.0000000
2 cyl mpg -0.8521620
3 disp mpg -0.8475514
4 hp mpg -0.7761684
5 drat mpg 0.6811719
6 wt mpg -0.8676594
cor(mtcars) computes pairwise correlation.Freq). ## Step 2: Visualize Using ggplot2:: geom_tileggplot(cor_df, aes(x = Var1, y = Var2, fill = Freq)) +
geom_tile(color = "white")ggplot(cor_df, aes(x = Var1, y = Var2, fill = Freq)) +
geom_tile(color = "white") + #Draw tile borders
scale_fill_gradient2(
low = "blue" , mid = "white" , high = "red",
midpoint = 0, limit = c(-1, 1),
name = "Correlation"
)ggplot(cor_df, aes(x = Var1, y = Var2, fill = Freq)) +
geom_tile(color = "white") + #Draw tile borders
scale_fill_gradient2(
low = "blue" , mid = "white" , high = "red",
midpoint = 0, limit = c(-1, 1),
name = "Correlation"
) +
geom_text(aes(label = round(Freq, 2)), size = 3) +
# Show values
theme_minimal() +
labs(
title = "Correlation Matrix (mtcars)",
x = "", y = ""
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))