library(ggplot2)
# Histogram of sepal length, drawn in a separate panel for each iris species.
ggplot(iris, aes(x = Sepal.Length)) +
  geom_histogram(binwidth = 0.3, fill = "skyblue", color = "black") +
  facet_wrap(~Species) + # one histogram panel per species
  labs(
    title = "Distribution of Sepal Length by Species",
    x = "Sepal Length",
    y = "Count"
  ) +
  theme_minimal()
Program 15
Program 15
All programs
Program 9
Create multiple histograms using ggplot2 to visualize how a variable is distributed across different groups in a built-in R dataset.
Program 10
Develop an R program to draw a density curve representing the probability density function of a continuous variable, with separate curves for each group, using ggplot2.
# Load required package
library(ggplot2)
# Draw overlaid density curves of a continuous variable, one curve per group.
#
# Arguments:
#   data            Data frame that contains both columns.
#   continuous_var  Column name (string) plotted on the x axis.
#   group_var       Column name (string) mapped to both colour and fill.
#   fill_colors     Optional vector of colours handed to the manual fill and
#                   colour scales; NULL keeps the ggplot2 default palette.
#
# Returns the ggplot object. Stops with an error when either column name is
# not present in `data`.
plot_density_by_group <- function(data, continuous_var, group_var,
                                  fill_colors = NULL) {
  # Check that both columns exist in the dataset
  if (!all(c(continuous_var, group_var) %in% names(data))) {
    stop("Invalid column names: Make sure both variables exist in the dataset.")
  }

  # aes_string() was deprecated in ggplot2 3.0.0; the `.data` pronoun is the
  # tidy-evaluation way to map a column whose name is held in a string.
  p <- ggplot(
    data,
    aes(
      x = .data[[continuous_var]],
      color = .data[[group_var]],
      fill = .data[[group_var]]
    )
  ) +
    geom_density(alpha = 0.4) + # semi-transparent fill
    labs(
      title = paste("Density Plot of", continuous_var, "by", group_var),
      x = continuous_var,
      y = "Density",
      # Keep the legend titled with the plain column name
      color = group_var,
      fill = group_var
    ) +
    theme_minimal() +
    theme(plot.title = element_text(hjust = 0.5)) # center the title

  # Apply the custom palette to fill and outline alike, if one was given
  if (!is.null(fill_colors)) {
    p <- p +
      scale_fill_manual(values = fill_colors) +
      scale_color_manual(values = fill_colors)
  }

  p
}
# Example usage with the built-in iris dataset:
# density of Sepal.Length with one curve per Species
plot_density_by_group(iris, "Sepal.Length", "Species")
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.
Program 11
Generate a basic box plot using ggplot2, enhanced with notches and outliers and grouped by a categorical variable, using a built-in dataset in R.
library(ggplot2)
# Load the built-in iris dataset into the workspace
data(iris)
# Print the first rows to check the column names and values
head(iris)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
# Show the structure of iris: dimensions, column types and leading values
str(iris)
'data.frame': 150 obs. of 5 variables:
$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Notched box plot of sepal length for each species, with outliers
# highlighted in red.
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_boxplot(
    notch = TRUE,
    notchwidth = 0.5,
    outlier.colour = "red",
    outlier.shape = 16,
    fill = "skyblue"
  ) +
  labs(
    title = "Basic Box Plot",
    x = "Species",
    y = "Sepal Length"
  ) +
  theme_minimal()
Program 12
Develop a script to create a violin plot displaying the distribution of a continuous variable, with separate violins for each group.
library(ggplot2)
# Load the built-in iris dataset into the workspace
data(iris)
# Print the first rows to check the column names and values
head(iris)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
# Violin plot of petal length, one violin per species, filled by species.
ggplot(iris, aes(x = Species, y = Petal.Length, fill = Species)) +
  # trim = FALSE lets the violin tails extend past the observed range
  geom_violin(trim = FALSE, alpha = 0.6, color = "black") +
  labs(
    title = "Distribution of Petal Length by Iris Species",
    x = "Species",
    y = "Petal Length (cm)"
  ) +
  theme_minimal(base_size = 14)
Program 13
Write an R program to create multiple dot plots for grouped data, comparing the distribution of a variable across different groups using ggplot2’s `position_dodge()` function.
library(ggplot2)
# Work on a copy so the built-in mtcars data frame itself is not modified
data <- mtcars
# Treat cylinder and gear counts as categories rather than numbers
data$cyl <- as.factor(data$cyl)
data$gear <- as.factor(data$gear)

# Dot plot of MPG per cylinder group; dots for each gear level are placed
# side by side via position_dodge().
ggplot(data, aes(x = cyl, y = mpg, color = gear)) +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    position = position_dodge(width = 0.7),
    dotsize = 0.6
  ) +
  labs(
    title = "Dot Plot of MPG by Cylinder and Gear",
    x = "Number of Cylinders",
    y = "Miles Per Gallon (MPG)",
    color = "Gear"
  ) +
  theme_minimal()
Bin width defaults to 1/30 of the range of the data. Pick better value with
`binwidth`.
Program 14
Develop an R program to calculate and visualize a correlation matrix for a given dataset, with color-coded cells indicating the strength and direction of correlation, using ggplot2’s `geom_tile()` function.
# Load necessary libraries
library(ggplot2)
library(reshape2)
# Plot a correlation heatmap for the numeric columns of a data frame.
#
# Arguments:
#   data   Data frame; non-numeric columns are ignored.
#   title  Plot title.
#
# Returns the ggplot object: one tile per variable pair, filled on a
# blue-white-red scale and labelled with the correlation rounded to two
# decimals. Stops with an error when `data` has no numeric column.
plot_correlation_matrix <- function(data,
                                    title = "Correlation Matrix Heatmap") {
  # Step 1: Filter numeric columns (vapply guarantees a logical vector even
  # for a data frame with zero columns)
  is_num <- vapply(data, is.numeric, logical(1))
  if (!any(is_num)) {
    stop("`data` must contain at least one numeric column.", call. = FALSE)
  }
  numeric_data <- data[is_num]

  # Step 2: Calculate the correlation matrix, using complete rows only
  cor_matrix <- cor(numeric_data, use = "complete.obs")

  # Step 3: Melt the matrix into long format (one row per variable pair)
  cor_df <- reshape2::melt(
    cor_matrix,
    varnames = c("Variable1", "Variable2"),
    value.name = "Correlation"
  )

  # Step 4: Create the heatmap
  ggplot(cor_df, aes(x = Variable1, y = Variable2, fill = Correlation)) +
    geom_tile(color = "white") +
    geom_text(aes(label = round(Correlation, 2)), color = "black", size = 3.5) +
    scale_fill_gradient2(
      low = "blue",
      high = "red",
      mid = "white",
      midpoint = 0,
      limits = c(-1, 1), # fixed range so colours are comparable across plots
      name = "Correlation"
    ) +
    theme_minimal(base_size = 12) +
    coord_fixed() + # square tiles
    labs(
      title = title,
      x = NULL,
      y = NULL
    ) +
    theme(
      axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
      panel.grid = element_blank()
    )
}
# Example usage: correlation heatmap for the built-in mtcars dataset,
# using the default title
plot_correlation_matrix(mtcars)