# Load the ggplot2 package
library(ggplot2)
Program 15
USN
1NT24IS066
Program 9
Create multiple histograms using ggplot2::facet_wrap() to visualize how a variable (e.g., Sepal.Length) is distributed across different groups (e.g., Species) in a built-in R dataset.
Step 1: Load and Explore the Dataset
# Load the iris dataset
data(iris)
# View the first few rows of the dataset
head(iris) Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
Step 2: Create Grouped Histograms Using facet_wrap
# Create histograms using facet_wrap for grouped data
# Build the faceted histogram as ONE chained expression so every layer is
# kept. Assigning each `p + layer` to a separate variable throws away the
# previously added layers and leaves `p` itself as an empty canvas.
p <- ggplot(iris, aes(x = Sepal.Length)) +
  # Histogram of sepal length with a bin width of 0.3
  geom_histogram(binwidth = 0.3, fill = "skyblue", color = "black") +
  # One panel per species
  facet_wrap(~ Species) +
  labs(
    title = "Distribution of Sepal Length by Species",
    x = "Sepal Length (cm)",
    y = "Frequency"
  ) +
  theme_minimal()
# `pp` is kept as an alias of the finished plot for any code that still
# refers to it.
pp <- p
p
| ## Program 10 |
| 10. Develop an R function to draw a density curve representing the probability density function of a continuous variable, with separate curves for each group, using ggplot2. |
| ## Step 1: Load Required Library |
| ::: {.cell} |
{.r .cell-code} # Load ggplot2 for plotting library(ggplot2) ::: |
| ## Step 2: Define the Function |
| ::: {.cell} |
#' Draw per-group density curves for a continuous variable
#'
#' @param data A data frame that contains both columns named below.
#' @param continuous_var Name (string) of the column plotted on the x axis.
#' @param group_var Name (string) of the column used for the outline and
#'   fill colour, giving one density curve per group.
#' @param fill_colors Optional vector of colours passed as `values` to
#'   `scale_fill_manual()` and `scale_color_manual()`. `NULL` (the default)
#'   keeps the ggplot2 default palette.
#' @return A ggplot object.
plot_density_by_group <- function(data, continuous_var, group_var,
                                  fill_colors = NULL) {
  # Check if the specified columns exist
  if (!(continuous_var %in% names(data)) || !(group_var %in% names(data))) {
    stop("Invalid column names. Make sure both variables exist in the dataset.")
  }

  # Create the ggplot object. Columns are looked up by name through the
  # `.data` pronoun, which replaces the deprecated `aes_string()`.
  p <- ggplot(
    data,
    aes(
      x = .data[[continuous_var]],
      color = .data[[group_var]],
      fill = .data[[group_var]]
    )
  ) +
    geom_density(alpha = 0.4) +
    labs(
      title = paste("Density Plot of", continuous_var, "by", group_var),
      x = continuous_var,
      y = "Density",
      # Name the legends explicitly so they show the column name
      color = group_var,
      fill = group_var
    ) +
    theme_minimal()

  # Apply custom fill colors if provided
  if (!is.null(fill_colors)) {
    p <- p +
      scale_fill_manual(values = fill_colors) +
      scale_color_manual(values = fill_colors)
  }

  p
}
# Basic usage
plot_density_by_group(iris, "Sepal.Length", "Species")
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.
# Define custom colors
custom_colors <- c("setosa" = "steelblue",
"versicolor" = "forestgreen",
"virginica" = "darkorange")
# Plot with custom colors
plot_density_by_group(iris, "Petal.Length", "Species", fill_colors = custom_colors)
Program 11
To generate a basic box plot using ggplot2, enhanced with notches and outliers and grouped by a categorical variable, using a built-in dataset in R.
Step 1: Load Required Package
We use the ggplot2 package for data visualization. If it’s not already installed, you can install it using:
# install.packages("ggplot2") # Uncomment if needed
library(ggplot2)
Step 2: Use an inbuilt Dataset
We will use the built-in iris dataset. This dataset contains measurements of sepal and petal dimensions for three species of iris flowers: - setosa - versicolor - virginica
# Load and preview the dataset
data(iris)
head(iris) Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
str(iris)'data.frame': 150 obs. of 5 variables:
$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
The Species column is categorical, making it suitable for grouping, while Sepal.Length is a numeric variable we’ll analyze
Step 3: Create a Notched Box Plot Grouped by Species
We now create a box plot for Sepal.Length, grouped by Species. We’ll enhance the plot using: - Notches to show the confidence interval around the median - Outlier highlighting using color and shape - Aesthetic enhancements like fill color and theme
# Build the notched box plot as ONE chained expression so every layer is
# kept. Assigning each `p + layer` to a separate variable throws away the
# previously added layers and leaves `p` itself without a boxplot geom.
p <- ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_boxplot(
    notch = TRUE,            # draw notches around the median
    notchwidth = 0.6,
    outlier.color = "red",   # highlight outliers
    outlier.shape = 16,
    fill = "skyblue",
    alpha = 0.7
  ) +
  labs(
    title = "Sepal Length Distribution by Iris Species",
    subtitle = "Box Plot with Notches and Outlier Highlighting",
    x = "Species",
    y = "Sepal Length (cm)"
  ) +
  theme_minimal()
# `pp` is kept as an alias of the finished plot for any code that still
# refers to it.
pp <- p
p
- Box Plot: Each box summarizes the distribution of Sepal.Length for a species, showing the interquartile range (IQR), median, and potential outliers.
- Notches: The notches give a rough 95% confidence interval around the median. If the notches of two boxes do not overlap, the medians are significantly different.
- Outliers: Points that fall outside 1.5 x IQR from the quartiles are considered outliers and shown in red.
- Grouping: The plot groups values based on the categorical variable Species, helping compare between groups.
- Aesthetics: theme_minimal() provides a clean background, while colors and transparency make the plot readable.
Program 12
Step 1: Install/Load the required library
We use the ggplot2 library; it is already installed, so we load it directly.
library(ggplot2)
Step 2: Analyze the dataset - IRIS/ mtcars
We use iris dataset for violin
tail(iris) Sepal.Length Sepal.Width Petal.Length Petal.Width Species
145 6.7 3.3 5.7 2.5 virginica
146 6.7 3.0 5.2 2.3 virginica
147 6.3 2.5 5.0 1.9 virginica
148 6.5 3.0 5.2 2.0 virginica
149 6.2 3.4 5.4 2.3 virginica
150 5.9 3.0 5.1 1.8 virginica
head(iris,n=1) Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
tail(iris, n=1) Sepal.Length Sepal.Width Petal.Length Petal.Width Species
150 5.9 3 5.1 1.8 virginica
str(iris)'data.frame': 150 obs. of 5 variables:
$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
table(iris$Species)
setosa versicolor virginica
50 50 50
# One base plot per iris measurement, each drawn against Species and
# filled by Species.
p <- ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species))
q <- ggplot(iris, aes(x = Species, y = Sepal.Width, fill = Species))
r <- ggplot(iris, aes(x = Species, y = Petal.Length, fill = Species))
s <- ggplot(iris, aes(x = Species, y = Petal.Width, fill = Species))

# Show the four canvases before any geom has been added
p
q
r
s

# Add the violin layer to each plot
p <- p + geom_violin()
q <- q + geom_violin()
r <- r + geom_violin()
s <- s + geom_violin()
p;q;r;s;
Step 5: Update the title, label
# Attach a title and axis labels to each of the four violin plots
p <- p + labs(
  title = "violin plot showing density of sepal length",
  x = "Species",
  y = "Sepal Length"
)
q <- q + labs(
  title = "Violin Plot showing density of sepal width",
  x = "Species",
  y = "Sepal Width"
)
r <- r + labs(
  title = "Violin Plot showing density of Petal Length",
  x = "Species",
  y = "Petal Length"
)
s <- s + labs(
  title = "Violin Plot showing density of Petal width",
  x = "Species",
  y = "Petal Width"
)
p;q;r;s;
Step 6: apply the clean theme
# Switch all four violin plots to the minimal theme
p <- p + theme_minimal()
q <- q + theme_minimal()
r <- r + theme_minimal()
s <- s + theme_minimal()
p;q;r;s;
Program 13
To create multiple dot plots for grouped data using ggplot2, and compare distributions across different categories.
Step 1: Load Required Package
library(ggplot2)
Step 2
data(mtcars)
head(mtcars)
 mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
str(mtcars)'data.frame': 32 obs. of 11 variables:
$ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
$ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
$ disp: num 160 160 108 258 360 ...
$ hp : num 110 110 93 110 175 105 245 62 95 123 ...
$ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
$ wt : num 2.62 2.88 2.32 3.21 3.44 ...
$ qsec: num 16.5 17 18.6 19.4 17 ...
$ vs : num 0 0 1 1 0 1 0 1 1 1 ...
$ am : num 1 1 1 0 0 0 0 0 0 0 ...
$ gear: num 4 4 4 3 3 3 3 4 4 4 ...
$ carb: num 4 4 1 1 2 1 4 2 2 4 ...
Step 3
# Dot plot of mileage for each cylinder count.
# `binwidth` is set explicitly to the value ggplot2 would otherwise choose
# (1/30 of the mpg range), so the plot looks the same but the
# "Bin width defaults to 1/30 of the range of the data" message is gone.
ggplot(mtcars, aes(x = as.factor(cyl), y = mpg)) +
  geom_dotplot(
    binaxis = "y",          # bin along mpg
    stackdir = "center",    # stack dots symmetrically around each group
    binwidth = diff(range(mtcars$mpg)) / 30,
    fill = "skyblue",
    dotsize = 1
  ) +
  labs(
    title = "Dot Plot of Mileage by Cylinder Groups",
    x = "Number of Cylinders",
    y = "Miles per Gallon (mpg)"
  ) +
  theme_minimal()
Step 4
# Dot plot of mileage for each cylinder count, with dots coloured by
# cylinder group.
# `binwidth` is set explicitly to the value ggplot2 would otherwise choose
# (1/30 of the mpg range), so the plot looks the same but the
# "Bin width defaults to 1/30 of the range of the data" message is gone.
ggplot(mtcars, aes(x = as.factor(cyl), y = mpg, fill = as.factor(cyl))) +
  geom_dotplot(
    binaxis = "y",          # bin along mpg
    stackdir = "center",    # stack dots symmetrically around each group
    binwidth = diff(range(mtcars$mpg)) / 30,
    dotsize = 1
  ) +
  labs(
    title = "Grouped Dot Plot of Mileage",
    x = "Number of Cylinders",
    y = "Miles per Gallon (mpg)",
    fill = "Cylinders"
  ) +
  theme_minimal()
Program 14
Develop a script in R to calculate and visualize a correlation matrix for a given dataset, with color-coded cells indicating the strength and direction of correlation, using ggplot2’s geom_tile function.
#load the required libraries
library(ggplot2)
library(tidyr)
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Dataset
We use the built-in mtcars dataset
#preview the dataset
head(mtcars) mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Make the built-in mtcars dataset available
data(mtcars)

# Pairwise correlations between all columns of mtcars
cor_matrix <- cor(mtcars)

# Reshape the correlation matrix into a long data frame (one row per pair
# of variables) so it can be plotted
cor_df <- as.data.frame(as.table(cor_matrix))
head(cor_df) Var1 Var2 Freq
1 mpg mpg 1.0000000
2 cyl mpg -0.8521620
3 disp mpg -0.8475514
4 hp mpg -0.7761684
5 drat mpg 0.6811719
6 wt mpg -0.8676594
Explanation
- cor(mtcars) computes pairwise correlations.
- as.table() flattens the matrix into a long-format table.
- The result has 3 columns: Var1, Var2 and the correlation value Freq.
Visualize using ggplot
# Build the correlation heatmap as ONE chained expression so every layer is
# kept. Assigning each `p + layer` to a separate variable throws away the
# previously added layers and leaves `p` itself without any tiles.
p <- ggplot(cor_df, aes(x = Var1, y = Var2, fill = Freq)) +
  # Draw the tiles with white borders
  geom_tile(color = "white") +
  # Diverging palette centred on 0 and fixed to the full [-1, 1] range
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red",
    midpoint = 0, limits = c(-1, 1),
    name = "correlation"
  ) +
  # Show the rounded correlation value inside each tile
  geom_text(aes(label = round(Freq, 2)), size = 3) +
  theme_minimal() +
  labs(
    title = "Correlation Matrix(mtcars)",
    x = "", y = ""
  ) +
  # Added after theme_minimal() so the complete theme does not reset it
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# `pp` is kept as an alias of the finished plot for any code that still
# refers to it.
pp <- p
p