Program 15

Author

Parth Bishnoi

Program 15

All programs

Program 9

Create multiple histograms using ggplot2 to visualize how a variable is distributed across different groups in a built-in R dataset.

# Load the plotting package
library(ggplot2)

# One histogram panel per iris species, all showing sepal length
ggplot(data = iris, mapping = aes(x = Sepal.Length)) +
  geom_histogram(color = "black", fill = "skyblue", binwidth = 0.3) +
  ggtitle("Distribution of Sepal Length by Species") +
  xlab("Sepal Length") +
  ylab("Count") +
  facet_wrap(vars(Species)) +  # split the panels by species
  theme_minimal()

Program 10

Develop an R program to draw a density curve representing the probability density function of a continuous variable, with separate curves for each group, using ggplot2.

# Load required package
library(ggplot2)

# Define a function to draw density plots
#' Draw overlaid density curves of a continuous variable, one curve per group
#'
#' @param data A data frame containing both columns named below.
#' @param continuous_var Name (string) of the column plotted on the x-axis.
#' @param group_var Name (string) of the column used to colour and fill the
#'   curves.
#' @param fill_colors Optional vector of colours handed to the manual fill and
#'   colour scales. `NULL` (the default) keeps ggplot2's default palette.
#' @return A ggplot object.
plot_density_by_group <- function(data, continuous_var, group_var, fill_colors = NULL) {

  # Check if the columns exist in the dataset
  if (!(continuous_var %in% names(data)) || !(group_var %in% names(data))) {
    stop("Invalid column names: Make sure both variables exist in the dataset.")
  }

  # Columns arrive as strings, so they are looked up through the `.data`
  # pronoun inside aes(); this replaces the deprecated aes_string().
  p <- ggplot(
    data,
    aes(
      x = .data[[continuous_var]],
      color = .data[[group_var]],
      fill = .data[[group_var]]
    )
  ) +
    geom_density(alpha = 0.4) +  # semi-transparent fill
    labs(
      title = paste("Density Plot of", continuous_var, "by", group_var),
      x = continuous_var,
      y = "Density",
      # Same title for both legends so they are drawn as one
      color = group_var,
      fill = group_var
    ) +
    theme_minimal() +
    theme(plot.title = element_text(hjust = 0.5))  # center the title

  # If custom colors are provided, use them for both outline and fill
  if (!is.null(fill_colors)) {
    p <- p +
      scale_fill_manual(values = fill_colors) +
      scale_color_manual(values = fill_colors)
  }

  p
}

# Demo on the iris data: one sepal-length density curve per species
plot_density_by_group(
  data = iris,
  continuous_var = "Sepal.Length",
  group_var = "Species"
)
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.

Program 11

Generate a basic box plot using ggplot2, enhanced with notches and highlighted outliers, and grouped by a categorical variable, using a built-in R dataset.

library(ggplot2)
# Load the built-in iris data and preview its first six rows
data("iris")
head(iris, n = 6L)
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa
# Print a compact summary of each column's type and first values
str(object = iris)
'data.frame':   150 obs. of  5 variables:
 $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
 $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
 $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
 $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
 $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Notched box plot of sepal length for each species; outliers are solid red dots
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  geom_boxplot(
    fill = "skyblue",
    outlier.shape = 16,
    outlier.colour = "red",
    notchwidth = 0.5,
    notch = TRUE
  ) +
  theme_minimal() +
  ggtitle("Basic Box Plot") +
  xlab("Species") +
  ylab("Sepal Length")

Program 12

Develop a script to create a violin plot displaying the distribution of a continuous variable, with separate violins for each group.

library(ggplot2)
# Make the iris data available and look at its leading observations
data(list = "iris")
head(x = iris)
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa
# One untrimmed, semi-transparent violin of petal length per species
ggplot(
  data = iris,
  mapping = aes(x = Species, y = Petal.Length, fill = Species)
) +
  geom_violin(colour = "black", alpha = 0.6, trim = FALSE) +
  theme_minimal(base_size = 14) +
  ggtitle("Distribution of Petal Length by Iris Species") +
  xlab("Species") +
  ylab("Petal Length (cm)")

Program 13

Write an R program to create multiple dot plots for grouped data, comparing the distribution of a variable across different groups using ggplot2’s position dodge function.

library(ggplot2)
# Work on a copy of mtcars with both grouping columns converted to factors
data <- mtcars
data$cyl <- as.factor(data$cyl)
data$gear <- as.factor(data$gear)

# Dodged dot plots of mpg: one cluster per cylinder count, split by gear.
# `fill` is mapped alongside `color` because geom_dotplot() otherwise keeps its
# default dot fill, leaving only the outlines to tell the gears apart.
ggplot(data, aes(x = cyl, y = mpg, color = gear, fill = gear)) +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    # Spell out the bin width (1/30 of the mpg range, the value ggplot2 would
    # pick itself) so the "Bin width defaults to ..." message is not emitted.
    binwidth = diff(range(data$mpg)) / 30,
    position = position_dodge(width = 0.7),
    dotsize = 0.6
  ) +
  labs(
    title = "Dot Plot of MPG by Cylinder and Gear",
    x = "Number of Cylinders",
    y = "Miles Per Gallon (MPG)",
    # Identical titles let the colour and fill legends merge into one
    color = "Gear",
    fill = "Gear"
  ) +
  theme_minimal()
Bin width defaults to 1/30 of the range of the data. Pick better value with
`binwidth`.

Program 14

Develop an R program to calculate and visualize a correlation matrix for a given dataset, with color-coded cells indicating the strength and direction of correlation, using ggplot2’s geom_tile function.

# Load necessary libraries
library(ggplot2)
library(reshape2)

# Define a reusable function
#' Plot a correlation heatmap for the numeric columns of a data frame
#'
#' @param data A data frame. Non-numeric columns are dropped before the
#'   correlations are computed.
#' @param title Title shown above the heatmap.
#' @return A ggplot object: one tile per pair of variables, filled by the
#'   correlation value and labelled with it rounded to two decimals.
plot_correlation_matrix <- function(data, title = "Correlation Matrix Heatmap") {
  if (!is.data.frame(data)) {
    stop("`data` must be a data frame.", call. = FALSE)
  }

  # Step 1: Filter numeric columns (vapply always yields a logical vector)
  is_numeric_col <- vapply(data, is.numeric, logical(1))
  if (!any(is_numeric_col)) {
    stop("`data` must contain at least one numeric column.", call. = FALSE)
  }
  numeric_data <- data[is_numeric_col]

  # Step 2: Calculate the correlation matrix on rows without missing values
  cor_matrix <- cor(numeric_data, use = "complete.obs")

  # Step 3: Melt the matrix into long format (one row per variable pair)
  cor_df <- melt(
    cor_matrix,
    varnames = c("Variable1", "Variable2"),
    value.name = "Correlation"
  )

  # Step 4: Create the heatmap
  ggplot(cor_df, aes(x = Variable1, y = Variable2, fill = Correlation)) +
    geom_tile(color = "white") +
    geom_text(aes(label = round(Correlation, 2)), color = "black", size = 3.5) +
    scale_fill_gradient2(
      low = "blue",
      high = "red",
      mid = "white",
      midpoint = 0,
      # Full argument name; `limit` only worked through partial matching
      limits = c(-1, 1),
      name = "Correlation"
    ) +
    theme_minimal(base_size = 12) +
    coord_fixed() +  # keep the tiles square
    labs(
      title = title,
      x = NULL,
      y = NULL
    ) +
    theme(
      axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
      panel.grid = element_blank()
    )
}

# Demo: correlation heatmap for the built-in mtcars data
plot_correlation_matrix(data = mtcars)