Develop an R program to quickly explore a given dataset, including categorical analysis using the group by command, and visualize the findings using ggplot2 features
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
# Load the packages this example relies on
library(dplyr)
library(ggplot2)

# Quickly explore a dataset: print per-group summary statistics for one
# numeric column and draw a box plot of that column split by group.
#
# Arguments:
#   data             A data frame.
#   categorical_var  Name (string) of the grouping column.
#   numerical_var    Name (string) of the numeric column to summarise.
#
# Side effects: prints the summary table and the plot.
# Returns the ggplot object invisibly.
explore_dataset <- function(data, categorical_var, numerical_var) {
  # Fail early with a readable message instead of a tidy-eval error
  missing_cols <- setdiff(c(categorical_var, numerical_var), names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in data: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Summary statistics per group; the column names arrive as strings, so
  # they are resolved with all_of() / the .data pronoun
  summary_stats <- data %>%
    group_by(across(all_of(categorical_var))) %>%
    summarise(
      Count = n(),
      Mean = mean(.data[[numerical_var]], na.rm = TRUE),
      Median = median(.data[[numerical_var]], na.rm = TRUE),
      SD = sd(.data[[numerical_var]], na.rm = TRUE),
      .groups = "drop"
    )
  print(summary_stats)

  # Visualization using ggplot2
  p <- ggplot(
    data,
    aes(
      x = .data[[categorical_var]],
      y = .data[[numerical_var]],
      fill = .data[[categorical_var]]
    )
  ) +
    geom_boxplot(outlier.color = "red", alpha = 0.7) +
    theme_minimal() +
    labs(
      title = paste("Distribution of", numerical_var, "by", categorical_var),
      x = categorical_var,
      y = numerical_var,
      fill = categorical_var # keep the legend titled after the column
    )
  print(p)
  invisible(p)
}

# Example usage
# Sample dataset
data <- data.frame(
  Category = rep(c("A", "B", "C"), each = 100),
  Values = c(rnorm(100, 10, 3), rnorm(100, 20, 4), rnorm(100, 15, 5))
)

# Call the function
explore_dataset(data, "Category", "Values")
# A tibble: 3 × 5
Category Count Mean Median SD
<chr> <int> <dbl> <dbl> <dbl>
1 A 100 9.92 9.86 3.05
2 B 100 19.6 19.3 3.81
3 C 100 15.1 16.0 4.46
Write an R script to create a scatter plot, incorporating categorical analysis through color-coded data points representing different groups, using ggplot2.
# ggplot2 provides the plotting functions used below
library(ggplot2)

# Simulated data: three categories of 100 points each (swap in a real
# dataset here). One vectorised rnorm() call per axis draws the groups in
# the same order as three separate per-group calls would.
data <- data.frame(
  Category = rep(LETTERS[1:3], each = 100),
  X_values = rnorm(
    300,
    mean = rep(c(5, 10, 15), each = 100),
    sd = rep(c(2, 3, 4), each = 100)
  ),
  Y_values = rnorm(
    300,
    mean = rep(c(20, 25, 30), each = 100),
    sd = rep(c(5, 6, 7), each = 100)
  )
)

# Scatter plot: one point per row, coloured by category
p <- ggplot(data, aes(X_values, Y_values, colour = Category)) +
  geom_point(alpha = 0.7, size = 3) +
  scale_color_brewer(palette = "Set2") +
  labs(
    color = "Category",
    x = "X Values",
    y = "Y Values",
    title = "Scatter Plot with Categorical Analysis"
  ) +
  theme_minimal()

# Render the plot
print(p)
Implement an R function to generate a line graph depicting the trend of a time-series dataset, with separate lines for each group, utilizing ggplot2’s group aesthetic
# Load required library
library(ggplot2)

# Draw a line graph of a time series with a separate line for each group.
#
# Arguments:
#   data       Data frame holding the series in long format.
#   time_var   Name (string) of the column mapped to the x-axis.
#   value_var  Name (string) of the column mapped to the y-axis.
#   group_var  Name (string) of the grouping column; it drives both the
#              colour and the `group` aesthetic.
#
# Prints the plot and returns it invisibly.
plot_time_series <- function(data, time_var, value_var, group_var) {
  # Fail early with a readable message when a column name is wrong
  missing_cols <- setdiff(c(time_var, value_var, group_var), names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in data: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # aes() with the .data pronoun replaces the deprecated aes_string()
  p <- ggplot(
    data,
    aes(
      x = .data[[time_var]],
      y = .data[[value_var]],
      color = .data[[group_var]],
      group = .data[[group_var]]
    )
  ) +
    # `linewidth` replaces `size` for lines (deprecated in ggplot2 3.4.0)
    geom_line(linewidth = 1.2) +
    # Points make the individual observations easier to see
    geom_point(size = 3, alpha = 0.7) +
    theme_minimal() +
    labs(
      title = paste("Time-Series Trend of", value_var, "by", group_var),
      x = "Time",
      y = "Values",
      color = "Group"
    )

  print(p)
  invisible(p)
}

# Example usage
# Sample time-series dataset. Rows are laid out group by group so that each
# block of 12 simulated values belongs to exactly one group.
data <- data.frame(
  Time = rep(1:12, times = 3), # Time variable (e.g., months)
  Value = c(rnorm(12, 50, 5), rnorm(12, 60, 5), rnorm(12, 55, 5)),
  Group = rep(c("A", "B", "C"), each = 12) # Groups (e.g., regions)
)

# Call the function to generate the line graph
plot_time_series(data, "Time", "Value", "Group")
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Develop a script in R to produce a bar graph displaying the frequency distribution of categorical data in a given dataset, grouped by a specific variable, using ggplot2.
Warning: package 'ggplot2' is in use and will not be installed
# Load ggplot2 librarylibrary(ggplot2)
# Load the ggplot2 librarylibrary(ggplot2)
# Load required library
library(ggplot2)

# Draw a bar graph of the frequency distribution of a categorical column,
# optionally split into dodged bars by a second grouping column.
#
# Arguments:
#   data             A data frame.
#   categorical_var  Name (string) of the column counted on the x-axis.
#   group_var        Optional name (string) of the column used for the bar
#                    fill. NULL (the default) draws ungrouped bars.
#
# Prints the plot and returns it invisibly.
generate_bar_graph <- function(data, categorical_var, group_var = NULL) {
  # Build the mapping and title to match whether a grouping column was
  # supplied, so the title never ends in a dangling "by"
  if (is.null(group_var)) {
    mapping <- aes(x = .data[[categorical_var]])
    plot_title <- paste("Frequency Distribution of", categorical_var)
  } else {
    mapping <- aes(
      x = .data[[categorical_var]],
      fill = .data[[group_var]]
    )
    plot_title <- paste(
      "Frequency Distribution of", categorical_var, "by", group_var
    )
  }

  # aes() with the .data pronoun replaces the deprecated aes_string()
  p <- ggplot(data, mapping) +
    geom_bar(position = "dodge", color = "black", alpha = 0.8) +
    theme_minimal() +
    labs(
      title = plot_title,
      x = categorical_var,
      y = "Frequency",
      fill = "Group"
    )
  print(p)
  invisible(p)
}

# Example usage
# Sample dataset
data <- data.frame(
  Category = sample(c("A", "B", "C"), 300, replace = TRUE),
  Group = sample(c("X", "Y"), 300, replace = TRUE)
)

# Call the function
generate_bar_graph(data, "Category", "Group")
5. Implement an R program to create a histogram illustrating the distribution of a continuous variable, with overlays of density curves for each group, using ggplot2
Step 1: Load Required Library
{r}
library(ggplot2)
# Load the necessary librarylibrary(ggplot2)
Step 2: Explore the Inbuilt Dataset
# The built-in `iris` dataset is used for this exercise:
#   * Petal.Length - the continuous variable to plot
#   * Species      - the categorical grouping variable
# Print the structure of the dataset.
str(object = iris)
Step 3: Create Histogram with Group-wise Density Curves
Step 3.1: Initialize the ggplot with aesthetic mappings
# Base plot object on the iris data:
#   x-axis -> Petal.Length
#   fill   -> Species (the grouping variable)
p <- ggplot(iris, mapping = aes(Petal.Length, fill = Species))

# Show the (still layer-less) plot
p
Explanation:
This initializes the plot and tells ggplot to map:
Petal.Length (continuous variable) to the x-axis
Species (categorical) to fill aesthetic to distinguish groups
Step 3.2: Add Histogram Layer
# Add a histogram layer whose y-axis is scaled to density rather than
# counts. `after_stat(density)` replaces the `..density..` dot-dot
# notation, which was deprecated in ggplot2 3.4.0.
p <- p +
  geom_histogram(
    aes(y = after_stat(density)),
    alpha = 0.4,           # Set transparency
    position = "identity", # Overlap histograms
    bins = 30              # Number of bins
  )
p
Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(density)` instead.
Explanation:
aes(y=..density..) normalizes the histogram to density
alpha= 0.4 makes bars semi-transparent so overlaps are visible
position = "identity" overlays the histograms of the different groups on top of one another instead of stacking them
bins =30 controls histogram resolution
Step 3.3: Add Density Curve Layer
# Overlay a density curve for each group
p <- p +
  geom_density(
    aes(color = Species), # Line color by group
    # The plot-level `fill = Species` mapping would otherwise be inherited
    # and draw opaque filled areas over the histogram; draw outlines only
    fill = NA,
    # `linewidth` replaces `size` for lines (deprecated in ggplot2 3.4.0)
    linewidth = 1.2       # Line thickness
  )
Explanation: This overlays smooth density curves for each species using color. The aes(color = Species) ensures each curve is colored by group.
Step 3.4: Add Labels and Theme
# Finish the plot: switch to the minimal theme, then set the title and the
# axis labels
p <- p +
  theme_minimal() +
  labs(
    x = "Petal Length",
    y = "Density",
    title = "Distribution of Petal Length with Group-wise Density Curves"
  )
p
Explanation:
labs() adds a title and axis labels
theme_minimal() applies a clean, modern plot style
Step 3.5: Display the Plot
Summary
Used built-in iris dataset
Visualized Petal.Length as histogram
Grouped and color-coded by Species
Overlaid group-wise density curves for better interpretation
Write an R script to construct a box plot showcasing the distribution of a continuous variable, grouped by a categorical variable, using ggplot2’s fill aesthetic.
library(ggplot2)
# Load required library
library(ggplot2)

# Draw a bar graph of the frequency distribution of a categorical column,
# optionally split into dodged bars by a second grouping column.
#
# Arguments:
#   data             A data frame.
#   categorical_var  Name (string) of the column counted on the x-axis.
#   group_var        Optional name (string) of the column used for the bar
#                    fill. NULL (the default) draws ungrouped bars.
#
# Prints the plot and returns it invisibly.
generate_bar_graph <- function(data, categorical_var, group_var = NULL) {
  # Build the mapping and title to match whether a grouping column was
  # supplied, so the title never ends in a dangling "by"
  if (is.null(group_var)) {
    mapping <- aes(x = .data[[categorical_var]])
    plot_title <- paste("Frequency Distribution of", categorical_var)
  } else {
    mapping <- aes(
      x = .data[[categorical_var]],
      fill = .data[[group_var]]
    )
    plot_title <- paste(
      "Frequency Distribution of", categorical_var, "by", group_var
    )
  }

  # aes() with the .data pronoun replaces the deprecated aes_string()
  p <- ggplot(data, mapping) +
    geom_bar(position = "dodge", color = "black", alpha = 0.8) +
    theme_minimal() +
    labs(
      title = plot_title,
      x = categorical_var,
      y = "Frequency",
      fill = "Group"
    )
  print(p)
  invisible(p)
}

# Draw a box plot of a continuous column, one box per level of a
# categorical column, with the boxes filled by that same column.
#
# Arguments:
#   data             A data frame.
#   continuous_var   Name (string) of the numeric column on the y-axis.
#   categorical_var  Name (string) of the grouping column on the x-axis.
#
# Prints the plot and returns it invisibly.
generate_box_plot <- function(data, continuous_var, categorical_var) {
  # aes() with the .data pronoun replaces the deprecated aes_string()
  p <- ggplot(
    data,
    aes(
      x = .data[[categorical_var]],
      y = .data[[continuous_var]],
      fill = .data[[categorical_var]]
    )
  ) +
    geom_boxplot(alpha = 0.7) +
    theme_minimal() +
    labs(
      title = paste("Distribution of", continuous_var, "by", categorical_var),
      x = categorical_var,
      y = continuous_var
    ) +
    # The fill repeats the x-axis grouping, so the legend is redundant
    theme(legend.position = "none")
  print(p)
  invisible(p)
}

# Example usage
# Sample dataset
data <- data.frame(
  Category = sample(c("A", "B", "C"), 300, replace = TRUE),
  Group = sample(c("X", "Y"), 300, replace = TRUE),
  Value = c(rnorm(150, 50, 10), rnorm(150, 60, 15))
)

# Call the bar graph function
generate_bar_graph(data, "Category", "Group")

# Call the box plot function
generate_box_plot(data, "Value", "Category")
Develop a function in R to plot a function curve based on a mathematical equation provided as input, with different curve styles for each group, using ggplot2.
library(ggplot2)
# Plot the curve of a mathematical equation, drawn once per group with a
# distinct colour and line type for each group.
#
# Arguments:
#   equation  A single string holding an R expression in `x`, e.g. "sin(x)".
#   x_range   Numeric vector of length 2: lower and upper bound of x.
#   groups    Character vector of group labels; every group is drawn from
#             the same equation.
#
# Prints the plot and returns it invisibly.
plot_function_curve <- function(equation, x_range = c(-10, 10), groups = c("Group A", "Group B")) {
  # Validate up front so a bad call fails with a clear message
  stopifnot(
    is.character(equation), length(equation) == 1,
    is.numeric(x_range), length(x_range) == 2,
    length(groups) >= 1
  )

  # Generate x-values
  x_values <- seq(x_range[1], x_range[2], length.out = 500)

  # Create data frame: the full x grid repeated once per group
  data <- data.frame(
    x = rep(x_values, length(groups)),
    Group = rep(groups, each = length(x_values))
  )

  # Apply the mathematical equation to the x-values.
  # NOTE(review): eval(parse()) runs arbitrary R code, so `equation` must
  # come from a trusted source; never pass untrusted user input here.
  data$y <- eval(parse(text = equation), envir = list(x = data$x))

  # Create the plot
  p <- ggplot(data, aes(x = x, y = y, color = Group, linetype = Group)) +
    # `linewidth` replaces `size` for lines (deprecated in ggplot2 3.4.0)
    geom_line(linewidth = 1.2) +
    theme_minimal() +
    labs(
      title = paste("Function Curve for:", equation),
      x = "X",
      y = "Y",
      color = "Group",
      linetype = "Group"
    )
  print(p)
  invisible(p)
}
# Example: draw the sine curve using the default x-range and the two
# default groups
plot_function_curve(equation = "sin(x)")