program15

Author

sirisha B A

1. Create multiple histograms using ggplot2::facet_wrap() to visualize how a variable (e.g. Sepal.Length) is distributed across different groups (e.g. Species) in a built-in R dataset.

The variable's distribution is shown across different groups in a built-in R dataset.

 library(ggplot2)
 # Faceted histograms of sepal length: one panel per iris species, each using
 # a bin width of 0.3.
 ggplot(data = iris, mapping = aes(x = Sepal.Length)) +
   geom_histogram(binwidth = 0.3, colour = "black", fill = "skyblue") +
   facet_wrap(vars(Species)) +
   theme_minimal() +
   ggtitle("Distribution of Sepal Length by Species") +
   xlab("Sepal Length") +
   ylab("Count")

2. Develop an R function to draw a density curve representing the probability density function of a continuous variable, with separate curves for each group, using ggplot2.

probability density function of a continuous variable, with separate curves for each group, using ggplot2

 # Load required libraries
 library(ggplot2)
 # Draw overlaid density curves of one continuous column, one curve per group.
 #
 # Arguments:
 #   data    - data frame holding both columns.
 #   x       - name (string) of the continuous column whose density is drawn.
 #   group   - name (string) of the grouping column; one curve per level.
 #   title   - plot title; built from `x` and `group` when left NULL.
 #   x_label - x-axis label; defaults to the column name.
 # Returns the ggplot object, so it is drawn when called at top level.
 plot_density_by_group <- function(data, x, group, title = NULL, x_label = x) {
   stopifnot(
     is.data.frame(data),
     is.character(x), length(x) == 1L, x %in% names(data),
     is.character(group), length(group) == 1L, group %in% names(data),
     is.numeric(data[[x]])
   )
   if (is.null(title)) {
     title <- paste("Density Plot of", x, "by", group)
   }

   # factor() makes a numeric grouping column yield discrete groups as well;
   # it leaves a column that is already a factor (e.g. Species) unchanged.
   ggplot(
     data,
     aes(
       x = .data[[x]],
       color = factor(.data[[group]]),
       fill = factor(.data[[group]])
     )
   ) +
     geom_density(alpha = 0.4, linewidth = 1) +
     labs(title = title, x = x_label, y = "Density", color = group, fill = group) +
     theme_minimal() +
     theme(plot.title = element_text(hjust = 0.5))  # centre the title
 }

 # Use iris dataset: Continuous variable = Sepal.Length, Grouping variable = Species
 plot_density_by_group(
   iris,
   x = "Sepal.Length",
   group = "Species",
   title = "Density Plot of Sepal Length by Species",
   x_label = "Sepal Length"
 )

3. Generate a basic box plot using ggplot2, enhanced with notches and outliers, and grouped by a categorical variable, using a built-in dataset in R.

library(ggplot2)
 # Load the built-in iris data set and preview its first six rows.
 data(iris)
 head(iris)
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa
 # Notched box plot of sepal length for each species; outliers are drawn in
 # red with point shape 16.
 ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
   geom_boxplot(
     fill = "skyblue",
     notch = TRUE,
     notchwidth = 0.5,
     outlier.shape = 16,
     outlier.colour = "red"
   ) +
   theme_minimal() +
   ggtitle("Basic Box Plot") +
   xlab("Species") +
   ylab("Sepal Length")

4. Develop a script in R to create a violin plot displaying the distribution of a continuous variable, with separate violins for each group, using ggplot2.

library(ggplot2)
 # NOTE(review): iris is loaded and previewed here, but the violin plot below
 # is drawn from mtcars; this preview looks like a leftover from the previous
 # section.
 data(iris)
 head(iris)
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa
# Treat the cylinder count as a categorical grouping variable.
# NOTE: this overwrites `cyl` in the workspace copy of mtcars, so later code
# sees a factor until the dataset is reloaded with data(mtcars).
mtcars[["cyl"]] <- as.factor(mtcars[["cyl"]])

# One untrimmed violin of mpg per cylinder group, filled by group.
ggplot(data = mtcars, mapping = aes(x = cyl, y = mpg, fill = cyl)) +
  geom_violin(trim = FALSE) +
  theme_minimal() +
  ggtitle("Distribution of MPG by Number of Cylinders") +
  xlab("Number of Cylinders") +
  ylab("Miles Per Gallon (MPG)")

5. Write an R program to create multiple dot plots for grouped data, comparing the distribution of a variable across different groups, using ggplot2’s position_dodge() function.

Comparing the distribution of a variable across different groups using ggplot2’s position_dodge() function.

 library(ggplot2)
 # Treat dose as a categorical variable so each dose gets its own x position.
 ToothGrowth$dose <- as.factor(ToothGrowth$dose)

 # geom_dotplot() fills dots black by default and `color` only changes the
 # outline, so `supp` is mapped to both aesthetics to make the supplement
 # groups distinguishable.
 ggplot(ToothGrowth, aes(x = dose, y = len, color = supp, fill = supp)) +
   geom_dotplot(
     binaxis = "y",        # bin along the y axis (tooth length)
     stackdir = "center",  # stack dots symmetrically around each group's centre
     position = position_dodge(width = 0.8),  # supplement groups side by side
     dotsize = 0.6,        # dot diameter relative to binwidth
     binwidth = 1.5        # bin width, in y-axis (len) units
   ) +
   labs(
     title = "dot plot of tooth length by dose and supplement type",
     x = "Dose(mg/day)",
     y = "Tooth length",
     color = "Supplement Type",
     fill = "Supplement Type"  # same title so both legends merge into one
   ) +
   theme_minimal()

6. Develop an R program to calculate and visualize a correlation matrix for a given dataset, with color-coded cells indicating the strength and direction of correlation, using ggplot2's geom_tile() function.

library(ggplot2) 
library(tidyr)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# Preview the first rows of mtcars before computing correlations.
head(mtcars)
                   mpg cyl disp  hp drat    wt  qsec vs am gear carb
Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
#dim(mtcars)
# Inspect column types. The output below lists `cyl` as a Factor because the
# violin-plot code earlier in this file converted it with as.factor().
str(mtcars)
'data.frame':   32 obs. of  11 variables:
 $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
 $ cyl : Factor w/ 3 levels "4","6","8": 2 2 1 2 3 2 3 1 1 2 ...
 $ disp: num  160 160 108 258 360 ...
 $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
 $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
 $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
 $ qsec: num  16.5 17 18.6 19.4 17 ...
 $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
 $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
 $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
 $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
# Reload the built-in mtcars dataset into the workspace
data("mtcars")

# Pearson correlations between every pair of columns
cor_matrix <- cor(mtcars, method = "pearson")
# Long format: one row per (Var1, Var2) pair with the correlation in Freq
cor_df <- as.data.frame(as.table(cor_matrix))
head(cor_df, n = 6L)
  Var1 Var2       Freq
1  mpg  mpg  1.0000000
2  cyl  mpg -0.8521620
3 disp  mpg -0.8475514
4   hp  mpg -0.7761684
5 drat  mpg  0.6811719
6   wt  mpg -0.8676594
 # Heat map of the correlation matrix: one tile per variable pair, coloured on
 # a diverging blue-white-red scale centred at 0, with the coefficient
 # (rounded to two decimals) printed in each tile.
 ggplot(cor_df, aes(x = Var1, y = Var2, fill = Freq)) +
   geom_tile(color = "white") +
   scale_fill_gradient2(
     low = "blue",
     mid = "white",
     high = "red",
     midpoint = 0,
     limits = c(-1, 1),  # full argument name; `limit` relied on partial matching
     name = "Correlation"
   ) +
   geom_text(aes(label = round(Freq, 2)), size = 3) +
   theme_minimal() +
   # Rotate x-axis labels so the variable names do not overlap
   theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
   labs(title = "Correlation Matrix (mtcars)", x = "", y = "")