Program 15

Author

Tejaswini Reddy

Compiling programs from 9-14.

9. Create multiple histograms using ggplot2::facet_wrap() to visualize how a variable (e.g., Sepal.Length) is distributed across different groups (e.g., Species) in a built-in R dataset.

Step-1: Load Necessary Libraries and Explore the dataset.

library(ggplot2) 

Step-2: Create histograms using facet_wrap for grouped data

# Faceted histogram of Sepal.Length: facet_wrap() draws one panel per
# level of Species. binwidth is expressed in the units of Sepal.Length.
ggplot(iris, aes(x = Sepal.Length)) +
  geom_histogram(binwidth = 0.3, fill = "skyblue", color = "black") +
  facet_wrap(~Species) +
  # Descriptive title and axis labels so the figure is self-explanatory.
  labs(
    title = "Distribution of Sepal Length by Species",
    x = "Sepal Length",
    y = "Frequency"
  ) +
  theme_minimal()

Step-3: Display Histogram

# Histogram of Sepal.Length, drawn in a separate facet for every species.
ggplot(data = iris, mapping = aes(x = Sepal.Length)) +
  geom_histogram(
    binwidth = 0.3,
    fill = "skyblue",
    colour = "black"
  ) +
  facet_wrap(vars(Species)) +
  ggtitle("distribution of Sepal Length by Species") +
  xlab("sepal length") +
  ylab("frequency") +
  theme_minimal()

  10. Develop an R function to draw a density curve representing the probability density function of a continuous variable, with separate curves for each group, using ggplot2.

    Step-1: Load Required Library.

    library(ggplot2) 

    Step-2: Explore the dataset and define the function

    #' Draw one density curve per group for a continuous variable.
    #'
    #' @param data A data frame containing the two columns named below.
    #' @param numeric_var Name (string) of the column plotted on the x-axis.
    #' @param group_var Name (string) of the column mapped to colour and fill.
    #' @param fill If TRUE the area under each curve is shaded (alpha 0.4);
    #'   if FALSE the fill is fully transparent (alpha 0).
    #' @return A ggplot object.
    plot_density_by_group <- function(data, numeric_var, group_var, fill = TRUE) {
      # aes_string() is deprecated; the .data pronoun looks the columns up by
      # their string names inside a regular aes() call.
      ggplot(
        data,
        aes(
          x = .data[[numeric_var]],
          color = .data[[group_var]],
          fill = .data[[group_var]]
        )
      ) +
        geom_density(alpha = if (fill) 0.4 else 0) +
        labs(
          title = paste("Density Plot of", numeric_var, "by", group_var),
          x = numeric_var,
          y = "Density",
          # Explicit legend titles so they show the column name rather than
          # the `.data[[...]]` expression.
          color = group_var,
          fill = group_var
        ) +
        theme_minimal()
    }

    Step-3: Load the dataset

    data(iris) 
    head(iris)
      Sepal.Length Sepal.Width Petal.Length Petal.Width Species
    1          5.1         3.5          1.4         0.2  setosa
    2          4.9         3.0          1.4         0.2  setosa
    3          4.7         3.2          1.3         0.2  setosa
    4          4.6         3.1          1.5         0.2  setosa
    5          5.0         3.6          1.4         0.2  setosa
    6          5.4         3.9          1.7         0.4  setosa

    Step-4 : Call the Function with Example

    plot_density_by_group(iris, "Sepal.Length", "Species")
    Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
    ℹ Please use tidy evaluation idioms with `aes()`.
    ℹ See also `vignette("ggplot2-in-packages")` for more information.

    11. Develop an R program to create a box plot to compare the Sepal Length across different species in the iris dataset. Ensure the plot highlights outliers with a specific color and shape, includes notches to compare medians, and applies appropriate labeling and styling.

    Step-1: Load the library

    library(ggplot2)

    Step-2: Explore the dataset

    data(iris) 
    head(iris,10) 
       Sepal.Length Sepal.Width Petal.Length Petal.Width Species
    1           5.1         3.5          1.4         0.2  setosa
    2           4.9         3.0          1.4         0.2  setosa
    3           4.7         3.2          1.3         0.2  setosa
    4           4.6         3.1          1.5         0.2  setosa
    5           5.0         3.6          1.4         0.2  setosa
    6           5.4         3.9          1.7         0.4  setosa
    7           4.6         3.4          1.4         0.3  setosa
    8           5.0         3.4          1.5         0.2  setosa
    9           4.4         2.9          1.4         0.2  setosa
    10          4.9         3.1          1.5         0.1  setosa
    tail(iris,10) 
        Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
    141          6.7         3.1          5.6         2.4 virginica
    142          6.9         3.1          5.1         2.3 virginica
    143          5.8         2.7          5.1         1.9 virginica
    144          6.8         3.2          5.9         2.3 virginica
    145          6.7         3.3          5.7         2.5 virginica
    146          6.7         3.0          5.2         2.3 virginica
    147          6.3         2.5          5.0         1.9 virginica
    148          6.5         3.0          5.2         2.0 virginica
    149          6.2         3.4          5.4         2.3 virginica
    150          5.9         3.0          5.1         1.8 virginica
    str(iris)
    'data.frame':   150 obs. of  5 variables:
     $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
     $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
     $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
     $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
     $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

    Step-3: Create the box-plot for Species against Sepal Length

    # Notched box plot of Sepal.Length per species. Outliers are drawn as
    # large red squares so they stand out from the boxes.
    ggplot(iris, aes(x = Species, y = Sepal.Length)) +
      geom_boxplot(
        notch = TRUE,             # notches give a visual comparison of medians
        fill = "yellow",
        alpha = 0.7,              # opacity must lie in [0, 1]; 10 was out of range
        outlier.colour = "red",
        outlier.shape = "square",
        outlier.size = 5
        # `outliers = TRUE` is omitted: outliers are shown by default and the
        # argument is not recognised by older ggplot2 releases.
      ) +
      labs(
        title = "Box Plot Representation",
        x = "Species",
        y = "Sepal.Length(in cm)"
      ) +
      theme_minimal()

12. Develop a script in R to create a violin plot displaying the distribution of a continuous variable, with separate violins for each group, using ggplot2.

Step-1: Load the libraries

library(ggplot2)

Step-2: Create the violin plot

# Violin plot of Sepal.Length per species with a slim box plot overlaid;
# the result is stored in violin_plot so it can be printed later.
violin_plot <- ggplot(iris, aes(Species, Sepal.Length, fill = Species)) +
  # trim = FALSE shows the full distribution without trimming the tails
  geom_violin(trim = FALSE) +
  # narrow white box plot drawn inside each violin
  geom_boxplot(width = 0.1, fill = "white") +
  ggtitle("Distribution of Sepal Length by Species") +
  xlab("Species") +
  ylab("Sepal Length") +
  theme_minimal() +
  theme(legend.position = "none")

Step-3: Display the plot

print(violin_plot)

13. Write an R program to create multiple dot plots for grouped data, comparing the distributions of variables across groups, using ggplot2's position_dodge function.

Step-1: Load the necessary libraries.

library(ggplot2) 
library(dplyr) 

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
data("ToothGrowth")

Step-2: Explore Dataset.

head(ToothGrowth) 
   len supp dose
1  4.2   VC  0.5
2 11.5   VC  0.5
3  7.3   VC  0.5
4  5.8   VC  0.5
5  6.4   VC  0.5
6 10.0   VC  0.5
class(ToothGrowth$supp) 
[1] "factor"
table(ToothGrowth$dose)

0.5   1   2 
 20  20  20 

Step-3: Convert the dose column to a factor and create the dot plot

# Treat dose as a categorical variable so that each dose level gets its own
# position on the x-axis and can be dodged by supplement type.
ToothGrowth$dose <- as.factor(ToothGrowth$dose)

# Dot plot of tooth length per dose, with the two supplement types placed
# side by side. Both colour and fill are mapped to supp: with colour alone
# the dots keep the default black fill and the groups are hard to tell apart.
ggplot(ToothGrowth, aes(x = dose, y = len, color = supp, fill = supp)) +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    position = position_dodge(width = 1.5),
    dotsize = 0.8
  ) +
  labs(
    title = "Tooth length by dose and supplement type",
    x = "Dose",
    y = "Length",
    # identical titles let the colour and fill legends merge into one
    color = "Supplement",
    fill = "Supplement"
  ) +
  theme_minimal()
Bin width defaults to 1/30 of the range of the data. Pick better value with
`binwidth`.

14. Develop an R program to calculate and visualize a correlation matrix for a given dataset, with color-coded cells indicating the strength and direction of correlations, using ggplot2's geom_tile function.

Step-1: Load the necessary libraries.

library(ggplot2) 
library(tidyr) 
library(dplyr)
head(mtcars)
                   mpg cyl disp  hp drat    wt  qsec vs am gear carb
Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Load the built-in mtcars data set.
data("mtcars")
# Pairwise correlations between all columns of mtcars
# (Pearson is the default method of cor()).
cor_matrix <- cor(x = mtcars, method = "pearson")
# Show the full correlation matrix.
print(cor_matrix)
            mpg        cyl       disp         hp        drat         wt
mpg   1.0000000 -0.8521620 -0.8475514 -0.7761684  0.68117191 -0.8676594
cyl  -0.8521620  1.0000000  0.9020329  0.8324475 -0.69993811  0.7824958
disp -0.8475514  0.9020329  1.0000000  0.7909486 -0.71021393  0.8879799
hp   -0.7761684  0.8324475  0.7909486  1.0000000 -0.44875912  0.6587479
drat  0.6811719 -0.6999381 -0.7102139 -0.4487591  1.00000000 -0.7124406
wt   -0.8676594  0.7824958  0.8879799  0.6587479 -0.71244065  1.0000000
qsec  0.4186840 -0.5912421 -0.4336979 -0.7082234  0.09120476 -0.1747159
vs    0.6640389 -0.8108118 -0.7104159 -0.7230967  0.44027846 -0.5549157
am    0.5998324 -0.5226070 -0.5912270 -0.2432043  0.71271113 -0.6924953
gear  0.4802848 -0.4926866 -0.5555692 -0.1257043  0.69961013 -0.5832870
carb -0.5509251  0.5269883  0.3949769  0.7498125 -0.09078980  0.4276059
            qsec         vs          am       gear        carb
mpg   0.41868403  0.6640389  0.59983243  0.4802848 -0.55092507
cyl  -0.59124207 -0.8108118 -0.52260705 -0.4926866  0.52698829
disp -0.43369788 -0.7104159 -0.59122704 -0.5555692  0.39497686
hp   -0.70822339 -0.7230967 -0.24320426 -0.1257043  0.74981247
drat  0.09120476  0.4402785  0.71271113  0.6996101 -0.09078980
wt   -0.17471588 -0.5549157 -0.69249526 -0.5832870  0.42760594
qsec  1.00000000  0.7445354 -0.22986086 -0.2126822 -0.65624923
vs    0.74453544  1.0000000  0.16834512  0.2060233 -0.56960714
am   -0.22986086  0.1683451  1.00000000  0.7940588  0.05753435
gear -0.21268223  0.2060233  0.79405876  1.0000000  0.27407284
carb -0.65624923 -0.5696071  0.05753435  0.2740728  1.00000000
# Convert the matrix to a long data frame for plotting: one row per pair of
# variables (Var1, Var2) with the correlation stored in the Freq column.
cor_df <- as.data.frame(
  as.table(cor_matrix),
  responseName = "Freq",
  stringsAsFactors = TRUE
)
# Preview the first six rows.
head(cor_df, n = 6L)
  Var1 Var2       Freq
1  mpg  mpg  1.0000000
2  cyl  mpg -0.8521620
3 disp  mpg -0.8475514
4   hp  mpg -0.7761684
5 drat  mpg  0.6811719
6   wt  mpg -0.8676594

Step-2: Visualize using ggplot2

# Heat map of the correlation matrix: one tile per pair of variables, filled
# on a diverging blue-white-red scale centred at zero.
ggplot(cor_df, aes(x = Var1, y = Var2, fill = Freq)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red",
    midpoint = 0,
    limits = c(-1, 1),  # spelled out; `limit` only worked via partial matching
    name = "Correlation"
  ) +
  # print the rounded coefficient inside each tile
  geom_text(aes(label = round(Freq, 2)), size = 3) +
  theme_minimal() +
  labs(title = "Correlation matrix (mtcars)", x = "", y = "") +
  # slant the x-axis labels so the variable names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))