program 15

Author

Jaishree –1NT23IS088

PROGRAM 9

Create multiple histograms using ggplot2::facet_wrap() to visualize how a variable (e.g. Sepal.Length) is distributed across different groups (e.g. Species) in a built-in R dataset.

library(ggplot2)
#load the iris dataset
data("iris")

#view the first few rows of the dataset
head(iris)
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa
# Histogram of sepal length drawn once per species; every panel gets its
# own axis ranges because the facets use free scales
ggplot(data = iris, mapping = aes(x = Sepal.Length)) +
  geom_histogram(binwidth = 0.3, colour = "black", fill = "skyblue") +
  facet_wrap(vars(Species), scales = "free") +
  labs(
    title = "Distribution of Sepal Length by Species",
    x = "Sepal Length",
    y = "Frequency"
  ) +
  theme_minimal()

PROGRAM-10

Develop an R function to draw a density curve representing the probability density function of a continuous variable, with separate curves for each group, using ggplot2.

library(ggplot2)
#' Draw one density curve per group for a continuous variable
#'
#' @param data A data frame containing both columns.
#' @param continuous_var Column name (single string) plotted on the x axis.
#' @param group_var Column name (single string) mapped to both the outline
#'   colour and the fill of the curves.
#' @param fill_colors Optional vector of colours passed as `values` to the
#'   manual fill and colour scales. `NULL` (default) keeps the default palette.
#' @return A ggplot object.
plot_density_by_group <- function(data, continuous_var, group_var,
                                  fill_colors = NULL) {
  # `||` below needs scalar operands, so reject anything but single strings
  is_string <- function(x) is.character(x) && length(x) == 1L && !is.na(x)
  if (!is_string(continuous_var) || !is_string(group_var)) {
    stop("`continuous_var` and `group_var` must each be a single column name.")
  }

  # Check if the specified columns exist
  if (!(continuous_var %in% names(data)) || !(group_var %in% names(data))) {
    stop("Invalid column names. Make sure both variables exist in the dataset.")
  }

  # Look the columns up by name with the `.data` pronoun; this replaces the
  # deprecated aes_string() and also copes with non-syntactic column names
  p <- ggplot(
    data,
    aes(
      x = .data[[continuous_var]],
      color = .data[[group_var]],
      fill = .data[[group_var]]
    )
  ) +
    geom_density(alpha = 0.4) +
    labs(
      title = paste("Density Plot of", continuous_var, "by", group_var),
      x = continuous_var,
      y = "Density",
      # Name the legends explicitly so they show the plain column name
      color = group_var,
      fill = group_var
    ) +
    theme_minimal()

  # Apply custom colours to both fill and outline if provided
  if (!is.null(fill_colors)) {
    p <- p +
      scale_fill_manual(values = fill_colors) +
      scale_color_manual(values = fill_colors)
  }

  p
}
# Example call: sepal length density per species, no custom colours supplied
plot_density_by_group(iris, "Sepal.Length", "Species")
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.

# One colour per species, keyed by the species name
custom_colors <- c(
  setosa = "steelblue",
  versicolor = "forestgreen",
  virginica = "darkorange"
)

# Petal length densities drawn with the palette defined above
plot_density_by_group(
  data = iris,
  continuous_var = "Petal.Length",
  group_var = "Species",
  fill_colors = custom_colors
)

PROGRAM 11

To generate a basic box plot using ggplot2, enhanced with notches and outliers, and grouped by a categorical variable, using a built-in dataset in R.

# Load ggplot2 here so this program runs on its own instead of relying on an
# earlier program having attached it in the same session
library(ggplot2)

# Built-in iris measurements; preview the first rows
data(iris)
head(iris)
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa
# Notched box plot of sepal length for each species, pink boxes with
# outlier points drawn in black
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  geom_boxplot(
    fill = "pink",
    outlier.colour = "black",
    notch = TRUE,
    notchwidth = 0.5
  ) +
  theme_minimal() +
  labs(
    title = "basic box plot",
    x = "Species",
    y = "Sepal.Length"
  )

PROGRAM 12

Develop a script in R to create a violin plot displaying the distribution of a continuous variable, with separate violins for each group, using ggplot2.

library(ggplot2)
data(iris)
head(iris)
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa
str(iris)
'data.frame':   150 obs. of  5 variables:
 $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
 $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
 $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
 $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
 $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Violin plot of petal length per species. The y aesthetic is Petal.Length so
# that the plotted data matches the title and the y-axis label (it previously
# plotted Sepal.Length under petal-length labels).
ggplot(iris, aes(x = Species, y = Petal.Length, fill = Species)) +
  # trim = FALSE extends the violin tails beyond the observed data range
  geom_violin(trim = FALSE, alpha = 0.6, color = "black") +
  labs(
    title = "Distributions of petal length by iris Species",
    x = "Species",
    y = "Petal.Length"
  ) +
  theme_minimal(base_size = 14)

PROGRAM 13

Write an R program to create multiple dot plots of grouped data, comparing the distribution of variables across different categories, using ggplot2's position_dodge function.

library(ggplot2)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
data("ToothGrowth")
head(ToothGrowth)
   len supp dose
1  4.2   VC  0.5
2 11.5   VC  0.5
3  7.3   VC  0.5
4  5.8   VC  0.5
5  6.4   VC  0.5
6 10.0   VC  0.5
#table(ToothGrowth$supp)
# Store dose as a factor so it is treated as a discrete variable, then show it
ToothGrowth$dose <- factor(ToothGrowth$dose)
print(ToothGrowth$dose)
 [1] 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 1   1   1   1   1   1   1   1   1  
[20] 1   2   2   2   2   2   2   2   2   2   2   0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5
[39] 0.5 0.5 1   1   1   1   1   1   1   1   1   1   2   2   2   2   2   2   2  
[58] 2   2   2  
Levels: 0.5 1 2
# Dodged dot plot of tooth length by dose, one set of dots per supplement type.
# Outline colour is mapped to `supp`; the pink fill is a fixed parameter of the
# geom. Inside aes() the string "pink" would be treated as a data value, which
# picks a default-palette fill and adds a meaningless "fill" legend.
ggplot(ToothGrowth, aes(x = dose, y = len, color = supp)) +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    position = position_dodge(width = 0.8),
    dotsize = 0.6,
    binwidth = 1.5,
    fill = "pink"
  ) +
  labs(
    title = "Dot Plot of tooth length by dose and supplement type",
    x = "Dose(mg/day)",
    y = "Tooth length",
    color = "supplement type"
  ) +
  theme_minimal()

PROGRAM 14

Develop an R program to calculate and visualize a correlation matrix for a given dataset, with color-coded cells indicating the strength and direction of the correlations, using ggplot2's geom_tile function.

library(ggplot2)
library(tidyr)
library(dplyr)
dim(mtcars)
[1] 32 11
# Load the built-in mtcars dataset
data(mtcars)

# Pearson correlation between every pair of mtcars columns
cor_matrix <- cor(mtcars, method = "pearson")
print(cor_matrix)
            mpg        cyl       disp         hp        drat         wt
mpg   1.0000000 -0.8521620 -0.8475514 -0.7761684  0.68117191 -0.8676594
cyl  -0.8521620  1.0000000  0.9020329  0.8324475 -0.69993811  0.7824958
disp -0.8475514  0.9020329  1.0000000  0.7909486 -0.71021393  0.8879799
hp   -0.7761684  0.8324475  0.7909486  1.0000000 -0.44875912  0.6587479
drat  0.6811719 -0.6999381 -0.7102139 -0.4487591  1.00000000 -0.7124406
wt   -0.8676594  0.7824958  0.8879799  0.6587479 -0.71244065  1.0000000
qsec  0.4186840 -0.5912421 -0.4336979 -0.7082234  0.09120476 -0.1747159
vs    0.6640389 -0.8108118 -0.7104159 -0.7230967  0.44027846 -0.5549157
am    0.5998324 -0.5226070 -0.5912270 -0.2432043  0.71271113 -0.6924953
gear  0.4802848 -0.4926866 -0.5555692 -0.1257043  0.69961013 -0.5832870
carb -0.5509251  0.5269883  0.3949769  0.7498125 -0.09078980  0.4276059
            qsec         vs          am       gear        carb
mpg   0.41868403  0.6640389  0.59983243  0.4802848 -0.55092507
cyl  -0.59124207 -0.8108118 -0.52260705 -0.4926866  0.52698829
disp -0.43369788 -0.7104159 -0.59122704 -0.5555692  0.39497686
hp   -0.70822339 -0.7230967 -0.24320426 -0.1257043  0.74981247
drat  0.09120476  0.4402785  0.71271113  0.6996101 -0.09078980
wt   -0.17471588 -0.5549157 -0.69249526 -0.5832870  0.42760594
qsec  1.00000000  0.7445354 -0.22986086 -0.2126822 -0.65624923
vs    0.74453544  1.0000000  0.16834512  0.2060233 -0.56960714
am   -0.22986086  0.1683451  1.00000000  0.7940588  0.05753435
gear -0.21268223  0.2060233  0.79405876  1.0000000  0.27407284
carb -0.65624923 -0.5696071  0.05753435  0.2740728  1.00000000
# Convert the correlation matrix to a long-format data frame for plotting:
# one row per pair of variables, with columns Var1, Var2 and Freq
# (Freq holds the correlation value)
cor_df<-as.data.frame(as.table(cor_matrix))
# Preview the first rows of the long-format data
head(cor_df)
  Var1 Var2       Freq
1  mpg  mpg  1.0000000
2  cyl  mpg -0.8521620
3 disp  mpg -0.8475514
4   hp  mpg -0.7761684
5 drat  mpg  0.6811719
6   wt  mpg -0.8676594
# Heat map of the correlation matrix: one tile per variable pair, filled by
# the correlation value and labelled with it rounded to two decimals
ggplot(cor_df, aes(x = Var1, y = Var2, fill = Freq)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "yellow", mid = "orange", high = "red",
    midpoint = 0, limits = c(-1, 1),
    name = "Correlation"
  ) +
  geom_text(aes(label = round(Freq, 2)), size = 3) +
  theme_minimal() +
  labs(
    title = "correlation matrix(mtcars)",
    x = "",
    y = ""
  ) +
  # Must be chained with `+` (and come after theme_minimal()) to take effect;
  # as a separate statement it only prints the theme object and the x-axis
  # labels stay unrotated
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
List of 1
 $ axis.text.x:List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : num 1
  ..$ vjust        : NULL
  ..$ angle        : num 45
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi FALSE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 - attr(*, "class")= chr [1:2] "theme" "gg"
 - attr(*, "complete")= logi FALSE
 - attr(*, "validate")= logi TRUE