program15

Author

Vinayaa-1nt23is249

15. Create an R program to calculate and visualize the correlation matrix for a given dataset, with color-coded cells indicating the strength and direction of the correlations, using ggplot2's geom_tile function.

library(ggplot2)
data(iris)
head(iris)
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa
# Histogram of iris sepal length, drawn as one panel per species.
ggplot(data = iris, mapping = aes(x = Sepal.Length)) +
  geom_histogram(
    binwidth = 0.3,
    fill = "skyblue",
    color = "black"
  ) +
  # One facet per level of Species
  facet_wrap(~ Species) +
  labs(
    title = "Distribution of Sepal Length by Species",
    x = "Sepal Length(cm)",
    y = "Frequency"
  ) +
  theme_minimal()

Develop an R function to draw a density curve representing the probability density function of a continuous variable, with separate curves for each group, using ggplot2.

library(ggplot2)
#' Draw per-group density curves for a continuous variable
#'
#' Builds a ggplot2 density plot of `continuous_var`, with one semi-transparent
#' curve per level of `group_var`. Both the outline colour and the fill are
#' mapped to the grouping column.
#'
#' @param data A data frame containing both columns.
#' @param continuous_var Name (string) of the column plotted on the x axis.
#' @param group_var Name (string) of the column used to group and colour the
#'   curves.
#' @param fill_colors Optional vector of colours passed as `values` to both
#'   the manual fill scale and the manual colour scale. `NULL` (the default)
#'   keeps ggplot2's default palette.
#' @return A ggplot object.
plot_density_by_group <- function(data, continuous_var, group_var, fill_colors = NULL) {
  # Fail early, before any plotting work, if either column is missing.
  if (!all(c(continuous_var, group_var) %in% names(data))) {
    stop("Invalid column names. Make sure both variables exist in the dataset.")
  }

  # `.data[[name]]` looks columns up by string; it replaces the deprecated
  # `aes_string()` and also copes with non-syntactic column names.
  p <- ggplot(
    data,
    aes(
      x = .data[[continuous_var]],
      color = .data[[group_var]],
      fill = .data[[group_var]]
    )
  ) +
    geom_density(alpha = 0.4) +
    labs(
      title = paste("Density Plot of", continuous_var, "by", group_var),
      x = continuous_var,
      y = "Density",
      # Explicit legend titles so they show the column name, not the
      # `.data[[...]]` expression.
      color = group_var,
      fill = group_var
    ) +
    theme_minimal()

  # Apply the same custom palette to outline and fill when one is supplied.
  if (!is.null(fill_colors)) {
    p <- p +
      scale_fill_manual(values = fill_colors) +
      scale_color_manual(values = fill_colors)
  }

  p
}
data(iris)
head(iris)
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa
plot_density_by_group(iris, "Sepal.Length", "Species")
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.

# Named palette: one colour for each iris species level
custom_colors <- setNames(
  c("steelblue", "forestgreen", "darkorange"),
  c("setosa", "versicolor", "virginica")
)

# Petal-length density curves per species, drawn with the palette above
plot_density_by_group(
  data = iris,
  continuous_var = "Petal.Length",
  group_var = "Species",
  fill_colors = custom_colors
)

To generate a box plot using ggplot2, enhanced with notches and outliers, and grouped by a categorical variable using an in-built dataset in R

data(iris)
head(iris,10)
   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1           5.1         3.5          1.4         0.2  setosa
2           4.9         3.0          1.4         0.2  setosa
3           4.7         3.2          1.3         0.2  setosa
4           4.6         3.1          1.5         0.2  setosa
5           5.0         3.6          1.4         0.2  setosa
6           5.4         3.9          1.7         0.4  setosa
7           4.6         3.4          1.4         0.3  setosa
8           5.0         3.4          1.5         0.2  setosa
9           4.4         2.9          1.4         0.2  setosa
10          4.9         3.1          1.5         0.1  setosa
tail(iris,10)
    Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
141          6.7         3.1          5.6         2.4 virginica
142          6.9         3.1          5.1         2.3 virginica
143          5.8         2.7          5.1         1.9 virginica
144          6.8         3.2          5.9         2.3 virginica
145          6.7         3.3          5.7         2.5 virginica
146          6.7         3.0          5.2         2.3 virginica
147          6.3         2.5          5.0         1.9 virginica
148          6.5         3.0          5.2         2.0 virginica
149          6.2         3.4          5.4         2.3 virginica
150          5.9         3.0          5.1         1.8 virginica
str(iris)
'data.frame':   150 obs. of  5 variables:
 $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
 $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
 $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
 $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
 $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Notched box plot of sepal length for each iris species; outliers are shown
# as pink points. The y-axis label names the plotted column (Sepal.Length).
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_boxplot(
    outliers = TRUE,
    outlier.colour = "pink",
    notch = TRUE,
    fill = "beige"
  ) +
  labs(title = "graph", x = "Species", y = "Sepal.Length") +
  theme_minimal()

Develop a script in R to create a violin plot displaying the distribution of a continuous variable, with a separate violin for each group, using ggplot2.

# Violin plot of sepal length per species with a narrow box plot overlaid.
violin_plot <- ggplot(
  data = iris,
  mapping = aes(x = Species, y = Sepal.Length, fill = Species)
) +
  # trim = FALSE keeps the violin tails instead of cutting them at the data range
  geom_violin(trim = FALSE) +
  # Slim white box plot drawn on top of each violin
  geom_boxplot(fill = "white", width = 0.1) +
  labs(
    title = "Distribution of Sepal Length by Species",
    x = "Species",
    y = "Sepal Length"
  ) +
  theme_minimal() +
  # The x axis already names each species, so the fill legend is hidden
  theme(legend.position = "none")
print(violin_plot)

Write an R program to create multiple dot plots for grouped data, comparing the distributions of variables across different categories, using ggplot2’s position_dodge function.

library(ggplot2)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tidyr)
head(mtcars)
                   mpg cyl disp  hp drat    wt  qsec vs am gear carb
Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
dim(mtcars)
[1] 32 11
# Load the built-in mtcars data, compute the pairwise Pearson correlations
# between all of its columns, and display the resulting matrix.
data("mtcars")
cor_matrix <- cor(mtcars, method = "pearson")
print(cor_matrix)
            mpg        cyl       disp         hp        drat         wt
mpg   1.0000000 -0.8521620 -0.8475514 -0.7761684  0.68117191 -0.8676594
cyl  -0.8521620  1.0000000  0.9020329  0.8324475 -0.69993811  0.7824958
disp -0.8475514  0.9020329  1.0000000  0.7909486 -0.71021393  0.8879799
hp   -0.7761684  0.8324475  0.7909486  1.0000000 -0.44875912  0.6587479
drat  0.6811719 -0.6999381 -0.7102139 -0.4487591  1.00000000 -0.7124406
wt   -0.8676594  0.7824958  0.8879799  0.6587479 -0.71244065  1.0000000
qsec  0.4186840 -0.5912421 -0.4336979 -0.7082234  0.09120476 -0.1747159
vs    0.6640389 -0.8108118 -0.7104159 -0.7230967  0.44027846 -0.5549157
am    0.5998324 -0.5226070 -0.5912270 -0.2432043  0.71271113 -0.6924953
gear  0.4802848 -0.4926866 -0.5555692 -0.1257043  0.69961013 -0.5832870
carb -0.5509251  0.5269883  0.3949769  0.7498125 -0.09078980  0.4276059
            qsec         vs          am       gear        carb
mpg   0.41868403  0.6640389  0.59983243  0.4802848 -0.55092507
cyl  -0.59124207 -0.8108118 -0.52260705 -0.4926866  0.52698829
disp -0.43369788 -0.7104159 -0.59122704 -0.5555692  0.39497686
hp   -0.70822339 -0.7230967 -0.24320426 -0.1257043  0.74981247
drat  0.09120476  0.4402785  0.71271113  0.6996101 -0.09078980
wt   -0.17471588 -0.5549157 -0.69249526 -0.5832870  0.42760594
qsec  1.00000000  0.7445354 -0.22986086 -0.2126822 -0.65624923
vs    0.74453544  1.0000000  0.16834512  0.2060233 -0.56960714
am   -0.22986086  0.1683451  1.00000000  0.7940588  0.05753435
gear -0.21268223  0.2060233  0.79405876  1.0000000  0.27407284
carb -0.65624923 -0.5696071  0.05753435  0.2740728  1.00000000
# Reshape the correlation matrix to long format: one row per variable pair
# (columns Var1 and Var2) with the correlation value in Freq.
cor_df <- as.data.frame(as.table(cor_matrix), responseName = "Freq")
head(cor_df, n = 6L)
  Var1 Var2       Freq
1  mpg  mpg  1.0000000
2  cyl  mpg -0.8521620
3 disp  mpg -0.8475514
4   hp  mpg -0.7761684
5 drat  mpg  0.6811719
6   wt  mpg -0.8676594
# Correlation heatmap: one tile per variable pair, filled on a diverging
# blue-white-yellow scale centred at 0 and labelled with the rounded value.
ggplot(cor_df, aes(x = Var1, y = Var2, fill = Freq)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "yellow",
    midpoint = 0,
    # Spelled out in full: `limit` only worked via partial argument matching.
    limits = c(-1, 1),
    name = "correlation"
  ) +
  geom_text(aes(label = round(Freq, 2)), size = 3) +
  theme_minimal() +
  labs(
    title = "correlation matrix",
    x = "Var1", y = "Var2"
  ) +
  # Tilt the x-axis labels so the variable names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Create an R program to calculate and visualize the correlation matrix for a given dataset, with color-coded cells indicating the strength and direction of the correlations, using ggplot2's geom_tile function.

data(mtcars)
head(mtcars, n=10)
                   mpg cyl  disp  hp drat    wt  qsec vs am gear carb
Mazda RX4         21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
Mazda RX4 Wag     21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
Datsun 710        22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
Hornet 4 Drive    21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
Hornet Sportabout 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
Valiant           18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
Duster 360        14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
Merc 240D         24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
Merc 230          22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
Merc 280          19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
# Pairwise correlations between every pair of mtcars columns
cor_matrix <- cor(x = mtcars)
# Long format for plotting: factor columns Var1 and Var2 plus the value in Freq
cor_df <- as.data.frame(as.table(cor_matrix), stringsAsFactors = TRUE)
cor_df
    Var1 Var2        Freq
1    mpg  mpg  1.00000000
2    cyl  mpg -0.85216196
3   disp  mpg -0.84755138
4     hp  mpg -0.77616837
5   drat  mpg  0.68117191
6     wt  mpg -0.86765938
7   qsec  mpg  0.41868403
8     vs  mpg  0.66403892
9     am  mpg  0.59983243
10  gear  mpg  0.48028476
11  carb  mpg -0.55092507
12   mpg  cyl -0.85216196
13   cyl  cyl  1.00000000
14  disp  cyl  0.90203287
15    hp  cyl  0.83244745
16  drat  cyl -0.69993811
17    wt  cyl  0.78249579
18  qsec  cyl -0.59124207
19    vs  cyl -0.81081180
20    am  cyl -0.52260705
21  gear  cyl -0.49268660
22  carb  cyl  0.52698829
23   mpg disp -0.84755138
24   cyl disp  0.90203287
25  disp disp  1.00000000
26    hp disp  0.79094859
27  drat disp -0.71021393
28    wt disp  0.88797992
29  qsec disp -0.43369788
30    vs disp -0.71041589
31    am disp -0.59122704
32  gear disp -0.55556920
33  carb disp  0.39497686
34   mpg   hp -0.77616837
35   cyl   hp  0.83244745
36  disp   hp  0.79094859
37    hp   hp  1.00000000
38  drat   hp -0.44875912
39    wt   hp  0.65874789
40  qsec   hp -0.70822339
41    vs   hp -0.72309674
42    am   hp -0.24320426
43  gear   hp -0.12570426
44  carb   hp  0.74981247
45   mpg drat  0.68117191
46   cyl drat -0.69993811
47  disp drat -0.71021393
48    hp drat -0.44875912
49  drat drat  1.00000000
50    wt drat -0.71244065
51  qsec drat  0.09120476
52    vs drat  0.44027846
53    am drat  0.71271113
54  gear drat  0.69961013
55  carb drat -0.09078980
56   mpg   wt -0.86765938
57   cyl   wt  0.78249579
58  disp   wt  0.88797992
59    hp   wt  0.65874789
60  drat   wt -0.71244065
61    wt   wt  1.00000000
62  qsec   wt -0.17471588
63    vs   wt -0.55491568
64    am   wt -0.69249526
65  gear   wt -0.58328700
66  carb   wt  0.42760594
67   mpg qsec  0.41868403
68   cyl qsec -0.59124207
69  disp qsec -0.43369788
70    hp qsec -0.70822339
71  drat qsec  0.09120476
72    wt qsec -0.17471588
73  qsec qsec  1.00000000
74    vs qsec  0.74453544
75    am qsec -0.22986086
76  gear qsec -0.21268223
77  carb qsec -0.65624923
78   mpg   vs  0.66403892
79   cyl   vs -0.81081180
80  disp   vs -0.71041589
81    hp   vs -0.72309674
82  drat   vs  0.44027846
83    wt   vs -0.55491568
84  qsec   vs  0.74453544
85    vs   vs  1.00000000
86    am   vs  0.16834512
87  gear   vs  0.20602335
88  carb   vs -0.56960714
89   mpg   am  0.59983243
90   cyl   am -0.52260705
91  disp   am -0.59122704
92    hp   am -0.24320426
93  drat   am  0.71271113
94    wt   am -0.69249526
95  qsec   am -0.22986086
96    vs   am  0.16834512
97    am   am  1.00000000
98  gear   am  0.79405876
99  carb   am  0.05753435
100  mpg gear  0.48028476
101  cyl gear -0.49268660
102 disp gear -0.55556920
103   hp gear -0.12570426
104 drat gear  0.69961013
105   wt gear -0.58328700
106 qsec gear -0.21268223
107   vs gear  0.20602335
108   am gear  0.79405876
109 gear gear  1.00000000
110 carb gear  0.27407284
111  mpg carb -0.55092507
112  cyl carb  0.52698829
113 disp carb  0.39497686
114   hp carb  0.74981247
115 drat carb -0.09078980
116   wt carb  0.42760594
117 qsec carb -0.65624923
118   vs carb -0.56960714
119   am carb  0.05753435
120 gear carb  0.27407284
121 carb carb  1.00000000
# Correlation heatmap for mtcars: one tile per variable pair, filled on a
# diverging blue-white-red scale centred at 0 and labelled with the rounded
# correlation value.
ggplot(cor_df, aes(x = Var1, y = Var2, fill = Freq)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red",
    midpoint = 0,
    # Spelled out in full: `limit` only worked via partial argument matching.
    limits = c(-1, 1),
    name = "Correlation"
  ) +
  geom_text(aes(label = round(Freq, 2)), size = 3) +
  theme_minimal() +
  # Axis titles are blanked; the tick labels already name the variables
  labs(title = "Correlation matrix(mtcars)", x = "", y = "") +
  # Tilt the x-axis labels so the variable names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))