# Print the iris data frame to inspect its contents.
# NOTE(review): the code below uses columns `variety` and `sepal.length`, so
# this is presumably a data set loaded earlier in the file rather than the
# built-in datasets::iris - confirm.
iris
# Box plot of sepal length for each variety.
# aes() maps variety to the x axis and sepal.length to the y axis;
# fill = variety colours each box by its variety.
box_plot <- ggplot(
  iris,
  aes(x = variety, y = sepal.length, fill = variety)
) +
  geom_boxplot() +
  labs(
    title = "Boxplot using ggplot2",
    x = "Class",
    y = "Length"
  ) +
  theme(
    legend.position = "right",
    text = element_text(color = "black", size = 10),
    axis.text.x = element_text(color = "blue", size = 9),
    axis.text.y = element_text(color = "red", size = 8)
  )

# The plot is stored in a variable so it can be displayed and saved by name.
box_plot

# Pass the plot explicitly instead of relying on ggsave()'s default of
# last_plot(), which silently changes if another plot is drawn in between.
# Width and height are not set, so ggsave() takes them from the size of the
# current graphics device.
ggsave("box_plot_500dpi.png", plot = box_plot, dpi = 500)
Saving 4.17 x 2.57 in image
# In the box plot, each dot marks an outlier
# Violin plot of sepal length for each variety.
ggplot(data = iris, aes(x = variety, y = sepal.length, fill = variety)) +
  geom_violin() +
  labs(
    title = "This plot is created using ggplot",
    x = "Class",
    # The aesthetic name is case-sensitive: it must be lower-case `y`,
    # otherwise the y-axis label is not applied.
    y = "Sepal Length",
    caption = "Source = iris dataset"
  )
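# The widest part of each violin marks where the observations are most
# concentrated (highest density); this is not necessarily the mean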
# Per-column summary of the data set (quartiles and mean for the numeric
# columns; length, class and mode for the character column).
summary(iris)
sepal.length
Min. :4.300
1st Qu.:5.100
Median :5.800
Mean :5.843
3rd Qu.:6.400
Max. :7.900
sepal.width
Min. :2.000
1st Qu.:2.800
Median :3.000
Mean :3.057
3rd Qu.:3.300
Max. :4.400
petal.length
Min. :1.000
1st Qu.:1.600
Median :4.350
Mean :3.758
3rd Qu.:5.100
Max. :6.900
petal.width
Min. :0.100
1st Qu.:0.300
Median :1.300
Mean :1.199
3rd Qu.:1.800
Max. :2.500
variety
Length:150
Class :character
Mode :character
## Correlation
# Correlation coefficient between sepal length and sepal width.
cor(iris[["sepal.length"]], iris[["sepal.width"]])
[1] -0.1175698
# Correlation values range from -1 to +1
# Correlation matrix of the numeric columns. cor() fails on non-numeric
# columns, so the columns are selected by type instead of by hard-coded
# position; for this data set that is the same four measurement columns.
cor_matrix <- cor(iris[vapply(iris, is.numeric, logical(1))])
cor_matrix
sepal.length
sepal.length 1.0000000
sepal.width -0.1175698
petal.length 0.8717538
petal.width 0.8179411
sepal.width
sepal.length -0.1175698
sepal.width 1.0000000
petal.length -0.4284401
petal.width -0.3661259
petal.length
sepal.length 0.8717538
sepal.width -0.4284401
petal.length 1.0000000
petal.width 0.9628654
petal.width
sepal.length 0.8179411
sepal.width -0.3661259
petal.length 0.9628654
petal.width 1.0000000
# cor() requires numeric input, so only the numeric columns (1 to 4) are used
# Heat maps of the correlation matrix, to see how the variables relate to
# each other. The package only needs to be attached once.
library(ggcorrplot)

# Full matrix
ggcorrplot(corr = cor_matrix)

# Lower triangle only
ggcorrplot(corr = cor_matrix, type = "lower")

# Upper triangle only
ggcorrplot(corr = cor_matrix, type = "upper")
# Lower-triangle heat map with a custom palette. The parameter is named
# `colors` (spelled out here rather than relying on partial matching of
# `color`); it takes the colours for low, mid and high correlation values,
# in that order.
ggcorrplot(
  cor_matrix,
  type = "lower",
  colors = c("blue", "purple", "white")
)

# Same plot with the correlation coefficients printed in the cells.
ggcorrplot(
  cor_matrix,
  type = "lower",
  colors = c("blue", "purple", "white"),
  lab = TRUE
)
# Pairwise plot matrix of all columns, coloured by variety.
library(GGally)
ggpairs(data = iris, mapping = aes(colour = variety))
library(plotly)
# Interactive violin plot of sepal length.
# The pipe (%>%) passes `iris` to plot_ly() as its first argument.
fig <- iris %>%
  plot_ly(y = ~sepal.length, type = "violin")
fig
# Interactive box plot of sepal length.
# Written as a direct call; equivalent to iris %>% plot_ly(...).
fig <- plot_ly(iris, y = ~sepal.length, type = "box")
fig
# Interactive histogram of sepal length (mapped to the x axis).
# Written as a direct call; equivalent to iris %>% plot_ly(...).
fig <- plot_ly(iris, x = ~sepal.length, type = "histogram")
fig
# Interactive box plot of sepal length, one box per variety.
plot_ly(data = iris, x = ~variety, y = ~sepal.length, type = "box")