# Global knitr chunk options: echo the source code and cache chunk results.
knitr::opts_chunk$set(
  echo = TRUE,
  cache = TRUE
)
Set echo=FALSE to hide a chunk's code in the knitted output; leave it as echo=TRUE to show the code.
Loading required packages
library(ggplot2)
library(dplyr)
library(ggthemes)
library(ggThemeAssist)
library(forcats)
library(ggpubr)
library(CGPfunctions)
library(patchwork)
library(gridExtra)
# Load the built-in iris data and draw a basic scatter plot of the two sepal
# measurements.
data("iris")
ggplot(iris, mapping = aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point()

# Reload iris and inspect its structure.
data("iris")
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Blue scatter plot of sepal length against sepal width; labs() supplies the
# title and renames the x and y axes.
ggplot(iris) +
  geom_point(
    mapping = aes(x = Sepal.Length, y = Sepal.Width),
    colour = "blue"
  ) +
  labs(
    title = "Scatter Plot of Sepal length and Sepal width",
    x = "Sepal length",
    y = "Sepal width"
  )
Data Visualization and ggplot
ggplot2 implements data visualization in the R programming language following the Grammar of Graphics. A plot is built from several layers, which are as follows:
Building blocks (layers) of the grammar of graphics. Data: the data set itself. Aesthetics: the data is mapped onto aesthetic attributes such as x-axis, y-axis, color, fill, size, labels, alpha, shape, line width and line type. Geometries: how the data is displayed, using points, lines, histograms, bars or boxplots. Facets: subsets of the data displayed in columns and rows. Statistics: binning, smoothing, descriptive, intermediate. Coordinates: the space between data and display, using Cartesian, fixed, polar, limits. Themes: non-data ink.
The mtcars (Motor Trend Car Road Tests) data set comprises fuel consumption and 10 aspects of automobile design and performance for 32 automobiles. It ships with base R in the datasets package, so no additional package is needed to load it.
# Load mtcars and preview its first six rows.
data("mtcars")
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Per-column summary (min, quartiles, median, mean, max) of mtcars.
summary(object = mtcars)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
Data layer: in the data layer we define the source of the information to be visualized. Let's use the mtcars data set, which is available in every R session through the built-in datasets package.
# Data layer only: a plot object for mtcars that carries just a title.
ggplot(mtcars) +
  labs(title = "MTCars Data Plot")
Aesthetic layer ggplot2 in R Here we will display and map data-set into certain aesthetics.
# Aesthetic layer: map horsepower to x, mpg to y and displacement to colour.
# No geom is added here, so no data points are drawn yet.
# Fix: the argument was misspelled `tile`, so the plot title was never set;
# the y-axis label typo ("Galon") is corrected as well.
ggplot(data = mtcars, aes(x = hp, y = mpg, col = disp)) +
  labs(
    title = "Miles per Gallon vs Horsepower",
    x = "Horse Power",
    y = "Miles Per Gallon"
  )
Geometric layer ggplot2 in R geometric layer control the essential elements, see how our data being displayed using point, line, histogram, bar, boxplot.
# Geometric layer: the same mapping as before, now drawn as points.
# Fix: `tile` -> `title` (the title was silently dropped) and the y-axis
# label typo "Galon" -> "Gallon".
ggplot(data = mtcars, aes(x = hp, y = mpg, col = disp)) +
  labs(
    title = "Miles per Gallon vs Horsepower",
    x = "Horse Power",
    y = "Miles Per Gallon"
  ) +
  geom_point()
Geometric layer Adding Size, color, and shape and then plotting the Histogram plot
# Three variations of the hp-vs-mpg scatter plot.
# Fix applied to all three: `tile` -> `title` (the title was silently
# dropped) and the y-axis label typo "Galon" -> "Gallon".

# Point size follows displacement.
ggplot(data = mtcars, aes(x = hp, y = mpg, size = disp)) +
  labs(
    title = "Miles per Gallon vs Horsepower",
    x = "Horse Power",
    y = "Miles Per Gallon"
  ) +
  geom_point()

# Point size follows displacement, colour follows the number of cylinders.
ggplot(data = mtcars, aes(x = hp, y = mpg, size = disp, col = factor(cyl))) +
  labs(
    title = "Miles per Gallon vs Horsepower",
    x = "Horse Power",
    y = "Miles Per Gallon"
  ) +
  geom_point()

# Point shape follows transmission type, colour follows the number of
# cylinders.
ggplot(
  data = mtcars,
  aes(x = hp, y = mpg, shape = factor(am), color = factor(cyl))
) +
  labs(
    title = "Miles per Gallon vs Horsepower",
    x = "Horse Power",
    y = "Miles Per Gallon"
  ) +
  geom_point()
# Every point is drawn with the fixed shape 4 (a cross), coloured by the
# number of cylinders.
# Fix: a `+` was missing after geom_point(shape = 4), so the labels were
# evaluated as a separate expression (printing NULL) and never reached the
# plot. The stray second geom_point() is removed because it would draw
# default points over the crosses. `tile` -> `title`, "Galon" -> "Gallon".
ggplot(data = mtcars, aes(x = hp, y = mpg, color = factor(cyl))) +
  geom_point(shape = 4) +
  labs(
    title = "Miles per Gallon vs Horsepower",
    x = "Horse Power",
    y = "Miles Per Gallon"
  )
# here, geom_point(shape=4) is used
# check the tutorial pdf
# Histogram of horsepower using bins that are 10 units wide.
ggplot(mtcars, aes(x = hp)) +
  geom_histogram(binwidth = 10) +
  labs(
    title = "Histogram of Horse-power",
    x = "Horse-power",
    y = "Count"
  )
Facet layer
The facet layer splits the data into subsets of the entire data set and lets those subsets be visualized as panels of the same plot. Here we separate rows according to transmission type and separate columns according to cylinders.
# Base scatter plot: one point shape per cylinder count.
p <- ggplot(mtcars, aes(x = hp, y = mpg, shape = factor(cyl))) +
  geom_point()

# Separate rows according to transmission type (am).
p +
  labs(
    title = "Miles per Gallon Vs Horse-power",
    x = "Horse-power",
    y = "Miles per Gallon"
  ) +
  facet_grid(am ~ .)

# The stored plot itself is unfaceted.
p
The line p + facet_grid(am ~ .) in your ggplot2 code is used to create faceted plots by the am variable in rows.
Understanding facet_grid(am ~ .) facet_grid() creates multiple plots based on the values of a categorical or discrete numeric variable.
The formula inside facet_grid(am ~ .):
(1)The left side (am) represents the rows.
(2)The right side (.) means no faceting by columns.
This means the plot will be split into multiple rows, where each row corresponds to a unique value of the am (transmission type) variable.
# Scatter plot of hp vs mpg with one point shape per cylinder count.
q <- ggplot(mtcars, aes(hp, mpg, shape = factor(cyl))) +
  geom_point()

# One column of panels per cylinder count.
q +
  labs(
    title = "Miles per Gallon Vs Horse-power",
    x = "Horse-power",
    y = "Miles per Gallon"
  ) +
  facet_grid(. ~ cyl)

# Same base plot, rebuilt before the facet_wrap() variant.
q <- ggplot(mtcars, aes(hp, mpg, shape = factor(cyl))) +
  geom_point()

# facet_wrap() gives the group-wise presentation, one panel per cyl value.
q +
  labs(
    title = "Miles per Gallon Vs Horse-power",
    x = "Horse-power",
    y = "Miles per Gallon"
  ) +
  facet_wrap(~cyl)
Understanding facet_grid(. ~ cyl) facet_grid() is a function used to create multiple small plots (facets) based on the values of a categorical variable.
The formula inside facet_grid(. ~ cyl):
(1)The left side of the formula (.) represents rows in the grid. Since there’s a dot (.) here, it means no faceting by rows.
(2)The right side of the formula (~ cyl) means faceting by the cyl (number of cylinders in the mtcars dataset) in columns.
As a result, the plot will be split into multiple subplots, with each column representing a different number of cylinders (cyl).
Statistics layer
# Statistics layer: scatter plot with a fitted linear-model trend line.
ggplot(mtcars, aes(x = hp, y = mpg)) +
  geom_point() +
  stat_smooth(method = lm, colour = "blue") +
  labs(
    title = "Miles per Gallon Vs Horse-power",
    x = "Horse-power",
    y = "Miles per Gallon"
  )
## `geom_smooth()` using formula = 'y ~ x'
Coordinates layer: in this layer the data coordinates are mapped onto the plane of the graphic. Here we adjust the axes and change the spacing of the displayed data to control the plot dimensions.
# Coordinates layer: weight vs mpg with a linear trend line, explicit axis
# limits without padding, and a fixed 1:1 aspect ratio.
# NOTE: observations outside the scale limits are dropped before plotting,
# which ggplot2 reports as "Removed ... rows" warnings.
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point() +
  stat_smooth(method = lm, colour = "blue") +
  labs(
    title = "Miles per Gallon Vs Weight",
    x = "Weight",
    y = "Miles per Gallon"
  ) +
  scale_y_continuous(
    "Miles per Gallon",
    limits = c(2, 25),
    expand = c(0, 0)
  ) +
  scale_x_continuous(
    "Weight",
    limits = c(0, 25),
    expand = c(0, 0)
  ) +
  coord_equal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).
Coord_cartesian() to proper zoom in
# Zoom in on weights between 3 and 6 with coord_cartesian().
# Fix: `am` is stored as 0/1 numbers. Mapping the raw numeric column to
# colour made geom_smooth() drop the colour aesthetic with a warning that
# suggests converting the variable to a factor. Wrapping it in factor() gives
# one discrete colour (and one smooth) per transmission type.
ggplot(data = mtcars, aes(x = wt, y = hp, col = factor(am))) +
  geom_point() +
  geom_smooth() +
  coord_cartesian(xlim = c(3, 6))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
geom_smooth() adds a smoothed trend line (the default method is LOESS for
small data sets) and helps visualize the general
relationship between wt and hp.
coord_cartesian(xlim = c(3, 6)) (1) Zooms in on the x-axis by setting xlim = c(3, 6). (2)Unlike scale_x_continuous(), which removes data outside the range, coord_cartesian() keeps all data points but only displays the specified range. (3)The y-axis automatically adjusts to fit the visible data.
Theme layer ggplot2 in R layer controls the finer points of display like the font size and background color properties.
Example-1 Theme layer -element_rect() function
# Theme layer: orange plot background with a gray border, one panel column
# per cylinder count.
ggplot(mtcars, aes(x = hp, y = mpg)) +
  geom_point() +
  facet_grid(. ~ cyl) +
  labs(
    title = "Miles per Gallon Vs Horse-power",
    x = "Horse-power",
    y = "Miles per Gallon"
  ) +
  theme(
    plot.background = element_rect(fill = "orange", colour = "gray")
  )
Example-2
# Grid of panels (rows: transmission, columns: cylinders) using theme_gray().
ggplot(mtcars, aes(x = hp, y = mpg)) +
  geom_point() +
  facet_grid(am ~ cyl) +
  labs(
    title = "Miles per Gallon Vs Horse-power",
    x = "Horse-power",
    y = "Miles per Gallon"
  ) +
  theme_gray()
ggplot2 in R provides various types of visualizations. More parameters can be used included in the package as the package gives greater control over the visualizations of data. Many packages can integrate with the ggplot2 package to make the visualizations interactive and animated.
# 2D density contour plot of weight against miles per gallon.
# Fix: after_stat(level) replaces the dot-dot form `..level..`, which has
# been deprecated since ggplot2 3.4.0 and raised a deprecation warning.
ggplot(data = mtcars, aes(x = wt, y = mpg)) +
  stat_density2d(
    aes(fill = after_stat(level)), # fill follows the computed density level
    geom = "polygon",
    color = "white"
  ) +
  scale_fill_viridis_c() +
  labs(
    title = "2D Density Contour Plot of mtcars Data-set",
    x = "Weight(wt)",
    y = "Miles per Gallon(mpg)",
    fill = "Density"
  ) +
  theme_minimal()
In ggplot2, stat_density_2d() generates the 2D density contour plot. The aesthetics x and y specify the variables on the x-axis and y-axis, respectively. The fill aesthetic is mapped to the computed density level (written ..level.. in older code and after_stat(level) since ggplot2 3.4.0) so that the fill color follows the density levels.
stat_density_2d(aes(fill = ..level..), geom = "polygon", color = "white"): stat_density_2d() computes the 2D density estimate of the data. (1) aes(fill = ..level..): uses ..level.., which represents the density levels, to color the plot. (2) geom = "polygon": fills the density regions with colors instead of drawing contour lines. (3) color = "white": outlines the density regions in white to create visible contour boundaries.
Creating a panel of different plots
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Keep only the four mtcars columns that are plotted below.
selected_col <- c("mpg", "disp", "hp", "drat")
selected_data <- mtcars[, selected_col]

# One histogram per selected variable.
hist_plot_mpg <- ggplot(selected_data, aes(x = mpg)) +
  geom_histogram(binwidth = 2, fill = "blue", colour = "white") +
  labs(title = "Histogram: Miles per Gallon", x = "Miles per Gallon")

hist_plot_disp <- ggplot(selected_data, aes(x = disp)) +
  geom_histogram(binwidth = 50, fill = "red", colour = "white") +
  labs(
    title = "Histogram: Displacement",
    x = "Displacement",
    y = "Frequency"
  )

hist_plot_hp <- ggplot(selected_data, aes(x = hp)) +
  geom_histogram(binwidth = 20, fill = "green", colour = "white") +
  labs(
    title = "Histogram: Horsepower",
    x = "Horsepower",
    y = "Frequency"
  )

hist_plot_drat <- ggplot(selected_data, aes(x = drat)) +
  geom_histogram(binwidth = 0.5, fill = "orange", colour = "white") +
  labs(
    title = "Histogram: Drat",
    x = "Drat",
    y = "Frequency"
  )

# Lay the four histograms out on a grid; ncol = 2 places them in two columns.
grid.arrange(
  hist_plot_mpg, hist_plot_disp,
  hist_plot_hp, hist_plot_drat,
  ncol = 2
)
#Histogram ####
# Bin width fixed at 0.5.
ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length),
    binwidth = 0.5, fill = "lightblue", colour = "black"
  )

# Number of bins fixed at 10 instead.
ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length),
    bins = 10, fill = "lightblue", colour = "black"
  )
# `bins` is the number of bins. It is overridden by `binwidth` and
# defaults to 30.

# Same histogram with white bar outlines.
ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length),
    bins = 10, fill = "lightblue", colour = "white"
  )
# Fill colour by species, black outlines.
ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length, fill = Species),
    bins = 10, colour = "black"
  )

# Fill colour by species, white outlines.
ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length, fill = Species),
    bins = 10, colour = "white"
  )

# Mapping declared once in ggplot(); strongly transparent bars.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.2)

# Semi-transparent bars with white outlines.
ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length, fill = Species),
    bins = 10, colour = "white", alpha = 0.5
  )
To knit to PDF, install the tinytex package with install.packages("tinytex") and then run tinytex::install_tinytex() in the Console.
In line code
# Draw 10 values from 1..1000 without replacement and print them.
res <- sample(1:1000, size = 10)
res
## [1] 364 378 102 407 512 863 545 646 114 326
# Total of the sampled values.
sum(res)
## [1] 4257
The total of the 10 sampled numbers is 4257.
# Species-filled histogram with the default coordinate expansion switched on
# explicitly.
# Fix: `T` is an ordinary variable that can be reassigned; the logical
# constant TRUE is used instead.
ggplot(data = iris) +
  geom_histogram(
    aes(x = Sepal.Length, fill = Species),
    bins = 10,
    col = "black",
    alpha = 0.6
  ) +
  coord_cartesian(expand = TRUE)
About the expand argument of coord_cartesian(): if TRUE (the default), a small
expansion factor is added to the limits to ensure that data and axes don't
overlap. If FALSE, limits are taken exactly from the data or from xlim/ylim.
# Species-filled histogram with custom y-axis breaks and padding.
# Fix: the argument was misspelled `apha`, so the transparency was ignored
# with an "unknown parameters" warning; it is now `alpha`.
ggplot(data = iris) +
  geom_histogram(
    aes(x = Sepal.Length, fill = Species),
    bins = 10,
    col = "black",
    alpha = 0.5
  ) +
  scale_y_continuous(
    breaks = seq(-10, 30, by = 5),
    expand = expansion(
      # Multiplicative padding as c(lower, upper): -0.1 pulls the lower end
      # inwards by 10% of the range, 1 extends the upper end by 100%.
      mult = c(-0.1, 1),
      # Additive padding as c(lower, upper): 10 extra units at the bottom,
      # none at the top.
      add = c(10, 0)
    )
  )
# Species-filled histogram: 5 units of padding above the bars, no padding on
# the x axis, plus a title and axis labels.
ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length, fill = Species),
    bins = 10, colour = "black", alpha = 0.5
  ) +
  scale_y_continuous(expand = expansion(add = c(0, 5))) +
  scale_x_continuous(expand = expansion(add = c(0, 0))) +
  labs(
    title = "Histogram of Sepal-length as per Species",
    x = "Sepal-length",
    y = "Count"
  )
#Facet#
ggplot2 in R facet layer is used to split the data up into subsets of the entire data-set and it allows the subsets to be visualized on the same plot.
# One panel per species (faceting variable given via vars()); both axes are
# free to differ between panels.
ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length, fill = Species),
    bins = 10, colour = "white"
  ) +
  facet_wrap(vars(Species), ncol = 1, scales = "free")

# Same panels with the default shared axes.
ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length, fill = Species),
    bins = 10, colour = "white"
  ) +
  facet_wrap(vars(Species), ncol = 1)

# Only the y axis is free to differ between panels.
ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length, fill = Species),
    bins = 10, colour = "white"
  ) +
  facet_wrap(vars(Species), ncol = 1, scales = "free_y")
facet_grid() forms a matrix of panels defined by row and column faceting variables. It is most useful when you have two discrete variables, and all combinations of the variables exist in the data.
# facet_grid() with species as the row variable.
ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length, fill = Species),
    bins = 10, colour = "white", alpha = 0.5
  ) +
  facet_grid(rows = vars(Species))
# Read the student survey workbook and inspect its structure.
# Fix: the file is read through its full path instead of calling setwd(),
# so the session's working directory is left untouched. Adjust `data_dir`
# to wherever the workbook lives on your machine.
data_dir <- "D:/R-Programming/Class-12/Data"
student <- readxl::read_excel(file.path(data_dir, "StudentSurveyData.xlsx"))
str(student)
## tibble [111 × 15] (S3: tbl_df/tbl/data.frame)
## $ ID : num [1:111] 1 2 3 4 5 6 7 8 9 10 ...
## $ Gender : chr [1:111] "Female" "Male" "Male" "Male" ...
## $ Age : num [1:111] 20 23 21 21 23 26 21 30 20 21 ...
## $ Class : chr [1:111] "Sophomore" "Senior" "Freshman" "Sophomore" ...
## $ Major : chr [1:111] "Other" "Management" "Other" "IS" ...
## $ Grad Intention : chr [1:111] "Yes" "Yes" "Yes" "Yes" ...
## $ GPA : num [1:111] 2.88 3.6 2.5 2.5 2.8 2.34 3 3.1 3.6 3.3 ...
## $ Employment : chr [1:111] "Full-Time" "Part-Time" "Part-Time" "Full-Time" ...
## $ Salary : num [1:111] 55 30 50 45 45 83 55 85 35 42.5 ...
## $ Social Networking: num [1:111] 5 4 2 4 7 3 3 1 0 11 ...
## $ Satisfaction : num [1:111] 3 4 4 6 4 2 3 2 4 4 ...
## $ Spending : num [1:111] 850 860 1100 1100 1000 1200 1000 700 1000 700 ...
## $ Computer : chr [1:111] "Laptop" "Desktop" "Laptop" "Tablet" ...
## $ Text Messages : num [1:111] 200 50 200 250 100 0 50 300 400 100 ...
## $ Wealth : num [1:111] 2 10 70 100 1 5 0.6 1 0.6 1 ...
# GPA histograms: rows by employment status, columns (`cols`) by gender.
ggplot(student) +
  geom_histogram(
    mapping = aes(x = GPA, fill = Employment),
    bins = 10, colour = "black", alpha = 0.5
  ) +
  facet_grid(rows = vars(Employment), cols = vars(Gender))

# GPA histograms: rows by employment status, columns (`cols`) by class.
ggplot(student) +
  geom_histogram(
    mapping = aes(x = GPA, fill = Employment),
    bins = 10, colour = "black", alpha = 0.5
  ) +
  facet_grid(rows = vars(Employment), cols = vars(Class))
# factor() encodes a vector as a factor (a categorical variable). Passing
# `levels` fixes the order of the categories, here the class years from
# Freshman to Senior, which also fixes the order of the facet columns.
student %>%
  mutate(
    Class = factor(
      Class,
      levels = c("Freshman", "Sophomore", "Junior", "Senior")
    )
  ) %>%
  ggplot() +
  geom_histogram(
    mapping = aes(x = Spending, fill = Employment),
    bins = 10, colour = "white"
  ) +
  facet_grid(rows = vars(Employment), cols = vars(Class))
#Theme#
##Built-in ggplot2 theme##
# theme_classic()
ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length, fill = Species),
    bins = 10, colour = "white", alpha = 0.5
  ) +
  facet_wrap(vars(Species), ncol = 1) +
  theme_classic()

# theme_bw()
ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length, fill = Species),
    bins = 10, colour = "white"
  ) +
  facet_wrap(vars(Species), ncol = 1) +
  theme_bw()

# theme_minimal()
ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length, fill = Species),
    bins = 10, colour = "black"
  ) +
  facet_wrap(vars(Species), ncol = 1) +
  theme_minimal()
# The same faceted GPA histogram is shown under five different themes.
# Improvement: the plot was previously spelled out five times; it is now
# built once and each theme is added to the shared base plot.
gpa_hist <- ggplot(data = student) +
  geom_histogram(
    aes(x = GPA, fill = Employment),
    bins = 10, col = "black", alpha = 0.5
  ) +
  facet_grid(rows = vars(Employment), cols = vars(Class))

gpa_hist + theme_minimal()
gpa_hist + theme_linedraw()
gpa_hist + theme_void()
gpa_hist + theme_test()
gpa_hist + ggthemes::theme_hc()
##Other pre-built theme##
library(ggthemes)
# theme_calc(): theme similar to the default settings of LibreOffice Calc
# charts.
ggplot(student) +
  geom_histogram(
    mapping = aes(x = GPA, fill = Employment),
    bins = 10, colour = "black", alpha = 0.5
  ) +
  facet_grid(rows = vars(Employment), cols = vars(Class)) +
  theme_calc()
##Manually editing themes##
library(ggThemeAssist)

# Faceted, fully labelled histogram that the custom theme is applied to.
p <- ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length, fill = Species),
    bins = 10, colour = "white", alpha = 0.5
  ) +
  facet_wrap(vars(Species), ncol = 1) +
  labs(
    title = "Histogram of Sepal Length by Species",
    subtitle = "Using Facet and Other Customizations",
    caption = "Data: Iris",
    x = "Sepal Length",
    y = "Frequency",
    fill = "Species"
  )
# ggThemeAssistGadget(p) opens an app for editing the theme interactively.

# Hand-written theme: typography first, then backgrounds, then the legend.
custom_theme <- theme(
  plot.title = element_text(face = "bold", hjust = 0),
  axis.title = element_text(face = "italic"),
  legend.title = element_text(face = "bold.italic"),
  legend.text = element_text(face = "italic"),
  plot.background = element_rect(fill = "white"),
  panel.background = element_rect(fill = "white"),
  legend.position = "bottom",
  legend.direction = "horizontal"
)

# Apply the custom theme to the plot.
p + custom_theme
##Theme set globally## After a theme has been set globally with theme_set(), every plot that does not set its own theme picks up the global theme.
#theme_set(theme_bw())
##Manually change color##
# Explicit colour per species, supplied as a named vector
# (for example "setosa" = "#6C1C80").
ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length, fill = Species),
    bins = 10, colour = "white", alpha = 0.5
  ) +
  facet_wrap(vars(Species), ncol = 1) +
  scale_fill_manual(
    values = c(
      "setosa" = "#6C1C80",
      "versicolor" = "#30A19C",
      "virginica" = "#123B96"
    )
  )

# ColorBrewer palette; see the tutorial pdf for more palettes.
ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length, fill = Species),
    bins = 10, colour = "white", alpha = 0.5
  ) +
  facet_wrap(vars(Species), ncol = 1) +
  scale_fill_brewer(palette = "Set1")

# Hue-based palette with adjusted luminosity (l), chroma (c) and range of
# hues (h).
ggplot(iris) +
  geom_histogram(
    mapping = aes(x = Sepal.Length, fill = Species),
    bins = 10, colour = "white", alpha = 0.5
  ) +
  facet_wrap(vars(Species), ncol = 1) +
  scale_fill_hue(l = 80, c = 150, h = c(90, 360))
# Overlaid density curves of sepal length, filled by species.
ggplot(iris) +
  geom_density(mapping = aes(x = Sepal.Length, fill = Species))

# One density panel per species, with a title and axis labels.
ggplot(iris) +
  geom_density(mapping = aes(x = Sepal.Length, fill = Species)) +
  facet_wrap(vars(Species), ncol = 1) +
  labs(
    title = "Density Plot of Sepal-length as per Species",
    x = "Sepal-length",
    y = "Density"
  )
#Histogram + Density plot#
# Density curve first, then a histogram whose y value is the computed
# density, one panel per species.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_density(alpha = 0.5, colour = "white") +
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    alpha = 0.5, bins = 10
  ) +
  facet_wrap(vars(Species), ncol = 1)
Here, I have used y = after_stat(density) because geom_histogram() maps y to
the count by default, whereas geom_density() maps y to the density (a
probability density). To avoid a clash between the two scales when the
histogram and the density plot are combined, and because I want to
understand the distribution of the data, the histogram is drawn on the
density scale as well.
# Distribution of sepal length: density-scaled histogram, overlaid density
# curve, a dashed vertical line at the mean and a text label with the mode.
# Fixes:
#  * geom_vline() now sets the line width with `linewidth`; using `size` for
#    lines has been deprecated since ggplot2 3.4.0.
#  * `bins = 30` is stated explicitly. It is the value stat_bin() was already
#    falling back to, so the picture is unchanged and the "Pick better value"
#    message goes away.
ggplot(iris, aes(Sepal.Length)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30,
    color = "#000000", fill = "#0099F8"
  ) +
  geom_density(color = "#000000", fill = "#0EE100", alpha = 0.5) +
  geom_vline(
    aes(xintercept = mean(Sepal.Length)),
    color = "#000000", linewidth = 1, linetype = "dashed"
  ) +
  labs(
    title = "Distribution of Sepal Length",
    subtitle = "Made by ggplot2",
    caption = "Source: Iris Data",
    x = "Sepal Length",
    y = "Density"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(color = "blue", size = 16, face = "bold"),
    plot.subtitle = element_text(size = 10),
    plot.caption = element_text(face = "italic")
  ) +
  annotate(
    "text",
    x = 5.9, y = 0.6,
    label = paste0("Mode: ", round(DescTools::Mode(iris$Sepal.Length), 1)),
    hjust = 0
  )
Here, geom_vline() belongs to the same family as geom_abline() and
geom_hline(). These geoms add reference lines (sometimes called rules) to a
plot, either horizontal, vertical, or diagonal (specified by slope and
intercept). They are useful for annotating plots.
# Vertical bars: number of students per computer type.
ggplot(student) +
  geom_bar(mapping = aes(x = Computer))

# Horizontal bars obtained by flipping the coordinates.
ggplot(student) +
  geom_bar(mapping = aes(x = Computer)) +
  coord_flip()

# Horizontal bars obtained by mapping the variable to y instead.
ggplot(student) +
  geom_bar(mapping = aes(y = Computer))

# The Computer column on its own, kept as a one-column tibble.
student["Computer"]
## # A tibble: 111 × 1
## Computer
## <chr>
## 1 Laptop
## 2 Desktop
## 3 Laptop
## 4 Tablet
## 5 Laptop
## 6 Desktop
## 7 Laptop
## 8 Tablet
## 9 Laptop
## 10 Laptop
## # ℹ 101 more rows
# Frequency table: number of students per computer type.
count(student, Computer)
## # A tibble: 3 × 2
## Computer n
## <chr> <int>
## 1 Desktop 20
## 2 Laptop 80
## 3 Tablet 11
# Bars drawn from pre-computed counts with geom_col().
student %>%
  count(Computer) %>%
  ggplot() +
  geom_col(mapping = aes(x = Computer, y = n))

# coord_flip() is superseded: in many cases it can be replaced by swapping
# the x and y aesthetics, or by setting the orientation argument in geom and
# stat layers.
student %>%
  count(Computer) %>%
  ggplot() +
  geom_col(mapping = aes(x = Computer, y = n)) +
  coord_flip()

# Otherwise, swap the aesthetics to get horizontal bars directly.
student %>%
  count(Computer) %>%
  ggplot() +
  geom_col(mapping = aes(y = Computer, x = n))

# geom_bar() with stat = "identity" also plots the counts as given.
student %>%
  count(Computer) %>%
  ggplot() +
  geom_bar(mapping = aes(x = Computer, y = n), stat = "identity")
#Arranging bars#
###geom_bar modification###
# Explicit bar order through the limits of the discrete x scale.
ggplot(student) +
  geom_bar(mapping = aes(x = Computer)) +
  scale_x_discrete(limits = c("Laptop", "Desktop", "Tablet"))

# fct_infreq() orders the levels by number of observations, largest first.
ggplot(student) +
  geom_bar(mapping = aes(x = fct_infreq(Computer))) +
  labs(x = "Computer Usage Status")

# fct_rev() reverses the order of the factor levels.
ggplot(student) +
  geom_bar(mapping = aes(x = fct_rev(fct_infreq(Computer)))) +
  labs(
    title = "A simple bar plot",
    x = "Computer Usage Status",
    y = "Frequency"
  )

# Fixed fill colours passed directly to the geom, one per bar.
ggplot(student) +
  geom_bar(
    mapping = aes(x = Computer),
    fill = c("black", "bisque3", "red")
  ) +
  theme_bw()

# Fill mapped to the variable, colours chosen with scale_fill_manual().
# scale_fill_manual() belongs to the scale_*_manual() family, which lets us
# specify our own mapping from levels in the data to aesthetic values.
ggplot(student) +
  geom_bar(mapping = aes(x = Computer, fill = Computer)) +
  scale_fill_manual(values = c("black", "bisque3", "red")) +
  labs(
    title = "A simple bar plot",
    x = "Computer Usage Status",
    y = "Frequency"
  )
# Explicit bar order through the limits of the discrete x scale.
student %>%
  count(Computer) %>%
  ggplot() +
  geom_col(mapping = aes(x = Computer, y = n)) +
  scale_x_discrete(limits = c("Laptop", "Desktop", "Tablet"))

# reorder() is a generic function; its default method treats the first
# argument as a categorical variable and reorders its levels based on the
# values of a second, usually numeric, variable.
student %>%
  count(Computer) %>%
  ggplot() +
  geom_col(mapping = aes(x = reorder(Computer, -n), y = n))

# geom_text() belongs to the same family as geom_label(). Text geoms are
# useful for labelling plots, for example to annotate the height of bars.
# geom_text() adds only text; geom_label() draws a rectangle behind the text
# to make it easier to read.
student %>%
  count(Computer) %>%
  ggplot(mapping = aes(x = reorder(Computer, -n), y = n, label = n)) +
  geom_col() +
  geom_text(vjust = -1, color = "black", size = 3.5) +
  theme_minimal() +
  ylim(0, 100)

# aes(label = n) puts each bar's count on top of the bar.
student %>%
  count(Computer) %>%
  ggplot(mapping = aes(x = reorder(Computer, -n), y = n)) +
  geom_col() +
  geom_text(
    mapping = aes(label = n),
    vjust = -0.5, color = "black", size = 3
  ) +
  theme_minimal()
# Counts per computer type within each class, one panel per class.
student %>%
  count(Computer, Class) %>%
  ggplot(mapping = aes(x = reorder(Computer, -n), y = n)) +
  geom_col() +
  geom_text(
    mapping = aes(label = n),
    vjust = -0.5, color = "black", size = 3
  ) +
  facet_wrap(vars(Class)) +
  ylim(0, 35) +
  labs(x = "Computer Usage") +
  theme_minimal()

# Polished version: coloured bars, light theme, centred title and black
# strip text. reorder(Computer, -n) sorts the bars in descending order.
student %>%
  count(Computer, Class) %>%
  ggplot(mapping = aes(x = reorder(Computer, -n), y = n)) +
  geom_col(fill = "cornflowerblue") +
  geom_text(
    mapping = aes(label = n),
    vjust = -0.5, color = "black", size = 3
  ) +
  theme_light() +
  facet_wrap(vars(Class)) +
  ylim(0, 35) +
  labs(
    title = "Frequency of Device Usage by Class",
    x = "Device usage",
    y = "Freq."
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    strip.text = element_text(colour = "black")
  )
#Values on the bars 1#
# Counts written in white just inside the end of each horizontal bar.
ggplot(student, aes(y = Major)) +
  geom_bar() +
  geom_text(
    mapping = aes(x = after_stat(count - 1), label = after_stat(count)),
    stat = "count",
    size = 4, colour = "white"
  ) +
  labs(x = "Frequency", y = NULL)

# Dodged bars per computer type with the count placed just beyond each bar.
ggplot(student, aes(y = Major, fill = Computer)) +
  geom_bar(position = "dodge") +
  geom_text(
    mapping = aes(x = after_stat(count + 1), label = after_stat(count)),
    stat = "count",
    position = position_dodge(1),
    size = 3
  ) +
  labs(x = "Freq.", y = NULL)
# The same class-by-employment bar chart under each position adjustment.
# Default position.
ggplot(student) +
  geom_bar(mapping = aes(x = Class, fill = Employment))

# Stacked bars, stated explicitly.
ggplot(student) +
  geom_bar(mapping = aes(x = Class, fill = Employment), position = "stack")

# Side-by-side bars.
ggplot(student) +
  geom_bar(mapping = aes(x = Class, fill = Employment), position = "dodge")

# Side-by-side bars using the "dodge2" adjustment.
ggplot(student) +
  geom_bar(mapping = aes(x = Class, fill = Employment), position = "dodge2")

# Bars using the "fill" adjustment.
ggplot(student) +
  geom_bar(mapping = aes(x = Class, fill = Employment), position = "fill")
#Arranging bars#
# Class order fixed by recoding Class as a factor with explicit levels.
student %>%
  mutate(
    Class = factor(
      Class,
      levels = c("Freshman", "Sophomore", "Junior", "Senior")
    )
  ) %>%
  ggplot() +
  geom_bar(mapping = aes(x = Class, fill = Employment), position = "fill")

# Class order fixed through the limits of the discrete x scale instead.
ggplot(student) +
  geom_bar(mapping = aes(x = Class, fill = Employment), position = "fill") +
  scale_x_discrete(limits = c("Freshman", "Sophomore", "Junior", "Senior"))

#Values on bars 2#
# Counts printed at the vertical centre of every filled segment.
ggplot(student, aes(x = Class, fill = Employment)) +
  geom_bar(position = "fill") +
  geom_text(
    mapping = aes(label = after_stat(count)),
    stat = "count",
    position = position_fill(vjust = 0.5),
    size = 3
  )
# Install CGPfunctions only when it is missing. Calling install.packages()
# unconditionally tries to reinstall the package on every run and, when the
# package is already loaded, only produces a "package is in use" warning.
if (!requireNamespace("CGPfunctions", quietly = TRUE)) {
  install.packages("CGPfunctions")
}
library(CGPfunctions)
library(ggplot2)

# Stacked bar plot of gender within each computer type; the statistical
# subtitle is switched off and sample-size labels are switched on.
CGPfunctions::PlotXTabs2(
  data = student,
  x = Computer,
  y = Gender,
  results.subtitle = FALSE,
  sample.size.label = TRUE,
  palette = "Set1",
  ggtheme = ggplot2::theme_bw()
) +
  labs(title = "Stacked Barplot of Device Usage by Gender")
# Here, CGPfunctions is a separate package that builds its plots on top of
# ggplot2, so it is loaded with library() in the same way as ggplot2.
# Remember that two categorical variables (y and x) are supplied here,
# so a chi-square test is conducted; its result is shown in the
# subtitle when results.subtitle=TRUE.
# Stacked bar plot of computer type within each class, with the statistical
# subtitle switched on.
CGPfunctions::PlotXTabs2(
  data = student,
  x = Class,
  y = Computer,
  results.subtitle = TRUE,
  sample.size.label = TRUE,
  palette = "Set3",
  ggtheme = ggplot2::theme_bw()
) +
  labs(title = "Stacked Barplot of Device Usage by Class")
#Legend customization# ##Legend Position##
# Legends placed on the right through the theme.
ggplot(iris) +
  geom_point(
    mapping = aes(
      x = Sepal.Length, y = Sepal.Width,
      colour = Species, size = Petal.Length
    )
  ) +
  labs(
    title = "Scatter Plot of Sepal Length vs Width",
    x = "Sepal Length",
    y = "Sepal Width"
  ) +
  theme(legend.position = "right")

# Only the colour legend is moved to the bottom, through guides().
# Guides can be set scale-by-scale with the `guide` argument of a scale, or
# for several aesthetics at once with guides().
ggplot(iris) +
  geom_point(
    mapping = aes(
      x = Sepal.Length, y = Sepal.Width,
      colour = Species, size = Petal.Length
    )
  ) +
  labs(
    title = "Scatter Plot of Sepal Length vs Width",
    x = "Sepal Length",
    y = "Sepal Width"
  ) +
  guides(color = guide_legend(position = "bottom"))
# Colour legend with a custom title, placed at the bottom, laid out
# horizontally, with the legend title to the left of the keys.
# Fix: the `title.position` argument of guide_legend() is deprecated as of
# ggplot2 3.5.0 (the release that introduced the `position` argument used
# here); the title position is now passed through the guide's `theme`
# argument as `legend.title.position`.
ggplot(data = iris) +
  geom_point(aes(
    x = Sepal.Length,
    y = Sepal.Width,
    col = Species,
    size = Petal.Length
  )) +
  labs(
    x = "Sepal Length",
    y = "Sepal Width",
    title = "Scatter Plot of Sepal Length vs Width"
  ) +
  guides(
    color = guide_legend(
      title = "Species Name",
      position = "bottom",
      direction = "horizontal",
      theme = theme(legend.title.position = "left"),
      reverse = FALSE
    )
  )
# Hide only the colour legend.
ggplot(iris) +
  geom_point(
    mapping = aes(
      x = Sepal.Length, y = Sepal.Width,
      colour = Species, size = Petal.Length
    )
  ) +
  guides(color = "none")

# Hide only the size legend.
ggplot(iris) +
  geom_point(
    mapping = aes(
      x = Sepal.Length, y = Sepal.Width,
      colour = Species, size = Petal.Length
    )
  ) +
  guides(size = "none")

# Hide both legends through guides().
ggplot(iris) +
  geom_point(
    mapping = aes(
      x = Sepal.Length, y = Sepal.Width,
      colour = Species, size = Petal.Length
    )
  ) +
  guides(color = "none", size = "none")

# Hide every legend through the theme.
ggplot(iris) +
  geom_point(
    mapping = aes(
      x = Sepal.Length, y = Sepal.Width,
      colour = Species, size = Petal.Length
    )
  ) +
  theme(legend.position = "none")

# Legend entries listed as Male then Female, legend placed at the bottom.
ggplot(student) +
  geom_bar(mapping = aes(y = Computer, fill = Gender), position = "fill") +
  scale_fill_discrete(breaks = c("Male", "Female")) +
  theme(legend.position = "bottom")

# install.packages("scales")
# Filled bars of employment within each class, with the y axis labelled as
# percentages.
# Fix: the y-axis label was misspelled "Propotion".
ggplot(student, aes(x = Class, fill = Employment)) +
  geom_bar(position = "fill") +
  labs(y = "Proportion") +
  scale_y_continuous(labels = scales::label_percent())
# Average spending per class and gender, drawn as dodged columns with the
# y axis labelled as currency (prefix "BDT "). The summary is explicitly
# ungrouped before plotting.
student %>%
  mutate(
    Class = factor(
      Class,
      levels = c("Freshman", "Sophomore", "Junior", "Senior")
    )
  ) %>%
  group_by(Class, Gender) %>%
  summarise(AvgSpending = mean(Spending)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(
    mapping = aes(x = Gender, y = AvgSpending, fill = Class),
    position = "dodge"
  ) +
  scale_y_continuous(labels = scales::label_currency(prefix = "BDT ")) +
  theme(legend.position = "bottom")
## `summarise()` has grouped output by 'Class'. You can override using the
## `.groups` argument.
# The same plot without the ungroup() step: the summary is passed to
# ggplot() while it is still grouped by Class.
student %>%
  mutate(
    Class = factor(
      Class,
      levels = c("Freshman", "Sophomore", "Junior", "Senior")
    )
  ) %>%
  group_by(Class, Gender) %>%
  summarise(AvgSpending = mean(Spending)) %>%
  ggplot() +
  geom_col(
    mapping = aes(x = Gender, y = AvgSpending, fill = Class),
    position = "dodge"
  ) +
  scale_y_continuous(labels = scales::label_currency(prefix = "BDT ")) +
  theme(legend.position = "bottom")
## `summarise()` has grouped output by 'Class'. You can override using the
## `.groups` argument.
#Box plot#
# Spending by class: box plot with the individual observations jittered on
# top; the mapping is declared once in ggplot().
ggplot(student, aes(x = Class, y = Spending)) +
  geom_boxplot() +
  geom_jitter()

# or, with the mapping repeated in each layer.
# geom_jitter() is a shortcut for geom_point(position = "jitter"): it adds a
# small amount of random variation to the location of each point, which helps
# with overplotting caused by discreteness in smaller datasets.
ggplot(student) +
  geom_boxplot(mapping = aes(x = Class, y = Spending)) +
  geom_jitter(mapping = aes(x = Class, y = Spending))

# Sepal length by species with a title, axis labels and theme_bw().
iris %>%
  ggplot(mapping = aes(x = Species, y = Sepal.Length)) +
  geom_boxplot() +
  geom_jitter() +
  labs(
    title = "Boxplot of Sepal length and Species",
    x = "Species",
    y = "Sepal length"
  ) +
  theme_bw()
# ggpubr box plot of spending by class (classes in study order) with
# jittered points, pairwise t-test comparisons and an overall ANOVA label.
student %>%
  mutate(
    Class = factor(
      Class,
      levels = c("Freshman", "Sophomore", "Junior", "Senior")
    )
  ) %>%
  ggboxplot(
    x = "Class",
    y = "Spending",
    color = "Class",
    shape = "Class",
    add = "jitter",
    palette = c("#00AFBB", "#E7B800", "#FC4E07", "black")
  ) +
  theme(legend.position = "none") +
  geom_pwc(method = "t_test") +
  stat_compare_means(method = "anova", label.y = 50)
# p2: sepal length by species with pairwise t-test comparisons.
p2 <- ggboxplot(
  data = iris,
  x = "Species",
  y = "Sepal.Length",
  color = "Species",
  palette = "npg",
  add = "jitter"
) +
  theme(legend.position = "none") +
  geom_pwc(method = "t_test")
p2

# Redraw p2 with Bonferroni-adjusted p-values in the labels.
ggadjust_pvalue(
  p2,
  p.adjust.method = "bonferroni",
  label = "{p.adj.format}{p.adj.signif}"
)

# p3: spending by class (classes in study order) with pairwise t-tests and
# an overall ANOVA label.
p3 <- student %>%
  mutate(
    Class = factor(
      Class,
      levels = c("Freshman", "Sophomore", "Junior", "Senior")
    )
  ) %>%
  ggboxplot(
    x = "Class",
    y = "Spending",
    color = "Class",
    shape = "Class",
    add = "jitter",
    palette = c("#00AFBB", "#E7B800", "#FC4E07", "black")
  ) +
  theme(legend.position = "none") +
  geom_pwc(method = "t_test") +
  stat_compare_means(method = "anova", label.y = 50)
p3

# Redraw p3 with Bonferroni-adjusted p-values and hide.ns = TRUE.
ggadjust_pvalue(
  p3,
  p.adjust.method = "bonferroni",
  label = "{p.adj.format}{p.adj.signif}",
  hide.ns = TRUE
)
# install.packages("esquisse")
# library(esquisse)
# Species-filled histogram of sepal length with manual colours, placeholder
# labels, theme_bw(), a bottom legend and one panel per species.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 30L) +
  scale_fill_manual(
    values = c(
      setosa = "#6C1C80",
      versicolor = "#30A19C",
      virginica = "#123B96"
    )
  ) +
  labs(
    title = "Title",
    subtitle = "Subtitle",
    caption = "Caption",
    x = "X label",
    y = "Y label",
    fill = "Fill label"
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold.italic"),
    plot.subtitle = element_text(face = "italic"),
    axis.title.x = element_text(face = "italic"),
    axis.title.y = element_text(face = "italic")
  ) +
  facet_wrap(vars(Species), ncol = 1)
##patchwork##
library(patchwork)
# Top row: p3 and p2 side by side; bottom row: p.
(p3 | p2) / p
See more: https://patchwork.data-imaginist.com/articles/patchwork.html
##ggarrange##
# ggarrange() is provided by ggpubr, which is loaded at the top of the file.
# Two rows in one column: p on top, then p2 and p3 side by side below.
ggarrange(
  p,
  ggarrange(p2, p3, ncol = 2),
  nrow = 2,
  ncol = 1
)
# Convert the faceted histogram stored in p with plotly::ggplotly().
plotly::ggplotly(p)

# Box plot of spending by class with jittered points, then its plotly
# version.
p4 <- ggplot(student, aes(x = Class, y = Spending)) +
  geom_boxplot() +
  geom_jitter()
plotly::ggplotly(p4)