# load necessary packages
library(ggplot2)
# In R you can create a simple scatter plot using the plot() function.
# The basic syntax is as follows:
#
#   plot(x, y, main, xlab, ylab, col, pch)
#
# Where:
#   - x:    A numeric vector representing the x-coordinates of the points.
#   - y:    A numeric vector representing the y-coordinates of the points.
#   - main: A character string for the title of the plot.
#   - xlab: A character string for the label of the x-axis.
#   - ylab: A character string for the label of the y-axis.
#   - col:  A color specification for the points.
#   - pch:  A plotting character or symbol to use.
# Load the mtcars dataset that is included with R.
data(mtcars)

# Create a simple scatter plot of fuel efficiency against weight.
plot(
  mtcars$wt, mtcars$mpg,
  main = "Scatter plot of Weight vs. MPG",
  xlab = "Weight (1000 lbs)",
  ylab = "Miles per Gallon (MPG)",
  col = "blue",
  pch = 19
)

# Add a regression line. Base graphics are built up by successive calls, not
# composed with `+` (that operator is for ggplot2 layers): chaining plot() and
# abline() with `+` only evaluated NULL + NULL and printed a stray integer(0).
abline(lm(mpg ~ wt, data = mtcars), col = "red")

# Add a legend for the regression line.
legend("topright", legend = "Regression Line", col = "red", lty = 1)
# Make black-and-white the default ggplot2 theme for the session. A plot that
# adds its own theme_*() layer (as the one below does) overrides this default.
theme_set(theme_bw())

# Scatter plot of mpg against weight with a fitted linear trend line (ggplot2).
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(size = 3, colour = "blue") +
  geom_smooth(method = "lm", se = FALSE, colour = "red") +
  theme_minimal() +
  ggtitle("Scatter plot of Weight vs. MPG") +
  xlab("Weight (1000 lbs)") +
  ylab("Miles per Gallon (MPG)")
## `geom_smooth()` using formula = 'y ~ x'
# Boxplot of mpg for each cylinder count (ggplot2).
# cyl is numeric in mtcars, so it is wrapped in factor() to get one box per
# distinct value.
ggplot(data = mtcars) +
  geom_boxplot(
    mapping = aes(x = factor(cyl), y = mpg),
    colour = "darkblue",
    fill = "lightblue"
  ) +
  theme_minimal() +
  ggtitle("Boxplot of MPG by Number of Cylinders") +
  xlab("Number of Cylinders") +
  ylab("Miles per Gallon (MPG)")
# Bar chart of the number of cars per cylinder count (ggplot2).
# geom_bar() counts the rows in each x category, so no y aesthetic is given.
ggplot(data = mtcars, mapping = aes(x = factor(cyl))) +
  geom_bar(colour = "darkgreen", fill = "lightgreen") +
  theme_minimal() +
  labs(
    y = "Count of Cars",
    x = "Number of Cylinders",
    title = "Barplot of Number of Cars by Cylinders"
  )
# Same bar chart, with bars split by transmission type (am = 0 or 1) and
# placed side by side within each cylinder count.
ggplot(data = mtcars) +
  geom_bar(
    mapping = aes(x = factor(cyl), fill = factor(am)),
    position = "dodge",
    colour = "black"
  ) +
  theme_minimal() +
  labs(
    title = "Barplot of Number of Cars by Cylinders and Transmission",
    fill = "Transmission (0 = automatic, 1 = manual)",
    x = "Number of Cylinders",
    y = "Count of Cars"
  )
# Stacked variant: one column per cylinder count, with the transmission
# groups (am) stacked on top of each other inside that column.
ggplot(data = mtcars) +
  geom_bar(
    mapping = aes(x = factor(cyl), fill = factor(am)),
    position = "stack",
    colour = "black"
  ) +
  theme_minimal() +
  labs(
    title = "Stacked Barplot of Number of Cars by Cylinders and Transmission",
    fill = "Transmission (0 = automatic, 1 = manual)",
    x = "Number of Cylinders",
    y = "Count of Cars"
  )
# Line plot (ggplot2) with disp on the x-axis and hp on the y-axis, drawing
# one line per cylinder count (three lines for the three values of cyl).
ggplot(mtcars, aes(x = disp, y = hp, color = factor(cyl))) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0 and triggers a lifecycle warning.
  geom_line(linewidth = 1) +
  labs(
    title = "Line Plot of Horsepower vs. Displacement by Number of Cylinders",
    x = "Displacement (cu.in.)",
    y = "Horsepower (hp)",
    color = "Number of Cylinders"
  ) +
  theme_minimal()
# Same plot with scatter points added; point shape encodes the engine type
# (vs), which mtcars documents as 0 = V-shaped, 1 = straight.
ggplot(
  mtcars,
  aes(x = disp, y = hp, color = factor(cyl), shape = factor(vs))
) +
  # `size` is still the correct argument for point size ...
  geom_point(size = 3) +
  # ... but line thickness uses `linewidth` (ggplot2 >= 3.4.0).
  geom_line(linewidth = 1) +
  labs(
    title = "Line Plot of Horsepower vs. Displacement by Number of Cylinders and Engine Type",
    x = "Displacement (cu.in.)",
    y = "Horsepower (hp)",
    color = "Number of Cylinders",
    shape = "Engine Type (0 = V-shaped, 1 = straight)"
  ) +
  theme_minimal()
# Using ggplot2, create a histogram of the mtcars dataset where the x-axis
# represents miles per gallon (mpg). Fill the bars with a gradient colour
# based on the count of cars in each bin. Make the plot visually appealing
# with a classic theme, larger labels, and a custom title like
# "Distribution of Fuel Efficiency". Add a vertical line to indicate the
# average mpg.
ggplot(mtcars, aes(x = mpg)) +
  # after_stat(count) replaces the `..count..` notation, which has been
  # deprecated since ggplot2 3.4.0.
  geom_histogram(aes(fill = after_stat(count)), bins = 10, color = "black") +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  # The mean is a single constant, so it is passed as a fixed xintercept
  # instead of being mapped through aes() (which draws one line per row).
  # Line thickness uses `linewidth` rather than the deprecated `size`.
  geom_vline(
    xintercept = mean(mtcars$mpg),
    color = "red",
    linetype = "dashed",
    linewidth = 1
  ) +
  labs(
    title = "Distribution of Fuel Efficiency",
    x = "Miles per Gallon (MPG)",
    y = "Count of Cars"
  ) +
  theme_classic(base_size = 15) +
  # The fill gradient repeats what the bar heights already show.
  theme(legend.position = "none")
# Suppose we want 10 bins: take the span of mpg (max - min) and divide it
# by 10 to get the width of a single bin.
range_mpg <- c(min(mtcars$mpg), max(mtcars$mpg))
bin_width <- diff(range_mpg) / 10
bin_width
## [1] 2.35
# Then create the histogram with the bin width computed above.
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram(
    # after_stat(count) replaces the deprecated `..count..` notation.
    aes(fill = after_stat(count)),
    binwidth = bin_width,
    # Anchor a bin edge at the smallest mpg so that bins of this width tile
    # the data range exactly; with the default alignment the bins are centred
    # on multiples of the width and spill over both ends of the range.
    boundary = min(mtcars$mpg),
    color = "black"
  ) +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  # A constant xintercept draws the mean line once; `linewidth` replaces the
  # deprecated `size` for line thickness (ggplot2 >= 3.4.0).
  geom_vline(
    xintercept = mean(mtcars$mpg),
    color = "red",
    linetype = "dashed",
    linewidth = 1
  ) +
  labs(
    title = "Distribution of Fuel Efficiency",
    x = "Miles per Gallon (MPG)",
    y = "Count of Cars"
  ) +
  theme_classic(base_size = 15) +
  theme(legend.position = "none")
# install.packages("GGally") # Uncomment this line to install the GGally package if it is not already installed
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Create a pair plot of the mtcars dataset, coloured by number of cylinders.
# The panels use the first five columns except cyl: cyl is the grouping
# variable, so it is constant within each colour group, and every correlation
# panel involving it made cor() warn "the standard deviation is zero".
ggpairs(
  mtcars,
  mapping = aes(color = factor(cyl), alpha = 0.5),
  columns = c("mpg", "disp", "hp", "drat")
) +
  theme_minimal() +
  labs(
    title = "Pair Plot of mtcars Dataset",
    color = "Number of Cylinders"
  )