R tutorial about graphs

# load necessary packages
library(ggplot2)

1. Syntax of the plot() function

In R, you can create a simple scatter plot using the plot() function. The basic syntax is as follows: plot(x, y, main, xlab, ylab, col, pch)

Where:
- x: A numeric vector representing the x-coordinates of the points.
- y: A numeric vector representing the y-coordinates of the points.
- main: A character string for the title of the plot.
- xlab: A character string for the label of the x-axis.
- ylab: A character string for the label of the y-axis.
- col: A color specification for the points.
- pch: A plotting character or symbol to use.

2. Example of a simple scatter plot

# Use the mtcars dataset that is included with R.
data(mtcars)

# Create a simple scatter plot of fuel efficiency (mpg) against weight (wt).
# Base graphics calls are separate statements: they must not be chained with
# `+` (that operator composes ggplot2 layers only). Chaining them made R
# evaluate `NULL + NULL` and print a stray `integer(0)`.
plot(mtcars$wt, mtcars$mpg,
     main = "Scatter plot of Weight vs. MPG",
     xlab = "Weight (1000 lbs)",
     ylab = "Miles per Gallon (MPG)",
     col = "blue",
     pch = 19)

# Add the fitted linear regression line of mpg on wt to the existing plot.
abline(lm(mpg ~ wt, data = mtcars), col = "red")

# Add a legend describing the red regression line.
legend("topright", legend = "Regression Line", col = "red", lty = 1)

# Set black-and-white as the session-wide default ggplot2 theme. A plot that
# adds its own complete theme (such as theme_minimal() below) overrides this
# default for that plot.
theme_set(theme_bw())

# Scatter plot of mpg against wt using ggplot2, with a linear fit on top.
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(color = "blue", size = 3) +
  # The formula is stated explicitly so geom_smooth() does not have to guess
  # it and print the "using formula = 'y ~ x'" message.
  geom_smooth(method = "lm", formula = y ~ x, color = "red", se = FALSE) +
  labs(title = "Scatter plot of Weight vs. MPG",
       x = "Weight (1000 lbs)",
       y = "Miles per Gallon (MPG)") +
  theme_minimal()

# Boxplot using ggplot2: distribution of mpg for each cylinder count.
# cyl is wrapped in factor() so that it is treated as a discrete x-axis.
ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_boxplot(colour = "darkblue", fill = "lightblue") +
  ggtitle("Boxplot of MPG by Number of Cylinders") +
  xlab("Number of Cylinders") +
  ylab("Miles per Gallon (MPG)") +
  theme_minimal()

# Bar plot using ggplot2: number of cars for each cylinder count.
# Only x is mapped; geom_bar() counts the rows in each category itself.
ggplot(data = mtcars, mapping = aes(x = factor(cyl))) +
  geom_bar(colour = "darkgreen", fill = "lightgreen") +
  ggtitle("Barplot of Number of Cars by Cylinders") +
  xlab("Number of Cylinders") +
  ylab("Count of Cars") +
  theme_minimal()

# Same bar plot, with bars split by transmission type (am = 0 or 1) and the
# two transmission types drawn side by side within each cylinder count.
ggplot(data = mtcars, mapping = aes(x = factor(cyl), fill = factor(am))) +
  geom_bar(colour = "black", position = position_dodge()) +
  ggtitle("Barplot of Number of Cars by Cylinders and Transmission") +
  xlab("Number of Cylinders") +
  ylab("Count of Cars") +
  labs(fill = "Transmission (0 = automatic, 1 = manual)") +
  theme_minimal()

# Stacked variant: one column per cylinder count, with the transmission types
# (am) stacked on top of each other inside that column.
ggplot(data = mtcars, mapping = aes(x = factor(cyl), fill = factor(am))) +
  geom_bar(colour = "black", position = position_stack()) +
  ggtitle("Stacked Barplot of Number of Cars by Cylinders and Transmission") +
  xlab("Number of Cylinders") +
  ylab("Count of Cars") +
  labs(fill = "Transmission (0 = automatic, 1 = manual)") +
  theme_minimal()

# Line plot using ggplot2: hp on the y-axis against disp on the x-axis, with
# one line for each of the three cyl categories.
ggplot(mtcars, aes(x = disp, y = hp, color = factor(cyl))) +
  # `linewidth` replaces the `size` aesthetic for lines, which was deprecated
  # in ggplot2 3.4.0 and triggered a lifecycle warning.
  geom_line(linewidth = 1) +
  labs(title = "Line Plot of Horsepower vs. Displacement by Number of Cylinders",
       x = "Displacement (cu.in.)",
       y = "Horsepower (hp)",
       color = "Number of Cylinders") +
  theme_minimal()

# Same line plot with scatter points added; the point shape encodes the
# engine type (vs), in addition to the colour encoding cyl.
ggplot(mtcars, aes(x = disp, y = hp, color = factor(cyl), shape = factor(vs))) +
  # `size` is still the correct argument for point size ...
  geom_point(size = 3) +
  # ... but line thickness uses `linewidth` (`size` for lines was deprecated
  # in ggplot2 3.4.0).
  geom_line(linewidth = 1) +
  labs(title = "Line Plot of Horsepower vs. Displacement by Number of Cylinders and Engine Type",
       x = "Displacement (cu.in.)",
       y = "Horsepower (hp)",
       color = "Number of Cylinders",
       # In mtcars, vs = 0 is a V-shaped engine and vs = 1 a straight engine.
       shape = "Engine Type (0 = V-shaped, 1 = straight)") +
  theme_minimal()

# Histogram of fuel efficiency (mpg) using ggplot2. Bars are filled with a
# colour gradient based on the number of cars in each bin, a dashed vertical
# line marks the average mpg, and the plot uses a classic theme with larger
# text and the custom title "Distribution of Fuel Efficiency".
ggplot(mtcars, aes(x = mpg)) +
  # after_stat(count) is the supported replacement for the `..count..`
  # dot-dot notation, which was deprecated in ggplot2 3.4.0.
  geom_histogram(aes(fill = after_stat(count)), bins = 10, color = "black") +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  # The mean is a single constant, so it is passed as a fixed parameter rather
  # than mapped inside aes(), which would recycle it across every data row.
  # `linewidth` replaces the deprecated `size` for line thickness.
  geom_vline(xintercept = mean(mtcars$mpg), color = "red",
             linetype = "dashed", linewidth = 1) +
  labs(title = "Distribution of Fuel Efficiency",
       x = "Miles per Gallon (MPG)",
       y = "Count of Cars") +
  theme_classic(base_size = 15) +
  # The fill gradient repeats the count shown on the y-axis, so the legend is
  # hidden.
  theme(legend.position = "none")

# Suppose we want exactly 10 bins: first compute the width each bin must have
# to split the observed mpg range into 10 equal parts.
range_mpg <- range(mtcars$mpg)
bin_width <- diff(range_mpg) / 10
bin_width
## [1] 2.35
# Then create the histogram with that bin width.
ggplot(mtcars, aes(x = mpg)) +
  # `boundary` anchors a bin edge at the minimum mpg. Without it, ggplot2's
  # default bin alignment shifts the bins so that the data range spills into
  # an extra, 11th bin. after_stat(count) replaces the deprecated `..count..`.
  geom_histogram(aes(fill = after_stat(count)), binwidth = bin_width,
                 boundary = range_mpg[1], color = "black") +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  # The mean is a constant, so it is set as a fixed parameter instead of being
  # mapped inside aes(); `linewidth` replaces the deprecated `size` for lines.
  geom_vline(xintercept = mean(mtcars$mpg), color = "red",
             linetype = "dashed", linewidth = 1) +
  labs(title = "Distribution of Fuel Efficiency",
       x = "Miles per Gallon (MPG)",
       y = "Count of Cars") +
  theme_classic(base_size = 15) +
  theme(legend.position = "none")

pair plot

# install.packages("GGally") # Uncomment this line to install the GGally package if it is not installed yet
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
# Create a pair plot of five continuous mtcars variables, coloured by the
# number of cylinders.
# cyl itself is deliberately left out of the plotted columns: it is the
# grouping variable, so inside each colour group it is constant, and computing
# per-group correlations with it raised "the standard deviation is zero"
# warnings.
ggpairs(mtcars,
        columns = c("mpg", "disp", "hp", "drat", "wt"),
        mapping = aes(color = factor(cyl), alpha = 0.5)) + # color by number of cylinders
  theme_minimal() +
  labs(title = "Pair Plot of mtcars Dataset",
       color = "Number of Cylinders")