# Calling libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dslabs)
## Warning: package 'dslabs' was built under R version 4.4.3
# Calling & viewing dataset

# Bring the iris data frame into the workspace and preview its first six rows.
data(list = "iris")
head(iris, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Renaming variables

# Convert the column names to snake_case in one pass: lower-case every name,
# then swap each literal dot for an underscore (fixed = TRUE avoids a regex).
names(iris) <- gsub(".", "_", tolower(names(iris)), fixed = TRUE)

# Preview the data again to confirm the new column names.
head(iris, n = 6L)
##   sepal_length sepal_width petal_length petal_width species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Create the scatterplot with different variables

# Scatterplot of sepal width against sepal length, one point colour per species.
ggplot(
  data = iris,
  mapping = aes(x = sepal_length, y = sepal_width, color = species)
) +
  # Draw each observation as a size-3 point.
  geom_point(size = 3) +
  # Colour the species with the ColorBrewer "Set2" palette.
  scale_color_brewer(palette = "Set2") +
  # Plot title, axis labels, and legend title.
  labs(
    title = "Sepal Dimensions of Iris Flowers by Species in Centimeters",
    x = "Sepal Length (cm)",
    y = "Sepal Width (cm)",
    color = "Species"
  ) +
  # Start from the black-and-white theme, then override individual text
  # elements; theme() must come after theme_bw() so the overrides are kept.
  theme_bw() +
  theme(
    # Centered, bold, 14 pt plot title.
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    # 12 pt axis tick labels.
    axis.text = element_text(size = 12),
    # 12 pt legend title.
    legend.title = element_text(size = 12)
  )

The dataset I have chosen is the “iris” dataset, which provides measurements for three species of iris flowers: setosa, versicolor, and virginica. The graph I made is a scatterplot comparing the length and width of the sepals on these flowers, in centimeters. Sepals are leaf-like structures near the base of the blossom, at the top of the stem. Prior to blooming, the sepals enclose the unopened flower bud to protect it. They also support the flower petals once the flower has bloomed. It is a very simple scatterplot, with size 3 points, a very simple theme, and contrasting colors to differentiate between species. Bright, somewhat muted colors were intentionally chosen for a spring-y vibe, since the dataset is about flowers. The font size and position of the title, axes, and legend were also altered slightly to make them easier to read and to improve the overall formatting.