Part 1

1a

#library
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.2.0     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the built-in iris measurements into the workspace.
data("iris")

# Keep only the rows whose petal length differs from 3.5, then append the
# sepal length-to-width ratio as a new column.
iris_clean <- filter(iris, Petal.Length != 3.5)
iris_clean <- mutate(iris_clean, sepal_ratio = Sepal.Length / Sepal.Width)

# Preview the first five rows of the cleaned data.
head(iris_clean, 5L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species sepal_ratio
## 1          5.1         3.5          1.4         0.2  setosa    1.457143
## 2          4.9         3.0          1.4         0.2  setosa    1.633333
## 3          4.7         3.2          1.3         0.2  setosa    1.468750
## 4          4.6         3.1          1.5         0.2  setosa    1.483871
## 5          5.0         3.6          1.4         0.2  setosa    1.388889
# plot
# Violin, box plot and raw points of the sepal length/width ratio, one group
# per species, all filled by species.

iris_clean %>%
  ggplot(aes(Species, sepal_ratio, fill = Species)) +
  geom_violin(alpha = 0.5) +
  # Box-plot outlier markers are switched off because every observation is
  # already drawn by the jitter layer below.
  geom_boxplot(alpha = 0.5, outlier.shape = NA) +
  # Jitter horizontally only: height = 0 keeps each point at its true ratio
  # instead of applying the default vertical jitter as well.
  geom_jitter(alpha = 0.5, width = 0.1, height = 0) +
  theme_minimal() +
  # Fill duplicates the x-axis grouping, so the legend is hidden.
  theme(
    legend.position = "none"
  ) +
  labs(
    title = "Distribution of Sepal Length-Width Ratio",
    x = "Species",
    y = "Sepal Length-Width Ratio",
    # The filter upstream removes individual rows, not whole species.
    caption = "Figure 1a. Distribution of the ratio between sepal length and width in the Iris dataset. Data excludes observations with a petal length of 3.5"
  )

Part 2

2a

# library
library(ggplot2)
library(dplyr)

# data
data("economics_long")
# View() opens a data viewer, so it is only called when someone is at the
# console; non-interactive runs (e.g. rendering a report) skip it.
if (interactive()) {
  View(economics_long)
}

# plot
# Base-graphics plot of every value against its date, drawn as points joined
# by lines (type = "b").

plot(
  economics_long$date,
  economics_long$value,
  type = "b",
  xlab = "Time (years)",
  ylab = "Value",
  main = "Economic Values Over Time"
)

2b

# library
library(ggplot2)
library(dplyr)

# plot
# Scatter of value against date, coloured by variable, with one linear fit
# (method = "lm") per variable drawn over the points.
economics_long %>%
  ggplot(aes(date, value, colour = variable)) +
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "lm", alpha = 0.5) +
  theme_minimal() +
  labs(
    # Title case matches the other figure titles in this document.
    title = "Economic Values Over Time",
    subtitle = "Comparison of multiple economic values over time from the economics_long dataset",
    x = "Date (years)",
    y = "Value",
    colour = "Economic Value",
    caption = "Figure 2b. Multiple economic values over time, illustrating different economic trends across multiple variables."
  )
## `geom_smooth()` using formula = 'y ~ x'

2c

# library
library(ggplot2)
library(dplyr)

# plot
# Scatter of value against date with one linear fit per variable, viewed
# through a 0-10000 window on the y-axis set with coord_cartesian().
ggplot(
  data = economics_long,
  mapping = aes(x = date, y = value, colour = variable)
) +
  geom_point(alpha = 0.35, size = 0.5) +
  geom_smooth(method = "lm") +
  labs(
    title = "Zoomed View of Lower-Range Economic Values Over Time",
    subtitle = "Comparison of multiple economic values over time with a focus on lower-end values",
    x = "Date (years)",
    y = "Value",
    colour = "Economic Value",
    caption = "Figure 2c. Multiple economic values over time. Y-axis zoomed using coord_cartesian() illustrating different economic trends across multiple variables in the lower-range of the dataset without altering values."
  ) +
  theme_minimal() +
  coord_cartesian(ylim = c(0, 1e4))
## `geom_smooth()` using formula = 'y ~ x'

Part 3

3a

# library
library(ggplot2)
library(dplyr)
library(tidyverse)

# data
data("penguins")
# View() opens a data viewer, so it is only called when someone is at the
# console; non-interactive runs (e.g. rendering a report) skip it.
if (interactive()) {
  View(penguins)
}

# plot

# Drop rows with a missing body mass before estimating densities.
penguins_clean <- penguins %>%
  filter(!is.na(body_mass))

# Overlaid, semi-transparent density curves of body mass, one per species.
penguins_clean %>%
  ggplot(aes(body_mass, fill = species)) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(values = c("darkseagreen3", "mistyrose3", "darkslategrey")) +
  theme_minimal() +
  labs(
    title = "Distribution of Body Mass Across Different Penguin Species",
    x = "Body Mass (g)",
    # geom_density() draws a density estimate, not a count of observations.
    y = "Density",
    fill = "Penguin Species",
    caption = "Figure 3a. Density distribution of body mass among different penguin species in the Penguins data set."
  )

Part 4

4a

# library
library(tidyr)
library(ggplot2)
library(viridis)
## Warning: package 'viridis' was built under R version 4.5.3
## Loading required package: viridisLite
#install.packages("viridis")

# data
data("diamonds")
#View(diamonds)

# plot
# One bar per cut, split by colour grade and rescaled to a common height
# (position = "fill") so the bars show each grade's share within the cut.

diamonds %>%
  ggplot(aes(cut, fill = color)) +
  geom_bar(position = "fill") +
  # Only the fill aesthetic is mapped, so the viridis palette must be attached
  # to fill; a colour-aesthetic scale would never be used by this plot.
  scale_fill_viridis_d() +
  theme_minimal() +
  labs(
    # Bars are cuts and segments are colours, i.e. colour within each cut.
    title = "Proportional Distribution of Diamond Color Within Each Cut Category",
    x = "Diamond Cut",
    # position = "fill" rescales every bar to 1, so the axis is a proportion.
    y = "Proportion",
    fill = "Diamond Color"
  )

4b

# plot
# Side-by-side (position = "dodge") bars giving the number of diamonds of each
# colour grade within every cut.
diamonds %>%
  ggplot(aes(cut, fill = color)) +
  geom_bar(position = "dodge") +
  # Only the fill aesthetic is mapped, so the viridis palette must be attached
  # to fill; a colour-aesthetic scale would never be used by this plot.
  scale_fill_viridis_d() +
  theme_minimal() +
  labs(
    # Bars are grouped by cut and split by colour, i.e. colour within each cut.
    title = "Grouped Distribution of Diamond Color Within Each Cut Category",
    x = "Diamond Cut",
    y = "Count",
    fill = "Diamond Color"
  )