# library
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.2.0 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# data
# Load the built-in iris measurements into the workspace.
data(iris)
# print(iris)
# cleaning
# Drop observations whose petal length is 3.5 and add the sepal
# length-to-width ratio that the violin plot later in this file displays.
# near() compares doubles with a tolerance, which is safer than `!=`.
iris_clean <- iris %>%
  filter(!near(Petal.Length, 3.5)) %>%
  mutate(sepal_ratio = Sepal.Length / Sepal.Width)
head(iris_clean, n = 5)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species sepal_ratio
## 1 5.1 3.5 1.4 0.2 setosa 1.457143
## 2 4.9 3.0 1.4 0.2 setosa 1.633333
## 3 4.7 3.2 1.3 0.2 setosa 1.468750
## 4 4.6 3.1 1.5 0.2 setosa 1.483871
## 5 5.0 3.6 1.4 0.2 setosa 1.388889
# plot
# Figure 1a: per-species distribution of the sepal length-to-width ratio,
# layering a violin, a box plot and the individual observations.
iris_clean %>%
  ggplot(aes(Species, sepal_ratio, fill = Species)) +
  geom_violin(alpha = 0.5) +
  # Box-plot outlier markers are hidden; the jitter layer draws every point.
  geom_boxplot(alpha = 0.5, outlier.shape = NA) +
  # height = 0 keeps each point at its true ratio; points are only spread
  # horizontally to reduce overplotting.
  geom_jitter(alpha = 0.5, width = 0.1, height = 0) +
  theme_minimal() +
  # The fill legend would repeat the x-axis labels, so it is removed.
  theme(legend.position = "none") +
  labs(
    title = "Distribution of Sepal Length-Width Ratio",
    x = "Species",
    y = "Sepal Length-Width Ratio",
    caption = "Figure 1a. Distribution of the ratio between sepal length and width in the Iris dataset. Data excludes observations with a petal length of 3.5"
  )
# library
library(ggplot2)
library(dplyr)
# data
data("economics_long")
# View() opens an interactive data viewer, so it is only called when the
# script runs in an interactive session (it is skipped when sourced or knit).
if (interactive()) {
  View(economics_long)
}
# plot
# Base-graphics plot of every `value` against its `date`, drawn as points
# joined by lines (type = "b").
# NOTE(review): economics_long also has a `variable` column (used for colour
# in the ggplot versions later in this file), so this draws all variables as
# a single series - presumably the deliberate baseline figure; confirm.
plot(
  economics_long$date,
  economics_long$value,
  type = "b",
  xlab = "Time (years)",
  ylab = "Value",
  main = "Economic Values Over Time"
)
# library
library(ggplot2)
library(dplyr)
# plot
# Figure 2b: each economic variable over time, coloured by variable, with a
# linear fit per variable.
economics_long %>%
  ggplot(aes(date, value, colour = variable)) +
  # Small, semi-transparent points keep the dense series readable.
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "lm", alpha = 0.5) +
  theme_minimal() +
  labs(
    title = "Economic Values Over Time",
    subtitle = "Comparison of multiple economic values over time from the economics_long dataset",
    x = "Date (years)",
    y = "Value",
    colour = "Economic Value",
    caption = "Figure 2b. Multiple economic values over time, illustrating different economic trends across multiple variables."
  )
## `geom_smooth()` using formula = 'y ~ x'
# library
library(ggplot2)
library(dplyr)
# plot
# Figure 2c: economics_long values over time with a linear fit per variable,
# showing only the 0-10000 part of the y axis.
ggplot(
  data = economics_long,
  mapping = aes(x = date, y = value, colour = variable)
) +
  geom_point(alpha = 0.35, size = 0.5) +
  geom_smooth(method = "lm") +
  # coord_cartesian() narrows the visible range only; the fits still use
  # every observation.
  coord_cartesian(ylim = c(0, 10000)) +
  labs(
    caption = "Figure 2c. Multiple economic values over time. Y-axis zoomed using coord_cartesian() illustrating different economic trends across multiple variables in the lower-range of the dataset without altering values.",
    colour = "Economic Value",
    y = "Value",
    x = "Date (years)",
    subtitle = "Comparison of multiple economic values over time with a focus on lower-end values",
    title = "Zoomed View of Lower-Range Economic Values Over Time"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# library
library(ggplot2)
library(dplyr)
library(tidyverse)
# data
data("penguins")
# View() opens an interactive data viewer, so it is only called when the
# script runs in an interactive session (it is skipped when sourced or knit).
if (interactive()) {
  View(penguins)
}
# plot
# Keep only rows with a recorded body mass so the density layer receives no
# missing values.
penguins_clean <- penguins %>%
  filter(!is.na(body_mass))
# Figure 3a: overlaid body-mass density curves, one per species.
penguins_clean %>%
  ggplot(aes(body_mass, fill = species)) +
  geom_density(alpha = 0.5) +
  # One manual fill colour per species, in factor-level order.
  scale_fill_manual(
    values = c("darkseagreen3", "mistyrose3", "darkslategrey")
  ) +
  theme_minimal() +
  labs(
    title = "Distribution of Body Mass Across Different Penguin Species",
    x = "Body Mass (g)",
    # geom_density() plots a kernel density estimate, not a frequency count.
    y = "Density",
    fill = "Penguin Species",
    caption = "Figure 3a. Density distribution of body mass among different penguin species in the Penguins data set."
  )
# library
library(tidyr)
library(ggplot2)
library(viridis)
## Warning: package 'viridis' was built under R version 4.5.3
## Loading required package: viridisLite
#install.packages("viridis")
# data
data("diamonds")
# View(diamonds)
# plot
# Stacked bars scaled to equal height: the share of each diamond colour
# within every cut.
diamonds %>%
  ggplot(aes(cut, fill = color)) +
  geom_bar(position = "fill") +
  # The mapped aesthetic is `fill` and `color` is a discrete grade, so the
  # discrete fill scale is the one that actually applies to this plot.
  scale_fill_viridis_d() +
  theme_minimal() +
  labs(
    title = "Proportional Distribution of Diamond Color Within Each Cut Category",
    x = "Diamond Cut",
    # position = "fill" rescales every bar to 1, so the axis is a proportion.
    y = "Proportion",
    fill = "Diamond Color"
  )
# plot
# Side-by-side bars: the number of diamonds of each colour within every cut.
diamonds %>%
  ggplot(aes(cut, fill = color)) +
  geom_bar(position = "dodge") +
  # The mapped aesthetic is `fill` and `color` is a discrete grade, so the
  # discrete fill scale is the one that actually applies to this plot.
  scale_fill_viridis_d() +
  theme_minimal() +
  labs(
    title = "Grouped Distribution of Diamond Color Within Each Cut Category",
    x = "Diamond Cut",
    y = "Count",
    fill = "Diamond Color"
  )