Loading packages and datasets
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ lubridate 1.9.5 ✔ tibble 3.3.1
## ✔ purrr 1.2.1 ✔ tidyr 1.3.2
## ✔ readr 2.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the example datasets used by the four parts below into the workspace.
data(iris) # used in Part 1
data("economics_long") # used in Part 2
# NOTE(review): no penguin-specific package is attached above, so this
# presumably relies on the copy bundled with recent R releases - confirm the
# minimum R version this script is expected to run on.
data(penguins) # used in Part 3
data(diamonds) # used in Part 4
Part 1
# Part 1: distribution of the sepal length-to-width ratio per iris species,
# drawn as a violin with a narrow box plot on top and the IQR-rule outliers
# highlighted in red.

# Drop flowers whose petal length is 3.5 and derive the sepal ratio.
# near() compares with a small tolerance instead of exact floating-point
# equality; for these one-decimal measurements the result is the same.
iris_clean <- iris %>%
  filter(!near(Petal.Length, 3.5)) %>%
  mutate(sepal_ratio = Sepal.Length / Sepal.Width)

# Per-species outliers by the 1.5 * IQR rule. The quartiles are computed
# within each species, then the grouping is removed so the resulting data
# frame does not carry a leftover grouping into later use.
iris_outliers <- iris_clean %>%
  group_by(Species) %>%
  mutate(
    q1 = quantile(sepal_ratio, 0.25),
    q3 = quantile(sepal_ratio, 0.75),
    iqr = q3 - q1,
    is_outlier = sepal_ratio < (q1 - 1.5 * iqr) |
      sepal_ratio > (q3 + 1.5 * iqr)
  ) %>%
  ungroup() %>%
  filter(is_outlier)

ggplot(iris_clean, aes(x = Species, y = sepal_ratio)) +
  geom_violin(fill = "lightblue", alpha = 0.4, trim = FALSE, width = 1) +
  # The box plot's own outlier markers are hidden; outliers are drawn by the
  # jitter layer below instead.
  geom_boxplot(width = 0.1, alpha = 0.7, outlier.shape = NA) +
  # height = 0 restricts the jitter to the horizontal direction, so every
  # outlier is drawn at its actual ratio rather than a randomly shifted one.
  geom_jitter(
    data = iris_outliers,
    width = 0.2,
    height = 0,
    color = "red",
    size = 2,
    alpha = 0.5
  ) +
  labs(
    title = "Distribution of Sepal Length to Width Ratio by Species",
    subtitle = "Species Sepal Length to Width Ratio",
    x = "Iris Species",
    y = "Sepal Length to Width Ratio",
    caption = "Source: iris dataset"
  ) +
  theme_minimal()

Part 2
2A
# Part 2A: base-graphics line plot of every value in economics_long over time.
# plot() opens the figure and draws the line; points() is a separate call that
# draws onto that same figure. Base graphics functions are not combined with
# `+` (that is ggplot2 syntax): adding their NULL return values only produces
# a stray `integer(0)` result.
plot(
  economics_long$date,
  economics_long$value,
  type = "l",
  xlab = "Year",
  ylab = "Value",
  main = "Change in Value over Time"
)
points(economics_long$date, economics_long$value) # overlay the observations
2B
# Part 2B: one line per economic variable over time, coloured by variable.
ggplot(economics_long, aes(x = date, y = value, color = variable)) +
  geom_line() +
  labs(
    title = "Change in Value over Time",
    subtitle = "Change in Value over Time for Multiple Variables",
    x = "Year",
    y = "Value",
    caption = "Source: economics_long dataset" # fixed typo ("Souce")
  ) +
  theme_minimal()

2C
# Part 2C: same per-variable line plot as 2B, zoomed to y values 0-16000.
ggplot(economics_long, aes(x = date, y = value, color = variable)) +
  geom_line() +
  labs(
    title = "Change in Value over Time",
    subtitle = "Change in Value over Time for Multiple Variables",
    x = "Year",
    y = "Value",
    caption = "Source: economics_long dataset" # fixed typo ("Souce")
  ) +
  # coord_cartesian() zooms the view without discarding data outside the
  # limits, so lines that leave the window are clipped rather than removed.
  coord_cartesian(ylim = c(0, 16000)) +
  theme_minimal()

Part 3
# Part 3: overlapping density curves of penguin body mass, one per species.

# Remove only the rows that are missing a value the plot actually uses.
# na.omit() on the whole data frame would also discard penguins whose body
# mass is known but that have an NA in some unrelated column.
penguins_clean <- penguins %>%
  drop_na(body_mass, species)

ggplot(penguins_clean, aes(x = body_mass, fill = species)) +
  geom_density(alpha = 0.4) + # semi-transparent so overlapping curves stay visible
  labs(
    title = "Penguin Body Mass Density",
    subtitle = "Density of Penguin Body Mass by Species",
    x = "Body Mass",
    y = "Density",
    fill = "Species",
    caption = "Source: penguins dataset"
  ) +
  # Fixed colour per species, keyed by name so the mapping does not depend on
  # factor level order.
  scale_fill_manual(
    values = c(
      "Adelie" = "darkseagreen3",
      "Chinstrap" = "mistyrose3",
      "Gentoo" = "darkslategray"
    )
  ) +
  theme_minimal()

Part 4
4A
# Part 4A: proportional stacked bars - the share of each cut quality within
# every diamond colour category.
ggplot(diamonds, aes(x = color, fill = cut)) +
  # position = "fill" rescales every bar to a total height of 1, so the y
  # axis shows proportions rather than raw counts.
  geom_bar(position = "fill") +
  labs(
    title = "Proportion of Cut by Color",
    subtitle = "Proportion of Diamond Cut by Color Category",
    x = "Color",
    y = "Proportion", # was "Count", which does not match a 0-1 axis
    fill = "Cut Quality",
    caption = "Source: diamonds dataset"
  ) +
  theme_minimal()

4B
# Part 4B: grouped bar chart - raw counts of each cut quality, drawn side by
# side within every diamond colour category.
diamonds %>%
  ggplot(mapping = aes(x = color, fill = cut)) +
  geom_bar(position = "dodge") + # default stat counts rows; dodge = side by side
  theme_minimal() +
  labs(
    title = "Grouped Bar Plot of Diamond Cut within Color",
    subtitle = "Counts of Cut Quality Across Color Categories",
    caption = "Source: diamonds dataset",
    fill = "Cut Quality",
    x = "Color",
    y = "Count"
  )
