Loading packages and datasets

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ lubridate 1.9.5     ✔ tibble    3.3.1
## ✔ purrr     1.2.1     ✔ tidyr     1.3.2
## ✔ readr     2.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Copy the four example datasets used below into the workspace in one call.
data(list = c("iris", "economics_long", "penguins", "diamonds"))

Part 1

# Part 1: distribution of the sepal length-to-width ratio for each species.

# Drop flowers whose petal length is 3.5 and derive the sepal ratio.
# near() replaces != so the exclusion does not depend on exact floating-point
# equality.
iris_clean <- iris %>%
  filter(!near(Petal.Length, 3.5)) %>%
  mutate(sepal_ratio = Sepal.Length / Sepal.Width)

# Keep only the per-species outliers (1.5 * IQR rule) so they can be drawn as
# a separate layer; ungroup so the result is a plain data frame.
iris_outliers <- iris_clean %>%
  group_by(Species) %>%
  mutate(
    q1 = quantile(sepal_ratio, 0.25),
    q3 = quantile(sepal_ratio, 0.75),
    iqr = q3 - q1,
    is_outlier = sepal_ratio < (q1 - 1.5 * iqr) |
      sepal_ratio > (q3 + 1.5 * iqr)
  ) %>%
  filter(is_outlier) %>%
  ungroup()

ggplot(iris_clean, aes(x = Species, y = sepal_ratio)) +
  # Full (untrimmed) density shape for each species.
  geom_violin(fill = "lightblue", alpha = 0.4, trim = FALSE, width = 1) +
  # Narrow boxplot on top; its own outlier markers are hidden because the
  # outliers are drawn by the jitter layer below.
  geom_boxplot(width = 0.1, alpha = 0.7, outlier.shape = NA) +
  # Spread the outliers horizontally only. height = 0 keeps every point at its
  # true ratio; leaving height unset would also jitter the y values.
  geom_jitter(
    data = iris_outliers,
    width = 0.2,
    height = 0,
    color = "red",
    size = 2,
    alpha = 0.5
  ) +
  labs(
    title = "Distribution of Sepal Length to Width Ratio by Species",
    subtitle = "Species Sepal Length to Width Ratio",
    x = "Iris Species",
    y = "Sepal Length to Width Ratio",
    caption = "Source: iris dataset"
  ) +
  theme_minimal()

Part 2

2A

# Base-graphics line plot of every value in economics_long against its date.
# plot() and points() draw as side effects and return NULL, so they are issued
# as two separate statements. Chaining them with `+` (ggplot2 style) only
# evaluated NULL + NULL and printed a stray integer(0).
# NOTE(review): economics_long appears to stack several variables in a single
# value column (see the `variable` column used in 2B), so this one line runs
# through all of them in row order - confirm that is the intended picture.
plot(
  economics_long$date, economics_long$value,
  type = "l",
  xlab = "Year",
  ylab = "Value",
  main = "Change in Value over Time"
)
points(economics_long$date, economics_long$value) # overlay the observations

2B

# Part 2B: one coloured line per variable in economics_long over time.
ggplot(economics_long, aes(x = date, y = value, color = variable)) +
  geom_line() +
  labs(
    title = "Change in Value over Time",
    subtitle = "Change in Value over Time for Multiple Variables",
    x = "Year",
    y = "Value",
    caption = "Source: economics_long dataset" # fixed typo: was "Souce"
  ) +
  theme_minimal()

2C

# Part 2C: the per-variable line plot, zoomed to y values between 0 and 16000.
ggplot(economics_long, aes(x = date, y = value, color = variable)) +
  geom_line() +
  labs(
    title = "Change in Value over Time",
    subtitle = "Change in Value over Time for Multiple Variables",
    x = "Year",
    y = "Value",
    caption = "Source: economics_long dataset" # fixed typo: was "Souce"
  ) +
  # coord_cartesian() zooms the view without discarding data outside the
  # limits, so lines that leave the window are clipped rather than broken.
  coord_cartesian(ylim = c(0, 16000)) +
  theme_minimal()

Part 3

# Part 3: overlapping body-mass density curves, one per penguin species.

# Keep complete rows only.
# NOTE(review): na.omit() drops a row when ANY column is missing, not just the
# plotted ones - confirm that discarding those penguins is intended.
penguins_clean <- penguins %>%
  na.omit()

ggplot(data = penguins_clean, mapping = aes(body_mass, fill = species)) +
  theme_minimal() +
  # Fixed fill colour for each species.
  scale_fill_manual(
    values = c(
      "Adelie" = "darkseagreen3",
      "Chinstrap" = "mistyrose3",
      "Gentoo" = "darkslategray"
    )
  ) +
  # Semi-transparent fills keep overlapping curves visible.
  geom_density(alpha = 0.4) +
  labs(
    x = "Body Mass",
    y = "Density",
    fill = "Species",
    title = "Penguin Body Mass Density",
    subtitle = "Density of Penguin Body Mass by Species",
    caption = "Source: penguins dataset"
  )

Part 4

4A

# Part 4A: share of each cut quality within every diamond colour grade.
ggplot(diamonds, aes(x = color, fill = cut)) +
  # position = "fill" rescales every bar to a total height of 1, so the y axis
  # shows within-colour proportions rather than raw counts.
  geom_bar(position = "fill") +
  labs(
    title = "Proportion of Cut by Color",
    subtitle = "Proportion of Diamond Cut by Color Category",
    x = "Color",
    y = "Proportion", # was "Count", which mislabelled the rescaled axis
    fill = "Cut Quality",
    caption = "Source: diamonds dataset"
  ) +
  theme_minimal()

4B

# Part 4B: raw diamond counts per colour grade, one bar per cut quality.
ggplot(diamonds, aes(color, fill = cut)) +
  theme_minimal() +
  # Dodged bars sit side by side; geom_bar() counts rows by default.
  geom_bar(position = "dodge") +
  labs(
    x = "Color",
    y = "Count",
    fill = "Cut Quality",
    title = "Grouped Bar Plot of Diamond Cut within Color",
    subtitle = "Counts of Cut Quality Across Color Categories",
    caption = "Source: diamonds dataset"
  )