# One-time setup: install the packages this script needs, but only those that
# are not already available, so re-running the script does not reinstall
# everything. (tidyverse already attaches ggplot2 and dplyr; they stay listed
# so the original package list is preserved.)
required_pkgs <- c("tidyverse", "ggplot2", "dplyr")
is_installed <- vapply(
  required_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}

# Part 1

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)

# Load the built-in iris data and prepare it for plotting.
# NOTE: the result is stored under the name `iris`, so it shadows the built-in
# dataset for every later step of this script.
data("iris")
iris <- iris %>%
  # Drop flowers whose petal length is 3.5. near() compares with a small
  # tolerance rather than relying on exact floating-point equality.
  filter(!near(Petal.Length, 3.5)) %>%
  # Sepal length-to-width ratio, the quantity plotted in Part 1.
  mutate(sepal_ratio = Sepal.Length / Sepal.Width)

# Outliers
library(dplyr)
# Per-species outliers of sepal_ratio using the 1.5 * IQR rule. The result is
# a new data frame (`iris` itself is not modified) that keeps the helper
# columns q1, q3, iqr and is_outlier. These rows are drawn as red points on
# top of the violin/box plot.
iris_outliers <- iris %>%
  group_by(Species) %>% # quartiles are computed within each species
  mutate(
    q1 = quantile(sepal_ratio, 0.25), # first quartile
    q3 = quantile(sepal_ratio, 0.75), # third quartile
    iqr = q3 - q1, # interquartile range
    # TRUE when the ratio lies more than 1.5 * IQR outside the quartiles
    is_outlier = sepal_ratio < (q1 - 1.5 * iqr) |
      sepal_ratio > (q3 + 1.5 * iqr)
  ) %>%
  filter(is_outlier) %>% # keep only the outlying rows
  ungroup() # drop the grouping so later steps are not silently per-species

# Violin plot of the sepal ratio per species with a boxplot overlay.
# The sample size quoted in the caption is computed from the data, so it
# cannot drift out of sync with the filtering applied to `iris`.
ggplot(iris, aes(x = Species, y = sepal_ratio)) +
  # Semi-transparent violin; trim = FALSE leaves the tails untrimmed
  geom_violin(fill = "lightblue", alpha = 0.4, trim = FALSE) +
  # The boxplot's own outlier markers are hidden (outlier.shape = NA) because
  # the outliers are drawn separately by the next layer
  geom_boxplot(width = 0.25, alpha = 0.7, outlier.shape = NA) +
  # Outliers in red; height = 0 jitters horizontally only, so the plotted
  # ratio values stay exact
  geom_jitter(
    data = iris_outliers,
    width = 0.2, height = 0, alpha = 0.6, color = "red", size = 2
  ) +
  labs(
    x = "Species",
    y = "Sepal Length to Width Ratio",
    title = "Sepal Length to Width Ratio Per Species",
    caption = paste0(
      "Irises with a petal length of 3.5 were excluded leading to a ",
      "total sample size of ", nrow(iris)
    )
  ) +
  theme_minimal() # minimal theme for aesthetics

#Part 2 #2a.

# Line
# Base-graphics plot of value against date for economics_long.
# NOTE(review): economics_long appears to hold several series (the ggplot
# versions of this chart colour by `variable`), so this single trace may join
# points across series -- confirm that is intended.
with(
  economics_long,
  plot(
    x = date,
    y = value,
    type = "b",        # draw both points and connecting lines
    pch = 9,           # point symbol
    col = "darkgreen", # colour of points and lines
    lwd = 1,           # line width
    xlab = "Date",
    ylab = "Value for Variable Category",
    main = "Values for Economic Variables Over Time"
  )
)

#2b.

# Points plus connecting lines for every series in economics_long, with one
# colour per `variable`.
ggplot(economics_long, aes(x = date, y = value, color = variable)) +
  geom_point(size = 1) + # scatterplot points
  # `linewidth` is ggplot2's own name for line thickness; `lwd` is only a
  # base-graphics alias for it
  geom_line(linewidth = 0.5, alpha = 0.5) +
  labs(
    title = "Change in Value Over Time",
    subtitle = "Value for categories within the economic dataset between 1960 and 2020",
    x = "Dates",
    y = "Value for Variable Category",
    caption = "Source: ggplot2: economics_long dataset"
  ) +
  theme_minimal() # improve plot aesthetics

#2c.

# Same chart as 2b, zoomed to y values between 0 and 20000.
ggplot(economics_long, aes(x = date, y = value, color = variable)) +
  geom_point(size = 1) + # scatterplot points
  # `linewidth` is ggplot2's own name for line thickness; `lwd` is only a
  # base-graphics alias for it
  geom_line(linewidth = 0.5, alpha = 0.5) +
  # Restrict the visible y range via the coordinate system
  coord_cartesian(ylim = c(0, 20000)) +
  labs(
    title = "Change in Value Over Time",
    subtitle = "Value for categories within the economic dataset between 1960 and 2020",
    x = "Dates",
    y = "Value for Variable Category",
    caption = "Source: ggplot2: economics_long dataset"
  ) +
  theme_minimal() # improve plot aesthetics

#Part 3

# Keep only the penguins that have a recorded body mass.
penguins_clean <- filter(penguins, !is.na(body_mass))

# Overlaid, semi-transparent density curves of body mass, one fill colour per
# species.
ggplot(data = penguins_clean, mapping = aes(x = body_mass, fill = species)) +
  geom_density(alpha = 0.4) +
  # Fixed colour for each species, matched by name
  scale_fill_manual(
    values = c(
      Adelie = "darkseagreen3",
      Chinstrap = "mistyrose3",
      Gentoo = "darkslategrey"
    )
  ) +
  theme_minimal() +
  labs(
    title = "Distribution of Body Mass Among Penguin Species",
    x = "Body Mass",
    y = "Density"
  )

#Part 4

#4a.

data("diamonds") # load the diamonds dataset
# Share of each cut within every diamond colour: colour is on the x axis and
# cut sets the fill. position_fill() stacks the segments and rescales each bar
# to the same total height, so the y axis reads as a proportion.
ggplot(data = diamonds, mapping = aes(x = color, fill = cut)) +
  geom_bar(position = position_fill()) +
  scale_fill_viridis_d() + # colour-blind-friendly discrete palette
  theme_minimal() +
  labs(
    title = "Proportion of Cut for Each Diamond Color",
    x = "Color",
    y = "Proportion"
  )

#4b.

# Number of diamonds of each cut within every colour: colour is on the x axis
# and cut sets the fill. position_dodge() places the per-cut bars side by side
# instead of stacking them.
ggplot(data = diamonds, mapping = aes(x = color, fill = cut)) +
  geom_bar(stat = "count", position = position_dodge()) +
  theme_minimal() +
  labs(
    title = "Count of Diamond Cut Across Diamond Colors",
    x = "Diamonds Color",
    y = "Diamond Count"
  )