# One-time setup: install any required package that is not already available.
# The original line used typographic quotes and ran three calls together on a
# single line, which R cannot parse. Installing only what is missing also
# avoids a slow, network-dependent reinstall on every run of the script.
required_pkgs <- c("tidyverse", "ggplot2", "dplyr")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
# Part 1 ----
# Load iris, drop the flowers whose petal length is exactly 3.5, and add the
# sepal length-to-width ratio used by the outlier search and the plot below.
data("iris")
iris <- iris %>%
  filter(Petal.Length != 3.5) %>%
  mutate(sepal_ratio = Sepal.Length / Sepal.Width)
# Outliers ----
# Within each species, flag the flowers whose sepal ratio lies more than
# 1.5 * IQR below the first quartile or above the third quartile, and keep
# only those flagged rows.
library(dplyr)
iris_outliers <- iris %>%
  group_by(Species) %>%
  mutate(
    q1 = quantile(sepal_ratio, 0.25), # first quartile, per species
    q3 = quantile(sepal_ratio, 0.75), # third quartile, per species
    iqr = q3 - q1,                    # interquartile range
    is_outlier = sepal_ratio < (q1 - 1.5 * iqr) |
      sepal_ratio > (q3 + 1.5 * iqr)
  ) %>%
  filter(is_outlier) %>%
  # Drop the grouping so later operations on iris_outliers are not silently
  # evaluated per species.
  ungroup()
# Violin with boxplot overlay ----
# Shows the distribution of the sepal ratio for each species. The flagged
# outliers from iris_outliers are drawn on top as red points.
ggplot(iris, aes(x = Species, y = sepal_ratio)) +
  geom_violin(fill = "lightblue", alpha = 0.4, trim = FALSE) +
  # outlier.shape = NA hides the boxplot's own outlier markers so they are
  # not drawn twice.
  geom_boxplot(width = 0.25, alpha = 0.7, outlier.shape = NA) +
  # Horizontal jitter only (height = 0) so the plotted ratio is not altered.
  # The fixed seed makes the figure identical on every run.
  geom_point(
    data = iris_outliers,
    position = position_jitter(width = 0.2, height = 0, seed = 1),
    alpha = 0.6,
    color = "red",
    size = 2
  ) +
  labs(
    x = "Species",
    y = "Sepal Length to Width Ratio",
    title = "Sepal Length to Width Ratio Per Species",
    # The sample size is read from the data so the caption cannot go stale
    # if the filtering step changes.
    caption = paste(
      "Irises with a petal length of 3.5 were excluded leading to a total sample size of",
      nrow(iris)
    )
  ) +
  theme_minimal()
# Part 2 ----
# 2a. Base R plot of every value in economics_long against its date.
# NOTE(review): all rows are drawn as one series, so the connecting line runs
# across whatever variables the long-format data contains - confirm intended.
with(
  economics_long,
  plot(
    date, value,
    type = "b",        # both points and connecting lines
    pch = 9,           # point symbol
    col = "darkgreen", # colour of points and lines
    lwd = 1,           # line width
    xlab = "Date",
    ylab = "Value for Variable Category",
    main = "Values for Economic Variables Over Time"
  )
)
# 2b. ggplot2 version: points plus a semi-transparent line, coloured by
# variable.
# The plot is built once and reused for 2c so the two figures cannot drift
# apart. The years in the subtitle are read from the data instead of being
# typed by hand, so the text always matches what is plotted.
econ_years <- format(range(economics_long$date), "%Y")
econ_plot <- ggplot(
  economics_long,
  aes(x = date, y = value, color = variable)
) +
  geom_point(size = 1) +
  # linewidth is the ggplot2 name for line thickness; lwd is the base
  # graphics spelling.
  geom_line(linewidth = 0.5, alpha = 0.5) +
  labs(
    title = "Change in Value Over Time",
    subtitle = paste(
      "Value for categories within the economic dataset between",
      econ_years[1], "and", econ_years[2]
    ),
    x = "Dates",
    y = "Value for Variable Category",
    caption = "Source: ggplot2: economics_long dataset"
  ) +
  theme_minimal()
econ_plot

# 2c. The same plot with the visible y range limited to 0-20000.
econ_plot + coord_cartesian(ylim = c(0, 20000))
# Part 3 ----
# Overlaid density curves of body mass, one per penguin species.
# Rows with a missing body mass are removed first.
penguins_clean <- filter(penguins, !is.na(body_mass))

ggplot(data = penguins_clean, mapping = aes(x = body_mass, fill = species)) +
  geom_density(alpha = 0.4) + # semi-transparent so overlapping curves show
  labs(
    title = "Distribution of Body Mass Among Penguin Species",
    x = "Body Mass",
    y = "Density"
  ) +
  # One fixed fill colour per species.
  scale_fill_manual(
    values = c(
      Adelie = "darkseagreen3",
      Chinstrap = "mistyrose3",
      Gentoo = "darkslategrey"
    )
  ) +
  theme_minimal()
# Part 4 ----
# 4a. Proportion of each cut within every diamond colour.
# Colour is on the x axis and cut is mapped to the bar fill.
data(diamonds)
ggplot(data = diamonds, mapping = aes(x = color, fill = cut)) +
  geom_bar(position = "fill") + # stacked bars shown as proportions
  scale_fill_viridis_d() +      # viridis discrete palette
  labs(
    title = "Proportion of Cut for Each Diamond Color",
    x = "Color",
    y = "Proportion"
  ) +
  theme_minimal()
# 4b. Number of diamonds of each cut, shown side by side within every colour.
# Colour is on the x axis and cut is mapped to the bar fill.
ggplot(data = diamonds, mapping = aes(x = color, fill = cut)) +
  geom_bar(position = "dodge") + # counts rows; one bar per cut, side by side
  labs(
    title = "Count of Diamond Cut Across Diamond Colors",
    x = "Diamonds Color",
    y = "Diamond Count"
  ) +
  theme_minimal()