#part 1
# Load the ggplot2 and dplyr packages so I can wrangle the data and make the figure.
# The instructions say to exclude any flowers with a petal length of 3.5, so I removed those rows first. Then I created a new variable called sepal_ratio, which is sepal length divided by sepal width.
# Load packages
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Build the analysis table: drop every flower whose petal length is exactly
# 3.5, then append sepal_ratio (sepal length divided by sepal width).
iris_ratio <- iris %>%
  dplyr::filter(Petal.Length != 3.5) %>%
  dplyr::mutate(sepal_ratio = Sepal.Length / Sepal.Width)
# Count how many flowers were excluded (petal length of exactly 3.5) and
# record the final sample size after the exclusion.
n_excluded <- with(iris, sum(Petal.Length == 3.5))
n_total <- nrow(iris_ratio)
# Keep only the rows whose sepal ratio is flagged as an outlier within its own
# species, using the values boxplot.stats() reports in $out.
# NOTE(review): boxplot.stats() derives its fences from Tukey hinges, which may
# differ slightly from the quantile-based fences geom_boxplot() draws -- confirm
# this is the outlier definition you want.
iris_outliers <- iris_ratio %>%
  group_by(Species) %>%
  filter(sepal_ratio %in% boxplot.stats(sepal_ratio)$out) %>%
  # Drop the grouping so later operations on iris_outliers are not silently
  # applied per species.
  ungroup()
# Make the figure: for each species, a violin (full distribution), a narrow
# boxplot (median and quartiles), and the raw observations as jittered points.
ggplot(iris_ratio, aes(x = Species, y = sepal_ratio, fill = Species)) +
  geom_violin(trim = FALSE, alpha = 0.6) +
  # Hide the boxplot's own outlier markers: geom_jitter() below already draws
  # every observation, so outliers would otherwise be plotted twice.
  geom_boxplot(width = 0.15, alpha = 0.8, outlier.shape = NA) +
  # height = 0 restricts the jitter to the x direction so the plotted ratios
  # are not perturbed vertically.
  geom_jitter(width = 0.08, height = 0, alpha = 0.5) +
  # Add a title and axis labels, matching the other figures in this script
  labs(
    title = "Sepal Length-to-Width Ratio by Species",
    x = "Species",
    y = "Sepal Ratio (Length / Width)",
    fill = "Species"
  ) +
  theme_minimal()
#part 2a
# Base R graphic: personal consumption expenditures (pce) plotted against
# date, with both columns taken straight from the economics dataset.
with(
  economics,
  plot(
    x = date,
    y = pce,
    type = "o", # overplot the points and the connecting line
    pch = 16, # filled circle marker
    main = "Personal Consumption Over Time",
    xlab = "Date",
    ylab = "Personal Consumption Expenditures"
  )
)
#Part 2b
# Load ggplot package
library(ggplot2)
# ggplot2 version of the consumption-over-time figure: date on the x-axis,
# pce on the y-axis.
ggplot(data = economics, mapping = aes(x = date, y = pce)) +
  # Line layer first (overall trend), then a marker for every observation
  geom_line() +
  geom_point() +
  # Clean, minimal theme
  theme_minimal() +
  # Title and axis labels
  labs(
    x = "Date",
    y = "Personal Consumption Expenditures",
    title = "Personal Consumption Over Time"
  )
#part 2C
# Zoomed view of personal consumption over time, showing only the 0-10,000
# range of the y-axis.
ggplot(economics, aes(x = date, y = pce)) +
  # Add a line to show the trend
  geom_line() +
  # Add points for each data value
  geom_point() +
  # Zoom with coord_cartesian() instead of ylim(): ylim() discards every
  # observation above 10,000 before drawing (and emits "Removed N rows"
  # warnings), whereas coord_cartesian() keeps all the data and only changes
  # the visible window.
  coord_cartesian(ylim = c(0, 10000)) +
  # Add title and axis labels
  labs(
    title = "Personal Consumption Over Time (Zoomed)",
    x = "Date",
    y = "Personal Consumption Expenditures"
  ) +
  # Apply a clean theme
  theme_minimal()
#Part 3a
# Load the penguins dataset
library(palmerpenguins)
# Keep only the penguins that have a recorded body mass
penguins_clean <- penguins[complete.cases(penguins$body_mass_g), ]
# Overlapping, semi-transparent density curves of body mass, one per species
ggplot(
  data = penguins_clean,
  mapping = aes(x = body_mass_g, fill = species)
) +
  geom_density(alpha = 0.5) +
  # Clean, minimal theme
  theme_minimal() +
  # Hand-picked fill colours; the vector is unnamed, so the colours are
  # assigned to the species in the order of the fill scale's levels
  scale_fill_manual(
    values = c("darkseagreen3", "mistyrose3", "darkslategray")
  ) +
  # Title, axis labels, and legend title
  labs(
    fill = "Species",
    x = "Body Mass (grams)",
    y = "Density",
    title = "Penguin Body Mass Distribution"
  )
#part 4
# Proportional stacked bars: one bar per diamond color, split by cut, with
# every bar scaled to the same total height so the shares are comparable.
ggplot(data = diamonds, mapping = aes(x = color, fill = cut)) +
  geom_bar(position = position_fill()) +
  # Keep the graph clean with a simple theme
  theme_minimal() +
  # Title, axis labels, and legend title
  labs(
    fill = "Cut",
    x = "Diamond Color",
    y = "Proportion",
    title = "Proportion of Diamond Cut Within Each Color"
  )
#Part 4b
# Side-by-side bars: for each diamond color, one bar per cut showing the raw
# number of diamonds.
ggplot(data = diamonds, mapping = aes(x = color, fill = cut)) +
  geom_bar(position = position_dodge()) +
  # Keep the graph clean with a simple theme
  theme_minimal() +
  # Title, axis labels, and legend title
  labs(
    fill = "Cut",
    x = "Diamond Color",
    y = "Count",
    title = "Count of Diamond Cut Within Each Color"
  )