#part 1

# Load ggplot2 and dplyr (both part of the tidyverse) for wrangling the data and making the figure

# The instructions say to exclude any flowers with a petal length of 3.5, so I removed those rows first.
# Then I created a new variable called sepal_ratio, which is sepal length divided by sepal width.

# Load packages
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Drop the flowers excluded by the instructions (petal length of exactly 3.5),
# then add the sepal length-to-width ratio that goes on the y-axis.
iris_ratio <- iris %>%
  filter(Petal.Length != 3.5) %>%
  mutate(sepal_ratio = Sepal.Length / Sepal.Width)

# Count how many flowers were excluded and the final sample size
n_excluded <- sum(iris$Petal.Length == 3.5)
n_total <- nrow(iris_ratio)

# Get only the outliers for each species, as flagged by boxplot.stats().
# ungroup() afterwards so the result does not stay grouped by Species.
iris_outliers <- iris_ratio %>%
  group_by(Species) %>%
  filter(sepal_ratio %in% boxplot.stats(sepal_ratio)$out) %>%
  ungroup()

# Make the figure: violin + boxplot per species, with only the outliers
# drawn as individual points.
ggplot(iris_ratio, aes(x = Species, y = sepal_ratio, fill = Species)) +
  geom_violin(trim = FALSE, alpha = 0.6) +
  # Hide the boxplot's built-in outlier points; otherwise each outlier would
  # be drawn twice (once by the boxplot, once by the point layer below).
  geom_boxplot(width = 0.15, alpha = 0.8, outlier.shape = NA) +
  # Plot the per-species outliers computed above instead of every flower
  geom_jitter(data = iris_outliers, width = 0.08, alpha = 0.5) +
  # Report the sample size and number of excluded flowers on the figure
  labs(
    caption = paste0(
      "n = ", n_total, " flowers (", n_excluded,
      " excluded for petal length of 3.5)"
    )
  ) +
  theme_minimal()

#part 2a

# Base R time-series plot of personal consumption expenditures (pce)
# against date, both taken straight from the economics dataset.
# with() evaluates the column names inside economics, so no `$` is needed.
with(
  economics,
  plot(
    x = date,
    y = pce,
    main = "Personal Consumption Over Time",
    xlab = "Date",
    ylab = "Personal Consumption Expenditures",
    type = "o", # overplot points and the connecting line
    pch = 16    # filled circles
  )
)

#Part 2b

# Load ggplot package
library(ggplot2)

# ggplot2 version of the consumption-over-time figure.
# The date/pce mapping is added at the plot level, so both layers share it.
ggplot(data = economics) +
  aes(x = date, y = pce) +

  # Trend line with a point marking each observation
  geom_line() +
  geom_point() +

  # Title and axis labels
  ggtitle("Personal Consumption Over Time") +
  xlab("Date") +
  ylab("Personal Consumption Expenditures") +

  # Minimal theme for a clean look
  theme_minimal()

#part 2C

# Start the plot and set x and y variables
ggplot(economics, aes(x = date, y = pce)) +

  # Add a line to show the trend
  geom_line() +

  # Add points for each data value
  geom_point() +

  # Zoom the y-axis in on smaller values.
  # coord_cartesian() only changes the visible window. ylim() would instead
  # set the scale limits, which turns every out-of-range value into NA and
  # removes those rows from the line and point layers.
  coord_cartesian(ylim = c(0, 10000)) +

  # Add title and axis labels
  labs(
    title = "Personal Consumption Over Time (Zoomed)",
    x = "Date",
    y = "Personal Consumption Expenditures"
  ) +

  # Apply a clean theme
  theme_minimal()
# Note: with ylim(0, 10000) this plot produced "Removed ... rows containing
# missing values or values outside the scale range" warnings for geom_line()
# and geom_point(); zooming with coord_cartesian() keeps every row.

#Part 3a

# Load the penguins dataset
library(palmerpenguins)

# Keep only the penguins that have a recorded body mass
penguins_clean <- penguins %>%
  filter(!is.na(body_mass_g))

# Overlapping density curves of body mass, one per species
ggplot(data = penguins_clean) +
  aes(x = body_mass_g, fill = species) +

  # Semi-transparent so overlapping curves stay visible
  geom_density(alpha = 0.5) +

  # Custom fill colors; unnamed values are matched to species in order
  scale_fill_manual(
    values = c("darkseagreen3", "mistyrose3", "darkslategray")
  ) +

  # Title, axis labels, and legend title
  ggtitle("Penguin Body Mass Distribution") +
  xlab("Body Mass (grams)") +
  ylab("Density") +
  labs(fill = "Species") +

  # Minimal theme for a clean look
  theme_minimal()

#part 4

# Proportional stacked bars: diamond color on the x-axis, each bar split by
# cut and rescaled to the same height so the shares can be compared.
diamonds %>%
  ggplot(mapping = aes(x = color, fill = cut)) +

  # position = "fill" stacks the cuts and normalizes every bar
  geom_bar(position = "fill") +

  # Title, axis labels, and legend title
  ggtitle("Proportion of Diamond Cut Within Each Color") +
  xlab("Diamond Color") +
  ylab("Proportion") +
  labs(fill = "Cut") +

  # Minimal theme keeps the graph uncluttered
  theme_minimal()

#Part 4b

# Side-by-side bars: diamond color on the x-axis, one bar per cut, with bar
# height showing the raw number of diamonds.
diamonds %>%
  ggplot(mapping = aes(x = color, fill = cut)) +

  # position = "dodge" places the cuts next to each other instead of stacking
  geom_bar(position = "dodge") +

  # Title, axis labels, and legend title
  ggtitle("Count of Diamond Cut Within Each Color") +
  xlab("Diamond Color") +
  ylab("Count") +
  labs(fill = "Cut") +

  # Minimal theme keeps the graph uncluttered
  theme_minimal()