# Install the required packages only when they are missing, so re-running the
# script does not download and reinstall them every time.
required_pkgs <- c("ggplot2", "tidyverse")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(ggplot2)
data("iris")
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Part 1

1a.

# Build the plotting data: drop flowers whose petal length is exactly 3.5 and
# add the sepal length-to-width ratio as a new column.
iris_clean <- iris %>%
  filter(Petal.Length != 3.5) %>%
  mutate(sepal_ratio = Sepal.Length / Sepal.Width)

# Violin + boxplot + raw points of the sepal ratio, one group per species.
ggplot(iris_clean, aes(x = Species, y = sepal_ratio, fill = Species)) +
  geom_violin(alpha = 0.5) + # full distribution shape, semi-transparent
  # outliers hidden here because the jittered points below already show them
  geom_boxplot(alpha = 0.5, outlier.shape = NA) +
  # height = 0: spread points sideways only; without it geom_jitter() also
  # adds vertical noise, which would shift the plotted ratio values
  geom_jitter(alpha = 0.5, width = 0.1, height = 0) +
  labs(
    title = "Distribution of sepal length-to-width ratio by species",
    x = "Species",
    y = "Sepal length-to-width ratio",
    # caption split over two lines so it is not cut off at the plot edge
    caption = paste(
      "Figure 1a: iris flowers with a petal length of 3.5 are excluded.",
      "Shows the distribution of the ratio of sepal length to sepal width.",
      sep = "\n"
    )
  ) +
  theme_minimal() +
  theme(legend.position = "none") # species is already shown on the x-axis

# The legend is hidden because the x-axis already labels each species.

Part 2

2a.

# Load the long-format economics data set.
data("economics_long")

# Base-graphics plot of value against date for the whole data set.
# NOTE(review): economics_long has a `variable` column (used for colour in
# 2b), so this single trace draws every series together - confirm that is
# the intended figure.
with(
  economics_long,
  plot(
    x = date,
    y = value,
    type = "b", # "b" = both points and connecting lines
    main = "Change in the value over different amounts of time",
    xlab = "time (years)",
    ylab = "Value"
  )
)

2b.

# 2b: one coloured line (plus faint points) per economic indicator over time.
ggplot(economics_long, aes(x = date, y = value, color = variable)) +
  # linewidth is the ggplot2 aesthetic for line thickness (lwd is the
  # base-graphics name)
  geom_line(linewidth = 0.6) +
  geom_point(size = 0.5, alpha = 0.5) + # small, semi-transparent markers
  labs(
    title = "Economic Indicators over different periods of time",
    subtitle = "graph by Hope Parrish",
    caption = "ggplot economics dataset",
    x = "date",
    y = "value",
    color = "variable"
  ) +
  theme_minimal() # clean visual theme

2c.

# 2c: points plus a linear trend per indicator, zoomed in on the y-axis.
ggplot(economics_long, aes(x = date, y = value, color = variable)) +
  geom_point(size = 0.5, alpha = 0.4) + # small, semi-transparent markers
  # linear regression line per indicator; the formula is spelled out so
  # geom_smooth() does not print its "using formula" message on every run
  geom_smooth(method = "lm", formula = y ~ x) +
  # zoom the view to 0-10000 without dropping data, so the regression is
  # still fitted on all observations
  coord_cartesian(ylim = c(0, 10000)) +
  labs(
    title = "Zoomed in view of Economic indicators",
    subtitle = "comparison of different economic values over time",
    x = "date",
    y = "value",
    color = "variable",
    caption = "there is a zoomed in view on the y-axis to show various economic trends"
  ) +
  theme_minimal() # clean visual theme

Part 3

3a.

# Packages: ggplot2 supplies the plotting functions used below.
library(ggplot2)
# Data set: load `penguins` into the workspace.
data("penguins")

head(penguins) # preview the first rows (head() shows 6 rows by default, not 10)
##   species    island bill_len bill_dep flipper_len body_mass    sex year
## 1  Adelie Torgersen     39.1     18.7         181      3750   male 2007
## 2  Adelie Torgersen     39.5     17.4         186      3800 female 2007
## 3  Adelie Torgersen     40.3     18.0         195      3250 female 2007
## 4  Adelie Torgersen       NA       NA          NA        NA   <NA> 2007
## 5  Adelie Torgersen     36.7     19.3         193      3450 female 2007
## 6  Adelie Torgersen     39.3     20.6         190      3650   male 2007
# Density plot of penguin body mass, one filled curve per species.

# Keep only rows that have a recorded body mass.
penguins_clean <- penguins %>%
  filter(!is.na(body_mass))

penguins_clean %>%
  ggplot(aes(x = body_mass, fill = species)) +
  geom_density(alpha = 0.5) + # semi-transparent so overlapping curves stay visible
  # fixed colour per species
  scale_fill_manual(
    values = c(
      "Adelie" = "darkseagreen3",
      "Chinstrap" = "mistyrose3",
      "Gentoo" = "darkslategrey"
    )
  ) +
  labs(
    title = "Distribution of penguin body mass by species",
    x = "Body mass (g)",
    y = "Density",
    fill = "Species", # legend title
    caption = paste(
      "Density plot showing the distribution of body mass",
      "and the overlapping size differences among species."
    )
  ) +
  theme_minimal() # clean visual theme

Part 4

4a.

# load the required library
library(ggplot2)

# 4a: share of each diamond color within every cut (bars stacked to 100%).
# x places one bar per cut; fill splits each bar by diamond color.
ggplot(diamonds, aes(x = cut, fill = color)) +
  # position = "fill" rescales every bar to height 1, so the y-axis shows
  # proportions rather than counts
  geom_bar(position = "fill") +
  # colorblind-friendly palette; it must be the *fill* scale and the discrete
  # (_d) variant, because `color` is mapped to fill and is categorical
  scale_fill_viridis_d() +
  labs(
    title = "Proportion of diamond colors within each cut",
    x = "cut of diamonds",
    y = "proportion",
    fill = "color of diamonds"
  ) +
  theme_minimal() # clean visual theme

4b.

# 4b: number of diamonds of each color within every cut, bars side by side.
# x places one group per cut; fill splits each group by diamond color.
ggplot(diamonds, aes(x = cut, fill = color)) +
  geom_bar(position = "dodge") + # dodge puts the bars next to each other
  # colorblind-friendly palette; it must be the *fill* scale and the discrete
  # (_d) variant, because `color` is mapped to fill and is categorical
  scale_fill_viridis_d() +
  labs(
    title = "Count of diamonds by cut and color",
    x = "diamond cut",
    y = "count",
    fill = "color of diamond"
  ) +
  theme_minimal() # clean visual theme