# Setup ----
# Install ggplot2 and tidyverse only when they are not already available;
# calling install.packages() unconditionally re-downloads them on every run.
required_pkgs <- c("ggplot2", "tidyverse")
is_installed <- vapply(
  required_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}

# ggplot2 supplies the plotting functions; dplyr supplies filter(), mutate()
# and the %>% pipe used below.
library(ggplot2)
data("iris")
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# 1a. ----
# Build the plotting data: drop the rows whose petal length equals 3.5, then
# add sepal_ratio = sepal length divided by sepal width.
iris_clean <- mutate(
  filter(iris, Petal.Length != 3.5),
  sepal_ratio = Sepal.Length / Sepal.Width
)

# Sepal ratio per species, drawn as three stacked layers: a violin for the
# distribution shape, a box plot on top of it, and the jittered observations.
ggplot(
  data = iris_clean,
  mapping = aes(x = Species, y = sepal_ratio, fill = Species)
) +
  geom_violin(alpha = 0.5) +
  # outlier.shape = NA hides the box plot's own outlier markers; the jitter
  # layer below already draws every observation.
  geom_boxplot(outlier.shape = NA, alpha = 0.5) +
  geom_jitter(width = 0.1, alpha = 0.5) +
  labs(
    title = "The Distribution to Sepal length to width ratio",
    x = "Species",
    y = " Sepal length to width Ratio",
    caption = "In figure 1a the data exludes the flower species with a petal size of 3.5. This iris dataset shows the distribution of the ratio between the sepal width and sepal length."
  ) +
  theme_minimal() +
  # Must come after theme_minimal(), which would otherwise reset it. The x
  # axis already names each species, so the fill legend is dropped.
  theme(legend.position = "none")
# Part 2: 2a. ----
# Base-graphics version: value against date for the economics_long data.
data("economics_long")
# NOTE(review): economics_long appears to hold several series in long form
# (the ggplot versions of this figure colour by `variable`), so type = "b"
# also joins points across series -- confirm this is intended.
with(
  economics_long,
  plot(
    x = date,
    y = value,
    type = "b", # draw both the points and the connecting lines
    main = "Change in the value over different amounts of time",
    xlab = "time (years)",
    ylab = "Value"
  )
)
# 2b. ----
# ggplot version of the economics figure: value against date, with one colour
# per level of `variable`.
ggplot(economics_long, aes(x = date, y = value, color = variable)) +
  # `linewidth` is ggplot2's own name for line thickness; `lwd` is the
  # base-graphics spelling.
  geom_line(linewidth = 0.6) +
  # Small, semi-transparent markers so the points do not hide the lines.
  geom_point(size = 0.5, alpha = 0.5) +
  # Title, subtitle, caption, axis labels and legend title.
  labs(
    title = "Economic Indicators over different periods of time",
    subtitle = "graph by Hope Parrish",
    caption = "ggplot economics dataset",
    x = "date",
    y = "value",
    color = "variable"
  ) +
  theme_minimal()
# 2c. ----
# Zoomed view of the economics data: points plus a linear fit per colour
# group, with the y axis restricted to 0-10000.
ggplot(
  data = economics_long,
  mapping = aes(x = date, y = value, color = variable)
) +
  geom_point(alpha = 0.4, size = 0.5) +
  # Straight-line (linear model) fit.
  geom_smooth(method = "lm") +
  # coord_cartesian() only zooms the view; no rows are dropped, so the fit
  # above is still computed from all of the data.
  coord_cartesian(ylim = c(0, 10000)) +
  theme_minimal() +
  labs(
    title = "Zoomed in view of Economic indicators",
    subtitle = "comparison of differnt economic values over time",
    caption = "there is a zoomed in view on the y-axis to show various economic trends",
    x = "date",
    y = "value",
    color = "variable"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Part 3: 3a. ----
# Packages
library(ggplot2)

# Data set
data(penguins)

# Preview the data: head() returns the first 6 rows by default, written out
# explicitly here.
head(penguins, n = 6L)
## species island bill_len bill_dep flipper_len body_mass sex year
## 1 Adelie Torgersen 39.1 18.7 181 3750 male 2007
## 2 Adelie Torgersen 39.5 17.4 186 3800 female 2007
## 3 Adelie Torgersen 40.3 18.0 195 3250 female 2007
## 4 Adelie Torgersen NA NA NA NA <NA> 2007
## 5 Adelie Torgersen 36.7 19.3 193 3450 female 2007
## 6 Adelie Torgersen 39.3 20.6 190 3650 male 2007
# Keep only the penguins that have a recorded body mass; rows where
# body_mass is NA are dropped.
penguins_clean <- filter(penguins, !is.na(body_mass))

# Overlapping density curves of body mass, one per species.
ggplot(penguins_clean, aes(x = body_mass, fill = species)) +
  # alpha = 0.5 keeps the curves semi-transparent so overlaps stay visible.
  geom_density(alpha = 0.5) +
  # Fixed fill colour for each species.
  scale_fill_manual(
    values = c(
      "Adelie" = "darkseagreen3",
      "Chinstrap" = "mistyrose3",
      "Gentoo" = "darkslategrey"
    )
  ) +
  theme_minimal() +
  # Title, axis labels, legend title and caption.
  labs(
    title = "distribution of penguin body mass by species",
    x = "body mass",
    y = "density",
    fill = "species",
    caption = "this is a density plot taht is showing overplapping differences in the size among species as well as the distibution of body mass"
  )
# Part 4: 4a. ----
# Load the required library.
library(ggplot2)

# Stacked bars of the diamonds data: one bar per cut, split by color.
# NOTE(review): the title describes cuts within color categories, while the
# chart maps cut to x and color to fill -- confirm which is intended.
ggplot(diamonds, aes(x = cut, fill = color)) +
  # position = "fill" rescales every bar to the same height, so the y axis
  # shows proportions rather than raw counts.
  geom_bar(position = "fill") +
  # The bars are coloured through the `fill` aesthetic, so the palette must be
  # a fill scale; a colour scale is ignored because nothing maps to colour.
  # `color` is categorical, hence the discrete (_d) viridis variant.
  scale_fill_viridis_d() +
  # Title, axis labels and legend title.
  labs(
    title = "proportion of each cut of diamonds wihtin different color categories",
    x = "cut of diamonds",
    y = "proportion",
    fill = " color of diamonds"
  ) +
  theme_minimal()
# 4b. ----
# Grouped bars of the diamonds data: for each cut, one bar per color showing
# the number of diamonds.
# NOTE(review): the title describes cuts within color categories, while the
# chart maps cut to x and color to fill -- confirm which is intended.
ggplot(diamonds, aes(x = cut, fill = color)) +
  # position = "dodge" places the bars for each color side by side.
  geom_bar(position = "dodge") +
  # The bars are coloured through the `fill` aesthetic, so the palette must be
  # a fill scale; a colour scale is ignored because nothing maps to colour.
  # `color` is categorical, hence the discrete (_d) viridis variant.
  scale_fill_viridis_d() +
  # Title, axis labels and legend title.
  labs(
    title = "count of diamond cuts within each color category",
    x = "diamond cut",
    y = "count",
    fill = "color of diamond"
  ) +
  theme_minimal()