library(ggplot2) ### load in ggplot package
library(dplyr) ### load in dplyr package
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Build `sepal_data` from the built-in iris data:
##   * drop every flower whose petal length is 3.5
##   * add S.ratio, the sepal length-to-width ratio (used by the plot below)
sepal_data <- iris %>%
  ## near() compares doubles with a small tolerance, so the exclusion does not
  ## depend on exact floating-point equality the way `!= 3.5` would
  filter(!near(Petal.Length, 3.5)) %>%
  mutate(S.ratio = Sepal.Length / Sepal.Width)
## Violin + boxplot + raw observations of the sepal length:width ratio,
## one group per species.
ggplot(sepal_data, aes(x = Species, y = S.ratio, fill = Species)) +
  ## trim = FALSE keeps the violin tails (they are NOT cut at the data range)
  geom_violin(trim = FALSE, alpha = 0.6) +
  ## narrow boxplot drawn inside the violin; outlier.shape = NA hides the
  ## boxplot's own outlier markers because the jitter layer draws every point
  geom_boxplot(width = 0.15, outlier.shape = NA, alpha = 0.8) +
  ## every observation as a point (not only outliers); height = 0 restricts
  ## the jitter to the horizontal direction so the plotted ratios stay exact
  geom_jitter(width = 0.1, height = 0, size = 1.0, alpha = 0.5) +
  labs(
    ## the ratio of two lengths in cm is unitless, so no "(cm)" in the labels
    title = "Sepal Length / Width Ratio across Species",
    x = "Species",
    y = "Sepal Length / Width Ratio"
  ) +
  theme_minimal()
Figure 1: Distribution of the sepal length-to-width ratio for setosa,
versicolor, and virginica plants (n = 148). Sepal length and width were
both measured in centimeters, so the ratio itself is unitless. Plants
with a petal length of 3.5 cm were excluded from the figure.
## Base R time-series plot of the `unemploy` series from economics_long.
data(economics_long, package = "ggplot2") ## load the dataset, naming its source package explicitly
## keep only the rows of the "unemploy" variable so a single series remains
ec_data <- subset(economics_long, variable == "unemploy")
plot(ec_data$date, ec_data$value, ## x = date, y = value of the series
     type = "b", ## draw both the data points and a connecting line
     xlab = "Date",
     ## the series plotted is `unemploy`, so label it as unemployment
     ## (recorded in thousands according to the ggplot2 dataset documentation)
     ylab = "Unemployment (thousands)",
     main = "Change in Unemployment Over Time")
Figure 2: A base R plot displaying the relationship between time and
unemployment, with the dataset showing many clear peaks and troughs as
numbers rise and fall.
## Line chart of the long-format economics data: one line per variable,
## all series drawn against the same date and value axes.
ggplot(data = economics_long) +
  aes(x = date, y = value, color = variable) + ## default mapping for all layers
  geom_line() +
  theme_minimal() + ## basic theme
  ggtitle(
    "Economics Over Time",
    subtitle = "Colored by variable"
  ) +
  xlab("Date") +
  ylab("Value") +
  labs(color = "Variable") ## legend title
## Figure 3: A line plot showing all the variables of the original data.
As population increases through the years, the other variables appear
essentially unchanged at this scale.
## Same line chart of every economics_long variable, but with the visible
## y range limited to 2500-15000. coord_cartesian() only changes the view;
## no rows are dropped from the data.
ggplot(
  data = economics_long,
  mapping = aes(x = date, y = value, color = variable)
) +
  geom_line() +
  coord_cartesian(ylim = c(2500, 15000)) + ## zoom without altering the data
  theme_minimal() +
  labs(title = "Economics Over Time", subtitle = "Colored by variable") +
  labs(x = "Date", y = "Value") +
  labs(color = "Variable") ## legend title
Figure 3 (zoomed): The same line plot of all the variables of the
original data, zoomed in on the y axis without altering the data. With
this much closer view, you can see that as the pce variable increases
steadily, unemployment continues to show varying troughs and peaks, with
a general upward trend.
library(palmerpenguins) ## making sure the data installed
##
## Attaching package: 'palmerpenguins'
## The following objects are masked from 'package:datasets':
##
## penguins, penguins_raw
## Load the penguins data and keep only rows with a recorded body mass.
## Attaching palmerpenguins masks an object of the same name from `datasets`
## (see the masking message above), so the source package is named explicitly
## instead of relying on search-path order.
data(penguins, package = "palmerpenguins")
pengs <- penguins %>%
  filter(!is.na(body_mass_g)) ## drop rows where body mass is NA
## Overlaid density curves of body mass, one semi-transparent curve per
## species, each species filled with its own hand-picked colour.
ggplot(data = pengs, mapping = aes(x = body_mass_g, fill = species)) +
  geom_density(alpha = 0.6) + ## alpha < 1 keeps overlapping curves visible
  scale_fill_manual(
    ## named vector: species level -> fill colour
    values = c(
      "Adelie" = "darkseagreen3",
      "Chinstrap" = "mistyrose3",
      "Gentoo" = "darkslategrey"
    )
  ) +
  theme_minimal() +
  ggtitle("Distribution of Penguin Body Mass by Species") +
  xlab("Body Mass(g)") +
  ylab("Density") +
  labs(fill = "Species") ## legend title
Figure 4: A density plot displaying the distribution of body mass (g)
for each penguin species. Overall, the Gentoo penguins appear to have
the highest average body mass.
## Proportional stacked bar chart: for each diamond colour, the share of
## each cut. Every bar is scaled to the same total height.
ggplot(data = diamonds, mapping = aes(x = color, fill = cut)) +
  geom_bar(position = position_fill()) + ## stack and normalise each bar
  scale_fill_brewer(palette = "Set2") + ## ColorBrewer "Set2" palette
  theme_minimal() +
  ggtitle("The Relationship Between Diamond Cut and Color") +
  labs(
    x = "Diamond Color",
    y = "Proportion",
    fill = "Cut" ## legend title
  )
Figure 5: A proportional stacked barplot displaying the proportion of
each diamond cut category within each color category.
## Grouped bar chart: for each diamond colour, one bar per cut placed side
## by side, with bar height equal to the number of diamonds.
ggplot(data = diamonds) +
  aes(x = color, fill = cut) + ## default mapping for all layers
  geom_bar(position = position_dodge()) + ## side-by-side bars per cut
  scale_fill_brewer(palette = "Set2") + ## ColorBrewer "Set2" palette
  theme_minimal() +
  labs(title = "The Relationship Between Diamond Cut and Color") +
  xlab("Diamond Color") +
  ylab("Count") +
  labs(fill = "Cut") ## legend title
Figure 6: A grouped barplot displaying the raw count of each diamond
cut category within each color category.