install.packages("fmsb") #install fmsb
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
library(fmsb) #ensure fmsb installed
# Radar chart of five mtcars measurements for the first five cars.
cars_subset <- mtcars[seq_len(5), c("mpg", "hp", "wt", "qsec", "disp")]

# The per-column maximum and minimum are stacked above the observations,
# in that order, before the frame is handed to radarchart().
data <- rbind(
  vapply(cars_subset, max, numeric(1)),
  vapply(cars_subset, min, numeric(1)),
  cars_subset
)

radarchart(data)
A radar plot is most appropriate for this data because it is well suited
to multi-faceted data. In this case, we are viewing the relationship
between five variables. With a radar plot, it is easier to visualize
complex relationships between variables (most ideal when observing more
than three variables).
install.packages("maps") #download maps package
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
install.packages("tibble") #download tibble package
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
install.packages("ggplot2")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
install.packages("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(maps) #ensure maps package installed
library(tibble) #ensure tibble package installed
# Choropleth of the USArrests Murder column drawn on the state map.

# USArrests stores state names as row names; move them into a lowercase
# `state` column so they can be matched against the map's `region` column.
arrest_data <- tibble::rownames_to_column(USArrests, var = "state") %>%
  mutate(state = tolower(state))

# Polygon outlines for the states, with the arrest figures attached to every
# polygon vertex (map `region` is joined to arrest `state`).
st_map <- map_data("state")
map_dat <- left_join(st_map, arrest_data, by = c("region" = "state"))

ggplot(map_dat, aes(x = long, y = lat, group = group, fill = Murder)) +
  geom_polygon(colour = "white") + # white outlines between polygons
  coord_fixed(ratio = 1.5) + # fixed y/x aspect ratio
  scale_fill_gradient(
    name = "Murder Rate", # legend title
    low = "yellow2", # low end of the gradient
    high = "red3" # high end of the gradient
  ) +
  labs(
    title = "USA Murder Rates Across States",
    subtitle = "Source: USArrests dataset"
  ) +
  theme_void() # drop axes, grid and panel background
A choropleth plot is good for visualizing data on a map by location
(in this case, by state). This plot is appropriate for this data because
it creates a clear, visually distinct scale of areas with low/high murder
rates by color. States with lower murder rates appear more yellow, while
states with higher murder rates appear more red.
install.packages("dplyr") #install dplyr package
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
install.packages("networkD3") #install networkD3 package
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
library(networkD3) #ensure networkD3 installed
library(dplyr) #ensure dplyr installed
# Sankey diagram of Titanic passenger counts flowing
# Class -> Sex -> Age -> Survived.
data("Titanic") # load in data
ttc_data <- as.data.frame(Titanic) # table -> data frame with a Freq column

# Sum Freq for every (from, to) combination in `df` and return the result
# with columns `source`, `target` and `value` (one stage of Sankey links).
# `from` and `to` are column names given as strings.
count_flow <- function(df, from, to) {
  df %>%
    group_by(source = .data[[from]], target = .data[[to]]) %>%
    summarise(value = sum(Freq), .groups = "drop")
}

# One set of links per adjacent pair of variables.
links <- bind_rows(
  count_flow(ttc_data, "Class", "Sex"), # class -> sex
  count_flow(ttc_data, "Sex", "Age"), # sex -> age
  count_flow(ttc_data, "Age", "Survived") # age -> survival
)
links <- as.data.frame(links) # plain data frame (a tibble caused an error)

# Every distinct label that appears as a source or a target is one node.
nodes <- data.frame(
  name = unique(c(as.character(links$source), as.character(links$target)))
)

# Position of each link end in `nodes`, shifted to start at 0 because
# sankeyNetwork() expects zero-based node indices.
links$IDsource <- match(links$source, nodes$name) - 1
links$IDtarget <- match(links$target, nodes$name) - 1

p <- sankeyNetwork(
  Links = links, Nodes = nodes,
  Source = "IDsource", Target = "IDtarget",
  Value = "value", NodeID = "name"
)
p # print the widget; assigning it alone does not display the diagram
A Sankey-style flow diagram is most appropriate for this data because we’re examining the interlinking of four variables. It shows the flow between groups, making their connections and relationships more visually clear. In this case, we’re able to see the flow between passenger class, sex, age, and survival status, and identify trends in the flow plot.
install.packages("ggdist") #install ggdist package
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
install.packages("ggplot2") #install ggplot package
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
library(ggdist) #ensure ggdist installed
library(ggplot2) #ensure ggplot installed
# Raincloud plot of tooth length by supplement type: a half density on one
# side, a narrow boxplot, and the raw observations as jittered points.
data("ToothGrowth") # load in data
ToothGrowth <- as.data.frame(ToothGrowth)
tg_clean <- ToothGrowth %>%
  select(supp, len) %>% # supplement type and tooth length only
  na.omit() # drop rows with missing values

ggplot(tg_clean, aes(x = supp, y = len, fill = supp)) +
  stat_halfeye( # half violin/density
    adjust = 0.5,
    width = 0.6,
    justification = -0.2,
    .width = 0, # no interval bar
    point_colour = NA # no point summary
  ) +
  geom_boxplot(
    width = 0.12,
    outlier.shape = NA, # raw points are drawn by geom_jitter() instead
    alpha = 0.5
  ) +
  geom_jitter(
    width = 0.08,
    height = 0, # jitter sideways only so the plotted lengths stay exact
    alpha = 0.5,
    size = 1.5
  ) +
  labs(
    title = "Tooth Growth Across Supplement Type",
    x = "Supplement Type",
    # NOTE(review): confirm the "cm" unit against the ToothGrowth help page.
    y = "Tooth Length (cm)"
  ) +
  theme_minimal() +
  theme(legend.position = "none") # fill duplicates the x axis, so no legend
A raincloud plot includes half of a violin/density view, as well as
a box plot. This is an effective method for visualizing data because it
shows the density of its distribution, as well as its typical range.
For this data, it helps visually communicate the density and
distribution of tooth length in centimeters between the two supplements,
highlighting that the OJ supplement has higher variability in
density.
# Scatterplot of petal length against sepal length, one colour per species.
data(iris)
ggplot(data = iris, mapping = aes(Sepal.Length, Petal.Length, colour = Species)) +
  geom_point() +
  labs(
    title = "Sepal and Petal Length Across Species",
    subtitle = "Source: iris dataset",
    x = "Sepal Length (cm)",
    y = "Petal Length (cm)"
  )
For this figure, I chose a scatterplot, because it is the go-to
choice for mapping two continuous variables. Here, it shows the
relationship between petal and sepal length, colored by species. It
displays a pattern of setosas having shorter petals and sepals, while
virginicas have the longest petals and sepals.
# Boxplot of petal length, one box per species (grouped through `color`).
# NOTE(review): x is the continuous Sepal.Length, so each box is placed by
# its group's sepal-length values; a conventional boxplot would map Species
# to x instead.
ggplot(iris, aes(x = Sepal.Length, y = Petal.Length, color = Species)) +
  geom_boxplot(
    alpha = 0.6, # lower opacity
    # Both position aesthetics are continuous, so state the orientation
    # instead of letting ggplot2 guess it and emit a warning.
    orientation = "x"
  ) +
  labs(
    x = "Sepal Length (cm)",
    y = "Petal Length (cm)",
    title = "Sepal and Petal Length Across Species",
    subtitle = "Source: iris dataset"
  )
## Warning: Orientation is not uniquely specified when both the x and y aesthetics are
## continuous. Picking default orientation 'x'.
I chose a boxplot for this figure because it shows a clear value
range. It also shows the relationship between petal and sepal length,
colored by species. We can see that it displays the same pattern of
setosas having shorter petals and sepals, while virginicas have the
longest petals and sepals.
# Violin plot of petal width, one violin per species (grouped through `fill`).
# NOTE(review): x is the continuous Sepal.Width rather than a category, which
# is what triggers ggplot2's overlapping-x-intervals dodge warning.
ggplot(data = iris, mapping = aes(Sepal.Width, Petal.Width, fill = Species)) +
  geom_violin() +
  labs(
    title = "Sepal and Petal Width Across Species",
    subtitle = "Source: iris dataset",
    x = "Sepal Width (cm)",
    y = "Petal Width (cm)"
  )
## Warning: `position_dodge()` requires non-overlapping x intervals.
I chose a violin plot for this data because it communicates the
density of the data. Setosas have the widest sepals, but very thin
petals. However, virginicas have the widest petals.
# Overlaid density curves of sepal width, one semi-transparent curve per
# species. Only Sepal.Width is mapped, so the title names sepal width alone.
ggplot(iris, aes(x = Sepal.Width, fill = Species)) +
  geom_density(alpha = 0.6) + # transparency keeps overlapping curves visible
  labs(
    x = "Sepal Width (cm)",
    y = "Density",
    title = "Sepal Width Across Species",
    subtitle = "Source: iris dataset"
  )
For this figure, I chose a density plot. Seeing the results of sepal
width from Figure 3, I wanted to further investigate how sepal width
differs across species. Versicolor and virginica
species peak very closely together, while setosas have the largest sepal
width.