Set up

install.packages("ggplot2")

## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)

install.packages("maps")

## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)

# 'datasets' ships with base R and is always available, so it is not
# installed from CRAN (attempting to do so only raises a
# "base package ... should not be updated" warning).

install.packages("treemapify")

## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)

install.packages("dplyr")

## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)

install.packages("tidyverse")

## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)

install.packages("ggraph")

## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)

library(ggraph)

## Loading required package: ggplot2

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ lubridate 1.9.5     ✔ tibble    3.3.1
## ✔ purrr     1.2.2     ✔ tidyr     1.3.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(ggplot2)
library(maps)

## 
## Attaching package: 'maps'
## 
## The following object is masked from 'package:purrr':
## 
##     map

library(datasets)
library(treemapify)

Plot 1

# Reshape iris to long form: one row per (flower, measurement) pair. The id
# column tags each flower so its four measurements can be joined by a line.
iris_long <- iris %>%
  mutate(id = row_number()) %>%
  pivot_longer(
    cols = c(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width),
    names_to = "measurement",
    values_to = "value"
  )

# Parallel coordinates plot: one semi-transparent line per flower, colored
# by species.
ggplot(
  data = iris_long,
  mapping = aes(x = measurement, y = value, group = id, color = Species)
) +
  geom_line(alpha = 0.4) +
  labs(
    title = "Parallel Coordinates Plot of Flower Measurements Across Species",
    x = "Measurement",
    y = "Value"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

This plot type works well for comparing the different flower measurements because we can see how each species varies within each measurement. We see that the species vary very differently, yet when comparing sepal width the species start to blend together and the difference is not so obvious.

Plot 2

# Preview the first six rows of USArrests (state names are the row names).
head(USArrests)

##            Murder Assault UrbanPop Rape
## Alabama      13.2     236       58 21.2
## Alaska       10.0     263       48 44.5
## Arizona       8.1     294       80 31.0
## Arkansas      8.8     190       50 19.5
## California    9.0     276       91 40.6
## Colorado      7.9     204       78 38.7

# Move the state names out of the row names and lowercase them so they can
# be matched against the `region` column of the map data.
arrests <- USArrests %>%
  rownames_to_column("state") %>%
  mutate(state = tolower(state))

# Polygon outlines of the states provided by the maps package.
map <- map_data("state")

# Attach the arrest statistics to every polygon vertex. left_join() keeps
# the row order of `map`, which geom_polygon() relies on to draw outlines.
map_df <- map %>%
  left_join(arrests, by = c("region" = "state"))

# Choropleth of the Murder column. `long` is on the x axis and `lat` on the
# y axis, so the axis labels follow that order.
ggplot(map_df, aes(long, lat, group = group, fill = Murder)) +
  geom_polygon(color = "white", linewidth = 1) +
  scale_fill_viridis_c() +
  # Keep an approximately correct aspect ratio for a lat/long map.
  coord_quickmap() +
  labs(
    title = "Murder Arrests by State",
    x = "Longitude",
    y = "Latitude",
    fill = "Murder Rate"
  ) +
  theme_minimal()

This plot works nicely to show geographic variation because of the shading each state has. It works well because the data is organized by location, which gives us the opportunity to place it on a map. In addition, the colorblind-friendly palette means very few people will be confused or lost when interpreting the graph. Here we can see that the murder rate increases the further south you go in the United States.

Plot 3

# Preview the first six rows of USArrests, the input for the network plot.
head(USArrests)

##            Murder Assault UrbanPop Rape
## Alabama      13.2     236       58 21.2
## Alaska       10.0     263       48 44.5
## Arizona       8.1     294       80 31.0
## Arkansas      8.8     190       50 19.5
## California    9.0     276       91 40.6
## Colorado      7.9     204       78 38.7

# Standardize the four arrest variables so each contributes on the same
# scale to the distance, then keep the state name as a column.
df <- USArrests %>%
  scale() %>%
  as.data.frame() %>%
  mutate(state = rownames(USArrests))

# Pairwise Euclidean distances between states, as a square data frame whose
# row and column names are the state names.
dist.mat <- dist(df[, 1:4], method = "euclidean")
dist.df <- as.data.frame(as.matrix(dist.mat))
dist.df$state1 <- rownames(dist.df)

# One row per ordered pair of distinct states; keep, for every state, the
# two most similar other states (ties are kept by slice_max()).
dist.long <- dist.df %>%
  pivot_longer(cols = -state1, names_to = "state2", values_to = "distance") %>%
  filter(state1 != state2) %>%
  mutate(similarity = 1 / (1 + distance)) %>%
  group_by(state1) %>%
  slice_max(similarity, n = 2) %>%
  ungroup()

edge <- dist.long

# Place the states evenly on the unit circle. Angles 0 and 2*pi are the same
# point, so generate one extra angle and drop it; otherwise the first and
# last states would be drawn on top of each other.
node <- tibble(
  state = df$state,
  angle = head(seq(0, 2 * pi, length.out = nrow(df) + 1), -1),
  x = cos(angle),
  y = sin(angle)
)

# Look up the coordinates of both endpoints of every edge.
edge2 <- edge %>%
  left_join(node, by = c("state1" = "state")) %>%
  rename(x1 = x, y1 = y) %>%
  left_join(node, by = c("state2" = "state")) %>%
  rename(x2 = x, y2 = y)

# Line thickness uses `linewidth`; `size` for lines is deprecated since
# ggplot2 3.4.0.
ggplot() +
  geom_segment(
    data = edge2,
    aes(x = x1, y = y1, xend = x2, yend = y2),
    linewidth = 0.5,
    alpha = .7
  ) +
  geom_point(data = node, aes(x = x, y = y), size = 1.5, alpha = 0.5) +
  geom_text(data = node, aes(x = x, y = y, label = state), size = 3, angle = 25) +
  labs(
    title = "Network Diagram Showing Similarities of Arrests based on Distance",
    x = "",
    y = ""
  ) +
  theme_minimal()

This plot, when done correctly, can help show connections/similarities between variables. In this case the state names blend in with each other, making it slightly confusing to read, although it is still possible. The lines indicate similarity in distance across all types of arrests.

Plot 4

# Preview the first six rows of ToothGrowth (columns len, supp, dose).
head(ToothGrowth)

##    len supp dose
## 1  4.2   VC  0.5
## 2 11.5   VC  0.5
## 3  7.3   VC  0.5
## 4  5.8   VC  0.5
## 5  6.4   VC  0.5
## 6 10.0   VC  0.5

# Rain cloud style plot of tooth length by supplement type: a violin for the
# distribution shape, a narrow boxplot for the summary, and the raw points.
ggplot(ToothGrowth, aes(x = supp, y = len, fill = supp)) +
  geom_violin(trim = FALSE, alpha = 0.5) +
  # Outliers are hidden here because every observation is drawn by the
  # jitter layer below.
  geom_boxplot(width = 0.15, outlier.shape = NA, alpha = 0.7) +
  # height = 0 spreads points horizontally only, so the plotted lengths are
  # the measured values rather than randomly shifted ones.
  geom_jitter(width = 0.1, height = 0, alpha = 0.7) +
  labs(
    title = "Comparing Tooth Length by Supplement Type",
    x = "supplement type",
    y = "length(cm)"
  ) +
  scale_fill_viridis_d(option = "D") +
  theme_minimal()

A rain cloud plot is very helpful when comparing two different groups. The colorblind-friendly colors allow for minimal confusion when reading the graph. Rain cloud plots provide your standard box plot, violin, and regular points all at once. This helps support your figures when all figure types match and work together.

Student Portfolio

# Preview the first six rows of mtcars (car names are the row names).
head(mtcars)

##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

# How do car characteristics vary among gear types?

# Rain cloud style plot of fuel economy (mpg) by number of gears. `gear` is
# numeric in mtcars, so it is converted to a factor to form discrete groups.
fig1 <- ggplot(mtcars, aes(x = factor(gear), y = mpg, fill = factor(gear))) +
  # Violin first, then a narrow boxplot on top, so the boxplot is not hidden
  # behind the violin.
  geom_violin(trim = FALSE, alpha = 0.5) +
  # Outliers are hidden because every observation is drawn by the jitter
  # layer; otherwise they would appear twice.
  geom_boxplot(width = 0.15, outlier.shape = NA, alpha = 0.7) +
  # height = 0 keeps the plotted mpg values exact (horizontal spread only).
  geom_jitter(width = 0.1, height = 0, alpha = 0.7) +
  labs(
    title = "Gear Type compared to MPG",
    x = "Gear Type",
    y = "mpg",
    fill = "Gear Type"
  ) +
  theme_minimal()
fig1

# Horsepower of each car grouped by number of gears. `gear` is converted to
# a factor so it is treated as a discrete group on the axis and in the legend.
fig2 <- ggplot(mtcars, aes(x = factor(gear), y = hp, color = factor(gear))) +
  geom_point(size = 2, alpha = 0.7) +
  labs(
    title = "Gear Type Compared to Horse Power",
    x = "Gear type",
    y = "Horse Power",
    # Without this the legend title would be the raw expression factor(gear).
    color = "Gear type"
  ) +
  theme_minimal()
fig2

I wanted to look at how gear type can affect car performance. To look at this, I created two figures, each looking at a different aspect. For figure 1, I created a rain cloud plot looking at the MPG that each gear type gets. We can see that cars with 4 gears typically have a higher MPG than the others, but only marginally when compared to cars with 5 gears. This plot type worked well with what we were looking at because we are comparing numerical values among three different groups.

The next graph I created was a scatter plot, but this time I looked at horse power or “hp”. The figure showed that cars with 5 gears had much more horse power when compared with the other two types of cars. This graph gives us a clear indicator of cars that have higher horse power than others. When combining the data from both graphs, it is a reasonable assumption to make that cars with higher gears can on average put out more horsepower but in return have a lower mpg than cars with 4 gears.

salcedo_ryan_final_take_home

Ryan Salcedo

2026-05-04