Part One
install.packages("ggplot2") #installs ggplot for easier figure making
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("dplyr") # installs dplyr for easier manipulation of data
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("tidyverse") #installs tidyverse
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
library(dplyr) #loads in dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2) #loads in ggplot
library(tidyverse) #loads in tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ lubridate 1.9.5 ✔ tibble 3.3.1
## ✔ purrr 1.2.1 ✔ tidyr 1.3.2
## ✔ readr 2.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
head(iris) # previews the first six rows of the iris data set used for the upcoming figures
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Build the cleaned iris table that the Part One figure is drawn from.
iris_clean <- iris %>%
  # add the sepal length-to-width ratio as a new column
  mutate(sepal_ratio = Sepal.Length / Sepal.Width) %>%
  # keep every flower whose petal length is not exactly 3.5
  filter(Petal.Length != 3.5)

# Print the whole table to confirm the new column and the filter took effect.
iris_clean
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species sepal_ratio
## 1 5.1 3.5 1.4 0.2 setosa 1.457143
## 2 4.9 3.0 1.4 0.2 setosa 1.633333
## 3 4.7 3.2 1.3 0.2 setosa 1.468750
## 4 4.6 3.1 1.5 0.2 setosa 1.483871
## 5 5.0 3.6 1.4 0.2 setosa 1.388889
## 6 5.4 3.9 1.7 0.4 setosa 1.384615
## 7 4.6 3.4 1.4 0.3 setosa 1.352941
## 8 5.0 3.4 1.5 0.2 setosa 1.470588
## 9 4.4 2.9 1.4 0.2 setosa 1.517241
## 10 4.9 3.1 1.5 0.1 setosa 1.580645
## 11 5.4 3.7 1.5 0.2 setosa 1.459459
## 12 4.8 3.4 1.6 0.2 setosa 1.411765
## 13 4.8 3.0 1.4 0.1 setosa 1.600000
## 14 4.3 3.0 1.1 0.1 setosa 1.433333
## 15 5.8 4.0 1.2 0.2 setosa 1.450000
## 16 5.7 4.4 1.5 0.4 setosa 1.295455
## 17 5.4 3.9 1.3 0.4 setosa 1.384615
## 18 5.1 3.5 1.4 0.3 setosa 1.457143
## 19 5.7 3.8 1.7 0.3 setosa 1.500000
## 20 5.1 3.8 1.5 0.3 setosa 1.342105
## 21 5.4 3.4 1.7 0.2 setosa 1.588235
## 22 5.1 3.7 1.5 0.4 setosa 1.378378
## 23 4.6 3.6 1.0 0.2 setosa 1.277778
## 24 5.1 3.3 1.7 0.5 setosa 1.545455
## 25 4.8 3.4 1.9 0.2 setosa 1.411765
## 26 5.0 3.0 1.6 0.2 setosa 1.666667
## 27 5.0 3.4 1.6 0.4 setosa 1.470588
## 28 5.2 3.5 1.5 0.2 setosa 1.485714
## 29 5.2 3.4 1.4 0.2 setosa 1.529412
## 30 4.7 3.2 1.6 0.2 setosa 1.468750
## 31 4.8 3.1 1.6 0.2 setosa 1.548387
## 32 5.4 3.4 1.5 0.4 setosa 1.588235
## 33 5.2 4.1 1.5 0.1 setosa 1.268293
## 34 5.5 4.2 1.4 0.2 setosa 1.309524
## 35 4.9 3.1 1.5 0.2 setosa 1.580645
## 36 5.0 3.2 1.2 0.2 setosa 1.562500
## 37 5.5 3.5 1.3 0.2 setosa 1.571429
## 38 4.9 3.6 1.4 0.1 setosa 1.361111
## 39 4.4 3.0 1.3 0.2 setosa 1.466667
## 40 5.1 3.4 1.5 0.2 setosa 1.500000
## 41 5.0 3.5 1.3 0.3 setosa 1.428571
## 42 4.5 2.3 1.3 0.3 setosa 1.956522
## 43 4.4 3.2 1.3 0.2 setosa 1.375000
## 44 5.0 3.5 1.6 0.6 setosa 1.428571
## 45 5.1 3.8 1.9 0.4 setosa 1.342105
## 46 4.8 3.0 1.4 0.3 setosa 1.600000
## 47 5.1 3.8 1.6 0.2 setosa 1.342105
## 48 4.6 3.2 1.4 0.2 setosa 1.437500
## 49 5.3 3.7 1.5 0.2 setosa 1.432432
## 50 5.0 3.3 1.4 0.2 setosa 1.515152
## 51 7.0 3.2 4.7 1.4 versicolor 2.187500
## 52 6.4 3.2 4.5 1.5 versicolor 2.000000
## 53 6.9 3.1 4.9 1.5 versicolor 2.225806
## 54 5.5 2.3 4.0 1.3 versicolor 2.391304
## 55 6.5 2.8 4.6 1.5 versicolor 2.321429
## 56 5.7 2.8 4.5 1.3 versicolor 2.035714
## 57 6.3 3.3 4.7 1.6 versicolor 1.909091
## 58 4.9 2.4 3.3 1.0 versicolor 2.041667
## 59 6.6 2.9 4.6 1.3 versicolor 2.275862
## 60 5.2 2.7 3.9 1.4 versicolor 1.925926
## 61 5.9 3.0 4.2 1.5 versicolor 1.966667
## 62 6.0 2.2 4.0 1.0 versicolor 2.727273
## 63 6.1 2.9 4.7 1.4 versicolor 2.103448
## 64 5.6 2.9 3.6 1.3 versicolor 1.931034
## 65 6.7 3.1 4.4 1.4 versicolor 2.161290
## 66 5.6 3.0 4.5 1.5 versicolor 1.866667
## 67 5.8 2.7 4.1 1.0 versicolor 2.148148
## 68 6.2 2.2 4.5 1.5 versicolor 2.818182
## 69 5.6 2.5 3.9 1.1 versicolor 2.240000
## 70 5.9 3.2 4.8 1.8 versicolor 1.843750
## 71 6.1 2.8 4.0 1.3 versicolor 2.178571
## 72 6.3 2.5 4.9 1.5 versicolor 2.520000
## 73 6.1 2.8 4.7 1.2 versicolor 2.178571
## 74 6.4 2.9 4.3 1.3 versicolor 2.206897
## 75 6.6 3.0 4.4 1.4 versicolor 2.200000
## 76 6.8 2.8 4.8 1.4 versicolor 2.428571
## 77 6.7 3.0 5.0 1.7 versicolor 2.233333
## 78 6.0 2.9 4.5 1.5 versicolor 2.068966
## 79 5.5 2.4 3.8 1.1 versicolor 2.291667
## 80 5.5 2.4 3.7 1.0 versicolor 2.291667
## 81 5.8 2.7 3.9 1.2 versicolor 2.148148
## 82 6.0 2.7 5.1 1.6 versicolor 2.222222
## 83 5.4 3.0 4.5 1.5 versicolor 1.800000
## 84 6.0 3.4 4.5 1.6 versicolor 1.764706
## 85 6.7 3.1 4.7 1.5 versicolor 2.161290
## 86 6.3 2.3 4.4 1.3 versicolor 2.739130
## 87 5.6 3.0 4.1 1.3 versicolor 1.866667
## 88 5.5 2.5 4.0 1.3 versicolor 2.200000
## 89 5.5 2.6 4.4 1.2 versicolor 2.115385
## 90 6.1 3.0 4.6 1.4 versicolor 2.033333
## 91 5.8 2.6 4.0 1.2 versicolor 2.230769
## 92 5.0 2.3 3.3 1.0 versicolor 2.173913
## 93 5.6 2.7 4.2 1.3 versicolor 2.074074
## 94 5.7 3.0 4.2 1.2 versicolor 1.900000
## 95 5.7 2.9 4.2 1.3 versicolor 1.965517
## 96 6.2 2.9 4.3 1.3 versicolor 2.137931
## 97 5.1 2.5 3.0 1.1 versicolor 2.040000
## 98 5.7 2.8 4.1 1.3 versicolor 2.035714
## 99 6.3 3.3 6.0 2.5 virginica 1.909091
## 100 5.8 2.7 5.1 1.9 virginica 2.148148
## 101 7.1 3.0 5.9 2.1 virginica 2.366667
## 102 6.3 2.9 5.6 1.8 virginica 2.172414
## 103 6.5 3.0 5.8 2.2 virginica 2.166667
## 104 7.6 3.0 6.6 2.1 virginica 2.533333
## 105 4.9 2.5 4.5 1.7 virginica 1.960000
## 106 7.3 2.9 6.3 1.8 virginica 2.517241
## 107 6.7 2.5 5.8 1.8 virginica 2.680000
## 108 7.2 3.6 6.1 2.5 virginica 2.000000
## 109 6.5 3.2 5.1 2.0 virginica 2.031250
## 110 6.4 2.7 5.3 1.9 virginica 2.370370
## 111 6.8 3.0 5.5 2.1 virginica 2.266667
## 112 5.7 2.5 5.0 2.0 virginica 2.280000
## 113 5.8 2.8 5.1 2.4 virginica 2.071429
## 114 6.4 3.2 5.3 2.3 virginica 2.000000
## 115 6.5 3.0 5.5 1.8 virginica 2.166667
## 116 7.7 3.8 6.7 2.2 virginica 2.026316
## 117 7.7 2.6 6.9 2.3 virginica 2.961538
## 118 6.0 2.2 5.0 1.5 virginica 2.727273
## 119 6.9 3.2 5.7 2.3 virginica 2.156250
## 120 5.6 2.8 4.9 2.0 virginica 2.000000
## 121 7.7 2.8 6.7 2.0 virginica 2.750000
## 122 6.3 2.7 4.9 1.8 virginica 2.333333
## 123 6.7 3.3 5.7 2.1 virginica 2.030303
## 124 7.2 3.2 6.0 1.8 virginica 2.250000
## 125 6.2 2.8 4.8 1.8 virginica 2.214286
## 126 6.1 3.0 4.9 1.8 virginica 2.033333
## 127 6.4 2.8 5.6 2.1 virginica 2.285714
## 128 7.2 3.0 5.8 1.6 virginica 2.400000
## 129 7.4 2.8 6.1 1.9 virginica 2.642857
## 130 7.9 3.8 6.4 2.0 virginica 2.078947
## 131 6.4 2.8 5.6 2.2 virginica 2.285714
## 132 6.3 2.8 5.1 1.5 virginica 2.250000
## 133 6.1 2.6 5.6 1.4 virginica 2.346154
## 134 7.7 3.0 6.1 2.3 virginica 2.566667
## 135 6.3 3.4 5.6 2.4 virginica 1.852941
## 136 6.4 3.1 5.5 1.8 virginica 2.064516
## 137 6.0 3.0 4.8 1.8 virginica 2.000000
## 138 6.9 3.1 5.4 2.1 virginica 2.225806
## 139 6.7 3.1 5.6 2.4 virginica 2.161290
## 140 6.9 3.1 5.1 2.3 virginica 2.225806
## 141 5.8 2.7 5.1 1.9 virginica 2.148148
## 142 6.8 3.2 5.9 2.3 virginica 2.125000
## 143 6.7 3.3 5.7 2.5 virginica 2.030303
## 144 6.7 3.0 5.2 2.3 virginica 2.233333
## 145 6.3 2.5 5.0 1.9 virginica 2.520000
## 146 6.5 3.0 5.2 2.0 virginica 2.166667
## 147 6.2 3.4 5.4 2.3 virginica 1.823529
## 148 5.9 3.0 5.1 1.8 virginica 1.966667
# Collect the rows of iris_clean whose sepal_ratio is an outlier within its
# own species, using the 1.5 * IQR rule. The helper columns (Q1, Q3, IQR,
# lower, upper, is_outlier) are kept in the result.
iris_outliers <- iris_clean %>%
  group_by(Species) %>% # quartiles below are computed separately per species
  mutate(
    Q1 = quantile(sepal_ratio, 0.25), # first quartile
    Q3 = quantile(sepal_ratio, 0.75), # third quartile
    IQR = Q3 - Q1, # interquartile range
    lower = Q1 - 1.5 * IQR, # lower fence
    upper = Q3 + 1.5 * IQR, # upper fence
    # TRUE when the ratio falls below the lower fence or above the upper fence
    is_outlier = sepal_ratio < lower | sepal_ratio > upper
  ) %>%
  filter(is_outlier) %>% # keep only the flagged rows
  ungroup() # drop the Species grouping so later verbs are not silently per-group
# Violin plot with an embedded box plot of sepal_ratio for each species, with
# the per-species outliers from iris_outliers overlaid as points.
ggplot(iris_clean, aes(x = Species, y = sepal_ratio, fill = Species)) +
  # full (untrimmed) violins, semi-transparent so the box plot stays visible
  geom_violin(trim = FALSE, alpha = 0.7) +
  # narrow box plot inside each violin; its own outlier points are hidden
  # because the outliers are drawn by the jitter layer below
  geom_boxplot(width = 0.20, outlier.shape = NA) +
  # outlier points; x and y are inherited from the plot-level aes()
  geom_jitter(
    data = iris_outliers,
    width = 0.2, # horizontal spread so overlapping points can be told apart
    height = 0, # no vertical jitter: each point stays at its true ratio
    size = 2 # point size
  ) +
  labs(
    title = "Distribution of Ratio of Sepal Length to Sepal Width by Species",
    x = "Species",
    y = "Sepal Ratio",
    # sample size is read from the data so the caption cannot go stale
    caption = paste0(
      "Any flowers with a petal length of exactly 3.5 were excluded from ",
      "the final figure. With a sample size of ",
      nrow(iris_clean)
    )
  ) +
  theme_minimal()

Part Two
head(economics_long) # previews the first six rows of the data set used for this figure
## # A tibble: 6 × 4
## date variable value value01
## <date> <chr> <dbl> <dbl>
## 1 1967-07-01 pce 507. 0
## 2 1967-08-01 pce 510. 0.000265
## 3 1967-09-01 pce 516. 0.000762
## 4 1967-10-01 pce 512. 0.000471
## 5 1967-11-01 pce 517. 0.000916
## 6 1967-12-01 pce 525. 0.00157
# Base-graphics version: every row of economics_long drawn as one blue
# line-and-point series of value against date.
plot(
  x = economics_long$date,
  y = economics_long$value,
  main = "Change in Value Over Year", # plot title
  xlab = "Year", # x axis label
  ylab = "Value (USD)", # y axis label
  type = "b", # draw both lines and points
  pch = 19, # point symbol
  col = "blue", # series colour
  lwd = 1 # line width
)

# ggplot2 version: one coloured line (with small points) per variable.
ggplot(
  data = economics_long,
  mapping = aes(x = date, y = value, color = variable)
) +
  geom_line(linewidth = 1) + # line thickness
  geom_point(size = 0.5) + # small marker at every observation
  labs(
    title = "Change in Value over Year",
    x = "Year",
    y = "Value (USD)",
    color = "Variables" # legend title
  ) +
  coord_cartesian(ylim = c(0, 15000)) + # restrict the visible y range to 0-15000
  theme_bw()

Part Three
head(penguins) # previews the first six rows of the penguins data set
## species island bill_len bill_dep flipper_len body_mass sex year
## 1 Adelie Torgersen 39.1 18.7 181 3750 male 2007
## 2 Adelie Torgersen 39.5 17.4 186 3800 female 2007
## 3 Adelie Torgersen 40.3 18.0 195 3250 female 2007
## 4 Adelie Torgersen NA NA NA NA <NA> 2007
## 5 Adelie Torgersen 36.7 19.3 193 3450 female 2007
## 6 Adelie Torgersen 39.3 20.6 190 3650 male 2007
# Keep only the penguins that have both a body mass and a species recorded.
penguins_clean <- drop_na(penguins, body_mass, species)

# Preview the first rows of the cleaned table.
head(penguins_clean)
## species island bill_len bill_dep flipper_len body_mass sex year
## 1 Adelie Torgersen 39.1 18.7 181 3750 male 2007
## 2 Adelie Torgersen 39.5 17.4 186 3800 female 2007
## 3 Adelie Torgersen 40.3 18.0 195 3250 female 2007
## 4 Adelie Torgersen 36.7 19.3 193 3450 female 2007
## 5 Adelie Torgersen 39.3 20.6 190 3650 male 2007
## 6 Adelie Torgersen 38.9 17.8 181 3625 female 2007
# Density plot of penguin body mass, one semi-transparent curve per species.
# Each chaining `+` must come BEFORE any end-of-line comment: a `+` written
# inside a comment is ignored by R, which ends the plot expression early and
# leaves the labels and theme unapplied.
ggplot(penguins_clean, aes(x = body_mass, fill = species)) +
  geom_density(alpha = 0.7) + # semi-transparent so overlapping curves stay visible
  scale_fill_manual(
    values = c(
      Adelie = "darkseagreen3",
      Gentoo = "mistyrose3",
      Chinstrap = "darkslategrey"
    )
  ) + # one fixed fill colour per species
  labs(
    title = "Distribution of Penguins Body mass by Species",
    x = "Mass(g)", # x axis
    y = "Density", # y axis
    fill = "Species" # legend title
  ) +
  theme_bw()
## NULL
Part Four
head(diamonds) # previews the first six rows of the diamonds data set
## # A tibble: 6 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
# Stacked bars scaled to full height: the share of each cut within every
# colour grade.
ggplot(data = diamonds, mapping = aes(x = color, fill = cut)) +
  geom_bar(position = "fill") + # "fill" stacks the cuts and rescales each bar to 1
  scale_fill_viridis_d() + # discrete viridis palette for the cut levels
  labs(
    title = "Distribution of Cut Across Color categories via Proportion",
    x = "Color",
    y = "Proportion",
    fill = "Cut" # legend title
  ) +
  theme_minimal()

# Side-by-side bars: the raw number of diamonds of each cut within every
# colour grade.
ggplot(data = diamonds, mapping = aes(x = color, fill = cut)) +
  geom_bar(position = "dodge") + # "dodge" places the cuts next to each other
  scale_fill_viridis_d() + # discrete viridis palette for the cut levels
  labs(
    title = "Raw Counts of Cut Across Color categories",
    x = "Color",
    y = "Count",
    fill = "Cut" # legend title
  ) +
  theme_minimal()
