Load Library
library("dslabs")
## Warning: package 'dslabs' was built under R version 4.0.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.2
## -- Attaching packages ------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 0.8.5
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## Warning: package 'tidyr' was built under R version 4.0.2
## Warning: package 'forcats' was built under R version 4.0.2
## -- Conflicts ---------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Show the datasets bundled with dslabs, then the scripts that generate them.
data(package = "dslabs")
system.file("script", package = "dslabs") %>%
  list.files()
## [1] "make-admissions.R"
## [2] "make-brca.R"
## [3] "make-brexit_polls.R"
## [4] "make-death_prob.R"
## [5] "make-divorce_margarine.R"
## [6] "make-gapminder-rdas.R"
## [7] "make-greenhouse_gases.R"
## [8] "make-historic_co2.R"
## [9] "make-mnist_27.R"
## [10] "make-movielens.R"
## [11] "make-murders-rda.R"
## [12] "make-na_example-rda.R"
## [13] "make-nyc_regents_scores.R"
## [14] "make-olive.R"
## [15] "make-outlier_example.R"
## [16] "make-polls_2008.R"
## [17] "make-polls_us_election_2016.R"
## [18] "make-reported_heights-rda.R"
## [19] "make-research_funding_rates.R"
## [20] "make-stars.R"
## [21] "make-temp_carbon.R"
## [22] "make-tissue-gene-expression.R"
## [23] "make-trump_tweets.R"
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"
Choosing my dataset
# Load the murders data frame and inspect its columns and types.
data(murders)
str(object = murders)
## 'data.frame': 51 obs. of 5 variables:
## $ state : chr "Alabama" "Alaska" "Arizona" "Arkansas" ...
## $ abb : chr "AL" "AK" "AZ" "AR" ...
## $ region : Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
## $ population: num 4779736 710231 6392017 2915918 37253956 ...
## $ total : num 135 19 232 93 1257 ...
Set the output directory
# Use an explicit output path instead of setwd(): changing the working
# directory is a global side effect, and when a document is knitted, knitr
# restores the working directory after each chunk, so a setwd() in one chunk
# does not reliably affect a write in the next.
out_dir <- "/Users/Joeyc/Documents/School/Fall 2020/Data 110/Homework"
Creating the CSV file for murders
# na = "" writes missing values as empty fields.
write_csv(murders, file.path(out_dir, "murders.csv"), na = "")
Load more libraries
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.0.3
library(RColorBrewer)
## Warning: package 'RColorBrewer' was built under R version 4.0.3
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.0.3
Creating the Scatterplot
# Scatterplot of total gun murders against population (in millions), with one
# panel per region and every state labelled by its abbreviation.
ds_theme_set()
murders %>%
  ggplot(aes(x = population / 10^6, y = total, label = abb)) +
  # x and y are inherited from ggplot(), so they are not repeated here.
  geom_point() +
  geom_text_repel(nudge_x = 0.005) +
  xlab("Population in millions") +
  ylab("Total number of murders") +
  ggtitle("Total Gun Murders Per Region") +
  # Draw the blue region caption once per panel. Mapping it over the full data
  # would print the same text once for every state, stacked on top of itself.
  geom_text(
    data = function(d) distinct(d, region),
    aes(x = 25, y = 1300, label = region),
    size = 4, color = "blue"
  ) +
  facet_grid(. ~ region)

I used the murders dataset. This dataset records the total number of gun murders in 2010 for each state. Its variables include the state, the state's abbreviation, the region of the country the state belongs to, the state's population, and the total number of gun murders in that state. For this assignment I plotted these points on a scatterplot. Using facet_grid, I split the plot into one panel per region to see more clearly which region of the country was affected most by gun violence. The x axis shows the population in millions and the y axis shows the total number of murders. I labeled each point with its state abbreviation to show which states had the most murders.