# Attached is the code used for the Week 9 Lab.
# Download the lab data set from GitHub into a temporary object
fetched_data <- read.csv(
  file = "https://raw.githubusercontent.com/drkblake/Data/main/DataWrangling.csv"
)
# Keep a local copy named DataWrangling.csv in the working directory
write.csv(x = fetched_data, file = "DataWrangling.csv", row.names = FALSE)
# Drop the temporary object now that the data are saved on disk
rm(fetched_data)
# Install the tidyverse only when it is missing, then attach it.
# requireNamespace() checks whether the package is available without
# attaching it, so library() below is the single place where the package
# is loaded, and it stops with an error if the installation failed.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)
# Read the data
mydata <- read.csv("DataWrangling.csv")
# Create a continuous "Density" variable measuring
# households per square mile, then a two-level and
# a three-level categorical version.
# cut_number() splits Density into groups holding roughly equal numbers
# of rows. The category names are supplied through `labels`, ordered from
# the lowest interval to the highest, rather than by matching the printed
# interval text (e.g. "[7.35,28.6]"); that text depends on the exact values
# in the data, so any change to the data would send every row to "Error".
# as.character() keeps both columns as plain text, and coalesce() marks
# rows whose Density is missing (and so cannot be binned) as "Error".
mydata <- mydata %>%
  mutate(
    Density = Households / Land_area,
    Density_2 = coalesce(
      as.character(
        cut_number(Density, n = 2, labels = c("Low density", "High density"))
      ),
      "Error"
    ),
    Density_3 = coalesce(
      as.character(
        cut_number(
          Density,
          n = 3,
          labels = c("Low density", "Intermediate density", "High density")
        )
      ),
      "Error"
    )
  )
# Re-save the data on your computer
write.csv(mydata, "DataWrangling.csv", row.names = FALSE)
# Task 1: histogram of Pct_College
# (dark blue bars with gray outlines)
ggplot(data = mydata, mapping = aes(x = Pct_College)) +
  geom_histogram(fill = "darkblue", color = "gray")
# Task 2: scatterplot of Pct_College (y axis) against Density (x axis),
# drawn with red points
ggplot(data = mydata, mapping = aes(x = Density, y = Pct_College)) +
  geom_point(fill = "red", color = "red")