library(openintro)
library(ggplot2)
library(dplyr)
# Bring the countyComplete dataset (provided by the openintro package) into
# the workspace
data(countyComplete)
# Add a two-level factor column, rural: rows whose density is below 500 are
# labelled "rural", every other row "urban"
countyComplete[["rural"]] <- factor(
  ifelse(countyComplete[["density"]] < 500, "rural", "urban")
)
# Scatterplot of bachelors (y) against per_capita_income (x)
ggplot(data = countyComplete, aes(x = per_capita_income, y = bachelors)) +
  geom_point()

# Boxplots of bachelors within five bins of per_capita_income; cut() turns the
# numeric x variable into intervals so geom_boxplot() draws one box per bin
ggplot(
  data = countyComplete,
  aes(x = cut(per_capita_income, breaks = 5), y = bachelors)
) +
  geom_boxplot()

# Same scatterplot, with points colored by the rural/urban column. rural is
# already a factor, so it is mapped directly; wrapping it in factor() again
# only changed the legend title to "factor(rural)".
ggplot(
  data = countyComplete,
  aes(x = per_capita_income, y = bachelors, color = rural)
) +
  geom_point()
# Load the package
library(dplyr)
# Summarise the data in one row: N is the row count and r is the correlation
# (cor() default method) between per_capita_income and bachelors
summarise(
  countyComplete,
  N = n(),
  r = cor(x = per_capita_income, y = bachelors)
)
##      N         r
## 1 3143 0.7924464
# Interpretation: urban counties with a higher percentage of bachelor's degree
# holders tend to have a higher per capita income.
# Rural counties with bachelor's degree holders tend to have a per capita
# income closer to the average.