library(openintro)
library(ggplot2)
library(dplyr)

# Bring the countyComplete data set (shipped with openintro) into the session
data(countyComplete)

# Add `rural`: a factor labelling counties with density below 500 as "rural"
# and every other county as "urban"
countyComplete$rural <- factor(
  ifelse(countyComplete$density < 500, "rural", "urban")
)

1.1 Scatterplots

# openintro provides the countyComplete data set used below
library(openintro)

# Scatterplot of bachelors (y) against per_capita_income (x), one point per row
ggplot(
  data = countyComplete,
  aes(x = per_capita_income, y = bachelors)
) +
  geom_point()

1.2 Boxplots as discretized/conditioned scatterplots

# Boxplots of bachelors conditioned on per_capita_income: cut() splits the
# income range into 5 intervals of (approximately) equal width, and each
# interval gets its own box
ggplot(
  data = countyComplete,
  aes(x = cut(per_capita_income, breaks = 5), y = bachelors)
) +
  geom_boxplot()

Interpretation

1.3 Creating scatterplots

# openintro provides the countyComplete data set used below
library(openintro)

# Scatterplot of bachelors (y) against per_capita_income (x), with points
# coloured by the rural/urban label. `rural` is already stored as a factor,
# so it is mapped directly; this also titles the legend "rural".
ggplot(
  data = countyComplete,
  aes(x = per_capita_income, y = bachelors, color = rural)
) +
  geom_point()

Chapter 2: Correlation

2.1 Computing correlation

The cor(x, y) function computes the Pearson product-moment correlation between two variables, x and y.

# openintro provides the countyComplete data set used below
library(openintro)

# Row count (N) and Pearson correlation (r) of per_capita_income and bachelors
countyComplete %>%
  summarise(
    N = n(),
    r = cor(per_capita_income, bachelors)
  )
##      N         r
## 1 3143 0.7924464

# Correlation over the rows where both variables are observed.
# use = "pairwise.complete.obs" makes cor() drop incomplete pairs, so N counts
# those same complete pairs rather than every row of the data.
countyComplete %>%
  summarize(
    N = sum(complete.cases(bachelors, per_capita_income)),
    r = cor(bachelors, per_capita_income, use = "pairwise.complete.obs")
  )
##      N         r
## 1 3143 0.7924464