Part 1: Warm Up

1. Two Simple Math Equations

# Evaluate two arithmetic expressions; R auto-prints each result at top level.
1 + 2
## [1] 3
2 * 2
## [1] 4

2. Assign Each a Letter

# Store each result under a single-letter name so it can be reused.
a <- 1 + 2
b <- 2 * 2

3. Add the Two Letters

# Sum the two stored values (a is 3 and b is 4, giving 7).
a + b
## [1] 7

4. Create Two Vectors of 5 Numbers Each

# Two length-5 double vectors holding 1..5 and 6..10.
v1 <- as.numeric(1:5)
v2 <- as.numeric(6:10)

5. Add the Two Vectors and Name the Result v3

# `+` is vectorized: the two vectors are added element by element.
v3 <- v1 + v2
v3
## [1]  7  9 11 13 15

6. Save v3 as a Data Frame

# Wrap v3 in a one-column data frame; the column is named "v3".
v3_df <- data.frame(v3 = v3)
v3_df
##   v3
## 1  7
## 2  9
## 3 11
## 4 13
## 5 15

7. Create a Data Frame

# Build a small three-column data frame of student records.
# stringsAsFactors = FALSE keeps `name` and `grade` as character vectors on
# every R version (before R 4.0.0 the default converted strings to factors).
my_df <- data.frame(
  name  = c("Alice", "Bob", "Carol"),
  score = c(88, 92, 75),
  grade = c("B", "A", "C"),
  stringsAsFactors = FALSE
)
my_df
##    name score grade
## 1 Alice    88     B
## 2   Bob    92     A
## 3 Carol    75     C

Part 2: R for Data Science – Chapter 1 Exercises

Exercise 1.2.5 – Run ggplot(data = mpg). What do you see?

# ggplot() and the mpg dataset come from ggplot2; attach it so the plotting
# code runs on its own (a no-op if the package is already attached).
library(ggplot2)

# Q1: Running ggplot() with only data (no aesthetics or geoms) produces a
# blank canvas.
ggplot(data = mpg)

# Q2: How many rows and columns does mpg have?
# mpg has 234 rows and 11 columns (see the echoed results below).
nrow(mpg)
## [1] 234
ncol(mpg)
## [1] 11
# Q3: What does the drv variable describe?
# drv is the drive type: f = front-wheel drive, r = rear-wheel drive,
# 4 = four-wheel drive. The help page opened below documents every column.
help("mpg")
## starting httpd help server ... done
# Q4: Scatterplot with hwy on the y-axis against cyl on the x-axis.
ggplot(mpg, aes(x = cyl, y = hwy)) +
  geom_point()

# Q5: What happens if you make a scatterplot of class vs drv?
# drv and class are both categorical, so every point lands on one of a few
# discrete grid positions. Cars that share a combination are drawn on top of
# each other, so the plot shows only which combinations exist and gives no
# useful trend information.
ggplot(mpg, aes(x = drv, y = class)) +
  geom_point()


Exercise 1.4.3 – Aesthetic Mappings

# Q1: What's wrong with this code? Why are the points not blue?
# Writing color = "blue" inside aes() maps color to the constant string
# "blue" as if it were a data variable, so ggplot picks a color from its
# default scale instead of using blue. Passing color as a fixed argument to
# geom_point(), outside aes(), paints every point blue.

# Broken (points not blue):
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, color = "blue"))

# Fixed:
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy), color = "blue")

# Q2: Which variables in mpg are categorical vs continuous?
# Categorical: manufacturer, model, trans, drv, fl, class
# Continuous:  displ, year, cyl, cty, hwy
# str() lists each column's storage type: the chr columns are the
# categorical ones and the num/int columns are the numeric ones. Note that
# year and cyl are stored as integers, so they could arguably be treated
# as discrete rather than continuous.
str(mpg)
## tibble [234 × 11] (S3: tbl_df/tbl/data.frame)
##  $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
##  $ model       : chr [1:234] "a4" "a4" "a4" "a4" ...
##  $ displ       : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr [1:234] "f" "f" "f" "f" ...
##  $ cty         : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr [1:234] "p" "p" "p" "p" ...
##  $ class       : chr [1:234] "compact" "compact" "compact" "compact" ...
# Q3: Map a continuous variable (cty) to color, size, and shape.
# Color and size both work with a continuous variable; shape does not.
ggplot(mpg, aes(x = displ, y = hwy, color = cty)) +
  geom_point()

ggplot(mpg, aes(x = displ, y = hwy, size = cty)) +
  geom_point()

# shape = cty is deliberately not run: it would throw an error, because a
# continuous variable cannot be mapped to the shape aesthetic.

# Q4: What happens if you map the same variable to multiple aesthetics?
# Here drv drives both color and shape, so the two aesthetics carry the
# same information (a redundant encoding).
ggplot(mpg, aes(x = displ, y = hwy, color = drv, shape = drv)) +
  geom_point()

# Q5: What does the stroke aesthetic do?
# stroke sets the border width for shapes that have a border, such as
# shape 21 used here.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(shape = 21, stroke = 2)

# Q6: What happens if you map an aesthetic to something other than a
# variable name?
# ggplot evaluates the expression (here displ < 5) and uses the result,
# so points are colored by whether the condition is TRUE or FALSE.
ggplot(mpg, aes(x = displ, y = hwy, color = displ < 5)) +
  geom_point()


Exercise 1.5.5 – Facets

# Q1: What happens if you facet on a continuous variable?
# Each distinct value of the variable is treated as its own category and
# gets its own panel. With many distinct values the result is unreadable.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(~ cty)

# Q2: What do empty cells in facet_grid(drv ~ cyl) mean?
# An empty panel means the data contain no observations with that
# combination of drv and cyl.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(drv ~ cyl)

# Q3: What does . do in facet_grid?
# A dot (.) stands for "no variable on this dimension", so the grid
# collapses to a single column of panels (drv ~ .) or a single row of
# panels (. ~ cyl).

# Rows split by drv, no column variable.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(drv ~ .)

# Columns split by cyl, no row variable.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(. ~ cyl)

# Q4: First faceted plot from the chapter – advantages and disadvantages.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(~ class, nrow = 2)

# Advantage: patterns within each class are easier to see.
# Disadvantage: comparing across classes is harder because the classes sit
# in separate panels; more difficult with large datasets.

# Q5: facet_wrap vs facet_grid
# facet_wrap wraps a 1-D sequence of panels into 2-D.
# facet_grid forms a matrix of panels based on two variables.
# Use facet_grid when you want to see interactions between two categorical variables.

# Q6: When would you use faceting instead of color aesthetics?
# Faceting is better when there are many categories or when individual
# panels need to be examined closely. Color works better for quick
# comparisons with few categories.

Exercise 1.6.1 – Geometric Objects

# Q1: What geom would you use for a line chart? Box plot? Histogram? Area chart?
# Line chart  → geom_line()
# Box plot    → geom_boxplot()
# Histogram   → geom_histogram()
# Area chart  → geom_area()

# Q2: Run this code in your head, then execute it. Does the output match?
# color = drv is set globally, so it applies to both the point layer and
# the smooth layer; se = FALSE turns off the confidence ribbon.
ggplot(mpg, aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Q3: What does show.legend = FALSE do?
# It suppresses the legend for the layer it is passed to.
ggplot(mpg, aes(x = displ, y = hwy, color = drv)) +
  geom_point(show.legend = FALSE)

# Q4: What does the se argument to geom_smooth() do?
# se toggles the confidence-interval ribbon around the smooth line
# (TRUE by default).

# With the ribbon:
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Without the ribbon:
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Q5: Will these two graphs look different? Why/why not?
# They will look the same. In the first, the data and mapping are set once
# in ggplot() and inherited by both geoms; in the second, the same data and
# mapping are repeated locally inside each geom. The result is identical.

# Global data and mapping, inherited by every layer.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Same data and mapping, supplied separately to each layer.
ggplot() +
  geom_point(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_smooth(data = mpg, mapping = aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Q6: Recreate the six plots from the textbook.
# All six use x = displ and y = hwy; they differ only in how drv is mapped
# (or not) on the point and smooth layers.

# Plot 1: plain points with one overall smooth line.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Plot 2: plain points; the smooth layer is grouped by drv.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(aes(group = drv), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Plot 3: color = drv is set globally, so it applies to both layers.
ggplot(mpg, aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Plot 4: only the point layer is colored by drv.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = drv)) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Plot 5: points colored by drv; smooth lines distinguished by linetype.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = drv)) +
  geom_smooth(aes(linetype = drv), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Plot 6: shape 21 points filled by drv, with a thick white border.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(fill = drv), shape = 21, color = "white", stroke = 2)