Step 1 – Load Libraries
library(tidyverse)
library(readxl)
Step 2 – Load the Dataset
# Read the advertising workbook, keep only the four columns used below, and
# rename the lower-case ones to title case in the same step.
# NOTE(review): the path is absolute and specific to one machine, so the
# script will only run where this exact file location exists.
my_data <- read_excel(
  "/Users/christinamac/Desktop/Data/advertising (2)_randomized.xlsx"
) %>%
  select(TV, Radio = radio, Newspaper = newspaper, Sales = sales)
Step 3 – Understand the Dataset
3.1 First Six Observations
# Preview the first six rows (six is head()'s default row count).
head(my_data, n = 6L)
## # A tibble: 6 × 4
## TV Radio Newspaper Sales
## <dbl> <dbl> <dbl> <dbl>
## 1 385. 15 3.2 3.16
## 2 18.0 19.4 12.8 19.1
## 3 265. 19.3 33.8 4
## 4 270. 14.9 17.3 15.2
## 5 43.6 7.76 32.3 8.38
## 6 281. 22.2 56.7 4.94
3.2 Dataset Structure
# Print the compact structure: dimensions, column types, leading values.
my_data %>% str()
## tibble [300 × 4] (S3: tbl_df/tbl/data.frame)
## $ TV : num [1:300] 385 18 265.4 270.2 43.6 ...
## $ Radio : num [1:300] 15 19.44 19.33 14.86 7.76 ...
## $ Newspaper: num [1:300] 3.2 12.8 33.8 17.3 32.3 ...
## $ Sales : num [1:300] 3.16 19.1 4 15.24 8.38 ...
3.3 Summary Statistics
# Five-number summary plus mean for every column.
my_data %>% summary()
## TV Radio Newspaper Sales
## Min. : 2.04 Min. : 0.18 Min. : 0.08 Min. : 0.31
## 1st Qu.: 76.00 1st Qu.:12.53 1st Qu.: 14.94 1st Qu.:10.37
## Median :140.42 Median :21.97 Median : 29.63 Median :14.19
## Mean :152.94 Mean :23.78 Mean : 32.59 Mean :14.02
## 3rd Qu.:217.56 3rd Qu.:33.70 3rd Qu.: 45.70 3rd Qu.:17.14
## Max. :479.72 Max. :75.12 Max. :120.15 Max. :33.22
Step 4 – Scatterplots by Advertising Channel
# Overlay all three channels on one set of axes. Sales is the shared y
# aesthetic; each layer supplies its own x column. Mapping colour to a
# constant string inside aes() is what creates one legend entry per layer.
ggplot(data = my_data, mapping = aes(y = Sales)) +
  geom_point(mapping = aes(x = TV, colour = "TV")) +
  geom_point(mapping = aes(x = Radio, colour = "Radio")) +
  geom_point(mapping = aes(x = Newspaper, colour = "Newspaper")) +
  scale_colour_manual(
    name = "Channel",
    values = c("TV" = "blue", "Radio" = "green", "Newspaper" = "orange")
  ) +
  ggtitle("Advertising Budget vs. Sales by Channel") +
  xlab("Advertising Budget (thousands of dollars)") +
  ylab("Sales (thousands of units)") +
  theme_minimal()

Step 6 – Compare Advertising Channels
library(gridExtra)

# One scatterplot per channel, each with its own x-axis range, so the three
# relationships with Sales can be compared side by side.
p1 <- ggplot(data = my_data, mapping = aes(x = TV, y = Sales)) +
  geom_point() +
  ggtitle("TV vs. Sales") +
  xlab("TV") +
  ylab("Sales")

p2 <- ggplot(data = my_data, mapping = aes(x = Radio, y = Sales)) +
  geom_point() +
  ggtitle("Radio vs. Sales") +
  xlab("Radio") +
  ylab("Sales")

p3 <- ggplot(data = my_data, mapping = aes(x = Newspaper, y = Sales)) +
  geom_point() +
  ggtitle("Newspaper vs. Sales") +
  xlab("Newspaper") +
  ylab("Sales")

# Lay the three panels out in a single row of three columns.
grid.arrange(p1, p2, p3, ncol = 3)

Peer Collaboration Note
- Collaborated with: Josh, Lenin, Jaason
- Suggestion received/provided: Code help