Step 1 – Load Libraries

library(tidyverse)
library(readxl)

Step 2 – Load the Dataset

# Read the advertising workbook, keep the four analysis columns, and rename
# the lower-case ones to capitalised names in the same select() call.
my_data <- read_excel(
  "/Users/christinamac/Desktop/Data/advertising (2)_randomized.xlsx"
) %>%
  select(TV, Radio = radio, Newspaper = newspaper, Sales = sales)

Step 3 – Understand the Dataset

3.1 First Six Observations

# Print the first six rows of the dataset.
head(my_data, n = 6)
## # A tibble: 6 × 4
##      TV Radio Newspaper Sales
##   <dbl> <dbl>     <dbl> <dbl>
## 1 385.  15          3.2  3.16
## 2  18.0 19.4       12.8 19.1 
## 3 265.  19.3       33.8  4   
## 4 270.  14.9       17.3 15.2 
## 5  43.6  7.76      32.3  8.38
## 6 281.  22.2       56.7  4.94

3.2 Dataset Structure

# Display the dataset's dimensions and the type of each column.
str(object = my_data)
## tibble [300 × 4] (S3: tbl_df/tbl/data.frame)
##  $ TV       : num [1:300] 385 18 265.4 270.2 43.6 ...
##  $ Radio    : num [1:300] 15 19.44 19.33 14.86 7.76 ...
##  $ Newspaper: num [1:300] 3.2 12.8 33.8 17.3 32.3 ...
##  $ Sales    : num [1:300] 3.16 19.1 4 15.24 8.38 ...

3.3 Summary Statistics

# Print per-column summary statistics (min, quartiles, mean, max).
summary(object = my_data)
##        TV             Radio         Newspaper          Sales      
##  Min.   :  2.04   Min.   : 0.18   Min.   :  0.08   Min.   : 0.31  
##  1st Qu.: 76.00   1st Qu.:12.53   1st Qu.: 14.94   1st Qu.:10.37  
##  Median :140.42   Median :21.97   Median : 29.63   Median :14.19  
##  Mean   :152.94   Mean   :23.78   Mean   : 32.59   Mean   :14.02  
##  3rd Qu.:217.56   3rd Qu.:33.70   3rd Qu.: 45.70   3rd Qu.:17.14  
##  Max.   :479.72   Max.   :75.12   Max.   :120.15   Max.   :33.22

Step 4 – Scatterplots by Advertising Channel

# Combined scatterplot of advertising budget against sales, coloured by channel.
# The three budget columns are stacked into long format (one row per
# observation and channel) so a single point layer can map colour to the
# channel, instead of overlaying one hand-built layer per column.
my_data %>%
  pivot_longer(
    cols      = c(TV, Radio, Newspaper),
    names_to  = "Channel",
    values_to = "Budget"
  ) %>%
  ggplot(aes(x = Budget, y = Sales, color = Channel)) +
  geom_point() +
  # Named vector ties each channel to a fixed colour regardless of level order.
  scale_color_manual(
    name   = "Channel",
    values = c("TV" = "blue", "Radio" = "green", "Newspaper" = "orange")
  ) +
  labs(
    title = "Advertising Budget vs. Sales by Channel",
    x     = "Advertising Budget (thousands of dollars)",
    y     = "Sales (thousands of units)"
  ) +
  theme_minimal()

Step 5 – Compare Advertising Channels

# gridExtra supplies grid.arrange(), used below to place plots side by side.
library(gridExtra)

# Build a scatterplot of Sales against one advertising channel.
#
# data:    data frame containing a `Sales` column and the column named by
#          `channel`.
# channel: name of the budget column to put on the x-axis, as a string; it is
#          also used for the x-axis label and the plot title.
# Returns a ggplot object.
plot_channel_vs_sales <- function(data, channel) {
  # .data[[channel]] looks the column up by its string name inside aes().
  ggplot(data, aes(x = .data[[channel]], y = Sales)) +
    geom_point() +
    labs(title = paste(channel, "vs. Sales"), x = channel, y = "Sales")
}

p1 <- plot_channel_vs_sales(my_data, "TV")
p2 <- plot_channel_vs_sales(my_data, "Radio")
p3 <- plot_channel_vs_sales(my_data, "Newspaper")

# One row, three columns: TV, Radio, Newspaper from left to right.
grid.arrange(p1, p2, p3, ncol = 3)

Peer Collaboration Note