Load and Explore the Data

library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the advertising workbook into a tibble and preview its first rows.
my_data <- read_excel(path = "advertising_randomized.xlsx")
my_data |>
  head()
## # A tibble: 6 × 6
##       X    X1    TV radio newspaper sales
##   <dbl> <dbl> <dbl> <dbl>     <dbl> <dbl>
## 1    81   100 272.   21.5     34.2  11.2 
## 2    23   115 301.   18.1     18.4  14.9 
## 3   123     5 198.   10.3     49.0  16.0 
## 4    88   159 164.   18.6     54.2   6.49
## 5   192   103  82.7  14.0      9.54 16.1 
## 6    27   134 125.   66.9     36.2  13.2
# Compact structure listing: dimensions plus each column's type and first values.
my_data |>
  str()
## tibble [50 × 6] (S3: tbl_df/tbl/data.frame)
##  $ X        : num [1:50] 81 23 123 88 192 27 135 226 83 83 ...
##  $ X1       : num [1:50] 100 115 5 159 103 134 98 144 38 135 ...
##  $ TV       : num [1:50] 271.6 301.4 197.8 163.9 82.7 ...
##  $ radio    : num [1:50] 21.5 18.1 10.3 18.6 14 ...
##  $ newspaper: num [1:50] 34.17 18.43 48.95 54.17 9.54 ...
##  $ sales    : num [1:50] 11.17 14.94 15.98 6.49 16.11 ...
# Per-column summary statistics (min, quartiles, median, mean, max).
my_data |>
  summary()
##        X                X1               TV             radio      
##  Min.   :  1.00   Min.   :  3.00   Min.   :  1.41   Min.   : 1.15  
##  1st Qu.: 74.75   1st Qu.: 66.50   1st Qu.: 83.09   1st Qu.:11.99  
##  Median : 91.00   Median : 92.50   Median :137.65   Median :20.23  
##  Mean   :108.82   Mean   : 94.52   Mean   :152.90   Mean   :23.00  
##  3rd Qu.:133.75   3rd Qu.:133.25   3rd Qu.:205.19   3rd Qu.:31.60  
##  Max.   :253.00   Max.   :235.00   Max.   :371.05   Max.   :66.88  
##    newspaper         sales      
##  Min.   : 1.85   Min.   : 5.66  
##  1st Qu.:22.57   1st Qu.:10.35  
##  Median :31.93   Median :13.45  
##  Mean   :34.65   Mean   :14.04  
##  3rd Qu.:44.90   3rd Qu.:17.60  
##  Max.   :87.99   Max.   :28.71

Scatterplots

TV vs Sales

# Scatterplot of sales against the TV advertising budget.
my_data |>
  ggplot(aes(x = TV, y = sales)) +
  geom_point() +
  labs(
    title = "Sales vs. TV Advertising",
    x = "TV Advertising Budget",
    y = "Sales"
  )

Radio vs Sales

# Scatterplot of sales against the radio advertising budget.
my_data |>
  ggplot(aes(x = radio, y = sales)) +
  geom_point() +
  labs(
    title = "Sales vs. Radio Advertising",
    x = "Radio Advertising Budget",
    y = "Sales"
  )

Newspaper vs Sales

# Scatterplot of sales against the newspaper advertising budget.
my_data |>
  ggplot(aes(x = newspaper, y = sales)) +
  geom_point() +
  labs(
    title = "Sales vs. Newspaper Advertising",
    x = "Newspaper Advertising Budget",
    y = "Sales"
  )

Regression Lines

TV vs Sales

# TV budget vs. sales, with a linear-model smoother drawn over the points.
my_data |>
  ggplot(aes(x = TV, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Sales vs. TV Advertising",
    x = "TV Advertising Budget",
    y = "Sales"
  )
## `geom_smooth()` using formula = 'y ~ x'

Radio vs Sales

# Radio budget vs. sales, with a linear-model smoother drawn over the points.
my_data |>
  ggplot(aes(x = radio, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Sales vs. Radio Advertising",
    x = "Radio Advertising Budget",
    y = "Sales"
  )
## `geom_smooth()` using formula = 'y ~ x'

Newspaper vs Sales

# Newspaper budget vs. sales, with a linear-model smoother drawn over the points.
my_data |>
  ggplot(aes(x = newspaper, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Sales vs. Newspaper Advertising",
    x = "Newspaper Advertising Budget",
    y = "Sales"
  )
## `geom_smooth()` using formula = 'y ~ x'

Comparing Advertising Channels

# Reshape the three budget columns to long form (one row per channel/budget
# pair), then draw one panel per channel with points and a linear-model
# smoother.
my_data |>
  pivot_longer(
    cols = c(TV, radio, newspaper),
    names_to = "channel",
    values_to = "budget"
  ) |>
  ggplot(aes(x = budget, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(vars(channel)) +
  labs(
    title = "Sales vs. Advertising Channels",
    x = "Advertising Budget",
    y = "Sales"
  )
## `geom_smooth()` using formula = 'y ~ x'

Discussion

Of the three advertising channels, TV has the strongest relationship with sales, showing a clear positive trend.