Setup

# Install readxl and tidyverse only when they are missing, so re-running or
# knitting this analysis does not re-download the packages every time (and
# does not fail when no network / CRAN mirror is available).
required_pkgs <- c("readxl", "tidyverse")
for (pkg in required_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# readxl is attached explicitly for read_excel(); tidyverse supplies
# glimpse() and ggplot() used in the rest of the analysis.
library(readxl)
library(tidyverse)

# Load the advertising data; the path is relative to the working directory.
my_data <- read_excel("advertising_randomized.xlsx")

Inspect the data

# Compact overview: one line per column with its type and leading values.
my_data |> glimpse()
## Rows: 250
## Columns: 6
## $ X         <dbl> 75, 50, 6, 189, 188, 220, 36, 122, 208, 95, 9, 99, 115, 78, …
## $ X1        <dbl> 76, 101, 65, 41, 76, 171, 48, 183, 122, 124, 97, 147, 39, 13…
## $ TV        <dbl> 196.41, 297.96, 186.98, 141.62, 159.77, 265.41, 131.28, 257.…
## $ radio     <dbl> 40.58, 37.36, 27.71, 7.58, 44.12, 32.25, 40.42, 29.64, 8.80,…
## $ newspaper <dbl> 43.99, 5.63, 14.32, 17.66, 72.07, 32.10, 24.03, 70.75, 60.69…
## $ sales     <dbl> 16.22, 12.20, 4.39, 13.94, 20.30, 13.06, 14.36, 14.80, 8.02,…
# Preview the first six rows of the data.
head(my_data, n = 6L)
## # A tibble: 6 × 6
##       X    X1    TV radio newspaper sales
##   <dbl> <dbl> <dbl> <dbl>     <dbl> <dbl>
## 1    75    76  196. 40.6      44.0  16.2 
## 2    50   101  298. 37.4       5.63 12.2 
## 3     6    65  187. 27.7      14.3   4.39
## 4   189    41  142.  7.58     17.7  13.9 
## 5   188    76  160. 44.1      72.1  20.3 
## 6   220   171  265. 32.2      32.1  13.1
# Show the object's class, dimensions, and the storage type of each column.
my_data |> str()
## tibble [250 × 6] (S3: tbl_df/tbl/data.frame)
##  $ X        : num [1:250] 75 50 6 189 188 220 36 122 208 95 ...
##  $ X1       : num [1:250] 76 101 65 41 76 171 48 183 122 124 ...
##  $ TV       : num [1:250] 196 298 187 142 160 ...
##  $ radio    : num [1:250] 40.58 37.36 27.71 7.58 44.12 ...
##  $ newspaper: num [1:250] 43.99 5.63 14.32 17.66 72.07 ...
##  $ sales    : num [1:250] 16.22 12.2 4.39 13.94 20.3 ...
# Per-column minimum, quartiles, median, mean, and maximum.
summary(object = my_data)
##        X               X1              TV             radio      
##  Min.   :  1.0   Min.   :  1.0   Min.   :  0.81   Min.   : 0.04  
##  1st Qu.: 54.0   1st Qu.: 62.0   1st Qu.: 90.20   1st Qu.:13.31  
##  Median :101.0   Median :103.0   Median :151.97   Median :24.35  
##  Mean   :102.3   Mean   :104.9   Mean   :155.71   Mean   :24.78  
##  3rd Qu.:142.0   3rd Qu.:139.8   3rd Qu.:216.09   3rd Qu.:34.05  
##  Max.   :269.0   Max.   :292.0   Max.   :386.12   Max.   :65.93  
##    newspaper          sales      
##  Min.   :  0.01   Min.   : 0.69  
##  1st Qu.: 18.29   1st Qu.: 9.73  
##  Median : 32.35   Median :13.91  
##  Mean   : 34.60   Mean   :13.95  
##  3rd Qu.: 46.72   3rd Qu.:17.99  
##  Max.   :100.19   Max.   :29.05

TV vs Sales

# Scatterplot of sales against TV spend, overlaid with a linear-model fit.
my_data |>
  ggplot(mapping = aes(x = TV, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "TV Advertising vs Sales",
    x = "TV Advertising Spend",
    y = "Sales"
  )
## `geom_smooth()` using formula = 'y ~ x'

Radio vs Sales

# Scatterplot of sales against radio spend, overlaid with a linear-model fit.
ggplot(
  data = my_data,
  mapping = aes(x = radio, y = sales)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Radio Advertising vs Sales",
    x = "Radio Advertising Spend",
    y = "Sales"
  )
## `geom_smooth()` using formula = 'y ~ x'

Newspaper vs Sales

# Scatterplot of sales against newspaper spend, overlaid with a
# linear-model fit.
newspaper_aes <- aes(x = newspaper, y = sales)
ggplot(my_data, newspaper_aes) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Newspaper Advertising vs Sales",
    x = "Newspaper Advertising Spend",
    y = "Sales"
  )
## `geom_smooth()` using formula = 'y ~ x'

Radio vs TV

# Scatterplot of TV spend against radio spend with a linear-model fit, to
# check whether the two advertising channels move together.
# A title and axis labels are set explicitly so this figure matches the
# labelling of the other plots in this analysis.
ggplot(
  data = my_data,
  mapping = aes(x = radio, y = TV)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Radio vs TV Advertising Spend",
    x = "Radio Advertising Spend",
    y = "TV Advertising Spend"
  )
## `geom_smooth()` using formula = 'y ~ x'

Radio vs sales, grouped by newspaper spend

# Scatterplot of sales against radio spend, coloured by newspaper spend.
# cut(newspaper, breaks = 3) splits newspaper spend into three intervals;
# because colour is mapped in the plot-level aes(), geom_smooth() fits a
# separate linear trend for each interval.
# The colour legend is given a readable title (the default would be the raw
# expression "cut(newspaper, breaks = 3)"), and a title and axis labels are
# added to match the other plots in this analysis.
ggplot(
  data = my_data,
  mapping = aes(x = radio, y = sales, color = cut(newspaper, breaks = 3))
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Radio Advertising vs Sales by Newspaper Spend",
    x = "Radio Advertising Spend",
    y = "Sales",
    color = "Newspaper Advertising Spend"
  )
## `geom_smooth()` using formula = 'y ~ x'

Based on the scatterplots, TV advertising appears to have the strongest relationship with sales: its plot shows the clearest positive linear trend. Radio advertising also shows a positive relationship with sales, while the relationship for newspaper advertising appears weaker and more scattered.