# Code and Plots

# Make sure the Excel reader is available. Installing only when the package
# is missing avoids re-downloading it (and needing network access) on every
# run of the script.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Import the advertising data set. The path is relative, so the workbook must
# sit in the current working directory.
my_data <- read_excel("advertising_1_randomized.xlsx")

# Attach the tidyverse (dplyr, ggplot2, tibble, ...). As above, install it
# only when it is not already present instead of reinstalling on every run.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Column-wise overview: one line per variable with its type and first values.
my_data |> glimpse()
## Rows: 300
## Columns: 6
## $ X         <dbl> 37, 7, 267, 70, 154, 117, 15, 128, 124, 43, 129, 141, 106, 2…
## $ X1        <dbl> 105, 118, 81, 124, 70, 103, 65, 170, 18, 135, 174, 53, 192, …
## $ TV        <dbl> 50.96, 191.74, 166.10, 109.57, 110.23, 211.52, 145.39, 181.0…
## $ radio     <dbl> 8.52, 50.52, 28.72, 13.07, 15.52, 23.16, 24.94, 27.94, 1.06,…
## $ newspaper <dbl> 6.97, 36.79, 25.35, 9.53, 28.06, 37.52, 8.15, 8.42, 4.32, 11…
## $ sales     <dbl> 13.31, 12.06, 4.53, 11.70, 16.36, 6.88, 18.07, 14.46, 16.79,…

# Row-wise peek at the first six observations.
my_data |> head()
## # A tibble: 6 × 6
##       X    X1    TV radio newspaper sales
##   <dbl> <dbl> <dbl> <dbl>     <dbl> <dbl>
## 1    37   105  51.0  8.52      6.97 13.3 
## 2     7   118 192.  50.5      36.8  12.1 
## 3   267    81 166.  28.7      25.4   4.53
## 4    70   124 110.  13.1       9.53 11.7 
## 5   154    70 110.  15.5      28.1  16.4 
## 6   117   103 212.  23.2      37.5   6.88

# Per-column minimum, quartiles, mean and maximum.
my_data |> summary()
##        X                X1              TV             radio      
##  Min.   :  1.00   Min.   :  1.0   Min.   :  1.13   Min.   : 0.10  
##  1st Qu.: 62.75   1st Qu.: 53.0   1st Qu.: 79.62   1st Qu.:15.37  
##  Median :116.00   Median : 95.5   Median :142.12   Median :25.80  
##  Mean   :113.66   Mean   :101.3   Mean   :149.51   Mean   :26.07  
##  3rd Qu.:159.00   3rd Qu.:139.5   3rd Qu.:208.96   3rd Qu.:35.01  
##  Max.   :296.00   Max.   :368.0   Max.   :508.75   Max.   :68.71  
##    newspaper          sales       
##  Min.   :  0.33   Min.   : 0.060  
##  1st Qu.: 14.95   1st Qu.: 9.623  
##  Median : 29.80   Median :13.675  
##  Mean   : 33.10   Mean   :14.126  
##  3rd Qu.: 48.03   3rd Qu.:18.192  
##  Max.   :111.48   Max.   :32.070
# Step 1: a plot object with data but no aesthetics or layers, so nothing is
# drawn beyond the blank panel.
ggplot(my_data)

# Base-R view of the structure, to confirm every column is numeric.
my_data |> str()
## tibble [300 × 6] (S3: tbl_df/tbl/data.frame)
##  $ X        : num [1:300] 37 7 267 70 154 117 15 128 124 43 ...
##  $ X1       : num [1:300] 105 118 81 124 70 103 65 170 18 135 ...
##  $ TV       : num [1:300] 51 192 166 110 110 ...
##  $ radio    : num [1:300] 8.52 50.52 28.72 13.07 15.52 ...
##  $ newspaper: num [1:300] 6.97 36.79 25.35 9.53 28.06 ...
##  $ sales    : num [1:300] 13.31 12.06 4.53 11.7 16.36 ...

# Step 2: map TV to x and sales to y. There is still no geom, so only the
# axes and grid are expected to appear.
ggplot(my_data, aes(TV, sales))

# Scatterplot of sales against TV.
my_data |>
  ggplot(aes(TV, sales)) +
  geom_point()

# Same scatterplot, with the points additionally coloured by their TV value
# (a continuous colour scale).
my_data |>
  ggplot(aes(TV, sales, color = TV)) +
  geom_point()

# The next three scatterplots colour the points by a second variable split
# into two equal-width bins with cut(). Each legend gets an explicit title via
# labs(); otherwise ggplot2 would label it with the raw expression
# "cut(newspaper, breaks = 2)" / "cut(TV, breaks = 2)".

# sales vs TV, coloured by newspaper bin.
ggplot(
  data = my_data,
  mapping = aes(x = TV, y = sales, color = cut(newspaper, breaks = 2))
) +
  geom_point() +
  labs(color = "newspaper (2 bins)")

# sales vs radio, coloured by newspaper bin.
ggplot(
  data = my_data,
  mapping = aes(x = radio, y = sales, color = cut(newspaper, breaks = 2))
) +
  geom_point() +
  labs(color = "newspaper (2 bins)")

# sales vs newspaper, coloured by TV bin.
ggplot(
  data = my_data,
  mapping = aes(x = newspaper, y = sales, color = cut(TV, breaks = 2))
) +
  geom_point() +
  labs(color = "TV (2 bins)")

# sales vs newspaper with a linear fit. Because the colour mapping is set in
# the top-level aes(), geom_smooth() inherits it and fits one regression line
# per TV bin. labs() gives the legend a readable title instead of the raw
# "cut(TV, breaks = 2)" expression.
ggplot(
  data = my_data,
  mapping = aes(x = newspaper, y = sales, color = cut(TV, breaks = 2))
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(color = "TV (2 bins)")
## `geom_smooth()` using formula = 'y ~ x'

# In the next two plots the colour mapping lives only in geom_point(), so the
# points are coloured by newspaper bin while geom_smooth() fits a single
# regression line through all observations. labs() replaces the default
# legend title (the raw "cut(newspaper, breaks = 2)" expression).

# sales vs TV.
ggplot(
  data = my_data,
  mapping = aes(x = TV, y = sales)
) +
  geom_point(mapping = aes(color = cut(newspaper, breaks = 2))) +
  geom_smooth(method = "lm") +
  labs(color = "newspaper (2 bins)")
## `geom_smooth()` using formula = 'y ~ x'

# sales vs radio.
ggplot(
  data = my_data,
  mapping = aes(x = radio, y = sales)
) +
  geom_point(mapping = aes(color = cut(newspaper, breaks = 2))) +
  geom_smooth(method = "lm") +
  labs(color = "newspaper (2 bins)")
## `geom_smooth()` using formula = 'y ~ x'

# sales vs TV with the newspaper bin encoded redundantly as both colour and
# point shape, plus a single overall linear fit. Colour and shape are given
# the same legend title so ggplot2 keeps them merged into one legend, now with
# a readable title instead of the raw cut() expression.
ggplot(
  data = my_data,
  mapping = aes(x = TV, y = sales)
) +
  geom_point(
    mapping = aes(
      color = cut(newspaper, breaks = 2),
      shape = cut(newspaper, breaks = 2)
    )
  ) +
  geom_smooth(method = "lm") +
  labs(color = "newspaper (2 bins)", shape = "newspaper (2 bins)")
## `geom_smooth()` using formula = 'y ~ x'