advertising

R Code to Analyze Advertising CSV Dataset

# Load the advertising data from the GitHub-hosted CSV, then save a local copy
# (without row names) as "advertising.csv" in the working directory.
advertising <- read.csv(
  file = "https://raw.githubusercontent.com/utjimmyx/regression/master/advertising.csv"
)
write.csv(x = advertising, file = "advertising.csv", row.names = FALSE)

# Install readxl only if it is not already available, so that re-running this
# script does not download and reinstall the package each time.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}

## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)

# Attach readxl, then read "advertising_randomized.xlsx" into `my_data`.
# NOTE(review): this .xlsx is not created by the read.csv()/write.csv() step
# earlier in the script; confirm it is present in the working directory.
library(readxl)
my_data <- read_excel(path = "advertising_randomized.xlsx")

## Install the tidyverse only when it is missing; it is attached separately
## with library(tidyverse).
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}

## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Preview the first six rows of the imported data (6 is head()'s default).
head(my_data, n = 6L)

## # A tibble: 6 × 6
##       X    X1    TV radio newspaper sales
##   <dbl> <dbl> <dbl> <dbl>     <dbl> <dbl>
## 1   165   169  17.5 32.6       27.8 16.0 
## 2   119    98 178.  63.0       21.8  6.54
## 3    65   116 150.  18.2       28.2 15.1 
## 4     6   157 195.   6.42      29.2 25.4 
## 5    97   149  13.0 28.7       30.2 14.2 
## 6     8   113 216.  31.2       31.9 18.2

# Column-wise overview of `my_data`: one line per column with its type and
# leading values.
my_data |> glimpse()

## Rows: 300
## Columns: 6
## $ X         <dbl> 165, 119, 65, 6, 97, 8, 99, 134, 67, 14, 171, 118, 121, 140,…
## $ X1        <dbl> 169, 98, 116, 157, 149, 113, 75, 185, 105, 11, 155, 131, 67,…
## $ TV        <dbl> 17.54, 177.73, 150.15, 194.70, 12.99, 215.51, 147.47, 211.92…
## $ radio     <dbl> 32.55, 63.03, 18.15, 6.42, 28.71, 31.22, 21.29, 17.10, 16.99…
## $ newspaper <dbl> 27.76, 21.83, 28.17, 29.23, 30.20, 31.88, 26.16, 23.21, 26.0…
## $ sales     <dbl> 16.04, 6.54, 15.07, 25.41, 14.21, 18.24, 12.82, 12.51, 30.62…

## Create plots
# With only the data supplied (no aesthetics, no layers) this draws an empty
# canvas.
ggplot(my_data)

# Compact view of the structure of `my_data`.
utils::str(my_data)

## tibble [300 × 6] (S3: tbl_df/tbl/data.frame)
##  $ X        : num [1:300] 165 119 65 6 97 8 99 134 67 14 ...
##  $ X1       : num [1:300] 169 98 116 157 149 113 75 185 105 11 ...
##  $ TV       : num [1:300] 17.5 177.7 150.2 194.7 13 ...
##  $ radio    : num [1:300] 32.55 63.03 18.15 6.42 28.71 ...
##  $ newspaper: num [1:300] 27.8 21.8 28.2 29.2 30.2 ...
##  $ sales    : num [1:300] 16.04 6.54 15.07 25.41 14.21 ...

# Scatter plot of sales against TV: one point per row of `my_data`.
my_data |>
  ggplot(aes(x = TV, y = sales)) +
  geom_point()

#> Warning: Removed 2 rows containing missing values or values outside the scale range
#> (`geom_point()`).

# TV-vs-sales scatter with points coloured by newspaper, which cut() splits
# into three intervals over its range. The explicit legend title replaces the
# default, which would be the raw expression `cut(newspaper, breaks = 3)`.
ggplot(
  data = my_data,
  mapping = aes(x = TV, y = sales, color = cut(newspaper, breaks = 3))
) +
  geom_point() +
  labs(color = "newspaper (3 bins)")

## Graphs without breaks and with regression lines
# sales vs. TV: points plus a fitted line from geom_smooth(method = "lm").
ggplot(my_data, aes(x = TV, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm")

## `geom_smooth()` using formula = 'y ~ x'

# sales vs. radio: points plus a fitted line from geom_smooth(method = "lm").
my_data |>
  ggplot(mapping = aes(x = radio, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm")

## `geom_smooth()` using formula = 'y ~ x'

# sales vs. newspaper: points plus a fitted line from
# geom_smooth(method = "lm").
ggplot(my_data, aes(newspaper, sales)) +
  geom_point() +
  geom_smooth(method = "lm")

## `geom_smooth()` using formula = 'y ~ x'

advertising

Nicholas Bridgman

2026-06-06

R Code to Analyze Advertising CSV Dataset

Advertising Channel Comparison