Code and Plots
# Install readxl only when it is not already available, so that re-running
# the script does not re-download the package on every execution.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
library(readxl)
# Read the workbook into `my_data`. The relative path is resolved against the
# current working directory; read_excel() reads the first sheet by default.
my_data <- read_excel("advertising_1_randomized.xlsx")
## Install and load the tidyverse
# Install the tidyverse only when it is missing; an unconditional
# install.packages() would reinstall it each time the script is run.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
# Attach the core tidyverse packages (dplyr, ggplot2, tibble, ...) used below.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Compact overview of the data: row/column counts, then one line per column
# with its type and first few values.
my_data |> glimpse()
## Rows: 300
## Columns: 6
## $ X <dbl> 37, 7, 267, 70, 154, 117, 15, 128, 124, 43, 129, 141, 106, 2…
## $ X1 <dbl> 105, 118, 81, 124, 70, 103, 65, 170, 18, 135, 174, 53, 192, …
## $ TV <dbl> 50.96, 191.74, 166.10, 109.57, 110.23, 211.52, 145.39, 181.0…
## $ radio <dbl> 8.52, 50.52, 28.72, 13.07, 15.52, 23.16, 24.94, 27.94, 1.06,…
## $ newspaper <dbl> 6.97, 36.79, 25.35, 9.53, 28.06, 37.52, 8.15, 8.42, 4.32, 11…
## $ sales <dbl> 13.31, 12.06, 4.53, 11.70, 16.36, 6.88, 18.07, 14.46, 16.79,…
# Print the first six rows (the default for head()).
my_data |> head(n = 6L)
## # A tibble: 6 × 6
## X X1 TV radio newspaper sales
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 37 105 51.0 8.52 6.97 13.3
## 2 7 118 192. 50.5 36.8 12.1
## 3 267 81 166. 28.7 25.4 4.53
## 4 70 124 110. 13.1 9.53 11.7
## 5 154 70 110. 15.5 28.1 16.4
## 6 117 103 212. 23.2 37.5 6.88
# Per-column summary statistics: min, quartiles, median, mean and max.
my_data |> summary()
## X X1 TV radio
## Min. : 1.00 Min. : 1.0 Min. : 1.13 Min. : 0.10
## 1st Qu.: 62.75 1st Qu.: 53.0 1st Qu.: 79.62 1st Qu.:15.37
## Median :116.00 Median : 95.5 Median :142.12 Median :25.80
## Mean :113.66 Mean :101.3 Mean :149.51 Mean :26.07
## 3rd Qu.:159.00 3rd Qu.:139.5 3rd Qu.:208.96 3rd Qu.:35.01
## Max. :296.00 Max. :368.0 Max. :508.75 Max. :68.71
## newspaper sales
## Min. : 0.33 Min. : 0.060
## 1st Qu.: 14.95 1st Qu.: 9.623
## Median : 29.80 Median :13.675
## Mean : 33.10 Mean :14.126
## 3rd Qu.: 48.03 3rd Qu.:18.192
## Max. :111.48 Max. :32.070
# Start a plot from the data alone. With no aesthetics and no layers this
# draws only an empty canvas.
my_data |> ggplot()

# Base-R view of the object's structure: class, dimensions and column types.
my_data |> str()
## tibble [300 × 6] (S3: tbl_df/tbl/data.frame)
## $ X : num [1:300] 37 7 267 70 154 117 15 128 124 43 ...
## $ X1 : num [1:300] 105 118 81 124 70 103 65 170 18 135 ...
## $ TV : num [1:300] 51 192 166 110 110 ...
## $ radio : num [1:300] 8.52 50.52 28.72 13.07 15.52 ...
## $ newspaper: num [1:300] 6.97 36.79 25.35 9.53 28.06 ...
## $ sales : num [1:300] 13.31 12.06 4.53 11.7 16.36 ...
# Map TV to the x axis and sales to the y axis. No geom is added yet, so no
# data points are drawn.
my_data |>
  ggplot(aes(x = TV, y = sales))

# Scatter plot of sales against TV.
ggplot(my_data, aes(TV, sales)) +
  geom_point()

# Scatter plot of sales against TV, with each point also coloured by its TV
# value.
my_data |>
  ggplot() +
  geom_point(aes(x = TV, y = sales, color = TV))

# Scatter plot of sales against TV; points are coloured by newspaper split
# into two intervals with cut().
ggplot(
  my_data,
  aes(TV, sales, color = cut(newspaper, breaks = 2))
) +
  geom_point()

# Scatter plot of sales against radio; points are coloured by newspaper split
# into two intervals with cut().
my_data |>
  ggplot(aes(x = radio, y = sales)) +
  geom_point(aes(color = cut(newspaper, breaks = 2)))

# Scatter plot of sales against newspaper; points are coloured by TV split
# into two intervals with cut().
ggplot(my_data) +
  geom_point(
    aes(x = newspaper, y = sales, color = cut(TV, breaks = 2))
  )

# Sales against newspaper, coloured by TV split into two intervals.
# The color mapping is set in ggplot(), so geom_smooth() inherits it and fits
# a separate linear model for each TV interval.
ggplot(
  data = my_data,
  mapping = aes(x = newspaper, y = sales, color = cut(TV, breaks = 2))
) +
  geom_point() +
  # State the model formula explicitly: same fit as the default, but without
  # the "`geom_smooth()` using formula = 'y ~ x'" message.
  geom_smooth(method = "lm", formula = y ~ x) +
  # Readable legend title instead of the raw cut() expression.
  labs(color = "TV (2 bins)")

# Sales against TV with one overall linear fit.
# Color is mapped only inside geom_point(), so the points are coloured by
# newspaper interval while geom_smooth() fits a single line to all rows.
ggplot(
  data = my_data,
  mapping = aes(x = TV, y = sales)
) +
  geom_point(mapping = aes(color = cut(newspaper, breaks = 2))) +
  # State the model formula explicitly: same fit as the default, but without
  # the "`geom_smooth()` using formula = 'y ~ x'" message.
  geom_smooth(method = "lm", formula = y ~ x) +
  # Readable legend title instead of the raw cut() expression.
  labs(color = "newspaper (2 bins)")

# Sales against radio with one overall linear fit.
# Color is mapped only inside geom_point(), so the points are coloured by
# newspaper interval while geom_smooth() fits a single line to all rows.
ggplot(
  data = my_data,
  mapping = aes(x = radio, y = sales)
) +
  geom_point(mapping = aes(color = cut(newspaper, breaks = 2))) +
  # State the model formula explicitly: same fit as the default, but without
  # the "`geom_smooth()` using formula = 'y ~ x'" message.
  geom_smooth(method = "lm", formula = y ~ x) +
  # Readable legend title instead of the raw cut() expression.
  labs(color = "newspaper (2 bins)")

# Sales against TV with one overall linear fit; the newspaper interval is
# encoded redundantly through both point color and point shape.
my_data |>
  # Compute the two-interval split once instead of repeating the cut() call
  # for each aesthetic.
  mutate(newspaper_bin = cut(newspaper, breaks = 2)) |>
  ggplot(mapping = aes(x = TV, y = sales)) +
  geom_point(mapping = aes(color = newspaper_bin, shape = newspaper_bin)) +
  # State the model formula explicitly: same fit as the default, but without
  # the "`geom_smooth()` using formula = 'y ~ x'" message.
  geom_smooth(method = "lm", formula = y ~ x) +
  # Give both scales the same title so ggplot2 keeps them in a single,
  # merged legend.
  labs(color = "newspaper (2 bins)", shape = "newspaper (2 bins)")
