Building the graphs was easy enough. I think the data itself was
a little hard to understand. From the plots alone, it is hard to tell
which advertising channel (TV, radio, or newspaper) was most effective.
I liked how intuitive it was to build the graphs. So far I have no
concerns, though I am sure it will get a little harder as we proceed.
# Install readxl only when it is missing, so re-running or knitting this
# document does not re-download the package every time.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
# Attach readxl, then read the advertising workbook into `my_data`.
# The path is relative, so the file must sit in the working directory.
library("readxl")
my_data <- "advertising_randomized (2).xlsx" |>
  read_excel()
# Install the tidyverse only when it is missing, so repeated runs do not
# reinstall it; the package is attached by the library() call that follows.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
# Attach the core tidyverse packages; ggplot() and glimpse() used later in
# this document come from them.
library("tidyverse")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Preview the first rows of the imported data.
my_data |>
  head()
## # A tibble: 6 × 6
## X X1 TV radio newspaper sales
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 79 191 250. 34.0 16.7 23.6
## 2 280 185 72.0 54.4 28.3 16.3
## 3 45 113 326. 8.24 43.3 16.9
## 4 99 85 141. 22.3 2.62 10.0
## 5 26 50 64.4 45.4 23.2 9.7
## 6 53 56 188. 29.0 70.0 9.66
# Column-wise overview: one line per column with its type and first values.
my_data |>
  glimpse()
## Rows: 300
## Columns: 6
## $ X <dbl> 79, 280, 45, 99, 26, 53, 137, 48, 73, 15, 87, 67, 62, 179, 8…
## $ X1 <dbl> 191, 185, 113, 85, 50, 56, 117, 23, 78, 152, 73, 83, 54, 239…
## $ TV <dbl> 249.80, 72.01, 326.29, 141.03, 64.36, 188.40, 315.96, 71.67,…
## $ radio <dbl> 33.97, 54.45, 8.24, 22.27, 45.41, 29.01, 45.32, 17.32, 36.65…
## $ newspaper <dbl> 16.74, 28.34, 43.28, 2.62, 23.16, 70.03, 34.41, 34.97, 60.02…
## $ sales <dbl> 23.61, 16.34, 16.91, 10.03, 9.70, 9.66, 11.49, 23.45, 22.80,…
# Start a plot from the data alone; with no aesthetics or layers this draws
# only an empty canvas.
my_data |>
  ggplot()

# Base-R structure summary of the same tibble (class, dimensions, columns).
my_data |>
  str()
## tibble [300 × 6] (S3: tbl_df/tbl/data.frame)
## $ X : num [1:300] 79 280 45 99 26 53 137 48 73 15 ...
## $ X1 : num [1:300] 191 185 113 85 50 56 117 23 78 152 ...
## $ TV : num [1:300] 249.8 72 326.3 141 64.4 ...
## $ radio : num [1:300] 33.97 54.45 8.24 22.27 45.41 ...
## $ newspaper: num [1:300] 16.74 28.34 43.28 2.62 23.16 ...
## $ sales : num [1:300] 23.6 16.3 16.9 10 9.7 ...
# Map the TV column to x and sales to y. No geom layer is added yet, so only
# the axes are drawn.
my_data |>
  ggplot(aes(x = TV, y = sales))

# Scatter plot of sales against TV: one point per row of my_data.
my_data |>
  ggplot(aes(x = TV, y = sales)) +
  geom_point()

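# NOTE(review): the warning text below looks pasted from a reference example
# rather than produced by these plots (no knitted warning output accompanies
# it) -- confirm, or remove it.
#> Warning: Removed 2 rows containing missing values or values outside the scale range
#> (`geom_point()`).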
# Same TV-vs-sales scatter, with each point coloured by its (continuous)
# newspaper value.
my_data |>
  ggplot(aes(x = TV, y = sales, color = newspaper)) +
  geom_point()

# TV vs sales, coloured by newspaper split into two intervals.
# cut(..., breaks = 2) bins the column on the fly; the expression is kept
# verbatim because ggplot2 uses it as the legend title.
my_data |>
  ggplot(aes(x = TV, y = sales, color = cut(newspaper, breaks = 2))) +
  geom_point()

# radio vs sales, coloured by the same two-interval split of newspaper.
my_data |>
  ggplot(aes(x = radio, y = sales, color = cut(newspaper, breaks = 2))) +
  geom_point()

# newspaper vs sales; the colour repeats the x variable, binned into two
# intervals, so the colour boundary falls at a fixed x position.
my_data |>
  ggplot(
    aes(x = newspaper, y = sales, color = cut(newspaper, breaks = 2))
  ) +
  geom_point()

# TV vs sales with a linear-model fit on top.
# The colour mapping lives on the point layer only, so geom_smooth() fits a
# single line to all rows rather than one line per newspaper interval.
my_data |>
  ggplot(aes(x = TV, y = sales)) +
  geom_point(aes(color = cut(newspaper, breaks = 2))) +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

# radio vs sales with a linear-model fit on top.
# Colour is mapped on the point layer only, so one line is fitted to all rows.
my_data |>
  ggplot(aes(x = radio, y = sales)) +
  geom_point(aes(color = cut(newspaper, breaks = 2))) +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

# newspaper vs sales with a linear-model fit on top.
# Colour is mapped on the point layer only, so one line is fitted to all rows.
my_data |>
  ggplot(aes(x = newspaper, y = sales)) +
  geom_point(aes(color = cut(newspaper, breaks = 2))) +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
