library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Import the advertising data from the Excel workbook, then preview the
# first rows to confirm the import looks sensible.
my_data <- "advertising_randomized.xlsx" |>
  read_excel()
my_data |>
  head()
## # A tibble: 6 × 6
## X X1 TV radio newspaper sales
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 81 100 272. 21.5 34.2 11.2
## 2 23 115 301. 18.1 18.4 14.9
## 3 123 5 198. 10.3 49.0 16.0
## 4 88 159 164. 18.6 54.2 6.49
## 5 192 103 82.7 14.0 9.54 16.1
## 6 27 134 125. 66.9 36.2 13.2
# Show the compact structure of the data: dimensions and column types.
my_data |>
  str()
## tibble [50 × 6] (S3: tbl_df/tbl/data.frame)
## $ X : num [1:50] 81 23 123 88 192 27 135 226 83 83 ...
## $ X1 : num [1:50] 100 115 5 159 103 134 98 144 38 135 ...
## $ TV : num [1:50] 271.6 301.4 197.8 163.9 82.7 ...
## $ radio : num [1:50] 21.5 18.1 10.3 18.6 14 ...
## $ newspaper: num [1:50] 34.17 18.43 48.95 54.17 9.54 ...
## $ sales : num [1:50] 11.17 14.94 15.98 6.49 16.11 ...
# Print per-column summary statistics (min, quartiles, mean, max).
my_data |>
  summary()
## X X1 TV radio
## Min. : 1.00 Min. : 3.00 Min. : 1.41 Min. : 1.15
## 1st Qu.: 74.75 1st Qu.: 66.50 1st Qu.: 83.09 1st Qu.:11.99
## Median : 91.00 Median : 92.50 Median :137.65 Median :20.23
## Mean :108.82 Mean : 94.52 Mean :152.90 Mean :23.00
## 3rd Qu.:133.75 3rd Qu.:133.25 3rd Qu.:205.19 3rd Qu.:31.60
## Max. :253.00 Max. :235.00 Max. :371.05 Max. :66.88
## newspaper sales
## Min. : 1.85 Min. : 5.66
## 1st Qu.:22.57 1st Qu.:10.35
## Median :31.93 Median :13.45
## Mean :34.65 Mean :14.04
## 3rd Qu.:44.90 3rd Qu.:17.60
## Max. :87.99 Max. :28.71
# Scatter plot of sales against the TV advertising budget.
my_data |>
  ggplot(aes(x = TV, y = sales)) +
  geom_point() +
  labs(
    title = "Sales vs. TV Advertising",
    x = "TV Advertising Budget",
    y = "Sales"
  )
# Scatter plot of sales against the radio advertising budget.
my_data |>
  ggplot(aes(x = radio, y = sales)) +
  geom_point() +
  labs(
    title = "Sales vs. Radio Advertising",
    x = "Radio Advertising Budget",
    y = "Sales"
  )
# Scatter plot of sales against the newspaper advertising budget.
my_data |>
  ggplot(aes(x = newspaper, y = sales)) +
  geom_point() +
  labs(
    title = "Sales vs. Newspaper Advertising",
    x = "Newspaper Advertising Budget",
    y = "Sales"
  )
# Sales against the TV budget, with a fitted linear-model trend line
# drawn over the points.
my_data |>
  ggplot(aes(x = TV, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Sales vs. TV Advertising",
    x = "TV Advertising Budget",
    y = "Sales"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Sales against the radio budget, with a fitted linear-model trend line
# drawn over the points.
my_data |>
  ggplot(aes(x = radio, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Sales vs. Radio Advertising",
    x = "Radio Advertising Budget",
    y = "Sales"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Sales against the newspaper budget, with a fitted linear-model trend
# line drawn over the points.
my_data |>
  ggplot(aes(x = newspaper, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Sales vs. Newspaper Advertising",
    x = "Newspaper Advertising Budget",
    y = "Sales"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Reshape the three channel columns into long form (one row per
# observation/channel pair), then draw one panel per channel showing the
# points and a fitted linear-model trend line.
ggplot(
  data = pivot_longer(
    my_data,
    cols = c(TV, radio, newspaper),
    names_to = "channel",
    values_to = "budget"
  ),
  mapping = aes(x = budget, y = sales)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(vars(channel)) +
  labs(
    title = "Sales vs. Advertising Channels",
    x = "Advertising Budget",
    y = "Sales"
  )
## `geom_smooth()` using formula = 'y ~ x'
Of the three advertising channels, TV has the strongest relationship with sales, showing a clear positive trend.