# Download the advertising data set from GitHub and keep a local CSV copy
# (written without a row-name column).
advertising <- read.csv(
  file = "https://raw.githubusercontent.com/utjimmyx/regression/master/advertising.csv"
)
write.csv(x = advertising, file = "advertising.csv", row.names = FALSE)
# Install readxl only when it is missing, so that re-running the script does
# not reinstall the package (and require network access) on every run.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Import the randomized advertising data from an Excel workbook.
# NOTE(review): this workbook is not created anywhere in the visible script;
# it presumably has to exist in the working directory already -- confirm.
my_data <- read_excel("advertising_randomized.xlsx")
# Load the tidyverse (dplyr, tidyr, ggplot2, ...), installing it first only
# when it is not available, so repeated runs skip the slow reinstall.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Preview the first six rows of the imported data.
head(my_data, n = 6L)
## # A tibble: 6 × 6
## X X1 TV radio newspaper sales
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 114 82 228. 8.12 37.2 12.7
## 2 135 168 187. 22.1 29.4 14.1
## 3 38 104 124. 3.13 16.3 14.6
## 4 11 7 175. 27.6 44.8 14.1
## 5 84 179 167. 17.6 40.2 20.6
## 6 189 82 33.0 16.7 71.6 8.07
# Show every column with its type and the first few values.
dplyr::glimpse(my_data)
## Rows: 50
## Columns: 6
## $ X <dbl> 114, 135, 38, 11, 84, 189, 117, 202, 20, 217, 158, 104, 37, …
## $ X1 <dbl> 82, 168, 104, 7, 179, 82, 227, 262, 93, 87, 196, 184, 238, 1…
## $ TV <dbl> 228.04, 186.72, 123.51, 175.32, 167.16, 32.95, 62.36, 15.71,…
## $ radio <dbl> 8.12, 22.13, 3.13, 27.55, 17.61, 16.71, 18.28, 43.51, 8.28, …
## $ newspaper <dbl> 37.25, 29.36, 16.33, 44.80, 40.24, 71.61, 14.57, 59.05, 22.6…
## $ sales <dbl> 12.68, 14.14, 14.64, 14.09, 20.58, 8.07, 15.34, 12.48, 11.52…
# Scatter plot of sales against the TV budget, overlaid with a fitted
# straight line (method = "lm") drawn without a confidence band.
ggplot(data = my_data, mapping = aes(x = TV, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_minimal() +
  ggtitle("TV Advertising vs Sales") +
  xlab("TV Advertising Budget") +
  ylab("Sales")
## `geom_smooth()` using formula = 'y ~ x'

# Scatter plot of sales against the radio budget, overlaid with a fitted
# straight line (method = "lm") drawn without a confidence band.
ggplot(data = my_data, mapping = aes(x = radio, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_minimal() +
  ggtitle("Radio Advertising vs Sales") +
  xlab("Radio Advertising Budget") +
  ylab("Sales")
## `geom_smooth()` using formula = 'y ~ x'

# Scatter plot of sales against the newspaper budget, overlaid with a fitted
# straight line (method = "lm") drawn without a confidence band.
ggplot(data = my_data, mapping = aes(x = newspaper, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_minimal() +
  ggtitle("Newspaper Advertising vs Sales") +
  xlab("Newspaper Advertising Budget") +
  ylab("Sales")
## `geom_smooth()` using formula = 'y ~ x'

# Reshape to long format: the TV, radio and newspaper columns are stacked
# into one "Budget" column, with "Advertising_Channel" recording which
# column each value came from. All other columns are carried along.
long_data <- pivot_longer(
  my_data,
  cols = c(TV, radio, newspaper),
  names_to = "Advertising_Channel",
  values_to = "Budget"
)
# One panel per advertising channel: sales against budget with a fitted
# straight line. The x axis is free per panel; the y axis is shared.
ggplot(data = long_data, mapping = aes(x = Budget, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(vars(Advertising_Channel), scales = "free_x") +
  theme_minimal() +
  ggtitle("Comparison of Advertising Channels and Sales") +
  xlab("Advertising Budget") +
  ylab("Sales")
## `geom_smooth()` using formula = 'y ~ x'
