# Fetch the advertising data set from GitHub into a data frame.
advertising <- read.csv(
  file = "https://raw.githubusercontent.com/utjimmyx/regression/master/advertising.csv"
)

# Keep a local copy in the working directory, without the row-name column.
write.csv(x = advertising, file = "advertising.csv", row.names = FALSE)
# Install readxl only when it is missing, so re-running the script does not
# reinstall the package (and require network access) every time.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Read the advertising workbook into a tibble. The .xlsx file is not created
# by the code shown here, so it must already be in the working directory.
my_data <- read_excel("advertising_randomized.xlsx")

# Install the tidyverse only when it is missing, then attach it. The guard
# avoids reinstalling the whole package set on every run of the script.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Preview the first six rows of the imported data.
head(x = my_data, n = 6L)
## # A tibble: 6 × 6
##       X    X1    TV radio newspaper sales
##   <dbl> <dbl> <dbl> <dbl>     <dbl> <dbl>
## 1   114    82 228.   8.12      37.2 12.7 
## 2   135   168 187.  22.1       29.4 14.1 
## 3    38   104 124.   3.13      16.3 14.6 
## 4    11     7 175.  27.6       44.8 14.1 
## 5    84   179 167.  17.6       40.2 20.6 
## 6   189    82  33.0 16.7       71.6  8.07
# Column-wise overview: one line per column with its type and first values.
glimpse(x = my_data)
## Rows: 50
## Columns: 6
## $ X         <dbl> 114, 135, 38, 11, 84, 189, 117, 202, 20, 217, 158, 104, 37, …
## $ X1        <dbl> 82, 168, 104, 7, 179, 82, 227, 262, 93, 87, 196, 184, 238, 1…
## $ TV        <dbl> 228.04, 186.72, 123.51, 175.32, 167.16, 32.95, 62.36, 15.71,…
## $ radio     <dbl> 8.12, 22.13, 3.13, 27.55, 17.61, 16.71, 18.28, 43.51, 8.28, …
## $ newspaper <dbl> 37.25, 29.36, 16.33, 44.80, 40.24, 71.61, 14.57, 59.05, 22.6…
## $ sales     <dbl> 12.68, 14.14, 14.64, 14.09, 20.58, 8.07, 15.34, 12.48, 11.52…
# Scatter plot of sales against the TV column, overlaid with a fitted
# linear-model line (confidence band switched off).
my_data %>%
  ggplot(aes(TV, sales)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("TV Advertising vs Sales") +
  xlab("TV Advertising Budget") +
  ylab("Sales") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Scatter plot of sales against the radio column, overlaid with a fitted
# linear-model line (confidence band switched off).
my_data %>%
  ggplot(aes(radio, sales)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Radio Advertising vs Sales") +
  xlab("Radio Advertising Budget") +
  ylab("Sales") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Scatter plot of sales against the newspaper column, overlaid with a fitted
# linear-model line (confidence band switched off).
my_data %>%
  ggplot(aes(newspaper, sales)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Newspaper Advertising vs Sales") +
  xlab("Newspaper Advertising Budget") +
  ylab("Sales") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Reshape to long format: the three channel columns are stacked so that each
# row holds one channel name ("Advertising_Channel") and its value ("Budget").
long_data <- pivot_longer(
  my_data,
  cols = all_of(c("TV", "radio", "newspaper")),
  names_to = "Advertising_Channel",
  values_to = "Budget"
)
# One panel per advertising channel, each with its own x-axis range, showing
# sales against budget with a fitted linear-model line (no confidence band).
long_data %>%
  ggplot(aes(Budget, sales)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~ Advertising_Channel, scales = "free_x") +
  ggtitle("Comparison of Advertising Channels and Sales") +
  xlab("Advertising Budget") +
  ylab("Sales") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'