R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Install readxl only when it is missing, so that re-knitting the document
# does not download and reinstall the package on every run.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
library(readxl)
# Install tidyverse only when it is missing, so that re-knitting the document
# does not download and reinstall the package on every run.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the advertising workbook; the relative path is resolved against the
# current working directory.
my_data <- read_excel(path = "advertising_randomized.xlsx")

# Inspect Data: list every column with its type and first few values
my_data |>
  glimpse()
## Rows: 300
## Columns: 6
## $ X         <dbl> 199, 116, 80, 97, 30, 83, 129, 96, 72, 138, 144, 117, 90, 23…
## $ X1        <dbl> 26, 165, 101, 92, 50, 105, 57, 21, 69, 69, 271, 246, 75, 69,…
## $ TV        <dbl> 194.97, 185.40, 163.33, 228.05, 216.87, 238.19, 63.14, 128.8…
## $ radio     <dbl> 28.65, 32.41, 19.27, 26.88, 54.40, 3.08, 19.94, 46.48, 19.40…
## $ newspaper <dbl> 38.78, 9.32, 80.67, 10.14, 34.85, 8.14, 24.13, 35.83, 35.93,…
## $ sales     <dbl> 12.77, 16.01, 5.79, 9.03, 9.04, 12.90, 12.54, 13.00, 20.43, …
# Preview the first rows of the table
my_data |>
  head()
## # A tibble: 6 × 6
##       X    X1    TV radio newspaper sales
##   <dbl> <dbl> <dbl> <dbl>     <dbl> <dbl>
## 1   199    26  195. 28.6      38.8  12.8 
## 2   116   165  185. 32.4       9.32 16.0 
## 3    80   101  163. 19.3      80.7   5.79
## 4    97    92  228. 26.9      10.1   9.03
## 5    30    50  217. 54.4      34.8   9.04
## 6    83   105  238.  3.08      8.14 12.9
# Show the object's class, dimensions and per-column storage type
my_data |>
  str()
## tibble [300 × 6] (S3: tbl_df/tbl/data.frame)
##  $ X        : num [1:300] 199 116 80 97 30 83 129 96 72 138 ...
##  $ X1       : num [1:300] 26 165 101 92 50 105 57 21 69 69 ...
##  $ TV       : num [1:300] 195 185 163 228 217 ...
##  $ radio    : num [1:300] 28.6 32.4 19.3 26.9 54.4 ...
##  $ newspaper: num [1:300] 38.78 9.32 80.67 10.14 34.85 ...
##  $ sales    : num [1:300] 12.77 16.01 5.79 9.03 9.04 ...
# Per-column summary statistics (min, quartiles, mean, max)
my_data |>
  summary()
##        X               X1               TV             radio      
##  Min.   :  1.0   Min.   :  0.00   Min.   :  0.17   Min.   : 0.08  
##  1st Qu.: 60.0   1st Qu.: 51.75   1st Qu.: 74.35   1st Qu.:15.00  
##  Median : 96.5   Median : 92.00   Median :142.24   Median :24.91  
##  Mean   :101.9   Mean   : 95.88   Mean   :145.94   Mean   :25.82  
##  3rd Qu.:136.2   3rd Qu.:137.25   3rd Qu.:204.72   3rd Qu.:35.32  
##  Max.   :287.0   Max.   :271.00   Max.   :380.07   Max.   :72.81  
##    newspaper         sales       
##  Min.   : 0.42   Min.   : 0.040  
##  1st Qu.:18.55   1st Qu.: 9.995  
##  Median :34.90   Median :13.795  
##  Mean   :35.54   Mean   :13.778  
##  3rd Qu.:51.09   3rd Qu.:17.720  
##  Max.   :90.79   Max.   :27.150
# TV vs. Sales: sales plotted against TV spend, with a linear (lm) fit overlaid
my_data |>
  ggplot(mapping = aes(x = TV, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "TV Advertising vs Sales",
    x = "TV Advertising Spend",
    y = "Sales"
  )
## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

# Radio vs. Sales: sales plotted against radio spend, with a linear (lm) fit
my_data |>
  ggplot(mapping = aes(x = radio, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Radio Advertising vs Sales",
    x = "Radio Advertising Spend",
    y = "Sales"
  )
## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

# Newspaper vs. Sales: sales plotted against newspaper spend, with a linear
# (lm) fit overlaid
my_data |>
  ggplot(mapping = aes(x = newspaper, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Newspaper Advertising vs Sales",
    x = "Newspaper Advertising Spend",
    y = "Sales"
  )
## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

# Radio vs. TV: plots the two spend channels against each other (not against
# sales), with a linear (lm) fit overlaid.
ggplot(
  data = my_data,
  mapping = aes(x = radio, y = TV)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  # Label the axes the same way as the other plots instead of showing the raw
  # column names.
  labs(
    title = "Radio vs TV",
    x = "Radio Advertising Spend",
    y = "TV Advertising Spend"
  )
## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

# Radio vs. Sales, grouped by newspaper spend
#
# cut(newspaper, breaks = 3) bins newspaper spend into three intervals.
# Mapping the bins to colour gives each bin its own points and its own
# linear (lm) fit.
ggplot(
  data = my_data,
  mapping = aes(x = radio, y = sales, color = cut(newspaper, breaks = 3))
) +
  geom_point() +
  geom_smooth(method = "lm") +
  # Without an explicit colour label the legend title would be the raw
  # expression "cut(newspaper, breaks = 3)".
  labs(
    title = "Radio vs sales, grouped by newspaper spend",
    x = "Radio Advertising Spend",
    y = "Sales",
    color = "Newspaper Advertising Spend"
  )
## `geom_smooth()` using formula = 'y ~ x'