This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Install readxl only when it is missing, so the package is not downloaded
# and reinstalled on every knit of this document.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
library(readxl)
# Install tidyverse only when it is missing, so the package is not downloaded
# and reinstalled on every knit of this document.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the advertising data set from the Excel workbook.
my_data <- read_excel(path = "advertising_randomized.xlsx")

# Inspect Data: one entry per column with its type and leading values
my_data |> glimpse()
## Rows: 300
## Columns: 6
## $ X <dbl> 199, 116, 80, 97, 30, 83, 129, 96, 72, 138, 144, 117, 90, 23…
## $ X1 <dbl> 26, 165, 101, 92, 50, 105, 57, 21, 69, 69, 271, 246, 75, 69,…
## $ TV <dbl> 194.97, 185.40, 163.33, 228.05, 216.87, 238.19, 63.14, 128.8…
## $ radio <dbl> 28.65, 32.41, 19.27, 26.88, 54.40, 3.08, 19.94, 46.48, 19.40…
## $ newspaper <dbl> 38.78, 9.32, 80.67, 10.14, 34.85, 8.14, 24.13, 35.83, 35.93,…
## $ sales <dbl> 12.77, 16.01, 5.79, 9.03, 9.04, 12.90, 12.54, 13.00, 20.43, …
# Preview the first rows of the data set
my_data |> head()
## # A tibble: 6 × 6
## X X1 TV radio newspaper sales
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 199 26 195. 28.6 38.8 12.8
## 2 116 165 185. 32.4 9.32 16.0
## 3 80 101 163. 19.3 80.7 5.79
## 4 97 92 228. 26.9 10.1 9.03
## 5 30 50 217. 54.4 34.8 9.04
## 6 83 105 238. 3.08 8.14 12.9
# Compact structure overview: object class, dimensions, and column types
my_data |> str()
## tibble [300 × 6] (S3: tbl_df/tbl/data.frame)
## $ X : num [1:300] 199 116 80 97 30 83 129 96 72 138 ...
## $ X1 : num [1:300] 26 165 101 92 50 105 57 21 69 69 ...
## $ TV : num [1:300] 195 185 163 228 217 ...
## $ radio : num [1:300] 28.6 32.4 19.3 26.9 54.4 ...
## $ newspaper: num [1:300] 38.78 9.32 80.67 10.14 34.85 ...
## $ sales : num [1:300] 12.77 16.01 5.79 9.03 9.04 ...
# Per-column summary statistics (min, quartiles, median, mean, max)
my_data |> summary()
## X X1 TV radio
## Min. : 1.0 Min. : 0.00 Min. : 0.17 Min. : 0.08
## 1st Qu.: 60.0 1st Qu.: 51.75 1st Qu.: 74.35 1st Qu.:15.00
## Median : 96.5 Median : 92.00 Median :142.24 Median :24.91
## Mean :101.9 Mean : 95.88 Mean :145.94 Mean :25.82
## 3rd Qu.:136.2 3rd Qu.:137.25 3rd Qu.:204.72 3rd Qu.:35.32
## Max. :287.0 Max. :271.00 Max. :380.07 Max. :72.81
## newspaper sales
## Min. : 0.42 Min. : 0.040
## 1st Qu.:18.55 1st Qu.: 9.995
## Median :34.90 Median :13.795
## Mean :35.54 Mean :13.778
## 3rd Qu.:51.09 3rd Qu.:17.720
## Max. :90.79 Max. :27.150
# Sales vs. TV: scatter plot of sales against TV spend with a linear fit
my_data |>
  ggplot(aes(TV, sales)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "TV Advertising vs Sales",
    x = "TV Advertising Spend",
    y = "Sales"
  )
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Radio vs. Sales: scatter plot of sales against radio spend with a linear fit
my_data |>
  ggplot(aes(radio, sales)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Radio Advertising vs Sales",
    x = "Radio Advertising Spend",
    y = "Sales"
  )
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Newspaper vs. Sales: scatter plot of sales against newspaper spend with a
# linear fit
my_data |>
  ggplot(aes(newspaper, sales)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Newspaper Advertising vs Sales",
    x = "Newspaper Advertising Spend",
    y = "Sales"
  )
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Radio vs. TV: scatter plot of TV spend against radio spend with a linear
# fit, to see whether the two predictors move together.
# Axis labels are set explicitly so this plot matches the other scatter plots
# instead of showing the raw column names.
ggplot(
  data = my_data,
  mapping = aes(x = radio, y = TV)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Radio vs TV",
    x = "Radio Advertising Spend",
    y = "TV Advertising Spend"
  )
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Radio vs. Sales, grouped by newspaper spend: newspaper spend is cut into
# three equal-width bins, and each bin gets its own point colour and its own
# linear fit.
# The colour legend and both axes are labelled explicitly; otherwise the
# legend title would be the raw expression `cut(newspaper, breaks = 3)`.
ggplot(
  data = my_data,
  mapping = aes(x = radio, y = sales, color = cut(newspaper, breaks = 3))
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Radio vs sales, grouped by newspaper spend",
    x = "Radio Advertising Spend",
    y = "Sales",
    color = "Newspaper Advertising Spend"
  )
## `geom_smooth()` using formula = 'y ~ x'