# Package setup ----
# Install each package only when it is not already available, then attach it.
# An unconditional install.packages() call re-downloads the package on every
# run and fails when offline or when no CRAN mirror is configured.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)
# Load data ----
# Read the advertising workbook (path is relative to the working directory)
# and print a compact column-by-column overview of the result.
# NOTE(review): columns X and X1 look like leftover row-index columns rather
# than measurements -- confirm before using them in any analysis.
my_data <- read_excel(path = "advertising_randomized.xlsx")
glimpse(x = my_data)
## Rows: 250
## Columns: 6
## $ X <dbl> 75, 50, 6, 189, 188, 220, 36, 122, 208, 95, 9, 99, 115, 78, …
## $ X1 <dbl> 76, 101, 65, 41, 76, 171, 48, 183, 122, 124, 97, 147, 39, 13…
## $ TV <dbl> 196.41, 297.96, 186.98, 141.62, 159.77, 265.41, 131.28, 257.…
## $ radio <dbl> 40.58, 37.36, 27.71, 7.58, 44.12, 32.25, 40.42, 29.64, 8.80,…
## $ newspaper <dbl> 43.99, 5.63, 14.32, 17.66, 72.07, 32.10, 24.03, 70.75, 60.69…
## $ sales <dbl> 16.22, 12.20, 4.39, 13.94, 20.30, 13.06, 14.36, 14.80, 8.02,…
# Preview the first six rows of the data.
my_data %>% head(n = 6L)
## # A tibble: 6 × 6
## X X1 TV radio newspaper sales
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 75 76 196. 40.6 44.0 16.2
## 2 50 101 298. 37.4 5.63 12.2
## 3 6 65 187. 27.7 14.3 4.39
## 4 189 41 142. 7.58 17.7 13.9
## 5 188 76 160. 44.1 72.1 20.3
## 6 220 171 265. 32.2 32.1 13.1
# Show the internal structure (class, dimensions, column types) of the data.
str(object = my_data)
## tibble [250 × 6] (S3: tbl_df/tbl/data.frame)
## $ X : num [1:250] 75 50 6 189 188 220 36 122 208 95 ...
## $ X1 : num [1:250] 76 101 65 41 76 171 48 183 122 124 ...
## $ TV : num [1:250] 196 298 187 142 160 ...
## $ radio : num [1:250] 40.58 37.36 27.71 7.58 44.12 ...
## $ newspaper: num [1:250] 43.99 5.63 14.32 17.66 72.07 ...
## $ sales : num [1:250] 16.22 12.2 4.39 13.94 20.3 ...
# Per-column summary statistics (min, quartiles, mean, max).
my_data %>% summary()
## X X1 TV radio
## Min. : 1.0 Min. : 1.0 Min. : 0.81 Min. : 0.04
## 1st Qu.: 54.0 1st Qu.: 62.0 1st Qu.: 90.20 1st Qu.:13.31
## Median :101.0 Median :103.0 Median :151.97 Median :24.35
## Mean :102.3 Mean :104.9 Mean :155.71 Mean :24.78
## 3rd Qu.:142.0 3rd Qu.:139.8 3rd Qu.:216.09 3rd Qu.:34.05
## Max. :269.0 Max. :292.0 Max. :386.12 Max. :65.93
## newspaper sales
## Min. : 0.01 Min. : 0.69
## 1st Qu.: 18.29 1st Qu.: 9.73
## Median : 32.35 Median :13.91
## Mean : 34.60 Mean :13.95
## 3rd Qu.: 46.72 3rd Qu.:17.99
## Max. :100.19 Max. :29.05
# Sales against TV advertising spend, with an lm-based smooth drawn on top of
# the points.
my_data %>%
  ggplot(mapping = aes(x = TV, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("TV Advertising vs Sales") +
  xlab("TV Advertising Spend") +
  ylab("Sales")
## `geom_smooth()` using formula = 'y ~ x'
# Sales against radio advertising spend, with an lm-based smooth drawn on top
# of the points.
my_data %>%
  ggplot(mapping = aes(x = radio, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Radio Advertising vs Sales") +
  xlab("Radio Advertising Spend") +
  ylab("Sales")
## `geom_smooth()` using formula = 'y ~ x'
# Sales against newspaper advertising spend, with an lm-based smooth drawn on
# top of the points.
my_data %>%
  ggplot(mapping = aes(x = newspaper, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Newspaper Advertising vs Sales") +
  xlab("Newspaper Advertising Spend") +
  ylab("Sales")
## `geom_smooth()` using formula = 'y ~ x'
# TV advertising spend against radio advertising spend, with an lm-based
# smooth. The title and axis labels are set explicitly so this figure is
# labelled the same way as the advertising-vs-sales plots in this script
# instead of falling back to the bare column names.
ggplot(
  data = my_data,
  mapping = aes(x = radio, y = TV)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Radio vs TV Advertising Spend",
    x = "Radio Advertising Spend",
    y = "TV Advertising Spend"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Sales against radio advertising spend, coloured by newspaper spend cut into
# three bins. The binning is done inline in aes(), so without an explicit
# label the legend title would be the raw expression text; labs(color = ...)
# gives it a readable name. Colour is mapped at the plot level, so
# geom_smooth() inherits it as well as geom_point().
ggplot(
  data = my_data,
  mapping = aes(x = radio, y = sales, color = cut(newspaper, breaks = 3))
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Radio Advertising vs Sales by Newspaper Spend",
    x = "Radio Advertising Spend",
    y = "Sales",
    color = "Newspaper Spend (3 bins)"
  )
## `geom_smooth()` using formula = 'y ~ x'
# TV advertising appears to have the strongest relationship with sales: its
# scatterplot shows the clearest positive linear trend. Radio also has a
# positive relationship with sales, while newspaper advertising appears weaker
# and more scattered.