## R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

## Load Libraries

```{r}
library(readxl)
library(tidyverse)
```

# Fetch the advertising CSV from GitHub into a data frame, then save a copy
# as "advertising.csv" (row names omitted) in the working directory.
advertising <- read.csv(
  file = "https://raw.githubusercontent.com/utjimmyx/regression/master/advertising.csv"
)
write.csv(x = advertising, file = "advertising.csv", row.names = FALSE)

# One-time setup: install readxl / tidyverse only when they are missing, so
# re-running or knitting this document does not reinstall them every time.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}

## run the library
library(readxl)
library(tidyverse)

# Read the Excel workbook (path is relative to the working directory) into
# `my_data`, which the statements that follow inspect and plot.
my_data <- read_excel("advertising_randomized.xlsx")

# Preview the first rows, then list every column with its type and leading
# values. Each call sits on its own line: R needs a newline (or `;`) between
# statements.
head(my_data)
glimpse(my_data)

# Start from an empty canvas: only the data is attached, no aesthetics or
# layers yet.
ggplot(data = my_data)

# Check the column types before mapping columns to aesthetics.
str(my_data)

# Scatter plot of sales against TV.
ggplot(
  data = my_data,
  mapping = aes(x = TV, y = sales)
) +
  geom_point()
#> Warning: Removed 2 rows containing missing values or values outside the scale range
#> (`geom_point()`).

# Empty canvas again: data attached, nothing drawn.
ggplot(data = my_data)

# Check the column types before mapping columns to aesthetics.
str(my_data)

# Scatter plot of sales against TV; `newspaper` is numeric, so mapping it to
# `color` gives a continuous colour gradient.
ggplot(
  data = my_data,
  mapping = aes(x = TV, y = sales, color = newspaper)
) +
  geom_point()

# Check the column types before mapping columns to aesthetics.
str(my_data)

# Scatter plot of sales against TV; `cut(newspaper, breaks = 3)` bins the
# numeric column into three equal-width intervals, so the colour scale is
# discrete (one colour per interval).
ggplot(
  data = my_data,
  mapping = aes(x = TV, y = sales, color = cut(newspaper, breaks = 3))
) +
  geom_point()

## Load Libraries

library(readxl)
library(tidyverse)

## Load the Dataset

# Download the advertising CSV from GitHub and save a copy as
# "advertising.csv" (row names omitted) in the working directory.
# NOTE(review): `advertising` is not referenced again in the visible part of
# this file; confirm whether the download is still needed.
advertising <- read.csv(
  file = "https://raw.githubusercontent.com/utjimmyx/regression/master/advertising.csv"
)
write.csv(x = advertising, file = "advertising.csv", row.names = FALSE)

# The exploration and plots below all work on `my_data`, read from the
# Excel workbook (path is relative to the working directory).
my_data <- read_excel(path = "advertising_randomized.xlsx")

## Explore the Data

# Print the first six rows of the data.
head(x = my_data, n = 6L)
## # A tibble: 6 × 6
##       X    X1    TV radio newspaper sales
##   <dbl> <dbl> <dbl> <dbl>     <dbl> <dbl>
## 1   218    54 295.   19.9      20.7 13.3 
## 2   173   118 191    23.2      70.3  6.6 
## 3   162    13  62.5  21.5      65.6  8.43
## 4   191   131  63.4  31.5      40.5  4.02
## 5   155    34 203.   11.9      19.6 14.6 
## 6   112   154  64.6  37.8      37.3 18.2
# Compact structure listing: dimensions, column names, types, leading values.
utils::str(object = my_data)
## tibble [300 × 6] (S3: tbl_df/tbl/data.frame)
##  $ X        : num [1:300] 218 173 162 191 155 112 17 52 39 156 ...
##  $ X1       : num [1:300] 54 118 13 131 34 154 95 21 90 87 ...
##  $ TV       : num [1:300] 295.1 191 62.5 63.4 203.3 ...
##  $ radio    : num [1:300] 19.9 23.2 21.5 31.5 11.9 ...
##  $ newspaper: num [1:300] 20.7 70.3 65.6 40.5 19.6 ...
##  $ sales    : num [1:300] 13.29 6.6 8.43 4.02 14.65 ...
# Per-column summary: minimum, quartiles, median, mean and maximum.
summary(object = my_data)
##        X               X1              TV            radio      
##  Min.   :  1.0   Min.   :  0.0   Min.   :  1.7   Min.   : 0.10  
##  1st Qu.: 57.0   1st Qu.: 54.0   1st Qu.: 83.5   1st Qu.:11.94  
##  Median :103.0   Median : 94.0   Median :142.9   Median :23.45  
##  Mean   :105.0   Mean   :100.5   Mean   :152.2   Mean   :24.72  
##  3rd Qu.:145.2   3rd Qu.:141.5   3rd Qu.:210.7   3rd Qu.:34.81  
##  Max.   :291.0   Max.   :283.0   Max.   :426.1   Max.   :75.19  
##    newspaper         sales      
##  Min.   : 0.07   Min.   : 0.28  
##  1st Qu.:16.20   1st Qu.:10.76  
##  Median :29.12   Median :14.44  
##  Mean   :33.41   Mean   :14.36  
##  3rd Qu.:48.32   3rd Qu.:18.18  
##  Max.   :98.83   Max.   :28.40
# Transposed preview: one line per column with its type and leading values.
dplyr::glimpse(x = my_data)
## Rows: 300
## Columns: 6
## $ X         <dbl> 218, 173, 162, 191, 155, 112, 17, 52, 39, 156, 10, 115, 46, …
## $ X1        <dbl> 54, 118, 13, 131, 34, 154, 95, 21, 90, 87, 129, 80, 98, 31, …
## $ TV        <dbl> 295.08, 191.00, 62.53, 63.37, 203.32, 64.56, 85.74, 184.46, …
## $ radio     <dbl> 19.89, 23.19, 21.53, 31.52, 11.89, 37.75, 12.86, 60.74, 8.34…
## $ newspaper <dbl> 20.73, 70.27, 65.62, 40.46, 19.56, 37.32, 18.50, 22.87, 40.5…
## $ sales     <dbl> 13.29, 6.60, 8.43, 4.02, 14.65, 18.17, 19.36, 15.29, 15.51, …

## TV vs Sales

# Scatter plot of sales against TV with a fitted straight line on top
# (lm smoother, confidence band switched off).
ggplot(my_data, aes(x = TV, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("TV Advertising vs Sales") +
  xlab("TV Advertising Budget") +
  ylab("Sales") +
  theme_minimal()

## Radio vs Sales

# Scatter plot of sales against radio with a fitted straight line on top
# (lm smoother, confidence band switched off). The data is piped into
# ggplot(); `%>%` binds tighter than `+`, so the layers attach to the plot.
my_data %>%
  ggplot(aes(x = radio, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(title = "Radio Advertising vs Sales") +
  labs(x = "Radio Advertising Budget", y = "Sales") +
  theme_minimal()

## Newspaper vs Sales

# Scatter plot of sales against newspaper with a fitted straight line on top
# (lm smoother, confidence band switched off). Theme and labels are set
# first; the points are still drawn before the smoother.
ggplot(my_data, aes(x = newspaper, y = sales)) +
  theme_minimal() +
  labs(
    y = "Sales",
    x = "Newspaper Advertising Budget",
    title = "Newspaper Advertising vs Sales"
  ) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

## TV vs Sales Colored by Newspaper

# Scatter plot of sales against TV, coloured by `newspaper` binned into three
# equal-width intervals. The colour mapping is set at plot level, so the lm
# smoother (confidence band off) fits one straight line per interval.
ggplot(
  my_data,
  aes(x = TV, y = sales, colour = cut(newspaper, breaks = 3))
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("TV Advertising vs Sales Colored by Newspaper Spending") +
  xlab("TV Advertising Budget") +
  ylab("Sales") +
  labs(colour = "Newspaper Level") +
  theme_minimal()