This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
```{r}
library(readxl)
library(tidyverse)
```
# Download the advertising data set from GitHub into a data frame, then keep
# a local CSV copy without the row-name column.
# NOTE(review): `advertising` is not referenced again in the visible code; the
# analysis reads "advertising_randomized.xlsx" instead -- confirm this download
# is still needed.
advertising <- utils::read.csv(
  file = "https://raw.githubusercontent.com/utjimmyx/regression/master/advertising.csv"
)
utils::write.csv(
  x = advertising,
  file = "advertising.csv",
  row.names = FALSE
)
# Install the packages this analysis needs, but only when they are missing.
# An unconditional install.packages() call re-downloads the package on every
# run and can fail while knitting if no CRAN mirror is configured.
for (pkg in c("readxl", "tidyverse")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# Attach the packages: readxl supplies read_excel(); tidyverse supplies
# glimpse() and ggplot().
library(readxl)
library(tidyverse)

# Read the advertising workbook (path relative to the working directory)
# into `my_data`, the data set used by everything that follows.
my_data <- read_excel("advertising_randomized.xlsx")
# Preview the first rows of my_data, then print a compact overview with one
# line per column. Each call sits on its own line: R cannot parse two
# statements on one line unless they are separated by `;`.
head(my_data)
glimpse(my_data)
# Start from a plot that has data attached but no aesthetics or layers yet.
ggplot(data = my_data)

# Check the column names and types before mapping them to aesthetics.
str(my_data)

# Scatter plot: TV on the x axis, sales on the y axis.
ggplot(
  data = my_data,
  mapping = aes(x = TV, y = sales)
) +
  geom_point()
#> Warning: Removed 2 rows containing missing values or values outside the scale range
#> (`geom_point()`).
# NOTE(review): the summary(my_data) output later in this file lists no NA
# counts, so this pasted warning may not belong to my_data -- confirm.
# Start again from a plot with data only (no aesthetics or layers).
ggplot(data = my_data)

# Re-check the column names and types.
str(my_data)

# Scatter plot of sales against TV, with point colour mapped to the numeric
# newspaper column.
ggplot(
  data = my_data,
  mapping = aes(x = TV, y = sales, color = newspaper)
) +
  geom_point()
# Re-check the column names and types.
str(my_data)

# Scatter plot of sales against TV, coloured by newspaper after cut() splits
# its range into three equal-width intervals.
ggplot(
  data = my_data,
  mapping = aes(x = TV, y = sales, color = cut(newspaper, breaks = 3))
) +
  geom_point()

## Load Libraries
library(readxl)
library(tidyverse)
# Fetch the advertising data set from GitHub and keep a local CSV copy
# without the row-name column.
advertising <- utils::read.csv(
  file = "https://raw.githubusercontent.com/utjimmyx/regression/master/advertising.csv"
)
utils::write.csv(
  x = advertising,
  file = "advertising.csv",
  row.names = FALSE
)

# Load the workbook that the rest of the analysis uses.
my_data <- read_excel(path = "advertising_randomized.xlsx")
# Preview the first rows of my_data.
head(my_data)
## # A tibble: 6 × 6
## X X1 TV radio newspaper sales
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 218 54 295. 19.9 20.7 13.3
## 2 173 118 191 23.2 70.3 6.6
## 3 162 13 62.5 21.5 65.6 8.43
## 4 191 131 63.4 31.5 40.5 4.02
## 5 155 34 203. 11.9 19.6 14.6
## 6 112 154 64.6 37.8 37.3 18.2
# Show the structure of my_data: its dimensions, then each column's type and
# leading values.
str(my_data)
## tibble [300 × 6] (S3: tbl_df/tbl/data.frame)
## $ X : num [1:300] 218 173 162 191 155 112 17 52 39 156 ...
## $ X1 : num [1:300] 54 118 13 131 34 154 95 21 90 87 ...
## $ TV : num [1:300] 295.1 191 62.5 63.4 203.3 ...
## $ radio : num [1:300] 19.9 23.2 21.5 31.5 11.9 ...
## $ newspaper: num [1:300] 20.7 70.3 65.6 40.5 19.6 ...
## $ sales : num [1:300] 13.29 6.6 8.43 4.02 14.65 ...
# Print per-column summary statistics (minimum, quartiles, median, mean,
# maximum).
summary(my_data)
## X X1 TV radio
## Min. : 1.0 Min. : 0.0 Min. : 1.7 Min. : 0.10
## 1st Qu.: 57.0 1st Qu.: 54.0 1st Qu.: 83.5 1st Qu.:11.94
## Median :103.0 Median : 94.0 Median :142.9 Median :23.45
## Mean :105.0 Mean :100.5 Mean :152.2 Mean :24.72
## 3rd Qu.:145.2 3rd Qu.:141.5 3rd Qu.:210.7 3rd Qu.:34.81
## Max. :291.0 Max. :283.0 Max. :426.1 Max. :75.19
## newspaper sales
## Min. : 0.07 Min. : 0.28
## 1st Qu.:16.20 1st Qu.:10.76
## Median :29.12 Median :14.44
## Mean :33.41 Mean :14.36
## 3rd Qu.:48.32 3rd Qu.:18.18
## Max. :98.83 Max. :28.40
# Print a transposed overview: row and column counts, then one line per
# column with its type and first values.
glimpse(my_data)
## Rows: 300
## Columns: 6
## $ X <dbl> 218, 173, 162, 191, 155, 112, 17, 52, 39, 156, 10, 115, 46, …
## $ X1 <dbl> 54, 118, 13, 131, 34, 154, 95, 21, 90, 87, 129, 80, 98, 31, …
## $ TV <dbl> 295.08, 191.00, 62.53, 63.37, 203.32, 64.56, 85.74, 184.46, …
## $ radio <dbl> 19.89, 23.19, 21.53, 31.52, 11.89, 37.75, 12.86, 60.74, 8.34…
## $ newspaper <dbl> 20.73, 70.27, 65.62, 40.46, 19.56, 37.32, 18.50, 22.87, 40.5…
## $ sales <dbl> 13.29, 6.60, 8.43, 4.02, 14.65, 18.17, 19.36, 15.29, 15.51, …
# Scatter plot of sales against TV with a straight-line (lm) fit drawn on top;
# se = FALSE hides the confidence band around the line.
ggplot(my_data, aes(x = TV, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("TV Advertising vs Sales") +
  xlab("TV Advertising Budget") +
  ylab("Sales") +
  theme_minimal()
# Scatter plot of sales against radio with a straight-line (lm) fit drawn on
# top; se = FALSE hides the confidence band around the line.
ggplot(my_data, aes(x = radio, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Radio Advertising vs Sales") +
  xlab("Radio Advertising Budget") +
  ylab("Sales") +
  theme_minimal()
# Scatter plot of sales against newspaper with a straight-line (lm) fit drawn
# on top; se = FALSE hides the confidence band around the line.
ggplot(my_data, aes(x = newspaper, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Newspaper Advertising vs Sales") +
  xlab("Newspaper Advertising Budget") +
  ylab("Sales") +
  theme_minimal()
## TV vs Sales Colored by Newspaper
# Scatter plot of sales against TV, coloured by newspaper after cut() splits
# its range into three equal-width intervals. Because colour is mapped in the
# top-level aes(), geom_smooth() draws one lm line per interval.
ggplot(
  my_data,
  aes(x = TV, y = sales, colour = cut(newspaper, breaks = 3))
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("TV Advertising vs Sales Colored by Newspaper Spending") +
  xlab("TV Advertising Budget") +
  ylab("Sales") +
  labs(colour = "Newspaper Level") +
  theme_minimal()
’