R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Download the advertising data set from GitHub into a data frame.
advertising <- read.csv(
  file = "https://raw.githubusercontent.com/utjimmyx/regression/master/advertising.csv"
)

# Keep a local copy in the working directory, without the row-name column.
write.csv(x = advertising, file = "advertising.csv", row.names = FALSE)

# Install readxl only when it is missing, so re-running or re-knitting this
# document does not reinstall the package (and hit the network) every time.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
library(readxl)
# Read the first sheet of the workbook into a tibble.
# NOTE(review): this .xlsx file must already exist in the working directory;
# the code above only writes "advertising.csv" -- confirm where it comes from.
my_data <- read_excel("advertising (2)_randomized.xlsx")
## run the library
# Install tidyverse only when it is missing, so re-running or re-knitting this
# document does not reinstall the whole package set every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
# Attach the core tidyverse packages (dplyr, ggplot2, ...) used below.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Preview the first six rows of the imported data.
head(my_data, n = 6)
## # A tibble: 6 × 6
##       X    X1    TV radio newspaper sales
##   <dbl> <dbl> <dbl> <dbl>     <dbl> <dbl>
## 1    47   198 385.  15          3.2  3.16
## 2     7    54  18.0 19.4       12.8 19.1 
## 3   144    23 265.  19.3       33.8  4   
## 4     7    72 270.  14.9       17.3 15.2 
## 5    28    74  43.6  7.76      32.3  8.38
## 6   108    82 281.  22.2       56.7  4.94
# Compact overview: one line per column showing its type and first values.
my_data |> glimpse()
## Rows: 300
## Columns: 6
## $ X         <dbl> 47, 7, 144, 7, 28, 108, 28, 102, 112, 119, 18, 64, 167, 191,…
## $ X1        <dbl> 198, 54, 23, 72, 74, 82, 15, 96, 102, 99, 51, 40, 47, 127, 1…
## $ TV        <dbl> 385.01, 18.02, 265.35, 270.16, 43.59, 280.52, 81.43, 213.89,…
## $ radio     <dbl> 15.00, 19.44, 19.33, 14.86, 7.76, 22.20, 33.60, 17.02, 7.55,…
## $ newspaper <dbl> 3.20, 12.82, 33.81, 17.31, 32.27, 56.67, 27.59, 45.96, 40.53…
## $ sales     <dbl> 3.16, 19.10, 4.00, 15.24, 8.38, 4.94, 21.29, 17.91, 19.72, 6…
# With data but no aesthetics or layers, ggplot() draws only a blank canvas.
ggplot(my_data)

# Base R structure summary of the same tibble.
str(my_data)
## tibble [300 × 6] (S3: tbl_df/tbl/data.frame)
##  $ X        : num [1:300] 47 7 144 7 28 108 28 102 112 119 ...
##  $ X1       : num [1:300] 198 54 23 72 74 82 15 96 102 99 ...
##  $ TV       : num [1:300] 385 18 265.4 270.2 43.6 ...
##  $ radio    : num [1:300] 15 19.44 19.33 14.86 7.76 ...
##  $ newspaper: num [1:300] 3.2 12.8 33.8 17.3 32.3 ...
##  $ sales    : num [1:300] 3.16 19.1 4 15.24 8.38 ...
# Map TV to the x axis and sales to the y axis; no geom is added yet, so only
# the axes and grid are drawn.
ggplot(my_data, aes(x = TV, y = sales))

# Scatter plot of sales against TV.
my_data |>
  ggplot(aes(x = TV, y = sales)) +
  geom_point()

#> Warning: Removed 2 rows containing missing values or values outside the scale range
#> (`geom_point()`).


# Same scatter plot, with point colour mapped to the numeric newspaper column
# (drawn as a continuous colour gradient).
my_data |>
  ggplot(aes(x = TV, y = sales, color = newspaper)) +
  geom_point()

# cut() bins newspaper into three intervals so colour becomes a discrete
# (factor) scale instead of a gradient.
my_data |>
  ggplot(aes(x = TV, y = sales, color = cut(newspaper, breaks = 3))) +
  geom_point()