MSBA 550 In-Class Lab: Weeks 1 and 2

Here are the results of my work on the hello_world assignment from Weeks 1 and 2 in R!

# Package setup ----
# Install each package only when it is not already available, so re-running
# or re-knitting this script does not download and reinstall it every time.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
library(readxl)
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Download the advertising data set and keep a CSV copy in the working
# directory (without the row-name column).
advertising <- read.csv(
  file = "https://raw.githubusercontent.com/utjimmyx/regression/master/advertising.csv"
)
write.csv(x = advertising, file = "advertising.csv", row.names = FALSE)

# The exported data set was then uploaded locally as an Excel workbook;
# read that workbook for the rest of the analysis.
my_data <- read_excel(path = "advertising_randomized.xlsx")

# Preview the first rows of the imported data.
my_data |> head()
## # A tibble: 6 × 6
##       X    X1    TV radio newspaper sales
##   <dbl> <dbl> <dbl> <dbl>     <dbl> <dbl>
## 1    77   165  109. 14.8      79.8  10.1 
## 2    50   140  185.  3.34      9.56 10.5 
## 3    94    63  111. 10.5       3.69  6.78
## 4    59   119  179. 17.0      26.4  10.7 
## 5   182   136  177.  7.95     41.2  22.1 
## 6    16    52  194. 18.5      56.8  10.7
# Column-by-column overview: type and leading values of every column.
my_data |> glimpse()
## Rows: 250
## Columns: 6
## $ X         <dbl> 77, 50, 94, 59, 182, 16, 19, 28, 122, 23, 147, 119, 115, 136…
## $ X1        <dbl> 165, 140, 63, 119, 136, 52, 150, 94, 85, 8, 110, 86, 196, 8,…
## $ TV        <dbl> 109.48, 185.34, 110.69, 178.80, 176.72, 193.72, 31.17, 75.69…
## $ radio     <dbl> 14.83, 3.34, 10.46, 16.97, 7.95, 18.49, 7.22, 21.84, 3.05, 2…
## $ newspaper <dbl> 79.85, 9.56, 3.69, 26.43, 41.23, 56.75, 12.42, 54.38, 15.62,…
## $ sales     <dbl> 10.07, 10.52, 6.78, 10.72, 22.13, 10.71, 9.07, 12.54, 11.21,…
# Base R structure summary of the same object.
my_data |> str()
## tibble [250 × 6] (S3: tbl_df/tbl/data.frame)
##  $ X        : num [1:250] 77 50 94 59 182 16 19 28 122 23 ...
##  $ X1       : num [1:250] 165 140 63 119 136 52 150 94 85 8 ...
##  $ TV       : num [1:250] 109 185 111 179 177 ...
##  $ radio    : num [1:250] 14.83 3.34 10.46 16.97 7.95 ...
##  $ newspaper: num [1:250] 79.85 9.56 3.69 26.43 41.23 ...
##  $ sales    : num [1:250] 10.07 10.52 6.78 10.72 22.13 ...
# Scatter plots of sales against each channel column ----
# The same two plots are drawn for TV, radio and newspaper, so the plotting
# code lives in two small helpers instead of being copied three times.

# Scatter plot of `sales` against one column, with points colored by three
# equal-width bins of that column (cut(x, breaks = 3)).
#
# data:    data frame containing the `channel` column and a `sales` column.
# channel: name of the x-axis column, given as a string (e.g. "TV").
# Returns the ggplot object; it is drawn when printed at top level.
plot_sales_by_bin <- function(data, channel) {
  ggplot(
    data = data,
    mapping = aes(
      x = .data[[channel]],
      y = sales,
      color = cut(.data[[channel]], breaks = 3)
    )
  ) +
    geom_point() +
    # Label the axis and legend explicitly; otherwise they would show the
    # raw mapping expression rather than the column name.
    labs(x = channel, color = paste(channel, "range"))
}

# Scatter plot of `sales` against one column with a fitted straight line.
#
# data:    data frame containing the `channel` column and a `sales` column.
# channel: name of the x-axis column, given as a string (e.g. "TV").
# Returns the ggplot object; it is drawn when printed at top level.
plot_sales_trend <- function(data, channel) {
  ggplot(
    data = data,
    mapping = aes(x = .data[[channel]], y = sales)
  ) +
    geom_point() +
    # Passing the formula explicitly keeps the same y ~ x linear fit while
    # avoiding the "using formula = 'y ~ x'" message on every call.
    geom_smooth(method = "lm", formula = y ~ x) +
    labs(x = channel)
}

# TV vs Sales
plot_sales_by_bin(my_data, "TV")

plot_sales_trend(my_data, "TV")

# Radio vs Sales
plot_sales_by_bin(my_data, "radio")

plot_sales_trend(my_data, "radio")

# Newspaper vs Sales
plot_sales_by_bin(my_data, "newspaper")

plot_sales_trend(my_data, "newspaper")

Reflection

This project was fun. I’ve never written or used R before, so it was great to be hands-on and learn the language. As far as the results of the analysis,