# Start from a clean session: remove every object from the workspace,
# trigger garbage collection, and emit a form feed (RStudio treats the
# form feed as "clear the console").
rm(list = ls())
gc()
##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 521207 27.9    1158970 61.9   660385 35.3
## Vcells 947154  7.3    8388608 64.0  1769723 13.6
cat("\f")

Simple linear (bivariate) regression

  1. Pick any two quantitative variables from a data set that interests you.

I chose the Maven Toys data set for the 7B discussion.

library(readr)

# Read the product table and the sales table from CSV files in the
# working directory.
Prod <- read.csv(file = "products.csv")
Sales <- read.csv(file = "sales.csv")

# The previous discussion used the inventory and product tables. This time
# the sales table is joined to the product table on Product_ID, so every sale
# row carries its product's attributes; that makes it easy to choose a
# dependent and an independent variable.
Merged <- merge(Sales, Prod, by = "Product_ID")

# Preview the first rows of the joined table.
head(Merged)
##   Product_ID Sale_ID       Date Store_ID Units  Product_Name Product_Category
## 1          1  209473 2017-07-11       46     1 Action Figure             Toys
## 2          1  818753 2018-09-23        2     1 Action Figure             Toys
## 3          1    1956 2017-01-02       10     1 Action Figure             Toys
## 4          1  443121 2018-01-16       17     1 Action Figure             Toys
## 5          1  561047 2018-04-06       34     1 Action Figure             Toys
## 6          1  147861 2017-05-20       41     1 Action Figure             Toys
##   Product_Cost Product_Price
## 1       $9.99        $15.99 
## 2       $9.99        $15.99 
## 3       $9.99        $15.99 
## 4       $9.99        $15.99 
## 5       $9.99        $15.99 
## 6       $9.99        $15.99
# Preview the last six rows of the joined table.
tail(Merged, n = 6L)
##        Product_ID Sale_ID       Date Store_ID Units  Product_Name
## 829257         35   89979 2017-04-01        6     1 Uno Card Game
## 829258         35  702572 2018-07-03       23     1 Uno Card Game
## 829259         35  322881 2017-10-22        9     1 Uno Card Game
## 829260         35  408174 2017-12-24        7     2 Uno Card Game
## 829261         35  702578 2018-07-03       23     1 Uno Card Game
## 829262         35  691042 2018-06-26        6     1 Uno Card Game
##        Product_Category Product_Cost Product_Price
## 829257            Games       $3.99         $7.99 
## 829258            Games       $3.99         $7.99 
## 829259            Games       $3.99         $7.99 
## 829260            Games       $3.99         $7.99 
## 829261            Games       $3.99         $7.99 
## 829262            Games       $3.99         $7.99
  1. My dependent variable is the number of units sold (Units) and my independent variable is the product price (Product_Price).

                               $$Units_i = \beta_0 + \beta_1 \, Product\_Price_i + \epsilon_i$$
  2. Estimate the linear regression in R using the lm() command.

# Regress Units on Product_Price by ordinary least squares and print the fit.
# NOTE: Product_Price is still text (e.g. "$15.99") at this point, so lm()
# treats it as a categorical predictor with one coefficient per price level.
LR <- lm(Units ~ Product_Price, data = Merged)
LR
## 
## Call:
## lm(formula = Units ~ Product_Price, data = Merged)
## 
## Coefficients:
##          (Intercept)  Product_Price$11.99   Product_Price$12.99   
##             1.439856             -0.321242             -0.264789  
## Product_Price$14.99   Product_Price$15.99   Product_Price$19.99   
##            -0.084047             -0.122722             -0.297730  
##  Product_Price$2.99   Product_Price$20.99   Product_Price$24.99   
##             0.150791             -0.313566             -0.405632  
## Product_Price$25.99    Product_Price$3.99   Product_Price$39.99   
##            -0.320264              0.255159             -0.196112  
##  Product_Price$4.99    Product_Price$5.99    Product_Price$6.99   
##            -0.324501             -0.349222             -0.205568  
##  Product_Price$7.99    Product_Price$8.99    Product_Price$9.99   
##            -0.397147              0.009842             -0.211014
  1. Interpret the slope and intercept parameters :

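The intercept is 1.439856. Because Product_Price was read in as text (for example "$15.99"), lm() treated it as a categorical variable rather than a number. The intercept is therefore the average number of units sold per sale at the baseline (omitted) price level, and each Product_Price coefficient is the difference in average units sold between that price level and the baseline, so there is no single slope yet.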

# Convert Product_Price from character to numeric by removing the "$" and ","
# characters, then refit the regression with the numeric predictor.
Merged[["Product_Price"]] <- as.numeric(
  gsub('[$,]', '', Merged[["Product_Price"]])
)
LR1 <- lm(Units ~ Product_Price, data = Merged)
LR1
## 
## Call:
## lm(formula = Units ~ Product_Price, data = Merged)
## 
## Coefficients:
##   (Intercept)  Product_Price  
##      1.442307      -0.009236

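For each one-dollar increase in product price, the expected number of units sold per sale decreases by 0.009236. The slope parameter is negative, so higher-priced products tend to sell fewer units per sale. The intercept, 1.442307, is the predicted number of units sold when the product price is zero.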

Replicate the slope and intercept parameter using the covariance/variance formulas, like we did in class with Excel :

# Reproduce the OLS estimates by hand, with y = Units and x = Product_Price:
#   slope     = cov(y, x) / var(x)
#   intercept = mean(y) - slope * mean(x)
Slope <- with(Merged, cov(Units, Product_Price) / var(Product_Price))
Slope
## [1] -0.009236223
Intercept1 <- with(Merged, mean(Units) - Slope * mean(Product_Price))
Intercept1
## [1] 1.442307
# Refit with lm() to confirm that it reports the same intercept and slope as
# the manually computed Intercept1 and Slope values.
lm(Units ~ Product_Price, data = Merged)
## 
## Call:
## lm(formula = Units ~ Product_Price, data = Merged)
## 
## Coefficients:
##   (Intercept)  Product_Price  
##      1.442307      -0.009236

Both the Slope and Intercept1 values match the coefficients reported by the lm() command.