Multiple regression

## In class exercise (first session): Churros ########################################

## Read data 
churros <- read.csv(file='C:/Users/ehk994/Desktop/Teaching/Research for Marketing Communications/5 - Multiple regression/Class exercise/churros.csv', stringsAsFactors = F, header=T, na.strings=c("","NA"))

## Create data 
biteNum <- c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
bite <- c(0.0,  1.2, 0.8, 1.3, 1.1, 0.9, 1.3, 1.0, 0.7, 1.2, 1.1, 0.8, 1.5, 0.7, 0.6, 0.8)
length <- c(15.0, 13.8, 13.0, 11.7, 10.6, 9.7, 8.4, 7.4, 6.7, 5.5, 4.4, 3.6, 2.1, 1.4, 0.8, 0.0)

churros <- data.frame(biteNum, bite, length)

## Linear regression
s <- lm(length ~ biteNum, data = churros)
summary(s)

## 
## Call:
## lm(formula = length ~ biteNum, data = churros)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.44588 -0.14114  0.00235  0.11529  0.51103 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 14.77353    0.11915  123.99   <2e-16 ***
## biteNum     -1.01897    0.01354  -75.28   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2496 on 14 degrees of freedom
## Multiple R-squared:  0.9975, Adjusted R-squared:  0.9974 
## F-statistic:  5668 on 1 and 14 DF,  p-value: < 2.2e-16

## Scatter plot
attach(churros)

## The following objects are masked _by_ .GlobalEnv:
## 
##     bite, biteNum, length

plot(biteNum, length, main="Univariate Regression for Churros",
     xlab="Bite Number ", ylab="Length ", pch=19)

abline(lm(length~biteNum), col="red") # regression line (y~x)
lines(lowess(biteNum,length), col="blue") # lowess line (x,y)

## Example 1: Demand Analysis #################################################################
## Read data file
demand <- read.csv(file='C:/Users/ehk994/Desktop/Teaching/Research for Marketing Communications/5 - Multiple regression/demand_analysis.csv', stringsAsFactors = F, header=T, na.strings=c("","NA"))

## Linear regression
d <- lm(Demand ~ Price, data = demand)
summary(d)

## 
## Call:
## lm(formula = Demand ~ Price, data = demand)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8591 -0.4188 -0.2024  0.4543  1.4543 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.12794    0.30112   33.63  < 2e-16 ***
## Price       -0.89555    0.06377  -14.04 9.96e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6183 on 30 degrees of freedom
## Multiple R-squared:  0.868,  Adjusted R-squared:  0.8636 
## F-statistic: 197.2 on 1 and 30 DF,  p-value: 9.957e-15

## Plot for linear regression 
library(ggplot2)
ggplot(demand, aes(Price, Demand)) +
  geom_point() +
  stat_smooth(method = lm) #fitted line

## Predicted values
fitted(d)

##        1        2        3        4        5        6        7        8 
## 6.545722 6.993499 5.650168 4.754613 4.306836 3.859059 8.336830 6.545722 
##        9       10       11       12       13       14       15       16 
## 5.202390 7.441276 6.993499 8.336830 8.336830 7.441276 7.441276 8.784607 
##       17       18       19       20       21       22       23       24 
## 7.441276 5.829278 5.650168 6.545722 6.097945 6.545722 3.411282 6.545722 
##       25       26       27       28       29       30       31       32 
## 4.306836 6.545722 3.859059 5.202390 3.859059 6.993499 3.859059 8.336830

d2 <- data.frame(Demand = demand$Demand, Price = demand$Price, Fitted = fitted(d)) # create data frame with fitted values

## Example 2: New Product Development #################################################################
## Read data file
newProduct <- read.csv(file='C:/Users/ehk994/Desktop/Teaching/Research for Marketing Communications/5 - Multiple regression/new_product.csv', stringsAsFactors = F, header=T, na.strings=c("","NA"))

## Historgram (distribution)
newProduct$Sales <- gsub(",","",newProduct$Sales)
newProduct$Sales <- as.numeric(as.character(newProduct$Sales)) # convert data type

hist(newProduct$Sales) # histogram

hist(newProduct$DWBconcept)

hist(newProduct$Uniqueness)

hist(newProduct$DWBproto)

## Summary statistics
summary(newProduct)

##      Sales           DWBconcept      Uniqueness       DWBproto    
##  Min.   :  10850   Min.   :21.00   Min.   :13.00   Min.   :13.00  
##  1st Qu.:  82500   1st Qu.:31.00   1st Qu.:19.25   1st Qu.:20.00  
##  Median : 132000   Median :36.50   Median :25.00   Median :25.50  
##  Mean   : 219696   Mean   :35.94   Mean   :25.44   Mean   :27.09  
##  3rd Qu.: 264750   3rd Qu.:39.00   3rd Qu.:29.00   3rd Qu.:29.00  
##  Max.   :1017000   Max.   :54.00   Max.   :60.00   Max.   :53.00  
##  NA's   :23        NA's   :23      NA's   :23      NA's   :23

# Another summary
library(psych)

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

describe(newProduct)

##            vars  n      mean        sd   median   trimmed       mad   min
## Sales         1 34 219695.88 235776.82 132000.0 175733.57 116384.10 10850
## DWBconcept    2 34     35.94      7.08     36.5     35.61      6.67    21
## Uniqueness    3 34     25.44      9.43     25.0     24.46      7.41    13
## DWBproto      4 34     27.09      8.89     25.5     26.07      5.93    13
##                max   range skew kurtosis       se
## Sales      1017000 1006150 1.78     2.69 40435.39
## DWBconcept      54      33 0.38     0.02     1.21
## Uniqueness      60      47 1.42     3.09     1.62
## DWBproto        53      40 1.15     1.10     1.52

## Regression
linearNP <- lm(Sales ~., data=newProduct)
summary(linearNP)

## 
## Call:
## lm(formula = Sales ~ ., data = newProduct)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -249597  -69713   -9682   63198  423486 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -733105     124721  -5.878 1.96e-06 ***
## DWBconcept     12380       4011   3.087 0.004330 ** 
## Uniqueness      6315       2444   2.584 0.014890 *  
## DWBproto       12817       3197   4.009 0.000372 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 130000 on 30 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.7235, Adjusted R-squared:  0.6958 
## F-statistic: 26.16 on 3 and 30 DF,  p-value: 1.629e-08

## Diagnostics: Residual plots, Normal Q-Q plot, Cook's distance, etc.
plot(linearNP, 1)

plot(linearNP, 3)

plot(linearNP, 2)

plot(linearNP, 4)

# Residuals vs Leverage
plot(linearNP, 5)

## Regression example 3: CPG ##############################################################
## Read CPG data
CPG <- read.csv(file='C:/Users/ehk994/Desktop/Teaching/Research for Marketing Communications/5 - Multiple regression/cpg.csv', stringsAsFactors = F, header=T, na.strings=c("","NA"))

## Scatter plot1
x <- CPG$AdSpend
y <- CPG$Sales

# Plot with main and axis titles
# Change point shape (pch = 19) and remove frame.
plot(x, y, main = "Scatter Plot",
     xlab = "Ad Spend", ylab = "Sales",
     pch = 19, frame = FALSE) # descriptive a little bit

## Scatter plot2
x <- CPG$Price
y <- CPG$Sales

# Plot with main and axis titles
# Change point shape (pch = 19) and remove frame.
plot(x, y, main = "Scatter Plot",
     xlab = "Price", ylab = "Sales",
     pch = 19, frame = FALSE)

## Regression
CPGlm <- lm(Sales ~., data=CPG)
summary(CPGlm)

## 
## Call:
## lm(formula = Sales ~ ., data = CPG)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9461 -1.4798 -0.1411  1.3872  3.7043 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   68.233     32.717   2.086   0.0451 *
## AdSpend        2.768      1.517   1.825   0.0774 .
## Price         -5.014      4.692  -1.069   0.2933  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.799 on 32 degrees of freedom
## Multiple R-squared:  0.9817, Adjusted R-squared:  0.9805 
## F-statistic: 856.6 on 2 and 32 DF,  p-value: < 2.2e-16

## Multicollinearity
x <- CPG$AdSpend
y <- CPG$Price
# Plot with main and axis titles
# Change point shape (pch = 19) and remove frame.
plot(x, y, main = "Scatter Plot",
     xlab = "AdSpend", ylab = "Price",
     pch = 19, frame = FALSE)

cor(x,y)

## [1] -0.9975562

## Separate Regression
CPGad <- lm(Sales ~ AdSpend, data=CPG)
summary(CPGad)

## 
## Call:
## lm(formula = Sales ~ AdSpend, data = CPG)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8742 -1.4225 -0.3679  1.5406  2.9666 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  33.2918     1.1074   30.06   <2e-16 ***
## AdSpend       4.3849     0.1062   41.29   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.803 on 33 degrees of freedom
## Multiple R-squared:  0.981,  Adjusted R-squared:  0.9804 
## F-statistic:  1705 on 1 and 33 DF,  p-value: < 2.2e-16

CPGprice <- lm(Sales ~ Price, data=CPG)
summary(CPGprice)

## 
## Call:
## lm(formula = Sales ~ Price, data = CPG)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1060 -1.4534  0.1132  1.1670  4.8965 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 127.8935     1.3059   97.94   <2e-16 ***
## Price       -13.5548     0.3392  -39.96   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.862 on 33 degrees of freedom
## Multiple R-squared:  0.9798, Adjusted R-squared:  0.9791 
## F-statistic:  1597 on 1 and 33 DF,  p-value: < 2.2e-16

## VIF
library(tidyverse)

## -- Attaching packages ----------------------------------------------------------------------- tidyverse 1.2.1 --

## v tibble  2.1.3     v purrr   0.3.2
## v tidyr   1.0.0     v dplyr   0.8.3
## v readr   1.3.1     v stringr 1.4.0
## v tibble  2.1.3     v forcats 0.4.0

## -- Conflicts -------------------------------------------------------------------------- tidyverse_conflicts() --
## x psych::%+%()    masks ggplot2::%+%()
## x psych::alpha()  masks ggplot2::alpha()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(caret)

## Loading required package: lattice

## 
## Attaching package: 'caret'

## The following object is masked from 'package:purrr':
## 
##     lift

car::vif(CPGlm)

##  AdSpend    Price 
## 204.8537 204.8537

## Regression example 4: Fitness ###############################################
## Read Fitness data
fitness <- read.csv(file='C:/Users/ehk994/Desktop/Teaching/Research for Marketing Communications/5 - Multiple regression/fitnessdata/Fitness.csv', stringsAsFactors = F, header=T, na.strings=c("","NA"))

## Regression
fitness_lm <- lm(Oxy ~ Age + Weight + Runtime + RunPulse + RstPulse + MaxPulse, data=fitness)
summary(fitness_lm)

## 
## Call:
## lm(formula = Oxy ~ Age + Weight + Runtime + RunPulse + RstPulse + 
##     MaxPulse, data = fitness)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.4303 -0.8837  0.0761  1.0573  5.3575 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 101.95579   12.27680   8.305 1.62e-08 ***
## Age          -0.21838    0.09854  -2.216  0.03642 *  
## Weight       -0.07498    0.05494  -1.365  0.18496    
## Runtime      -2.64013    0.38548  -6.849 4.40e-07 ***
## RunPulse     -0.36720    0.12055  -3.046  0.00556 ** 
## RstPulse     -0.01952    0.06622  -0.295  0.77068    
## MaxPulse      0.30456    0.13719   2.220  0.03612 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.327 on 24 degrees of freedom
## Multiple R-squared:  0.8474, Adjusted R-squared:  0.8092 
## F-statistic: 22.21 on 6 and 24 DF,  p-value: 1.074e-08

## Stepwise regression
#One way around this in R is to use stepwise regression.  
#You can do forward stepwise regression, backward stepwise regression, 
#or a combination of both, but R uses the AICp criterion at each step 
#instead of the criteria described in the text.  
#To use this procedure in the forward direction, you first must fit a base model 
#(with one predictor) and a full model (with all the predictors you wish to consider).

## Forward stepwise
# Base model
Base <- lm(Oxy ~ Age, data=fitness )

# Full model
Full <- lm(Oxy ~ Age + Weight + Runtime + RunPulse + RstPulse + MaxPulse, data=fitness)

# Forward model
forward <- step(Base, scope = list( upper=Full, lower=~1 ), direction = "forward", trace=FALSE)
summary(forward) #summary of the best model

## 
## Call:
## lm(formula = Oxy ~ Age + Runtime + RunPulse + MaxPulse + Weight, 
##     data = fitness)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.4927 -0.8379  0.0225  1.0092  5.3556 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 101.33103   11.86961   8.537 7.04e-09 ***
## Age          -0.21206    0.09441  -2.246  0.03378 *  
## Runtime      -2.68864    0.34216  -7.858 3.25e-08 ***
## RunPulse     -0.37070    0.11775  -3.148  0.00422 ** 
## MaxPulse      0.30603    0.13457   2.274  0.03181 *  
## Weight       -0.07327    0.05362  -1.366  0.18399    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.284 on 25 degrees of freedom
## Multiple R-squared:  0.8468, Adjusted R-squared:  0.8162 
## F-statistic: 27.64 on 5 and 25 DF,  p-value: 1.991e-09

extractAIC(forward) #extract AIC

## [1]  6.00000 56.54857

# Backward model
backward <- step(Full, direction = "backward", trace=FALSE)
summary(backward) #summary of the best model

## 
## Call:
## lm(formula = Oxy ~ Age + Weight + Runtime + RunPulse + MaxPulse, 
##     data = fitness)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.4927 -0.8379  0.0225  1.0092  5.3556 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 101.33103   11.86961   8.537 7.04e-09 ***
## Age          -0.21206    0.09441  -2.246  0.03378 *  
## Weight       -0.07327    0.05362  -1.366  0.18399    
## Runtime      -2.68864    0.34216  -7.858 3.25e-08 ***
## RunPulse     -0.37070    0.11775  -3.148  0.00422 ** 
## MaxPulse      0.30603    0.13457   2.274  0.03181 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.284 on 25 degrees of freedom
## Multiple R-squared:  0.8468, Adjusted R-squared:  0.8162 
## F-statistic: 27.64 on 5 and 25 DF,  p-value: 1.991e-09

extractAIC(backward)

## [1]  6.00000 56.54857

## Forward and Backward model by segments
# Subset of fitness data
fitness2 <- fitness[c(5,2:4,6:9)]

# Subset the fitness2 based on sex
attach(fitness2)
female <- fitness2[ which(Sex=='F'),]
male <- fitness2[ which(Sex=='M'),]
detach(fitness2)

## Base model: Female
BaseF <- lm(Oxy ~ Age, data=female )

# Full model: Female
FullF <- lm(Oxy ~ Age + Weight + Runtime + RunPulse + RstPulse + MaxPulse, data=female)

# Forward model: Female
forwardF <- step(BaseF, scope = list( upper=FullF, lower=~1 ), direction = "forward", trace=FALSE)
summary(forwardF)

## 
## Call:
## lm(formula = Oxy ~ Age + Runtime + Weight + RunPulse + MaxPulse + 
##     RstPulse, data = female)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.36994 -0.65850  0.00926  0.97355  1.68656 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 130.16072   13.37383   9.732 4.48e-06 ***
## Age          -0.47173    0.10044  -4.697  0.00113 ** 
## Runtime      -3.60521    0.53721  -6.711 8.74e-05 ***
## Weight       -0.25852    0.06806  -3.799  0.00423 ** 
## RunPulse     -0.27683    0.10934  -2.532  0.03213 *  
## MaxPulse      0.22441    0.12252   1.832  0.10025    
## RstPulse      0.09181    0.06422   1.430  0.18659    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.524 on 9 degrees of freedom
## Multiple R-squared:  0.9429, Adjusted R-squared:  0.9049 
## F-statistic: 24.79 on 6 and 9 DF,  p-value: 4.116e-05

extractAIC(forwardF)

## [1]  7.00000 18.27326

# Backward model: Female
backwardF <- step(FullF, direction = "backward", trace=FALSE)
summary(backwardF)

## 
## Call:
## lm(formula = Oxy ~ Age + Weight + Runtime + RunPulse + RstPulse + 
##     MaxPulse, data = female)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.36994 -0.65850  0.00926  0.97355  1.68656 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 130.16072   13.37383   9.732 4.48e-06 ***
## Age          -0.47173    0.10044  -4.697  0.00113 ** 
## Weight       -0.25852    0.06806  -3.799  0.00423 ** 
## Runtime      -3.60521    0.53721  -6.711 8.74e-05 ***
## RunPulse     -0.27683    0.10934  -2.532  0.03213 *  
## RstPulse      0.09181    0.06422   1.430  0.18659    
## MaxPulse      0.22441    0.12252   1.832  0.10025    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.524 on 9 degrees of freedom
## Multiple R-squared:  0.9429, Adjusted R-squared:  0.9049 
## F-statistic: 24.79 on 6 and 9 DF,  p-value: 4.116e-05

extractAIC(backwardF)

## [1]  7.00000 18.27326

## VIF
library(tidyverse)
library(caret)

car::vif(forwardF)

##      Age  Runtime   Weight RunPulse MaxPulse RstPulse 
## 1.658629 2.210998 1.541896 9.838546 9.993192 1.711130

## Rerun the model without variables with high VIF
fitnessF <- lm(Oxy ~ Age + Weight + Runtime + RstPulse, data=female)
summary(fitnessF)

## 
## Call:
## lm(formula = Oxy ~ Age + Weight + Runtime + RstPulse, data = female)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2891 -0.3908  0.2184  1.2415  2.2070 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 132.29758   11.63666  11.369 2.02e-07 ***
## Age          -0.50405    0.10956  -4.601 0.000764 ***
## Weight       -0.26559    0.08474  -3.134 0.009508 ** 
## Runtime      -4.24691    0.59960  -7.083 2.04e-05 ***
## RstPulse      0.06466    0.07614   0.849 0.413906    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.903 on 11 degrees of freedom
## Multiple R-squared:  0.8912, Adjusted R-squared:  0.8516 
## F-statistic: 22.52 on 4 and 11 DF,  p-value: 2.969e-05

car::vif(fitnessF)

##      Age   Weight  Runtime RstPulse 
## 1.264928 1.532359 1.765582 1.542025

## Base model: Male
BaseM <- lm(Oxy ~ Age, data=male )

# Full model: Male
FullM <- lm(Oxy ~ Age + Weight + Runtime + RunPulse + RstPulse + MaxPulse, data=male)

# Forward model: Male
forwardM <- step(BaseM, scope = list( upper=FullM, lower=~1 ), direction = "forward", trace=FALSE)
summary(forwardM)

## 
## Call:
## lm(formula = Oxy ~ Age + Runtime + RunPulse, data = male)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7927 -0.9715 -0.1934  1.1581  4.6553 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 98.32501   18.29220   5.375 0.000225 ***
## Age         -0.08482    0.13545  -0.626 0.543947    
## Runtime     -2.55415    0.46120  -5.538 0.000176 ***
## RunPulse    -0.12142    0.08650  -1.404 0.187988    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.349 on 11 degrees of freedom
## Multiple R-squared:  0.7874, Adjusted R-squared:  0.7294 
## F-statistic: 13.58 on 3 and 11 DF,  p-value: 0.0005117

extractAIC(forwardM)

## [1]  4.00000 28.97295

# Backward model: Male
backwardM <- step(FullM, direction = "backward", trace=FALSE)
summary(backwardM)

## 
## Call:
## lm(formula = Oxy ~ Age + Weight + Runtime + RstPulse, data = male)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4247 -1.0283 -0.5517  0.5440  4.1581 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  35.6689    18.3027   1.949   0.0799 .  
## Age           0.2216     0.1432   1.547   0.1528    
## Weight        0.2560     0.1161   2.205   0.0520 .  
## Runtime      -3.2086     0.4780  -6.712 5.29e-05 ***
## RstPulse      0.2386     0.1313   1.817   0.0992 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.174 on 10 degrees of freedom
## Multiple R-squared:  0.8345, Adjusted R-squared:  0.7682 
## F-statistic:  12.6 on 4 and 10 DF,  p-value: 0.0006431

extractAIC(backwardM)

## [1]  5.00000 27.22104

## VIF
library(tidyverse)
library(caret)

car::vif(forwardM)

##      Age  Runtime RunPulse 
## 1.482364 1.125921 1.426422

## Regression example 5: Cereals ###############################################
## Read Cereal data
cereal <- read.csv(file='C:/Users/ehk994/Desktop/Teaching/Research for Marketing Communications/5 - Multiple regression/cereal_data.csv', stringsAsFactors = F, header=T, na.strings=c("","NA"))

# Variables
#Vt: category demand
#Pt: category price
#At: category advertising
#time: time

## Log-linear model
cereal_log <- lm(LnVt ~ LnAt + LnPt + time, data=cereal)
summary(cereal_log)

## 
## Call:
## lm(formula = LnVt ~ LnAt + LnPt + time, data = cereal)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.060532 -0.018380 -0.001035  0.018822  0.049910 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.383556   0.704221   6.225 5.67e-07 ***
## LnAt         0.047109   0.046404   1.015   0.3176    
## LnPt        -0.282954   0.123368  -2.294   0.0285 *  
## time         0.004459   0.000713   6.254 5.22e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02859 on 32 degrees of freedom
## Multiple R-squared:  0.6572, Adjusted R-squared:  0.6251 
## F-statistic: 20.45 on 3 and 32 DF,  p-value: 1.382e-07

car::vif(cereal_log)

##     LnAt     LnPt     time 
## 1.311653 2.676785 2.415814

Multiple regression

Emily Ko

10/21/2019