## In class exercise (first session): Churros ########################################
## Read data
churros <- read.csv(file='C:/Users/ehk994/Desktop/Teaching/Research for Marketing Communications/5 - Multiple regression/Class exercise/churros.csv', stringsAsFactors = F, header=T, na.strings=c("","NA"))
## Create data
biteNum <- c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
bite <- c(0.0, 1.2, 0.8, 1.3, 1.1, 0.9, 1.3, 1.0, 0.7, 1.2, 1.1, 0.8, 1.5, 0.7, 0.6, 0.8)
length <- c(15.0, 13.8, 13.0, 11.7, 10.6, 9.7, 8.4, 7.4, 6.7, 5.5, 4.4, 3.6, 2.1, 1.4, 0.8, 0.0)
churros <- data.frame(biteNum, bite, length)
## Linear regression
s <- lm(length ~ biteNum, data = churros)
summary(s)
##
## Call:
## lm(formula = length ~ biteNum, data = churros)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.44588 -0.14114 0.00235 0.11529 0.51103
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.77353 0.11915 123.99 <2e-16 ***
## biteNum -1.01897 0.01354 -75.28 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2496 on 14 degrees of freedom
## Multiple R-squared: 0.9975, Adjusted R-squared: 0.9974
## F-statistic: 5668 on 1 and 14 DF, p-value: < 2.2e-16
## Scatter plot
attach(churros)
## The following objects are masked _by_ .GlobalEnv:
##
## bite, biteNum, length
plot(biteNum, length, main="Univariate Regression for Churros",
xlab="Bite Number ", ylab="Length ", pch=19)
abline(lm(length~biteNum), col="red") # regression line (y~x)
lines(lowess(biteNum,length), col="blue") # lowess line (x,y)

## Example 1: Demand Analysis #################################################################
## Read data file
demand <- read.csv(file='C:/Users/ehk994/Desktop/Teaching/Research for Marketing Communications/5 - Multiple regression/demand_analysis.csv', stringsAsFactors = F, header=T, na.strings=c("","NA"))
## Linear regression
d <- lm(Demand ~ Price, data = demand)
summary(d)
##
## Call:
## lm(formula = Demand ~ Price, data = demand)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8591 -0.4188 -0.2024 0.4543 1.4543
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.12794 0.30112 33.63 < 2e-16 ***
## Price -0.89555 0.06377 -14.04 9.96e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6183 on 30 degrees of freedom
## Multiple R-squared: 0.868, Adjusted R-squared: 0.8636
## F-statistic: 197.2 on 1 and 30 DF, p-value: 9.957e-15
## Plot for linear regression
library(ggplot2)
ggplot(demand, aes(Price, Demand)) +
geom_point() +
stat_smooth(method = lm) #fitted line

## Predicted values
fitted(d)
## 1 2 3 4 5 6 7 8
## 6.545722 6.993499 5.650168 4.754613 4.306836 3.859059 8.336830 6.545722
## 9 10 11 12 13 14 15 16
## 5.202390 7.441276 6.993499 8.336830 8.336830 7.441276 7.441276 8.784607
## 17 18 19 20 21 22 23 24
## 7.441276 5.829278 5.650168 6.545722 6.097945 6.545722 3.411282 6.545722
## 25 26 27 28 29 30 31 32
## 4.306836 6.545722 3.859059 5.202390 3.859059 6.993499 3.859059 8.336830
d2 <- data.frame(Demand = demand$Demand, Price = demand$Price, Fitted = fitted(d)) # create data frame with fitted values
## Example 2: New Product Development #################################################################
## Read data file
newProduct <- read.csv(file='C:/Users/ehk994/Desktop/Teaching/Research for Marketing Communications/5 - Multiple regression/new_product.csv', stringsAsFactors = F, header=T, na.strings=c("","NA"))
## Historgram (distribution)
newProduct$Sales <- gsub(",","",newProduct$Sales)
newProduct$Sales <- as.numeric(as.character(newProduct$Sales)) # convert data type
hist(newProduct$Sales) # histogram

hist(newProduct$DWBconcept)

hist(newProduct$Uniqueness)

hist(newProduct$DWBproto)

## Summary statistics
summary(newProduct)
## Sales DWBconcept Uniqueness DWBproto
## Min. : 10850 Min. :21.00 Min. :13.00 Min. :13.00
## 1st Qu.: 82500 1st Qu.:31.00 1st Qu.:19.25 1st Qu.:20.00
## Median : 132000 Median :36.50 Median :25.00 Median :25.50
## Mean : 219696 Mean :35.94 Mean :25.44 Mean :27.09
## 3rd Qu.: 264750 3rd Qu.:39.00 3rd Qu.:29.00 3rd Qu.:29.00
## Max. :1017000 Max. :54.00 Max. :60.00 Max. :53.00
## NA's :23 NA's :23 NA's :23 NA's :23
# Another summary
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
describe(newProduct)
## vars n mean sd median trimmed mad min
## Sales 1 34 219695.88 235776.82 132000.0 175733.57 116384.10 10850
## DWBconcept 2 34 35.94 7.08 36.5 35.61 6.67 21
## Uniqueness 3 34 25.44 9.43 25.0 24.46 7.41 13
## DWBproto 4 34 27.09 8.89 25.5 26.07 5.93 13
## max range skew kurtosis se
## Sales 1017000 1006150 1.78 2.69 40435.39
## DWBconcept 54 33 0.38 0.02 1.21
## Uniqueness 60 47 1.42 3.09 1.62
## DWBproto 53 40 1.15 1.10 1.52
## Regression
linearNP <- lm(Sales ~., data=newProduct)
summary(linearNP)
##
## Call:
## lm(formula = Sales ~ ., data = newProduct)
##
## Residuals:
## Min 1Q Median 3Q Max
## -249597 -69713 -9682 63198 423486
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -733105 124721 -5.878 1.96e-06 ***
## DWBconcept 12380 4011 3.087 0.004330 **
## Uniqueness 6315 2444 2.584 0.014890 *
## DWBproto 12817 3197 4.009 0.000372 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 130000 on 30 degrees of freedom
## (23 observations deleted due to missingness)
## Multiple R-squared: 0.7235, Adjusted R-squared: 0.6958
## F-statistic: 26.16 on 3 and 30 DF, p-value: 1.629e-08
## Diagnostics: Residual plots, Normal Q-Q plot, Cook's distance, etc.
plot(linearNP, 1)

plot(linearNP, 3)

plot(linearNP, 2)

plot(linearNP, 4)

# Residuals vs Leverage
plot(linearNP, 5)

## Regression example 3: CPG ##############################################################
## Read CPG data
CPG <- read.csv(file='C:/Users/ehk994/Desktop/Teaching/Research for Marketing Communications/5 - Multiple regression/cpg.csv', stringsAsFactors = F, header=T, na.strings=c("","NA"))
## Scatter plot1
x <- CPG$AdSpend
y <- CPG$Sales
# Plot with main and axis titles
# Change point shape (pch = 19) and remove frame.
plot(x, y, main = "Scatter Plot",
xlab = "Ad Spend", ylab = "Sales",
pch = 19, frame = FALSE) # descriptive a little bit

## Scatter plot2
x <- CPG$Price
y <- CPG$Sales
# Plot with main and axis titles
# Change point shape (pch = 19) and remove frame.
plot(x, y, main = "Scatter Plot",
xlab = "Price", ylab = "Sales",
pch = 19, frame = FALSE)

## Regression
CPGlm <- lm(Sales ~., data=CPG)
summary(CPGlm)
##
## Call:
## lm(formula = Sales ~ ., data = CPG)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9461 -1.4798 -0.1411 1.3872 3.7043
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68.233 32.717 2.086 0.0451 *
## AdSpend 2.768 1.517 1.825 0.0774 .
## Price -5.014 4.692 -1.069 0.2933
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.799 on 32 degrees of freedom
## Multiple R-squared: 0.9817, Adjusted R-squared: 0.9805
## F-statistic: 856.6 on 2 and 32 DF, p-value: < 2.2e-16
## Multicollinearity
x <- CPG$AdSpend
y <- CPG$Price
# Plot with main and axis titles
# Change point shape (pch = 19) and remove frame.
plot(x, y, main = "Scatter Plot",
xlab = "AdSpend", ylab = "Price",
pch = 19, frame = FALSE)

cor(x,y)
## [1] -0.9975562
## Separate Regression
CPGad <- lm(Sales ~ AdSpend, data=CPG)
summary(CPGad)
##
## Call:
## lm(formula = Sales ~ AdSpend, data = CPG)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8742 -1.4225 -0.3679 1.5406 2.9666
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.2918 1.1074 30.06 <2e-16 ***
## AdSpend 4.3849 0.1062 41.29 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.803 on 33 degrees of freedom
## Multiple R-squared: 0.981, Adjusted R-squared: 0.9804
## F-statistic: 1705 on 1 and 33 DF, p-value: < 2.2e-16
CPGprice <- lm(Sales ~ Price, data=CPG)
summary(CPGprice)
##
## Call:
## lm(formula = Sales ~ Price, data = CPG)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1060 -1.4534 0.1132 1.1670 4.8965
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 127.8935 1.3059 97.94 <2e-16 ***
## Price -13.5548 0.3392 -39.96 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.862 on 33 degrees of freedom
## Multiple R-squared: 0.9798, Adjusted R-squared: 0.9791
## F-statistic: 1597 on 1 and 33 DF, p-value: < 2.2e-16
## VIF
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------- tidyverse 1.2.1 --
## v tibble 2.1.3 v purrr 0.3.2
## v tidyr 1.0.0 v dplyr 0.8.3
## v readr 1.3.1 v stringr 1.4.0
## v tibble 2.1.3 v forcats 0.4.0
## -- Conflicts -------------------------------------------------------------------------- tidyverse_conflicts() --
## x psych::%+%() masks ggplot2::%+%()
## x psych::alpha() masks ggplot2::alpha()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
car::vif(CPGlm)
## AdSpend Price
## 204.8537 204.8537
## Regression example 4: Fitness ###############################################
## Read Fitness data
fitness <- read.csv(file='C:/Users/ehk994/Desktop/Teaching/Research for Marketing Communications/5 - Multiple regression/fitnessdata/Fitness.csv', stringsAsFactors = F, header=T, na.strings=c("","NA"))
## Regression
fitness_lm <- lm(Oxy ~ Age + Weight + Runtime + RunPulse + RstPulse + MaxPulse, data=fitness)
summary(fitness_lm)
##
## Call:
## lm(formula = Oxy ~ Age + Weight + Runtime + RunPulse + RstPulse +
## MaxPulse, data = fitness)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.4303 -0.8837 0.0761 1.0573 5.3575
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 101.95579 12.27680 8.305 1.62e-08 ***
## Age -0.21838 0.09854 -2.216 0.03642 *
## Weight -0.07498 0.05494 -1.365 0.18496
## Runtime -2.64013 0.38548 -6.849 4.40e-07 ***
## RunPulse -0.36720 0.12055 -3.046 0.00556 **
## RstPulse -0.01952 0.06622 -0.295 0.77068
## MaxPulse 0.30456 0.13719 2.220 0.03612 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.327 on 24 degrees of freedom
## Multiple R-squared: 0.8474, Adjusted R-squared: 0.8092
## F-statistic: 22.21 on 6 and 24 DF, p-value: 1.074e-08
## Stepwise regression
#One way around this in R is to use stepwise regression.
#You can do forward stepwise regression, backward stepwise regression,
#or a combination of both, but R uses the AICp criterion at each step
#instead of the criteria described in the text.
#To use this procedure in the forward direction, you first must fit a base model
#(with one predictor) and a full model (with all the predictors you wish to consider).
## Forward stepwise
# Base model
Base <- lm(Oxy ~ Age, data=fitness )
# Full model
Full <- lm(Oxy ~ Age + Weight + Runtime + RunPulse + RstPulse + MaxPulse, data=fitness)
# Forward model
forward <- step(Base, scope = list( upper=Full, lower=~1 ), direction = "forward", trace=FALSE)
summary(forward) #summary of the best model
##
## Call:
## lm(formula = Oxy ~ Age + Runtime + RunPulse + MaxPulse + Weight,
## data = fitness)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.4927 -0.8379 0.0225 1.0092 5.3556
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 101.33103 11.86961 8.537 7.04e-09 ***
## Age -0.21206 0.09441 -2.246 0.03378 *
## Runtime -2.68864 0.34216 -7.858 3.25e-08 ***
## RunPulse -0.37070 0.11775 -3.148 0.00422 **
## MaxPulse 0.30603 0.13457 2.274 0.03181 *
## Weight -0.07327 0.05362 -1.366 0.18399
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.284 on 25 degrees of freedom
## Multiple R-squared: 0.8468, Adjusted R-squared: 0.8162
## F-statistic: 27.64 on 5 and 25 DF, p-value: 1.991e-09
extractAIC(forward) #extract AIC
## [1] 6.00000 56.54857
# Backward model
backward <- step(Full, direction = "backward", trace=FALSE)
summary(backward) #summary of the best model
##
## Call:
## lm(formula = Oxy ~ Age + Weight + Runtime + RunPulse + MaxPulse,
## data = fitness)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.4927 -0.8379 0.0225 1.0092 5.3556
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 101.33103 11.86961 8.537 7.04e-09 ***
## Age -0.21206 0.09441 -2.246 0.03378 *
## Weight -0.07327 0.05362 -1.366 0.18399
## Runtime -2.68864 0.34216 -7.858 3.25e-08 ***
## RunPulse -0.37070 0.11775 -3.148 0.00422 **
## MaxPulse 0.30603 0.13457 2.274 0.03181 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.284 on 25 degrees of freedom
## Multiple R-squared: 0.8468, Adjusted R-squared: 0.8162
## F-statistic: 27.64 on 5 and 25 DF, p-value: 1.991e-09
extractAIC(backward)
## [1] 6.00000 56.54857
## Forward and Backward model by segments
# Subset of fitness data
fitness2 <- fitness[c(5,2:4,6:9)]
# Subset the fitness2 based on sex
attach(fitness2)
female <- fitness2[ which(Sex=='F'),]
male <- fitness2[ which(Sex=='M'),]
detach(fitness2)
## Base model: Female
BaseF <- lm(Oxy ~ Age, data=female )
# Full model: Female
FullF <- lm(Oxy ~ Age + Weight + Runtime + RunPulse + RstPulse + MaxPulse, data=female)
# Forward model: Female
forwardF <- step(BaseF, scope = list( upper=FullF, lower=~1 ), direction = "forward", trace=FALSE)
summary(forwardF)
##
## Call:
## lm(formula = Oxy ~ Age + Runtime + Weight + RunPulse + MaxPulse +
## RstPulse, data = female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.36994 -0.65850 0.00926 0.97355 1.68656
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 130.16072 13.37383 9.732 4.48e-06 ***
## Age -0.47173 0.10044 -4.697 0.00113 **
## Runtime -3.60521 0.53721 -6.711 8.74e-05 ***
## Weight -0.25852 0.06806 -3.799 0.00423 **
## RunPulse -0.27683 0.10934 -2.532 0.03213 *
## MaxPulse 0.22441 0.12252 1.832 0.10025
## RstPulse 0.09181 0.06422 1.430 0.18659
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.524 on 9 degrees of freedom
## Multiple R-squared: 0.9429, Adjusted R-squared: 0.9049
## F-statistic: 24.79 on 6 and 9 DF, p-value: 4.116e-05
extractAIC(forwardF)
## [1] 7.00000 18.27326
# Backward model: Female
backwardF <- step(FullF, direction = "backward", trace=FALSE)
summary(backwardF)
##
## Call:
## lm(formula = Oxy ~ Age + Weight + Runtime + RunPulse + RstPulse +
## MaxPulse, data = female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.36994 -0.65850 0.00926 0.97355 1.68656
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 130.16072 13.37383 9.732 4.48e-06 ***
## Age -0.47173 0.10044 -4.697 0.00113 **
## Weight -0.25852 0.06806 -3.799 0.00423 **
## Runtime -3.60521 0.53721 -6.711 8.74e-05 ***
## RunPulse -0.27683 0.10934 -2.532 0.03213 *
## RstPulse 0.09181 0.06422 1.430 0.18659
## MaxPulse 0.22441 0.12252 1.832 0.10025
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.524 on 9 degrees of freedom
## Multiple R-squared: 0.9429, Adjusted R-squared: 0.9049
## F-statistic: 24.79 on 6 and 9 DF, p-value: 4.116e-05
extractAIC(backwardF)
## [1] 7.00000 18.27326
## VIF
library(tidyverse)
library(caret)
car::vif(forwardF)
## Age Runtime Weight RunPulse MaxPulse RstPulse
## 1.658629 2.210998 1.541896 9.838546 9.993192 1.711130
## Rerun the model without variables with high VIF
fitnessF <- lm(Oxy ~ Age + Weight + Runtime + RstPulse, data=female)
summary(fitnessF)
##
## Call:
## lm(formula = Oxy ~ Age + Weight + Runtime + RstPulse, data = female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2891 -0.3908 0.2184 1.2415 2.2070
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 132.29758 11.63666 11.369 2.02e-07 ***
## Age -0.50405 0.10956 -4.601 0.000764 ***
## Weight -0.26559 0.08474 -3.134 0.009508 **
## Runtime -4.24691 0.59960 -7.083 2.04e-05 ***
## RstPulse 0.06466 0.07614 0.849 0.413906
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.903 on 11 degrees of freedom
## Multiple R-squared: 0.8912, Adjusted R-squared: 0.8516
## F-statistic: 22.52 on 4 and 11 DF, p-value: 2.969e-05
car::vif(fitnessF)
## Age Weight Runtime RstPulse
## 1.264928 1.532359 1.765582 1.542025
## Base model: Male
BaseM <- lm(Oxy ~ Age, data=male )
# Full model: Male
FullM <- lm(Oxy ~ Age + Weight + Runtime + RunPulse + RstPulse + MaxPulse, data=male)
# Forward model: Male
forwardM <- step(BaseM, scope = list( upper=FullM, lower=~1 ), direction = "forward", trace=FALSE)
summary(forwardM)
##
## Call:
## lm(formula = Oxy ~ Age + Runtime + RunPulse, data = male)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.7927 -0.9715 -0.1934 1.1581 4.6553
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 98.32501 18.29220 5.375 0.000225 ***
## Age -0.08482 0.13545 -0.626 0.543947
## Runtime -2.55415 0.46120 -5.538 0.000176 ***
## RunPulse -0.12142 0.08650 -1.404 0.187988
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.349 on 11 degrees of freedom
## Multiple R-squared: 0.7874, Adjusted R-squared: 0.7294
## F-statistic: 13.58 on 3 and 11 DF, p-value: 0.0005117
extractAIC(forwardM)
## [1] 4.00000 28.97295
# Backward model: Male
backwardM <- step(FullM, direction = "backward", trace=FALSE)
summary(backwardM)
##
## Call:
## lm(formula = Oxy ~ Age + Weight + Runtime + RstPulse, data = male)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4247 -1.0283 -0.5517 0.5440 4.1581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 35.6689 18.3027 1.949 0.0799 .
## Age 0.2216 0.1432 1.547 0.1528
## Weight 0.2560 0.1161 2.205 0.0520 .
## Runtime -3.2086 0.4780 -6.712 5.29e-05 ***
## RstPulse 0.2386 0.1313 1.817 0.0992 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.174 on 10 degrees of freedom
## Multiple R-squared: 0.8345, Adjusted R-squared: 0.7682
## F-statistic: 12.6 on 4 and 10 DF, p-value: 0.0006431
extractAIC(backwardM)
## [1] 5.00000 27.22104
## VIF
library(tidyverse)
library(caret)
car::vif(forwardM)
## Age Runtime RunPulse
## 1.482364 1.125921 1.426422
## Regression example 5: Cereals ###############################################
## Read Cereal data
cereal <- read.csv(file='C:/Users/ehk994/Desktop/Teaching/Research for Marketing Communications/5 - Multiple regression/cereal_data.csv', stringsAsFactors = F, header=T, na.strings=c("","NA"))
# Variables
#Vt: category demand
#Pt: category price
#At: category advertising
#time: time
## Log-linear model
cereal_log <- lm(LnVt ~ LnAt + LnPt + time, data=cereal)
summary(cereal_log)
##
## Call:
## lm(formula = LnVt ~ LnAt + LnPt + time, data = cereal)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.060532 -0.018380 -0.001035 0.018822 0.049910
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.383556 0.704221 6.225 5.67e-07 ***
## LnAt 0.047109 0.046404 1.015 0.3176
## LnPt -0.282954 0.123368 -2.294 0.0285 *
## time 0.004459 0.000713 6.254 5.22e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02859 on 32 degrees of freedom
## Multiple R-squared: 0.6572, Adjusted R-squared: 0.6251
## F-statistic: 20.45 on 3 and 32 DF, p-value: 1.382e-07
car::vif(cereal_log)
## LnAt LnPt time
## 1.311653 2.676785 2.415814