Study the relationship between ice cream consumption per capita and the interaction between family income and price, along with their main effects.
Meta-data:
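DATE (column `time_period`): Time period (1-30)
CONSUME (column `cons`): Ice cream consumption in pints per capita
PRICE (column `price`): Price of ice cream per pint, in dollars
Inc (column `income`): Weekly family income in dollars
TEMP (column `temp`): Mean temperature in degrees Fahrenheit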

Load the required libraries

pacman::p_load(tidyverse, caret, datarium,car, corrplot)

Set the working directory to the folder that contains your data files

# NOTE(review): absolute, user-specific path -- this call fails on any machine
# that does not have this exact directory. Consider running the script from the
# project folder and using a relative path instead.
setwd("C:/Users/ngsook/Desktop/NUS EBA/Semester 2/Predictive Analytic/EBA Predictive WK 1")

Read the file into the data frame df

# Load the ice cream data set from the current working directory.
df <- read.csv(file = "interaction.csv")
# Preview the first four observations.
head(df, n = 4)
##   time_period  cons income price temp
## 1           1 0.386     78 0.270   41
## 2           2 0.374     79 0.282   56
## 3           3 0.393     81 0.277   63
## 4           4 0.425     80 0.280   68

use the code below to get the correlation plot

# Plot-size options (presumably read by the Jupyter/IRkernel front end via the
# repr package -- confirm; they are ignored elsewhere).
options(repr.plot.width = 6, repr.plot.height = 3)

# Correlation matrix of the numeric columns, shown as numbers in the upper
# triangle. vapply() always yields a logical mask (sapply() would return an
# empty list for a zero-column data frame), and list-style indexing `df[mask]`
# never collapses a single selected column to a bare vector.
corrplot(
  cor(df[vapply(df, is.numeric, logical(1))], use = "complete.obs"),
  method = "number",
  type = "upper"
)

## first create a linear model with no interaction terms

# Baseline fit: cons regressed on every other column of df (main effects only).
model <- lm(formula = cons ~ ., data = df)

check VIF using vif(model)

# car is already loaded by the pacman::p_load() call at the top of the file;
# attaching it again here is harmless.
library(car)
# Variance inflation factors for the main-effects-only model.
vif(model)
## time_period      income       price        temp 
##    3.980849    4.476757    1.046256    1.302072

Now create model2 with the interaction term price:income

Based on domain knowledge, price and income have an interaction effect.

The price of ice cream that a family is willing to pay depends on the family's income, so the effect of price on consumption is expected to vary with income.

# Same predictors as the baseline model, plus the price-by-income interaction.
model2 <- lm(cons ~ . + price:income, data = df)

# Coefficient table and fit statistics for the interaction model.
summary(model2)
## 
## Call:
## lm(formula = cons ~ . + price:income, data = df)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.055281 -0.016492 -0.005084  0.013392  0.078093 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.8497119  3.5070045  -1.953   0.0626 .  
## time_period   0.0011791  0.0014741   0.800   0.4316    
## income        0.0860986  0.0410771   2.096   0.0468 *  
## price        25.4729944 12.9702903   1.964   0.0612 .  
## temp          0.0029801  0.0004848   6.147 2.38e-06 ***
## income:price -0.3114501  0.1517074  -2.053   0.0511 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03501 on 24 degrees of freedom
## Multiple R-squared:  0.7656, Adjusted R-squared:  0.7168 
## F-statistic: 15.68 on 5 and 24 DF,  p-value: 6.924e-07

check the VIF

# Re-check the VIFs now that the model includes the income:price interaction.
vif(model2)
##  time_period       income        price         temp income:price 
##     3.983615  1556.888866   276.951216     1.499392  1705.185349

Center the variables to reduce the multicollinearity (inflated VIFs) that the interaction term introduces

# Mean-center price and income. scale() returns a one-column matrix carrying a
# "scaled:center" attribute; as.numeric() strips both so the new columns are
# plain numeric vectors. Matrix-valued columns are recorded as "nmatrix.1" in a
# fitted model, which makes predict() reject newdata that supplies ordinary
# numeric values for them.
df$price_cent <- as.numeric(scale(df$price, scale = FALSE))
df$income_cent <- as.numeric(scale(df$income, scale = FALSE))

# Preview the data with the two centered columns appended.
head(df, n = 4)
##   time_period  cons income price temp price_cent income_cent
## 1           1 0.386     78 0.270   41    -0.0053        -6.6
## 2           2 0.374     79 0.282   56     0.0067        -5.6
## 3           3 0.393     81 0.277   63     0.0017        -3.6
## 4           4 0.425     80 0.280   68     0.0047        -4.6

Create model3 with centered price and income as main effects, plus the interaction term built from the centered variables

# `.` expands to every remaining column of df, which now includes price_cent
# and income_cent; the raw income and price columns are removed from the
# formula so only their centered versions (and the centered interaction) enter.
model3 <- lm(cons ~ . - income - price + price_cent:income_cent, data = df)

# Coefficient table and fit statistics for the centered model.
summary(model3)
## 
## Call:
## lm(formula = cons ~ . - income - price + price_cent:income_cent, 
##     data = df)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.055281 -0.016492 -0.005084  0.013392  0.078093 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             0.1931501  0.0274590   7.034 2.84e-07 ***
## time_period             0.0011791  0.0014741   0.800   0.4316    
## temp                    0.0029801  0.0004848   6.147 2.38e-06 ***
## price_cent             -0.8756848  0.8049350  -1.088   0.2874    
## income_cent             0.0003563  0.0023259   0.153   0.8795    
## price_cent:income_cent -0.3114501  0.1517074  -2.053   0.0511 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03501 on 24 degrees of freedom
## Multiple R-squared:  0.7656, Adjusted R-squared:  0.7168 
## F-statistic: 15.68 on 5 and 24 DF,  p-value: 6.924e-07
# VIFs for the model that uses the centered price and income variables.
vif(model3)
##            time_period                   temp             price_cent 
##               3.983615               1.499392               1.066659 
##            income_cent price_cent:income_cent 
##               4.991651               1.504199