# Load (installing if necessary) the packages used by this script.
pacman::p_load(tidyverse, caret, datarium, car, corrplot)

# Directory holding the input data. The file is read through an explicit path
# instead of calling setwd(), so the session's working directory is not
# changed as a side effect of running this script.
data_dir <- file.path(
  "C:/Users/ngsook/Desktop/NUS EBA/Semester 2",
  "Predictive Analytic/EBA Predictive WK 1"
)
df <- read.csv(file.path(data_dir, "interaction.csv"))

# Preview the first four rows.
head(df, n = 4)
## time_period cons income price temp
## 1 1 0.386 78 0.270 41
## 2 2 0.374 79 0.282 56
## 3 3 0.393 81 0.277 63
## 4 4 0.425 80 0.280 68
# Set the repr.plot.* width/height options before drawing.
options(repr.plot.width = 6, repr.plot.height = 3)

# Correlation matrix of the numeric columns only, using complete rows.
# vapply() always returns a logical vector (sapply() can return a list for
# degenerate input), and drop = FALSE keeps a data frame even when only one
# column is numeric.
numeric_cols <- vapply(df, is.numeric, logical(1))
cor_matrix <- cor(df[, numeric_cols, drop = FALSE], use = "complete.obs")

# Upper triangle only, with the coefficients printed as numbers.
corrplot(cor_matrix, method = "number", type = "upper")
# Baseline model: regress cons on every other column of df, main effects
# only (no interaction terms).
model <- lm(formula = cons ~ ., data = df)
summary(model)
##
## Call:
## lm(formula = cons ~ ., data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.06033 -0.01853 0.00304 0.01757 0.07619
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.322449 0.325914 0.989 0.332
## time_period 0.001099 0.001566 0.702 0.489
## income 0.001890 0.002340 0.808 0.427
## price -1.104229 0.846905 -1.304 0.204
## temp 0.003341 0.000480 6.961 2.69e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0372 on 25 degrees of freedom
## Multiple R-squared: 0.7244, Adjusted R-squared: 0.6803
## F-statistic: 16.43 on 4 and 25 DF, p-value: 1.012e-06
# Attach car and report the variance inflation factors of the baseline model.
library(car)
car::vif(model)
## time_period income price temp
## 3.980849 4.476757 1.046256 1.302072
# Same predictors as the baseline, plus the price-by-income interaction.
model2 <- lm(formula = cons ~ . + price:income, data = df)
summary(model2)
##
## Call:
## lm(formula = cons ~ . + price:income, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.055281 -0.016492 -0.005084 0.013392 0.078093
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.8497119 3.5070045 -1.953 0.0626 .
## time_period 0.0011791 0.0014741 0.800 0.4316
## income 0.0860986 0.0410771 2.096 0.0468 *
## price 25.4729944 12.9702903 1.964 0.0612 .
## temp 0.0029801 0.0004848 6.147 2.38e-06 ***
## income:price -0.3114501 0.1517074 -2.053 0.0511 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03501 on 24 degrees of freedom
## Multiple R-squared: 0.7656, Adjusted R-squared: 0.7168
## F-statistic: 15.68 on 5 and 24 DF, p-value: 6.924e-07
# Variance inflation factors for the model with the raw interaction term.
car::vif(model2)
## time_period income price temp income:price
## 3.983615 1556.888866 276.951216 1.499392 1705.185349
# Mean-center price and income (scale = FALSE subtracts the mean without
# dividing by the standard deviation).
# scale() returns a one-column matrix carrying a "scaled:center" attribute;
# as.numeric() strips that so the new columns are plain numeric vectors
# rather than matrix columns embedded in the data frame.
df$price_cent <- as.numeric(scale(df$price, scale = FALSE))
df$income_cent <- as.numeric(scale(df$income, scale = FALSE))

# Preview the first four rows, now including the centered columns.
head(df, n = 4)
## time_period cons income price temp price_cent income_cent
## 1 1 0.386 78 0.270 41 -0.0053 -6.6
## 2 2 0.374 79 0.282 56 0.0067 -5.6
## 3 3 0.393 81 0.277 63 0.0017 -3.6
## 4 4 0.425 80 0.280 68 0.0047 -4.6
# Refit with the centered predictors: `.` expands to every column of df other
# than cons (including price_cent and income_cent), the raw income and price
# terms are removed, and the interaction of the centered variables is added.
model3 <- lm(
  formula = cons ~ . - income - price + price_cent:income_cent,
  data = df
)
summary(model3)
##
## Call:
## lm(formula = cons ~ . - income - price + price_cent:income_cent,
## data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.055281 -0.016492 -0.005084 0.013392 0.078093
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1931501 0.0274590 7.034 2.84e-07 ***
## time_period 0.0011791 0.0014741 0.800 0.4316
## temp 0.0029801 0.0004848 6.147 2.38e-06 ***
## price_cent -0.8756848 0.8049350 -1.088 0.2874
## income_cent 0.0003563 0.0023259 0.153 0.8795
## price_cent:income_cent -0.3114501 0.1517074 -2.053 0.0511 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03501 on 24 degrees of freedom
## Multiple R-squared: 0.7656, Adjusted R-squared: 0.7168
## F-statistic: 15.68 on 5 and 24 DF, p-value: 6.924e-07
# Variance inflation factors for the model built on the centered predictors.
car::vif(model3)
## time_period temp price_cent
## 3.983615 1.499392 1.066659
## income_cent price_cent:income_cent
## 4.991651 1.504199