As the price increases, the sales goes down -50 unit. And assuming all variables fixed, if the location is the U.S. the unit of Carseat sold goes up 1.2 unit.
Of course, Urban
attach(Carseats)
model <- lm(Carseats, formula = Sales ~ Price + Urban + US)
summary(model)
##
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9206 -1.6220 -0.0564 1.5786 7.0581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.043469 0.651012 20.036 < 2e-16 ***
## Price -0.054459 0.005242 -10.389 < 2e-16 ***
## UrbanYes -0.021916 0.271650 -0.081 0.936
## USYes 1.200573 0.259042 4.635 4.86e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2335
## F-statistic: 41.52 on 3 and 396 DF, p-value: < 2.2e-16
model2 <- update(model, Sales ~. - Urban)
summary(model2)
##
## Call:
## lm(formula = Sales ~ Price + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9269 -1.6286 -0.0574 1.5766 7.0515
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.03079 0.63098 20.652 < 2e-16 ***
## Price -0.05448 0.00523 -10.416 < 2e-16 ***
## USYes 1.19964 0.25846 4.641 4.71e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2354
## F-statistic: 62.43 on 2 and 397 DF, p-value: < 2.2e-16
Given the anova analysis, there is not much difference between model and model2.
anova(model, model2)
## Analysis of Variance Table
##
## Model 1: Sales ~ Price + Urban + US
## Model 2: Sales ~ Price + US
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 396 2420.8
## 2 397 2420.9 -1 -0.03979 0.0065 0.9357
As you see in the diagnostic plot, not much differnece here.
par(mfrow = c(2,2))
plot(model)
par(mfrow = c(2,2))
plot(model2)
But with the rules of Ocum’s razor, I would choose the simpler model, the first one. Also for the question e) yes. There is the sign of high leverage.
kable(confint(model2, level = 0.95))
| 2.5 % | 97.5 % | |
|---|---|---|
| (Intercept) | 11.7903202 | 14.2712653 |
| Price | -0.0647598 | -0.0441954 |
| USYes | 0.6915196 | 1.7077663 |
As you see, the coefficeint is significant.
set.seed (1)
x <- rnorm (100)
y <- 2* x+rnorm (100)
xy <- data.frame(cbind(x,y))
modxy <- lm(xy, formula = y ~ x + 0)
summary(modxy)
##
## Call:
## lm(formula = y ~ x + 0, data = xy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9154 -0.6472 -0.1771 0.5056 2.3109
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## x 1.9939 0.1065 18.73 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9586 on 99 degrees of freedom
## Multiple R-squared: 0.7798, Adjusted R-squared: 0.7776
## F-statistic: 350.7 on 1 and 99 DF, p-value: < 2.2e-16
modyx <- lm(xy, formula = x ~ y + 0)
summary(modyx)
##
## Call:
## lm(formula = x ~ y + 0, data = xy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8699 -0.2368 0.1030 0.2858 0.8938
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## y 0.39111 0.02089 18.73 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4246 on 99 degrees of freedom
## Multiple R-squared: 0.7798, Adjusted R-squared: 0.7776
## F-statistic: 350.7 on 1 and 99 DF, p-value: < 2.2e-16
Equation (b) is reverse function of (a)
dt <- data.frame(do.call(rbind, list(coef(modxy), coef(modyx))))
rownames(dt) <- c("x","y"); colnames(dt) <- "Coefficeints"
kable(dt)
| Coefficeints | |
|---|---|
| x | 1.9938761 |
| y | 0.3911145 |
g <- ggplot(data = xy, aes(x = x, y = y))
g + geom_point(aes(x=x,y=y))+ geom_line(aes(x = x, y = predict(modxy)))+ geom_line(aes(x = y, y = predict(modyx), colour = "red"))
a <- data.frame(summary(modxy)$coefficient)
b <- data.frame(summary(modyx)$coefficient)
kable(a)
| Estimate | Std..Error | t.value | Pr…t.. | |
|---|---|---|---|---|
| x | 1.993876 | 0.1064767 | 18.72593 | 0 |
beta <- coef(modxy)
beta
## x
## 1.993876
sum(x^2)
## [1] 81.05509
den <- (length(y)-1)*sum(x^2)
num <- sum((y-x*beta)^2)
sqrt(num/den)==a$Std..Error
## [1] FALSE
Tnum <- sqrt(length(y)-1)*sum(x*y)
Tden <- sqrt(sum(x^2)*sum(y^2) - sum(x*y)^2)
round(a$t.value,5) == round(Tnum/Tden,5)
## [1] TRUE
beta <- coef(modyx)
beta
## y
## 0.3911145
sum(y^2)
## [1] 413.2135
den <- (length(x)-1)*sum(y^2)
num <- sum((x-y*beta)^2)
round(sqrt(num/den),5)==round(b$Std..Error,5)
## [1] TRUE
Tnum <- sqrt(length(x)-1)*sum(x*y)
Tden <- sqrt(sum(x^2)*sum(y^2) - sum(x*y)^2)
round(b$t.value,5) == round(Tnum/Tden,5)
## [1] TRUE
Same.
modxyI <- lm(xy, formula = y ~ x)
summary(modxyI)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.03769261 0.09698729 -0.3886346 6.983896e-01
## x 1.99893961 0.10772703 18.5555993 7.723851e-34