# Load the company sales data: the response Sales plus ten candidate
# predictors (see the column listing recorded below).
# stringsAsFactors = TRUE is spelled out because R >= 4.0 no longer converts
# text columns to factors by default; with it the text columns are read as
# factors on every R version, so summary() reports per-level counts for them.
sale <- read.csv(
  "C:/Users/Shalini/Downloads/Company_Data (2).csv",
  stringsAsFactors = TRUE
)
# View() opens a spreadsheet-style viewer, so only call it when a person is
# actually at the console; batch runs (Rscript, knitting) skip it.
if (interactive()) {
  View(sale)
}
colnames(sale)
## [1] "Sales" "CompPrice" "Income" "Advertising" "Population"
## [6] "Price" "ShelveLoc" "Age" "Education" "Urban"
## [11] "US"
# Per-column summary of the loaded data frame.
summary(sale)
## Sales CompPrice Income Advertising
## Min. : 0.000 Min. : 77 Min. : 21.00 Min. : 0.000
## 1st Qu.: 5.390 1st Qu.:115 1st Qu.: 42.75 1st Qu.: 0.000
## Median : 7.490 Median :125 Median : 69.00 Median : 5.000
## Mean : 7.496 Mean :125 Mean : 68.66 Mean : 6.635
## 3rd Qu.: 9.320 3rd Qu.:135 3rd Qu.: 91.00 3rd Qu.:12.000
## Max. :16.270 Max. :175 Max. :120.00 Max. :29.000
## Population Price ShelveLoc Age
## Min. : 10.0 Min. : 24.0 Bad : 96 Min. :25.00
## 1st Qu.:139.0 1st Qu.:100.0 Good : 85 1st Qu.:39.75
## Median :272.0 Median :117.0 Medium:219 Median :54.50
## Mean :264.8 Mean :115.8 Mean :53.32
## 3rd Qu.:398.5 3rd Qu.:131.0 3rd Qu.:66.00
## Max. :509.0 Max. :191.0 Max. :80.00
## Education Urban US
## Min. :10.0 No :118 No :142
## 1st Qu.:12.0 Yes:282 Yes:258
## Median :14.0
## Mean :13.9
## 3rd Qu.:16.0
## Max. :18.0
# Pairwise correlation between Price and Income.
# Columns are referenced through the data frame instead of attach(sale):
# attach() copies the columns onto the search path, where they can be masked
# by (or mask) same-named objects and go stale if `sale` is modified later.
cor(sale$Price, sale$Income)
## [1] -0.0566982
# Full linear model of Sales on every other column. Passing data = sale makes
# lm() resolve the formula variables inside the data frame rather than in
# whatever happens to be on the search path.
model <- lm(
  Sales ~ CompPrice + Income + Advertising + Population + Price +
    ShelveLoc + Age + Education + Urban + US,
  data = sale
)
summary(model)
##
## Call:
## lm(formula = Sales ~ CompPrice + Income + Advertising + Population +
## Price + ShelveLoc + Age + Education + Urban + US)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8692 -0.6908 0.0211 0.6636 3.4115
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.6606231 0.6034487 9.380 < 2e-16 ***
## CompPrice 0.0928153 0.0041477 22.378 < 2e-16 ***
## Income 0.0158028 0.0018451 8.565 2.58e-16 ***
## Advertising 0.1230951 0.0111237 11.066 < 2e-16 ***
## Population 0.0002079 0.0003705 0.561 0.575
## Price -0.0953579 0.0026711 -35.700 < 2e-16 ***
## ShelveLocGood 4.8501827 0.1531100 31.678 < 2e-16 ***
## ShelveLocMedium 1.9567148 0.1261056 15.516 < 2e-16 ***
## Age -0.0460452 0.0031817 -14.472 < 2e-16 ***
## Education -0.0211018 0.0197205 -1.070 0.285
## UrbanYes 0.1228864 0.1129761 1.088 0.277
## USYes -0.1840928 0.1498423 -1.229 0.220
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.019 on 388 degrees of freedom
## Multiple R-squared: 0.8734, Adjusted R-squared: 0.8698
## F-statistic: 243.4 on 11 and 388 DF, p-value: < 2.2e-16
# 95% confidence intervals for every coefficient of the full model.
confint(object = model, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 4.4741845403 6.8470615860
## CompPrice 0.0846606548 0.1009700295
## Income 0.0121751564 0.0194305162
## Advertising 0.1012248453 0.1449653319
## Population -0.0005204751 0.0009362292
## Price -0.1006095157 -0.0901063219
## ShelveLocGood 4.5491536828 5.1512117392
## ShelveLocMedium 1.7087788955 2.2046507169
## Age -0.0523007213 -0.0397896046
## Education -0.0598742386 0.0176705608
## UrbanYes -0.0992355421 0.3450083352
## USYes -0.4786972882 0.1105116391
# Alternative diagnostics, currently disabled:
#predict(model,interval = "predict")
#influence.measures(model)
library(mvinfluence)
## Loading required package: car
## Loading required package: carData
## Loading required package: heplots
#library(car)
#library(carData)
#library(heplots)
# influenceIndexPlot() is provided by car. The call is namespace-qualified so
# it does not depend on library(mvinfluence) attaching car as a side effect
# (as the load messages recorded above show it did in this session).
car::influenceIndexPlot(model)

# Refit the full model with observation 358 left out of the data.
model2 <- lm(
  Sales ~ CompPrice + Income + Advertising + Population + Price +
    ShelveLoc + Age + Education + Urban + US,
  data = sale[-358, ]
)
summary(model2)
##
## Call:
## lm(formula = Sales ~ CompPrice + Income + Advertising + Population +
## Price + ShelveLoc + Age + Education + Urban + US, data = sale[-358,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.88949 -0.69846 0.03323 0.66281 2.72243
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.5563789 0.5959245 9.324 <2e-16 ***
## CompPrice 0.0932581 0.0040927 22.787 <2e-16 ***
## Income 0.0158073 0.0018198 8.686 <2e-16 ***
## Advertising 0.1262867 0.0110098 11.470 <2e-16 ***
## Population 0.0001736 0.0003655 0.475 0.635
## Price -0.0947445 0.0026404 -35.883 <2e-16 ***
## ShelveLocGood 4.8439827 0.1510169 32.076 <2e-16 ***
## ShelveLocMedium 1.9331297 0.1245609 15.520 <2e-16 ***
## Age -0.0452035 0.0031475 -14.362 <2e-16 ***
## Education -0.0233510 0.0194605 -1.200 0.231
## UrbanYes 0.1052450 0.1115412 0.944 0.346
## USYes -0.2310424 0.1484095 -1.557 0.120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.005 on 387 degrees of freedom
## Multiple R-squared: 0.8758, Adjusted R-squared: 0.8723
## F-statistic: 248.2 on 11 and 387 DF, p-value: < 2.2e-16
# Refit the full model with observations 358 and 311 both left out.
# The two row numbers must go in ONE negative row-index vector. The previous
# form, sale[-358,-311,], passed -311 in the column position, so only row 358
# was dropped and the fit came out identical to model2 (same coefficients,
# same 387 residual degrees of freedom).
# NOTE: the transcript pasted below was recorded with that earlier indexing;
# re-run this block to refresh it.
model3 <- lm(
  Sales ~ CompPrice + Income + Advertising + Population + Price +
    ShelveLoc + Age + Education + Urban + US,
  data = sale[-c(358, 311), ]
)
summary(model3)
##
## Call:
## lm(formula = Sales ~ CompPrice + Income + Advertising + Population +
## Price + ShelveLoc + Age + Education + Urban + US, data = sale[-358,
## -311, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.88949 -0.69846 0.03323 0.66281 2.72243
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.5563789 0.5959245 9.324 <2e-16 ***
## CompPrice 0.0932581 0.0040927 22.787 <2e-16 ***
## Income 0.0158073 0.0018198 8.686 <2e-16 ***
## Advertising 0.1262867 0.0110098 11.470 <2e-16 ***
## Population 0.0001736 0.0003655 0.475 0.635
## Price -0.0947445 0.0026404 -35.883 <2e-16 ***
## ShelveLocGood 4.8439827 0.1510169 32.076 <2e-16 ***
## ShelveLocMedium 1.9331297 0.1245609 15.520 <2e-16 ***
## Age -0.0452035 0.0031475 -14.362 <2e-16 ***
## Education -0.0233510 0.0194605 -1.200 0.231
## UrbanYes 0.1052450 0.1115412 0.944 0.346
## USYes -0.2310424 0.1484095 -1.557 0.120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.005 on 387 degrees of freedom
## Multiple R-squared: 0.8758, Adjusted R-squared: 0.8723
## F-statistic: 248.2 on 11 and 387 DF, p-value: < 2.2e-16
#library(caret)
#library(lattice)
#library(ggplot2)
#library(MASS)
#install.packages("corpcor")
#library(corpcor)
# Final model: all ten predictors, fitted on the complete data frame
# (no observations removed).
finalmodel <- lm(
  Sales ~ CompPrice + Income + Advertising + Population + Price +
    ShelveLoc + Age + Education + Urban + US,
  data = sale
)
summary(finalmodel)
##
## Call:
## lm(formula = Sales ~ CompPrice + Income + Advertising + Population +
## Price + ShelveLoc + Age + Education + Urban + US, data = sale)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8692 -0.6908 0.0211 0.6636 3.4115
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.6606231 0.6034487 9.380 < 2e-16 ***
## CompPrice 0.0928153 0.0041477 22.378 < 2e-16 ***
## Income 0.0158028 0.0018451 8.565 2.58e-16 ***
## Advertising 0.1230951 0.0111237 11.066 < 2e-16 ***
## Population 0.0002079 0.0003705 0.561 0.575
## Price -0.0953579 0.0026711 -35.700 < 2e-16 ***
## ShelveLocGood 4.8501827 0.1531100 31.678 < 2e-16 ***
## ShelveLocMedium 1.9567148 0.1261056 15.516 < 2e-16 ***
## Age -0.0460452 0.0031817 -14.472 < 2e-16 ***
## Education -0.0211018 0.0197205 -1.070 0.285
## UrbanYes 0.1228864 0.1129761 1.088 0.277
## USYes -0.1840928 0.1498423 -1.229 0.220
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.019 on 388 degrees of freedom
## Multiple R-squared: 0.8734, Adjusted R-squared: 0.8698
## F-statistic: 243.4 on 11 and 388 DF, p-value: < 2.2e-16
# Draw the standard lm diagnostic plots for the final model.
plot(x = finalmodel)



