# Load the company sales data into `sale` and inspect it.
# NOTE: the path is an absolute, machine-specific location; adjust it when
# running this script elsewhere.
# stringsAsFactors = TRUE is set explicitly: the summary recorded below shows
# ShelveLoc, Urban and US as factors, which is only the read.csv() default
# before R 4.0.0.
sale <- read.csv(
  "C:/Users/Shalini/Downloads/Company_Data (2).csv",
  stringsAsFactors = TRUE
)
# View() opens a data viewer, so only call it in an interactive session;
# batch or knitted runs skip it.
if (interactive()) {
  View(sale)
}
colnames(sale)
##  [1] "Sales"       "CompPrice"   "Income"      "Advertising" "Population" 
##  [6] "Price"       "ShelveLoc"   "Age"         "Education"   "Urban"      
## [11] "US"
summary(sale)
##      Sales          CompPrice       Income        Advertising    
##  Min.   : 0.000   Min.   : 77   Min.   : 21.00   Min.   : 0.000  
##  1st Qu.: 5.390   1st Qu.:115   1st Qu.: 42.75   1st Qu.: 0.000  
##  Median : 7.490   Median :125   Median : 69.00   Median : 5.000  
##  Mean   : 7.496   Mean   :125   Mean   : 68.66   Mean   : 6.635  
##  3rd Qu.: 9.320   3rd Qu.:135   3rd Qu.: 91.00   3rd Qu.:12.000  
##  Max.   :16.270   Max.   :175   Max.   :120.00   Max.   :29.000  
##    Population        Price        ShelveLoc        Age       
##  Min.   : 10.0   Min.   : 24.0   Bad   : 96   Min.   :25.00  
##  1st Qu.:139.0   1st Qu.:100.0   Good  : 85   1st Qu.:39.75  
##  Median :272.0   Median :117.0   Medium:219   Median :54.50  
##  Mean   :264.8   Mean   :115.8                Mean   :53.32  
##  3rd Qu.:398.5   3rd Qu.:131.0                3rd Qu.:66.00  
##  Max.   :509.0   Max.   :191.0                Max.   :80.00  
##    Education    Urban       US     
##  Min.   :10.0   No :118   No :142  
##  1st Qu.:12.0   Yes:282   Yes:258  
##  Median :14.0                      
##  Mean   :13.9                      
##  3rd Qu.:16.0                      
##  Max.   :18.0
# Correlation between Price and Income. Columns are referenced through `sale`
# explicitly instead of attach()-ing the data frame, so nothing is added to
# the search path and no global of the same name can mask a column.
cor(sale$Price, sale$Income)
## [1] -0.0566982
# Full linear model: Sales regressed on every other column of `sale`.
# Passing `data = sale` keeps the fit tied to the data frame it was built
# from; the recorded Call below reflects that argument.
model <- lm(
  Sales ~ CompPrice + Income + Advertising + Population + Price + ShelveLoc +
    Age + Education + Urban + US,
  data = sale
)
summary(model)
## 
## Call:
## lm(formula = Sales ~ CompPrice + Income + Advertising + Population + 
##     Price + ShelveLoc + Age + Education + Urban + US, data = sale)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8692 -0.6908  0.0211  0.6636  3.4115 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.6606231  0.6034487   9.380  < 2e-16 ***
## CompPrice        0.0928153  0.0041477  22.378  < 2e-16 ***
## Income           0.0158028  0.0018451   8.565 2.58e-16 ***
## Advertising      0.1230951  0.0111237  11.066  < 2e-16 ***
## Population       0.0002079  0.0003705   0.561    0.575    
## Price           -0.0953579  0.0026711 -35.700  < 2e-16 ***
## ShelveLocGood    4.8501827  0.1531100  31.678  < 2e-16 ***
## ShelveLocMedium  1.9567148  0.1261056  15.516  < 2e-16 ***
## Age             -0.0460452  0.0031817 -14.472  < 2e-16 ***
## Education       -0.0211018  0.0197205  -1.070    0.285    
## UrbanYes         0.1228864  0.1129761   1.088    0.277    
## USYes           -0.1840928  0.1498423  -1.229    0.220    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.019 on 388 degrees of freedom
## Multiple R-squared:  0.8734, Adjusted R-squared:  0.8698 
## F-statistic: 243.4 on 11 and 388 DF,  p-value: < 2.2e-16
# 95% confidence intervals for the coefficients of the full model.
confint(model, level = 0.95)
##                         2.5 %        97.5 %
## (Intercept)      4.4741845403  6.8470615860
## CompPrice        0.0846606548  0.1009700295
## Income           0.0121751564  0.0194305162
## Advertising      0.1012248453  0.1449653319
## Population      -0.0005204751  0.0009362292
## Price           -0.1006095157 -0.0901063219
## ShelveLocGood    4.5491536828  5.1512117392
## ShelveLocMedium  1.7087788955  2.2046507169
## Age             -0.0523007213 -0.0397896046
## Education       -0.0598742386  0.0176705608
## UrbanYes        -0.0992355421  0.3450083352
## USYes           -0.4786972882  0.1105116391
# Influence diagnostics for `model`.
# Alternatives kept for reference, not run:
# predict(model, interval = "predict")
# influence.measures(model)
library(mvinfluence)
## Loading required package: car
## Loading required package: carData
## Loading required package: heplots
# The three packages reported above are loaded by mvinfluence itself, so the
# explicit calls stay disabled:
# library(car)
# library(carData)
# library(heplots)
# Draw the influence index plot for the full-data fit.
influenceIndexPlot(model)

# Same specification as `model`, refit with row 358 of `sale` excluded.
model2 <- lm(
  Sales ~ CompPrice + Income + Advertising + Population + Price + ShelveLoc +
    Age + Education + Urban + US,
  data = sale[-358, ]
)
summary(model2)
## 
## Call:
## lm(formula = Sales ~ CompPrice + Income + Advertising + Population + 
##     Price + ShelveLoc + Age + Education + Urban + US, data = sale[-358, 
##     ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.88949 -0.69846  0.03323  0.66281  2.72243 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.5563789  0.5959245   9.324   <2e-16 ***
## CompPrice        0.0932581  0.0040927  22.787   <2e-16 ***
## Income           0.0158073  0.0018198   8.686   <2e-16 ***
## Advertising      0.1262867  0.0110098  11.470   <2e-16 ***
## Population       0.0001736  0.0003655   0.475    0.635    
## Price           -0.0947445  0.0026404 -35.883   <2e-16 ***
## ShelveLocGood    4.8439827  0.1510169  32.076   <2e-16 ***
## ShelveLocMedium  1.9331297  0.1245609  15.520   <2e-16 ***
## Age             -0.0452035  0.0031475 -14.362   <2e-16 ***
## Education       -0.0233510  0.0194605  -1.200    0.231    
## UrbanYes         0.1052450  0.1115412   0.944    0.346    
## USYes           -0.2310424  0.1484095  -1.557    0.120    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.005 on 387 degrees of freedom
## Multiple R-squared:  0.8758, Adjusted R-squared:  0.8723 
## F-statistic: 248.2 on 11 and 387 DF,  p-value: < 2.2e-16
# Refit the full specification with observations 358 and 311 both removed.
# Several rows must be dropped with one negative vector in the row position.
# The earlier form `sale[-358, -311, ]` passed -311 as a *column* index;
# `sale` has only 11 columns, so nothing extra was removed and the fit was
# identical to model2.
model3 <- lm(
  Sales ~ CompPrice + Income + Advertising + Population + Price + ShelveLoc +
    Age + Education + Urban + US,
  data = sale[-c(358, 311), ]
)
summary(model3)
# NOTE: the output previously recorded here came from the faulty subset and
# simply repeated model2's summary (residual standard error 1.005 on 387
# degrees of freedom). Re-run this chunk to record model3's own summary; with
# two rows removed from the 400-row data set the fit should report 386
# residual degrees of freedom.
# Packages tried during exploration; they are left disabled and are not used
# by the code visible in this section:
# library(caret)
# library(lattice)
# library(ggplot2)
# library(MASS)
# install.packages("corpcor")
# library(corpcor)

# Final model: the full specification fitted to every row of `sale`.
finalmodel <- lm(
  Sales ~ CompPrice + Income + Advertising + Population + Price + ShelveLoc +
    Age + Education + Urban + US,
  data = sale
)
summary(finalmodel)
## 
## Call:
## lm(formula = Sales ~ CompPrice + Income + Advertising + Population + 
##     Price + ShelveLoc + Age + Education + Urban + US, data = sale)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8692 -0.6908  0.0211  0.6636  3.4115 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.6606231  0.6034487   9.380  < 2e-16 ***
## CompPrice        0.0928153  0.0041477  22.378  < 2e-16 ***
## Income           0.0158028  0.0018451   8.565 2.58e-16 ***
## Advertising      0.1230951  0.0111237  11.066  < 2e-16 ***
## Population       0.0002079  0.0003705   0.561    0.575    
## Price           -0.0953579  0.0026711 -35.700  < 2e-16 ***
## ShelveLocGood    4.8501827  0.1531100  31.678  < 2e-16 ***
## ShelveLocMedium  1.9567148  0.1261056  15.516  < 2e-16 ***
## Age             -0.0460452  0.0031817 -14.472  < 2e-16 ***
## Education       -0.0211018  0.0197205  -1.070    0.285    
## UrbanYes         0.1228864  0.1129761   1.088    0.277    
## USYes           -0.1840928  0.1498423  -1.229    0.220    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.019 on 388 degrees of freedom
## Multiple R-squared:  0.8734, Adjusted R-squared:  0.8698 
## F-statistic: 243.4 on 11 and 388 DF,  p-value: < 2.2e-16
# Draw the diagnostic plots for the final fit.
plot(finalmodel)