# Multiple Linear Regression

# Predict the Price of a Computer

library(e1071)
library(car)
## Loading required package: carData
# Load the computer price data set.
# stringsAsFactors is set explicitly: from R 4.0.0 read.csv() defaults to
# stringsAsFactors = FALSE, which would leave cd, multi and premium as
# character vectors. The recorded summary() output (no/yes counts) and the
# later plot(cd, price) and pairs(ComputerData) calls all expect factors.
ComputerData <- read.csv(
  "C:\\data science\\ds\\assignments\\Multi Linear Regression\\Computer_Data.csv",
  stringsAsFactors = TRUE
)

# Drop the first column.
# NOTE(review): presumably a row-index column stored in the CSV - confirm
# against the file.
ComputerData <- ComputerData[, -1]
# Keep a second reference to the data as loaded (after the column drop).
orgdata <- ComputerData

# Statements further down refer to columns by bare name (price, speed, ...),
# so the data frame is placed on the search path here.
attach(ComputerData)

# First Moment Business Decision
# Location summary (min, quartiles, median, mean, max) of every numeric column
# and level counts for the factor columns.
summary(ComputerData)
##      price          speed              hd              ram        
##  Min.   : 949   Min.   : 25.00   Min.   :  80.0   Min.   : 2.000  
##  1st Qu.:1794   1st Qu.: 33.00   1st Qu.: 214.0   1st Qu.: 4.000  
##  Median :2144   Median : 50.00   Median : 340.0   Median : 8.000  
##  Mean   :2220   Mean   : 52.01   Mean   : 416.6   Mean   : 8.287  
##  3rd Qu.:2595   3rd Qu.: 66.00   3rd Qu.: 528.0   3rd Qu.: 8.000  
##  Max.   :5399   Max.   :100.00   Max.   :2100.0   Max.   :32.000  
##      screen        cd       multi      premium         ads       
##  Min.   :14.00   no :3351   no :5386   no : 612   Min.   : 39.0  
##  1st Qu.:14.00   yes:2908   yes: 873   yes:5647   1st Qu.:162.5  
##  Median :14.00                                    Median :246.0  
##  Mean   :14.61                                    Mean   :221.3  
##  3rd Qu.:15.00                                    3rd Qu.:275.0  
##  Max.   :17.00                                    Max.   :339.0  
##      trend      
##  Min.   : 1.00  
##  1st Qu.:10.00  
##  Median :16.00  
##  Mean   :15.93  
##  3rd Qu.:21.50  
##  Max.   :35.00
# Second Moment Business Decision
# Spread of each numeric column: standard deviation first, then variance.
# Columns are read straight from ComputerData rather than through the attach()
# search path, so a same-named object in the workspace cannot mask them.
sd(ComputerData$price)
## [1] 580.804
sd(ComputerData$speed)
## [1] 21.15774
sd(ComputerData$hd)
## [1] 258.5484
sd(ComputerData$ram)
## [1] 5.631099
sd(ComputerData$screen)
## [1] 0.9051152
sd(ComputerData$ads)
## [1] 74.83528
sd(ComputerData$trend)
## [1] 7.873984
var(ComputerData$price)
## [1] 337333.2
var(ComputerData$speed)
## [1] 447.6498
var(ComputerData$hd)
## [1] 66847.3
var(ComputerData$ram)
## [1] 31.70928
var(ComputerData$screen)
## [1] 0.8192336
var(ComputerData$ads)
## [1] 5600.32
var(ComputerData$trend)
## [1] 61.99962
# Third Moment Business Decision
# Skewness of each numeric column (skewness() comes from the e1071 package
# loaded at the top of the script). Columns are read straight from
# ComputerData rather than through the attach() search path, so a same-named
# object in the workspace cannot mask them.
skewness(ComputerData$price)
## [1] 0.7113836
skewness(ComputerData$speed)
## [1] 0.6566931
skewness(ComputerData$hd)
## [1] 1.377359
skewness(ComputerData$ram)
## [1] 1.385538
skewness(ComputerData$screen)
## [1] 1.633225
skewness(ComputerData$ads)
## [1] -0.5530629
skewness(ComputerData$trend)
## [1] 0.236556
# Fourth Moment Business Decision
# Kurtosis of each numeric column (kurtosis() also comes from e1071).
# NOTE(review): the negative recorded values indicate these are excess-kurtosis
# figures (normal distribution = 0) - confirm against the e1071 documentation.
kurtosis(ComputerData$price)
## [1] 0.7276838
kurtosis(ComputerData$speed)
## [1] -0.2770616
kurtosis(ComputerData$hd)
## [1] 2.447798
kurtosis(ComputerData$ram)
## [1] 1.458699
kurtosis(ComputerData$screen)
## [1] 1.847838
kurtosis(ComputerData$ads)
## [1] -0.5411566
kurtosis(ComputerData$trend)
## [1] -0.6752971
# Scatter plots of price against each numeric predictor.
# The formula interface with data = ComputerData looks the columns up in the
# data frame itself, so these calls do not depend on attach() and cannot be
# masked by same-named objects in the workspace.
plot(price ~ speed, data = ComputerData)

plot(price ~ hd, data = ComputerData)

plot(price ~ ram, data = ComputerData)

plot(price ~ screen, data = ComputerData)

plot(price ~ ads, data = ComputerData)

plot(price ~ trend, data = ComputerData)

# Box plots of price for each level of the yes/no features. boxplot() is
# called explicitly so the plot type does not depend on how the columns were
# typed when the CSV was read.
boxplot(price ~ cd, data = ComputerData, xlab = "cd", ylab = "price")

boxplot(price ~ multi, data = ComputerData, xlab = "multi", ylab = "price")

boxplot(price ~ premium, data = ComputerData, xlab = "premium", ylab = "price")

# Scatter-plot matrix of every pair of columns.
pairs(ComputerData)

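# Correlation coefficient matrix - strength and direction of linear association.
# cor() accepts only numeric input, and ComputerData also holds the yes/no
# columns (cd, multi, premium), so the call on the whole data frame is left
# disabled. To run it, restrict it to the numeric columns:
# cor(ComputerData[vapply(ComputerData, is.numeric, logical(1))])
#cor(ComputerData)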

# Baseline fit: price regressed on all nine predictors, using every row of
# ComputerData. The object is reused by the diagnostic plots at the end of the
# script.
model <- lm(
  formula = price ~ speed + hd + ram + screen + ads + trend + cd + multi +
    premium,
  data = ComputerData
)
# Coefficient table and fit statistics for the baseline model.
summary(object = model)
## 
## Call:
## lm(formula = price ~ speed + hd + ram + screen + ads + trend + 
##     cd + multi + premium, data = ComputerData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1093.77  -174.24   -11.49   146.49  2001.05 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  307.98798   60.35341   5.103 3.44e-07 ***
## speed          9.32028    0.18506  50.364  < 2e-16 ***
## hd             0.78178    0.02761  28.311  < 2e-16 ***
## ram           48.25596    1.06608  45.265  < 2e-16 ***
## screen       123.08904    3.99950  30.776  < 2e-16 ***
## ads            0.65729    0.05132  12.809  < 2e-16 ***
## trend        -51.84958    0.62871 -82.470  < 2e-16 ***
## cdyes         60.91671    9.51559   6.402 1.65e-10 ***
## multiyes     104.32382   11.41268   9.141  < 2e-16 ***
## premiumyes  -509.22473   12.34225 -41.259  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 275.3 on 6249 degrees of freedom
## Multiple R-squared:  0.7756, Adjusted R-squared:  0.7752 
## F-statistic:  2399 on 9 and 6249 DF,  p-value: < 2.2e-16
# Refit on all predictors with rows 1441 and 1701 left out. Those are the two
# row numbers printed by the qqPlot(model) call at the end of the script.
model2 <- lm(
  formula = price ~ .,
  data = ComputerData[-c(1441, 1701), ]
)
summary(object = model2)
## 
## Call:
## lm(formula = price ~ ., data = ComputerData[-c(1441, 1701), ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1095.65  -172.78   -10.84   146.42  1510.65 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  337.1635    59.9242   5.627 1.92e-08 ***
## speed          9.2992     0.1835  50.664  < 2e-16 ***
## hd             0.7749     0.0274  28.286  < 2e-16 ***
## ram           48.5222     1.0576  45.878  < 2e-16 ***
## screen       121.0926     3.9714  30.492  < 2e-16 ***
## cdyes         60.4964     9.4400   6.409 1.58e-10 ***
## multiyes     104.7703    11.3195   9.256  < 2e-16 ***
## premiumyes  -509.8352    12.2409 -41.650  < 2e-16 ***
## ads            0.6510     0.0509  12.791  < 2e-16 ***
## trend        -51.6496     0.6238 -82.793  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 273.1 on 6247 degrees of freedom
## Multiple R-squared:  0.7777, Adjusted R-squared:  0.7774 
## F-statistic:  2428 on 9 and 6247 DF,  p-value: < 2.2e-16
# Variance inflation factors for the refit (vif() is from car); the largest
# recorded value is 4.21, for hd.
vif(model2)
##    speed       hd      ram   screen       cd    multi  premium      ads 
## 1.265345 4.209700 2.976450 1.081808 1.859809 1.290653 1.109381 1.217240 
##    trend 
## 2.024340
# Added-variable plots, one per predictor in model2.
avPlots(model2)
# Reduced model on the same trimmed data (rows 1441 and 1701 excluded): cd and
# multi are left out of the predictor set.
# NOTE(review): both terms were significant in model2 and the recorded adjusted
# R-squared drops from 0.7774 to 0.7699 here - confirm that dropping them is
# intended.
model3 <- lm(
  formula = price ~ speed + hd + ram + screen + ads + trend + premium,
  data = ComputerData[-c(1441, 1701), ]
)
summary(object = model3)
## 
## Call:
## lm(formula = price ~ speed + hd + ram + screen + ads + trend + 
##     premium, data = ComputerData[-c(1441, 1701), ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1058.60  -173.65    -9.08   148.30  1512.02 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  300.88876   60.66031    4.96 7.23e-07 ***
## speed          9.35570    0.18644   50.18  < 2e-16 ***
## hd             0.78019    0.02762   28.25  < 2e-16 ***
## ram           49.90607    1.05985   47.09  < 2e-16 ***
## screen       119.33313    4.03484   29.58  < 2e-16 ***
## ads            0.74078    0.05084   14.57  < 2e-16 ***
## trend        -49.14498    0.60515  -81.21  < 2e-16 ***
## premiumyes  -478.43303   12.22543  -39.13  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 277.6 on 6249 degrees of freedom
## Multiple R-squared:  0.7701, Adjusted R-squared:  0.7699 
## F-statistic:  2991 on 7 and 6249 DF,  p-value: < 2.2e-16
# Added-variable plots, one per predictor in model3.
avPlots(model3)

# Standard lm diagnostic plots for the baseline model (fitted on all rows).
plot(x = model)

# Quantile-comparison plot of the baseline model's residuals (qqPlot() is from
# car). The two row numbers it prints are the rows excluded when fitting
# model2 and model3.
qqPlot(model)

## [1] 1441 1701