# Predict the Sale Price of Computers
library(e1071)
library(car)
## Loading required package: carData
# Load the computer price data set.
# NOTE(review): absolute, machine-specific path - the script only runs where
# the CSV exists at exactly this location.
# stringsAsFactors = TRUE: since R 4.0.0 read.csv() keeps text columns as
# character by default. The yes/no columns (cd, multi, premium) have to be
# factors for the per-level counts shown by summary() and for the
# plot(cd, price) style calls further down to work.
ComputerData <- read.csv(
  "C:\\data science\\ds\\assignments\\Multi Linear Regression\\Computer_Data.csv",
  stringsAsFactors = TRUE
)
# Drop the first column (presumably a row-index column - confirm against the
# CSV).
ComputerData <- ComputerData[, -1]
# Keep a copy of the data as loaded.
orgdata <- ComputerData
# Later statements refer to columns by bare name (price, speed, ...), which
# relies on this attach() call.
attach(ComputerData)
# First moment business decision: location summary of every column.
summary(ComputerData)
## price speed hd ram
## Min. : 949 Min. : 25.00 Min. : 80.0 Min. : 2.000
## 1st Qu.:1794 1st Qu.: 33.00 1st Qu.: 214.0 1st Qu.: 4.000
## Median :2144 Median : 50.00 Median : 340.0 Median : 8.000
## Mean :2220 Mean : 52.01 Mean : 416.6 Mean : 8.287
## 3rd Qu.:2595 3rd Qu.: 66.00 3rd Qu.: 528.0 3rd Qu.: 8.000
## Max. :5399 Max. :100.00 Max. :2100.0 Max. :32.000
## screen cd multi premium ads
## Min. :14.00 no :3351 no :5386 no : 612 Min. : 39.0
## 1st Qu.:14.00 yes:2908 yes: 873 yes:5647 1st Qu.:162.5
## Median :14.00 Median :246.0
## Mean :14.61 Mean :221.3
## 3rd Qu.:15.00 3rd Qu.:275.0
## Max. :17.00 Max. :339.0
## trend
## Min. : 1.00
## 1st Qu.:10.00
## Median :16.00
## Mean :15.93
## 3rd Qu.:21.50
## Max. :35.00
# Second moment business decision: spread of each numeric column.
# Columns are addressed through the data frame explicitly so the results do
# not depend on attach() or on a same-named variable in the workspace.
# Standard deviations:
sd(ComputerData$price)
## [1] 580.804
sd(ComputerData$speed)
## [1] 21.15774
sd(ComputerData$hd)
## [1] 258.5484
sd(ComputerData$ram)
## [1] 5.631099
sd(ComputerData$screen)
## [1] 0.9051152
sd(ComputerData$ads)
## [1] 74.83528
sd(ComputerData$trend)
## [1] 7.873984
# Variances:
var(ComputerData$price)
## [1] 337333.2
var(ComputerData$speed)
## [1] 447.6498
var(ComputerData$hd)
## [1] 66847.3
var(ComputerData$ram)
## [1] 31.70928
var(ComputerData$screen)
## [1] 0.8192336
var(ComputerData$ads)
## [1] 5600.32
var(ComputerData$trend)
## [1] 61.99962
# Third moment business decision: skewness of each numeric column.
# Columns are addressed through the data frame explicitly so the results do
# not depend on attach() or on a same-named variable in the workspace.
skewness(ComputerData$price)
## [1] 0.7113836
skewness(ComputerData$speed)
## [1] 0.6566931
skewness(ComputerData$hd)
## [1] 1.377359
skewness(ComputerData$ram)
## [1] 1.385538
skewness(ComputerData$screen)
## [1] 1.633225
skewness(ComputerData$ads)
## [1] -0.5530629
skewness(ComputerData$trend)
## [1] 0.236556
# Fourth moment business decision: kurtosis of each numeric column.
# NOTE(review): several recorded values are negative, so these look like
# excess-kurtosis figures (normal distribution = 0) - confirm against the
# e1071 documentation.
kurtosis(ComputerData$price)
## [1] 0.7276838
kurtosis(ComputerData$speed)
## [1] -0.2770616
kurtosis(ComputerData$hd)
## [1] 2.447798
kurtosis(ComputerData$ram)
## [1] 1.458699
kurtosis(ComputerData$screen)
## [1] 1.847838
kurtosis(ComputerData$ads)
## [1] -0.5411566
kurtosis(ComputerData$trend)
## [1] -0.6752971
# Price against each predictor, one plot per predictor.
# with() looks the columns up inside ComputerData, so the plots do not depend
# on attach() or on same-named workspace variables, while the axis labels
# remain the bare column names.
with(ComputerData, plot(speed, price))

with(ComputerData, plot(hd, price))

with(ComputerData, plot(ram, price))

with(ComputerData, plot(screen, price))

with(ComputerData, plot(ads, price))

with(ComputerData, plot(trend, price))

# cd, multi and premium are yes/no columns; they must be factors for these
# three calls to draw (price is then summarised per level).
with(ComputerData, plot(cd, price))

with(ComputerData, plot(multi, price))

with(ComputerData, plot(premium, price))

# Scatter-plot matrix of all columns.
pairs(ComputerData)

# Correlation coefficient matrix - strength & direction of correlation.
# cor() accepts numeric input only, so cor(ComputerData) on the full frame
# stops with an error because of the yes/no columns (cd, multi, premium).
# Restrict the call to the numeric columns instead of disabling it.
numeric_cols <- vapply(ComputerData, is.numeric, logical(1))
cor(ComputerData[, numeric_cols])
# Baseline model: price regressed on all nine predictors, using every row of
# ComputerData. cd, multi and premium enter as yes/no indicators (the
# cdyes / multiyes / premiumyes coefficients below).
model <- lm(price ~ speed + hd + ram + screen + ads + trend + cd + multi + premium, data = ComputerData)
# Recorded fit: every coefficient is significant and R-squared is 0.7756.
summary(model)
##
## Call:
## lm(formula = price ~ speed + hd + ram + screen + ads + trend +
## cd + multi + premium, data = ComputerData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1093.77 -174.24 -11.49 146.49 2001.05
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 307.98798 60.35341 5.103 3.44e-07 ***
## speed 9.32028 0.18506 50.364 < 2e-16 ***
## hd 0.78178 0.02761 28.311 < 2e-16 ***
## ram 48.25596 1.06608 45.265 < 2e-16 ***
## screen 123.08904 3.99950 30.776 < 2e-16 ***
## ads 0.65729 0.05132 12.809 < 2e-16 ***
## trend -51.84958 0.62871 -82.470 < 2e-16 ***
## cdyes 60.91671 9.51559 6.402 1.65e-10 ***
## multiyes 104.32382 11.41268 9.141 < 2e-16 ***
## premiumyes -509.22473 12.34225 -41.259 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 275.3 on 6249 degrees of freedom
## Multiple R-squared: 0.7756, Adjusted R-squared: 0.7752
## F-statistic: 2399 on 9 and 6249 DF, p-value: < 2.2e-16
# Refit on all remaining columns (price ~ .) after removing rows 1441 and
# 1701. These are the two indices printed by qqPlot(model) at the end of the
# script; they are hard-coded here, so they must be revisited if the data
# changes.
model2 <- lm(price ~ ., data = ComputerData[-c(1441, 1701),])
# Recorded fit: R-squared rises slightly to 0.7777 and the largest residual
# drops from 2001.05 to 1510.65.
summary(model2)
##
## Call:
## lm(formula = price ~ ., data = ComputerData[-c(1441, 1701), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1095.65 -172.78 -10.84 146.42 1510.65
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 337.1635 59.9242 5.627 1.92e-08 ***
## speed 9.2992 0.1835 50.664 < 2e-16 ***
## hd 0.7749 0.0274 28.286 < 2e-16 ***
## ram 48.5222 1.0576 45.878 < 2e-16 ***
## screen 121.0926 3.9714 30.492 < 2e-16 ***
## cdyes 60.4964 9.4400 6.409 1.58e-10 ***
## multiyes 104.7703 11.3195 9.256 < 2e-16 ***
## premiumyes -509.8352 12.2409 -41.650 < 2e-16 ***
## ads 0.6510 0.0509 12.791 < 2e-16 ***
## trend -51.6496 0.6238 -82.793 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 273.1 on 6247 degrees of freedom
## Multiple R-squared: 0.7777, Adjusted R-squared: 0.7774
## F-statistic: 2428 on 9 and 6247 DF, p-value: < 2.2e-16
# Variance inflation factors (car) as a multicollinearity check; the largest
# recorded value is 4.21, for hd.
vif(model2)
## speed hd ram screen cd multi premium ads
## 1.265345 4.209700 2.976450 1.081808 1.859809 1.290653 1.109381 1.217240
## trend
## 2.024340
# Added-variable plots (car) for each term of model2.
avPlots(model2)

# Reduced model: same rows as model2 (1441 and 1701 removed) but with the cd
# and multi predictors dropped from the formula.
model3 <- lm(price ~ speed + hd + ram + screen + ads + trend + premium, data = ComputerData[-c(1441, 1701),])
# Recorded fit: adjusted R-squared is 0.7699, slightly below model2's 0.7774.
summary(model3)
##
## Call:
## lm(formula = price ~ speed + hd + ram + screen + ads + trend +
## premium, data = ComputerData[-c(1441, 1701), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1058.60 -173.65 -9.08 148.30 1512.02
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 300.88876 60.66031 4.96 7.23e-07 ***
## speed 9.35570 0.18644 50.18 < 2e-16 ***
## hd 0.78019 0.02762 28.25 < 2e-16 ***
## ram 49.90607 1.05985 47.09 < 2e-16 ***
## screen 119.33313 4.03484 29.58 < 2e-16 ***
## ads 0.74078 0.05084 14.57 < 2e-16 ***
## trend -49.14498 0.60515 -81.21 < 2e-16 ***
## premiumyes -478.43303 12.22543 -39.13 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 277.6 on 6249 degrees of freedom
## Multiple R-squared: 0.7701, Adjusted R-squared: 0.7699
## F-statistic: 2991 on 7 and 6249 DF, p-value: < 2.2e-16
# Added-variable plots (car) for each term of model3.
avPlots(model3)

# Diagnostic panels for the first fit (`model`: all rows, all predictors).
# which = c(1, 2, 3, 5) spells out the default panel set of plot() for lm fits.
plot(model, which = c(1, 2, 3, 5))




# Quantile-comparison plot of the residuals of `model` (car). The printed
# indices are the observations the plot singles out; they are the rows
# excluded when fitting model2 and model3.
car::qqPlot(model)

## [1] 1441 1701