# Assignment 13: Multiple Linear Regression on Computer_Data.csv
# Load the computer price data set.
# `stringsAsFactors = TRUE` keeps the yes/no columns (cd, multi, premium) as
# factors on R >= 4.0 as well, so summary() reports the no/yes counts recorded
# in the output below instead of "Length / Class / Mode".
# NOTE(review): absolute, machine-specific path - adjust before running on
# another machine.
mydata <- read.csv(
  "C:\\Users\\RISHI RAHUL\\Desktop\\DS\\3 MLR\\Assignment\\Computer_Data.csv",
  stringsAsFactors = TRUE
)
# attach(mydata) is deliberately not called: every later statement in this
# script either passes `data =` or indexes `mydata` explicitly, and attaching
# a data frame only risks masking other objects on the search path.

# Column names, then a per-column summary, as a quick sanity check of the load.
colnames(mydata)
## [1] "price" "speed" "hd" "ram" "screen" "cd" "multi"
## [8] "premium" "ads" "trend"
summary(mydata)
## price speed hd ram
## Min. : 949 Min. : 25.00 Min. : 80.0 Min. : 2.000
## 1st Qu.:1794 1st Qu.: 33.00 1st Qu.: 214.0 1st Qu.: 4.000
## Median :2144 Median : 50.00 Median : 340.0 Median : 8.000
## Mean :2220 Mean : 52.01 Mean : 416.6 Mean : 8.287
## 3rd Qu.:2595 3rd Qu.: 66.00 3rd Qu.: 528.0 3rd Qu.: 8.000
## Max. :5399 Max. :100.00 Max. :2100.0 Max. :32.000
## screen cd multi premium ads
## Min. :14.00 no :3351 no :5386 no : 612 Min. : 39.0
## 1st Qu.:14.00 yes:2908 yes: 873 yes:5647 1st Qu.:162.5
## Median :14.00 Median :246.0
## Mean :14.61 Mean :221.3
## 3rd Qu.:15.00 3rd Qu.:275.0
## Max. :17.00 Max. :339.0
## trend
## Min. : 1.00
## 1st Qu.:10.00
## Median :16.00
## Mean :15.93
## 3rd Qu.:21.50
## Max. :35.00
# Pairwise correlations of the numeric columns; the three yes/no columns
# (cd, multi, premium - positions 6 to 8) are excluded by name.
cor(x = mydata[, setdiff(names(mydata), c("cd", "multi", "premium"))])
## price speed hd ram screen
## price 1.00000000 0.3009765 0.4302578 0.6227482 0.29604147
## speed 0.30097646 1.0000000 0.3723041 0.2347605 0.18907412
## hd 0.43025779 0.3723041 1.0000000 0.7777263 0.23280153
## ram 0.62274824 0.2347605 0.7777263 1.0000000 0.20895374
## screen 0.29604147 0.1890741 0.2328015 0.2089537 1.00000000
## ads 0.05454047 -0.2152321 -0.3232220 -0.1816697 -0.09391943
## trend -0.19998694 0.4054383 0.5777901 0.2768438 0.18861444
## ads trend
## price 0.05454047 -0.1999869
## speed -0.21523206 0.4054383
## hd -0.32322200 0.5777901
## ram -0.18166971 0.2768438
## screen -0.09391943 0.1886144
## ads 1.00000000 -0.3185525
## trend -0.31855251 1.0000000
# Scatter-plot matrix of the numeric columns (cd, multi and premium, the
# yes/no columns at positions 6 to 8, are left out).
plot(mydata[, !(names(mydata) %in% c("cd", "multi", "premium"))])

# Partial correlations derived from the correlation matrix of the same columns.
library(corpcor)
cor2pcor(cor(mydata[, !(names(mydata) %in% c("cd", "multi", "premium"))]))
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1.0000000 0.47421976 0.27854293 0.45050159 0.33404481
## [2,] 0.4742198 1.00000000 -0.04075062 -0.22416243 -0.07534729
## [3,] 0.2785429 -0.04075062 1.00000000 0.54127682 -0.07038820
## [4,] 0.4505016 -0.22416243 0.54127682 1.00000000 -0.08767010
## [5,] 0.3340448 -0.07534729 -0.07038820 -0.08767010 1.00000000
## [6,] 0.2113623 -0.15770602 -0.20201263 -0.03917875 -0.07686047
## [7,] -0.6610516 0.45620752 0.55770935 0.08558243 0.26462477
## [,6] [,7]
## [1,] 0.21136235 -0.66105162
## [2,] -0.15770602 0.45620752
## [3,] -0.20201263 0.55770935
## [4,] -0.03917875 0.08558243
## [5,] -0.07686047 0.26462477
## [6,] 1.00000000 0.05126715
## [7,] 0.05126715 1.00000000
# Multiple Linear Regression Model
# Baseline fit: price regressed on every other column of mydata.
model <- lm(formula = price ~ ., data = mydata)
summary(model)  # multiple R-squared of this fit: 0.7756
##
## Call:
## lm(formula = price ~ ., data = mydata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1093.77 -174.24 -11.49 146.49 2001.05
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 307.98798 60.35341 5.103 3.44e-07 ***
## speed 9.32028 0.18506 50.364 < 2e-16 ***
## hd 0.78178 0.02761 28.311 < 2e-16 ***
## ram 48.25596 1.06608 45.265 < 2e-16 ***
## screen 123.08904 3.99950 30.776 < 2e-16 ***
## cdyes 60.91671 9.51559 6.402 1.65e-10 ***
## multiyes 104.32382 11.41268 9.141 < 2e-16 ***
## premiumyes -509.22473 12.34225 -41.259 < 2e-16 ***
## ads 0.65729 0.05132 12.809 < 2e-16 ***
## trend -51.84958 0.62871 -82.470 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 275.3 on 6249 degrees of freedom
## Multiple R-squared: 0.7756, Adjusted R-squared: 0.7752
## F-statistic: 2399 on 9 and 6249 DF, p-value: < 2.2e-16
library(car)
## Warning: package 'car' was built under R version 3.5.1
## Loading required package: carData
# influence.measures(model)  # left disabled
# Index plots of the influence diagnostics for the baseline fit.
car::influenceIndexPlot(model)

# Studentized residuals against hat values; the labelled observations are
# also printed as a table.
car::influencePlot(model)

## StudRes Hat CookD
## 1441 7.3058529 0.002228075 0.011819949
## 1701 7.1838002 0.002464463 0.012647347
## 3784 -0.8667018 0.020972880 0.001609237
## 4478 -1.3795547 0.020060286 0.003895407
# Variance inflation factors of the baseline fit, as a multicollinearity check.
car::vif(model)
## speed hd ram screen cd multi premium ads
## 1.265364 4.207395 2.974628 1.081644 1.859370 1.290568 1.109388 1.217218
## trend
## 2.022790
# Refit on the data without the four observations labelled by influencePlot()
# (rows 1441, 1701, 3784 and 4478).
model2 <- lm(
  formula = price ~ speed + hd + ram + screen + ads + trend +
    cd + multi + premium,
  data = mydata[-c(1441, 1701, 3784, 4478), ]
)
summary(model2)  # R-squared: 0.7774
##
## Call:
## lm(formula = price ~ speed + hd + ram + screen + ads + trend +
## cd + multi + premium, data = mydata[-c(1441, 1701, 3784,
## 4478), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1094.21 -173.10 -10.94 146.35 1509.23
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 336.94855 59.92166 5.623 1.96e-08 ***
## speed 9.29551 0.18355 50.642 < 2e-16 ***
## hd 0.78355 0.02795 28.030 < 2e-16 ***
## ram 48.29482 1.06764 45.235 < 2e-16 ***
## screen 121.07383 3.97118 30.488 < 2e-16 ***
## ads 0.65465 0.05095 12.849 < 2e-16 ***
## trend -51.74355 0.62676 -82.558 < 2e-16 ***
## cdyes 60.31315 9.44029 6.389 1.79e-10 ***
## multiyes 104.85186 11.31912 9.263 < 2e-16 ***
## premiumyes -510.03064 12.24104 -41.666 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 273.1 on 6245 degrees of freedom
## Multiple R-squared: 0.7774, Adjusted R-squared: 0.777
## F-statistic: 2423 on 9 and 6245 DF, p-value: < 2.2e-16
# Analysing the final model combination
# AIC-based stepwise search, started from the baseline fit `model`.
library(MASS)
stepAIC(object = model)
## Start: AIC=70336.65
## price ~ speed + hd + ram + screen + cd + multi + premium + ads +
## trend
##
## Df Sum of Sq RSS AIC
## <none> 473783875 70337
## - cd 1 3107211 476891087 70376
## - multi 1 6335218 480119093 70418
## - ads 1 12439298 486223174 70497
## - hd 1 60768013 534551889 71090
## - screen 1 71812147 545596023 71218
## - premium 1 129062420 602846296 71843
## - ram 1 155342777 629126653 72110
## - speed 1 192316497 666100373 72467
## - trend 1 515661043 989444918 74944
##
## Call:
## lm(formula = price ~ speed + hd + ram + screen + cd + multi +
## premium + ads + trend, data = mydata)
##
## Coefficients:
## (Intercept) speed hd ram screen
## 307.9880 9.3203 0.7818 48.2560 123.0890
## cdyes multiyes premiumyes ads trend
## 60.9167 104.3238 -509.2247 0.6573 -51.8496
# Added-variable plots for each term of the baseline fit.
car::avPlots(model)

# Final model: log-transformed price, speed, hd and ram, fitted on the data
# without rows 1441, 1701, 3784 and 4478.
Final_Model <- lm(
  formula = log(price) ~ log(speed) + log(hd) + log(ram) + screen +
    ads + trend + cd + multi + premium,
  data = mydata[-c(1441, 1701, 3784, 4478), ]
)
summary(Final_Model)  # R-squared: 0.7918
##
## Call:
## lm(formula = log(price) ~ log(speed) + log(hd) + log(ram) + screen +
## ads + trend + cd + multi + premium, data = mydata[-c(1441,
## 1701, 3784, 4478), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.48044 -0.07549 -0.00289 0.07381 0.42541
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.492e+00 3.362e-02 163.329 < 2e-16 ***
## log(speed) 2.079e-01 4.130e-03 50.332 < 2e-16 ***
## log(hd) 1.434e-01 5.311e-03 27.003 < 2e-16 ***
## log(ram) 1.819e-01 4.447e-03 40.905 < 2e-16 ***
## screen 4.729e-02 1.719e-03 27.507 < 2e-16 ***
## ads 1.957e-04 2.166e-05 9.034 < 2e-16 ***
## trend -2.275e-02 2.759e-04 -82.470 < 2e-16 ***
## cdyes 4.635e-02 4.087e-03 11.341 < 2e-16 ***
## multiyes 3.398e-02 4.850e-03 7.005 2.73e-12 ***
## premiumyes -2.315e-01 5.301e-03 -43.673 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1176 on 6245 degrees of freedom
## Multiple R-squared: 0.7918, Adjusted R-squared: 0.7915
## F-statistic: 2638 on 9 and 6245 DF, p-value: < 2.2e-16
# Evaluate model LINE assumptions
# (Linearity, Independence, Normality of residuals, Equal variance).
# plot() on the fitted lm object draws its regression diagnostic plots.
plot(Final_Model)




# Histogram of the final model's residuals as a visual normality check.
hist(residuals(Final_Model)) # close to normal distribution
