Multiple Linear Regression

Import Libraries

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(onehot)
## Warning: package 'onehot' was built under R version 3.5.2

Data Preprocessing

# Load the raw computer data set.
# NOTE(review): absolute, machine-specific path -- adjust for your environment.
c.data <- read.csv("C:\\Users\\K.Saicharan\\Downloads\\Computer_Data.csv")
# View() opens a GUI data viewer; only call it in an interactive session so
# batch runs (Rscript, knitting) do not depend on a display.
if (interactive()) {
  View(c.data)
}
# Drop the first column by position.
# NOTE(review): presumably a row-index column saved with the CSV -- confirm.
c.data <- select(c.data, -1)
# Convert the three categorical columns to factors before one-hot encoding.
factor_cols <- c("cd", "multi", "premium")
c.data[factor_cols] <- lapply(c.data[factor_cols], as.factor)
# Build the one-hot encoder from the data, then apply it to the same data.
c.data1 <- onehot(c.data)
c.data2 <- predict(c.data1, c.data)
c.data2 <- data.frame(c.data2)
if (interactive()) {
  View(c.data2)
}
# Drop encoded columns 6, 8 and 10 by position, then give the columns that
# end up at positions 6-8 their plain names.
# NOTE(review): this assumes each factor expanded into two adjacent indicator
# columns and that the dropped one is the redundant level -- verify against
# names(c.data2) if the input schema changes.
c.data2 <- select(c.data2, -6, -8, -10)
names(c.data2)[6:8] <- c("cd", "multi", "premium")
# attach() puts the columns on the search path. It is kept here because
# model-fitting code later in the script may refer to columns by bare name;
# prefer passing `data = c.data2` to modelling functions instead.
attach(c.data2)

Model

# Pairwise correlation matrix of every column in the encoded data set
# (printed to the console).
stats::cor(x = c.data2)
##               price       speed          hd         ram       screen
## price    1.00000000  0.30097646  0.43025779  0.62274824  0.296041474
## speed    0.30097646  1.00000000  0.37230410  0.23476050  0.189074122
## hd       0.43025779  0.37230410  1.00000000  0.77772630  0.232801530
## ram      0.62274824  0.23476050  0.77772630  1.00000000  0.208953740
## screen   0.29604147  0.18907412  0.23280153  0.20895374  1.000000000
## cd       0.19734334  0.25825980  0.50357041  0.43850441  0.129487662
## multi   -0.01665139  0.08417193  0.09280483  0.04549689 -0.001740414
## premium -0.08069636  0.11420791  0.19692359  0.19714459  0.018745223
## ads      0.05454047 -0.21523206 -0.32322200 -0.18166971 -0.093919429
## trend   -0.19998694  0.40543833  0.57779013  0.27684384  0.188614445
##                  cd        multi     premium         ads       trend
## price    0.19734334 -0.016651388 -0.08069636  0.05454047 -0.19998694
## speed    0.25825980  0.084171934  0.11420791 -0.21523206  0.40543833
## hd       0.50357041  0.092804830  0.19692359 -0.32322200  0.57779013
## ram      0.43850441  0.045496894  0.19714459 -0.18166971  0.27684384
## screen   0.12948766 -0.001740414  0.01874522 -0.09391943  0.18861444
## cd       1.00000000  0.432179298  0.21607660 -0.06109108  0.44578018
## multi    0.43217930  1.000000000  0.12477474 -0.03039426  0.21090743
## premium  0.21607660  0.124774741  1.00000000 -0.15202274  0.04210738
## ads     -0.06109108 -0.030394260 -0.15202274  1.00000000 -0.31855251
## trend    0.44578018  0.210907431  0.04210738 -0.31855251  1.00000000
# Baseline linear model: price regressed on all predictors.
# `data = c.data2` makes the variable lookup explicit, so the fit no longer
# depends on the data frame having been attach()ed earlier.
ml <- lm(
  price ~ speed + hd + ram + screen + cd + multi + premium + ads + trend,
  data = c.data2
)
summary(ml)
## 
## Call:
## lm(formula = price ~ speed + hd + ram + screen + cd + multi + 
##     premium + ads + trend)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1093.77  -174.24   -11.49   146.49  2001.05 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  307.98798   60.35341   5.103 3.44e-07 ***
## speed          9.32028    0.18506  50.364  < 2e-16 ***
## hd             0.78178    0.02761  28.311  < 2e-16 ***
## ram           48.25596    1.06608  45.265  < 2e-16 ***
## screen       123.08904    3.99950  30.776  < 2e-16 ***
## cd            60.91671    9.51559   6.402 1.65e-10 ***
## multi        104.32382   11.41268   9.141  < 2e-16 ***
## premium     -509.22473   12.34225 -41.259  < 2e-16 ***
## ads            0.65729    0.05132  12.809  < 2e-16 ***
## trend        -51.84958    0.62871 -82.470  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 275.3 on 6249 degrees of freedom
## Multiple R-squared:  0.7756, Adjusted R-squared:  0.7752 
## F-statistic:  2399 on 9 and 6249 DF,  p-value: < 2.2e-16

Transformations to improve accuracy

# Same model with a square-root transform applied to every predictor.
# `data = c.data2` makes the variable lookup explicit, so the fit no longer
# depends on the data frame having been attach()ed earlier.
# NOTE(review): cd, multi and premium look like 0/1 indicator columns; if so,
# sqrt() leaves them unchanged and the transform only affects the others.
ml1 <- lm(
  price ~ sqrt(speed) + sqrt(hd) + sqrt(ram) + sqrt(screen) +
    sqrt(cd) + sqrt(multi) + sqrt(premium) + sqrt(ads) + sqrt(trend),
  data = c.data2
)
summary(ml1)
## 
## Call:
## lm(formula = price ~ sqrt(speed) + sqrt(hd) + sqrt(ram) + sqrt(screen) + 
##     sqrt(cd) + sqrt(multi) + sqrt(premium) + sqrt(ads) + sqrt(trend))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1166.68  -172.34   -18.83   146.68  1955.01 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -2309.268    117.448 -19.662  < 2e-16 ***
## sqrt(speed)     128.751      2.653  48.523  < 2e-16 ***
## sqrt(hd)         37.365      1.227  30.447  < 2e-16 ***
## sqrt(ram)       297.245      6.846  43.420  < 2e-16 ***
## sqrt(screen)    869.841     30.608  28.419  < 2e-16 ***
## sqrt(cd)         28.802      9.330   3.087  0.00203 ** 
## sqrt(multi)     102.068     11.102   9.194  < 2e-16 ***
## sqrt(premium)  -527.424     12.051 -43.768  < 2e-16 ***
## sqrt(ads)        40.549      1.304  31.088  < 2e-16 ***
## sqrt(trend)    -361.533      4.421 -81.770  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 268.1 on 6249 degrees of freedom
## Multiple R-squared:  0.7872, Adjusted R-squared:  0.7869 
## F-statistic:  2568 on 9 and 6249 DF,  p-value: < 2.2e-16