Import Libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(onehot)
## Warning: package 'onehot' was built under R version 3.5.2
Data Preprocessing
# Load the raw data set.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs on a machine where this exact file exists.
c.data <- read.csv("C:\\Users\\K.Saicharan\\Downloads\\Computer_Data.csv")

# View() opens a GUI data viewer, so only call it in an interactive session.
if (interactive()) {
  View(c.data)
}

# Drop the first column (presumably a row-index column -- confirm against the
# CSV header).
c.data <- select(c.data, -1)

# Make the three categorical columns factors so that onehot() expands them
# into indicator columns.
c.data$cd <- as.factor(c.data$cd)
c.data$multi <- as.factor(c.data$multi)
c.data$premium <- as.factor(c.data$premium)

# The positional column drops below are only correct when each factor expands
# into exactly two indicator columns; fail loudly instead of silently
# selecting the wrong columns.
stopifnot(
  nlevels(c.data$cd) == 2L,
  nlevels(c.data$multi) == 2L,
  nlevels(c.data$premium) == 2L
)

# Build the one-hot encoder, apply it, and convert the resulting matrix back
# to a data frame.
c.data1 <- onehot(c.data)
c.data2 <- predict(c.data1, c.data)
c.data2 <- data.frame(c.data2)
if (interactive()) {
  View(c.data2)
}

# Keep a single indicator per factor by dropping one of each pair of
# indicator columns (positions 6, 8 and 10), then restore the original
# column names for the remaining indicators (now at positions 6 to 8).
c.data2 <- select(c.data2, -6, -8, -10)
names(c.data2)[6:8] <- c("cd", "multi", "premium")

# NOTE(review): attach() is discouraged because attached columns can be
# masked by same-named globals. It is kept here because later code refers to
# these columns by bare name.
attach(c.data2)
Model
# Pairwise Pearson correlation matrix of all columns (defaults spelled out).
cor(c.data2, use = "everything", method = "pearson")
## price speed hd ram screen
## price 1.00000000 0.30097646 0.43025779 0.62274824 0.296041474
## speed 0.30097646 1.00000000 0.37230410 0.23476050 0.189074122
## hd 0.43025779 0.37230410 1.00000000 0.77772630 0.232801530
## ram 0.62274824 0.23476050 0.77772630 1.00000000 0.208953740
## screen 0.29604147 0.18907412 0.23280153 0.20895374 1.000000000
## cd 0.19734334 0.25825980 0.50357041 0.43850441 0.129487662
## multi -0.01665139 0.08417193 0.09280483 0.04549689 -0.001740414
## premium -0.08069636 0.11420791 0.19692359 0.19714459 0.018745223
## ads 0.05454047 -0.21523206 -0.32322200 -0.18166971 -0.093919429
## trend -0.19998694 0.40543833 0.57779013 0.27684384 0.188614445
## cd multi premium ads trend
## price 0.19734334 -0.016651388 -0.08069636 0.05454047 -0.19998694
## speed 0.25825980 0.084171934 0.11420791 -0.21523206 0.40543833
## hd 0.50357041 0.092804830 0.19692359 -0.32322200 0.57779013
## ram 0.43850441 0.045496894 0.19714459 -0.18166971 0.27684384
## screen 0.12948766 -0.001740414 0.01874522 -0.09391943 0.18861444
## cd 1.00000000 0.432179298 0.21607660 -0.06109108 0.44578018
## multi 0.43217930 1.000000000 0.12477474 -0.03039426 0.21090743
## premium 0.21607660 0.124774741 1.00000000 -0.15202274 0.04210738
## ads -0.06109108 -0.030394260 -0.15202274 1.00000000 -0.31855251
## trend 0.44578018 0.210907431 0.04210738 -0.31855251 1.00000000
# Fit a multiple linear regression of price on all remaining predictors.
# Passing `data = c.data2` makes the formula resolve its variables from the
# data frame itself rather than from the attach()'d search path, where a
# same-named global variable would silently mask a column.
ml <- lm(
  price ~ speed + hd + ram + screen + cd + multi + premium + ads + trend,
  data = c.data2
)

# Coefficient table, residual summary and goodness-of-fit statistics.
summary(ml)
##
## Call:
## lm(formula = price ~ speed + hd + ram + screen + cd + multi +
## premium + ads + trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1093.77 -174.24 -11.49 146.49 2001.05
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 307.98798 60.35341 5.103 3.44e-07 ***
## speed 9.32028 0.18506 50.364 < 2e-16 ***
## hd 0.78178 0.02761 28.311 < 2e-16 ***
## ram 48.25596 1.06608 45.265 < 2e-16 ***
## screen 123.08904 3.99950 30.776 < 2e-16 ***
## cd 60.91671 9.51559 6.402 1.65e-10 ***
## multi 104.32382 11.41268 9.141 < 2e-16 ***
## premium -509.22473 12.34225 -41.259 < 2e-16 ***
## ads 0.65729 0.05132 12.809 < 2e-16 ***
## trend -51.84958 0.62871 -82.470 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 275.3 on 6249 degrees of freedom
## Multiple R-squared: 0.7756, Adjusted R-squared: 0.7752
## F-statistic: 2399 on 9 and 6249 DF, p-value: < 2.2e-16