# Load the computer price data into the `computer` data frame.
# The CSV location defaults to the original hard-coded path and can be
# overridden through the COMPUTER_DATA_CSV environment variable, so the script
# is not tied to one machine's drive layout. local() keeps the helper variable
# out of the global environment.
computer <- local({
  csv_path <- Sys.getenv("COMPUTER_DATA_CSV", unset = "")
  if (!nzchar(csv_path)) {
    csv_path <- "E:\\Data science\\Computer_Data.csv"
  }
  read.csv(csv_path)
})
# View() opens a data viewer window; only do that in an interactive session.
if (interactive()) {
  View(computer)
}
dim(computer)
## [1] 6259 6
# NOTE(review): attach() is kept so that any later code relying on bare column
# names keeps working; the models visible in this script all pass
# `data = computer` explicitly, so it can likely be dropped -- confirm first.
# Detach an earlier copy before attaching so re-running the script does not
# stack duplicate (and possibly stale) copies on the search path.
if ("computer" %in% search()) {
  detach("computer", character.only = TRUE)
}
attach(computer)
# Exploratory Data Analysis (roughly 60% of the time)
# 1. Measures of central tendency
# 2. Measures of dispersion
# 3. Third moment business decision (skewness)
# 4. Fourth moment business decision (kurtosis)
# 5. Probability distributions of variables
# 6. Graphical representations
#    > Histogram, box plot, dot plot, stem-and-leaf plot,
#      bar plot
# Per-column summary statistics: minimum, quartiles, median, mean and maximum.
summary(computer)
## price speed hd ram
## Min. : 949 Min. : 25.00 Min. : 80.0 Min. : 2.000
## 1st Qu.:1794 1st Qu.: 33.00 1st Qu.: 214.0 1st Qu.: 4.000
## Median :2144 Median : 50.00 Median : 340.0 Median : 8.000
## Mean :2220 Mean : 52.01 Mean : 416.6 Mean : 8.287
## 3rd Qu.:2595 3rd Qu.: 66.00 3rd Qu.: 528.0 3rd Qu.: 8.000
## Max. :5399 Max. :100.00 Max. :2100.0 Max. :32.000
## screen ads
## Min. :14.00 Min. : 39.0
## 1st Qu.:14.00 1st Qu.:162.5
## Median :14.00 Median :246.0
## Mean :14.61 Mean :221.3
## 3rd Qu.:15.00 3rd Qu.:275.0
## Max. :17.00 Max. :339.0
# Pairwise correlation matrix (cor() defaults to Pearson).
# cor() stops with an error if the data frame holds any non-numeric column, so
# keep only the numeric columns first. When every column is numeric the result
# is identical to cor(computer).
cor(Filter(is.numeric, computer))
## price speed hd ram screen ads
## price 1.00000000 0.3009765 0.4302578 0.6227482 0.29604147 0.05454047
## speed 0.30097646 1.0000000 0.3723041 0.2347605 0.18907412 -0.21523206
## hd 0.43025779 0.3723041 1.0000000 0.7777263 0.23280153 -0.32322200
## ram 0.62274824 0.2347605 0.7777263 1.0000000 0.20895374 -0.18166971
## screen 0.29604147 0.1890741 0.2328015 0.2089537 1.00000000 -0.09391943
## ads 0.05454047 -0.2152321 -0.3232220 -0.1816697 -0.09391943 1.00000000
# Scatterplot matrix of every pair of columns.
# For a data frame with more than two columns plot() draws the same matrix as
# pairs(), so the (slow) figure is drawn once instead of twice.
pairs(computer)

# Open a fresh graphics device so later plots do not overwrite the matrix.
# dev.new() opens the platform's default device, whereas windows() exists only
# on Windows builds of R and aborts the script elsewhere.
dev.new()
# Linear regression models ----
# m1: baseline fit of price on the five untransformed predictors.
m1 <- lm(
  price ~ speed + hd + ram + screen + ads,
  data = computer
)
summary(m1)
##
## Call:
## lm(formula = price ~ speed + hd + ram + screen + ads, data = computer)
##
## Residuals:
## Min 1Q Median 3Q Max
## -907.35 -281.57 -71.97 201.70 2472.20
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -405.73236 88.39492 -4.59 4.52e-06 ***
## speed 5.75103 0.27172 21.17 < 2e-16 ***
## hd -0.40739 0.03522 -11.56 < 2e-16 ***
## ram 73.59740 1.50119 49.03 < 2e-16 ***
## screen 107.19445 6.01382 17.82 < 2e-16 ***
## ads 1.44617 0.07502 19.28 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 415.3 on 6253 degrees of freedom
## Multiple R-squared: 0.489, Adjusted R-squared: 0.4886
## F-statistic: 1197 on 5 and 6253 DF, p-value: < 2.2e-16
# m2: price against the natural log of every predictor.
m2 <- lm(
  price ~ log(speed) + log(hd) + log(ram) + log(screen) + log(ads),
  data = computer
)
summary(m2)
##
## Call:
## lm(formula = price ~ log(speed) + log(hd) + log(ram) + log(screen) +
## log(ads), data = computer)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1080.61 -265.83 -51.88 204.82 2374.08
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4410.94 253.10 -17.43 <2e-16 ***
## log(speed) 323.14 14.10 22.92 <2e-16 ***
## log(hd) -227.43 15.10 -15.06 <2e-16 ***
## log(ram) 709.63 13.85 51.25 <2e-16 ***
## log(screen) 1486.48 90.66 16.40 <2e-16 ***
## log(ads) 257.82 11.99 21.51 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 406.2 on 6253 degrees of freedom
## Multiple R-squared: 0.5112, Adjusted R-squared: 0.5108
## F-statistic: 1308 on 5 and 6253 DF, p-value: < 2.2e-16
# m3: natural log of price against the untransformed predictors.
# The response is on the log scale here, so the residual standard error is not
# in the same units as in the models that use price directly.
m3 <- lm(
  log(price) ~ speed + hd + ram + screen + ads,
  data = computer
)
summary(m3)
##
## Call:
## lm(formula = log(price) ~ speed + hd + ram + screen + ads, data = computer)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63061 -0.11833 -0.01706 0.10794 0.80547
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.510e+00 3.920e-02 166.05 <2e-16 ***
## speed 2.682e-03 1.205e-04 22.25 <2e-16 ***
## hd -1.833e-04 1.562e-05 -11.73 <2e-16 ***
## ram 3.264e-02 6.658e-04 49.03 <2e-16 ***
## screen 4.693e-02 2.667e-03 17.60 <2e-16 ***
## ads 6.450e-04 3.327e-05 19.38 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1842 on 6253 degrees of freedom
## Multiple R-squared: 0.4907, Adjusted R-squared: 0.4903
## F-statistic: 1205 on 5 and 6253 DF, p-value: < 2.2e-16
# m4: price against the square root of every predictor.
m4 <- lm(
  price ~ sqrt(speed) + sqrt(hd) + sqrt(ram) + sqrt(screen) + sqrt(ads),
  data = computer
)
summary(m4)
##
## Call:
## lm(formula = price ~ sqrt(speed) + sqrt(hd) + sqrt(ram) + sqrt(screen) +
## sqrt(ads), data = computer)
##
## Residuals:
## Min 1Q Median 3Q Max
## -986.81 -268.64 -66.55 188.49 2430.45
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2917.354 175.602 -16.61 <2e-16 ***
## sqrt(speed) 87.498 3.946 22.17 <2e-16 ***
## sqrt(hd) -20.488 1.529 -13.39 <2e-16 ***
## sqrt(ram) 488.224 9.480 51.50 <2e-16 ***
## sqrt(screen) 783.263 46.171 16.96 <2e-16 ***
## sqrt(ads) 40.430 1.944 20.79 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 405.6 on 6253 degrees of freedom
## Multiple R-squared: 0.5127, Adjusted R-squared: 0.5123
## F-statistic: 1316 on 5 and 6253 DF, p-value: < 2.2e-16