# Load the computer price data from a local CSV file (absolute Windows path).
computer <- read.csv(
  file = "E:\\Data science\\Computer_Data.csv",
  header = TRUE
)
# Open the data in the spreadsheet-style viewer, then report rows x columns.
View(computer)
dim(computer)
## [1] 6259    6
# Put the columns of `computer` on the search path so they can be used by
# bare name.
# NOTE(review): every lm() call visible in this script already passes
# data = computer, so this looks unnecessary here -- confirm that nothing
# else relies on it before removing. attach() can silently mask, or be
# masked by, objects with the same names.
attach(computer)
# Exploratory Data Analysis (typically ~60% of the project time)
# 1. Measures of central tendency
# 2. Measures of dispersion
# 3. Third moment business decision (skewness)
# 4. Fourth moment business decision (kurtosis)
# 5. Probability distributions of variables
# 6. Graphical representations:
#    histogram, box plot, dot plot, stem-and-leaf plot, bar plot

# Per-column summary (min, quartiles, median, mean, max) of the data set.
summary(object = computer)
##      price          speed              hd              ram        
##  Min.   : 949   Min.   : 25.00   Min.   :  80.0   Min.   : 2.000  
##  1st Qu.:1794   1st Qu.: 33.00   1st Qu.: 214.0   1st Qu.: 4.000  
##  Median :2144   Median : 50.00   Median : 340.0   Median : 8.000  
##  Mean   :2220   Mean   : 52.01   Mean   : 416.6   Mean   : 8.287  
##  3rd Qu.:2595   3rd Qu.: 66.00   3rd Qu.: 528.0   3rd Qu.: 8.000  
##  Max.   :5399   Max.   :100.00   Max.   :2100.0   Max.   :32.000  
##      screen           ads       
##  Min.   :14.00   Min.   : 39.0  
##  1st Qu.:14.00   1st Qu.:162.5  
##  Median :14.00   Median :246.0  
##  Mean   :14.61   Mean   :221.3  
##  3rd Qu.:15.00   3rd Qu.:275.0  
##  Max.   :17.00   Max.   :339.0
# Pairwise Pearson correlation matrix across all columns of the data set.
cor(x = computer, method = "pearson")
##             price      speed         hd        ram      screen         ads
## price  1.00000000  0.3009765  0.4302578  0.6227482  0.29604147  0.05454047
## speed  0.30097646  1.0000000  0.3723041  0.2347605  0.18907412 -0.21523206
## hd     0.43025779  0.3723041  1.0000000  0.7777263  0.23280153 -0.32322200
## ram    0.62274824  0.2347605  0.7777263  1.0000000  0.20895374 -0.18166971
## screen 0.29604147  0.1890741  0.2328015  0.2089537  1.00000000 -0.09391943
## ads    0.05454047 -0.2152321 -0.3232220 -0.1816697 -0.09391943  1.00000000
# Scatterplot matrix of every pair of columns on the current device.
plot(x = computer)
# Open a second graphics window (Windows-only device) and draw the
# pairwise scatterplots there as well.
windows()
pairs(x = computer)

# Open one more, initially empty, graphics window.
windows()

# Linear regression models
#
# m1: baseline model -- price regressed on the five untransformed predictors.
m1 <- lm(
  formula = price ~ speed + hd + ram + screen + ads,
  data = computer
)
summary(object = m1)
## 
## Call:
## lm(formula = price ~ speed + hd + ram + screen + ads, data = computer)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -907.35 -281.57  -71.97  201.70 2472.20 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -405.73236   88.39492   -4.59 4.52e-06 ***
## speed          5.75103    0.27172   21.17  < 2e-16 ***
## hd            -0.40739    0.03522  -11.56  < 2e-16 ***
## ram           73.59740    1.50119   49.03  < 2e-16 ***
## screen       107.19445    6.01382   17.82  < 2e-16 ***
## ads            1.44617    0.07502   19.28  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 415.3 on 6253 degrees of freedom
## Multiple R-squared:  0.489,  Adjusted R-squared:  0.4886 
## F-statistic:  1197 on 5 and 6253 DF,  p-value: < 2.2e-16
# m2: same response as m1 (price), but every predictor is log-transformed.
m2 <- lm(
  formula = price ~ log(speed) + log(hd) + log(ram) + log(screen) + log(ads),
  data = computer
)
summary(object = m2)
## 
## Call:
## lm(formula = price ~ log(speed) + log(hd) + log(ram) + log(screen) + 
##     log(ads), data = computer)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1080.61  -265.83   -51.88   204.82  2374.08 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4410.94     253.10  -17.43   <2e-16 ***
## log(speed)    323.14      14.10   22.92   <2e-16 ***
## log(hd)      -227.43      15.10  -15.06   <2e-16 ***
## log(ram)      709.63      13.85   51.25   <2e-16 ***
## log(screen)  1486.48      90.66   16.40   <2e-16 ***
## log(ads)      257.82      11.99   21.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 406.2 on 6253 degrees of freedom
## Multiple R-squared:  0.5112, Adjusted R-squared:  0.5108 
## F-statistic:  1308 on 5 and 6253 DF,  p-value: < 2.2e-16
# m3: log-transformed response with the untransformed predictors.
# The residual standard error and R-squared of this fit are on the
# log(price) scale, so they are not directly comparable with models whose
# response is price itself.
m3 <- lm(
  formula = log(price) ~ speed + hd + ram + screen + ads,
  data = computer
)
summary(object = m3)
## 
## Call:
## lm(formula = log(price) ~ speed + hd + ram + screen + ads, data = computer)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63061 -0.11833 -0.01706  0.10794  0.80547 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.510e+00  3.920e-02  166.05   <2e-16 ***
## speed        2.682e-03  1.205e-04   22.25   <2e-16 ***
## hd          -1.833e-04  1.562e-05  -11.73   <2e-16 ***
## ram          3.264e-02  6.658e-04   49.03   <2e-16 ***
## screen       4.693e-02  2.667e-03   17.60   <2e-16 ***
## ads          6.450e-04  3.327e-05   19.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1842 on 6253 degrees of freedom
## Multiple R-squared:  0.4907, Adjusted R-squared:  0.4903 
## F-statistic:  1205 on 5 and 6253 DF,  p-value: < 2.2e-16
# m4: price regressed on the square root of each predictor.
m4 <- lm(
  formula = price ~ sqrt(speed) + sqrt(hd) + sqrt(ram) + sqrt(screen) +
    sqrt(ads),
  data = computer
)
summary(object = m4)
## 
## Call:
## lm(formula = price ~ sqrt(speed) + sqrt(hd) + sqrt(ram) + sqrt(screen) + 
##     sqrt(ads), data = computer)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -986.81 -268.64  -66.55  188.49 2430.45 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2917.354    175.602  -16.61   <2e-16 ***
## sqrt(speed)     87.498      3.946   22.17   <2e-16 ***
## sqrt(hd)       -20.488      1.529  -13.39   <2e-16 ***
## sqrt(ram)      488.224      9.480   51.50   <2e-16 ***
## sqrt(screen)   783.263     46.171   16.96   <2e-16 ***
## sqrt(ads)       40.430      1.944   20.79   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 405.6 on 6253 degrees of freedom
## Multiple R-squared:  0.5127, Adjusted R-squared:  0.5123 
## F-statistic:  1316 on 5 and 6253 DF,  p-value: < 2.2e-16