# Read the Automobile data set into R

library(readxl)
# Import the project workbook from its fixed location on disk, then echo the
# resulting data so its dimensions and column types can be inspected.
automobile_data <- read_excel(
  path = "/Users/pallavisaitu/Desktop/Spring_ANYL510-50/Automobile_project_dataset.xlsx"
)
automobile_data
## # A tibble: 159 x 26
##    symboling `normalized-los… make  `fuel-type` aspiration no_of_doors
##        <dbl>            <dbl> <chr> <chr>       <chr>      <chr>      
##  1         2              164 audi  gas         std        four       
##  2         2              164 audi  gas         std        four       
##  3         1              158 audi  gas         std        four       
##  4         1              158 audi  gas         turbo      four       
##  5         2              192 bmw   gas         std        two        
##  6         0              192 bmw   gas         std        four       
##  7         0              188 bmw   gas         std        two        
##  8         0              188 bmw   gas         std        four       
##  9         2              121 chev… gas         std        two        
## 10         1               98 chev… gas         std        two        
## # … with 149 more rows, and 20 more variables: body_style <chr>,
## #   drive_wheels <chr>, engine_location <chr>, wheel_base <dbl>,
## #   length <dbl>, width <dbl>, height <dbl>, curb_weight <dbl>,
## #   engine_type <chr>, no_of_cylinders <chr>, engine_size <dbl>,
## #   fuel_system <chr>, bore <dbl>, stroke <dbl>, compression_ratio <dbl>,
## #   horse_power <dbl>, peak_rpm <dbl>, city_mpg <dbl>, highway_mpg <dbl>,
## #   price <dbl>
# Structural overview of the dataset: classes, dimensions, and column types
str(object = automobile_data)
## Classes 'tbl_df', 'tbl' and 'data.frame':    159 obs. of  26 variables:
##  $ symboling        : num  2 2 1 1 2 0 0 0 2 1 ...
##  $ normalized-losses: num  164 164 158 158 192 192 188 188 121 98 ...
##  $ make             : chr  "audi" "audi" "audi" "audi" ...
##  $ fuel-type        : chr  "gas" "gas" "gas" "gas" ...
##  $ aspiration       : chr  "std" "std" "std" "turbo" ...
##  $ no_of_doors      : chr  "four" "four" "four" "four" ...
##  $ body_style       : chr  "sedan" "sedan" "sedan" "sedan" ...
##  $ drive_wheels     : chr  "fwd" "4wd" "fwd" "fwd" ...
##  $ engine_location  : chr  "front" "front" "front" "front" ...
##  $ wheel_base       : num  99.8 99.4 105.8 105.8 101.2 ...
##  $ length           : num  177 177 193 193 177 ...
##  $ width            : num  66.2 66.4 71.4 71.4 64.8 64.8 64.8 64.8 60.3 63.6 ...
##  $ height           : num  54.3 54.3 55.7 55.9 54.3 54.3 54.3 54.3 53.2 52 ...
##  $ curb_weight      : num  2337 2824 2844 3086 2395 ...
##  $ engine_type      : chr  "ohc" "ohc" "ohc" "ohc" ...
##  $ no_of_cylinders  : chr  "four" "five" "five" "five" ...
##  $ engine_size      : num  109 136 136 131 108 108 164 164 61 90 ...
##  $ fuel_system      : chr  "mpfi" "mpfi" "mpfi" "mpfi" ...
##  $ bore             : num  3.19 3.19 3.19 3.13 3.5 3.5 3.31 3.31 2.91 3.03 ...
##  $ stroke           : num  3.4 3.4 3.4 3.4 2.8 2.8 3.19 3.19 3.03 3.11 ...
##  $ compression_ratio: num  10 8 8.5 8.3 8.8 8.8 9 9 9.5 9.6 ...
##  $ horse_power      : num  102 115 110 140 101 101 121 121 48 70 ...
##  $ peak_rpm         : num  5500 5500 5500 5500 5800 5800 4250 4250 5100 5400 ...
##  $ city_mpg         : num  24 18 19 17 23 23 21 21 47 38 ...
##  $ highway_mpg      : num  30 22 25 20 29 29 28 28 53 43 ...
##  $ price            : num  13950 17450 17710 23875 16430 ...
# Per-column descriptive summary of the dataset
summary(object = automobile_data)
##    symboling       normalized-losses     make            fuel-type        
##  Min.   :-2.0000   Min.   : 65.0     Length:159         Length:159        
##  1st Qu.: 0.0000   1st Qu.: 94.0     Class :character   Class :character  
##  Median : 1.0000   Median :113.0     Mode  :character   Mode  :character  
##  Mean   : 0.7358   Mean   :121.1                                          
##  3rd Qu.: 2.0000   3rd Qu.:148.0                                          
##  Max.   : 3.0000   Max.   :256.0                                          
##   aspiration        no_of_doors         body_style       
##  Length:159         Length:159         Length:159        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##  drive_wheels       engine_location      wheel_base         length     
##  Length:159         Length:159         Min.   : 86.60   Min.   :141.1  
##  Class :character   Class :character   1st Qu.: 94.50   1st Qu.:165.7  
##  Mode  :character   Mode  :character   Median : 96.90   Median :172.4  
##                                        Mean   : 98.26   Mean   :172.4  
##                                        3rd Qu.:100.80   3rd Qu.:177.8  
##                                        Max.   :115.60   Max.   :202.6  
##      width           height       curb_weight   engine_type       
##  Min.   :60.30   Min.   :49.40   Min.   :1488   Length:159        
##  1st Qu.:64.00   1st Qu.:52.25   1st Qu.:2066   Class :character  
##  Median :65.40   Median :54.10   Median :2340   Mode  :character  
##  Mean   :65.61   Mean   :53.90   Mean   :2461                     
##  3rd Qu.:66.50   3rd Qu.:55.50   3rd Qu.:2810                     
##  Max.   :71.70   Max.   :59.80   Max.   :4066                     
##  no_of_cylinders     engine_size    fuel_system             bore     
##  Length:159         Min.   : 61.0   Length:159         Min.   :2.54  
##  Class :character   1st Qu.: 97.0   Class :character   1st Qu.:3.05  
##  Mode  :character   Median :110.0   Mode  :character   Median :3.27  
##                     Mean   :119.2                      Mean   :3.30  
##                     3rd Qu.:135.0                      3rd Qu.:3.56  
##                     Max.   :258.0                      Max.   :3.94  
##      stroke      compression_ratio  horse_power        peak_rpm   
##  Min.   :2.070   Min.   : 7.00     Min.   : 48.00   Min.   :4150  
##  1st Qu.:3.105   1st Qu.: 8.70     1st Qu.: 69.00   1st Qu.:4800  
##  Median :3.270   Median : 9.00     Median : 88.00   Median :5200  
##  Mean   :3.236   Mean   :10.16     Mean   : 95.84   Mean   :5114  
##  3rd Qu.:3.410   3rd Qu.: 9.40     3rd Qu.:114.00   3rd Qu.:5500  
##  Max.   :4.170   Max.   :23.00     Max.   :200.00   Max.   :6600  
##     city_mpg      highway_mpg        price      
##  Min.   :15.00   Min.   :18.00   Min.   : 5118  
##  1st Qu.:23.00   1st Qu.:28.00   1st Qu.: 7372  
##  Median :26.00   Median :32.00   Median : 9233  
##  Mean   :26.52   Mean   :32.08   Mean   :11446  
##  3rd Qu.:31.00   3rd Qu.:37.00   3rd Qu.:14720  
##  Max.   :49.00   Max.   :54.00   Max.   :35056
# Exploratory Data Analysis
# Coerce price to numeric, then draw its distribution as a histogram.
automobile_data[["price"]] <- as.numeric(automobile_data[["price"]])
hist(
  automobile_data[["price"]],
  main = "Automobile Price",
  xlab = "Price",
  ylab = "No. of Cars",
  xlim = c(0, 40000),
  ylim = c(0, 100),
  col = rainbow(10)
)

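The histogram confirms that Price is positively (right-) skewed: in the summary above, the mean ($11,446) is higher than the median ($9,233). Specifically, close to 100 cars are priced within the $5-10K range (bin), about 40 cars fall within $10-15K, slightly under 40 cars are within the $15-20K range, and approximately 20 cars in aggregate cost $20K and above. (Note: these bin counts were quoted for a sample of 205 cars, whereas the data set loaded above has 159 rows, so they should be re-read from the regenerated histogram.)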

# Looking at the Price of different car makes
# make is converted to a factor first so that price is plotted per category.
automobile_data[["make"]] <- as.factor(automobile_data[["make"]])
plot(
  price ~ make,
  data = automobile_data,
  xlab = "Automobile Make",
  ylab = "Price",
  ylim = c(0, 40000),
  col = 4
)

This plot provides more insight into how the price range and price distribution vary by make (car brand).

# Check covariance

# Keep only the numeric columns that feed the spread and association checks
automobile_numeric <- automobile_data[, c(
  "wheel_base", "length", "width", "height", "curb_weight", "engine_size",
  "bore", "stroke", "compression_ratio", "horse_power", "peak_rpm",
  "city_mpg", "highway_mpg", "price"
)]

# Find the standard deviations (one list element per column)
standard_deviation <- lapply(X = automobile_numeric, FUN = sd)
standard_deviation
## $wheel_base
## [1] 5.167416
## 
## $length
## [1] 11.52318
## 
## $width
## [1] 1.947883
## 
## $height
## [1] 2.268761
## 
## $curb_weight
## [1] 481.9413
## 
## $engine_size
## [1] 30.46079
## 
## $bore
## [1] 0.2673356
## 
## $stroke
## [1] 0.2948877
## 
## $compression_ratio
## [1] 3.889475
## 
## $horse_power
## [1] 30.71858
## 
## $peak_rpm
## [1] 465.7549
## 
## $city_mpg
## [1] 6.097142
## 
## $highway_mpg
## [1] 6.459189
## 
## $price
## [1] 5877.856
# Pairwise covariance matrix of the numeric columns (Pearson)
covarriance <- cov(x = automobile_numeric, method = "pearson")
covarriance
##                      wheel_base        length        width        height
## wheel_base           26.7021877    51.8955625    8.2033102    6.51561022
## length               51.8955625   132.7836048   18.8171734   13.05209736
## width                 8.2033102    18.8171734    3.7942465    1.29354908
## height                6.5156102    13.0520974    1.2935491    5.14727808
## curb_weight        2017.6689157  4838.7107316  817.2837593  401.33806226
## engine_size         102.1872821   254.8133031   46.2362551    7.67672558
## bore                  0.7986881     1.9910172    0.2981509    0.15456337
## stroke                0.2551595     0.4114116    0.1129391   -0.06109092
## compression_ratio     5.8573446     8.2832058    1.9603648    2.05877920
## horse_power          82.0580248   237.8940490   40.8006090    2.39166866
## peak_rpm           -696.1147600 -1256.2686092 -210.6747074 -259.80136932
## city_mpg            -18.2944590   -50.9053698   -7.9178887   -2.76296075
## highway_mpg         -20.4185694   -53.9321511   -8.7234058   -3.31387230
## price             22306.7472056 51540.4904745 9656.0640793 3265.00236048
##                     curb_weight   engine_size         bore       stroke
## wheel_base           2017.66892    102.187282   0.79868808   0.25515954
## length               4838.71073    254.813303   1.99101724   0.41141155
## width                 817.28376     46.236255   0.29815094   0.11293910
## height                401.33806      7.676726   0.15456337  -0.06109092
## curb_weight        232267.43643  13045.310246  83.20384324  24.70652058
## engine_size         13045.31025    927.859804   4.85123716   2.69190709
## bore                   83.20384      4.851237   0.07146834  -0.00808688
## stroke                 24.70652      2.691907  -0.00808688   0.08695876
## compression_ratio     421.24478     16.716641   0.01572074   0.27938390
## horse_power         11697.01011    759.866372   4.60078019   1.34794403
## peak_rpm           -58358.57217  -4038.912109 -38.88149829  -1.55363825
## city_mpg            -2239.56636   -129.846788  -0.96240785  -0.03605843
## highway_mpg         -2457.16961   -140.499642  -1.02026351  -0.02463657
## price             2531484.42373 150664.865417 838.93414816 278.48071611
##                   compression_ratio   horse_power      peak_rpm
## wheel_base               5.85734464     82.058025 -6.961148e+02
## length                   8.28320576    237.894049 -1.256269e+03
## width                    1.96036482     40.800609 -2.106747e+02
## height                   2.05877920      2.391669 -2.598014e+02
## curb_weight            421.24477908  11697.010111 -5.835857e+04
## engine_size             16.71664079    759.866372 -4.038912e+03
## bore                     0.01572074      4.600780 -3.888150e+01
## stroke                   0.27938390      1.347944 -1.553638e+00
## compression_ratio       15.12801263    -19.392092 -7.549936e+02
## horse_power            -19.39209219    943.631319  1.059555e+03
## peak_rpm              -754.99361118   1059.555370  2.169276e+05
## city_mpg                 6.60054454   -156.806504 -1.503065e+02
## highway_mpg              5.56427394   -164.277685 -9.860680e+01
## price                 4786.37423215 137202.392206 -4.706443e+05
##                        city_mpg   highway_mpg         price
## wheel_base        -1.829446e+01 -2.041857e+01    22306.7472
## length            -5.090537e+01 -5.393215e+01    51540.4905
## width             -7.917889e+00 -8.723406e+00     9656.0641
## height            -2.762961e+00 -3.313872e+00     3265.0024
## curb_weight       -2.239566e+03 -2.457170e+03  2531484.4237
## engine_size       -1.298468e+02 -1.404996e+02   150664.8654
## bore              -9.624078e-01 -1.020264e+00      838.9341
## stroke            -3.605843e-02 -2.463657e-02      278.4807
## compression_ratio  6.600545e+00  5.564274e+00     4786.3742
## horse_power       -1.568065e+02 -1.642777e+02   137202.3922
## peak_rpm          -1.503065e+02 -9.860680e+01  -470644.3356
## city_mpg           3.717515e+01  3.827983e+01   -24809.7693
## highway_mpg        3.827983e+01  4.172112e+01   -27339.0727
## price             -2.480977e+04 -2.733907e+04 34549193.4517
# Check correlation
#install.packages("Hmisc")
library("psych")
## Warning: package 'psych' was built under R version 3.5.2
# Pairwise correlation matrix of the numeric columns, computed on a plain
# matrix copy of the data
correlation_matrix <- cor(x = as.matrix(automobile_numeric))
correlation_matrix
##                   wheel_base     length      width      height curb_weight
## wheel_base         1.0000000  0.8715345  0.8149912  0.55576713   0.8101815
## length             0.8715345  1.0000000  0.8383385  0.49925137   0.8712911
## width              0.8149912  0.8383385  1.0000000  0.29270580   0.8705945
## height             0.5557671  0.4992514  0.2927058  1.00000000   0.3670518
## curb_weight        0.8101815  0.8712911  0.8705945  0.36705181   1.0000000
## engine_size        0.6492056  0.7259533  0.7792534  0.11108260   0.8886261
## bore               0.5781585  0.6463176  0.5725542  0.25483608   0.6457916
## stroke             0.1674487  0.1210731  0.1966187 -0.09131269   0.1738444
## compression_ratio  0.2914314  0.1848142  0.2587517  0.23330821   0.2247240
## horse_power        0.5169475  0.6720633  0.6818718  0.03431713   0.7900954
## peak_rpm          -0.2892345 -0.2340738 -0.2322160 -0.24586416  -0.2599879
## city_mpg          -0.5806572 -0.7245444 -0.6666844 -0.19973748  -0.7621552
## highway_mpg       -0.6117499 -0.7245987 -0.6933385 -0.22613562  -0.7893380
## price              0.7344189  0.7609522  0.8433705  0.24483625   0.8936391
##                   engine_size        bore      stroke compression_ratio
## wheel_base          0.6492056  0.57815853  0.16744868        0.29143145
## length              0.7259533  0.64631755  0.12107308        0.18481418
## width               0.7792534  0.57255416  0.19661872        0.25875169
## height              0.1110826  0.25483608 -0.09131269        0.23330821
## curb_weight         0.8886261  0.64579158  0.17384442        0.22472399
## engine_size         1.0000000  0.59573688  0.29968307        0.14109671
## bore                0.5957369  1.00000000 -0.10258113        0.01511908
## stroke              0.2996831 -0.10258113  1.00000000        0.24358681
## compression_ratio   0.1410967  0.01511908  0.24358681        1.00000000
## horse_power         0.8120726  0.56023917  0.14880380       -0.16230524
## peak_rpm           -0.2846858 -0.31226891 -0.01131191       -0.41676855
## city_mpg           -0.6991393 -0.59044028 -0.02005506        0.27833158
## highway_mpg        -0.7140951 -0.59085039 -0.01293438        0.22148258
## price               0.8414956  0.53389035  0.16066434        0.20936147
##                   horse_power    peak_rpm    city_mpg highway_mpg
## wheel_base         0.51694753 -0.28923445 -0.58065720 -0.61174990
## length             0.67206330 -0.23407384 -0.72454445 -0.72459867
## width              0.68187176 -0.23221605 -0.66668439 -0.69333851
## height             0.03431713 -0.24586416 -0.19973748 -0.22613562
## curb_weight        0.79009539 -0.25998788 -0.76215523 -0.78933796
## engine_size        0.81207263 -0.28468581 -0.69913926 -0.71409510
## bore               0.56023917 -0.31226891 -0.59044028 -0.59085039
## stroke             0.14880380 -0.01131191 -0.02005506 -0.01293438
## compression_ratio -0.16230524 -0.41676855  0.27833158  0.22148258
## horse_power        1.00000000  0.07405682 -0.83721415 -0.82794105
## peak_rpm           0.07405682  1.00000000 -0.05292904 -0.03277717
## city_mpg          -0.83721415 -0.05292904  1.00000000  0.97199880
## highway_mpg       -0.82794105 -0.03277717  0.97199880  1.00000000
## price              0.75987395 -0.17191607 -0.69227306 -0.72009010
##                        price
## wheel_base         0.7344189
## length             0.7609522
## width              0.8433705
## height             0.2448363
## curb_weight        0.8936391
## engine_size        0.8414956
## bore               0.5338904
## stroke             0.1606643
## compression_ratio  0.2093615
## horse_power        0.7598739
## peak_rpm          -0.1719161
## city_mpg          -0.6922731
## highway_mpg       -0.7200901
## price              1.0000000
# Regression Analysis

# Pick the candidate predictors plus the response (price). Note that
# no_of_doors is a character column, so lm() treats it as categorical.
auto_regression <- automobile_data[, c(
  "horse_power", "city_mpg", "peak_rpm", "curb_weight", "no_of_doors",
  "price"
)]


library(caTools)
# Fix the RNG seed so the train/validation partition is reproducible
set.seed(2017)
# Share of rows assigned to the training sample
SplitRatio <- 0.8
# Split the data into training and validation sets. sample.split() returns a
# logical vector (TRUE = training row). The ratio is read from SplitRatio
# rather than a repeated literal, so changing the constant above actually
# changes the partition.
split <- sample.split(auto_regression$price, SplitRatio = SplitRatio)
train.sample <- subset(auto_regression, split)
valid.sample <- subset(auto_regression, !split)

# Fit the linear model on the training sample using every candidate predictor
fit <- lm(
  price ~ horse_power + city_mpg + curb_weight + peak_rpm + no_of_doors,
  data = train.sample
)
summary(fit)
## 
## Call:
## lm(formula = price ~ horse_power + city_mpg + curb_weight + peak_rpm + 
##     no_of_doors, data = train.sample)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6658.7 -1487.8  -152.7   987.7  9029.0 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -2.357e+04  5.482e+03  -4.299 3.49e-05 ***
## horse_power     5.652e+00  1.844e+01   0.307    0.760    
## city_mpg        5.123e+01  7.657e+01   0.669    0.505    
## curb_weight     1.187e+01  1.058e+00  11.221  < 2e-16 ***
## peak_rpm        7.273e-01  5.886e-01   1.236    0.219    
## no_of_doorstwo  7.147e+02  5.117e+02   1.397    0.165    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2570 on 121 degrees of freedom
## Multiple R-squared:  0.8278, Adjusted R-squared:  0.8207 
## F-statistic: 116.3 on 5 and 121 DF,  p-value: < 2.2e-16
# Backward elimination: refit without no_of_doors, which was not significant
# in the full model (p = 0.165)
fit <- lm(
  price ~ horse_power + city_mpg + peak_rpm + curb_weight,
  data = train.sample
)
summary(fit)
## 
## Call:
## lm(formula = price ~ horse_power + city_mpg + peak_rpm + curb_weight, 
##     data = train.sample)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6900.2 -1329.0   -12.2  1022.5  9662.8 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.366e+04  5.503e+03  -4.300 3.46e-05 ***
## horse_power  1.417e+01  1.747e+01   0.811    0.419    
## city_mpg     7.007e+01  7.566e+01   0.926    0.356    
## peak_rpm     7.426e-01  5.908e-01   1.257    0.211    
## curb_weight  1.145e+01  1.020e+00  11.235  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2580 on 122 degrees of freedom
## Multiple R-squared:  0.825,  Adjusted R-squared:  0.8193 
## F-statistic: 143.8 on 4 and 122 DF,  p-value: < 2.2e-16
# Refit without city_mpg, which was not significant in the previous model
# (p = 0.356)
fit <- lm(
  price ~ horse_power + peak_rpm + curb_weight,
  data = train.sample
)
summary(fit)
## 
## Call:
## lm(formula = price ~ horse_power + peak_rpm + curb_weight, data = train.sample)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6882.4 -1310.7   -68.7  1185.8  9701.2 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.002e+04  3.842e+03  -5.209 7.74e-07 ***
## horse_power  5.739e+00  1.490e+01   0.385    0.701    
## peak_rpm     6.720e-01  5.855e-01   1.148    0.253    
## curb_weight  1.120e+01  9.826e-01  11.403  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2578 on 123 degrees of freedom
## Multiple R-squared:  0.8238, Adjusted R-squared:  0.8195 
## F-statistic: 191.7 on 3 and 123 DF,  p-value: < 2.2e-16
# Refit without peak_rpm, which was not significant in the previous model
# (p = 0.253). This two-predictor model is the one evaluated afterwards.
# NOTE(review): horse_power is retained even though its p-value in this fit
# (0.27) is also above 0.05 - confirm that keeping it is intentional.
fit <- lm(
  price ~ horse_power + curb_weight,
  data = train.sample
)
summary(fit)
## 
## Call:
## lm(formula = price ~ horse_power + curb_weight, data = train.sample)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -6765  -1260   -160   1223   9738 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.584e+04  1.244e+03 -12.738   <2e-16 ***
## horse_power  1.431e+01  1.291e+01   1.109     0.27    
## curb_weight  1.057e+01  8.119e-01  13.016   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2582 on 124 degrees of freedom
## Multiple R-squared:  0.8219, Adjusted R-squared:  0.819 
## F-statistic: 286.1 on 2 and 124 DF,  p-value: < 2.2e-16
# Residual Plot Analysis
# Lay the plotting device out as a 2 x 2 grid, then draw the model's
# diagnostic plots into it.
par(mfrow = c(2, 2))
plot(x = fit)

# Evaluate the final linear model
# Score both partitions with the fitted model and store the predictions next
# to the observed prices. predict() only reads the predictor columns named in
# the model formula, so each sample can be passed as newdata unchanged.
train.sample$pred.price <- predict(fit, newdata = train.sample)

valid.sample$pred.price <- predict(fit, newdata = valid.sample)

summary(fit)
## 
## Call:
## lm(formula = price ~ horse_power + curb_weight, data = train.sample)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -6765  -1260   -160   1223   9738 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.584e+04  1.244e+03 -12.738   <2e-16 ***
## horse_power  1.431e+01  1.291e+01   1.109     0.27    
## curb_weight  1.057e+01  8.119e-01  13.016   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2582 on 124 degrees of freedom
## Multiple R-squared:  0.8219, Adjusted R-squared:  0.819 
## F-statistic: 286.1 on 2 and 124 DF,  p-value: < 2.2e-16
# Training set: check how well the model fits the data it was trained on
train.corr <- round(cor(train.sample$price, train.sample$pred.price), 2)
# Squared correlation, computed before rounding so that it is not distorted
# by the two-decimal rounding applied to train.corr
train.r.squared <- round(
  cor(train.sample$price, train.sample$pred.price)^2,
  4
)
# Root mean square error: square each error, average, then take the root.
# Squaring the mean error instead lets positive and negative errors cancel,
# which is why this previously reported 0 on the training sample.
train.RMSE <- round(
  sqrt(mean((train.sample$pred.price - train.sample$price)^2)),
  2
)

# Mean absolute error
train.MAE <- round(mean(abs(train.sample$pred.price - train.sample$price)), 2)
c(train.r.squared, train.RMSE, train.MAE)
## NOTE: re-run to refresh this output. The values recorded earlier
## ([1]    0.8281    0.0000 1809.2900) came from the incorrect RMSE formula
## and from squaring the already-rounded correlation.
# Validation set: check how well the model predicts the held-out data
valid.corr <- round(cor(valid.sample$price, valid.sample$pred.price), 2)
# Squared correlation, computed before rounding so that it is not distorted
# by the two-decimal rounding applied to valid.corr
valid.r.squared <- round(
  cor(valid.sample$price, valid.sample$pred.price)^2,
  4
)

# Root mean square error: square each error, average, then take the root.
# Squaring the mean error instead reports only the absolute average bias,
# because positive and negative errors cancel.
valid.RMSE <- round(
  sqrt(mean((valid.sample$pred.price - valid.sample$price)^2)),
  2
)

# Mean absolute error
valid.MAE <- round(mean(abs(valid.sample$pred.price - valid.sample$price)), 2)
c(valid.r.squared, valid.RMSE, valid.MAE)
## NOTE: re-run to refresh this output. The values recorded earlier
## ([1]    0.7396  457.4800 2013.6400) came from the incorrect RMSE formula
## and from squaring the already-rounded correlation.
# These results could be improved by eliminating extreme values and normalising the variables