# Load the TC data set; the first line of TC.csv supplies the column names.
dataset <- read.csv("TC.csv", header = TRUE)

# Split the data into a training set (60% of the rows) and a validation set
# (everything else). Rows are drawn by row name, so the validation rows are
# simply the row names that were not sampled for training.
set.seed(1)  # fixed seed so the same split is drawn on every run
prop <- 0.6
dataframe <- dataset

train.rows <- sample(row.names(dataframe), nrow(dataframe) * prop)
valid.rows <- setdiff(row.names(dataframe), train.rows)

dataframet <- dataframe[train.rows, ]
dataframev <- dataframe[valid.rows, ]

# Stepwise regression: begin with the full model, i.e. Price regressed on
# every other column of the training set.
options(scipen = 999)  # favour fixed over scientific notation when printing
full <- lm(Price ~ ., data = dataframet)
summary(full)
## 
## Call:
## lm(formula = Price ~ ., data = dataframet)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6583.4  -705.6    14.3   735.6  5232.8 
## 
## Coefficients:
##                Estimate  Std. Error t value             Pr(>|t|)    
## (Intercept) 1742.478982 1221.403400   1.427               0.1541    
## Age         -113.310447    3.222246 -35.165 < 0.0000000000000002 ***
## KM            -0.018781    0.001548 -12.132 < 0.0000000000000002 ***
## HP            33.007004    3.449157   9.570 < 0.0000000000000002 ***
## MC           167.100782   91.669892   1.823               0.0687 .  
## CC            -0.161064    0.085853  -1.876               0.0610 .  
## Doors         43.037558   47.403137   0.908               0.3642    
## Quarterly      8.017095    1.600868   5.008          0.000000669 ***
## Weight        11.521660    1.222486   9.425 < 0.0000000000000002 ***
## Auto        2935.244741  216.739940  13.543 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1252 on 851 degrees of freedom
## Multiple R-squared:  0.8888, Adjusted R-squared:  0.8877 
## F-statistic: 756.1 on 9 and 851 DF,  p-value: < 0.00000000000000022
# Backward elimination starting from the full model. The selected model is
# only printed here, not assigned; the predictions further down use `full`.
# NOTE(review): `data` is not a named argument of step(); it is passed on
# through `...` -- confirm whether it is needed at all.
step(object = full, direction = "backward", data = dataframet)
## Start:  AIC=12291.74
## Price ~ Age + KM + HP + MC + CC + Doors + Quarterly + Weight + 
##     Auto
## 
##             Df  Sum of Sq        RSS   AIC
## - Doors      1    1291531 1334669764 12291
## <none>                    1333378232 12292
## - MC         1    5206278 1338584510 12293
## - CC         1    5514575 1338892807 12293
## - Quarterly  1   39295861 1372674093 12315
## - Weight     1  139176562 1472554795 12375
## - HP         1  143486206 1476864438 12378
## - KM         1  230628123 1564006356 12427
## - Auto       1  287365442 1620743674 12458
## - Age        1 1937520253 3270898485 13062
## 
## Step:  AIC=12290.57
## Price ~ Age + KM + HP + MC + CC + Quarterly + Weight + Auto
## 
##             Df  Sum of Sq        RSS   AIC
## <none>                    1334669764 12291
## - MC         1    5388241 1340058004 12292
## - CC         1    5417358 1340087122 12292
## - Quarterly  1   38673391 1373343154 12313
## - HP         1  145235655 1479905419 12378
## - Weight     1  154243017 1488912780 12383
## - KM         1  229797517 1564467281 12425
## - Auto       1  286946915 1621616679 12456
## - Age        1 1944784479 3279454243 13063
## 
## Call:
## lm(formula = Price ~ Age + KM + HP + MC + CC + Quarterly + Weight + 
##     Auto, data = dataframet)
## 
## Coefficients:
## (Intercept)          Age           KM           HP           MC  
##  1624.70586   -113.42916     -0.01874     33.16521    169.89969  
##          CC    Quarterly       Weight         Auto  
##    -0.15961      7.94300     11.78443   2911.28728
# Selection of variables (independent-variable combinations)
library(leaps)
## Warning: package 'leaps' was built under R version 3.6.1
# Subset search over the predictors, keeping the two best models of each
# size (nbest = 2), then plot the candidate models by adjusted R-squared.
leaps <- regsubsets(Price ~ ., data = dataframet, nbest = 2)
plot(leaps, scale = "adjr2")

#Comparison of Independent variables
library(QuantPsyc)
## Loading required package: boot
## Loading required package: MASS
## 
## Attaching package: 'QuantPsyc'
## The following object is masked from 'package:base':
## 
##     norm
# Standardized (beta) coefficients of the full model, so that predictors
# measured on different scales can be compared with one another.
lm.beta(full)
##         Age          KM          HP          MC          CC       Doors 
## -0.56972837 -0.18637300  0.12690321  0.02110431 -0.02266249  0.01098034 
##   Quarterly      Weight        Auto 
##  0.08736771  0.17671426  0.18563836
#Prediction and Evaluation of the Model
library(forecast)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'forecast':
##   method             from    
##   fitted.fracdiff    fracdiff
##   residuals.fracdiff fracdiff

# Score the validation rows with the full linear model.
prediction <- predict(full, newdata = dataframev)

# Error summary on the validation set. forecast::accuracy() expects the
# forecasts first and the actual values second. With the arguments the other
# way round the error is computed as predicted - actual (ME and MPE get the
# wrong sign) and the percentage errors are scaled by the predictions rather
# than by the actual prices.
# NOTE: any recorded output following this call was produced by the swapped
# call and should be regenerated.
accuracy(prediction, dataframev$Price)
##                 ME     RMSE      MAE       MPE     MAPE
## Test set -29.10831 1219.673 960.4698 -0.867023 9.697432
# Scatter plot of actual vs. predicted validation prices (linear model).
plot(
  x = dataframev$Price,
  y = prediction,
  col = "red",
  xlab = "Actual Price",
  ylab = "Predicted Price",
  main = "Actual Price vs. Predicted Price - Multiple Linear Regression"
)

# Pseudo R-squared: squared correlation between the actual and the predicted
# prices on the validation set.
PsuedoR2 <- cor(dataframev$Price, prediction)^2

cat(sprintf("PsuedoRSquare is %f", PsuedoR2))
## PsuedoRSquare is 0.876029
## NEURAL NET

library(nnet)
# Neural network with one hidden layer of 10 units, a linear output unit
# (linout) and skip-layer connections from the inputs to the output (skip),
# fitted on the training set with at most 100 iterations.
nnmodel <- nnet(
  Price ~ .,
  data = dataframet,
  size = 10,
  linout = TRUE,
  skip = TRUE,
  MaxNWts = 10000,
  trace = FALSE,
  maxit = 100
)

# Score the validation rows; predict() on an nnet fit returns a one-column
# matrix of fitted values.
predictionnnet <- predict(nnmodel, newdata = dataframev)

# Error summary on the validation set. forecast::accuracy() expects the
# forecasts first and the actual values second; swapping them flips the sign
# of ME/MPE and scales the percentage errors by the predictions instead of
# the actual prices. The prediction matrix is flattened because accuracy()
# only accepts a plain numeric vector (or forecast object) as first argument.
# NOTE: any recorded output following this call was produced by the swapped
# call and should be regenerated.
accuracy(as.numeric(predictionnnet), dataframev$Price)
##                 ME     RMSE     MAE      MPE     MAPE
## Test set -58.20122 1212.581 937.807 -0.99211 9.407612
# Scatter plot of actual vs. predicted validation prices (neural network).
plot(
  x = dataframev$Price,
  y = predictionnnet,
  col = "blue",
  xlab = "Actual Price",
  ylab = "Predicted Price",
  main = "Actual Price vs. Predicted Price - Neural Network Model"
)

# Pseudo R-squared: squared correlation between the actual and the predicted
# prices on the validation set.
PsuedoR2nnet <- cor(dataframev$Price, predictionnnet)^2

cat(sprintf("PsuedoRSquare is %f", PsuedoR2nnet))
## PsuedoRSquare is 0.877217