# Load the data set from TC.csv; the first row holds the column names.
dataset <- read.csv("TC.csv", header = TRUE)

# Randomly partition the rows into a training part (share `prop`) and a
# validation part (all remaining rows). The fixed seed makes the split
# reproducible.
set.seed(1)
dataframe <- dataset
prop <- 0.6
# sample() truncates a fractional size, so floor() selects the same rows.
train.rows <- sample(rownames(dataframe), floor(nrow(dataframe) * prop))
dataframet <- dataframe[train.rows, ]
valid.rows <- setdiff(rownames(dataframe), train.rows)
dataframev <- dataframe[valid.rows, ]
# Stepwise regression, starting point: the full model, which regresses Price
# on every other column of the training data.
options(scipen = 999)  # print numbers in fixed rather than scientific notation
full <- lm(Price ~ ., data = dataframet)
summary(full)
##
## Call:
## lm(formula = Price ~ ., data = dataframet)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6583.4 -705.6 14.3 735.6 5232.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1742.478982 1221.403400 1.427 0.1541
## Age -113.310447 3.222246 -35.165 < 0.0000000000000002 ***
## KM -0.018781 0.001548 -12.132 < 0.0000000000000002 ***
## HP 33.007004 3.449157 9.570 < 0.0000000000000002 ***
## MC 167.100782 91.669892 1.823 0.0687 .
## CC -0.161064 0.085853 -1.876 0.0610 .
## Doors 43.037558 47.403137 0.908 0.3642
## Quarterly 8.017095 1.600868 5.008 0.000000669 ***
## Weight 11.521660 1.222486 9.425 < 0.0000000000000002 ***
## Auto 2935.244741 216.739940 13.543 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1252 on 851 degrees of freedom
## Multiple R-squared: 0.8888, Adjusted R-squared: 0.8877
## F-statistic: 756.1 on 9 and 851 DF, p-value: < 0.00000000000000022
# Backward elimination by AIC, starting from the full model.
# step() has no `data` argument (the fitted model already records which data
# it was fitted on), so the stray `data = dataframet` has been dropped; it was
# silently swallowed by `...`. The selected model is kept in `step_model` so
# it can be reused instead of being discarded after printing.
step_model <- step(full, direction = "backward")
step_model
## Start: AIC=12291.74
## Price ~ Age + KM + HP + MC + CC + Doors + Quarterly + Weight +
## Auto
##
## Df Sum of Sq RSS AIC
## - Doors 1 1291531 1334669764 12291
## <none> 1333378232 12292
## - MC 1 5206278 1338584510 12293
## - CC 1 5514575 1338892807 12293
## - Quarterly 1 39295861 1372674093 12315
## - Weight 1 139176562 1472554795 12375
## - HP 1 143486206 1476864438 12378
## - KM 1 230628123 1564006356 12427
## - Auto 1 287365442 1620743674 12458
## - Age 1 1937520253 3270898485 13062
##
## Step: AIC=12290.57
## Price ~ Age + KM + HP + MC + CC + Quarterly + Weight + Auto
##
## Df Sum of Sq RSS AIC
## <none> 1334669764 12291
## - MC 1 5388241 1340058004 12292
## - CC 1 5417358 1340087122 12292
## - Quarterly 1 38673391 1373343154 12313
## - HP 1 145235655 1479905419 12378
## - Weight 1 154243017 1488912780 12383
## - KM 1 229797517 1564467281 12425
## - Auto 1 286946915 1621616679 12456
## - Age 1 1944784479 3279454243 13063
##
## Call:
## lm(formula = Price ~ Age + KM + HP + MC + CC + Quarterly + Weight +
## Auto, data = dataframet)
##
## Coefficients:
## (Intercept) Age KM HP MC
## 1624.70586 -113.42916 -0.01874 33.16521 169.89969
## CC Quarterly Weight Auto
## -0.15961 7.94300 11.78443 2911.28728
# Selection of variables (independent variable combinations)
library(leaps)
## Warning: package 'leaps' was built under R version 3.6.1
# Best-subset search over the predictors of Price on the training data,
# recording the two best subsets of each size (nbest = 2), then plot the
# recorded models ordered by adjusted R-squared.
leaps <- regsubsets(Price ~ ., data = dataframet, nbest = 2)
plot(leaps, scale = "adjr2")
#Comparison of Independent variables
library(QuantPsyc)
## Loading required package: boot
## Loading required package: MASS
##
## Attaching package: 'QuantPsyc'
## The following object is masked from 'package:base':
##
## norm
# Standardized (beta) coefficients of the full model, so that predictors
# measured on different scales can be compared with each other.
lm.beta(MOD = full)
## Age KM HP MC CC Doors
## -0.56972837 -0.18637300 0.12690321 0.02110431 -0.02266249 0.01098034
## Quarterly Weight Auto
## 0.08736771 0.17671426 0.18563836
#Prediction and Evaluation of the Model
library(forecast)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'forecast':
## method from
## fitted.fracdiff fracdiff
## residuals.fracdiff fracdiff

# Score the full linear model on the validation rows.
prediction <- predict(full, newdata = dataframev)
# forecast::accuracy() expects the forecasts first and the actual values
# second: errors are actual - forecast, and percentage errors are taken
# relative to the actual values. The call previously had the two swapped.
accuracy(prediction, dataframev$Price)
## NOTE: the output below was recorded with the arguments in swapped order.
## Its ME has the opposite sign and its MPE/MAPE were computed relative to the
## predictions; RMSE and MAE do not depend on the order. Re-run to refresh.
## ME RMSE MAE MPE MAPE
## Test set -29.10831 1219.673 960.4698 -0.867023 9.697432
# Scatter plot of actual against predicted validation prices (linear model).
plot(
  dataframev$Price, prediction,
  col = "red",
  xlab = "Actual Price",
  ylab = "Predicted Price",
  main = "Actual Price vs. Predicted Price - Multiple Linear Regression"
)

# Squared correlation between actual and predicted validation prices,
# reported as a pseudo R-squared.
PsuedoR2 <- cor(dataframev$Price, prediction)^2
cat(sprintf("PsuedoRSquare is %f", PsuedoR2))
## PsuedoRSquare is 0.876029
## NEURAL NET
library(nnet)
# Neural network for Price on the training data: 10 hidden units, a linear
# output unit (linout = TRUE), skip-layer connections (skip = TRUE) and at
# most 100 iterations.
# nnet starts from random weights, so the fit depends on the RNG state.
nnmodel <- nnet(
  Price ~ .,
  data = dataframet,
  size = 10,
  linout = TRUE,
  skip = TRUE,
  MaxNWts = 10000,
  trace = FALSE,
  maxit = 100
)
predictionnnet <- predict(nnmodel, newdata = dataframev)
# forecast::accuracy() expects the forecasts first and the actual values
# second; the call previously had the two swapped. predict() on an nnet model
# returns a one-column matrix here, so it is flattened to a plain vector
# before being passed as the forecast argument.
accuracy(as.vector(predictionnnet), dataframev$Price)
## NOTE: the output below was recorded with the arguments in swapped order.
## Its ME has the opposite sign and its MPE/MAPE were computed relative to the
## predictions; RMSE and MAE do not depend on the order. Re-run to refresh.
## ME RMSE MAE MPE MAPE
## Test set -58.20122 1212.581 937.807 -0.99211 9.407612
# Scatter plot of actual against predicted validation prices (neural network).
plot(
  dataframev$Price, predictionnnet,
  col = "blue",
  xlab = "Actual Price",
  ylab = "Predicted Price",
  main = "Actual Price vs. Predicted Price - Neural Network Model"
)

# Squared correlation between actual and predicted validation prices,
# reported as a pseudo R-squared.
PsuedoR2nnet <- cor(dataframev$Price, predictionnnet)^2
cat(sprintf("PsuedoRSquare is %f", PsuedoR2nnet))
## PsuedoRSquare is 0.877217