In this paper I consider several methods for solving the Kaggle competition House Prices: Advanced Regression Techniques. The competition can be found at https://www.kaggle.com/c/house-prices-advanced-regression-techniques.
Here’s a brief version of what you’ll find in the data description file.
Loading the required libraries.
library(dplyr)        # data manipulation
library(dummies)      # creating dummy (one-hot encoded) variables from factors
library(caret)        # train/test partitioning and RMSE
library(randomForest) # random forests and variable importance
library(keras)        # neural networks
library(MASS)
#setwd("~/Dropbox/RProjects/datasets/house_price/")
df <- read.csv("../datasets/house_price/train_method.csv",stringsAsFactors = T)
Our dependent variable (DV) is SalePrice.
We save the DV to a new variable, outcome.
outcome <- df$SalePrice
We select only the variables that are factors and save them to the fac_df variable.
fac_df <- df[,sapply(df, is.factor)]
Converting all integer variables in the df data frame to numeric.
df <- df %>% mutate_if(is.integer,as.numeric)
num_df <- df[,sapply(df, is.numeric)]
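A quick way to see how the columns split between the two groups:
ncol(fac_df)  # number of factor variables
ncol(num_df)  # number of numeric variables (including SalePrice)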
We have 43 independent factor variables. We can find out which of these variables are more important than the others. We do this by restructuring our fac_df with one-hot encoding: each factor becomes a set of 0/1 columns, one per level (for example, Street with levels Grvl and Pave becomes StreetGrvl and StreetPave).
dummied<- dummy.data.frame(fac_df)
colnames(dummied)
FALSE [1] "MSZoningC (all)" "MSZoningFV" "MSZoningRH"
FALSE [4] "MSZoningRL" "MSZoningRM" "StreetGrvl"
FALSE [7] "StreetPave" "AlleyGrvl" "AlleyPave"
FALSE [10] "LotShapeIR1" "LotShapeIR2" "LotShapeIR3"
FALSE [13] "LotShapeReg" "LandContourBnk" "LandContourHLS"
FALSE [16] "LandContourLow" "LandContourLvl" "UtilitiesAllPub"
FALSE [19] "UtilitiesNoSeWa" "LotConfigCorner" "LotConfigCulDSac"
FALSE [22] "LotConfigFR2" "LotConfigFR3" "LotConfigInside"
FALSE [25] "LandSlopeGtl" "LandSlopeMod" "LandSlopeSev"
FALSE [28] "NeighborhoodBlmngtn" "NeighborhoodBlueste" "NeighborhoodBrDale"
FALSE [31] "NeighborhoodBrkSide" "NeighborhoodClearCr" "NeighborhoodCollgCr"
FALSE [34] "NeighborhoodCrawfor" "NeighborhoodEdwards" "NeighborhoodGilbert"
FALSE [37] "NeighborhoodIDOTRR" "NeighborhoodMeadowV" "NeighborhoodMitchel"
FALSE [40] "NeighborhoodNAmes" "NeighborhoodNoRidge" "NeighborhoodNPkVill"
FALSE [43] "NeighborhoodNridgHt" "NeighborhoodNWAmes" "NeighborhoodOldTown"
FALSE [46] "NeighborhoodSawyer" "NeighborhoodSawyerW" "NeighborhoodSomerst"
FALSE [49] "NeighborhoodStoneBr" "NeighborhoodSWISU" "NeighborhoodTimber"
FALSE [52] "NeighborhoodVeenker" "Condition1Artery" "Condition1Feedr"
FALSE [55] "Condition1Norm" "Condition1PosA" "Condition1PosN"
FALSE [58] "Condition1RRAe" "Condition1RRAn" "Condition1RRNe"
FALSE [61] "Condition1RRNn" "Condition2Artery" "Condition2Feedr"
FALSE [64] "Condition2Norm" "Condition2PosA" "Condition2PosN"
FALSE [67] "Condition2RRAe" "Condition2RRAn" "Condition2RRNn"
FALSE [70] "BldgType1Fam" "BldgType2fmCon" "BldgTypeDuplex"
FALSE [73] "BldgTypeTwnhs" "BldgTypeTwnhsE" "HouseStyle1.5Fin"
FALSE [76] "HouseStyle1.5Unf" "HouseStyle1Story" "HouseStyle2.5Fin"
FALSE [79] "HouseStyle2.5Unf" "HouseStyle2Story" "HouseStyleSFoyer"
FALSE [82] "HouseStyleSLvl" "RoofStyleFlat" "RoofStyleGable"
FALSE [85] "RoofStyleGambrel" "RoofStyleHip" "RoofStyleMansard"
FALSE [88] "RoofStyleShed" "RoofMatlClyTile" "RoofMatlCompShg"
FALSE [91] "RoofMatlMembran" "RoofMatlMetal" "RoofMatlRoll"
FALSE [94] "RoofMatlTar&Grv" "RoofMatlWdShake" "RoofMatlWdShngl"
FALSE [97] "Exterior1stAsbShng" "Exterior1stAsphShn" "Exterior1stBrkComm"
FALSE [100] "Exterior1stBrkFace" "Exterior1stCBlock" "Exterior1stCemntBd"
FALSE [103] "Exterior1stHdBoard" "Exterior1stImStucc" "Exterior1stMetalSd"
FALSE [106] "Exterior1stPlywood" "Exterior1stStone" "Exterior1stStucco"
FALSE [109] "Exterior1stVinylSd" "Exterior1stWd Sdng" "Exterior1stWdShing"
FALSE [112] "Exterior2ndAsbShng" "Exterior2ndAsphShn" "Exterior2ndBrk Cmn"
FALSE [115] "Exterior2ndBrkFace" "Exterior2ndCBlock" "Exterior2ndCmentBd"
FALSE [118] "Exterior2ndHdBoard" "Exterior2ndImStucc" "Exterior2ndMetalSd"
FALSE [121] "Exterior2ndOther" "Exterior2ndPlywood" "Exterior2ndStone"
FALSE [124] "Exterior2ndStucco" "Exterior2ndVinylSd" "Exterior2ndWd Sdng"
FALSE [127] "Exterior2ndWd Shng" "MasVnrTypeBrkCmn" "MasVnrTypeBrkFace"
FALSE [130] "MasVnrTypeNone" "MasVnrTypeStone" "ExterQualEx"
FALSE [133] "ExterQualFa" "ExterQualGd" "ExterQualTA"
FALSE [136] "ExterCondEx" "ExterCondFa" "ExterCondGd"
FALSE [139] "ExterCondPo" "ExterCondTA" "FoundationBrkTil"
FALSE [142] "FoundationCBlock" "FoundationPConc" "FoundationSlab"
FALSE [145] "FoundationStone" "FoundationWood" "BsmtQualEx"
FALSE [148] "BsmtQualFa" "BsmtQualGd" "BsmtQualTA"
FALSE [151] "BsmtCondFa" "BsmtCondGd" "BsmtCondPo"
FALSE [154] "BsmtCondTA" "BsmtExposureAv" "BsmtExposureGd"
FALSE [157] "BsmtExposureMn" "BsmtExposureNo" "BsmtFinType1ALQ"
FALSE [160] "BsmtFinType1BLQ" "BsmtFinType1GLQ" "BsmtFinType1LwQ"
FALSE [163] "BsmtFinType1Rec" "BsmtFinType1Unf" "BsmtFinType2ALQ"
FALSE [166] "BsmtFinType2BLQ" "BsmtFinType2GLQ" "BsmtFinType2LwQ"
FALSE [169] "BsmtFinType2Rec" "BsmtFinType2Unf" "HeatingFloor"
FALSE [172] "HeatingGasA" "HeatingGasW" "HeatingGrav"
FALSE [175] "HeatingOthW" "HeatingWall" "HeatingQCEx"
FALSE [178] "HeatingQCFa" "HeatingQCGd" "HeatingQCPo"
FALSE [181] "HeatingQCTA" "CentralAirN" "CentralAirY"
FALSE [184] "ElectricalFuseA" "ElectricalFuseF" "ElectricalFuseP"
FALSE [187] "ElectricalMix" "ElectricalSBrkr" "KitchenQualEx"
FALSE [190] "KitchenQualFa" "KitchenQualGd" "KitchenQualTA"
FALSE [193] "FunctionalMaj1" "FunctionalMaj2" "FunctionalMin1"
FALSE [196] "FunctionalMin2" "FunctionalMod" "FunctionalSev"
FALSE [199] "FunctionalTyp" "FireplaceQuEx" "FireplaceQuFa"
FALSE [202] "FireplaceQuGd" "FireplaceQuPo" "FireplaceQuTA"
FALSE [205] "GarageType2Types" "GarageTypeAttchd" "GarageTypeBasment"
FALSE [208] "GarageTypeBuiltIn" "GarageTypeCarPort" "GarageTypeDetchd"
FALSE [211] "GarageFinishFin" "GarageFinishRFn" "GarageFinishUnf"
FALSE [214] "GarageQualEx" "GarageQualFa" "GarageQualGd"
FALSE [217] "GarageQualPo" "GarageQualTA" "GarageCondEx"
FALSE [220] "GarageCondFa" "GarageCondGd" "GarageCondPo"
FALSE [223] "GarageCondTA" "PavedDriveN" "PavedDriveP"
FALSE [226] "PavedDriveY" "PoolQCEx" "PoolQCFa"
FALSE [229] "PoolQCGd" "FenceGdPrv" "FenceGdWo"
FALSE [232] "FenceMnPrv" "FenceMnWw" "MiscFeatureGar2"
FALSE [235] "MiscFeatureOthr" "MiscFeatureShed" "MiscFeatureTenC"
FALSE [238] "SaleTypeCOD" "SaleTypeCon" "SaleTypeConLD"
FALSE [241] "SaleTypeConLI" "SaleTypeConLw" "SaleTypeCWD"
FALSE [244] "SaleTypeNew" "SaleTypeOth" "SaleTypeWD"
FALSE [247] "SaleConditionAbnorml" "SaleConditionAdjLand" "SaleConditionAlloca"
FALSE [250] "SaleConditionFamily" "SaleConditionNormal" "SaleConditionPartial"
# keep only dummy columns that equal 1 for between 10% and 70% of the observations
dummied <- dummied[, sapply(dummied, sum) / nrow(dummied) < 0.7]
dummied <- dummied[, sapply(dummied, sum) / nrow(dummied) > 0.1]
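As a quick sanity check, we can verify that the frequencies of the surviving columns lie strictly between the two thresholds:
freqs <- colSums(dummied) / nrow(dummied)
range(freqs)   # should fall within (0.1, 0.7)
ncol(dummied)  # number of dummy variables kept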
To the same data frame, dummied, we add the values of our target variable from the outcome vector.
dummied$SalePrice <- outcome
str(dummied)
FALSE 'data.frame': 1460 obs. of 48 variables:
FALSE $ MSZoningRM : int 0 0 0 0 0 0 0 0 1 0 ...
FALSE $ AlleyGrvl : int 0 0 0 1 0 0 0 0 1 0 ...
FALSE $ LotShapeIR1 : int 0 0 1 1 1 1 0 1 0 0 ...
FALSE $ LotShapeReg : int 1 1 0 0 0 0 1 0 1 1 ...
FALSE $ LotConfigCorner : int 0 0 0 1 0 0 0 1 0 1 ...
FALSE $ NeighborhoodCollgCr: int 1 0 1 0 0 0 0 0 0 0 ...
FALSE $ NeighborhoodNAmes : int 0 0 0 0 0 0 0 0 0 0 ...
FALSE $ HouseStyle1.5Fin : int 0 0 0 0 0 1 0 0 1 0 ...
FALSE $ HouseStyle1Story : int 0 1 0 0 0 0 1 0 0 0 ...
FALSE $ HouseStyle2Story : int 1 0 1 1 1 0 0 1 0 0 ...
FALSE $ RoofStyleHip : int 0 0 0 0 0 0 0 0 0 0 ...
FALSE $ Exterior1stHdBoard : int 0 0 0 0 0 0 0 1 0 0 ...
FALSE $ Exterior1stMetalSd : int 0 1 0 0 0 0 0 0 0 1 ...
FALSE $ Exterior1stVinylSd : int 1 0 1 0 1 1 1 0 0 0 ...
FALSE $ Exterior1stWd Sdng : int 0 0 0 1 0 0 0 0 0 0 ...
FALSE $ Exterior2ndHdBoard : int 0 0 0 0 0 0 0 1 0 0 ...
FALSE $ Exterior2ndMetalSd : int 0 1 0 0 0 0 0 0 0 1 ...
FALSE $ Exterior2ndVinylSd : int 1 0 1 0 1 1 1 0 0 0 ...
FALSE $ Exterior2ndWd Sdng : int 0 0 0 0 0 0 0 0 0 0 ...
FALSE $ MasVnrTypeBrkFace : int 1 0 1 0 1 0 0 0 0 0 ...
FALSE $ MasVnrTypeNone : int 0 1 0 1 0 1 0 0 1 1 ...
FALSE $ ExterQualGd : int 1 0 1 0 1 0 1 0 0 0 ...
FALSE $ ExterQualTA : int 0 1 0 1 0 1 0 1 1 1 ...
FALSE $ FoundationCBlock : int 0 1 0 0 0 0 0 1 0 0 ...
FALSE $ FoundationPConc : int 1 0 1 0 1 0 1 0 0 0 ...
FALSE $ BsmtQualGd : int 1 1 1 0 1 1 0 1 0 0 ...
FALSE $ BsmtQualTA : int 0 0 0 1 0 0 0 0 1 1 ...
FALSE $ BsmtExposureAv : int 0 0 0 0 1 0 1 0 0 0 ...
FALSE $ BsmtExposureNo : int 1 0 0 1 0 1 0 0 1 1 ...
FALSE $ BsmtFinType1ALQ : int 0 1 0 1 0 0 0 1 0 0 ...
FALSE $ BsmtFinType1BLQ : int 0 0 0 0 0 0 0 0 0 0 ...
FALSE $ BsmtFinType1GLQ : int 1 0 1 0 1 1 1 0 0 1 ...
FALSE $ BsmtFinType1Unf : int 0 0 0 0 0 0 0 0 1 0 ...
FALSE $ HeatingQCEx : int 1 1 1 0 1 1 1 1 0 1 ...
FALSE $ HeatingQCGd : int 0 0 0 1 0 0 0 0 1 0 ...
FALSE $ HeatingQCTA : int 0 0 0 0 0 0 0 0 0 0 ...
FALSE $ KitchenQualGd : int 1 0 1 1 1 0 1 0 0 0 ...
FALSE $ KitchenQualTA : int 0 1 0 0 0 1 0 1 1 1 ...
FALSE $ FireplaceQuGd : int 0 0 0 1 0 0 1 0 0 0 ...
FALSE $ FireplaceQuTA : int 1 1 1 0 1 1 0 1 1 1 ...
FALSE $ GarageTypeAttchd : int 1 1 1 0 1 1 1 1 0 1 ...
FALSE $ GarageTypeDetchd : int 0 0 0 1 0 0 0 0 1 0 ...
FALSE $ GarageFinishFin : int 0 0 0 0 0 0 0 0 0 0 ...
FALSE $ GarageFinishRFn : int 1 1 1 0 1 0 1 1 0 1 ...
FALSE $ GarageFinishUnf : int 0 0 0 1 0 1 0 0 1 0 ...
FALSE $ FenceGdPrv : int 1 0 1 0 1 0 0 0 0 0 ...
FALSE $ FenceMnPrv : int 0 1 0 1 0 1 1 1 1 1 ...
FALSE $ SalePrice : num 208500 181500 223500 140000 250000 ...
Removing spaces from the column names (for example, "Exterior1stWd Sdng" becomes "Exterior1stWd_Sdng").
colnames(dummied) <- gsub(" ","_",colnames(dummied))
Using a random forest, we further assess the importance of the remaining variables.
rf <- randomForest(SalePrice~.,dummied)
varImpPlot(rf)
rf$importance
FALSE IncNodePurity
FALSE MSZoningRM 1.343643e+11
FALSE AlleyGrvl 2.682045e+11
FALSE LotShapeIR1 7.654186e+10
FALSE LotShapeReg 1.058383e+11
FALSE LotConfigCorner 8.388262e+10
FALSE NeighborhoodCollgCr 5.145567e+10
FALSE NeighborhoodNAmes 2.465980e+10
FALSE HouseStyle1.5Fin 3.579787e+10
FALSE HouseStyle1Story 1.188142e+11
FALSE HouseStyle2Story 1.527694e+11
FALSE RoofStyleHip 4.203915e+11
FALSE Exterior1stHdBoard 1.016034e+11
FALSE Exterior1stMetalSd 3.541593e+10
FALSE Exterior1stVinylSd 6.861900e+10
FALSE Exterior1stWd_Sdng 8.389013e+10
FALSE Exterior2ndHdBoard 8.793571e+10
FALSE Exterior2ndMetalSd 3.637456e+10
FALSE Exterior2ndVinylSd 8.129801e+10
FALSE Exterior2ndWd_Sdng 3.983202e+10
FALSE MasVnrTypeBrkFace 8.931569e+10
FALSE MasVnrTypeNone 1.392388e+11
FALSE ExterQualGd 3.805125e+11
FALSE ExterQualTA 1.424115e+12
FALSE FoundationCBlock 5.877542e+10
FALSE FoundationPConc 2.958609e+11
FALSE BsmtQualGd 4.057755e+11
FALSE BsmtQualTA 3.911208e+11
FALSE BsmtExposureAv 8.045330e+10
FALSE BsmtExposureNo 1.667733e+11
FALSE BsmtFinType1ALQ 4.394763e+10
FALSE BsmtFinType1BLQ 2.346108e+10
FALSE BsmtFinType1GLQ 1.846968e+11
FALSE BsmtFinType1Unf 7.192819e+10
FALSE HeatingQCEx 1.486179e+11
FALSE HeatingQCGd 3.264465e+10
FALSE HeatingQCTA 3.475445e+10
FALSE KitchenQualGd 3.673076e+11
FALSE KitchenQualTA 6.481577e+11
FALSE FireplaceQuGd 7.932374e+10
FALSE FireplaceQuTA 8.279677e+10
FALSE GarageTypeAttchd 1.214518e+11
FALSE GarageTypeDetchd 2.261249e+11
FALSE GarageFinishFin 1.377336e+11
FALSE GarageFinishRFn 5.673355e+10
FALSE GarageFinishUnf 6.648433e+11
FALSE FenceGdPrv 1.820009e+11
FALSE FenceMnPrv 9.647013e+10
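To rank these explicitly, the importance matrix can be sorted (a short sketch using the rf object fitted above):
imp <- rf$importance[order(rf$importance[, "IncNodePurity"], decreasing = TRUE), , drop = FALSE]
head(imp, 10)  # the ten dummies contributing the most node-purity reduction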
Now we create our train and test datasets by combining the dummied and num_df data frames.
dummied$SalePrice <- NULL
total <- cbind(dummied,num_df)
index <- createDataPartition(total$SalePrice,p=0.7,list=F)
train <- total[index,]
test <- total[-index,]
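A quick check that the split is roughly 70/30:
nrow(train)
nrow(test)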
Let’s predict SalePrice using linear regression.
lm <- lm(SalePrice~.,train)
test$Predicted <- predict(lm,test)
FALSE Warning in predict.lm(lm, test): prediction from a rank-deficient fit may
FALSE be misleading
RMSE(test$Predicted,test$SalePrice)
FALSE [1] 33194.33
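caret's RMSE() is simply the root mean squared error, so computing it by hand gives the same value:
sqrt(mean((test$Predicted - test$SalePrice)^2))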
Now let’s predict SalePrice using random forest.
rf <- randomForest(SalePrice~.,train,do.trace=TRUE,ntree=500)
FALSE      |      Out-of-bag   |
FALSE Tree |      MSE  %Var(y) |
FALSE    1 | 2.984e+09    46.07 |
FALSE    2 | 2.576e+09    39.77 |
FALSE    3 | 2.301e+09    35.53 |
FALSE  ... |       ...      ... |
FALSE  498 | 8.473e+08    13.08 |
FALSE  499 | 8.469e+08    13.08 |
FALSE  500 | 8.469e+08    13.07 |
(per-tree trace truncated: the out-of-bag MSE drops quickly over the first ~50 trees and stabilises around 8.4e+08 by tree 500)
test$Predicted <- predict(rf,test)
RMSE(test$Predicted,test$SalePrice)
FALSE [1] 30456.98
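The forest above uses the default mtry; if one wanted to tune it, randomForest's tuneRF() could be used (a sketch only, not run here):
tuned <- tuneRF(x = train[, setdiff(names(train), "SalePrice")],
                y = train$SalePrice,
                ntreeTry = 200, stepFactor = 1.5, improve = 0.01)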
# removing identifier columns (X and Id), which carry no predictive information
set.seed(1)
num_df$X <- NULL
num_df$Id <- NULL
index[,]
FALSE    [1]    1    3    4    6    7    8   10   11   12   13   17   18   19
FALSE   [14]   20   21   22   23   24   25   27   28   29   30   31   32   33
(remaining indices omitted: 1024 training row indices in total, the last being row 1460)
smp <- index[,] # training row indices from the partition created earlier
colnames(num_df) # names of the columns
FALSE [1] "MSSubClass" "LotFrontage" "LotArea" "OverallQual"
FALSE [5] "OverallCond" "YearBuilt" "YearRemodAdd" "MasVnrArea"
FALSE [9] "BsmtFinSF1" "BsmtFinSF2" "BsmtUnfSF" "TotalBsmtSF"
FALSE [13] "X1stFlrSF" "X2ndFlrSF" "LowQualFinSF" "GrLivArea"
FALSE [17] "BsmtFullBath" "BsmtHalfBath" "FullBath" "HalfBath"
FALSE [21] "BedroomAbvGr" "KitchenAbvGr" "TotRmsAbvGrd" "Fireplaces"
FALSE [25] "GarageYrBlt" "GarageCars" "GarageArea" "WoodDeckSF"
FALSE [29] "OpenPorchSF" "EnclosedPorch" "X3SsnPorch" "ScreenPorch"
FALSE [33] "PoolArea" "MiscVal" "MoSold" "YrSold"
FALSE [37] "SalePrice"
train_data <- total[smp,1:36] # selecting only IV from train dataset
train_targets <- total[smp,37] # selecting DV from train dataset
test_data <- total[-smp,1:36] # selecting only IV from test dataset
test_targets <- total[-smp,37] # selecting DV from test dataset
dim(train_data)
FALSE [1] 1024 36
summary(train_targets)
FALSE Min. 1st Qu. Median Mean 3rd Qu. Max.
FALSE 0.0000 0.0000 0.0000 0.4053 1.0000 1.0000
dim(test_data)
FALSE [1] 436 36
summary(test_targets)
FALSE Min. 1st Qu. Median Mean 3rd Qu. Max.
FALSE 0.0000 0.0000 0.0000 0.3922 1.0000 1.0000
mean <- apply(train_data, 2, mean)
std <- apply(train_data, 2, sd)
train_data <- scale(train_data, center = mean, scale = std)
test_data <- scale(test_data, center = mean, scale = std)
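Because the test data are centred and scaled with the training means and standard deviations, only the training columns are guaranteed to end up with mean 0 and standard deviation 1; a quick check:
round(apply(train_data, 2, mean)[1:5], 10)  # approximately 0
round(apply(train_data, 2, sd)[1:5], 6)     # approximately 1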
We also append the one-hot encoded factor variables, kept earlier in dummied, to the scaled training and test data.
train_f <- dummied[smp,]
test_f <- dummied[-smp,]
train_data <- cbind(data.frame(train_data),train_f)
test_data <- cbind(data.frame(test_data),test_f)
head(train_data)
FALSE   MSZoningRM  AlleyGrvl LotShapeIR1 LotShapeReg LotConfigCorner
FALSE 1 -0.4189136 -0.5981141  -0.6984949   0.7597682      -0.4755274
FALSE 3 -0.4189136 -0.5981141   1.4302515  -1.3149055      -0.4755274
FALSE 4 -0.4189136  1.6702890   1.4302515  -1.3149055       2.1008748
FALSE 6 -0.4189136 -0.5981141   1.4302515  -1.3149055      -0.4755274
FALSE 7 -0.4189136 -0.5981141  -0.6984949   0.7597682      -0.4755274
FALSE 8 -0.4189136 -0.5981141   1.4302515  -1.3149055       2.1008748
(remaining columns omitted: the output continues through the scaled columns and then the appended 0/1 dummy variables, ending with FenceGdPrv and FenceMnPrv)
mean <- apply(train_data, 2, mean) # recomputed on the combined data; not used below
train_data <- as.matrix(train_data)
test_data <- as.matrix(test_data)
build_model <- function() {
  model <- keras_model_sequential() %>%
    layer_dense(units = 36, activation = "relu", input_shape = dim(train_data)[[2]]) %>%
    layer_dropout(rate = 0.3) %>%
    layer_dense(units = 24, activation = "relu") %>%
    layer_dropout(rate = 0.3) %>%
    layer_dense(units = 12, activation = "relu") %>%
    layer_dropout(rate = 0.3) %>%
    layer_dense(units = 6, activation = "relu") %>%
    layer_dropout(rate = 0.3) %>%
    layer_dense(units = 1)
  model %>% compile(
    optimizer = "rmsprop",
    loss = "mse",
    metrics = c("mae")
  )
}
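As a quick check of the architecture, the function can be called once and the resulting model summarised (a small sketch; the exact parameter counts depend on the number of input columns):
model_check <- build_model()
summary(model_check)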
k <- 4
indices <- sample(1:nrow(train_data))
folds <- cut(1:length(indices), breaks = k, labels = FALSE)
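Each fold then covers roughly a quarter of the training rows, which can be confirmed with:
table(folds)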
num_epochs <- 50 # 100
all_scores <- c()
for (i in 1:k) {
  cat("processing fold #", i, "\n")
  # Prepare the validation data: data from partition # i
  val_indices <- which(folds == i, arr.ind = TRUE)
  val_data <- train_data[val_indices, ]
  val_targets <- train_targets[val_indices]
  # Prepare the training data: data from all other partitions
  partial_train_data <- train_data[-val_indices, ]
  partial_train_targets <- train_targets[-val_indices]
  # Build the Keras model (already compiled)
  model <- build_model()
  # Train the model (in silent mode, verbose = 0)
  model %>% fit(partial_train_data, partial_train_targets,
                epochs = num_epochs, batch_size = 1, verbose = 0)
  # Evaluate the model on the validation data
  results <- model %>% evaluate(val_data, val_targets, verbose = 0)
  all_scores <- c(all_scores, results$mean_absolute_error)
}
FALSE processing fold # 1
FALSE processing fold # 2
FALSE processing fold # 3
FALSE processing fold # 4
all_scores
FALSE [1] 0.09796859 0.11577563 0.07564647 0.15996313
mean(all_scores)
FALSE [1] 0.1123385
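To compare the network with the linear model and the random forest on the same footing, one could retrain it on the full training data and compute the RMSE on the held-out test set (a sketch, assuming train_data, train_targets, test_data and test_targets as constructed above):
final_model <- build_model()
final_model %>% fit(train_data, train_targets,
                    epochs = num_epochs, batch_size = 16, verbose = 0)
nn_pred <- final_model %>% predict(test_data)
RMSE(as.vector(nn_pred), test_targets)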
Comparing the error metrics, the best results were obtained with the neural network.