Business Understanding

There are two data sets: one contains the training data and the other contains the test data. Both data sets are loaded here.

# Read the training and test house-price tables from local CSV files.
housing_train <- read.csv(file = "/Users/macuser/Desktop/MSDA/Data Preparation/house-prices/train.csv")
housing_test <- read.csv(file = "/Users/macuser/Desktop/MSDA/Data Preparation/house-prices/test.csv")
# Show every column name of the training table.
names(housing_train)
##  [1] "Id"            "MSSubClass"    "MSZoning"      "LotFrontage"  
##  [5] "LotArea"       "Street"        "Alley"         "LotShape"     
##  [9] "LandContour"   "Utilities"     "LotConfig"     "LandSlope"    
## [13] "Neighborhood"  "Condition1"    "Condition2"    "BldgType"     
## [17] "HouseStyle"    "OverallQual"   "OverallCond"   "YearBuilt"    
## [21] "YearRemodAdd"  "RoofStyle"     "RoofMatl"      "Exterior1st"  
## [25] "Exterior2nd"   "MasVnrType"    "MasVnrArea"    "ExterQual"    
## [29] "ExterCond"     "Foundation"    "BsmtQual"      "BsmtCond"     
## [33] "BsmtExposure"  "BsmtFinType1"  "BsmtFinSF1"    "BsmtFinType2" 
## [37] "BsmtFinSF2"    "BsmtUnfSF"     "TotalBsmtSF"   "Heating"      
## [41] "HeatingQC"     "CentralAir"    "Electrical"    "X1stFlrSF"    
## [45] "X2ndFlrSF"     "LowQualFinSF"  "GrLivArea"     "BsmtFullBath" 
## [49] "BsmtHalfBath"  "FullBath"      "HalfBath"      "BedroomAbvGr" 
## [53] "KitchenAbvGr"  "KitchenQual"   "TotRmsAbvGrd"  "Functional"   
## [57] "Fireplaces"    "FireplaceQu"   "GarageType"    "GarageYrBlt"  
## [61] "GarageFinish"  "GarageCars"    "GarageArea"    "GarageQual"   
## [65] "GarageCond"    "PavedDrive"    "WoodDeckSF"    "OpenPorchSF"  
## [69] "EnclosedPorch" "X3SsnPorch"    "ScreenPorch"   "PoolArea"     
## [73] "PoolQC"        "Fence"         "MiscFeature"   "MiscVal"      
## [77] "MoSold"        "YrSold"        "SaleType"      "SaleCondition"
## [81] "SalePrice"
# Preview the first six rows of the training table.
head(housing_train, n = 6L)
##   Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape LandContour
## 1  1         60       RL          65    8450   Pave  <NA>      Reg         Lvl
## 2  2         20       RL          80    9600   Pave  <NA>      Reg         Lvl
## 3  3         60       RL          68   11250   Pave  <NA>      IR1         Lvl
## 4  4         70       RL          60    9550   Pave  <NA>      IR1         Lvl
## 5  5         60       RL          84   14260   Pave  <NA>      IR1         Lvl
## 6  6         50       RL          85   14115   Pave  <NA>      IR1         Lvl
##   Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType
## 1    AllPub    Inside       Gtl      CollgCr       Norm       Norm     1Fam
## 2    AllPub       FR2       Gtl      Veenker      Feedr       Norm     1Fam
## 3    AllPub    Inside       Gtl      CollgCr       Norm       Norm     1Fam
## 4    AllPub    Corner       Gtl      Crawfor       Norm       Norm     1Fam
## 5    AllPub       FR2       Gtl      NoRidge       Norm       Norm     1Fam
## 6    AllPub    Inside       Gtl      Mitchel       Norm       Norm     1Fam
##   HouseStyle OverallQual OverallCond YearBuilt YearRemodAdd RoofStyle RoofMatl
## 1     2Story           7           5      2003         2003     Gable  CompShg
## 2     1Story           6           8      1976         1976     Gable  CompShg
## 3     2Story           7           5      2001         2002     Gable  CompShg
## 4     2Story           7           5      1915         1970     Gable  CompShg
## 5     2Story           8           5      2000         2000     Gable  CompShg
## 6     1.5Fin           5           5      1993         1995     Gable  CompShg
##   Exterior1st Exterior2nd MasVnrType MasVnrArea ExterQual ExterCond Foundation
## 1     VinylSd     VinylSd    BrkFace        196        Gd        TA      PConc
## 2     MetalSd     MetalSd       None          0        TA        TA     CBlock
## 3     VinylSd     VinylSd    BrkFace        162        Gd        TA      PConc
## 4     Wd Sdng     Wd Shng       None          0        TA        TA     BrkTil
## 5     VinylSd     VinylSd    BrkFace        350        Gd        TA      PConc
## 6     VinylSd     VinylSd       None          0        TA        TA       Wood
##   BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinSF1 BsmtFinType2
## 1       Gd       TA           No          GLQ        706          Unf
## 2       Gd       TA           Gd          ALQ        978          Unf
## 3       Gd       TA           Mn          GLQ        486          Unf
## 4       TA       Gd           No          ALQ        216          Unf
## 5       Gd       TA           Av          GLQ        655          Unf
## 6       Gd       TA           No          GLQ        732          Unf
##   BsmtFinSF2 BsmtUnfSF TotalBsmtSF Heating HeatingQC CentralAir Electrical
## 1          0       150         856    GasA        Ex          Y      SBrkr
## 2          0       284        1262    GasA        Ex          Y      SBrkr
## 3          0       434         920    GasA        Ex          Y      SBrkr
## 4          0       540         756    GasA        Gd          Y      SBrkr
## 5          0       490        1145    GasA        Ex          Y      SBrkr
## 6          0        64         796    GasA        Ex          Y      SBrkr
##   X1stFlrSF X2ndFlrSF LowQualFinSF GrLivArea BsmtFullBath BsmtHalfBath FullBath
## 1       856       854            0      1710            1            0        2
## 2      1262         0            0      1262            0            1        2
## 3       920       866            0      1786            1            0        2
## 4       961       756            0      1717            1            0        1
## 5      1145      1053            0      2198            1            0        2
## 6       796       566            0      1362            1            0        1
##   HalfBath BedroomAbvGr KitchenAbvGr KitchenQual TotRmsAbvGrd Functional
## 1        1            3            1          Gd            8        Typ
## 2        0            3            1          TA            6        Typ
## 3        1            3            1          Gd            6        Typ
## 4        0            3            1          Gd            7        Typ
## 5        1            4            1          Gd            9        Typ
## 6        1            1            1          TA            5        Typ
##   Fireplaces FireplaceQu GarageType GarageYrBlt GarageFinish GarageCars
## 1          0        <NA>     Attchd        2003          RFn          2
## 2          1          TA     Attchd        1976          RFn          2
## 3          1          TA     Attchd        2001          RFn          2
## 4          1          Gd     Detchd        1998          Unf          3
## 5          1          TA     Attchd        2000          RFn          3
## 6          0        <NA>     Attchd        1993          Unf          2
##   GarageArea GarageQual GarageCond PavedDrive WoodDeckSF OpenPorchSF
## 1        548         TA         TA          Y          0          61
## 2        460         TA         TA          Y        298           0
## 3        608         TA         TA          Y          0          42
## 4        642         TA         TA          Y          0          35
## 5        836         TA         TA          Y        192          84
## 6        480         TA         TA          Y         40          30
##   EnclosedPorch X3SsnPorch ScreenPorch PoolArea PoolQC Fence MiscFeature
## 1             0          0           0        0   <NA>  <NA>        <NA>
## 2             0          0           0        0   <NA>  <NA>        <NA>
## 3             0          0           0        0   <NA>  <NA>        <NA>
## 4           272          0           0        0   <NA>  <NA>        <NA>
## 5             0          0           0        0   <NA>  <NA>        <NA>
## 6             0        320           0        0   <NA> MnPrv        Shed
##   MiscVal MoSold YrSold SaleType SaleCondition SalePrice
## 1       0      2   2008       WD        Normal    208500
## 2       0      5   2007       WD        Normal    181500
## 3       0      9   2008       WD        Normal    223500
## 4       0      2   2006       WD       Abnorml    140000
## 5       0     12   2008       WD        Normal    250000
## 6     700     10   2009       WD        Normal    143000

Data Preparation

Looking at the training data (housing_train), many columns need to be changed: some need their NA values replaced with 0, and some are going to be converted from categorical to numerical so they can be used in the analysis. You can check the explanation for each column, together with the changes made, here: < Input Column guide here >.

Data Cleaning: Transforming the data to factors.

# Explicit code table for MSZoning, shared by train and test.
# as.numeric() silently ignores extra named arguments (and the old key "C "
# never matched the real level "C (all)"), so codes are looked up by name.
ms_zoning_codes <- c("C (all)" = 1, "FV" = 2, "RH" = 3, "RL" = 4, "RM" = 5)

housing_train$MSZoning <- as.factor(housing_train$MSZoning)
levels(housing_train$MSZoning)
## [1] "C (all)" "FV"      "RH"      "RL"      "RM"
# MSZoning column of train dataset has following levels: "C (all)", "FV", "RH", "RL", "RM"
housing_test$MSZoning <- as.factor(housing_test$MSZoning)
levels(housing_test$MSZoning)
## [1] "C (all)" "FV"      "RH"      "RL"      "RM"
# MSZoning column of test dataset has following levels: "C (all)", "FV", "RH", "RL", "RM"

sum(is.na(housing_train$MSZoning)) #no missing values
## [1] 0
sum(is.na(housing_test$MSZoning)) # 4 missing values
## [1] 4
# Fail loudly if a level appears that the code table does not cover
stopifnot(
  levels(housing_train$MSZoning) %in% names(ms_zoning_codes),
  levels(housing_test$MSZoning) %in% names(ms_zoning_codes)
)

# Change of factors to numeric in train dataset
housing_train$MSZoning <- unname(ms_zoning_codes[as.character(housing_train$MSZoning)])

# Change of factors to numeric in test dataset
housing_test$MSZoning <- unname(ms_zoning_codes[as.character(housing_test$MSZoning)])

# Imputing 0 instead of nulls
housing_test$MSZoning[is.na(housing_test$MSZoning)] <- 0 #the null values got changed to 0

# LotFrontage (train dataset): count the missing values, then replace them with 0
sum(is.na(housing_train$LotFrontage)) #259 missing values
## [1] 259
housing_train$LotFrontage <- replace(
  housing_train$LotFrontage,
  is.na(housing_train$LotFrontage),
  0
)
sum(is.na(housing_train$LotFrontage))
## [1] 0
# every missing value is now 0

# LotFrontage (test dataset): count the missing values, then replace them with 0
sum(is.na(housing_test$LotFrontage)) #227 missing values
## [1] 227
housing_test$LotFrontage <- replace(
  housing_test$LotFrontage,
  is.na(housing_test$LotFrontage),
  0
)
sum(is.na(housing_test$LotFrontage))
## [1] 0
# every missing value is now 0

# Explicit code table for Street, shared by train and test.
# as.numeric() ignores extra named arguments and would return the alphabetical
# level index (Grvl = 1, Pave = 2), so codes are looked up by name instead.
street_codes <- c("Pave" = 1, "Grvl" = 2)

# Street column changed to numeric in train dataset
street <- housing_train$Street
sum(is.na(housing_train$Street)) #0 missing values
## [1] 0
street <- as.factor(street)
stopifnot(levels(street) %in% names(street_codes))
street <- unname(street_codes[as.character(street)])
housing_train$Street <- street
# Pave got replaced with 1 and Grvl type of road got replaced with 2

# Street column changed to numeric in test dataset
street1 <- housing_test$Street
sum(is.na(street1)) #0 missing values
## [1] 0
street1 <- as.factor(street1)
stopifnot(levels(street1) %in% names(street_codes))
street1 <- unname(street_codes[as.character(street1)])
housing_test$Street <- street1
# Pave got replaced with 1 and Grvl type of road got replaced with 2

# Explicit code table for Alley, shared by train and test.
# as.numeric() ignores extra named arguments and would return the alphabetical
# level index (Grvl = 1, Pave = 2), so codes are looked up by name instead.
alley_codes <- c("Pave" = 1, "Grvl" = 2)

# Transforming Alley column to numeric in train dataset
alley <- as.factor(housing_train$Alley)
levels(alley)
## [1] "Grvl" "Pave"
stopifnot(levels(alley) %in% names(alley_codes))
alley <- unname(alley_codes[as.character(alley)])
sum(is.na(alley)) # 1369 NA values
## [1] 1369
alley[is.na(alley)] <- 0 #changing NA values to 0
sum(is.na(alley)) #no missing values
## [1] 0
housing_train$Alley <- alley

# Transforming Alley column to numeric in test dataset
alley1 <- as.factor(housing_test$Alley)
levels(alley1)
## [1] "Grvl" "Pave"
stopifnot(levels(alley1) %in% names(alley_codes))
alley1 <- unname(alley_codes[as.character(alley1)])
sum(is.na(alley1)) # 1352 NA values
## [1] 1352
alley1[is.na(alley1)] <- 0 #changing NA values to 0
sum(is.na(alley1)) #no missing values
## [1] 0
housing_test$Alley <- alley1

# Explicit code table for LotShape, shared by train and test.
# as.numeric() ignores extra named arguments, so codes are looked up by name;
# this keeps the coding independent of which levels a dataset contains.
lot_shape_codes <- c("IR1" = 1, "IR2" = 2, "IR3" = 3, "Reg" = 4)

# Transforming LotShape column to numeric in train dataset
shape <- as.factor(housing_train$LotShape)
sum(is.na(shape)) # no missing values
## [1] 0
levels(shape) # 4 levels: "IR1", "IR2", "IR3", "Reg"
## [1] "IR1" "IR2" "IR3" "Reg"
stopifnot(levels(shape) %in% names(lot_shape_codes))
shape <- unname(lot_shape_codes[as.character(shape)])
housing_train$LotShape <- shape

# Transforming LotShape column to numeric in test dataset
shape1 <- as.factor(housing_test$LotShape)
sum(is.na(shape1)) # no missing values
## [1] 0
levels(shape1) # 4 levels: "IR1", "IR2", "IR3", "Reg"
## [1] "IR1" "IR2" "IR3" "Reg"
stopifnot(levels(shape1) %in% names(lot_shape_codes))
shape1 <- unname(lot_shape_codes[as.character(shape1)])
housing_test$LotShape <- shape1

# Explicit code table for LandContour, shared by train and test.
# as.numeric() ignores extra named arguments, so codes are looked up by name;
# this keeps the coding independent of which levels a dataset contains.
land_contour_codes <- c("Bnk" = 1, "HLS" = 2, "Low" = 3, "Lvl" = 4)

# Transforming LandContour column to numeric in train dataset
lcontour <- as.factor(housing_train$LandContour)
sum(is.na(lcontour)) # no missing values
## [1] 0
levels(lcontour) # 4 levels: "Bnk", "HLS", "Low", "Lvl"
## [1] "Bnk" "HLS" "Low" "Lvl"
stopifnot(levels(lcontour) %in% names(land_contour_codes))
lcontour <- unname(land_contour_codes[as.character(lcontour)])
housing_train$LandContour <- lcontour

# Transforming LandContour column to numeric in test dataset
lcontour1 <- as.factor(housing_test$LandContour)
sum(is.na(lcontour1)) # no missing values
## [1] 0
levels(lcontour1) # 4 levels: "Bnk", "HLS", "Low", "Lvl"
## [1] "Bnk" "HLS" "Low" "Lvl"
stopifnot(levels(lcontour1) %in% names(land_contour_codes))
lcontour1 <- unname(land_contour_codes[as.character(lcontour1)])
housing_test$LandContour <- lcontour1

# Explicit code table for Utilities, shared by train and test.
# as.numeric() ignores extra named arguments, so codes are looked up by name;
# this keeps the coding independent of which levels a dataset contains.
utilities_codes <- c("AllPub" = 1, "NoSeWa" = 2)

# Transforming Utilities column to numeric in train dataset
utility <- as.factor(housing_train$Utilities)
sum(is.na(utility)) # no missing values
## [1] 0
levels(utility) # 2 levels: "AllPub", "NoSeWa"
## [1] "AllPub" "NoSeWa"
stopifnot(levels(utility) %in% names(utilities_codes))
utility <- unname(utilities_codes[as.character(utility)])
housing_train$Utilities <- utility

# Transforming Utilities column to numeric in test dataset
utility1 <- as.factor(housing_test$Utilities)
sum(is.na(utility1)) # 2 missing values
## [1] 2
levels(utility1) # only 1 level in the test dataset: "AllPub"
## [1] "AllPub"
stopifnot(levels(utility1) %in% names(utilities_codes))
utility1 <- unname(utilities_codes[as.character(utility1)])
utility1[is.na(utility1)] <- 0 # missing values become 0
housing_test$Utilities <- utility1

# Explicit code table for LotConfig, shared by train and test.
# as.numeric() ignores extra named arguments, so codes are looked up by name;
# this keeps the coding independent of which levels a dataset contains.
lot_config_codes <- c(
  "Corner" = 1, "CulDSac" = 2, "FR2" = 3, "FR3" = 4, "Inside" = 5
)

# Transforming LotConfig column to numeric in train dataset
lconfig <- as.factor(housing_train$LotConfig)
sum(is.na(lconfig)) # no missing values
## [1] 0
levels(lconfig)
## [1] "Corner"  "CulDSac" "FR2"     "FR3"     "Inside"
stopifnot(levels(lconfig) %in% names(lot_config_codes))
lconfig <- unname(lot_config_codes[as.character(lconfig)])
housing_train$LotConfig <- lconfig

# Transforming LotConfig column to numeric in test dataset
lconfig1 <- as.factor(housing_test$LotConfig)
sum(is.na(lconfig1)) # no missing values
## [1] 0
levels(lconfig1)
## [1] "Corner"  "CulDSac" "FR2"     "FR3"     "Inside"
stopifnot(levels(lconfig1) %in% names(lot_config_codes))
lconfig1 <- unname(lot_config_codes[as.character(lconfig1)])
housing_test$LotConfig <- lconfig1

# Explicit code table for LandSlope, shared by train and test.
# as.numeric() ignores extra named arguments, so codes are looked up by name;
# this keeps the coding independent of which levels a dataset contains.
land_slope_codes <- c("Gtl" = 1, "Mod" = 2, "Sev" = 3)

# Transforming LandSlope column to numeric in train dataset
lslope <- as.factor(housing_train$LandSlope)
sum(is.na(lslope)) # no missing values
## [1] 0
levels(lslope)
## [1] "Gtl" "Mod" "Sev"
stopifnot(levels(lslope) %in% names(land_slope_codes))
lslope <- unname(land_slope_codes[as.character(lslope)])
housing_train$LandSlope <- lslope

# Transforming LandSlope column to numeric in test dataset
lslope1 <- as.factor(housing_test$LandSlope)
sum(is.na(lslope1)) # no missing values
## [1] 0
levels(lslope1)
## [1] "Gtl" "Mod" "Sev"
stopifnot(levels(lslope1) %in% names(land_slope_codes))
lslope1 <- unname(land_slope_codes[as.character(lslope1)])
housing_test$LandSlope <- lslope1

# Explicit code table for Neighborhood, shared by train and test.
# as.numeric() ignores extra named arguments and would return the alphabetical
# level index (e.g. Sawyer = 19, SWISU = 23) instead of the codes written
# here, so codes are looked up by name.
neighborhood_codes <- c(
  "Blmngtn" = 1, "Blueste" = 2, "BrDale" = 3, "BrkSide" = 4, "ClearCr" = 5,
  "CollgCr" = 6, "Crawfor" = 7, "Edwards" = 8, "Gilbert" = 9, "IDOTRR" = 10,
  "MeadowV" = 11, "Mitchel" = 12, "NAmes" = 13, "NoRidge" = 14,
  "NPkVill" = 15, "NridgHt" = 16, "NWAmes" = 17, "OldTown" = 18,
  "SWISU" = 19, "Sawyer" = 20, "SawyerW" = 21, "Somerst" = 22,
  "StoneBr" = 23, "Timber" = 24, "Veenker" = 25
)

# Transforming Neighborhood column to numeric in train dataset
Neighborhood <- as.factor(housing_train$Neighborhood)
sum(is.na(Neighborhood)) # no missing values
## [1] 0
levels(Neighborhood)
##  [1] "Blmngtn" "Blueste" "BrDale"  "BrkSide" "ClearCr" "CollgCr" "Crawfor"
##  [8] "Edwards" "Gilbert" "IDOTRR"  "MeadowV" "Mitchel" "NAmes"   "NoRidge"
## [15] "NPkVill" "NridgHt" "NWAmes"  "OldTown" "Sawyer"  "SawyerW" "Somerst"
## [22] "StoneBr" "SWISU"   "Timber"  "Veenker"
stopifnot(levels(Neighborhood) %in% names(neighborhood_codes))
Neighborhood <- unname(neighborhood_codes[as.character(Neighborhood)])
housing_train$Neighborhood <- Neighborhood

# Transforming Neighborhood column to numeric in test dataset
Neighborhood1 <- as.factor(housing_test$Neighborhood)
sum(is.na(Neighborhood1)) # no missing values
## [1] 0
levels(Neighborhood1)
##  [1] "Blmngtn" "Blueste" "BrDale"  "BrkSide" "ClearCr" "CollgCr" "Crawfor"
##  [8] "Edwards" "Gilbert" "IDOTRR"  "MeadowV" "Mitchel" "NAmes"   "NoRidge"
## [15] "NPkVill" "NridgHt" "NWAmes"  "OldTown" "Sawyer"  "SawyerW" "Somerst"
## [22] "StoneBr" "SWISU"   "Timber"  "Veenker"
stopifnot(levels(Neighborhood1) %in% names(neighborhood_codes))
Neighborhood1 <- unname(neighborhood_codes[as.character(Neighborhood1)])
housing_test$Neighborhood <- Neighborhood1

# Explicit code table for Condition1, shared by train and test.
# as.numeric() ignores extra named arguments and would return the alphabetical
# level index instead of the codes written here, so codes are looked up by name.
condition1_codes <- c(
  "Artery" = 1, "Feedr" = 2, "Norm" = 3, "RRNn" = 4, "RRAn" = 5,
  "PosN" = 6, "PosA" = 7, "RRNe" = 8, "RRAe" = 9
)

# Transforming Condition1 column to numeric in train dataset
Condition1 <- as.factor(housing_train$Condition1)
sum(is.na(Condition1)) # no missing values
## [1] 0
levels(Condition1)
## [1] "Artery" "Feedr"  "Norm"   "PosA"   "PosN"   "RRAe"   "RRAn"   "RRNe"  
## [9] "RRNn"
stopifnot(levels(Condition1) %in% names(condition1_codes))
Condition1 <- unname(condition1_codes[as.character(Condition1)])
housing_train$Condition1 <- Condition1

# Transforming Condition1 column to numeric in test dataset
Condition1T <- as.factor(housing_test$Condition1)
sum(is.na(Condition1T)) # no missing values
## [1] 0
levels(Condition1T)
## [1] "Artery" "Feedr"  "Norm"   "PosA"   "PosN"   "RRAe"   "RRAn"   "RRNe"  
## [9] "RRNn"
stopifnot(levels(Condition1T) %in% names(condition1_codes))
Condition1T <- unname(condition1_codes[as.character(Condition1T)])
housing_test$Condition1 <- Condition1T

# Explicit code table for Condition2, shared by train and test.
# as.numeric() ignores extra named arguments and would return each dataset's
# own alphabetical level index, so codes are looked up by name instead.
condition2_codes <- c(
  "Artery" = 1, "Feedr" = 2, "Norm" = 3, "RRNn" = 4, "RRAn" = 5,
  "PosN" = 6, "PosA" = 7, "RRNe" = 8, "RRAe" = 9
)

# Transforming Condition2 column to numeric in train dataset
condition2 <- as.factor(housing_train$Condition2)
sum(is.na(condition2)) # no missing values
## [1] 0
levels(condition2)
## [1] "Artery" "Feedr"  "Norm"   "PosA"   "PosN"   "RRAe"   "RRAn"   "RRNn"
stopifnot(levels(condition2) %in% names(condition2_codes))
condition2 <- unname(condition2_codes[as.character(condition2)])
housing_train$Condition2 <- condition2

# Transforming Condition2 column to numeric in test dataset
Condition2T <- as.factor(housing_test$Condition2)
sum(is.na(Condition2T)) # no missing values
## [1] 0
levels(Condition2T) #values
## [1] "Artery" "Feedr"  "Norm"   "PosA"   "PosN"
stopifnot(levels(Condition2T) %in% names(condition2_codes))
Condition2T <- unname(condition2_codes[as.character(Condition2T)])
housing_test$Condition2 <- Condition2T

# Explicit code table for BldgType, shared by train and test.
# as.numeric() ignores extra named arguments, so codes are looked up by name.
# The keys use the real level spellings ("2fmCon", "Duplex"); the previous
# "2FmCon" / "Duplx" did not match any level in the data.
bldg_type_codes <- c(
  "1Fam" = 1, "2fmCon" = 2, "Duplex" = 3, "TwnhsE" = 4, "Twnhs" = 5
)

# Transforming BldgType column to numeric in train dataset
BldgType <- as.factor(housing_train$BldgType)
sum(is.na(BldgType)) # no missing values
## [1] 0
levels(BldgType)
## [1] "1Fam"   "2fmCon" "Duplex" "Twnhs"  "TwnhsE"
stopifnot(levels(BldgType) %in% names(bldg_type_codes))
BldgType <- unname(bldg_type_codes[as.character(BldgType)])
housing_train$BldgType <- BldgType

# Transforming BldgType column to numeric in test dataset
BldgType1 <- as.factor(housing_test$BldgType)
sum(is.na(BldgType1)) # no missing values
## [1] 0
levels(BldgType1)
## [1] "1Fam"   "2fmCon" "Duplex" "Twnhs"  "TwnhsE"
stopifnot(levels(BldgType1) %in% names(bldg_type_codes))
BldgType1 <- unname(bldg_type_codes[as.character(BldgType1)])
housing_test$BldgType <- BldgType1

# Explicit code table for HouseStyle, shared by train and test.
# as.numeric() ignores extra named arguments and returns each dataset's own
# level index; the test data has no "2.5Fin", so its codes used to shift
# relative to train. Codes are now looked up by name.
house_style_codes <- c(
  "1Story" = 1, "1.5Fin" = 2, "1.5Unf" = 3, "2Story" = 4,
  "2.5Fin" = 5, "2.5Unf" = 6, "SFoyer" = 7, "SLvl" = 8
)

# Transforming HouseStyle column to numeric in train dataset
HouseStyle <- as.factor(housing_train$HouseStyle)
sum(is.na(HouseStyle)) # no missing values
## [1] 0
levels(HouseStyle)
## [1] "1.5Fin" "1.5Unf" "1Story" "2.5Fin" "2.5Unf" "2Story" "SFoyer" "SLvl"
stopifnot(levels(HouseStyle) %in% names(house_style_codes))
HouseStyle <- unname(house_style_codes[as.character(HouseStyle)])
housing_train$HouseStyle <- HouseStyle

# Transforming HouseStyle column to numeric in test dataset
HouseStyle1 <- as.factor(housing_test$HouseStyle)
sum(is.na(HouseStyle1)) # no missing values
## [1] 0
levels(HouseStyle1)
## [1] "1.5Fin" "1.5Unf" "1Story" "2.5Unf" "2Story" "SFoyer" "SLvl"
stopifnot(levels(HouseStyle1) %in% names(house_style_codes))
HouseStyle1 <- unname(house_style_codes[as.character(HouseStyle1)])
housing_test$HouseStyle <- HouseStyle1

# OverallQual, train dataset: count missing values
OverallQual <- factor(housing_train$OverallQual)
sum(is.na(OverallQual)) #no missing values
## [1] 0
# OverallQual, test dataset: count missing values
OverallQual1 <- factor(housing_test$OverallQual)
sum(is.na(OverallQual1)) #no missing values
## [1] 0
# OverallCond, train dataset: count missing values
OverallCond <- factor(housing_train$OverallCond)
sum(is.na(OverallCond)) #no missing values
## [1] 0
# OverallCond, test dataset: count missing values
OverallCond1 <- factor(housing_test$OverallCond)
sum(is.na(OverallCond1)) #no missing values
## [1] 0
# YearBuilt, train dataset: count missing values
YearBuilt <- factor(housing_train$YearBuilt)
sum(is.na(YearBuilt)) #no missing values
## [1] 0
# YearBuilt, test dataset: count missing values
YearBuilt1 <- factor(housing_test$YearBuilt)
sum(is.na(YearBuilt1)) #no missing values
## [1] 0
# Explicit code table for RoofStyle, shared by train and test.
# as.numeric() ignores extra named arguments, so codes are looked up by name;
# this keeps the coding independent of which levels a dataset contains.
roof_style_codes <- c(
  "Flat" = 1, "Gable" = 2, "Gambrel" = 3, "Hip" = 4, "Mansard" = 5, "Shed" = 6
)

# Transforming RoofStyle column to numeric in train dataset
RoofStyle <- as.factor(housing_train$RoofStyle)
levels(RoofStyle) #"Flat"    "Gable"   "Gambrel" "Hip" "Mansard" "Shed" 
## [1] "Flat"    "Gable"   "Gambrel" "Hip"     "Mansard" "Shed"
sum(is.na(RoofStyle)) # 0 Missing Entries
## [1] 0
stopifnot(levels(RoofStyle) %in% names(roof_style_codes))
RoofStyle <- unname(roof_style_codes[as.character(RoofStyle)])
housing_train$RoofStyle <- RoofStyle

# Transforming RoofStyle column to numeric in test dataset
RoofStyle1 <- as.factor(housing_test$RoofStyle)
levels(RoofStyle1) #"Flat"    "Gable"   "Gambrel" "Hip" "Mansard" "Shed" 
## [1] "Flat"    "Gable"   "Gambrel" "Hip"     "Mansard" "Shed"
sum(is.na(RoofStyle1)) # 0 Missing Entries
## [1] 0
stopifnot(levels(RoofStyle1) %in% names(roof_style_codes))
RoofStyle1 <- unname(roof_style_codes[as.character(RoofStyle1)])
housing_test$RoofStyle <- RoofStyle1

# Explicit code table for RoofMatl, shared by train and test.
# as.numeric() ignores extra named arguments and returns each dataset's own
# level index; the test data has only 4 of the 8 levels, so e.g. "CompShg"
# used to be 2 in train but 1 in test. Codes are now looked up by name.
roof_matl_codes <- c(
  "ClyTile" = 1, "CompShg" = 2, "Membran" = 3, "Metal" = 4,
  "Roll" = 5, "Tar&Grv" = 6, "WdShake" = 7, "WdShngl" = 8
)

# Transforming RoofMatl column to numeric in train dataset
RoofMatl <- as.factor(housing_train$RoofMatl)
levels(RoofMatl) #"ClyTile" "CompShg" "Membran" "Metal"   "Roll"    "Tar&Grv" "WdShake" "WdShngl" 
## [1] "ClyTile" "CompShg" "Membran" "Metal"   "Roll"    "Tar&Grv" "WdShake"
## [8] "WdShngl"
sum(is.na(RoofMatl)) # 0 Missing Entries
## [1] 0
stopifnot(levels(RoofMatl) %in% names(roof_matl_codes))
RoofMatl <- unname(roof_matl_codes[as.character(RoofMatl)])
housing_train$RoofMatl <- RoofMatl

# Transforming RoofMatl column to numeric in test dataset
RoofMatl1 <- as.factor(housing_test$RoofMatl)
levels(RoofMatl1) # only 4 levels in the test dataset
## [1] "CompShg" "Tar&Grv" "WdShake" "WdShngl"
sum(is.na(RoofMatl1)) # 0 Missing Entries
## [1] 0
stopifnot(levels(RoofMatl1) %in% names(roof_matl_codes))
RoofMatl1 <- unname(roof_matl_codes[as.character(RoofMatl1)])
housing_test$RoofMatl <- RoofMatl1

# Explicit code table for Exterior1st, shared by train and test.
# as.numeric() ignores extra named arguments and returns each dataset's own
# level index; the test data lacks "ImStucc" and "Stone", so its codes used
# to shift relative to train. Codes are now looked up by name.
exterior1st_codes <- c(
  "AsbShng" = 1, "AsphShn" = 2, "BrkComm" = 3, "BrkFace" = 4, "CBlock" = 5,
  "CemntBd" = 6, "HdBoard" = 7, "ImStucc" = 8, "MetalSd" = 9,
  "Plywood" = 10, "Stone" = 11, "Stucco" = 12, "VinylSd" = 13,
  "Wd Sdng" = 14, "WdShing" = 15
)

#Transforming Exterior1st column to numeric in train dataset
Exterior1st <- as.factor(housing_train$Exterior1st)
levels(Exterior1st) #"AsbShng" "AsphShn" "BrkComm" "BrkFace" "CBlock"  "CemntBd" "HdBoard" "ImStucc" "MetalSd" "Plywood" "Stone"   "Stucco"  "VinylSd" "Wd Sdng" "WdShing"
##  [1] "AsbShng" "AsphShn" "BrkComm" "BrkFace" "CBlock"  "CemntBd" "HdBoard"
##  [8] "ImStucc" "MetalSd" "Plywood" "Stone"   "Stucco"  "VinylSd" "Wd Sdng"
## [15] "WdShing"
sum(is.na(Exterior1st)) #0 Missing Entries
## [1] 0
stopifnot(levels(Exterior1st) %in% names(exterior1st_codes))
Exterior1st <- unname(exterior1st_codes[as.character(Exterior1st)])
housing_train$Exterior1st <- Exterior1st

#Transforming Exterior1st column to numeric in test dataset
Exterior1st1 <- as.factor(housing_test$Exterior1st)
levels(Exterior1st1) # 13 of the 15 levels appear in the test dataset
##  [1] "AsbShng" "AsphShn" "BrkComm" "BrkFace" "CBlock"  "CemntBd" "HdBoard"
##  [8] "MetalSd" "Plywood" "Stucco"  "VinylSd" "Wd Sdng" "WdShing"
sum(is.na(Exterior1st1)) # 1 Missing Entry
## [1] 1
stopifnot(levels(Exterior1st1) %in% names(exterior1st_codes))
Exterior1st1 <- unname(exterior1st_codes[as.character(Exterior1st1)])
# Impute the missing entry with 0, as done for the other columns
Exterior1st1[is.na(Exterior1st1)] <- 0
housing_test$Exterior1st <- Exterior1st1

# Explicit code table for Exterior2nd, shared by train and test.
# as.numeric() ignores extra named arguments and returns each dataset's own
# level index; the test data lacks "Other", so its codes used to shift
# relative to train. Codes are now looked up by name.
exterior2nd_codes <- c(
  "AsbShng" = 1, "AsphShn" = 2, "Brk Cmn" = 3, "BrkFace" = 4, "CBlock" = 5,
  "CmentBd" = 6, "HdBoard" = 7, "ImStucc" = 8, "MetalSd" = 9, "Other" = 10,
  "Plywood" = 11, "Stone" = 12, "Stucco" = 13, "VinylSd" = 14,
  "Wd Sdng" = 15, "Wd Shng" = 16
)

#Transforming Exterior2nd column to numeric in train dataset
Exterior2nd <- as.factor(housing_train$Exterior2nd)
levels(Exterior2nd) # "AsbShng" "AsphShn" "Brk Cmn" "BrkFace" "CBlock"  "CmentBd" "HdBoard" "ImStucc" "MetalSd" "Other"   "Plywood" "Stone"   "Stucco"  "VinylSd" "Wd Sdng" "Wd Shng"
##  [1] "AsbShng" "AsphShn" "Brk Cmn" "BrkFace" "CBlock"  "CmentBd" "HdBoard"
##  [8] "ImStucc" "MetalSd" "Other"   "Plywood" "Stone"   "Stucco"  "VinylSd"
## [15] "Wd Sdng" "Wd Shng"
sum(is.na(Exterior2nd)) # 0 Missing Entries
## [1] 0
stopifnot(levels(Exterior2nd) %in% names(exterior2nd_codes))
Exterior2nd <- unname(exterior2nd_codes[as.character(Exterior2nd)])
housing_train$Exterior2nd <- Exterior2nd

#Transforming Exterior2nd column to numeric in test dataset
Exterior2nd1 <- as.factor(housing_test$Exterior2nd)
levels(Exterior2nd1) # 15 of the 16 levels appear in the test dataset
##  [1] "AsbShng" "AsphShn" "Brk Cmn" "BrkFace" "CBlock"  "CmentBd" "HdBoard"
##  [8] "ImStucc" "MetalSd" "Plywood" "Stone"   "Stucco"  "VinylSd" "Wd Sdng"
## [15] "Wd Shng"
sum(is.na(Exterior2nd1)) # 1 Missing Entry
## [1] 1
stopifnot(levels(Exterior2nd1) %in% names(exterior2nd_codes))
Exterior2nd1 <- unname(exterior2nd_codes[as.character(Exterior2nd1)])
# Impute the missing entry with 0, as done for the other columns
Exterior2nd1[is.na(Exterior2nd1)] <- 0
housing_test$Exterior2nd <- Exterior2nd1

# Explicit code table for MasVnrType, shared by train and test.
# as.numeric() ignores extra named arguments, so codes are looked up by name;
# this keeps the coding independent of which levels a dataset contains.
mas_vnr_type_codes <- c("BrkCmn" = 1, "BrkFace" = 2, "None" = 3, "Stone" = 4)

#Transforming MasVnrType column to numeric in train dataset
MasVnrType <- as.factor(housing_train$MasVnrType)
levels(MasVnrType) #"BrkCmn"  "BrkFace" "None"    "Stone" 
## [1] "BrkCmn"  "BrkFace" "None"    "Stone"
sum(is.na(MasVnrType)) # 8 Missing Values
## [1] 8
stopifnot(levels(MasVnrType) %in% names(mas_vnr_type_codes))
MasVnrType <- unname(mas_vnr_type_codes[as.character(MasVnrType)])
MasVnrType[is.na(MasVnrType)] <- 0 # missing values become 0
housing_train$MasVnrType <- MasVnrType

#Transforming MasVnrType column to numeric in test dataset
MasVnrType1 <- as.factor(housing_test$MasVnrType)
levels(MasVnrType1) #"BrkCmn"  "BrkFace" "None"    "Stone" 
## [1] "BrkCmn"  "BrkFace" "None"    "Stone"
sum(is.na(MasVnrType1)) # 16 Missing Values
## [1] 16
stopifnot(levels(MasVnrType1) %in% names(mas_vnr_type_codes))
MasVnrType1 <- unname(mas_vnr_type_codes[as.character(MasVnrType1)])
MasVnrType1[is.na(MasVnrType1)] <- 0 # missing values become 0
housing_test$MasVnrType <- MasVnrType1


# Explicit code table for ExterQual, shared by train and test.
# as.numeric() ignores extra named arguments and would return the alphabetical
# level index (Ex = 1, Fa = 2, Gd = 3, TA = 4), which is not the order written
# here, so codes are looked up by name.
exter_qual_codes <- c("Fa" = 1, "TA" = 2, "Gd" = 3, "Ex" = 4)

#Transforming ExterQual column to numeric in train dataset
ExterQual <- as.factor(housing_train$ExterQual)
levels(ExterQual) # "TA" "Fa" "Gd" "Ex"
## [1] "Ex" "Fa" "Gd" "TA"
sum(is.na(ExterQual)) # 0 Missing Entries
## [1] 0
stopifnot(levels(ExterQual) %in% names(exter_qual_codes))
ExterQual <- unname(exter_qual_codes[as.character(ExterQual)])
housing_train$ExterQual <- ExterQual

#Transforming ExterQual column to numeric in test dataset
ExterQual1 <- as.factor(housing_test$ExterQual)
levels(ExterQual1) # "TA" "Fa" "Gd" "Ex"
## [1] "Ex" "Fa" "Gd" "TA"
sum(is.na(ExterQual1)) # 0 Missing Entries
## [1] 0
stopifnot(levels(ExterQual1) %in% names(exter_qual_codes))
ExterQual1 <- unname(exter_qual_codes[as.character(ExterQual1)])
housing_test$ExterQual <- ExterQual1

# Explicit code table for ExterCond, shared by train and test.
# as.numeric() ignores extra named arguments and would return the alphabetical
# level index (Ex = 1, Fa = 2, Gd = 3, Po = 4, TA = 5), which is not the order
# written here, so codes are looked up by name.
exter_cond_codes <- c("Po" = 1, "Fa" = 2, "TA" = 3, "Gd" = 4, "Ex" = 5)

#Transforming ExterCond column to numeric in train dataset
ExterCond <- as.factor(housing_train$ExterCond)
levels(ExterCond) # "TA" "Po" "Fa" "Gd" "Ex"
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(ExterCond)) # 0 Missing Entries
## [1] 0
stopifnot(levels(ExterCond) %in% names(exter_cond_codes))
ExterCond <- unname(exter_cond_codes[as.character(ExterCond)])
housing_train$ExterCond <- ExterCond

#Transforming ExterCond column to numeric in test dataset
ExterCond1 <- as.factor(housing_test$ExterCond)
levels(ExterCond1) # "TA" "Po" "Fa" "Gd" "Ex"  
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(ExterCond1)) # 0 Missing Entries
## [1] 0
stopifnot(levels(ExterCond1) %in% names(exter_cond_codes))
ExterCond1 <- unname(exter_cond_codes[as.character(ExterCond1)])
housing_test$ExterCond <- ExterCond1

# Explicit code table for Foundation, shared by train and test.
# as.numeric() ignores extra named arguments, so codes are looked up by name;
# this keeps the coding independent of which levels a dataset contains.
foundation_codes <- c(
  "BrkTil" = 1, "CBlock" = 2, "PConc" = 3, "Slab" = 4, "Stone" = 5, "Wood" = 6
)

#Transforming Foundation column to numeric in train dataset
Foundation <- as.factor(housing_train$Foundation)
levels(Foundation) # "BrkTil" "CBlock" "PConc" "Slab" "Stone"  "Wood" 
## [1] "BrkTil" "CBlock" "PConc"  "Slab"   "Stone"  "Wood"
sum(is.na(Foundation)) # 0 Missing Entries
## [1] 0
stopifnot(levels(Foundation) %in% names(foundation_codes))
Foundation <- unname(foundation_codes[as.character(Foundation)])
housing_train$Foundation <- Foundation

#Transforming Foundation column to numeric in test dataset
Foundation1 <- as.factor(housing_test$Foundation)
levels(Foundation1) # "BrkTil" "CBlock" "PConc" "Slab" "Stone"  "Wood" 
## [1] "BrkTil" "CBlock" "PConc"  "Slab"   "Stone"  "Wood"
sum(is.na(Foundation1)) # 0 Missing Entries
## [1] 0
stopifnot(levels(Foundation1) %in% names(foundation_codes))
Foundation1 <- unname(foundation_codes[as.character(Foundation1)])
housing_test$Foundation <- Foundation1

# Explicit code table for BsmtQual, shared by train and test.
# as.numeric() ignores extra named arguments and would return the alphabetical
# level index (Ex = 1, Fa = 2, Gd = 3, TA = 4) instead of the codes written
# here, so codes are looked up by name.
bsmt_qual_codes <- c("TA" = 1, "Fa" = 2, "Gd" = 3, "Ex" = 4)

#Transforming BsmtQual column to numeric in train dataset
BsmtQual <- as.factor(housing_train$BsmtQual)
levels(BsmtQual) # "TA" "Fa" "Gd" "Ex" 
## [1] "Ex" "Fa" "Gd" "TA"
sum(is.na(BsmtQual)) # 37 Missing Entries
## [1] 37
stopifnot(levels(BsmtQual) %in% names(bsmt_qual_codes))
BsmtQual <- unname(bsmt_qual_codes[as.character(BsmtQual)])
BsmtQual[is.na(BsmtQual)] <- 0 # missing values become 0
housing_train$BsmtQual <- BsmtQual

#Transforming BsmtQual column to numeric in test dataset
BsmtQual1 <- as.factor(housing_test$BsmtQual)
levels(BsmtQual1) # "TA" "Fa" "Gd" "Ex" 
## [1] "Ex" "Fa" "Gd" "TA"
sum(is.na(BsmtQual1)) # 44 Missing Entries
## [1] 44
stopifnot(levels(BsmtQual1) %in% names(bsmt_qual_codes))
BsmtQual1 <- unname(bsmt_qual_codes[as.character(BsmtQual1)])
BsmtQual1[is.na(BsmtQual1)] <- 0 # missing values become 0
housing_test$BsmtQual <- BsmtQual1

# Explicit code table for BsmtCond, shared by train and test.
# as.numeric() ignores extra named arguments and would return the alphabetical
# level index (Fa = 1, Gd = 2, Po = 3, TA = 4) instead of the codes written
# here, so codes are looked up by name.
bsmt_cond_codes <- c("TA" = 1, "Po" = 2, "Fa" = 3, "Gd" = 4)

#Transforming BsmtCond column to numeric in train dataset
BsmtCond <- as.factor(housing_train$BsmtCond)
levels(BsmtCond) # "TA" "Fa" "Gd" "Po" 
## [1] "Fa" "Gd" "Po" "TA"
sum(is.na(BsmtCond)) # 37 Missing Entries
## [1] 37
stopifnot(levels(BsmtCond) %in% names(bsmt_cond_codes))
BsmtCond <- unname(bsmt_cond_codes[as.character(BsmtCond)])
BsmtCond[is.na(BsmtCond)] <- 0 # missing values become 0
housing_train$BsmtCond <- BsmtCond

#Transforming BsmtCond column to numeric in test dataset
BsmtCond1 <- as.factor(housing_test$BsmtCond)
levels(BsmtCond1) # "TA" "Fa" "Gd" "Po" 
## [1] "Fa" "Gd" "Po" "TA"
sum(is.na(BsmtCond1)) # 45 Missing Entries
## [1] 45
stopifnot(levels(BsmtCond1) %in% names(bsmt_cond_codes))
BsmtCond1 <- unname(bsmt_cond_codes[as.character(BsmtCond1)])
BsmtCond1[is.na(BsmtCond1)] <- 0 # missing values become 0
housing_test$BsmtCond <- BsmtCond1

# Explicit code table for BsmtExposure, shared by train and test.
# as.numeric() ignores extra named arguments and would return the alphabetical
# level index (Av = 1, Gd = 2, Mn = 3, No = 4) instead of the codes written
# here, so codes are looked up by name.
bsmt_exposure_codes <- c("No" = 1, "Mn" = 2, "Av" = 3, "Gd" = 4)

#Transforming BsmtExposure column to numeric in train dataset
BsmtExposure <- as.factor(housing_train$BsmtExposure)
levels(BsmtExposure) # "No" "Mn" "Gd" "Av" 
## [1] "Av" "Gd" "Mn" "No"
sum(is.na(BsmtExposure)) # 38 Missing Entries
## [1] 38
stopifnot(levels(BsmtExposure) %in% names(bsmt_exposure_codes))
BsmtExposure <- unname(bsmt_exposure_codes[as.character(BsmtExposure)])
BsmtExposure[is.na(BsmtExposure)] <- 0 # missing values become 0
housing_train$BsmtExposure <- BsmtExposure

#Transforming BsmtExposure column to numeric in test dataset
BsmtExposure1 <- as.factor(housing_test$BsmtExposure)
levels(BsmtExposure1) # "No" "Mn" "Gd" "Av" 
## [1] "Av" "Gd" "Mn" "No"
sum(is.na(BsmtExposure1)) # 44 Missing Entries
## [1] 44
stopifnot(levels(BsmtExposure1) %in% names(bsmt_exposure_codes))
BsmtExposure1 <- unname(bsmt_exposure_codes[as.character(BsmtExposure1)])
BsmtExposure1[is.na(BsmtExposure1)] <- 0 # missing values become 0
housing_test$BsmtExposure <- BsmtExposure1

# Explicit code table for BsmtFinType1, shared by train and test.
# as.numeric() ignores extra named arguments and would return the alphabetical
# level index (ALQ = 1 ... Unf = 6) instead of the codes written here, so
# codes are looked up by name.
bsmt_fin_type1_codes <- c(
  "Unf" = 1, "LwQ" = 2, "Rec" = 3, "BLQ" = 4, "ALQ" = 5, "GLQ" = 6
)

#Transforming BsmtFinType1 column to numeric in train dataset
BsmtFinType1 <- as.factor(housing_train$BsmtFinType1)
levels(BsmtFinType1) # "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
## [1] "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
sum(is.na(BsmtFinType1)) # 37 Missing Entries
## [1] 37
stopifnot(levels(BsmtFinType1) %in% names(bsmt_fin_type1_codes))
BsmtFinType1 <- unname(bsmt_fin_type1_codes[as.character(BsmtFinType1)])
BsmtFinType1[is.na(BsmtFinType1)] <- 0 # missing values become 0
housing_train$BsmtFinType1 <- BsmtFinType1

#Transforming BsmtFinType1 column to numeric in test dataset
BsmtFinType1T <- as.factor(housing_test$BsmtFinType1)
levels(BsmtFinType1T) # "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
## [1] "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
sum(is.na(BsmtFinType1T)) # 42 Missing Entries
## [1] 42
stopifnot(levels(BsmtFinType1T) %in% names(bsmt_fin_type1_codes))
BsmtFinType1T <- unname(bsmt_fin_type1_codes[as.character(BsmtFinType1T)])
BsmtFinType1T[is.na(BsmtFinType1T)] <- 0 # missing values become 0
housing_test$BsmtFinType1 <- BsmtFinType1T

# Checking for missing values in BsmtFinSF1 column of train dataset
BsmtFinSF1 <- housing_train$BsmtFinSF1
sum(is.na(BsmtFinSF1)) # no missing values
## [1] 0
# Checking for missing values in BsmtFinSF1 column of test dataset.
# The column is no longer wrapped in as.factor(): writing a factor back would
# turn the test column into a categorical variable while train is untouched.
BsmtFinSF1T <- housing_test$BsmtFinSF1
sum(is.na(BsmtFinSF1T)) # 1 missing value
## [1] 1
BsmtFinSF1T[is.na(BsmtFinSF1T)] <- 0 # missing entry is replaced with 0
housing_test$BsmtFinSF1 <- BsmtFinSF1T

# Transforming BsmtFinType2 column to numeric in train and test datasets.
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (ALQ=1 ... Unf=6), so the intended scale is applied through an
# explicit lookup vector shared by both datasets.
bsmt_fin_type2_codes <- c(
  "Unf" = 1, "LwQ" = 2, "Rec" = 3, "BLQ" = 4, "ALQ" = 5, "GLQ" = 6
)

# Train dataset
BsmtFinType2 <- as.factor(housing_train$BsmtFinType2)
levels(BsmtFinType2) # alphabetical order, not the ordinal order
## [1] "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
sum(is.na(BsmtFinType2)) # 38 Missing Entries
## [1] 38
BsmtFinType2 <- unname(bsmt_fin_type2_codes[as.character(BsmtFinType2)])
BsmtFinType2[is.na(BsmtFinType2)] <- 0 # missing entries are coded 0
housing_train$BsmtFinType2 <- BsmtFinType2

# Test dataset
BsmtFinType2T <- as.factor(housing_test$BsmtFinType2)
levels(BsmtFinType2T) # alphabetical order, not the ordinal order
## [1] "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
sum(is.na(BsmtFinType2T)) # 42 Missing Entries
## [1] 42
BsmtFinType2T <- unname(bsmt_fin_type2_codes[as.character(BsmtFinType2T)])
BsmtFinType2T[is.na(BsmtFinType2T)] <- 0 # missing entries are coded 0
housing_test$BsmtFinType2 <- BsmtFinType2T

# Checking for missing values in BsmtFinSF2 column of train dataset
BsmtFinSF2 <- housing_train$BsmtFinSF2
sum(is.na(BsmtFinSF2)) # no missing values
## [1] 0
# Checking for missing values in BsmtFinSF2 column of test dataset.
# The column is no longer wrapped in as.factor(): writing a factor back would
# turn the test column into a categorical variable while train is untouched.
BsmtFinSF2T <- housing_test$BsmtFinSF2
sum(is.na(BsmtFinSF2T)) # 1 missing value
## [1] 1
BsmtFinSF2T[is.na(BsmtFinSF2T)] <- 0 # missing entry is replaced with 0
housing_test$BsmtFinSF2 <- BsmtFinSF2T

# Checking for missing values in BsmtUnfSF column of train dataset.
BsmtUnfSF <- housing_train$BsmtUnfSF
sum(is.na(BsmtUnfSF)) # no missing values
## [1] 0
# Checking for missing values in BsmtUnfSF column of test dataset.
# The column is no longer wrapped in as.factor(): writing a factor back would
# turn the test column into a categorical variable while train is untouched.
BsmtUnfSFT <- housing_test$BsmtUnfSF
sum(is.na(BsmtUnfSFT)) # 1 missing value
## [1] 1
BsmtUnfSFT[is.na(BsmtUnfSFT)] <- 0 # missing entry is replaced with 0
housing_test$BsmtUnfSF <- BsmtUnfSFT

# Checking for missing values in TotalBsmtSF column of train dataset.
TotalBsmtSF <- housing_train$TotalBsmtSF
sum(is.na(TotalBsmtSF)) # no missing value
## [1] 0
# Checking for missing values in TotalBsmtSF column of test dataset.
# The column is no longer wrapped in as.factor(): writing a factor back would
# turn the test column into a categorical variable while train is untouched.
TotalBsmtSFT <- housing_test$TotalBsmtSF
sum(is.na(TotalBsmtSFT)) # 1 missing value
## [1] 1
TotalBsmtSFT[is.na(TotalBsmtSFT)] <- 0 # missing entry is replaced with 0
housing_test$TotalBsmtSF <- TotalBsmtSFT

# Transforming Heating column to numeric in train and test datasets.
# as.numeric() ignores extra named arguments and returns alphabetical level
# codes. Train has six levels and test only four, so the same heating type
# received different codes in the two datasets (e.g. GasA = 2 in train, 1 in
# test). One explicit lookup vector is used for both instead.
heating_codes <- c(
  "Floor" = 1, "GasW" = 2, "Grav" = 3, "Wall" = 4, "OthW" = 5, "GasA" = 6
)

# Train dataset
Heating <- as.factor(housing_train$Heating)
levels(Heating) # "Floor" "GasA"  "GasW"  "Grav"  "OthW"  "Wall"
## [1] "Floor" "GasA"  "GasW"  "Grav"  "OthW"  "Wall"
sum(is.na(Heating)) # No Missing Entries
## [1] 0
Heating <- unname(heating_codes[as.character(Heating)])
housing_train$Heating <- Heating

# Test dataset
HeatingT <- as.factor(housing_test$Heating)
levels(HeatingT) # only four of the six train levels occur in test
## [1] "GasA" "GasW" "Grav" "Wall"
sum(is.na(HeatingT)) # No Missing Entries
## [1] 0
HeatingT <- unname(heating_codes[as.character(HeatingT)])
housing_test$Heating <- HeatingT

# Transforming HeatingQC column to numeric in train and test datasets.
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (Ex=1, Fa=2, Gd=3, Po=4, TA=5), so the intended quality scale is
# applied through an explicit lookup vector shared by both datasets.
heating_qc_codes <- c("Po" = 1, "Fa" = 2, "TA" = 3, "Gd" = 4, "Ex" = 5)

# Train dataset
HeatingQC <- as.factor(housing_train$HeatingQC)
levels(HeatingQC) # alphabetical order, not the quality order
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(HeatingQC)) # No Missing Entries
## [1] 0
HeatingQC <- unname(heating_qc_codes[as.character(HeatingQC)])
housing_train$HeatingQC <- HeatingQC

# Test dataset
HeatingQCT <- as.factor(housing_test$HeatingQC)
levels(HeatingQCT) # alphabetical order, not the quality order
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(HeatingQCT)) # No Missing Entries
## [1] 0
HeatingQCT <- unname(heating_qc_codes[as.character(HeatingQCT)])
housing_test$HeatingQC <- HeatingQCT

# Encoding CentralAir as a 0/1 indicator in the train dataset ("Y" -> 1)
CentralAir <- as.factor(housing_train$CentralAir)
levels(CentralAir) # "N" "Y"
## [1] "N" "Y"
sum(is.na(CentralAir)) # No Missing Entries
## [1] 0
# The logical comparison is converted straight to numeric: TRUE -> 1, FALSE -> 0
CentralAir <- as.numeric(CentralAir == "Y")
housing_train$CentralAir <- CentralAir

# Encoding CentralAir as a 0/1 indicator in the test dataset ("Y" -> 1)
CentralAirT <- as.factor(housing_test$CentralAir)
levels(CentralAirT) # "N" "Y"
## [1] "N" "Y"
sum(is.na(CentralAirT)) # No Missing Entries
## [1] 0
CentralAirT <- as.numeric(CentralAirT == "Y")
housing_test$CentralAir <- CentralAirT

# Transforming Electrical column to numeric in train and test datasets.
# as.numeric() ignores extra named arguments and returns alphabetical level
# codes. "Mix" occurs only in train, so SBrkr was coded 5 in train but 4 in
# test. One explicit lookup vector is used for both datasets instead.
electrical_codes <- c(
  "FuseP" = 1, "FuseF" = 2, "FuseA" = 3, "SBrkr" = 4, "Mix" = 5
)

# Train dataset
Electrical <- as.factor(housing_train$Electrical)
levels(Electrical) # "FuseA" "FuseF" "FuseP" "Mix"   "SBrkr"
## [1] "FuseA" "FuseF" "FuseP" "Mix"   "SBrkr"
sum(is.na(Electrical)) # 1 Missing Entry
## [1] 1
Electrical <- unname(electrical_codes[as.character(Electrical)])
Electrical[is.na(Electrical)] <- 0 # missing entry is coded 0
housing_train$Electrical <- Electrical

# Test dataset
ElectricalT <- as.factor(housing_test$Electrical)
levels(ElectricalT) # "Mix" does not occur in test
## [1] "FuseA" "FuseF" "FuseP" "SBrkr"
sum(is.na(ElectricalT)) # 0 Missing Entries
## [1] 0
ElectricalT <- unname(electrical_codes[as.character(ElectricalT)])
housing_test$Electrical <- ElectricalT

# Transforming KitchenQual column to numeric in train and test datasets.
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (Ex=1, Fa=2, Gd=3, TA=4), so the intended scale is applied
# through an explicit lookup vector shared by both datasets.
# Scale: Po=1 (not present in the data), Fa=2, TA=3, Gd=4, Ex=5
kitchen_qual_codes <- c("Fa" = 2, "TA" = 3, "Gd" = 4, "Ex" = 5)

# Train dataset
KitchenQual <- as.factor(housing_train$KitchenQual)
levels(KitchenQual) # "Ex" "Fa" "Gd" "TA"
## [1] "Ex" "Fa" "Gd" "TA"
sum(is.na(KitchenQual)) # No Missing Entries
## [1] 0
KitchenQual <- unname(kitchen_qual_codes[as.character(KitchenQual)])
housing_train$KitchenQual <- KitchenQual

# Test dataset
KitchenQualT <- as.factor(housing_test$KitchenQual)
levels(KitchenQualT) # "Ex" "Fa" "Gd" "TA"
## [1] "Ex" "Fa" "Gd" "TA"
sum(is.na(KitchenQualT)) # 1 Missing Entry
## [1] 1
KitchenQualT <- unname(kitchen_qual_codes[as.character(KitchenQualT)])
KitchenQualT[is.na(KitchenQualT)] <- 0 # missing entry is coded 0
housing_test$KitchenQual <- KitchenQualT

# Transforming Functional column to numeric in train and test datasets.
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (Maj1=1 ... Typ=7), so the intended scale is applied through an
# explicit lookup vector shared by both datasets.
# Scale: "Sal"=1 (not present in the data), "Sev"=2, "Maj2"=3, "Maj1"=4,
# "Mod"=5, "Min2"=6, "Min1"=7, "Typ"=8
functional_codes <- c(
  "Sev" = 2, "Maj2" = 3, "Maj1" = 4, "Mod" = 5,
  "Min2" = 6, "Min1" = 7, "Typ" = 8
)

# Train dataset
Functional <- as.factor(housing_train$Functional)
levels(Functional) # "Maj1" "Maj2" "Min1" "Min2" "Mod"  "Sev"  "Typ"
## [1] "Maj1" "Maj2" "Min1" "Min2" "Mod"  "Sev"  "Typ"
sum(is.na(Functional)) # No Missing Entries
## [1] 0
Functional <- unname(functional_codes[as.character(Functional)])
housing_train$Functional <- Functional

# Test dataset
FunctionalT <- as.factor(housing_test$Functional)
levels(FunctionalT) # "Maj1" "Maj2" "Min1" "Min2" "Mod"  "Sev"  "Typ"
## [1] "Maj1" "Maj2" "Min1" "Min2" "Mod"  "Sev"  "Typ"
sum(is.na(FunctionalT)) # 2 Missing Entries
## [1] 2
FunctionalT <- unname(functional_codes[as.character(FunctionalT)])
FunctionalT[is.na(FunctionalT)] <- 0 # missing entries are coded 0
housing_test$Functional <- FunctionalT

# Replacing missing BsmtFullBath counts with 0 (only test has missing values)
sum(is.na(housing_train[["BsmtFullBath"]])) # no missing values
## [1] 0
sum(is.na(housing_test[["BsmtFullBath"]])) # 2 missing values
## [1] 2
housing_test$BsmtFullBath <- replace(
  housing_test$BsmtFullBath, is.na(housing_test$BsmtFullBath), 0
)

# Replacing missing BsmtHalfBath counts with 0 (only test has missing values)
sum(is.na(housing_train[["BsmtHalfBath"]])) # no missing values
## [1] 0
sum(is.na(housing_test[["BsmtHalfBath"]])) # 2 missing values
## [1] 2
housing_test$BsmtHalfBath <- replace(
  housing_test$BsmtHalfBath, is.na(housing_test$BsmtHalfBath), 0
)

# Transforming FireplaceQu column to numeric in train and test datasets.
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (Ex=1, Fa=2, Gd=3, Po=4, TA=5), so the intended quality scale is
# applied through an explicit lookup vector shared by both datasets.
fireplace_qu_codes <- c("Po" = 1, "Fa" = 2, "TA" = 3, "Gd" = 4, "Ex" = 5)

# Train dataset
FireplaceQu <- as.factor(housing_train$FireplaceQu)
levels(FireplaceQu) # alphabetical order, not the quality order
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(FireplaceQu)) # 690 Missing Entries
## [1] 690
FireplaceQu <- unname(fireplace_qu_codes[as.character(FireplaceQu)])
FireplaceQu[is.na(FireplaceQu)] <- 0 # missing entries are coded 0
housing_train$FireplaceQu <- FireplaceQu

# Test dataset
FireplaceQuT <- as.factor(housing_test$FireplaceQu)
levels(FireplaceQuT) # alphabetical order, not the quality order
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(FireplaceQuT)) # 730 Missing Entries
## [1] 730
FireplaceQuT <- unname(fireplace_qu_codes[as.character(FireplaceQuT)])
FireplaceQuT[is.na(FireplaceQuT)] <- 0 # missing entries are coded 0
housing_test$FireplaceQu <- FireplaceQuT

# Transforming GarageType column to numeric in train and test datasets.
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (2Types=1 ... Detchd=6), so the intended coding is applied
# through an explicit lookup vector shared by both datasets.
garage_type_codes <- c(
  "Detchd" = 1, "CarPort" = 2, "BuiltIn" = 3,
  "Basment" = 4, "Attchd" = 5, "2Types" = 6
)

# Train dataset
GarageType <- as.factor(housing_train$GarageType)
levels(GarageType) # "2Types"  "Attchd"  "Basment" "BuiltIn" "CarPort" "Detchd"
## [1] "2Types"  "Attchd"  "Basment" "BuiltIn" "CarPort" "Detchd"
sum(is.na(GarageType)) # 81 Missing Entries
## [1] 81
GarageType <- unname(garage_type_codes[as.character(GarageType)])
GarageType[is.na(GarageType)] <- 0 # missing entries are coded 0
housing_train$GarageType <- GarageType

# Test dataset
GarageTypeT <- as.factor(housing_test$GarageType)
levels(GarageTypeT) # "2Types"  "Attchd"  "Basment" "BuiltIn" "CarPort" "Detchd"
## [1] "2Types"  "Attchd"  "Basment" "BuiltIn" "CarPort" "Detchd"
sum(is.na(GarageTypeT)) # 76 Missing Entries
## [1] 76
GarageTypeT <- unname(garage_type_codes[as.character(GarageTypeT)])
GarageTypeT[is.na(GarageTypeT)] <- 0 # missing entries are coded 0
housing_test$GarageType <- GarageTypeT

# Counting missing values of GarageYrBlt in both datasets.
# NOTE(review): the counts are only reported; no replacement is applied to
# GarageYrBlt in this step.
sum(is.na(housing_train[["GarageYrBlt"]])) # 81 missing values
## [1] 81
sum(is.na(housing_test[["GarageYrBlt"]])) # 78 missing values
## [1] 78
# Transforming GarageFinish column to numeric in train and test datasets.
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (Fin=1, RFn=2, Unf=3), the reverse of the intended scale, so
# the scale is applied through an explicit lookup vector shared by both
# datasets.
garage_finish_codes <- c("Unf" = 1, "RFn" = 2, "Fin" = 3)

# Train dataset
GarageFinish <- as.factor(housing_train$GarageFinish)
levels(GarageFinish) # alphabetical order, not the ordinal order
## [1] "Fin" "RFn" "Unf"
sum(is.na(GarageFinish)) # 81 Missing Entries
## [1] 81
GarageFinish <- unname(garage_finish_codes[as.character(GarageFinish)])
GarageFinish[is.na(GarageFinish)] <- 0 # missing entries are coded 0
housing_train$GarageFinish <- GarageFinish

# Test dataset
GarageFinishT <- as.factor(housing_test$GarageFinish)
levels(GarageFinishT) # alphabetical order, not the ordinal order
## [1] "Fin" "RFn" "Unf"
sum(is.na(GarageFinishT)) # 78 Missing Entries
## [1] 78
GarageFinishT <- unname(garage_finish_codes[as.character(GarageFinishT)])
GarageFinishT[is.na(GarageFinishT)] <- 0 # missing entries are coded 0
housing_test$GarageFinish <- GarageFinishT

# Replacing the missing GarageCars value with 0 (only test has one)
sum(is.na(housing_train[["GarageCars"]])) # no missing values
## [1] 0
sum(is.na(housing_test[["GarageCars"]])) # 1 missing value
## [1] 1
housing_test$GarageCars <- replace(
  housing_test$GarageCars, is.na(housing_test$GarageCars), 0
)

# Replacing the missing GarageArea value with 0 (only test has one)
sum(is.na(housing_train[["GarageArea"]])) # no missing values
## [1] 0
sum(is.na(housing_test[["GarageArea"]])) # 1 missing value
## [1] 1
housing_test$GarageArea <- replace(
  housing_test$GarageArea, is.na(housing_test$GarageArea), 0
)

# Transforming GarageQual column to numeric in train and test datasets.
# as.numeric() ignores extra named arguments and returns alphabetical level
# codes. "Ex" occurs only in train, so every grade was coded one lower in test
# than in train (e.g. TA = 5 in train, 4 in test). One explicit lookup vector
# is used for both datasets instead.
garage_qual_codes <- c("Po" = 1, "Fa" = 2, "TA" = 3, "Gd" = 4, "Ex" = 5)

# Train dataset
GarageQual <- as.factor(housing_train$GarageQual)
levels(GarageQual) # "Ex" "Fa" "Gd" "Po" "TA"
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(GarageQual)) # 81 Missing Entries
## [1] 81
GarageQual <- unname(garage_qual_codes[as.character(GarageQual)])
GarageQual[is.na(GarageQual)] <- 0 # missing entries are coded 0
housing_train$GarageQual <- GarageQual

# Test dataset
GarageQualT <- as.factor(housing_test$GarageQual)
levels(GarageQualT) # "Ex" does not occur in test
## [1] "Fa" "Gd" "Po" "TA"
sum(is.na(GarageQualT)) # 78 Missing Entries
## [1] 78
GarageQualT <- unname(garage_qual_codes[as.character(GarageQualT)])
GarageQualT[is.na(GarageQualT)] <- 0 # missing entries are coded 0
housing_test$GarageQual <- GarageQualT

# Transforming GarageCond column to numeric in train and test datasets.
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (Ex=1, Fa=2, Gd=3, Po=4, TA=5), so the intended quality scale is
# applied through an explicit lookup vector shared by both datasets.
garage_cond_codes <- c("Po" = 1, "Fa" = 2, "TA" = 3, "Gd" = 4, "Ex" = 5)

# Train dataset
GarageCond <- as.factor(housing_train$GarageCond)
levels(GarageCond) # alphabetical order, not the quality order
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(GarageCond)) # 81 Missing Entries
## [1] 81
GarageCond <- unname(garage_cond_codes[as.character(GarageCond)])
GarageCond[is.na(GarageCond)] <- 0 # missing entries are coded 0
housing_train$GarageCond <- GarageCond

# Test dataset
GarageCondT <- as.factor(housing_test$GarageCond)
levels(GarageCondT) # alphabetical order, not the quality order
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(GarageCondT)) # 78 Missing Entries
## [1] 78
GarageCondT <- unname(garage_cond_codes[as.character(GarageCondT)])
GarageCondT[is.na(GarageCondT)] <- 0 # missing entries are coded 0
housing_test$GarageCond <- GarageCondT

# Transforming PavedDrive column to numeric in train and test datasets.
# as.numeric() ignores extra named arguments; the previous result matched the
# intended coding only because the alphabetical level order ("N" "P" "Y")
# happens to coincide with it. An explicit lookup vector makes the coding
# independent of which levels are present in each dataset.
paved_drive_codes <- c("N" = 1, "P" = 2, "Y" = 3)

# Train dataset
PavedDrive <- as.factor(housing_train$PavedDrive)
levels(PavedDrive) # "N" "P" "Y"
## [1] "N" "P" "Y"
sum(is.na(PavedDrive)) # 0 Missing Entries
## [1] 0
PavedDrive <- unname(paved_drive_codes[as.character(PavedDrive)])
housing_train$PavedDrive <- PavedDrive

# Test dataset
PavedDriveT <- as.factor(housing_test$PavedDrive)
levels(PavedDriveT) # "N" "P" "Y"
## [1] "N" "P" "Y"
sum(is.na(PavedDriveT)) # 0 Missing Entries
## [1] 0
PavedDriveT <- unname(paved_drive_codes[as.character(PavedDriveT)])
housing_test$PavedDrive <- PavedDriveT

# Transforming PoolQC column to numeric in train and test datasets.
# Intended scale: No pool=0, Fa=1, TA=2 (not present in the data), Gd=3, Ex=4.
# as.numeric() ignores extra named arguments and returns alphabetical level
# codes, so the previous arithmetic produced Ex=1, Fa=3, Gd=4 in train and
# Ex=3, Gd=4 in test. One explicit lookup vector is used for both instead.
pool_qc_codes <- c("Fa" = 1, "Gd" = 3, "Ex" = 4)

# Train dataset
PoolQC <- as.factor(housing_train$PoolQC)
levels(PoolQC) # "Ex" "Fa" "Gd"
## [1] "Ex" "Fa" "Gd"
sum(is.na(PoolQC)) # 1453 Missing Entries
## [1] 1453
PoolQC1 <- unname(pool_qc_codes[as.character(PoolQC)])
PoolQC1[is.na(PoolQC1)] <- 0 # missing entries are coded 0 (No pool)
housing_train$PoolQC <- PoolQC1

# Test dataset
PoolQCT <- as.factor(housing_test$PoolQC)
levels(PoolQCT) # "Ex" "Gd"
## [1] "Ex" "Gd"
sum(is.na(PoolQCT)) # 1456 Missing Entries
## [1] 1456
PoolQCT <- unname(pool_qc_codes[as.character(PoolQCT)])
PoolQCT[is.na(PoolQCT)] <- 0 # missing entries are coded 0 (No pool)
housing_test$PoolQC <- PoolQCT

# Transforming Fence column to numeric in train and test datasets.
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (GdPrv=1, GdWo=2, MnPrv=3, MnWw=4), so the intended coding is
# applied through an explicit lookup vector shared by both datasets.
fence_codes <- c("MnWw" = 1, "GdWo" = 2, "MnPrv" = 3, "GdPrv" = 4)

# Train dataset
Fence <- as.factor(housing_train$Fence)
levels(Fence) # "GdPrv" "GdWo"  "MnPrv" "MnWw"
## [1] "GdPrv" "GdWo"  "MnPrv" "MnWw"
sum(is.na(Fence)) # 1179 Missing Entries
## [1] 1179
Fence <- unname(fence_codes[as.character(Fence)])
Fence[is.na(Fence)] <- 0 # missing entries are coded 0
housing_train$Fence <- Fence

# Test dataset
FenceT <- as.factor(housing_test$Fence)
levels(FenceT) # "GdPrv" "GdWo"  "MnPrv" "MnWw"
## [1] "GdPrv" "GdWo"  "MnPrv" "MnWw"
sum(is.na(FenceT)) # 1169 Missing Entries
## [1] 1169
FenceT <- unname(fence_codes[as.character(FenceT)])
FenceT[is.na(FenceT)] <- 0 # missing entries are coded 0
housing_test$Fence <- FenceT

# Transforming MiscFeature column to numeric in train and test datasets.
# Intended coding: "TenC"=1, "Shed"=2, "Othr"=3, "Gar2"=4.
# as.numeric() ignores extra named arguments and returns alphabetical level
# codes, so train was coded Gar2=1, Othr=2, Shed=3, TenC=4 while test (after
# the +1 shift) was coded Gar2=2, Othr=3, Shed=4. One explicit lookup vector
# is used for both datasets instead.
misc_feature_codes <- c("TenC" = 1, "Shed" = 2, "Othr" = 3, "Gar2" = 4)

# Train dataset
MiscFeature <- as.factor(housing_train$MiscFeature)
levels(MiscFeature) # "Gar2" "Othr" "Shed" "TenC"
## [1] "Gar2" "Othr" "Shed" "TenC"
sum(is.na(MiscFeature)) # 1406 Missing Entries
## [1] 1406
MiscFeature <- unname(misc_feature_codes[as.character(MiscFeature)])
MiscFeature[is.na(MiscFeature)] <- 0 # missing entries are coded 0
housing_train$MiscFeature <- MiscFeature

# Test dataset
MiscFeatureT <- as.factor(housing_test$MiscFeature)
levels(MiscFeatureT) # "TenC" does not occur in test
## [1] "Gar2" "Othr" "Shed"
sum(is.na(MiscFeatureT)) # 1408 Missing Entries
## [1] 1408
MiscFeatureT <- unname(misc_feature_codes[as.character(MiscFeatureT)])
MiscFeatureT[is.na(MiscFeatureT)] <- 0 # missing entries are coded 0
housing_test$MiscFeature <- MiscFeatureT

# Transforming SaleType column to numeric in train and test datasets.
# as.numeric() ignores extra named arguments and returns the codes of the
# sorted factor levels, so the intended coding is applied through an explicit
# lookup vector shared by both datasets.
sale_type_codes <- c(
  "Oth" = 1, "ConLD" = 2, "ConLI" = 3, "ConLw" = 4, "Con" = 5,
  "COD" = 6, "New" = 7, "CWD" = 8, "WD" = 9
)

# Train dataset
SaleType <- as.factor(housing_train$SaleType)
levels(SaleType) # sorted level order, not the intended coding order
## [1] "COD"   "Con"   "ConLD" "ConLI" "ConLw" "CWD"   "New"   "Oth"   "WD"
sum(is.na(SaleType)) # 0 Missing Entries
## [1] 0
SaleType <- unname(sale_type_codes[as.character(SaleType)])
housing_train$SaleType <- SaleType

# Test dataset
SaleTypeT <- as.factor(housing_test$SaleType)
levels(SaleTypeT) # sorted level order, not the intended coding order
## [1] "COD"   "Con"   "ConLD" "ConLI" "ConLw" "CWD"   "New"   "Oth"   "WD"
sum(is.na(SaleTypeT)) # 1 Missing Entry
## [1] 1
SaleTypeT <- unname(sale_type_codes[as.character(SaleTypeT)])
# The missing entry is coded 1, which is the "Oth" code in the lookup above
# (with the sorted level codes, 1 was the first level instead).
SaleTypeT[is.na(SaleTypeT)] <- 1
housing_test$SaleType <- SaleTypeT

# Transforming SaleCondition column to numeric in train and test datasets.
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (Abnorml=1 ... Partial=6), so the intended coding is applied
# through an explicit lookup vector shared by both datasets.
sale_condition_codes <- c(
  "Partial" = 1, "Family" = 2, "Alloca" = 3,
  "AdjLand" = 4, "Abnorml" = 5, "Normal" = 6
)

# Train dataset
SaleCondition <- as.factor(housing_train$SaleCondition)
levels(SaleCondition) # alphabetical order, not the intended coding order
## [1] "Abnorml" "AdjLand" "Alloca"  "Family"  "Normal"  "Partial"
sum(is.na(SaleCondition)) # 0 Missing Entries
## [1] 0
SaleCondition <- unname(sale_condition_codes[as.character(SaleCondition)])
housing_train$SaleCondition <- SaleCondition

# Test dataset
SaleConditionT <- as.factor(housing_test$SaleCondition)
levels(SaleConditionT) # alphabetical order, not the intended coding order
## [1] "Abnorml" "AdjLand" "Alloca"  "Family"  "Normal"  "Partial"
sum(is.na(SaleConditionT)) # 0 Missing Entries
## [1] 0
SaleConditionT <- unname(sale_condition_codes[as.character(SaleConditionT)])
housing_test$SaleCondition <- SaleConditionT