There are two data sets: one contains the training data and the other contains the test data. Both are loaded here.
# Read the house-prices training and test sets from their local CSV files.
housing_train <- read.csv(
  file = "/Users/macuser/Desktop/MSDA/Data Preparation/house-prices/train.csv"
)
housing_test <- read.csv(
  file = "/Users/macuser/Desktop/MSDA/Data Preparation/house-prices/test.csv"
)
# Show the column names of the training set.
names(housing_train)
## [1] "Id" "MSSubClass" "MSZoning" "LotFrontage"
## [5] "LotArea" "Street" "Alley" "LotShape"
## [9] "LandContour" "Utilities" "LotConfig" "LandSlope"
## [13] "Neighborhood" "Condition1" "Condition2" "BldgType"
## [17] "HouseStyle" "OverallQual" "OverallCond" "YearBuilt"
## [21] "YearRemodAdd" "RoofStyle" "RoofMatl" "Exterior1st"
## [25] "Exterior2nd" "MasVnrType" "MasVnrArea" "ExterQual"
## [29] "ExterCond" "Foundation" "BsmtQual" "BsmtCond"
## [33] "BsmtExposure" "BsmtFinType1" "BsmtFinSF1" "BsmtFinType2"
## [37] "BsmtFinSF2" "BsmtUnfSF" "TotalBsmtSF" "Heating"
## [41] "HeatingQC" "CentralAir" "Electrical" "X1stFlrSF"
## [45] "X2ndFlrSF" "LowQualFinSF" "GrLivArea" "BsmtFullBath"
## [49] "BsmtHalfBath" "FullBath" "HalfBath" "BedroomAbvGr"
## [53] "KitchenAbvGr" "KitchenQual" "TotRmsAbvGrd" "Functional"
## [57] "Fireplaces" "FireplaceQu" "GarageType" "GarageYrBlt"
## [61] "GarageFinish" "GarageCars" "GarageArea" "GarageQual"
## [65] "GarageCond" "PavedDrive" "WoodDeckSF" "OpenPorchSF"
## [69] "EnclosedPorch" "X3SsnPorch" "ScreenPorch" "PoolArea"
## [73] "PoolQC" "Fence" "MiscFeature" "MiscVal"
## [77] "MoSold" "YrSold" "SaleType" "SaleCondition"
## [81] "SalePrice"
# Preview the first six rows of the training set.
head(housing_train, n = 6L)
## Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape LandContour
## 1 1 60 RL 65 8450 Pave <NA> Reg Lvl
## 2 2 20 RL 80 9600 Pave <NA> Reg Lvl
## 3 3 60 RL 68 11250 Pave <NA> IR1 Lvl
## 4 4 70 RL 60 9550 Pave <NA> IR1 Lvl
## 5 5 60 RL 84 14260 Pave <NA> IR1 Lvl
## 6 6 50 RL 85 14115 Pave <NA> IR1 Lvl
## Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType
## 1 AllPub Inside Gtl CollgCr Norm Norm 1Fam
## 2 AllPub FR2 Gtl Veenker Feedr Norm 1Fam
## 3 AllPub Inside Gtl CollgCr Norm Norm 1Fam
## 4 AllPub Corner Gtl Crawfor Norm Norm 1Fam
## 5 AllPub FR2 Gtl NoRidge Norm Norm 1Fam
## 6 AllPub Inside Gtl Mitchel Norm Norm 1Fam
## HouseStyle OverallQual OverallCond YearBuilt YearRemodAdd RoofStyle RoofMatl
## 1 2Story 7 5 2003 2003 Gable CompShg
## 2 1Story 6 8 1976 1976 Gable CompShg
## 3 2Story 7 5 2001 2002 Gable CompShg
## 4 2Story 7 5 1915 1970 Gable CompShg
## 5 2Story 8 5 2000 2000 Gable CompShg
## 6 1.5Fin 5 5 1993 1995 Gable CompShg
## Exterior1st Exterior2nd MasVnrType MasVnrArea ExterQual ExterCond Foundation
## 1 VinylSd VinylSd BrkFace 196 Gd TA PConc
## 2 MetalSd MetalSd None 0 TA TA CBlock
## 3 VinylSd VinylSd BrkFace 162 Gd TA PConc
## 4 Wd Sdng Wd Shng None 0 TA TA BrkTil
## 5 VinylSd VinylSd BrkFace 350 Gd TA PConc
## 6 VinylSd VinylSd None 0 TA TA Wood
## BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinSF1 BsmtFinType2
## 1 Gd TA No GLQ 706 Unf
## 2 Gd TA Gd ALQ 978 Unf
## 3 Gd TA Mn GLQ 486 Unf
## 4 TA Gd No ALQ 216 Unf
## 5 Gd TA Av GLQ 655 Unf
## 6 Gd TA No GLQ 732 Unf
## BsmtFinSF2 BsmtUnfSF TotalBsmtSF Heating HeatingQC CentralAir Electrical
## 1 0 150 856 GasA Ex Y SBrkr
## 2 0 284 1262 GasA Ex Y SBrkr
## 3 0 434 920 GasA Ex Y SBrkr
## 4 0 540 756 GasA Gd Y SBrkr
## 5 0 490 1145 GasA Ex Y SBrkr
## 6 0 64 796 GasA Ex Y SBrkr
## X1stFlrSF X2ndFlrSF LowQualFinSF GrLivArea BsmtFullBath BsmtHalfBath FullBath
## 1 856 854 0 1710 1 0 2
## 2 1262 0 0 1262 0 1 2
## 3 920 866 0 1786 1 0 2
## 4 961 756 0 1717 1 0 1
## 5 1145 1053 0 2198 1 0 2
## 6 796 566 0 1362 1 0 1
## HalfBath BedroomAbvGr KitchenAbvGr KitchenQual TotRmsAbvGrd Functional
## 1 1 3 1 Gd 8 Typ
## 2 0 3 1 TA 6 Typ
## 3 1 3 1 Gd 6 Typ
## 4 0 3 1 Gd 7 Typ
## 5 1 4 1 Gd 9 Typ
## 6 1 1 1 TA 5 Typ
## Fireplaces FireplaceQu GarageType GarageYrBlt GarageFinish GarageCars
## 1 0 <NA> Attchd 2003 RFn 2
## 2 1 TA Attchd 1976 RFn 2
## 3 1 TA Attchd 2001 RFn 2
## 4 1 Gd Detchd 1998 Unf 3
## 5 1 TA Attchd 2000 RFn 3
## 6 0 <NA> Attchd 1993 Unf 2
## GarageArea GarageQual GarageCond PavedDrive WoodDeckSF OpenPorchSF
## 1 548 TA TA Y 0 61
## 2 460 TA TA Y 298 0
## 3 608 TA TA Y 0 42
## 4 642 TA TA Y 0 35
## 5 836 TA TA Y 192 84
## 6 480 TA TA Y 40 30
## EnclosedPorch X3SsnPorch ScreenPorch PoolArea PoolQC Fence MiscFeature
## 1 0 0 0 0 <NA> <NA> <NA>
## 2 0 0 0 0 <NA> <NA> <NA>
## 3 0 0 0 0 <NA> <NA> <NA>
## 4 272 0 0 0 <NA> <NA> <NA>
## 5 0 0 0 0 <NA> <NA> <NA>
## 6 0 320 0 0 <NA> MnPrv Shed
## MiscVal MoSold YrSold SaleType SaleCondition SalePrice
## 1 0 2 2008 WD Normal 208500
## 2 0 5 2007 WD Normal 181500
## 3 0 9 2008 WD Normal 223500
## 4 0 2 2006 WD Abnorml 140000
## 5 0 12 2008 WD Normal 250000
## 6 700 10 2009 WD Normal 143000
Looking at the training data (`housing_train`), many columns need to be changed: some need their NA values replaced with 0, and some are going to be converted from categorical to numeric so they can be used in the analysis. An explanation of each column, together with the changes made, can be found here: < Input Column guide here >.
# MSZoning: encode the zoning class as a number in both datasets
# ("C (all)" = 1, FV = 2, RH = 3, RL = 4, RM = 5); missing values become 0.
housing_train$MSZoning <- as.factor(housing_train$MSZoning)
levels(housing_train$MSZoning)
## [1] "C (all)" "FV" "RH" "RL" "RM"
# MSZoning column of train dataset has following levels: "C (all)", "FV", "RH", "RL", "RM"
housing_test$MSZoning <- as.factor(housing_test$MSZoning)
levels(housing_test$MSZoning)
## [1] "C (all)" "FV" "RH" "RL" "RM"
# MSZoning column of test dataset has following levels: "C (all)", "FV", "RH", "RL", "RM"
sum(is.na(housing_train$MSZoning)) # no missing values
## [1] 0
sum(is.na(housing_test$MSZoning)) # 4 missing values
## [1] 4
# as.numeric() ignores extra named arguments, so the mapping is applied through
# a lookup keyed by the real level names (note "C (all)", not "C "). The same
# table is used for both datasets so their codes always agree.
mszoning_codes <- c("C (all)" = 1, "FV" = 2, "RH" = 3, "RL" = 4, "RM" = 5)
# Change of factors to numeric in train dataset
housing_train$MSZoning <-
  unname(mszoning_codes[as.character(housing_train$MSZoning)])
# Change of factors to numeric in test dataset
housing_test$MSZoning <-
  unname(mszoning_codes[as.character(housing_test$MSZoning)])
# Imputing 0 instead of nulls
housing_test$MSZoning[is.na(housing_test$MSZoning)] <- 0
# LotFrontage (train dataset): count the missing values, then set them to 0
sum(is.na(housing_train$LotFrontage)) # 259 missing values
## [1] 259
housing_train$LotFrontage <- replace(
  housing_train$LotFrontage, is.na(housing_train$LotFrontage), 0
)
sum(is.na(housing_train$LotFrontage))
## [1] 0
# every missing value is now 0
# LotFrontage (test dataset): count the missing values, then set them to 0
sum(is.na(housing_test$LotFrontage)) # 227 missing values
## [1] 227
housing_test$LotFrontage <- replace(
  housing_test$LotFrontage, is.na(housing_test$LotFrontage), 0
)
sum(is.na(housing_test$LotFrontage))
## [1] 0
# every missing value is now 0
# Street: encode the road type as a number (Pave = 1, Grvl = 2) in both
# datasets. as.numeric() ignores extra named arguments and would return the
# alphabetical factor codes (Grvl = 1, Pave = 2), so the documented mapping is
# applied through a named lookup vector instead.
street_codes <- c("Pave" = 1, "Grvl" = 2)
# Street column changed to numeric in train dataset
street <- housing_train$Street
sum(is.na(housing_train$Street)) # 0 missing values
## [1] 0
street <- unname(street_codes[as.character(street)])
housing_train$Street <- street
# Pave got replaced with 1 and Grvl type of road got replaced with 2
# Street column changed to numeric in test dataset
street1 <- housing_test$Street
sum(is.na(street1)) # 0 missing values
## [1] 0
street1 <- unname(street_codes[as.character(street1)])
housing_test$Street <- street1
# Pave got replaced with 1 and Grvl type of road got replaced with 2
# Alley: encode the alley type as a number (Pave = 1, Grvl = 2) in both
# datasets; houses with no alley recorded (NA) become 0.
# The mapping is applied by level name because as.numeric() ignores extra
# named arguments and would otherwise yield Grvl = 1, Pave = 2.
alley_codes <- c("Pave" = 1, "Grvl" = 2)
# Transforming Alley column to numeric in train dataset
alley <- as.factor(housing_train$Alley)
levels(alley)
## [1] "Grvl" "Pave"
alley <- unname(alley_codes[as.character(alley)])
sum(is.na(alley)) # 1369 NA values
## [1] 1369
alley[is.na(alley)] <- 0 # changing NA values to 0
sum(is.na(alley)) # no missing values
## [1] 0
housing_train$Alley <- alley
# Transforming Alley column to numeric in test dataset
alley1 <- as.factor(housing_test$Alley)
levels(alley1)
## [1] "Grvl" "Pave"
alley1 <- unname(alley_codes[as.character(alley1)])
sum(is.na(alley1)) # 1352 NA values
## [1] 1352
alley1[is.na(alley1)] <- 0 # changing NA values to 0
sum(is.na(alley1)) # no missing values
## [1] 0
housing_test$Alley <- alley1
# LotShape: encode the lot shape as a number in both datasets
# (IR1 = 1, IR2 = 2, IR3 = 3, Reg = 4).
# as.numeric() ignores extra named arguments, so the mapping is applied by
# level name; this keeps train and test on the same codes regardless of which
# levels occur in each file.
lotshape_codes <- c("IR1" = 1, "IR2" = 2, "IR3" = 3, "Reg" = 4)
# Transforming LotShape column to numeric in train dataset
shape <- as.factor(housing_train$LotShape)
sum(is.na(shape)) # no missing values
## [1] 0
levels(shape) # 4 levels: "IR1", "IR2", "IR3", "Reg"
## [1] "IR1" "IR2" "IR3" "Reg"
shape <- unname(lotshape_codes[as.character(shape)])
housing_train$LotShape <- shape
# Transforming LotShape column to numeric in test dataset
shape1 <- as.factor(housing_test$LotShape)
sum(is.na(shape1)) # no missing values
## [1] 0
levels(shape1) # 4 levels: "IR1", "IR2", "IR3", "Reg"
## [1] "IR1" "IR2" "IR3" "Reg"
shape1 <- unname(lotshape_codes[as.character(shape1)])
housing_test$LotShape <- shape1
# LandContour: encode the land contour as a number in both datasets
# (Bnk = 1, HLS = 2, Low = 3, Lvl = 4).
# The codes are looked up by level name: as.numeric() drops extra named
# arguments, so passing the mapping to it has no effect.
landcontour_codes <- c("Bnk" = 1, "HLS" = 2, "Low" = 3, "Lvl" = 4)
# Transforming LandContour column to numeric in train dataset
lcontour <- as.factor(housing_train$LandContour)
sum(is.na(lcontour)) # no missing values
## [1] 0
levels(lcontour) # 4 levels: "Bnk", "HLS", "Low", "Lvl"
## [1] "Bnk" "HLS" "Low" "Lvl"
lcontour <- unname(landcontour_codes[as.character(lcontour)])
housing_train$LandContour <- lcontour
# Transforming LandContour column to numeric in test dataset
lcontour1 <- as.factor(housing_test$LandContour)
sum(is.na(lcontour1)) # no missing values
## [1] 0
levels(lcontour1) # 4 levels: "Bnk", "HLS", "Low", "Lvl"
## [1] "Bnk" "HLS" "Low" "Lvl"
lcontour1 <- unname(landcontour_codes[as.character(lcontour1)])
housing_test$LandContour <- lcontour1
# Utilities: encode the available utilities as a number in both datasets
# (AllPub = 1, NoSeWa = 2); missing values in the test dataset become 0.
# One shared lookup is used because the test file contains only "AllPub" and
# as.numeric() ignores the name = value arguments that were passed to it.
utilities_codes <- c("AllPub" = 1, "NoSeWa" = 2)
# Transforming Utilities column to numeric in train dataset
utility <- as.factor(housing_train$Utilities)
sum(is.na(utility)) # no missing values
## [1] 0
levels(utility) # 2 levels: "AllPub", "NoSeWa"
## [1] "AllPub" "NoSeWa"
utility <- unname(utilities_codes[as.character(utility)])
housing_train$Utilities <- utility
# Transforming Utilities column to numeric in test dataset
utility1 <- as.factor(housing_test$Utilities)
sum(is.na(utility1)) # 2 missing values
## [1] 2
levels(utility1) # 1 level: "AllPub"
## [1] "AllPub"
utility1 <- unname(utilities_codes[as.character(utility1)])
utility1[is.na(utility1)] <- 0
housing_test$Utilities <- utility1
# LotConfig: encode the lot configuration as a number in both datasets
# (Corner = 1, CulDSac = 2, FR2 = 3, FR3 = 4, Inside = 5).
# The mapping is applied by level name; handing it to as.numeric() as extra
# arguments does nothing because that function ignores them.
lotconfig_codes <- c(
  "Corner" = 1, "CulDSac" = 2, "FR2" = 3, "FR3" = 4, "Inside" = 5
)
# Transforming LotConfig column to numeric in train dataset
lconfig <- as.factor(housing_train$LotConfig)
sum(is.na(lconfig)) # no missing values
## [1] 0
levels(lconfig)
## [1] "Corner" "CulDSac" "FR2" "FR3" "Inside"
lconfig <- unname(lotconfig_codes[as.character(lconfig)])
housing_train$LotConfig <- lconfig
# Transforming LotConfig column to numeric in test dataset
lconfig1 <- as.factor(housing_test$LotConfig)
sum(is.na(lconfig1)) # no missing values
## [1] 0
levels(lconfig1)
## [1] "Corner" "CulDSac" "FR2" "FR3" "Inside"
lconfig1 <- unname(lotconfig_codes[as.character(lconfig1)])
housing_test$LotConfig <- lconfig1
# LandSlope: encode the slope of the property as a number in both datasets
# (Gtl = 1, Mod = 2, Sev = 3).
# A named lookup replaces as.numeric(x, name = value, ...), whose extra
# arguments are silently ignored.
landslope_codes <- c("Gtl" = 1, "Mod" = 2, "Sev" = 3)
# Transforming LandSlope column to numeric in train dataset
lslope <- as.factor(housing_train$LandSlope)
sum(is.na(lslope)) # no missing values
## [1] 0
levels(lslope)
## [1] "Gtl" "Mod" "Sev"
lslope <- unname(landslope_codes[as.character(lslope)])
housing_train$LandSlope <- lslope
# Transforming LandSlope column to numeric in test dataset
lslope1 <- as.factor(housing_test$LandSlope)
sum(is.na(lslope1)) # no missing values
## [1] 0
levels(lslope1)
## [1] "Gtl" "Mod" "Sev"
lslope1 <- unname(landslope_codes[as.character(lslope1)])
housing_test$LandSlope <- lslope1
# Neighborhood: encode the neighborhood name as a number (1-25) in both
# datasets using one explicit lookup table.
# as.numeric() ignores extra named arguments and returns the factor's own
# level codes, which differ from this table for Sawyer..SWISU, so the codes
# are looked up by name instead.
neighborhood_codes <- c(
  "Blmngtn" = 1, "Blueste" = 2, "BrDale" = 3, "BrkSide" = 4, "ClearCr" = 5,
  "CollgCr" = 6, "Crawfor" = 7, "Edwards" = 8, "Gilbert" = 9, "IDOTRR" = 10,
  "MeadowV" = 11, "Mitchel" = 12, "NAmes" = 13, "NoRidge" = 14,
  "NPkVill" = 15, "NridgHt" = 16, "NWAmes" = 17, "OldTown" = 18,
  "SWISU" = 19, "Sawyer" = 20, "SawyerW" = 21, "Somerst" = 22,
  "StoneBr" = 23, "Timber" = 24, "Veenker" = 25
)
# Transforming Neighborhood column to numeric in train dataset
Neighborhood <- as.factor(housing_train$Neighborhood)
sum(is.na(Neighborhood)) # no missing values
## [1] 0
levels(Neighborhood)
## [1] "Blmngtn" "Blueste" "BrDale" "BrkSide" "ClearCr" "CollgCr" "Crawfor"
## [8] "Edwards" "Gilbert" "IDOTRR" "MeadowV" "Mitchel" "NAmes" "NoRidge"
## [15] "NPkVill" "NridgHt" "NWAmes" "OldTown" "Sawyer" "SawyerW" "Somerst"
## [22] "StoneBr" "SWISU" "Timber" "Veenker"
Neighborhood <- unname(neighborhood_codes[as.character(Neighborhood)])
housing_train$Neighborhood <- Neighborhood
# Transforming Neighborhood column to numeric in test dataset
Neighborhood1 <- as.factor(housing_test$Neighborhood)
sum(is.na(Neighborhood1)) # no missing values
## [1] 0
levels(Neighborhood1)
## [1] "Blmngtn" "Blueste" "BrDale" "BrkSide" "ClearCr" "CollgCr" "Crawfor"
## [8] "Edwards" "Gilbert" "IDOTRR" "MeadowV" "Mitchel" "NAmes" "NoRidge"
## [15] "NPkVill" "NridgHt" "NWAmes" "OldTown" "Sawyer" "SawyerW" "Somerst"
## [22] "StoneBr" "SWISU" "Timber" "Veenker"
Neighborhood1 <- unname(neighborhood_codes[as.character(Neighborhood1)])
housing_test$Neighborhood <- Neighborhood1
# Condition1: encode the proximity condition as a number in both datasets
# using one explicit lookup table.
# as.numeric() ignores extra named arguments and would return the alphabetical
# level codes (e.g. PosA = 4, RRNn = 9) rather than the mapping below.
condition1_codes <- c(
  "Artery" = 1, "Feedr" = 2, "Norm" = 3, "RRNn" = 4, "RRAn" = 5,
  "PosN" = 6, "PosA" = 7, "RRNe" = 8, "RRAe" = 9
)
# Transforming Condition1 column to numeric in train dataset
Condition1 <- as.factor(housing_train$Condition1)
sum(is.na(Condition1)) # no missing values
## [1] 0
levels(Condition1)
## [1] "Artery" "Feedr" "Norm" "PosA" "PosN" "RRAe" "RRAn" "RRNe"
## [9] "RRNn"
Condition1 <- unname(condition1_codes[as.character(Condition1)])
housing_train$Condition1 <- Condition1
# Transforming Condition1 column to numeric in test dataset
Condition1T <- as.factor(housing_test$Condition1)
sum(is.na(Condition1T)) # no missing values
## [1] 0
levels(Condition1T)
## [1] "Artery" "Feedr" "Norm" "PosA" "PosN" "RRAe" "RRAn" "RRNe"
## [9] "RRNn"
Condition1T <- unname(condition1_codes[as.character(Condition1T)])
housing_test$Condition1 <- Condition1T
# Condition2: encode the second proximity condition as a number in both
# datasets using one explicit lookup table.
# The train and test files contain different subsets of the levels, and
# as.numeric() ignores extra named arguments, so codes taken from each factor
# would not follow the mapping below. Looking the codes up by name fixes that.
condition2_codes <- c(
  "Artery" = 1, "Feedr" = 2, "Norm" = 3, "RRNn" = 4, "RRAn" = 5,
  "PosN" = 6, "PosA" = 7, "RRNe" = 8, "RRAe" = 9
)
# Transforming Condition2 column to numeric in train dataset
condition2 <- as.factor(housing_train$Condition2)
sum(is.na(condition2)) # no missing values
## [1] 0
levels(condition2)
## [1] "Artery" "Feedr" "Norm" "PosA" "PosN" "RRAe" "RRAn" "RRNn"
condition2 <- unname(condition2_codes[as.character(condition2)])
housing_train$Condition2 <- condition2
# Transforming Condition2 column to numeric in test dataset
Condition2T <- as.factor(housing_test$Condition2)
sum(is.na(Condition2T)) # no missing values
## [1] 0
levels(Condition2T) # only 5 of the levels occur in the test dataset
## [1] "Artery" "Feedr" "Norm" "PosA" "PosN"
Condition2T <- unname(condition2_codes[as.character(Condition2T)])
housing_test$Condition2 <- Condition2T
# BldgType: encode the dwelling type as a number in both datasets
# (1Fam = 1, 2fmCon = 2, Duplex = 3, TwnhsE = 4, Twnhs = 5).
# The lookup is keyed by the level names as they appear in the data
# ("2fmCon", "Duplex"). as.numeric() ignores extra named arguments, so the
# mapping previously passed to it was never applied and Twnhs/TwnhsE received
# the alphabetical codes 4/5.
bldgtype_codes <- c(
  "1Fam" = 1, "2fmCon" = 2, "Duplex" = 3, "TwnhsE" = 4, "Twnhs" = 5
)
# Transforming BldgType column to numeric in train dataset
BldgType <- as.factor(housing_train$BldgType)
sum(is.na(BldgType)) # no missing values
## [1] 0
levels(BldgType)
## [1] "1Fam" "2fmCon" "Duplex" "Twnhs" "TwnhsE"
BldgType <- unname(bldgtype_codes[as.character(BldgType)])
housing_train$BldgType <- BldgType
# Transforming BldgType column to numeric in test dataset
BldgType1 <- as.factor(housing_test$BldgType)
sum(is.na(BldgType1)) # no missing values
## [1] 0
levels(BldgType1)
## [1] "1Fam" "2fmCon" "Duplex" "Twnhs" "TwnhsE"
BldgType1 <- unname(bldgtype_codes[as.character(BldgType1)])
housing_test$BldgType <- BldgType1
# HouseStyle: encode the dwelling style as a number in both datasets using one
# explicit lookup table.
# The test file has no "2.5Fin" houses, so codes derived from each factor's
# own levels differ between train and test (e.g. 2Story is 6 in train but 5 in
# test). as.numeric() also ignores the name = value arguments, so the mapping
# is applied by level name here.
housestyle_codes <- c(
  "1Story" = 1, "1.5Fin" = 2, "1.5Unf" = 3, "2Story" = 4,
  "2.5Fin" = 5, "2.5Unf" = 6, "SFoyer" = 7, "SLvl" = 8
)
# Transforming HouseStyle column to numeric in train dataset
HouseStyle <- as.factor(housing_train$HouseStyle)
sum(is.na(HouseStyle)) # no missing values
## [1] 0
levels(HouseStyle)
## [1] "1.5Fin" "1.5Unf" "1Story" "2.5Fin" "2.5Unf" "2Story" "SFoyer" "SLvl"
HouseStyle <- unname(housestyle_codes[as.character(HouseStyle)])
housing_train$HouseStyle <- HouseStyle
# Transforming HouseStyle column to numeric in test dataset
HouseStyle1 <- as.factor(housing_test$HouseStyle)
sum(is.na(HouseStyle1)) # no missing values
## [1] 0
levels(HouseStyle1)
## [1] "1.5Fin" "1.5Unf" "1Story" "2.5Unf" "2Story" "SFoyer" "SLvl"
HouseStyle1 <- unname(housestyle_codes[as.character(HouseStyle1)])
housing_test$HouseStyle <- HouseStyle1
# OverallQual, OverallCond and YearBuilt are already numeric; each is only
# inspected for missing values and left unchanged in both datasets.
# OverallQual, train dataset
OverallQual <- factor(housing_train$OverallQual)
sum(is.na(OverallQual)) # no missing values
## [1] 0
# OverallQual, test dataset
OverallQual1 <- factor(housing_test$OverallQual)
sum(is.na(OverallQual1)) # no missing values
## [1] 0
# OverallCond, train dataset
OverallCond <- factor(housing_train$OverallCond)
sum(is.na(OverallCond)) # no missing values
## [1] 0
# OverallCond, test dataset
OverallCond1 <- factor(housing_test$OverallCond)
sum(is.na(OverallCond1)) # no missing values
## [1] 0
# YearBuilt, train dataset
YearBuilt <- factor(housing_train$YearBuilt)
sum(is.na(YearBuilt)) # no missing values
## [1] 0
# YearBuilt, test dataset
YearBuilt1 <- factor(housing_test$YearBuilt)
sum(is.na(YearBuilt1)) # no missing values
## [1] 0
# RoofStyle: encode the roof style as a number in both datasets
# (Flat = 1, Gable = 2, Gambrel = 3, Hip = 4, Mansard = 5, Shed = 6).
# The mapping is applied by level name because as.numeric() ignores any extra
# named arguments passed to it.
roofstyle_codes <- c(
  "Flat" = 1, "Gable" = 2, "Gambrel" = 3, "Hip" = 4, "Mansard" = 5, "Shed" = 6
)
# Transforming RoofStyle column to numeric in train dataset
RoofStyle <- as.factor(housing_train$RoofStyle)
levels(RoofStyle) # "Flat" "Gable" "Gambrel" "Hip" "Mansard" "Shed"
## [1] "Flat" "Gable" "Gambrel" "Hip" "Mansard" "Shed"
sum(is.na(RoofStyle)) # 0 Missing Entries
## [1] 0
RoofStyle <- unname(roofstyle_codes[as.character(RoofStyle)])
housing_train$RoofStyle <- RoofStyle
# Transforming RoofStyle column to numeric in test dataset
RoofStyle1 <- as.factor(housing_test$RoofStyle)
levels(RoofStyle1) # "Flat" "Gable" "Gambrel" "Hip" "Mansard" "Shed"
## [1] "Flat" "Gable" "Gambrel" "Hip" "Mansard" "Shed"
sum(is.na(RoofStyle1)) # 0 Missing Entries
## [1] 0
RoofStyle1 <- unname(roofstyle_codes[as.character(RoofStyle1)])
housing_test$RoofStyle <- RoofStyle1
# RoofMatl: encode the roof material as a number in both datasets using one
# explicit lookup table.
# Only 4 of the 8 materials occur in the test file, so codes derived from each
# factor's own levels disagree (CompShg is 2 in train but 1 in test).
# as.numeric() ignores the name = value arguments, so the mapping is applied
# by level name instead.
roofmatl_codes <- c(
  "ClyTile" = 1, "CompShg" = 2, "Membran" = 3, "Metal" = 4,
  "Roll" = 5, "Tar&Grv" = 6, "WdShake" = 7, "WdShngl" = 8
)
# Transforming RoofMatl column to numeric in train dataset
RoofMatl <- as.factor(housing_train$RoofMatl)
levels(RoofMatl) # 8 levels
## [1] "ClyTile" "CompShg" "Membran" "Metal" "Roll" "Tar&Grv" "WdShake"
## [8] "WdShngl"
sum(is.na(RoofMatl)) # 0 Missing Entries
## [1] 0
RoofMatl <- unname(roofmatl_codes[as.character(RoofMatl)])
housing_train$RoofMatl <- RoofMatl
# Transforming RoofMatl column to numeric in test dataset
RoofMatl1 <- as.factor(housing_test$RoofMatl)
levels(RoofMatl1) # only "CompShg" "Tar&Grv" "WdShake" "WdShngl" occur
## [1] "CompShg" "Tar&Grv" "WdShake" "WdShngl"
sum(is.na(RoofMatl1)) # 0 Missing Entries
## [1] 0
RoofMatl1 <- unname(roofmatl_codes[as.character(RoofMatl1)])
housing_test$RoofMatl <- RoofMatl1
# Exterior1st: encode the exterior covering as a number (1-15) in both
# datasets using one explicit lookup table; missing values become 0.
# The test file lacks "ImStucc" and "Stone", so codes derived from each
# factor's own levels disagree between train and test. as.numeric() ignores
# the name = value arguments, so the mapping is applied by level name.
exterior1st_codes <- c(
  "AsbShng" = 1, "AsphShn" = 2, "BrkComm" = 3, "BrkFace" = 4, "CBlock" = 5,
  "CemntBd" = 6, "HdBoard" = 7, "ImStucc" = 8, "MetalSd" = 9,
  "Plywood" = 10, "Stone" = 11, "Stucco" = 12, "VinylSd" = 13,
  "Wd Sdng" = 14, "WdShing" = 15
)
# Transforming Exterior1st column to numeric in train dataset
Exterior1st <- as.factor(housing_train$Exterior1st)
levels(Exterior1st) # 15 levels
## [1] "AsbShng" "AsphShn" "BrkComm" "BrkFace" "CBlock" "CemntBd" "HdBoard"
## [8] "ImStucc" "MetalSd" "Plywood" "Stone" "Stucco" "VinylSd" "Wd Sdng"
## [15] "WdShing"
sum(is.na(Exterior1st)) # 0 Missing Entries
## [1] 0
Exterior1st <- unname(exterior1st_codes[as.character(Exterior1st)])
housing_train$Exterior1st <- Exterior1st
# Transforming Exterior1st column to numeric in test dataset
Exterior1st1 <- as.factor(housing_test$Exterior1st)
levels(Exterior1st1) # 13 levels: "ImStucc" and "Stone" do not occur
## [1] "AsbShng" "AsphShn" "BrkComm" "BrkFace" "CBlock" "CemntBd" "HdBoard"
## [8] "MetalSd" "Plywood" "Stucco" "VinylSd" "Wd Sdng" "WdShing"
sum(is.na(Exterior1st1)) # 1 missing entry
## [1] 1
Exterior1st1 <- unname(exterior1st_codes[as.character(Exterior1st1)])
# Impute the missing entry with 0, as is done for the other columns
Exterior1st1[is.na(Exterior1st1)] <- 0
housing_test$Exterior1st <- Exterior1st1
# Exterior2nd: encode the second exterior covering as a number (1-16) in both
# datasets using one explicit lookup table; missing values become 0.
# The test file lacks the level "Other", so codes derived from each factor's
# own levels disagree from "Plywood" onwards. as.numeric() ignores the
# name = value arguments, so the mapping is applied by level name.
exterior2nd_codes <- c(
  "AsbShng" = 1, "AsphShn" = 2, "Brk Cmn" = 3, "BrkFace" = 4, "CBlock" = 5,
  "CmentBd" = 6, "HdBoard" = 7, "ImStucc" = 8, "MetalSd" = 9, "Other" = 10,
  "Plywood" = 11, "Stone" = 12, "Stucco" = 13, "VinylSd" = 14,
  "Wd Sdng" = 15, "Wd Shng" = 16
)
# Transforming Exterior2nd column to numeric in train dataset
Exterior2nd <- as.factor(housing_train$Exterior2nd)
levels(Exterior2nd) # 16 levels
## [1] "AsbShng" "AsphShn" "Brk Cmn" "BrkFace" "CBlock" "CmentBd" "HdBoard"
## [8] "ImStucc" "MetalSd" "Other" "Plywood" "Stone" "Stucco" "VinylSd"
## [15] "Wd Sdng" "Wd Shng"
sum(is.na(Exterior2nd)) # 0 Missing Entries
## [1] 0
Exterior2nd <- unname(exterior2nd_codes[as.character(Exterior2nd)])
housing_train$Exterior2nd <- Exterior2nd
# Transforming Exterior2nd column to numeric in test dataset
Exterior2nd1 <- as.factor(housing_test$Exterior2nd)
levels(Exterior2nd1) # 15 levels: "Other" does not occur
## [1] "AsbShng" "AsphShn" "Brk Cmn" "BrkFace" "CBlock" "CmentBd" "HdBoard"
## [8] "ImStucc" "MetalSd" "Plywood" "Stone" "Stucco" "VinylSd" "Wd Sdng"
## [15] "Wd Shng"
sum(is.na(Exterior2nd1)) # 1 missing entry
## [1] 1
Exterior2nd1 <- unname(exterior2nd_codes[as.character(Exterior2nd1)])
# Impute the missing entry with 0, as is done for the other columns
Exterior2nd1[is.na(Exterior2nd1)] <- 0
housing_test$Exterior2nd <- Exterior2nd1
# MasVnrType: encode the masonry veneer type as a number in both datasets
# (BrkCmn = 1, BrkFace = 2, None = 3, Stone = 4); missing values become 0.
# The codes are looked up by level name since as.numeric() ignores extra
# named arguments.
masvnrtype_codes <- c("BrkCmn" = 1, "BrkFace" = 2, "None" = 3, "Stone" = 4)
# Transforming MasVnrType column to numeric in train dataset
MasVnrType <- as.factor(housing_train$MasVnrType)
levels(MasVnrType) # "BrkCmn" "BrkFace" "None" "Stone"
## [1] "BrkCmn" "BrkFace" "None" "Stone"
sum(is.na(MasVnrType)) # 8 Missing Values
## [1] 8
MasVnrType <- unname(masvnrtype_codes[as.character(MasVnrType)])
MasVnrType[is.na(MasVnrType)] <- 0
housing_train$MasVnrType <- MasVnrType
# Transforming MasVnrType column to numeric in test dataset
MasVnrType1 <- as.factor(housing_test$MasVnrType)
levels(MasVnrType1) # "BrkCmn" "BrkFace" "None" "Stone"
## [1] "BrkCmn" "BrkFace" "None" "Stone"
sum(is.na(MasVnrType1)) # 16 Missing Values
## [1] 16
MasVnrType1 <- unname(masvnrtype_codes[as.character(MasVnrType1)])
MasVnrType1[is.na(MasVnrType1)] <- 0
housing_test$MasVnrType <- MasVnrType1
# ExterQual: encode the exterior quality as an ordinal number in both datasets
# (Fa = 1, TA = 2, Gd = 3, Ex = 4).
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (Ex = 1, Fa = 2, Gd = 3, TA = 4), which loses the quality
# ordering. The mapping is therefore applied by level name.
exterqual_codes <- c("Fa" = 1, "TA" = 2, "Gd" = 3, "Ex" = 4)
# Transforming ExterQual column to numeric in train dataset
ExterQual <- as.factor(housing_train$ExterQual)
levels(ExterQual) # "Ex" "Fa" "Gd" "TA"
## [1] "Ex" "Fa" "Gd" "TA"
sum(is.na(ExterQual)) # 0 Missing Entries
## [1] 0
ExterQual <- unname(exterqual_codes[as.character(ExterQual)])
housing_train$ExterQual <- ExterQual
# Transforming ExterQual column to numeric in test dataset
ExterQual1 <- as.factor(housing_test$ExterQual)
levels(ExterQual1) # "Ex" "Fa" "Gd" "TA"
## [1] "Ex" "Fa" "Gd" "TA"
sum(is.na(ExterQual1)) # 0 Missing Entries
## [1] 0
ExterQual1 <- unname(exterqual_codes[as.character(ExterQual1)])
housing_test$ExterQual <- ExterQual1
# ExterCond: encode the exterior condition as an ordinal number in both
# datasets (Po = 1, Fa = 2, TA = 3, Gd = 4, Ex = 5).
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (Ex = 1, ..., TA = 5), so the mapping is applied by level name.
extercond_codes <- c("Po" = 1, "Fa" = 2, "TA" = 3, "Gd" = 4, "Ex" = 5)
# Transforming ExterCond column to numeric in train dataset
ExterCond <- as.factor(housing_train$ExterCond)
levels(ExterCond) # "Ex" "Fa" "Gd" "Po" "TA"
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(ExterCond)) # 0 Missing Entries
## [1] 0
ExterCond <- unname(extercond_codes[as.character(ExterCond)])
housing_train$ExterCond <- ExterCond
# Transforming ExterCond column to numeric in test dataset
ExterCond1 <- as.factor(housing_test$ExterCond)
levels(ExterCond1) # "Ex" "Fa" "Gd" "Po" "TA"
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(ExterCond1)) # 0 Missing Entries
## [1] 0
ExterCond1 <- unname(extercond_codes[as.character(ExterCond1)])
housing_test$ExterCond <- ExterCond1
# Foundation: encode the foundation type as a number in both datasets
# (BrkTil = 1, CBlock = 2, PConc = 3, Slab = 4, Stone = 5, Wood = 6).
# A named lookup is used because as.numeric() ignores the name = value
# arguments; it also keeps train and test on identical codes.
foundation_codes <- c(
  "BrkTil" = 1, "CBlock" = 2, "PConc" = 3, "Slab" = 4, "Stone" = 5, "Wood" = 6
)
# Transforming Foundation column to numeric in train dataset
Foundation <- as.factor(housing_train$Foundation)
levels(Foundation) # "BrkTil" "CBlock" "PConc" "Slab" "Stone" "Wood"
## [1] "BrkTil" "CBlock" "PConc" "Slab" "Stone" "Wood"
sum(is.na(Foundation)) # 0 Missing Entries
## [1] 0
Foundation <- unname(foundation_codes[as.character(Foundation)])
housing_train$Foundation <- Foundation
# Transforming Foundation column to numeric in test dataset
Foundation1 <- as.factor(housing_test$Foundation)
levels(Foundation1) # "BrkTil" "CBlock" "PConc" "Slab" "Stone" "Wood"
## [1] "BrkTil" "CBlock" "PConc" "Slab" "Stone" "Wood"
sum(is.na(Foundation1)) # 0 Missing Entries
## [1] 0
Foundation1 <- unname(foundation_codes[as.character(Foundation1)])
housing_test$Foundation <- Foundation1
# BsmtQual: encode the basement quality as a number in both datasets
# (TA = 1, Fa = 2, Gd = 3, Ex = 4); missing values become 0.
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (Ex = 1, Fa = 2, Gd = 3, TA = 4), so the mapping is applied by
# level name.
# NOTE(review): TA is coded below Fa here - confirm this ordering is intended.
bsmtqual_codes <- c("TA" = 1, "Fa" = 2, "Gd" = 3, "Ex" = 4)
# Transforming BsmtQual column to numeric in train dataset
BsmtQual <- as.factor(housing_train$BsmtQual)
levels(BsmtQual) # "Ex" "Fa" "Gd" "TA"
## [1] "Ex" "Fa" "Gd" "TA"
sum(is.na(BsmtQual)) # 37 Missing Entries
## [1] 37
BsmtQual <- unname(bsmtqual_codes[as.character(BsmtQual)])
BsmtQual[is.na(BsmtQual)] <- 0
housing_train$BsmtQual <- BsmtQual
# Transforming BsmtQual column to numeric in test dataset
BsmtQual1 <- as.factor(housing_test$BsmtQual)
levels(BsmtQual1) # "Ex" "Fa" "Gd" "TA"
## [1] "Ex" "Fa" "Gd" "TA"
sum(is.na(BsmtQual1)) # 44 Missing Entries
## [1] 44
BsmtQual1 <- unname(bsmtqual_codes[as.character(BsmtQual1)])
BsmtQual1[is.na(BsmtQual1)] <- 0
housing_test$BsmtQual <- BsmtQual1
# BsmtCond: encode the basement condition as a number in both datasets
# (TA = 1, Po = 2, Fa = 3, Gd = 4); missing values become 0.
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (Fa = 1, Gd = 2, Po = 3, TA = 4), so the mapping is applied by
# level name.
# NOTE(review): TA is coded below Po and Fa here - confirm this ordering is
# intended.
bsmtcond_codes <- c("TA" = 1, "Po" = 2, "Fa" = 3, "Gd" = 4)
# Transforming BsmtCond column to numeric in train dataset
BsmtCond <- as.factor(housing_train$BsmtCond)
levels(BsmtCond) # "Fa" "Gd" "Po" "TA"
## [1] "Fa" "Gd" "Po" "TA"
sum(is.na(BsmtCond)) # 37 Missing Entries
## [1] 37
BsmtCond <- unname(bsmtcond_codes[as.character(BsmtCond)])
BsmtCond[is.na(BsmtCond)] <- 0
housing_train$BsmtCond <- BsmtCond
# Transforming BsmtCond column to numeric in test dataset
BsmtCond1 <- as.factor(housing_test$BsmtCond)
levels(BsmtCond1) # "Fa" "Gd" "Po" "TA"
## [1] "Fa" "Gd" "Po" "TA"
sum(is.na(BsmtCond1)) # 45 Missing Entries
## [1] 45
BsmtCond1 <- unname(bsmtcond_codes[as.character(BsmtCond1)])
BsmtCond1[is.na(BsmtCond1)] <- 0
housing_test$BsmtCond <- BsmtCond1
# BsmtExposure: encode the basement exposure as an ordinal number in both
# datasets (No = 1, Mn = 2, Av = 3, Gd = 4); missing values become 0.
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (Av = 1, Gd = 2, Mn = 3, No = 4), which inverts the intended
# ordering. The mapping is therefore applied by level name.
bsmtexposure_codes <- c("No" = 1, "Mn" = 2, "Av" = 3, "Gd" = 4)
# Transforming BsmtExposure column to numeric in train dataset
BsmtExposure <- as.factor(housing_train$BsmtExposure)
levels(BsmtExposure) # "Av" "Gd" "Mn" "No"
## [1] "Av" "Gd" "Mn" "No"
sum(is.na(BsmtExposure)) # 38 Missing Entries
## [1] 38
BsmtExposure <- unname(bsmtexposure_codes[as.character(BsmtExposure)])
BsmtExposure[is.na(BsmtExposure)] <- 0
housing_train$BsmtExposure <- BsmtExposure
# Transforming BsmtExposure column to numeric in test dataset
BsmtExposure1 <- as.factor(housing_test$BsmtExposure)
levels(BsmtExposure1) # "Av" "Gd" "Mn" "No"
## [1] "Av" "Gd" "Mn" "No"
sum(is.na(BsmtExposure1)) # 44 Missing Entries
## [1] 44
BsmtExposure1 <- unname(bsmtexposure_codes[as.character(BsmtExposure1)])
BsmtExposure1[is.na(BsmtExposure1)] <- 0
housing_test$BsmtExposure <- BsmtExposure1
# BsmtFinType1: encode the rating of the first finished basement area as an
# ordinal number in both datasets
# (Unf = 1, LwQ = 2, Rec = 3, BLQ = 4, ALQ = 5, GLQ = 6); missing values
# become 0.
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (ALQ = 1, ..., Unf = 6), so the mapping is applied by level name.
bsmtfintype1_codes <- c(
  "Unf" = 1, "LwQ" = 2, "Rec" = 3, "BLQ" = 4, "ALQ" = 5, "GLQ" = 6
)
# Transforming BsmtFinType1 column to numeric in train dataset
BsmtFinType1 <- as.factor(housing_train$BsmtFinType1)
levels(BsmtFinType1) # "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
## [1] "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
sum(is.na(BsmtFinType1)) # 37 Missing Entries
## [1] 37
BsmtFinType1 <- unname(bsmtfintype1_codes[as.character(BsmtFinType1)])
BsmtFinType1[is.na(BsmtFinType1)] <- 0
housing_train$BsmtFinType1 <- BsmtFinType1
# Transforming BsmtFinType1 column to numeric in test dataset
BsmtFinType1T <- as.factor(housing_test$BsmtFinType1)
levels(BsmtFinType1T) # "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
## [1] "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
sum(is.na(BsmtFinType1T)) # 42 Missing Entries
## [1] 42
BsmtFinType1T <- unname(bsmtfintype1_codes[as.character(BsmtFinType1T)])
BsmtFinType1T[is.na(BsmtFinType1T)] <- 0
housing_test$BsmtFinType1 <- BsmtFinType1T
# BsmtFinSF1: check for missing values in both datasets and impute 0 for the
# one missing in the test dataset.
# Checking for missing values in BsmtFinSF1 column of train dataset
BsmtFinSF1 <- as.factor(housing_train$BsmtFinSF1)
sum(is.na(BsmtFinSF1)) # no missing values
## [1] 0
# Checking for missing values in BsmtFinSF1 column of test dataset.
# The imputation works on the column as read from the file: going through
# as.factor() here would write a factor back into housing_test and turn the
# column categorical.
BsmtFinSF1T <- housing_test$BsmtFinSF1
sum(is.na(BsmtFinSF1T)) # 1 missing value
## [1] 1
BsmtFinSF1T[is.na(BsmtFinSF1T)] <- 0
housing_test$BsmtFinSF1 <- BsmtFinSF1T
# BsmtFinType2: encode the rating of the second finished basement area as an
# ordinal number in both datasets
# (Unf = 1, LwQ = 2, Rec = 3, BLQ = 4, ALQ = 5, GLQ = 6); missing values
# become 0.
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (ALQ = 1, ..., Unf = 6), so the mapping is applied by level name.
bsmtfintype2_codes <- c(
  "Unf" = 1, "LwQ" = 2, "Rec" = 3, "BLQ" = 4, "ALQ" = 5, "GLQ" = 6
)
# Transforming BsmtFinType2 column to numeric in train dataset
BsmtFinType2 <- as.factor(housing_train$BsmtFinType2)
levels(BsmtFinType2) # "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
## [1] "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
sum(is.na(BsmtFinType2)) # 38 Missing Entries
## [1] 38
BsmtFinType2 <- unname(bsmtfintype2_codes[as.character(BsmtFinType2)])
BsmtFinType2[is.na(BsmtFinType2)] <- 0
housing_train$BsmtFinType2 <- BsmtFinType2
# Transforming BsmtFinType2 column to numeric in test dataset
BsmtFinType2T <- as.factor(housing_test$BsmtFinType2)
levels(BsmtFinType2T) # "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
## [1] "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
sum(is.na(BsmtFinType2T)) # 42 Missing Entries
## [1] 42
BsmtFinType2T <- unname(bsmtfintype2_codes[as.character(BsmtFinType2T)])
BsmtFinType2T[is.na(BsmtFinType2T)] <- 0
housing_test$BsmtFinType2 <- BsmtFinType2T
# BsmtFinSF2: check for missing values in both datasets and impute 0 for the
# one missing in the test dataset.
# Checking for missing values in BsmtFinSF2 column of train dataset
BsmtFinSF2 <- as.factor(housing_train$BsmtFinSF2)
sum(is.na(BsmtFinSF2)) # no missing values
## [1] 0
# Checking for missing values in BsmtFinSF2 column of test dataset.
# The column is imputed as read from the file: converting it with as.factor()
# first would store a factor back into housing_test instead of numbers.
BsmtFinSF2T <- housing_test$BsmtFinSF2
sum(is.na(BsmtFinSF2T)) # 1 missing value
## [1] 1
BsmtFinSF2T[is.na(BsmtFinSF2T)] <- 0
housing_test$BsmtFinSF2 <- BsmtFinSF2T
# BsmtUnfSF: check for missing values in both datasets and impute 0 for the
# one missing in the test dataset.
# Checking for missing values in BsmtUnfSF column of train dataset.
BsmtUnfSF <- as.factor(housing_train$BsmtUnfSF)
sum(is.na(BsmtUnfSF)) # no missing values
## [1] 0
# Checking for missing values in BsmtUnfSF column of test dataset.
# No as.factor() conversion here: the imputed vector is written back to
# housing_test, and a factor would make the column categorical.
BsmtUnfSFT <- housing_test$BsmtUnfSF
sum(is.na(BsmtUnfSFT)) # 1 missing value
## [1] 1
BsmtUnfSFT[is.na(BsmtUnfSFT)] <- 0
housing_test$BsmtUnfSF <- BsmtUnfSFT
# TotalBsmtSF: check for missing values in both datasets and impute 0 for the
# one missing in the test dataset.
# Checking for missing values in TotalBsmtSF column of train dataset.
TotalBsmtSF <- as.factor(housing_train$TotalBsmtSF)
sum(is.na(TotalBsmtSF)) # no missing value
## [1] 0
# Checking for missing values in TotalBsmtSF column of test dataset.
# The column is kept in the type it was read with; wrapping it in as.factor()
# before writing it back would replace the numbers with a factor.
TotalBsmtSFT <- housing_test$TotalBsmtSF
sum(is.na(TotalBsmtSFT)) # 1 missing value
## [1] 1
TotalBsmtSFT[is.na(TotalBsmtSFT)] <- 0
housing_test$TotalBsmtSF <- TotalBsmtSFT
# Heating: encode the heating type as a number in both datasets using one
# explicit lookup table
# (Floor = 1, GasW = 2, Grav = 3, Wall = 4, OthW = 5, GasA = 6).
# Only 4 of the 6 types occur in the test file, so codes derived from each
# factor's own levels disagree (GasA is 2 in train but 1 in test).
# as.numeric() ignores the name = value arguments, so the mapping is applied
# by level name instead.
heating_codes <- c(
  "Floor" = 1, "GasW" = 2, "Grav" = 3, "Wall" = 4, "OthW" = 5, "GasA" = 6
)
# Transforming Heating column to numeric in train dataset
Heating <- as.factor(housing_train$Heating)
levels(Heating) # "Floor" "GasA" "GasW" "Grav" "OthW" "Wall"
## [1] "Floor" "GasA" "GasW" "Grav" "OthW" "Wall"
sum(is.na(Heating)) # No Missing Entries
## [1] 0
Heating <- unname(heating_codes[as.character(Heating)])
housing_train$Heating <- Heating
# Transforming Heating column to numeric in test dataset
HeatingT <- as.factor(housing_test$Heating)
levels(HeatingT) # only "GasA" "GasW" "Grav" "Wall" occur
## [1] "GasA" "GasW" "Grav" "Wall"
sum(is.na(HeatingT)) # No Missing Entries
## [1] 0
HeatingT <- unname(heating_codes[as.character(HeatingT)])
housing_test$Heating <- HeatingT
# HeatingQC: encode the heating quality as an ordinal number in both datasets
# (Po = 1, Fa = 2, TA = 3, Gd = 4, Ex = 5).
# as.numeric() ignores extra named arguments and returns the alphabetical
# level codes (Ex = 1, ..., TA = 5), so the mapping is applied by level name.
heatingqc_codes <- c("Po" = 1, "Fa" = 2, "TA" = 3, "Gd" = 4, "Ex" = 5)
# Transforming HeatingQC column to numeric in train dataset
HeatingQC <- as.factor(housing_train$HeatingQC)
levels(HeatingQC) # "Ex" "Fa" "Gd" "Po" "TA"
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(HeatingQC)) # No Missing Entries
## [1] 0
HeatingQC <- unname(heatingqc_codes[as.character(HeatingQC)])
housing_train$HeatingQC <- HeatingQC
# Transforming HeatingQC column to numeric in test dataset
HeatingQCT <- as.factor(housing_test$HeatingQC)
levels(HeatingQCT) # "Ex" "Fa" "Gd" "Po" "TA"
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(HeatingQCT)) # No Missing Entries
## [1] 0
HeatingQCT <- unname(heatingqc_codes[as.character(HeatingQCT)])
housing_test$HeatingQC <- HeatingQCT
# CentralAir -> binary indicator (1 for "Y", 0 otherwise), train dataset
CentralAir <- as.factor(housing_train$CentralAir)
levels(CentralAir) # "N" "Y"
## [1] "N" "Y"
sum(is.na(CentralAir)) # No Missing Entries
## [1] 0
CentralAir <- as.numeric(CentralAir == "Y")
housing_train$CentralAir <- CentralAir

# CentralAir -> binary indicator (1 for "Y", 0 otherwise), test dataset
CentralAirT <- as.factor(housing_test$CentralAir)
levels(CentralAirT) # "N" "Y"
## [1] "N" "Y"
sum(is.na(CentralAirT)) # No Missing Entries
## [1] 0
CentralAirT <- as.numeric(CentralAirT == "Y")
housing_test$CentralAir <- CentralAirT
# Transforming Electrical column to numeric in train and test datasets.
# as.numeric() on a factor ignores extra named arguments and returns the
# alphabetical level codes; because the test set has no "Mix" level, those
# codes also differed between the two data sets. An explicit lookup vector
# shared by both data sets applies the intended mapping.
# Labels absent from the lookup (and NA) yield NA.
electrical_codes <- c(
  "FuseP" = 1, "FuseF" = 2, "FuseA" = 3, "SBrkr" = 4, "Mix" = 5
)

# Train dataset
Electrical <- as.factor(housing_train$Electrical)
levels(Electrical) # "FuseA" "FuseF" "FuseP" "Mix" "SBrkr"
## [1] "FuseA" "FuseF" "FuseP" "Mix" "SBrkr"
sum(is.na(Electrical)) # 1 Missing Entry
## [1] 1
Electrical <- unname(electrical_codes[as.character(Electrical)])
Electrical[is.na(Electrical)] <- 0 # missing entry is coded as 0
housing_train$Electrical <- Electrical

# Test dataset
ElectricalT <- as.factor(housing_test$Electrical)
levels(ElectricalT) # "FuseA" "FuseF" "FuseP" "SBrkr"
## [1] "FuseA" "FuseF" "FuseP" "SBrkr"
sum(is.na(ElectricalT)) # 0 Missing Entries
## [1] 0
ElectricalT <- unname(electrical_codes[as.character(ElectricalT)])
housing_test$Electrical <- ElectricalT
# Transforming KitchenQual column to numeric in train and test datasets.
# as.numeric() on a factor ignores extra named arguments and would return the
# alphabetical level codes (Ex=1, Fa=2, Gd=3, TA=4); the intended scale
# Po=1, Fa=2, TA=3, Gd=4, Ex=5 is applied through an explicit lookup vector.
# Labels absent from the lookup (and NA) yield NA.
kitchen_qual_codes <- c("Po" = 1, "Fa" = 2, "TA" = 3, "Gd" = 4, "Ex" = 5)

# Train dataset
KitchenQual <- as.factor(housing_train$KitchenQual)
levels(KitchenQual) # "Ex" "Fa" "Gd" "TA"
## [1] "Ex" "Fa" "Gd" "TA"
sum(is.na(KitchenQual)) # No Missing Entries
## [1] 0
KitchenQual <- unname(kitchen_qual_codes[as.character(KitchenQual)])
housing_train$KitchenQual <- KitchenQual

# Test dataset
KitchenQualT <- as.factor(housing_test$KitchenQual)
levels(KitchenQualT) # "Ex" "Fa" "Gd" "TA"
## [1] "Ex" "Fa" "Gd" "TA"
sum(is.na(KitchenQualT)) # 1 Missing Entry
## [1] 1
KitchenQualT <- unname(kitchen_qual_codes[as.character(KitchenQualT)])
KitchenQualT[is.na(KitchenQualT)] <- 0 # missing entry is coded as 0
housing_test$KitchenQual <- KitchenQualT
# Transforming Functional column to numeric in train and test datasets.
# as.numeric() on a factor ignores extra named arguments and would return the
# alphabetical level codes (Maj1=1 ... Typ=7); the intended scale
# "Sal"=1, "Sev"=2, "Maj2"=3, "Maj1"=4, "Mod"=5, "Min2"=6, "Min1"=7, "Typ"=8
# is applied through an explicit lookup vector.
# Labels absent from the lookup (and NA) yield NA.
functional_codes <- c(
  "Sal" = 1, "Sev" = 2, "Maj2" = 3, "Maj1" = 4,
  "Mod" = 5, "Min2" = 6, "Min1" = 7, "Typ" = 8
)

# Train dataset
Functional <- as.factor(housing_train$Functional)
levels(Functional) # "Maj1" "Maj2" "Min1" "Min2" "Mod" "Sev" "Typ"
## [1] "Maj1" "Maj2" "Min1" "Min2" "Mod" "Sev" "Typ"
sum(is.na(Functional)) # No Missing Entries
## [1] 0
Functional <- unname(functional_codes[as.character(Functional)])
housing_train$Functional <- Functional

# Test dataset
FunctionalT <- as.factor(housing_test$Functional)
levels(FunctionalT) # "Maj1" "Maj2" "Min1" "Min2" "Mod" "Sev" "Typ"
## [1] "Maj1" "Maj2" "Min1" "Min2" "Mod" "Sev" "Typ"
sum(is.na(FunctionalT)) # 2 Missing Entries
## [1] 2
FunctionalT <- unname(functional_codes[as.character(FunctionalT)])
FunctionalT[is.na(FunctionalT)] <- 0 # missing entries are coded as 0
housing_test$Functional <- FunctionalT
# BsmtFullBath: count NAs in both data sets, then zero-fill the test set
sum(is.na(housing_train[["BsmtFullBath"]])) # no missing values
## [1] 0
sum(is.na(housing_test[["BsmtFullBath"]])) # 2 missing values
## [1] 2
housing_test$BsmtFullBath <- replace(
  housing_test$BsmtFullBath,
  is.na(housing_test$BsmtFullBath),
  0
)
# BsmtHalfBath: count NAs in both data sets, then zero-fill the test set
sum(is.na(housing_train[["BsmtHalfBath"]])) # no missing values
## [1] 0
sum(is.na(housing_test[["BsmtHalfBath"]])) # 2 missing values
## [1] 2
housing_test$BsmtHalfBath <- replace(
  housing_test$BsmtHalfBath,
  is.na(housing_test$BsmtHalfBath),
  0
)
# Transforming FireplaceQu column to numeric in train and test datasets.
# as.numeric() on a factor ignores extra named arguments and would return the
# alphabetical level codes (Ex=1 ... TA=5); the intended ordinal scale is
# applied through an explicit lookup vector instead.
# Labels absent from the lookup (and NA) yield NA, which is then coded as 0.
fireplace_qu_codes <- c("Po" = 1, "Fa" = 2, "TA" = 3, "Gd" = 4, "Ex" = 5)

# Train dataset
FireplaceQu <- as.factor(housing_train$FireplaceQu)
levels(FireplaceQu) # "Ex" "Fa" "Gd" "Po" "TA"
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(FireplaceQu)) # 690 Missing Entries
## [1] 690
FireplaceQu <- unname(fireplace_qu_codes[as.character(FireplaceQu)])
FireplaceQu[is.na(FireplaceQu)] <- 0
housing_train$FireplaceQu <- FireplaceQu

# Test dataset
FireplaceQuT <- as.factor(housing_test$FireplaceQu)
levels(FireplaceQuT) # "Ex" "Fa" "Gd" "Po" "TA"
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(FireplaceQuT)) # 730 Missing Entries
## [1] 730
FireplaceQuT <- unname(fireplace_qu_codes[as.character(FireplaceQuT)])
FireplaceQuT[is.na(FireplaceQuT)] <- 0
housing_test$FireplaceQu <- FireplaceQuT
# Transforming GarageType column to numeric in train and test datasets.
# as.numeric() on a factor ignores extra named arguments and would return the
# alphabetical level codes ("2Types"=1 ... "Detchd"=6), the reverse of the
# intended mapping; an explicit lookup vector applies the intended codes.
# Labels absent from the lookup (and NA) yield NA, which is then coded as 0.
garage_type_codes <- c(
  "Detchd" = 1, "CarPort" = 2, "BuiltIn" = 3,
  "Basment" = 4, "Attchd" = 5, "2Types" = 6
)

# Train dataset
GarageType <- as.factor(housing_train$GarageType)
levels(GarageType) # "2Types" "Attchd" "Basment" "BuiltIn" "CarPort" "Detchd"
## [1] "2Types" "Attchd" "Basment" "BuiltIn" "CarPort" "Detchd"
sum(is.na(GarageType)) # 81 Missing Entries
## [1] 81
GarageType <- unname(garage_type_codes[as.character(GarageType)])
GarageType[is.na(GarageType)] <- 0
housing_train$GarageType <- GarageType

# Test dataset
GarageTypeT <- as.factor(housing_test$GarageType)
levels(GarageTypeT) # "2Types" "Attchd" "Basment" "BuiltIn" "CarPort" "Detchd"
## [1] "2Types" "Attchd" "Basment" "BuiltIn" "CarPort" "Detchd"
sum(is.na(GarageTypeT)) # 76 Missing Entries
## [1] 76
GarageTypeT <- unname(garage_type_codes[as.character(GarageTypeT)])
GarageTypeT[is.na(GarageTypeT)] <- 0
housing_test$GarageType <- GarageTypeT
# Counting missing values of GarageYrBlt (reported only; nothing is replaced here)
sum(is.na(housing_train[["GarageYrBlt"]])) # 81 missing values
## [1] 81
sum(is.na(housing_test[["GarageYrBlt"]])) # 78 missing values
## [1] 78
# Transforming GarageFinish column to numeric in train and test datasets.
# as.numeric() on a factor ignores extra named arguments and would return the
# alphabetical level codes (Fin=1, RFn=2, Unf=3), the reverse of the intended
# scale; an explicit lookup vector applies Unf=1, RFn=2, Fin=3.
# Labels absent from the lookup (and NA) yield NA, which is then coded as 0.
garage_finish_codes <- c("Unf" = 1, "RFn" = 2, "Fin" = 3)

# Train dataset
GarageFinish <- as.factor(housing_train$GarageFinish)
levels(GarageFinish) # "Fin" "RFn" "Unf"
## [1] "Fin" "RFn" "Unf"
sum(is.na(GarageFinish)) # 81 Missing Entries
## [1] 81
GarageFinish <- unname(garage_finish_codes[as.character(GarageFinish)])
GarageFinish[is.na(GarageFinish)] <- 0
housing_train$GarageFinish <- GarageFinish

# Test dataset
GarageFinishT <- as.factor(housing_test$GarageFinish)
levels(GarageFinishT) # "Fin" "RFn" "Unf"
## [1] "Fin" "RFn" "Unf"
sum(is.na(GarageFinishT)) # 78 Missing Entries
## [1] 78
GarageFinishT <- unname(garage_finish_codes[as.character(GarageFinishT)])
GarageFinishT[is.na(GarageFinishT)] <- 0
housing_test$GarageFinish <- GarageFinishT
# GarageCars: count NAs in both data sets, then zero-fill the test set
sum(is.na(housing_train[["GarageCars"]])) # no missing values
## [1] 0
sum(is.na(housing_test[["GarageCars"]])) # 1 missing value
## [1] 1
housing_test$GarageCars <- replace(
  housing_test$GarageCars,
  is.na(housing_test$GarageCars),
  0
)
# GarageArea: count NAs in both data sets, then zero-fill the test set
sum(is.na(housing_train[["GarageArea"]])) # no missing values
## [1] 0
sum(is.na(housing_test[["GarageArea"]])) # 1 missing value
## [1] 1
housing_test$GarageArea <- replace(
  housing_test$GarageArea,
  is.na(housing_test$GarageArea),
  0
)
# Transforming GarageQual column to numeric in train and test datasets.
# as.numeric() on a factor ignores extra named arguments and returns the
# alphabetical level codes; because the test set has no "Ex" level, the same
# label also received different codes in the two data sets. An explicit
# lookup vector shared by both applies the intended ordinal scale.
# Labels absent from the lookup (and NA) yield NA, which is then coded as 0.
garage_qual_codes <- c("Po" = 1, "Fa" = 2, "TA" = 3, "Gd" = 4, "Ex" = 5)

# Train dataset
GarageQual <- as.factor(housing_train$GarageQual)
levels(GarageQual) # "Ex" "Fa" "Gd" "Po" "TA"
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(GarageQual)) # 81 Missing Entries
## [1] 81
GarageQual <- unname(garage_qual_codes[as.character(GarageQual)])
GarageQual[is.na(GarageQual)] <- 0
housing_train$GarageQual <- GarageQual

# Test dataset
GarageQualT <- as.factor(housing_test$GarageQual)
levels(GarageQualT) # "Fa" "Gd" "Po" "TA"
## [1] "Fa" "Gd" "Po" "TA"
sum(is.na(GarageQualT)) # 78 Missing Entries
## [1] 78
GarageQualT <- unname(garage_qual_codes[as.character(GarageQualT)])
GarageQualT[is.na(GarageQualT)] <- 0
housing_test$GarageQual <- GarageQualT
# Transforming GarageCond column to numeric in train and test datasets.
# as.numeric() on a factor ignores extra named arguments and would return the
# alphabetical level codes (Ex=1 ... TA=5); the intended ordinal scale is
# applied through an explicit lookup vector instead.
# Labels absent from the lookup (and NA) yield NA, which is then coded as 0.
garage_cond_codes <- c("Po" = 1, "Fa" = 2, "TA" = 3, "Gd" = 4, "Ex" = 5)

# Train dataset
GarageCond <- as.factor(housing_train$GarageCond)
levels(GarageCond) # "Ex" "Fa" "Gd" "Po" "TA"
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(GarageCond)) # 81 Missing Entries
## [1] 81
GarageCond <- unname(garage_cond_codes[as.character(GarageCond)])
GarageCond[is.na(GarageCond)] <- 0
housing_train$GarageCond <- GarageCond

# Test dataset
GarageCondT <- as.factor(housing_test$GarageCond)
levels(GarageCondT) # "Ex" "Fa" "Gd" "Po" "TA"
## [1] "Ex" "Fa" "Gd" "Po" "TA"
sum(is.na(GarageCondT)) # 78 Missing Entries
## [1] 78
GarageCondT <- unname(garage_cond_codes[as.character(GarageCondT)])
GarageCondT[is.na(GarageCondT)] <- 0
housing_test$GarageCond <- GarageCondT
# Transforming PavedDrive column to numeric in train and test datasets.
# as.numeric() on a factor ignores extra named arguments; the previous result
# matched N=1, P=2, Y=3 only because that is also the alphabetical level
# order and all three levels occur in both data sets. The explicit lookup
# makes the mapping independent of which levels happen to be present.
# Labels absent from the lookup (and NA) yield NA.
paved_drive_codes <- c("N" = 1, "P" = 2, "Y" = 3)

# Train dataset
PavedDrive <- as.factor(housing_train$PavedDrive)
levels(PavedDrive) # "N" "P" "Y"
## [1] "N" "P" "Y"
sum(is.na(PavedDrive)) # 0 Missing Entries
## [1] 0
PavedDrive <- unname(paved_drive_codes[as.character(PavedDrive)])
housing_train$PavedDrive <- PavedDrive

# Test dataset
PavedDriveT <- as.factor(housing_test$PavedDrive)
levels(PavedDriveT) # "N" "P" "Y"
## [1] "N" "P" "Y"
sum(is.na(PavedDriveT)) # 0 Missing Entries
## [1] 0
PavedDriveT <- unname(paved_drive_codes[as.character(PavedDriveT)])
housing_test$PavedDrive <- PavedDriveT
# Transforming PoolQC column to numeric in train and test datasets.
# Target scale: No pool=0, Fa=1, TA=2, Gd=3, Ex=4.
# as.numeric() on a factor ignores extra named arguments and returns the
# alphabetical level codes, so the previous arithmetic on those codes produced
# Ex=1, Fa=3, Gd=4 in train and Ex=3, Gd=4 in test. An explicit lookup vector
# shared by both data sets applies the target scale directly.
# Labels absent from the lookup (and NA) yield NA, which is then coded as 0.
pool_qc_codes <- c("Fa" = 1, "TA" = 2, "Gd" = 3, "Ex" = 4)

# Train dataset
PoolQC <- as.factor(housing_train$PoolQC)
levels(PoolQC) # "Ex" "Fa" "Gd"
## [1] "Ex" "Fa" "Gd"
sum(is.na(PoolQC)) # 1453 Missing Entries
## [1] 1453
PoolQC1 <- unname(pool_qc_codes[as.character(PoolQC)])
PoolQC1[is.na(PoolQC1)] <- 0
housing_train$PoolQC <- PoolQC1

# Test dataset
PoolQCT <- as.factor(housing_test$PoolQC)
levels(PoolQCT) # "Ex" "Gd"
## [1] "Ex" "Gd"
sum(is.na(PoolQCT)) # 1456 Missing Entries
## [1] 1456
PoolQCT <- unname(pool_qc_codes[as.character(PoolQCT)])
PoolQCT[is.na(PoolQCT)] <- 0
housing_test$PoolQC <- PoolQCT
# Transforming Fence column to numeric in train and test datasets.
# as.numeric() on a factor ignores extra named arguments and would return the
# alphabetical level codes (GdPrv=1, GdWo=2, MnPrv=3, MnWw=4); an explicit
# lookup vector applies the intended MnWw=1, GdWo=2, MnPrv=3, GdPrv=4.
# Labels absent from the lookup (and NA) yield NA, which is then coded as 0.
fence_codes <- c("MnWw" = 1, "GdWo" = 2, "MnPrv" = 3, "GdPrv" = 4)

# Train dataset
Fence <- as.factor(housing_train$Fence)
levels(Fence) # "GdPrv" "GdWo" "MnPrv" "MnWw"
## [1] "GdPrv" "GdWo" "MnPrv" "MnWw"
sum(is.na(Fence)) # 1179 Missing Entries
## [1] 1179
Fence <- unname(fence_codes[as.character(Fence)])
Fence[is.na(Fence)] <- 0
housing_train$Fence <- Fence

# Test dataset
FenceT <- as.factor(housing_test$Fence)
levels(FenceT) # "GdPrv" "GdWo" "MnPrv" "MnWw"
## [1] "GdPrv" "GdWo" "MnPrv" "MnWw"
sum(is.na(FenceT)) # 1169 Missing Entries
## [1] 1169
FenceT <- unname(fence_codes[as.character(FenceT)])
FenceT[is.na(FenceT)] <- 0
housing_test$Fence <- FenceT
# Transforming MiscFeature column to numeric in train and test datasets.
# Target mapping: "TenC"=1, "Shed"=2, "Othr"=3, "Gar2"=4 (missing = 0).
# as.numeric() on a factor ignores extra named arguments and returns the
# alphabetical level codes, so train got Gar2=1 ... TenC=4 and the shifted
# test codes (Gar2=2, Othr=3, Shed=4) did not line up with the target either.
# An explicit lookup vector shared by both data sets applies the target.
# Labels absent from the lookup (and NA) yield NA, which is then coded as 0.
misc_feature_codes <- c("TenC" = 1, "Shed" = 2, "Othr" = 3, "Gar2" = 4)

# Train dataset
MiscFeature <- as.factor(housing_train$MiscFeature)
levels(MiscFeature) # "Gar2" "Othr" "Shed" "TenC"
## [1] "Gar2" "Othr" "Shed" "TenC"
sum(is.na(MiscFeature)) # 1406 Missing Entries
## [1] 1406
MiscFeature <- unname(misc_feature_codes[as.character(MiscFeature)])
MiscFeature[is.na(MiscFeature)] <- 0
housing_train$MiscFeature <- MiscFeature

# Test dataset
MiscFeatureT <- as.factor(housing_test$MiscFeature)
levels(MiscFeatureT) # "Gar2" "Othr" "Shed"
## [1] "Gar2" "Othr" "Shed"
sum(is.na(MiscFeatureT)) # 1408 Missing Entries
## [1] 1408
MiscFeatureT <- unname(misc_feature_codes[as.character(MiscFeatureT)])
MiscFeatureT[is.na(MiscFeatureT)] <- 0
housing_test$MiscFeature <- MiscFeatureT
# Transforming SaleType column to numeric in train and test datasets.
# as.numeric() on a factor ignores extra named arguments and would return the
# level codes in the factor's sort order; an explicit lookup vector applies
# the intended mapping instead.
# Labels absent from the lookup (and NA) yield NA.
sale_type_codes <- c(
  "Oth" = 1, "ConLD" = 2, "ConLI" = 3, "ConLw" = 4, "Con" = 5,
  "COD" = 6, "New" = 7, "CWD" = 8, "WD" = 9
)

# Train dataset
SaleType <- as.factor(housing_train$SaleType)
levels(SaleType) # "COD" "Con" "ConLD" "ConLI" "ConLw" "CWD" "New" "Oth" "WD"
## [1] "COD" "Con" "ConLD" "ConLI" "ConLw" "CWD" "New" "Oth" "WD"
sum(is.na(SaleType)) # 0 Missing Entries
## [1] 0
SaleType <- unname(sale_type_codes[as.character(SaleType)])
housing_train$SaleType <- SaleType

# Test dataset
SaleTypeT <- as.factor(housing_test$SaleType)
levels(SaleTypeT) # "COD" "Con" "ConLD" "ConLI" "ConLw" "CWD" "New" "Oth" "WD"
## [1] "COD" "Con" "ConLD" "ConLI" "ConLw" "CWD" "New" "Oth" "WD"
sum(is.na(SaleTypeT)) # 1 Missing Entry
## [1] 1
SaleTypeT <- unname(sale_type_codes[as.character(SaleTypeT)])
SaleTypeT[is.na(SaleTypeT)] <- 1 # missing entry gets code 1, the "Oth" code
housing_test$SaleType <- SaleTypeT
# Transforming SaleCondition column to numeric in train and test datasets.
# as.numeric() on a factor ignores extra named arguments and would return the
# alphabetical level codes (Abnorml=1 ... Partial=6); an explicit lookup
# vector applies the intended mapping instead.
# Labels absent from the lookup (and NA) yield NA.
sale_condition_codes <- c(
  "Partial" = 1, "Family" = 2, "Alloca" = 3,
  "AdjLand" = 4, "Abnorml" = 5, "Normal" = 6
)

# Train dataset
SaleCondition <- as.factor(housing_train$SaleCondition)
levels(SaleCondition) # "Abnorml" "AdjLand" "Alloca" "Family" "Normal" "Partial"
## [1] "Abnorml" "AdjLand" "Alloca" "Family" "Normal" "Partial"
sum(is.na(SaleCondition)) # 0 Missing Entries
## [1] 0
SaleCondition <- unname(sale_condition_codes[as.character(SaleCondition)])
housing_train$SaleCondition <- SaleCondition

# Test dataset
SaleConditionT <- as.factor(housing_test$SaleCondition)
levels(SaleConditionT) # "Abnorml" "AdjLand" "Alloca" "Family" "Normal" "Partial"
## [1] "Abnorml" "AdjLand" "Alloca" "Family" "Normal" "Partial"
sum(is.na(SaleConditionT)) # 0 Missing Entries
## [1] 0
SaleConditionT <- unname(sale_condition_codes[as.character(SaleConditionT)])
housing_test$SaleCondition <- SaleConditionT