There are two data sets: one contains the training data and the other contains the test data. Both are loaded below.
library(readr)
# Load the training and test sets from their local CSV files, then list the
# column names of the training set.
housing_train <- read.csv(file = "/Users/jusimioni/Desktop/HouseTrain.csv")
housing_test <- read.csv(file = "/Users/jusimioni/Desktop/test.csv")
names(housing_train)
## [1] "Id" "MSSubClass" "MSZoning" "LotFrontage"
## [5] "LotArea" "Street" "Alley" "LotShape"
## [9] "LandContour" "Utilities" "LotConfig" "LandSlope"
## [13] "Neighborhood" "Condition1" "Condition2" "BldgType"
## [17] "HouseStyle" "OverallQual" "OverallCond" "YearBuilt"
## [21] "YearRemodAdd" "RoofStyle" "RoofMatl" "Exterior1st"
## [25] "Exterior2nd" "MasVnrType" "MasVnrArea" "ExterQual"
## [29] "ExterCond" "Foundation" "BsmtQual" "BsmtCond"
## [33] "BsmtExposure" "BsmtFinType1" "BsmtFinSF1" "BsmtFinType2"
## [37] "BsmtFinSF2" "BsmtUnfSF" "TotalBsmtSF" "Heating"
## [41] "HeatingQC" "CentralAir" "Electrical" "X1stFlrSF"
## [45] "X2ndFlrSF" "LowQualFinSF" "GrLivArea" "BsmtFullBath"
## [49] "BsmtHalfBath" "FullBath" "HalfBath" "BedroomAbvGr"
## [53] "KitchenAbvGr" "KitchenQual" "TotRmsAbvGrd" "Functional"
## [57] "Fireplaces" "FireplaceQu" "GarageType" "GarageYrBlt"
## [61] "GarageFinish" "GarageCars" "GarageArea" "GarageQual"
## [65] "GarageCond" "PavedDrive" "WoodDeckSF" "OpenPorchSF"
## [69] "EnclosedPorch" "X3SsnPorch" "ScreenPorch" "PoolArea"
## [73] "PoolQC" "Fence" "MiscFeature" "MiscVal"
## [77] "MoSold" "YrSold" "SaleType" "SaleCondition"
## [81] "SalePrice"
# Preview the first six rows of the training set.
head(housing_train, n = 6L)
## Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape LandContour
## 1 1 60 RL 65 8450 Pave <NA> Reg Lvl
## 2 2 20 RL 80 9600 Pave <NA> Reg Lvl
## 3 3 60 RL 68 11250 Pave <NA> IR1 Lvl
## 4 4 70 RL 60 9550 Pave <NA> IR1 Lvl
## 5 5 60 RL 84 14260 Pave <NA> IR1 Lvl
## 6 6 50 RL 85 14115 Pave <NA> IR1 Lvl
## Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType
## 1 AllPub Inside Gtl CollgCr Norm Norm 1Fam
## 2 AllPub FR2 Gtl Veenker Feedr Norm 1Fam
## 3 AllPub Inside Gtl CollgCr Norm Norm 1Fam
## 4 AllPub Corner Gtl Crawfor Norm Norm 1Fam
## 5 AllPub FR2 Gtl NoRidge Norm Norm 1Fam
## 6 AllPub Inside Gtl Mitchel Norm Norm 1Fam
## HouseStyle OverallQual OverallCond YearBuilt YearRemodAdd RoofStyle RoofMatl
## 1 2Story 7 5 2003 2003 Gable CompShg
## 2 1Story 6 8 1976 1976 Gable CompShg
## 3 2Story 7 5 2001 2002 Gable CompShg
## 4 2Story 7 5 1915 1970 Gable CompShg
## 5 2Story 8 5 2000 2000 Gable CompShg
## 6 1.5Fin 5 5 1993 1995 Gable CompShg
## Exterior1st Exterior2nd MasVnrType MasVnrArea ExterQual ExterCond Foundation
## 1 VinylSd VinylSd BrkFace 196 Gd TA PConc
## 2 MetalSd MetalSd None 0 TA TA CBlock
## 3 VinylSd VinylSd BrkFace 162 Gd TA PConc
## 4 Wd Sdng Wd Shng None 0 TA TA BrkTil
## 5 VinylSd VinylSd BrkFace 350 Gd TA PConc
## 6 VinylSd VinylSd None 0 TA TA Wood
## BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinSF1 BsmtFinType2
## 1 Gd TA No GLQ 706 Unf
## 2 Gd TA Gd ALQ 978 Unf
## 3 Gd TA Mn GLQ 486 Unf
## 4 TA Gd No ALQ 216 Unf
## 5 Gd TA Av GLQ 655 Unf
## 6 Gd TA No GLQ 732 Unf
## BsmtFinSF2 BsmtUnfSF TotalBsmtSF Heating HeatingQC CentralAir Electrical
## 1 0 150 856 GasA Ex Y SBrkr
## 2 0 284 1262 GasA Ex Y SBrkr
## 3 0 434 920 GasA Ex Y SBrkr
## 4 0 540 756 GasA Gd Y SBrkr
## 5 0 490 1145 GasA Ex Y SBrkr
## 6 0 64 796 GasA Ex Y SBrkr
## X1stFlrSF X2ndFlrSF LowQualFinSF GrLivArea BsmtFullBath BsmtHalfBath FullBath
## 1 856 854 0 1710 1 0 2
## 2 1262 0 0 1262 0 1 2
## 3 920 866 0 1786 1 0 2
## 4 961 756 0 1717 1 0 1
## 5 1145 1053 0 2198 1 0 2
## 6 796 566 0 1362 1 0 1
## HalfBath BedroomAbvGr KitchenAbvGr KitchenQual TotRmsAbvGrd Functional
## 1 1 3 1 Gd 8 Typ
## 2 0 3 1 TA 6 Typ
## 3 1 3 1 Gd 6 Typ
## 4 0 3 1 Gd 7 Typ
## 5 1 4 1 Gd 9 Typ
## 6 1 1 1 TA 5 Typ
## Fireplaces FireplaceQu GarageType GarageYrBlt GarageFinish GarageCars
## 1 0 <NA> Attchd 2003 RFn 2
## 2 1 TA Attchd 1976 RFn 2
## 3 1 TA Attchd 2001 RFn 2
## 4 1 Gd Detchd 1998 Unf 3
## 5 1 TA Attchd 2000 RFn 3
## 6 0 <NA> Attchd 1993 Unf 2
## GarageArea GarageQual GarageCond PavedDrive WoodDeckSF OpenPorchSF
## 1 548 TA TA Y 0 61
## 2 460 TA TA Y 298 0
## 3 608 TA TA Y 0 42
## 4 642 TA TA Y 0 35
## 5 836 TA TA Y 192 84
## 6 480 TA TA Y 40 30
## EnclosedPorch X3SsnPorch ScreenPorch PoolArea PoolQC Fence MiscFeature
## 1 0 0 0 0 <NA> <NA> <NA>
## 2 0 0 0 0 <NA> <NA> <NA>
## 3 0 0 0 0 <NA> <NA> <NA>
## 4 272 0 0 0 <NA> <NA> <NA>
## 5 0 0 0 0 <NA> <NA> <NA>
## 6 0 320 0 0 <NA> MnPrv Shed
## MiscVal MoSold YrSold SaleType SaleCondition SalePrice
## 1 0 2 2008 WD Normal 208500
## 2 0 5 2007 WD Normal 181500
## 3 0 9 2008 WD Normal 223500
## 4 0 2 2006 WD Abnorml 140000
## 5 0 12 2008 WD Normal 250000
## 6 700 10 2009 WD Normal 143000
Looking at the training data (`housing_train`), many columns need to be
changed: in some, NA values are replaced with 0, and several categorical
columns are converted to numeric so they can be used in the analysis. You can
check the explanation of each column, together with the changes made, here:
< Input Column guide here > .
## Data Cleaning Transforming the data to factor.
# Encode MSZoning numerically with one explicit mapping for train and test.
# as.numeric() silently ignores extra named arguments, so the code is taken
# from the position of each level in `mszoning_levels` (1 = "C (all)", ...,
# 5 = "RM"), which keeps both data sets on the same codes.
mszoning_levels <- c("C (all)", "FV", "RH", "RL", "RM")
levels(as.factor(housing_train$MSZoning))
## [1] "C (all)" "FV" "RH" "RL" "RM"
levels(as.factor(housing_test$MSZoning))
## [1] "C (all)" "FV" "RH" "RL" "RM"
sum(is.na(housing_train$MSZoning)) # no missing values
## [1] 0
sum(is.na(housing_test$MSZoning)) # 4 missing values
## [1] 4
# Change of categories to numeric in the train dataset
housing_train$MSZoning <- as.numeric(
  factor(housing_train$MSZoning, levels = mszoning_levels)
)
# Change of categories to numeric in the test dataset
housing_test$MSZoning <- as.numeric(
  factor(housing_test$MSZoning, levels = mszoning_levels)
)
# Imputing 0 for the missing test values
housing_test$MSZoning[is.na(housing_test$MSZoning)] <- 0
# LotFrontage (train): count the missing entries, fill them with 0 and confirm
# that none remain.
sum(is.na(housing_train$LotFrontage)) # 259 missing values
## [1] 259
train_frontage_na <- is.na(housing_train$LotFrontage)
housing_train$LotFrontage <- replace(
  housing_train$LotFrontage, train_frontage_na, 0
)
sum(is.na(housing_train$LotFrontage))
## [1] 0
# every missing value is now 0
# LotFrontage (test): same treatment.
sum(is.na(housing_test$LotFrontage)) # 227 missing values
## [1] 227
test_frontage_na <- is.na(housing_test$LotFrontage)
housing_test$LotFrontage <- replace(
  housing_test$LotFrontage, test_frontage_na, 0
)
sum(is.na(housing_test$LotFrontage))
## [1] 0
# every missing value is now 0
# Encode Street numerically in both data sets: Pave = 1, Grvl = 2.
# as.numeric() ignores extra named arguments and returned the alphabetical
# factor codes (Grvl = 1, Pave = 2), so the intended order is pinned through
# the `levels` argument of factor().
street_levels <- c("Pave", "Grvl")
sum(is.na(housing_train$Street)) # 0 missing values
## [1] 0
housing_train$Street <- as.numeric(
  factor(housing_train$Street, levels = street_levels)
)
sum(is.na(housing_test$Street)) # 0 missing values
## [1] 0
housing_test$Street <- as.numeric(
  factor(housing_test$Street, levels = street_levels)
)
# Pave is now 1 and Grvl is now 2 in both data sets
# Encode Alley numerically in both data sets: Pave = 1, Grvl = 2, missing = 0.
# as.numeric() ignores extra named arguments and returned the alphabetical
# factor codes (Grvl = 1, Pave = 2), so the intended order is pinned through
# the `levels` argument of factor().
alley_levels <- c("Pave", "Grvl")
levels(as.factor(housing_train$Alley))
## [1] "Grvl" "Pave"
sum(is.na(housing_train$Alley)) # 1369 NA values
## [1] 1369
housing_train$Alley <- as.numeric(
  factor(housing_train$Alley, levels = alley_levels)
)
housing_train$Alley[is.na(housing_train$Alley)] <- 0 # changing NA values to 0
sum(is.na(housing_train$Alley)) # no missing values
## [1] 0
levels(as.factor(housing_test$Alley))
## [1] "Grvl" "Pave"
sum(is.na(housing_test$Alley)) # 1352 NA values
## [1] 1352
housing_test$Alley <- as.numeric(
  factor(housing_test$Alley, levels = alley_levels)
)
housing_test$Alley[is.na(housing_test$Alley)] <- 0 # changing NA values to 0
sum(is.na(housing_test$Alley)) # no missing values
## [1] 0
# Encode LotShape numerically with one explicit mapping for train and test:
# IR1 = 1, IR2 = 2, IR3 = 3, Reg = 4.
# as.numeric() ignores extra named arguments, so the code is taken from the
# position of each level in `lotshape_levels`.
lotshape_levels <- c("IR1", "IR2", "IR3", "Reg")
sum(is.na(housing_train$LotShape)) # no missing values
## [1] 0
levels(as.factor(housing_train$LotShape)) # 4 levels
## [1] "IR1" "IR2" "IR3" "Reg"
housing_train$LotShape <- as.numeric(
  factor(housing_train$LotShape, levels = lotshape_levels)
)
sum(is.na(housing_test$LotShape)) # no missing values
## [1] 0
levels(as.factor(housing_test$LotShape)) # 4 levels
## [1] "IR1" "IR2" "IR3" "Reg"
housing_test$LotShape <- as.numeric(
  factor(housing_test$LotShape, levels = lotshape_levels)
)
# Encode LandContour numerically with one explicit mapping for train and test:
# Bnk = 1, HLS = 2, Low = 3, Lvl = 4.
# as.numeric() ignores extra named arguments, so the code is taken from the
# position of each level in `landcontour_levels`.
landcontour_levels <- c("Bnk", "HLS", "Low", "Lvl")
sum(is.na(housing_train$LandContour)) # no missing values
## [1] 0
levels(as.factor(housing_train$LandContour)) # 4 levels
## [1] "Bnk" "HLS" "Low" "Lvl"
housing_train$LandContour <- as.numeric(
  factor(housing_train$LandContour, levels = landcontour_levels)
)
sum(is.na(housing_test$LandContour)) # no missing values
## [1] 0
levels(as.factor(housing_test$LandContour)) # 4 levels
## [1] "Bnk" "HLS" "Low" "Lvl"
housing_test$LandContour <- as.numeric(
  factor(housing_test$LandContour, levels = landcontour_levels)
)
# Encode Utilities numerically with one explicit mapping for train and test:
# AllPub = 1, NoSeWa = 2; missing test values become 0.
# as.numeric() ignores extra named arguments; pinning the levels keeps the
# codes identical even though the test set only contains "AllPub".
utilities_levels <- c("AllPub", "NoSeWa")
sum(is.na(housing_train$Utilities)) # no missing values
## [1] 0
levels(as.factor(housing_train$Utilities)) # 2 levels: "AllPub", "NoSeWa"
## [1] "AllPub" "NoSeWa"
housing_train$Utilities <- as.numeric(
  factor(housing_train$Utilities, levels = utilities_levels)
)
sum(is.na(housing_test$Utilities)) # 2 missing values
## [1] 2
levels(as.factor(housing_test$Utilities)) # only "AllPub" occurs in test
## [1] "AllPub"
housing_test$Utilities <- as.numeric(
  factor(housing_test$Utilities, levels = utilities_levels)
)
housing_test$Utilities[is.na(housing_test$Utilities)] <- 0
# Encode LotConfig numerically with one explicit mapping for train and test:
# Corner = 1, CulDSac = 2, FR2 = 3, FR3 = 4, Inside = 5.
# as.numeric() ignores extra named arguments, so the code is taken from the
# position of each level in `lotconfig_levels`.
lotconfig_levels <- c("Corner", "CulDSac", "FR2", "FR3", "Inside")
sum(is.na(housing_train$LotConfig)) # no missing values
## [1] 0
levels(as.factor(housing_train$LotConfig))
## [1] "Corner" "CulDSac" "FR2" "FR3" "Inside"
housing_train$LotConfig <- as.numeric(
  factor(housing_train$LotConfig, levels = lotconfig_levels)
)
sum(is.na(housing_test$LotConfig)) # no missing values
## [1] 0
levels(as.factor(housing_test$LotConfig))
## [1] "Corner" "CulDSac" "FR2" "FR3" "Inside"
housing_test$LotConfig <- as.numeric(
  factor(housing_test$LotConfig, levels = lotconfig_levels)
)
# Encode LandSlope numerically with one explicit mapping for train and test:
# Gtl = 1, Mod = 2, Sev = 3.
# as.numeric() ignores extra named arguments, so the code is taken from the
# position of each level in `landslope_levels`. The test result is written
# back to `LandSlope` itself (it previously went to a new `lslope` column,
# leaving `LandSlope` untouched).
landslope_levels <- c("Gtl", "Mod", "Sev")
sum(is.na(housing_train$LandSlope)) # no missing values
## [1] 0
levels(as.factor(housing_train$LandSlope))
## [1] "Gtl" "Mod" "Sev"
housing_train$LandSlope <- as.numeric(
  factor(housing_train$LandSlope, levels = landslope_levels)
)
sum(is.na(housing_test$LandSlope)) # no missing values
## [1] 0
levels(as.factor(housing_test$LandSlope))
## [1] "Gtl" "Mod" "Sev"
housing_test$LandSlope <- as.numeric(
  factor(housing_test$LandSlope, levels = landslope_levels)
)
# Encode Neighborhood numerically with one explicit mapping for train and test.
# The code is the position of each name in `neighborhood_levels` (1-25).
# Fixes: as.numeric() ignores extra named arguments; levels() of a numeric
# vector is NULL, and assigning NULL deleted housing_test$Neighborhood while
# housing_train was never updated. The mapping keys are corrected to the
# observed level names ("Blmngtn", "Blueste", "NAmes").
# Recorded output of the test-set checks came from the deleted column and is
# omitted; re-knit to regenerate it.
neighborhood_levels <- c(
  "Blmngtn", "Blueste", "BrDale", "BrkSide", "ClearCr", "CollgCr", "Crawfor",
  "Edwards", "Gilbert", "IDOTRR", "MeadowV", "Mitchel", "NAmes", "NoRidge",
  "NPkVill", "NridgHt", "NWAmes", "OldTown", "SWISU", "Sawyer", "SawyerW",
  "Somerst", "StoneBr", "Timber", "Veenker"
)
sum(is.na(housing_train$Neighborhood)) # no missing values
## [1] 0
levels(as.factor(housing_train$Neighborhood))
## [1] "Blmngtn" "Blueste" "BrDale" "BrkSide" "ClearCr" "CollgCr" "Crawfor"
## [8] "Edwards" "Gilbert" "IDOTRR" "MeadowV" "Mitchel" "NAmes" "NoRidge"
## [15] "NPkVill" "NridgHt" "NWAmes" "OldTown" "Sawyer" "SawyerW" "Somerst"
## [22] "StoneBr" "SWISU" "Timber" "Veenker"
housing_train$Neighborhood <- as.numeric(
  factor(housing_train$Neighborhood, levels = neighborhood_levels)
)
sum(is.na(housing_test$Neighborhood))
levels(as.factor(housing_test$Neighborhood))
# Any test value outside `neighborhood_levels` would become NA here.
housing_test$Neighborhood <- as.numeric(
  factor(housing_test$Neighborhood, levels = neighborhood_levels)
)
# Encode Condition1 numerically with one explicit mapping for train and test:
# Artery = 1, Feedr = 2, Norm = 3, RRNn = 4, RRAn = 5, PosN = 6, PosA = 7,
# RRNe = 8, RRAe = 9.
# Fixes: as.numeric() ignores extra named arguments; levels() of a numeric
# vector is NULL, and assigning NULL deleted housing_test$Condition1 while
# housing_train was never updated.
# Recorded output of the test-set checks came from the deleted column and is
# omitted; re-knit to regenerate it.
condition1_levels <- c(
  "Artery", "Feedr", "Norm", "RRNn", "RRAn", "PosN", "PosA", "RRNe", "RRAe"
)
sum(is.na(housing_train$Condition1)) # no missing values
## [1] 0
levels(as.factor(housing_train$Condition1))
## [1] "Artery" "Feedr" "Norm" "PosA" "PosN" "RRAe" "RRAn" "RRNe"
## [9] "RRNn"
housing_train$Condition1 <- as.numeric(
  factor(housing_train$Condition1, levels = condition1_levels)
)
sum(is.na(housing_test$Condition1))
levels(as.factor(housing_test$Condition1))
housing_test$Condition1 <- as.numeric(
  factor(housing_test$Condition1, levels = condition1_levels)
)
# Encode Condition2 numerically with one explicit mapping for train and test:
# Artery = 1, Feedr = 2, Norm = 3, RRNn = 4, RRAn = 5, PosN = 6, PosA = 7,
# RRNe = 8, RRAe = 9.
# Fixes: as.numeric() ignores extra named arguments; levels() of a numeric
# vector is NULL, and assigning NULL deleted housing_test$Condition2 while
# housing_train was never updated.
# Recorded output of the test-set checks came from the deleted column and is
# omitted; re-knit to regenerate it.
condition2_levels <- c(
  "Artery", "Feedr", "Norm", "RRNn", "RRAn", "PosN", "PosA", "RRNe", "RRAe"
)
sum(is.na(housing_train$Condition2)) # no missing values
## [1] 0
levels(as.factor(housing_train$Condition2))
## [1] "Artery" "Feedr" "Norm" "PosA" "PosN" "RRAe" "RRAn" "RRNn"
housing_train$Condition2 <- as.numeric(
  factor(housing_train$Condition2, levels = condition2_levels)
)
sum(is.na(housing_test$Condition2))
levels(as.factor(housing_test$Condition2))
housing_test$Condition2 <- as.numeric(
  factor(housing_test$Condition2, levels = condition2_levels)
)
# Encode BldgType numerically with one explicit mapping for train and test:
# 1Fam = 1, 2fmCon = 2, Duplex = 3, TwnhsE = 4, Twnhs = 5.
# Fixes: as.numeric() ignores extra named arguments; levels() of a numeric
# vector is NULL, and assigning NULL deleted housing_test$BldgType while
# housing_train was never updated. The mapping keys now use the observed
# spellings ("2fmCon", "Duplex", "Twnhs").
# Recorded output of the test-set checks came from the deleted column and is
# omitted; re-knit to regenerate it.
bldgtype_levels <- c("1Fam", "2fmCon", "Duplex", "TwnhsE", "Twnhs")
sum(is.na(housing_train$BldgType)) # no missing values
## [1] 0
levels(as.factor(housing_train$BldgType))
## [1] "1Fam" "2fmCon" "Duplex" "Twnhs" "TwnhsE"
housing_train$BldgType <- as.numeric(
  factor(housing_train$BldgType, levels = bldgtype_levels)
)
sum(is.na(housing_test$BldgType))
levels(as.factor(housing_test$BldgType))
housing_test$BldgType <- as.numeric(
  factor(housing_test$BldgType, levels = bldgtype_levels)
)
# Encode HouseStyle numerically with one explicit mapping for train and test:
# 1Story = 1, 1.5Fin = 2, 1.5Unf = 3, 2Story = 4, 2.5Fin = 5, 2.5Unf = 6,
# SFoyer = 7, SLvl = 8.
# Fixes: as.numeric() ignores extra named arguments; levels() of a numeric
# vector is NULL, and assigning NULL deleted housing_test$HouseStyle while
# housing_train was never updated.
# Recorded output of the test-set checks came from the deleted column and is
# omitted; re-knit to regenerate it.
housestyle_levels <- c(
  "1Story", "1.5Fin", "1.5Unf", "2Story", "2.5Fin", "2.5Unf", "SFoyer", "SLvl"
)
sum(is.na(housing_train$HouseStyle)) # no missing values
## [1] 0
levels(as.factor(housing_train$HouseStyle))
## [1] "1.5Fin" "1.5Unf" "1Story" "2.5Fin" "2.5Unf" "2Story" "SFoyer" "SLvl"
housing_train$HouseStyle <- as.numeric(
  factor(housing_train$HouseStyle, levels = housestyle_levels)
)
sum(is.na(housing_test$HouseStyle))
levels(as.factor(housing_test$HouseStyle))
housing_test$HouseStyle <- as.numeric(
  factor(housing_test$HouseStyle, levels = housestyle_levels)
)
# Missing-value checks on four test-set columns. Each column is viewed as a
# factor and its NA entries are counted.
# OverallQual
OverallQual <- as.factor(x = housing_test$OverallQual)
sum(is.na(OverallQual)) # nothing missing
## [1] 0
# OverallCond
OverallCond <- as.factor(x = housing_test$OverallCond)
sum(is.na(OverallCond)) # nothing missing
## [1] 0
# YearBuilt
YearBuilt <- as.factor(x = housing_test$YearBuilt)
sum(is.na(YearBuilt)) # nothing missing
## [1] 0
# YearRemodAdd
YearRemodAdd <- as.factor(x = housing_test$YearRemodAdd)
sum(is.na(YearRemodAdd)) # nothing missing
## [1] 0
# Planned (still disabled): a new column for the gap between the build year
# and the remodel year. NOTE(review): the draft below adds the two years; a
# difference would need `-` instead of `+`.
#housing_train$AmountYears <- (housing_train$YearBuilt + housing_train$YearRemodAdd)
# Encode RoofStyle numerically with one explicit mapping for train and test:
# Flat = 1, Gable = 2, Gambrel = 3, Hip = 4, Mansard = 5, Shed = 6.
# Fixes: the train step encoded the BldgType vector instead of RoofStyle;
# as.numeric() ignores extra named arguments; levels() of a numeric vector is
# NULL, and assigning NULL deleted housing_test$RoofStyle while housing_train
# was never updated.
# Recorded output of the test-set checks came from the deleted column and is
# omitted; re-knit to regenerate it.
roofstyle_levels <- c("Flat", "Gable", "Gambrel", "Hip", "Mansard", "Shed")
sum(is.na(housing_train$RoofStyle)) # no missing values
## [1] 0
levels(as.factor(housing_train$RoofStyle))
## [1] "Flat" "Gable" "Gambrel" "Hip" "Mansard" "Shed"
housing_train$RoofStyle <- as.numeric(
  factor(housing_train$RoofStyle, levels = roofstyle_levels)
)
sum(is.na(housing_test$RoofStyle))
levels(as.factor(housing_test$RoofStyle))
housing_test$RoofStyle <- as.numeric(
  factor(housing_test$RoofStyle, levels = roofstyle_levels)
)
# Encode RoofMatl numerically with one explicit mapping for train and test:
# ClyTile = 1, CompShg = 2, Membran = 3, Metal = 4, Roll = 5, Tar&Grv = 6,
# WdShake = 7, WdShngl = 8.
# Fixes: as.numeric() ignores extra named arguments; levels() of a numeric
# vector is NULL, and assigning NULL deleted housing_test$RoofMatl while
# housing_train was never updated.
# Recorded output of the test-set checks came from the deleted column and is
# omitted; re-knit to regenerate it.
roofmatl_levels <- c(
  "ClyTile", "CompShg", "Membran", "Metal", "Roll", "Tar&Grv", "WdShake",
  "WdShngl"
)
sum(is.na(housing_train$RoofMatl)) # no missing values
## [1] 0
levels(as.factor(housing_train$RoofMatl))
## [1] "ClyTile" "CompShg" "Membran" "Metal" "Roll" "Tar&Grv" "WdShake"
## [8] "WdShngl"
housing_train$RoofMatl <- as.numeric(
  factor(housing_train$RoofMatl, levels = roofmatl_levels)
)
sum(is.na(housing_test$RoofMatl))
levels(as.factor(housing_test$RoofMatl))
housing_test$RoofMatl <- as.numeric(
  factor(housing_test$RoofMatl, levels = roofmatl_levels)
)
# Encode Exterior1st numerically with one explicit mapping for train and test.
# The code is the position of each name in `exterior1st_levels` (1-15).
# Fixes: as.numeric() ignores extra named arguments; levels() of a numeric
# vector is NULL, and assigning NULL deleted housing_test$Exterior1st while
# housing_train was never updated.
# Recorded output of the test-set checks came from the deleted column and is
# omitted; re-knit to regenerate it.
exterior1st_levels <- c(
  "AsbShng", "AsphShn", "BrkComm", "BrkFace", "CBlock", "CemntBd", "HdBoard",
  "ImStucc", "MetalSd", "Plywood", "Stone", "Stucco", "VinylSd", "Wd Sdng",
  "WdShing"
)
sum(is.na(housing_train$Exterior1st)) # no missing values
## [1] 0
levels(as.factor(housing_train$Exterior1st))
## [1] "AsbShng" "AsphShn" "BrkComm" "BrkFace" "CBlock" "CemntBd" "HdBoard"
## [8] "ImStucc" "MetalSd" "Plywood" "Stone" "Stucco" "VinylSd" "Wd Sdng"
## [15] "WdShing"
housing_train$Exterior1st <- as.numeric(
  factor(housing_train$Exterior1st, levels = exterior1st_levels)
)
sum(is.na(housing_test$Exterior1st))
levels(as.factor(housing_test$Exterior1st))
housing_test$Exterior1st <- as.numeric(
  factor(housing_test$Exterior1st, levels = exterior1st_levels)
)
# Encode Exterior2nd numerically with one explicit mapping for train and test.
# The code is the position of each name in `exterior2nd_levels` (1-16), which
# lists the levels observed in the train set.
# Fixes: as.numeric() ignores extra named arguments; the old mapping keys did
# not match the observed spellings ("Brk Cmn", "CmentBd", "Wd Shng") and
# lacked "Other"; levels() of a numeric vector is NULL, and assigning NULL
# deleted housing_test$Exterior2nd while housing_train was never updated.
# Recorded output of the test-set checks came from the deleted column and is
# omitted; re-knit to regenerate it.
exterior2nd_levels <- c(
  "AsbShng", "AsphShn", "Brk Cmn", "BrkFace", "CBlock", "CmentBd", "HdBoard",
  "ImStucc", "MetalSd", "Other", "Plywood", "Stone", "Stucco", "VinylSd",
  "Wd Sdng", "Wd Shng"
)
sum(is.na(housing_train$Exterior2nd)) # no missing values
## [1] 0
levels(as.factor(housing_train$Exterior2nd))
## [1] "AsbShng" "AsphShn" "Brk Cmn" "BrkFace" "CBlock" "CmentBd" "HdBoard"
## [8] "ImStucc" "MetalSd" "Other" "Plywood" "Stone" "Stucco" "VinylSd"
## [15] "Wd Sdng" "Wd Shng"
housing_train$Exterior2nd <- as.numeric(
  factor(housing_train$Exterior2nd, levels = exterior2nd_levels)
)
sum(is.na(housing_test$Exterior2nd))
levels(as.factor(housing_test$Exterior2nd))
# Missing test values, and any value outside `exterior2nd_levels`, stay NA.
housing_test$Exterior2nd <- as.numeric(
  factor(housing_test$Exterior2nd, levels = exterior2nd_levels)
)
# Encode MasVnrType numerically in both data sets with the intended codes:
# BrkCmn = 1, BrkFace = 2, None = 0, Stone = 4. Missing values stay NA.
# Fixes: as.numeric() ignores extra named arguments, so "None" was coded 3
# (its alphabetical position) rather than 0; levels() of a numeric vector is
# NULL, and assigning NULL deleted the MasVnrType column from both data sets.
# NOTE(review): code 3 is unused in the mapping as written - confirm that
# Stone = 4 is intended.
masvnr_codes <- c(BrkCmn = 1, BrkFace = 2, None = 0, Stone = 4)
sum(is.na(housing_train$MasVnrType)) # 8 missing values
## [1] 8
levels(as.factor(housing_train$MasVnrType))
## [1] "BrkCmn" "BrkFace" "None" "Stone"
# Look each value up by name; NA (and any unknown value) maps to NA.
housing_train$MasVnrType <- unname(
  masvnr_codes[as.character(housing_train$MasVnrType)]
)
sum(is.na(housing_test$MasVnrType)) # 16 missing values
## [1] 16
levels(as.factor(housing_test$MasVnrType))
## [1] "BrkCmn" "BrkFace" "None" "Stone"
housing_test$MasVnrType <- unname(
  masvnr_codes[as.character(housing_test$MasVnrType)]
)
# Summarise the encoded test column instead of printing all 1459 values. The
# previously recorded listing showed the old alphabetical codes; re-knit to
# regenerate the output.
table(housing_test$MasVnrType, useNA = "ifany")
# Encode ExterQual numerically with one explicit ordinal mapping for train and
# test: Ex = 1, Gd = 2, TA = 3, Fa = 4, Po = 5.
# Fixes: as.numeric() ignores extra named arguments (alphabetical codes put
# "Fa" before "Gd"); levels() of a numeric vector is NULL, and assigning NULL
# deleted the ExterQual column from both data sets.
quality_levels <- c("Ex", "Gd", "TA", "Fa", "Po")
sum(is.na(housing_train$ExterQual)) # 0 missing values
## [1] 0
levels(as.factor(housing_train$ExterQual))
## [1] "Ex" "Fa" "Gd" "TA"
housing_train$ExterQual <- as.numeric(
  factor(housing_train$ExterQual, levels = quality_levels)
)
sum(is.na(housing_test$ExterQual)) # no missing values
## [1] 0
levels(as.factor(housing_test$ExterQual))
## [1] "Ex" "Fa" "Gd" "TA"
housing_test$ExterQual <- as.numeric(
  factor(housing_test$ExterQual, levels = quality_levels)
)
# Encode ExterCond numerically with one explicit ordinal mapping for train and
# test: Ex = 1, Gd = 2, TA = 3, Fa = 4, Po = 5.
# Fixes: as.numeric() ignores extra named arguments (alphabetical codes do not
# follow the quality order); levels() of a numeric vector is NULL, and
# assigning NULL deleted the ExterCond column from both data sets.
quality_levels <- c("Ex", "Gd", "TA", "Fa", "Po")
sum(is.na(housing_train$ExterCond)) # no missing values
## [1] 0
levels(as.factor(housing_train$ExterCond))
## [1] "Ex" "Fa" "Gd" "Po" "TA"
housing_train$ExterCond <- as.numeric(
  factor(housing_train$ExterCond, levels = quality_levels)
)
sum(is.na(housing_test$ExterCond)) # no missing values
## [1] 0
levels(as.factor(housing_test$ExterCond))
## [1] "Ex" "Fa" "Gd" "Po" "TA"
housing_test$ExterCond <- as.numeric(
  factor(housing_test$ExterCond, levels = quality_levels)
)
# Encode Foundation numerically with one explicit mapping for train and test:
# BrkTil = 1, CBlock = 2, PConc = 3, Slab = 4, Stone = 5, Wood = 6.
# Fixes: as.numeric() ignores extra named arguments; levels() of a numeric
# vector is NULL, and assigning NULL deleted the Foundation column from both
# data sets.
foundation_levels <- c("BrkTil", "CBlock", "PConc", "Slab", "Stone", "Wood")
sum(is.na(housing_train$Foundation)) # no missing values
## [1] 0
levels(as.factor(housing_train$Foundation))
## [1] "BrkTil" "CBlock" "PConc" "Slab" "Stone" "Wood"
housing_train$Foundation <- as.numeric(
  factor(housing_train$Foundation, levels = foundation_levels)
)
sum(is.na(housing_test$Foundation)) # no missing values
## [1] 0
levels(as.factor(housing_test$Foundation))
## [1] "BrkTil" "CBlock" "PConc" "Slab" "Stone" "Wood"
housing_test$Foundation <- as.numeric(
  factor(housing_test$Foundation, levels = foundation_levels)
)
BsmtQual
# Encode BsmtQual numerically with one explicit ordinal mapping for train and
# test: Ex = 1, Gd = 2, TA = 3, Fa = 4, Po = 5, missing = 0.
# Fixes: as.numeric() ignores extra named arguments, so neither the quality
# order nor the "NA" = 0 rule was applied (the train mapping also listed "Fa"
# twice); levels() of a numeric vector is NULL, and assigning NULL deleted the
# BsmtQual column from both data sets.
quality_levels <- c("Ex", "Gd", "TA", "Fa", "Po")
sum(is.na(housing_train$BsmtQual)) # 37 missing values
## [1] 37
levels(as.factor(housing_train$BsmtQual))
## [1] "Ex" "Fa" "Gd" "TA"
housing_train$BsmtQual <- as.numeric(
  factor(housing_train$BsmtQual, levels = quality_levels)
)
housing_train$BsmtQual[is.na(housing_train$BsmtQual)] <- 0
sum(is.na(housing_test$BsmtQual)) # 44 missing values
## [1] 44
levels(as.factor(housing_test$BsmtQual))
## [1] "Ex" "Fa" "Gd" "TA"
housing_test$BsmtQual <- as.numeric(
  factor(housing_test$BsmtQual, levels = quality_levels)
)
housing_test$BsmtQual[is.na(housing_test$BsmtQual)] <- 0
BsmtCond
# Encode BsmtCond numerically with one explicit ordinal mapping for train and
# test: Ex = 1, Gd = 2, TA = 3, Fa = 4, Po = 5, missing = 0.
# Fixes: as.numeric() ignores extra named arguments, so neither the quality
# order nor the "NA" = 0 rule was applied; levels() of a numeric vector is
# NULL, so the train column was deleted, and the test step wrote to
# housing_train, leaving housing_test$BsmtCond unencoded.
quality_levels <- c("Ex", "Gd", "TA", "Fa", "Po")
sum(is.na(housing_train$BsmtCond)) # 37 missing values
## [1] 37
levels(as.factor(housing_train$BsmtCond))
## [1] "Fa" "Gd" "Po" "TA"
housing_train$BsmtCond <- as.numeric(
  factor(housing_train$BsmtCond, levels = quality_levels)
)
housing_train$BsmtCond[is.na(housing_train$BsmtCond)] <- 0
sum(is.na(housing_test$BsmtCond)) # 45 missing values
## [1] 45
levels(as.factor(housing_test$BsmtCond))
## [1] "Fa" "Gd" "Po" "TA"
housing_test$BsmtCond <- as.numeric(
  factor(housing_test$BsmtCond, levels = quality_levels)
)
housing_test$BsmtCond[is.na(housing_test$BsmtCond)] <- 0
BsmtExposure
# Encode BsmtExposure numerically with one explicit mapping for train and
# test: Av = 1, Gd = 2, Mn = 3, No = 4, missing = 0.
# Fixes: as.numeric() ignores extra named arguments, so the "NA" = 0 rule was
# never applied; levels() of a numeric vector is NULL, so the train column was
# deleted; the test step stored the test column in `BsmtCond`, re-checked the
# train vector, and wrote to housing_train.
# Recorded output of the test-set checks therefore described the train data
# and is omitted; re-knit to regenerate it.
bsmtexposure_levels <- c("Av", "Gd", "Mn", "No")
sum(is.na(housing_train$BsmtExposure)) # 38 missing values
## [1] 38
levels(as.factor(housing_train$BsmtExposure))
## [1] "Av" "Gd" "Mn" "No"
housing_train$BsmtExposure <- as.numeric(
  factor(housing_train$BsmtExposure, levels = bsmtexposure_levels)
)
housing_train$BsmtExposure[is.na(housing_train$BsmtExposure)] <- 0
sum(is.na(housing_test$BsmtExposure))
levels(as.factor(housing_test$BsmtExposure))
housing_test$BsmtExposure <- as.numeric(
  factor(housing_test$BsmtExposure, levels = bsmtexposure_levels)
)
housing_test$BsmtExposure[is.na(housing_test$BsmtExposure)] <- 0
BsmtFinType1
# Encode BsmtFinType1 numerically with one explicit mapping for train and
# test: ALQ = 1, BLQ = 2, GLQ = 3, LwQ = 4, Rec = 5, Unf = 6. Missing values
# stay NA.
# Fixes: as.numeric() ignores extra named arguments; levels() of a numeric
# vector is NULL, so the train column was deleted, and the test step wrote to
# housing_train, leaving housing_test$BsmtFinType1 unencoded.
bsmtfintype_levels <- c("ALQ", "BLQ", "GLQ", "LwQ", "Rec", "Unf")
sum(is.na(housing_train$BsmtFinType1)) # 37 missing values
## [1] 37
levels(as.factor(housing_train$BsmtFinType1))
## [1] "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
housing_train$BsmtFinType1 <- as.numeric(
  factor(housing_train$BsmtFinType1, levels = bsmtfintype_levels)
)
sum(is.na(housing_test$BsmtFinType1)) # 42 missing values
## [1] 42
levels(as.factor(housing_test$BsmtFinType1))
## [1] "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
housing_test$BsmtFinType1 <- as.numeric(
  factor(housing_test$BsmtFinType1, levels = bsmtfintype_levels)
)
BsmtFinSF1
# BsmtFinSF1: count missing entries, first in the train set and then in the
# test set (the scratch vector is reused).
BsmtFinSF1 <- as.factor(x = housing_train$BsmtFinSF1)
sum(is.na(BsmtFinSF1))
## [1] 0
BsmtFinSF1 <- as.factor(x = housing_test$BsmtFinSF1)
sum(is.na(BsmtFinSF1)) # one value missing
## [1] 1
BsmtFinType2
# Encode BsmtFinType2 numerically with one explicit mapping for train and
# test: ALQ = 1, BLQ = 2, GLQ = 3, LwQ = 4, Rec = 5, Unf = 6. Missing values
# stay NA.
# Fixes: as.numeric() ignores extra named arguments; levels() of a numeric
# vector is NULL, so the train column was deleted, and the test step wrote to
# housing_train, leaving housing_test$BsmtFinType2 unencoded.
bsmtfintype_levels <- c("ALQ", "BLQ", "GLQ", "LwQ", "Rec", "Unf")
sum(is.na(housing_train$BsmtFinType2)) # 38 missing values
## [1] 38
levels(as.factor(housing_train$BsmtFinType2))
## [1] "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
housing_train$BsmtFinType2 <- as.numeric(
  factor(housing_train$BsmtFinType2, levels = bsmtfintype_levels)
)
sum(is.na(housing_test$BsmtFinType2)) # 42 missing values
## [1] 42
levels(as.factor(housing_test$BsmtFinType2))
## [1] "ALQ" "BLQ" "GLQ" "LwQ" "Rec" "Unf"
housing_test$BsmtFinType2 <- as.numeric(
  factor(housing_test$BsmtFinType2, levels = bsmtfintype_levels)
)
BsmtUnfSF
# BsmtUnfSF: count missing entries, first in the train set and then in the
# test set (the scratch vector is reused).
BsmtUnfSF <- as.factor(x = housing_train$BsmtUnfSF)
sum(is.na(BsmtUnfSF)) # nothing missing
## [1] 0
BsmtUnfSF <- as.factor(x = housing_test$BsmtUnfSF)
sum(is.na(BsmtUnfSF)) # one value missing
## [1] 1
TotalBsmtSF
# TotalBsmtSF: count missing entries, first in the train set and then in the
# test set (the scratch vector is reused).
TotalBsmtSF <- as.factor(x = housing_train$TotalBsmtSF)
sum(is.na(TotalBsmtSF)) # nothing missing
## [1] 0
TotalBsmtSF <- as.factor(x = housing_test$TotalBsmtSF)
sum(is.na(TotalBsmtSF)) # one value missing
## [1] 1
Heating
# Encode Heating numerically with one explicit mapping for train and test:
# Floor = 1, GasA = 2, GasW = 3, Grav = 4, OthW = 5, Wall = 6.
# Fixes: as.numeric() ignores extra named arguments, so the test set (which
# has only 4 of the 6 levels) would have been coded differently from train;
# levels() of a numeric vector is NULL, so the train column was deleted, and
# the test step wrote to housing_train, leaving housing_test$Heating
# unencoded.
heating_levels <- c("Floor", "GasA", "GasW", "Grav", "OthW", "Wall")
sum(is.na(housing_train$Heating)) # no missing values
## [1] 0
levels(as.factor(housing_train$Heating))
## [1] "Floor" "GasA" "GasW" "Grav" "OthW" "Wall"
housing_train$Heating <- as.numeric(
  factor(housing_train$Heating, levels = heating_levels)
)
sum(is.na(housing_test$Heating)) # no missing values
## [1] 0
levels(as.factor(housing_test$Heating))
## [1] "GasA" "GasW" "Grav" "Wall"
housing_test$Heating <- as.numeric(
  factor(housing_test$Heating, levels = heating_levels)
)
HeatingQC
# Encode HeatingQC numerically with one explicit ordinal mapping for train and
# test: Ex = 1, Gd = 2, TA = 3, Fa = 4, Po = 5.
# Fixes: as.numeric() ignores extra named arguments (alphabetical codes do not
# follow the quality order); levels() of a numeric vector is NULL, so the
# train column was deleted, and the test step wrote to housing_train, leaving
# housing_test$HeatingQC unencoded.
quality_levels <- c("Ex", "Gd", "TA", "Fa", "Po")
sum(is.na(housing_train$HeatingQC)) # no missing values
## [1] 0
levels(as.factor(housing_train$HeatingQC))
## [1] "Ex" "Fa" "Gd" "Po" "TA"
housing_train$HeatingQC <- as.numeric(
  factor(housing_train$HeatingQC, levels = quality_levels)
)
sum(is.na(housing_test$HeatingQC)) # no missing values
## [1] 0
levels(as.factor(housing_test$HeatingQC))
## [1] "Ex" "Fa" "Gd" "Po" "TA"
housing_test$HeatingQC <- as.numeric(
  factor(housing_test$HeatingQC, levels = quality_levels)
)
CentralAir
# Encode CentralAir as a 0/1 indicator in both data sets: N = 0, Y = 1.
# Fixes: as.numeric() ignores extra named arguments (it returned the factor
# codes 1/2); levels() of a numeric vector is NULL, so the train column was
# deleted, and the test step wrote to housing_train, leaving
# housing_test$CentralAir unencoded.
centralair_codes <- c(N = 0, Y = 1)
sum(is.na(housing_train$CentralAir)) # no missing values
## [1] 0
levels(as.factor(housing_train$CentralAir))
## [1] "N" "Y"
# Look each value up by name; NA (and any unknown value) maps to NA.
housing_train$CentralAir <- unname(
  centralair_codes[as.character(housing_train$CentralAir)]
)
sum(is.na(housing_test$CentralAir)) # no missing values
## [1] 0
levels(as.factor(housing_test$CentralAir))
## [1] "N" "Y"
housing_test$CentralAir <- unname(
  centralair_codes[as.character(housing_test$CentralAir)]
)
Electrical
# Encode Electrical numerically with one explicit mapping for train and test:
# SBrkr = 1, FuseA = 2, FuseF = 3, FuseP = 4, Mix = 5. Missing values stay NA.
# Fixes: as.numeric() ignores extra named arguments, so the test set (which
# has no "Mix") would have been coded differently from train; levels() of a
# numeric vector is NULL, so the train column was deleted, and the test step
# wrote to housing_train, leaving housing_test$Electrical unencoded.
electrical_levels <- c("SBrkr", "FuseA", "FuseF", "FuseP", "Mix")
sum(is.na(housing_train$Electrical)) # 1 missing value
## [1] 1
levels(as.factor(housing_train$Electrical))
## [1] "FuseA" "FuseF" "FuseP" "Mix" "SBrkr"
housing_train$Electrical <- as.numeric(
  factor(housing_train$Electrical, levels = electrical_levels)
)
sum(is.na(housing_test$Electrical)) # no missing values
## [1] 0
levels(as.factor(housing_test$Electrical))
## [1] "FuseA" "FuseF" "FuseP" "SBrkr"
housing_test$Electrical <- as.numeric(
  factor(housing_test$Electrical, levels = electrical_levels)
)
KitchenQual
# Encode KitchenQual numerically with one explicit ordinal mapping for train
# and test: Ex = 1, Gd = 2, TA = 3, Fa = 4, Po = 5. Missing values stay NA.
# Fixes: as.numeric() ignores extra named arguments (alphabetical codes put
# "Fa" before "Gd"); levels() of a numeric vector is NULL, so the train column
# was deleted, and the test step wrote to housing_train, leaving
# housing_test$KitchenQual unencoded.
quality_levels <- c("Ex", "Gd", "TA", "Fa", "Po")
sum(is.na(housing_train$KitchenQual)) # no missing values
## [1] 0
levels(as.factor(housing_train$KitchenQual))
## [1] "Ex" "Fa" "Gd" "TA"
housing_train$KitchenQual <- as.numeric(
  factor(housing_train$KitchenQual, levels = quality_levels)
)
sum(is.na(housing_test$KitchenQual)) # 1 missing value
## [1] 1
levels(as.factor(housing_test$KitchenQual))
## [1] "Ex" "Fa" "Gd" "TA"
housing_test$KitchenQual <- as.numeric(
  factor(housing_test$KitchenQual, levels = quality_levels)
)