title: “Proyecto Final” output: html_document
Integrantes:
#Kelvin Silva Collazos #Vania Cárdenas Vuckovic #Ceysa Bardales Trauco
Importando las bases de datos y uniendo las bases de datos Train y Test para el tratamiento de los datos
# Load the training and test sets from the working directory.
Train <- read.csv(file = "train.csv")
Test <- read.csv(file = "test.csv")
# Tag every row with its origin so the combined table can be split again later.
Train[["BBDD"]] <- "Train"
# The test set has no target column; a 0 placeholder is added so both tables
# share the same columns.
# NOTE(review): these zeros end up in summary(BBDDCompleta); confirm that no
# later step treats them as real prices.
Test[["SalePrice"]] <- 0
Test[["BBDD"]] <- "Test"
# Stack both sets into a single table for joint cleaning.
BBDDCompleta <- rbind(Train, Test)
names(BBDDCompleta)
## [1] "Id" "MSSubClass" "MSZoning" "LotFrontage"
## [5] "LotArea" "Street" "Alley" "LotShape"
## [9] "LandContour" "Utilities" "LotConfig" "LandSlope"
## [13] "Neighborhood" "Condition1" "Condition2" "BldgType"
## [17] "HouseStyle" "OverallQual" "OverallCond" "YearBuilt"
## [21] "YearRemodAdd" "RoofStyle" "RoofMatl" "Exterior1st"
## [25] "Exterior2nd" "MasVnrType" "MasVnrArea" "ExterQual"
## [29] "ExterCond" "Foundation" "BsmtQual" "BsmtCond"
## [33] "BsmtExposure" "BsmtFinType1" "BsmtFinSF1" "BsmtFinType2"
## [37] "BsmtFinSF2" "BsmtUnfSF" "TotalBsmtSF" "Heating"
## [41] "HeatingQC" "CentralAir" "Electrical" "X1stFlrSF"
## [45] "X2ndFlrSF" "LowQualFinSF" "GrLivArea" "BsmtFullBath"
## [49] "BsmtHalfBath" "FullBath" "HalfBath" "BedroomAbvGr"
## [53] "KitchenAbvGr" "KitchenQual" "TotRmsAbvGrd" "Functional"
## [57] "Fireplaces" "FireplaceQu" "GarageType" "GarageYrBlt"
## [61] "GarageFinish" "GarageCars" "GarageArea" "GarageQual"
## [65] "GarageCond" "PavedDrive" "WoodDeckSF" "OpenPorchSF"
## [69] "EnclosedPorch" "X3SsnPorch" "ScreenPorch" "PoolArea"
## [73] "PoolQC" "Fence" "MiscFeature" "MiscVal"
## [77] "MoSold" "YrSold" "SaleType" "SaleCondition"
## [81] "SalePrice" "BBDD"
Analizando el conjunto de datos de Train y Test para identificar cualquier anomalía en la data
# Show the type and first values of every column of the combined table.
utils::str(object = BBDDCompleta)
## 'data.frame': 2919 obs. of 82 variables:
## $ Id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ MSSubClass : int 60 20 60 70 60 50 20 60 50 190 ...
## $ MSZoning : chr "RL" "RL" "RL" "RL" ...
## $ LotFrontage : int 65 80 68 60 84 85 75 NA 51 50 ...
## $ LotArea : int 8450 9600 11250 9550 14260 14115 10084 10382 6120 7420 ...
## $ Street : chr "Pave" "Pave" "Pave" "Pave" ...
## $ Alley : chr NA NA NA NA ...
## $ LotShape : chr "Reg" "Reg" "IR1" "IR1" ...
## $ LandContour : chr "Lvl" "Lvl" "Lvl" "Lvl" ...
## $ Utilities : chr "AllPub" "AllPub" "AllPub" "AllPub" ...
## $ LotConfig : chr "Inside" "FR2" "Inside" "Corner" ...
## $ LandSlope : chr "Gtl" "Gtl" "Gtl" "Gtl" ...
## $ Neighborhood : chr "CollgCr" "Veenker" "CollgCr" "Crawfor" ...
## $ Condition1 : chr "Norm" "Feedr" "Norm" "Norm" ...
## $ Condition2 : chr "Norm" "Norm" "Norm" "Norm" ...
## $ BldgType : chr "1Fam" "1Fam" "1Fam" "1Fam" ...
## $ HouseStyle : chr "2Story" "1Story" "2Story" "2Story" ...
## $ OverallQual : int 7 6 7 7 8 5 8 7 7 5 ...
## $ OverallCond : int 5 8 5 5 5 5 5 6 5 6 ...
## $ YearBuilt : int 2003 1976 2001 1915 2000 1993 2004 1973 1931 1939 ...
## $ YearRemodAdd : int 2003 1976 2002 1970 2000 1995 2005 1973 1950 1950 ...
## $ RoofStyle : chr "Gable" "Gable" "Gable" "Gable" ...
## $ RoofMatl : chr "CompShg" "CompShg" "CompShg" "CompShg" ...
## $ Exterior1st : chr "VinylSd" "MetalSd" "VinylSd" "Wd Sdng" ...
## $ Exterior2nd : chr "VinylSd" "MetalSd" "VinylSd" "Wd Shng" ...
## $ MasVnrType : chr "BrkFace" "None" "BrkFace" "None" ...
## $ MasVnrArea : int 196 0 162 0 350 0 186 240 0 0 ...
## $ ExterQual : chr "Gd" "TA" "Gd" "TA" ...
## $ ExterCond : chr "TA" "TA" "TA" "TA" ...
## $ Foundation : chr "PConc" "CBlock" "PConc" "BrkTil" ...
## $ BsmtQual : chr "Gd" "Gd" "Gd" "TA" ...
## $ BsmtCond : chr "TA" "TA" "TA" "Gd" ...
## $ BsmtExposure : chr "No" "Gd" "Mn" "No" ...
## $ BsmtFinType1 : chr "GLQ" "ALQ" "GLQ" "ALQ" ...
## $ BsmtFinSF1 : int 706 978 486 216 655 732 1369 859 0 851 ...
## $ BsmtFinType2 : chr "Unf" "Unf" "Unf" "Unf" ...
## $ BsmtFinSF2 : int 0 0 0 0 0 0 0 32 0 0 ...
## $ BsmtUnfSF : int 150 284 434 540 490 64 317 216 952 140 ...
## $ TotalBsmtSF : int 856 1262 920 756 1145 796 1686 1107 952 991 ...
## $ Heating : chr "GasA" "GasA" "GasA" "GasA" ...
## $ HeatingQC : chr "Ex" "Ex" "Ex" "Gd" ...
## $ CentralAir : chr "Y" "Y" "Y" "Y" ...
## $ Electrical : chr "SBrkr" "SBrkr" "SBrkr" "SBrkr" ...
## $ X1stFlrSF : int 856 1262 920 961 1145 796 1694 1107 1022 1077 ...
## $ X2ndFlrSF : int 854 0 866 756 1053 566 0 983 752 0 ...
## $ LowQualFinSF : int 0 0 0 0 0 0 0 0 0 0 ...
## $ GrLivArea : int 1710 1262 1786 1717 2198 1362 1694 2090 1774 1077 ...
## $ BsmtFullBath : int 1 0 1 1 1 1 1 1 0 1 ...
## $ BsmtHalfBath : int 0 1 0 0 0 0 0 0 0 0 ...
## $ FullBath : int 2 2 2 1 2 1 2 2 2 1 ...
## $ HalfBath : int 1 0 1 0 1 1 0 1 0 0 ...
## $ BedroomAbvGr : int 3 3 3 3 4 1 3 3 2 2 ...
## $ KitchenAbvGr : int 1 1 1 1 1 1 1 1 2 2 ...
## $ KitchenQual : chr "Gd" "TA" "Gd" "Gd" ...
## $ TotRmsAbvGrd : int 8 6 6 7 9 5 7 7 8 5 ...
## $ Functional : chr "Typ" "Typ" "Typ" "Typ" ...
## $ Fireplaces : int 0 1 1 1 1 0 1 2 2 2 ...
## $ FireplaceQu : chr NA "TA" "TA" "Gd" ...
## $ GarageType : chr "Attchd" "Attchd" "Attchd" "Detchd" ...
## $ GarageYrBlt : int 2003 1976 2001 1998 2000 1993 2004 1973 1931 1939 ...
## $ GarageFinish : chr "RFn" "RFn" "RFn" "Unf" ...
## $ GarageCars : int 2 2 2 3 3 2 2 2 2 1 ...
## $ GarageArea : int 548 460 608 642 836 480 636 484 468 205 ...
## $ GarageQual : chr "TA" "TA" "TA" "TA" ...
## $ GarageCond : chr "TA" "TA" "TA" "TA" ...
## $ PavedDrive : chr "Y" "Y" "Y" "Y" ...
## $ WoodDeckSF : int 0 298 0 0 192 40 255 235 90 0 ...
## $ OpenPorchSF : int 61 0 42 35 84 30 57 204 0 4 ...
## $ EnclosedPorch: int 0 0 0 272 0 0 0 228 205 0 ...
## $ X3SsnPorch : int 0 0 0 0 0 320 0 0 0 0 ...
## $ ScreenPorch : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PoolArea : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PoolQC : chr NA NA NA NA ...
## $ Fence : chr NA NA NA NA ...
## $ MiscFeature : chr NA NA NA NA ...
## $ MiscVal : int 0 0 0 0 0 700 0 350 0 0 ...
## $ MoSold : int 2 5 9 2 12 10 8 11 4 1 ...
## $ YrSold : int 2008 2007 2008 2006 2008 2009 2007 2009 2008 2008 ...
## $ SaleType : chr "WD" "WD" "WD" "WD" ...
## $ SaleCondition: chr "Normal" "Normal" "Normal" "Abnorml" ...
## $ SalePrice : num 208500 181500 223500 140000 250000 ...
## $ BBDD : chr "Train" "Train" "Train" "Train" ...
# Check whether the training target contains missing values.
table(is.na(Train[["SalePrice"]]))
##
## FALSE
## 1460
# Per-column summary of the combined table.
summary(object = BBDDCompleta)
## Id MSSubClass MSZoning LotFrontage
## Min. : 1.0 Min. : 20.00 Length:2919 Min. : 21.00
## 1st Qu.: 730.5 1st Qu.: 20.00 Class :character 1st Qu.: 59.00
## Median :1460.0 Median : 50.00 Mode :character Median : 68.00
## Mean :1460.0 Mean : 57.14 Mean : 69.31
## 3rd Qu.:2189.5 3rd Qu.: 70.00 3rd Qu.: 80.00
## Max. :2919.0 Max. :190.00 Max. :313.00
## NA's :486
## LotArea Street Alley LotShape
## Min. : 1300 Length:2919 Length:2919 Length:2919
## 1st Qu.: 7478 Class :character Class :character Class :character
## Median : 9453 Mode :character Mode :character Mode :character
## Mean : 10168
## 3rd Qu.: 11570
## Max. :215245
##
## LandContour Utilities LotConfig LandSlope
## Length:2919 Length:2919 Length:2919 Length:2919
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Neighborhood Condition1 Condition2 BldgType
## Length:2919 Length:2919 Length:2919 Length:2919
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## HouseStyle OverallQual OverallCond YearBuilt
## Length:2919 Min. : 1.000 Min. :1.000 Min. :1872
## Class :character 1st Qu.: 5.000 1st Qu.:5.000 1st Qu.:1954
## Mode :character Median : 6.000 Median :5.000 Median :1973
## Mean : 6.089 Mean :5.565 Mean :1971
## 3rd Qu.: 7.000 3rd Qu.:6.000 3rd Qu.:2001
## Max. :10.000 Max. :9.000 Max. :2010
##
## YearRemodAdd RoofStyle RoofMatl Exterior1st
## Min. :1950 Length:2919 Length:2919 Length:2919
## 1st Qu.:1965 Class :character Class :character Class :character
## Median :1993 Mode :character Mode :character Mode :character
## Mean :1984
## 3rd Qu.:2004
## Max. :2010
##
## Exterior2nd MasVnrType MasVnrArea ExterQual
## Length:2919 Length:2919 Min. : 0.0 Length:2919
## Class :character Class :character 1st Qu.: 0.0 Class :character
## Mode :character Mode :character Median : 0.0 Mode :character
## Mean : 102.2
## 3rd Qu.: 164.0
## Max. :1600.0
## NA's :23
## ExterCond Foundation BsmtQual BsmtCond
## Length:2919 Length:2919 Length:2919 Length:2919
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## BsmtExposure BsmtFinType1 BsmtFinSF1 BsmtFinType2
## Length:2919 Length:2919 Min. : 0.0 Length:2919
## Class :character Class :character 1st Qu.: 0.0 Class :character
## Mode :character Mode :character Median : 368.5 Mode :character
## Mean : 441.4
## 3rd Qu.: 733.0
## Max. :5644.0
## NA's :1
## BsmtFinSF2 BsmtUnfSF TotalBsmtSF Heating
## Min. : 0.00 Min. : 0.0 Min. : 0.0 Length:2919
## 1st Qu.: 0.00 1st Qu.: 220.0 1st Qu.: 793.0 Class :character
## Median : 0.00 Median : 467.0 Median : 989.5 Mode :character
## Mean : 49.58 Mean : 560.8 Mean :1051.8
## 3rd Qu.: 0.00 3rd Qu.: 805.5 3rd Qu.:1302.0
## Max. :1526.00 Max. :2336.0 Max. :6110.0
## NA's :1 NA's :1 NA's :1
## HeatingQC CentralAir Electrical X1stFlrSF
## Length:2919 Length:2919 Length:2919 Min. : 334
## Class :character Class :character Class :character 1st Qu.: 876
## Mode :character Mode :character Mode :character Median :1082
## Mean :1160
## 3rd Qu.:1388
## Max. :5095
##
## X2ndFlrSF LowQualFinSF GrLivArea BsmtFullBath
## Min. : 0.0 Min. : 0.000 Min. : 334 Min. :0.0000
## 1st Qu.: 0.0 1st Qu.: 0.000 1st Qu.:1126 1st Qu.:0.0000
## Median : 0.0 Median : 0.000 Median :1444 Median :0.0000
## Mean : 336.5 Mean : 4.694 Mean :1501 Mean :0.4299
## 3rd Qu.: 704.0 3rd Qu.: 0.000 3rd Qu.:1744 3rd Qu.:1.0000
## Max. :2065.0 Max. :1064.000 Max. :5642 Max. :3.0000
## NA's :2
## BsmtHalfBath FullBath HalfBath BedroomAbvGr
## Min. :0.00000 Min. :0.000 Min. :0.0000 Min. :0.00
## 1st Qu.:0.00000 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:2.00
## Median :0.00000 Median :2.000 Median :0.0000 Median :3.00
## Mean :0.06136 Mean :1.568 Mean :0.3803 Mean :2.86
## 3rd Qu.:0.00000 3rd Qu.:2.000 3rd Qu.:1.0000 3rd Qu.:3.00
## Max. :2.00000 Max. :4.000 Max. :2.0000 Max. :8.00
## NA's :2
## KitchenAbvGr KitchenQual TotRmsAbvGrd Functional
## Min. :0.000 Length:2919 Min. : 2.000 Length:2919
## 1st Qu.:1.000 Class :character 1st Qu.: 5.000 Class :character
## Median :1.000 Mode :character Median : 6.000 Mode :character
## Mean :1.045 Mean : 6.452
## 3rd Qu.:1.000 3rd Qu.: 7.000
## Max. :3.000 Max. :15.000
##
## Fireplaces FireplaceQu GarageType GarageYrBlt
## Min. :0.0000 Length:2919 Length:2919 Min. :1895
## 1st Qu.:0.0000 Class :character Class :character 1st Qu.:1960
## Median :1.0000 Mode :character Mode :character Median :1979
## Mean :0.5971 Mean :1978
## 3rd Qu.:1.0000 3rd Qu.:2002
## Max. :4.0000 Max. :2207
## NA's :159
## GarageFinish GarageCars GarageArea GarageQual
## Length:2919 Min. :0.000 Min. : 0.0 Length:2919
## Class :character 1st Qu.:1.000 1st Qu.: 320.0 Class :character
## Mode :character Median :2.000 Median : 480.0 Mode :character
## Mean :1.767 Mean : 472.9
## 3rd Qu.:2.000 3rd Qu.: 576.0
## Max. :5.000 Max. :1488.0
## NA's :1 NA's :1
## GarageCond PavedDrive WoodDeckSF OpenPorchSF
## Length:2919 Length:2919 Min. : 0.00 Min. : 0.00
## Class :character Class :character 1st Qu.: 0.00 1st Qu.: 0.00
## Mode :character Mode :character Median : 0.00 Median : 26.00
## Mean : 93.71 Mean : 47.49
## 3rd Qu.: 168.00 3rd Qu.: 70.00
## Max. :1424.00 Max. :742.00
##
## EnclosedPorch X3SsnPorch ScreenPorch PoolArea
## Min. : 0.0 Min. : 0.000 Min. : 0.00 Min. : 0.000
## 1st Qu.: 0.0 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.000
## Median : 0.0 Median : 0.000 Median : 0.00 Median : 0.000
## Mean : 23.1 Mean : 2.602 Mean : 16.06 Mean : 2.252
## 3rd Qu.: 0.0 3rd Qu.: 0.000 3rd Qu.: 0.00 3rd Qu.: 0.000
## Max. :1012.0 Max. :508.000 Max. :576.00 Max. :800.000
##
## PoolQC Fence MiscFeature MiscVal
## Length:2919 Length:2919 Length:2919 Min. : 0.00
## Class :character Class :character Class :character 1st Qu.: 0.00
## Mode :character Mode :character Mode :character Median : 0.00
## Mean : 50.83
## 3rd Qu.: 0.00
## Max. :17000.00
##
## MoSold YrSold SaleType SaleCondition
## Min. : 1.000 Min. :2006 Length:2919 Length:2919
## 1st Qu.: 4.000 1st Qu.:2007 Class :character Class :character
## Median : 6.000 Median :2008 Mode :character Mode :character
## Mean : 6.213 Mean :2008
## 3rd Qu.: 8.000 3rd Qu.:2009
## Max. :12.000 Max. :2010
##
## SalePrice BBDD
## Min. : 0 Length:2919
## 1st Qu.: 0 Class :character
## Median : 34900 Mode :character
## Mean : 90492
## 3rd Qu.:163000
## Max. :755000
##
# MSZoning: count the missing values; they are filled with the most frequent
# level.
table(is.na(BBDDCompleta[["MSZoning"]]))
##
## FALSE TRUE
## 2915 4
table(BBDDCompleta[["MSZoning"]])
##
## C (all) FV RH RL RM
## 25 139 26 2265 460
# "RL" is the most frequent level in the table above.
BBDDCompleta[["MSZoning"]] <- replace(
  BBDDCompleta[["MSZoning"]],
  is.na(BBDDCompleta[["MSZoning"]]),
  "RL"
)
# Street: no missing values, but the column is almost constant.
table(is.na(BBDDCompleta$Street))
##
## FALSE
## 2919
table(Train$Street)
##
## Grvl Pave
## 6 1454
# Drop Street: 1454 of the 1460 training rows (about 99.6%) are "Pave".
# The column is removed by name rather than by position, so a changed column
# order or a re-run of this step can never delete a different variable.
BBDDCompleta$Street <- NULL
# Alley: mostly missing.
table(is.na(BBDDCompleta$Alley))
##
## FALSE TRUE
## 198 2721
# Share of missing values, computed from the data instead of typed-in counts.
mean(is.na(BBDDCompleta$Alley))
## [1] 0.9321686
table(BBDDCompleta$Alley)
##
## Grvl Pave
## 120 78
# Drop Alley (about 93% missing). Removed by name so the step stays correct
# if the column order changes or the step is run twice.
BBDDCompleta$Alley <- NULL
# Utilities: two missing values and practically a single level.
table(is.na(BBDDCompleta$Utilities))
##
## FALSE TRUE
## 2917 2
table(BBDDCompleta$Utilities)
##
## AllPub NoSeWa
## 2916 1
# Drop Utilities: 2916 of the 2917 observed values are "AllPub".
# Removed by name so the step stays correct if the column order changes or
# the step is run twice.
BBDDCompleta$Utilities <- NULL
# RoofMatl: no missing values, dominated by one level.
table(is.na(BBDDCompleta$RoofMatl))
##
## FALSE
## 2919
table(BBDDCompleta$RoofMatl)
##
## ClyTile CompShg Membran Metal Roll Tar&Grv WdShake WdShngl
## 1 2876 1 1 1 23 9 7
# Share of the dominant level, computed from the data instead of typed-in
# counts.
mean(BBDDCompleta$RoofMatl == "CompShg")
## [1] 0.9852689
# Drop RoofMatl (about 98.5% "CompShg"). Removed by name so the step stays
# correct if the column order changes or the step is run twice.
BBDDCompleta$RoofMatl <- NULL
# Exterior1st: one missing value, filled with the most frequent level.
table(is.na(BBDDCompleta[["Exterior1st"]]))
##
## FALSE TRUE
## 2918 1
table(BBDDCompleta[["Exterior1st"]])
##
## AsbShng AsphShn BrkComm BrkFace CBlock CemntBd HdBoard ImStucc MetalSd Plywood
## 44 2 6 87 2 126 442 1 450 221
## Stone Stucco VinylSd Wd Sdng WdShing
## 2 43 1025 411 56
BBDDCompleta[["Exterior1st"]] <- replace(
  BBDDCompleta[["Exterior1st"]],
  is.na(BBDDCompleta[["Exterior1st"]]),
  "VinylSd"
)
# Exterior2nd: one missing value, filled with the most frequent level.
table(is.na(BBDDCompleta[["Exterior2nd"]]))
##
## FALSE TRUE
## 2918 1
table(BBDDCompleta[["Exterior2nd"]])
##
## AsbShng AsphShn Brk Cmn BrkFace CBlock CmentBd HdBoard ImStucc MetalSd Other
## 38 4 22 47 3 126 406 15 447 1
## Plywood Stone Stucco VinylSd Wd Sdng Wd Shng
## 270 6 47 1014 391 81
BBDDCompleta[["Exterior2nd"]] <- replace(
  BBDDCompleta[["Exterior2nd"]],
  is.na(BBDDCompleta[["Exterior2nd"]]),
  "VinylSd"
)
# MasVnrType: 24 missing values and a dominant "None" level.
table(is.na(BBDDCompleta$MasVnrType))
##
## FALSE TRUE
## 2895 24
table(BBDDCompleta$MasVnrType)
##
## BrkCmn BrkFace None Stone
## 25 879 1742 249
# Share of rows that are either missing or "None", computed from the data
# instead of typed-in counts.
mean(is.na(BBDDCompleta$MasVnrType) | BBDDCompleta$MasVnrType == "None")
## [1] 0.6050017
# Drop MasVnrType (more than 60% missing or "None"). Removed by name so the
# step stays correct if the column order changes or the step is run twice.
BBDDCompleta$MasVnrType <- NULL
# MasVnrArea: 23 missing values.
table(is.na(BBDDCompleta$MasVnrArea))
##
## FALSE TRUE
## 2896 23
hist((BBDDCompleta$MasVnrArea))
# Share of rows treated as "no veneer": 23 missing plus 1738 others.
# NOTE(review): 1738 is presumably the number of rows with area 0; that count
# is not printed anywhere above, so verify it before relying on the ratio.
(1738 + 23) / (2896 + 23)
## [1] 0.6032888
# Drop MasVnrArea. Removed by name so the step stays correct if the column
# order changes or the step is run twice.
BBDDCompleta$MasVnrArea <- NULL
# Basement categorical variables: each one has missing values that are filled
# with its most frequent level.
# NOTE(review): in this data set NA may mean "no basement" rather than an
# unknown value; confirm against the data description.

# BsmtQual
table(is.na(BBDDCompleta[["BsmtQual"]]))
##
## FALSE TRUE
## 2838 81
table(BBDDCompleta[["BsmtQual"]])
##
## Ex Fa Gd TA
## 258 88 1209 1283
BBDDCompleta[["BsmtQual"]] <- replace(
  BBDDCompleta[["BsmtQual"]],
  is.na(BBDDCompleta[["BsmtQual"]]),
  "TA"
)
# BsmtCond
table(is.na(BBDDCompleta[["BsmtCond"]]))
##
## FALSE TRUE
## 2837 82
table(BBDDCompleta[["BsmtCond"]])
##
## Fa Gd Po TA
## 104 122 5 2606
BBDDCompleta[["BsmtCond"]] <- replace(
  BBDDCompleta[["BsmtCond"]],
  is.na(BBDDCompleta[["BsmtCond"]]),
  "TA"
)
# BsmtExposure
table(is.na(BBDDCompleta[["BsmtExposure"]]))
##
## FALSE TRUE
## 2837 82
table(BBDDCompleta[["BsmtExposure"]])
##
## Av Gd Mn No
## 418 276 239 1904
BBDDCompleta[["BsmtExposure"]] <- replace(
  BBDDCompleta[["BsmtExposure"]],
  is.na(BBDDCompleta[["BsmtExposure"]]),
  "No"
)
# BsmtFinType1
table(is.na(BBDDCompleta[["BsmtFinType1"]]))
##
## FALSE TRUE
## 2840 79
table(BBDDCompleta[["BsmtFinType1"]])
##
## ALQ BLQ GLQ LwQ Rec Unf
## 429 269 849 154 288 851
BBDDCompleta[["BsmtFinType1"]] <- replace(
  BBDDCompleta[["BsmtFinType1"]],
  is.na(BBDDCompleta[["BsmtFinType1"]]),
  "Unf"
)
# BsmtFinSF1: count the missing values, then tabulate the observed values.
table(is.na(BBDDCompleta[["BsmtFinSF1"]]))
##
## FALSE TRUE
## 2918 1
table(BBDDCompleta[["BsmtFinSF1"]])
##
## 0 2 16 20 24 25 27 28 32 33 35 36 40 41 42 48
## 929 1 14 8 27 1 1 5 1 1 1 4 3 1 1 4
## 49 50 51 52 53 54 55 56 57 60 63 64 65 68 70 72
## 1 2 1 2 1 2 1 4 2 4 1 1 1 3 2 1
## 73 75 76 77 78 80 81 85 88 94 96 100 104 108 110 111
## 1 1 1 1 1 4 2 1 1 1 1 1 5 1 3 2
## 113 114 116 119 120 121 122 125 126 128 129 130 131 132 133 134
## 1 3 3 3 6 2 1 1 1 1 1 3 1 1 2 1
## 138 140 141 143 144 148 149 150 152 154 155 156 162 165 167 168
## 3 1 1 1 5 1 1 3 2 1 1 3 2 1 1 5
## 169 170 172 173 175 176 179 180 181 182 185 186 187 188 189 190
## 1 1 1 2 1 3 1 3 1 3 1 1 2 2 3 2
## 191 192 193 194 196 197 198 200 201 203 204 205 206 207 208 209
## 1 4 1 2 5 1 2 1 1 3 1 1 1 1 1 2
## 210 212 213 215 216 218 219 220 221 222 223 224 225 226 228 230
## 4 1 1 1 3 1 1 3 1 1 1 3 1 2 2 1
## 231 234 236 238 239 240 241 242 244 246 247 248 249 250 251 252
## 2 3 2 1 1 3 2 1 1 2 4 2 2 5 1 5
## 254 256 257 258 259 260 261 262 263 264 266 267 270 271 273 274
## 1 3 3 1 2 2 1 2 1 3 2 2 1 1 2 2
## 275 276 278 279 280 281 282 283 284 285 286 288 290 292 294 296
## 2 6 1 1 5 2 2 2 3 2 1 8 2 3 3 1
## 297 298 299 300 301 305 306 308 309 310 311 312 314 315 316 317
## 3 2 4 9 2 2 3 3 1 3 3 6 1 3 1 2
## 318 319 320 321 322 324 326 328 329 330 331 332 334 335 336 337
## 1 4 2 2 1 2 2 1 4 5 2 1 2 1 5 1
## 338 339 340 341 342 343 344 346 347 348 349 350 351 352 353 354
## 4 1 3 3 2 2 1 1 1 2 1 4 3 2 5 2
## 355 356 358 360 361 362 363 364 365 366 368 369 370 371 372 373
## 1 1 2 7 2 1 1 1 1 1 6 1 3 3 2 1
## 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389
## 2 7 1 1 4 3 2 3 1 1 8 2 2 2 2 1
## 390 392 393 394 397 398 399 400 402 403 404 405 406 408 410 412
## 4 2 1 1 3 1 3 5 2 4 1 2 2 4 4 1
## 414 415 416 417 419 420 421 422 423 424 425 426 427 428 429 430
## 3 2 2 2 2 3 2 1 1 1 4 2 1 4 1 1
## 432 433 434 435 436 437 438 439 440 441 442 443 444 445 448 450
## 5 1 1 3 1 3 2 1 1 2 4 1 2 3 2 5
## 452 453 454 455 456 457 458 459 460 462 464 465 466 467 468 469
## 1 2 3 1 7 3 2 1 3 3 1 1 1 1 6 1
## 471 472 474 475 476 477 480 481 482 483 484 485 486 488 489 490
## 2 2 3 1 4 1 4 1 1 4 2 2 3 1 2 4
## 491 492 493 494 495 496 497 498 500 501 502 503 504 505 506 507
## 1 3 2 3 4 1 1 2 7 1 1 3 6 3 2 4
## 509 510 512 513 514 515 516 518 520 521 522 523 524 526 527 528
## 2 4 2 3 1 1 3 1 4 2 3 2 2 1 3 6
## 531 532 533 534 535 536 537 538 539 540 543 544 546 547 548 549
## 2 3 3 2 2 2 1 1 4 3 1 6 4 6 3 4
## 550 551 552 553 554 556 557 559 560 562 564 565 566 567 568 569
## 5 1 3 6 2 1 1 1 6 1 2 4 2 2 2 3
## 570 572 573 574 575 576 577 578 579 580 583 584 585 586 587 588
## 3 1 1 3 2 2 1 3 1 2 2 2 2 1 1 6
## 590 592 593 594 595 596 599 600 601 602 603 604 605 606 607 608
## 1 1 2 3 5 1 2 8 1 7 3 2 1 2 1 2
## 609 611 612 614 615 616 617 619 620 621 622 623 624 625 626 630
## 2 2 4 1 1 5 2 2 1 2 2 2 7 6 4 2
## 631 632 633 634 636 637 638 639 641 642 643 644 645 646 647 648
## 1 3 4 1 2 5 2 2 4 1 3 3 1 3 1 5
## 649 650 651 652 654 655 656 658 659 660 661 662 663 664 666 668
## 2 2 2 1 2 4 4 3 4 4 1 6 4 2 3 2
## 669 670 671 672 673 674 678 679 680 681 682 683 684 685 686 687
## 1 1 1 5 2 3 2 2 3 2 2 1 1 4 5 1
## 688 689 690 691 692 694 695 696 697 698 699 700 701 702 704 705
## 1 1 3 1 1 1 3 1 5 3 2 7 1 1 4 2
## 706 708 709 710 712 713 714 716 717 718 719 720 722 724 725 726
## 3 2 1 1 3 1 1 2 2 1 2 2 1 2 1 1
## 727 728 729 731 732 733 734 735 736 737 738 739 740 741 742 744
## 2 3 2 1 5 3 4 2 2 2 2 3 2 2 1 2
## 745 746 747 748 749 750 751 755 756 758 759 760 762 763 764 765
## 2 1 4 2 1 1 1 3 1 5 1 2 3 2 1 2
## 766 767 769 770 771 772 773 774 775 776 777 778 779 780 781 782
## 2 5 3 2 1 1 3 2 3 3 2 3 3 3 4 1
## 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 799
## 1 5 2 3 2 4 3 2 2 1 2 1 1 2 1 2
## 800 803 804 806 808 809 810 811 812 813 814 816 819 820 821 822
## 3 2 2 1 1 1 2 1 4 2 2 4 2 3 2 2
## 824 826 827 828 830 831 832 833 836 837 838 840 841 842 844 846
## 2 1 1 4 1 2 3 2 4 1 1 1 4 1 1 3
## 847 848 850 851 852 853 854 856 859 860 862 863 864 865 866 867
## 2 1 2 4 1 1 1 3 1 2 1 1 4 2 3 1
## 870 871 872 873 874 876 880 881 883 885 888 890 892 893 894 895
## 3 2 3 1 1 2 1 1 1 1 1 2 1 1 2 1
## 896 897 899 900 901 902 903 904 905 906 908 909 910 912 913 914
## 1 1 1 1 2 4 2 5 1 1 1 1 2 1 2 1
## 915 916 918 919 920 922 923 924 925 926 929 930 931 932 935 936
## 5 1 1 1 3 2 2 1 2 1 2 2 2 2 1 7
## 937 938 939 941 943 944 945 946 949 950 951 952 953 954 955 956
## 1 2 1 2 1 2 2 1 1 1 1 1 1 1 1 2
## 958 960 962 964 965 967 968 969 970 973 975 976 978 980 982 983
## 2 1 3 3 1 2 1 1 1 1 2 2 1 2 1 1
## 984 985 986 987 988 990 991 994 996 998 1000 1001 1002 1003 1004 1005
## 1 1 4 1 4 1 1 2 2 3 4 1 1 1 2 3
## 1010 1011 1012 1013 1014 1015 1016 1018 1021 1022 1023 1024 1026 1027 1029 1030
## 1 1 2 1 1 1 2 2 1 1 3 2 1 1 1 2
## 1032 1033 1034 1035 1036 1037 1038 1039 1040 1044 1046 1047 1048 1051 1053 1056
## 2 1 1 1 4 1 1 2 2 1 1 1 1 1 2 3
## 1059 1064 1065 1070 1071 1073 1074 1075 1078 1079 1080 1082 1084 1085 1086 1087
## 4 1 2 2 1 1 1 1 2 1 1 2 3 1 1 1
## 1088 1090 1092 1094 1096 1097 1098 1101 1104 1106 1110 1111 1112 1115 1116 1118
## 2 1 1 1 1 1 1 1 2 1 2 1 1 1 3 1
## 1121 1122 1124 1126 1127 1128 1129 1136 1137 1138 1141 1142 1148 1149 1150 1151
## 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1
## 1152 1153 1154 1157 1158 1159 1162 1163 1165 1170 1172 1173 1178 1180 1181 1182
## 2 3 1 1 1 2 1 1 1 1 1 3 1 1 1 1
## 1186 1188 1191 1194 1196 1198 1200 1201 1204 1206 1213 1216 1218 1219 1220 1223
## 1 1 1 1 2 2 5 3 1 2 1 1 4 2 2 1
## 1224 1225 1230 1231 1232 1234 1236 1237 1238 1239 1243 1246 1247 1249 1252 1258
## 1 1 1 1 2 2 1 1 1 1 1 2 1 3 1 2
## 1259 1260 1261 1262 1270 1271 1274 1277 1280 1282 1283 1285 1288 1290 1294 1298
## 1 1 1 1 1 1 3 1 1 1 1 1 2 1 1 1
## 1300 1302 1304 1308 1309 1312 1314 1319 1320 1324 1328 1329 1330 1332 1333 1334
## 4 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1
## 1336 1337 1338 1341 1346 1350 1351 1358 1359 1360 1361 1369 1373 1375 1383 1386
## 1 1 1 1 1 2 1 1 1 2 1 2 3 1 2 2
## 1387 1390 1392 1398 1400 1406 1410 1412 1414 1416 1420 1422 1430 1433 1436 1440
## 2 1 1 1 2 1 2 1 1 2 1 1 2 1 1 1
## 1441 1443 1445 1447 1455 1456 1460 1464 1470 1474 1476 1478 1480 1500 1505 1513
## 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1 1
## 1518 1531 1538 1540 1557 1558 1562 1564 1567 1571 1572 1573 1576 1593 1606 1619
## 2 1 1 1 1 1 1 1 2 1 2 3 1 1 1 1
## 1632 1636 1640 1646 1660 1682 1684 1696 1721 1728 1732 1733 1758 1767 1810 1812
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1836 1880 1904 1965 1972 2085 2096 2146 2158 2188 2257 2260 2288 4010 5644
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
hist(BBDDCompleta$BsmtFinSF1)
# Fill the missing value with the rounded median of the observed values.
BBDDCompleta[["BsmtFinSF1"]] <- replace(
  BBDDCompleta[["BsmtFinSF1"]],
  is.na(BBDDCompleta[["BsmtFinSF1"]]),
  round(median(BBDDCompleta[["BsmtFinSF1"]], na.rm = TRUE))
)
# BsmtFinType2: missing values are filled with the most frequent level.
table(is.na(BBDDCompleta[["BsmtFinType2"]]))
##
## FALSE TRUE
## 2839 80
table(BBDDCompleta[["BsmtFinType2"]])
##
## ALQ BLQ GLQ LwQ Rec Unf
## 52 68 34 87 105 2493
BBDDCompleta[["BsmtFinType2"]] <- replace(
  BBDDCompleta[["BsmtFinType2"]],
  is.na(BBDDCompleta[["BsmtFinType2"]]),
  "Unf"
)
# BsmtFinSF2: count the missing values, then tabulate the observed values.
table(is.na(BBDDCompleta[["BsmtFinSF2"]]))
##
## FALSE TRUE
## 2918 1
table(BBDDCompleta[["BsmtFinSF2"]])
##
## 0 6 12 28 32 35 38 40 41 42 46 48 52 60 63 64
## 2571 1 1 1 1 1 1 2 2 2 1 1 1 2 1 2
## 66 68 72 76 78 80 81 92 93 95 96 102 105 106 108 110
## 1 2 2 1 1 2 1 1 2 1 2 1 2 1 2 2
## 113 116 117 119 120 121 123 125 127 128 132 136 138 139 144 147
## 1 2 2 1 1 2 1 1 2 2 1 1 1 1 3 3
## 149 150 153 154 156 159 162 163 165 167 168 169 173 174 175 177
## 1 1 1 1 1 2 3 1 1 1 3 1 1 2 1 1
## 180 181 182 184 186 193 196 201 202 206 208 210 211 215 216 219
## 5 1 2 1 1 1 1 1 2 1 1 2 1 1 1 1
## 227 228 230 232 239 240 243 247 250 252 258 259 262 263 264 270
## 1 1 1 1 1 2 1 2 1 2 1 1 1 1 2 2
## 273 276 278 279 281 284 286 287 288 290 294 297 306 308 311 319
## 2 2 1 2 1 1 1 2 2 2 5 1 1 1 1 1
## 321 324 334 336 337 344 345 350 351 352 354 360 362 364 373 374
## 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 3
## 375 377 380 382 387 391 393 396 398 400 402 404 411 417 419 420
## 1 1 1 1 1 2 1 1 1 2 1 1 1 1 1 1
## 432 435 438 441 442 448 449 450 453 456 465 466 468 469 472 474
## 1 3 1 1 1 1 1 1 1 1 2 1 2 2 1 1
## 479 480 483 486 488 491 492 495 497 499 500 506 507 512 522 529
## 1 2 3 1 1 1 2 2 1 1 1 1 1 1 1 1
## 530 531 532 539 543 544 546 547 551 555 557 580 590 596 600 604
## 1 1 1 3 1 1 1 1 2 1 1 1 2 2 1 1
## 606 608 612 613 619 620 622 624 627 630 634 645 661 668 670 679
## 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1
## 682 684 688 690 691 692 694 712 713 722 723 748 750 755 761 764
## 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1
## 768 774 791 799 811 820 823 826 829 831 841 842 850 852 859 869
## 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1
## 872 873 875 884 891 904 906 912 915 955 956 972 981 982 1020 1029
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1031 1037 1039 1057 1061 1063 1073 1080 1083 1085 1120 1127 1164 1393 1474 1526
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
hist(BBDDCompleta$BsmtFinSF2)
# Drop BsmtFinSF2 because most of its values are 0. Removed by name rather
# than by position, so a changed column order or a re-run of this step can
# never delete a different variable.
BBDDCompleta$BsmtFinSF2 <- NULL
# Heating: no missing values, dominated by one level.
table(is.na(BBDDCompleta$Heating))
##
## FALSE
## 2919
table(BBDDCompleta$Heating)
##
## Floor GasA GasW Grav OthW Wall
## 1 2874 27 9 2 6
# Share of the dominant level, computed from the data instead of typed-in
# counts.
mean(BBDDCompleta$Heating == "GasA")
## [1] 0.9845838
# Drop Heating (about 98% "GasA"). Removed by name so the step stays correct
# if the column order changes or the step is run twice.
BBDDCompleta$Heating <- NULL
# CentralAir: no missing values, dominated by one level.
table(is.na(BBDDCompleta$CentralAir))
##
## FALSE
## 2919
table(BBDDCompleta$CentralAir)
##
## N Y
## 196 2723
# Share of the dominant level, computed from the data instead of typed-in
# counts.
mean(BBDDCompleta$CentralAir == "Y")
## [1] 0.9328537
# Drop CentralAir (about 93% "Y"). Removed by name so the step stays correct
# if the column order changes or the step is run twice.
BBDDCompleta$CentralAir <- NULL
# Electrical: one missing value, filled with the most frequent level.
table(is.na(BBDDCompleta[["Electrical"]]))
##
## FALSE TRUE
## 2918 1
table(BBDDCompleta[["Electrical"]])
##
## FuseA FuseF FuseP Mix SBrkr
## 188 50 8 1 2671
BBDDCompleta[["Electrical"]] <- replace(
  BBDDCompleta[["Electrical"]],
  is.na(BBDDCompleta[["Electrical"]]),
  "SBrkr"
)
# BsmtFullBath: two missing values; 0 is the most frequent count.
table(is.na(BBDDCompleta$BsmtFullBath))
##
## FALSE TRUE
## 2917 2
table(BBDDCompleta$BsmtFullBath)
##
## 0 1 2 3
## 1705 1172 38 2
# Fill with the integer 0L. Assigning the string "0" would coerce the whole
# column to character, turning a numeric count into text.
BBDDCompleta$BsmtFullBath[is.na(BBDDCompleta$BsmtFullBath)] <- 0L
# BsmtHalfBath: two missing values; 0 is the most frequent count.
table(is.na(BBDDCompleta$BsmtHalfBath))
##
## FALSE TRUE
## 2917 2
table(BBDDCompleta$BsmtHalfBath)
##
## 0 1 2
## 2742 171 4
# Same as above: keep the column integer by assigning 0L, not "0".
BBDDCompleta$BsmtHalfBath[is.na(BBDDCompleta$BsmtHalfBath)] <- 0L
# KitchenQual: one missing value, filled with the most frequent level.
table(is.na(BBDDCompleta[["KitchenQual"]]))
##
## FALSE TRUE
## 2918 1
table(BBDDCompleta[["KitchenQual"]])
##
## Ex Fa Gd TA
## 205 70 1151 1492
BBDDCompleta[["KitchenQual"]] <- replace(
  BBDDCompleta[["KitchenQual"]],
  is.na(BBDDCompleta[["KitchenQual"]]),
  "TA"
)
# EnclosedPorch: no missing values; tabulate the observed values to see how
# many of them are 0.
table(is.na(BBDDCompleta[["EnclosedPorch"]]))
##
## FALSE
## 2919
table(BBDDCompleta[["EnclosedPorch"]])
##
## 0 16 18 19 20 23 24 25 26 28 30 32 34 35 36 37
## 2460 1 1 1 2 1 2 1 1 1 3 4 3 3 5 1
## 39 40 41 42 43 44 45 48 50 51 52 54 55 56 57 60
## 2 8 2 4 1 1 2 3 1 1 2 1 3 5 1 6
## 64 66 67 68 70 72 75 77 78 80 81 84 86 87 88 90
## 4 1 1 3 5 1 1 4 2 5 3 8 1 1 2 4
## 91 92 94 96 98 99 100 101 102 104 105 108 109 112 113 114
## 1 1 2 13 2 1 6 1 4 1 4 2 1 22 1 2
## 115 116 117 120 121 122 123 126 128 129 130 132 133 134 135 136
## 2 8 1 9 2 1 2 5 7 1 1 1 1 2 2 2
## 137 138 139 140 143 144 145 148 150 154 156 158 160 161 162 164
## 2 2 1 5 2 11 1 5 7 3 5 2 7 1 1 5
## 165 167 168 169 170 172 174 175 176 177 180 183 184 185 186 189
## 1 1 9 3 2 1 1 1 5 1 7 2 3 2 2 1
## 190 192 194 196 198 200 202 203 204 205 207 208 209 210 211 212
## 3 10 1 2 1 2 2 1 1 4 1 2 1 1 2 3
## 213 214 216 218 219 220 221 222 224 225 226 228 230 231 234 236
## 1 1 6 3 1 1 1 2 3 1 1 3 1 2 2 3
## 238 239 240 242 244 246 248 249 252 254 256 259 260 264 265 268
## 1 3 5 1 3 2 3 1 4 1 1 1 1 3 1 1
## 272 275 280 286 288 290 291 293 294 296 301 318 324 330 334 364
## 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1
## 368 386 429 432 552 584 1012
## 1 1 1 1 1 1 1
# Drop EnclosedPorch because most of its values are 0. Removed by name rather
# than by position, so a changed column order or a re-run of this step can
# never delete a different variable.
BBDDCompleta$EnclosedPorch <- NULL
# X3SsnPorch: no missing values, almost always 0.
table(is.na(BBDDCompleta$X3SsnPorch))
##
## FALSE
## 2919
table(BBDDCompleta$X3SsnPorch)
##
## 0 23 86 96 120 130 140 144 150 153 162 168 174 176 180 182
## 2882 1 1 1 1 1 1 2 1 3 1 3 1 1 2 1
## 196 216 219 224 225 238 245 255 290 304 320 323 360 407 508
## 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1
# Drop X3SsnPorch (2882 of 2919 rows are 0). Removed by name so the step stays
# correct if the column order changes or the step is run twice.
BBDDCompleta$X3SsnPorch <- NULL
# ScreenPorch: no missing values; tabulate the observed values to see how many
# of them are 0.
table(is.na(BBDDCompleta[["ScreenPorch"]]))
##
## FALSE
## 2919
table(BBDDCompleta[["ScreenPorch"]])
##
## 0 40 53 60 63 64 80 84 88 90 92 94 95 99 100 104
## 2663 1 1 1 1 1 1 1 1 3 2 1 2 1 5 1
## 108 109 110 111 112 113 115 116 117 119 120 121 122 123 126 128
## 2 1 2 1 3 1 3 2 1 1 9 1 1 1 4 1
## 130 135 138 140 141 142 143 144 145 147 148 150 152 153 154 155
## 1 1 2 2 1 3 1 13 2 3 1 1 1 4 1 4
## 156 160 161 162 163 164 165 166 168 170 171 174 175 176 178 180
## 4 6 3 1 1 1 2 1 10 2 1 1 3 2 1 7
## 182 184 185 189 190 192 195 196 197 198 200 201 204 208 210 216
## 3 2 1 4 1 11 3 2 1 2 7 1 2 1 3 8
## 217 220 221 222 224 225 227 228 231 233 234 240 252 255 256 259
## 1 1 1 1 6 4 2 1 1 1 1 2 1 2 3 2
## 260 263 264 265 266 270 271 273 276 280 287 288 291 312 322 342
## 1 1 1 1 2 1 1 1 1 1 1 4 1 1 2 1
## 348 374 385 396 410 440 480 490 576
## 1 1 1 1 1 1 1 1 1
# Drop ScreenPorch because most of its values are 0. Removed by name rather
# than by position, so a changed column order or a re-run of this step can
# never delete a different variable.
BBDDCompleta$ScreenPorch <- NULL
# PoolArea: no missing values, almost always 0.
table(is.na(BBDDCompleta$PoolArea))
##
## FALSE
## 2919
table(BBDDCompleta$PoolArea)
##
## 0 144 228 368 444 480 512 519 555 561 576 648 738 800
## 2906 1 1 1 1 1 1 1 1 1 1 1 1 1
# Drop PoolArea (2906 of 2919 rows are 0). Removed by name so the step stays
# correct if the column order changes or the step is run twice.
BBDDCompleta$PoolArea <- NULL
# PoolQC, Fence and MiscFeature are mostly missing and are dropped. Each
# column is removed by name rather than by position, so a changed column order
# or a re-run of this step can never delete a different variable.
table(is.na(BBDDCompleta$PoolQC))
##
## FALSE TRUE
## 10 2909
# Drop PoolQC: 2909 of 2919 values are missing.
BBDDCompleta$PoolQC <- NULL
table(is.na(BBDDCompleta$Fence))
##
## FALSE TRUE
## 571 2348
# Drop Fence: 2348 of 2919 values are missing.
BBDDCompleta$Fence <- NULL
table(is.na(BBDDCompleta$MiscFeature))
##
## FALSE TRUE
## 105 2814
# Drop MiscFeature: 2814 of 2919 values are missing.
BBDDCompleta$MiscFeature <- NULL
# MiscVal: no missing values, but almost always 0.
table(is.na(BBDDCompleta$MiscVal))
##
## FALSE
## 2919
table(BBDDCompleta$MiscVal)
##
## 0 54 80 300 350 400 420 450 455 460 480 490 500
## 2816 1 1 1 1 18 1 9 1 1 2 1 13
## 560 600 620 650 700 750 800 900 1000 1150 1200 1300 1400
## 1 8 1 3 7 1 1 1 1 1 3 1 1
## 1500 1512 2000 2500 3000 3500 4500 6500 8300 12500 15500 17000
## 3 1 7 2 2 1 2 1 1 1 1 1
# Drop MiscVal (2816 of 2919 rows are 0). Removed by name so the step stays
# correct if the column order changes or the step is run twice.
BBDDCompleta$MiscVal <- NULL
# SaleType: one missing value, filled with the most frequent level ("WD").
# The variable is kept, not dropped.
table(is.na(BBDDCompleta$SaleType))
##
## FALSE TRUE
## 2918 1
table(BBDDCompleta$SaleType)
##
## COD Con ConLD ConLI ConLw CWD New Oth WD
## 87 5 26 9 8 12 239 7 2525
BBDDCompleta$SaleType[is.na(BBDDCompleta$SaleType)] <- "WD"
# SaleCondition: no missing values, so the variable is kept as it is.
table(is.na(BBDDCompleta$SaleCondition))
##
## FALSE
## 2919
table(BBDDCompleta$SaleCondition)
##
## Abnorml AdjLand Alloca Family Normal Partial
## 190 12 24 46 2402 245
# SaleType was already completed above, so it is not imputed a second time
# here.
##Separamos las BBDD limpias en Test y Train nuevamente, con las nuevas variables seleccionadas
# Split the cleaned table back into its two partitions using the origin tag
# stored in the BBDD column.
TrainF <- dplyr::filter(.data = BBDDCompleta, BBDD == "Train")
TestF <- dplyr::filter(.data = BBDDCompleta, BBDD == "Test")
## REGRESSION 1 - LINEAR REGRESSION
# Correlation analysis: Pearson correlation of each candidate numeric
# predictor against SalePrice on the training rows.
# LotArea: weak positive correlation (r = 0.26).
cor.test(TrainF$SalePrice,TrainF$LotArea,method="pearson")
##
## Pearson's product-moment correlation
##
## data: TrainF$SalePrice and TrainF$LotArea
## t = 10.445, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2154574 0.3109369
## sample estimates:
## cor
## 0.2638434
# GrLivArea: strongest correlation in this screening (r = 0.71).
# NOTE(review): Model1 below does not include GrLivArea; it uses X1stFlrSF and
# X2ndFlrSF instead — presumably to avoid overlapping area measures, confirm.
cor.test(TrainF$SalePrice,TrainF$GrLivArea,method="pearson")
##
## Pearson's product-moment correlation
##
## data: TrainF$SalePrice and TrainF$GrLivArea
## t = 38.348, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6821200 0.7332695
## sample estimates:
## cor
## 0.7086245
# OverallCond: very low correlation (r = -0.08).
# NOTE(review): the original note said the variable is eliminated because of
# the low correlation, but Model1 below still includes OverallCond — confirm
# which one is intended.
cor.test(TrainF$SalePrice,TrainF$OverallCond,method="pearson")
##
## Pearson's product-moment correlation
##
## data: TrainF$SalePrice and TrainF$OverallCond
## t = -2.9819, df = 1458, p-value = 0.002912
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.12864437 -0.02666008
## sample estimates:
## cor
## -0.07785589
# YearBuilt: moderate positive correlation (r = 0.52).
cor.test(TrainF$SalePrice,TrainF$YearBuilt,method="pearson")
##
## Pearson's product-moment correlation
##
## data: TrainF$SalePrice and TrainF$YearBuilt
## t = 23.424, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4845947 0.5591987
## sample estimates:
## cor
## 0.5228973
# YearRemodAdd: moderate positive correlation (r = 0.51).
cor.test(TrainF$SalePrice,TrainF$YearRemodAdd,method="pearson")
##
## Pearson's product-moment correlation
##
## data: TrainF$SalePrice and TrainF$YearRemodAdd
## t = 22.466, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4679732 0.5442445
## sample estimates:
## cor
## 0.507101
# TotalBsmtSF: fairly strong positive correlation (r = 0.61).
cor.test(TrainF$SalePrice,TrainF$TotalBsmtSF,method="pearson")
##
## Pearson's product-moment correlation
##
## data: TrainF$SalePrice and TrainF$TotalBsmtSF
## t = 29.671, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5805529 0.6445923
## sample estimates:
## cor
## 0.6135806
# BsmtFinSF1: moderate-to-weak positive correlation (r = 0.39).
cor.test(TrainF$SalePrice,TrainF$BsmtFinSF1,method="pearson")
##
## Pearson's product-moment correlation
##
## data: TrainF$SalePrice and TrainF$BsmtFinSF1
## t = 15.998, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3418953 0.4292133
## sample estimates:
## cor
## 0.3864198
# GarageArea: fairly strong positive correlation (r = 0.62).
cor.test(TrainF$SalePrice,TrainF$GarageArea,method="pearson")
##
## Pearson's product-moment correlation
##
## data: TrainF$SalePrice and TrainF$GarageArea
## t = 30.446, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5910324 0.6538222
## sample estimates:
## cor
## 0.6234314
# X1stFlrSF: fairly strong positive correlation (r = 0.61).
cor.test(TrainF$SalePrice,TrainF$X1stFlrSF,method="pearson")
##
## Pearson's product-moment correlation
##
## data: TrainF$SalePrice and TrainF$X1stFlrSF
## t = 29.078, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5723391 0.6373448
## sample estimates:
## cor
## 0.6058522
# X2ndFlrSF: weak positive correlation (r = 0.32).
cor.test(TrainF$SalePrice,TrainF$X2ndFlrSF,method="pearson")
##
## Pearson's product-moment correlation
##
## data: TrainF$SalePrice and TrainF$X2ndFlrSF
## t = 12.867, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2724957 0.3646620
## sample estimates:
## cor
## 0.3193338
# BedroomAbvGr: weak positive correlation (r = 0.17).
cor.test(TrainF$SalePrice,TrainF$BedroomAbvGr,method="pearson")
##
## Pearson's product-moment correlation
##
## data: TrainF$SalePrice and TrainF$BedroomAbvGr
## t = 6.5159, df = 1458, p-value = 9.927e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1179285 0.2176373
## sample estimates:
## cor
## 0.1682132
# KitchenAbvGr: weak negative correlation (r = -0.14).
cor.test(TrainF$SalePrice,TrainF$KitchenAbvGr,method="pearson")
##
## Pearson's product-moment correlation
##
## data: TrainF$SalePrice and TrainF$KitchenAbvGr
## t = -5.2381, df = 1458, p-value = 1.86e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.18591342 -0.08519911
## sample estimates:
## cor
## -0.1359074
# TotRmsAbvGrd: moderate positive correlation (r = 0.53).
cor.test(TrainF$SalePrice,TrainF$TotRmsAbvGrd,method="pearson")
##
## Pearson's product-moment correlation
##
## data: TrainF$SalePrice and TrainF$TotRmsAbvGrd
## t = 24.099, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4960020 0.5694337
## sample estimates:
## cor
## 0.5337232
# First model: ordinary least-squares regression of SalePrice on the numeric
# predictors below, fitted on the cleaned training rows.
# The term order is kept unchanged so the coefficient table lists them in the
# same order.
Model1 <- lm(
  formula = SalePrice ~ LotArea + OverallQual + OverallCond + YearBuilt +
    YearRemodAdd + TotalBsmtSF + BsmtFinSF1 + GarageArea + X1stFlrSF +
    X2ndFlrSF + BedroomAbvGr + KitchenAbvGr + TotRmsAbvGrd,
  data = TrainF
)
# Coefficients, residual summary and fit statistics.
summary(Model1)
##
## Call:
## lm(formula = SalePrice ~ LotArea + OverallQual + OverallCond +
## YearBuilt + YearRemodAdd + TotalBsmtSF + BsmtFinSF1 + GarageArea +
## X1stFlrSF + X2ndFlrSF + BedroomAbvGr + KitchenAbvGr + TotRmsAbvGrd,
## data = TrainF)
##
## Residuals:
## Min 1Q Median 3Q Max
## -564411 -16505 -1784 13107 287932
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.083e+06 1.152e+05 -9.400 < 2e-16 ***
## LotArea 5.317e-01 1.017e-01 5.229 1.95e-07 ***
## OverallQual 1.836e+04 1.176e+03 15.614 < 2e-16 ***
## OverallCond 4.562e+03 1.030e+03 4.429 1.02e-05 ***
## YearBuilt 3.613e+02 5.227e+01 6.912 7.14e-12 ***
## YearRemodAdd 1.535e+02 6.574e+01 2.334 0.0197 *
## TotalBsmtSF 1.038e+01 4.246e+00 2.445 0.0146 *
## BsmtFinSF1 1.588e+01 2.533e+00 6.270 4.75e-10 ***
## GarageArea 3.495e+01 5.896e+00 5.928 3.82e-09 ***
## X1stFlrSF 5.609e+01 5.491e+00 10.214 < 2e-16 ***
## X2ndFlrSF 4.502e+01 3.993e+00 11.277 < 2e-16 ***
## BedroomAbvGr -9.486e+03 1.701e+03 -5.576 2.94e-08 ***
## KitchenAbvGr -2.579e+04 4.811e+03 -5.360 9.66e-08 ***
## TotRmsAbvGrd 6.602e+03 1.246e+03 5.299 1.35e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 36010 on 1446 degrees of freedom
## Multiple R-squared: 0.7963, Adjusted R-squared: 0.7945
## F-statistic: 434.9 on 13 and 1446 DF, p-value: < 2.2e-16
# Score the cleaned test rows with the linear model.
PredTest <- predict(object = Model1, newdata = TestF, type = "response")
# Keep the predictions alongside the cleaned test data.
TestF$SalePrice <- PredTest
# Submission table: ids are taken from the raw Test frame, prices from the model.
MySubmission <- data.frame(ID = Test$Id, SalePrice = PredTest)
Se realizan las correcciones y los ajustes necesarios sobre las predicciones obtenidas
# Count the predictions that came back as NA (2 of the 1459 test rows).
table(is.na(MySubmission$SalePrice))
##
## FALSE TRUE
## 1457 2
# Distribution of the predicted prices.
hist(MySubmission$SalePrice)
# Fill the NA predictions with the rounded median of the available predictions.
MySubmission$SalePrice <- replace(
  MySubmission$SalePrice,
  is.na(MySubmission$SalePrice),
  round(median(MySubmission$SalePrice, na.rm = TRUE))
)
# Print the predictions in ascending order to inspect their range.
sort(MySubmission$SalePrice)
## [1] -8811.688 -4538.751 -1000.422 4586.368 10386.859 11378.140
## [7] 11834.357 13503.969 19095.665 21290.052 21422.835 22280.450
## [13] 23053.013 23752.017 30053.455 31659.384 32442.220 34155.865
## [19] 35566.079 36390.893 36429.056 38575.721 39487.621 39868.795
## [25] 39894.446 44504.693 47239.970 47680.881 49921.940 51541.431
## [31] 52646.575 52972.287 54370.515 55224.781 56342.492 56785.436
## [37] 58189.392 62115.990 62388.432 62680.062 63276.901 63541.018
## [43] 64890.560 66967.223 66994.153 67230.675 67640.143 68207.747
## [49] 68360.527 69363.820 69700.415 70098.553 70796.395 71562.699
## [55] 72544.331 72765.746 72843.056 72965.269 73017.905 73065.628
## [61] 73153.505 75078.406 75391.455 75650.617 76930.372 76941.007
## [67] 76982.387 77556.496 78284.445 78383.016 78396.503 78435.408
## [73] 79533.009 79608.551 79813.211 81797.250 81865.642 82194.237
## [79] 82471.518 83012.319 83242.240 84051.653 84082.159 84119.966
## [85] 84174.213 84186.290 84440.990 85641.645 85744.036 87976.957
## [91] 88090.947 88436.384 88555.446 88569.017 89282.933 89291.868
## [97] 89844.175 90236.518 90435.125 91670.983 92630.882 92642.625
## [103] 92892.183 92993.896 93880.053 94381.148 94461.580 94641.954
## [109] 94723.009 94974.179 95544.582 95546.406 95580.585 95604.879
## [115] 95723.254 95945.809 95982.889 96317.935 96379.085 96757.726
## [121] 97650.164 97685.677 97699.165 97930.452 98098.077 98930.983
## [127] 98980.694 99051.641 99188.508 99341.093 100112.685 100927.866
## [133] 101256.008 102507.915 102629.029 102639.748 102683.122 103048.123
## [139] 103230.548 103349.306 103378.334 103469.049 103531.797 103535.480
## [145] 103554.797 103702.858 104222.175 104347.189 104804.071 105082.223
## [151] 105163.240 105214.478 105324.896 105367.495 105576.911 105843.858
## [157] 105858.512 105891.365 105991.893 106081.463 106175.953 106279.896
## [163] 106323.383 106687.204 106779.118 106802.903 106808.320 106809.275
## [169] 107113.123 107122.016 107498.079 107791.875 108201.238 108290.995
## [175] 108296.648 108333.967 108532.185 108695.206 108743.899 108937.653
## [181] 109071.798 109286.471 109403.223 109561.148 109853.717 110038.393
## [187] 110230.412 110320.260 110565.615 111382.547 111398.403 111448.962
## [193] 111648.515 111725.072 111831.582 111850.012 112106.900 112132.331
## [199] 112480.492 112503.705 112533.070 112561.752 112613.455 112675.434
## [205] 112809.948 113065.482 113122.152 113235.962 113632.043 113685.544
## [211] 113782.489 113839.143 113904.637 113911.904 114080.593 114102.498
## [217] 114169.456 114347.910 114490.486 114645.713 114662.264 114751.258
## [223] 114758.496 114792.547 114801.046 114817.935 114902.548 115067.946
## [229] 115123.973 115362.267 115481.481 116043.991 116281.894 116465.714
## [235] 116573.217 116646.440 116924.578 116977.357 117014.653 117092.550
## [241] 117227.433 117233.474 117271.622 117283.748 117356.953 117381.393
## [247] 117605.215 117664.527 117948.215 117964.435 118020.297 118020.922
## [253] 118062.724 118191.631 118245.275 118279.299 118387.700 118408.889
## [259] 118498.871 118555.499 118615.119 118837.610 118943.493 118965.214
## [265] 119076.619 119085.323 119162.964 119227.168 119231.256 119281.741
## [271] 119296.717 119319.772 119382.797 119451.041 119501.548 119663.076
## [277] 119753.601 120051.439 120064.107 120259.502 120383.758 120417.054
## [283] 120452.652 120521.138 120547.719 120623.425 120728.760 121136.446
## [289] 121376.437 121382.266 121447.011 121553.672 121683.773 121756.409
## [295] 121836.387 121993.789 122049.573 122051.566 122087.109 122124.078
## [301] 122188.423 122254.836 122352.974 122603.280 122614.146 122650.115
## [307] 123062.396 123145.800 123215.892 123376.229 123547.075 123583.382
## [313] 123718.590 124089.852 124101.982 124137.242 124149.758 124217.435
## [319] 124348.636 124349.065 124373.280 124384.506 124530.969 124607.825
## [325] 124659.101 124768.581 124838.335 124873.038 124918.737 125249.287
## [331] 125365.629 125507.540 125561.145 125768.203 125787.806 125818.407
## [337] 125832.666 125890.034 125894.061 125912.830 126097.530 126261.402
## [343] 126303.338 126319.675 126774.586 126912.450 126930.025 126933.051
## [349] 127363.918 127469.093 127502.679 127537.865 127570.789 127577.331
## [355] 127928.896 127966.319 128015.419 128086.162 128740.545 128916.545
## [361] 128964.433 128972.841 129126.617 129279.617 129289.784 129368.416
## [367] 129412.546 129546.493 129771.619 129864.412 130151.263 130205.821
## [373] 130307.857 130339.568 130350.769 130514.641 130579.686 130864.689
## [379] 131032.488 131045.378 131064.473 131098.829 131103.851 131106.574
## [385] 131156.782 131227.991 131553.378 131584.569 131610.497 131631.477
## [391] 131741.536 132239.623 132263.943 132280.708 132364.983 132479.750
## [397] 132508.189 132527.425 132535.450 132651.864 132669.177 132670.084
## [403] 132695.386 132713.878 132796.365 132937.761 133354.567 133521.849
## [409] 133754.907 133811.489 133825.122 133920.410 133945.985 133956.513
## [415] 134204.997 134261.790 134346.227 134458.229 134475.690 134481.123
## [421] 134500.749 134534.898 134584.392 134602.172 134668.207 134739.300
## [427] 135014.206 135027.373 135182.512 135296.296 135411.047 135551.491
## [433] 135582.470 135704.350 135774.316 135780.467 135907.357 135954.447
## [439] 135980.606 136041.475 136114.645 136131.536 136153.675 136167.297
## [445] 136411.446 136517.736 136522.157 136607.547 136884.716 136992.737
## [451] 137036.265 137226.948 137243.690 137378.888 137471.109 137553.422
## [457] 137608.274 137619.237 137751.519 137754.311 137779.094 137939.047
## [463] 138033.777 138064.055 138434.582 138441.252 138449.782 138591.011
## [469] 138633.613 138728.717 138742.818 138793.938 138796.887 138893.256
## [475] 138978.193 138981.451 139003.090 139017.017 139076.508 139078.078
## [481] 139133.740 139140.210 139263.011 139572.570 139636.753 139667.424
## [487] 139731.468 139831.975 140193.252 140543.299 140945.958 140997.203
## [493] 141086.041 141129.688 141380.625 141408.757 141500.316 141501.902
## [499] 141502.858 141540.028 141754.717 141759.667 141964.882 142003.730
## [505] 142071.157 142100.767 142217.182 142354.596 142381.238 142438.633
## [511] 142441.436 142460.394 142661.677 142860.526 143094.607 143197.102
## [517] 143232.395 143361.385 143443.762 143579.262 143618.431 143729.609
## [523] 143749.520 143797.606 143813.668 144037.045 144059.750 144140.577
## [529] 144151.751 144335.658 144396.666 144479.116 144523.271 144598.721
## [535] 144874.784 144925.334 144975.247 145059.588 145101.291 145179.200
## [541] 145207.022 145247.847 145265.878 145380.059 145442.175 145557.250
## [547] 145694.510 145708.943 145781.614 145817.894 145914.318 145947.662
## [553] 146088.349 146101.497 146150.284 146177.060 146224.626 146234.695
## [559] 146235.614 146254.234 146292.176 146427.743 146605.221 146605.835
## [565] 146699.698 146717.416 146876.530 146945.211 146950.926 147530.512
## [571] 147707.792 147836.126 147845.853 147853.746 147945.303 148020.149
## [577] 148255.950 148387.203 148411.071 148851.707 148913.209 148994.288
## [583] 149032.147 149100.694 149441.289 149473.472 149499.809 149568.208
## [589] 149574.928 149612.975 149851.547 150135.059 150139.442 150234.922
## [595] 150244.272 150373.498 150534.519 150959.577 151102.502 151252.640
## [601] 151254.722 152626.140 152933.533 153310.007 153347.750 153408.731
## [607] 153579.089 153602.595 153626.892 153763.384 153833.813 154021.205
## [613] 154106.579 154208.757 154548.678 154597.853 154667.766 154676.032
## [619] 154868.682 155007.020 155213.068 155237.436 155420.207 155488.340
## [625] 155490.699 155507.297 156000.661 156056.642 156102.407 156147.606
## [631] 156157.725 156188.058 156508.930 156568.462 156817.095 156863.476
## [637] 156871.209 156903.015 156970.082 157026.303 157547.040 157685.502
## [643] 157762.194 158124.994 158272.019 158346.092 158484.404 158493.845
## [649] 158540.885 158574.987 158599.044 158637.458 158708.013 158730.139
## [655] 158748.353 158771.734 158957.078 158971.111 158999.031 159101.382
## [661] 159260.260 159419.963 159436.240 159519.680 159570.343 159673.601
## [667] 159746.742 159769.143 160128.601 160340.949 160470.338 160514.850
## [673] 160590.508 160598.702 160676.836 160681.140 160735.207 160799.886
## [679] 160973.428 161527.428 161527.569 161729.742 161787.838 161816.497
## [685] 161845.391 161891.535 162130.124 162660.315 162662.476 162695.135
## [691] 162762.114 163047.679 163167.261 163198.791 163378.820 163501.628
## [697] 163675.213 163696.456 163992.427 164091.893 164149.684 164190.602
## [703] 164197.482 164203.903 164423.266 164431.747 164632.831 164682.385
## [709] 164926.661 164984.356 165051.435 165096.826 165120.474 165120.474
## [715] 165225.430 165474.618 165672.250 165814.375 166081.493 166135.752
## [721] 166217.516 166386.846 166437.084 166713.414 167002.768 167006.780
## [727] 167112.628 167245.186 167301.000 167301.000 167301.287 167366.252
## [733] 168020.142 168298.665 168470.513 168471.378 168537.298 168647.953
## [739] 168654.924 168795.349 168983.294 169004.272 169094.022 169142.103
## [745] 169476.333 169589.815 169680.731 169751.042 170049.054 170304.775
## [751] 170310.079 170520.171 170607.501 170732.272 170771.861 171203.810
## [757] 171241.952 171361.356 171370.698 171377.921 171456.311 171755.432
## [763] 171771.938 171814.489 171915.051 172068.986 172123.461 172209.294
## [769] 172230.104 172355.811 172436.225 172492.497 172769.203 172976.111
## [775] 173003.846 173196.863 173286.050 173334.965 173340.871 173443.883
## [781] 173446.108 173465.909 173556.345 173898.868 174379.690 174394.323
## [787] 174426.800 174494.035 174736.610 174974.307 175029.942 175234.237
## [793] 175354.255 175366.848 175549.202 175939.385 176064.859 176231.217
## [799] 176251.390 176309.177 176382.501 176749.971 176758.745 177028.706
## [805] 177064.585 177159.191 177409.266 177438.981 177510.300 177568.608
## [811] 177630.805 177746.169 177853.795 177914.041 178361.988 178643.421
## [817] 178672.231 178776.985 178898.299 178943.723 178974.462 179009.479
## [823] 179347.390 179362.707 179405.584 179439.880 179762.623 179785.715
## [829] 179902.387 179972.918 180068.029 180155.657 180201.540 180304.524
## [835] 180691.324 180737.442 180767.799 180887.322 181044.134 181050.662
## [841] 181117.726 181133.419 181239.464 182374.189 182445.232 182704.086
## [847] 183756.950 183763.165 183908.943 183943.530 184025.741 184113.355
## [853] 184243.901 184347.881 184495.454 184749.633 185391.266 185517.339
## [859] 185596.469 185651.152 185717.087 185822.430 185889.071 185906.676
## [865] 186239.436 186375.061 186413.233 186583.956 186644.766 187031.393
## [871] 187144.810 187283.514 187511.998 187728.055 187886.882 188042.191
## [877] 188206.078 188334.868 188425.433 188426.197 188797.644 188815.170
## [883] 188901.906 188983.937 189158.164 189433.481 189627.320 190043.463
## [889] 190061.957 190728.971 190761.924 190822.382 190885.740 191195.831
## [895] 191266.187 191402.101 191441.666 191576.669 191747.771 191912.922
## [901] 191999.340 192043.811 192096.013 192289.228 192454.391 192526.236
## [907] 192537.092 192643.087 192718.419 192896.704 193144.136 193626.144
## [913] 193788.989 194269.111 194483.901 194517.036 194615.493 194970.499
## [919] 195047.097 195339.326 195367.123 195502.732 195594.594 195630.645
## [925] 195749.653 195797.984 195844.266 195868.246 195955.068 196057.538
## [931] 196165.698 196186.938 196229.510 196372.846 196392.004 196473.339
## [937] 196508.324 196763.270 197067.403 197114.349 197332.335 197435.430
## [943] 197446.168 197683.148 197727.922 197836.076 198219.334 198260.408
## [949] 198781.481 198975.768 198981.656 199183.584 199199.475 199400.320
## [955] 199451.371 199458.742 199602.961 199658.249 199732.618 199799.032
## [961] 199812.518 199820.325 199932.161 200410.570 200413.196 200591.250
## [967] 200694.544 200695.200 200903.455 201276.798 201424.514 201443.776
## [973] 201540.163 201717.579 201785.621 201865.615 201882.693 201968.675
## [979] 201990.113 202030.815 202045.939 202196.603 202278.544 202374.449
## [985] 202570.076 202729.712 202809.031 203170.691 203189.026 203374.162
## [991] 203424.465 203459.430 203462.816 203534.287 203657.328 203699.500
## [997] 203765.822 203855.742 204103.229 204236.260 204551.954 204720.142
## [1003] 204871.043 204970.393 204991.498 205531.812 205655.912 205715.805
## [1009] 205738.698 205970.259 206011.472 206047.629 206107.308 206211.597
## [1015] 206364.565 206486.920 206507.422 206841.964 206950.292 207185.253
## [1021] 207542.590 207544.721 207618.413 207649.617 207738.329 208398.159
## [1027] 208868.396 208924.302 209127.520 209237.011 209770.862 209798.303
## [1033] 209872.573 210175.029 210274.469 210314.630 210326.674 210999.174
## [1039] 211472.177 211734.516 211848.437 211896.653 212149.544 212272.742
## [1045] 212629.539 212682.378 213082.978 213419.887 213561.624 213984.026
## [1051] 214105.687 214163.041 214231.687 214254.633 214430.511 214455.051
## [1057] 214575.573 214687.679 214901.445 215014.400 215040.323 215056.609
## [1063] 215258.405 215572.300 215599.911 216412.129 216701.431 216885.677
## [1069] 216893.641 216989.119 217225.400 217735.757 217829.713 217892.965
## [1075] 217921.304 218111.567 218114.684 218333.208 218363.109 218550.616
## [1081] 218612.681 218717.645 218818.163 219225.705 219265.387 219406.283
## [1087] 219843.973 220193.561 220581.769 220812.911 220864.211 221040.816
## [1093] 221248.427 221382.479 221587.039 221605.910 221607.676 221721.861
## [1099] 221803.031 221814.411 221817.481 222151.652 222211.056 222618.605
## [1105] 222626.531 222805.688 222881.706 223117.434 223224.519 223439.175
## [1111] 223951.735 224097.586 224105.402 224458.657 224565.705 224647.383
## [1117] 224764.857 224779.484 224791.291 224851.553 224930.803 224980.386
## [1123] 224989.997 225034.884 225064.038 225436.942 225503.075 225545.243
## [1129] 225576.295 225655.195 225666.193 225769.378 226155.846 226280.700
## [1135] 226727.979 226787.869 227216.390 227436.719 227542.120 228003.827
## [1141] 228767.794 228857.146 229413.478 229545.951 229548.619 229624.119
## [1147] 229850.610 229967.208 230225.341 230240.406 230327.425 230414.159
## [1153] 230619.878 230798.595 230804.909 231036.282 231372.069 231507.928
## [1159] 231623.778 231727.977 231833.935 231870.135 231943.052 232043.752
## [1165] 232539.343 232603.557 232609.196 232825.320 234027.498 234225.945
## [1171] 234273.987 234351.844 234406.551 234546.724 234728.966 235345.197
## [1177] 235408.712 235475.433 235702.713 235770.309 235982.864 236417.351
## [1183] 238170.526 238281.420 238284.304 238467.396 238487.070 238548.203
## [1189] 238611.898 239005.423 239166.245 239268.895 239285.815 239318.735
## [1195] 239335.669 239343.919 239432.588 239495.688 239599.897 239788.173
## [1201] 239853.213 240278.506 240405.868 240549.707 240835.885 242032.512
## [1207] 242045.756 242386.374 242481.007 242829.420 243111.099 243176.397
## [1213] 243290.765 243350.229 243574.740 243663.907 243815.191 243828.597
## [1219] 243990.877 244030.415 244446.567 244542.536 244812.615 245141.583
## [1225] 245358.460 245736.673 246013.663 246400.959 246446.200 246667.771
## [1231] 246761.759 246779.867 246861.508 247051.041 247110.358 247636.536
## [1237] 248192.632 248641.166 248686.843 248909.121 249144.511 249715.077
## [1243] 249786.354 250362.832 250548.640 250690.283 250711.486 251129.786
## [1249] 251171.316 252397.501 252633.649 252699.358 253248.607 254148.764
## [1255] 254355.951 254408.527 254853.674 255196.161 255697.614 256126.173
## [1261] 256579.489 256708.794 257141.543 257152.294 257158.419 257575.038
## [1267] 257737.022 258079.592 258462.911 259137.398 259402.262 259572.242
## [1273] 259872.488 260077.570 260269.072 260270.880 260438.381 260453.597
## [1279] 260624.976 260905.197 261742.141 262071.400 262225.802 262534.334
## [1285] 262673.881 262700.825 262736.301 262972.460 263023.510 263096.421
## [1291] 263331.563 263618.983 263644.524 263917.635 263941.169 264590.732
## [1297] 264805.519 265066.220 265619.190 265890.376 267125.134 267376.555
## [1303] 267442.383 268431.272 268433.072 269277.548 269485.163 269984.437
## [1309] 270117.471 270205.373 270594.152 271247.000 271638.628 271666.090
## [1315] 272046.460 272447.986 272938.092 273174.887 273471.693 273673.889
## [1321] 273772.887 273793.407 273927.874 274017.408 275076.273 275130.553
## [1327] 275198.968 275213.525 275872.161 276908.655 276937.018 277332.258
## [1333] 277907.519 278119.811 278213.058 278560.500 279559.889 279765.628
## [1339] 279929.074 279985.570 280201.167 280561.202 280768.208 281202.528
## [1345] 282023.637 283887.339 283926.364 284131.858 285957.613 286743.985
## [1351] 286954.253 287073.281 288478.444 288875.949 289098.938 289237.581
## [1357] 289308.052 290231.101 290341.246 291002.312 291556.974 292209.556
## [1363] 292916.563 293243.254 293281.865 293390.410 293693.136 294551.510
## [1369] 295174.401 295913.024 296033.989 296775.375 296938.132 297775.118
## [1375] 298089.870 298867.753 299904.686 300492.886 301727.172 302617.395
## [1381] 302912.465 302937.869 303517.138 303518.502 304288.988 305361.168
## [1387] 305632.213 305697.883 306143.775 306955.101 307717.974 307832.726
## [1393] 308497.813 309166.587 309224.250 309459.225 309599.033 310324.289
## [1399] 311179.887 312563.466 314230.735 314332.398 314678.114 314771.298
## [1405] 315121.425 315147.408 316001.706 317590.528 319347.402 321123.598
## [1411] 322223.719 322537.271 323272.081 323659.906 324359.437 325001.431
## [1417] 326300.660 326536.671 326621.764 328769.397 328964.800 329462.856
## [1423] 329829.633 332296.019 332334.501 334443.838 335872.478 335974.519
## [1429] 336097.414 338180.781 338795.305 340436.142 342410.532 342968.140
## [1435] 345222.883 354018.443 354992.058 355990.767 358938.298 359738.879
## [1441] 361066.227 362065.260 362940.930 368229.221 369412.622 370166.744
## [1447] 374961.510 379318.406 381128.306 382250.316 382330.739 384146.534
## [1453] 384957.749 387268.194 391542.733 400300.945 403827.251 411970.076
## [1459] 674890.953
# Lowest sale price observed in the training set; used as the floor below.
min(TrainF$SalePrice)
## [1] 34900
# Raise every prediction below the training minimum (this includes the negative
# predictions) up to that minimum. The floor is computed from TrainF instead of
# being hard-coded, so it stays correct if the training data changes.
MySubmission$SalePrice <- pmax(MySubmission$SalePrice, min(TrainF$SalePrice))
# Kaggle error for this submission: 0.19887
write.csv(MySubmission, "Submission.csv", row.names=FALSE)
##SEGUNDO MODELO: ÁRBOL
A continuación se analiza el conjunto de datos mediante un modelo de árbol de regresión
library(caTools)
library(rpart)
library(rpart.plot)
# Second model: regression tree (rpart) for SalePrice on the cleaned training
# rows, mixing categorical and numeric predictors.
# NOTE: an earlier specification (which additionally used LotConfig,
# Neighborhood, YearRemodAdd, BsmtQual and YrSold) used to be fitted into the
# same FirstTree object and was overwritten immediately by this one. That
# discarded fit was dead code and has been removed.
FirstTree <- rpart(
  SalePrice ~ MSSubClass + MSZoning + LotFrontage + LandSlope + Foundation +
    SaleType + SaleCondition + LotArea + OverallQual + OverallCond +
    YearBuilt + BsmtFinSF1 + GarageArea + X1stFlrSF + X2ndFlrSF +
    BedroomAbvGr + KitchenAbvGr + TotRmsAbvGrd + BldgType + HouseStyle +
    Functional + RoofStyle + ExterQual,
  data = TrainF
)
# Plot the fitted tree.
prp(FirstTree)
# Predict on the cleaned test rows (TestF), i.e. the same preprocessing the
# tree was trained on via TrainF, instead of the raw Test frame. This mirrors
# how the linear model is scored.
PredTest2 <- predict(FirstTree, newdata = TestF)
# Store the tree predictions on the raw Test frame, as before.
Test$SalePrice <- PredTest2
# Submission table for the tree model (overwrites the linear-model table).
MySubmission <- data.frame(ID = Test$Id, SalePrice = PredTest2)
#Revisando los resultados
Se presentan los resultados del modelo: primero las predicciones ordenadas y después su histograma
# Print the tree's predictions in ascending order to inspect their range and
# how many distinct values occur.
sort(MySubmission$SalePrice)
## [1] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [9] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [17] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [25] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [33] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [41] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [49] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [57] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [65] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [73] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [81] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [89] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [97] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [105] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [113] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [121] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [129] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [137] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [145] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [153] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [161] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [169] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [177] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [185] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [193] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [201] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [209] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [217] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [225] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [233] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [241] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [249] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [257] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [265] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [273] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [281] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [289] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [297] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [305] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [313] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [321] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [329] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [337] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
## [345] 114127.0 114127.0 114127.0 129076.5 129076.5 129076.5 129076.5 129076.5
## [353] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
## [361] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
## [369] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
## [377] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
## [385] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
## [393] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
## [401] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
## [409] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
## [417] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
## [425] 129076.5 129076.5 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [433] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [441] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [449] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [457] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [465] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [473] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [481] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [489] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [497] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [505] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [513] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [521] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [529] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [537] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [545] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [553] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [561] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [569] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [577] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [585] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [593] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [601] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [609] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [617] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [625] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [633] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [641] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
## [649] 143465.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [657] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [665] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [673] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [681] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [689] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [697] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [705] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [713] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [721] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [729] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [737] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [745] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [753] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [761] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [769] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [777] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [785] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [793] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [801] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [809] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [817] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [825] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [833] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [841] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [849] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [857] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [865] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [873] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [881] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [889] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [897] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [905] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [913] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
## [921] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 194507.5
## [929] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [937] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [945] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [953] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [961] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [969] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [977] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [985] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [993] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1001] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1009] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1017] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1025] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1033] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1041] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1049] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1057] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1065] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1073] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1081] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1089] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1097] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1105] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1113] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1121] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1129] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1137] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1145] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 236143.5
## [1153] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1161] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1169] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1177] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1185] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1193] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1201] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1209] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1217] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1225] 236143.5 236143.5 258744.6 258744.6 258744.6 258744.6 258744.6 258744.6
## [1233] 258744.6 258744.6 258744.6 258744.6 258744.6 258744.6 258744.6 258744.6
## [1241] 258744.6 258744.6 258744.6 258744.6 258744.6 258744.6 258744.6 258744.6
## [1249] 258744.6 258744.6 258744.6 258744.6 259700.6 259700.6 259700.6 259700.6
## [1257] 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6
## [1265] 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6
## [1273] 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6
## [1281] 259700.6 259700.6 259700.6 288220.9 288220.9 288220.9 288220.9 288220.9
## [1289] 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9
## [1297] 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9
## [1305] 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9
## [1313] 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9
## [1321] 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9
## [1329] 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9
## [1337] 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9
## [1345] 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9
## [1353] 288220.9 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1
## [1361] 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1
## [1369] 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1
## [1377] 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1 361253.6 361253.6
## [1385] 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6
## [1393] 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6
## [1401] 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6
## [1409] 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6
## [1417] 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6
## [1425] 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6
## [1433] 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6
## [1441] 361253.6 361253.6 361253.6 489036.8 489036.8 489036.8 489036.8 489036.8
## [1449] 489036.8 489036.8 489036.8 489036.8 489036.8 489036.8 489036.8 489036.8
## [1457] 489036.8 489036.8 489036.8
# Plot the distribution of the tree's predicted prices, then export the
# submission file.
hist(x = MySubmission$SalePrice)
write.csv(x = MySubmission, file = "Submission.csv", row.names = FALSE)
# Kaggle error: 0.23854
##Modelo 3 : RANDOM FOREST
A continuación se analiza el conjunto de datos mediante el modelo random forest
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
library(caret)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin
## Loading required package: lattice
# Third model: random forest regression of SalePrice on 19 predictors.
# randomForest() relies on random resampling, so the seed is fixed to make the
# fit (and the submission derived from it) reproducible. Figures recorded from
# earlier unseeded runs may differ slightly from a seeded run.
set.seed(123)
RandomForest <- randomForest(
  SalePrice ~ MSSubClass + MSZoning + LotArea + Neighborhood + Condition1 +
    Condition2 + HouseStyle + OverallQual + OverallCond + YearBuilt +
    X1stFlrSF + X2ndFlrSF + MoSold + GrLivArea + LandContour + LotConfig +
    BldgType + RoofStyle + Foundation,
  data = TrainF
)
# Lists the components stored in the fitted object.
summary(RandomForest)
## Length Class Mode
## call 3 -none- call
## type 1 -none- character
## predicted 1460 -none- numeric
## mse 500 -none- numeric
## rsq 500 -none- numeric
## oob.times 1460 -none- numeric
## importance 19 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 11 -none- list
## coefs 0 -none- NULL
## y 1460 -none- numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## terms 3 terms call
# Score the test set with the random forest and build the Kaggle submission.
PredTest3 <- predict(RandomForest, newdata = TestF)
TestF$SalePrice <- PredTest3
# NOTE(review): Ids are taken from Test while the predictions come from TestF;
# this assumes both hold the same rows in the same order -- confirm.
MySubmission <- data.frame(ID = Test$Id, SalePrice = PredTest3)
# Export the submission as the other model sections do; without this the file
# on disk would still contain the previous model's predictions.
write.csv(MySubmission, "Submission.csv", row.names = FALSE)
# Inspect the results
hist(MySubmission$SalePrice)
summary(MySubmission$SalePrice)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 55899 130232 159320 178978 207414 534289
#Error en Kaggle 0.14939
#MODELO 4 LASSO
A continuación se analiza el conjunto de datos mediante el modelo LASSO
library(gglasso)
library(tidyverse)
# glmnet() and cv.glmnet() used below are provided by the glmnet package.
library(glmnet)

# One formula shared by the train and test design matrices so that both are
# always built from the same predictors.
lasso_formula <- SalePrice ~ LotArea + Condition1 + OverallQual + OverallCond +
  YearBuilt + X1stFlrSF + X2ndFlrSF + MoSold + GrLivArea + LandContour +
  LotConfig + BldgType + RoofStyle + Foundation

# model.matrix() expands factors into dummy columns; [, -1] drops the
# intercept column because glmnet fits its own intercept.
x_train <- model.matrix(lasso_formula, TrainF)[, -1]
y_train <- TrainF$SalePrice
# NOTE(review): the two matrices only line up column-by-column if TrainF and
# TestF share the same factor levels -- confirm against the data preparation.
x_test <- model.matrix(lasso_formula, TestF)[, -1]
# NOTE(review): TestF$SalePrice holds values written by an earlier section
# (predictions), not observed prices; y_test is not used in this section.
y_test <- TestF$SalePrice

# Fit the full LASSO path (alpha = 1) over 100 penalty values.
LASSO <- glmnet(
  x = x_train,
  y = y_train,
  alpha = 1,
  nlambda = 100,
  standardize = TRUE
)

# Choose the penalty by cross-validation. The folds are assigned at random, so
# the seed is fixed to make the selected lambda reproducible.
set.seed(123)
sal.cv <- cv.glmnet(x_train, y_train, alpha = 1)
plot(sal.cv)
mejor.lambda <- sal.cv$lambda.min
mejor.lambda
log(mejor.lambda)

# Predict from the path fitted above at the selected penalty. The previous
# refit passed mejor.lambda as `nlambda` (the number of penalty values on the
# path), which is not a penalty value, so that refit has been removed.
PredTest4 <- predict(LASSO, s = mejor.lambda, newx = x_test)
# Flatten the one-column prediction matrix to a plain vector; as.integer()
# used to truncate every prediction toward zero.
PredTest4 <- as.numeric(PredTest4)
TestF$SalePrice <- PredTest4
MySubmission <- data.frame(ID = Test$Id, SalePrice = TestF$SalePrice)
# Floor the predictions at the lowest sale price seen in the training set
# (previously hard-coded as 34900).
min_train_price <- min(y_train)
MySubmission$SalePrice[MySubmission$SalePrice < min_train_price] <- min_train_price
hist(MySubmission$SalePrice)
summary(MySubmission$SalePrice)
write.csv(MySubmission, "Submission.csv", row.names = FALSE)
# Kaggle error: 0.17747
#MODELO 5 RIDGE
A continuación se analiza el conjunto de datos mediante el modelo ridge
# One formula shared by the train and test design matrices so that both are
# always built from the same predictors.
ridge_formula <- SalePrice ~ LotArea + Condition1 + OverallQual + OverallCond +
  YearBuilt + X1stFlrSF + X2ndFlrSF + MoSold + GrLivArea + LandContour +
  LotConfig + BldgType + RoofStyle + Foundation

# model.matrix() expands factors into dummy columns; [, -1] drops the
# intercept column because glmnet fits its own intercept.
x_train <- model.matrix(ridge_formula, TrainF)[, -1]
y_train <- TrainF$SalePrice
# NOTE(review): the two matrices only line up column-by-column if TrainF and
# TestF share the same factor levels -- confirm against the data preparation.
x_test <- model.matrix(ridge_formula, TestF)[, -1]
# NOTE(review): TestF$SalePrice holds values written by an earlier section
# (predictions), not observed prices; y_test is not used in this section.
y_test <- TestF$SalePrice

# Fit the full ridge path (alpha = 0) over 100 penalty values.
ridge <- glmnet(
  x = x_train,
  y = y_train,
  alpha = 0,
  nlambda = 100,
  standardize = TRUE
)

# Choose the penalty by cross-validation. The folds are assigned at random, so
# the seed is fixed to make the selected lambda reproducible.
set.seed(123)
sal.cv <- cv.glmnet(x_train, y_train, alpha = 0)
plot(sal.cv)
mejor.lambda <- sal.cv$lambda.min
mejor.lambda
log(mejor.lambda)

# Predict from the path fitted above at the selected penalty. The previous
# refit passed mejor.lambda as `nlambda` (the number of penalty values on the
# path), which is not a penalty value, so that refit has been removed.
PredTest5 <- predict(ridge, s = mejor.lambda, newx = x_test)
# Flatten the one-column prediction matrix to a plain vector; as.integer()
# used to truncate every prediction toward zero.
PredTest5 <- as.numeric(PredTest5)
TestF$SalePrice <- PredTest5
MySubmission <- data.frame(ID = Test$Id, SalePrice = TestF$SalePrice)
# Floor the predictions at the lowest sale price seen in the training set
# (previously hard-coded as 34900).
min_train_price <- min(y_train)
MySubmission$SalePrice[MySubmission$SalePrice < min_train_price] <- min_train_price
hist(MySubmission$SalePrice)
summary(MySubmission$SalePrice)
write.csv(MySubmission, "Submission.csv", row.names = FALSE)
# Kaggle error: 0.17434