title: “Proyecto Final” output: html_document

Integrantes:

#Kelvin Silva Collazos #Vania Cárdenas Vuckovic #Ceysa Bardales Trauco

1. CARGA DE DATOS

Importando las bases de datos y uniendo las bases de datos Train y Test para el tratamiento de los datos

# Load the training and test sets; both CSV files are read from the current
# working directory.
Train <- read.csv("train.csv")
Test <- read.csv("test.csv")

# Give Test a SalePrice column so that both frames can be stacked.
# NOTE(review): the 0 placeholder takes part in every statistic computed on
# BBDDCompleta$SalePrice; restrict to BBDD == "Train" before analysing prices.
Test$SalePrice <- 0

# Tag every row with its origin so the combined frame can be split again.
Test$BBDD <- "Test"
Train$BBDD <- "Train"

# Stack both sets into one frame for joint cleaning and list its columns.
BBDDCompleta <- rbind(Train, Test)
names(BBDDCompleta)
##  [1] "Id"            "MSSubClass"    "MSZoning"      "LotFrontage"  
##  [5] "LotArea"       "Street"        "Alley"         "LotShape"     
##  [9] "LandContour"   "Utilities"     "LotConfig"     "LandSlope"    
## [13] "Neighborhood"  "Condition1"    "Condition2"    "BldgType"     
## [17] "HouseStyle"    "OverallQual"   "OverallCond"   "YearBuilt"    
## [21] "YearRemodAdd"  "RoofStyle"     "RoofMatl"      "Exterior1st"  
## [25] "Exterior2nd"   "MasVnrType"    "MasVnrArea"    "ExterQual"    
## [29] "ExterCond"     "Foundation"    "BsmtQual"      "BsmtCond"     
## [33] "BsmtExposure"  "BsmtFinType1"  "BsmtFinSF1"    "BsmtFinType2" 
## [37] "BsmtFinSF2"    "BsmtUnfSF"     "TotalBsmtSF"   "Heating"      
## [41] "HeatingQC"     "CentralAir"    "Electrical"    "X1stFlrSF"    
## [45] "X2ndFlrSF"     "LowQualFinSF"  "GrLivArea"     "BsmtFullBath" 
## [49] "BsmtHalfBath"  "FullBath"      "HalfBath"      "BedroomAbvGr" 
## [53] "KitchenAbvGr"  "KitchenQual"   "TotRmsAbvGrd"  "Functional"   
## [57] "Fireplaces"    "FireplaceQu"   "GarageType"    "GarageYrBlt"  
## [61] "GarageFinish"  "GarageCars"    "GarageArea"    "GarageQual"   
## [65] "GarageCond"    "PavedDrive"    "WoodDeckSF"    "OpenPorchSF"  
## [69] "EnclosedPorch" "X3SsnPorch"    "ScreenPorch"   "PoolArea"     
## [73] "PoolQC"        "Fence"         "MiscFeature"   "MiscVal"      
## [77] "MoSold"        "YrSold"        "SaleType"      "SaleCondition"
## [81] "SalePrice"     "BBDD"

DATA ANALYSIS

Análisis de la base de datos

Analizando el conjunto de datos de train y test para identificar cualquier anomalía en la data

# Print the type and first values of every column of the combined frame.
str(BBDDCompleta)
## 'data.frame':    2919 obs. of  82 variables:
##  $ Id           : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ MSSubClass   : int  60 20 60 70 60 50 20 60 50 190 ...
##  $ MSZoning     : chr  "RL" "RL" "RL" "RL" ...
##  $ LotFrontage  : int  65 80 68 60 84 85 75 NA 51 50 ...
##  $ LotArea      : int  8450 9600 11250 9550 14260 14115 10084 10382 6120 7420 ...
##  $ Street       : chr  "Pave" "Pave" "Pave" "Pave" ...
##  $ Alley        : chr  NA NA NA NA ...
##  $ LotShape     : chr  "Reg" "Reg" "IR1" "IR1" ...
##  $ LandContour  : chr  "Lvl" "Lvl" "Lvl" "Lvl" ...
##  $ Utilities    : chr  "AllPub" "AllPub" "AllPub" "AllPub" ...
##  $ LotConfig    : chr  "Inside" "FR2" "Inside" "Corner" ...
##  $ LandSlope    : chr  "Gtl" "Gtl" "Gtl" "Gtl" ...
##  $ Neighborhood : chr  "CollgCr" "Veenker" "CollgCr" "Crawfor" ...
##  $ Condition1   : chr  "Norm" "Feedr" "Norm" "Norm" ...
##  $ Condition2   : chr  "Norm" "Norm" "Norm" "Norm" ...
##  $ BldgType     : chr  "1Fam" "1Fam" "1Fam" "1Fam" ...
##  $ HouseStyle   : chr  "2Story" "1Story" "2Story" "2Story" ...
##  $ OverallQual  : int  7 6 7 7 8 5 8 7 7 5 ...
##  $ OverallCond  : int  5 8 5 5 5 5 5 6 5 6 ...
##  $ YearBuilt    : int  2003 1976 2001 1915 2000 1993 2004 1973 1931 1939 ...
##  $ YearRemodAdd : int  2003 1976 2002 1970 2000 1995 2005 1973 1950 1950 ...
##  $ RoofStyle    : chr  "Gable" "Gable" "Gable" "Gable" ...
##  $ RoofMatl     : chr  "CompShg" "CompShg" "CompShg" "CompShg" ...
##  $ Exterior1st  : chr  "VinylSd" "MetalSd" "VinylSd" "Wd Sdng" ...
##  $ Exterior2nd  : chr  "VinylSd" "MetalSd" "VinylSd" "Wd Shng" ...
##  $ MasVnrType   : chr  "BrkFace" "None" "BrkFace" "None" ...
##  $ MasVnrArea   : int  196 0 162 0 350 0 186 240 0 0 ...
##  $ ExterQual    : chr  "Gd" "TA" "Gd" "TA" ...
##  $ ExterCond    : chr  "TA" "TA" "TA" "TA" ...
##  $ Foundation   : chr  "PConc" "CBlock" "PConc" "BrkTil" ...
##  $ BsmtQual     : chr  "Gd" "Gd" "Gd" "TA" ...
##  $ BsmtCond     : chr  "TA" "TA" "TA" "Gd" ...
##  $ BsmtExposure : chr  "No" "Gd" "Mn" "No" ...
##  $ BsmtFinType1 : chr  "GLQ" "ALQ" "GLQ" "ALQ" ...
##  $ BsmtFinSF1   : int  706 978 486 216 655 732 1369 859 0 851 ...
##  $ BsmtFinType2 : chr  "Unf" "Unf" "Unf" "Unf" ...
##  $ BsmtFinSF2   : int  0 0 0 0 0 0 0 32 0 0 ...
##  $ BsmtUnfSF    : int  150 284 434 540 490 64 317 216 952 140 ...
##  $ TotalBsmtSF  : int  856 1262 920 756 1145 796 1686 1107 952 991 ...
##  $ Heating      : chr  "GasA" "GasA" "GasA" "GasA" ...
##  $ HeatingQC    : chr  "Ex" "Ex" "Ex" "Gd" ...
##  $ CentralAir   : chr  "Y" "Y" "Y" "Y" ...
##  $ Electrical   : chr  "SBrkr" "SBrkr" "SBrkr" "SBrkr" ...
##  $ X1stFlrSF    : int  856 1262 920 961 1145 796 1694 1107 1022 1077 ...
##  $ X2ndFlrSF    : int  854 0 866 756 1053 566 0 983 752 0 ...
##  $ LowQualFinSF : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ GrLivArea    : int  1710 1262 1786 1717 2198 1362 1694 2090 1774 1077 ...
##  $ BsmtFullBath : int  1 0 1 1 1 1 1 1 0 1 ...
##  $ BsmtHalfBath : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ FullBath     : int  2 2 2 1 2 1 2 2 2 1 ...
##  $ HalfBath     : int  1 0 1 0 1 1 0 1 0 0 ...
##  $ BedroomAbvGr : int  3 3 3 3 4 1 3 3 2 2 ...
##  $ KitchenAbvGr : int  1 1 1 1 1 1 1 1 2 2 ...
##  $ KitchenQual  : chr  "Gd" "TA" "Gd" "Gd" ...
##  $ TotRmsAbvGrd : int  8 6 6 7 9 5 7 7 8 5 ...
##  $ Functional   : chr  "Typ" "Typ" "Typ" "Typ" ...
##  $ Fireplaces   : int  0 1 1 1 1 0 1 2 2 2 ...
##  $ FireplaceQu  : chr  NA "TA" "TA" "Gd" ...
##  $ GarageType   : chr  "Attchd" "Attchd" "Attchd" "Detchd" ...
##  $ GarageYrBlt  : int  2003 1976 2001 1998 2000 1993 2004 1973 1931 1939 ...
##  $ GarageFinish : chr  "RFn" "RFn" "RFn" "Unf" ...
##  $ GarageCars   : int  2 2 2 3 3 2 2 2 2 1 ...
##  $ GarageArea   : int  548 460 608 642 836 480 636 484 468 205 ...
##  $ GarageQual   : chr  "TA" "TA" "TA" "TA" ...
##  $ GarageCond   : chr  "TA" "TA" "TA" "TA" ...
##  $ PavedDrive   : chr  "Y" "Y" "Y" "Y" ...
##  $ WoodDeckSF   : int  0 298 0 0 192 40 255 235 90 0 ...
##  $ OpenPorchSF  : int  61 0 42 35 84 30 57 204 0 4 ...
##  $ EnclosedPorch: int  0 0 0 272 0 0 0 228 205 0 ...
##  $ X3SsnPorch   : int  0 0 0 0 0 320 0 0 0 0 ...
##  $ ScreenPorch  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PoolArea     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PoolQC       : chr  NA NA NA NA ...
##  $ Fence        : chr  NA NA NA NA ...
##  $ MiscFeature  : chr  NA NA NA NA ...
##  $ MiscVal      : int  0 0 0 0 0 700 0 350 0 0 ...
##  $ MoSold       : int  2 5 9 2 12 10 8 11 4 1 ...
##  $ YrSold       : int  2008 2007 2008 2006 2008 2009 2007 2009 2008 2008 ...
##  $ SaleType     : chr  "WD" "WD" "WD" "WD" ...
##  $ SaleCondition: chr  "Normal" "Normal" "Normal" "Abnorml" ...
##  $ SalePrice    : num  208500 181500 223500 140000 250000 ...
##  $ BBDD         : chr  "Train" "Train" "Train" "Train" ...
# Check whether the training target contains missing values.
table(is.na(Train$SalePrice))
## 
## FALSE 
##  1460
# Per-column summary of the combined frame (quantiles and NA counts for
# numeric columns, length/class for character columns).
summary(BBDDCompleta)
##        Id           MSSubClass       MSZoning          LotFrontage    
##  Min.   :   1.0   Min.   : 20.00   Length:2919        Min.   : 21.00  
##  1st Qu.: 730.5   1st Qu.: 20.00   Class :character   1st Qu.: 59.00  
##  Median :1460.0   Median : 50.00   Mode  :character   Median : 68.00  
##  Mean   :1460.0   Mean   : 57.14                      Mean   : 69.31  
##  3rd Qu.:2189.5   3rd Qu.: 70.00                      3rd Qu.: 80.00  
##  Max.   :2919.0   Max.   :190.00                      Max.   :313.00  
##                                                       NA's   :486     
##     LotArea          Street             Alley             LotShape        
##  Min.   :  1300   Length:2919        Length:2919        Length:2919       
##  1st Qu.:  7478   Class :character   Class :character   Class :character  
##  Median :  9453   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 10168                                                           
##  3rd Qu.: 11570                                                           
##  Max.   :215245                                                           
##                                                                           
##  LandContour         Utilities          LotConfig          LandSlope        
##  Length:2919        Length:2919        Length:2919        Length:2919       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  Neighborhood        Condition1         Condition2          BldgType        
##  Length:2919        Length:2919        Length:2919        Length:2919       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   HouseStyle         OverallQual      OverallCond      YearBuilt   
##  Length:2919        Min.   : 1.000   Min.   :1.000   Min.   :1872  
##  Class :character   1st Qu.: 5.000   1st Qu.:5.000   1st Qu.:1954  
##  Mode  :character   Median : 6.000   Median :5.000   Median :1973  
##                     Mean   : 6.089   Mean   :5.565   Mean   :1971  
##                     3rd Qu.: 7.000   3rd Qu.:6.000   3rd Qu.:2001  
##                     Max.   :10.000   Max.   :9.000   Max.   :2010  
##                                                                    
##   YearRemodAdd   RoofStyle           RoofMatl         Exterior1st       
##  Min.   :1950   Length:2919        Length:2919        Length:2919       
##  1st Qu.:1965   Class :character   Class :character   Class :character  
##  Median :1993   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :1984                                                           
##  3rd Qu.:2004                                                           
##  Max.   :2010                                                           
##                                                                         
##  Exterior2nd         MasVnrType          MasVnrArea      ExterQual        
##  Length:2919        Length:2919        Min.   :   0.0   Length:2919       
##  Class :character   Class :character   1st Qu.:   0.0   Class :character  
##  Mode  :character   Mode  :character   Median :   0.0   Mode  :character  
##                                        Mean   : 102.2                     
##                                        3rd Qu.: 164.0                     
##                                        Max.   :1600.0                     
##                                        NA's   :23                         
##   ExterCond          Foundation          BsmtQual           BsmtCond        
##  Length:2919        Length:2919        Length:2919        Length:2919       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  BsmtExposure       BsmtFinType1         BsmtFinSF1     BsmtFinType2      
##  Length:2919        Length:2919        Min.   :   0.0   Length:2919       
##  Class :character   Class :character   1st Qu.:   0.0   Class :character  
##  Mode  :character   Mode  :character   Median : 368.5   Mode  :character  
##                                        Mean   : 441.4                     
##                                        3rd Qu.: 733.0                     
##                                        Max.   :5644.0                     
##                                        NA's   :1                          
##    BsmtFinSF2        BsmtUnfSF       TotalBsmtSF       Heating         
##  Min.   :   0.00   Min.   :   0.0   Min.   :   0.0   Length:2919       
##  1st Qu.:   0.00   1st Qu.: 220.0   1st Qu.: 793.0   Class :character  
##  Median :   0.00   Median : 467.0   Median : 989.5   Mode  :character  
##  Mean   :  49.58   Mean   : 560.8   Mean   :1051.8                     
##  3rd Qu.:   0.00   3rd Qu.: 805.5   3rd Qu.:1302.0                     
##  Max.   :1526.00   Max.   :2336.0   Max.   :6110.0                     
##  NA's   :1         NA's   :1        NA's   :1                          
##   HeatingQC          CentralAir         Electrical          X1stFlrSF   
##  Length:2919        Length:2919        Length:2919        Min.   : 334  
##  Class :character   Class :character   Class :character   1st Qu.: 876  
##  Mode  :character   Mode  :character   Mode  :character   Median :1082  
##                                                           Mean   :1160  
##                                                           3rd Qu.:1388  
##                                                           Max.   :5095  
##                                                                         
##    X2ndFlrSF       LowQualFinSF        GrLivArea     BsmtFullBath   
##  Min.   :   0.0   Min.   :   0.000   Min.   : 334   Min.   :0.0000  
##  1st Qu.:   0.0   1st Qu.:   0.000   1st Qu.:1126   1st Qu.:0.0000  
##  Median :   0.0   Median :   0.000   Median :1444   Median :0.0000  
##  Mean   : 336.5   Mean   :   4.694   Mean   :1501   Mean   :0.4299  
##  3rd Qu.: 704.0   3rd Qu.:   0.000   3rd Qu.:1744   3rd Qu.:1.0000  
##  Max.   :2065.0   Max.   :1064.000   Max.   :5642   Max.   :3.0000  
##                                                     NA's   :2       
##   BsmtHalfBath        FullBath        HalfBath       BedroomAbvGr 
##  Min.   :0.00000   Min.   :0.000   Min.   :0.0000   Min.   :0.00  
##  1st Qu.:0.00000   1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:2.00  
##  Median :0.00000   Median :2.000   Median :0.0000   Median :3.00  
##  Mean   :0.06136   Mean   :1.568   Mean   :0.3803   Mean   :2.86  
##  3rd Qu.:0.00000   3rd Qu.:2.000   3rd Qu.:1.0000   3rd Qu.:3.00  
##  Max.   :2.00000   Max.   :4.000   Max.   :2.0000   Max.   :8.00  
##  NA's   :2                                                        
##   KitchenAbvGr   KitchenQual         TotRmsAbvGrd     Functional       
##  Min.   :0.000   Length:2919        Min.   : 2.000   Length:2919       
##  1st Qu.:1.000   Class :character   1st Qu.: 5.000   Class :character  
##  Median :1.000   Mode  :character   Median : 6.000   Mode  :character  
##  Mean   :1.045                      Mean   : 6.452                     
##  3rd Qu.:1.000                      3rd Qu.: 7.000                     
##  Max.   :3.000                      Max.   :15.000                     
##                                                                        
##    Fireplaces     FireplaceQu         GarageType         GarageYrBlt  
##  Min.   :0.0000   Length:2919        Length:2919        Min.   :1895  
##  1st Qu.:0.0000   Class :character   Class :character   1st Qu.:1960  
##  Median :1.0000   Mode  :character   Mode  :character   Median :1979  
##  Mean   :0.5971                                         Mean   :1978  
##  3rd Qu.:1.0000                                         3rd Qu.:2002  
##  Max.   :4.0000                                         Max.   :2207  
##                                                         NA's   :159   
##  GarageFinish         GarageCars      GarageArea      GarageQual       
##  Length:2919        Min.   :0.000   Min.   :   0.0   Length:2919       
##  Class :character   1st Qu.:1.000   1st Qu.: 320.0   Class :character  
##  Mode  :character   Median :2.000   Median : 480.0   Mode  :character  
##                     Mean   :1.767   Mean   : 472.9                     
##                     3rd Qu.:2.000   3rd Qu.: 576.0                     
##                     Max.   :5.000   Max.   :1488.0                     
##                     NA's   :1       NA's   :1                          
##   GarageCond         PavedDrive          WoodDeckSF       OpenPorchSF    
##  Length:2919        Length:2919        Min.   :   0.00   Min.   :  0.00  
##  Class :character   Class :character   1st Qu.:   0.00   1st Qu.:  0.00  
##  Mode  :character   Mode  :character   Median :   0.00   Median : 26.00  
##                                        Mean   :  93.71   Mean   : 47.49  
##                                        3rd Qu.: 168.00   3rd Qu.: 70.00  
##                                        Max.   :1424.00   Max.   :742.00  
##                                                                          
##  EnclosedPorch      X3SsnPorch       ScreenPorch        PoolArea      
##  Min.   :   0.0   Min.   :  0.000   Min.   :  0.00   Min.   :  0.000  
##  1st Qu.:   0.0   1st Qu.:  0.000   1st Qu.:  0.00   1st Qu.:  0.000  
##  Median :   0.0   Median :  0.000   Median :  0.00   Median :  0.000  
##  Mean   :  23.1   Mean   :  2.602   Mean   : 16.06   Mean   :  2.252  
##  3rd Qu.:   0.0   3rd Qu.:  0.000   3rd Qu.:  0.00   3rd Qu.:  0.000  
##  Max.   :1012.0   Max.   :508.000   Max.   :576.00   Max.   :800.000  
##                                                                       
##     PoolQC             Fence           MiscFeature           MiscVal        
##  Length:2919        Length:2919        Length:2919        Min.   :    0.00  
##  Class :character   Class :character   Class :character   1st Qu.:    0.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :    0.00  
##                                                           Mean   :   50.83  
##                                                           3rd Qu.:    0.00  
##                                                           Max.   :17000.00  
##                                                                             
##      MoSold           YrSold       SaleType         SaleCondition     
##  Min.   : 1.000   Min.   :2006   Length:2919        Length:2919       
##  1st Qu.: 4.000   1st Qu.:2007   Class :character   Class :character  
##  Median : 6.000   Median :2008   Mode  :character   Mode  :character  
##  Mean   : 6.213   Mean   :2008                                        
##  3rd Qu.: 8.000   3rd Qu.:2009                                        
##  Max.   :12.000   Max.   :2010                                        
##                                                                       
##    SalePrice          BBDD          
##  Min.   :     0   Length:2919       
##  1st Qu.:     0   Class :character  
##  Median : 34900   Mode  :character  
##  Mean   : 90492                     
##  3rd Qu.:163000                     
##  Max.   :755000                     
## 
# MSZoning: count missing values, inspect the level frequencies, then fill
# the NAs with the most frequent level.
table(is.na(BBDDCompleta$MSZoning))
## 
## FALSE  TRUE 
##  2915     4
table(BBDDCompleta$MSZoning)
## 
## C (all)      FV      RH      RL      RM 
##      25     139      26    2265     460
# The mode ("RL" in the table above) is derived from the data rather than
# typed in, so the imputation stays right if the input files change.
BBDDCompleta$MSZoning[is.na(BBDDCompleta$MSZoning)] <-
  names(which.max(table(BBDDCompleta$MSZoning)))

# Street: no missing values, but almost no variation.
table(is.na(BBDDCompleta$Street))
## 
## FALSE 
##  2919
# 1454 of the 1460 training rows (about 99.6%) share the same value, so the
# variable is dropped.
table(Train$Street)
## 
## Grvl Pave 
##    6 1454
# Remove the column by name: a positional index silently deletes a different
# column if this line is run twice or the column order changes.
BBDDCompleta$Street <- NULL

# Alley: mostly missing.
table(is.na(BBDDCompleta$Alley))
## 
## FALSE  TRUE 
##   198  2721
# Share of missing values, computed from the data instead of typed-in counts.
mean(is.na(BBDDCompleta$Alley))
## [1] 0.9321686
# About 93% of the values are missing, so the variable is dropped.
table(BBDDCompleta$Alley)
## 
## Grvl Pave 
##  120   78
# Remove the column by name: a positional index silently deletes a different
# column if this line is run twice or the column order changes.
BBDDCompleta$Alley <- NULL

# Utilities: two missing values and practically a single level.
table(is.na(BBDDCompleta$Utilities))
## 
## FALSE  TRUE 
##  2917     2
# 2916 of the 2917 observed values are identical, so the variable is dropped.
table(BBDDCompleta$Utilities)
## 
## AllPub NoSeWa 
##   2916      1
# Remove the column by name: a positional index silently deletes a different
# column if this line is run twice or the column order changes.
BBDDCompleta$Utilities <- NULL

# RoofMatl: no missing values, one dominant level.
table(is.na(BBDDCompleta$RoofMatl))
## 
## FALSE 
##  2919
table(BBDDCompleta$RoofMatl)
## 
## ClyTile CompShg Membran   Metal    Roll Tar&Grv WdShake WdShngl 
##       1    2876       1       1       1      23       9       7
# Share of the most frequent level, computed from the data instead of typed-in
# counts. About 98.5% of the rows share it, so the variable is dropped.
max(table(BBDDCompleta$RoofMatl)) / nrow(BBDDCompleta)
## [1] 0.9852689
# Remove the column by name: a positional index silently deletes a different
# column if this line is run twice or the column order changes.
BBDDCompleta$RoofMatl <- NULL

# Exterior1st: one missing value, filled with the most frequent level.
table(is.na(BBDDCompleta$Exterior1st))
## 
## FALSE  TRUE 
##  2918     1
table(BBDDCompleta$Exterior1st)
## 
## AsbShng AsphShn BrkComm BrkFace  CBlock CemntBd HdBoard ImStucc MetalSd Plywood 
##      44       2       6      87       2     126     442       1     450     221 
##   Stone  Stucco VinylSd Wd Sdng WdShing 
##       2      43    1025     411      56
# The mode ("VinylSd" in the table above) is derived from the data rather
# than typed in.
BBDDCompleta$Exterior1st[is.na(BBDDCompleta$Exterior1st)] <-
  names(which.max(table(BBDDCompleta$Exterior1st)))

# Exterior2nd: one missing value, filled with the most frequent level.
table(is.na(BBDDCompleta$Exterior2nd))
## 
## FALSE  TRUE 
##  2918     1
table(BBDDCompleta$Exterior2nd)
## 
## AsbShng AsphShn Brk Cmn BrkFace  CBlock CmentBd HdBoard ImStucc MetalSd   Other 
##      38       4      22      47       3     126     406      15     447       1 
## Plywood   Stone  Stucco VinylSd Wd Sdng Wd Shng 
##     270       6      47    1014     391      81
# The mode ("VinylSd" in the table above) is derived from the data rather
# than typed in.
BBDDCompleta$Exterior2nd[is.na(BBDDCompleta$Exterior2nd)] <-
  names(which.max(table(BBDDCompleta$Exterior2nd)))

# MasVnrType: 24 missing values and a dominant "None" level.
table(is.na(BBDDCompleta$MasVnrType))
## 
## FALSE  TRUE 
##  2895    24
table(BBDDCompleta$MasVnrType)
## 
##  BrkCmn BrkFace    None   Stone 
##      25     879    1742     249
# Share of rows that are either "None" or missing, computed from the data
# instead of typed-in counts. It exceeds 60%, so the variable is dropped.
mean(is.na(BBDDCompleta$MasVnrType) | BBDDCompleta$MasVnrType == "None")
## [1] 0.6050017
# Remove the column by name: a positional index silently deletes a different
# column if this line is run twice or the column order changes.
BBDDCompleta$MasVnrType <- NULL

# MasVnrArea: 23 missing values; inspect the distribution before deciding.
table(is.na(BBDDCompleta$MasVnrArea))
## 
## FALSE  TRUE 
##  2896    23
# The extra parentheses are kept on purpose: hist() builds the plot title
# from the text of its argument.
hist((BBDDCompleta$MasVnrArea))

# Share of rows treated as "no information".
# NOTE(review): 1738 is presumably the number of rows with area 0 - confirm.
(1738 + 23) / (2896 + 23)
## [1] 0.6032888
# More than 60% of the rows carry no information, so the variable is dropped.
# Remove the column by name: a positional index silently deletes a different
# column if this line is run twice or the column order changes.
BBDDCompleta$MasVnrArea <- NULL

# BsmtQual: 81 missing values, filled with the most frequent level.
# NOTE(review): in the Ames/Kaggle data dictionary NA in the basement fields
# appears to mean "no basement" rather than an unknown value - confirm that
# mode imputation is really intended here.
table(is.na(BBDDCompleta$BsmtQual))
## 
## FALSE  TRUE 
##  2838    81
table(BBDDCompleta$BsmtQual)
## 
##   Ex   Fa   Gd   TA 
##  258   88 1209 1283
# The mode ("TA" in the table above) is derived from the data rather than
# typed in.
BBDDCompleta$BsmtQual[is.na(BBDDCompleta$BsmtQual)] <-
  names(which.max(table(BBDDCompleta$BsmtQual)))

# BsmtCond: 82 missing values, filled with the most frequent level.
# NOTE(review): in the Ames/Kaggle data dictionary NA in the basement fields
# appears to mean "no basement" rather than an unknown value - confirm that
# mode imputation is really intended here.
table(is.na(BBDDCompleta$BsmtCond))
## 
## FALSE  TRUE 
##  2837    82
table(BBDDCompleta$BsmtCond)
## 
##   Fa   Gd   Po   TA 
##  104  122    5 2606
# The mode ("TA" in the table above) is derived from the data rather than
# typed in.
BBDDCompleta$BsmtCond[is.na(BBDDCompleta$BsmtCond)] <-
  names(which.max(table(BBDDCompleta$BsmtCond)))

# BsmtExposure: 82 missing values, filled with the most frequent level.
# NOTE(review): in the Ames/Kaggle data dictionary NA in the basement fields
# appears to mean "no basement" rather than an unknown value - confirm that
# mode imputation is really intended here.
table(is.na(BBDDCompleta$BsmtExposure))
## 
## FALSE  TRUE 
##  2837    82
table(BBDDCompleta$BsmtExposure)
## 
##   Av   Gd   Mn   No 
##  418  276  239 1904
# The mode ("No" in the table above) is derived from the data rather than
# typed in.
BBDDCompleta$BsmtExposure[is.na(BBDDCompleta$BsmtExposure)] <-
  names(which.max(table(BBDDCompleta$BsmtExposure)))

# BsmtFinType1: 79 missing values, filled with the most frequent level.
# NOTE(review): in the Ames/Kaggle data dictionary NA in the basement fields
# appears to mean "no basement" rather than an unknown value - confirm that
# mode imputation is really intended here.
table(is.na(BBDDCompleta$BsmtFinType1))
## 
## FALSE  TRUE 
##  2840    79
table(BBDDCompleta$BsmtFinType1)
## 
## ALQ BLQ GLQ LwQ Rec Unf 
## 429 269 849 154 288 851
# The mode ("Unf" in the table above, only two rows ahead of "GLQ") is
# derived from the data rather than typed in.
BBDDCompleta$BsmtFinType1[is.na(BBDDCompleta$BsmtFinType1)] <-
  names(which.max(table(BBDDCompleta$BsmtFinType1)))

# BsmtFinSF1: count the missing values, then tabulate every distinct value.
table(is.na(BBDDCompleta[["BsmtFinSF1"]]))
## 
## FALSE  TRUE 
##  2918     1
table(BBDDCompleta[["BsmtFinSF1"]])
## 
##    0    2   16   20   24   25   27   28   32   33   35   36   40   41   42   48 
##  929    1   14    8   27    1    1    5    1    1    1    4    3    1    1    4 
##   49   50   51   52   53   54   55   56   57   60   63   64   65   68   70   72 
##    1    2    1    2    1    2    1    4    2    4    1    1    1    3    2    1 
##   73   75   76   77   78   80   81   85   88   94   96  100  104  108  110  111 
##    1    1    1    1    1    4    2    1    1    1    1    1    5    1    3    2 
##  113  114  116  119  120  121  122  125  126  128  129  130  131  132  133  134 
##    1    3    3    3    6    2    1    1    1    1    1    3    1    1    2    1 
##  138  140  141  143  144  148  149  150  152  154  155  156  162  165  167  168 
##    3    1    1    1    5    1    1    3    2    1    1    3    2    1    1    5 
##  169  170  172  173  175  176  179  180  181  182  185  186  187  188  189  190 
##    1    1    1    2    1    3    1    3    1    3    1    1    2    2    3    2 
##  191  192  193  194  196  197  198  200  201  203  204  205  206  207  208  209 
##    1    4    1    2    5    1    2    1    1    3    1    1    1    1    1    2 
##  210  212  213  215  216  218  219  220  221  222  223  224  225  226  228  230 
##    4    1    1    1    3    1    1    3    1    1    1    3    1    2    2    1 
##  231  234  236  238  239  240  241  242  244  246  247  248  249  250  251  252 
##    2    3    2    1    1    3    2    1    1    2    4    2    2    5    1    5 
##  254  256  257  258  259  260  261  262  263  264  266  267  270  271  273  274 
##    1    3    3    1    2    2    1    2    1    3    2    2    1    1    2    2 
##  275  276  278  279  280  281  282  283  284  285  286  288  290  292  294  296 
##    2    6    1    1    5    2    2    2    3    2    1    8    2    3    3    1 
##  297  298  299  300  301  305  306  308  309  310  311  312  314  315  316  317 
##    3    2    4    9    2    2    3    3    1    3    3    6    1    3    1    2 
##  318  319  320  321  322  324  326  328  329  330  331  332  334  335  336  337 
##    1    4    2    2    1    2    2    1    4    5    2    1    2    1    5    1 
##  338  339  340  341  342  343  344  346  347  348  349  350  351  352  353  354 
##    4    1    3    3    2    2    1    1    1    2    1    4    3    2    5    2 
##  355  356  358  360  361  362  363  364  365  366  368  369  370  371  372  373 
##    1    1    2    7    2    1    1    1    1    1    6    1    3    3    2    1 
##  374  375  376  377  378  379  380  381  382  383  384  385  386  387  388  389 
##    2    7    1    1    4    3    2    3    1    1    8    2    2    2    2    1 
##  390  392  393  394  397  398  399  400  402  403  404  405  406  408  410  412 
##    4    2    1    1    3    1    3    5    2    4    1    2    2    4    4    1 
##  414  415  416  417  419  420  421  422  423  424  425  426  427  428  429  430 
##    3    2    2    2    2    3    2    1    1    1    4    2    1    4    1    1 
##  432  433  434  435  436  437  438  439  440  441  442  443  444  445  448  450 
##    5    1    1    3    1    3    2    1    1    2    4    1    2    3    2    5 
##  452  453  454  455  456  457  458  459  460  462  464  465  466  467  468  469 
##    1    2    3    1    7    3    2    1    3    3    1    1    1    1    6    1 
##  471  472  474  475  476  477  480  481  482  483  484  485  486  488  489  490 
##    2    2    3    1    4    1    4    1    1    4    2    2    3    1    2    4 
##  491  492  493  494  495  496  497  498  500  501  502  503  504  505  506  507 
##    1    3    2    3    4    1    1    2    7    1    1    3    6    3    2    4 
##  509  510  512  513  514  515  516  518  520  521  522  523  524  526  527  528 
##    2    4    2    3    1    1    3    1    4    2    3    2    2    1    3    6 
##  531  532  533  534  535  536  537  538  539  540  543  544  546  547  548  549 
##    2    3    3    2    2    2    1    1    4    3    1    6    4    6    3    4 
##  550  551  552  553  554  556  557  559  560  562  564  565  566  567  568  569 
##    5    1    3    6    2    1    1    1    6    1    2    4    2    2    2    3 
##  570  572  573  574  575  576  577  578  579  580  583  584  585  586  587  588 
##    3    1    1    3    2    2    1    3    1    2    2    2    2    1    1    6 
##  590  592  593  594  595  596  599  600  601  602  603  604  605  606  607  608 
##    1    1    2    3    5    1    2    8    1    7    3    2    1    2    1    2 
##  609  611  612  614  615  616  617  619  620  621  622  623  624  625  626  630 
##    2    2    4    1    1    5    2    2    1    2    2    2    7    6    4    2 
##  631  632  633  634  636  637  638  639  641  642  643  644  645  646  647  648 
##    1    3    4    1    2    5    2    2    4    1    3    3    1    3    1    5 
##  649  650  651  652  654  655  656  658  659  660  661  662  663  664  666  668 
##    2    2    2    1    2    4    4    3    4    4    1    6    4    2    3    2 
##  669  670  671  672  673  674  678  679  680  681  682  683  684  685  686  687 
##    1    1    1    5    2    3    2    2    3    2    2    1    1    4    5    1 
##  688  689  690  691  692  694  695  696  697  698  699  700  701  702  704  705 
##    1    1    3    1    1    1    3    1    5    3    2    7    1    1    4    2 
##  706  708  709  710  712  713  714  716  717  718  719  720  722  724  725  726 
##    3    2    1    1    3    1    1    2    2    1    2    2    1    2    1    1 
##  727  728  729  731  732  733  734  735  736  737  738  739  740  741  742  744 
##    2    3    2    1    5    3    4    2    2    2    2    3    2    2    1    2 
##  745  746  747  748  749  750  751  755  756  758  759  760  762  763  764  765 
##    2    1    4    2    1    1    1    3    1    5    1    2    3    2    1    2 
##  766  767  769  770  771  772  773  774  775  776  777  778  779  780  781  782 
##    2    5    3    2    1    1    3    2    3    3    2    3    3    3    4    1 
##  783  784  785  786  787  788  789  790  791  792  793  794  795  796  797  799 
##    1    5    2    3    2    4    3    2    2    1    2    1    1    2    1    2 
##  800  803  804  806  808  809  810  811  812  813  814  816  819  820  821  822 
##    3    2    2    1    1    1    2    1    4    2    2    4    2    3    2    2 
##  824  826  827  828  830  831  832  833  836  837  838  840  841  842  844  846 
##    2    1    1    4    1    2    3    2    4    1    1    1    4    1    1    3 
##  847  848  850  851  852  853  854  856  859  860  862  863  864  865  866  867 
##    2    1    2    4    1    1    1    3    1    2    1    1    4    2    3    1 
##  870  871  872  873  874  876  880  881  883  885  888  890  892  893  894  895 
##    3    2    3    1    1    2    1    1    1    1    1    2    1    1    2    1 
##  896  897  899  900  901  902  903  904  905  906  908  909  910  912  913  914 
##    1    1    1    1    2    4    2    5    1    1    1    1    2    1    2    1 
##  915  916  918  919  920  922  923  924  925  926  929  930  931  932  935  936 
##    5    1    1    1    3    2    2    1    2    1    2    2    2    2    1    7 
##  937  938  939  941  943  944  945  946  949  950  951  952  953  954  955  956 
##    1    2    1    2    1    2    2    1    1    1    1    1    1    1    1    2 
##  958  960  962  964  965  967  968  969  970  973  975  976  978  980  982  983 
##    2    1    3    3    1    2    1    1    1    1    2    2    1    2    1    1 
##  984  985  986  987  988  990  991  994  996  998 1000 1001 1002 1003 1004 1005 
##    1    1    4    1    4    1    1    2    2    3    4    1    1    1    2    3 
## 1010 1011 1012 1013 1014 1015 1016 1018 1021 1022 1023 1024 1026 1027 1029 1030 
##    1    1    2    1    1    1    2    2    1    1    3    2    1    1    1    2 
## 1032 1033 1034 1035 1036 1037 1038 1039 1040 1044 1046 1047 1048 1051 1053 1056 
##    2    1    1    1    4    1    1    2    2    1    1    1    1    1    2    3 
## 1059 1064 1065 1070 1071 1073 1074 1075 1078 1079 1080 1082 1084 1085 1086 1087 
##    4    1    2    2    1    1    1    1    2    1    1    2    3    1    1    1 
## 1088 1090 1092 1094 1096 1097 1098 1101 1104 1106 1110 1111 1112 1115 1116 1118 
##    2    1    1    1    1    1    1    1    2    1    2    1    1    1    3    1 
## 1121 1122 1124 1126 1127 1128 1129 1136 1137 1138 1141 1142 1148 1149 1150 1151 
##    1    1    1    1    1    1    1    1    1    1    1    1    3    1    1    1 
## 1152 1153 1154 1157 1158 1159 1162 1163 1165 1170 1172 1173 1178 1180 1181 1182 
##    2    3    1    1    1    2    1    1    1    1    1    3    1    1    1    1 
## 1186 1188 1191 1194 1196 1198 1200 1201 1204 1206 1213 1216 1218 1219 1220 1223 
##    1    1    1    1    2    2    5    3    1    2    1    1    4    2    2    1 
## 1224 1225 1230 1231 1232 1234 1236 1237 1238 1239 1243 1246 1247 1249 1252 1258 
##    1    1    1    1    2    2    1    1    1    1    1    2    1    3    1    2 
## 1259 1260 1261 1262 1270 1271 1274 1277 1280 1282 1283 1285 1288 1290 1294 1298 
##    1    1    1    1    1    1    3    1    1    1    1    1    2    1    1    1 
## 1300 1302 1304 1308 1309 1312 1314 1319 1320 1324 1328 1329 1330 1332 1333 1334 
##    4    1    1    1    3    1    1    1    1    1    1    1    1    1    1    1 
## 1336 1337 1338 1341 1346 1350 1351 1358 1359 1360 1361 1369 1373 1375 1383 1386 
##    1    1    1    1    1    2    1    1    1    2    1    2    3    1    2    2 
## 1387 1390 1392 1398 1400 1406 1410 1412 1414 1416 1420 1422 1430 1433 1436 1440 
##    2    1    1    1    2    1    2    1    1    2    1    1    2    1    1    1 
## 1441 1443 1445 1447 1455 1456 1460 1464 1470 1474 1476 1478 1480 1500 1505 1513 
##    1    1    1    1    2    1    1    1    1    1    2    1    1    1    1    1 
## 1518 1531 1538 1540 1557 1558 1562 1564 1567 1571 1572 1573 1576 1593 1606 1619 
##    2    1    1    1    1    1    1    1    2    1    2    3    1    1    1    1 
## 1632 1636 1640 1646 1660 1682 1684 1696 1721 1728 1732 1733 1758 1767 1810 1812 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
## 1836 1880 1904 1965 1972 2085 2096 2146 2158 2188 2257 2260 2288 4010 5644 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1
# Plot the BsmtFinSF1 distribution before filling its gaps.
hist(BBDDCompleta$BsmtFinSF1)

# Fill missing BsmtFinSF1 entries with the rounded median of the observed values.
BBDDCompleta$BsmtFinSF1 <- replace(
  BBDDCompleta$BsmtFinSF1,
  is.na(BBDDCompleta$BsmtFinSF1),
  round(median(BBDDCompleta$BsmtFinSF1, na.rm = TRUE))
)


# Count missing BsmtFinType2 entries, then show the frequency of each level.
table(is.na(BBDDCompleta$BsmtFinType2))
## 
## FALSE  TRUE 
##  2839    80
table(BBDDCompleta$BsmtFinType2)
## 
##  ALQ  BLQ  GLQ  LwQ  Rec  Unf 
##   52   68   34   87  105 2493
# "Unf" is the most frequent level in the table above; use it as the fill value.
BBDDCompleta$BsmtFinType2 <- replace(
  BBDDCompleta$BsmtFinType2,
  is.na(BBDDCompleta$BsmtFinType2),
  "Unf"
)

table(is.na(BBDDCompleta$BsmtFinSF2)) 
## 
## FALSE  TRUE 
##  2918     1
table((BBDDCompleta$BsmtFinSF2))  
## 
##    0    6   12   28   32   35   38   40   41   42   46   48   52   60   63   64 
## 2571    1    1    1    1    1    1    2    2    2    1    1    1    2    1    2 
##   66   68   72   76   78   80   81   92   93   95   96  102  105  106  108  110 
##    1    2    2    1    1    2    1    1    2    1    2    1    2    1    2    2 
##  113  116  117  119  120  121  123  125  127  128  132  136  138  139  144  147 
##    1    2    2    1    1    2    1    1    2    2    1    1    1    1    3    3 
##  149  150  153  154  156  159  162  163  165  167  168  169  173  174  175  177 
##    1    1    1    1    1    2    3    1    1    1    3    1    1    2    1    1 
##  180  181  182  184  186  193  196  201  202  206  208  210  211  215  216  219 
##    5    1    2    1    1    1    1    1    2    1    1    2    1    1    1    1 
##  227  228  230  232  239  240  243  247  250  252  258  259  262  263  264  270 
##    1    1    1    1    1    2    1    2    1    2    1    1    1    1    2    2 
##  273  276  278  279  281  284  286  287  288  290  294  297  306  308  311  319 
##    2    2    1    2    1    1    1    2    2    2    5    1    1    1    1    1 
##  321  324  334  336  337  344  345  350  351  352  354  360  362  364  373  374 
##    1    1    1    1    1    2    1    1    1    1    2    1    1    1    1    3 
##  375  377  380  382  387  391  393  396  398  400  402  404  411  417  419  420 
##    1    1    1    1    1    2    1    1    1    2    1    1    1    1    1    1 
##  432  435  438  441  442  448  449  450  453  456  465  466  468  469  472  474 
##    1    3    1    1    1    1    1    1    1    1    2    1    2    2    1    1 
##  479  480  483  486  488  491  492  495  497  499  500  506  507  512  522  529 
##    1    2    3    1    1    1    2    2    1    1    1    1    1    1    1    1 
##  530  531  532  539  543  544  546  547  551  555  557  580  590  596  600  604 
##    1    1    1    3    1    1    1    1    2    1    1    1    2    2    1    1 
##  606  608  612  613  619  620  622  624  627  630  634  645  661  668  670  679 
##    1    1    1    1    1    2    1    1    1    1    1    1    1    1    2    1 
##  682  684  688  690  691  692  694  712  713  722  723  748  750  755  761  764 
##    1    1    1    1    1    1    1    2    1    1    2    1    1    1    1    1 
##  768  774  791  799  811  820  823  826  829  831  841  842  850  852  859  869 
##    1    1    1    1    1    1    1    1    1    1    2    1    1    1    1    1 
##  872  873  875  884  891  904  906  912  915  955  956  972  981  982 1020 1029 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
## 1031 1037 1039 1057 1061 1063 1073 1080 1083 1085 1120 1127 1164 1393 1474 1526 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1
# Plot BsmtFinSF2; most of its values are 0, so the variable is dropped.
hist(BBDDCompleta$BsmtFinSF2)

# Drop the column by name: a positional index silently removes a different
# column as soon as the column order changes.
BBDDCompleta$BsmtFinSF2 <- NULL

# Heating: check for missing values and look at the level frequencies.
table(is.na(BBDDCompleta$Heating))
## 
## FALSE 
##  2919
table(BBDDCompleta$Heating)
## 
## Floor  GasA  GasW  Grav  OthW  Wall 
##     1  2874    27     9     2     6
# Share of rows in the most frequent level, computed from the data instead of
# from hand-copied counts.
max(table(BBDDCompleta$Heating)) / nrow(BBDDCompleta)
## [1] 0.9845838
# About 98% of the rows share one level, so the variable is dropped. It is
# removed by name because a positional index breaks when the column order changes.
BBDDCompleta$Heating <- NULL

# CentralAir: check for missing values and look at the level frequencies.
table(is.na(BBDDCompleta$CentralAir))
## 
## FALSE 
##  2919
table(BBDDCompleta$CentralAir)
## 
##    N    Y 
##  196 2723
# Share of rows in the most frequent level, computed from the data instead of
# from hand-copied counts.
max(table(BBDDCompleta$CentralAir)) / nrow(BBDDCompleta)
## [1] 0.9328537
# About 93% of the rows share one level, so the variable is dropped. It is
# removed by name because a positional index breaks when the column order changes.
BBDDCompleta$CentralAir <- NULL

# Count missing Electrical entries, then show the frequency of each level.
table(is.na(BBDDCompleta$Electrical))
## 
## FALSE  TRUE 
##  2918     1
table(BBDDCompleta$Electrical)
## 
## FuseA FuseF FuseP   Mix SBrkr 
##   188    50     8     1  2671
# "SBrkr" is the most frequent level in the table above; use it as the fill value.
BBDDCompleta$Electrical <- replace(
  BBDDCompleta$Electrical,
  is.na(BBDDCompleta$Electrical),
  "SBrkr"
)

# Count missing BsmtFullBath entries, then show the frequency of each count.
table(is.na(BBDDCompleta$BsmtFullBath))
## 
## FALSE  TRUE 
##  2917     2
table(BBDDCompleta$BsmtFullBath)
## 
##    0    1    2    3 
## 1705 1172   38    2
# Fill the gaps with the most frequent value. The fill value must be numeric:
# assigning the string "0" would coerce the whole column to character.
BBDDCompleta$BsmtFullBath[is.na(BBDDCompleta$BsmtFullBath)] <- 0L

# Count missing BsmtHalfBath entries, then show the frequency of each count.
table(is.na(BBDDCompleta$BsmtHalfBath))
## 
## FALSE  TRUE 
##  2917     2
table(BBDDCompleta$BsmtHalfBath)
## 
##    0    1    2 
## 2742  171    4
# Fill the gaps with the most frequent value. The fill value must be numeric:
# assigning the string "0" would coerce the whole column to character.
BBDDCompleta$BsmtHalfBath[is.na(BBDDCompleta$BsmtHalfBath)] <- 0L

# Count missing KitchenQual entries, then show the frequency of each level.
table(is.na(BBDDCompleta$KitchenQual))
## 
## FALSE  TRUE 
##  2918     1
table(BBDDCompleta$KitchenQual)
## 
##   Ex   Fa   Gd   TA 
##  205   70 1151 1492
# "TA" is the most frequent level in the table above; use it as the fill value.
BBDDCompleta$KitchenQual <- replace(
  BBDDCompleta$KitchenQual,
  is.na(BBDDCompleta$KitchenQual),
  "TA"
)

table(is.na(BBDDCompleta$EnclosedPorch)) 
## 
## FALSE 
##  2919
table((BBDDCompleta$EnclosedPorch))  #Eliminamos la variable por que tiene la mayorìa en 0
## 
##    0   16   18   19   20   23   24   25   26   28   30   32   34   35   36   37 
## 2460    1    1    1    2    1    2    1    1    1    3    4    3    3    5    1 
##   39   40   41   42   43   44   45   48   50   51   52   54   55   56   57   60 
##    2    8    2    4    1    1    2    3    1    1    2    1    3    5    1    6 
##   64   66   67   68   70   72   75   77   78   80   81   84   86   87   88   90 
##    4    1    1    3    5    1    1    4    2    5    3    8    1    1    2    4 
##   91   92   94   96   98   99  100  101  102  104  105  108  109  112  113  114 
##    1    1    2   13    2    1    6    1    4    1    4    2    1   22    1    2 
##  115  116  117  120  121  122  123  126  128  129  130  132  133  134  135  136 
##    2    8    1    9    2    1    2    5    7    1    1    1    1    2    2    2 
##  137  138  139  140  143  144  145  148  150  154  156  158  160  161  162  164 
##    2    2    1    5    2   11    1    5    7    3    5    2    7    1    1    5 
##  165  167  168  169  170  172  174  175  176  177  180  183  184  185  186  189 
##    1    1    9    3    2    1    1    1    5    1    7    2    3    2    2    1 
##  190  192  194  196  198  200  202  203  204  205  207  208  209  210  211  212 
##    3   10    1    2    1    2    2    1    1    4    1    2    1    1    2    3 
##  213  214  216  218  219  220  221  222  224  225  226  228  230  231  234  236 
##    1    1    6    3    1    1    1    2    3    1    1    3    1    2    2    3 
##  238  239  240  242  244  246  248  249  252  254  256  259  260  264  265  268 
##    1    3    5    1    3    2    3    1    4    1    1    1    1    3    1    1 
##  272  275  280  286  288  290  291  293  294  296  301  318  324  330  334  364 
##    1    1    4    1    1    1    1    1    1    1    1    1    1    1    1    1 
##  368  386  429  432  552  584 1012 
##    1    1    1    1    1    1    1
# Drop EnclosedPorch by name; a positional index breaks if the column order changes.
BBDDCompleta$EnclosedPorch <- NULL

table(is.na(BBDDCompleta$X3SsnPorch)) 
## 
## FALSE 
##  2919
table((BBDDCompleta$X3SsnPorch))  #Eliminamos la variable por que tiene la mayorìa en 0
## 
##    0   23   86   96  120  130  140  144  150  153  162  168  174  176  180  182 
## 2882    1    1    1    1    1    1    2    1    3    1    3    1    1    2    1 
##  196  216  219  224  225  238  245  255  290  304  320  323  360  407  508 
##    1    2    1    1    1    1    1    1    1    1    1    1    1    1    1
# Drop X3SsnPorch by name; a positional index breaks if the column order changes.
BBDDCompleta$X3SsnPorch <- NULL

table(is.na(BBDDCompleta$ScreenPorch)) 
## 
## FALSE 
##  2919
table((BBDDCompleta$ScreenPorch))  #Eliminamos la variable por que tiene la mayorìa en 0
## 
##    0   40   53   60   63   64   80   84   88   90   92   94   95   99  100  104 
## 2663    1    1    1    1    1    1    1    1    3    2    1    2    1    5    1 
##  108  109  110  111  112  113  115  116  117  119  120  121  122  123  126  128 
##    2    1    2    1    3    1    3    2    1    1    9    1    1    1    4    1 
##  130  135  138  140  141  142  143  144  145  147  148  150  152  153  154  155 
##    1    1    2    2    1    3    1   13    2    3    1    1    1    4    1    4 
##  156  160  161  162  163  164  165  166  168  170  171  174  175  176  178  180 
##    4    6    3    1    1    1    2    1   10    2    1    1    3    2    1    7 
##  182  184  185  189  190  192  195  196  197  198  200  201  204  208  210  216 
##    3    2    1    4    1   11    3    2    1    2    7    1    2    1    3    8 
##  217  220  221  222  224  225  227  228  231  233  234  240  252  255  256  259 
##    1    1    1    1    6    4    2    1    1    1    1    2    1    2    3    2 
##  260  263  264  265  266  270  271  273  276  280  287  288  291  312  322  342 
##    1    1    1    1    2    1    1    1    1    1    1    4    1    1    2    1 
##  348  374  385  396  410  440  480  490  576 
##    1    1    1    1    1    1    1    1    1
# Drop ScreenPorch by name; a positional index breaks if the column order changes.
BBDDCompleta$ScreenPorch <- NULL

# PoolArea: check for missing values and look at the value frequencies.
table(is.na(BBDDCompleta$PoolArea))
## 
## FALSE 
##  2919
table(BBDDCompleta$PoolArea)  # The variable is dropped because most values are 0
## 
##    0  144  228  368  444  480  512  519  555  561  576  648  738  800 
## 2906    1    1    1    1    1    1    1    1    1    1    1    1    1
# Drop by name; a positional index breaks if the column order changes.
BBDDCompleta$PoolArea <- NULL

# PoolQC: count missing values.
table(is.na(BBDDCompleta$PoolQC))
## 
## FALSE  TRUE 
##    10  2909
# The variable is dropped because most of its values are NA. It is removed by
# name; a positional index breaks if the column order changes.
BBDDCompleta$PoolQC <- NULL

# Fence: count missing values.
table(is.na(BBDDCompleta$Fence))
## 
## FALSE  TRUE 
##   571  2348
# The variable is dropped because most of its values are NA. It is removed by
# name; a positional index breaks if the column order changes.
BBDDCompleta$Fence <- NULL

# MiscFeature: count missing values.
table(is.na(BBDDCompleta$MiscFeature))
## 
## FALSE  TRUE 
##   105  2814
# The variable is dropped because most of its values are NA. It is removed by
# name; a positional index breaks if the column order changes.
BBDDCompleta$MiscFeature <- NULL

# MiscVal: check for missing values and look at the value frequencies.
table(is.na(BBDDCompleta$MiscVal))
## 
## FALSE 
##  2919
table(BBDDCompleta$MiscVal)  # No NAs here; the variable is dropped because most values are 0
## 
##     0    54    80   300   350   400   420   450   455   460   480   490   500 
##  2816     1     1     1     1    18     1     9     1     1     2     1    13 
##   560   600   620   650   700   750   800   900  1000  1150  1200  1300  1400 
##     1     8     1     3     7     1     1     1     1     1     3     1     1 
##  1500  1512  2000  2500  3000  3500  4500  6500  8300 12500 15500 17000 
##     1     1     7     2     2     1     2     1     1     1     1     1
# Drop by name; a positional index breaks if the column order changes.
BBDDCompleta$MiscVal <- NULL

# Count missing SaleType entries, then show the frequency of each level.
table(is.na(BBDDCompleta$SaleType))
## 
## FALSE  TRUE 
##  2918     1
table(BBDDCompleta$SaleType)
## 
##   COD   Con ConLD ConLI ConLw   CWD   New   Oth    WD 
##    87     5    26     9     8    12   239     7  2525
# The variable is kept; the single missing entry is filled with "WD", the most
# frequent level in the table above.
BBDDCompleta$SaleType <- replace(
  BBDDCompleta$SaleType,
  is.na(BBDDCompleta$SaleType),
  "WD"
)

# Count missing SaleCondition entries, then show the frequency of each level.
table(is.na(BBDDCompleta$SaleCondition))
## 
## FALSE 
##  2919
table(BBDDCompleta$SaleCondition)
## 
## Abnorml AdjLand  Alloca  Family  Normal Partial 
##     190      12      24      46    2402     245
# Fill any missing SaleCondition with "Normal", the most frequent level. The
# table above shows no NAs, so this is currently a no-op kept as a guard. The
# previous statement re-imputed SaleType here, a copy-paste slip.
BBDDCompleta$SaleCondition[is.na(BBDDCompleta$SaleCondition)] <- "Normal"

## Split the cleaned data back into Train and Test, with the newly selected variables

# The BBDD column holds the origin label ("Train" or "Test") of each row.
TrainF<- dplyr::filter(BBDDCompleta,BBDD=="Train")
TestF<- dplyr::filter(BBDDCompleta,BBDD=="Test")

##REGRESIÓN 1 - REGRESIÓN LINEAL

#Análisis de correlación

# Pearson correlation test between SalePrice and LotArea on the training rows.
cor.test(TrainF$SalePrice,TrainF$LotArea,method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  TrainF$SalePrice and TrainF$LotArea
## t = 10.445, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2154574 0.3109369
## sample estimates:
##       cor 
## 0.2638434
# Pearson correlation test between SalePrice and GrLivArea on the training rows.
cor.test(TrainF$SalePrice,TrainF$GrLivArea,method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  TrainF$SalePrice and TrainF$GrLivArea
## t = 38.348, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6821200 0.7332695
## sample estimates:
##       cor 
## 0.7086245
# Pearson correlation test between SalePrice and OverallCond on the training rows.
# NOTE(review): the trailing note says the variable is dropped, yet the Model1
# formula further down still lists OverallCond -- confirm which is intended.
cor.test(TrainF$SalePrice,TrainF$OverallCond,method="pearson") # Low correlation, we drop the variable
## 
##  Pearson's product-moment correlation
## 
## data:  TrainF$SalePrice and TrainF$OverallCond
## t = -2.9819, df = 1458, p-value = 0.002912
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.12864437 -0.02666008
## sample estimates:
##         cor 
## -0.07785589
# Pearson correlation test between SalePrice and YearBuilt on the training rows.
cor.test(TrainF$SalePrice,TrainF$YearBuilt,method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  TrainF$SalePrice and TrainF$YearBuilt
## t = 23.424, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4845947 0.5591987
## sample estimates:
##       cor 
## 0.5228973
# Pearson correlation test between SalePrice and YearRemodAdd on the training rows.
cor.test(TrainF$SalePrice,TrainF$YearRemodAdd,method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  TrainF$SalePrice and TrainF$YearRemodAdd
## t = 22.466, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4679732 0.5442445
## sample estimates:
##      cor 
## 0.507101
# Pearson correlation test between SalePrice and TotalBsmtSF on the training rows.
cor.test(TrainF$SalePrice,TrainF$TotalBsmtSF,method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  TrainF$SalePrice and TrainF$TotalBsmtSF
## t = 29.671, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5805529 0.6445923
## sample estimates:
##       cor 
## 0.6135806
# Pearson correlation test between SalePrice and BsmtFinSF1 on the training rows.
cor.test(TrainF$SalePrice,TrainF$BsmtFinSF1,method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  TrainF$SalePrice and TrainF$BsmtFinSF1
## t = 15.998, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3418953 0.4292133
## sample estimates:
##       cor 
## 0.3864198
# Pearson correlation test between SalePrice and GarageArea on the training rows.
cor.test(TrainF$SalePrice,TrainF$GarageArea,method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  TrainF$SalePrice and TrainF$GarageArea
## t = 30.446, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5910324 0.6538222
## sample estimates:
##       cor 
## 0.6234314
# Pearson correlation test between SalePrice and X1stFlrSF on the training rows.
cor.test(TrainF$SalePrice,TrainF$X1stFlrSF,method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  TrainF$SalePrice and TrainF$X1stFlrSF
## t = 29.078, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5723391 0.6373448
## sample estimates:
##       cor 
## 0.6058522
# Pearson correlation test between SalePrice and X2ndFlrSF on the training rows.
cor.test(TrainF$SalePrice,TrainF$X2ndFlrSF,method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  TrainF$SalePrice and TrainF$X2ndFlrSF
## t = 12.867, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2724957 0.3646620
## sample estimates:
##       cor 
## 0.3193338
# Pearson correlation test between SalePrice and BedroomAbvGr on the training rows.
cor.test(TrainF$SalePrice,TrainF$BedroomAbvGr,method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  TrainF$SalePrice and TrainF$BedroomAbvGr
## t = 6.5159, df = 1458, p-value = 9.927e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1179285 0.2176373
## sample estimates:
##       cor 
## 0.1682132
# Pearson correlation test between SalePrice and KitchenAbvGr on the training rows.
cor.test(TrainF$SalePrice,TrainF$KitchenAbvGr,method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  TrainF$SalePrice and TrainF$KitchenAbvGr
## t = -5.2381, df = 1458, p-value = 1.86e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.18591342 -0.08519911
## sample estimates:
##        cor 
## -0.1359074
# Pearson correlation test between SalePrice and TotRmsAbvGrd on the training rows.
cor.test(TrainF$SalePrice,TrainF$TotRmsAbvGrd,method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  TrainF$SalePrice and TrainF$TotRmsAbvGrd
## t = 24.099, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4960020 0.5694337
## sample estimates:
##       cor 
## 0.5337232
# Ordinary least squares fit of SalePrice on thirteen predictors, using the
# training rows only.
Model1 <- lm(
  SalePrice ~ LotArea + OverallQual + OverallCond + YearBuilt + YearRemodAdd +
    TotalBsmtSF + BsmtFinSF1 + GarageArea + X1stFlrSF + X2ndFlrSF +
    BedroomAbvGr + KitchenAbvGr + TotRmsAbvGrd,
  data = TrainF
)
# Print coefficient estimates, residual summary and overall fit statistics.
summary(Model1)
## 
## Call:
## lm(formula = SalePrice ~ LotArea + OverallQual + OverallCond + 
##     YearBuilt + YearRemodAdd + TotalBsmtSF + BsmtFinSF1 + GarageArea + 
##     X1stFlrSF + X2ndFlrSF + BedroomAbvGr + KitchenAbvGr + TotRmsAbvGrd, 
##     data = TrainF)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -564411  -16505   -1784   13107  287932 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.083e+06  1.152e+05  -9.400  < 2e-16 ***
## LotArea       5.317e-01  1.017e-01   5.229 1.95e-07 ***
## OverallQual   1.836e+04  1.176e+03  15.614  < 2e-16 ***
## OverallCond   4.562e+03  1.030e+03   4.429 1.02e-05 ***
## YearBuilt     3.613e+02  5.227e+01   6.912 7.14e-12 ***
## YearRemodAdd  1.535e+02  6.574e+01   2.334   0.0197 *  
## TotalBsmtSF   1.038e+01  4.246e+00   2.445   0.0146 *  
## BsmtFinSF1    1.588e+01  2.533e+00   6.270 4.75e-10 ***
## GarageArea    3.495e+01  5.896e+00   5.928 3.82e-09 ***
## X1stFlrSF     5.609e+01  5.491e+00  10.214  < 2e-16 ***
## X2ndFlrSF     4.502e+01  3.993e+00  11.277  < 2e-16 ***
## BedroomAbvGr -9.486e+03  1.701e+03  -5.576 2.94e-08 ***
## KitchenAbvGr -2.579e+04  4.811e+03  -5.360 9.66e-08 ***
## TotRmsAbvGrd  6.602e+03  1.246e+03   5.299 1.35e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 36010 on 1446 degrees of freedom
## Multiple R-squared:  0.7963, Adjusted R-squared:  0.7945 
## F-statistic: 434.9 on 13 and 1446 DF,  p-value: < 2.2e-16
# Score the test rows with the fitted linear model.
PredTest <- predict(Model1, newdata = TestF, type = "response")
# Submission table: ids from the original test file paired with the predictions.
MySubmission <- data.frame(ID = Test$Id, SalePrice = PredTest)
# Store the predictions on the cleaned test frame as well.
TestF$SalePrice <- PredTest

Revisando los resultados

Se realizan las correcciones y arreglos en los datos

# Count how many predictions came back as NA.
table(is.na(MySubmission$SalePrice))
## 
## FALSE  TRUE 
##  1457     2
hist(MySubmission$SalePrice)

# Fill the NA predictions with the rounded median of the remaining predictions.
MySubmission$SalePrice <- replace(
  MySubmission$SalePrice,
  is.na(MySubmission$SalePrice),
  round(median(MySubmission$SalePrice, na.rm = TRUE))
)
# List every prediction in ascending order.
sort(MySubmission$SalePrice)
##    [1]  -8811.688  -4538.751  -1000.422   4586.368  10386.859  11378.140
##    [7]  11834.357  13503.969  19095.665  21290.052  21422.835  22280.450
##   [13]  23053.013  23752.017  30053.455  31659.384  32442.220  34155.865
##   [19]  35566.079  36390.893  36429.056  38575.721  39487.621  39868.795
##   [25]  39894.446  44504.693  47239.970  47680.881  49921.940  51541.431
##   [31]  52646.575  52972.287  54370.515  55224.781  56342.492  56785.436
##   [37]  58189.392  62115.990  62388.432  62680.062  63276.901  63541.018
##   [43]  64890.560  66967.223  66994.153  67230.675  67640.143  68207.747
##   [49]  68360.527  69363.820  69700.415  70098.553  70796.395  71562.699
##   [55]  72544.331  72765.746  72843.056  72965.269  73017.905  73065.628
##   [61]  73153.505  75078.406  75391.455  75650.617  76930.372  76941.007
##   [67]  76982.387  77556.496  78284.445  78383.016  78396.503  78435.408
##   [73]  79533.009  79608.551  79813.211  81797.250  81865.642  82194.237
##   [79]  82471.518  83012.319  83242.240  84051.653  84082.159  84119.966
##   [85]  84174.213  84186.290  84440.990  85641.645  85744.036  87976.957
##   [91]  88090.947  88436.384  88555.446  88569.017  89282.933  89291.868
##   [97]  89844.175  90236.518  90435.125  91670.983  92630.882  92642.625
##  [103]  92892.183  92993.896  93880.053  94381.148  94461.580  94641.954
##  [109]  94723.009  94974.179  95544.582  95546.406  95580.585  95604.879
##  [115]  95723.254  95945.809  95982.889  96317.935  96379.085  96757.726
##  [121]  97650.164  97685.677  97699.165  97930.452  98098.077  98930.983
##  [127]  98980.694  99051.641  99188.508  99341.093 100112.685 100927.866
##  [133] 101256.008 102507.915 102629.029 102639.748 102683.122 103048.123
##  [139] 103230.548 103349.306 103378.334 103469.049 103531.797 103535.480
##  [145] 103554.797 103702.858 104222.175 104347.189 104804.071 105082.223
##  [151] 105163.240 105214.478 105324.896 105367.495 105576.911 105843.858
##  [157] 105858.512 105891.365 105991.893 106081.463 106175.953 106279.896
##  [163] 106323.383 106687.204 106779.118 106802.903 106808.320 106809.275
##  [169] 107113.123 107122.016 107498.079 107791.875 108201.238 108290.995
##  [175] 108296.648 108333.967 108532.185 108695.206 108743.899 108937.653
##  [181] 109071.798 109286.471 109403.223 109561.148 109853.717 110038.393
##  [187] 110230.412 110320.260 110565.615 111382.547 111398.403 111448.962
##  [193] 111648.515 111725.072 111831.582 111850.012 112106.900 112132.331
##  [199] 112480.492 112503.705 112533.070 112561.752 112613.455 112675.434
##  [205] 112809.948 113065.482 113122.152 113235.962 113632.043 113685.544
##  [211] 113782.489 113839.143 113904.637 113911.904 114080.593 114102.498
##  [217] 114169.456 114347.910 114490.486 114645.713 114662.264 114751.258
##  [223] 114758.496 114792.547 114801.046 114817.935 114902.548 115067.946
##  [229] 115123.973 115362.267 115481.481 116043.991 116281.894 116465.714
##  [235] 116573.217 116646.440 116924.578 116977.357 117014.653 117092.550
##  [241] 117227.433 117233.474 117271.622 117283.748 117356.953 117381.393
##  [247] 117605.215 117664.527 117948.215 117964.435 118020.297 118020.922
##  [253] 118062.724 118191.631 118245.275 118279.299 118387.700 118408.889
##  [259] 118498.871 118555.499 118615.119 118837.610 118943.493 118965.214
##  [265] 119076.619 119085.323 119162.964 119227.168 119231.256 119281.741
##  [271] 119296.717 119319.772 119382.797 119451.041 119501.548 119663.076
##  [277] 119753.601 120051.439 120064.107 120259.502 120383.758 120417.054
##  [283] 120452.652 120521.138 120547.719 120623.425 120728.760 121136.446
##  [289] 121376.437 121382.266 121447.011 121553.672 121683.773 121756.409
##  [295] 121836.387 121993.789 122049.573 122051.566 122087.109 122124.078
##  [301] 122188.423 122254.836 122352.974 122603.280 122614.146 122650.115
##  [307] 123062.396 123145.800 123215.892 123376.229 123547.075 123583.382
##  [313] 123718.590 124089.852 124101.982 124137.242 124149.758 124217.435
##  [319] 124348.636 124349.065 124373.280 124384.506 124530.969 124607.825
##  [325] 124659.101 124768.581 124838.335 124873.038 124918.737 125249.287
##  [331] 125365.629 125507.540 125561.145 125768.203 125787.806 125818.407
##  [337] 125832.666 125890.034 125894.061 125912.830 126097.530 126261.402
##  [343] 126303.338 126319.675 126774.586 126912.450 126930.025 126933.051
##  [349] 127363.918 127469.093 127502.679 127537.865 127570.789 127577.331
##  [355] 127928.896 127966.319 128015.419 128086.162 128740.545 128916.545
##  [361] 128964.433 128972.841 129126.617 129279.617 129289.784 129368.416
##  [367] 129412.546 129546.493 129771.619 129864.412 130151.263 130205.821
##  [373] 130307.857 130339.568 130350.769 130514.641 130579.686 130864.689
##  [379] 131032.488 131045.378 131064.473 131098.829 131103.851 131106.574
##  [385] 131156.782 131227.991 131553.378 131584.569 131610.497 131631.477
##  [391] 131741.536 132239.623 132263.943 132280.708 132364.983 132479.750
##  [397] 132508.189 132527.425 132535.450 132651.864 132669.177 132670.084
##  [403] 132695.386 132713.878 132796.365 132937.761 133354.567 133521.849
##  [409] 133754.907 133811.489 133825.122 133920.410 133945.985 133956.513
##  [415] 134204.997 134261.790 134346.227 134458.229 134475.690 134481.123
##  [421] 134500.749 134534.898 134584.392 134602.172 134668.207 134739.300
##  [427] 135014.206 135027.373 135182.512 135296.296 135411.047 135551.491
##  [433] 135582.470 135704.350 135774.316 135780.467 135907.357 135954.447
##  [439] 135980.606 136041.475 136114.645 136131.536 136153.675 136167.297
##  [445] 136411.446 136517.736 136522.157 136607.547 136884.716 136992.737
##  [451] 137036.265 137226.948 137243.690 137378.888 137471.109 137553.422
##  [457] 137608.274 137619.237 137751.519 137754.311 137779.094 137939.047
##  [463] 138033.777 138064.055 138434.582 138441.252 138449.782 138591.011
##  [469] 138633.613 138728.717 138742.818 138793.938 138796.887 138893.256
##  [475] 138978.193 138981.451 139003.090 139017.017 139076.508 139078.078
##  [481] 139133.740 139140.210 139263.011 139572.570 139636.753 139667.424
##  [487] 139731.468 139831.975 140193.252 140543.299 140945.958 140997.203
##  [493] 141086.041 141129.688 141380.625 141408.757 141500.316 141501.902
##  [499] 141502.858 141540.028 141754.717 141759.667 141964.882 142003.730
##  [505] 142071.157 142100.767 142217.182 142354.596 142381.238 142438.633
##  [511] 142441.436 142460.394 142661.677 142860.526 143094.607 143197.102
##  [517] 143232.395 143361.385 143443.762 143579.262 143618.431 143729.609
##  [523] 143749.520 143797.606 143813.668 144037.045 144059.750 144140.577
##  [529] 144151.751 144335.658 144396.666 144479.116 144523.271 144598.721
##  [535] 144874.784 144925.334 144975.247 145059.588 145101.291 145179.200
##  [541] 145207.022 145247.847 145265.878 145380.059 145442.175 145557.250
##  [547] 145694.510 145708.943 145781.614 145817.894 145914.318 145947.662
##  [553] 146088.349 146101.497 146150.284 146177.060 146224.626 146234.695
##  [559] 146235.614 146254.234 146292.176 146427.743 146605.221 146605.835
##  [565] 146699.698 146717.416 146876.530 146945.211 146950.926 147530.512
##  [571] 147707.792 147836.126 147845.853 147853.746 147945.303 148020.149
##  [577] 148255.950 148387.203 148411.071 148851.707 148913.209 148994.288
##  [583] 149032.147 149100.694 149441.289 149473.472 149499.809 149568.208
##  [589] 149574.928 149612.975 149851.547 150135.059 150139.442 150234.922
##  [595] 150244.272 150373.498 150534.519 150959.577 151102.502 151252.640
##  [601] 151254.722 152626.140 152933.533 153310.007 153347.750 153408.731
##  [607] 153579.089 153602.595 153626.892 153763.384 153833.813 154021.205
##  [613] 154106.579 154208.757 154548.678 154597.853 154667.766 154676.032
##  [619] 154868.682 155007.020 155213.068 155237.436 155420.207 155488.340
##  [625] 155490.699 155507.297 156000.661 156056.642 156102.407 156147.606
##  [631] 156157.725 156188.058 156508.930 156568.462 156817.095 156863.476
##  [637] 156871.209 156903.015 156970.082 157026.303 157547.040 157685.502
##  [643] 157762.194 158124.994 158272.019 158346.092 158484.404 158493.845
##  [649] 158540.885 158574.987 158599.044 158637.458 158708.013 158730.139
##  [655] 158748.353 158771.734 158957.078 158971.111 158999.031 159101.382
##  [661] 159260.260 159419.963 159436.240 159519.680 159570.343 159673.601
##  [667] 159746.742 159769.143 160128.601 160340.949 160470.338 160514.850
##  [673] 160590.508 160598.702 160676.836 160681.140 160735.207 160799.886
##  [679] 160973.428 161527.428 161527.569 161729.742 161787.838 161816.497
##  [685] 161845.391 161891.535 162130.124 162660.315 162662.476 162695.135
##  [691] 162762.114 163047.679 163167.261 163198.791 163378.820 163501.628
##  [697] 163675.213 163696.456 163992.427 164091.893 164149.684 164190.602
##  [703] 164197.482 164203.903 164423.266 164431.747 164632.831 164682.385
##  [709] 164926.661 164984.356 165051.435 165096.826 165120.474 165120.474
##  [715] 165225.430 165474.618 165672.250 165814.375 166081.493 166135.752
##  [721] 166217.516 166386.846 166437.084 166713.414 167002.768 167006.780
##  [727] 167112.628 167245.186 167301.000 167301.000 167301.287 167366.252
##  [733] 168020.142 168298.665 168470.513 168471.378 168537.298 168647.953
##  [739] 168654.924 168795.349 168983.294 169004.272 169094.022 169142.103
##  [745] 169476.333 169589.815 169680.731 169751.042 170049.054 170304.775
##  [751] 170310.079 170520.171 170607.501 170732.272 170771.861 171203.810
##  [757] 171241.952 171361.356 171370.698 171377.921 171456.311 171755.432
##  [763] 171771.938 171814.489 171915.051 172068.986 172123.461 172209.294
##  [769] 172230.104 172355.811 172436.225 172492.497 172769.203 172976.111
##  [775] 173003.846 173196.863 173286.050 173334.965 173340.871 173443.883
##  [781] 173446.108 173465.909 173556.345 173898.868 174379.690 174394.323
##  [787] 174426.800 174494.035 174736.610 174974.307 175029.942 175234.237
##  [793] 175354.255 175366.848 175549.202 175939.385 176064.859 176231.217
##  [799] 176251.390 176309.177 176382.501 176749.971 176758.745 177028.706
##  [805] 177064.585 177159.191 177409.266 177438.981 177510.300 177568.608
##  [811] 177630.805 177746.169 177853.795 177914.041 178361.988 178643.421
##  [817] 178672.231 178776.985 178898.299 178943.723 178974.462 179009.479
##  [823] 179347.390 179362.707 179405.584 179439.880 179762.623 179785.715
##  [829] 179902.387 179972.918 180068.029 180155.657 180201.540 180304.524
##  [835] 180691.324 180737.442 180767.799 180887.322 181044.134 181050.662
##  [841] 181117.726 181133.419 181239.464 182374.189 182445.232 182704.086
##  [847] 183756.950 183763.165 183908.943 183943.530 184025.741 184113.355
##  [853] 184243.901 184347.881 184495.454 184749.633 185391.266 185517.339
##  [859] 185596.469 185651.152 185717.087 185822.430 185889.071 185906.676
##  [865] 186239.436 186375.061 186413.233 186583.956 186644.766 187031.393
##  [871] 187144.810 187283.514 187511.998 187728.055 187886.882 188042.191
##  [877] 188206.078 188334.868 188425.433 188426.197 188797.644 188815.170
##  [883] 188901.906 188983.937 189158.164 189433.481 189627.320 190043.463
##  [889] 190061.957 190728.971 190761.924 190822.382 190885.740 191195.831
##  [895] 191266.187 191402.101 191441.666 191576.669 191747.771 191912.922
##  [901] 191999.340 192043.811 192096.013 192289.228 192454.391 192526.236
##  [907] 192537.092 192643.087 192718.419 192896.704 193144.136 193626.144
##  [913] 193788.989 194269.111 194483.901 194517.036 194615.493 194970.499
##  [919] 195047.097 195339.326 195367.123 195502.732 195594.594 195630.645
##  [925] 195749.653 195797.984 195844.266 195868.246 195955.068 196057.538
##  [931] 196165.698 196186.938 196229.510 196372.846 196392.004 196473.339
##  [937] 196508.324 196763.270 197067.403 197114.349 197332.335 197435.430
##  [943] 197446.168 197683.148 197727.922 197836.076 198219.334 198260.408
##  [949] 198781.481 198975.768 198981.656 199183.584 199199.475 199400.320
##  [955] 199451.371 199458.742 199602.961 199658.249 199732.618 199799.032
##  [961] 199812.518 199820.325 199932.161 200410.570 200413.196 200591.250
##  [967] 200694.544 200695.200 200903.455 201276.798 201424.514 201443.776
##  [973] 201540.163 201717.579 201785.621 201865.615 201882.693 201968.675
##  [979] 201990.113 202030.815 202045.939 202196.603 202278.544 202374.449
##  [985] 202570.076 202729.712 202809.031 203170.691 203189.026 203374.162
##  [991] 203424.465 203459.430 203462.816 203534.287 203657.328 203699.500
##  [997] 203765.822 203855.742 204103.229 204236.260 204551.954 204720.142
## [1003] 204871.043 204970.393 204991.498 205531.812 205655.912 205715.805
## [1009] 205738.698 205970.259 206011.472 206047.629 206107.308 206211.597
## [1015] 206364.565 206486.920 206507.422 206841.964 206950.292 207185.253
## [1021] 207542.590 207544.721 207618.413 207649.617 207738.329 208398.159
## [1027] 208868.396 208924.302 209127.520 209237.011 209770.862 209798.303
## [1033] 209872.573 210175.029 210274.469 210314.630 210326.674 210999.174
## [1039] 211472.177 211734.516 211848.437 211896.653 212149.544 212272.742
## [1045] 212629.539 212682.378 213082.978 213419.887 213561.624 213984.026
## [1051] 214105.687 214163.041 214231.687 214254.633 214430.511 214455.051
## [1057] 214575.573 214687.679 214901.445 215014.400 215040.323 215056.609
## [1063] 215258.405 215572.300 215599.911 216412.129 216701.431 216885.677
## [1069] 216893.641 216989.119 217225.400 217735.757 217829.713 217892.965
## [1075] 217921.304 218111.567 218114.684 218333.208 218363.109 218550.616
## [1081] 218612.681 218717.645 218818.163 219225.705 219265.387 219406.283
## [1087] 219843.973 220193.561 220581.769 220812.911 220864.211 221040.816
## [1093] 221248.427 221382.479 221587.039 221605.910 221607.676 221721.861
## [1099] 221803.031 221814.411 221817.481 222151.652 222211.056 222618.605
## [1105] 222626.531 222805.688 222881.706 223117.434 223224.519 223439.175
## [1111] 223951.735 224097.586 224105.402 224458.657 224565.705 224647.383
## [1117] 224764.857 224779.484 224791.291 224851.553 224930.803 224980.386
## [1123] 224989.997 225034.884 225064.038 225436.942 225503.075 225545.243
## [1129] 225576.295 225655.195 225666.193 225769.378 226155.846 226280.700
## [1135] 226727.979 226787.869 227216.390 227436.719 227542.120 228003.827
## [1141] 228767.794 228857.146 229413.478 229545.951 229548.619 229624.119
## [1147] 229850.610 229967.208 230225.341 230240.406 230327.425 230414.159
## [1153] 230619.878 230798.595 230804.909 231036.282 231372.069 231507.928
## [1159] 231623.778 231727.977 231833.935 231870.135 231943.052 232043.752
## [1165] 232539.343 232603.557 232609.196 232825.320 234027.498 234225.945
## [1171] 234273.987 234351.844 234406.551 234546.724 234728.966 235345.197
## [1177] 235408.712 235475.433 235702.713 235770.309 235982.864 236417.351
## [1183] 238170.526 238281.420 238284.304 238467.396 238487.070 238548.203
## [1189] 238611.898 239005.423 239166.245 239268.895 239285.815 239318.735
## [1195] 239335.669 239343.919 239432.588 239495.688 239599.897 239788.173
## [1201] 239853.213 240278.506 240405.868 240549.707 240835.885 242032.512
## [1207] 242045.756 242386.374 242481.007 242829.420 243111.099 243176.397
## [1213] 243290.765 243350.229 243574.740 243663.907 243815.191 243828.597
## [1219] 243990.877 244030.415 244446.567 244542.536 244812.615 245141.583
## [1225] 245358.460 245736.673 246013.663 246400.959 246446.200 246667.771
## [1231] 246761.759 246779.867 246861.508 247051.041 247110.358 247636.536
## [1237] 248192.632 248641.166 248686.843 248909.121 249144.511 249715.077
## [1243] 249786.354 250362.832 250548.640 250690.283 250711.486 251129.786
## [1249] 251171.316 252397.501 252633.649 252699.358 253248.607 254148.764
## [1255] 254355.951 254408.527 254853.674 255196.161 255697.614 256126.173
## [1261] 256579.489 256708.794 257141.543 257152.294 257158.419 257575.038
## [1267] 257737.022 258079.592 258462.911 259137.398 259402.262 259572.242
## [1273] 259872.488 260077.570 260269.072 260270.880 260438.381 260453.597
## [1279] 260624.976 260905.197 261742.141 262071.400 262225.802 262534.334
## [1285] 262673.881 262700.825 262736.301 262972.460 263023.510 263096.421
## [1291] 263331.563 263618.983 263644.524 263917.635 263941.169 264590.732
## [1297] 264805.519 265066.220 265619.190 265890.376 267125.134 267376.555
## [1303] 267442.383 268431.272 268433.072 269277.548 269485.163 269984.437
## [1309] 270117.471 270205.373 270594.152 271247.000 271638.628 271666.090
## [1315] 272046.460 272447.986 272938.092 273174.887 273471.693 273673.889
## [1321] 273772.887 273793.407 273927.874 274017.408 275076.273 275130.553
## [1327] 275198.968 275213.525 275872.161 276908.655 276937.018 277332.258
## [1333] 277907.519 278119.811 278213.058 278560.500 279559.889 279765.628
## [1339] 279929.074 279985.570 280201.167 280561.202 280768.208 281202.528
## [1345] 282023.637 283887.339 283926.364 284131.858 285957.613 286743.985
## [1351] 286954.253 287073.281 288478.444 288875.949 289098.938 289237.581
## [1357] 289308.052 290231.101 290341.246 291002.312 291556.974 292209.556
## [1363] 292916.563 293243.254 293281.865 293390.410 293693.136 294551.510
## [1369] 295174.401 295913.024 296033.989 296775.375 296938.132 297775.118
## [1375] 298089.870 298867.753 299904.686 300492.886 301727.172 302617.395
## [1381] 302912.465 302937.869 303517.138 303518.502 304288.988 305361.168
## [1387] 305632.213 305697.883 306143.775 306955.101 307717.974 307832.726
## [1393] 308497.813 309166.587 309224.250 309459.225 309599.033 310324.289
## [1399] 311179.887 312563.466 314230.735 314332.398 314678.114 314771.298
## [1405] 315121.425 315147.408 316001.706 317590.528 319347.402 321123.598
## [1411] 322223.719 322537.271 323272.081 323659.906 324359.437 325001.431
## [1417] 326300.660 326536.671 326621.764 328769.397 328964.800 329462.856
## [1423] 329829.633 332296.019 332334.501 334443.838 335872.478 335974.519
## [1429] 336097.414 338180.781 338795.305 340436.142 342410.532 342968.140
## [1435] 345222.883 354018.443 354992.058 355990.767 358938.298 359738.879
## [1441] 361066.227 362065.260 362940.930 368229.221 369412.622 370166.744
## [1447] 374961.510 379318.406 381128.306 382250.316 382330.739 384146.534
## [1453] 384957.749 387268.194 391542.733 400300.945 403827.251 411970.076
## [1459] 674890.953
# Lowest sale price observed in the training data; it is used below as the
# floor for the predicted prices.
min(TrainF$SalePrice)
## [1] 34900
min_train_price <- min(TrainF$SalePrice)

# Raise every prediction that falls below the training minimum up to that
# minimum (this also removes zero or negative predictions). The floor is
# derived from the data instead of being typed in by hand.
below_floor <- MySubmission$SalePrice < min_train_price
MySubmission$SalePrice[below_floor] <- min_train_price


# Kaggle error: 0.19887


write.csv(MySubmission, "Submission.csv", row.names = FALSE)

##SEGUNDO MODELO: ÁRBOL

A continuación se analiza el conjunto de datos mediante el modelo de árbol de decisión

library(caTools)
library(rpart)
library(rpart.plot)
 
# Regression tree for SalePrice fitted on TrainF.
# A previous fit with a different predictor set was assigned to the same
# variable and overwritten right away, so it never influenced the plot or the
# predictions; only the specification that is actually used is kept.
# The predictor order is the same as in the original call.
FirstTree <- rpart(
  SalePrice ~ MSSubClass + MSZoning + LotFrontage + LandSlope + Foundation +
    SaleType + SaleCondition + LotArea + OverallQual + OverallCond +
    YearBuilt + BsmtFinSF1 + GarageArea + X1stFlrSF + X2ndFlrSF +
    BedroomAbvGr + KitchenAbvGr + TotRmsAbvGrd + BldgType + HouseStyle +
    Functional + RoofStyle + ExterQual,
  data = TrainF
)

# Draw the fitted tree.
prp(FirstTree)

# Predict on Test and build the two-column submission table.
# NOTE(review): the random forest, lasso and ridge sections predict on TestF
# rather than Test; confirm that using Test here is intended.
PredTest2 <- predict(FirstTree, newdata = Test)
MySubmission <- data.frame(ID = Test$Id, SalePrice = PredTest2)
Test$SalePrice <- PredTest2

#Revisando los resultados

Se presentan los resultados del modelo en gráficas

# Print the tree predictions in ascending order to inspect their spread.
sort(MySubmission$SalePrice)
##    [1] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##    [9] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##   [17] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##   [25] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##   [33] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##   [41] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##   [49] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##   [57] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##   [65] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##   [73] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##   [81] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##   [89] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##   [97] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [105] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [113] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [121] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [129] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [137] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [145] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [153] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [161] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [169] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [177] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [185] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [193] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [201] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [209] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [217] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [225] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [233] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [241] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [249] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [257] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [265] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [273] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [281] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [289] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [297] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [305] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [313] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [321] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [329] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [337] 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0 114127.0
##  [345] 114127.0 114127.0 114127.0 129076.5 129076.5 129076.5 129076.5 129076.5
##  [353] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
##  [361] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
##  [369] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
##  [377] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
##  [385] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
##  [393] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
##  [401] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
##  [409] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
##  [417] 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5 129076.5
##  [425] 129076.5 129076.5 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [433] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [441] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [449] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [457] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [465] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [473] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [481] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [489] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [497] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [505] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [513] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [521] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [529] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [537] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [545] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [553] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [561] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [569] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [577] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [585] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [593] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [601] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [609] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [617] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [625] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [633] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [641] 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0 143465.0
##  [649] 143465.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [657] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [665] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [673] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [681] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [689] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [697] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [705] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [713] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [721] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [729] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [737] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [745] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [753] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [761] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [769] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [777] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [785] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [793] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [801] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [809] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [817] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [825] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [833] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [841] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [849] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [857] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [865] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [873] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [881] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [889] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [897] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [905] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [913] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0
##  [921] 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 171463.0 194507.5
##  [929] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
##  [937] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
##  [945] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
##  [953] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
##  [961] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
##  [969] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
##  [977] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
##  [985] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
##  [993] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1001] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1009] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1017] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1025] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1033] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1041] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1049] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1057] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1065] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1073] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1081] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1089] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1097] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1105] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1113] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1121] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1129] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1137] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5
## [1145] 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 194507.5 236143.5
## [1153] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1161] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1169] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1177] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1185] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1193] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1201] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1209] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1217] 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5 236143.5
## [1225] 236143.5 236143.5 258744.6 258744.6 258744.6 258744.6 258744.6 258744.6
## [1233] 258744.6 258744.6 258744.6 258744.6 258744.6 258744.6 258744.6 258744.6
## [1241] 258744.6 258744.6 258744.6 258744.6 258744.6 258744.6 258744.6 258744.6
## [1249] 258744.6 258744.6 258744.6 258744.6 259700.6 259700.6 259700.6 259700.6
## [1257] 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6
## [1265] 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6
## [1273] 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6 259700.6
## [1281] 259700.6 259700.6 259700.6 288220.9 288220.9 288220.9 288220.9 288220.9
## [1289] 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9
## [1297] 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9
## [1305] 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9
## [1313] 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9
## [1321] 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9
## [1329] 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9
## [1337] 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9
## [1345] 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9 288220.9
## [1353] 288220.9 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1
## [1361] 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1
## [1369] 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1
## [1377] 341293.1 341293.1 341293.1 341293.1 341293.1 341293.1 361253.6 361253.6
## [1385] 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6
## [1393] 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6
## [1401] 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6
## [1409] 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6
## [1417] 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6
## [1425] 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6
## [1433] 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6 361253.6
## [1441] 361253.6 361253.6 361253.6 489036.8 489036.8 489036.8 489036.8 489036.8
## [1449] 489036.8 489036.8 489036.8 489036.8 489036.8 489036.8 489036.8 489036.8
## [1457] 489036.8 489036.8 489036.8
# Histogram of the predicted sale prices.
hist(MySubmission$SalePrice)

# Save the submission file.
write.csv(MySubmission, file = "Submission.csv", row.names = FALSE)
# Kaggle error: 0.23854

##Modelo 3 : RANDOM FOREST

A continuación se analiza el conjunto de datos mediante el modelo random forest

library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
library(caret)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
## 
##     margin
## Loading required package: lattice
# Fix the RNG seed before fitting: a random forest is built from random
# draws, so without a seed every run produces a different model and the
# reported score cannot be reproduced.
set.seed(123)

# Random forest for SalePrice on TrainF, using the package defaults for all
# tuning parameters.
RandomForest <- randomForest(
  SalePrice ~ MSSubClass + MSZoning + LotArea + Neighborhood + Condition1 +
    Condition2 + HouseStyle + OverallQual + OverallCond + YearBuilt +
    X1stFlrSF + X2ndFlrSF + MoSold + GrLivArea + LandContour + LotConfig +
    BldgType + RoofStyle + Foundation,
  data = TrainF
)

# Lists the components stored in the fitted object.
summary(RandomForest)
##                 Length Class  Mode     
## call               3   -none- call     
## type               1   -none- character
## predicted       1460   -none- numeric  
## mse              500   -none- numeric  
## rsq              500   -none- numeric  
## oob.times       1460   -none- numeric  
## importance        19   -none- numeric  
## importanceSD       0   -none- NULL     
## localImportance    0   -none- NULL     
## proximity          0   -none- NULL     
## ntree              1   -none- numeric  
## mtry               1   -none- numeric  
## forest            11   -none- list     
## coefs              0   -none- NULL     
## y               1460   -none- numeric  
## test               0   -none- NULL     
## inbag              0   -none- NULL     
## terms              3   terms  call
# Score TestF with the forest, pair the predictions with the Ids taken from
# Test, and keep a copy of the predictions on TestF.
# NOTE(review): assumes Test and TestF hold the same rows in the same order.
PredTest3 <- predict(RandomForest, newdata = TestF)
MySubmission <- data.frame(ID = Test$Id, SalePrice = PredTest3)
TestF$SalePrice <- PredTest3

#Revisando los resultados

# Histogram of the random-forest predictions.
hist(MySubmission$SalePrice)

# Min, quartiles, mean and max of the predicted prices.
summary(MySubmission$SalePrice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   55899  130232  159320  178978  207414  534289
#Error en Kaggle 0.14939

#MODELO 4 LASSO

A continuación se analiza el conjunto de datos mediante el modelo LASSO

library(gglasso)
library(tidyverse)

# Model formula shared by the training and test design matrices.
lasso_formula <- SalePrice ~ LotArea + Condition1 + OverallQual + OverallCond +
  YearBuilt + X1stFlrSF + X2ndFlrSF + MoSold + GrLivArea + LandContour +
  LotConfig + BldgType + RoofStyle + Foundation

# Design matrix and response for training; the first column of the model
# matrix (the intercept) is dropped.
x_train <- model.matrix(lasso_formula, TrainF)[, -1]
y_train <- TrainF$SalePrice

# Same construction for the test set.
x_test <- model.matrix(lasso_formula, TestF)[, -1]
y_test <- TestF$SalePrice


# Lasso regularisation path (alpha = 1) over 100 lambda values.
LASSO <- glmnet(
  x = x_train,
  y = y_train,
  alpha = 1,
  nlambda = 100,
  standardize = TRUE
)

# Choose the penalty by cross-validation. The fold assignment is random, so
# the seed is fixed to make the selected lambda reproducible.
set.seed(123)
sal.cv <- cv.glmnet(x_train, y_train, alpha = 1)
plot(sal.cv)
mejor.lambda <- sal.cv$lambda.min
mejor.lambda
log(mejor.lambda)

# The model is NOT refitted here. The previous refit passed
# `nlambda = mejor.lambda`, but `nlambda` is the number of lambda values in
# the path, not a penalty value, so that call did not fit the model at the
# selected lambda. The path stored in LASSO is kept, and predictions at the
# chosen penalty are obtained with predict(LASSO, s = mejor.lambda, ...).

# Predict at the selected penalty and convert the result to an integer
# vector (as.integer drops the fractional part).
PredTest4 <- as.integer(predict(LASSO, s = mejor.lambda, newx = x_test))

# Keep the predictions on TestF and pair them with the Ids taken from Test.
TestF$SalePrice <- PredTest4
MySubmission <- data.frame(ID = Test$Id, SalePrice = PredTest4)


# Raise every prediction that falls below the lowest sale price in the
# training data up to that minimum (this also removes zero or negative
# predictions). The floor is derived from TrainF instead of being hard-coded.
min_train_price <- min(TrainF$SalePrice)
below_floor <- MySubmission$SalePrice < min_train_price
MySubmission$SalePrice[below_floor] <- min_train_price

# Distribution of the final predictions.
hist(MySubmission$SalePrice)
summary(MySubmission$SalePrice)

write.csv(MySubmission, "Submission.csv", row.names = FALSE)


# Error en Kaggle 0.17747

#MODELO 5 RIDGE

A continuación se analiza el conjunto de datos mediante el modelo ridge

# Model formula shared by the training and test design matrices.
ridge_formula <- SalePrice ~ LotArea + Condition1 + OverallQual + OverallCond +
  YearBuilt + X1stFlrSF + X2ndFlrSF + MoSold + GrLivArea + LandContour +
  LotConfig + BldgType + RoofStyle + Foundation

# Design matrix and response for training; the first column of the model
# matrix (the intercept) is dropped.
x_train <- model.matrix(ridge_formula, TrainF)[, -1]
y_train <- TrainF$SalePrice

# Same construction for the test set.
x_test <- model.matrix(ridge_formula, TestF)[, -1]
y_test <- TestF$SalePrice


# Ridge regularisation path (alpha = 0) over 100 lambda values.
ridge <- glmnet(
  x = x_train,
  y = y_train,
  alpha = 0,
  nlambda = 100,
  standardize = TRUE
)

# Choose the penalty by cross-validation. The fold assignment is random, so
# the seed is fixed to make the selected lambda reproducible.
set.seed(123)
sal.cv <- cv.glmnet(x_train, y_train, alpha = 0)
plot(sal.cv)
mejor.lambda <- sal.cv$lambda.min
mejor.lambda
log(mejor.lambda)

# The model is NOT refitted here. The previous refit passed
# `nlambda = mejor.lambda`, but `nlambda` is the number of lambda values in
# the path, not a penalty value, so that call did not fit the model at the
# selected lambda. The path stored in ridge is kept, and predictions at the
# chosen penalty are obtained with predict(ridge, s = mejor.lambda, ...).

# Predict at the selected penalty and convert the result to an integer
# vector (as.integer drops the fractional part).
PredTest5 <- as.integer(predict(ridge, s = mejor.lambda, newx = x_test))

# Keep the predictions on TestF and pair them with the Ids taken from Test.
TestF$SalePrice <- PredTest5
MySubmission <- data.frame(ID = Test$Id, SalePrice = PredTest5)


# Raise every prediction that falls below the lowest sale price in the
# training data up to that minimum (this also removes zero or negative
# predictions). The floor is derived from TrainF instead of being hard-coded.
min_train_price <- min(TrainF$SalePrice)
below_floor <- MySubmission$SalePrice < min_train_price
MySubmission$SalePrice[below_floor] <- min_train_price

# Distribution of the final predictions.
hist(MySubmission$SalePrice)
summary(MySubmission$SalePrice)

write.csv(MySubmission, "Submission.csv", row.names = FALSE)

#Error en Kaggle 0.17434