library(readr)
## Warning: package 'readr' was built under R version 4.4.3
# Read the house-prices training data straight from GitHub (needs network
# access). read_csv() guesses the column types and reports them below.
train <- read_csv("https://raw.githubusercontent.com/ankita1112/House-Prices-Advanced-Regression/master/train.csv")
## Rows: 1460 Columns: 81
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (43): MSZoning, Street, Alley, LotShape, LandContour, Utilities, LotConf...
## dbl (38): Id, MSSubClass, LotFrontage, LotArea, OverallQual, OverallCond, Ye...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a data viewer window, which only makes sense when a person is
# driving the session; skip it when the script is rendered or run in batch.
if (interactive()) {
  View(train)
}
# Number of columns, followed by the full structure of the table.
ncol(train)
## [1] 81
str(train)
## spc_tbl_ [1,460 × 81] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Id : num [1:1460] 1 2 3 4 5 6 7 8 9 10 ...
## $ MSSubClass : num [1:1460] 60 20 60 70 60 50 20 60 50 190 ...
## $ MSZoning : chr [1:1460] "RL" "RL" "RL" "RL" ...
## $ LotFrontage : num [1:1460] 65 80 68 60 84 85 75 NA 51 50 ...
## $ LotArea : num [1:1460] 8450 9600 11250 9550 14260 ...
## $ Street : chr [1:1460] "Pave" "Pave" "Pave" "Pave" ...
## $ Alley : chr [1:1460] NA NA NA NA ...
## $ LotShape : chr [1:1460] "Reg" "Reg" "IR1" "IR1" ...
## $ LandContour : chr [1:1460] "Lvl" "Lvl" "Lvl" "Lvl" ...
## $ Utilities : chr [1:1460] "AllPub" "AllPub" "AllPub" "AllPub" ...
## $ LotConfig : chr [1:1460] "Inside" "FR2" "Inside" "Corner" ...
## $ LandSlope : chr [1:1460] "Gtl" "Gtl" "Gtl" "Gtl" ...
## $ Neighborhood : chr [1:1460] "CollgCr" "Veenker" "CollgCr" "Crawfor" ...
## $ Condition1 : chr [1:1460] "Norm" "Feedr" "Norm" "Norm" ...
## $ Condition2 : chr [1:1460] "Norm" "Norm" "Norm" "Norm" ...
## $ BldgType : chr [1:1460] "1Fam" "1Fam" "1Fam" "1Fam" ...
## $ HouseStyle : chr [1:1460] "2Story" "1Story" "2Story" "2Story" ...
## $ OverallQual : num [1:1460] 7 6 7 7 8 5 8 7 7 5 ...
## $ OverallCond : num [1:1460] 5 8 5 5 5 5 5 6 5 6 ...
## $ YearBuilt : num [1:1460] 2003 1976 2001 1915 2000 ...
## $ YearRemodAdd : num [1:1460] 2003 1976 2002 1970 2000 ...
## $ RoofStyle : chr [1:1460] "Gable" "Gable" "Gable" "Gable" ...
## $ RoofMatl : chr [1:1460] "CompShg" "CompShg" "CompShg" "CompShg" ...
## $ Exterior1st : chr [1:1460] "VinylSd" "MetalSd" "VinylSd" "Wd Sdng" ...
## $ Exterior2nd : chr [1:1460] "VinylSd" "MetalSd" "VinylSd" "Wd Shng" ...
## $ MasVnrType : chr [1:1460] "BrkFace" "None" "BrkFace" "None" ...
## $ MasVnrArea : num [1:1460] 196 0 162 0 350 0 186 240 0 0 ...
## $ ExterQual : chr [1:1460] "Gd" "TA" "Gd" "TA" ...
## $ ExterCond : chr [1:1460] "TA" "TA" "TA" "TA" ...
## $ Foundation : chr [1:1460] "PConc" "CBlock" "PConc" "BrkTil" ...
## $ BsmtQual : chr [1:1460] "Gd" "Gd" "Gd" "TA" ...
## $ BsmtCond : chr [1:1460] "TA" "TA" "TA" "Gd" ...
## $ BsmtExposure : chr [1:1460] "No" "Gd" "Mn" "No" ...
## $ BsmtFinType1 : chr [1:1460] "GLQ" "ALQ" "GLQ" "ALQ" ...
## $ BsmtFinSF1 : num [1:1460] 706 978 486 216 655 ...
## $ BsmtFinType2 : chr [1:1460] "Unf" "Unf" "Unf" "Unf" ...
## $ BsmtFinSF2 : num [1:1460] 0 0 0 0 0 0 0 32 0 0 ...
## $ BsmtUnfSF : num [1:1460] 150 284 434 540 490 64 317 216 952 140 ...
## $ TotalBsmtSF : num [1:1460] 856 1262 920 756 1145 ...
## $ Heating : chr [1:1460] "GasA" "GasA" "GasA" "GasA" ...
## $ HeatingQC : chr [1:1460] "Ex" "Ex" "Ex" "Gd" ...
## $ CentralAir : chr [1:1460] "Y" "Y" "Y" "Y" ...
## $ Electrical : chr [1:1460] "SBrkr" "SBrkr" "SBrkr" "SBrkr" ...
## $ 1stFlrSF : num [1:1460] 856 1262 920 961 1145 ...
## $ 2ndFlrSF : num [1:1460] 854 0 866 756 1053 ...
## $ LowQualFinSF : num [1:1460] 0 0 0 0 0 0 0 0 0 0 ...
## $ GrLivArea : num [1:1460] 1710 1262 1786 1717 2198 ...
## $ BsmtFullBath : num [1:1460] 1 0 1 1 1 1 1 1 0 1 ...
## $ BsmtHalfBath : num [1:1460] 0 1 0 0 0 0 0 0 0 0 ...
## $ FullBath : num [1:1460] 2 2 2 1 2 1 2 2 2 1 ...
## $ HalfBath : num [1:1460] 1 0 1 0 1 1 0 1 0 0 ...
## $ BedroomAbvGr : num [1:1460] 3 3 3 3 4 1 3 3 2 2 ...
## $ KitchenAbvGr : num [1:1460] 1 1 1 1 1 1 1 1 2 2 ...
## $ KitchenQual : chr [1:1460] "Gd" "TA" "Gd" "Gd" ...
## $ TotRmsAbvGrd : num [1:1460] 8 6 6 7 9 5 7 7 8 5 ...
## $ Functional : chr [1:1460] "Typ" "Typ" "Typ" "Typ" ...
## $ Fireplaces : num [1:1460] 0 1 1 1 1 0 1 2 2 2 ...
## $ FireplaceQu : chr [1:1460] NA "TA" "TA" "Gd" ...
## $ GarageType : chr [1:1460] "Attchd" "Attchd" "Attchd" "Detchd" ...
## $ GarageYrBlt : num [1:1460] 2003 1976 2001 1998 2000 ...
## $ GarageFinish : chr [1:1460] "RFn" "RFn" "RFn" "Unf" ...
## $ GarageCars : num [1:1460] 2 2 2 3 3 2 2 2 2 1 ...
## $ GarageArea : num [1:1460] 548 460 608 642 836 480 636 484 468 205 ...
## $ GarageQual : chr [1:1460] "TA" "TA" "TA" "TA" ...
## $ GarageCond : chr [1:1460] "TA" "TA" "TA" "TA" ...
## $ PavedDrive : chr [1:1460] "Y" "Y" "Y" "Y" ...
## $ WoodDeckSF : num [1:1460] 0 298 0 0 192 40 255 235 90 0 ...
## $ OpenPorchSF : num [1:1460] 61 0 42 35 84 30 57 204 0 4 ...
## $ EnclosedPorch: num [1:1460] 0 0 0 272 0 0 0 228 205 0 ...
## $ 3SsnPorch : num [1:1460] 0 0 0 0 0 320 0 0 0 0 ...
## $ ScreenPorch : num [1:1460] 0 0 0 0 0 0 0 0 0 0 ...
## $ PoolArea : num [1:1460] 0 0 0 0 0 0 0 0 0 0 ...
## $ PoolQC : chr [1:1460] NA NA NA NA ...
## $ Fence : chr [1:1460] NA NA NA NA ...
## $ MiscFeature : chr [1:1460] NA NA NA NA ...
## $ MiscVal : num [1:1460] 0 0 0 0 0 700 0 350 0 0 ...
## $ MoSold : num [1:1460] 2 5 9 2 12 10 8 11 4 1 ...
## $ YrSold : num [1:1460] 2008 2007 2008 2006 2008 ...
## $ SaleType : chr [1:1460] "WD" "WD" "WD" "WD" ...
## $ SaleCondition: chr [1:1460] "Normal" "Normal" "Normal" "Abnorml" ...
## $ SalePrice : num [1:1460] 208500 181500 223500 140000 250000 ...
## - attr(*, "spec")=
## .. cols(
## .. Id = col_double(),
## .. MSSubClass = col_double(),
## .. MSZoning = col_character(),
## .. LotFrontage = col_double(),
## .. LotArea = col_double(),
## .. Street = col_character(),
## .. Alley = col_character(),
## .. LotShape = col_character(),
## .. LandContour = col_character(),
## .. Utilities = col_character(),
## .. LotConfig = col_character(),
## .. LandSlope = col_character(),
## .. Neighborhood = col_character(),
## .. Condition1 = col_character(),
## .. Condition2 = col_character(),
## .. BldgType = col_character(),
## .. HouseStyle = col_character(),
## .. OverallQual = col_double(),
## .. OverallCond = col_double(),
## .. YearBuilt = col_double(),
## .. YearRemodAdd = col_double(),
## .. RoofStyle = col_character(),
## .. RoofMatl = col_character(),
## .. Exterior1st = col_character(),
## .. Exterior2nd = col_character(),
## .. MasVnrType = col_character(),
## .. MasVnrArea = col_double(),
## .. ExterQual = col_character(),
## .. ExterCond = col_character(),
## .. Foundation = col_character(),
## .. BsmtQual = col_character(),
## .. BsmtCond = col_character(),
## .. BsmtExposure = col_character(),
## .. BsmtFinType1 = col_character(),
## .. BsmtFinSF1 = col_double(),
## .. BsmtFinType2 = col_character(),
## .. BsmtFinSF2 = col_double(),
## .. BsmtUnfSF = col_double(),
## .. TotalBsmtSF = col_double(),
## .. Heating = col_character(),
## .. HeatingQC = col_character(),
## .. CentralAir = col_character(),
## .. Electrical = col_character(),
## .. `1stFlrSF` = col_double(),
## .. `2ndFlrSF` = col_double(),
## .. LowQualFinSF = col_double(),
## .. GrLivArea = col_double(),
## .. BsmtFullBath = col_double(),
## .. BsmtHalfBath = col_double(),
## .. FullBath = col_double(),
## .. HalfBath = col_double(),
## .. BedroomAbvGr = col_double(),
## .. KitchenAbvGr = col_double(),
## .. KitchenQual = col_character(),
## .. TotRmsAbvGrd = col_double(),
## .. Functional = col_character(),
## .. Fireplaces = col_double(),
## .. FireplaceQu = col_character(),
## .. GarageType = col_character(),
## .. GarageYrBlt = col_double(),
## .. GarageFinish = col_character(),
## .. GarageCars = col_double(),
## .. GarageArea = col_double(),
## .. GarageQual = col_character(),
## .. GarageCond = col_character(),
## .. PavedDrive = col_character(),
## .. WoodDeckSF = col_double(),
## .. OpenPorchSF = col_double(),
## .. EnclosedPorch = col_double(),
## .. `3SsnPorch` = col_double(),
## .. ScreenPorch = col_double(),
## .. PoolArea = col_double(),
## .. PoolQC = col_character(),
## .. Fence = col_character(),
## .. MiscFeature = col_character(),
## .. MiscVal = col_double(),
## .. MoSold = col_double(),
## .. YrSold = col_double(),
## .. SaleType = col_character(),
## .. SaleCondition = col_character(),
## .. SalePrice = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Missing-value audit. Each `x[is.na(x)]` expression prints the NA entries of
# one column, so an empty result (`numeric(0)` / `character(0)`) means the
# column has no missing values.
train$OverallQual[is.na(train$OverallQual)]
## numeric(0)
train$GrLivArea[is.na(train$GrLivArea)]
## numeric(0)
train$TotalBsmtSF[is.na(train$TotalBsmtSF)]
## numeric(0)
train$GarageArea[is.na(train$GarageArea)]
## numeric(0)
train$GarageCars[is.na(train$GarageCars)]
## numeric(0)
# Location variables:
train$Neighborhood[is.na(train$Neighborhood)] # Neighborhood: neighborhood where the house is located.
## character(0)
train$LotArea[is.na(train$LotArea)] # LotArea: total lot area.
## numeric(0)
# Fill the missing LotFrontage values with the mean of the observed ones.
# `na.rm = TRUE` drops the NAs before averaging; without it the mean itself is
# NA and the gaps would simply be overwritten with NA again.
train$LotFrontage[is.na(train$LotFrontage)] <- mean(train$LotFrontage, na.rm = TRUE)
mean(train$LotFrontage) # LotFrontage: lot frontage length in feet.
## (output not re-rendered: this now prints the column mean instead of NA)
# Temporal variables:
train$YearBuilt[is.na(train$YearBuilt)] # YearBuilt: year of construction.
## numeric(0)
train$YearRemodAdd[is.na(train$YearRemodAdd)] # YearRemodAdd: year of remodelling (if any).
## numeric(0)
train$YrSold[is.na(train$YrSold)] # YrSold: year of sale.
## numeric(0)
# Additional variables:
# The NA mask must come from the same column that is being subset.
train$KitchenAbvGr[is.na(train$KitchenAbvGr)] # KitchenAbvGr: number of kitchens above ground.
## numeric(0)
train$Fireplaces[is.na(train$Fireplaces)] # Fireplaces: number of fireplaces.
## numeric(0)
train$PoolArea[is.na(train$PoolArea)] # PoolArea: pool area (if there is one).
## numeric(0)
# Same mean imputation as for LotFrontage, again with `na.rm = TRUE`.
train$MasVnrArea[is.na(train$MasVnrArea)] <- mean(train$MasVnrArea, na.rm = TRUE)
mean(train$MasVnrArea) # MasVnrArea: masonry veneer area.
## (output not re-rendered: this now prints the column mean instead of NA)
# Target variable:
train$SalePrice[is.na(train$SalePrice)] # SalePrice: final sale price of the house.
## numeric(0)
# Centre and spread of the sale price: mean, median and standard deviation,
# each reported as a sentence.
train_media_saleprice <- mean(train[["SalePrice"]])
paste("el promedio de los los precios finles es", train_media_saleprice) |>
  print()
## [1] "el promedio de los los precios finles es 180921.195890411"
train_mediana_saleprice <- median(train[["SalePrice"]])
paste("la mediana de los los precios finles es", train_mediana_saleprice) |>
  print()
## [1] "la mediana de los los precios finles es 163000"
train_des_estandar_saleprice <- sd(train[["SalePrice"]])
paste("la desviasion estandar del precio final es", train_des_estandar_saleprice) |>
  print()
## [1] "la desviasion estandar del precio final es 79442.5028828866"
paste("Indican que la mayor cantidad son precios finales son superiores al numero de datos existentes ") |>
  print()
## [1] "Indican que la mayor cantidad son precios finales son superiores al numero de datos existentes "
# Mean above-ground living area, and the ratio of mean price to mean area as a
# rough price per square foot.
train_mean_grlivarea <- mean(train[["GrLivArea"]])
prom_pies_cuadrados <- train_media_saleprice / train_mean_grlivarea
paste(
  "la media de GrLivArea es Área habitable sobre el suelo, valor es de",
  train_mean_grlivarea,
  "pies cuadrados esto quiere decir que en promedio las viviedas tienen esa area contruida parra vivir y de acuerdo con el romedio de los precios que es ",
  train_media_saleprice,
  " podemos promediar que el vaor de un pie cuadrado es de ",
  prom_pies_cuadrados
) |>
  print()
## [1] "la media de GrLivArea es Área habitable sobre el suelo, valor es de 1515.46369863014 pies cuadrados esto quiere decir que en promedio las viviedas tienen esa area contruida parra vivir y de acuerdo con el romedio de los precios que es 180921.195890411 podemos promediar que el vaor de un pie cuadrado es de 119.383391402875"
# Spread of the garage size: range() returns c(min, max), var() the variance.
train_garagearea_rango <- range(train$GarageArea)
train_garagearea_varianza <- var(train$GarageArea)
# Join the two range endpoints into one piece of text first. Handing the
# length-2 vector straight to the outer paste() recycles the whole sentence
# and prints it twice, once per endpoint.
print(paste(
  "el rango del tamaño del garaje en pies cuadrados es de",
  paste(train_garagearea_rango, collapse = " a "),
  "y la vaianza es de", train_garagearea_varianza
))
## [1] "el rango del tamaño del garaje en pies cuadrados es de 0 a 1418 y la vaianza es de 45712.5102289051"
¿Consideras que es una variable homogénea o heterogénea? ¿Por qué?
# Heading line, then summary() (min, quartiles, median, mean, max) for the
# sale price, garage area, living area and pool area columns.
paste("el resumen de precios de vivienda, area construida, area de garage y area de piscina") |>
  print()
## [1] "el resumen de precios de vivienda, area construida, area de garage y area de piscina"
summary(train[["SalePrice"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 34900 129975 163000 180921 214000 755000
summary(train[["GarageArea"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 334.5 480.0 473.0 576.0 1418.0
summary(train[["GrLivArea"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 334 1130 1464 1515 1777 5642
summary(train[["PoolArea"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 2.759 0.000 738.000
# ggplot2 is loaded for plotting; the only plot in this section is the
# disabled draft below.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
# Disabled draft: scatter plot of sale price against a house index.
# NOTE(review): the draft cannot run as written and needs these fixes before
# it is re-enabled:
#   - `c(1;460)` does not parse; presumably a 1..nrow(train) index was
#     intended (e.g. seq_len(nrow(train))) -- TODO confirm.
#   - the mapping `(x = viviendas, y = precio)` is not wrapped in aes().
#   - the data frame column is named `vivienda`, but the mapping refers to
#     `viviendas`.
#ventas<- data.frame(vivienda=c(1;460),
#precio=c(train$SalePrice))
#ggplot(ventas, (x = viviendas, y = precio)) +
# geom_point(color = "pink") +
# ggtitle("Distribución del precio de venta") +
# xlab("viviendas") +
# ylab("precio (dolares)")
• Título: “Precio de venta según calidad general”
• ¿Qué conclusiones puedes obtener sobre la relación entre calidad y precio? Describir el gráfico.
8. Genere un gráfico de barras apiladas con el precio promedio (SalePrice) por barrio (Neighborhood).
• ¿Qué barrios presentan los precios más altos y más bajos en promedio?
10. Haga un gráfico de dispersión (plot) entre GrLivArea y SalePrice.
• ¿Existe alguna relación entre estas dos variables?
11. ¿Qué tipo de variable es OverallQual? ¿Es correcto calcular la media de esta variable?