Problem Definition

The objective is to predict house price.

Data Location

Data is saved as a CSV file (house-data.csv) in /Users/charu/Desktop/Machine Learning

Data Description

The data for these sales comes from the official public records of home sales in the King County area, Washington State. The data set contains 21613 rows. Each row represents a home sold from May 2014 through May 2015. A glossary of the terms used is available at http://your.kingcounty.gov/assessor/eRealProperty/ResGlossaryOfTerms.html. Size: 21613 obs. of 21 variables.

Setup

library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(corrgram)
library(gridExtra) 
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine

Read Data

# Load the house sales data into `dfr`. The first line of the file holds the
# column names, and text columns are kept as character rather than factor.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
dfr <- read.csv(
  "/Users/charu/Desktop/Machine Learning/house-data.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Show the first rows as a quick check of the import.
head(dfr)
##           id            date   price bedrooms bathrooms sqft_living
## 1 7129300520 20141013T000000  221900        3      1.00        1180
## 2 6414100192 20141209T000000  538000        3      2.25        2570
## 3 5631500400 20150225T000000  180000        2      1.00         770
## 4 2487200875 20141209T000000  604000        4      3.00        1960
## 5 1954400510 20150218T000000  510000        3      2.00        1680
## 6 7237550310 20140512T000000 1225000        4      4.50        5420
##   sqft_lot floors waterfront view condition grade sqft_above sqft_basement
## 1     5650      1          0    0         3     7       1180             0
## 2     7242      2          0    0         3     7       2170           400
## 3    10000      1          0    0         3     6        770             0
## 4     5000      1          0    0         5     7       1050           910
## 5     8080      1          0    0         3     8       1680             0
## 6   101930      1          0    0         3    11       3890          1530
##   yr_built yr_renovated zipcode     lat     long sqft_living15 sqft_lot15
## 1     1955            0   98178 47.5112 -122.257          1340       5650
## 2     1951         1991   98125 47.7210 -122.319          1690       7639
## 3     1933            0   98028 47.7379 -122.233          2720       8062
## 4     1965            0   98136 47.5208 -122.393          1360       5000
## 5     1987            0   98074 47.6168 -122.045          1800       7503
## 6     2001            0   98053 47.6561 -122.005          4760     101930

Functions

Summary

# Summarise every column of dfr separately; the result is a list with one
# summary() entry per column. (Alternative kept for reference: summary(dfr))
lapply(X = dfr, FUN = summary)
## $id
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 1.000e+06 2.123e+09 3.905e+09 4.580e+09 7.309e+09 9.900e+09 
## 
## $date
##    Length     Class      Mode 
##     21613 character character 
## 
## $price
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   75000  322000  450000  540100  645000 7700000 
## 
## $bedrooms
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   3.000   3.000   3.371   4.000  33.000 
## 
## $bathrooms
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.750   2.250   2.115   2.500   8.000 
## 
## $sqft_living
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     290    1427    1910    2080    2550   13540 
## 
## $sqft_lot
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     520    5040    7618   15110   10690 1651000 
## 
## $floors
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   1.500   1.494   2.000   3.500 
## 
## $waterfront
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.000000 0.000000 0.000000 0.007542 0.000000 1.000000 
## 
## $view
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.2343  0.0000  4.0000 
## 
## $condition
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   3.000   3.000   3.409   4.000   5.000 
## 
## $grade
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   7.000   7.000   7.657   8.000  13.000 
## 
## $sqft_above
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     290    1190    1560    1788    2210    9410 
## 
## $sqft_basement
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     0.0   291.5   560.0  4820.0 
## 
## $yr_built
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1900    1951    1975    1971    1997    2015 
## 
## $yr_renovated
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     0.0    84.4     0.0  2015.0 
## 
## $zipcode
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   98000   98030   98060   98080   98120   98200 
## 
## $lat
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   47.16   47.47   47.57   47.56   47.68   47.78 
## 
## $long
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -122.5  -122.3  -122.2  -122.2  -122.1  -121.3 
## 
## $sqft_living15
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     399    1490    1840    1987    2360    6210 
## 
## $sqft_lot15
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     651    5100    7620   12770   10080  871200

Remove unknown/unwanted columns

# Drop the columns that are not used in the rest of the analysis; all
# remaining columns keep their original order.
dfr <- dfr %>%
  select(
    -c(
      id, date, waterfront, view, condition, zipcode,
      lat, long, sqft_lot, yr_built, sqft_lot15, yr_renovated
    )
  )
# Show the first rows of the reduced data frame.
head(dfr)
##     price bedrooms bathrooms sqft_living floors grade sqft_above
## 1  221900        3      1.00        1180      1     7       1180
## 2  538000        3      2.25        2570      2     7       2170
## 3  180000        2      1.00         770      1     6        770
## 4  604000        4      3.00        1960      1     7       1050
## 5  510000        3      2.00        1680      1     8       1680
## 6 1225000        4      4.50        5420      1    11       3890
##   sqft_basement sqft_living15
## 1             0          1340
## 2           400          1690
## 3             0          2720
## 4           910          1360
## 5             0          1800
## 6          1530          4760

Observation

The above variables have been dropped based on the correlation test done before. These items have a negligible effect on price.

# Run detect_outliers() on each remaining column separately and print the
# result per column. detect_outliers() is a helper that is not shown here.
# (Alternative kept for reference: detect_outliers(dfr))
lapply(X = dfr, FUN = detect_outliers)
## $price
##    [1] 1225000 2000000 1350000 1325000 1450000 2250000 1505000 2400000
##    [9] 2900000 1365000 2050000 3075000 2384000 1384000 1400000 1350000
##   [17] 1550000 1300000 1450000 1199000 1150000 1385000 1280000 1197354
##   [25] 1485000 2250000 1600000 2125000 1450000 1950000 1220000 3070000
##   [33] 1250000 1145000 1484900 1578000 1300000 1310000 2250000 1355000
##   [41] 1216000 2400000 1270000 1200000 1321500 2450000 1242500 1570000
##   [49] 1200000 1278000 1955000 1750000 5110800 1150000 1875000 1225000
##   [57] 2300000 2260000 5300000 1200000 2200000 1131000 1175000 1700000
##   [65] 1400000 2537000 1355000 1229000 5350000 1328000 1218000 1250000
##   [73] 1300000 1175000 1375000 1150000 1198000 2250000 1230000 1265000
##   [81] 1679000 1928000 1175000 2350000 1135000 1315000 1430000 1525000
##   [89] 1569500 1575000 1249000 1715000 2400000 1600000 1480000 2532000
##   [97] 1568000 1200000 1250000 3850000 1195000 1635000 1325000 1735000
##  [105] 1700000 1650000 1550000 1680000 1760000 1339000 2750000 1185000
##  [113] 1400000 1810000 1365000 1187500 1544500 1600000 3278000 1600000
##  [121] 1220000 1700000 1280000 1865000 1215000 1245000 1600000 1619999
##  [129] 1650000 1237500 1165000 1599950 4500000 1272000 1380000 1150000
##  [137] 1505000 1610000 1289000 1385000 1324050 1400000 1835000 3600000
##  [145] 1250000 3000000 1195000 2950000 1325000 2525000 1799000 2298000
##  [153] 2288000 1500000 1130000 1320000 1265000 1506000 1899000 1400000
##  [161] 1135000 1901000 1208000 1900000 1306000 1965000 1675000 2075000
##  [169] 2544750 1570000 1280600 1234568 1290000 1230000 1165000 1225000
##  [177] 1250000 2095000 1575000 1598888 1288000 1440000 1350000 1240000
##  [185] 2453500 2450000 1905000 1740000 1650000 2890000 1575000 2200000
##  [193] 1325000 2950000 1225000 1415000 7062500 1208000 1495000 1300000
##  [201] 1379900 1145000 1495000 2400000 1385000 1350000 2500000 2150000
##  [209] 1210000 4000000 1191000 1600000 2880500 1285000 1875000 1570000
##  [217] 1300000 2400000 1275000 1155000 1190000 1400000 2125000 1135000
##  [225] 1150000 1150000 2050000 1705000 5570000 1150000 1338750 1670000
##  [233] 1250000 2000000 1236000 1442500 1260000 1200000 1206505 1130000
##  [241] 1160000 1405000 2485000 1345000 1130000 1195000 1490000 1465000
##  [249] 1850000 2479000 2048000 1151250 1895000 1939999 1400000 1575000
##  [257] 1165000 1200000 1385000 1295000 1525000 1150000 1309500 1297000
##  [265] 1500000 1160000 1315000 1155000 1600000 1740000 1250000 2850000
##  [273] 1780000 1215000 1150000 1880000 1691000 2400000 1600000 1200000
##  [281] 1157400 1419000 1470000 2466350 1517000 1620500 1850000 1150000
##  [289] 1500000 1460000 1697000 1795000 1525000 1727000 3418800 1200000
##  [297] 1195000 1476000 1564350 2725000 1325000 2385000 2196000 1280000
##  [305] 1150000 1240000 1445000 2400000 1695000 2200000 1202500 1450000
##  [313] 1965221 1450000 1436000 1580000 2903000 1289000 1153000 1339000
##  [321] 1925000 1940000 3650000 1240000 1339000 1350000 1337500 1349000
##  [329] 1250000 1515000 1998000 2650000 1875000 1185000 1335000 2700000
##  [337] 1255000 2250000 1900000 1430000 1437500 1555000 1370000 1488000
##  [345] 1395000 1580000 1155000 1210000 1295648 2175000 1387000 3800000
##  [353] 1500000 1650000 1735000 1430000 1255000 3200000 1300000 1490000
##  [361] 1400000 7700000 1615000 1450000 1200000 1160000 2500000 3100000
##  [369] 1245000 1535000 1525000 1150000 1775000 1184000 1600000 1975000
##  [377] 1393000 1248000 2575000 1600000 1310000 1300000 1170000 1605000
##  [385] 1398000 2500000 1320000 1485000 1700000 1146800 1445000 2538000
##  [393] 1270000 1465000 2150000 1250000 1150000 1450000 1646000 1212500
##  [401] 1200000 1150000 1900000 3200000 1824100 2900000 1290000 3400000
##  [409] 1325000 1440000 1195000 1851000 1250000 1475000 4668000 1511250
##  [417] 1250000 1865000 1452000 1625000 2450000 2193000 1320000 1218000
##  [425] 1655000 1300000 1250000 1295000 2140000 1247000 1285000 1200000
##  [433] 1240000 1208000 1590000 1150000 4489000 1438888 1485000 1200000
##  [441] 1175000 1200000 1800000 1250000 1370000 1406888 1530000 1325000
##  [449] 1910000 1348000 1205000 1325000 1360000 1400000 1234000 1955000
##  [457] 1265000 1500000 1190000 1680000 6885000 1680000 2575000 1362500
##  [465] 1335000 1565000 1925000 1249000 1698000 2375000 1275000 1350000
##  [473] 1160000 1300000 1795000 1540000 1780000 1157200 1755000 1565000
##  [481] 1482500 1395713 1175000 1695000 1140000 1388000 1220000 1250000
##  [489] 1648000 1175000 1730000 1140000 1738000 1315000 1200000 1275000
##  [497] 1150000 1250000 1695000 1227500 1900000 1326000 1365000 1400000
##  [505] 1355000 2700000 1245000 1300000 1197000 2983000 1228000 1399000
##  [513] 1375000 1200000 3400000 1145000 3120000 2000000 1350000 1500000
##  [521] 1225000 1700000 1565000 1700000 1165000 1150000 1325000 1135000
##  [529] 1308000 1688000 1575000 1250000 1769000 1582500 1610000 1650000
##  [537] 1250000 1557600 1565000 1378600 1130000 1225000 1662000 1295000
##  [545] 1820000 1175000 1381000 1234582 1450000 2555000 1400000 1699000
##  [553] 3000000 1600000 1400000 2100000 1800000 1168000 1164000 1170000
##  [561] 1705000 2300000 1295000 2750000 1655000 1180000 1325000 1209000
##  [569] 3200000 1450000 1750000 1142000 1185000 1200000 1300000 2795000
##  [577] 1250000 1730000 1420000 1135000 1545000 1400000 1370000 1280000
##  [585] 1580000 2200000 1250000 1236300 1850000 1280000 1785000 1600000
##  [593] 1620000 1475000 1950000 1400000 1620000 1288000 1443917 1130000
##  [601] 1210000 1150000 2575000 3000000 1400000 1300000 1500000 1260000
##  [609] 1195000 1595000 1430000 2135000 1665000 1300000 1190000 1450000
##  [617] 1462497 1305000 1199500 1959000 1650000 1712750 1186039 1340000
##  [625] 1650000 1550000 4208000 1289000 1169000 1900000 1890000 1960000
##  [633] 1300000 2000000 2535000 1850000 1272500 1295000 1350000 2408000
##  [641] 1149000 2280000 1313000 1749000 1300000 3100000 1610000 1250000
##  [649] 1135000 2300000 1698888 2250000 1260000 1135000 1180000 1250000
##  [657] 1250000 1610000 1700000 1651000 2546000 1275000 1800000 2000000
##  [665] 1999000 1688000 1148000 1580000 1822500 2415000 1140000 1550000
##  [673] 1169000 1272000 1170000 1328000 1650000 3168750 1775000 1475000
##  [681] 1240000 1275000 1762000 1640000 1900000 2340000 2600000 1160000
##  [689] 1325000 1712500 1350000 1810000 1795000 1150000 1886700 1160000
##  [697] 3395000 1815000 1225000 1400000 2885000 1680000 2750000 3300000
##  [705] 1500000 2110000 2225000 1340000 1285000 2200000 1625000 1950000
##  [713] 1225000 1500000 1690000 1800000 1580000 2200000 1500000 2400000
##  [721] 1225000 1260500 1989000 2888000 2230000 1442500 1378000 1475000
##  [729] 1295000 1465000 1160000 1195000 1612500 1260000 1180000 1900000
##  [737] 2600000 1520000 1340000 1375000 1500000 1480000 1425000 1640000
##  [745] 2450000 1295000 2000000 1535000 1580000 1210000 1600000 1225000
##  [753] 1770000 2152500 1950000 1755000 2200000 1900000 1300000 1150000
##  [761] 1395000 1150000 1398000 2300000 3640900 3065000 1675000 1330000
##  [769] 1305000 1468000 1220000 1950000 1650000 1311000 1220000 3200000
##  [777] 1800000 1190000 2300000 1825000 1681000 2271150 1715000 1680000
##  [785] 1850000 1637500 1160000 1675000 1690000 1256500 1295000 1187500
##  [793] 2480000 1300000 1160000 1335000 1520000 1600000 1565000 1425000
##  [801] 1180000 1636000 1190000 1346400 1190000 1280000 1400000 1365000
##  [809] 1250000 1220000 1385000 1200000 1425000 1550000 1925000 1320000
##  [817] 1300000 1309500 1815000 1285000 1490000 1200000 1950000 1400000
##  [825] 2328000 1250000 1620000 1307000 1600000 3710000 1350000 1500000
##  [833] 1185000 1387000 1200000 1200000 1660000 1538000 1750000 1220000
##  [841] 1875000 1180500 1400000 1980000 1185000 1260000 1810000 1350000
##  [849] 1298000 1310000 2195000 1200688 1150000 2140000 1150000 1355000
##  [857] 1185000 1348000 1920000 1650000 2574000 2500000 1395000 1300000
##  [865] 1400000 1200000 1388000 2300000 1225000 3100000 2187730 1475000
##  [873] 1776000 1298000 2500000 1185001 1200000 1380000 1450000 1880000
##  [881] 1970000 1457000 1454000 1288000 2250000 1350000 1205000 1735000
##  [889] 1629000 2225000 1200000 1425000 1802750 1388000 1870000 1562000
##  [897] 1215000 1200000 2510000 1350000 1650000 1284000 1550000 2005000
##  [905] 1364000 1325000 1320000 2050000 1210000 1650000 1375000 2458000
##  [913] 1595000 1190000 1350000 1320000 1205000 2027000 1990000 1321625
##  [921] 1444000 1250000 1735000 1695000 1680000 1702500 3300000 1385000
##  [929] 1855000 2367000 1211000 2475000 1387800 1355000 1862000 3300000
##  [937] 1230000 1265000 2920000 1140000 1500000 1230000 2630000 1250000
##  [945] 1415000 1728000 2700000 3300000 3650000 1650000 1137500 1450000
##  [953] 2950000 1135250 1315000 1156000 1600000 1325000 1388000 1150000
##  [961] 1160000 1165000 1225000 2147500 1268888 1700000 2680000 1675000
##  [969] 1389000 1298888 1945000 1500000 2180000 1410000 1920000 1400000
##  [977] 1190000 1150000 1161000 1240000 1411600 1505000 3800000 1225000
##  [985] 1415000 3635000 1185000 2000000 1150000 2720000 1225000 1550000
##  [993] 1800000 1518634 1430800 2160000 1550000 1950000 1400000 1450000
## [1001] 1900000 1500000 1225000 1388000 2395000 1980000 1280000 1356925
## [1009] 1360000 1410000 1200000 1150000 1264000 1450000 1850000 2205000
## [1017] 1450000 1375000 1680000 2250000 1205000 1583000 2641100 1532500
## [1025] 2320000 1255784 1710000 2700000 1330000 1260000 1262000 1595000
## [1033] 1563100 1695000 1266525 1180000 1210000 1898000 1465000 1400000
## [1041] 2700000 1280000 1238000 1750000 2250000 1237500 1575000 1481000
## [1049] 2945000 1750000 1350000 1312000 1789950 2150000 1333000 2225000
## [1057] 1300000 1302000 1206688 3204000 1240000 3000000 2350000 1360000
## [1065] 1598000 1205000 1385000 1800000 1205000 1285000 1138990 2998000
## [1073] 1600000 1267500 3345000 1800000 1289990 1699990 1500000 1898000
## [1081] 2950000 1240415 1299888 1180000 1200000 1510000 2321000 1480000
## [1089] 1130000 1135000 2300000 1325000 1250000 1485000 1600000 1900000
## [1097] 1750000 1275000 1258000 1275000 1940000 1830000 1175000 1149000
## [1105] 1550000 1300000 1242000 1270000 2900000 1274950 1139990 1880000
## [1113] 1800000 1399950 1839900 2230000 1240000 2351956 1698000 1350000
## [1121] 1500000 1999950 1488000 2065000 1174660 1765000 1987500 1750000
## [1129] 1720000 1459000 1150000 1555000 2238888 1865000 1408762 1378000
## [1137] 1450000 3000000 1325000 1881583 1375000 1700000 3567000 1222500
## [1145] 1575000 1537000
## 
## $bedrooms
##   [1]  1  6  6  6  1  6  1  1  6  6  7  6  1  6  6  6  6  1  0  1  1  7  1
##  [24]  6  6  6  6  7  6  6  6  1  1  6  1  6  6  6  6  6  1  1  8  6  6  6
##  [47]  1  6  6  6  1  1  1  6  1  1  1  6  6  1  6  6  6  6  6  6  6  6  6
##  [70]  6  6  6  6  6  1  6  6  6  6  6  1  1  1  1  6  1  0  6  1  6  1  1
##  [93]  1  6  1  0  1  6  6  6  1  1  6  6  1  6  7  6  6  6  1  1  1  6  6
## [116]  1  1  7  8  8  1  9  6  1  6  9  6  6  6  1  1  1  6  6  6  1  6  6
## [139]  6  7  7  1  7  0  1  6  1  6  6  6  1  6  6  1  6  6  6  6  6  6  1
## [162]  1  1  1  1  6  6  9  1  1  1  6  8  6  6  6  6  1  6  6  6  1  1  6
## [185]  6  0  1  7  6  6  6  7  6  1  6  6  1  6  1  1  6  6  6  6  6  1  1
## [208]  6  6  6  6  1  1  1  6  7  1  6  1  6  1  6  1  6  6  6  1  0  0  7
## [231]  1  1  9  1  1  1  7  6  6  1 11  1  1  6  1  6  1  6  8  6  6  6  1
## [254]  7  6  1  6  6  1  6  6  1  8  1  6  1  1  1  6  6  6  7  6  6  7  0
## [277]  6  6  6  1  1  6  0  1  6  6  1  1  6  6  1  6  7  1  1  1  6  6  7
## [300]  1  8  6  6  6  6  6  6  6  1  6  1  1  6  1  1  6  1  6  6  1  1  6
## [323]  6  1  6  6  6  1  6  7  1  6  1  1  1  6  1  6  6  6  1  1  6  1  1
## [346]  1  7  7  0  7  1  7  1  6  1  1  1  6  8  6  1  6  7  6 10  1  6  6
## [369]  6  6  1  1  6  1  6  1  6  6  6  7  1  6  1  7  6  6  1  1  1  1  1
## [392]  6  0  1  1  6  6  6  6  1  6  7  6  7  6  8  6  6 10  1  6  7  1  6
## [415]  1  6  6  7  6  6  8  6  7  6  1  6  1 33  6  6  6  6  7  6  1  1  6
## [438]  1  6  6  1  6  7  6  1  1  9  1  6  1  6  6  6  8  1  7  6  1  1  6
## [461]  6  1  6  1  1  6  6  1  6  1  6  1  1  6  1  1  1  6  6  1  6  1  0
## [484]  6  9  7  8  1  1  1  6  6  6  1  7  1  6  6  1  1  6  7  6  6  6  1
## [507]  6  1 10  6  1  8  7  1  6  0  6  1  6  6  6  6  6  1  1  6  6  6  6
## [530]  6  6  1  6  6  1  6  6  1  1  1  1  6  1  1  6  1
## 
## $bathrooms
##   [1] 4.50 4.00 4.00 4.75 5.00 4.25 4.50 4.25 3.75 5.00 4.00 4.00 4.25 4.25
##  [15] 0.00 4.25 4.50 4.50 3.75 4.50 4.00 0.00 4.50 5.25 4.00 3.75 4.50 4.50
##  [29] 4.25 6.00 3.75 4.25 4.00 3.75 3.75 4.75 5.00 3.75 3.75 3.75 5.00 4.25
##  [43] 3.75 4.00 4.25 4.50 3.75 4.00 3.75 5.00 3.75 4.00 3.75 3.75 4.25 4.00
##  [57] 4.25 4.25 4.50 3.75 4.00 3.75 5.00 3.75 0.50 4.00 5.00 5.00 4.50 5.50
##  [71] 4.25 4.00 4.50 3.75 3.75 4.50 3.75 5.50 4.25 5.50 0.00 5.25 4.25 4.00
##  [85] 4.50 3.75 3.75 3.75 3.75 4.25 4.50 4.75 3.75 3.75 3.75 3.75 3.75 4.50
##  [99] 4.25 4.00 4.75 4.75 3.75 4.50 4.00 3.75 3.75 4.00 4.00 3.75 4.75 4.50
## [113] 3.75 6.75 6.00 4.50 5.50 4.50 3.75 4.00 4.25 3.75 5.75 3.75 4.00 4.00
## [127] 4.00 5.25 4.75 4.00 4.25 3.75 4.00 4.00 5.25 3.75 3.75 3.75 4.50 5.00
## [141] 3.75 3.75 4.50 4.00 3.75 3.75 4.00 4.75 3.75 0.00 4.25 5.00 3.75 4.00
## [155] 4.25 4.00 4.00 4.50 4.75 4.00 4.00 4.00 4.50 3.75 3.75 4.50 5.75 4.75
## [169] 3.75 4.00 4.00 3.75 4.25 4.25 3.75 0.00 4.50 5.50 4.00 4.50 4.50 4.00
## [183] 4.50 4.25 8.00 3.75 4.25 4.00 3.75 4.50 3.75 4.50 4.50 4.50 4.00 4.25
## [197] 5.00 4.50 4.25 4.50 6.75 4.00 3.75 4.50 4.25 3.75 4.50 3.75 3.75 4.00
## [211] 7.50 4.00 4.00 4.00 4.25 3.75 4.50 3.75 4.25 4.25 4.50 4.50 4.25 7.75
## [225] 4.25 4.75 4.00 4.00 4.25 4.00 4.00 4.50 4.25 4.00 4.50 4.25 4.00 4.25
## [239] 3.75 4.00 4.00 4.00 4.00 0.00 4.50 3.75 0.00 4.25 4.00 3.75 5.00 3.75
## [253] 4.25 3.75 4.00 4.00 3.75 5.50 0.50 4.00 3.75 0.00 4.25 3.75 3.75 4.50
## [267] 4.00 4.00 4.25 4.00 4.50 4.50 4.50 3.75 4.25 3.75 4.50 3.75 4.25 4.00
## [281] 4.50 0.50 3.75 4.25 4.50 4.50 4.25 4.00 4.00 3.75 4.50 4.50 3.75 0.50
## [295] 4.25 3.75 3.75 4.00 4.75 4.50 4.50 4.00 3.75 4.00 6.00 3.75 4.00 4.25
## [309] 4.00 3.75 4.25 4.00 4.50 3.75 4.00 3.75 4.50 8.00 4.25 3.75 4.00 4.50
## [323] 4.25 5.00 5.25 3.75 4.50 4.00 4.75 4.00 4.50 3.75 4.00 5.50 3.75 4.75
## [337] 4.00 4.00 3.75 3.75 4.25 4.50 4.25 0.00 4.50 6.25 3.75 4.00 4.00 3.75
## [351] 3.75 3.75 4.50 5.00 4.00 4.50 4.00 4.25 4.50 3.75 4.00 3.75 4.50 4.75
## [365] 4.75 4.00 4.00 3.75 3.75 3.75 3.75 4.75 3.75 3.75 5.25 3.75 5.75 4.50
## [379] 5.00 4.50 5.00 3.75 3.75 4.50 3.75 4.25 4.25 4.00 3.75 4.00 4.25 5.00
## [393] 3.75 4.00 4.00 4.00 5.75 5.25 3.75 3.75 4.50 4.00 4.00 5.25 4.50 4.00
## [407] 3.75 4.25 4.00 4.25 4.50 4.00 4.25 4.00 4.25 4.50 3.75 4.25 4.00 4.50
## [421] 4.00 4.50 3.75 5.25 4.00 3.75 3.75 4.00 3.75 4.50 4.50 3.75 5.25 6.25
## [435] 4.00 4.75 4.75 5.00 5.25 4.50 4.00 3.75 4.00 4.00 4.50 3.75 4.00 3.75
## [449] 5.50 4.25 4.00 4.25 3.75 6.00 4.25 3.75 4.25 4.25 4.50 0.00 4.50 4.00
## [463] 4.00 3.75 5.00 3.75 3.75 3.75 4.25 3.75 4.00 3.75 4.50 4.00 4.00 4.25
## [477] 4.25 4.50 4.00 4.50 3.75 4.00 4.50 4.00 4.50 4.00 4.25 4.75 3.75 4.75
## [491] 4.00 4.00 3.75 4.50 5.25 4.50 4.50 3.75 3.75 3.75 4.00 3.75 3.75 5.50
## [505] 4.50 4.25 4.50 4.50 4.25 4.00 4.00 4.00 4.25 3.75 4.25 4.25 3.75 3.75
## [519] 4.00 5.50 3.75 4.00 3.75 3.75 4.50 4.50 4.25 4.00 6.50 4.50 3.75 3.75
## [533] 4.00 4.75 3.75 3.75 4.00 4.00 3.75 5.25 4.25 4.50 5.00 4.00 4.00 6.00
## [547] 5.00 4.00 4.00 4.50 3.75 4.75 4.25 6.00 3.75 3.75 3.75 4.00 4.00 4.50
## [561] 4.25 6.50 3.75 4.00 3.75 4.00 4.50 4.00 4.50 3.75 3.75
## 
## $sqft_living
##   [1]  5420  4770  4380  5180  4410  5050  5310  4550  4290  4290  4800
##  [12]  6070  4870  4860  4860  4410  4760  6050  4740  5403  4380  4640
##  [23]  6050  4370  4570  4670  4860  4270  4890  4390  4250  5670  4475
##  [34]  6070  4300  4390  8010  4460  4610  4380  4250  4930  7390  4640
##  [45]  5320  8000  6563  4740  5450  4440  4830  4280  4570  4370  5710
##  [56]  5620  5774  4470  4490  5040  5770  5180  6400  4490  6055  4250
##  [67]  6300  4350  4460  4340  4250  5110  4700  4910  4750  6840  4930
##  [78]  5180  5130  6640  5740  7350  4510  4300  4870  5020  4740  5370
##  [89]  4860  6930  6550  6110  4883  5010  4660  4600  4910  5410  4270
## [100]  4520  5340  5230  5370  4730  4430  5780  6240 10040  4250  5360
## [111]  4610  7480  5620  4340  7080  5760  4400  4280  9200  4320  4420
## [122]  4380  4580  5310  4660  4300  4240  5180  6810  4690  5830  4930
## [133]  5700  5990  5070  4750  4270  4750  5040  4890  4350  4280  6390
## [144]  4570  4350  4410  4430  4720  5450  4470  6410  6330  4250  4290
## [155]  5860  5990  4260  4360  4370  4440  4970  4940  7220  5480  4740
## [166]  4386  5440  4500  4700  4410  4240  4470  4650  5180  5400  4810
## [177]  4750  4490  7050  4370  4870  6210  4270 12050  4250  4360  4260
## [188]  5480  4530  5850  4280  4800  4240  4380  4320  5250  4330  4350
## [199]  5330  4560  4560  4540  7100  4580  5230  9640  4460  4410  5030
## [210]  5660  4330  5067  5270  5150  4380  4460  6430  4430  4610  4250
## [221]  4460  4420  5720  4500  4300  4530  5584  9890  5770  5540  5050
## [232]  5220  4340  4830  4850  5844  5530  4480  5270  5860  5545  4610
## [243]  4700  4440  5960  4420  5000  6510  4610  5050  4600  7400  5730
## [254]  4260  4490  5370  4350  4320  4720  5000  5440  6085  4250  5461
## [265]  4400  4470  6640  4620  5300  4380  4500  4850  4400  4290  4330
## [276]  4560  5000  4590  5000  6900  4570  5840  4980  4560  4340  5490
## [287]  4320  7420  4850  4310  4690  4780  4590  4370  5100  6200  4500
## [298]  4360  5280  7440  4610  4285  4430  4690  4450  4440  4600 13540
## [309]  6980  4270  4590  4300  4340  4390  5850  4460  4260  4590  6380
## [320]  4550  7880  4440  4330  4320  4470  4790  4500  5270  5370  4340
## [331]  5190  4290  4730  5350  7620  4430  6200  4410  5840  4320  5550
## [342]  5290  5290  5020  5480  8670  4480  4770  4930  4320  4420  5470
## [353]  5160  4570  4600  4370  4260  4780  4670  5770  4300  4980  4280
## [364]  6040  5310  4830  4790  5010  7000  4320  5600  6030  4830  4400
## [375]  5170  4670  4660  4680  4400  4560  4590  5310  5930  4240  4630
## [386]  4310  4720  4510  4560  4420  4510  5550  4390  4430  4240  5860
## [397]  5810  4700  4500  4250  5210  5110  5960  4640  7730  4285  4475
## [408]  4300  4620  4740  4610  5610  4270  4280  5090  4240  4270  4440
## [419]  5150  4310  4960  4510  4560  5480  5040  5120  4410  5550  6500
## [430]  4920  4480  4860  5190  4560  4820  5570  4340  4960  5240  8020
## [441]  4390  4575  5470  5540  5820  4670  4420  4290  7710  5020  4280
## [452]  7320  4270  4660  4410  4290  4690  4710  6630  5510  4720  5490
## [463]  4370  6490  4460  5300  4260  4440  4460  5080  4520  4360  4660
## [474]  4910  4270  5430  4600  4890  4310  4630  5610  4660  4270  5050
## [485]  5330  4590  4800  7850  4280  4420  4490  4645  4570  4700  5305
## [496]  4320  4670  4650  5120  5220  4340  4360  5060  4700  4920  4810
## [507]  5635  4410  4410  5550  4270  4420  4690  4280  6670  6530  4640
## [518]  5350  6370  4360  4470  5790  6260  4600  4390  5780  4700  4410
## [529]  4490  4750  4360  5790  6160  4360  4490  5370  4500  5940  4700
## [540]  4450  5520  4495  4240  5190  7120  4390  4490  4370  5430  5010
## [551]  4680  5640  6880  4350  4460  4310  5230  4590  4620  4360  7270
## [562]  4920  5150  4300  5090  4420  4410  4350  4500  4850  4910  4470
## 
## $floors
## numeric(0)
## 
## $grade
##    [1] 11  5 10 11 10 10 10 10 12 10  5 10 10 10 10 10 10  5  5 11 11 10 10
##   [24] 10 11 11  4 10 10 10 11 10 10  5 12 11 10 10 10 10 10  4 10 11  5 11
##   [47] 10 10 10  5 11 10 12 11 10 10 11 10 10 11 10 11 10 10  5  5 12 11 10
##   [70] 10 10 11 12 11  5  5  5 10 10 10 10 10 10 10  5 10 10 10 10 10 11  4
##   [93] 10  5 11 10 11  5 11  5 10  3 10 10 12  5 11 10 10 10 10 10 10 10 12
##  [116] 11 12 10 10  5 11 10 10 11 10 10 12  5 10 10 11 10 10 10 10 10 10 10
##  [139] 11 10 10 10 10 10 10 10 10 11 12 10 10 10 10  5 10 10 11 11 10 10 11
##  [162] 10 11 10 10 10 11 10 11 10 11 10  5 10  5 10 10 10 10 11 10  5 10 10
##  [185] 10 11 11 11 10 11 10  5 10 10 10 10 12 12 10 12 10 10 11 10 10 10 10
##  [208] 11 10 11 11 10 11 10 10 10 10 10 10 10 11 10 12 11 10 10 11 10 11 10
##  [231] 10 10 10 10  5 10 10  5 12 10 10 10 10  5 10 10 10 10 10 10 12 11 12
##  [254] 10 10  5 11 11  5 10 11  5 10 12 10 10 10  3 10 10  5 10 10 11 11  5
##  [277]  5 10 11 10  5 11 10 10 10 10 10  5  5 11 11 11 10  5 10 10 10 10 10
##  [300]  4 11 10  5  5 11 10 11 10 10 10 10 10 11 10 10 10 10 10  5  5 11 11
##  [323] 10 11  5  5  4 10 10 11 11 10 12  5 10 10  5 10  5 11 10 11 10 11 10
##  [346] 11  5 12 10 12  5 10 11 10 10 11 10  5 10 10 10  5  5 11 11 10 13 10
##  [369] 10 10 11 10 10 11 11 11 10 10  5  5  5 10 10 10 11 10 10 10 10  5 10
##  [392]  5 10 10  5 11 11 11 10 10 10 10 10 13 10 10  4 11 10 10 10 10 11 10
##  [415] 10  5 10 10 11 10 10 11 10 10 11 11 10 10 10 10 10 10 10  4 11 11 10
##  [438]  5  5 10  5 10 10 11 11 11  5 10 10 10 10 10 10 10 10 10 10  5 13 10
##  [461] 10  5 10 10 10 11  5  5  5 11 10  5 10 10  5  5 10 10 10 11 10 10 10
##  [484] 10 11 10  5 11 10 12 12 11  5 11 10 11 10  3 10 11  5 11 10  5 10 10
##  [507] 11  5 10  5 10 11 10 10 11  5 12  5 10 13 10  5 10  5 10 10 11 11 10
##  [530] 10  5 10 10 11 10 10 10 10 11 11 10 10 10 10 10 10 10 10  5 10 10 11
##  [553] 10  5 11 10 10 11 10  4 10 11 11 12 11 10  5 11 11 10 11 10  5 12 10
##  [576] 12 10 11  5  5 10 10 10  5 10 10 10 11  5 11 11 10 10 10 10 10  5 10
##  [599]  5 12 11 11 12 11  5 11 10 13 10 11 10 10 11 10  5 10 10  5 10 11 10
##  [622] 10 10 13 11  5 12 12 10 10 10 10 10 10 11 10 12 10 10 10 10  5 10 10
##  [645] 11 10 10 11 10 10 11 10 10 11 10 10 10 12 10 10 10 11 10 10  5 10 10
##  [668] 11 11 10 10 10 10 10 10 11 12 12  5 11 11 10 11 10 13 10 11  4  5 10
##  [691]  5 10  5 11  5 12  5  5 11 11 10 10 10  5 11 11 10  5 10 10 10  5 10
##  [714] 10 10 10 10 10 11 10 12 11 11 10 10 11 10 10 10  4  5 10  5 10  4  4
##  [737] 12 10 10 11 10 10 10 11  5 11 10 10  5 11 10 10 10 10 10 10  5 10 10
##  [760] 10 10 10 10 10 11 10 10 10 10  5 10 10 10  5 10  5 11 13 12 11 11 10
##  [783] 10 10 11 10  5 10 10 11 11 10 10 10 11 10 10 11  5 10 10 10 10 10  4
##  [806] 10 11 10 10 10 10 10 11 11 10 10  4 10 10 10 10 10 10 10 10  5 10 10
##  [829] 10  5  5 10 11 10 11 11 10  5 10 10 10 10 10 10 10 10 10  5 11 11 10
##  [852] 13 10 11 10 10 10 10  5 10 11 10 12 10 10 10 10 10 10 10 10 10 11  5
##  [875] 10 11 10 10 11 10 11 10 10 10 10 10 11 11  5 10  5 10 11 10  5 11 12
##  [898] 10 10 10 10 10 12  5  5 10 11 10  5 10 11 10 10 10 10 11  5 12 11 10
##  [921] 10 10 10 11 10 12 10 10 11 10 10 10 10 11 10  5 11  5 10 11 11 12 10
##  [944]  5 10  5 10 10 10 10 10  4  5 11 10 10 11 10 11 10  4 11 11 11 10 10
##  [967] 10 10 11 10 10 10  5 12  5 12 10  5 10 11  5 12 12 10 10 10 12 10 10
##  [990]  5 10 10 10 10 10 10 10 11 10 10 10 10 10 11 11 10 10 10 10 10 10 11
## [1013]  4 10 10 10 10 10 11 10 10 10 10 11  5 10 11 11  5 10 10 10 10 10 11
## [1036] 10 12 11  5  5  5 11 11 10 10 11 10 10  5 10 10 10  5 12 10 10  5 10
## [1059]  5 10 10 10 10  5 12  5 10 10  5  5  5 10 10 12 11 10 10 10 10 10 10
## [1082] 10 11 10 10  5 10  5 11  5 10 10 10 10 10 10 10 11 11 10 10 10 10 10
## [1105] 10 11 11 11 10 10 10 10 10 12 13 10 10 11 11 10 10  5 11 10 10 10 10
## [1128] 11 10 10 10  5 12 11  5 10 11 11 10 10 10  5 11 10 10 10  5  5 10 11
## [1151] 13 11 12 10 10  5 11  5 10 10  5 11 10 11 10 10  5 10 10 10 10 10 10
## [1174] 10  4 10 11 11 10 10  5  5 11  4 10 11  5 11 10 11 10 10 10 11  5 10
## [1197] 10 11 11 11 10 10  5 10 10 12 11 10  5  5 10  5 10  5  5 10 10 11 10
## [1220] 13  4 10 10  5 10 10 10 11  5  5 12 12 10 10  5 10 10 10  5 10 11 10
## [1243] 10 12 11 10 10 11 10 11 11 11 10 10 10 10 10 11 10 10  5 11  4 10 11
## [1266] 10 10  5 10 10 12 10 11 10 10  5 11 10 11 10 10 10  5 11 11 11 10 12
## [1289]  5 10  5 10 12 10 10  5  5 10 10 12 10 10 11 10 10  5  5 11 10 11 12
## [1312]  5 10 11 10 11 11 10 12 10 10  4 10 10 10 10  5 10 11 10 12 11 10 11
## [1335] 10 10 11 10 10 10 11 10 12  5 10 10 10 10 10 10 10 10 10 12  5  5 12
## [1358] 10 10 10 11 11 10 10 10 10 12 10 10 10  4 10 10 10 11 11 10 10 10 10
## [1381] 11 10 10 10 10 10 10  5 10 10  5 11 12 10 10  4 10 10 11 10 10 10  5
## [1404] 10 10 10 10 11 11  5 11 10 10 10 12  5 10 10 10 12 10 11 10 10 11 11
## [1427] 11 11 10 11 11 10 10  5 12  5 10 10 10 10 10  5 10 10 10 10 10  5  5
## [1450] 10 10 10 10 10  5 10 10 10 11 10 10 10 11 10  5 10 10 10  5 10  4 11
## [1473] 10 10  5  5  5 10 10 10 11 10 10 10  5  5  5 10 10 11  5  5  5  5  5
## [1496] 10 11 10 10 10 10 11  5 10 10 11 11 10  5 10  5 10 10  5  5 10 11 11
## [1519] 10 10 11 11 10 10 11 12  5 10 10 11  4  5 10 10 10 10 11 11 10 11 10
## [1542]  4 10 11 11 11 10 11 10 10 10 11  5  5 10 10 10 11 10 10 10 10 11 10
## [1565]  4 10 11 11 11 12 10 10 10 10 11 10 10  5 12 12  5 10 10 10 10 11  5
## [1588] 10 10 10 10 11 10 11  5 10 10 10 12 10 11 10 10 10 10 11 10 10  5 10
## [1611] 10  5 10 10 10 10 10 10 13 10 10 10 11 10  5 12  5 10 10 11 10 10 10
## [1634] 11 11 11 11 10 11 10 11 10 10 10  5 10 11 10 12  4 10 10 10 10 10 10
## [1657] 10  5 11 10  1 10 10 10 11 10 11 10 10 10 10 10 11 11 10 10 10 10 10
## [1680] 10 10 10 10 10 11 10 10 12 11 10 11 10 10 10 11 11 10 11 10 10 10 10
## [1703] 10 12 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 11 11 11 10
## [1726] 10 10 11 11 10 11 11 11 10 10 11 10 10 10 10 10 10 10 10 10 10 10 11
## [1749] 10 11 10 10 10 10 11 10 10 10 11 11 11 11 10 11 10 10 10 10 10 10 10
## [1772] 10 12 11 10 10 11 10 10 12 10 10 10 10 10 10 10 10 11 10 10 10 11 10
## [1795] 10 10 10 11 10 10 11 10 10 10 10 10 10 11 10 10 10 10 11 10 10 11 11
## [1818] 10 10 10 10 10 10 10 11 11 11 10 11 10 10 10 10 11 11 12 10 10 10 10
## [1841] 10 10 10 10 10 11 10 10 10 10 10 10 10 10 10 11 10 11 10 10 10 11 11
## [1864] 10 10 10 11 12 10 10 10 10 10 10 10 10 10 10 10 10 11 10  5 10 10 10
## [1887] 10 10 10 10 10 12 11 10 10 10 11 11 11 10 11 11 10 10 10 10 10 11 10
## [1910] 10 11
## 
## $sqft_above
##   [1] 3890 3760 4750 5310 3840 6070 4040 3840 4860 3860 4030 4410 6050 4740
##  [15] 5403 4120 4150 4370 4570 4670 4230 4270 5670 4475 6070 4300 3990 5990
##  [29] 4460 4190 4250 4930 3840 5000 5320 6720 4210 3890 3830 5153 4210 4740
##  [43] 5450 4280 3880 4570 3750 4130 5710 4700 3810 4490 4150 4070 3840 5770
##  [57] 4030 3810 4490 3840 4360 3960 4460 4340 3850 4750 4350 4160 5180 4070
##  [71] 5130 6350 3770 3780 4750 3950 3915 3880 4870 4030 4740 3850 4860 4310
##  [85] 3930 4070 4080 5400 6110 3859 4600 3940 3800 4230 4070 5050 4000 4220
##  [99] 3860 3890 3890 5230 5370 4140 4270 4430 4210 4130 3780 4610 7680 4250
## [113] 3820 4120 3770 4610 5080 4140 4340 3770 5760 4390 4000 3810 4400 6200
## [127] 4320 4380 4040 4580 4180 4240 4070 6110 4115 3780 3830 3780 5830 4140
## [141] 3920 3930 5990 5070 3910 3810 4020 3830 3880 3810 4130 4750 4060 4060
## [155] 4890 3820 4160 4020 4090 3990 4560 4570 3760 4060 4430 4720 3810 5450
## [169] 3780 3840 4160 5610 4900 4290 3830 4050 4260 4440 3930 3750 6220 3910
## [183] 4500 3900 3930 3831 4130 4410 4470 4280 5400 4810 4750 3800 4320 3920
## [197] 4760 8570 4360 4260 5140 3920 4410 4230 4280 4380 5250 4210 3880 3950
## [211] 3750 4030 3970 4200 4140 5240 4820 3840 4100 3880 4360 4380 4010 4050
## [225] 3830 6430 3790 4150 4060 4080 3810 3800 3770 4170 3950 5584 8860 4500
## [239] 4200 4280 5220 4340 4850 3790 5844 5530 4190 4010 3950 4910 3810 3930
## [253] 4060 3880 4610 3790 4700 3900 3830 3840 3930 5000 3920 3870 3870 4120
## [267] 5050 6290 4280 3860 3980 4490 3810 5370 4070 3760 4090 5000 3770 5440
## [281] 6085 3920 4400 3860 4470 3750 6640 4620 4570 4380 4500 3850 3990 4070
## [295] 3810 4330 4030 4170 4170 4820 4570 4570 3800 4320 3820 7420 4850 4310
## [309] 3920 4000 4225 4370 3950 3910 3780 5050 3980 5550 4133 4610 3990 3870
## [323] 4285 4430 3940 9410 4080 3910 4220 3830 5330 4270 4020 3960 4670 4020
## [337] 3870 4260 6380 7880 4440 4160 3910 4470 4500 4010 5370 4180 5190 3880
## [351] 3880 5020 5980 4430 4440 4140 4410 5550 4540 4540 4130 5020 4430 6120
## [365] 4480 4110 4770 4930 5160 4570 3800 4110 4230 4100 4780 4670 4980 3790
## [379] 3860 3900 4100 4110 4190 4830 4080 4790 4150 5010 4320 4440 4050 3930
## [393] 4400 4040 4670 4070 4660 4030 3790 3980 3920 4630 3970 3750 4210 3780
## [407] 4510 4560 3920 4420 3960 4240 4150 4040 3790 4700 3800 4940 5110 4770
## [421] 6660 4060 4300 3870 4740 4610 4090 4010 4270 4080 5090 4240 3920 3930
## [435] 4070 4270 4070 4170 4510 3920 4040 5480 4200 5040 4490 3960 3810 3920
## [449] 4150 4410 3880 4030 5180 4180 4010 3820 4010 3830 4170 3950 3860 3760
## [463] 5240 4083 8020 3905 4590 3810 3950 3750 4670 4290 6090 3970 7320 4270
## [477] 4040 3880 4290 3900 4040 4930 3820 4910 3780 3960 5490 3940 4460 5300
## [491] 3990 4260 3850 4460 4065 3830 5080 3800 3820 4360 4660 4000 5430 3750
## [505] 4180 4160 4170 4120 4750 4660 3970 4180 4590 4800 7850 4280 4000 3855
## [519] 3750 4570 3745 3900 3820 4220 4340 4130 3860 3845 4100 3800 5060 4140
## [533] 3910 4000 3820 4150 3960 4810 4360 4410 4270 3970 4010 4150 4030 4030
## [547] 4280 4960 6530 4640 3860 6370 4360 3990 3980 4020 4430 3870 4840 4150
## [561] 4390 3880 3828 4260 4140 3940 4430 4040 4130 4230 4950 3940 4240 5190
## [575] 3906 5480 4073 3830 3820 3860 3930 4100 3870 4140 4370 4010 4150 4000
## [589] 3950 4900 4070 4350 3980 4100 3950 3770 4450 3890 4590 4620 4010 6420
## [603] 4280 4300 4180 4420 4350 4500 4170 3990 4470
## 
## $sqft_basement
##   [1] 1530 1700 1620 1510 1600 1700 1600 1490 1640 1460 2000 1430 1950 2060
##  [15] 1820 1450 1950 1600 1600 1580 1470 1780 1900 1450 1760 1600 1720 1760
##  [29] 1510 1800 1810 1460 1830 1590 1620 1420 1960 1560 2020 2110 1490 2390
##  [43] 1600 1780 2020 1700 1410 1450 1500 1800 1580 1800 1480 1670 1510 1800
##  [57] 1620 2570 2110 1490 2590 1580 1480 1410 1480 2500 1940 1550 2350 2490
##  [71] 1481 1470 1520 1850 1830 1660 2130 2600 1450 1850 1690 1850 1600 1520
##  [85] 1780 2620 1600 1430 1798 2060 1610 1780 1440 1590 1500 1450 1600 1690
##  [99] 1500 1470 1570 1590 1650 1500 1510 1910 1630 2360 1852 2090 2400 1480
## [113] 1790 2150 1500 1440 1500 1430 1720 1500 1410 1680 2100 3000 1870 1530
## [127] 1420 1460 1710 1640 1500 1410 2030 1650 1820 1540 1540 1710 1460 2850
## [141] 1610 1800 1660 1420 1420 2170 1450 2150 1560 1560 1830 1720 1460 1590
## [155] 1670 1500 1530 1430 1500 1530 2170 1940 2040 1800 1490 1750 1570 1420
## [169] 1460 2030 2720 1720 1420 1660 1610 1460 1500 2730 1720 1440 1590 1480
## [183] 1450 1420 1840 3480 1440 1540 1790 1490 1540 1680 2160 1430 1690 1450
## [197] 1650 1700 1870 1920 2330 2330 1690 1480 1860 1500 1490 2050 4820 1410
## [211] 2060 1600 1780 1560 1913 1830 1550 1440 1750 1760 1910 1870 1430 1410
## [225] 1540 1420 1550 1480 1500 1680 1650 1580 1790 1850 2010 1940 1410 1450
## [239] 2060 1680 3260 1420 1660 1820 2200 1450 1480 1590 1510 1730 1600 1460
## [253] 1510 1700 1500 2020 2100 1540 2196 1590 1570 1930 1810 1410 1800 1640
## [267] 1540 1420 1590 2080 1510 1530 1900 1490 1430 2580 1940 1548 1740 1500
## [281] 1900 1630 1560 1460 1410 1620 1860 1460 2060 1890 1570 1450 1510 1500
## [295] 1420 2220 1500 1420 2070 4130 1520 1460 1460 1650 1580 1630 1590 1430
## [309] 1590 1790 1740 1500 1910 2090 1450 2250 1450 2240 2160 1760 1440 1600
## [323] 1680 1870 1430 1990 2170 1510 1440 1500 1860 1720 1790 1410 1640 1660
## [337] 1760 2080 1580 1430 1440 1500 1500 1540 1410 2550 1580 1660 1740 1470
## [351] 1580 2300 1850 1650 2610 1890 1500 1600 1460 1940 1660 1500 1650 3500
## [365] 1510 1700 1510 2160 1590 1780 1500 1650 1530 2150 2070 1580 1570 2220
## [379] 2010 1690 1450 1750 1690 1600 1440 1590 1680 1450 1530 1420 2200 1430
## [393] 1420 1550 1710 2020 1500 1620 1760 1430 1816 1580 1520 1460 2000 1420
## [407] 1510 1650 1550 1500 1610 1540 1780 1900 1800 1490 1630 2020 1570 1600
## [421] 1590 1430 1520 1780 1670 1600 1500 1540 2190 1560 1800 1690 1760 1710
## [435] 1690 1500 1460 1520 1590 2070 1850 1620 1525 1880 1570 1840 1620 1450
## [449] 1800 1470 1440 1600 1480 1700 1790 1670 1570 1580 1790 2550 1600 2220
## [463] 1950 1730 1480 1450 1420 1580 1420 1440 1760 2180 1560 1720 1830 1670
## [477] 1500 1710 2310 1420 1520 1420 1770 2120 1870 1870 1540 2040 1950 1640
## [491] 1420 1640 2810 1450 1490 1800
## 
## $sqft_living15
##   [1] 4760 4110 3740 3836 3890 4180 3920 4550 3860 3860 4050 4680 3930 4300
##  [15] 3850 4210 3980 3920 3715 4190 4230 3890 4100 4090 3680 4100 3720 4750
##  [29] 3950 5790 3890 4320 4640 3970 4600 3950 5600 3820 3950 3720 4800 3990
##  [43] 3720 3860 3900 3720 4060 3910 3730 3760 3910 3770 3960 4620 4050 3860
##  [57] 3730 4470 4890 3690 3680 3780 3980 3690 4240 3890 4670 4050 4350 5380
##  [71] 4330 3830 5000 3680 4250 4080 4110 4190 3770 3736 4600 3980 4170 4090
##  [85] 3750 3680 4630 3910 3880 4110 3960 3710 3930 4170 3721 3930 4650 4020
##  [99] 3920 3900 4470 3680 3710 4240 4020 3920 4320 3770 4190 3720 4230 3960
## [113] 3950 3740 3740 3670 4080 5340 4440 3690 4690 4920 4160 3810 4480 3880
## [127] 3690 3990 4050 5790 5330 3930 3920 4010 4700 4090 3750 3930 3820 4100
## [141] 3720 3750 3880 4300 3790 3790 3710 3990 4150 4020 4340 3850 3700 3840
## [155] 4042 3850 3930 3800 3850 3850 5030 3715 4740 5070 4280 3940 4940 4770
## [169] 3880 4830 3730 3730 4080 4100 3880 5170 3800 3960 3840 3860 4190 4590
## [183] 4130 3770 3990 3970 4060 4060 3990 4140 3890 3670 3810 4010 3680 3750
## [197] 4080 4310 3910 3950 3720 4042 3790 3740 3680 4220 4300 3730 5030 4540
## [211] 4470 3700 4190 4470 3860 4200 3800 3930 3930 3850 4170 3770 3690 3750
## [225] 4240 3860 4510 3830 3720 3810 3780 4480 3740 4390 4570 3740 6110 4260
## [239] 3780 4340 4080 3890 4290 4010 3880 4560 4000 4830 3960 3980 4830 3670
## [253] 3790 4440 3750 4620 3730 3890 4010 4670 4070 3750 3680 3690 5110 4060
## [267] 3850 3670 4560 4670 3830 4130 4170 3810 4850 4290 4410 5610 3680 3850
## [281] 3780 4660 3900 3870 3920 4370 4590 4610 3960 3700 3960 3980 4050 4740
## [295] 4400 3680 4290 4950 3900 4070 4180 5500 4850 3730 3930 3860 3674 3750
## [309] 3800 3950 4210 3780 4030 3730 3740 3880 3830 4390 4440 4320 3940 4050
## [323] 4760 4280 3740 4400 3750 3840 3870 3680 3670 3930 3740 3950 3690 3930
## [337] 4750 3750 3750 3890 4930 4040 4100 4240 3710 4140 3740 4190 4520 4630
## [351] 4030 4760 4490 3760 4180 3730 4130 4040 3770 4080 3770 4410 3800 3820
## [365] 3830 4760 4913 3670 3970 4040 4770 3810 4080 3720 3800 5220 3800 3880
## [379] 3720 3860 4630 4630 4240 4050 3850 3870 4210 5790 3790 4730 3710 3720
## [393] 3920 4110 3810 3920 4020 4340 3860 3980 3760 3920 3830 3820 3920 4100
## [407] 4780 3920 4440 3670 4270 3810 3740 3920 3740 4240 4300 3810 3740 3840
## [421] 4760 4090 3910 3700 3880 4040 4010 3840 4460 3670 3810 3880 3920 5200
## [435] 3730 4160 3900 3990 4900 4210 4190 4140 4190 3940 4410 3780 4470 3760
## [449] 3760 4220 4130 4540 3730 3710 3680 4120 3920 3770 4730 4270 3940 3820
## [463] 5080 3680 4320 4590 4362 3740 3730 4420 3690 4560 4100 4420 3670 4750
## [477] 6210 4510 4290 3690 4920 3820 4290 4310 3880 3730 3730 3970 3690 4920
## [491] 3970 3726 4120 4420 4410 3960 4800 4690 3740 3760 5790 4070 4480 3920
## [505] 3700 3930 4390 4530 5790 4225 4310 4410 4130 4443 3690 4240 3940 3770
## [519] 4000 3770 4495 4080 3840 3860 4100 3710 3710 4200 4340 3820 4920 4690
## [533] 4620 3770 3690 3710 4620 4920 3860 4650 5790 3860 3990 4560

Functions

# Apply graph_outliers() to every column of dfr; the resulting list is
# auto-printed, one element per column.
lapply(X = dfr, FUN = graph_outliers)
## $price

## 
## $bedrooms

## 
## $bathrooms

## 
## $sqft_living

## 
## $floors

## 
## $grade

## 
## $sqft_above

## 
## $sqft_basement

## 
## $sqft_living15

Observation
Outliers are present in many features.
However, the outlier count is high.
For this model we will therefore keep the outliers in the data.

# Preview the first six rows of dfr.
head(dfr, n = 6L)
##     price bedrooms bathrooms sqft_living floors grade sqft_above
## 1  221900        3      1.00        1180      1     7       1180
## 2  538000        3      2.25        2570      2     7       2170
## 3  180000        2      1.00         770      1     6        770
## 4  604000        4      3.00        1960      1     7       1050
## 5  510000        3      2.00        1680      1     8       1680
## 6 1225000        4      4.50        5420      1    11       3890
##   sqft_basement sqft_living15
## 1             0          1340
## 2           400          1690
## 3             0          2720
## 4           910          1360
## 5             0          1800
## 6          1530          4760

Observation
Here we see that many predictor variables are categorical variables stored as numbers. We should convert these to factor variables.

Correlation

# Pearson correlation of every column of dfr (price included) with price.
# vapply() replaces the grow-in-a-loop pattern: the result is allocated once,
# each column must yield exactly one numeric value, and the column names of
# dfr are carried over to the result automatically.
dfrCorr <- vapply(
    dfr,
    function(col) cor(dfr$price, as.numeric(col)),
    numeric(1)
)
# Unnamed copy of the same values, kept because this name was defined by the
# original code and may be referenced later in the file.
vctCorr <- unname(dfrCorr)
dfrCorr
##         price      bedrooms     bathrooms   sqft_living        floors 
##     1.0000000     0.3083496     0.5251375     0.7020351     0.2567939 
##         grade    sqft_above sqft_basement sqft_living15 
##     0.6674343     0.6055673     0.3238160     0.5853789

Visualize

# Reshape dfr to long format: every column except price is stacked into a
# (variable, value) pair so each feature can be plotted against price.
dfrGraph <- dfr %>%
    gather(key = "variable", value = "value", -price)
head(dfrGraph, n = 6L)
##     price variable value
## 1  221900 bedrooms     3
## 2  538000 bedrooms     3
## 3  180000 bedrooms     2
## 4  604000 bedrooms     4
## 5  510000 bedrooms     3
## 6 1225000 bedrooms     4
# One facet per feature: jittered points of value vs. price plus a straight
# lm trend line (no confidence band). Each facet gets its own x scale.
# The aesthetics are declared once here and inherited by both layers.
ggplot(dfrGraph, aes(x = value, y = price, colour = variable)) +
    geom_jitter() +
    geom_smooth(method = lm, se = FALSE) +
    facet_wrap(~variable, scales = "free_x") +
    labs(title = "Relation Of PRICE With Other Features")

# Stepwise model selection with step(), starting from the full model that
# regresses price on every other column of dfr.
# trace = 0 silences the per-step log; steps = 100 caps the number of steps.
stp <- step(
    lm(price ~ ., data = dfr),
    trace = 0,
    steps = 100
)
stpSummary <- summary(stp)
stpSummary
## 
## Call:
## lm(formula = price ~ bedrooms + bathrooms + sqft_living + floors + 
##     grade + sqft_above + sqft_living15, data = dfr)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1044392  -132853   -21972    96253  4662663 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -5.237e+05  1.509e+04 -34.693  < 2e-16 ***
## bedrooms      -4.064e+04  2.276e+03 -17.854  < 2e-16 ***
## bathrooms     -2.145e+04  3.714e+03  -5.774 7.84e-09 ***
## sqft_living    2.682e+02  5.057e+00  53.030  < 2e-16 ***
## floors        -7.439e+03  4.191e+03  -1.775   0.0759 .  
## grade          1.044e+05  2.518e+03  41.462  < 2e-16 ***
## sqft_above    -7.876e+01  4.982e+00 -15.809  < 2e-16 ***
## sqft_living15  2.061e+01  4.042e+00   5.098 3.46e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 246100 on 21605 degrees of freedom
## Multiple R-squared:  0.5509, Adjusted R-squared:  0.5507 
## F-statistic:  3785 on 7 and 21605 DF,  p-value: < 2.2e-16
# steps = 100 was chosen as columns * columns, i.e. 10 * 10

Observation
The best results are given by the model price ~ bedrooms + bathrooms + sqft_living + floors + grade + sqft_above + sqft_living15

Make Final Multi Linear Model

# Response and the seven predictors kept by the stepwise search, pulled out
# of dfr as plain vectors so the model formula can use the short names
# y and x1..x7 (new data passed to predict() must use the same names).
y  <- dfr[["price"]]
x1 <- dfr[["bedrooms"]]
x2 <- dfr[["bathrooms"]]
x3 <- dfr[["sqft_living"]]
x4 <- dfr[["floors"]]
x5 <- dfr[["grade"]]
x6 <- dfr[["sqft_above"]]
x7 <- dfr[["sqft_living15"]]

# Multiple linear regression of price on the seven predictors.
mlmModel <- lm(y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7)

Observation
No errors. Model successfully created.

Show Model

# Show the coefficient table, residual quantiles and fit statistics
# of the final model.
print(summary(mlmModel))
## 
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1044392  -132853   -21972    96253  4662663 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.237e+05  1.509e+04 -34.693  < 2e-16 ***
## x1          -4.064e+04  2.276e+03 -17.854  < 2e-16 ***
## x2          -2.145e+04  3.714e+03  -5.774 7.84e-09 ***
## x3           2.682e+02  5.057e+00  53.030  < 2e-16 ***
## x4          -7.439e+03  4.191e+03  -1.775   0.0759 .  
## x5           1.044e+05  2.518e+03  41.462  < 2e-16 ***
## x6          -7.876e+01  4.982e+00 -15.809  < 2e-16 ***
## x7           2.061e+01  4.042e+00   5.098 3.46e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 246100 on 21605 degrees of freedom
## Multiple R-squared:  0.5509, Adjusted R-squared:  0.5507 
## F-statistic:  3785 on 7 and 21605 DF,  p-value: < 2.2e-16

Test Data

# Single house to price: 3 bedrooms, 1 bathroom, 1180 sqft_living, 1 floor,
# grade 7, 1180 sqft_above, 1340 sqft_living15.
# Columns are named x1..x7 to match the predictor names in the model formula.
dfrTest <- data.frame(
    x1 = 3,
    x2 = 1,
    x3 = 1180,
    x4 = 1,
    x5 = 7,
    x6 = 1180,
    x7 = 1340
)
dfrTest
##   x1 x2   x3 x4 x5   x6   x7
## 1  3  1 1180  1  7 1180 1340
#dfrTest 

Observation
Test Data successfully created.

Predict

# Predicted price for the test house from the fitted linear model.
result <- predict(object = mlmModel, newdata = dfrTest)
print(result)
##        1 
## 307419.4